From 60c139a8448c462b7970bc560fa6e82019445dbf Mon Sep 17 00:00:00 2001
From: Daniel Lemire <lemire@gmail.com>
Date: Wed, 23 Sep 2020 10:00:39 -0400
Subject: [PATCH] Faster and more correct serialization (#1168)

* Adding new files.

* Better.

* Fixing minifier and adding tests.

* Adding benchmarks.

* Including the array header.

* Replacing old stream-based code by the new code.

* Doubling up the itoa.

* Hidden away to_chars in internal namespace.

* Removing the repetitions.

* Documented the itoa functions.

* Tuning the escape sequences.

* Moving the operators off the main namespace.

* Added more tests.

* Tweaking the implementation so that it works with and without exp.

* The string_builder template and mini_formatter class
 are not part of  our public API and are subject to change
 at any time!

* Adding a benchmark and some optimization.

* Cleaning.

* Strictly speaking, this header is needed.
---
 benchmark/bench_dom_api.cpp              | 122 ++-
 include/simdjson.h                       |   3 +
 include/simdjson/common_defs.h           |   9 +
 include/simdjson/dom/array-inl.h         |  30 -
 include/simdjson/dom/array.h             |  34 +-
 include/simdjson/dom/document.h          |   3 -
 include/simdjson/dom/element-inl.h       | 140 ----
 include/simdjson/dom/element.h           |  43 +-
 include/simdjson/dom/object-inl.h        |  39 -
 include/simdjson/dom/object.h            |  44 +-
 include/simdjson/dom/parser-inl.h        |   6 +-
 include/simdjson/dom/parser.h            |   1 -
 include/simdjson/dom/serialization-inl.h | 421 ++++++++++
 include/simdjson/dom/serialization.h     | 219 ++++++
 include/simdjson/minify.h                |  44 --
 src/simdjson.cpp                         |   2 +-
 src/to_chars.cpp                         | 946 +++++++++++++++++++++++
 tests/CMakeLists.txt                     |   1 +
 tests/basictests.cpp                     | 178 ++++-
 tests/minify_tests.cpp                   |  80 ++
 tests/test_macros.h                      |   5 +
 21 files changed, 1986 insertions(+), 384 deletions(-)
 create mode 100644 include/simdjson/dom/serialization-inl.h
 create mode 100644 include/simdjson/dom/serialization.h
 create mode 100644 src/to_chars.cpp
 create mode 100644 tests/minify_tests.cpp
diff --git a/benchmark/bench_dom_api.cpp b/benchmark/bench_dom_api.cpp
index 4f7eccad..b80578f1 100644
--- a/benchmark/bench_dom_api.cpp
+++ b/benchmark/bench_dom_api.cpp
@@ -63,16 +63,136 @@ static void serialize_twitter(State& state) {
     bytes += serial.size();
     benchmark::DoNotOptimize(serial);
   }
+  // we validate the result
+  {
+    auto serial = simdjson::minify(doc);
+    dom::element doc2; // we parse the minified output
+    if ((error = parser.parse(serial).get(doc2))) { throw std::runtime_error("serialization error"); }
+    auto serial2 = simdjson::minify(doc2); // we minify a second time
+    if(serial != serial2) { throw std::runtime_error("serialization mismatch"); }
+  }
   // Gigabyte: https://en.wikipedia.org/wiki/Gigabyte
   state.counters["Gigabytes"] = benchmark::Counter(
 	        double(bytes), benchmark::Counter::kIsRate,
 	        benchmark::Counter::OneK::kIs1000); // For GiB : kIs1024
   state.counters["docs"] = Counter(double(state.iterations()), benchmark::Counter::kIsRate);
 }
-BENCHMARK(serialize_twitter)->Repetitions(10)->ComputeStatistics("max", [](const std::vector<double>& v) -> double {
+BENCHMARK(serialize_twitter)->ComputeStatistics("max", [](const std::vector<double>& v) -> double {
     return *(std::max_element(std::begin(v), std::end(v)));
   })->DisplayAggregatesOnly(true);
 
+
+// Benchmark: simdjson::to_string() on a document that is one JSON string of 100000 digits.
+static void serialize_big_string_to_string(State& state) {
+  dom::parser parser;
+  std::vector<char> content;
+  content.push_back('\"'); // opening quote
+  for(size_t i = 0 ; i < 100000; i ++) {
+    content.push_back('0' + char(i%10)); // we add what looks like a long list of digits
+  }
+  content.push_back('\"'); // closing quote
+  dom::element doc;
+  simdjson::error_code error;
+  if ((error = parser.parse(content.data(), content.size()).get(doc))) {
+    cerr << "could not parse big string: " << error << endl;
+    return; // nothing to benchmark without a parsed document
+  }
+  size_t bytes = 0; // total size of the strings produced by the timed loop
+  for (SIMDJSON_UNUSED auto _ : state) {
+    auto serial = simdjson::to_string(doc);
+    bytes += serial.size();
+    benchmark::DoNotOptimize(serial);
+  }
+  // Gigabyte: https://en.wikipedia.org/wiki/Gigabyte
+  state.counters["Gigabytes"] = benchmark::Counter(double(bytes), benchmark::Counter::kIsRate,
+          benchmark::Counter::OneK::kIs1000); // For GiB : kIs1024
+  state.counters["docs"] = Counter(double(state.iterations()), benchmark::Counter::kIsRate);
+}
+BENCHMARK(serialize_big_string_to_string)->ComputeStatistics("max", [](const std::vector<double>& v) -> double {
+    return *(std::max_element(std::begin(v), std::end(v))); // "max" is the largest sample
+  })->DisplayAggregatesOnly(true);
+
+
+// Benchmark: simdjson::to_string() on the parsed twitter.json document, then a round-trip check.
+static void serialize_twitter_to_string(State& state) {
+  dom::parser parser;
+  padded_string docdata;
+  auto error = padded_string::load(TWITTER_JSON).get(docdata);
+  if(error) {
+      cerr << "could not load twitter.json: " << error << endl; // a load failure, not a parse failure
+      return;
+  }
+  // we do not want mem. alloc. in the loop.
+  if((error = parser.allocate(docdata.size()))) {
+      cerr << error << endl; // errors go to cerr like the other failure paths
+      return;
+  }
+  dom::element doc;
+  if ((error = parser.parse(docdata).get(doc))) {
+    cerr << "could not parse twitter.json: " << error << endl;
+    return;
+  }
+  size_t bytes = 0; // total size of the strings produced by the timed loop
+  for (SIMDJSON_UNUSED auto _ : state) {
+    auto serial = simdjson::to_string(doc);
+    bytes += serial.size();
+    benchmark::DoNotOptimize(serial);
+  }
+  // we validate the result
+  {
+    auto serial = simdjson::to_string(doc);
+    dom::element doc2; // we parse the stringify output
+    if ((error = parser.parse(serial).get(doc2))) { throw std::runtime_error("serialization error"); }
+    auto serial2 = simdjson::to_string(doc2); // we stringify again
+    if(serial != serial2) { throw std::runtime_error("serialization mismatch"); }
+  }
+  // Gigabyte: https://en.wikipedia.org/wiki/Gigabyte
+  state.counters["Gigabytes"] = benchmark::Counter(double(bytes), benchmark::Counter::kIsRate,
+          benchmark::Counter::OneK::kIs1000); // For GiB : kIs1024
+  state.counters["docs"] = Counter(double(state.iterations()), benchmark::Counter::kIsRate);
+}
+BENCHMARK(serialize_twitter_to_string)->ComputeStatistics("max", [](const std::vector<double>& v) -> double {
+    return *(std::max_element(std::begin(v), std::end(v))); // "max" is the largest sample
+  })->DisplayAggregatesOnly(true);
+
+// Benchmark: serialize the parsed twitter.json with one internal string_builder, cleared on each iteration.
+static void serialize_twitter_string_builder(State& state) {
+  dom::parser parser;
+  padded_string docdata;
+  auto error = padded_string::load(TWITTER_JSON).get(docdata);
+  if(error) {
+      cerr << "could not load twitter.json: " << error << endl; // a load failure, not a parse failure
+      return;
+  }
+  // we do not want mem. alloc. in the loop.
+  if((error = parser.allocate(docdata.size()))) {
+      cerr << error << endl; // errors go to cerr like the other failure paths
+      return;
+  }
+  dom::element doc;
+  if ((error = parser.parse(docdata).get(doc))) {
+    cerr << "could not parse twitter.json: " << error << endl;
+    return;
+  }
+  size_t bytes = 0; // total size of the text produced by the timed loop
+  simdjson::internal::string_builder<> sb;// not part of our public API, for internal use
+  for (SIMDJSON_UNUSED auto _ : state) {
+    sb.clear(); // start each iteration from an empty builder
+    sb.append(doc);
+    std::string_view serial = sb.str();
+    bytes += serial.size();
+    benchmark::DoNotOptimize(serial);
+  }
+  // Gigabyte: https://en.wikipedia.org/wiki/Gigabyte
+  state.counters["Gigabytes"] = benchmark::Counter(double(bytes), benchmark::Counter::kIsRate,
+          benchmark::Counter::OneK::kIs1000); // For GiB : kIs1024
+  state.counters["docs"] = Counter(double(state.iterations()), benchmark::Counter::kIsRate);
+}
+BENCHMARK(serialize_twitter_string_builder)->ComputeStatistics("max", [](const std::vector<double>& v) -> double {
+    return *(std::max_element(std::begin(v), std::end(v))); // "max" is the largest sample
+  })->DisplayAggregatesOnly(true);
+
+
 static void numbers_scan(State& state) {
   // Prints the number of results in twitter.json
   dom::parser parser;
diff --git a/include/simdjson.h b/include/simdjson.h
index eef1aced..1660d73d 100644
--- a/include/simdjson.h
+++ b/include/simdjson.h
@@ -43,6 +43,7 @@ SIMDJSON_DISABLE_UNDESIRED_WARNINGS
 // Public API
 #include "simdjson/simdjson_version.h"
 #include "simdjson/error.h"
+#include "simdjson/minify.h"
 #include "simdjson/padded_string.h"
 #include "simdjson/implementation.h"
 #include "simdjson/dom/array.h"
@@ -51,6 +52,7 @@ SIMDJSON_DISABLE_UNDESIRED_WARNINGS
 #include "simdjson/dom/element.h"
 #include "simdjson/dom/object.h"
 #include "simdjson/dom/parser.h"
+#include "simdjson/dom/serialization.h"
 
 // Deprecated API
 #include "simdjson/dom/jsonparser.h"
@@ -68,6 +70,7 @@ SIMDJSON_DISABLE_UNDESIRED_WARNINGS
 #include "simdjson/dom/parsedjson_iterator-inl.h"
 #include "simdjson/dom/parser-inl.h"
 #include "simdjson/internal/tape_ref-inl.h"
+#include "simdjson/dom/serialization-inl.h"
 
 SIMDJSON_POP_DISABLE_WARNINGS
 
diff --git a/include/simdjson/common_defs.h b/include/simdjson/common_defs.h
index d2d42a52..5786d2d4 100644
--- a/include/simdjson/common_defs.h
+++ b/include/simdjson/common_defs.h
@@ -6,6 +6,15 @@
 
 namespace simdjson {
 
+namespace internal {
+/**
+ * @private
+ * Our own implementation of the C++17 to_chars function.
+ * Defined in src/to_chars.cpp
+ */
+char *to_chars(char *first, const char *last, double value);
+}
+
 #ifndef SIMDJSON_EXCEPTIONS
 #if __cpp_exceptions
 #define SIMDJSON_EXCEPTIONS 1
diff --git a/include/simdjson/dom/array-inl.h b/include/simdjson/dom/array-inl.h
index c196d0bc..39d51a83 100644
--- a/include/simdjson/dom/array-inl.h
+++ b/include/simdjson/dom/array-inl.h
@@ -144,39 +144,9 @@ inline bool array::iterator::operator>=(const array::iterator& other) const noex
 inline bool array::iterator::operator>(const array::iterator& other) const noexcept {
   return tape.json_index > other.tape.json_index;
 }
-inline std::ostream& operator<<(std::ostream& out, const array &value) {
-  return out << minify<array>(value);
-}
 
 } // namespace dom
 
-template<>
-inline std::ostream& minifier<dom::array>::print(std::ostream& out) {
-  out << '[';
-  auto iter = value.begin();
-  auto end = value.end();
-  if (iter != end) {
-    out << minify<dom::element>(*iter);
-    for (++iter; iter != end; ++iter) {
-      out << "," << minify<dom::element>(*iter);
-    }
-  }
-  return out << ']';
-}
-
-#if SIMDJSON_EXCEPTIONS
-
-template<>
-inline std::ostream& minifier<simdjson_result<dom::array>>::print(std::ostream& out) {
-  if (value.error()) { throw simdjson_error(value.error()); }
-  return out << minify<dom::array>(value.first);
-}
-
-inline std::ostream& operator<<(std::ostream& out, const simdjson_result<dom::array> &value) noexcept(false) {
-  return out << minify<simdjson_result<dom::array>>(value);
-}
-
-#endif
 
 } // namespace simdjson
 
diff --git a/include/simdjson/dom/array.h b/include/simdjson/dom/array.h
index 1eaf00a1..82ef5296 100644
--- a/include/simdjson/dom/array.h
+++ b/include/simdjson/dom/array.h
@@ -4,10 +4,13 @@
 #include "simdjson/common_defs.h"
 #include "simdjson/error.h"
 #include "simdjson/internal/tape_ref.h"
-#include "simdjson/minify.h"
-#include <ostream>
 
 namespace simdjson {
+
+namespace internal {
+template<typename T>
+class string_builder;
+}
 namespace dom {
 
 class document;
@@ -125,19 +128,9 @@ private:
   friend class element;
   friend struct simdjson_result<element>;
   template<typename T>
-  friend class simdjson::minifier;
+  friend class simdjson::internal::string_builder;
 };
 
-/**
- * Print JSON to an output stream.
- *
- * By default, the value will be printed minified.
- *
- * @param out The output stream.
- * @param value The value to print.
- * @throw if there is an error with the underlying output stream. simdjson itself will not throw.
- */
-inline std::ostream& operator<<(std::ostream& out, const array &value);
 
 } // namespace dom
 
@@ -159,20 +152,7 @@ public:
 #endif // SIMDJSON_EXCEPTIONS
 };
 
-#if SIMDJSON_EXCEPTIONS
-/**
- * Print JSON to an output stream.
- *
- * By default, the value will be printed minified.
- *
- * @param out The output stream.
- * @param value The value to print.
- * @throw simdjson_error if the result being printed has an error. If there is an error with the
- *        underlying output stream, that error will be propagated (simdjson_error will not be
- *        thrown).
- */
-inline std::ostream& operator<<(std::ostream& out, const simdjson_result<dom::array> &value) noexcept(false);
-#endif
+
 
 } // namespace simdjson
 
diff --git a/include/simdjson/dom/document.h b/include/simdjson/dom/document.h
index 7b8f1e57..89a50473 100644
--- a/include/simdjson/dom/document.h
+++ b/include/simdjson/dom/document.h
@@ -2,7 +2,6 @@
 #define SIMDJSON_DOM_DOCUMENT_H
 
 #include "simdjson/common_defs.h"
-#include "simdjson/minify.h"
 #include <memory>
 #include <ostream>
 
@@ -67,8 +66,6 @@ public:
 
 private:
   inline error_code allocate(size_t len) noexcept;
-  template<typename T>
-  friend class simdjson::minifier;
   friend class parser;
 }; // class document
 
diff --git a/include/simdjson/dom/element-inl.h b/include/simdjson/dom/element-inl.h
index c258a0e0..6793558e 100644
--- a/include/simdjson/dom/element-inl.h
+++ b/include/simdjson/dom/element-inl.h
@@ -387,9 +387,6 @@ inline bool element::dump_raw_tape(std::ostream &out) const noexcept {
   return tape.doc->dump_raw_tape(out);
 }
 
-inline std::ostream& operator<<(std::ostream& out, const element &value) {
-  return out << minify<element>(value);
-}
 
 inline std::ostream& operator<<(std::ostream& out, element_type type) {
   switch (type) {
@@ -416,143 +413,6 @@ inline std::ostream& operator<<(std::ostream& out, element_type type) {
 
 } // namespace dom
 
-template<>
-inline std::ostream& minifier<dom::element>::print(std::ostream& out) {
-  using tape_type=internal::tape_type;
-  size_t depth = 0;
-  constexpr size_t MAX_DEPTH = 16;
-  bool is_object[MAX_DEPTH];
-  is_object[0] = false;
-  bool after_value = false;
-
-  internal::tape_ref iter(value.tape);
-  do {
-    // print commas after each value
-    if (after_value) {
-      out << ",";
-    }
-    // If we are in an object, print the next key and :, and skip to the next value.
-    if (is_object[depth]) {
-      out << '"' << internal::escape_json_string(iter.get_string_view()) << "\":";
-      iter.json_index++;
-    }
-    switch (iter.tape_ref_type()) {
-
-    // Arrays
-    case tape_type::START_ARRAY: {
-      // If we're too deep, we need to recurse to go deeper.
-      depth++;
-      if (simdjson_unlikely(depth >= MAX_DEPTH)) {
-        out << minify<dom::array>(dom::array(iter));
-        iter.json_index = iter.matching_brace_index() - 1; // Jump to the ]
-        depth--;
-        break;
-      }
-
-      // Output start [
-      out << '[';
-      iter.json_index++;
-
-      // Handle empty [] (we don't want to come back around and print commas)
-      if (iter.tape_ref_type() == tape_type::END_ARRAY) {
-        out << ']';
-        depth--;
-        break;
-      }
-
-      is_object[depth] = false;
-      after_value = false;
-      continue;
-    }
-
-    // Objects
-    case tape_type::START_OBJECT: {
-      // If we're too deep, we need to recurse to go deeper.
-      depth++;
-      if (simdjson_unlikely(depth >= MAX_DEPTH)) {
-        out << minify<dom::object>(dom::object(iter));
-        iter.json_index = iter.matching_brace_index() - 1; // Jump to the }
-        depth--;
-        break;
-      }
-
-      // Output start {
-      out << '{';
-      iter.json_index++;
-
-      // Handle empty {} (we don't want to come back around and print commas)
-      if (iter.tape_ref_type() == tape_type::END_OBJECT) {
-        out << '}';
-        depth--;
-        break;
-      }
-
-      is_object[depth] = true;
-      after_value = false;
-      continue;
-    }
-
-    // Scalars
-    case tape_type::STRING:
-      out << '"' << internal::escape_json_string(iter.get_string_view()) << '"';
-      break;
-    case tape_type::INT64:
-      out << iter.next_tape_value<int64_t>();
-      iter.json_index++; // numbers take up 2 spots, so we need to increment extra
-      break;
-    case tape_type::UINT64:
-      out << iter.next_tape_value<uint64_t>();
-      iter.json_index++; // numbers take up 2 spots, so we need to increment extra
-      break;
-    case tape_type::DOUBLE:
-      out << iter.next_tape_value<double>();
-      iter.json_index++; // numbers take up 2 spots, so we need to increment extra
-      break;
-    case tape_type::TRUE_VALUE:
-      out << "true";
-      break;
-    case tape_type::FALSE_VALUE:
-      out << "false";
-      break;
-    case tape_type::NULL_VALUE:
-      out << "null";
-      break;
-
-    // These are impossible
-    case tape_type::END_ARRAY:
-    case tape_type::END_OBJECT:
-    case tape_type::ROOT:
-      out << "unexpected content!!!"; // abort() usage is forbidden in the library
-    }
-    iter.json_index++;
-    after_value = true;
-
-    // Handle multiple ends in a row
-    while (depth != 0 && (iter.tape_ref_type() == tape_type::END_ARRAY || iter.tape_ref_type() == tape_type::END_OBJECT)) {
-      out << char(iter.tape_ref_type());
-      depth--;
-      iter.json_index++;
-    }
-
-    // Stop when we're at depth 0
-  } while (depth != 0);
-
-  return out;
-}
-
-#if SIMDJSON_EXCEPTIONS
-
-template<>
-simdjson_really_inline std::ostream& minifier<simdjson_result<dom::element>>::print(std::ostream& out) {
-  if (value.error()) { throw simdjson_error(value.error()); }
-  return out << minify<dom::element>(value.first);
-}
-
-simdjson_really_inline std::ostream& operator<<(std::ostream& out, const simdjson_result<dom::element> &value) noexcept(false) {
-  return out << minify<simdjson_result<dom::element>>(value);
-}
-#endif
-
 } // namespace simdjson
 
 #endif // SIMDJSON_INLINE_ELEMENT_H
diff --git a/include/simdjson/dom/element.h b/include/simdjson/dom/element.h
index 44b373d2..a54288e4 100644
--- a/include/simdjson/dom/element.h
+++ b/include/simdjson/dom/element.h
@@ -4,12 +4,14 @@
 #include "simdjson/common_defs.h"
 #include "simdjson/error.h"
 #include "simdjson/internal/tape_ref.h"
-#include "simdjson/minify.h"
 #include <ostream>
 
 namespace simdjson {
+namespace internal {
+template<typename T>
+class string_builder;
+}
 namespace dom {
-
 class array;
 class document;
 class object;
@@ -473,29 +475,10 @@ private:
   friend class array;
   friend struct simdjson_result<element>;
   template<typename T>
-  friend class simdjson::minifier;
+  friend class simdjson::internal::string_builder;
+
 };
 
-/**
- * Print JSON to an output stream.
- *
- * By default, the value will be printed minified.
- *
- * @param out The output stream.
- * @param value The value to print.
- * @throw if there is an error with the underlying output stream. simdjson itself will not throw.
- */
-inline std::ostream& operator<<(std::ostream& out, const element &value);
-
-/**
- * Print element type to an output stream.
- *
- * @param out The output stream.
- * @param value The value to print.
- * @throw if there is an error with the underlying output stream. simdjson itself will not throw.
- */
-inline std::ostream& operator<<(std::ostream& out, element_type type);
-
 } // namespace dom
 
 /** The result of a JSON navigation that may fail. */
@@ -557,20 +540,6 @@ public:
 #endif // SIMDJSON_EXCEPTIONS
 };
 
-#if SIMDJSON_EXCEPTIONS
-/**
- * Print JSON to an output stream.
- *
- * By default, the value will be printed minified.
- *
- * @param out The output stream.
- * @param value The value to print.
- * @throw simdjson_error if the result being printed has an error. If there is an error with the
- *        underlying output stream, that error will be propagated (simdjson_error will not be
- *        thrown).
- */
-simdjson_really_inline std::ostream& operator<<(std::ostream& out, const simdjson_result<dom::element> &value) noexcept(false);
-#endif
 
 } // namespace simdjson
 
diff --git a/include/simdjson/dom/object-inl.h b/include/simdjson/dom/object-inl.h
index 7a3027f7..a2c4e02a 100644
--- a/include/simdjson/dom/object-inl.h
+++ b/include/simdjson/dom/object-inl.h
@@ -236,47 +236,8 @@ inline bool object::iterator::key_equals_case_insensitive(std::string_view o) co
 inline key_value_pair::key_value_pair(std::string_view _key, element _value) noexcept :
   key(_key), value(_value) {}
 
-inline std::ostream& operator<<(std::ostream& out, const object &value) {
-  return out << minify<object>(value);
-}
-inline std::ostream& operator<<(std::ostream& out, const key_value_pair &value) {
-  return out << minify<key_value_pair>(value);
-}
-
 } // namespace dom
 
-template<>
-inline std::ostream& minifier<dom::object>::print(std::ostream& out) {
-  out << '{';
-  auto pair = value.begin();
-  auto end = value.end();
-  if (pair != end) {
-    out << minify<dom::key_value_pair>(*pair);
-    for (++pair; pair != end; ++pair) {
-      out << "," << minify<dom::key_value_pair>(*pair);
-    }
-  }
-  return out << '}';
-}
-
-template<>
-inline std::ostream& minifier<dom::key_value_pair>::print(std::ostream& out) {
-  return out << '"' << internal::escape_json_string(value.key) << "\":" << value.value;
-}
-
-#if SIMDJSON_EXCEPTIONS
-
-template<>
-inline std::ostream& minifier<simdjson_result<dom::object>>::print(std::ostream& out) {
-  if (value.error()) { throw simdjson_error(value.error()); }
-  return out << minify<dom::object>(value.first);
-}
-
-inline std::ostream& operator<<(std::ostream& out, const simdjson_result<dom::object> &value) noexcept(false) {
-  return out << minify<simdjson_result<dom::object>>(value);
-}
-#endif // SIMDJSON_EXCEPTIONS
-
 } // namespace simdjson
 
 #if defined(__cpp_lib_ranges)
diff --git a/include/simdjson/dom/object.h b/include/simdjson/dom/object.h
index 2e10d17a..d16dd8c8 100644
--- a/include/simdjson/dom/object.h
+++ b/include/simdjson/dom/object.h
@@ -4,10 +4,12 @@
 #include "simdjson/common_defs.h"
 #include "simdjson/error.h"
 #include "simdjson/internal/tape_ref.h"
-#include "simdjson/minify.h"
-#include <ostream>
 
 namespace simdjson {
+namespace internal {
+template<typename T>
+class string_builder;
+}
 namespace dom {
 
 class document;
@@ -211,7 +213,7 @@ private:
   friend class element;
   friend struct simdjson_result<element>;
   template<typename T>
-  friend class simdjson::minifier;
+  friend class simdjson::internal::string_builder;
 };
 
 /**
@@ -229,27 +231,6 @@ private:
   friend class object;
 };
 
-/**
- * Print JSON to an output stream.
- *
- * By default, the value will be printed minified.
- *
- * @param out The output stream.
- * @param value The value to print.
- * @throw if there is an error with the underlying output stream. simdjson itself will not throw.
- */
-inline std::ostream& operator<<(std::ostream& out, const object &value);
-/**
- * Print JSON to an output stream.
- *
- * By default, the value will be printed minified.
- *
- * @param out The output stream.
- * @param value The value to print.
- * @throw if there is an error with the underlying output stream. simdjson itself will not throw.
- */
-inline std::ostream& operator<<(std::ostream& out, const key_value_pair &value);
-
 } // namespace dom
 
 /** The result of a JSON conversion that may fail. */
@@ -273,21 +254,6 @@ public:
 #endif // SIMDJSON_EXCEPTIONS
 };
 
-#if SIMDJSON_EXCEPTIONS
-/**
- * Print JSON to an output stream.
- *
- * By default, the value will be printed minified.
- *
- * @param out The output stream.
- * @param value The value to print.
- * @throw simdjson_error if the result being printed has an error. If there is an error with the
- *        underlying output stream, that error will be propagated (simdjson_error will not be
- *        thrown).
- */
-inline std::ostream& operator<<(std::ostream& out, const simdjson_result<dom::object> &value) noexcept(false);
-#endif // SIMDJSON_EXCEPTIONS
-
 } // namespace simdjson
 
 #if defined(__cpp_lib_ranges)
diff --git a/include/simdjson/dom/parser-inl.h b/include/simdjson/dom/parser-inl.h
index 8121cb72..480fb14e 100644
--- a/include/simdjson/dom/parser-inl.h
+++ b/include/simdjson/dom/parser-inl.h
@@ -25,11 +25,7 @@ simdjson_really_inline parser &parser::operator=(parser &&other) noexcept = defa
 inline bool parser::is_valid() const noexcept { return valid; }
 inline int parser::get_error_code() const noexcept { return error; }
 inline std::string parser::get_error_message() const noexcept { return error_message(error); }
-inline bool parser::print_json(std::ostream &os) const noexcept {
-  if (!valid) { return false; }
-  os << doc.root();
-  return true;
-}
+
 inline bool parser::dump_raw_tape(std::ostream &os) const noexcept {
   return valid ? doc.dump_raw_tape(os) : false;
 }
diff --git a/include/simdjson/dom/parser.h b/include/simdjson/dom/parser.h
index 2e73c2a3..bb36d3e5 100644
--- a/include/simdjson/dom/parser.h
+++ b/include/simdjson/dom/parser.h
@@ -6,7 +6,6 @@
 #include "simdjson/error.h"
 #include "simdjson/internal/dom_parser_implementation.h"
 #include "simdjson/internal/tape_ref.h"
-#include "simdjson/minify.h"
 #include "simdjson/padded_string.h"
 #include "simdjson/portability.h"
 #include <memory>
diff --git a/include/simdjson/dom/serialization-inl.h b/include/simdjson/dom/serialization-inl.h
new file mode 100644
index 00000000..96c4dfec
--- /dev/null
+++ b/include/simdjson/dom/serialization-inl.h
@@ -0,0 +1,421 @@
+
+#ifndef SIMDJSON_SERIALIZATION_INL_H
+#define SIMDJSON_SERIALIZATION_INL_H
+
+#include "simdjson/dom/serialization.h"
+
+#include <cinttypes>
+#include <type_traits>
+
+namespace simdjson {
+namespace dom {
+inline bool parser::print_json(std::ostream &os) const noexcept {
+  if (!valid) { return false; }
+  simdjson::internal::string_builder<> sb;
+  sb.append(doc.root());
+  std::string_view answer = sb.str();
+  os << answer;
+  return true;
+}
+}
+/***
+ * Number utility functions
+ **/
+
+
+namespace {
+/**@private
+ * Escape sequence like \b or \u0001
+ * We expect that most compilers will use 8 bytes for this data structure.
+ **/
+struct escape_sequence {
+    uint8_t length;
+    const char string[7]; // technically, we only ever need 6 characters, we pad to 8
+};
+/**@private
+ * This converts a signed integer into a character sequence.
+ * The caller is responsible for providing enough memory (at least
+ * 20 characters.)
+ * Though various runtime libraries provide itoa functions,
+ * it is not part of the C++ standard. The C++17 standard
+ * adds the to_chars functions which would do as well, but
+ * we want to support C++11.
+ */
+char *fast_itoa(char *output, int64_t value) noexcept {
+  // Digits are emitted least-significant first, then reversed in place.
+  // Work on the unsigned magnitude: negating INT64_MIN as an int64_t is
+  // undefined behavior, whereas unsigned negation wraps to 2^63 as needed.
+  uint64_t magnitude = uint64_t(value);
+  if (value < 0) {
+    *output++ = '-'; // the sign stays in front and is not part of the reversal
+    magnitude = uint64_t(0) - magnitude;
+  }
+  char *write_pointer = output;
+  do {
+    *write_pointer++ = char('0' + (magnitude % 10));
+    magnitude /= 10;
+  } while (magnitude != 0);
+  // One past the last character written; this is what the caller gets back.
+  char *const answer = write_pointer;
+  // Reverse [output, answer) so the most significant digit comes first.
+  char *second_write_pointer = output;
+  for (write_pointer -= 1; second_write_pointer < write_pointer;
+       write_pointer--, second_write_pointer++) {
+    const char c1 = *write_pointer;
+    *write_pointer = *second_write_pointer;
+    *second_write_pointer = c1;
+  }
+  return answer;
+}
+/**@private
+ * This converts an unsigned integer into a character sequence.
+ * The caller is responsible for providing enough memory (at least
+ * 20 characters: UINT64_MAX has 20 decimal digits.)
+ * Though various runtime libraries provide itoa functions,
+ * it is not part of the C++ standard. The C++17 standard
+ * adds the to_chars functions which would do as well, but
+ * we want to support C++11.
+ */
+char *fast_itoa(char *output, uint64_t value) noexcept {
+  // Classic two-pass itoa: emit the decimal digits starting with the least
+  // significant one, then flip the emitted range so it reads left to right.
+  char *tail = output;
+  for (;;) {
+    const uint64_t quotient = value / 10;
+    *tail = char('0' + (value - quotient * 10));
+    tail++;
+    value = quotient;
+    if (value == 0) { break; }
+  }
+  // `tail` is now one past the last digit; that is the value handed back.
+  char *const past_the_end = tail;
+  char *head = output;
+  // Swap the outermost pair and walk inwards until the cursors meet.
+  for (tail--; head < tail; head++, tail--) {
+    const char low_digit = *head;
+    *head = *tail;
+    *tail = low_digit;
+  }
+  return past_the_end;
+}
+} // anonymous namespace
+namespace internal {
+
+/***
+ * Minifier/formatter code.
+ **/
+
+simdjson_really_inline void mini_formatter::number(uint64_t x) {
+  char number_buffer[24];
+  char *newp = fast_itoa(number_buffer, x);
+  buffer.insert(buffer.end(), number_buffer, newp);
+}
+
+simdjson_really_inline void mini_formatter::number(int64_t x) {
+  char number_buffer[24];
+  char *newp = fast_itoa(number_buffer, x);
+  buffer.insert(buffer.end(), number_buffer, newp);
+}
+
+simdjson_really_inline void mini_formatter::number(double x) {
+  char number_buffer[24];
+  // Currently, passing the nullptr to the second argument is
+  // safe because our implementation does not check the second 
+  // argument.
+  char *newp = internal::to_chars(number_buffer, nullptr, x);
+  buffer.insert(buffer.end(), number_buffer, newp);
+}
+
+simdjson_really_inline void mini_formatter::start_array() { one_char('['); }
+simdjson_really_inline void mini_formatter::end_array() { one_char(']'); }
+simdjson_really_inline void mini_formatter::start_object() { one_char('{'); }
+simdjson_really_inline void mini_formatter::end_object() { one_char('}'); }
+simdjson_really_inline void mini_formatter::comma() { one_char(','); }
+
+
+simdjson_really_inline void mini_formatter::true_atom() { 
+  const char * s = "true";
+  buffer.insert(buffer.end(), s, s + 4);
+}
+simdjson_really_inline void mini_formatter::false_atom() {
+  const char * s = "false";
+  buffer.insert(buffer.end(), s, s + 5);
+}
+simdjson_really_inline void mini_formatter::null_atom() {
+  const char * s = "null";
+  buffer.insert(buffer.end(), s, s + 4);
+}
+simdjson_really_inline void mini_formatter::one_char(char c) { buffer.push_back(c); }
+simdjson_really_inline void mini_formatter::key(std::string_view unescaped) {
+  string(unescaped);
+  one_char(':');
+}
+simdjson_really_inline void mini_formatter::string(std::string_view unescaped) {
+  one_char('\"');
+  size_t i = 0;
+  // Fast path for the case where we have no control character, no ", and no backslash.
+  // This should include most keys.
+  constexpr static bool needs_escaping[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+  for(;i + 8 <= unescaped.length(); i += 8) { 
+    // Poor man's vectorization. This could get much faster if we used SIMD.
+    if(needs_escaping[uint8_t(unescaped[i])] | needs_escaping[uint8_t(unescaped[i+1])] 
+      | needs_escaping[uint8_t(unescaped[i+2])] | needs_escaping[uint8_t(unescaped[i+3])]
+      | needs_escaping[uint8_t(unescaped[i+4])] | needs_escaping[uint8_t(unescaped[i+5])] 
+      | needs_escaping[uint8_t(unescaped[i+6])] | needs_escaping[uint8_t(unescaped[i+7])]
+      ) { break; }
+  }
+  for(;i < unescaped.length(); i++) { 
+    if(needs_escaping[uint8_t(unescaped[i])]) { break; }
+  }
+  // The following is also possible and omits a 256-byte table, but it is slower:
+  // for (; (i < unescaped.length()) && (uint8_t(unescaped[i]) > 0x1F) 
+  //      && (unescaped[i] != '\"') && (unescaped[i] != '\\'); i++) {}
+
+  // At least for long strings, the following should be fast. We could
+  // do better by integrating the checks and the insertion.
+  buffer.insert(buffer.end(), unescaped.data(), unescaped.data() + i);
+  // We found a character needing escaping (control, quote or backslash) if we enter this loop (slow).
+  // Note that we do not restart from the beginning, but rather we continue
+  // from the point where we encountered something that requires escaping.
+  for (; i < unescaped.length(); i++) {
+    switch (unescaped[i]) {
+    case '\"':
+      {
+        const char * s = "\\\"";
+        buffer.insert(buffer.end(), s, s + 2);
+      }
+      break;
+    case '\\':
+      {
+        const char * s = "\\\\";
+        buffer.insert(buffer.end(), s, s + 2);
+      }
+      break;
+    default:
+      if (uint8_t(unescaped[i]) <= 0x1F) {
+        // If packed, this uses 8 * 32 bytes.
+        // Note that we expect most compilers to embed this code in the data
+        // section.
+        constexpr static escape_sequence escaped[32] = {
+          {6, "\\u0000"}, {6, "\\u0001"}, {6, "\\u0002"}, {6, "\\u0003"},
+          {6, "\\u0004"}, {6, "\\u0005"}, {6, "\\u0006"}, {6, "\\u0007"},
+          {2, "\\b"},     {2, "\\t"},     {2, "\\n"},     {6, "\\u000b"},
+          {2, "\\f"},     {2, "\\r"},     {6, "\\u000e"}, {6, "\\u000f"},
+          {6, "\\u0010"}, {6, "\\u0011"}, {6, "\\u0012"}, {6, "\\u0013"},
+          {6, "\\u0014"}, {6, "\\u0015"}, {6, "\\u0016"}, {6, "\\u0017"},
+          {6, "\\u0018"}, {6, "\\u0019"}, {6, "\\u001a"}, {6, "\\u001b"},
+          {6, "\\u001c"}, {6, "\\u001d"}, {6, "\\u001e"}, {6, "\\u001f"}};
+        auto u = escaped[uint8_t(unescaped[i])];
+        buffer.insert(buffer.end(), u.string, u.string + u.length);
+      } else {
+        one_char(unescaped[i]);
+      }
+    } // switch
+  }   // for
+  one_char('\"');
+}
+
+inline void mini_formatter::clear() {
+  buffer.clear();
+}
+
+simdjson_really_inline std::string_view mini_formatter::str() const {
+  return std::string_view(buffer.data(), buffer.size());
+}
+
+
+/***
+ * String building code.
+ **/
+
+template <class serializer>
+inline void string_builder<serializer>::append(simdjson::dom::element value) {
+  // using tape_type = simdjson::internal::tape_type;
+  size_t depth = 0;
+  constexpr size_t MAX_DEPTH = 16;
+  bool is_object[MAX_DEPTH];
+  is_object[0] = false;
+  bool after_value = false;
+
+  internal::tape_ref iter(value.tape);
+  do {
+    // print commas after each value
+    if (after_value) {
+      format.comma();
+    }
+    // If we are in an object, print the next key and :, and skip to the next
+    // value.
+    if (is_object[depth]) {
+      format.key(iter.get_string_view());
+      iter.json_index++;
+    }
+    switch (iter.tape_ref_type()) {
+
+    // Arrays
+    case tape_type::START_ARRAY: {
+      // If we're too deep, we need to recurse to go deeper.
+      depth++;
+      if (simdjson_unlikely(depth >= MAX_DEPTH)) {
+        append(simdjson::dom::array(iter));
+        iter.json_index = iter.matching_brace_index() - 1; // Jump to the ]
+        depth--;
+        break;
+      }
+
+      // Output start [
+      format.start_array();
+      iter.json_index++;
+
+      // Handle empty [] (we don't want to come back around and print commas)
+      if (iter.tape_ref_type() == tape_type::END_ARRAY) {
+        format.end_array();
+        depth--;
+        break;
+      }
+
+      is_object[depth] = false;
+      after_value = false;
+      continue;
+    }
+
+    // Objects
+    case tape_type::START_OBJECT: {
+      // If we're too deep, we need to recurse to go deeper.
+      depth++;
+      if (simdjson_unlikely(depth >= MAX_DEPTH)) {
+        append(simdjson::dom::object(iter));
+        iter.json_index = iter.matching_brace_index() - 1; // Jump to the }
+        depth--;
+        break;
+      }
+
+      // Output start {
+      format.start_object();
+      iter.json_index++;
+
+      // Handle empty {} (we don't want to come back around and print commas)
+      if (iter.tape_ref_type() == tape_type::END_OBJECT) {
+        format.end_object();
+        depth--;
+        break;
+      }
+
+      is_object[depth] = true;
+      after_value = false;
+      continue;
+    }
+
+    // Scalars
+    case tape_type::STRING:
+      format.string(iter.get_string_view());
+      break;
+    case tape_type::INT64:
+      format.number(iter.next_tape_value<int64_t>());
+      iter.json_index++; // numbers take up 2 spots, so we need to increment
+                         // extra
+      break;
+    case tape_type::UINT64:
+      format.number(iter.next_tape_value<uint64_t>());
+      iter.json_index++; // numbers take up 2 spots, so we need to increment
+                         // extra
+      break;
+    case tape_type::DOUBLE:
+      format.number(iter.next_tape_value<double>());
+      iter.json_index++; // numbers take up 2 spots, so we need to increment
+                         // extra
+      break;
+    case tape_type::TRUE_VALUE:
+      format.true_atom();
+      break;
+    case tape_type::FALSE_VALUE:
+      format.false_atom();
+      break;
+    case tape_type::NULL_VALUE:
+      format.null_atom();
+      break;
+
+    // These are impossible
+    case tape_type::END_ARRAY:
+    case tape_type::END_OBJECT:
+    case tape_type::ROOT:
+      SIMDJSON_UNREACHABLE();
+    }
+    iter.json_index++;
+    after_value = true;
+
+    // Handle multiple ends in a row
+    while (depth != 0 && (iter.tape_ref_type() == tape_type::END_ARRAY ||
+                          iter.tape_ref_type() == tape_type::END_OBJECT)) {
+      if (iter.tape_ref_type() == tape_type::END_ARRAY) {
+        format.end_array();
+      } else {
+        format.end_object();
+      }
+      depth--;
+      iter.json_index++;
+    }
+
+    // Stop when we're at depth 0
+  } while (depth != 0);
+}
+
+template <class serializer>
+inline void string_builder<serializer>::append(simdjson::dom::object value) {
+  format.start_object();
+  auto pair = value.begin();
+  auto end = value.end();
+  if (pair != end) {
+    append(*pair);
+    for (++pair; pair != end; ++pair) {
+      format.comma();
+      append(*pair);
+    }
+  }
+  format.end_object();
+}
+
+template <class serializer>
+inline void string_builder<serializer>::append(simdjson::dom::array value) {
+  format.start_array();
+  auto iter = value.begin();
+  auto end = value.end();
+  if (iter != end) {
+    append(*iter);
+    for (++iter; iter != end; ++iter) {
+      format.comma();
+      append(*iter);
+    }
+  }
+  format.end_array();
+}
+
+template <class serializer>
+simdjson_really_inline void string_builder<serializer>::append(simdjson::dom::key_value_pair kv) {
+  format.key(kv.key);
+  append(kv.value);
+}
+
+template <class serializer>
+simdjson_really_inline void string_builder<serializer>::clear() {
+  format.clear();
+}
+
+template <class serializer>
+simdjson_really_inline std::string_view string_builder<serializer>::str() const {
+  return format.str();
+}
+
+
+} // namespace internal
+} // namespace simdjson
+
+#endif
\ No newline at end of file
diff --git a/include/simdjson/dom/serialization.h b/include/simdjson/dom/serialization.h
new file mode 100644
index 00000000..7bfbeb3a
--- /dev/null
+++ b/include/simdjson/dom/serialization.h
@@ -0,0 +1,219 @@
+#ifndef SIMDJSON_SERIALIZATION_H
+#define SIMDJSON_SERIALIZATION_H
+
+#include "simdjson/common_defs.h"
+#include "simdjson/dom/document.h"
+#include "simdjson/error.h"
+#include "simdjson/internal/dom_parser_implementation.h"
+#include "simdjson/internal/tape_ref.h"
+#include "simdjson/padded_string.h"
+#include "simdjson/portability.h"
+#include <vector>
+
+namespace simdjson {
+
+/**
+ * The string_builder template and mini_formatter class
+ * are not part of  our public API and are subject to change 
+ * at any time!
+ */
+namespace internal {
+
+class mini_formatter;
+
+/**
+ * @private The string_builder template allows us to construct
+ * a string from a document element. It is parametrized
+ * by a "formatter" which handles the details. Thus
+ * the string_builder template could support both minification
+ * and prettification, and various other tradeoffs.
+ */
+template <class formatter = mini_formatter> 
+class string_builder {
+public:
+  /** Construct an initially empty builder, would print the empty string **/
+  string_builder() = default;
+  /** Append an element to the builder (to be printed) **/
+  inline void append(simdjson::dom::element value);
+  /** Append an array to the builder (to be printed) **/
+  inline void append(simdjson::dom::array value);
+  /** Append an object to the builder (to be printed) **/
+  inline void append(simdjson::dom::object value);
+  /** Reset the builder (so that it would print the empty string) **/
+  simdjson_really_inline void clear();
+  /** 
+   * Get access to the string. The string_view is owned by the builder
+   * and it is invalid to use it after the string_builder has been 
+   * destroyed.
+   * However you can make a copy of the string_view on memory that you
+   * own. 
+   */
+  simdjson_really_inline std::string_view str() const;
+  /** Append a key_value_pair to the builder (to be printed) **/
+  simdjson_really_inline void append(simdjson::dom::key_value_pair value);
+private:
+  formatter format{};
+};
+
+/**
+ * @private This is the class that we expect to use with the string_builder
+ * template. It tries to produce a compact version of the JSON element
+ * as quickly as possible.
+ */
+class mini_formatter {
+public:
+  mini_formatter() = default;
+  /** Appends a comma (value separator) **/
+  simdjson_really_inline void comma();
+  /** Starts an array, prints [ **/
+  simdjson_really_inline void start_array();
+  /** Ends an array, prints ] **/
+  simdjson_really_inline void end_array();
+  /** Starts an object, prints { **/
+  simdjson_really_inline void start_object();
+  /** Ends an object, prints } **/
+  simdjson_really_inline void end_object();
+  /** Prints the literal true **/
+  simdjson_really_inline void true_atom();
+  /** Prints the literal false **/
+  simdjson_really_inline void false_atom();
+  /** Prints the literal null **/
+  simdjson_really_inline void null_atom();
+  /** Prints a signed 64-bit integer in decimal **/
+  simdjson_really_inline void number(int64_t x);
+  /** Prints an unsigned 64-bit integer in decimal **/
+  simdjson_really_inline void number(uint64_t x);
+  /** Prints a floating-point number **/
+  simdjson_really_inline void number(double x);
+  /** Prints a key: the escaped, quoted string followed by a colon **/
+  simdjson_really_inline void key(std::string_view unescaped);
+  /** Prints a quoted string. The string is escaped as needed. **/
+  simdjson_really_inline void string(std::string_view unescaped);
+  /** Clears out the content. **/
+  simdjson_really_inline void clear();
+  /**
+   * Get access to the buffer. It is owned by the instance, but
+   * the user can make a copy.
+   **/
+  simdjson_really_inline std::string_view str() const;
+
+private:
+  // implementation details (subject to change)
+  /** Appends a single character to the buffer **/
+  simdjson_really_inline void one_char(char c);
+  /** Backing buffer holding the text produced so far **/
+  std::vector<char> buffer{}; // not ideal!
+};
+
+} // internal
+
+namespace dom {
+
+/**
+ * Print JSON to an output stream.
+ *
+ * @param out The output stream.
+ * @param value The element.
+ * @throw if there is an error with the underlying output stream. simdjson itself will not throw.
+ */
+inline std::ostream& operator<<(std::ostream& out, simdjson::dom::element value) { 
+    simdjson::internal::string_builder<> sb;
+    sb.append(value);
+    return (out << sb.str());
+}
+#if SIMDJSON_EXCEPTIONS
+inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result<simdjson::dom::element> x) { 
+    if (x.error()) { throw simdjson::simdjson_error(x.error()); }
+    return (out << x.value());
+}
+#endif
+/**
+ * Print JSON to an output stream.
+ *
+ * @param out The output stream.
+ * @param value The array.
+ * @throw if there is an error with the underlying output stream. simdjson itself will not throw.
+ */
+inline std::ostream& operator<<(std::ostream& out, simdjson::dom::array value)  { 
+    simdjson::internal::string_builder<> sb;
+    sb.append(value);
+    return (out << sb.str());
+}
+#if SIMDJSON_EXCEPTIONS
+inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result<simdjson::dom::array> x) { 
+    if (x.error()) { throw simdjson::simdjson_error(x.error()); }
+    return (out << x.value());
+}
+#endif
+/**
+ * Print JSON to an output stream.
+ *
+ * @param out The output stream.
+ * @param value The object.
+ * @throw if there is an error with the underlying output stream. simdjson itself will not throw.
+ */
+inline std::ostream& operator<<(std::ostream& out, simdjson::dom::object value)   { 
+    simdjson::internal::string_builder<> sb;
+    sb.append(value);
+    return (out << sb.str());
+}
+#if SIMDJSON_EXCEPTIONS
+inline std::ostream& operator<<(std::ostream& out,  simdjson::simdjson_result<simdjson::dom::object> x) { 
+    if (x.error()) { throw  simdjson::simdjson_error(x.error()); }
+    return (out << x.value());
+}
+#endif 
+} // namespace dom
+
+/**
+ * Converts JSON to a string.
+ *
+ *   dom::parser parser;
+ *   element doc = parser.parse("   [ 1 , 2 , 3 ] "_padded);
+ *   cout << to_string(doc) << endl; // prints [1,2,3]
+ *
+ */
+template <class T> 
+std::string to_string(T x)   {
+    // in C++, to_string is standard: http://www.cplusplus.com/reference/string/to_string/
+    // Currently minify and to_string are identical but in the future, they may 
+    // differ.
+    simdjson::internal::string_builder<> sb;
+    sb.append(x);
+    std::string_view answer = sb.str();
+    return std::string(answer.data(), answer.size());
+}
+#if SIMDJSON_EXCEPTIONS
+template <class T> 
+std::string to_string(simdjson_result<T> x) {
+    if (x.error()) { throw simdjson_error(x.error()); }
+    return to_string(x.value());
+}
+#endif 
+
+/**
+ * Minifies a JSON element or document, printing the smallest possible valid JSON.
+ *
+ *   dom::parser parser;
+ *   element doc = parser.parse("   [ 1 , 2 , 3 ] "_padded);
+ *   cout << minify(doc) << endl; // prints [1,2,3]
+ *
+ */
+template <class T> 
+std::string minify(T x)  {
+  return to_string(x);
+}
+
+#if SIMDJSON_EXCEPTIONS
+template <class T> 
+std::string minify(simdjson_result<T> x) {
+    if (x.error()) { throw simdjson_error(x.error()); }
+    return to_string(x.value());
+}
+#endif 
+
+
+} // namespace simdjson
+
+
+#endif
\ No newline at end of file
diff --git a/include/simdjson/minify.h b/include/simdjson/minify.h
index 488a4cdf..e85c2a40 100644
--- a/include/simdjson/minify.h
+++ b/include/simdjson/minify.h
@@ -27,50 +27,6 @@ namespace simdjson {
  */
 SIMDJSON_WARN_UNUSED error_code minify(const char *buf, size_t len, char *dst, size_t &dst_len) noexcept;
 
-/**
- * Minifies a JSON element or document, printing the smallest possible valid JSON.
- *
- *   dom::parser parser;
- *   element doc = parser.parse("   [ 1 , 2 , 3 ] "_padded);
- *   cout << minify(doc) << endl; // prints [1,2,3]
- *
- */
-template<typename T>
-class minifier {
-public:
-  /**
-   * Create a new minifier.
-   *
-   * @param _value The document or element to minify.
-   */
-  inline minifier(const T &_value) noexcept : value{_value} {}
-
-  /**
-   * Minify JSON to a string.
-   */
-  inline operator std::string() const noexcept { std::stringstream s; s << *this; return s.str(); }
-
-  /**
-   * Minify JSON to an output stream.
-   */
-  inline std::ostream& print(std::ostream& out);
-private:
-  const T &value;
-};
-
-template<typename T>
-inline minifier<T> minify(const T &value) noexcept { return minifier<T>(value); }
-
-/**
- * Minify JSON to an output stream.
- *
- * @param out The output stream.
- * @param formatter The minifier.
- * @throw if there is an error with the underlying output stream. simdjson itself will not throw.
- */
-template<typename T>
-inline std::ostream& operator<<(std::ostream& out, minifier<T> formatter) { return formatter.print(out); }
-
 } // namespace simdjson
 
 #endif // SIMDJSON_MINIFY_H
\ No newline at end of file
diff --git a/src/simdjson.cpp b/src/simdjson.cpp
index 48af5147..4ab5bf48 100644
--- a/src/simdjson.cpp
+++ b/src/simdjson.cpp
@@ -2,7 +2,7 @@
 
 SIMDJSON_PUSH_DISABLE_WARNINGS
 SIMDJSON_DISABLE_UNDESIRED_WARNINGS
-
+#include "to_chars.cpp"
 #include "error.cpp"
 #include "implementation.cpp"
 
diff --git a/src/to_chars.cpp b/src/to_chars.cpp
new file mode 100644
index 00000000..45aa12cb
--- /dev/null
+++ b/src/to_chars.cpp
@@ -0,0 +1,946 @@
+#include <cmath>
+#include <cstring>
+#include <cstdint>
+#include <array>
+namespace simdjson {
+namespace internal {
+/*!
+implements the Grisu2 algorithm for binary to decimal floating-point
+conversion.
+Adapted from JSON for Modern C++
+
+This implementation is a slightly modified version of the reference
+implementation which may be obtained from
+http://florian.loitsch.com/publications (bench.tar.gz).
+The code is distributed under the MIT license, Copyright (c) 2009 Florian
+Loitsch. For a detailed description of the algorithm see: [1] Loitsch, "Printing
+Floating-Point Numbers Quickly and Accurately with Integers", Proceedings of the
+ACM SIGPLAN 2010 Conference on Programming Language Design and Implementation,
+PLDI 2010 [2] Burger, Dybvig, "Printing Floating-Point Numbers Quickly and
+Accurately", Proceedings of the ACM SIGPLAN 1996 Conference on Programming
+Language Design and Implementation, PLDI 1996
+*/
+namespace dtoa_impl {
+
+// Copies the object representation of `source` into a value of type Target.
+template <typename Target, typename Source>
+Target reinterpret_bits(const Source source) {
+  static_assert(sizeof(Target) == sizeof(Source), "size mismatch");
+  Target result; // every byte is overwritten by the memcpy below
+  std::memcpy(&result, &source, sizeof(Target));
+  return result;
+}
+
+struct diyfp // f * 2^e
+{
+  static constexpr int kPrecision = 64; // = q
+
+  std::uint64_t f = 0;
+  int e = 0;
+
+  constexpr diyfp(std::uint64_t f_, int e_) noexcept : f(f_), e(e_) {}
+
+  /*!
+  @brief returns x - y
+  @pre x.e == y.e and x.f >= y.f
+  */
+  static diyfp sub(const diyfp &x, const diyfp &y) noexcept {
+
+    return {x.f - y.f, x.e};
+  }
+
+  /*!
+  @brief returns x * y
+  @note The result is rounded. (Only the upper q bits are returned.)
+  */
+  static diyfp mul(const diyfp &x, const diyfp &y) noexcept {
+    static_assert(kPrecision == 64, "internal error");
+
+    // Computes:
+    //  f = round((x.f * y.f) / 2^q)
+    //  e = x.e + y.e + q
+
+    // Emulate the 64-bit * 64-bit multiplication:
+    //
+    // p = u * v
+    //   = (u_lo + 2^32 u_hi) (v_lo + 2^32 v_hi)
+    //   = (u_lo v_lo         ) + 2^32 ((u_lo v_hi         ) + (u_hi v_lo )) +
+    //   2^64 (u_hi v_hi         ) = (p0                ) + 2^32 ((p1 ) + (p2 ))
+    //   + 2^64 (p3                ) = (p0_lo + 2^32 p0_hi) + 2^32 ((p1_lo +
+    //   2^32 p1_hi) + (p2_lo + 2^32 p2_hi)) + 2^64 (p3                ) =
+    //   (p0_lo             ) + 2^32 (p0_hi + p1_lo + p2_lo ) + 2^64 (p1_hi +
+    //   p2_hi + p3) = (p0_lo             ) + 2^32 (Q ) + 2^64 (H ) = (p0_lo ) +
+    //   2^32 (Q_lo + 2^32 Q_hi                           ) + 2^64 (H )
+    //
+    // (Since Q might be larger than 2^32 - 1)
+    //
+    //   = (p0_lo + 2^32 Q_lo) + 2^64 (Q_hi + H)
+    //
+    // (Q_hi + H does not overflow a 64-bit int)
+    //
+    //   = p_lo + 2^64 p_hi
+
+    const std::uint64_t u_lo = x.f & 0xFFFFFFFFu;
+    const std::uint64_t u_hi = x.f >> 32u;
+    const std::uint64_t v_lo = y.f & 0xFFFFFFFFu;
+    const std::uint64_t v_hi = y.f >> 32u;
+
+    const std::uint64_t p0 = u_lo * v_lo;
+    const std::uint64_t p1 = u_lo * v_hi;
+    const std::uint64_t p2 = u_hi * v_lo;
+    const std::uint64_t p3 = u_hi * v_hi;
+
+    const std::uint64_t p0_hi = p0 >> 32u;
+    const std::uint64_t p1_lo = p1 & 0xFFFFFFFFu;
+    const std::uint64_t p1_hi = p1 >> 32u;
+    const std::uint64_t p2_lo = p2 & 0xFFFFFFFFu;
+    const std::uint64_t p2_hi = p2 >> 32u;
+
+    std::uint64_t Q = p0_hi + p1_lo + p2_lo;
+
+    // The full product might now be computed as
+    //
+    // p_hi = p3 + p2_hi + p1_hi + (Q >> 32)
+    // p_lo = p0_lo + (Q << 32)
+    //
+    // But in this particular case here, the full p_lo is not required.
+    // Effectively we only need to add the highest bit in p_lo to p_hi (and
+    // Q_hi + 1 does not overflow).
+
+    Q += std::uint64_t{1} << (64u - 32u - 1u); // round, ties up
+
+    const std::uint64_t h = p3 + p2_hi + p1_hi + (Q >> 32u);
+
+    return {h, x.e + y.e + 64};
+  }
+
+  /*!
+  @brief normalize x such that the significand is >= 2^(q-1)
+  @pre x.f != 0
+  */
+  static diyfp normalize(diyfp x) noexcept {
+
+    while ((x.f >> 63u) == 0) {
+      x.f <<= 1u;
+      x.e--;
+    }
+
+    return x;
+  }
+
+  /*!
+  @brief normalize x such that the result has the exponent E
+  @pre x.e >= target_exponent and the upper x.e - target_exponent bits of x.f must be zero.
+  */
+  static diyfp normalize_to(const diyfp &x,
+                            const int target_exponent) noexcept {
+    const int delta = x.e - target_exponent;
+
+    return {x.f << delta, target_exponent};
+  }
+};
+
+struct boundaries {
+  diyfp w;
+  diyfp minus;
+  diyfp plus;
+};
+
+/*!
+Compute the (normalized) diyfp representing the input number 'value' and its
+boundaries.
+@pre value must be finite and positive
+*/
+template <typename FloatType> boundaries compute_boundaries(FloatType value) {
+
+  // Convert the IEEE representation into a diyfp.
+  //
+  // If v is denormal:
+  //      value = 0.F * 2^(1 - bias) = (          F) * 2^(1 - bias - (p-1))
+  // If v is normalized:
+  //      value = 1.F * 2^(E - bias) = (2^(p-1) + F) * 2^(E - bias - (p-1))
+
+  static_assert(std::numeric_limits<FloatType>::is_iec559,
+                "internal error: dtoa_short requires an IEEE-754 "
+                "floating-point implementation");
+
+  constexpr int kPrecision =
+      std::numeric_limits<FloatType>::digits; // = p (includes the hidden bit)
+  constexpr int kBias =
+      std::numeric_limits<FloatType>::max_exponent - 1 + (kPrecision - 1);
+  constexpr int kMinExp = 1 - kBias;
+  constexpr std::uint64_t kHiddenBit = std::uint64_t{1}
+                                       << (kPrecision - 1); // = 2^(p-1)
+
+  using bits_type = typename std::conditional<kPrecision == 24, std::uint32_t,
+                                              std::uint64_t>::type;
+
+  const std::uint64_t bits = reinterpret_bits<bits_type>(value);
+  const std::uint64_t E = bits >> (kPrecision - 1);
+  const std::uint64_t F = bits & (kHiddenBit - 1);
+
+  const bool is_denormal = E == 0;
+  const diyfp v = is_denormal
+                      ? diyfp(F, kMinExp)
+                      : diyfp(F + kHiddenBit, static_cast<int>(E) - kBias);
+
+  // Compute the boundaries m- and m+ of the floating-point value
+  // v = f * 2^e.
+  //
+  // Determine v- and v+, the floating-point predecessor and successor of v,
+  // respectively.
+  //
+  //      v- = v - 2^e        if f != 2^(p-1) or e == e_min                (A)
+  //         = v - 2^(e-1)    if f == 2^(p-1) and e > e_min                (B)
+  //
+  //      v+ = v + 2^e
+  //
+  // Let m- = (v- + v) / 2 and m+ = (v + v+) / 2. All real numbers _strictly_
+  // between m- and m+ round to v, regardless of how the input rounding
+  // algorithm breaks ties.
+  //
+  //      ---+-------------+-------------+-------------+-------------+---  (A)
+  //         v-            m-            v             m+            v+
+  //
+  //      -----------------+------+------+-------------+-------------+---  (B)
+  //                       v-     m-     v             m+            v+
+
+  const bool lower_boundary_is_closer = F == 0 && E > 1;
+  const diyfp m_plus = diyfp(2 * v.f + 1, v.e - 1);
+  const diyfp m_minus = lower_boundary_is_closer
+                            ? diyfp(4 * v.f - 1, v.e - 2)  // (B)
+                            : diyfp(2 * v.f - 1, v.e - 1); // (A)
+
+  // Determine the normalized w+ = m+.
+  const diyfp w_plus = diyfp::normalize(m_plus);
+
+  // Determine w- = m- such that e_(w-) = e_(w+).
+  const diyfp w_minus = diyfp::normalize_to(m_minus, w_plus.e);
+
+  return {diyfp::normalize(v), w_minus, w_plus};
+}
+
+// Given normalized diyfp w, Grisu needs to find a (normalized) cached
+// power-of-ten c, such that the exponent of the product c * w = f * 2^e lies
+// within a certain range [alpha, gamma] (Definition 3.2 from [1])
+//
+//      alpha <= e = e_c + e_w + q <= gamma
+//
+// or
+//
+//      f_c * f_w * 2^alpha <= f_c 2^(e_c) * f_w 2^(e_w) * 2^q
+//                          <= f_c * f_w * 2^gamma
+//
+// Since c and w are normalized, i.e. 2^(q-1) <= f < 2^q, this implies
+//
+//      2^(q-1) * 2^(q-1) * 2^alpha <= c * w * 2^q < 2^q * 2^q * 2^gamma
+//
+// or
+//
+//      2^(q - 2 + alpha) <= c * w < 2^(q + gamma)
+//
+// The choice of (alpha,gamma) determines the size of the table and the form of
+// the digit generation procedure. Using (alpha,gamma)=(-60,-32) works out well
+// in practice:
+//
+// The idea is to cut the number c * w = f * 2^e into two parts, which can be
+// processed independently: An integral part p1, and a fractional part p2:
+//
+//      f * 2^e = ( (f div 2^-e) * 2^-e + (f mod 2^-e) ) * 2^e
+//              = (f div 2^-e) + (f mod 2^-e) * 2^e
+//              = p1 + p2 * 2^e
+//
+// The conversion of p1 into decimal form requires a series of divisions and
+// modulos by (a power of) 10. These operations are faster for 32-bit than for
+// 64-bit integers, so p1 should ideally fit into a 32-bit integer. This can be
+// achieved by choosing
+//
+//      -e >= 32   or   e <= -32 := gamma
+//
+// In order to convert the fractional part
+//
+//      p2 * 2^e = p2 / 2^-e = d[-1] / 10^1 + d[-2] / 10^2 + ...
+//
+// into decimal form, the fraction is repeatedly multiplied by 10 and the digits
+// d[-i] are extracted in order:
+//
+//      (10 * p2) div 2^-e = d[-1]
+//      (10 * p2) mod 2^-e = d[-2] / 10^1 + ...
+//
+// The multiplication by 10 must not overflow. It is sufficient to choose
+//
+//      10 * p2 < 16 * p2 = 2^4 * p2 <= 2^64.
+//
+// Since p2 = f mod 2^-e < 2^-e,
+//
+//      -e <= 60   or   e >= -60 := alpha
+
+constexpr int kAlpha = -60; // alpha: lower bound on the binary exponent e of the scaled product (e >= -60)
+constexpr int kGamma = -32; // gamma: upper bound on e (e <= -32), chosen so the integral part p1 fits in 32 bits
+
+struct cached_power // one table entry: c = f * 2^e ~= 10^k
+{
+  std::uint64_t f; // 64-bit significand of c
+  int e; // binary exponent of c
+  int k; // decimal exponent: c approximates 10^k
+};
+
+/*!
+@brief Looks up the cached power of ten used to scale a normalized diyfp w = f * 2^e.
+@param e binary exponent of w; it is not range-checked by this function.
+@return a (normalized) cached power-of-ten c = f_c * 2^e_c such that the exponent
+of the product w * c satisfies alpha <= e_c + e + q <= gamma (Definition 3.2 from [1]).
+*/
+inline cached_power get_cached_power_for_binary_exponent(int e) {
+  // Now
+  //
+  //      alpha <= e_c + e + q <= gamma                                    (1)
+  //      ==> f_c * 2^alpha <= c * 2^e * 2^q
+  //
+  // and since the c's are normalized, 2^(q-1) <= f_c,
+  //
+  //      ==> 2^(q - 1 + alpha) <= c * 2^(e + q)
+  //      ==> 2^(alpha - e - 1) <= c
+  //
+  // If c were an exact power of ten, i.e. c = 10^k, one may determine k as
+  //
+  //      k = ceil( log_10( 2^(alpha - e - 1) ) )
+  //        = ceil( (alpha - e - 1) * log_10(2) )
+  //
+  // From the paper:
+  // "In theory the result of the procedure could be wrong since c is rounded,
+  //  and the computation itself is approximated [...]. In practice, however,
+  //  this simple function is sufficient."
+  //
+  // For IEEE double precision floating-point numbers converted into
+  // normalized diyfp's w = f * 2^e, with q = 64,
+  //
+  //      e >= -1022      (min IEEE exponent)
+  //           -52        (p - 1)
+  //           -52        (p - 1, possibly normalize denormal IEEE numbers)
+  //           -11        (normalize the diyfp)
+  //         = -1137
+  //
+  // and
+  //
+  //      e <= +1023      (max IEEE exponent)
+  //           -52        (p - 1)
+  //           -11        (normalize the diyfp)
+  //         = 960
+  //
+  // This binary exponent range [-1137,960] results in a decimal exponent
+  // range [-307,324]. One does not need to store a cached power for each
+  // k in this range. For each such k it suffices to find a cached power
+  // such that the exponent of the product lies in [alpha,gamma].
+  // This implies that the difference of the decimal exponents of adjacent
+  // table entries must be less than or equal to
+  //
+  //      floor( (gamma - alpha) * log_10(2) ) = 8.
+  //
+  // (A smaller distance gamma-alpha would require a larger table.)
+
+  // NB:
+  // Actually this function returns c, such that -60 <= e_c + e + 64 <= -34.
+
+  constexpr int kCachedPowersMinDecExp = -300; // decimal exponent k of the first table entry
+  constexpr int kCachedPowersDecStep = 8; // difference in k between adjacent table entries
+
+  static constexpr std::array<cached_power, 79> kCachedPowers = {{
+      {0xAB70FE17C79AC6CA, -1060, -300}, {0xFF77B1FCBEBCDC4F, -1034, -292},
+      {0xBE5691EF416BD60C, -1007, -284}, {0x8DD01FAD907FFC3C, -980, -276},
+      {0xD3515C2831559A83, -954, -268},  {0x9D71AC8FADA6C9B5, -927, -260},
+      {0xEA9C227723EE8BCB, -901, -252},  {0xAECC49914078536D, -874, -244},
+      {0x823C12795DB6CE57, -847, -236},  {0xC21094364DFB5637, -821, -228},
+      {0x9096EA6F3848984F, -794, -220},  {0xD77485CB25823AC7, -768, -212},
+      {0xA086CFCD97BF97F4, -741, -204},  {0xEF340A98172AACE5, -715, -196},
+      {0xB23867FB2A35B28E, -688, -188},  {0x84C8D4DFD2C63F3B, -661, -180},
+      {0xC5DD44271AD3CDBA, -635, -172},  {0x936B9FCEBB25C996, -608, -164},
+      {0xDBAC6C247D62A584, -582, -156},  {0xA3AB66580D5FDAF6, -555, -148},
+      {0xF3E2F893DEC3F126, -529, -140},  {0xB5B5ADA8AAFF80B8, -502, -132},
+      {0x87625F056C7C4A8B, -475, -124},  {0xC9BCFF6034C13053, -449, -116},
+      {0x964E858C91BA2655, -422, -108},  {0xDFF9772470297EBD, -396, -100},
+      {0xA6DFBD9FB8E5B88F, -369, -92},   {0xF8A95FCF88747D94, -343, -84},
+      {0xB94470938FA89BCF, -316, -76},   {0x8A08F0F8BF0F156B, -289, -68},
+      {0xCDB02555653131B6, -263, -60},   {0x993FE2C6D07B7FAC, -236, -52},
+      {0xE45C10C42A2B3B06, -210, -44},   {0xAA242499697392D3, -183, -36},
+      {0xFD87B5F28300CA0E, -157, -28},   {0xBCE5086492111AEB, -130, -20},
+      {0x8CBCCC096F5088CC, -103, -12},   {0xD1B71758E219652C, -77, -4},
+      {0x9C40000000000000, -50, 4},      {0xE8D4A51000000000, -24, 12},
+      {0xAD78EBC5AC620000, 3, 20},       {0x813F3978F8940984, 30, 28},
+      {0xC097CE7BC90715B3, 56, 36},      {0x8F7E32CE7BEA5C70, 83, 44},
+      {0xD5D238A4ABE98068, 109, 52},     {0x9F4F2726179A2245, 136, 60},
+      {0xED63A231D4C4FB27, 162, 68},     {0xB0DE65388CC8ADA8, 189, 76},
+      {0x83C7088E1AAB65DB, 216, 84},     {0xC45D1DF942711D9A, 242, 92},
+      {0x924D692CA61BE758, 269, 100},    {0xDA01EE641A708DEA, 295, 108},
+      {0xA26DA3999AEF774A, 322, 116},    {0xF209787BB47D6B85, 348, 124},
+      {0xB454E4A179DD1877, 375, 132},    {0x865B86925B9BC5C2, 402, 140},
+      {0xC83553C5C8965D3D, 428, 148},    {0x952AB45CFA97A0B3, 455, 156},
+      {0xDE469FBD99A05FE3, 481, 164},    {0xA59BC234DB398C25, 508, 172},
+      {0xF6C69A72A3989F5C, 534, 180},    {0xB7DCBF5354E9BECE, 561, 188},
+      {0x88FCF317F22241E2, 588, 196},    {0xCC20CE9BD35C78A5, 614, 204},
+      {0x98165AF37B2153DF, 641, 212},    {0xE2A0B5DC971F303A, 667, 220},
+      {0xA8D9D1535CE3B396, 694, 228},    {0xFB9B7CD9A4A7443C, 720, 236},
+      {0xBB764C4CA7A44410, 747, 244},    {0x8BAB8EEFB6409C1A, 774, 252},
+      {0xD01FEF10A657842C, 800, 260},    {0x9B10A4E5E9913129, 827, 268},
+      {0xE7109BFBA19C0C9D, 853, 276},    {0xAC2820D9623BF429, 880, 284},
+      {0x80444B5E7AA7CF85, 907, 292},    {0xBF21E44003ACDD2D, 933, 300},
+      {0x8E679C2F5E44FF8F, 960, 308},    {0xD433179D9C8CB841, 986, 316},
+      {0x9E19DB92B4E31BA9, 1013, 324},
+  }};
+
+  // This computation gives exactly the same results for k as
+  //      k = ceil((kAlpha - e - 1) * 0.30102999566398114)
+  // for |e| <= 1500, but doesn't require floating-point operations.
+  // NB: log_10(2) ~= 78913 / 2^18
+  const int f = kAlpha - e - 1;
+  const int k = (f * 78913) / (1 << 18) + static_cast<int>(f > 0); // truncating division, plus 1 only for f > 0, emulates the ceil() above
+
+  const int index = (-kCachedPowersMinDecExp + k + (kCachedPowersDecStep - 1)) /
+                    kCachedPowersDecStep; // distance of k from the first entry, rounded up to a whole table slot
+
+  const cached_power cached = kCachedPowers[static_cast<std::size_t>(index)]; // NOTE(review): index is not bounds-checked; relies on e being in the range derived above
+
+  return cached;
+}
+
+/*!
+Returns the number of decimal digits k of n and sets pow10 := 10^(k-1), so that
+10^(k-1) <= n < 10^k for n != 0. For n == 0, returns 1 and sets pow10 := 1.
+*/
+inline int find_largest_pow10(const std::uint32_t n, std::uint32_t &pow10) {
+  // LCOV_EXCL_START
+  if (n >= 1000000000) {
+    pow10 = 1000000000; // 10^9 is the largest power of ten that fits in 32 bits
+    return 10;
+  }
+  // LCOV_EXCL_STOP
+  else if (n >= 100000000) {
+    pow10 = 100000000;
+    return 9;
+  } else if (n >= 10000000) {
+    pow10 = 10000000;
+    return 8;
+  } else if (n >= 1000000) {
+    pow10 = 1000000;
+    return 7;
+  } else if (n >= 100000) {
+    pow10 = 100000;
+    return 6;
+  } else if (n >= 10000) {
+    pow10 = 10000;
+    return 5;
+  } else if (n >= 1000) {
+    pow10 = 1000;
+    return 4;
+  } else if (n >= 100) {
+    pow10 = 100;
+    return 3;
+  } else if (n >= 10) {
+    pow10 = 10;
+    return 2;
+  } else {
+    pow10 = 1; // also reached for n == 0
+    return 1;
+  }
+}
+
+inline void grisu2_round(char *buf, int len, std::uint64_t dist,
+                         std::uint64_t delta, std::uint64_t rest,
+                         std::uint64_t ten_k) {
+  // Rounding step: lowers the last digit of buf while V stays >= M- and gets closer to w.
+  //               <--------------------------- delta ---->
+  //                                  <---- dist --------->
+  // --------------[------------------+-------------------]--------------
+  //               M-                 w                   M+
+  //
+  //                                  ten_k
+  //                                <------>
+  //                                       <---- rest ---->
+  // --------------[------------------+----+--------------]--------------
+  //                                  w    V
+  //                                       = buf * 10^k
+  //
+  // ten_k represents a unit-in-the-last-place in the decimal representation
+  // stored in buf.
+  // Decrement buf by ten_k while this takes buf closer to w.
+
+  // The tests are written in this order to avoid overflow in unsigned
+  // integer arithmetic.
+
+  while (rest < dist && delta - rest >= ten_k && // V is above w, and V - ten_k would still be >= M-
+         (rest + ten_k < dist || dist - rest > rest + ten_k - dist)) { // V - ten_k is still above w, or at least closer to w than V
+    buf[len - 1]--; // lower the last decimal digit by one
+    rest += ten_k; // V moved one decimal ulp further away from M+
+  }
+}
+
+/*!
+Appends digits to buffer (advancing length) so that V = buffer * 10^decimal_exponent satisfies M- <= V <= M+.
+M- and M+ must be normalized and share the same exponent -60 <= e <= -32.
+*/
+inline void grisu2_digit_gen(char *buffer, int &length, int &decimal_exponent,
+                             diyfp M_minus, diyfp w, diyfp M_plus) {
+  static_assert(kAlpha >= -60, "internal error");
+  static_assert(kGamma <= -32, "internal error");
+
+  // Generates the digits (and the exponent) of a decimal floating-point
+  // number V = buffer * 10^decimal_exponent in the range [M-, M+]. The diyfp's
+  // w, M- and M+ share the same exponent e, which satisfies alpha <= e <=
+  // gamma.
+  //
+  //               <--------------------------- delta ---->
+  //                                  <---- dist --------->
+  // --------------[------------------+-------------------]--------------
+  //               M-                 w                   M+
+  //
+  // Grisu2 generates the digits of M+ from left to right and stops as soon as
+  // V is in [M-,M+].
+
+  std::uint64_t delta =
+      diyfp::sub(M_plus, M_minus)
+          .f; // (significand of (M+ - M-), implicit exponent is e)
+  std::uint64_t dist =
+      diyfp::sub(M_plus, w)
+          .f; // (significand of (M+ - w ), implicit exponent is e)
+
+  // Split M+ = f * 2^e into two parts p1 and p2 (note: e < 0):
+  //
+  //      M+ = f * 2^e
+  //         = ((f div 2^-e) * 2^-e + (f mod 2^-e)) * 2^e
+  //         = ((p1        ) * 2^-e + (p2        )) * 2^e
+  //         = p1 + p2 * 2^e
+
+  const diyfp one(std::uint64_t{1} << -M_plus.e, M_plus.e); // the value 1 with exponent e, i.e. one.f = 2^-e
+
+  auto p1 = static_cast<std::uint32_t>(
+      M_plus.f >>
+      -one.e); // p1 = f div 2^-e (Since -e >= 32, p1 fits into a 32-bit int.)
+  std::uint64_t p2 = M_plus.f & (one.f - 1); // p2 = f mod 2^-e
+
+  // 1)
+  //
+  // Generate the digits of the integral part p1 = d[n-1]...d[1]d[0]
+
+  std::uint32_t pow10; // assigned by find_largest_pow10 below
+  const int k = find_largest_pow10(p1, pow10);
+
+  //      10^(k-1) <= p1 < 10^k, pow10 = 10^(k-1)
+  //
+  //      p1 = (p1 div 10^(k-1)) * 10^(k-1) + (p1 mod 10^(k-1))
+  //         = (d[k-1]         ) * 10^(k-1) + (p1 mod 10^(k-1))
+  //
+  //      M+ = p1                                             + p2 * 2^e
+  //         = d[k-1] * 10^(k-1) + (p1 mod 10^(k-1))          + p2 * 2^e
+  //         = d[k-1] * 10^(k-1) + ((p1 mod 10^(k-1)) * 2^-e + p2) * 2^e
+  //         = d[k-1] * 10^(k-1) + (                         rest) * 2^e
+  //
+  // Now generate the digits d[n] of p1 from left to right (n = k-1,...,0)
+  //
+  //      p1 = d[k-1]...d[n] * 10^n + d[n-1]...d[0]
+  //
+  // but stop as soon as
+  //
+  //      rest * 2^e = (d[n-1]...d[0] * 2^-e + p2) * 2^e <= delta * 2^e
+
+  int n = k; // number of integral digits not yet emitted
+  while (n > 0) {
+    // Invariants:
+    //      M+ = buffer * 10^n + (p1 + p2 * 2^e)    (buffer = 0 for n = k)
+    //      pow10 = 10^(n-1) <= p1 < 10^n
+    //
+    const std::uint32_t d = p1 / pow10; // d = p1 div 10^(n-1)
+    const std::uint32_t r = p1 % pow10; // r = p1 mod 10^(n-1)
+    //
+    //      M+ = buffer * 10^n + (d * 10^(n-1) + r) + p2 * 2^e
+    //         = (buffer * 10 + d) * 10^(n-1) + (r + p2 * 2^e)
+    //
+    buffer[length++] = static_cast<char>('0' + d); // buffer := buffer * 10 + d
+    //
+    //      M+ = buffer * 10^(n-1) + (r + p2 * 2^e)
+    //
+    p1 = r;
+    n--;
+    //
+    //      M+ = buffer * 10^n + (p1 + p2 * 2^e)
+    //      pow10 = 10^n
+    //
+
+    // Now check if enough digits have been generated.
+    // Compute
+    //
+    //      p1 + p2 * 2^e = (p1 * 2^-e + p2) * 2^e = rest * 2^e
+    //
+    // Note:
+    // Since rest and delta share the same exponent e, it suffices to
+    // compare the significands.
+    const std::uint64_t rest = (std::uint64_t{p1} << -one.e) + p2;
+    if (rest <= delta) {
+      // V = buffer * 10^n, with M- <= V <= M+.
+
+      decimal_exponent += n;
+
+      // We may now just stop. But instead look if the buffer could be
+      // decremented to bring V closer to w.
+      //
+      // pow10 = 10^n is now 1 ulp in the decimal representation V.
+      // The rounding procedure works with diyfp's with an implicit
+      // exponent of e.
+      //
+      //      10^n = (10^n * 2^-e) * 2^e = ulp * 2^e
+      //
+      const std::uint64_t ten_n = std::uint64_t{pow10} << -one.e;
+      grisu2_round(buffer, length, dist, delta, rest, ten_n);
+
+      return;
+    }
+
+    pow10 /= 10;
+    //
+    //      pow10 = 10^(n-1) <= p1 < 10^n
+    // Invariants restored.
+  }
+
+  // 2)
+  //
+  // The digits of the integral part have been generated:
+  //
+  //      M+ = d[k-1]...d[1]d[0] + p2 * 2^e
+  //         = buffer            + p2 * 2^e
+  //
+  // Now generate the digits of the fractional part p2 * 2^e.
+  //
+  // Note:
+  // No decimal point is generated: the exponent is adjusted instead.
+  //
+  // p2 actually represents the fraction
+  //
+  //      p2 * 2^e
+  //          = p2 / 2^-e
+  //          = d[-1] / 10^1 + d[-2] / 10^2 + ...
+  //
+  // Now generate the digits d[-m] of p2 from left to right (m = 1,2,...)
+  //
+  //      p2 * 2^e = d[-1]d[-2]...d[-m] * 10^-m
+  //                      + 10^-m * (d[-m-1] / 10^1 + d[-m-2] / 10^2 + ...)
+  //
+  // using
+  //
+  //      10^m * p2 = ((10^m * p2) div 2^-e) * 2^-e + ((10^m * p2) mod 2^-e)
+  //                = (                   d) * 2^-e + (                   r)
+  //
+  // or
+  //      10^m * p2 * 2^e = d + r * 2^e
+  //
+  // i.e.
+  //
+  //      M+ = buffer + p2 * 2^e
+  //         = buffer + 10^-m * (d + r * 2^e)
+  //         = (buffer * 10^m + d) * 10^-m + 10^-m * r * 2^e
+  //
+  // and stop as soon as 10^-m * r * 2^e <= delta * 2^e
+
+  int m = 0; // number of fractional digits emitted so far
+  for (;;) {
+    // Invariant:
+    //      M+ = buffer * 10^-m + 10^-m * p2 * 2^e
+    //      with p2 * 2^e = d[-m-1] / 10 + d[-m-2] / 10^2 + ...
+    // The next digit is obtained by rewriting p2 as
+    //      p2 = 1/10 * (10 * p2)
+    //         = 1/10 * ((10*p2 div 2^-e) * 2^-e + (10*p2 mod 2^-e))
+    //         = 1/10 * (d * 2^-e + r)
+    //
+    p2 *= 10;
+    const std::uint64_t d = p2 >> -one.e;     // d = (10 * p2) div 2^-e
+    const std::uint64_t r = p2 & (one.f - 1); // r = (10 * p2) mod 2^-e
+    //
+    //      M+ = buffer * 10^-m + 10^-m * (1/10 * (d * 2^-e + r) * 2^e
+    //         = buffer * 10^-m + 10^-m * (1/10 * (d + r * 2^e))
+    //         = (buffer * 10 + d) * 10^(-m-1) + 10^(-m-1) * r * 2^e
+    //
+    buffer[length++] = static_cast<char>('0' + d); // buffer := buffer * 10 + d
+    //
+    //      M+ = buffer * 10^(-m-1) + 10^(-m-1) * r * 2^e
+    //
+    p2 = r;
+    m++;
+    //
+    //      M+ = buffer * 10^-m + 10^-m * p2 * 2^e
+    // Invariant restored.
+
+    // Check if enough digits have been generated.
+    //
+    //      10^-m * p2 * 2^e <= delta * 2^e
+    //              p2 * 2^e <= 10^m * delta * 2^e
+    //                    p2 <= 10^m * delta
+    delta *= 10;
+    dist *= 10; // scaled alongside delta so grisu2_round below sees consistent units
+    if (p2 <= delta) {
+      break;
+    }
+  }
+
+  // V = buffer * 10^-m, with M- <= V <= M+.
+
+  decimal_exponent -= m;
+
+  // 1 ulp in the decimal representation is now 10^-m.
+  // Since delta and dist are now scaled by 10^m, we need to do the
+  // same with ulp in order to keep the units in sync.
+  //
+  //      10^m * 10^-m = 1 = 2^-e * 2^e = ten_m * 2^e
+  //
+  const std::uint64_t ten_m = one.f;
+  grisu2_round(buffer, length, dist, delta, p2, ten_m);
+
+  // By construction this algorithm generates the shortest possible decimal
+  // number (Loitsch, Theorem 6.2) which rounds back to w.
+  // For an input number of precision p, at least
+  //
+  //      N = 1 + ceil(p * log_10(2))
+  //
+  // decimal digits are sufficient to identify all binary floating-point
+  // numbers (Matula, "In-and-Out conversions").
+  // This implies that the algorithm does not produce more than N decimal
+  // digits.
+  //
+  //      N = 17 for p = 53 (IEEE double precision)
+  //      N = 9  for p = 24 (IEEE single precision)
+}
+
+/*!
+Scales v, m- and m+ by a cached power of ten and generates digits with v = buf * 10^decimal_exponent.
+len is the length of the buffer (number of decimal digits)
+The buffer must be large enough, i.e. >= max_digits10.
+*/
+inline void grisu2(char *buf, int &len, int &decimal_exponent, diyfp m_minus,
+                   diyfp v, diyfp m_plus) {
+
+  //  --------(-----------------------+-----------------------)--------    (A)
+  //          m-                      v                       m+
+  //
+  //  --------------------(-----------+-----------------------)--------    (B)
+  //                      m-          v                       m+
+  //
+  // First scale v (and m- and m+) such that the exponent is in the range
+  // [alpha, gamma].
+
+  const cached_power cached = get_cached_power_for_binary_exponent(m_plus.e);
+
+  const diyfp c_minus_k(cached.f, cached.e); // = c ~= 10^-k
+
+  // The exponent of the products is = v.e + c_minus_k.e + q and is in the range
+  // [alpha,gamma]
+  const diyfp w = diyfp::mul(v, c_minus_k);
+  const diyfp w_minus = diyfp::mul(m_minus, c_minus_k);
+  const diyfp w_plus = diyfp::mul(m_plus, c_minus_k);
+
+  //  ----(---+---)---------------(---+---)---------------(---+---)----
+  //          w-                      w                       w+
+  //          = c*m-                  = c*v                   = c*m+
+  //
+  // diyfp::mul rounds its result and c_minus_k is approximated too. w, w- and
+  // w+ are now off by a small amount.
+  // In fact:
+  //
+  //      w - v * 10^k < 1 ulp
+  //
+  // To account for this inaccuracy, add resp. subtract 1 ulp.
+  //
+  //  --------+---[---------------(---+---)---------------]---+--------
+  //          w-  M-                  w                   M+  w+
+  //
+  // Now any number in [M-, M+] (bounds included) will round to w when input,
+  // regardless of how the input rounding algorithm breaks ties.
+  //
+  // And digit_gen generates the shortest possible such number in [M-, M+].
+  // Note that this does not mean that Grisu2 always generates the shortest
+  // possible number in the interval (m-, m+).
+  const diyfp M_minus(w_minus.f + 1, w_minus.e); // shrink the interval by 1 ulp from below
+  const diyfp M_plus(w_plus.f - 1, w_plus.e); // and by 1 ulp from above
+
+  decimal_exponent = -cached.k; // = -(-k) = k
+
+  grisu2_digit_gen(buf, len, decimal_exponent, M_minus, w, M_plus); // appends the digits and finishes adjusting decimal_exponent
+}
+
+/*!
+Computes the boundaries of value and generates digits with v = buf * 10^decimal_exponent.
+len is the length of the buffer (number of decimal digits)
+The buffer must be large enough, i.e. >= max_digits10.
+*/
+template <typename FloatType>
+void grisu2(char *buf, int &len, int &decimal_exponent, FloatType value) {
+  static_assert(diyfp::kPrecision >= std::numeric_limits<FloatType>::digits + 3,
+                "internal error: not enough precision");
+
+  // If the neighbors (and boundaries) of 'value' are always computed for
+  // double-precision numbers, all float's can be recovered using strtod (and
+  // strtof). However, the resulting decimal representations are not exactly
+  // "short".
+  //
+  // The documentation for 'std::to_chars'
+  // (https://en.cppreference.com/w/cpp/utility/to_chars) says "value is
+  // converted to a string as if by std::sprintf in the default ("C") locale"
+  // and since sprintf promotes float's to double's, I think this is exactly
+  // what 'std::to_chars' does. On the other hand, the documentation for
+  // 'std::to_chars' requires that "parsing the representation using the
+  // corresponding std::from_chars function recovers value exactly". That
+  // indicates that single precision floating-point numbers should be recovered
+  // using 'std::strtof'.
+  //
+  // NB: If the neighbors are computed for single-precision numbers, there is
+  //     a single float (7.0385307e-26f) which can't be recovered using
+  //     strtod: the resulting double precision value is off by 1 ulp.
+  //     The active branch below passes value through without widening it.
+#if 0
+    const boundaries w = compute_boundaries(static_cast<double>(value));
+#else
+  const boundaries w = compute_boundaries(value);
+#endif
+
+  grisu2(buf, len, decimal_exponent, w.minus, w.w, w.plus); // delegate to the diyfp overload
+}
+
+/*!
+@brief appends a decimal representation of e to buf: a sign, then two or three digits
+@return a pointer to the element following the exponent.
+@pre -1000 < e < 1000
+*/
+inline char *append_exponent(char *buf, int e) {
+
+  if (e < 0) {
+    e = -e; // continue with the magnitude only
+    *buf++ = '-';
+  } else {
+    *buf++ = '+'; // the sign is always written, also for e >= 0
+  }
+
+  auto k = static_cast<std::uint32_t>(e);
+  if (k < 10) {
+    // Always print at least two digits in the exponent.
+    // This is for compatibility with printf("%g").
+    *buf++ = '0';
+    *buf++ = static_cast<char>('0' + k);
+  } else if (k < 100) {
+    *buf++ = static_cast<char>('0' + k / 10); // tens
+    k %= 10;
+    *buf++ = static_cast<char>('0' + k); // ones
+  } else {
+    *buf++ = static_cast<char>('0' + k / 100); // hundreds (a single digit given the precondition)
+    k %= 100;
+    *buf++ = static_cast<char>('0' + k / 10); // tens
+    k %= 10;
+    *buf++ = static_cast<char>('0' + k); // ones
+  }
+
+  return buf;
+}
+
+/*!
+@brief prettify v = buf * 10^decimal_exponent in place; buf holds len digits
+If v is in the range [10^min_exp, 10^max_exp) it will be printed in fixed-point
+notation. Otherwise it will be printed in exponential notation.
+@pre min_exp < 0 and max_exp > 0
+@return a pointer past the last character written (no null terminator is added)
+*/
+inline char *format_buffer(char *buf, int len, int decimal_exponent,
+                           int min_exp, int max_exp) {
+
+  const int k = len;
+  const int n = len + decimal_exponent;
+
+  // v = buf * 10^(n-k)
+  // k is the length of the buffer (number of decimal digits)
+  // n is the position of the decimal point relative to the start of the buffer.
+
+  if (k <= n && n <= max_exp) {
+    // digits[000]
+    // len <= max_exp + 2
+
+    std::memset(buf + k, '0', static_cast<size_t>(n) - static_cast<size_t>(k)); // pad with n - k trailing zeros
+    // Make it look like a floating-point number (#362, #378)
+    buf[n + 0] = '.';
+    buf[n + 1] = '0';
+    return buf + (static_cast<size_t>(n) + 2);
+  }
+
+  if (0 < n && n <= max_exp) {
+    // dig.its
+    // len <= max_digits10 + 1
+    std::memmove(buf + (static_cast<size_t>(n) + 1), buf + n,
+                 static_cast<size_t>(k) - static_cast<size_t>(n)); // shift the fractional digits right by one
+    buf[n] = '.';
+    return buf + (static_cast<size_t>(k) + 1U);
+  }
+
+  if (min_exp < n && n <= 0) {
+    // 0.[000]digits
+    // len <= 2 + (-min_exp - 1) + max_digits10
+
+    std::memmove(buf + (2 + static_cast<size_t>(-n)), buf,
+                 static_cast<size_t>(k)); // make room for "0." and -n leading zeros
+    buf[0] = '0';
+    buf[1] = '.';
+    std::memset(buf + 2, '0', static_cast<size_t>(-n));
+    return buf + (2U + static_cast<size_t>(-n) + static_cast<size_t>(k));
+  }
+
+  if (k == 1) {
+    // dE+123
+    // len <= 1 + 5
+
+    buf += 1;
+  } else {
+    // d.igitsE+123
+    // len <= max_digits10 + 1 + 5
+
+    std::memmove(buf + 2, buf + 1, static_cast<size_t>(k) - 1); // shift all but the first digit right by one
+    buf[1] = '.';
+    buf += 1 + static_cast<size_t>(k);
+  }
+
+  *buf++ = 'e';
+  return append_exponent(buf, n - 1); // with one digit before the point, the decimal exponent is n - 1
+}
+
+} // namespace dtoa_impl
+
+/*!
+The format of the resulting decimal representation is similar to printf's %g
+format. Returns an iterator pointing past-the-end of the decimal representation.
+@note The input number must be finite, i.e. NaN's and Inf's are not supported.
+@note The buffer must be large enough: last is ignored, so no bounds are checked.
+@note The result is NOT null-terminated.
+*/
+char *to_chars(char *first, const char *last, double value) {
+  static_cast<void>(last); // maybe unused - fix warning
+  // Use signbit(value) instead of (value < 0) since signbit works for -0.
+  if (std::signbit(value)) {
+    value = -value;
+    *first++ = '-';
+  }
+
+  if (value == 0) // +-0
+  {
+    *first++ = '0';
+    // Make it look like a floating-point number (#362, #378)
+    *first++ = '.';
+    *first++ = '0';
+    return first;
+  }
+  // Compute v = buffer * 10^decimal_exponent.
+  // The decimal digits are stored in the buffer, which needs to be interpreted
+  // as an unsigned decimal integer.
+  // len is the length of the buffer, i.e. the number of decimal digits.
+  int len = 0;
+  int decimal_exponent = 0;
+  dtoa_impl::grisu2(first, len, decimal_exponent, value);
+  // Fixed-point notation for values in [10^kMinExp, 10^kMaxExp), exponential notation otherwise.
+  constexpr int kMinExp = -4;
+  constexpr int kMaxExp = std::numeric_limits<double>::digits10; // 15 for IEEE double precision
+
+  return dtoa_impl::format_buffer(first, len, decimal_exponent, kMinExp,
+                                  kMaxExp);
+}
+} // namespace internal
+} // namespace simdjson
\ No newline at end of file
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 5f7c02e2..224e8941 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -52,6 +52,7 @@ target_compile_definitions(stringparsingcheck PRIVATE NOMINMAX)
 # All remaining tests link with simdjson proper
 link_libraries(simdjson)
 add_cpp_test(basictests LABELS acceptance per_implementation)
+add_cpp_test(minify_tests LABELS acceptance per_implementation)
 add_cpp_test(document_stream_tests LABELS acceptance per_implementation)
 add_cpp_test(document_tests LABELS acceptance per_implementation)
 add_cpp_test(errortests LABELS acceptance per_implementation)
diff --git a/tests/basictests.cpp b/tests/basictests.cpp
index 7f39023a..42b0b2d4 100644
--- a/tests/basictests.cpp
+++ b/tests/basictests.cpp
@@ -1365,8 +1365,8 @@ namespace minify_tests {
 
   bool test_minify() {
     std::cout << "Running " << __func__ << std::endl;
-    const std::string test = R"({ "foo" : 1, "bar" : [ 1, 2, 3 ], "baz": { "a": 1, "b": 2, "c": 3 } })";
-    const std::string minified(R"({"foo":1,"bar":[1,2,3],"baz":{"a":1,"b":2,"c":3}})");
+    const std::string test = R"({ "foo" : 1, "bar" : [ 1, 2, 0.11111111111111113 ], "baz": { "a": 3.1415926535897936, "b": 2, "c": 3.141592653589794 } })";
+    const std::string minified(R"({"foo":1,"bar":[1,2,0.11111111111111113],"baz":{"a":3.1415926535897936,"b":2,"c":3.141592653589794}})");
     return check_minification(test.c_str(), test.size(), minified.c_str(), minified.size());
   }
   bool test_minify_array() {
@@ -1394,8 +1394,8 @@ namespace format_tests {
   using namespace simdjson;
   using namespace simdjson::dom;
   using namespace std;
-  const padded_string DOCUMENT = R"({ "foo" : 1, "bar" : [ 1, 2, 3 ], "baz": { "a": 1, "b": 2, "c": 3 } })"_padded;
-  const string MINIFIED(R"({"foo":1,"bar":[1,2,3],"baz":{"a":1,"b":2,"c":3}})");
+  const padded_string DOCUMENT = R"({ "foo" : 1, "bar" : [ 1, 2, 0.11111111111111113 ], "baz": { "a": 3.1415926535897936, "b": 2, "c": 3.141592653589794 } })"_padded;
+  const string MINIFIED(R"({"foo":1,"bar":[1,2,0.11111111111111113],"baz":{"a":3.1415926535897936,"b":2,"c":3.141592653589794}})");
   bool assert_minified(ostringstream &actual, const std::string &expected=MINIFIED) {
     if (actual.str() != expected) {
       cerr << "Failed to correctly minify " << DOCUMENT << endl;
@@ -1451,7 +1451,7 @@ namespace format_tests {
     ASSERT_SUCCESS( parser.parse(DOCUMENT)["bar"].get(array) );
     ostringstream s;
     s << array;
-    return assert_minified(s, "[1,2,3]");
+    return assert_minified(s, "[1,2,0.11111111111111113]");
   }
   bool print_minify_array() {
     std::cout << "Running " << __func__ << std::endl;
@@ -1460,7 +1460,7 @@ namespace format_tests {
     ASSERT_SUCCESS( parser.parse(DOCUMENT)["bar"].get(array) );
     ostringstream s;
     s << minify(array);
-    return assert_minified(s, "[1,2,3]");
+    return assert_minified(s, "[1,2,0.11111111111111113]");
   }
 
   bool print_object() {
@@ -1470,7 +1470,7 @@ namespace format_tests {
     ASSERT_SUCCESS( parser.parse(DOCUMENT)["baz"].get(object) );
     ostringstream s;
     s << object;
-    return assert_minified(s, R"({"a":1,"b":2,"c":3})");
+    return assert_minified(s, R"({"a":3.1415926535897936,"b":2,"c":3.141592653589794})");
   }
   bool print_minify_object() {
     std::cout << "Running " << __func__ << std::endl;
@@ -1479,7 +1479,7 @@ namespace format_tests {
     ASSERT_SUCCESS( parser.parse(DOCUMENT)["baz"].get(object) );
     ostringstream s;
     s << minify(object);
-    return assert_minified(s, R"({"a":1,"b":2,"c":3})");
+    return assert_minified(s, R"({"a":3.1415926535897936,"b":2,"c":3.141592653589794})");
   }
 
 #if SIMDJSON_EXCEPTIONS
@@ -1536,14 +1536,14 @@ namespace format_tests {
     dom::parser parser;
     ostringstream s;
     s << parser.parse(DOCUMENT)["bar"].get<dom::array>();
-    return assert_minified(s, "[1,2,3]");
+    return assert_minified(s, "[1,2,0.11111111111111113]");
   }
   bool print_minify_array_result_exception() {
     std::cout << "Running " << __func__ << std::endl;
     dom::parser parser;
     ostringstream s;
     s << minify(parser.parse(DOCUMENT)["bar"].get<dom::array>());
-    return assert_minified(s, "[1,2,3]");
+    return assert_minified(s, "[1,2,0.11111111111111113]");
   }
 
   bool print_object_result_exception() {
@@ -1551,14 +1551,14 @@ namespace format_tests {
     dom::parser parser;
     ostringstream s;
     s << parser.parse(DOCUMENT)["baz"].get<dom::object>();
-    return assert_minified(s, R"({"a":1,"b":2,"c":3})");
+    return assert_minified(s, R"({"a":3.1415926535897936,"b":2,"c":3.141592653589794})");
   }
   bool print_minify_object_result_exception() {
     std::cout << "Running " << __func__ << std::endl;
     dom::parser parser;
     ostringstream s;
     s << minify(parser.parse(DOCUMENT)["baz"].get<dom::object>());
-    return assert_minified(s, R"({"a":1,"b":2,"c":3})");
+    return assert_minified(s, R"({"a":3.1415926535897936,"b":2,"c":3.141592653589794})");
   }
 
   bool print_array_exception() {
@@ -1567,7 +1567,7 @@ namespace format_tests {
     dom::array array = parser.parse(DOCUMENT)["bar"];
     ostringstream s;
     s << array;
-    return assert_minified(s, "[1,2,3]");
+    return assert_minified(s, "[1,2,0.11111111111111113]");
   }
   bool print_minify_array_exception() {
     std::cout << "Running " << __func__ << std::endl;
@@ -1575,7 +1575,7 @@ namespace format_tests {
     dom::array array = parser.parse(DOCUMENT)["bar"];
     ostringstream s;
     s << minify(array);
-    return assert_minified(s, "[1,2,3]");
+    return assert_minified(s, "[1,2,0.11111111111111113]");
   }
 
   bool print_object_exception() {
@@ -1584,7 +1584,7 @@ namespace format_tests {
     dom::object object = parser.parse(DOCUMENT)["baz"];
     ostringstream s;
     s << object;
-    return assert_minified(s, R"({"a":1,"b":2,"c":3})");
+    return assert_minified(s, R"({"a":3.1415926535897936,"b":2,"c":3.141592653589794})");
   }
   bool print_minify_object_exception() {
     std::cout << "Running " << __func__ << std::endl;
@@ -1592,7 +1592,7 @@ namespace format_tests {
     dom::object object = parser.parse(DOCUMENT)["baz"];
     ostringstream s;
     s << minify(object);
-    return assert_minified(s, R"({"a":1,"b":2,"c":3})");
+    return assert_minified(s, R"({"a":3.1415926535897936,"b":2,"c":3.141592653589794})");
   }
 #endif // SIMDJSON_EXCEPTIONS
 
@@ -1615,6 +1615,149 @@ namespace format_tests {
 }
 
 
+namespace to_string_tests {
+  using namespace simdjson;
+  using namespace simdjson::dom;
+  using namespace std;
+  const padded_string DOCUMENT = R"({ "foo" : 1, "bar" : [ 1, 2, 0.11111111111111113 ], "baz": { "a": 3.1415926535897936, "b": 2, "c": 3.141592653589794 } })"_padded;
+  const string MINIFIED(R"({"foo":1,"bar":[1,2,0.11111111111111113],"baz":{"a":3.1415926535897936,"b":2,"c":3.141592653589794}})");
+  // Compares the text collected in `actual` with `expected`; logs both to stderr on mismatch.
+  bool assert_minified(ostringstream &actual, const std::string &expected=MINIFIED) {
+    const string produced = actual.str();
+    if (produced == expected) { return true; }
+    cerr << "Failed to correctly to_string " << DOCUMENT << endl;
+    cerr << "Expected: " << expected << endl;
+    cerr << "Actual:   " << produced << endl;
+    return false;
+  }
+
+  // to_string() on the root element, fetched with get() and checked with ASSERT_SUCCESS.
+  bool print_to_string_parser_parse() {
+    cout << "Running " << __func__ << endl;
+    dom::parser json_parser;
+    dom::element root;
+    ASSERT_SUCCESS( json_parser.parse(DOCUMENT).get(root) );
+    ostringstream out;
+    out << to_string(root);
+    return assert_minified(out);
+  }
+
+  // to_string() on the element stored under "foo"; the expected text is "1".
+  bool print_to_string_element() {
+    cout << "Running " << __func__ << endl;
+    dom::parser json_parser;
+    dom::element foo;
+    ASSERT_SUCCESS( json_parser.parse(DOCUMENT)["foo"].get(foo) );
+    ostringstream out;
+    out << to_string(foo);
+    return assert_minified(out, "1");
+  }
+
+  // to_string() on the dom::array stored under "bar".
+  bool print_to_string_array() {
+    cout << "Running " << __func__ << endl;
+    dom::parser json_parser;
+    dom::array bar;
+    ASSERT_SUCCESS( json_parser.parse(DOCUMENT)["bar"].get(bar) );
+    ostringstream out;
+    out << to_string(bar);
+    return assert_minified(out, "[1,2,0.11111111111111113]");
+  }
+
+  bool print_to_string_object() {
+    cout << "Running " << __func__ << endl;
+    dom::parser json_parser;
+    dom::object baz;
+    ASSERT_SUCCESS( json_parser.parse(DOCUMENT)["baz"].get(baz) );
+    ostringstream out;
+    out << to_string(baz);
+    return assert_minified(out, R"({"a":3.1415926535897936,"b":2,"c":3.141592653589794})");
+  }
+
+#if SIMDJSON_EXCEPTIONS
+  // The variants below contain no explicit error checks; they are compiled only with SIMDJSON_EXCEPTIONS.
+  bool print_to_string_parser_parse_exception() {
+    cout << "Running " << __func__ << endl;
+    dom::parser json_parser;
+    ostringstream out;
+    out << to_string(json_parser.parse(DOCUMENT));
+    return assert_minified(out);
+  }
+
+  bool print_to_string_element_result_exception() {
+    cout << "Running " << __func__ << endl;
+    dom::parser json_parser;
+    ostringstream out;
+    out << to_string(json_parser.parse(DOCUMENT)["foo"]);
+    return assert_minified(out, "1");
+  }
+
+  bool print_to_string_element_exception() {
+    cout << "Running " << __func__ << endl;
+    dom::parser json_parser;
+    dom::element foo = json_parser.parse(DOCUMENT)["foo"];
+    ostringstream out;
+    out << to_string(foo);
+    return assert_minified(out, "1");
+  }
+
+  bool print_to_string_array_result_exception() {
+    cout << "Running " << __func__ << endl;
+    dom::parser json_parser;
+    ostringstream out;
+    out << to_string(json_parser.parse(DOCUMENT)["bar"].get<dom::array>());
+    return assert_minified(out, "[1,2,0.11111111111111113]");
+  }
+
+  // The value returned by get<dom::object>() is handed straight to to_string().
+  bool print_to_string_object_result_exception() {
+    cout << "Running " << __func__ << endl;
+    dom::parser json_parser;
+    ostringstream out;
+    out << to_string(json_parser.parse(DOCUMENT)["baz"].get<dom::object>());
+    return assert_minified(out, R"({"a":3.1415926535897936,"b":2,"c":3.141592653589794})");
+  }
+
+  // Here the lookup result is first stored in a dom::array local, then serialized.
+  bool print_to_string_array_exception() {
+    cout << "Running " << __func__ << endl;
+    dom::parser json_parser;
+    dom::array bar = json_parser.parse(DOCUMENT)["bar"];
+    ostringstream out;
+    out << to_string(bar);
+    return assert_minified(out, "[1,2,0.11111111111111113]");
+  }
+
+  bool print_to_string_object_exception() {
+    cout << "Running " << __func__ << endl;
+    dom::parser json_parser;
+    dom::object baz = json_parser.parse(DOCUMENT)["baz"];
+    ostringstream out;
+    out << to_string(baz);
+    return assert_minified(out, R"({"a":3.1415926535897936,"b":2,"c":3.141592653589794})");
+  }
+#endif // SIMDJSON_EXCEPTIONS
+  // Runs the tests in the order of the chain below; && stops at the first one that fails.
+  bool run() {
+    return print_to_string_parser_parse() &&
+           print_to_string_element() &&
+           print_to_string_array() &&
+           print_to_string_object() &&
+#if SIMDJSON_EXCEPTIONS
+           print_to_string_parser_parse_exception() &&
+           print_to_string_element_result_exception() &&
+           print_to_string_array_result_exception() &&
+           print_to_string_object_result_exception() &&
+           print_to_string_element_exception() &&
+           print_to_string_array_exception() &&
+           print_to_string_object_exception() &&
+#endif
+           true;
+  }
+} // namespace to_string_tests
+
+
+
 int main(int argc, char *argv[]) {
   std::cout << std::unitbuf;
   int c;
@@ -1646,7 +1789,8 @@ int main(int argc, char *argv[]) {
   std::cout << "------------------------------------------------------------" << std::endl;
 
   std::cout << "Running basic tests." << std::endl;
-  if (validate_tests::run() &&
+  if (to_string_tests::run() &&
+      validate_tests::run() &&
       minify_tests::run() &&
       parse_api_tests::run() &&
       dom_api_tests::run() &&
diff --git a/tests/minify_tests.cpp b/tests/minify_tests.cpp
new file mode 100644
index 00000000..cc0e8ec7
--- /dev/null
+++ b/tests/minify_tests.cpp
@@ -0,0 +1,80 @@
+#include <cinttypes>
+#include <ciso646>
+#include <cmath>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <iostream>
+#include <set>
+#include <sstream>
+#include <string>
+#include <unistd.h>
+#include <utility>
+#include <vector>
+
+#include "cast_tester.h"
+#include "simdjson.h"
+#include "test_macros.h"
+
+// JSON inputs (paths are defined in test_macros.h) that minify_test() iterates over.
+const char *test_files[] = {TWITTER_JSON, TWITTER_TIMELINE_JSON, REPEAT_JSON, CANADA_JSON,
+                            MESH_JSON, APACHE_JSON, GSOC_JSON};
+/**
+ * The general idea of these tests is that if you take a JSON file,
+ * load it, then convert it into a string, then parse that, and
+ * convert it again into a second string, then the two strings should
+ * be identical. If not, then something was lost or added in the
+ * process.
+ */
+
+// Loads `filename`, serializes it with to_string(), parses that text and serializes it again.
+// Returns true only if both serializations are identical; every failure is reported on stderr.
+bool load_to_string(const char *filename) {
+  std::cout << "Loading " << filename << std::endl;
+  simdjson::dom::parser parser;
+  simdjson::dom::element doc;
+  auto error = parser.load(filename).get(doc);
+  if (error) { std::cerr << error << std::endl; return false; }
+  const auto serial1 = simdjson::to_string(doc);
+  error = parser.parse(serial1).get(doc);
+  if (error) { std::cerr << error << std::endl; return false; }
+  const auto serial2 = simdjson::to_string(doc);
+  if (serial1 != serial2) { std::cerr << "to_string output changed after a round trip: " << filename << std::endl; return false; }
+  std::cout << "Parsing to_string and calling to_string again results in the "
+               "same content."
+            << std::endl;
+  return true;
+}
+
+// Loads `filename`, serializes it with minify(), parses that text and serializes it again.
+// Returns true only if both serializations are identical; every failure is reported on stderr.
+bool load_minify(const char *filename) {
+  std::cout << "Loading " << filename << std::endl;
+  simdjson::dom::parser parser;
+  simdjson::dom::element doc;
+  auto error = parser.load(filename).get(doc);
+  if (error) { std::cerr << error << std::endl; return false; }
+  const auto serial1 = simdjson::minify(doc);
+  error = parser.parse(serial1).get(doc);
+  if (error) { std::cerr << error << std::endl; return false; }
+  const auto serial2 = simdjson::minify(doc);
+  if (serial1 != serial2) { std::cerr << "minify output changed after a round trip: " << filename << std::endl; return false; }
+  std::cout << "Parsing minify and calling minify again results in the same "
+               "content."
+            << std::endl;
+  return true;
+}
+
+// Runs both round-trip checks on every entry of test_files and stops at the first failure.
+bool minify_test() {
+  std::cout << "Running " << __func__ << std::endl;
+
+  for (const char *path : test_files) {
+    // load_to_string() goes first; load_minify() is only tried once it has passed.
+    if (!load_to_string(path)) { return false; }
+    if (!load_minify(path)) { return false; }
+  }
+  return true;
+}
+
+int main() { const bool passed = minify_test(); return passed ? EXIT_SUCCESS : EXIT_FAILURE; } // exit status mirrors the test result
diff --git a/tests/test_macros.h b/tests/test_macros.h
index 18a4630f..38bca779 100644
--- a/tests/test_macros.h
+++ b/tests/test_macros.h
@@ -7,6 +7,11 @@
 const char *TWITTER_JSON = SIMDJSON_BENCHMARK_DATA_DIR "twitter.json";
 const char *TWITTER_TIMELINE_JSON = SIMDJSON_BENCHMARK_DATA_DIR "twitter_timeline.json";
 const char *REPEAT_JSON = SIMDJSON_BENCHMARK_DATA_DIR "repeat.json";
+const char *CANADA_JSON = SIMDJSON_BENCHMARK_DATA_DIR "canada.json"; // listed in test_files of tests/minify_tests.cpp
+const char *MESH_JSON = SIMDJSON_BENCHMARK_DATA_DIR "mesh.json"; // listed in test_files of tests/minify_tests.cpp
+const char *APACHE_JSON = SIMDJSON_BENCHMARK_DATA_DIR "apache_builds.json"; // listed in test_files of tests/minify_tests.cpp
+const char *GSOC_JSON = SIMDJSON_BENCHMARK_DATA_DIR "gsoc-2018.json"; // listed in test_files of tests/minify_tests.cpp
+
 const char *AMAZON_CELLPHONES_NDJSON = SIMDJSON_BENCHMARK_DATA_DIR "amazon_cellphones.ndjson";
 
 #define SIMDJSON_BENCHMARK_SMALLDATA_DIR SIMDJSON_BENCHMARK_DATA_DIR "small/"