diff --git a/benchmark/bench_sax.cpp b/benchmark/bench_sax.cpp
index 97c79d6c..0965aeb6 100644
--- a/benchmark/bench_sax.cpp
+++ b/benchmark/bench_sax.cpp
@@ -2,6 +2,10 @@
 #define SIMDJSON_IMPLEMENTATION_WESTMERE 0
 #define SIMDJSON_IMPLEMENTATION_AMD64 0
 
+#include <iostream>
+#include <sstream>
+#include <random>
+
 #include "simdjson.h"
 
 SIMDJSON_PUSH_DISABLE_ALL_WARNINGS
@@ -9,7 +13,8 @@ SIMDJSON_PUSH_DISABLE_ALL_WARNINGS
 SIMDJSON_POP_DISABLE_WARNINGS
 
 #include "simdjson.cpp"
-#include "twitter/sax_tweet_reader.h"
+
+#if SIMDJSON_EXCEPTIONS
 
 using namespace benchmark;
 using namespace simdjson;
@@ -19,6 +24,10 @@ using std::endl;
 const char *TWITTER_JSON = SIMDJSON_BENCHMARK_DATA_DIR "twitter.json";
 const int REPETITIONS = 10;
 
+#if SIMDJSON_IMPLEMENTATION_HASWELL
+
+#include "twitter/sax_tweet_reader.h"
+
 static void sax_tweets(State &state) {
   // Load twitter.json to a buffer
   padded_string json;
@@ -50,7 +59,9 @@ BENCHMARK(sax_tweets)->Repetitions(REPETITIONS)->ComputeStatistics("max", [](con
     return *(std::max_element(std::begin(v), std::end(v)));
   })->DisplayAggregatesOnly(true);
 
-#if SIMDJSON_EXCEPTIONS
+#endif // SIMDJSON_IMPLEMENTATION_HASWELL
+
+#include "twitter/tweet.h"
 
 simdjson_really_inline uint64_t nullable_int(dom::element element) {
   if (element.is_null()) { return 0; }
@@ -106,8 +117,6 @@ BENCHMARK(dom_tweets)->Repetitions(REPETITIONS)->ComputeStatistics("max", [](con
     return *(std::max_element(std::begin(v), std::end(v)));
   })->DisplayAggregatesOnly(true);
 
-#endif // SIMDJSON_EXCEPTIONS
-
 static void dom_parse(State &state) {
   // Load twitter.json to a buffer
   padded_string json;
@@ -133,4 +142,218 @@ BENCHMARK(dom_parse)->Repetitions(REPETITIONS)->ComputeStatistics("max", [](cons
     return *(std::max_element(std::begin(v), std::end(v)));
   })->DisplayAggregatesOnly(true);
 
+
+/********************
+ * Large file parsing benchmarks:
+ ********************/
+
+// Builds a JSON array of N {"x","y","z"} objects holding pseudo-random values in [0,1).
+// The engine is default-seeded, so a given N always yields the same text.
+static std::string build_json_array(size_t N) {
+  std::default_random_engine e;
+  std::uniform_real_distribution<> dis(0, 1);
+  std::stringstream myss;
+  // '\n' rather than std::endl: there is no point flushing a string stream per element.
+  myss << "[\n";
+  for(size_t i = 0; i < N; i++) {
+    if(i > 0) { myss << ",\n"; }
+    myss << "{ \"x\":" << dis(e) << ",  \"y\":" << dis(e) << ", \"z\":" << dis(e) << "}";
+    if(i == 0) { myss << '\n'; } // the first element keeps its extra trailing newline
+  }
+  myss << "\n]\n";
+  std::string answer = myss.str();
+  std::cout << "Creating a source file spanning " << (answer.size() + 512) / 1024 << " KB " << std::endl;
+  return answer;
+}
+
+static const simdjson::padded_string& get_my_json_str() {
+  static simdjson::padded_string s = build_json_array(1000000);
+  return s;
+}
+
+struct my_point {
+  double x;
+  double y;
+  double z;
+};
+
+// ./benchmark/bench_sax --benchmark_filter=largerandom
+
+
+/***
+ * DOM baseline: parse the generated array with dom::parser, then copy each point into a vector.
+ **/
+static void dom_parse_largerandom(State &state) {
+  // Fetch the generated JSON array (get_my_json_str() builds it once and caches it)
+  const padded_string& json = get_my_json_str();
+
+  // Allocate parser capacity for the whole document up front
+  dom::parser parser;
+  if (auto error = parser.allocate(json.size())) { cerr << error << endl; return; };
+
+  // Read: every iteration re-parses the document and rebuilds the point vector
+  size_t bytes = 0;
+  simdjson::error_code error;
+  for (SIMDJSON_UNUSED auto _ : state) {
+    std::vector<my_point> container;
+    dom::element doc;
+    if ((error = parser.parse(json).get(doc))) { 
+      std::cerr << "failure: " << error << std::endl;
+      throw "Parsing failed"; // NOTE(review): throws a string literal (const char*), not a std::exception
+    };
+    for (auto p : doc) {
+      container.emplace_back(my_point{p["x"], p["y"], p["z"]});
+    }
+    bytes += json.size();
+    benchmark::DoNotOptimize(container.data());
+
+  }
+  // Gigabyte: https://en.wikipedia.org/wiki/Gigabyte
+  state.counters["Gigabytes"] = benchmark::Counter(
+	        double(bytes), benchmark::Counter::kIsRate,
+	        benchmark::Counter::OneK::kIs1000); // For GiB : kIs1024
+  state.counters["docs"] = Counter(double(state.iterations()), benchmark::Counter::kIsRate);
+}
+
+BENCHMARK(dom_parse_largerandom)->Repetitions(REPETITIONS)->ComputeStatistics("max", [](const std::vector<double>& v) -> double {
+    return *(std::max_element(std::begin(v), std::end(v)));
+  })->DisplayAggregatesOnly(true);
+
+#if SIMDJSON_IMPLEMENTATION_HASWELL
+
+/*** 
+ * Next we are going to code the SAX approach.
+ **/
+
+SIMDJSON_TARGET_HASWELL
+
+namespace largerandom {
+namespace {
+
+using namespace simdjson;
+using namespace haswell;
+using namespace haswell::stage2;
+struct sax_point_reader_visitor {
+public:
+  // Appends one my_point per JSON object to _points (held by reference, not owned).
+  explicit sax_point_reader_visitor(std::vector<my_point> &_points) : points(_points) {}
+
+  simdjson_really_inline error_code visit_document_start(json_iterator &) { return SUCCESS; }
+  simdjson_really_inline error_code visit_object_start(json_iterator &) { return SUCCESS; }
+  simdjson_really_inline error_code visit_key(json_iterator &, const uint8_t *key) {
+    // Only one byte of the key is inspected; idx selects the buffer slot for the next value.
+    // NOTE(review): if key points at the opening quote rather than the name, test key[1] - confirm.
+    switch(key[0]) {
+      case 'x': idx = 0; break;
+      case 'y': idx = 1; break; // was 2, which left slot 1 unused
+      case 'z': idx = 2; break; // was 3, one past the end of buffer[3]
+    }
+    return SUCCESS;
+  }
+  simdjson_really_inline error_code visit_primitive(json_iterator &, const uint8_t *value) {
+    return numberparsing::parse_double(value).get(buffer[idx]);
+  }
+  simdjson_really_inline error_code visit_array_start(json_iterator &)  { return SUCCESS; }
+  simdjson_really_inline error_code visit_array_end(json_iterator &) { return SUCCESS; }
+  simdjson_really_inline error_code visit_object_end(json_iterator &)  {
+    // Store the finished point; previously nothing was ever appended to points.
+    points.emplace_back(my_point{buffer[0], buffer[1], buffer[2]});
+    return SUCCESS;
+  }
+  simdjson_really_inline error_code visit_document_end(json_iterator &)  { return SUCCESS; }
+  simdjson_really_inline error_code visit_empty_array(json_iterator &)  { return SUCCESS; }
+  simdjson_really_inline error_code visit_empty_object(json_iterator &)  { return SUCCESS; }
+  simdjson_really_inline error_code visit_root_primitive(json_iterator &, const uint8_t *)  { return SUCCESS; }
+  simdjson_really_inline error_code increment_count(json_iterator &) { return SUCCESS; }
+  std::vector<my_point> &points;
+  size_t idx{0};       // buffer slot chosen by the most recent key
+  double buffer[3]{};  // x, y, z of the object currently being read
+};
+
+struct sax_point_reader {
+  std::vector<my_point> points;
+  std::unique_ptr<uint8_t[]> string_buf;
+  size_t capacity;
+  dom_parser_implementation dom_parser;
+
+  sax_point_reader();
+  error_code set_capacity(size_t new_capacity);
+  error_code read_points(const padded_string &json);
+}; // struct sax_point_reader
+
+sax_point_reader::sax_point_reader() : points{}, string_buf{}, capacity{0}, dom_parser() {
+}
+
+error_code sax_point_reader::set_capacity(size_t new_capacity) {
+  // Size the string buffer with the formula copied from document::allocate.
+  size_t string_capacity = SIMDJSON_ROUNDUP_N(5 * new_capacity / 3 + 32, 64);
+  string_buf.reset(new (std::nothrow) uint8_t[string_capacity]);
+  if (!string_buf) { return MEMALLOC; } // nothrow new signals failure with a null pointer
+  if (auto error = dom_parser.set_capacity(new_capacity)) { return error; }
+  // Set max depth the first time only.
+  if (capacity == 0) { if (auto error = dom_parser.set_max_depth(DEFAULT_MAX_DEPTH)) { return error; } }
+  capacity = new_capacity;
+  return SUCCESS;
+}
+
+// Parses json and refills `points`; returns the first error reported by any step, else SUCCESS.
+error_code sax_point_reader::read_points(const padded_string &json) {
+  points.clear();
+  // Grow to fit this document (was set_capacity(capacity), which re-requested the current, too small size).
+  if (capacity < json.size()) {
+    if (auto error = set_capacity(json.size())) { return error; }
+  }
+
+  // Run stage 1 first.
+  if (auto error = dom_parser.stage1((uint8_t *)json.data(), json.size(), false)) { return error; }
+
+  // Then walk the document, handing each key and value to the point visitor.
+  json_iterator iter(dom_parser, 0);
+  sax_point_reader_visitor visitor(points);
+  return iter.walk_document<false>(visitor);
+}
+
+} // unnamed namespace
+} // namespace largerandom
+
+SIMDJSON_UNTARGET_REGION
+
+
+
+
+
+// SAX counterpart of dom_parse_largerandom. Run: ./benchmark/bench_sax --benchmark_filter=largerandom
+static void sax_parse_largerandom(State &state) {
+  // Fetch the generated JSON array (get_my_json_str() builds it once and caches it)
+  const padded_string& json = get_my_json_str();
+
+  // Allocate the reader's buffers for the whole document up front
+  largerandom::sax_point_reader reader;
+  if (auto error = reader.set_capacity(json.size())) { throw error; }
+  // Warm up: ten passes before the measured loop starts
+  for(size_t i = 0; i < 10; i++) {
+    if (auto error = reader.read_points(json)) { throw error; }
+  }
+
+  // Read: every iteration re-reads the points from the same document
+  size_t bytes = 0;
+  for (SIMDJSON_UNUSED auto _ : state) {
+    if (auto error = reader.read_points(json)) { throw error; }
+    bytes += json.size();
+    benchmark::DoNotOptimize(reader.points.data());
+  }
+  // Gigabyte: https://en.wikipedia.org/wiki/Gigabyte
+  state.counters["Gigabytes"] = benchmark::Counter(
+	        double(bytes), benchmark::Counter::kIsRate,
+	        benchmark::Counter::OneK::kIs1000); // For GiB : kIs1024
+  state.counters["docs"] = Counter(double(state.iterations()), benchmark::Counter::kIsRate);
+}
+BENCHMARK(sax_parse_largerandom)->Repetitions(REPETITIONS)->ComputeStatistics("max", [](const std::vector<double>& v) -> double {
+    return *(std::max_element(std::begin(v), std::end(v)));
+  })->DisplayAggregatesOnly(true);
+
+#endif // SIMDJSON_IMPLEMENTATION_HASWELL
+
+#endif // SIMDJSON_EXCEPTIONS
+
 BENCHMARK_MAIN();
diff --git a/src/generic/stage2/numberparsing.h b/src/generic/stage2/numberparsing.h
index 2a0d01ce..9d5b99b0 100644
--- a/src/generic/stage2/numberparsing.h
+++ b/src/generic/stage2/numberparsing.h
@@ -316,7 +316,7 @@ simdjson_really_inline bool parse_exponent(SIMDJSON_UNUSED const uint8_t *const
   // a single simdjson_unlikely path would be faster. The reasoning is sound, but the compiler may
   // not oblige and may, in fact, generate two distinct paths in any case. It might be
   // possible to do uint64_t(p - start_exp - 1) >= 18 but it could end up trading off 
-  // instructions for a likely branch, an unconclusive gain.
+  // instructions for a likely branch, an inconclusive gain.
 
   // If there were no digits, it's an error.
   if (simdjson_unlikely(p == start_exp)) {
@@ -501,6 +501,8 @@ simdjson_really_inline bool parse_number(const uint8_t *const src, W &writer) {
   return is_structural_or_whitespace(*p);
 }
 
+// SAX functions
+namespace {
 // Parse any number from 0 to 18,446,744,073,709,551,615
 SIMDJSON_UNUSED simdjson_really_inline simdjson_result<uint64_t> parse_unsigned(const uint8_t * const src) noexcept {
   const uint8_t *p = src;
@@ -542,59 +544,58 @@ SIMDJSON_UNUSED simdjson_really_inline simdjson_result<uint64_t> parse_unsigned(
 
 // Parse any number from 0 to 18,446,744,073,709,551,615
 // Call this version of the method if you regularly expect 8- or 16-digit numbers.
-// simdjson_really_inline simdjson_result<uint64_t> parse_large_unsigned(const uint8_t * const src) noexcept {
-//   const uint8_t *p = src;
+SIMDJSON_UNUSED simdjson_really_inline simdjson_result<uint64_t> parse_large_unsigned(const uint8_t * const src) noexcept {
+  const uint8_t *p = src;
 
-//   //
-//   // Parse the integer part.
-//   //
-//   const uint8_t *const start_digits = p;
-//   uint64_t i = 0;
-//   if (is_made_of_eight_digits_fast(p)) {
-//     i = i * 100000000 + parse_eight_digits_unrolled(p);
-//     p += 8;
-//     if (is_made_of_eight_digits_fast(p)) {
-//       i = i * 100000000 + parse_eight_digits_unrolled(p);
-//       p += 8;
-//       if (parse_digit(*p, i)) { // digit 17
-//         p++;
-//         if (parse_digit(*p, i)) { // digit 18
-//           p++;
-//           if (parse_digit(*p, i)) { // digit 19
-//             p++;
-//             if (parse_digit(*p, i)) { // digit 20
-//               p++;
-//               if (parse_digit(*p, i)) { return NUMBER_ERROR; } // 21 digits is an error
-//               // Positive overflow check:
-//               // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the
-//               //   biggest uint64_t.
-//               // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX.
-//               //   If we got here, it's a 20 digit number starting with the digit "1".
-//               // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller
-//               //   than 1,553,255,926,290,448,384.
-//               // - That is smaller than the smallest possible 20-digit number the user could write:
-//               //   10,000,000,000,000,000,000.
-//               // - Therefore, if the number is positive and lower than that, it's overflow.
-//               // - The value we are looking at is less than or equal to 9,223,372,036,854,775,808 (INT64_MAX).
-//               //
-//               if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return NUMBER_ERROR; }
-//             }
-//           }
-//         }
-//       }
-//     } // 16 digits
-//   } else { // 8 digits
-//     // Less than 8 digits can't overflow, simpler logic here.
-//     if (parse_digit(*p, i)) { p++; } else { return NUMBER_ERROR; }
-//     while (parse_digit(*p, i)) { p++; }
-//   }
+  //
+  // Parse the integer part, eight digits at a time while that is possible.
+  //
+  uint64_t i = 0;
+  if (is_made_of_eight_digits_fast(p)) {
+    i = i * 100000000 + parse_eight_digits_unrolled(p);
+    p += 8;
+    if (is_made_of_eight_digits_fast(p)) {
+      i = i * 100000000 + parse_eight_digits_unrolled(p);
+      p += 8;
+      if (parse_digit(*p, i)) { // digit 17
+        p++;
+        if (parse_digit(*p, i)) { // digit 18
+          p++;
+          if (parse_digit(*p, i)) { // digit 19
+            p++;
+            if (parse_digit(*p, i)) { // digit 20
+              p++;
+              if (parse_digit(*p, i)) { return NUMBER_ERROR; } // 21 digits is an error
+              // Positive overflow check:
+              // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the
+              //   biggest uint64_t.
+              // - A 20 digit number starting with 1 overflowed if the wrapped result is no greater than INT64_MAX.
+              //   If we got here, it's a 20 digit number starting with the digit "1".
+              // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller
+              //   than 1,553,255,926,290,448,384.
+              // - That is smaller than the smallest possible 20-digit number the user could write:
+              //   10,000,000,000,000,000,000.
+              // - Therefore, if the number is positive and lower than that, it's overflow.
+              // - The value we are looking at is less than or equal to 9,223,372,036,854,775,807 (INT64_MAX).
+              //
+              if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return NUMBER_ERROR; }
+            }
+          }
+        }
+      }
+    } else { while (parse_digit(*p, i)) { p++; } } // 9 to 15 digits: finish one at a time, cannot overflow
+  } else { // fewer than 8 digits
+    // Less than 8 digits can't overflow, simpler logic here.
+    if (parse_digit(*p, i)) { p++; } else { return NUMBER_ERROR; }
+    while (parse_digit(*p, i)) { p++; }
+  }
 
-//   if (!is_structural_or_whitespace(*p, i)) { return NUMBER_ERROR; }
-//   // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error.
-//   int digit_count = int(p - src);
-//   if (digit_count == 0 || ('0' == *src && digit_count > 1)) { return NUMBER_ERROR; }
-//   return i;
-// }
+  if (!is_structural_or_whitespace(*p)) { return NUMBER_ERROR; }
+  // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error.
+  int digit_count = int(p - src);
+  if (digit_count == 0 || ('0' == *src && digit_count > 1)) { return NUMBER_ERROR; }
+  return i;
+}
 
 // Parse any number from  -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807
 SIMDJSON_UNUSED simdjson_really_inline simdjson_result<int64_t> parse_integer(const uint8_t *src) noexcept {
@@ -646,82 +647,82 @@ SIMDJSON_UNUSED simdjson_really_inline simdjson_result<int64_t> parse_integer(co
   return negative ? (~i+1) : i;
 }
 
-// simdjson_really_inline simdjson_result<double> parse_double(const uint8_t * src) noexcept {
-//   //
-//   // Check for minus sign
-//   //
-//   bool negative = (*src == '-');
-//   src += negative;
+SIMDJSON_UNUSED simdjson_really_inline simdjson_result<double> parse_double(const uint8_t * src) noexcept {
+  // Parses the JSON number at src as a double: optional '-', integer digits, then an optional
+  // fraction and exponent. Returns NUMBER_ERROR on malformed input.
+  // First, check for the minus sign.
+  bool negative = (*src == '-');
+  src += negative;
 
-//   //
-//   // Parse the integer part.
-//   //
-//   uint64_t i = 0;
-//   const uint8_t *p = src;
-//   p += parse_digit(*p, i);
-//   bool leading_zero = (i == 0);
-//   while (parse_digit(*p, i)) { p++; }
-//   // no integer digits, or 0123 (zero must be solo)
-//   if ( p == src || (leading_zero && p != src+1)) { return NUMBER_ERROR; }
+  //
+  // Parse the integer part.
+  //
+  uint64_t i = 0;
+  const uint8_t *p = src;
+  p += parse_digit(*p, i);
+  bool leading_zero = (i == 0);
+  while (parse_digit(*p, i)) { p++; }
+  // no integer digits, or 0123 (zero must be solo)
+  if ( p == src || (leading_zero && p != src+1)) { return NUMBER_ERROR; }
 
-//   //
-//   // Parse the decimal part.
-//   //
-//   int64_t exponent = 0;
-//   bool overflow;
-//   if (likely(*p == '.')) {
-//     p++;
-//     const uint8_t *start_decimal_digits = p;
-//     if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits
-//     p++;
-//     while (parse_digit(*p, i)) { p++; }
-//     exponent = -(p - start_decimal_digits);
+  //
+  // Parse the decimal part.
+  //
+  int64_t exponent = 0;
+  bool overflow;
+  if (simdjson_likely(*p == '.')) {
+    p++;
+    const uint8_t *start_decimal_digits = p;
+    if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits
+    p++;
+    while (parse_digit(*p, i)) { p++; }
+    exponent = -(p - start_decimal_digits);
 
-//     // Overflow check. 19 digits (minus the decimal) may be overflow.
-//     overflow = p-src-1 >= 19;
-//     if (SIMDJSON_unlikely(overflow && leading_zero)) {
-//       // Skip leading 0.00000 and see if it still overflows
-//       const uint8_t *start_digits = src + 2;
-//       while (*start_digits == '0') { start_digits++; }
-//       overflow = start_digits-src >= 19;
-//     }
-//   } else {
-//     overflow = p-src >= 19;
-//   }
+    // Overflow check. 19 digits (minus the decimal) may be overflow.
+    overflow = p-src-1 >= 19;
+    if (simdjson_unlikely(overflow && leading_zero)) {
+      // Skip leading 0.00000 and see if it still overflows
+      const uint8_t *start_digits = src + 2;
+      while (*start_digits == '0') { start_digits++; }
+      overflow = p-start_digits >= 19; // count the digits left after the zeros, not the characters skipped
+    }
+  } else {
+    overflow = p-src >= 19;
+  }
 
-//   //
-//   // Parse the exponent
-//   //
-//   if (*p == 'e' || *p == 'E') {
-//     p++;
-//     bool exp_neg = *p == '-';
-//     p += exp_neg || *p == '+';
+  //
+  // Parse the exponent
+  //
+  if (*p == 'e' || *p == 'E') {
+    p++;
+    bool exp_neg = *p == '-';
+    p += exp_neg || *p == '+';
 
-//     uint64_t exp = 0;
-//     const uint8_t *start_exp_digits = p;
-//     while (parse_digit(*p, exp)) { p++; }
-//     // no exp digits, or 20+ exp digits
-//     if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; }
+    uint64_t exp = 0;
+    const uint8_t *start_exp_digits = p;
+    while (parse_digit(*p, exp)) { p++; }
+    // no exp digits, or 20+ exp digits
+    if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; }
 
-//     exponent += exp_neg ? 0-exp : exp;
-//     overflow = overflow || exponent < FASTFLOAT_SMALLEST_POWER || exponent > FASTFLOAT_LARGEST_POWER;
-//   }
-
-//   //
-//   // Assemble (or slow-parse) the float
-//   //
-//   if (likely(!overflow)) {
-//     bool success = false;
-//     double d = compute_float_64(exponent, i, negative, &success);
-//     if (success) { return d; }
-//   }
-//   double d;
-//   if (!parse_float_strtod(src-negative, &d)) {
-//     return NUMBER_ERROR;
-//   }
-//   return d;
-// }
+    exponent += exp_neg ? 0-exp : exp;
+    overflow = overflow || exponent < FASTFLOAT_SMALLEST_POWER || exponent > FASTFLOAT_LARGEST_POWER;
+  }
 
+  //
+  // Assemble (or slow-parse) the float
+  //
+  if (simdjson_likely(!overflow)) {
+    bool success = true;
+    double d = compute_float_64(exponent, i, negative, &success);
+    if (success) { return d; }
+  }
+  double d;
+  if (!parse_float_strtod(src-negative, &d)) {
+    return NUMBER_ERROR;
+  }
+  return d;
+}
+} //namespace {}
 #endif // SIMDJSON_SKIPNUMBERPARSING
 
 } // namespace numberparsing