Merge pull request #876 from simdjson/jkeiser/doc-writing-stage-2

Move tape writes into stage 2 code
2020-05-12 10:05:40 -07:00 · 2020-05-12 10:05:40 -07:00 · c615d52cf4
parent 2a6e6b3dbd dbb3316511
commit c615d52cf4
6 changed files with 157 additions and 248 deletions
--- a/include/simdjson/document.h
+++ b/include/simdjson/document.h
@ -985,9 +985,6 @@ public:
  std::unique_ptr<char[]> ret_address{};
 #endif

-  /** @private Next write location in the string buf for stage 2 parsing */
-  uint8_t *current_string_buf_loc{};
-
  /** @private Use `if (parser.parse(...).error())` instead */
  bool valid{false};
  /** @private Use `parser.parse(...).error()` instead */
@ -1018,32 +1015,6 @@ public:
  /** @private Private and deprecated: use `parser.parse(...).doc.dump_raw_tape()` instead */
  inline bool dump_raw_tape(std::ostream &os) const noexcept;

-  //
-  // Parser callbacks: these are internal!
-  //
-
-  /** @private this should be called when parsing (right before writing the tapes) */
-  inline void init_stage2() noexcept;
-  really_inline error_code on_error(error_code new_error_code) noexcept; ///< @private
-  really_inline error_code on_success(error_code success_code) noexcept; ///< @private
-  really_inline bool on_start_document(uint32_t depth) noexcept; ///< @private
-  really_inline bool on_start_object(uint32_t depth) noexcept; ///< @private
-  really_inline bool on_start_array(uint32_t depth) noexcept; ///< @private
-  // TODO we're not checking this bool
-  really_inline bool on_end_document(uint32_t depth) noexcept; ///< @private
-  really_inline bool on_end_object(uint32_t depth) noexcept; ///< @private
-  really_inline bool on_end_array(uint32_t depth) noexcept; ///< @private
-  really_inline bool on_true_atom() noexcept; ///< @private
-  really_inline bool on_false_atom() noexcept; ///< @private
-  really_inline bool on_null_atom() noexcept; ///< @private
-  really_inline uint8_t *on_start_string() noexcept; ///< @private
-  really_inline bool on_end_string(uint8_t *dst) noexcept; ///< @private
-  really_inline bool on_number_s64(int64_t value) noexcept; ///< @private
-  really_inline bool on_number_u64(uint64_t value) noexcept; ///< @private
-  really_inline bool on_number_double(double value) noexcept; ///< @private
-
-  really_inline void increment_count(uint32_t depth) noexcept; ///< @private
-  really_inline void end_scope(uint32_t depth) noexcept; ///< @private
 private:
  /**
   * The maximum document length this parser will automatically support.
@ -1088,8 +1059,6 @@ private:
  //
  //

-  inline void write_tape(uint64_t val, internal::tape_type t) noexcept;
-
  /**
   * Ensure we have enough capacity to handle at least desired_capacity bytes,
   * and auto-allocate if not.
--- a/src/document_parser_callbacks.h
+++ b/src/document_parser_callbacks.h
@ -1,145 +0,0 @@
-#ifndef SIMDJSON_DOCUMENT_PARSER_CALLBACKS_H
-#define SIMDJSON_DOCUMENT_PARSER_CALLBACKS_H
-
-#include "simdjson.h"
-
-namespace simdjson {
-namespace dom {
-
-//
-// Parser callbacks
-//
-
-inline void parser::init_stage2() noexcept {
-  current_string_buf_loc = doc.string_buf.get();
-  current_loc = 0;
-  valid = false;
-  error = UNINITIALIZED;
-}
-
-really_inline error_code parser::on_error(error_code new_error_code) noexcept {
-  error = new_error_code;
-  return new_error_code;
-}
-really_inline error_code parser::on_success(error_code success_code) noexcept {
-  error = success_code;
-  valid = true;
-  return success_code;
-}
-// increment_count increments the count of keys in an object or values in an array.
-// Note that if you are at the level of the values or elements, the count
-// must be increment in the preceding depth (depth-1) where the array or
-// the object resides.
-really_inline void parser::increment_count(uint32_t depth) noexcept {
-  containing_scope[depth].count++;
-}
-
-really_inline bool parser::on_start_document(uint32_t depth) noexcept {
-  containing_scope[depth].tape_index = current_loc;
-  containing_scope[depth].count = 0;
-  write_tape(0, internal::tape_type::ROOT); // if the document is correct, this gets rewritten later
-  return true;
-}
-really_inline bool parser::on_start_object(uint32_t depth) noexcept {
-  containing_scope[depth].tape_index = current_loc;
-  containing_scope[depth].count = 0;
-  write_tape(0, internal::tape_type::START_OBJECT);  // if the document is correct, this gets rewritten later
-  return true;
-}
-really_inline bool parser::on_start_array(uint32_t depth) noexcept {
-  containing_scope[depth].tape_index = current_loc;
-  containing_scope[depth].count = 0;
-  write_tape(0, internal::tape_type::START_ARRAY);  // if the document is correct, this gets rewritten later
-  return true;
-}
-// TODO we're not checking this bool
-really_inline bool parser::on_end_document(uint32_t depth) noexcept {
-  // write our doc.tape location to the header scope
-  // The root scope gets written *at* the previous location.
-  write_tape(containing_scope[depth].tape_index, internal::tape_type::ROOT);
-  end_scope(depth);
-  return true;
-}
-really_inline bool parser::on_end_object(uint32_t depth) noexcept {
-  // write our doc.tape location to the header scope
-  write_tape(containing_scope[depth].tape_index, internal::tape_type::END_OBJECT);
-  end_scope(depth);
-  return true;
-}
-really_inline bool parser::on_end_array(uint32_t depth) noexcept {
-  // write our doc.tape location to the header scope
-  write_tape(containing_scope[depth].tape_index, internal::tape_type::END_ARRAY);
-  end_scope(depth);
-  return true;
-}
-
-really_inline bool parser::on_true_atom() noexcept {
-  write_tape(0, internal::tape_type::TRUE_VALUE);
-  return true;
-}
-really_inline bool parser::on_false_atom() noexcept {
-  write_tape(0, internal::tape_type::FALSE_VALUE);
-  return true;
-}
-really_inline bool parser::on_null_atom() noexcept {
-  write_tape(0, internal::tape_type::NULL_VALUE);
-  return true;
-}
-
-really_inline uint8_t *parser::on_start_string() noexcept {
-  /* we advance the point, accounting for the fact that we have a NULL
-    * termination         */
-  write_tape(current_string_buf_loc - doc.string_buf.get(), internal::tape_type::STRING);
-  return current_string_buf_loc + sizeof(uint32_t);
-}
-
-really_inline bool parser::on_end_string(uint8_t *dst) noexcept {
-  uint32_t str_length = uint32_t(dst - (current_string_buf_loc + sizeof(uint32_t)));
-  // TODO check for overflow in case someone has a crazy string (>=4GB?)
-  // But only add the overflow check when the document itself exceeds 4GB
-  // Currently unneeded because we refuse to parse docs larger or equal to 4GB.
-  memcpy(current_string_buf_loc, &str_length, sizeof(uint32_t));
-  // NULL termination is still handy if you expect all your strings to
-  // be NULL terminated? It comes at a small cost
-  *dst = 0;
-  current_string_buf_loc = dst + 1;
-  return true;
-}
-
-really_inline bool parser::on_number_s64(int64_t value) noexcept {
-  write_tape(0, internal::tape_type::INT64);
-  std::memcpy(&doc.tape[current_loc], &value, sizeof(value));
-  ++current_loc;
-  return true;
-}
-really_inline bool parser::on_number_u64(uint64_t value) noexcept {
-  write_tape(0, internal::tape_type::UINT64);
-  doc.tape[current_loc++] = value;
-  return true;
-}
-really_inline bool parser::on_number_double(double value) noexcept {
-  write_tape(0, internal::tape_type::DOUBLE);
-  static_assert(sizeof(value) == sizeof(doc.tape[current_loc]), "mismatch size");
-  memcpy(&doc.tape[current_loc++], &value, sizeof(double));
-  // doc.tape[doc.current_loc++] = *((uint64_t *)&d);
-  return true;
-}
-
-really_inline void parser::write_tape(uint64_t val, internal::tape_type t) noexcept {
-  doc.tape[current_loc++] = val | ((uint64_t(char(t))) << 56);
-}
-
-// this function is responsible for annotating the start of the scope
-really_inline void parser::end_scope(uint32_t depth) noexcept {
-  scope_descriptor d = containing_scope[depth];
-  // count can overflow if it exceeds 24 bits... so we saturate
-  // the convention being that a cnt of 0xffffff or more is undetermined in value (>=  0xffffff).
-  const uint32_t cntsat =  d.count > 0xFFFFFF ? 0xFFFFFF : d.count;
-  // This is a load and an OR. It would be possible to just write once at doc.tape[d.tape_index]
-  doc.tape[d.tape_index] |= current_loc | (uint64_t(cntsat) << 32);
-}
-
-} // namespace simdjson
-} // namespace dom
-
-#endif // SIMDJSON_DOCUMENT_PARSER_CALLBACKS_H
--- a/src/generic/numberparsing.h
+++ b/src/generic/numberparsing.h
@ -1,6 +1,5 @@
 namespace numberparsing {

-
 // Attempts to compute i * 10^(power) exactly; and if "negative" is
 // true, negate the result.
 // This function will only work in some cases, when it does not work, success is
@ -261,8 +260,9 @@ really_inline bool is_made_of_eight_digits_fast(const char *chars) {
 //
 // This function will almost never be called!!!
 //
+template<typename W>
 never_inline bool parse_large_integer(const uint8_t *const src,
-                                      parser &parser,
+                                      W writer,
                                      bool found_minus) {
  const char *p = reinterpret_cast<const char *>(src);

@ -310,14 +310,14 @@ never_inline bool parse_large_integer(const uint8_t *const src,
      // as a positive signed integer, but the negative version is
      // possible.
      constexpr int64_t signed_answer = INT64_MIN;
-      parser.on_number_s64(signed_answer);
+      writer.write_s64(signed_answer);
 #ifdef JSON_TEST_NUMBERS // for unit testing
      found_integer(signed_answer, src);
 #endif
    } else {
      // we can negate safely
      int64_t signed_answer = -static_cast<int64_t>(i);
-      parser.on_number_s64(signed_answer);
+      writer.write_s64(signed_answer);
 #ifdef JSON_TEST_NUMBERS // for unit testing
      found_integer(signed_answer, src);
 #endif
@ -330,21 +330,22 @@ never_inline bool parse_large_integer(const uint8_t *const src,
 #ifdef JSON_TEST_NUMBERS // for unit testing
      found_integer(i, src);
 #endif
-      parser.on_number_s64(i);
+      writer.write_s64(i);
    } else {
 #ifdef JSON_TEST_NUMBERS // for unit testing
      found_unsigned_integer(i, src);
 #endif
-      parser.on_number_u64(i);
+      writer.write_u64(i);
    }
  }
  return is_structural_or_whitespace(*p);
 }

-bool slow_float_parsing(UNUSED const char * src, parser &parser) {
+template<typename W>
+bool slow_float_parsing(UNUSED const char * src, W writer) {
  double d;
  if (parse_float_strtod(src, &d)) {
-    parser.on_number_double(d);
+    writer.write_double(d);
 #ifdef JSON_TEST_NUMBERS // for unit testing
    found_float(d, (const uint8_t *)src);
 #endif
@ -365,12 +366,13 @@ bool slow_float_parsing(UNUSED const char * src, parser &parser) {
 // content and append a space before calling this function.
 //
 // Our objective is accurate parsing (ULP of 0) at high speed.
+template<typename W>
 really_inline bool parse_number(UNUSED const uint8_t *const src,
                                UNUSED bool found_minus,
-                                parser &parser) {
+                                W writer) {
 #ifdef SIMDJSON_SKIPNUMBERPARSING // for performance analysis, it is sometimes
                                  // useful to skip parsing
-  parser.on_number_s64(0);        // always write zero
+  writer.write_s64(0);        // always write zero
  return true;                    // always succeeds
 #else
  const char *p = reinterpret_cast<const char *>(src);
@ -522,14 +524,14 @@ really_inline bool parse_number(UNUSED const uint8_t *const src,
        // 10000000000000000000000000000000000000000000e+308
        // 3.1415926535897932384626433832795028841971693993751
        //
-        return slow_float_parsing((const char *) src, parser);
+        return slow_float_parsing((const char *) src, writer);
      }
    }
    if (unlikely(exponent < FASTFLOAT_SMALLEST_POWER) ||
        (exponent > FASTFLOAT_LARGEST_POWER)) { // this is uncommon!!!
      // this is almost never going to get called!!!
      // we start anew, going slowly!!!
-      return slow_float_parsing((const char *) src, parser);
+      return slow_float_parsing((const char *) src, writer);
    }
    bool success = true;
    double d = compute_float_64(exponent, i, negative, &success);
@ -538,7 +540,7 @@ really_inline bool parse_number(UNUSED const uint8_t *const src,
      success = parse_float_strtod((const char *)src, &d);
    }
    if (success) {
-      parser.on_number_double(d);
+      writer.write_double(d);
 #ifdef JSON_TEST_NUMBERS // for unit testing
      found_float(d, src);
 #endif
@ -553,10 +555,10 @@ really_inline bool parse_number(UNUSED const uint8_t *const src,
    if (unlikely(digit_count >= 18)) { // this is uncommon!!!
      // there is a good chance that we had an overflow, so we need
      // need to recover: we parse the whole thing again.
-      return parse_large_integer(src, parser, found_minus);
+      return parse_large_integer(src, writer, found_minus);
    }
    i = negative ? 0 - i : i;
-    parser.on_number_s64(i);
+    writer.write_s64(i);
 #ifdef JSON_TEST_NUMBERS // for unit testing
    found_integer(i, src);
 #endif
--- a/src/generic/stage2_build_tape.h
+++ b/src/generic/stage2_build_tape.h
@ -113,9 +113,34 @@ public:
  uint8_t c{0};  // used to track the (structural) character we are looking at
 };

+struct number_writer { // writes parsed numbers onto doc_parser's tape; structural_parser::parse_number passes one to numberparsing::parse_number
+  parser &doc_parser; // parser whose doc.tape and current_loc are written through
+  
+  really_inline void write_s64(int64_t value) noexcept { // emits two tape words: an INT64 tag word, then the value itself
+    write_tape(0, internal::tape_type::INT64);
+    std::memcpy(&doc_parser.doc.tape[doc_parser.current_loc], &value, sizeof(value)); // copy the signed value's bytes into the next tape slot
+    ++doc_parser.current_loc;
+  }
+  really_inline void write_u64(uint64_t value) noexcept { // emits two tape words: a UINT64 tag word, then the value itself
+    write_tape(0, internal::tape_type::UINT64);
+    doc_parser.doc.tape[doc_parser.current_loc++] = value;
+  }
+  really_inline void write_double(double value) noexcept { // emits two tape words: a DOUBLE tag word, then the double's bytes
+    write_tape(0, internal::tape_type::DOUBLE);
+    static_assert(sizeof(value) == sizeof(doc_parser.doc.tape[doc_parser.current_loc]), "mismatch size"); // a double must fill exactly one tape slot
+    memcpy(&doc_parser.doc.tape[doc_parser.current_loc++], &value, sizeof(double));
+    // pointer-cast alternative, kept for reference only: doc.tape[doc.current_loc++] = *((uint64_t *)&d);
+  }
+  really_inline void write_tape(uint64_t val, internal::tape_type t) noexcept { // appends one tape word: type tag in the top 8 bits, OR'ed with val
+    doc_parser.doc.tape[doc_parser.current_loc++] = val | ((uint64_t(char(t))) << 56);
+  }
+}; // struct number_writer
+
 struct structural_parser {
  structural_iterator structurals;
  parser &doc_parser;
+  /** Next write location in the string buf for stage 2 parsing */
+  uint8_t *current_string_buf_loc{};
  uint32_t depth;

  really_inline structural_parser(
@ -125,54 +150,96 @@ struct structural_parser {
    uint32_t next_structural = 0
  ) : structurals(buf, len, _doc_parser.structural_indexes.get(), next_structural), doc_parser{_doc_parser}, depth{0} {}

-  WARN_UNUSED really_inline bool start_document(ret_address continue_state) {
-    doc_parser.on_start_document(depth);
+  WARN_UNUSED really_inline bool start_scope(internal::tape_type type, ret_address continue_state) { // opens a scope at `depth`; returns true once depth reaches max_depth() (callers report DEPTH_ERROR)
+    doc_parser.containing_scope[depth].tape_index = doc_parser.current_loc; // remember where this scope's opening tape word lives
+    doc_parser.containing_scope[depth].count = 0; // no children counted yet
+    write_tape(0, type); // placeholder payload: end_scope() later ORs the end index and child count into this word
    doc_parser.ret_address[depth] = continue_state;
    depth++;
    return depth >= doc_parser.max_depth();
  }

+  WARN_UNUSED really_inline bool start_document(ret_address continue_state) {
+    return start_scope(internal::tape_type::ROOT, continue_state);
+  }
+
  WARN_UNUSED really_inline bool start_object(ret_address continue_state) {
-    doc_parser.on_start_object(depth);
-    doc_parser.ret_address[depth] = continue_state;
-    depth++;
-    return depth >= doc_parser.max_depth();
+    return start_scope(internal::tape_type::START_OBJECT, continue_state);
  }

  WARN_UNUSED really_inline bool start_array(ret_address continue_state) {
-    doc_parser.on_start_array(depth);
-    doc_parser.ret_address[depth] = continue_state;
-    depth++;
-    return depth >= doc_parser.max_depth();
+    return start_scope(internal::tape_type::START_ARRAY, continue_state);
  }

-  really_inline bool end_object() {
+  // this function is responsible for annotating the start of the scope
+  really_inline void end_scope(internal::tape_type type) noexcept {
    depth--;
-    doc_parser.on_end_object(depth);
-    return false;
+    // write our doc.tape location to the header scope
+    // The root scope gets written *at* the previous location.
+    write_tape(doc_parser.containing_scope[depth].tape_index, type);
+    // count can overflow if it exceeds 24 bits... so we saturate
+    // the convention being that a cnt of 0xffffff or more is undetermined in value (>=  0xffffff).
+    const uint32_t start_tape_index = doc_parser.containing_scope[depth].tape_index;
+    const uint32_t count = doc_parser.containing_scope[depth].count;
+    const uint32_t cntsat = count > 0xFFFFFF ? 0xFFFFFF : count;
+    // This is a load and an OR. It would be possible to just write once at doc.tape[start_tape_index]
+    doc_parser.doc.tape[start_tape_index] |= doc_parser.current_loc | (uint64_t(cntsat) << 32);
  }
-  really_inline bool end_array() {
-    depth--;
-    doc_parser.on_end_array(depth);
-    return false;
+
+  really_inline void end_object() {
+    end_scope(internal::tape_type::END_OBJECT);
  }
-  really_inline bool end_document() {
-    depth--;
-    doc_parser.on_end_document(depth);
-    return false;
+  really_inline void end_array() {
+    end_scope(internal::tape_type::END_ARRAY);
+  }
+  really_inline void end_document() {
+    end_scope(internal::tape_type::ROOT);
+  }
+
+  really_inline void write_tape(uint64_t val, internal::tape_type t) noexcept { // appends one tape word and advances current_loc
+    doc_parser.doc.tape[doc_parser.current_loc++] = val | ((uint64_t(char(t))) << 56); // type tag occupies the top 8 bits, val is OR'ed in below it
+  }
+
+  // increment_count increments the count of keys in an object or values in an array.
+  // Note that if you are at the level of the values or elements, the count
+  // must be incremented in the preceding depth (depth-1) where the array or
+  // the object resides.
+  really_inline void increment_count() {
+    doc_parser.containing_scope[depth - 1].count++; // one more key/value pair (object) or element (array) in the scope at depth - 1
+  }
+
+  really_inline uint8_t *on_start_string() noexcept { // begins a string: writes its STRING tape word and returns where the characters should be written
+    /* Record this string's offset within string_buf on the tape, then return the spot
+      * just past the uint32_t length slot, which on_end_string() fills in afterwards. */
+    write_tape(current_string_buf_loc - doc_parser.doc.string_buf.get(), internal::tape_type::STRING);
+    return current_string_buf_loc + sizeof(uint32_t);
+  }
+
+  really_inline bool on_end_string(uint8_t *dst) noexcept { // finishes a string; dst points one past its last written byte. Always returns true.
+    uint32_t str_length = uint32_t(dst - (current_string_buf_loc + sizeof(uint32_t))); // bytes written after the length slot
+    // TODO check for overflow in case someone has a crazy string (>=4GB?)
+    // But only add the overflow check when the document itself exceeds 4GB
+    // Currently unneeded because we refuse to parse docs larger or equal to 4GB.
+    memcpy(current_string_buf_loc, &str_length, sizeof(uint32_t)); // fill in the length slot that precedes the characters
+    // NULL termination is still handy if you expect all your strings to
+    // be NULL terminated? It comes at a small cost
+    *dst = 0;
+    current_string_buf_loc = dst + 1; // the next string record starts right after the terminator
+    return true;
  }

  WARN_UNUSED really_inline bool parse_string() {
-    uint8_t *dst = doc_parser.on_start_string();
+    uint8_t *dst = on_start_string();
    dst = stringparsing::parse_string(structurals.current(), dst);
    if (dst == nullptr) {
      return true;
    }
-    return !doc_parser.on_end_string(dst);
+    return !on_end_string(dst);
  }

  WARN_UNUSED really_inline bool parse_number(const uint8_t *src, bool found_minus) {
-    return !numberparsing::parse_number(src, found_minus, doc_parser);
+    number_writer writer{doc_parser};
+    return !numberparsing::parse_number(src, found_minus, writer);
  }
  WARN_UNUSED really_inline bool parse_number(bool found_minus) {
    return parse_number(structurals.current(), found_minus);
@ -182,15 +249,15 @@ struct structural_parser {
    switch (structurals.current_char()) {
      case 't':
        if (!atomparsing::is_valid_true_atom(structurals.current())) { return true; }
-        doc_parser.on_true_atom();
+        write_tape(0, internal::tape_type::TRUE_VALUE);
        break;
      case 'f':
        if (!atomparsing::is_valid_false_atom(structurals.current())) { return true; }
-        doc_parser.on_false_atom();
+        write_tape(0, internal::tape_type::FALSE_VALUE);
        break;
      case 'n':
        if (!atomparsing::is_valid_null_atom(structurals.current())) { return true; }
-        doc_parser.on_null_atom();
+        write_tape(0, internal::tape_type::NULL_VALUE);
        break;
      default:
        return true;
@ -202,15 +269,15 @@ struct structural_parser {
    switch (structurals.current_char()) {
      case 't':
        if (!atomparsing::is_valid_true_atom(structurals.current(), structurals.remaining_len())) { return true; }
-        doc_parser.on_true_atom();
+        write_tape(0, internal::tape_type::TRUE_VALUE);
        break;
      case 'f':
        if (!atomparsing::is_valid_false_atom(structurals.current(), structurals.remaining_len())) { return true; }
-        doc_parser.on_false_atom();
+        write_tape(0, internal::tape_type::FALSE_VALUE);
        break;
      case 'n':
        if (!atomparsing::is_valid_null_atom(structurals.current(), structurals.remaining_len())) { return true; }
-        doc_parser.on_null_atom();
+        write_tape(0, internal::tape_type::NULL_VALUE);
        break;
      default:
        return true;
@ -247,17 +314,27 @@ struct structural_parser {
  WARN_UNUSED really_inline error_code finish() {
    // the string might not be NULL terminated.
    if ( !structurals.at_end(doc_parser.n_structural_indexes) ) {
-      return doc_parser.on_error(TAPE_ERROR);
+      return on_error(TAPE_ERROR);
    }
    end_document();
    if (depth != 0) {
-      return doc_parser.on_error(TAPE_ERROR);
+      return on_error(TAPE_ERROR);
    }
    if (doc_parser.containing_scope[depth].tape_index != 0) {
-      return doc_parser.on_error(TAPE_ERROR);
+      return on_error(TAPE_ERROR);
    }

-    return doc_parser.on_success(SUCCESS);
+    return on_success(SUCCESS);
+  }
+
+  really_inline error_code on_error(error_code new_error_code) noexcept { // stores the error on doc_parser and hands the same code back to the caller
+    doc_parser.error = new_error_code;
+    return new_error_code;
+  }
+  really_inline error_code on_success(error_code success_code) noexcept { // stores the success code, marks doc_parser valid, and returns the code
+    doc_parser.error = success_code;
+    doc_parser.valid = true;
+    return success_code;
  }

  WARN_UNUSED really_inline error_code error() {
@ -272,11 +349,11 @@ struct structural_parser {
    * carefully,
    * all without any added cost. */
    if (depth >= doc_parser.max_depth()) {
-      return doc_parser.on_error(DEPTH_ERROR);
+      return on_error(DEPTH_ERROR);
    }
    switch (structurals.current_char()) {
    case '"':
-      return doc_parser.on_error(STRING_ERROR);
+      return on_error(STRING_ERROR);
    case '0':
    case '1':
    case '2':
@ -288,20 +365,27 @@ struct structural_parser {
    case '8':
    case '9':
    case '-':
-      return doc_parser.on_error(NUMBER_ERROR);
+      return on_error(NUMBER_ERROR);
    case 't':
-      return doc_parser.on_error(T_ATOM_ERROR);
+      return on_error(T_ATOM_ERROR);
    case 'n':
-      return doc_parser.on_error(N_ATOM_ERROR);
+      return on_error(N_ATOM_ERROR);
    case 'f':
-      return doc_parser.on_error(F_ATOM_ERROR);
+      return on_error(F_ATOM_ERROR);
    default:
-      return doc_parser.on_error(TAPE_ERROR);
+      return on_error(TAPE_ERROR);
    }
  }

+  really_inline void init() { // resets per-parse state; start() calls this before anything is written
+    current_string_buf_loc = doc_parser.doc.string_buf.get(); // string writes start at the beginning of string_buf
+    doc_parser.current_loc = 0; // tape writes start at index 0
+    doc_parser.valid = false; // not valid until on_success() says so
+    doc_parser.error = UNINITIALIZED;
+  }
+
  WARN_UNUSED really_inline error_code start(size_t len, ret_address finish_state) {
-    doc_parser.init_stage2(); // sets is_valid to false
+    init(); // sets is_valid to false
    if (len > doc_parser.capacity()) {
      return CAPACITY;
    }
@ -309,7 +393,7 @@ struct structural_parser {
    structurals.advance_char();
    // Push the root scope (there is always at least one scope)
    if (start_document(finish_state)) {
-      return doc_parser.on_error(DEPTH_ERROR);
+      return on_error(DEPTH_ERROR);
    }
    return SUCCESS;
  }
@ -376,7 +460,7 @@ WARN_UNUSED error_code implementation::stage2(const uint8_t *buf, size_t len, pa
 object_begin:
  switch (parser.advance_char()) {
  case '"': {
-    doc_parser.increment_count(parser.depth - 1); // we have a key value pair in the object at parser.depth - 1
+    parser.increment_count();
    FAIL_IF( parser.parse_string() );
    goto object_key_state;
  }
@ -395,7 +479,7 @@ object_key_state:
 object_continue:
  switch (parser.advance_char()) {
  case ',':
-    doc_parser.increment_count(parser.depth - 1); // we have a key value pair in the object at parser.depth - 1
+    parser.increment_count();
    FAIL_IF( parser.advance_char() != '"' );
    FAIL_IF( parser.parse_string() );
    goto object_key_state;
@ -417,7 +501,7 @@ array_begin:
    parser.end_array();
    goto scope_end;
  }
-  doc_parser.increment_count(parser.depth - 1); // we have a new value in the array at parser.depth - 1
+  parser.increment_count();

 main_array_switch:
  /* we call update char on all paths in, so we can peek at parser.c on the
@ -427,7 +511,7 @@ main_array_switch:
 array_continue:
  switch (parser.advance_char()) {
  case ',':
-    doc_parser.increment_count(parser.depth - 1); // we have a new value in the array at parser.depth - 1
+    parser.increment_count();
    parser.advance_char();
    goto main_array_switch;
  case ']':
--- a/src/generic/stage2_streaming_build_tape.h
+++ b/src/generic/stage2_streaming_build_tape.h
@ -5,13 +5,13 @@ struct streaming_structural_parser: structural_parser {

  // override to add streaming
  WARN_UNUSED really_inline error_code start(UNUSED size_t len, ret_address finish_parser) {
-    doc_parser.init_stage2(); // sets is_valid to false
+    init(); // sets is_valid to false
    // Capacity ain't no thang for streaming, so we don't check it.
    // Advance to the first character as soon as possible
    advance_char();
    // Push the root scope (there is always at least one scope)
    if (start_document(finish_parser)) {
-      return doc_parser.on_error(DEPTH_ERROR);
+      return on_error(DEPTH_ERROR);
    }
    return SUCCESS;
  }
@ -19,17 +19,17 @@ struct streaming_structural_parser: structural_parser {
  // override to add streaming
  WARN_UNUSED really_inline error_code finish() {
    if ( structurals.past_end(doc_parser.n_structural_indexes) ) {
-      return doc_parser.on_error(TAPE_ERROR);
+      return on_error(TAPE_ERROR);
    }
    end_document();
    if (depth != 0) {
-      return doc_parser.on_error(TAPE_ERROR);
+      return on_error(TAPE_ERROR);
    }
    if (doc_parser.containing_scope[depth].tape_index != 0) {
-      return doc_parser.on_error(TAPE_ERROR);
+      return on_error(TAPE_ERROR);
    }
    bool finished = structurals.at_end(doc_parser.n_structural_indexes);
-    return doc_parser.on_success(finished ? SUCCESS : SUCCESS_AND_HAS_MORE);
+    return on_success(finished ? SUCCESS : SUCCESS_AND_HAS_MORE);
  }
 };

@ -97,7 +97,7 @@ object_begin:

 object_key_parser:
  FAIL_IF( parser.advance_char() != ':' );
-  doc_parser.increment_count(parser.depth - 1); // we have a key value pair in the object at parser.depth - 1
+  parser.increment_count();
  parser.advance_char();
  GOTO( parser.parse_value(addresses, addresses.object_continue) );

@ -125,7 +125,7 @@ array_begin:
    parser.end_array();
    goto scope_end;
  }
-  doc_parser.increment_count(parser.depth - 1); // we have a new value in the array at parser.depth - 1
+  parser.increment_count();

 main_array_switch:
  /* we call update char on all paths in, so we can peek at parser.c on the
@ -135,7 +135,7 @@ main_array_switch:
 array_continue:
  switch (parser.advance_char()) {
  case ',':
-    doc_parser.increment_count(parser.depth - 1); // we have a new value in the array at parser.depth - 1
+    parser.increment_count();
    parser.advance_char();
    goto main_array_switch;
  case ']':
--- a/src/stage2_build_tape.cpp
+++ b/src/stage2_build_tape.cpp
@ -2,7 +2,6 @@
 #include <cassert>
 #include <cstring>
 #include "jsoncharutils.h"
-#include "document_parser_callbacks.h"

 using namespace simdjson;