Merge branch 'master' of github.com:lemire/simdjson

2020-01-02 15:27:00 -05:00 · 2020-01-02 15:27:00 -05:00 · a2d05b21ff
parent f4f5f670a2 165e23773f
commit a2d05b21ff
7 changed files with 479 additions and 970 deletions
--- a/.github/workflows/fuzzers.yml
+++ b/.github/workflows/fuzzers.yml
@ -64,7 +64,8 @@ jobs:
        done
    - name: Run the other fuzzer variants for $fuzzer, with sanitizers etc
      run: |
-        for fuzzer in $allfuzzers; do      
+        set -x
+        for fuzzer in $allfuzzers; do
          build-ossfuzz-withavx/fuzz/fuzz_$fuzzer       out/$fuzzer -max_total_time=20 $artifactsprefix || touch failed
          build-ossfuzz-noavx/fuzz/fuzz_$fuzzer         out/$fuzzer -max_total_time=10 $artifactsprefix || touch failed
          build-ossfuzz-noavx8/fuzz/fuzz_$fuzzer        out/$fuzzer -max_total_time=10 $artifactsprefix || touch failed
--- a/include/simdjson/stage2_build_tape.h
+++ b/include/simdjson/stage2_build_tape.h
@ -14,7 +14,8 @@ WARN_UNUSED int
 unified_machine(const uint8_t *buf, size_t len, ParsedJson &pj);

 template <Architecture T = Architecture::NATIVE>
-int unified_machine(const char *buf, size_t len, ParsedJson &pj) {
+WARN_UNUSED int
+unified_machine(const char *buf, size_t len, ParsedJson &pj) {
  return unified_machine<T>(reinterpret_cast<const uint8_t *>(buf), len, pj);
 }

--- a/src/arm64/stage2_build_tape.h
+++ b/src/arm64/stage2_build_tape.h
@ -21,13 +21,13 @@ namespace simdjson {
 template <>
 WARN_UNUSED int
 unified_machine<Architecture::ARM64>(const uint8_t *buf, size_t len, ParsedJson &pj) {
-  return arm64::unified_machine(buf, len, pj);
+  return arm64::stage2::unified_machine(buf, len, pj);
 }

 template <>
 WARN_UNUSED int
 unified_machine<Architecture::ARM64>(const uint8_t *buf, size_t len, ParsedJson &pj, size_t &next_json) {
-    return arm64::unified_machine(buf, len, pj, next_json);
+    return arm64::stage2::unified_machine(buf, len, pj, next_json);
 }

 } // namespace simdjson
--- a/src/generic/stage2_build_tape.h
+++ b/src/generic/stage2_build_tape.h
@ -3,192 +3,86 @@
 // We assume the file in which it is included already includes
 // "simdjson/stage2_build_tape.h" (this simplifies amalgamation)

-// this macro reads the next structural character, updating idx, i and c.
-#define UPDATE_CHAR()                                                          \
-  {                                                                            \
-    idx = pj.structural_indexes[i++];                                          \
-    c = buf[idx];                                                              \
-  }
+namespace stage2 {

 #ifdef SIMDJSON_USE_COMPUTED_GOTO
-#define SET_GOTO_ARRAY_CONTINUE() pj.ret_address[depth] = &&array_continue;
-#define SET_GOTO_OBJECT_CONTINUE() pj.ret_address[depth] = &&object_continue;
-#define SET_GOTO_START_CONTINUE() pj.ret_address[depth] = &&start_continue;
-#define GOTO_CONTINUE() goto *pj.ret_address[depth];
+typedef void* ret_address;
+#define INIT_ADDRESSES() { &&array_begin, &&array_continue, &&error, &&finish, &&object_begin, &&object_continue }
+#define GOTO(address) { goto *(address); }
+#define CONTINUE(address) { goto *(address); }
 #else
-#define SET_GOTO_ARRAY_CONTINUE() pj.ret_address[depth] = 'a';
-#define SET_GOTO_OBJECT_CONTINUE() pj.ret_address[depth] = 'o';
-#define SET_GOTO_START_CONTINUE() pj.ret_address[depth] = 's';
-#define GOTO_CONTINUE()                                                        \
-  {                                                                            \
-    if (pj.ret_address[depth] == 'a') {                                        \
-      goto array_continue;                                                     \
-    } else if (pj.ret_address[depth] == 'o') {                                 \
-      goto object_continue;                                                    \
-    } else {                                                                   \
-      goto start_continue;                                                     \
-    }                                                                          \
+typedef char ret_address;
+// Character tags consumed by the switch-based GOTO()/CONTINUE() fallbacks, listed in the same
+// order as the members of unified_machine_addresses. The macro expands to a bare brace
+// initializer with no trailing semicolon, matching the computed-goto variant, so the user
+// supplies the terminating ';'.
+#define INIT_ADDRESSES() { '[', 'a', 'e', 'f', '{', 'o' }
+#define GOTO(address)                 \
+  {                                   \
+    switch(address) {                 \
+      case '[': goto array_begin;     \
+      case 'a': goto array_continue;  \
+      case 'e': goto error;           \
+      case 'f': goto finish;          \
+      case '{': goto object_begin;    \
+      case 'o': goto object_continue; \
+    }                                 \
+  }
+// For the more constrained pop_scope() situation
+#define CONTINUE(address)             \
+  {                                   \
+    switch(address) {                 \
+      case 'a': goto array_continue;  \
+      case 'o': goto object_continue; \
+      case 'f': goto finish;          \
+    }                                 \
  }
 #endif

-/************
- * The JSON is parsed to a tape, see the accompanying tape.md file
- * for documentation.
- ***********/
-WARN_UNUSED  int
-unified_machine(const uint8_t *buf, size_t len, ParsedJson &pj) {
-  uint32_t i = 0; /* index of the structural character (0,1,2,3...) */
-  uint32_t idx; /* location of the structural character in the input (buf)   */
-  uint8_t c;    /* used to track the (structural) character we are looking at,
-                   updated */
-  /* by UPDATE_CHAR macro */
-  uint32_t depth = 0; /* could have an arbitrary starting depth */
-  pj.init();          /* sets is_valid to false          */
-  if (pj.byte_capacity < len) {
-    pj.error_code = simdjson::CAPACITY;
-    return pj.error_code;
+// Bundle of the state-machine targets that structural_parser helpers (e.g. parse_value) hand
+// back to unified_machine() for dispatch through GOTO()/CONTINUE().
+// The members are filled positionally from INIT_ADDRESSES(), so their order must stay in sync
+// with that macro.
+struct unified_machine_addresses {
+  ret_address array_begin;
+  ret_address array_continue;
+  ret_address error;
+  ret_address finish;
+  ret_address object_begin;
+  ret_address object_continue;
+};
+
+#undef FAIL_IF
+#define FAIL_IF(EXPR) { if (EXPR) { return addresses.error; } }
+
+// Free-function forwarder to the parse_string() visible at this scope. It exists only so that
+// structural_parser::parse_string() can reach that function without the call resolving to the
+// member of the same name. The result is passed through unchanged.
+WARN_UNUSED really_inline bool
+really_parse_string(const uint8_t *buf, size_t len, ParsedJson &pj, uint32_t depth, uint32_t idx) {
+  return parse_string(buf, len, pj, depth, idx);
+}
+// Free-function forwarder to the parse_number() visible at this scope, for the same reason as
+// really_parse_string(): structural_parser has a member called parse_number().
+// The result is passed through unchanged.
+WARN_UNUSED really_inline bool
+really_parse_number(const uint8_t *const buf, ParsedJson &pj, const uint32_t offset, bool found_minus) {
+  return parse_number(buf, pj, offset, found_minus);
+}
+
+struct structural_parser {
+  const uint8_t* const buf;
+  const size_t len;
+  ParsedJson &pj;
+  uint32_t i; // next structural index
+  uint32_t idx; // location of the structural character in the input (buf)
+  uint8_t c;    // used to track the (structural) character we are looking at
+  uint32_t depth = 0; // could have an arbitrary starting depth
+
+  // Binds the parser to an input buffer, its length and the ParsedJson to fill. _i is the
+  // structural index to start from (defaults to 0). idx and c are not initialized here; they
+  // get their first values from advance_char().
+  really_inline structural_parser(const uint8_t *_buf, size_t _len, ParsedJson &_pj, uint32_t _i = 0) : buf{_buf}, len{_len}, pj{_pj}, i{_i} {}
+
+  // Stores error_code in pj.error_code and returns the same value, so callers can write
+  // `return set_error_code(...)`.
+  WARN_UNUSED really_inline int set_error_code(ErrorValues error_code) {
+    pj.error_code = error_code;
+    return error_code;
  }

-  /*//////////////////////////// START STATE /////////////////////////////
-   */
-  SET_GOTO_START_CONTINUE()
-  pj.containing_scope_offset[depth] = pj.get_current_loc();
-  pj.write_tape(0, 'r'); /* r for root, 0 is going to get overwritten */
-  /* the root is used, if nothing else, to capture the size of the tape */
-  depth++; /* everything starts at depth = 1, depth = 0 is just for the
-              root, the root may contain an object, an array or something
-              else. */
-  if (depth >= pj.depth_capacity) {
-    goto fail;
+  // Steps to the next structural character: loads its buffer offset into idx (post-incrementing
+  // i), loads the byte at that offset into c, and returns c.
+  really_inline char advance_char() {
+    idx = pj.structural_indexes[i++];
+    c = buf[idx];
+    return c;
  }

-  UPDATE_CHAR();
-  switch (c) {
-  case '{':
-    pj.containing_scope_offset[depth] = pj.get_current_loc();
-    SET_GOTO_START_CONTINUE();
-    depth++;
-    if (depth >= pj.depth_capacity) {
-      goto fail;
-    }
-    pj.write_tape(
-        0, c); /* strangely, moving this to object_begin slows things down */
-    goto object_begin;
-  case '[':
-    pj.containing_scope_offset[depth] = pj.get_current_loc();
-    SET_GOTO_START_CONTINUE();
-    depth++;
-    if (depth >= pj.depth_capacity) {
-      goto fail;
-    }
-    pj.write_tape(0, c);
-    goto array_begin;
+  template<typename F>
+  really_inline bool with_space_terminated_copy(const F& f) {
    /**
-     * A JSON text is a serialized value.  Note that certain previous
-     * specifications of JSON constrained a JSON text to be an object or an
-     * array.  Implementations that generate only objects or arrays where a
-     * JSON text is called for will be interoperable in the sense that all
-     * implementations will accept these as conforming JSON texts.
-     * https://tools.ietf.org/html/rfc8259
-     **/
-  case '"': {
-    if (!parse_string(buf, len, pj, depth, idx)) {
-      goto fail;
-    }
-    break;
-  }
-  case 't': {
-    /* We need to make a copy to make sure that the string is space
-     * terminated.
-     * This only applies to the JSON document made solely of the true
-     * value.
-     * This is not about padding the input, which should already be padded up
-     * to len + SIMDJSON_PADDING. However, we have no control at this stage
-     * on how the padding was done. What if the input string was padded with nulls?
-     * It is quite common for an input string to have an extra null character (C string).
-     * This copy is relatively expensive, but it will almost never be called in
-     * practice unless you are in the strange scenario where you have many JSON
-     * documents made of single atoms.
-     */
-    char *copy = static_cast<char *>(malloc(len + SIMDJSON_PADDING));
-    if (copy == nullptr) {
-      goto fail;
-    }
-    memcpy(copy, buf, len);
-    memset(copy + len, ' ', sizeof(uint64_t));
-    if (!is_valid_true_atom(reinterpret_cast<const uint8_t *>(copy) + idx)) {
-      free(copy);
-      goto fail;
-    }
-    free(copy);
-    pj.write_tape(0, c);
-    break;
-  }
-  case 'f': {
-    /* We need to make a copy to make sure that the string is space
-     * terminated.
-     * This only applies to the JSON document made solely of the false
-     * value.
-     * This is not about padding the input, which should already be padded up
-     * to len + SIMDJSON_PADDING. However, we have no control at this stage
-     * on how the padding was done. What if the input string was padded with nulls?
-     * It is quite common for an input string to have an extra null character (C string).
-     * This copy is relatively expensive, but it will almost never be called in
-     * practice unless you are in the strange scenario where you have many JSON
-     * documents made of single atoms.
-     */
-    char *copy = static_cast<char *>(malloc(len + SIMDJSON_PADDING));
-    if (copy == nullptr) {
-      goto fail;
-    }
-    memcpy(copy, buf, len);
-    memset(copy + len, ' ', sizeof(uint64_t));
-    if (!is_valid_false_atom(reinterpret_cast<const uint8_t *>(copy) + idx)) {
-      free(copy);
-      goto fail;
-    }
-    free(copy);
-    pj.write_tape(0, c);
-    break;
-  }
-  case 'n': {
-    /* We need to make a copy to make sure that the string is space
-     * terminated.
-     * This is not about padding the input, which should already padded up
-     * to len + SIMDJSON_PADDING. However, we have no control at this stage
-     * on how the padding was done. What if the input string was padded with nulls?
-     * It is quite common for an input string to have an extra null character (C string).
-     * This only applies to the JSON document made solely of the null value.
-     * This copy is relatively expensive, but it will almost never be called in
-     * practice unless you are in the strange scenario where you have many JSON
-     * documents made of single atoms.
-     */
-    char *copy = static_cast<char *>(malloc(len + SIMDJSON_PADDING));
-    if (copy == nullptr) {
-      goto fail;
-    }
-    memcpy(copy, buf, len);
-    memset(copy + len, ' ', sizeof(uint64_t));
-    if (!is_valid_null_atom(reinterpret_cast<const uint8_t *>(copy) + idx)) {
-      free(copy);
-      goto fail;
-    }
-    free(copy);
-    pj.write_tape(0, c);
-    break;
-  }
-  case '0':
-  case '1':
-  case '2':
-  case '3':
-  case '4':
-  case '5':
-  case '6':
-  case '7':
-  case '8':
-  case '9': {
-    /**
-    * We need to make a copy to make sure that the input string is space terminated.
+    * We need to make a copy to make sure that the string is space terminated.
    * This is not about padding the input, which should already be padded up
    * to len + SIMDJSON_PADDING. However, we have no control at this stage
    * on how the padding was done. What if the input string was padded with nulls?
@ -202,352 +96,301 @@ unified_machine(const uint8_t *buf, size_t len, ParsedJson &pj) {
    */
    char *copy = static_cast<char *>(malloc(len + SIMDJSON_PADDING));
    if (copy == nullptr) {
-      goto fail;
+      return true;
    }
    memcpy(copy, buf, len);
    memset(copy + len, ' ', SIMDJSON_PADDING);
-    if (!parse_number(reinterpret_cast<const uint8_t *>(copy), pj, idx,
-                      false)) {
-      free(copy);
-      goto fail;
-    }
+    bool result = f(reinterpret_cast<const uint8_t*>(copy), idx);
    free(copy);
-    break;
+    return result;
  }
-  case '-': {
-    /**
-    * We need to make a copy to make sure that the input string is space terminated.
-    * This is not about padding the input, which should already padded up
-    * to len + SIMDJSON_PADDING. However, we have no control at this stage
-    * on how the padding was done. What if the input string was padded with nulls?
-    * It is quite common for an input string to have an extra null character (C string).
-    * We do not want to allow -9\0 (where \0 is the null character) inside a JSON
-    * document, but the string "-9\0" by itself is fine. So we make a copy and
-    * pad the input with spaces when we know that there is just one input element.
-    * This copy is relatively expensive, but it will almost never be called in
-    * practice unless you are in the strange scenario where you have many JSON
-    * documents made of single atoms.
-    */
-    char *copy = static_cast<char *>(malloc(len + SIMDJSON_PADDING));
-    if (copy == nullptr) {
-      goto fail;
-    }
-    memcpy(copy, buf, len);
-    memset(copy + len, ' ', SIMDJSON_PADDING);
-    if (!parse_number(reinterpret_cast<const uint8_t *>(copy), pj, idx, true)) {
-      free(copy);
-      goto fail;
-    }
-    free(copy);
-    break;
-  }
-  default:
-    goto fail;
-  }
-start_continue:
-  /* the string might not be NULL terminated. */
-  if (i + 1 == pj.n_structural_indexes) {
-    goto succeed;
-  } else {
-    goto fail;
-  }
-  /*//////////////////////////// OBJECT STATES ///////////////////////////*/

-object_begin:
-  UPDATE_CHAR();
-  switch (c) {
-  case '"': {
-    if (!parse_string(buf, len, pj, depth, idx)) {
-      goto fail;
+  // Opens a scope at the current depth: records the current tape location and the state to
+  // resume at when the scope closes, increments depth, then writes a tape entry (value 0) tagged
+  // with `type`.
+  // Returns true on FAILURE, i.e. when the new depth has reached pj.depth_capacity. The writes
+  // above have already been performed by the time that check is made.
+  WARN_UNUSED really_inline bool push_start_scope(ret_address continue_state, char type) {
+    pj.containing_scope_offset[depth] = pj.get_current_loc();
+    pj.ret_address[depth] = continue_state;
+    depth++;
+    pj.write_tape(0, type);
+    return depth >= pj.depth_capacity;
+  }
+
+  // Convenience overload: opens a scope tagged with the current structural character c.
+  // Returns true on failure (depth capacity reached).
+  WARN_UNUSED really_inline bool push_start_scope(ret_address continue_state) {
+    return push_start_scope(continue_state, c);
+  }
+
+  // Opens a nested scope for the current character c. Same bookkeeping as push_start_scope(),
+  // except that the tape entry is written before the return address is stored and depth is
+  // incremented.
+  // Returns true on FAILURE, i.e. when the new depth has reached pj.depth_capacity.
+  WARN_UNUSED really_inline bool push_scope(ret_address continue_state) {
+    pj.containing_scope_offset[depth] = pj.get_current_loc();
+    pj.write_tape(0, c); // Do this as early as possible
+    pj.ret_address[depth] = continue_state;
+    depth++;
+    return depth >= pj.depth_capacity;
+  }
+
+  // Closes the innermost scope: decrements depth, writes a tape entry tagged with the closing
+  // character c that carries the scope's recorded start location, annotates the scope's opening
+  // entry with the then-current tape location, and returns the continuation that was saved when
+  // the scope was pushed.
+  WARN_UNUSED really_inline ret_address pop_scope() {
+    // write our tape location to the header scope
+    depth--;
+    pj.write_tape(pj.containing_scope_offset[depth], c);
+    pj.annotate_previous_loc(pj.containing_scope_offset[depth], pj.get_current_loc());
+    return pj.ret_address[depth];
+  }
+  // Closes the root scope. Unlike pop_scope(), the opening entry is annotated BEFORE the
+  // closing 'r' entry is written, and no continuation is returned.
+  really_inline void pop_root_scope() {
+    // write our tape location to the header scope
+    // The root scope gets written *at* the previous location.
+    depth--;
+    pj.annotate_previous_loc(pj.containing_scope_offset[depth], pj.get_current_loc());
+    pj.write_tape(pj.containing_scope_offset[depth], 'r');
+  }
+
+  // Parses the string whose structural character is at idx.
+  // Note the inverted sense: returns true on FAILURE, so the result can be passed straight to
+  // FAIL_IF().
+  WARN_UNUSED really_inline bool parse_string() {
+    return !really_parse_string(buf, len, pj, depth, idx);
+  }
+
+  // Parses a number at `offset` inside `copy`, which is either buf itself or a space-terminated
+  // copy of it. found_minus is forwarded unchanged.
+  // Returns true on FAILURE (inverse of really_parse_number()).
+  WARN_UNUSED really_inline bool parse_number(const uint8_t *copy, uint32_t offset, bool found_minus) {
+    return !really_parse_number(copy, pj, offset, found_minus);
+  }
+  // Convenience overload: parses the number at the current position (idx) of the original
+  // buffer. Returns true on FAILURE.
+  WARN_UNUSED really_inline bool parse_number(bool found_minus) {
+    return parse_number(buf, idx, found_minus);
+  }
+
+  // Validates the true/false/null atom selected by the current character c, reading it at
+  // `offset` inside `copy` (either buf or a space-terminated copy of it). On success a tape
+  // entry (value 0) tagged with c is written.
+  // Returns true on FAILURE. A character other than 't', 'f' or 'n' is treated as a failure
+  // rather than silently reported as success with nothing written to the tape.
+  WARN_UNUSED really_inline bool parse_atom(const uint8_t *copy, uint32_t offset) {
+    switch (c) {
+      case 't':
+        if (!is_valid_true_atom(copy + offset)) { return true; }
+        break;
+      case 'f':
+        if (!is_valid_false_atom(copy + offset)) { return true; }
+        break;
+      case 'n':
+        if (!is_valid_null_atom(copy + offset)) { return true; }
+        break;
+      default:
+        // Not an atom at all: callers only dispatch here for t/f/n, so this is a logic error.
+        return true;
    }
+    pj.write_tape(0, c);
+    return false;
+  }
+
+  // Convenience overload: validates the atom at the current position (idx) of the original
+  // buffer. Returns true on FAILURE.
+  WARN_UNUSED really_inline bool parse_atom() {
+    return parse_atom(buf, idx);
+  }
+
+  // Handles the value that starts at the current character c and returns the state the caller
+  // should jump to next:
+  //   - scalars (string, atom, number): parsed in place, then continue_state is returned;
+  //   - '{' / '[': a scope is pushed with continue_state as its resume point and the matching
+  //     *_begin state is returned;
+  //   - anything else, or any failure: addresses.error.
+  // FAIL_IF here is the variant defined just before this struct, which expands to
+  // `return addresses.error;`.
+  WARN_UNUSED really_inline ret_address parse_value(const unified_machine_addresses &addresses, ret_address continue_state) {
+    switch (c) {
+    case '"':
+      FAIL_IF( parse_string() );
+      return continue_state;
+    case 't': case 'f': case 'n':
+      FAIL_IF( parse_atom() );
+      return continue_state;
+    case '0': case '1': case '2': case '3': case '4':
+    case '5': case '6': case '7': case '8': case '9':
+      FAIL_IF( parse_number(false) );
+      return continue_state;
+    case '-':
+      FAIL_IF( parse_number(true) );
+      return continue_state;
+    case '{':
+      FAIL_IF( push_scope(continue_state) );
+      return addresses.object_begin;
+    case '[':
+      FAIL_IF( push_scope(continue_state) );
+      return addresses.array_begin;
+    default:
+      return addresses.error;
+    }
+  }
+
+  // Wraps up a parse once the top-level value has been consumed. Fails with TAPE_ERROR unless
+  // exactly one structural index remains and popping the root scope brings us back to depth 0
+  // with the root recorded at tape offset 0. On success marks pj valid and records SUCCESS.
+  WARN_UNUSED really_inline int finish() {
+    // the string might not be NULL terminated.
+    const bool consumed_everything = (i + 1 == pj.n_structural_indexes);
+    if (!consumed_everything) {
+      return set_error_code(TAPE_ERROR);
+    }
+
+    pop_root_scope();
+    // The offset check is only evaluated once depth is known to be 0.
+    if (depth != 0 || pj.containing_scope_offset[depth] != 0) {
+      return set_error_code(TAPE_ERROR);
+    }
+
+    pj.valid = true;
+    return set_error_code(SUCCESS);
+  }
+
+  // Maps the parser's position at the time of failure to an error code, records it in
+  // pj.error_code and returns it. A depth overflow takes priority; otherwise the code is chosen
+  // from the structural character c that was being processed.
+  WARN_UNUSED really_inline int error() {
+    /* pj.is_valid does not need to be cleared here: pj.init() already did so,
+    * pessimistically.
+    * This is the slow path, so there is time to spare. We know exactly where we are
+    * in the document, so a specific location could be reported, or special code
+    * paths triggered to work out what happened, without adding any cost to the
+    * processing code. */
+    if (depth >= pj.depth_capacity) {
+      return set_error_code(DEPTH_ERROR);
+    }
+    ErrorValues code = TAPE_ERROR; // anything not recognised below
+    switch (c) {
+    case '"':
+      code = STRING_ERROR;
+      break;
+    case '-':
+    case '0': case '1': case '2': case '3': case '4':
+    case '5': case '6': case '7': case '8': case '9':
+      code = NUMBER_ERROR;
+      break;
+    case 't':
+      code = T_ATOM_ERROR;
+      break;
+    case 'n':
+      code = N_ATOM_ERROR;
+      break;
+    case 'f':
+      code = F_ATOM_ERROR;
+      break;
+    default:
+      break;
+    }
+    return set_error_code(code);
+  }
+
+  // Prepares pj and the parser for a fresh document: resets pj, checks that the input fits,
+  // loads the first structural character and pushes the root scope with finish_state as its
+  // continuation.
+  // Returns SUCCESS, CAPACITY or DEPTH_ERROR. Failures are also recorded in pj.error_code via
+  // set_error_code(), because unified_machine() returns this value directly without passing
+  // through error().
+  WARN_UNUSED really_inline int start(ret_address finish_state) {
+    pj.init(); // sets is_valid to false
+    if (len > pj.byte_capacity) {
+      return set_error_code(CAPACITY);
+    }
+    // Advance to the first character as soon as possible
+    advance_char();
+    // Push the root scope (there is always at least one scope)
+    if (push_start_scope(finish_state, 'r')) {
+      return set_error_code(DEPTH_ERROR);
+    }
+    return SUCCESS;
+  }
+};
+
+// Redefine FAIL_IF to use goto since it'll be used inside the function now
+#undef FAIL_IF
+#define FAIL_IF(EXPR) { if (EXPR) { goto error; } }
+
+/************
+ * The JSON is parsed to a tape, see the accompanying tape.md file
+ * for documentation.
+ ***********/
+WARN_UNUSED  int
+unified_machine(const uint8_t *buf, size_t len, ParsedJson &pj) {
+  static constexpr unified_machine_addresses addresses = INIT_ADDRESSES();
+  structural_parser parser(buf, len, pj);
+  int result = parser.start(addresses.finish);
+  if (result) { return result; }
+
+  //
+  // Read first value
+  //
+  switch (parser.c) {
+  case '{':
+    FAIL_IF( parser.push_start_scope(addresses.finish) );
+    goto object_begin;
+  case '[':
+    FAIL_IF( parser.push_start_scope(addresses.finish) );
+    goto array_begin;
+  case '"':
+    FAIL_IF( parser.parse_string() );
+    goto finish;
+  case 't': case 'f': case 'n':
+    FAIL_IF(
+      parser.with_space_terminated_copy([&](auto copy, auto idx) {
+        return parser.parse_atom(copy, idx);
+      })
+    );
+    goto finish;
+  case '0': case '1': case '2': case '3': case '4':
+  case '5': case '6': case '7': case '8': case '9':
+    FAIL_IF(
+      parser.with_space_terminated_copy([&](auto copy, auto idx) {
+        return parser.parse_number(copy, idx, false);
+      })
+    );
+    goto finish;
+  case '-':
+    FAIL_IF(
+      parser.with_space_terminated_copy([&](auto copy, auto idx) {
+        return parser.parse_number(copy, idx, true);
+      })
+    );
+    goto finish;
+  default:
+    goto error;
+  }
+
+//
+// Object parser states
+//
+object_begin:
+  parser.advance_char();
+  switch (parser.c) {
+  case '"': {
+    FAIL_IF( parser.parse_string() );
    goto object_key_state;
  }
  case '}':
-    goto scope_end; /* could also go to object_continue */
+    goto scope_end; // could also go to object_continue
  default:
-    goto fail;
+    goto error;
  }

 object_key_state:
-  UPDATE_CHAR();
-  if (c != ':') {
-    goto fail;
-  }
-  UPDATE_CHAR();
-  switch (c) {
-  case '"': {
-    if (!parse_string(buf, len, pj, depth, idx)) {
-      goto fail;
-    }
-    break;
-  }
-  case 't':
-    if (!is_valid_true_atom(buf + idx)) {
-      goto fail;
-    }
-    pj.write_tape(0, c);
-    break;
-  case 'f':
-    if (!is_valid_false_atom(buf + idx)) {
-      goto fail;
-    }
-    pj.write_tape(0, c);
-    break;
-  case 'n':
-    if (!is_valid_null_atom(buf + idx)) {
-      goto fail;
-    }
-    pj.write_tape(0, c);
-    break;
-  case '0':
-  case '1':
-  case '2':
-  case '3':
-  case '4':
-  case '5':
-  case '6':
-  case '7':
-  case '8':
-  case '9': {
-    if (!parse_number(buf, pj, idx, false)) {
-      goto fail;
-    }
-    break;
-  }
-  case '-': {
-    if (!parse_number(buf, pj, idx, true)) {
-      goto fail;
-    }
-    break;
-  }
-  case '{': {
-    pj.containing_scope_offset[depth] = pj.get_current_loc();
-    pj.write_tape(0, c); /* here the compilers knows what c is so this gets
-                            optimized */
-    /* we have not yet encountered } so we need to come back for it */
-    SET_GOTO_OBJECT_CONTINUE()
-    /* we found an object inside an object, so we need to increment the
-     * depth                                                             */
-    depth++;
-    if (depth >= pj.depth_capacity) {
-      goto fail;
-    }
+  FAIL_IF( parser.advance_char() != ':' );

-    goto object_begin;
-  }
-  case '[': {
-    pj.containing_scope_offset[depth] = pj.get_current_loc();
-    pj.write_tape(0, c); /* here the compilers knows what c is so this gets
-                            optimized */
-    /* we have not yet encountered } so we need to come back for it */
-    SET_GOTO_OBJECT_CONTINUE()
-    /* we found an array inside an object, so we need to increment the depth
-     */
-    depth++;
-    if (depth >= pj.depth_capacity) {
-      goto fail;
-    }
-    goto array_begin;
-  }
-  default:
-    goto fail;
-  }
+  parser.advance_char();
+  GOTO( parser.parse_value(addresses, addresses.object_continue) );

 object_continue:
-  UPDATE_CHAR();
-  switch (c) {
+  switch (parser.advance_char()) {
  case ',':
-    UPDATE_CHAR();
-    if (c != '"') {
-      goto fail;
-    } else {
-      if (!parse_string(buf, len, pj, depth, idx)) {
-        goto fail;
-      }
-      goto object_key_state;
-    }
+    FAIL_IF( parser.advance_char() != '"' );
+    FAIL_IF( parser.parse_string() );
+    goto object_key_state;
  case '}':
    goto scope_end;
  default:
-    goto fail;
+    goto error;
  }

-  /*//////////////////////////// COMMON STATE ///////////////////////////*/
-
 scope_end:
-  /* write our tape location to the header scope */
-  depth--;
-  pj.write_tape(pj.containing_scope_offset[depth], c);
-  pj.annotate_previous_loc(pj.containing_scope_offset[depth],
-                           pj.get_current_loc());
-  /* goto saved_state */
-  GOTO_CONTINUE()
+  CONTINUE( parser.pop_scope() );

-  /*//////////////////////////// ARRAY STATES ///////////////////////////*/
+//
+// Array parser states
+//
 array_begin:
-  UPDATE_CHAR();
-  if (c == ']') {
-    goto scope_end; /* could also go to array_continue */
+  if (parser.advance_char() == ']') {
+    goto scope_end; // could also go to array_continue
  }

 main_array_switch:
-  /* we call update char on all paths in, so we can peek at c on the
+  /* advance_char() has been called on every path into this label, so parser.c can be
   * inspected on the paths that can accept a close square brace (post-, and at start) */
-  switch (c) {
-  case '"': {
-    if (!parse_string(buf, len, pj, depth, idx)) {
-      goto fail;
-    }
-    break;
-  }
-  case 't':
-    if (!is_valid_true_atom(buf + idx)) {
-      goto fail;
-    }
-    pj.write_tape(0, c);
-    break;
-  case 'f':
-    if (!is_valid_false_atom(buf + idx)) {
-      goto fail;
-    }
-    pj.write_tape(0, c);
-    break;
-  case 'n':
-    if (!is_valid_null_atom(buf + idx)) {
-      goto fail;
-    }
-    pj.write_tape(0, c);
-    break; /* goto array_continue; */
-
-  case '0':
-  case '1':
-  case '2':
-  case '3':
-  case '4':
-  case '5':
-  case '6':
-  case '7':
-  case '8':
-  case '9': {
-    if (!parse_number(buf, pj, idx, false)) {
-      goto fail;
-    }
-    break; /* goto array_continue; */
-  }
-  case '-': {
-    if (!parse_number(buf, pj, idx, true)) {
-      goto fail;
-    }
-    break; /* goto array_continue; */
-  }
-  case '{': {
-    /* we have not yet encountered ] so we need to come back for it */
-    pj.containing_scope_offset[depth] = pj.get_current_loc();
-    pj.write_tape(0, c); /* here the compilers knows what c is so this gets
-                            optimized */
-    SET_GOTO_ARRAY_CONTINUE()
-    /* we found an object inside an array, so we need to increment the depth
-     */
-    depth++;
-    if (depth >= pj.depth_capacity) {
-      goto fail;
-    }
-
-    goto object_begin;
-  }
-  case '[': {
-    /* we have not yet encountered ] so we need to come back for it */
-    pj.containing_scope_offset[depth] = pj.get_current_loc();
-    pj.write_tape(0, c); /* here the compilers knows what c is so this gets
-                            optimized */
-    SET_GOTO_ARRAY_CONTINUE()
-    /* we found an array inside an array, so we need to increment the depth
-     */
-    depth++;
-    if (depth >= pj.depth_capacity) {
-      goto fail;
-    }
-    goto array_begin;
-  }
-  default:
-    goto fail;
-  }
+  GOTO( parser.parse_value(addresses, addresses.array_continue) );

 array_continue:
-  UPDATE_CHAR();
-  switch (c) {
+  switch (parser.advance_char()) {
  case ',':
-    UPDATE_CHAR();
+    parser.advance_char();
    goto main_array_switch;
  case ']':
    goto scope_end;
  default:
-    goto fail;
+    goto error;
  }

-  /*//////////////////////////// FINAL STATES ///////////////////////////*/
+finish:
+  return parser.finish();

-succeed:
-  depth--;
-  if (depth != 0) {
-    fprintf(stderr, "internal bug\n");
-    abort();
-  }
-  if (pj.containing_scope_offset[depth] != 0) {
-    fprintf(stderr, "internal bug\n");
-    abort();
-  }
-  pj.annotate_previous_loc(pj.containing_scope_offset[depth],
-                           pj.get_current_loc());
-  pj.write_tape(pj.containing_scope_offset[depth], 'r'); /* r is root */
-
-  pj.valid = true;
-  pj.error_code = simdjson::SUCCESS;
-  return pj.error_code;
-fail:
-  /* we do not need the next line because this is done by pj.init(),
-   * pessimistically.
-   * pj.is_valid  = false;
-   * At this point in the code, we have all the time in the world.
-   * Note that we know exactly where we are in the document so we could,
-   * without any overhead on the processing code, report a specific
-   * location.
-   * We could even trigger special code paths to assess what happened
-   * carefully,
-   * all without any added cost. */
-  if (depth >= pj.depth_capacity) {
-    pj.error_code = simdjson::DEPTH_ERROR;
-    return pj.error_code;
-  }
-  switch (c) {
-  case '"':
-    pj.error_code = simdjson::STRING_ERROR;
-    return pj.error_code;
-  case '0':
-  case '1':
-  case '2':
-  case '3':
-  case '4':
-  case '5':
-  case '6':
-  case '7':
-  case '8':
-  case '9':
-  case '-':
-    pj.error_code = simdjson::NUMBER_ERROR;
-    return pj.error_code;
-  case 't':
-    pj.error_code = simdjson::T_ATOM_ERROR;
-    return pj.error_code;
-  case 'n':
-    pj.error_code = simdjson::N_ATOM_ERROR;
-    return pj.error_code;
-  case 'f':
-    pj.error_code = simdjson::F_ATOM_ERROR;
-    return pj.error_code;
-  default:
-    break;
-  }
-  pj.error_code = simdjson::TAPE_ERROR;
-  return pj.error_code;
+error:
+  return parser.error();
 }
+
+} // namespace stage2
--- a/src/generic/stage2_streaming_build_tape.h
+++ b/src/generic/stage2_streaming_build_tape.h
@ -1,497 +1,161 @@
+namespace stage2 {
+
+struct streaming_structural_parser: structural_parser {
+  really_inline streaming_structural_parser(const uint8_t *_buf, size_t _len, ParsedJson &_pj, size_t _i) : structural_parser(_buf, _len, _pj, _i) {}
+
+  // override for streaming: initializes pj and pushes the root scope without a capacity check
+  WARN_UNUSED really_inline int start(ret_address finish_parser) {
+    pj.init(); // sets is_valid to false
+    // Capacity is not a concern when streaming, so we do not check it.
+    // Advance to the first character as soon as possible
+    advance_char();
+    // Push the root scope (there is always at least one scope)
+    if (push_start_scope(finish_parser, 'r')) {
+      return DEPTH_ERROR;
+    }
+    return SUCCESS;
+  }
+
+  // override for streaming: returns SUCCESS_AND_HAS_MORE when more structural indexes remain
+  WARN_UNUSED really_inline int finish() {
+    /* the string might not be NUL-terminated. */
+    if ( i + 1 > pj.n_structural_indexes ) {
+      return set_error_code(TAPE_ERROR);
+    }
+    bool finished = i + 1 == pj.n_structural_indexes;
+    if (finished && buf[idx+2] != '\0') {
+      return set_error_code(TAPE_ERROR);
+    }
+    pop_root_scope();
+    if (depth != 0) {
+      return set_error_code(TAPE_ERROR);
+    }
+    if (pj.containing_scope_offset[depth] != 0) {
+      return set_error_code(TAPE_ERROR);
+    }
+
+    pj.valid = true;
+    return set_error_code(finished ? SUCCESS : SUCCESS_AND_HAS_MORE);
+  }
+};
+
 /************
 * The JSON is parsed to a tape, see the accompanying tape.md file
 * for documentation.
 ***********/
 WARN_UNUSED  int
 unified_machine(const uint8_t *buf, size_t len, ParsedJson &pj, size_t &next_json) {
-    size_t i{next_json}; /* index of the structural character (0,1,2,3...) */
-    size_t idx; /* location of the structural character in the input (buf)   */
-    uint8_t c;    /* used to track the (structural) character we are looking at,
-                   updated */
-    /* by UPDATE_CHAR macro */
-    size_t depth = 0; /* could have an arbitrary starting depth */
-    pj.init();          /* sets is_valid to false          */
-    /*//////////////////////////// START STATE /////////////////////////////
-     */
-    SET_GOTO_START_CONTINUE()
-    pj.containing_scope_offset[depth] = pj.get_current_loc();
-    pj.write_tape(0, 'r'); /* r for root, 0 is going to get overwritten */
-    /* the root is used, if nothing else, to capture the size of the tape */
-    depth++; /* everything starts at depth = 1, depth = 0 is just for the
-              root, the root may contain an object, an array or something
-              else. */
-    if (depth >= pj.depth_capacity) {
-        goto fail;
-    }
+  static constexpr unified_machine_addresses addresses = INIT_ADDRESSES();
+  streaming_structural_parser parser(buf, len, pj, next_json);
+  int result = parser.start(addresses.finish);
+  if (result) { return result; }

-    UPDATE_CHAR();
-    switch (c) {
-        case '{':
-            pj.containing_scope_offset[depth] = pj.get_current_loc();
-            SET_GOTO_START_CONTINUE();
-            depth++;
-            if (depth >= pj.depth_capacity) {
-                goto fail;
-            }
-            pj.write_tape(
-                    0, c); /* strangely, moving this to object_begin slows things down */
-            goto object_begin;
-        case '[':
-            pj.containing_scope_offset[depth] = pj.get_current_loc();
-            SET_GOTO_START_CONTINUE();
-            depth++;
-            if (depth >= pj.depth_capacity) {
-                goto fail;
-            }
-            pj.write_tape(0, c);
-            goto array_begin;
-            /* #define SIMDJSON_ALLOWANYTHINGINROOT
-             * A JSON text is a serialized value.  Note that certain previous
-             * specifications of JSON constrained a JSON text to be an object or an
-             * array.  Implementations that generate only objects or arrays where a
-             * JSON text is called for will be interoperable in the sense that all
-             * implementations will accept these as conforming JSON texts.
-             * https://tools.ietf.org/html/rfc8259
-             * #ifdef SIMDJSON_ALLOWANYTHINGINROOT */
-        case '"': {
-            if (!parse_string(buf, len, pj, depth, idx)) {
-                goto fail;
-            }
-            break;
-        }
-        case 't': {
-            /* we need to make a copy to make sure that the string is space
-             * terminated.
-             * this only applies to the JSON document made solely of the true value.
-             * this will almost never be called in practice */
-            char *copy = static_cast<char *>(malloc(len + SIMDJSON_PADDING));
-            if (copy == nullptr) {
-                goto fail;
-            }
-            memcpy(copy, buf, len);
-            copy[len] = ' ';
-            if (!is_valid_true_atom(reinterpret_cast<const uint8_t *>(copy) + idx)) {
-                free(copy);
-                goto fail;
-            }
-            free(copy);
-            pj.write_tape(0, c);
-            break;
-        }
-        case 'f': {
-            /* we need to make a copy to make sure that the string is space
-             * terminated.
-             * this only applies to the JSON document made solely of the false
-             * value.
-             * this will almost never be called in practice */
-            char *copy = static_cast<char *>(malloc(len + SIMDJSON_PADDING));
-            if (copy == nullptr) {
-                goto fail;
-            }
-            memcpy(copy, buf, len);
-            copy[len] = ' ';
-            if (!is_valid_false_atom(reinterpret_cast<const uint8_t *>(copy) + idx)) {
-                free(copy);
-                goto fail;
-            }
-            free(copy);
-            pj.write_tape(0, c);
-            break;
-        }
-        case 'n': {
-            /* we need to make a copy to make sure that the string is space
-             * terminated.
-             * this only applies to the JSON document made solely of the null value.
-             * this will almost never be called in practice */
-            char *copy = static_cast<char *>(malloc(len + SIMDJSON_PADDING));
-            if (copy == nullptr) {
-                goto fail;
-            }
-            memcpy(copy, buf, len);
-            copy[len] = ' ';
-            if (!is_valid_null_atom(reinterpret_cast<const uint8_t *>(copy) + idx)) {
-                free(copy);
-                goto fail;
-            }
-            free(copy);
-            pj.write_tape(0, c);
-            break;
-        }
-        case '0':
-        case '1':
-        case '2':
-        case '3':
-        case '4':
-        case '5':
-        case '6':
-        case '7':
-        case '8':
-        case '9': {
-            /* we need to make a copy to make sure that the string is space
-             * terminated.
-             * this is done only for JSON documents made of a sole number
-             * this will almost never be called in practice. We terminate with a
-             * space
-             * because we do not want to allow NULLs in the middle of a number
-             * (whereas a
-             * space in the middle of a number would be identified in stage 1). */
-            char *copy = static_cast<char *>(malloc(len + SIMDJSON_PADDING));
-            if (copy == nullptr) {
-                goto fail;
-            }
-            memcpy(copy, buf, len);
-            copy[len] = ' ';
-            if (!parse_number(reinterpret_cast<const uint8_t *>(copy), pj, idx,
-                              false)) {
-                free(copy);
-                goto fail;
-            }
-            free(copy);
-            break;
-        }
-        case '-': {
-            /* we need to make a copy to make sure that the string is NULL
-             * terminated.
-             * this is done only for JSON documents made of a sole number
-             * this will almost never be called in practice */
-            char *copy = static_cast<char *>(malloc(len + SIMDJSON_PADDING));
-            if (copy == nullptr) {
-                goto fail;
-            }
-            memcpy(copy, buf, len);
-            copy[len] = ' ';
-            if (!parse_number(reinterpret_cast<const uint8_t *>(copy), pj, idx, true)) {
-                free(copy);
-                goto fail;
-            }
-            free(copy);
-            break;
-        }
-        default:
-            goto fail;
-    }
-    start_continue:
-    /* the string might not be NULL terminated. */
-    if (i + 1 == pj.n_structural_indexes && buf[idx+2] == '\0') {
-        goto succeed;
-    } else if(depth == 1 && i<=pj.n_structural_indexes) {
-        goto succeedAndHasMore;
-    } else {
-        goto fail;
-    }
-    /*//////////////////////////// OBJECT STATES ///////////////////////////*/
+  //
+  // Read first value
+  //
+  switch (parser.c) {
+  case '{':
+    FAIL_IF( parser.push_start_scope(addresses.finish) );
+    goto object_begin;
+  case '[':
+    FAIL_IF( parser.push_start_scope(addresses.finish) );
+    goto array_begin;
+  case '"':
+    FAIL_IF( parser.parse_string() );
+    goto finish;
+  case 't': case 'f': case 'n':
+    FAIL_IF(
+      parser.with_space_terminated_copy([&](auto copy, auto idx) {
+        return parser.parse_atom(copy, idx);
+      })
+    );
+    goto finish;
+  case '0': case '1': case '2': case '3': case '4':
+  case '5': case '6': case '7': case '8': case '9':
+    FAIL_IF(
+      parser.with_space_terminated_copy([&](auto copy, auto idx) {
+        return parser.parse_number(copy, idx, false);
+      })
+    );
+    goto finish;
+  case '-':
+    FAIL_IF(
+      parser.with_space_terminated_copy([&](auto copy, auto idx) {
+        return parser.parse_number(copy, idx, true);
+      })
+    );
+    goto finish;
+  default:
+    goto error;
+  }

-    object_begin:
-    UPDATE_CHAR();
-    switch (c) {
-        case '"': {
-            if (!parse_string(buf, len, pj, depth, idx)) {
-                goto fail;
-            }
-            goto object_key_state;
-        }
-        case '}':
-            goto scope_end; /* could also go to object_continue */
-        default:
-            goto fail;
-    }
+//
+// Object parser states
+//
+object_begin:
+  parser.advance_char();
+  switch (parser.c) {
+  case '"': {
+    FAIL_IF( parser.parse_string() );
+    goto object_key_parser;
+  }
+  case '}':
+    goto scope_end; // could also go to object_continue
+  default:
+    goto error;
+  }

-    object_key_state:
-    UPDATE_CHAR();
-    if (c != ':') {
-        goto fail;
-    }
-    UPDATE_CHAR();
-    switch (c) {
-        case '"': {
-            if (!parse_string(buf, len, pj, depth, idx)) {
-                goto fail;
-            }
-            break;
-        }
-        case 't':
-            if (!is_valid_true_atom(buf + idx)) {
-                goto fail;
-            }
-            pj.write_tape(0, c);
-            break;
-        case 'f':
-            if (!is_valid_false_atom(buf + idx)) {
-                goto fail;
-            }
-            pj.write_tape(0, c);
-            break;
-        case 'n':
-            if (!is_valid_null_atom(buf + idx)) {
-                goto fail;
-            }
-            pj.write_tape(0, c);
-            break;
-        case '0':
-        case '1':
-        case '2':
-        case '3':
-        case '4':
-        case '5':
-        case '6':
-        case '7':
-        case '8':
-        case '9': {
-            if (!parse_number(buf, pj, idx, false)) {
-                goto fail;
-            }
-            break;
-        }
-        case '-': {
-            if (!parse_number(buf, pj, idx, true)) {
-                goto fail;
-            }
-            break;
-        }
-        case '{': {
-            pj.containing_scope_offset[depth] = pj.get_current_loc();
-            pj.write_tape(0, c); /* here the compilers knows what c is so this gets
-                            optimized */
-            /* we have not yet encountered } so we need to come back for it */
-            SET_GOTO_OBJECT_CONTINUE()
-            /* we found an object inside an object, so we need to increment the
-             * depth                                                             */
-            depth++;
-            if (depth >= pj.depth_capacity) {
-                goto fail;
-            }
+object_key_parser:
+  FAIL_IF( parser.advance_char() != ':' );

-            goto object_begin;
-        }
-        case '[': {
-            pj.containing_scope_offset[depth] = pj.get_current_loc();
-            pj.write_tape(0, c); /* here the compilers knows what c is so this gets
-                            optimized */
-            /* we have not yet encountered } so we need to come back for it */
-            SET_GOTO_OBJECT_CONTINUE()
-            /* we found an array inside an object, so we need to increment the depth
-             */
-            depth++;
-            if (depth >= pj.depth_capacity) {
-                goto fail;
-            }
-            goto array_begin;
-        }
-        default:
-            goto fail;
-    }
+  parser.advance_char();
+  GOTO( parser.parse_value(addresses, addresses.object_continue) );

-    object_continue:
-    UPDATE_CHAR();
-    switch (c) {
-        case ',':
-        UPDATE_CHAR();
-            if (c != '"') {
-                goto fail;
-            } else {
-                if (!parse_string(buf, len, pj, depth, idx)) {
-                    goto fail;
-                }
-                goto object_key_state;
-            }
-        case '}':
-            goto scope_end;
-        default:
-            goto fail;
-    }
+object_continue:
+  switch (parser.advance_char()) {
+  case ',':
+    FAIL_IF( parser.advance_char() != '"' );
+    FAIL_IF( parser.parse_string() );
+    goto object_key_parser;
+  case '}':
+    goto scope_end;
+  default:
+    goto error;
+  }

-    /*//////////////////////////// COMMON STATE ///////////////////////////*/
+scope_end:
+  CONTINUE( parser.pop_scope() );

-    scope_end:
-    /* write our tape location to the header scope */
-    depth--;
-    pj.write_tape(pj.containing_scope_offset[depth], c);
-    pj.annotate_previous_loc(pj.containing_scope_offset[depth],
-                             pj.get_current_loc());
-    /* goto saved_state */
-    GOTO_CONTINUE()
+//
+// Array parser states
+//
+array_begin:
+  if (parser.advance_char() == ']') {
+    goto scope_end; // could also go to array_continue
+  }

-    /*//////////////////////////// ARRAY STATES ///////////////////////////*/
-    array_begin:
-    UPDATE_CHAR();
-    if (c == ']') {
-        goto scope_end; /* could also go to array_continue */
-    }
+main_array_switch:
+  /* we call advance_char() on all paths in, so we can peek at parser.c on the
+   * paths that can accept a close square brace (after a comma, and at start) */
+  GOTO( parser.parse_value(addresses, addresses.array_continue) );

-    main_array_switch:
-    /* we call update char on all paths in, so we can peek at c on the
-     * on paths that can accept a close square brace (post-, and at start) */
-    switch (c) {
-        case '"': {
-            if (!parse_string(buf, len, pj, depth, idx)) {
-                goto fail;
-            }
-            break;
-        }
-        case 't':
-            if (!is_valid_true_atom(buf + idx)) {
-                goto fail;
-            }
-            pj.write_tape(0, c);
-            break;
-        case 'f':
-            if (!is_valid_false_atom(buf + idx)) {
-                goto fail;
-            }
-            pj.write_tape(0, c);
-            break;
-        case 'n':
-            if (!is_valid_null_atom(buf + idx)) {
-                goto fail;
-            }
-            pj.write_tape(0, c);
-            break; /* goto array_continue; */
+array_continue:
+  switch (parser.advance_char()) {
+  case ',':
+    parser.advance_char();
+    goto main_array_switch;
+  case ']':
+    goto scope_end;
+  default:
+    goto error;
+  }

-        case '0':
-        case '1':
-        case '2':
-        case '3':
-        case '4':
-        case '5':
-        case '6':
-        case '7':
-        case '8':
-        case '9': {
-            if (!parse_number(buf, pj, idx, false)) {
-                goto fail;
-            }
-            break; /* goto array_continue; */
-        }
-        case '-': {
-            if (!parse_number(buf, pj, idx, true)) {
-                goto fail;
-            }
-            break; /* goto array_continue; */
-        }
-        case '{': {
-            /* we have not yet encountered ] so we need to come back for it */
-            pj.containing_scope_offset[depth] = pj.get_current_loc();
-            pj.write_tape(0, c); /* here the compilers knows what c is so this gets
-                            optimized */
-            SET_GOTO_ARRAY_CONTINUE()
-            /* we found an object inside an array, so we need to increment the depth
-             */
-            depth++;
-            if (depth >= pj.depth_capacity) {
-                goto fail;
-            }
+finish:
+  next_json = parser.i;
+  return parser.finish();

-            goto object_begin;
-        }
-        case '[': {
-            /* we have not yet encountered ] so we need to come back for it */
-            pj.containing_scope_offset[depth] = pj.get_current_loc();
-            pj.write_tape(0, c); /* here the compilers knows what c is so this gets
-                            optimized */
-            SET_GOTO_ARRAY_CONTINUE()
-            /* we found an array inside an array, so we need to increment the depth
-             */
-            depth++;
-            if (depth >= pj.depth_capacity) {
-                goto fail;
-            }
-            goto array_begin;
-        }
-        default:
-            goto fail;
-    }
-
-    array_continue:
-    UPDATE_CHAR();
-    switch (c) {
-        case ',':
-        UPDATE_CHAR();
-            goto main_array_switch;
-        case ']':
-            goto scope_end;
-        default:
-            goto fail;
-    }
-
-    /*//////////////////////////// FINAL STATES ///////////////////////////*/
-    succeedAndHasMore:
-        depth--;
-        if (pj.containing_scope_offset[depth] != 0) {
-            fprintf(stderr, "internal bug\n");
-            abort();
-        }
-        pj.annotate_previous_loc(pj.containing_scope_offset[depth],
-                                 pj.get_current_loc());
-        pj.write_tape(pj.containing_scope_offset[depth], 'r'); /* r is root */
-
-
-        next_json = i;
-
-        pj.valid = true;
-        pj.error_code = simdjson::SUCCESS_AND_HAS_MORE;
-        return pj.error_code;
-
-    succeed:
-    depth--;
-    if (depth != 0) {
-        fprintf(stderr, "internal bug\n");
-        abort();
-    }
-    if (pj.containing_scope_offset[depth] != 0) {
-        fprintf(stderr, "internal bug\n");
-        abort();
-    }
-    pj.annotate_previous_loc(pj.containing_scope_offset[depth],
-                             pj.get_current_loc());
-    pj.write_tape(pj.containing_scope_offset[depth], 'r'); /* r is root */
-
-    pj.valid = true;
-    pj.error_code = simdjson::SUCCESS;
-    return pj.error_code;
-    fail:
-    /* we do not need the next line because this is done by pj.init(),
-     * pessimistically.
-     * pj.is_valid  = false;
-     * At this point in the code, we have all the time in the world.
-     * Note that we know exactly where we are in the document so we could,
-     * without any overhead on the processing code, report a specific
-     * location.
-     * We could even trigger special code paths to assess what happened
-     * carefully,
-     * all without any added cost. */
-    if (depth >= pj.depth_capacity) {
-        pj.error_code = simdjson::DEPTH_ERROR;
-        return pj.error_code;
-    }
-    switch (c) {
-        case '"':
-            pj.error_code = simdjson::STRING_ERROR;
-            return pj.error_code;
-        case '0':
-        case '1':
-        case '2':
-        case '3':
-        case '4':
-        case '5':
-        case '6':
-        case '7':
-        case '8':
-        case '9':
-        case '-':
-            pj.error_code = simdjson::NUMBER_ERROR;
-            return pj.error_code;
-        case 't':
-            pj.error_code = simdjson::T_ATOM_ERROR;
-            return pj.error_code;
-        case 'n':
-            pj.error_code = simdjson::N_ATOM_ERROR;
-            return pj.error_code;
-        case 'f':
-            pj.error_code = simdjson::F_ATOM_ERROR;
-            return pj.error_code;
-        default:
-            break;
-    }
-    pj.error_code = simdjson::TAPE_ERROR;
-    return pj.error_code;
+error:
+  return parser.error();
 }
+
+} // namespace stage2
--- a/src/haswell/stage2_build_tape.h
+++ b/src/haswell/stage2_build_tape.h
@ -24,13 +24,13 @@ namespace simdjson {
 template <>
 WARN_UNUSED int
 unified_machine<Architecture::HASWELL>(const uint8_t *buf, size_t len, ParsedJson &pj) {
-  return haswell::unified_machine(buf, len, pj);
+  return haswell::stage2::unified_machine(buf, len, pj);
 }

 template <>
 WARN_UNUSED int
 unified_machine<Architecture::HASWELL>(const uint8_t *buf, size_t len, ParsedJson &pj, size_t &next_json) {
-    return haswell::unified_machine(buf, len, pj, next_json);
+  return haswell::stage2::unified_machine(buf, len, pj, next_json);
 }

 } // namespace simdjson
--- a/src/westmere/stage2_build_tape.h
+++ b/src/westmere/stage2_build_tape.h
@ -24,13 +24,13 @@ namespace simdjson {
 template <>
 WARN_UNUSED int
 unified_machine<Architecture::WESTMERE>(const uint8_t *buf, size_t len, ParsedJson &pj) {
-  return westmere::unified_machine(buf, len, pj);
+  return westmere::stage2::unified_machine(buf, len, pj);
 }

 template <>
 WARN_UNUSED int
 unified_machine<Architecture::WESTMERE>(const uint8_t *buf, size_t len, ParsedJson &pj, size_t &next_json) {
-    return westmere::unified_machine(buf, len, pj, next_json);
+    return westmere::stage2::unified_machine(buf, len, pj, next_json);
 }