Make valstat-ish parse APIs

2020-02-13 13:30:12 -08:00 · 2020-02-13 13:30:12 -08:00 · 1f76737510
parent bc8bc7d1a8
commit 1f76737510
16 changed files with 687 additions and 767 deletions
--- a/8
+++ b/8
@ -70,10 +70,10 @@ LIBHEADERS_HASWELL=  src/haswell/bitmanipulation.h  src/haswell/bitmask.h  src/h
 LIBHEADERS_WESTMERE=src/westmere/bitmanipulation.h src/westmere/bitmask.h src/westmere/intrinsics.h src/westmere/numberparsing.h src/westmere/simd.h src/westmere/stage1_find_marks.h src/westmere/stage2_build_tape.h src/westmere/stringparsing.h
 LIBHEADERS=src/jsoncharutils.h src/simdprune_tables.h $(LIBHEADERS_GENERIC) $(LIBHEADERS_ARM64) $(LIBHEADERS_HASWELL) $(LIBHEADERS_WESTMERE)

-PUBHEADERS=include/simdjson/common_defs.h include/simdjson/isadetection.h include/simdjson/jsonformatutils.h include/simdjson/jsonioutil.h include/simdjson/jsonminifier.h include/simdjson/jsonparser.h include/simdjson/padded_string.h include/simdjson/document.h include/simdjson/document/iterator.h include/simdjson/document/parser.h include/simdjson/parsedjson.h include/simdjson/jsonstream.h include/simdjson/portability.h include/simdjson/architecture.h include/simdjson/error.h include/simdjson/simdjson.h include/simdjson/simdjson_version.h include/simdjson/stage1_find_marks.h include/simdjson/stage2_build_tape.h
+PUBHEADERS=include/simdjson/common_defs.h include/simdjson/isadetection.h include/simdjson/jsonformatutils.h include/simdjson/jsonioutil.h include/simdjson/jsonminifier.h include/simdjson/jsonparser.h include/simdjson/padded_string.h include/simdjson/document.h include/simdjson/document_iterator.h include/simdjson/document_parser.h include/simdjson/parsedjson.h include/simdjson/jsonstream.h include/simdjson/portability.h include/simdjson/architecture.h include/simdjson/error.h include/simdjson/simdjson.h include/simdjson/simdjson_version.h include/simdjson/stage1_find_marks.h include/simdjson/stage2_build_tape.h
 HEADERS=$(PUBHEADERS) $(LIBHEADERS)

-LIBFILES=src/jsonioutil.cpp src/jsonparser.cpp src/error.cpp src/stage1_find_marks.cpp src/stage2_build_tape.cpp src/document.cpp src/document/parser.cpp
+LIBFILES=src/jsonioutil.cpp src/jsonparser.cpp src/error.cpp src/stage1_find_marks.cpp src/stage2_build_tape.cpp src/document.cpp src/document_parser.cpp
 MINIFIERHEADERS=include/simdjson/jsonminifier.h
 MINIFIERLIBFILES=src/jsonminifier.cpp

@ -205,7 +205,7 @@ basictests:tests/basictests.cpp $(HEADERS) $(LIBFILES)


 numberparsingcheck:tests/numberparsingcheck.cpp $(HEADERS) $(LIBFILES)
-	$(CXX) $(CXXFLAGS) -o numberparsingcheck src/jsonioutil.cpp src/jsonparser.cpp src/error.cpp src/stage1_find_marks.cpp src/document.cpp src/document/parser.cpp tests/numberparsingcheck.cpp -I. $(LIBFLAGS) -DJSON_TEST_NUMBERS
+	$(CXX) $(CXXFLAGS) -o numberparsingcheck src/jsonioutil.cpp src/jsonparser.cpp src/error.cpp src/stage1_find_marks.cpp src/document.cpp src/document_parser.cpp tests/numberparsingcheck.cpp -I. $(LIBFLAGS) -DJSON_TEST_NUMBERS

 integer_tests:tests/integer_tests.cpp $(HEADERS) $(LIBFILES)
 	$(CXX) $(CXXFLAGS) -o integer_tests $(LIBFILES) tests/integer_tests.cpp -I. $(LIBFLAGS)
@ -213,7 +213,7 @@ integer_tests:tests/integer_tests.cpp $(HEADERS) $(LIBFILES)


 stringparsingcheck:tests/stringparsingcheck.cpp $(HEADERS) $(LIBFILES)
-	$(CXX) $(CXXFLAGS) -o stringparsingcheck src/jsonioutil.cpp src/jsonparser.cpp src/error.cpp src/stage1_find_marks.cpp src/document.cpp src/document/parser.cpp tests/stringparsingcheck.cpp -I. $(LIBFLAGS) -DJSON_TEST_STRINGS
+	$(CXX) $(CXXFLAGS) -o stringparsingcheck src/jsonioutil.cpp src/jsonparser.cpp src/error.cpp src/stage1_find_marks.cpp src/document.cpp src/document_parser.cpp tests/stringparsingcheck.cpp -I. $(LIBFLAGS) -DJSON_TEST_STRINGS

 pointercheck:tests/pointercheck.cpp $(HEADERS) $(LIBFILES)
 	$(CXX) $(CXXFLAGS) -o pointercheck $(LIBFILES) tests/pointercheck.cpp -I. $(LIBFLAGS)
--- a/README.md
+++ b/README.md
@ -121,35 +121,29 @@ Another strategy is to reuse pre-allocated buffers. That is, you avoid reallocat

 The main API involves populating a `ParsedJson` object which hosts a fully navigable document-object-model (DOM) view of the JSON document. The DOM can be accessed using [JSON Pointer](https://tools.ietf.org/html/rfc6901) paths, for example. The main function is `json_parse` which takes a string containing the JSON document as well as a reference to a pre-allocated `ParsedJson` object (which can be reused multiple times). Once you have populated the `ParsedJson` object you can navigate through the DOM with an iterator (e.g., created by `ParsedJson::Iterator pjh(pj)`, see 'Navigating the parsed document').

-```C
-#include "simdjson/jsonparser.h"
-using namespace simdjson;
+// Samples:
+// Load a document from a file
+// Read a particular key / value from the document
+// Iterate over an array of things

-/...
-
-const char * filename = ... //
-
-// use whatever means you want to get a string (UTF-8) of your JSON document
-padded_string p = get_corpus(filename);
-ParsedJson pj;
-pj.allocate_capacity(p.size()); // allocate memory for parsing up to p.size() bytes
-const int res = json_parse(p, pj); // do the parsing, return 0 on success
-// parsing is done!
-if (res != 0) {
-    // You can use the "simdjson/simdjson.h" header to access the error message
-    std::cout << "Error parsing:" << simdjson::error_message(res) << std::endl;
+```c++
+#include "simdjson.h"
+auto doc = simdjson::document::load("myfile.json");
+cout << doc;
+for (auto i=doc.begin(); i<doc.end(); i++) {
+  cout << doc[i];
 }
-// the ParsedJson document can be used here
-// pj can be reused with other json_parse calls.
 ```

-It is also possible to use a simpler API if you do not mind having the overhead
+A slightly simpler API is available if you don't mind having the overhead
 of memory allocation with each new JSON document:

 ```C
 #include "simdjson/jsonparser.h"
 using namespace simdjson;

+document doc = document::parse("myfile.json");
+cout << doc;
 /...

 const char * filename = ... //
--- a/amalgamation.sh
+++ b/amalgamation.sh
@ -23,7 +23,7 @@ jsonparser.cpp
 stage1_find_marks.cpp
 stage2_build_tape.cpp
 document.cpp
-document/parser.cpp
+document_parser.cpp
 "

 # order matters
@ -38,8 +38,8 @@ simdjson/padded_string.h
 simdjson/jsonioutil.h
 simdjson/jsonminifier.h
 simdjson/document.h
-simdjson/document/iterator.h
-simdjson/document/parser.h
+simdjson/document_iterator.h
+simdjson/document_parser.h
 simdjson/parsedjson.h
 simdjson/stage1_find_marks.h
 simdjson/stage2_build_tape.h
@ -151,11 +151,11 @@ int main(int argc, char *argv[]) {
  }
  const char * filename = argv[1];
  simdjson::padded_string p = simdjson::get_corpus(filename);
-  simdjson::document doc;
-  if (!simdjson::document::try_parse(p, doc)) { // do the parsing
-    std::cout << "document::try_parse not valid" << std::endl;
+  auto [doc, error] = simdjson::document::parse(p); // do the parsing
+  if (error) {
+    std::cout << "document::parse not valid" << std::endl;
  } else {
-    std::cout << "document::try_parse valid" << std::endl;
+    std::cout << "document::parse valid" << std::endl;
  }
  if(argc == 2) {
    return EXIT_SUCCESS;
--- a/benchmark/distinctuseridcompetition.cpp
+++ b/benchmark/distinctuseridcompetition.cpp
@ -68,8 +68,7 @@ simdjson_compute_stats(const simdjson::padded_string &p) {

 __attribute__((noinline)) bool
 simdjson_just_parse(const simdjson::padded_string &p) {
-  simdjson::document doc;
-  return simdjson::document::try_parse(p, doc) == simdjson::SUCCESS;
+  // Parse only; returns true when the parse succeeded. This keeps the same
+  // meaning as the previous try_parse(...) == SUCCESS check (the result's
+  // error member is SUCCESS on a good parse).
+  return simdjson::document::parse(p).error == simdjson::SUCCESS;
 }

 void sajson_traverse(std::vector<int64_t> &answer, const sajson::value &node) {
--- a/include/CMakeLists.txt
+++ b/include/CMakeLists.txt
@ -2,8 +2,8 @@ set(SIMDJSON_INCLUDE_DIR ${PROJECT_SOURCE_DIR}/include)
 set(SIMDJSON_INCLUDE
    ${SIMDJSON_INCLUDE_DIR}/simdjson/common_defs.h
    ${SIMDJSON_INCLUDE_DIR}/simdjson/document.h
-    ${SIMDJSON_INCLUDE_DIR}/simdjson/document/iterator.h
-    ${SIMDJSON_INCLUDE_DIR}/simdjson/document/parser.h
+    ${SIMDJSON_INCLUDE_DIR}/simdjson/document_iterator.h
+    ${SIMDJSON_INCLUDE_DIR}/simdjson/document_parser.h
    ${SIMDJSON_INCLUDE_DIR}/simdjson/isadetection.h
    ${SIMDJSON_INCLUDE_DIR}/simdjson/jsonformatutils.h
    ${SIMDJSON_INCLUDE_DIR}/simdjson/jsonioutil.h
--- a/include/simdjson/document.h
+++ b/include/simdjson/document.h
@ -13,6 +13,7 @@
 namespace simdjson {

 template<size_t max_depth> class document_iterator;
+class document_parser;

 class document {
 public:
@ -27,6 +28,9 @@ public:
  document &operator=(const document &o) = delete;

  using iterator = document_iterator<DEFAULT_MAX_DEPTH>;
+  class parser;
+  class doc_result;
+  class doc_ref_result;

  //
  // Tell whether this document has been parsed, or is just empty.
@ -43,8 +47,6 @@ public:
  WARN_UNUSED
  bool dump_raw_tape(std::ostream &os) const;

-  class parser;
-
  //
  // Parse a JSON document.
  //
@ -53,35 +55,10 @@ public:
  //
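   // Returns a doc_result holding the document and an error code. Converting the
   // result to a document throws invalid_json if the error code is not SUCCESS.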
  //
-  static document parse(const uint8_t *buf, size_t len, bool realloc_if_needed = true);
-  static document parse(const char *buf, size_t len, bool realloc_if_needed = true) {
-      return parse((const uint8_t *)buf, len, realloc_if_needed);
-  }
-  static document parse(const std::string &s, bool realloc_if_needed = true) {
-      return parse(s.data(), s.length(), realloc_if_needed);
-  }
-  static document parse(const padded_string &s) {
-      return parse(s.data(), s.length(), false);
-  }
-
-  //
-  // Parse a JSON document.
-  //
-  // If you will be parsing more than one JSON document, it's recommended to create a
-  // document::parser object instead, keeping internal buffers around for efficiency reasons.
-  //
-  // Returns != SUCCESS if the JSON is invalid.
-  //
-  static WARN_UNUSED error_code try_parse(const uint8_t *buf, size_t len, document &dst, bool realloc_if_needed = true) noexcept;
-  static WARN_UNUSED error_code try_parse(const char *buf, size_t len, document &dst, bool realloc_if_needed = true) {
-      return try_parse((const uint8_t *)buf, len, dst, realloc_if_needed);
-  }
-  static WARN_UNUSED error_code try_parse(const std::string &s, document &dst, bool realloc_if_needed = true) {
-      return try_parse(s.data(), s.length(), dst, realloc_if_needed);
-  }
-  static WARN_UNUSED error_code try_parse(const padded_string &s, document &dst) {
-      return try_parse(s.data(), s.length(), dst, false);
-  }
+  static doc_result parse(const uint8_t *buf, size_t len, bool realloc_if_needed = true);
+  static doc_result parse(const char *buf, size_t len, bool realloc_if_needed = true);
+  static doc_result parse(const std::string &s, bool realloc_if_needed = true);
+  static doc_result parse(const padded_string &s);

  std::unique_ptr<uint64_t[]> tape;
  std::unique_ptr<uint8_t[]> string_buf;// should be at least byte_capacity
@ -90,30 +67,46 @@ private:
  bool set_capacity(size_t len);
 };

+//
+// Result of document::parse(): an owned document paired with the error code of the parse.
+//
+// It can be tested in a boolean context (true when error == SUCCESS), inspected through the
+// public doc / error members, or converted to a document. The conversion throws
+// invalid_json when error is not SUCCESS.
+//
+class document::doc_result {
+private:
+  // Constructors are private; document is declared a friend below.
+  doc_result(document &&_doc, error_code _error) : doc(std::move(_doc)), error(_error) { }
+  doc_result(document &&_doc) : doc(std::move(_doc)), error(SUCCESS) { }
+  doc_result(error_code _error) : doc(), error(_error) { }
+  friend class document;
+public:
+  ~doc_result()=default;
+
+  // True when error == SUCCESS. const-qualified so that const results can be tested too.
+  operator bool() const noexcept { return error == SUCCESS; }
+
+  // Moves the document out of this result. Throws invalid_json(error) when the result
+  // holds an error. After a successful conversion, doc is left in a moved-from state.
+  operator document() {
+    if (!*this) {
+      throw invalid_json(error);
+    }
+    return std::move(doc);
+  }
+
+  // The parsed document (default-constructed when only an error code was supplied).
+  document doc;
+  // Error code of the parse; SUCCESS when no error was supplied.
+  error_code error;
+};
+
+//
+// A reference to a document paired with an error code.
+//
+// It can be tested in a boolean context (true when error == SUCCESS), inspected through the
+// public doc / error members, or converted to document&. The conversion throws
+// invalid_json when error is not SUCCESS. The result does not own the document; it only
+// stores the reference it was constructed with.
+//
+class document::doc_ref_result {
+public:
+  doc_ref_result(document &_doc, error_code _error) : doc(_doc), error(_error) { }
+  ~doc_ref_result()=default;
+
+  // True when error == SUCCESS. const-qualified so that const results can be tested too.
+  operator bool() const noexcept { return error == SUCCESS; }
+
+  // Returns the referenced document. Throws invalid_json(error) when the result holds an error.
+  operator document&() {
+    if (!*this) {
+      throw invalid_json(error);
+    }
+    return doc;
+  }
+
+  // The referenced document (not owned by this result).
+  document& doc;
+  // Error code supplied at construction.
+  error_code error;
+};
+
+
 } // namespace simdjson

-#include "simdjson/document/parser.h"
-#include "simdjson/document/iterator.h"
-
-// Implementations
-namespace simdjson {
-
-inline WARN_UNUSED document document::parse(const uint8_t *buf, size_t len, bool realloc_if_needed) {
-  document::parser parser;
-  if (!parser.allocate_capacity(len)) {
-    throw invalid_json(parser.error = MEMALLOC);
-  }
-  return parser.parse_new(buf, len, realloc_if_needed);
-}
-
-inline WARN_UNUSED error_code document::try_parse(const uint8_t *buf, size_t len, document &dst, bool realloc_if_needed) noexcept {
-  document::parser parser;
-  if (!parser.allocate_capacity(len)) {
-    return parser.error = MEMALLOC;
-  }
-  return parser.try_parse_into(buf, len, dst, realloc_if_needed);
-}
-
-} // namespace simdjson
+#include "simdjson/document_parser.h"
+#include "simdjson/document_iterator.h"

 #endif // SIMDJSON_DOCUMENT_H
--- a/include/simdjson/document/parser.h
+++ b/include/simdjson/document/parser.h
@ -1,349 +0,0 @@
-#ifndef SIMDJSON_DOCUMENT_PARSER_H
-#define SIMDJSON_DOCUMENT_PARSER_H
-
-#include <cstring>
-#include <memory>
-#include "simdjson/common_defs.h"
-#include "simdjson/simdjson.h"
-#include "simdjson/document.h"
-#include "simdjson/padded_string.h"
-
-namespace simdjson {
-
-class document::parser {
-public:
-  //
-  // Create a JSON parser with zero capacity. Call allocate_capacity() to initialize it.
-  //
-  parser()=default;
-  ~parser()=default;
-
-  // this is a move only class
-  parser(parser &&p) = default;
-  parser(const parser &p) = delete;
-  parser &operator=(parser &&o) = default;
-  parser &operator=(const parser &o) = delete;
-
-  //
-  // Parse a JSON document and return a reference to it.
-  //
-  // The JSON document still lives in the parser: this is the most efficient way to parse JSON
-  // documents because it reuses the same buffers, but you *must* use the document before you
-  // destroy the parser or call parse() again.
-  //
-  // Throws invalid_json if the JSON is invalid.
-  //
-  const document &parse(const uint8_t *buf, size_t len, bool realloc_if_needed = true);
-  const document &parse(const char *buf, size_t len, bool realloc_if_needed = true) {
-    return parse((const uint8_t *)buf, len, realloc_if_needed);
-  }
-  const document &parse(const std::string &s, bool realloc_if_needed = true) {
-    return parse(s.data(), s.length(), realloc_if_needed);
-  }
-  const document &parse(const padded_string &s) {
-    return parse(s.data(), s.length(), false);
-  }
-
-  //
-  // Parse a JSON document and take the result.
-  //
-  // The document can be used even after the parser is deallocated or parse() is called again.
-  //
-  // Throws invalid_json if the JSON is invalid.
-  //
-  document parse_new(const uint8_t *buf, size_t len, bool realloc_if_needed = true);
-  document parse_new(const char *buf, size_t len, bool realloc_if_needed = true) {
-    return parse_new((const uint8_t *)buf, len, realloc_if_needed);
-  }
-  document parse_new(const std::string &s, bool realloc_if_needed = true) {
-    return parse_new(s.data(), s.length(), realloc_if_needed);
-  }
-  document parse_new(const padded_string &s) {
-    return parse_new(s.data(), s.length(), false);
-  }
-
-  //
-  // Parse a JSON document and set doc to a pointer to it.
-  //
-  // The JSON document still lives in the parser: this is the most efficient way to parse JSON
-  // documents because it reuses the same buffers, but you *must* use the document before you
-  // destroy the parser or call parse() again.
-  //
-  // Returns != SUCCESS if the JSON is invalid.
-  //
-  WARN_UNUSED error_code try_parse(const uint8_t *buf, size_t len, const document *& dst, bool realloc_if_needed = true) noexcept;
-  WARN_UNUSED error_code try_parse(const char *buf, size_t len, const document *& dst, bool realloc_if_needed = true) noexcept {
-    return try_parse((const uint8_t *)buf, len, dst, realloc_if_needed);
-  }
-  WARN_UNUSED error_code try_parse(const std::string &s, const document *&dst, bool realloc_if_needed = true) noexcept {
-    return try_parse(s.data(), s.length(), dst, realloc_if_needed);
-  }
-  WARN_UNUSED error_code try_parse(const padded_string &s, const document *&dst) noexcept {
-    return try_parse(s.data(), s.length(), dst, false);
-  }
-
-  //
-  // Parse a JSON document and fill in dst.
-  //
-  // The document can be used even after the parser is deallocated or parse() is called again.
-  //
-  // Returns != SUCCESS if the JSON is invalid.
-  //
-  WARN_UNUSED error_code try_parse_into(const uint8_t *buf, size_t len, document &dst, bool realloc_if_needed = true) noexcept;
-  WARN_UNUSED error_code try_parse_into(const char *buf, size_t len, document &dst, bool realloc_if_needed = true) noexcept {
-    return try_parse_into((const uint8_t *)buf, len, dst, realloc_if_needed);
-  }
-  WARN_UNUSED error_code try_parse_into(const std::string &s, document &dst, bool realloc_if_needed = true) noexcept {
-    return try_parse_into(s.data(), s.length(), dst, realloc_if_needed);
-  }
-  WARN_UNUSED error_code try_parse_into(const padded_string &s, document &dst) noexcept {
-    return try_parse_into(s.data(), s.length(), dst, false);
-  }
-
-  //
-  // Current capacity: the largest document this parser can support without reallocating.
-  //
-  size_t capacity() {
-    return _capacity;
-  }
-
-  //
-  // The maximum level of nested object and arrays supported by this parser.
-  //
-  size_t max_depth() {
-    return _max_depth;
-  }
-
-  // if needed, allocate memory so that the object is able to process JSON
-  // documents having up to capacity bytes and max_depth "depth"
-  WARN_UNUSED bool allocate_capacity(size_t capacity, size_t max_depth = DEFAULT_MAX_DEPTH) {
-    return set_capacity(capacity) && set_max_depth(max_depth);
-  }
-
-  // type aliases for backcompat
-  using Iterator = document::iterator;
-  using InvalidJSON = invalid_json;
-
-  // Next location to write to in the tape
-  uint32_t current_loc{0};
-
-  // structural indices passed from stage 1 to stage 2
-  uint32_t n_structural_indexes{0};
-  std::unique_ptr<uint32_t[]> structural_indexes;
-
-  // location and return address of each open { or [
-  std::unique_ptr<uint32_t[]> containing_scope_offset;
-#ifdef SIMDJSON_USE_COMPUTED_GOTO
-  std::unique_ptr<void*[]> ret_address;
-#else
-  std::unique_ptr<char[]> ret_address;
-#endif
-
-  // Next place to write a string
-  uint8_t *current_string_buf_loc;
-
-  bool valid{false};
-  error_code error{simdjson::UNINITIALIZED};
-
-  // Document we're writing to
-  document doc;
-
-  // returns true if the document parsed was valid
-  bool is_valid() const;
-
-  // return an error code corresponding to the last parsing attempt, see
-  // simdjson.h will return simdjson::UNITIALIZED if no parsing was attempted
-  int get_error_code() const;
-
-  // return the string equivalent of "get_error_code"
-  std::string get_error_message() const;
-
-  //
-  // for backcompat with ParsedJson
-  //
-
-  // print the json to std::ostream (should be valid)
-  // return false if the tape is likely wrong (e.g., you did not parse a valid
-  // JSON).
-  WARN_UNUSED
-  bool print_json(std::ostream &os) const;
-  WARN_UNUSED
-  bool dump_raw_tape(std::ostream &os) const;
-
-  // this should be called when parsing (right before writing the tapes)
-  void init_stage2();
-
-  really_inline error_code on_error(error_code new_error_code) {
-    error = new_error_code;
-    return new_error_code;
-  }
-  really_inline error_code on_success(error_code success_code) {
-    error = success_code;
-    valid = true;
-    return success_code;
-  }
-  really_inline bool on_start_document(uint32_t depth) {
-    containing_scope_offset[depth] = current_loc;
-    write_tape(0, 'r');
-    return true;
-  }
-  really_inline bool on_start_object(uint32_t depth) {
-    containing_scope_offset[depth] = current_loc;
-    write_tape(0, '{');
-    return true;
-  }
-  really_inline bool on_start_array(uint32_t depth) {
-    containing_scope_offset[depth] = current_loc;
-    write_tape(0, '[');
-    return true;
-  }
-  // TODO we're not checking this bool
-  really_inline bool on_end_document(uint32_t depth) {
-    // write our doc.tape location to the header scope
-    // The root scope gets written *at* the previous location.
-    annotate_previous_loc(containing_scope_offset[depth], current_loc);
-    write_tape(containing_scope_offset[depth], 'r');
-    return true;
-  }
-  really_inline bool on_end_object(uint32_t depth) {
-    // write our doc.tape location to the header scope
-    write_tape(containing_scope_offset[depth], '}');
-    annotate_previous_loc(containing_scope_offset[depth], current_loc);
-    return true;
-  }
-  really_inline bool on_end_array(uint32_t depth) {
-    // write our doc.tape location to the header scope
-    write_tape(containing_scope_offset[depth], ']');
-    annotate_previous_loc(containing_scope_offset[depth], current_loc);
-    return true;
-  }
-
-  really_inline bool on_true_atom() {
-    write_tape(0, 't');
-    return true;
-  }
-  really_inline bool on_false_atom() {
-    write_tape(0, 'f');
-    return true;
-  }
-  really_inline bool on_null_atom() {
-    write_tape(0, 'n');
-    return true;
-  }
-
-  really_inline uint8_t *on_start_string() {
-    /* we advance the point, accounting for the fact that we have a NULL
-      * termination         */
-    write_tape(current_string_buf_loc - doc.string_buf.get(), '"');
-    return current_string_buf_loc + sizeof(uint32_t);
-  }
-
-  really_inline bool on_end_string(uint8_t *dst) {
-    uint32_t str_length = dst - (current_string_buf_loc + sizeof(uint32_t));
-    // TODO check for overflow in case someone has a crazy string (>=4GB?)
-    // But only add the overflow check when the document itself exceeds 4GB
-    // Currently unneeded because we refuse to parse docs larger or equal to 4GB.
-    memcpy(current_string_buf_loc, &str_length, sizeof(uint32_t));
-    // NULL termination is still handy if you expect all your strings to
-    // be NULL terminated? It comes at a small cost
-    *dst = 0;
-    current_string_buf_loc = dst + 1;
-    return true;
-  }
-
-  really_inline bool on_number_s64(int64_t value) {
-    write_tape(0, 'l');
-    std::memcpy(&doc.tape[current_loc], &value, sizeof(value));
-    ++current_loc;
-    return true;
-  }
-  really_inline bool on_number_u64(uint64_t value) {
-    write_tape(0, 'u');
-    doc.tape[current_loc++] = value;
-    return true;
-  }
-  really_inline bool on_number_double(double value) {
-    write_tape(0, 'd');
-    static_assert(sizeof(value) == sizeof(doc.tape[current_loc]), "mismatch size");
-    memcpy(&doc.tape[current_loc++], &value, sizeof(double));
-    // doc.tape[doc.current_loc++] = *((uint64_t *)&d);
-    return true;
-  }
-
-  //
-  // Called before a parse is initiated.
-  //
-  // - Returns CAPACITY if the document is too large
-  // - Returns MEMALLOC if we needed to allocate memory and could not
-  //
-  WARN_UNUSED error_code init_parse(size_t len);
-
-  const document &get_document() const {
-    if (!is_valid()) {
-      throw invalid_json(error);
-    }
-    return doc;
-  }
-
-private:
-  //
-  // The maximum document length this parser supports.
-  //
-  // Buffers are large enough to handle any document up to this length.
-  //
-  size_t _capacity{0};
-
-  //
-  // The maximum depth (number of nested objects and arrays) supported by this parser.
-  //
-  // Defaults to DEFAULT_MAX_DEPTH.
-  //
-  size_t _max_depth{0};
-
-  // all nodes are stored on the doc.tape using a 64-bit word.
-  //
-  // strings, double and ints are stored as
-  //  a 64-bit word with a pointer to the actual value
-  //
-  //
-  //
-  // for objects or arrays, store [ or {  at the beginning and } and ] at the
-  // end. For the openings ([ or {), we annotate them with a reference to the
-  // location on the doc.tape of the end, and for then closings (} and ]), we
-  // annotate them with a reference to the location of the opening
-  //
-  //
-
-  // this should be considered a private function
-  really_inline void write_tape(uint64_t val, uint8_t c) {
-    doc.tape[current_loc++] = val | ((static_cast<uint64_t>(c)) << 56);
-  }
-
-  really_inline void annotate_previous_loc(uint32_t saved_loc, uint64_t val) {
-    doc.tape[saved_loc] |= val;
-  }
-
-  WARN_UNUSED error_code try_parse(const uint8_t *buf, size_t len, bool realloc_if_needed) noexcept;
-
-  //
-  // Set the current capacity: the largest document this parser can support without reallocating.
-  //
-  // This will allocate *or deallocate* as necessary.
-  //
-  // Returns false if allocation fails.
-  //
-  WARN_UNUSED bool set_capacity(size_t capacity);
-
-  //
-  // Set the maximum level of nested object and arrays supported by this parser.
-  //
-  // This will allocate *or deallocate* as necessary.
-  //
-  // Returns false if allocation fails.
-  //
-  WARN_UNUSED bool set_max_depth(size_t max_depth);
-};
-
-} // namespace simdjson
-
-#endif // SIMDJSON_DOCUMENT_PARSER_H
--- a/include/simdjson/document_iterator.h
+++ b/include/simdjson/document_iterator.h
--- a/include/simdjson/document_parser.h
+++ b/include/simdjson/document_parser.h
@ -0,0 +1,597 @@
+#ifndef SIMDJSON_DOCUMENT_PARSER_H
+#define SIMDJSON_DOCUMENT_PARSER_H
+
+#include <cstring>
+#include <memory>
+#include "simdjson/common_defs.h"
+#include "simdjson/simdjson.h"
+#include "simdjson/document.h"
+#include "simdjson/padded_string.h"
+
+namespace simdjson {
+
+class document::parser {
+public:
+  //
+  // Create a JSON parser with zero capacity. Call allocate_capacity() to initialize it.
+  //
+  parser()=default;
+  ~parser()=default;
+
+  // this is a move only class
+  parser(document::parser &&p) = default;
+  parser(const document::parser &p) = delete;
+  parser &operator=(document::parser &&o) = default;
+  parser &operator=(const document::parser &o) = delete;
+
+  //
+  // Parse a JSON document and return a reference to it.
+  //
+  // The JSON document still lives in the parser: this is the most efficient way to parse JSON
+  // documents because it reuses the same buffers, but you *must* use the document before you
+  // destroy the parser or call parse() again.
+  //
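+  // Returns a doc_ref_result holding a document reference and an error code. Converting
+  // the result to document& throws invalid_json if the error code is not SUCCESS.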
+  //
+  inline doc_ref_result parse(const uint8_t *buf, size_t len, bool realloc_if_needed = true);
+  inline doc_ref_result parse(const char *buf, size_t len, bool realloc_if_needed = true);
+  inline doc_ref_result parse(const std::string &s, bool realloc_if_needed = true);
+  inline doc_ref_result parse(const padded_string &s);
+
+  //
+  // Current capacity: the largest document this parser can support without reallocating.
+  //
+  size_t capacity() {
+    return _capacity;
+  }
+
+  //
+  // The maximum level of nested object and arrays supported by this parser.
+  //
+  size_t max_depth() {
+    return _max_depth;
+  }
+
+  // if needed, allocate memory so that the object is able to process JSON
+  // documents having up to capacity bytes and max_depth "depth"
+  WARN_UNUSED bool allocate_capacity(size_t capacity, size_t max_depth = DEFAULT_MAX_DEPTH) {
+    return set_capacity(capacity) && set_max_depth(max_depth);
+  }
+
+  // type aliases for backcompat
+  using Iterator = document::iterator;
+  using InvalidJSON = invalid_json;
+  class doc_result;
+
+  // Next location to write to in the tape
+  uint32_t current_loc{0};
+
+  // structural indices passed from stage 1 to stage 2
+  uint32_t n_structural_indexes{0};
+  std::unique_ptr<uint32_t[]> structural_indexes;
+
+  // location and return address of each open { or [
+  std::unique_ptr<uint32_t[]> containing_scope_offset;
+#ifdef SIMDJSON_USE_COMPUTED_GOTO
+  std::unique_ptr<void*[]> ret_address;
+#else
+  std::unique_ptr<char[]> ret_address;
+#endif
+
+  // Next place to write a string
+  uint8_t *current_string_buf_loc;
+
+  bool valid{false};
+  error_code error{simdjson::UNINITIALIZED};
+
+  // Document we're writing to
+  document doc;
+
+  // returns true if the document parsed was valid
+  bool is_valid() const;
+
+  // return an error code corresponding to the last parsing attempt (see
+  // simdjson.h); will return simdjson::UNINITIALIZED if no parsing was attempted
+  int get_error_code() const;
+
+  // return the string equivalent of "get_error_code"
+  std::string get_error_message() const;
+
+  //
+  // for backcompat with ParsedJson
+  //
+
+  // print the json to std::ostream (should be valid)
+  // return false if the tape is likely wrong (e.g., you did not parse a valid
+  // JSON).
+  WARN_UNUSED
+  bool print_json(std::ostream &os) const;
+  WARN_UNUSED
+  bool dump_raw_tape(std::ostream &os) const;
+
+  // this should be called when parsing (right before writing the tapes)
+  void init_stage2();
+
+  really_inline error_code on_error(error_code new_error_code) {
+    error = new_error_code;
+    return new_error_code;
+  }
+  really_inline error_code on_success(error_code success_code) {
+    error = success_code;
+    valid = true;
+    return success_code;
+  }
+  really_inline bool on_start_document(uint32_t depth) {
+    containing_scope_offset[depth] = current_loc;
+    write_tape(0, 'r');
+    return true;
+  }
+  really_inline bool on_start_object(uint32_t depth) {
+    containing_scope_offset[depth] = current_loc;
+    write_tape(0, '{');
+    return true;
+  }
+  really_inline bool on_start_array(uint32_t depth) {
+    containing_scope_offset[depth] = current_loc;
+    write_tape(0, '[');
+    return true;
+  }
+  // TODO we're not checking this bool
+  really_inline bool on_end_document(uint32_t depth) {
+    // write our doc.tape location to the header scope
+    // The root scope gets written *at* the previous location.
+    annotate_previous_loc(containing_scope_offset[depth], current_loc);
+    write_tape(containing_scope_offset[depth], 'r');
+    return true;
+  }
+  really_inline bool on_end_object(uint32_t depth) {
+    // write our doc.tape location to the header scope
+    write_tape(containing_scope_offset[depth], '}');
+    annotate_previous_loc(containing_scope_offset[depth], current_loc);
+    return true;
+  }
+  really_inline bool on_end_array(uint32_t depth) {
+    // write our doc.tape location to the header scope
+    write_tape(containing_scope_offset[depth], ']');
+    annotate_previous_loc(containing_scope_offset[depth], current_loc);
+    return true;
+  }
+
+  really_inline bool on_true_atom() {
+    write_tape(0, 't');
+    return true;
+  }
+  really_inline bool on_false_atom() {
+    write_tape(0, 'f');
+    return true;
+  }
+  really_inline bool on_null_atom() {
+    write_tape(0, 'n');
+    return true;
+  }
+
+  really_inline uint8_t *on_start_string() {
+    /* we advance the point, accounting for the fact that we have a NULL
+      * termination         */
+    write_tape(current_string_buf_loc - doc.string_buf.get(), '"');
+    return current_string_buf_loc + sizeof(uint32_t);
+  }
+
+  really_inline bool on_end_string(uint8_t *dst) {
+    uint32_t str_length = dst - (current_string_buf_loc + sizeof(uint32_t));
+    // TODO check for overflow in case someone has a crazy string (>=4GB?)
+    // But only add the overflow check when the document itself exceeds 4GB
+    // Currently unneeded because we refuse to parse docs larger or equal to 4GB.
+    memcpy(current_string_buf_loc, &str_length, sizeof(uint32_t));
+    // NULL termination is still handy if you expect all your strings to
+    // be NULL terminated? It comes at a small cost
+    *dst = 0;
+    current_string_buf_loc = dst + 1;
+    return true;
+  }
+
+  really_inline bool on_number_s64(int64_t value) {
+    write_tape(0, 'l');
+    std::memcpy(&doc.tape[current_loc], &value, sizeof(value));
+    ++current_loc;
+    return true;
+  }
+  really_inline bool on_number_u64(uint64_t value) {
+    write_tape(0, 'u');
+    doc.tape[current_loc++] = value;
+    return true;
+  }
+  // Records a double: a 'd' tag word followed by one tape word holding the
+  // raw bytes of `value`. Always returns true.
+  really_inline bool on_number_double(double value) {
+    // a double must fit exactly in one tape word for the byte copy below
+    static_assert(sizeof(value) == sizeof(doc.tape[current_loc]), "mismatch size");
+    write_tape(0, 'd');
+    memcpy(&doc.tape[current_loc], &value, sizeof(value));
+    ++current_loc;
+    return true;
+  }
+
+  //
+  // Called before a parse is initiated.
+  //
+  // - Returns CAPACITY if the document is too large
+  // - Returns MEMALLOC if we needed to allocate memory and could not
+  //
+  WARN_UNUSED error_code init_parse(size_t len);
+
+  // Returns the parser's document.
+  // Throws invalid_json(error) when is_valid() is false.
+  const document &get_document() const {
+    if (is_valid()) {
+      return doc;
+    }
+    throw invalid_json(error);
+  }
+
+private:
+  //
+  // The maximum document length this parser supports.
+  //
+  // Buffers are large enough to handle any document up to this length.
+  //
+  size_t _capacity{0};
+
+  //
+  // The maximum depth (number of nested objects and arrays) supported by this parser.
+  //
+  // Defaults to DEFAULT_MAX_DEPTH.
+  //
+  size_t _max_depth{0};
+
+  // all nodes are stored on the doc.tape using a 64-bit word.
+  //
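+  // strings are stored as a 64-bit word holding an offset into the string
+  // buffer; doubles and ints are stored as a 64-bit tag word followed by a
+  // second 64-bit word holding the actual value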
+  //
+  //
+  //
+  // for objects or arrays, store [ or { at the beginning and } or ] at the
+  // end. For the openings ([ or {), we annotate them with a reference to the
+  // location on the doc.tape of the end, and for the closings (} and ]), we
+  // annotate them with a reference to the location of the opening
+  //
+  //
+
+  // this should be considered a private function
+  really_inline void write_tape(uint64_t val, uint8_t c) {
+    // Appends one tape word: the type tag `c` goes in the top 8 bits and
+    // `val` is OR-ed in; current_loc then moves to the next word.
+    const uint64_t tag_bits = static_cast<uint64_t>(c) << 56;
+    doc.tape[current_loc] = val | tag_bits;
+    ++current_loc;
+  }
+
+  really_inline void annotate_previous_loc(uint32_t saved_loc, uint64_t val) {
+    // OR `val` into an already-written tape word; bits already set there
+    // (such as the type tag) are kept.
+    doc.tape[saved_loc] = doc.tape[saved_loc] | val;
+  }
+
+  //
+  // Set the current capacity: the largest document this parser can support without reallocating.
+  //
+  // This will allocate *or deallocate* as necessary.
+  //
+  // Returns false if allocation fails.
+  //
+  WARN_UNUSED bool set_capacity(size_t capacity);
+
+  //
+  // Set the maximum level of nested object and arrays supported by this parser.
+  //
+  // This will allocate *or deallocate* as necessary.
+  //
+  // Returns false if allocation fails.
+  //
+  WARN_UNUSED bool set_max_depth(size_t max_depth);
+};
+
+//
+// C API (json_parse and build_parsed_json) declarations
+//
+
+// Parse a document found in buf.
+//
+// The content should be a valid JSON document encoded as UTF-8. If there is a
+// UTF-8 BOM, the caller is responsible for omitting it, UTF-8 BOM are
+// discouraged.
+//
+// You need to preallocate document::parser with a capacity of len (e.g.,
+// parser.allocate_capacity(len)).
+//
+// The function returns simdjson::SUCCESS (an integer = 0) in case of success,
+// or an error code from simdjson/simdjson.h in case of failure, such as
+// simdjson::CAPACITY, simdjson::MEMALLOC, simdjson::DEPTH_ERROR and so forth
+// (the simdjson::error_message function converts these error codes into a
+// string).
+//
+// You can also check validity by calling parser.is_valid(). The same document::parser can
+// be reused for other documents.
+//
+// If realloc_if_needed is true (default) then a temporary buffer is created
+// when needed during processing (a copy of the input string is made). If
+// realloc_if_needed is false, the input buf must be readable up to
+// buf + len + SIMDJSON_PADDING; all bytes at and after buf + len are ignored
+// (can be garbage). The document::parser object can be reused.
+
+int json_parse(const uint8_t *buf, size_t len, document::parser &parser,
+               bool realloc_if_needed = true);
+
+// Parse a document found in buf.
+//
+// The content should be a valid JSON document encoded as UTF-8. If there is a
+// UTF-8 BOM, the caller is responsible for omitting it, UTF-8 BOM are
+// discouraged.
+//
+// You need to preallocate document::parser with a capacity of len (e.g.,
+// parser.allocate_capacity(len)).
+//
+// The function returns simdjson::SUCCESS (an integer = 0) in case of success,
+// or an error code from simdjson/simdjson.h in case of failure, such as
+// simdjson::CAPACITY, simdjson::MEMALLOC, simdjson::DEPTH_ERROR and so forth
+// (the simdjson::error_message function converts these error codes into a
+// string).
+//
+// You can also check validity
+// by calling parser.is_valid(). The same document::parser can be reused for other
+// documents.
+//
+// If realloc_if_needed is true (default) then a temporary buffer is created
+// when needed during processing (a copy of the input string is made). If
+// realloc_if_needed is false, the input buf must be readable up to
+// buf + len + SIMDJSON_PADDING; all bytes at and after buf + len are ignored
+// (can be garbage). The document::parser object can be reused.
+int json_parse(const char *buf, size_t len, document::parser &parser,
+               bool realloc_if_needed = true);
+
+// We do not want to allow implicit conversion from C string to std::string.
+int json_parse(const char *buf, document::parser &parser) = delete;
+
+// Parse a document found in string s.
+//
+// You need to preallocate document::parser with a capacity of s.length()
+// (e.g., parser.allocate_capacity(s.length())).
+//
+// The function returns simdjson::SUCCESS (an integer = 0) in case of success,
+// or an error code from simdjson/simdjson.h in case of failure, such as
+// simdjson::CAPACITY, simdjson::MEMALLOC, simdjson::DEPTH_ERROR and so forth
+// (the simdjson::error_message function converts these error codes into a
+// string).
+//
+// A temporary buffer is created when needed during processing
+// (a copy of the input string is made).
+inline int json_parse(const std::string &s, document::parser &parser) {
+  // std::string carries no padding guarantee, so always ask for the copy
+  const bool copy_to_padded_buffer = true;
+  return json_parse(s.data(), s.length(), parser, copy_to_padded_buffer);
+}
+
+// Parse a document found in padded string s.
+//
+// The content should be a valid JSON document encoded as UTF-8. If there is a
+// UTF-8 BOM, the caller is responsible for omitting it; UTF-8 BOMs are
+// discouraged.
+//
+// You need to preallocate document::parser with a capacity of s.length()
+// (e.g., parser.allocate_capacity(s.length())).
+//
+// The function returns simdjson::SUCCESS (an integer = 0) in case of success,
+// or an error code from simdjson/simdjson.h in case of failure, such as
+// simdjson::CAPACITY, simdjson::MEMALLOC, simdjson::DEPTH_ERROR and so forth
+// (the simdjson::error_message function converts these error codes into a
+// string).
+//
+// You can also check validity by calling parser.is_valid(). The same
+// document::parser can be reused for other documents.
+inline int json_parse(const padded_string &s, document::parser &parser) {
+  // realloc_if_needed is passed as false: no temporary copy is requested
+  const bool copy_to_padded_buffer = false;
+  return json_parse(s.data(), s.length(), parser, copy_to_padded_buffer);
+}
+
+// Build a document::parser object. You can check validity
+// by calling parser.is_valid(). This does the memory allocation needed for
+// document::parser. If realloc_if_needed is true (default) then a temporary buffer is
+// created when needed during processing (a copy of the input string is made).
+//
+// If realloc_if_needed is false, the input buf must be readable up to
+// buf + len + SIMDJSON_PADDING; all bytes at and after buf + len are ignored
+// (can be garbage).
+//
+// The content should be a valid JSON document encoded as UTF-8. If there is a
+// UTF-8 BOM, the caller is responsible for omitting it, UTF-8 BOM are
+// discouraged.
+//
+// This is a convenience function which calls json_parse.
+WARN_UNUSED
+document::parser build_parsed_json(const uint8_t *buf, size_t len,
+                             bool realloc_if_needed = true);
+
+// Build a document::parser object from the len bytes at buf. You can check
+// validity by calling is_valid() on the returned parser. This does the memory
+// allocation needed for document::parser.
+//
+// If realloc_if_needed is true (default) then a temporary buffer is created
+// when needed during processing (a copy of the input string is made). If
+// realloc_if_needed is false, the input buf must be readable up to
+// buf + len + SIMDJSON_PADDING; all bytes at and after buf + len are ignored
+// (can be garbage).
+//
+// The content should be a valid JSON document encoded as UTF-8. If there is a
+// UTF-8 BOM, the caller is responsible for omitting it; UTF-8 BOMs are
+// discouraged.
+//
+// This is a convenience function which forwards to the const uint8_t *
+// overload of build_parsed_json.
+WARN_UNUSED
+inline document::parser build_parsed_json(const char *buf, size_t len,
+                                    bool realloc_if_needed = true) {
+  const uint8_t *bytes = reinterpret_cast<const uint8_t *>(buf);
+  return build_parsed_json(bytes, len, realloc_if_needed);
+}
+
+// We do not want to allow implicit conversion from C string to std::string.
+document::parser build_parsed_json(const char *buf) = delete;
+
+// Build a document::parser object from the document found in string s. You
+// can check validity by calling is_valid() on the returned parser.
+//
+// A temporary buffer is created when needed during processing
+// (a copy of the input string is made).
+//
+// The content should be a valid JSON document encoded as UTF-8. If there is a
+// UTF-8 BOM, the caller is responsible for omitting it; UTF-8 BOMs are
+// discouraged.
+//
+// This is a convenience function which forwards to the pointer-and-length
+// overload of build_parsed_json with realloc_if_needed set to true.
+WARN_UNUSED
+inline document::parser build_parsed_json(const std::string &s) {
+  const bool copy_to_padded_buffer = true;
+  return build_parsed_json(s.data(), s.length(), copy_to_padded_buffer);
+}
+
+// Build a document::parser object from the document found in padded string s.
+// You can check validity by calling is_valid() on the returned parser.
+//
+// The content should be a valid JSON document encoded as UTF-8. If there is a
+// UTF-8 BOM, the caller is responsible for omitting it; UTF-8 BOMs are
+// discouraged.
+//
+// This is a convenience function which forwards to the pointer-and-length
+// overload of build_parsed_json with realloc_if_needed set to false.
+WARN_UNUSED
+inline document::parser build_parsed_json(const padded_string &s) {
+  const bool copy_to_padded_buffer = false;
+  return build_parsed_json(s.data(), s.length(), copy_to_padded_buffer);
+}
+
+
+//
+// Stage 1 implementation declarations
+//
+
+// Setting the streaming parameter to true allows the find_structural_bits to tolerate unclosed strings.
+// The caller should still ensure that the input is valid UTF-8. If you are processing substrings,
+// you may want to call on a function like trimmed_length_safe_utf8.
+// A function like find_last_json_buf_idx may also prove useful.
+template <architecture T = architecture::NATIVE>
+int find_structural_bits(const uint8_t *buf, size_t len, document::parser &parser, bool streaming);
+
+// Setting the streaming parameter to true allows the find_structural_bits to tolerate unclosed strings.
+// The caller should still ensure that the input is valid UTF-8. If you are processing substrings,
+// you may want to call on a function like trimmed_length_safe_utf8.
+// A function like find_last_json_buf_idx may also prove useful.
+template <architecture T = architecture::NATIVE>
+int find_structural_bits(const char *buf, size_t len, document::parser &parser, bool streaming) {
+  // forward to the const uint8_t * overload; only the pointer type is adapted
+  const uint8_t *bytes = reinterpret_cast<const uint8_t *>(buf);
+  return find_structural_bits<T>(bytes, len, parser, streaming);
+}
+
+template <architecture T = architecture::NATIVE>
+int find_structural_bits(const uint8_t *buf, size_t len, document::parser &parser) {
+  // non-streaming convenience form: forwards with streaming set to false
+  const bool streaming = false;
+  return find_structural_bits<T>(buf, len, parser, streaming);
+}
+
+template <architecture T = architecture::NATIVE>
+int find_structural_bits(const char *buf, size_t len, document::parser &parser) {
+  // forward to the three-argument const uint8_t * overload
+  const uint8_t *bytes = reinterpret_cast<const uint8_t *>(buf);
+  return find_structural_bits<T>(bytes, len, parser);
+}
+
+//
+// Stage 2 implementation declarations
+//
+
+template <architecture T = architecture::NATIVE>
+WARN_UNUSED int
+unified_machine(const uint8_t *buf, size_t len, document::parser &parser);
+
+template <architecture T = architecture::NATIVE>
+WARN_UNUSED int
+unified_machine(const char *buf, size_t len, document::parser &parser) {
+  // forward to the const uint8_t * overload; only the pointer type is adapted
+  const uint8_t *bytes = reinterpret_cast<const uint8_t *>(buf);
+  return unified_machine<T>(bytes, len, parser);
+}
+
+
+// Streaming
+template <architecture T = architecture::NATIVE>
+WARN_UNUSED int
+unified_machine(const uint8_t *buf, size_t len, document::parser &parser, size_t &next_json);
+
+template <architecture T = architecture::NATIVE>
+int unified_machine(const char *buf, size_t len, document::parser &parser, size_t &next_json) {
+  // streaming form: forward to the const uint8_t * overload, passing
+  // next_json through by reference
+  const uint8_t *bytes = reinterpret_cast<const uint8_t *>(buf);
+  return unified_machine<T>(bytes, len, parser, next_json);
+}
+
+} // namespace simdjson
+
+//
+// Inline implementation
+//
+
+#include "simdjson/document_parser.h"
+#include "simdjson/stage1_find_marks.h"
+#include "simdjson/stage2_build_tape.h"
+
+namespace simdjson {
+
+// Parses the len bytes at buf with json_parse and returns a doc_ref_result
+// built from this parser's document and the resulting error code.
+//
+// NOTE(review): the parser's own `valid` and `error` members are reset after
+// every call, including successful ones, so the outcome is only reported
+// through the returned result. Presumably intentional - confirm.
+inline document::doc_ref_result document::parser::parse(const uint8_t *buf, size_t len, bool realloc_if_needed) {
+  error_code status = static_cast<error_code>(json_parse(buf, len, *this, realloc_if_needed));
+  error = UNINITIALIZED;
+  valid = false;
+  return document::doc_ref_result(doc, status);
+}
+really_inline document::doc_ref_result document::parser::parse(const char *buf, size_t len, bool realloc_if_needed) {
+  // forward to the const uint8_t * overload; only the pointer type is adapted
+  const uint8_t *bytes = reinterpret_cast<const uint8_t *>(buf);
+  return parse(bytes, len, realloc_if_needed);
+}
+really_inline document::doc_ref_result document::parser::parse(const std::string &s, bool realloc_if_needed) {
+  // forward the string's bytes and length to the const char * overload
+  const char *chars = s.data();
+  return parse(chars, s.length(), realloc_if_needed);
+}
+really_inline document::doc_ref_result document::parser::parse(const padded_string &s) {
+  // realloc_if_needed is passed as false: no temporary copy is requested
+  const bool copy_to_padded_buffer = false;
+  return parse(s.data(), s.length(), copy_to_padded_buffer);
+}
+
+// Parses the len bytes at buf with a temporary parser given a capacity of len,
+// and returns a doc_result built from the parsed document (moved out of that
+// parser) and the error code. Returns MEMALLOC when allocate_capacity fails.
+inline document::doc_result document::parse(const uint8_t *buf, size_t len, bool realloc_if_needed) {
+  document::parser local_parser;
+  const bool allocated = local_parser.allocate_capacity(len);
+  if (!allocated) {
+    return MEMALLOC;
+  }
+  auto [parsed, status] = local_parser.parse(buf, len, realloc_if_needed);
+  // hand the document over as an rvalue before local_parser goes out of scope
+  return document::doc_result(static_cast<document &&>(parsed), status);
+}
+inline document::doc_result document::parse(const char *buf, size_t len, bool realloc_if_needed) {
+  // forward to the const uint8_t * overload; only the pointer type is adapted
+  const uint8_t *bytes = reinterpret_cast<const uint8_t *>(buf);
+  return parse(bytes, len, realloc_if_needed);
+}
+inline document::doc_result document::parse(const std::string &s, bool realloc_if_needed) {
+  // forward the string's bytes and length to the const char * overload
+  const char *chars = s.data();
+  return parse(chars, s.length(), realloc_if_needed);
+}
+inline document::doc_result document::parse(const padded_string &s) {
+  // realloc_if_needed is passed as false: no temporary copy is requested
+  const bool copy_to_padded_buffer = false;
+  return parse(s.data(), s.length(), copy_to_padded_buffer);
+}
+
+// json_parse_implementation is the generic parse driver; it is instantiated
+// for the various architectures, e.g., as
+// json_parse_implementation<architecture::HASWELL> or
+// json_parse_implementation<architecture::ARM64>.
+//
+// Steps: parser.init_parse(len), then find_structural_bits (stage 1), then
+// unified_machine (stage 2). The first non-SUCCESS code is returned as an int.
+// When realloc_if_needed is true, both stages run on a padded copy of the
+// input, which is freed before returning; MEMALLOC is returned if that copy
+// cannot be allocated.
+template <architecture T>
+int json_parse_implementation(const uint8_t *buf, size_t len, document::parser &parser,
+                              bool realloc_if_needed = true) {
+  const int init_err = parser.init_parse(len);
+  if (init_err != SUCCESS) { return init_err; }
+
+  // Non-null only when this function owns a temporary copy of the input.
+  uint8_t *padded_copy = nullptr;
+  if (realloc_if_needed) {
+    padded_copy = (uint8_t *)allocate_padded_buffer(len);
+    if (padded_copy == nullptr) { return simdjson::MEMALLOC; }
+    memcpy(padded_copy, buf, len);
+    buf = padded_copy;
+  }
+
+  // Stage 2 only runs when stage 1 succeeded.
+  int outcome = simdjson::find_structural_bits<T>(buf, len, parser);
+  if (outcome == simdjson::SUCCESS) {
+    outcome = unified_machine<T>(buf, len, parser);
+  }
+
+  // Single cleanup point for the temporary copy, on success and on failure.
+  if (padded_copy != nullptr) {
+    aligned_free(static_cast<void *>(padded_copy));
+  }
+  return outcome;
+}
+
+} // namespace simdjson
+
+#endif // SIMDJSON_DOCUMENT_PARSER_H
--- a/include/simdjson/jsonparser.h
+++ b/include/simdjson/jsonparser.h
@ -1,219 +1,8 @@
 #ifndef SIMDJSON_JSONPARSER_H
 #define SIMDJSON_JSONPARSER_H
-#include "simdjson/common_defs.h"
-#include "simdjson/jsonioutil.h"
-#include "simdjson/padded_string.h"
+
+#include "simdjson/document.h"
 #include "simdjson/parsedjson.h"
-#include "simdjson/simdjson.h"
-#include "simdjson/stage1_find_marks.h"
-#include "simdjson/stage2_build_tape.h"
-#include <string>
+#include "simdjson/jsonioutil.h"

-namespace simdjson {
-// json_parse_implementation is the generic function, it is specialized for
-// various architectures, e.g., as
-// json_parse_implementation<architecture::HASWELL> or
-// json_parse_implementation<architecture::ARM64>
-template <architecture T>
-int json_parse_implementation(const uint8_t *buf, size_t len, document::parser &parser,
-                              bool realloc_if_needed = true) {
-  int result = parser.init_parse(len);
-  if (result != SUCCESS) { return result; }
-  bool reallocated = false;
-  if (realloc_if_needed) {
-      const uint8_t *tmp_buf = buf;
-      buf = (uint8_t *)allocate_padded_buffer(len);
-      if (buf == NULL)
-        return simdjson::MEMALLOC;
-      memcpy((void *)buf, tmp_buf, len);
-      reallocated = true;
-  }
-  int stage1_err = simdjson::find_structural_bits<T>(buf, len, parser);
-  if (stage1_err != simdjson::SUCCESS) {
-    if (reallocated) { // must free before we exit
-      aligned_free((void *)buf);
-    }
-    return stage1_err;
-  }
-  int res = unified_machine<T>(buf, len, parser);
-  if (reallocated) {
-    aligned_free((void *)buf);
-  }
-  return res;
-}
-
-// Parse a document found in buf.
-//
-// The content should be a valid JSON document encoded as UTF-8. If there is a
-// UTF-8 BOM, the caller is responsible for omitting it, UTF-8 BOM are
-// discouraged.
-//
-// You need to preallocate document::parser with a capacity of len (e.g.,
-// parser.allocate_capacity(len)).
-//
-// The function returns simdjson::SUCCESS (an integer = 0) in case of a success
-// or an error code from simdjson/simdjson.h in case of failure such as
-// simdjson::CAPACITY, simdjson::MEMALLOC, simdjson::DEPTH_ERROR and so forth;
-// the simdjson::error_message function converts these error codes into a
-// string).
-//
-// You can also check validity by calling parser.is_valid(). The same document::parser can
-// be reused for other documents.
-//
-// If realloc_if_needed is true (default) then a temporary buffer is created
-// when needed during processing (a copy of the input string is made). The input
-// buf should be readable up to buf + len + SIMDJSON_PADDING if
-// realloc_if_needed is false, all bytes at and after buf + len  are ignored
-// (can be garbage). The document::parser object can be reused.
-
-int json_parse(const uint8_t *buf, size_t len, document::parser &parser,
-               bool realloc_if_needed = true);
-
-// Parse a document found in buf.
-//
-// The content should be a valid JSON document encoded as UTF-8. If there is a
-// UTF-8 BOM, the caller is responsible for omitting it, UTF-8 BOM are
-// discouraged.
-//
-// You need to preallocate document::parser with a capacity of len (e.g.,
-// parser.allocate_capacity(len)).
-//
-// The function returns simdjson::SUCCESS (an integer = 0) in case of a success
-// or an error code from simdjson/simdjson.h in case of failure such as
-// simdjson::CAPACITY, simdjson::MEMALLOC, simdjson::DEPTH_ERROR and so forth;
-// the simdjson::error_message function converts these error codes into a
-// string).
-//
-// You can also check validity
-// by calling parser.is_valid(). The same document::parser can be reused for other
-// documents.
-//
-// If realloc_if_needed is true (default) then a temporary buffer is created
-// when needed during processing (a copy of the input string is made). The input
-// buf should be readable up to buf + len + SIMDJSON_PADDING  if
-// realloc_if_needed is false, all bytes at and after buf + len  are ignored
-// (can be garbage). The document::parser object can be reused.
-int json_parse(const char *buf, size_t len, document::parser &parser,
-               bool realloc_if_needed = true);
-
-// We do not want to allow implicit conversion from C string to std::string.
-int json_parse(const char *buf, document::parser &parser) = delete;
-
-// Parse a document found in in string s.
-// You need to preallocate document::parser with a capacity of len (e.g.,
-// parser.allocate_capacity(len)).
-//
-// The function returns simdjson::SUCCESS (an integer = 0) in case of a success
-// or an error code from simdjson/simdjson.h in case of failure such as
-// simdjson::CAPACITY, simdjson::MEMALLOC, simdjson::DEPTH_ERROR and so forth;
-// the simdjson::error_message function converts these error codes into a
-// string).
-//
-// A temporary buffer is created when needed during processing
-// (a copy of the input string is made).
-inline int json_parse(const std::string &s, document::parser &parser) {
-  return json_parse(s.data(), s.length(), parser, true);
-}
-
-// Parse a document found in in string s.
-//
-// The content should be a valid JSON document encoded as UTF-8. If there is a
-// UTF-8 BOM, the caller is responsible for omitting it, UTF-8 BOM are
-// discouraged.
-//
-// You need to preallocate document::parser with a capacity of len (e.g.,
-// parser.allocate_capacity(len)).
-//
-// The function returns simdjson::SUCCESS (an integer = 0) in case of a success
-// or an error code from simdjson/simdjson.h in case of failure such as
-// simdjson::CAPACITY, simdjson::MEMALLOC, simdjson::DEPTH_ERROR and so forth;
-// the simdjson::error_message function converts these error codes into a
-// string).
-//
-// You can also check validity
-// by calling parser.is_valid(). The same document::parser can be reused for other
-// documents.
-inline int json_parse(const padded_string &s, document::parser &parser) {
-  return json_parse(s.data(), s.length(), parser, false);
-}
-
-// Build a document::parser object. You can check validity
-// by calling parser.is_valid(). This does the memory allocation needed for
-// document::parser. If realloc_if_needed is true (default) then a temporary buffer is
-// created when needed during processing (a copy of the input string is made).
-//
-// The input buf should be readable up to buf + len + SIMDJSON_PADDING  if
-// realloc_if_needed is false, all bytes at and after buf + len  are ignored
-// (can be garbage).
-//
-// The content should be a valid JSON document encoded as UTF-8. If there is a
-// UTF-8 BOM, the caller is responsible for omitting it, UTF-8 BOM are
-// discouraged.
-//
-// This is a convenience function which calls json_parse.
-WARN_UNUSED
-document::parser build_parsed_json(const uint8_t *buf, size_t len,
-                             bool realloc_if_needed = true);
-
-WARN_UNUSED
-// Build a document::parser object. You can check validity
-// by calling parser.is_valid(). This does the memory allocation needed for
-// document::parser. If realloc_if_needed is true (default) then a temporary buffer is
-// created when needed during processing (a copy of the input string is made).
-//
-// The input buf should be readable up to buf + len + SIMDJSON_PADDING if
-// realloc_if_needed is false, all bytes at and after buf + len  are ignored
-// (can be garbage).
-//
-//
-// The content should be a valid JSON document encoded as UTF-8. If there is a
-// UTF-8 BOM, the caller is responsible for omitting it, UTF-8 BOM are
-// discouraged.
-//
-// This is a convenience function which calls json_parse.
-inline document::parser build_parsed_json(const char *buf, size_t len,
-                                    bool realloc_if_needed = true) {
-  return build_parsed_json(reinterpret_cast<const uint8_t *>(buf), len,
-                           realloc_if_needed);
-}
-
-// We do not want to allow implicit conversion from C string to std::string.
-document::parser build_parsed_json(const char *buf) = delete;
-
-// Parse a document found in in string s.
-// You need to preallocate document::parser with a capacity of len (e.g.,
-// parser.allocate_capacity(len)). Return SUCCESS (an integer = 0) in case of a
-// success. You can also check validity by calling parser.is_valid(). The same
-// document::parser can be reused for other documents.
-//
-// A temporary buffer is created when needed during processing
-// (a copy of the input string is made).
-//
-// The content should be a valid JSON document encoded as UTF-8. If there is a
-// UTF-8 BOM, the caller is responsible for omitting it, UTF-8 BOM are
-// discouraged.
-//
-// This is a convenience function which calls json_parse.
-WARN_UNUSED
-inline document::parser build_parsed_json(const std::string &s) {
-  return build_parsed_json(s.data(), s.length(), true);
-}
-
-// Parse a document found in in string s.
-// You need to preallocate document::parser with a capacity of len (e.g.,
-// parser.allocate_capacity(len)). Return SUCCESS (an integer = 0) in case of a
-// success. You can also check validity by calling parser.is_valid(). The same
-// document::parser can be reused for other documents.
-//
-// The content should be a valid JSON document encoded as UTF-8. If there is a
-// UTF-8 BOM, the caller is responsible for omitting it, UTF-8 BOM are
-// discouraged.
-//
-// This is a convenience function which calls json_parse.
-WARN_UNUSED
-inline document::parser build_parsed_json(const padded_string &s) {
-  return build_parsed_json(s.data(), s.length(), false);
-}
-
-} // namespace simdjson
 #endif
--- a/include/simdjson/parsedjsoniterator.h
+++ b/include/simdjson/parsedjsoniterator.h
@ -1,6 +1,6 @@
 #ifndef SIMDJSON_PARSEDJSONITERATOR_H
 #define SIMDJSON_PARSEDJSONITERATOR_H

-#include "document/iterator.h"
+#include "document_iterator.h"

 #endif
--- a/include/simdjson/stage1_find_marks.h
+++ b/include/simdjson/stage1_find_marks.h
@ -1,39 +1,6 @@
 #ifndef SIMDJSON_STAGE1_FIND_MARKS_H
 #define SIMDJSON_STAGE1_FIND_MARKS_H

-#include "simdjson/parsedjson.h"
-#include "simdjson/simdjson.h"
-
-namespace simdjson {
-
-// Setting the streaming parameter to true allows the find_structural_bits to tolerate unclosed strings.
-// The caller should still ensure that the input is valid UTF-8. If you are processing substrings,
-// you may want to call on a function like trimmed_length_safe_utf8.
-// A function like find_last_json_buf_idx may also prove useful.
-template <architecture T = architecture::NATIVE>
-int find_structural_bits(const uint8_t *buf, size_t len, document::parser &parser, bool streaming);
-
-// Setting the streaming parameter to true allows the find_structural_bits to tolerate unclosed strings.
-// The caller should still ensure that the input is valid UTF-8. If you are processing substrings,
-// you may want to call on a function like trimmed_length_safe_utf8.
-// A function like find_last_json_buf_idx may also prove useful.
-template <architecture T = architecture::NATIVE>
-int find_structural_bits(const char *buf, size_t len, document::parser &parser, bool streaming) {
-  return find_structural_bits<T>((const uint8_t *)buf, len, parser, streaming);
-}
-
-
-
-template <architecture T = architecture::NATIVE>
-int find_structural_bits(const uint8_t *buf, size_t len, document::parser &parser) {
-     return find_structural_bits<T>(buf, len, parser, false);
-}
-
-template <architecture T = architecture::NATIVE>
-int find_structural_bits(const char *buf, size_t len, document::parser &parser) {
-  return find_structural_bits<T>((const uint8_t *)buf, len, parser);
-}
-
-} // namespace simdjson
+#include "simdjson/document.h"

 #endif
--- a/include/simdjson/stage2_build_tape.h
+++ b/include/simdjson/stage2_build_tape.h
@ -1,35 +1,6 @@
 #ifndef SIMDJSON_STAGE2_BUILD_TAPE_H
 #define SIMDJSON_STAGE2_BUILD_TAPE_H

-#include "simdjson/common_defs.h"
-#include "simdjson/parsedjson.h"
-#include "simdjson/simdjson.h"
-
-namespace simdjson {
-
-template <architecture T = architecture::NATIVE>
-WARN_UNUSED int
-unified_machine(const uint8_t *buf, size_t len, document::parser &parser);
-
-template <architecture T = architecture::NATIVE>
-WARN_UNUSED int
-unified_machine(const char *buf, size_t len, document::parser &parser) {
-  return unified_machine<T>(reinterpret_cast<const uint8_t *>(buf), len, parser);
-}
-
-
-
-// Streaming
-template <architecture T = architecture::NATIVE>
-WARN_UNUSED int
-unified_machine(const uint8_t *buf, size_t len, document::parser &parser, size_t &next_json);
-
-template <architecture T = architecture::NATIVE>
-int unified_machine(const char *buf, size_t len, document::parser &parser, size_t &next_json) {
-    return unified_machine<T>(reinterpret_cast<const uint8_t *>(buf), len, parser, next_json);
-}
-
-
-} // namespace simdjson
+#include "simdjson/document.h"

 #endif
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@ -28,7 +28,7 @@ set(SIMDJSON_SRC
  stage1_find_marks.cpp
  stage2_build_tape.cpp
  document.cpp
-  document/parser.cpp
+  document_parser.cpp
  error.cpp
 )

--- a/src/document_parser.cpp
+++ b/src/document_parser.cpp
@ -3,47 +3,6 @@

 namespace simdjson {

-// This is the internal one all others end up calling
-error_code document::parser::try_parse(const uint8_t *buf, size_t len, bool realloc_if_needed) noexcept {
-  return (error_code)json_parse(buf, len, *this, realloc_if_needed);
-}
-
-error_code document::parser::try_parse(const uint8_t *buf, size_t len, const document *& dst, bool realloc_if_needed) noexcept {
-  auto result = try_parse(buf, len, realloc_if_needed);
-  dst = result == SUCCESS ? &doc : nullptr;
-  return result;
-}
-
-error_code document::parser::try_parse_into(const uint8_t *buf, size_t len, document & dst, bool realloc_if_needed) noexcept {
-  auto result = try_parse(buf, len, realloc_if_needed);
-  if (result != SUCCESS) {
-    return result;
-  }
-  // Take the document
-  dst = (document&&)doc;
-  valid = false; // Document has been taken; there is no valid document anymore
-  error = UNINITIALIZED;
-  return result;
-}
-
-const document &document::parser::parse(const uint8_t *buf, size_t len, bool realloc_if_needed) {
-  const document *dst;
-  error_code result = try_parse(buf, len, dst, realloc_if_needed);
-  if (result) {
-    throw invalid_json(result);
-  }
-  return *dst;
-}
-
-document document::parser::parse_new(const uint8_t *buf, size_t len, bool realloc_if_needed) {
-  document dst;
-  error_code result = try_parse_into(buf, len, dst, realloc_if_needed);
-  if (result) {
-    throw invalid_json(result);
-  }
-  return dst;
-}
-
 WARN_UNUSED
 error_code document::parser::init_parse(size_t len) {
  if (len > capacity()) {
--- a/src/jsonparser.cpp
+++ b/src/jsonparser.cpp
@ -11,17 +11,17 @@ namespace simdjson {
 // instruction sets.

 // function pointer type for json_parse
-using json_parse_functype = int(const uint8_t *buf, size_t len, ParsedJson &pj, bool realloc);
+using json_parse_functype = int(const uint8_t *buf, size_t len, document::parser &pj, bool realloc);

 // Pointer that holds the json_parse implementation corresponding to the
 // available SIMD instruction set
 extern std::atomic<json_parse_functype *> json_parse_ptr;

-int json_parse(const uint8_t *buf, size_t len, ParsedJson &pj, bool realloc) {
+int json_parse(const uint8_t *buf, size_t len, document::parser &pj, bool realloc) {
  return json_parse_ptr.load(std::memory_order_relaxed)(buf, len, pj, realloc);
 }

-int json_parse(const char *buf, size_t len, ParsedJson &pj, bool realloc) {
+int json_parse(const char *buf, size_t len, document::parser &pj, bool realloc) {
  return json_parse_ptr.load(std::memory_order_relaxed)(reinterpret_cast<const uint8_t *>(buf), len, pj,
                                                        realloc);
 }
@ -53,7 +53,7 @@ architecture parse_architecture(char *arch) {
 }

 // Responsible to select the best json_parse implementation
-int json_parse_dispatch(const uint8_t *buf, size_t len, ParsedJson &pj, bool realloc) {
+int json_parse_dispatch(const uint8_t *buf, size_t len, document::parser &pj, bool realloc) {
  architecture best_implementation = find_best_supported_architecture();
  // Selecting the best implementation
  switch (best_implementation) {
@ -81,12 +81,12 @@ int json_parse_dispatch(const uint8_t *buf, size_t len, ParsedJson &pj, bool rea
 std::atomic<json_parse_functype *> json_parse_ptr{&json_parse_dispatch};

 WARN_UNUSED
-ParsedJson build_parsed_json(const uint8_t *buf, size_t len, bool realloc) {
-  ParsedJson pj;
-  bool ok = pj.allocate_capacity(len);
+document::parser build_parsed_json(const uint8_t *buf, size_t len, bool realloc) {
+  document::parser parser;
+  bool ok = parser.allocate_capacity(len);
  if (ok) {
-    json_parse(buf, len, pj, realloc);
+    json_parse(buf, len, parser, realloc);
  } 
-  return pj;
+  return parser;
 }
 } // namespace simdjson