Create parser.parse_many() API
This commit is contained in:
parent
b2220d6157
commit
cfef4ff2ad
2
Makefile
2
Makefile
|
@ -65,7 +65,7 @@ SRCHEADERS_WESTMERE=src/westmere/bitmanipulation.h src/westmere/bitmask.h src/we
|
|||
SRCHEADERS_SRC=src/isadetection.h src/jsoncharutils.h src/simdprune_tables.h src/error.cpp src/jsonioutil.cpp src/implementation.cpp src/stage1_find_marks.cpp src/stage2_build_tape.cpp src/document_parser_callbacks.h
|
||||
SRCHEADERS=$(SRCHEADERS_SRC) $(SRCHEADERS_GENERIC) $(SRCHEADERS_ARM64) $(SRCHEADERS_HASWELL) $(SRCHEADERS_WESTMERE)
|
||||
|
||||
INCLUDEHEADERS=include/simdjson.h include/simdjson/common_defs.h include/simdjson/internal/jsonformatutils.h include/simdjson/jsonioutil.h include/simdjson/jsonminifier.h include/simdjson/jsonparser.h include/simdjson/padded_string.h include/simdjson/document.h include/simdjson/inline/document.h include/simdjson/document_iterator.h include/simdjson/inline/document_iterator.h include/simdjson/implementation.h include/simdjson/parsedjson.h include/simdjson/jsonstream.h include/simdjson/inline/jsonstream.h include/simdjson/portability.h include/simdjson/error.h include/simdjson/simdjson.h include/simdjson/simdjson_version.h
|
||||
INCLUDEHEADERS=include/simdjson.h include/simdjson/common_defs.h include/simdjson/internal/jsonformatutils.h include/simdjson/jsonioutil.h include/simdjson/jsonminifier.h include/simdjson/jsonparser.h include/simdjson/padded_string.h include/simdjson/document.h include/simdjson/inline/document.h include/simdjson/document_iterator.h include/simdjson/inline/document_iterator.h include/simdjson/document_stream.h include/simdjson/inline/document_stream.h include/simdjson/implementation.h include/simdjson/parsedjson.h include/simdjson/jsonstream.h include/simdjson/inline/jsonstream.h include/simdjson/portability.h include/simdjson/error.h include/simdjson/simdjson.h include/simdjson/simdjson_version.h
|
||||
|
||||
ifeq ($(SIMDJSON_TEST_AMALGAMATED_HEADERS),1)
|
||||
HEADERS=singleheader/simdjson.h
|
||||
|
|
|
@ -4,11 +4,13 @@ set(SIMDJSON_INCLUDE
|
|||
${SIMDJSON_INCLUDE_DIR}/simdjson/common_defs.h
|
||||
${SIMDJSON_INCLUDE_DIR}/simdjson/compiler_check.h
|
||||
${SIMDJSON_INCLUDE_DIR}/simdjson/document_iterator.h
|
||||
${SIMDJSON_INCLUDE_DIR}/simdjson/document_stream.h
|
||||
${SIMDJSON_INCLUDE_DIR}/simdjson/document.h
|
||||
${SIMDJSON_INCLUDE_DIR}/simdjson/error.h
|
||||
${SIMDJSON_INCLUDE_DIR}/simdjson/implementation.h
|
||||
${SIMDJSON_INCLUDE_DIR}/simdjson/inline/document.h
|
||||
${SIMDJSON_INCLUDE_DIR}/simdjson/inline/document_stream.h
|
||||
${SIMDJSON_INCLUDE_DIR}/simdjson/inline/document_iterator.h
|
||||
${SIMDJSON_INCLUDE_DIR}/simdjson/inline/document.h
|
||||
${SIMDJSON_INCLUDE_DIR}/simdjson/inline/jsonstream.h
|
||||
${SIMDJSON_INCLUDE_DIR}/simdjson/internal/jsonformatutils.h
|
||||
${SIMDJSON_INCLUDE_DIR}/simdjson/jsonioutil.h
|
||||
|
|
|
@ -9,18 +9,20 @@
|
|||
#include "simdjson/padded_string.h"
|
||||
#include "simdjson/implementation.h"
|
||||
#include "simdjson/document.h"
|
||||
#include "simdjson/jsonstream.h"
|
||||
#include "simdjson/document_stream.h"
|
||||
#include "simdjson/jsonminifier.h"
|
||||
|
||||
// Deprecated API
|
||||
#include "simdjson/parsedjsoniterator.h"
|
||||
#include "simdjson/jsonparser.h"
|
||||
#include "simdjson/parsedjson.h"
|
||||
#include "simdjson/jsonstream.h"
|
||||
#include "simdjson/document_iterator.h"
|
||||
|
||||
// Inline functions
|
||||
#include "simdjson/inline/document.h"
|
||||
#include "simdjson/inline/document_iterator.h"
|
||||
#include "simdjson/inline/document_stream.h"
|
||||
#include "simdjson/inline/jsonstream.h"
|
||||
|
||||
#endif // SIMDJSON_H
|
||||
|
|
|
@ -52,11 +52,13 @@ public:
|
|||
class object;
|
||||
class key_value_pair;
|
||||
class parser;
|
||||
class stream;
|
||||
|
||||
template<typename T=element>
|
||||
class element_result;
|
||||
class doc_result;
|
||||
class doc_ref_result;
|
||||
class stream_result;
|
||||
|
||||
// Nested classes. See definitions later in file.
|
||||
using iterator = document_iterator<DEFAULT_MAX_DEPTH>;
|
||||
|
@ -315,6 +317,7 @@ public:
|
|||
private:
|
||||
doc_ref_result(document &_doc, error_code _error) noexcept;
|
||||
friend class document::parser;
|
||||
friend class document::stream;
|
||||
}; // class document::doc_ref_result
|
||||
|
||||
/**
|
||||
|
@ -927,6 +930,255 @@ public:
|
|||
// We do not want to allow implicit conversion from C string to std::string.
|
||||
really_inline doc_ref_result parse(const char *buf) noexcept = delete;
|
||||
|
||||
/**
|
||||
* Parse a buffer containing many JSON documents.
|
||||
*
|
||||
* document::parser parser;
|
||||
* for (const document &doc : parser.parse_many(buf, len)) {
|
||||
* cout << std::string(doc["title"]) << endl;
|
||||
* }
|
||||
*
|
||||
* ### Format
|
||||
*
|
||||
* The buffer must contain a series of one or more JSON documents, concatenated into a single
|
||||
* buffer, separated by whitespace. It effectively parses until it has a fully valid document,
|
||||
* then starts parsing the next document at that point. (It does this with more parallelism and
|
||||
* lookahead than you might think, though.)
|
||||
*
|
||||
* documents that consist of an object or array may omit the whitespace between them, concatenating
|
||||
* with no separator. documents that consist of a single primitive (i.e. documents that are not
|
||||
* arrays or objects) MUST be separated with whitespace.
|
||||
*
|
||||
* ### Error Handling
|
||||
*
|
||||
* All errors are returned during iteration: if there is a global error such as memory allocation,
|
||||
* it will be yielded as the first result. Iteration always stops after the first error.
|
||||
*
|
||||
* As with all other simdjson methods, non-exception error handling is readily available through
|
||||
* the same interface, requiring you to check the error before using the document:
|
||||
*
|
||||
* document::parser parser;
|
||||
* for (auto [doc, error] : parser.parse_many(buf, len)) {
|
||||
* if (error) { cerr << error_message(error) << endl; exit(1); }
|
||||
* cout << std::string(doc["title"]) << endl;
|
||||
* }
|
||||
*
|
||||
* ### REQUIRED: Buffer Padding
|
||||
*
|
||||
* The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what
|
||||
* those bytes are initialized to, as long as they are allocated.
|
||||
*
|
||||
* ### Threads
|
||||
*
|
||||
* When compiled with SIMDJSON_THREADS_ENABLED, this method will use a single thread under the
|
||||
* hood to do some lookahead.
|
||||
*
|
||||
* ### Parser Capacity
|
||||
*
|
||||
* If the parser is unallocated, it will be auto-allocated to batch_size. If it is already
|
||||
* allocated, it must have a capacity at least as large as batch_size.
|
||||
*
|
||||
* @param buf The concatenated JSON to parse. Must have at least len + SIMDJSON_PADDING allocated bytes.
|
||||
* @param len The length of the concatenated JSON.
|
||||
* @param batch_size The batch size to use. MUST be larger than the largest document. The sweet
|
||||
* spot is cache-related: small enough to fit in cache, yet big enough to
|
||||
* parse as many documents as possible in one tight loop.
|
||||
* Defaults to 1MB (1,000,000 bytes), which has been a reasonable sweet spot in our tests.
|
||||
* @return The stream. If there is an error, it will be returned during iteration. An empty input
|
||||
* will yield 0 documents rather than an EMPTY error. Errors:
|
||||
* - MEMALLOC if the parser is unallocated and memory allocation fails
|
||||
* - CAPACITY if the parser already has a capacity, and it is less than batch_size
|
||||
* - other json errors if parsing fails.
|
||||
*/
|
||||
inline stream parse_many(const uint8_t *buf, size_t len, size_t batch_size = 1000000) noexcept;
|
||||
|
||||
/**
|
||||
* Parse a buffer containing many JSON documents.
|
||||
*
|
||||
* document::parser parser;
|
||||
* for (const document &doc : parser.parse_many(buf, len)) {
|
||||
* cout << std::string(doc["title"]) << endl;
|
||||
* }
|
||||
*
|
||||
* ### Format
|
||||
*
|
||||
* The buffer must contain a series of one or more JSON documents, concatenated into a single
|
||||
* buffer, separated by whitespace. It effectively parses until it has a fully valid document,
|
||||
* then starts parsing the next document at that point. (It does this with more parallelism and
|
||||
* lookahead than you might think, though.)
|
||||
*
|
||||
* documents that consist of an object or array may omit the whitespace between them, concatenating
|
||||
* with no separator. documents that consist of a single primitive (i.e. documents that are not
|
||||
* arrays or objects) MUST be separated with whitespace.
|
||||
*
|
||||
* ### Error Handling
|
||||
*
|
||||
* All errors are returned during iteration: if there is a global error such as memory allocation,
|
||||
* it will be yielded as the first result. Iteration always stops after the first error.
|
||||
*
|
||||
* As with all other simdjson methods, non-exception error handling is readily available through
|
||||
* the same interface, requiring you to check the error before using the document:
|
||||
*
|
||||
* document::parser parser;
|
||||
* for (auto [doc, error] : parser.parse_many(buf, len)) {
|
||||
* if (error) { cerr << error_message(error) << endl; exit(1); }
|
||||
* cout << std::string(doc["title"]) << endl;
|
||||
* }
|
||||
*
|
||||
* ### REQUIRED: Buffer Padding
|
||||
*
|
||||
* The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what
|
||||
* those bytes are initialized to, as long as they are allocated.
|
||||
*
|
||||
* ### Threads
|
||||
*
|
||||
* When compiled with SIMDJSON_THREADS_ENABLED, this method will use a single thread under the
|
||||
* hood to do some lookahead.
|
||||
*
|
||||
* ### Parser Capacity
|
||||
*
|
||||
* If the parser is unallocated, it will be auto-allocated to batch_size. If it is already
|
||||
* allocated, it must have a capacity at least as large as batch_size.
|
||||
*
|
||||
* @param buf The concatenated JSON to parse. Must have at least len + SIMDJSON_PADDING allocated bytes.
|
||||
* @param len The length of the concatenated JSON.
|
||||
* @param batch_size The batch size to use. MUST be larger than the largest document. The sweet
|
||||
* spot is cache-related: small enough to fit in cache, yet big enough to
|
||||
* parse as many documents as possible in one tight loop.
|
||||
* Defaults to 1MB (1,000,000 bytes), which has been a reasonable sweet spot in our tests.
|
||||
* @return The stream. If there is an error, it will be returned during iteration. An empty input
|
||||
* will yield 0 documents rather than an EMPTY error. Errors:
|
||||
* - MEMALLOC if the parser is unallocated and memory allocation fails
|
||||
* - CAPACITY if the parser already has a capacity, and it is less than batch_size
|
||||
* - other json errors if parsing fails
|
||||
*/
|
||||
inline stream parse_many(const char *buf, size_t len, size_t batch_size = 1000000) noexcept;
|
||||
|
||||
/**
|
||||
* Parse a buffer containing many JSON documents.
|
||||
*
|
||||
* document::parser parser;
|
||||
* for (const document &doc : parser.parse_many(buf, len)) {
|
||||
* cout << std::string(doc["title"]) << endl;
|
||||
* }
|
||||
*
|
||||
* ### Format
|
||||
*
|
||||
* The buffer must contain a series of one or more JSON documents, concatenated into a single
|
||||
* buffer, separated by whitespace. It effectively parses until it has a fully valid document,
|
||||
* then starts parsing the next document at that point. (It does this with more parallelism and
|
||||
* lookahead than you might think, though.)
|
||||
*
|
||||
* documents that consist of an object or array may omit the whitespace between them, concatenating
|
||||
* with no separator. documents that consist of a single primitive (i.e. documents that are not
|
||||
* arrays or objects) MUST be separated with whitespace.
|
||||
*
|
||||
* ### Error Handling
|
||||
*
|
||||
* All errors are returned during iteration: if there is a global error such as memory allocation,
|
||||
* it will be yielded as the first result. Iteration always stops after the first error.
|
||||
*
|
||||
* As with all other simdjson methods, non-exception error handling is readily available through
|
||||
* the same interface, requiring you to check the error before using the document:
|
||||
*
|
||||
* document::parser parser;
|
||||
* for (auto [doc, error] : parser.parse_many(buf, len)) {
|
||||
* if (error) { cerr << error_message(error) << endl; exit(1); }
|
||||
* cout << std::string(doc["title"]) << endl;
|
||||
* }
|
||||
*
|
||||
* ### REQUIRED: Buffer Padding
|
||||
*
|
||||
* The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what
|
||||
* those bytes are initialized to, as long as they are allocated.
|
||||
*
|
||||
* ### Threads
|
||||
*
|
||||
* When compiled with SIMDJSON_THREADS_ENABLED, this method will use a single thread under the
|
||||
* hood to do some lookahead.
|
||||
*
|
||||
* ### Parser Capacity
|
||||
*
|
||||
* If the parser is unallocated, it will be auto-allocated to batch_size. If it is already
|
||||
* allocated, it must have a capacity at least as large as batch_size.
|
||||
*
|
||||
* @param s The concatenated JSON to parse. Must have at least len + SIMDJSON_PADDING allocated bytes.
|
||||
* @param batch_size The batch size to use. MUST be larger than the largest document. The sweet
|
||||
* spot is cache-related: small enough to fit in cache, yet big enough to
|
||||
* parse as many documents as possible in one tight loop.
|
||||
* Defaults to 1MB (1,000,000 bytes), which has been a reasonable sweet spot in our tests.
|
||||
* @return The stream. If there is an error, it will be returned during iteration. An empty input
|
||||
* will yield 0 documents rather than an EMPTY error. Errors:
|
||||
* - MEMALLOC if the parser is unallocated and memory allocation fails
|
||||
* - CAPACITY if the parser already has a capacity, and it is less than batch_size
|
||||
* - other json errors if parsing fails
|
||||
*/
|
||||
inline stream parse_many(const std::string &s, size_t batch_size = 1000000) noexcept;
|
||||
|
||||
/**
|
||||
* Parse a buffer containing many JSON documents.
|
||||
*
|
||||
* document::parser parser;
|
||||
* for (const document &doc : parser.parse_many(buf, len)) {
|
||||
* cout << std::string(doc["title"]) << endl;
|
||||
* }
|
||||
*
|
||||
* ### Format
|
||||
*
|
||||
* The buffer must contain a series of one or more JSON documents, concatenated into a single
|
||||
* buffer, separated by whitespace. It effectively parses until it has a fully valid document,
|
||||
* then starts parsing the next document at that point. (It does this with more parallelism and
|
||||
* lookahead than you might think, though.)
|
||||
*
|
||||
* documents that consist of an object or array may omit the whitespace between them, concatenating
|
||||
* with no separator. documents that consist of a single primitive (i.e. documents that are not
|
||||
* arrays or objects) MUST be separated with whitespace.
|
||||
*
|
||||
* ### Error Handling
|
||||
*
|
||||
* All errors are returned during iteration: if there is a global error such as memory allocation,
|
||||
* it will be yielded as the first result. Iteration always stops after the first error.
|
||||
*
|
||||
* As with all other simdjson methods, non-exception error handling is readily available through
|
||||
* the same interface, requiring you to check the error before using the document:
|
||||
*
|
||||
* document::parser parser;
|
||||
* for (auto [doc, error] : parser.parse_many(buf, len)) {
|
||||
* if (error) { cerr << error_message(error) << endl; exit(1); }
|
||||
* cout << std::string(doc["title"]) << endl;
|
||||
* }
|
||||
*
|
||||
* ### REQUIRED: Buffer Padding
|
||||
*
|
||||
* The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what
|
||||
* those bytes are initialized to, as long as they are allocated.
|
||||
*
|
||||
* ### Threads
|
||||
*
|
||||
* When compiled with SIMDJSON_THREADS_ENABLED, this method will use a single thread under the
|
||||
* hood to do some lookahead.
|
||||
*
|
||||
* ### Parser Capacity
|
||||
*
|
||||
* If the parser is unallocated, it will be auto-allocated to batch_size. If it is already
|
||||
* allocated, it must have a capacity at least as large as batch_size.
|
||||
*
|
||||
* @param s The concatenated JSON to parse.
|
||||
* @param batch_size The batch size to use. MUST be larger than the largest document. The sweet
|
||||
* spot is cache-related: small enough to fit in cache, yet big enough to
|
||||
* parse as many documents as possible in one tight loop.
|
||||
* Defaults to 1MB (1,000,000 bytes), which has been a reasonable sweet spot in our tests.
|
||||
* @return The stream. If there is an error, it will be returned during iteration. An empty input
|
||||
* will yield 0 documents rather than an EMPTY error. Errors:
|
||||
* - MEMALLOC if the parser is unallocated and memory allocation fails
|
||||
* - CAPACITY if the parser already has a capacity, and it is less than batch_size
|
||||
* - other json errors if parsing fails
|
||||
*/
|
||||
inline stream parse_many(const padded_string &s, size_t batch_size = 1000000) noexcept;
|
||||
|
||||
// We do not want to allow implicit conversion from C string to std::string.
|
||||
really_inline doc_ref_result parse_many(const char *buf, size_t batch_size = 1000000) noexcept = delete;
|
||||
|
||||
/**
|
||||
* Current capacity: the largest document this parser can support without reallocating.
|
||||
*/
|
||||
|
|
|
@ -0,0 +1,155 @@
|
|||
#ifndef SIMDJSON_DOCUMENT_STREAM_H
|
||||
#define SIMDJSON_DOCUMENT_STREAM_H
|
||||
|
||||
#include <thread>
|
||||
#include "simdjson/document.h"
|
||||
|
||||
namespace simdjson {
|
||||
|
||||
template <class string_container = padded_string> class JsonStream;
|
||||
|
||||
/**
|
||||
* A forward-only stream of documents.
|
||||
*
|
||||
* Produced by document::parser::parse_many.
|
||||
*
|
||||
*/
|
||||
/**
 * A forward-only stream of documents, produced by document::parser::parse_many().
 *
 * Holds a non-owning view over the caller's buffer and a reference to the parser;
 * both must outlive the stream. Parsing is driven lazily by iteration.
 */
class document::stream {
public:
  // Joins the lookahead stage-1 thread (when threads are enabled) before destruction.
  really_inline ~stream() noexcept;

  /**
   * Take another stream's buffers and state.
   *
   * @param other The stream to take. Its capacity is zeroed.
   */
  stream(document::stream &&other) = default;
  stream(const document::stream &) = delete; // Disallow copying
  /**
   * Take another stream's buffers and state.
   *
   * @param other The stream to take. Its capacity is zeroed.
   */
  stream &operator=(document::stream &&other) = default;
  stream &operator=(const document::stream &) = delete; // Disallow copying

  /**
   * An iterator through a forward-only stream of documents.
   */
  class iterator {
  public:
    /**
     * Get the current document (or error).
     */
    really_inline doc_ref_result operator*() noexcept;
    /**
     * Advance to the next document (parses it on demand).
     */
    inline iterator& operator++() noexcept;
    /**
     * Check if we're at the end yet.
     * @param other the end iterator to compare to.
     */
    really_inline bool operator!=(const iterator &other) const noexcept;

  private:
    iterator(stream& stream, bool finished) noexcept;
    /** The stream parser we're iterating through. */
    stream& _stream;
    /** Whether we're finished or not. */
    bool finished;
    friend class stream;
  };

  /**
   * Start iterating the documents in the stream.
   */
  really_inline iterator begin() noexcept;
  /**
   * The end of the stream, for iterator comparison purposes.
   */
  really_inline iterator end() noexcept;

private:
  // Only document::parser::parse_many() may construct a stream.
  really_inline stream(document::parser &parser, const uint8_t *buf, size_t len, size_t batch_size = 1000000) noexcept;

  /**
   * Parse the next document found in the buffer previously given to stream.
   *
   * The content should be a valid JSON document encoded as UTF-8. If there is a
   * UTF-8 BOM, the caller is responsible for omitting it, UTF-8 BOM are
   * discouraged.
   *
   * You do NOT need to pre-allocate a parser. This function takes care of
   * pre-allocating a capacity defined by the batch_size defined when creating the
   * stream object.
   *
   * The function returns simdjson::SUCCESS_AND_HAS_MORE (an integer = 1) in case
   * of success and indicates that the buffer still contains more data to be parsed,
   * meaning this function can be called again to return the next JSON document
   * after this one.
   *
   * The function returns simdjson::SUCCESS (as integer = 0) in case of success
   * and indicates that the buffer has successfully been parsed to the end.
   * Every document it contained has been parsed without error.
   *
   * The function returns an error code from simdjson/simdjson.h in case of failure
   * such as simdjson::CAPACITY, simdjson::MEMALLOC, simdjson::DEPTH_ERROR and so forth;
   * the simdjson::error_message function converts these error codes into a string).
   *
   * You can also check validity by calling parser.is_valid(). The same parser can
   * and should be reused for the other documents in the buffer. */
  inline error_code json_parse() noexcept;

  /**
   * Returns the location (index) of where the next document should be in the
   * buffer.
   * Can be used for debugging, it tells the user the position of the end of the
   * last
   * valid JSON document parsed
   */
  inline size_t get_current_buffer_loc() const { return current_buffer_loc; }

  /**
   * Returns the total amount of complete documents parsed by the JsonStream,
   * in the current buffer, at the given time.
   */
  inline size_t get_n_parsed_docs() const { return n_parsed_docs; }

  /**
   * Returns the total amount of data (in bytes) parsed by the JsonStream,
   * in the current buffer, at the given time.
   */
  inline size_t get_n_bytes_parsed() const { return n_bytes_parsed; }

  // Start of the not-yet-consumed portion of the input buffer.
  inline const uint8_t *buf() const { return _buf + buf_start; }

  // Consume `offset` bytes of the input buffer.
  inline void advance(size_t offset) { buf_start += offset; }

  // Bytes of input not yet consumed.
  inline size_t remaining() const { return _len - buf_start; }

  document::parser &parser;    // parser used for stage 2 (and stage 1 when single-threaded)
  const uint8_t *_buf;         // non-owning pointer to the caller's input
  const size_t _len;           // total input length in bytes
  size_t _batch_size; // this is actually variable!
  size_t buf_start{0};         // how much of _buf has been consumed
  size_t next_json{0};         // index of the next structural to hand to stage 2
  bool load_next_batch{true};  // whether the next json_parse() must run stage 1 again
  size_t current_buffer_loc{0};
#ifdef SIMDJSON_THREADS_ENABLED
  size_t last_json_buffer_loc{0}; // boundary where the lookahead thread started stage 1
#endif
  size_t n_parsed_docs{0};
  size_t n_bytes_parsed{0};
  error_code error{SUCCESS_AND_HAS_MORE};
#ifdef SIMDJSON_THREADS_ENABLED
  error_code stage1_is_ok_thread{SUCCESS}; // written only by stage_1_thread
  std::thread stage_1_thread;              // lookahead thread running stage 1 on the next batch
  document::parser parser_thread;          // scratch parser owned by the lookahead thread
#endif
  template <class string_container> friend class JsonStream;
  friend class document::parser;
}; // class document::stream
|
||||
|
||||
} // end of namespace simdjson
|
||||
#endif // SIMDJSON_DOCUMENT_STREAM_H
|
|
@ -4,6 +4,7 @@
|
|||
// Inline implementations go in here.
|
||||
|
||||
#include "simdjson/document.h"
|
||||
#include "simdjson/document_stream.h"
|
||||
#include "simdjson/implementation.h"
|
||||
#include "simdjson/internal/jsonformatutils.h"
|
||||
#include <iostream>
|
||||
|
@ -515,6 +516,19 @@ really_inline document::doc_ref_result document::parser::parse(const padded_stri
|
|||
return parse(s.data(), s.length(), false);
|
||||
}
|
||||
|
||||
// parse_many overloads: all funnel into the stream constructor, which eagerly
// parses the first document so errors surface on the first iteration.
inline document::stream document::parser::parse_many(const uint8_t *buf, size_t len, size_t batch_size) noexcept {
  return stream(*this, buf, len, batch_size);
}
inline document::stream document::parser::parse_many(const char *buf, size_t len, size_t batch_size) noexcept {
  return parse_many((const uint8_t *)buf, len, batch_size);
}
inline document::stream document::parser::parse_many(const std::string &s, size_t batch_size) noexcept {
  // NOTE(review): caller must guarantee SIMDJSON_PADDING allocated bytes past
  // s.length(); a plain std::string does not provide that by itself.
  return parse_many(s.data(), s.length(), batch_size);
}
inline document::stream document::parser::parse_many(const padded_string &s, size_t batch_size) noexcept {
  return parse_many(s.data(), s.length(), batch_size);
}
|
||||
|
||||
// Current capacity: the largest document (in bytes) this parser can support
// without reallocating.
really_inline size_t document::parser::capacity() const noexcept {
  return _capacity;
}
|
||||
|
|
|
@ -0,0 +1,288 @@
|
|||
#ifndef SIMDJSON_INLINE_DOCUMENT_STREAM_H
|
||||
#define SIMDJSON_INLINE_DOCUMENT_STREAM_H
|
||||
|
||||
#include "simdjson/jsonstream.h"
|
||||
#include <algorithm>
|
||||
#include <limits>
|
||||
#include <stdexcept>
|
||||
#include <thread>
|
||||
|
||||
namespace simdjson::internal {
|
||||
|
||||
/**
|
||||
* This algorithm is used to quickly identify the buffer position of
|
||||
* the last JSON document inside the current batch.
|
||||
*
|
||||
* It does its work by finding the last pair of structural characters
|
||||
* that represent the end followed by the start of a document.
|
||||
*
|
||||
* Simply put, we iterate over the structural characters, starting from
|
||||
* the end. We consider that we found the end of a JSON document when the
|
||||
* first element of the pair is NOT one of these characters: '{' '[' ':' ','
|
||||
* and when the second element is NOT one of these characters: '}' '}' ';' ','.
|
||||
*
|
||||
* This simple comparison works most of the time, but it does not cover cases
|
||||
* where the batch's structural indexes contain a perfect amount of documents.
|
||||
* In such a case, we do not have access to the structural index which follows
|
||||
* the last document, therefore, we do not have access to the second element in
|
||||
* the pair, and means that we cannot identify the last document. To fix this
|
||||
* issue, we keep a count of the open and closed curly/square braces we found
|
||||
* while searching for the pair. When we find a pair AND the count of open and
|
||||
* closed curly/square braces is the same, we know that we just passed a
|
||||
* complete
|
||||
* document, therefore the last json buffer location is the end of the batch
|
||||
* */
|
||||
// Returns the index (into parser.structural_indexes) where the last complete
// JSON document in [buf, buf+size) begins, or one past the last structural if
// the batch ends exactly on a document boundary. Returns 0 when no boundary
// can be identified (empty input, or a single unterminated document).
inline size_t find_last_json_buf_idx(const uint8_t *buf, size_t size, const document::parser &parser) {
  // this function can be generally useful
  if (parser.n_structural_indexes == 0)
    return 0;
  auto last_i = parser.n_structural_indexes - 1;
  if (parser.structural_indexes[last_i] == size) {
    // The final structural index sits at the very end of the batch; ignore it.
    if (last_i == 0)
      return 0;
    last_i = parser.n_structural_indexes - 2;
  }
  // Running (negative-going) depth counters while scanning from the end.
  auto arr_cnt = 0;
  auto obj_cnt = 0;
  for (auto i = last_i; i > 0; i--) {
    auto idxb = parser.structural_indexes[i];
    switch (buf[idxb]) {
    case ':':
    case ',':
      continue; // separators can never start a document
    case '}':
      obj_cnt--;
      continue;
    case ']':
      arr_cnt--;
      continue;
    case '{':
      obj_cnt++;
      break; // candidate document start: inspect the preceding structural
    case '[':
      arr_cnt++;
      break; // candidate document start: inspect the preceding structural
    }
    auto idxa = parser.structural_indexes[i - 1];
    switch (buf[idxa]) {
    case '{':
    case '[':
    case ':':
    case ',':
      continue; // preceding structural is inside a container: not a boundary
    }
    // Balanced braces/brackets from the end mean the batch finished on a
    // complete document, so the boundary is past the last structural index.
    if (!arr_cnt && !obj_cnt) {
      return last_i + 1;
    }
    // Otherwise, structural i starts the last (incomplete) document.
    return i;
  }
  return 0;
}
|
||||
|
||||
// returns true if the provided byte value is an ASCII character
|
||||
// Returns true if the provided byte is a 7-bit ASCII value (0x00-0x7F).
static inline bool is_ascii(char c) {
  return static_cast<unsigned char>(c) < 128;
}
|
||||
|
||||
// if the string ends with UTF-8 values, backtrack
|
||||
// up to the first ASCII character. May return 0.
|
||||
// If the buffer ends with non-ASCII (potentially mid-code-point UTF-8) bytes,
// back the length up to just past the last ASCII byte. May return 0.
static inline size_t trimmed_length_safe_utf8(const char * c, size_t len) {
  size_t trimmed = len;
  // Any byte with the high bit set belongs to a multi-byte UTF-8 sequence.
  while (trimmed > 0 && static_cast<unsigned char>(c[trimmed - 1]) > 127) {
    trimmed--;
  }
  return trimmed;
}
|
||||
|
||||
} // namespace simdjson::internal
|
||||
|
||||
namespace simdjson {
|
||||
|
||||
// Construct a stream over [buf, buf+len) and eagerly parse the first document.
// The result of that first json_parse() is stored in `error`, to be surfaced
// by the first iterator dereference.
really_inline document::stream::stream(
  document::parser &_parser,
  const uint8_t *buf,
  size_t len,
  size_t batch_size
) noexcept : parser{_parser}, _buf{buf}, _len{len}, _batch_size(batch_size) {
  error = json_parse();
}
|
||||
|
||||
inline document::stream::~stream() noexcept {
#ifdef SIMDJSON_THREADS_ENABLED
  // The lookahead stage-1 thread may still be running; join it so we never
  // destroy a joinable std::thread (which would call std::terminate).
  if (stage_1_thread.joinable()) {
    stage_1_thread.join();
  }
#endif
}
|
||||
|
||||
// Start iterating; the first document was already parsed by the constructor.
really_inline document::stream::iterator document::stream::begin() noexcept {
  return iterator(*this, false);
}
|
||||
|
||||
// Sentinel iterator: compares equal to a live iterator once it is finished.
really_inline document::stream::iterator document::stream::end() noexcept {
  return iterator(*this, true);
}
|
||||
|
||||
// `_is_end` distinguishes the end() sentinel (true) from a live iterator (false).
really_inline document::stream::iterator::iterator(stream& stream, bool _is_end) noexcept
  : _stream{stream}, finished{_is_end} {
}
|
||||
|
||||
// Yield the current document. SUCCESS_AND_HAS_MORE is an internal signal only;
// callers see plain SUCCESS (or a real error code).
really_inline document::doc_ref_result document::stream::iterator::operator*() noexcept {
  return doc_ref_result(_stream.parser.doc, _stream.error == SUCCESS_AND_HAS_MORE ? SUCCESS : _stream.error);
}
|
||||
|
||||
// Advance to the next document: parse it on demand while more data remains,
// otherwise (SUCCESS or any error) mark this iterator as finished so it
// compares equal to end().
really_inline document::stream::iterator& document::stream::iterator::operator++() noexcept {
  if (_stream.error != SUCCESS_AND_HAS_MORE) {
    finished = true;
  } else {
    _stream.error = _stream.json_parse();
  }
  return *this;
}
|
||||
|
||||
// Only the `finished` flag is compared, so comparing iterators that belong to
// different streams is unsupported; this exists to terminate range-for loops.
really_inline bool document::stream::iterator::operator!=(const document::stream::iterator &other) const noexcept {
  return finished != other.finished;
}
|
||||
|
||||
#ifdef SIMDJSON_THREADS_ENABLED
|
||||
|
||||
// threaded version of json_parse
|
||||
// todo: simplify this code further
|
||||
// threaded version of json_parse
// todo: simplify this code further
//
// Parses the next document in the stream. Stage 1 (structural indexing) runs
// one batch ahead on stage_1_thread; stage 2 (tape building) runs on the
// caller's thread. Returns SUCCESS_AND_HAS_MORE while documents remain,
// SUCCESS on the last document, or an error code.
inline error_code document::stream::json_parse() noexcept {
  // TODO we should bump the parser *anytime* capacity is less than batch size, not just 0.
  if (unlikely(parser.capacity() == 0)) {
    const bool allocok = parser.allocate_capacity(_batch_size);
    if (!allocok) {
      return simdjson::MEMALLOC;
    }
  } else if (unlikely(parser.capacity() < _batch_size)) {
    // A pre-allocated parser smaller than the batch cannot be grown here.
    return simdjson::CAPACITY;
  }
  // The lookahead thread needs its own parser with at least batch capacity.
  if (unlikely(parser_thread.capacity() < _batch_size)) {
    const bool allocok_thread = parser_thread.allocate_capacity(_batch_size);
    if (!allocok_thread) {
      return simdjson::MEMALLOC;
    }
  }
  if (unlikely(load_next_batch)) {
    // First time loading
    if (!stage_1_thread.joinable()) {
      _batch_size = (std::min)(_batch_size, remaining());
      // Never cut a multi-byte UTF-8 sequence at the batch boundary.
      _batch_size = internal::trimmed_length_safe_utf8((const char *)buf(), _batch_size);
      if (_batch_size == 0) {
        return simdjson::UTF8_ERROR;
      }
      auto stage1_is_ok = error_code(simdjson::active_implementation->stage1(buf(), _batch_size, parser, true));
      if (stage1_is_ok != simdjson::SUCCESS) {
        return stage1_is_ok;
      }
      // Truncate the structural indexes to the last complete document.
      size_t last_index = internal::find_last_json_buf_idx(buf(), _batch_size, parser);
      if (last_index == 0) {
        if (parser.n_structural_indexes == 0) {
          return simdjson::EMPTY;
        }
      } else {
        parser.n_structural_indexes = last_index + 1;
      }
    }
    // the second thread is running or done.
    else {
      stage_1_thread.join();
      if (stage1_is_ok_thread != simdjson::SUCCESS) {
        return stage1_is_ok_thread;
      }
      // Adopt the lookahead thread's stage-1 results into the main parser.
      std::swap(parser.structural_indexes, parser_thread.structural_indexes);
      parser.n_structural_indexes = parser_thread.n_structural_indexes;
      advance(last_json_buffer_loc);
      n_bytes_parsed += last_json_buffer_loc;
    }
    // let us decide whether we will start a new thread
    // NOTE(review): remaining() and _batch_size are unsigned; after the min()
    // clamp above remaining() >= _batch_size holds, so this difference is safe,
    // but it depends on that clamp — confirm before reordering.
    if (remaining() - _batch_size > 0) {
      last_json_buffer_loc =
          parser.structural_indexes[internal::find_last_json_buf_idx(buf(), _batch_size, parser)];
      _batch_size = (std::min)(_batch_size, remaining() - last_json_buffer_loc);
      if (_batch_size > 0) {
        _batch_size = internal::trimmed_length_safe_utf8(
            (const char *)(buf() + last_json_buffer_loc), _batch_size);
        if (_batch_size == 0) {
          return simdjson::UTF8_ERROR;
        }
        // let us capture read-only variables
        const uint8_t *const b = buf() + last_json_buffer_loc;
        const size_t bs = _batch_size;
        // we call the thread on a lambda that will update
        // this->stage1_is_ok_thread
        // there is only one thread that may write to this value
        stage_1_thread = std::thread([this, b, bs] {
          this->stage1_is_ok_thread = error_code(simdjson::active_implementation->stage1(b, bs, this->parser_thread, true));
        });
      }
    }
    next_json = 0;
    load_next_batch = false;
  } // load_next_batch
  // Stage 2: build the tape for the next document in the current batch.
  error_code res = simdjson::active_implementation->stage2(buf(), remaining(), parser, next_json);
  if (res == simdjson::SUCCESS_AND_HAS_MORE) {
    n_parsed_docs++;
    current_buffer_loc = parser.structural_indexes[next_json];
    // When we reach the lookahead boundary, the next call must swap in the
    // lookahead thread's batch.
    load_next_batch = (current_buffer_loc == last_json_buffer_loc);
  } else if (res == simdjson::SUCCESS) {
    n_parsed_docs++;
    if (remaining() > _batch_size) {
      // The batch is exhausted but the input is not: keep streaming.
      current_buffer_loc = parser.structural_indexes[next_json - 1];
      load_next_batch = true;
      res = simdjson::SUCCESS_AND_HAS_MORE;
    }
  }
  return res;
}
|
||||
|
||||
#else // SIMDJSON_THREADS_ENABLED
|
||||
|
||||
// single-threaded version of json_parse
// Parses the next document in the stream; when the current batch is
// exhausted, loads and stage-1-indexes the next batch inline.
// Returns SUCCESS_AND_HAS_MORE while more documents remain, SUCCESS for the
// final document, or an error code.
inline error_code document::stream::json_parse() noexcept {
  if (unlikely(parser.capacity() == 0)) {
    const bool allocok = parser.allocate_capacity(_batch_size);
    if (!allocok) {
      return MEMALLOC;
    }
  } else if (unlikely(parser.capacity() < _batch_size)) {
    // The caller's parser is too small and we do not resize a parser we do
    // not own.
    return CAPACITY;
  }
  if (unlikely(load_next_batch)) {
    // Skip everything already consumed from the previous batch.
    advance(current_buffer_loc);
    n_bytes_parsed += current_buffer_loc;
    _batch_size = (std::min)(_batch_size, remaining());
    // Never cut a multi-byte UTF-8 sequence at the batch boundary.
    // NOTE(review): unlike the threaded version, a resulting _batch_size of 0
    // is not reported as UTF8_ERROR here — confirm this is intentional.
    _batch_size = internal::trimmed_length_safe_utf8((const char *)buf(), _batch_size);
    auto stage1_is_ok = (error_code)simdjson::active_implementation->stage1(buf(), _batch_size, parser, true);
    if (stage1_is_ok != simdjson::SUCCESS) {
      return stage1_is_ok;
    }
    size_t last_index = internal::find_last_json_buf_idx(buf(), _batch_size, parser);
    if (last_index == 0) {
      if (parser.n_structural_indexes == 0) {
        return EMPTY;
      }
    } else {
      // Only expose the structural indexes up to the last complete document.
      parser.n_structural_indexes = last_index + 1;
    }
    load_next_batch = false;
  } // load_next_batch
  // Stage 2: build the tape for the next document, starting at next_json.
  error_code res = simdjson::active_implementation->stage2(buf(), remaining(), parser, next_json);
  if (likely(res == simdjson::SUCCESS_AND_HAS_MORE)) {
    n_parsed_docs++;
    current_buffer_loc = parser.structural_indexes[next_json];
  } else if (res == simdjson::SUCCESS) {
    n_parsed_docs++;
    if (remaining() > _batch_size) {
      // This batch is exhausted but the buffer is not: report HAS_MORE and
      // arrange for the next call to load a fresh batch.
      current_buffer_loc = parser.structural_indexes[next_json - 1];
      next_json = 1;
      load_next_batch = true;
      res = simdjson::SUCCESS_AND_HAS_MORE;
    }
  }
  return res;
}
|
||||
#endif // SIMDJSON_THREADS_ENABLED
|
||||
|
||||
} // end of namespace simdjson
|
||||
#endif // SIMDJSON_INLINE_DOCUMENT_STREAM_H
|
|
@ -1,262 +1,35 @@
|
|||
// TODO Remove this -- deprecated API and files
|
||||
|
||||
#ifndef SIMDJSON_INLINE_JSONSTREAM_H
|
||||
#define SIMDJSON_INLINE_JSONSTREAM_H
|
||||
|
||||
#include "simdjson/jsonstream.h"
|
||||
#include <algorithm>
|
||||
#include <limits>
|
||||
#include <stdexcept>
|
||||
#include <thread>
|
||||
|
||||
namespace simdjson::internal {
|
||||
|
||||
/* This algorithm is used to quickly identify the buffer position of
|
||||
* the last JSON document inside the current batch.
|
||||
*
|
||||
* It does its work by finding the last pair of structural characters
|
||||
* that represent the end followed by the start of a document.
|
||||
*
|
||||
* Simply put, we iterate over the structural characters, starting from
|
||||
* the end. We consider that we found the end of a JSON document when the
|
||||
 * first element of the pair is NOT one of these characters: '{' '[' ':' ','
 * and when the second element is NOT one of these characters: '}' ']' ':' ','.
|
||||
*
|
||||
* This simple comparison works most of the time, but it does not cover cases
|
||||
* where the batch's structural indexes contain a perfect amount of documents.
|
||||
* In such a case, we do not have access to the structural index which follows
|
||||
* the last document, therefore, we do not have access to the second element in
|
||||
* the pair, and means that we cannot identify the last document. To fix this
|
||||
* issue, we keep a count of the open and closed curly/square braces we found
|
||||
* while searching for the pair. When we find a pair AND the count of open and
|
||||
* closed curly/square braces is the same, we know that we just passed a
|
||||
* complete
|
||||
* document, therefore the last json buffer location is the end of the batch
|
||||
* */
|
||||
// Returns the index (into parser.structural_indexes) of the structural
// character that starts the last complete JSON document in buf[0..size),
// or 0 when no document boundary can be identified. See the comment block
// above for the full algorithm description.
inline size_t find_last_json_buf_idx(const uint8_t *buf, size_t size, const document::parser &parser) {
  // this function can be generally useful
  if (parser.n_structural_indexes == 0)
    return 0;
  auto last_i = parser.n_structural_indexes - 1;
  if (parser.structural_indexes[last_i] == size) {
    // The final index points one past the data; ignore it.
    if (last_i == 0)
      return 0;
    last_i = parser.n_structural_indexes - 2;
  }
  // Running counts of unmatched '[' / '{' seen while scanning backwards.
  auto arr_cnt = 0;
  auto obj_cnt = 0;
  for (auto i = last_i; i > 0; i--) {
    auto idxb = parser.structural_indexes[i];
    switch (buf[idxb]) {
    case ':':
    case ',':
      continue;
    case '}':
      obj_cnt--;
      continue;
    case ']':
      arr_cnt--;
      continue;
    case '{':
      obj_cnt++;
      break;
    case '[':
      arr_cnt++;
      break;
    }
    // buf[idxb] opens a container: candidate document start. Check whether the
    // preceding structural character ends a document.
    auto idxa = parser.structural_indexes[i - 1];
    switch (buf[idxa]) {
    case '{':
    case '[':
    case ':':
    case ',':
      continue;
    }
    if (!arr_cnt && !obj_cnt) {
      // All containers balance out: the batch ends on a complete document,
      // so the last document runs to the end of the batch.
      return last_i + 1;
    }
    return i;
  }
  return 0;
}
|
||||
|
||||
// Returns true when the byte is in the 7-bit ASCII range (0x00..0x7F).
static inline bool is_ascii(char c) {
  return static_cast<unsigned char>(c) < 128;
}
|
||||
|
||||
// If the string ends with non-ASCII (UTF-8 continuation or lead) bytes,
// backtrack to the last ASCII character so we never split a multi-byte
// sequence. May return 0.
static inline size_t trimmed_length_safe_utf8(const char * c, size_t len) {
  // Walk backwards past every trailing byte with the high bit set.
  while (len > 0 && static_cast<unsigned char>(c[len - 1]) > 127) {
    len--;
  }
  return len;
}
|
||||
|
||||
} // namespace simdjson::internal
|
||||
#include "simdjson/document.h"
|
||||
#include "simdjson/document_stream.h"
|
||||
|
||||
namespace simdjson {
|
||||
|
||||
template <class string_container>
|
||||
JsonStream<string_container>::JsonStream(const string_container &s,
|
||||
size_t batchSize)
|
||||
: str(s), _batch_size(batchSize) {
|
||||
inline JsonStream<string_container>::JsonStream(const string_container &s, size_t _batch_size) noexcept
|
||||
: str(s), batch_size(_batch_size) {
|
||||
}
|
||||
|
||||
template <class string_container> JsonStream<string_container>::~JsonStream() {
|
||||
#ifdef SIMDJSON_THREADS_ENABLED
|
||||
if (stage_1_thread.joinable()) {
|
||||
stage_1_thread.join();
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef SIMDJSON_THREADS_ENABLED
|
||||
|
||||
// threaded version of json_parse
|
||||
// todo: simplify this code further
|
||||
template <class string_container>
|
||||
int JsonStream<string_container>::json_parse(document::parser &parser) {
|
||||
if (unlikely(parser.capacity() == 0)) {
|
||||
const bool allocok = parser.allocate_capacity(_batch_size);
|
||||
if (!allocok) {
|
||||
return parser.error = simdjson::MEMALLOC;
|
||||
}
|
||||
} else if (unlikely(parser.capacity() < _batch_size)) {
|
||||
return parser.error = simdjson::CAPACITY;
|
||||
}
|
||||
if (unlikely(parser_thread.capacity() < _batch_size)) {
|
||||
const bool allocok_thread = parser_thread.allocate_capacity(_batch_size);
|
||||
if (!allocok_thread) {
|
||||
return parser.error = simdjson::MEMALLOC;
|
||||
}
|
||||
}
|
||||
if (unlikely(load_next_batch)) {
|
||||
// First time loading
|
||||
if (!stage_1_thread.joinable()) {
|
||||
_batch_size = (std::min)(_batch_size, remaining());
|
||||
_batch_size = internal::trimmed_length_safe_utf8((const char *)buf(), _batch_size);
|
||||
if (_batch_size == 0) {
|
||||
return parser.error = simdjson::UTF8_ERROR;
|
||||
}
|
||||
auto stage1_is_ok = error_code(simdjson::active_implementation->stage1(buf(), _batch_size, parser, true));
|
||||
if (stage1_is_ok != simdjson::SUCCESS) {
|
||||
return parser.error = stage1_is_ok;
|
||||
}
|
||||
size_t last_index = internal::find_last_json_buf_idx(buf(), _batch_size, parser);
|
||||
if (last_index == 0) {
|
||||
if (parser.n_structural_indexes == 0) {
|
||||
return parser.error = simdjson::EMPTY;
|
||||
}
|
||||
} else {
|
||||
parser.n_structural_indexes = last_index + 1;
|
||||
}
|
||||
}
|
||||
// the second thread is running or done.
|
||||
else {
|
||||
stage_1_thread.join();
|
||||
if (stage1_is_ok_thread != simdjson::SUCCESS) {
|
||||
return parser.error = stage1_is_ok_thread;
|
||||
}
|
||||
std::swap(parser.structural_indexes, parser_thread.structural_indexes);
|
||||
parser.n_structural_indexes = parser_thread.n_structural_indexes;
|
||||
advance(last_json_buffer_loc);
|
||||
n_bytes_parsed += last_json_buffer_loc;
|
||||
}
|
||||
// let us decide whether we will start a new thread
|
||||
if (remaining() - _batch_size > 0) {
|
||||
last_json_buffer_loc =
|
||||
parser.structural_indexes[internal::find_last_json_buf_idx(buf(), _batch_size, parser)];
|
||||
_batch_size = (std::min)(_batch_size, remaining() - last_json_buffer_loc);
|
||||
if (_batch_size > 0) {
|
||||
_batch_size = internal::trimmed_length_safe_utf8(
|
||||
(const char *)(buf() + last_json_buffer_loc), _batch_size);
|
||||
if (_batch_size == 0) {
|
||||
return parser.error = simdjson::UTF8_ERROR;
|
||||
}
|
||||
// let us capture read-only variables
|
||||
const uint8_t *const b = buf() + last_json_buffer_loc;
|
||||
const size_t bs = _batch_size;
|
||||
// we call the thread on a lambda that will update
|
||||
// this->stage1_is_ok_thread
|
||||
// there is only one thread that may write to this value
|
||||
stage_1_thread = std::thread([this, b, bs] {
|
||||
this->stage1_is_ok_thread = error_code(simdjson::active_implementation->stage1(b, bs, this->parser_thread, true));
|
||||
});
|
||||
}
|
||||
}
|
||||
next_json = 0;
|
||||
load_next_batch = false;
|
||||
} // load_next_batch
|
||||
int res = simdjson::active_implementation->stage2(buf(), remaining(), parser, next_json);
|
||||
if (res == simdjson::SUCCESS_AND_HAS_MORE) {
|
||||
n_parsed_docs++;
|
||||
current_buffer_loc = parser.structural_indexes[next_json];
|
||||
load_next_batch = (current_buffer_loc == last_json_buffer_loc);
|
||||
} else if (res == simdjson::SUCCESS) {
|
||||
n_parsed_docs++;
|
||||
if (remaining() > _batch_size) {
|
||||
current_buffer_loc = parser.structural_indexes[next_json - 1];
|
||||
load_next_batch = true;
|
||||
res = simdjson::SUCCESS_AND_HAS_MORE;
|
||||
}
|
||||
}
|
||||
return res;
|
||||
inline JsonStream<string_container>::~JsonStream() noexcept {
|
||||
if (stream) { delete stream; }
|
||||
}
|
||||
|
||||
#else // SIMDJSON_THREADS_ENABLED
|
||||
|
||||
// single-threaded version of json_parse
|
||||
template <class string_container>
|
||||
int JsonStream<string_container>::json_parse(document::parser &parser) {
|
||||
if (unlikely(parser.capacity() == 0)) {
|
||||
const bool allocok = parser.allocate_capacity(_batch_size);
|
||||
if (!allocok) {
|
||||
parser.valid = false;
|
||||
return parser.error = MEMALLOC;
|
||||
}
|
||||
} else if (unlikely(parser.capacity() < _batch_size)) {
|
||||
parser.valid = false;
|
||||
return parser.error = CAPACITY;
|
||||
}
|
||||
if (unlikely(load_next_batch)) {
|
||||
advance(current_buffer_loc);
|
||||
n_bytes_parsed += current_buffer_loc;
|
||||
_batch_size = (std::min)(_batch_size, remaining());
|
||||
_batch_size = internal::trimmed_length_safe_utf8((const char *)buf(), _batch_size);
|
||||
auto stage1_is_ok = (error_code)simdjson::active_implementation->stage1(buf(), _batch_size, parser, true);
|
||||
if (stage1_is_ok != simdjson::SUCCESS) {
|
||||
parser.valid = false;
|
||||
return parser.error = stage1_is_ok;
|
||||
}
|
||||
size_t last_index = internal::find_last_json_buf_idx(buf(), _batch_size, parser);
|
||||
if (last_index == 0) {
|
||||
if (parser.n_structural_indexes == 0) {
|
||||
parser.valid = false;
|
||||
return parser.error = EMPTY;
|
||||
}
|
||||
} else {
|
||||
parser.n_structural_indexes = last_index + 1;
|
||||
}
|
||||
load_next_batch = false;
|
||||
} // load_next_batch
|
||||
int res = simdjson::active_implementation->stage2(buf(), remaining(), parser, next_json);
|
||||
if (likely(res == simdjson::SUCCESS_AND_HAS_MORE)) {
|
||||
n_parsed_docs++;
|
||||
current_buffer_loc = parser.structural_indexes[next_json];
|
||||
} else if (res == simdjson::SUCCESS) {
|
||||
n_parsed_docs++;
|
||||
if (remaining() > _batch_size) {
|
||||
current_buffer_loc = parser.structural_indexes[next_json - 1];
|
||||
next_json = 1;
|
||||
load_next_batch = true;
|
||||
res = simdjson::SUCCESS_AND_HAS_MORE;
|
||||
}
|
||||
// Deprecated shim: delegate to document::stream, which is created lazily on
// the first call. NOTE(review): assumes the document::stream constructor
// performs the initial parse so that stream->error is meaningful after
// construction — confirm against document_stream.h.
// Returns a simdjson error code (SUCCESS_AND_HAS_MORE while documents remain).
template <class string_container>
inline int JsonStream<string_container>::json_parse(document::parser &parser) noexcept {
  if (unlikely(stream == nullptr)) {
    stream = new document::stream(parser, reinterpret_cast<const uint8_t*>(str.data()), str.length(), batch_size);
  } else {
    // The legacy API let callers pass any parser each call; document::stream
    // is bound to exactly one, so reject a different parser outright.
    if (&parser != &stream->parser) { return stream->error = TAPE_ERROR; }
    stream->error = stream->json_parse();
  }
  return stream->error;
}
|
||||
#endif // SIMDJSON_THREADS_ENABLED
|
||||
|
||||
} // end of namespace simdjson
|
||||
} // namespace simdjson
|
||||
|
||||
#endif // SIMDJSON_INLINE_JSONSTREAM_H
|
||||
|
|
|
@ -1,51 +1,46 @@
|
|||
// TODO Remove this -- deprecated API and files
|
||||
|
||||
#ifndef SIMDJSON_JSONSTREAM_H
|
||||
#define SIMDJSON_JSONSTREAM_H
|
||||
|
||||
#include <thread>
|
||||
#include "simdjson/document_stream.h"
|
||||
#include "simdjson/padded_string.h"
|
||||
#include "simdjson/simdjson.h"
|
||||
|
||||
namespace simdjson {
|
||||
|
||||
/*************************************************************************************
|
||||
* The main motivation for this piece of software is to achieve maximum speed
|
||||
*and offer
|
||||
/**
|
||||
* @deprecated use document::stream instead.
|
||||
*
|
||||
* The main motivation for this piece of software is to achieve maximum speed and offer
|
||||
* good quality of life while parsing files containing multiple JSON documents.
|
||||
*
|
||||
* Since we want to offer flexibility and not restrict ourselves to a specific
|
||||
*file
|
||||
* format, we support any file that contains any valid JSON documents separated
|
||||
*by one
|
||||
* Since we want to offer flexibility and not restrict ourselves to a specific file
|
||||
* format, we support any file that contains any valid JSON documents separated by one
|
||||
* or more character that is considered a whitespace by the JSON spec.
|
||||
* Namely: space, nothing, linefeed, carriage return, horizontal tab.
|
||||
* Anything that is not whitespace will be parsed as a JSON document and could
|
||||
*lead
|
||||
* Anything that is not whitespace will be parsed as a JSON document and could lead
|
||||
* to failure.
|
||||
*
|
||||
* To offer maximum parsing speed, our implementation processes the data inside
|
||||
*the
|
||||
* To offer maximum parsing speed, our implementation processes the data inside the
|
||||
* buffer by batches and their size is defined by the parameter "batch_size".
|
||||
* By loading data in batches, we can optimize the time spent allocating data in
|
||||
*the
|
||||
* By loading data in batches, we can optimize the time spent allocating data in the
|
||||
* parser and can also open the possibility of multi-threading.
|
||||
* The batch_size must be at least as large as the biggest document in the file,
|
||||
*but
|
||||
* The batch_size must be at least as large as the biggest document in the file, but
|
||||
 * not too large in order to overwhelm the cached memory. We found that 1MB is
|
||||
* somewhat a sweet spot for now. Eventually, this batch_size could be fully
|
||||
* automated and be optimal at all times.
|
||||
************************************************************************************/
|
||||
/**
|
||||
* The template parameter (string_container) must
|
||||
* support the data() and size() methods, returning a pointer
|
||||
* to a char* and to the number of bytes respectively.
|
||||
* The simdjson parser may read up to SIMDJSON_PADDING bytes beyond the end
|
||||
* of the string, so if you do not use a padded_string container,
|
||||
 * you have the responsibility to overallocate. If you fail to
|
||||
* do so, your software may crash if you cross a page boundary,
|
||||
* and you should expect memory checkers to object.
|
||||
* Most users should use a simdjson::padded_string.
|
||||
*/
|
||||
template <class string_container = padded_string> class JsonStream {
|
||||
*
|
||||
* The template parameter (string_container) must
|
||||
* support the data() and size() methods, returning a pointer
|
||||
* to a char* and to the number of bytes respectively.
|
||||
* The simdjson parser may read up to SIMDJSON_PADDING bytes beyond the end
|
||||
* of the string, so if you do not use a padded_string container,
|
||||
 * you have the responsibility to overallocate. If you fail to
|
||||
* do so, your software may crash if you cross a page boundary,
|
||||
* and you should expect memory checkers to object.
|
||||
* Most users should use a simdjson::padded_string.
|
||||
*/
|
||||
template <class string_container> class JsonStream {
|
||||
public:
|
||||
/* Create a JsonStream object that can be used to parse sequentially the valid
|
||||
* JSON documents found in the buffer "buf".
|
||||
|
@ -66,9 +61,9 @@ public:
|
|||
* get_n_parsed_docs, get_n_bytes_parsed, etc.
|
||||
*
|
||||
* */
|
||||
JsonStream(const string_container &s, size_t batch_size = 1000000);
|
||||
JsonStream(const string_container &s, size_t _batch_size = 1000000) noexcept;
|
||||
|
||||
~JsonStream();
|
||||
~JsonStream() noexcept;
|
||||
|
||||
/* Parse the next document found in the buffer previously given to JsonStream.
|
||||
|
||||
|
@ -102,48 +97,29 @@ public:
|
|||
* You can also check validity by calling parser.is_valid(). The same parser
|
||||
can
|
||||
* and should be reused for the other documents in the buffer. */
|
||||
int json_parse(document::parser &parser);
|
||||
int json_parse(document::parser &parser) noexcept;
|
||||
|
||||
/* Returns the location (index) of where the next document should be in the
|
||||
* buffer.
|
||||
* Can be used for debugging, it tells the user the position of the end of the
|
||||
* last
|
||||
* valid JSON document parsed*/
|
||||
inline size_t get_current_buffer_loc() const { return current_buffer_loc; }
|
||||
inline size_t get_current_buffer_loc() const noexcept { return stream ? stream->current_buffer_loc : 0; }
|
||||
|
||||
/* Returns the total amount of complete documents parsed by the JsonStream,
|
||||
* in the current buffer, at the given time.*/
|
||||
inline size_t get_n_parsed_docs() const { return n_parsed_docs; }
|
||||
inline size_t get_n_parsed_docs() const noexcept { return stream ? stream->n_parsed_docs : 0; }
|
||||
|
||||
/* Returns the total amount of data (in bytes) parsed by the JsonStream,
|
||||
* in the current buffer, at the given time.*/
|
||||
inline size_t get_n_bytes_parsed() const { return n_bytes_parsed; }
|
||||
inline size_t get_n_bytes_parsed() const noexcept { return stream ? stream->n_bytes_parsed : 0; }
|
||||
|
||||
private:
|
||||
inline const uint8_t *buf() const { return reinterpret_cast<uint8_t*>(str.data()) + str_start; }
|
||||
|
||||
inline void advance(size_t offset) { str_start += offset; }
|
||||
|
||||
inline size_t remaining() const { return str.size() - str_start; }
|
||||
|
||||
const string_container &str;
|
||||
size_t _batch_size; // this is actually variable!
|
||||
size_t str_start{0};
|
||||
size_t next_json{0};
|
||||
bool load_next_batch{true};
|
||||
size_t current_buffer_loc{0};
|
||||
#ifdef SIMDJSON_THREADS_ENABLED
|
||||
size_t last_json_buffer_loc{0};
|
||||
#endif
|
||||
size_t n_parsed_docs{0};
|
||||
size_t n_bytes_parsed{0};
|
||||
simdjson::implementation *stage_parser;
|
||||
#ifdef SIMDJSON_THREADS_ENABLED
|
||||
error_code stage1_is_ok_thread{SUCCESS};
|
||||
std::thread stage_1_thread;
|
||||
document::parser parser_thread;
|
||||
#endif
|
||||
const size_t batch_size;
|
||||
document::stream *stream{nullptr};
|
||||
}; // end of class JsonStream
|
||||
|
||||
} // end of namespace simdjson
|
||||
|
||||
#endif // SIMDJSON_JSONSTREAM_H
|
||||
|
|
|
@ -91,7 +91,7 @@ struct padded_string final {
|
|||
}
|
||||
|
||||
~padded_string() {
|
||||
aligned_free_char(data_ptr);
|
||||
aligned_free_char(data_ptr);
|
||||
}
|
||||
|
||||
size_t size() const { return viable_size; }
|
||||
|
|
|
@ -270,6 +270,7 @@ static bool parse_json_message_issue467(char const* message, std::size_t len, si
|
|||
}
|
||||
|
||||
bool json_issue467() {
|
||||
printf("Running json_issue467.\n");
|
||||
const char * single_message = "{\"error\":[],\"result\":{\"token\":\"xxx\"}}";
|
||||
const char* two_messages = "{\"error\":[],\"result\":{\"token\":\"xxx\"}}{\"error\":[],\"result\":{\"token\":\"xxx\"}}";
|
||||
|
||||
|
@ -393,8 +394,8 @@ bool navigate_test() {
|
|||
}
|
||||
|
||||
// returns true if successful
|
||||
bool stream_utf8_test() {
|
||||
printf("Running stream_utf8_test");
|
||||
bool JsonStream_utf8_test() {
|
||||
printf("Running JsonStream_utf8_test");
|
||||
fflush(NULL);
|
||||
const size_t n_records = 10000;
|
||||
std::string data;
|
||||
|
@ -406,11 +407,11 @@ bool stream_utf8_test() {
|
|||
i, i, (i % 2) ? "⺃" : "⺕", i % 10, i % 10);
|
||||
data += std::string(buf, n);
|
||||
}
|
||||
for(size_t i = 1000; i < 2000; i += (i>1050?10:1)) {
|
||||
for(size_t batch_size = 1000; batch_size < 2000; batch_size += (batch_size>1050?10:1)) {
|
||||
printf(".");
|
||||
fflush(NULL);
|
||||
simdjson::padded_string str(data);
|
||||
simdjson::JsonStream<simdjson::padded_string> js{str, i};
|
||||
simdjson::JsonStream<simdjson::padded_string> js{str, batch_size};
|
||||
int parse_res = simdjson::SUCCESS_AND_HAS_MORE;
|
||||
size_t count = 0;
|
||||
simdjson::document::parser parser;
|
||||
|
@ -446,7 +447,7 @@ bool stream_utf8_test() {
|
|||
count++;
|
||||
}
|
||||
if(count != n_records) {
|
||||
printf("Something is wrong in stream_test at window size = %zu.\n", i);
|
||||
printf("Something is wrong in JsonStream_utf8_test at window size = %zu.\n", batch_size);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
@ -455,8 +456,8 @@ bool stream_utf8_test() {
|
|||
}
|
||||
|
||||
// returns true if successful
|
||||
bool stream_test() {
|
||||
printf("Running stream_test");
|
||||
bool JsonStream_test() {
|
||||
printf("Running JsonStream_test");
|
||||
fflush(NULL);
|
||||
const size_t n_records = 10000;
|
||||
std::string data;
|
||||
|
@ -468,11 +469,11 @@ bool stream_test() {
|
|||
i, i, (i % 2) ? "homme" : "femme", i % 10, i % 10);
|
||||
data += std::string(buf, n);
|
||||
}
|
||||
for(size_t i = 1000; i < 2000; i += (i>1050?10:1)) {
|
||||
for(size_t batch_size = 1000; batch_size < 2000; batch_size += (batch_size>1050?10:1)) {
|
||||
printf(".");
|
||||
fflush(NULL);
|
||||
simdjson::padded_string str(data);
|
||||
simdjson::JsonStream<simdjson::padded_string> js{str, i};
|
||||
simdjson::JsonStream<simdjson::padded_string> js{str, batch_size};
|
||||
int parse_res = simdjson::SUCCESS_AND_HAS_MORE;
|
||||
size_t count = 0;
|
||||
simdjson::document::parser parser;
|
||||
|
@ -508,7 +509,55 @@ bool stream_test() {
|
|||
count++;
|
||||
}
|
||||
if(count != n_records) {
|
||||
printf("Something is wrong in stream_test at window size = %zu.\n", i);
|
||||
printf("Something is wrong in JsonStream_test at window size = %zu.\n", batch_size);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
printf("ok\n");
|
||||
return true;
|
||||
}
|
||||
|
||||
// returns true if successful
|
||||
bool document_stream_test() {
|
||||
printf("Running document_stream_test");
|
||||
fflush(NULL);
|
||||
const size_t n_records = 10000;
|
||||
std::string data;
|
||||
char buf[1024];
|
||||
for (size_t i = 0; i < n_records; ++i) {
|
||||
auto n = sprintf(buf,
|
||||
"{\"id\": %zu, \"name\": \"name%zu\", \"gender\": \"%s\", "
|
||||
"\"ete\": {\"id\": %zu, \"name\": \"eventail%zu\"}}",
|
||||
i, i, (i % 2) ? "homme" : "femme", i % 10, i % 10);
|
||||
data += std::string(buf, n);
|
||||
}
|
||||
for(size_t batch_size = 1000; batch_size < 2000; batch_size += (batch_size>1050?10:1)) {
|
||||
printf(".");
|
||||
fflush(NULL);
|
||||
simdjson::padded_string str(data);
|
||||
simdjson::document::parser parser;
|
||||
size_t count = 0;
|
||||
for (auto [doc, error] : parser.parse_many(str, batch_size)) {
|
||||
if (error) {
|
||||
printf("Error at on document %zd at batch size %zu: %s\n", count, batch_size, simdjson::error_message(error).c_str());
|
||||
return false;
|
||||
}
|
||||
|
||||
auto [keyid, error2] = doc["id"].as_int64_t();
|
||||
if (error2) {
|
||||
printf("Error getting id as int64 on document %zd at batch size %zu: %s\n", count, batch_size, simdjson::error_message(error2).c_str());
|
||||
return false;
|
||||
}
|
||||
|
||||
if (keyid != int64_t(count)) {
|
||||
printf("key does not match %ld, expected %zd on document %zd at batch size %zu\n", keyid, count, count, batch_size);
|
||||
return false;
|
||||
}
|
||||
|
||||
count++;
|
||||
}
|
||||
if(count != n_records) {
|
||||
printf("Found wrong number of documents %zd, expected %zd at batch size %zu\n", count, n_records, batch_size);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
@ -781,9 +830,11 @@ int main() {
|
|||
std::cout << "Running basic tests." << std::endl;
|
||||
if(!json_issue467())
|
||||
return EXIT_FAILURE;
|
||||
if(!stream_test())
|
||||
if(!JsonStream_test())
|
||||
return EXIT_FAILURE;
|
||||
if(!stream_utf8_test())
|
||||
if(!JsonStream_utf8_test())
|
||||
return EXIT_FAILURE;
|
||||
if(!document_stream_test())
|
||||
return EXIT_FAILURE;
|
||||
if(!number_test_small_integers())
|
||||
return EXIT_FAILURE;
|
||||
|
|
|
@ -57,6 +57,33 @@ void parser_parse() {
|
|||
}
|
||||
}
|
||||
|
||||
void parser_parse_many_error_code() {
|
||||
cout << __func__ << endl;
|
||||
|
||||
// Read files with the parser
|
||||
padded_string json = string("[1, 2, 3] true [ true, false ]");
|
||||
cout << "Parsing " << json.data() << " ..." << endl;
|
||||
document::parser parser;
|
||||
for (auto [doc, error] : parser.parse_many(json)) {
|
||||
if (error) { cerr << "Error: " << error_message(error) << endl; exit(1); }
|
||||
if (!doc.print_json(cout)) { exit(1); }
|
||||
cout << endl;
|
||||
}
|
||||
}
|
||||
|
||||
void parser_parse_many_exception() {
|
||||
cout << __func__ << endl;
|
||||
|
||||
// Read files with the parser
|
||||
padded_string json = string("[1, 2, 3] true [ true, false ]");
|
||||
cout << "Parsing " << json.data() << " ..." << endl;
|
||||
document::parser parser;
|
||||
for (const document &doc : parser.parse_many(json)) {
|
||||
if (!doc.print_json(cout)) { exit(1); }
|
||||
cout << endl;
|
||||
}
|
||||
}
|
||||
|
||||
int main() {
|
||||
cout << "Running examples." << endl;
|
||||
document_parse_error_code();
|
||||
|
@ -64,6 +91,8 @@ int main() {
|
|||
document_parse_padded_string();
|
||||
document_parse_get_corpus();
|
||||
parser_parse();
|
||||
parser_parse_many_error_code();
|
||||
parser_parse_many_exception();
|
||||
cout << "Ran to completion!" << endl;
|
||||
return 0;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue