Document json_iterator

This commit is contained in:
John Keiser 2020-08-12 10:06:58 -07:00
parent 24f5936cbf
commit 74c47995a3
1 changed files with 85 additions and 13 deletions

View File

@ -11,22 +11,94 @@ public:
dom_parser_implementation &dom_parser; dom_parser_implementation &dom_parser;
uint32_t depth{0}; uint32_t depth{0};
/**
* Walk the JSON document.
*
* The visitor receives callbacks when values are encountered. All callbacks pass the iterator as
* the first parameter; some callbacks have other parameters as well:
*
* - visit_document_start() - at the beginning.
* - visit_document_end() - at the end (if things were successful).
*
* - visit_array_start() - at the start `[` of a non-empty array.
* - visit_array_end() - at the end `]` of a non-empty array.
* - visit_empty_array() - when an empty array is encountered.
*
* - visit_object_end() - at the start `]` of a non-empty object.
* - visit_object_start() - at the end `]` of a non-empty object.
* - visit_empty_object() - when an empty object is encountered.
* - visit_key(const uint8_t *key) - when a key in an object field is encountered. key is
* guaranteed to point at the first quote of the string (`"key"`).
* - visit_primitive(const uint8_t *value) - when a value is a string, number, boolean or null.
* - visit_root_primitive(iter, uint8_t *value) - when the top-level value is a string, number, boolean or null.
*
* - increment_count(iter) - each time a value is found in an array or object.
*/
template<bool STREAMING, typename V> template<bool STREAMING, typename V>
SIMDJSON_WARN_UNUSED simdjson_really_inline error_code walk_document(V &visitor) noexcept; SIMDJSON_WARN_UNUSED simdjson_really_inline error_code walk_document(V &visitor) noexcept;
/**
* Create an iterator capable of walking a JSON document.
*
* The document must have already passed through stage 1.
*/
simdjson_really_inline json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index); simdjson_really_inline json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index);
// Get the buffer position of the current structural character /**
simdjson_really_inline char peek_next_char() const noexcept; * Look at the next token.
*
* Tokens can be strings, numbers, booleans, null, or operators (`[{]},:`)).
*
* They may include invalid JSON as well (such as `1.2.3` or `ture`).
*/
simdjson_really_inline const uint8_t *peek() const noexcept;
/**
* Advance to the next token.
*
* Tokens can be strings, numbers, booleans, null, or operators (`[{]},:`)).
*
* They may include invalid JSON as well (such as `1.2.3` or `ture`).
*/
simdjson_really_inline const uint8_t *advance() noexcept; simdjson_really_inline const uint8_t *advance() noexcept;
/**
* Get the remaining length of the document, from the start of the current token.
*/
simdjson_really_inline size_t remaining_len() const noexcept; simdjson_really_inline size_t remaining_len() const noexcept;
simdjson_really_inline bool at_end() const noexcept; /**
* Check if we are at the end of the document.
*
* If this is true, there are no more tokens.
*/
simdjson_really_inline bool at_eof() const noexcept;
/**
* Check if we are at the beginning of the document.
*/
simdjson_really_inline bool at_beginning() const noexcept; simdjson_really_inline bool at_beginning() const noexcept;
simdjson_really_inline uint8_t last_structural() const noexcept; simdjson_really_inline uint8_t last_structural() const noexcept;
/**
* Log that a value has been found.
*
* Set ENABLE_LOGGING=true in logger.h to see logging.
*/
simdjson_really_inline void log_value(const char *type) const noexcept; simdjson_really_inline void log_value(const char *type) const noexcept;
/**
* Log the start of a multipart value.
*
* Set ENABLE_LOGGING=true in logger.h to see logging.
*/
simdjson_really_inline void log_start_value(const char *type) const noexcept; simdjson_really_inline void log_start_value(const char *type) const noexcept;
/**
* Log the end of a multipart value.
*
* Set ENABLE_LOGGING=true in logger.h to see logging.
*/
simdjson_really_inline void log_end_value(const char *type) const noexcept; simdjson_really_inline void log_end_value(const char *type) const noexcept;
/**
* Log an error.
*
* Set ENABLE_LOGGING=true in logger.h to see logging.
*/
simdjson_really_inline void log_error(const char *error) const noexcept; simdjson_really_inline void log_error(const char *error) const noexcept;
}; };
@ -37,7 +109,7 @@ SIMDJSON_WARN_UNUSED simdjson_really_inline error_code json_iterator::walk_docum
// //
// Start the document // Start the document
// //
if (at_end()) { return EMPTY; } if (at_eof()) { return EMPTY; }
SIMDJSON_TRY( visitor.visit_document_start(*this) ); SIMDJSON_TRY( visitor.visit_document_start(*this) );
// //
@ -64,8 +136,8 @@ SIMDJSON_WARN_UNUSED simdjson_really_inline error_code json_iterator::walk_docum
} }
switch (*value) { switch (*value) {
case '{': if (peek_next_char() == '}') { advance(); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; case '{': if (*peek() == '}') { advance(); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin;
case '[': if (peek_next_char() == ']') { advance(); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; case '[': if (*peek() == ']') { advance(); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin;
default: SIMDJSON_TRY( visitor.visit_root_primitive(*this, value) ); break; default: SIMDJSON_TRY( visitor.visit_root_primitive(*this, value) ); break;
} }
} }
@ -92,8 +164,8 @@ object_field:
{ {
auto value = advance(); auto value = advance();
switch (*value) { switch (*value) {
case '{': if (peek_next_char() == '}') { advance(); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; case '{': if (*peek() == '}') { advance(); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin;
case '[': if (peek_next_char() == ']') { advance(); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; case '[': if (*peek() == ']') { advance(); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin;
default: SIMDJSON_TRY( visitor.visit_primitive(*this, value) ); break; default: SIMDJSON_TRY( visitor.visit_primitive(*this, value) ); break;
} }
} }
@ -132,8 +204,8 @@ array_value:
{ {
auto value = advance(); auto value = advance();
switch (*value) { switch (*value) {
case '{': if (peek_next_char() == '}') { advance(); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; case '{': if (*peek() == '}') { advance(); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin;
case '[': if (peek_next_char() == ']') { advance(); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; case '[': if (*peek() == ']') { advance(); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin;
default: SIMDJSON_TRY( visitor.visit_primitive(*this, value) ); break; default: SIMDJSON_TRY( visitor.visit_primitive(*this, value) ); break;
} }
} }
@ -166,8 +238,8 @@ simdjson_really_inline json_iterator::json_iterator(dom_parser_implementation &_
dom_parser{_dom_parser} { dom_parser{_dom_parser} {
} }
simdjson_really_inline char json_iterator::peek_next_char() const noexcept { simdjson_really_inline const uint8_t *json_iterator::peek() const noexcept {
return buf[*(next_structural)]; return &buf[*(next_structural)];
} }
simdjson_really_inline const uint8_t *json_iterator::advance() noexcept { simdjson_really_inline const uint8_t *json_iterator::advance() noexcept {
return &buf[*(next_structural++)]; return &buf[*(next_structural++)];
@ -176,7 +248,7 @@ simdjson_really_inline size_t json_iterator::remaining_len() const noexcept {
return dom_parser.len - *(next_structural-1); return dom_parser.len - *(next_structural-1);
} }
simdjson_really_inline bool json_iterator::at_end() const noexcept { simdjson_really_inline bool json_iterator::at_eof() const noexcept {
return next_structural == &dom_parser.structural_indexes[dom_parser.n_structural_indexes]; return next_structural == &dom_parser.structural_indexes[dom_parser.n_structural_indexes];
} }
simdjson_really_inline bool json_iterator::at_beginning() const noexcept { simdjson_really_inline bool json_iterator::at_beginning() const noexcept {