diff --git a/src/generic/stage2/json_iterator.h b/src/generic/stage2/json_iterator.h index eadca6fd..6806a207 100644 --- a/src/generic/stage2/json_iterator.h +++ b/src/generic/stage2/json_iterator.h @@ -11,22 +11,94 @@ public: dom_parser_implementation &dom_parser; uint32_t depth{0}; + /** + * Walk the JSON document. + * + * The visitor receives callbacks when values are encountered. All callbacks pass the iterator as + * the first parameter; some callbacks have other parameters as well: + * + * - visit_document_start() - at the beginning. + * - visit_document_end() - at the end (if things were successful). + * + * - visit_array_start() - at the start `[` of a non-empty array. + * - visit_array_end() - at the end `]` of a non-empty array. + * - visit_empty_array() - when an empty array is encountered. + * + * - visit_object_end() - at the start `]` of a non-empty object. + * - visit_object_start() - at the end `]` of a non-empty object. + * - visit_empty_object() - when an empty object is encountered. + * - visit_key(const uint8_t *key) - when a key in an object field is encountered. key is + * guaranteed to point at the first quote of the string (`"key"`). + * - visit_primitive(const uint8_t *value) - when a value is a string, number, boolean or null. + * - visit_root_primitive(iter, uint8_t *value) - when the top-level value is a string, number, boolean or null. + * + * - increment_count(iter) - each time a value is found in an array or object. + */ template SIMDJSON_WARN_UNUSED simdjson_really_inline error_code walk_document(V &visitor) noexcept; + /** + * Create an iterator capable of walking a JSON document. + * + * The document must have already passed through stage 1. + */ simdjson_really_inline json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index); - // Get the buffer position of the current structural character - simdjson_really_inline char peek_next_char() const noexcept; + /** + * Look at the next token. + * + * Tokens can be strings, numbers, booleans, null, or operators (`[{]},:`)). + * + * They may include invalid JSON as well (such as `1.2.3` or `ture`). + */ + simdjson_really_inline const uint8_t *peek() const noexcept; + /** + * Advance to the next token. + * + * Tokens can be strings, numbers, booleans, null, or operators (`[{]},:`)). + * + * They may include invalid JSON as well (such as `1.2.3` or `ture`). + */ simdjson_really_inline const uint8_t *advance() noexcept; + /** + * Get the remaining length of the document, from the start of the current token. + */ simdjson_really_inline size_t remaining_len() const noexcept; - simdjson_really_inline bool at_end() const noexcept; + /** + * Check if we are at the end of the document. + * + * If this is true, there are no more tokens. + */ + simdjson_really_inline bool at_eof() const noexcept; + /** + * Check if we are at the beginning of the document. + */ simdjson_really_inline bool at_beginning() const noexcept; simdjson_really_inline uint8_t last_structural() const noexcept; + /** + * Log that a value has been found. + * + * Set ENABLE_LOGGING=true in logger.h to see logging. + */ simdjson_really_inline void log_value(const char *type) const noexcept; + /** + * Log the start of a multipart value. + * + * Set ENABLE_LOGGING=true in logger.h to see logging. + */ simdjson_really_inline void log_start_value(const char *type) const noexcept; + /** + * Log the end of a multipart value. + * + * Set ENABLE_LOGGING=true in logger.h to see logging. + */ simdjson_really_inline void log_end_value(const char *type) const noexcept; + /** + * Log an error. + * + * Set ENABLE_LOGGING=true in logger.h to see logging. + */ simdjson_really_inline void log_error(const char *error) const noexcept; }; @@ -37,7 +109,7 @@ SIMDJSON_WARN_UNUSED simdjson_really_inline error_code json_iterator::walk_docum // // Start the document // - if (at_end()) { return EMPTY; } + if (at_eof()) { return EMPTY; } SIMDJSON_TRY( visitor.visit_document_start(*this) ); // @@ -64,8 +136,8 @@ SIMDJSON_WARN_UNUSED simdjson_really_inline error_code json_iterator::walk_docum } switch (*value) { - case '{': if (peek_next_char() == '}') { advance(); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; - case '[': if (peek_next_char() == ']') { advance(); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; + case '{': if (*peek() == '}') { advance(); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; + case '[': if (*peek() == ']') { advance(); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; default: SIMDJSON_TRY( visitor.visit_root_primitive(*this, value) ); break; } } @@ -92,8 +164,8 @@ object_field: { auto value = advance(); switch (*value) { - case '{': if (peek_next_char() == '}') { advance(); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; - case '[': if (peek_next_char() == ']') { advance(); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; + case '{': if (*peek() == '}') { advance(); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; + case '[': if (*peek() == ']') { advance(); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; default: SIMDJSON_TRY( visitor.visit_primitive(*this, value) ); break; } } @@ -132,8 +204,8 @@ array_value: { auto value = advance(); switch (*value) { - case '{': if (peek_next_char() == '}') { advance(); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; - case '[': if (peek_next_char() == ']') { advance(); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; + case '{': if (*peek() == '}') { advance(); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; + case '[': if (*peek() == ']') { advance(); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; default: SIMDJSON_TRY( visitor.visit_primitive(*this, value) ); break; } } @@ -166,8 +238,8 @@ simdjson_really_inline json_iterator::json_iterator(dom_parser_implementation &_ dom_parser{_dom_parser} { } -simdjson_really_inline char json_iterator::peek_next_char() const noexcept { - return buf[*(next_structural)]; +simdjson_really_inline const uint8_t *json_iterator::peek() const noexcept { + return &buf[*(next_structural)]; } simdjson_really_inline const uint8_t *json_iterator::advance() noexcept { return &buf[*(next_structural++)]; @@ -176,7 +248,7 @@ simdjson_really_inline size_t json_iterator::remaining_len() const noexcept { return dom_parser.len - *(next_structural-1); } -simdjson_really_inline bool json_iterator::at_end() const noexcept { +simdjson_really_inline bool json_iterator::at_eof() const noexcept { return next_structural == &dom_parser.structural_indexes[dom_parser.n_structural_indexes]; } simdjson_really_inline bool json_iterator::at_beginning() const noexcept {