Separate interface from implementation to make interface clearer
This commit is contained in:
parent
15eb1ad922
commit
bdfa8aca28
|
@ -14,52 +14,20 @@ public:
|
||||||
template<bool STREAMING, typename V>
|
template<bool STREAMING, typename V>
|
||||||
SIMDJSON_WARN_UNUSED simdjson_really_inline error_code walk_document(V &visitor) noexcept;
|
SIMDJSON_WARN_UNUSED simdjson_really_inline error_code walk_document(V &visitor) noexcept;
|
||||||
|
|
||||||
// Start a structural
|
simdjson_really_inline json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index);
|
||||||
simdjson_really_inline json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index)
|
|
||||||
: buf{_dom_parser.buf},
|
|
||||||
next_structural{&_dom_parser.structural_indexes[start_structural_index]},
|
|
||||||
dom_parser{_dom_parser} {
|
|
||||||
}
|
|
||||||
|
|
||||||
// Get the buffer position of the current structural character
|
// Get the buffer position of the current structural character
|
||||||
simdjson_really_inline char peek_next_char() {
|
simdjson_really_inline char peek_next_char() const noexcept;
|
||||||
return buf[*(next_structural)];
|
simdjson_really_inline const uint8_t *advance() noexcept;
|
||||||
}
|
simdjson_really_inline size_t remaining_len() const noexcept;
|
||||||
simdjson_really_inline const uint8_t *advance() {
|
simdjson_really_inline bool at_end() const noexcept;
|
||||||
return &buf[*(next_structural++)];
|
simdjson_really_inline bool at_beginning() const noexcept;
|
||||||
}
|
simdjson_really_inline uint8_t last_structural() const noexcept;
|
||||||
simdjson_really_inline size_t remaining_len() {
|
|
||||||
return dom_parser.len - *(next_structural-1);
|
|
||||||
}
|
|
||||||
|
|
||||||
simdjson_really_inline bool at_end() {
|
simdjson_really_inline void log_value(const char *type) const noexcept;
|
||||||
return next_structural == &dom_parser.structural_indexes[dom_parser.n_structural_indexes];
|
simdjson_really_inline void log_start_value(const char *type) const noexcept;
|
||||||
}
|
simdjson_really_inline void log_end_value(const char *type) const noexcept;
|
||||||
simdjson_really_inline bool at_beginning() {
|
simdjson_really_inline void log_error(const char *error) const noexcept;
|
||||||
return next_structural == dom_parser.structural_indexes.get();
|
|
||||||
}
|
|
||||||
|
|
||||||
simdjson_really_inline void log_value(const char *type) {
|
|
||||||
logger::log_line(*this, "", type, "");
|
|
||||||
}
|
|
||||||
|
|
||||||
simdjson_really_inline void log_start_value(const char *type) {
|
|
||||||
logger::log_line(*this, "+", type, "");
|
|
||||||
if (logger::LOG_ENABLED) { logger::log_depth++; }
|
|
||||||
}
|
|
||||||
|
|
||||||
simdjson_really_inline void log_end_value(const char *type) {
|
|
||||||
if (logger::LOG_ENABLED) { logger::log_depth--; }
|
|
||||||
logger::log_line(*this, "-", type, "");
|
|
||||||
}
|
|
||||||
|
|
||||||
simdjson_really_inline void log_error(const char *error) {
|
|
||||||
logger::log_line(*this, "", "ERROR", error);
|
|
||||||
}
|
|
||||||
|
|
||||||
simdjson_really_inline uint8_t last_structural() {
|
|
||||||
return buf[dom_parser.structural_indexes[dom_parser.n_structural_indexes - 1]];
|
|
||||||
}
|
|
||||||
};
|
};
|
||||||
|
|
||||||
template<bool STREAMING, typename V>
|
template<bool STREAMING, typename V>
|
||||||
|
@ -190,6 +158,50 @@ document_end:
|
||||||
|
|
||||||
} // walk_document()
|
} // walk_document()
|
||||||
|
|
||||||
|
// Constructs an iterator over _dom_parser's structural indexes, positioned at
// entry start_structural_index.
simdjson_really_inline json_iterator::json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index)
|
||||||
|
// Initialize buf from the parser's buf member.
: buf{_dom_parser.buf},
|
||||||
|
// Point at the requested entry of the parser's structural index array.
next_structural{&_dom_parser.structural_indexes[start_structural_index]},
|
||||||
|
// Remember the parser this iterator walks.
dom_parser{_dom_parser} {
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns the buffer character at the offset stored in the current structural
// index entry, without moving the iterator.
simdjson_really_inline char json_iterator::peek_next_char() const noexcept {
|
||||||
|
return buf[*(next_structural)];
|
||||||
|
}
|
||||||
|
// Returns a pointer into buf at the offset stored in the current structural
// index entry, then moves next_structural on to the following entry.
simdjson_really_inline const uint8_t *json_iterator::advance() noexcept {
|
||||||
|
// Post-increment: the returned pointer uses the entry before the step.
return &buf[*(next_structural++)];
|
||||||
|
}
|
||||||
|
// Returns dom_parser.len minus the offset stored in the structural index entry
// just before next_structural.
// NOTE(review): reads next_structural[-1], so this presumably must only be
// called after at least one advance() -- confirm against callers.
simdjson_really_inline size_t json_iterator::remaining_len() const noexcept {
|
||||||
|
return dom_parser.len - *(next_structural-1);
|
||||||
|
}
|
||||||
|
|
||||||
|
// True when next_structural points at entry n_structural_indexes of the
// parser's structural index array (i.e. the position after the last of the
// n_structural_indexes entries).
simdjson_really_inline bool json_iterator::at_end() const noexcept {
|
||||||
|
return next_structural == &dom_parser.structural_indexes[dom_parser.n_structural_indexes];
|
||||||
|
}
|
||||||
|
// True when next_structural still points at the start of the parser's
// structural index array (the pointer returned by structural_indexes.get()).
simdjson_really_inline bool json_iterator::at_beginning() const noexcept {
|
||||||
|
return next_structural == dom_parser.structural_indexes.get();
|
||||||
|
}
|
||||||
|
// Returns the buffer byte at the offset stored in the final structural index
// entry (index n_structural_indexes - 1), regardless of the iterator position.
// NOTE(review): presumably requires n_structural_indexes > 0 -- confirm.
simdjson_really_inline uint8_t json_iterator::last_structural() const noexcept {
|
||||||
|
return buf[dom_parser.structural_indexes[dom_parser.n_structural_indexes - 1]];
|
||||||
|
}
|
||||||
|
|
||||||
|
// Logs a single value of the given type by forwarding to logger::log_line
// with an empty prefix and an empty detail string.
simdjson_really_inline void json_iterator::log_value(const char *type) const noexcept {
|
||||||
|
logger::log_line(*this, "", type, "");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Logs the start of a value of the given type (prefix "+") and then, when
// logging is enabled, increases logger::log_depth.
simdjson_really_inline void json_iterator::log_start_value(const char *type) const noexcept {
|
||||||
|
// The line is emitted before the depth changes.
logger::log_line(*this, "+", type, "");
|
||||||
|
if (logger::LOG_ENABLED) { logger::log_depth++; }
|
||||||
|
}
|
||||||
|
|
||||||
|
// Logs the end of a value of the given type (prefix "-"). When logging is
// enabled, logger::log_depth is decreased first, so the line is emitted at the
// reduced depth.
simdjson_really_inline void json_iterator::log_end_value(const char *type) const noexcept {
|
||||||
|
if (logger::LOG_ENABLED) { logger::log_depth--; }
|
||||||
|
logger::log_line(*this, "-", type, "");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Logs an error line: the type column is the literal "ERROR" and the error
// text is passed as the detail string.
simdjson_really_inline void json_iterator::log_error(const char *error) const noexcept {
|
||||||
|
logger::log_line(*this, "", "ERROR", error);
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace stage2
|
} // namespace stage2
|
||||||
} // namespace SIMDJSON_IMPLEMENTATION
|
} // namespace SIMDJSON_IMPLEMENTATION
|
||||||
} // unnamed namespace
|
} // unnamed namespace
|
||||||
|
|
|
@ -9,99 +9,26 @@ namespace stage2 {
|
||||||
struct tape_builder {
|
struct tape_builder {
|
||||||
template<bool STREAMING>
|
template<bool STREAMING>
|
||||||
SIMDJSON_WARN_UNUSED static simdjson_really_inline error_code parse_document(
|
SIMDJSON_WARN_UNUSED static simdjson_really_inline error_code parse_document(
|
||||||
dom_parser_implementation &dom_parser,
|
dom_parser_implementation &dom_parser,
|
||||||
dom::document &doc) noexcept {
|
dom::document &doc) noexcept;
|
||||||
dom_parser.doc = &doc;
|
|
||||||
json_iterator iter(dom_parser, STREAMING ? dom_parser.next_structural_index : 0);
|
|
||||||
tape_builder builder(doc);
|
|
||||||
return iter.walk_document<STREAMING>(builder);
|
|
||||||
}
|
|
||||||
|
|
||||||
SIMDJSON_WARN_UNUSED simdjson_really_inline error_code visit_root_primitive(json_iterator &iter, const uint8_t *value) {
|
SIMDJSON_WARN_UNUSED simdjson_really_inline error_code visit_root_primitive(json_iterator &iter, const uint8_t *value) noexcept;
|
||||||
switch (*value) {
|
SIMDJSON_WARN_UNUSED simdjson_really_inline error_code visit_primitive(json_iterator &iter, const uint8_t *value) noexcept;
|
||||||
case '"': return visit_string(iter, value);
|
SIMDJSON_WARN_UNUSED simdjson_really_inline error_code visit_empty_object(json_iterator &iter) noexcept;
|
||||||
case 't': return visit_root_true_atom(iter, value);
|
SIMDJSON_WARN_UNUSED simdjson_really_inline error_code visit_empty_array(json_iterator &iter) noexcept;
|
||||||
case 'f': return visit_root_false_atom(iter, value);
|
|
||||||
case 'n': return visit_root_null_atom(iter, value);
|
|
||||||
case '-':
|
|
||||||
case '0': case '1': case '2': case '3': case '4':
|
|
||||||
case '5': case '6': case '7': case '8': case '9':
|
|
||||||
return visit_root_number(iter, value);
|
|
||||||
default:
|
|
||||||
iter.log_error("Document starts with a non-value character");
|
|
||||||
return TAPE_ERROR;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
SIMDJSON_WARN_UNUSED simdjson_really_inline error_code visit_primitive(json_iterator &iter, const uint8_t *value) {
|
|
||||||
switch (*value) {
|
|
||||||
case '"': return visit_string(iter, value);
|
|
||||||
case 't': return visit_true_atom(iter, value);
|
|
||||||
case 'f': return visit_false_atom(iter, value);
|
|
||||||
case 'n': return visit_null_atom(iter, value);
|
|
||||||
case '-':
|
|
||||||
case '0': case '1': case '2': case '3': case '4':
|
|
||||||
case '5': case '6': case '7': case '8': case '9':
|
|
||||||
return visit_number(iter, value);
|
|
||||||
default:
|
|
||||||
iter.log_error("Non-value found when value was expected!");
|
|
||||||
return TAPE_ERROR;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
SIMDJSON_WARN_UNUSED simdjson_really_inline error_code visit_empty_object(json_iterator &iter) {
|
|
||||||
iter.log_value("empty object");
|
|
||||||
return empty_container(iter, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT);
|
|
||||||
}
|
|
||||||
SIMDJSON_WARN_UNUSED simdjson_really_inline error_code visit_empty_array(json_iterator &iter) {
|
|
||||||
iter.log_value("empty array");
|
|
||||||
return empty_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY);
|
|
||||||
}
|
|
||||||
|
|
||||||
SIMDJSON_WARN_UNUSED simdjson_really_inline error_code visit_document_start(json_iterator &iter) {
|
SIMDJSON_WARN_UNUSED simdjson_really_inline error_code visit_document_start(json_iterator &iter) noexcept;
|
||||||
iter.log_start_value("document");
|
SIMDJSON_WARN_UNUSED simdjson_really_inline error_code visit_object_start(json_iterator &iter) noexcept;
|
||||||
start_container(iter);
|
SIMDJSON_WARN_UNUSED simdjson_really_inline error_code visit_array_start(json_iterator &iter) noexcept;
|
||||||
iter.dom_parser.is_array[iter.depth] = false;
|
|
||||||
return SUCCESS;
|
|
||||||
}
|
|
||||||
SIMDJSON_WARN_UNUSED simdjson_really_inline error_code visit_object_start(json_iterator &iter) {
|
|
||||||
iter.log_start_value("object");
|
|
||||||
start_container(iter);
|
|
||||||
iter.dom_parser.is_array[iter.depth] = false;
|
|
||||||
return SUCCESS;
|
|
||||||
}
|
|
||||||
SIMDJSON_WARN_UNUSED simdjson_really_inline error_code visit_array_start(json_iterator &iter) {
|
|
||||||
iter.log_start_value("array");
|
|
||||||
start_container(iter);
|
|
||||||
iter.dom_parser.is_array[iter.depth] = true;
|
|
||||||
return SUCCESS;
|
|
||||||
}
|
|
||||||
|
|
||||||
SIMDJSON_WARN_UNUSED simdjson_really_inline error_code visit_object_end(json_iterator &iter) {
|
SIMDJSON_WARN_UNUSED simdjson_really_inline error_code visit_object_end(json_iterator &iter) noexcept;
|
||||||
iter.log_end_value("object");
|
SIMDJSON_WARN_UNUSED simdjson_really_inline error_code visit_array_end(json_iterator &iter) noexcept;
|
||||||
return end_container(iter, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT);
|
SIMDJSON_WARN_UNUSED simdjson_really_inline error_code visit_document_end(json_iterator &iter) noexcept;
|
||||||
}
|
SIMDJSON_WARN_UNUSED simdjson_really_inline error_code visit_key(json_iterator &iter, const uint8_t *key) noexcept;
|
||||||
SIMDJSON_WARN_UNUSED simdjson_really_inline error_code visit_array_end(json_iterator &iter) {
|
|
||||||
iter.log_end_value("array");
|
|
||||||
return end_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY);
|
|
||||||
}
|
|
||||||
SIMDJSON_WARN_UNUSED simdjson_really_inline error_code visit_document_end(json_iterator &iter) {
|
|
||||||
iter.log_end_value("document");
|
|
||||||
constexpr uint32_t start_tape_index = 0;
|
|
||||||
tape.append(start_tape_index, internal::tape_type::ROOT);
|
|
||||||
tape_writer::write(iter.dom_parser.doc->tape[start_tape_index], next_tape_index(iter), internal::tape_type::ROOT);
|
|
||||||
return SUCCESS;
|
|
||||||
}
|
|
||||||
SIMDJSON_WARN_UNUSED simdjson_really_inline error_code visit_key(json_iterator &iter, const uint8_t *key) {
|
|
||||||
return visit_string(iter, key, true);
|
|
||||||
}
|
|
||||||
|
|
||||||
// increment_count increments the count of keys in an object or values in an array.
|
// increment_count increments the count of keys in an object or values in an array.
|
||||||
SIMDJSON_WARN_UNUSED simdjson_really_inline error_code increment_count(json_iterator &iter) {
|
SIMDJSON_WARN_UNUSED simdjson_really_inline error_code increment_count(json_iterator &iter) noexcept;
|
||||||
iter.dom_parser.open_containers[iter.depth].count++; // we have a key value pair in the object at parser.dom_parser.depth - 1
|
simdjson_really_inline bool in_array(json_iterator &iter) noexcept;
|
||||||
return SUCCESS;
|
|
||||||
}
|
|
||||||
simdjson_really_inline bool in_array(json_iterator &iter) noexcept {
|
|
||||||
return iter.dom_parser.is_array[iter.depth];
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
/** Next location to write to tape */
|
/** Next location to write to tape */
|
||||||
|
@ -109,143 +36,255 @@ private:
|
||||||
/** Next write location in the string buf for stage 2 parsing */
|
/** Next write location in the string buf for stage 2 parsing */
|
||||||
uint8_t *current_string_buf_loc;
|
uint8_t *current_string_buf_loc;
|
||||||
|
|
||||||
simdjson_really_inline tape_builder(dom::document &doc) noexcept : tape{doc.tape.get()}, current_string_buf_loc{doc.string_buf.get()} {}
|
simdjson_really_inline tape_builder(dom::document &doc) noexcept;
|
||||||
|
|
||||||
SIMDJSON_WARN_UNUSED simdjson_really_inline error_code visit_string(json_iterator &iter, const uint8_t *value, bool key = false) {
|
SIMDJSON_WARN_UNUSED simdjson_really_inline error_code visit_string(json_iterator &iter, const uint8_t *value, bool key = false) noexcept;
|
||||||
iter.log_value(key ? "key" : "string");
|
SIMDJSON_WARN_UNUSED simdjson_really_inline error_code visit_number(json_iterator &iter, const uint8_t *value) noexcept;
|
||||||
uint8_t *dst = on_start_string(iter);
|
SIMDJSON_WARN_UNUSED simdjson_really_inline error_code visit_root_number(json_iterator &iter, const uint8_t *value) noexcept;
|
||||||
dst = stringparsing::parse_string(value, dst);
|
SIMDJSON_WARN_UNUSED simdjson_really_inline error_code visit_true_atom(json_iterator &iter, const uint8_t *value) noexcept;
|
||||||
if (dst == nullptr) {
|
SIMDJSON_WARN_UNUSED simdjson_really_inline error_code visit_root_true_atom(json_iterator &iter, const uint8_t *value) noexcept;
|
||||||
iter.log_error("Invalid escape in string");
|
SIMDJSON_WARN_UNUSED simdjson_really_inline error_code visit_false_atom(json_iterator &iter, const uint8_t *value) noexcept;
|
||||||
return STRING_ERROR;
|
SIMDJSON_WARN_UNUSED simdjson_really_inline error_code visit_root_false_atom(json_iterator &iter, const uint8_t *value) noexcept;
|
||||||
}
|
SIMDJSON_WARN_UNUSED simdjson_really_inline error_code visit_null_atom(json_iterator &iter, const uint8_t *value) noexcept;
|
||||||
on_end_string(dst);
|
SIMDJSON_WARN_UNUSED simdjson_really_inline error_code visit_root_null_atom(json_iterator &iter, const uint8_t *value) noexcept;
|
||||||
return SUCCESS;
|
|
||||||
}
|
|
||||||
|
|
||||||
SIMDJSON_WARN_UNUSED simdjson_really_inline error_code visit_number(json_iterator &iter, const uint8_t *value) {
|
simdjson_really_inline uint32_t next_tape_index(json_iterator &iter) const noexcept;
|
||||||
iter.log_value("number");
|
simdjson_really_inline void start_container(json_iterator &iter) noexcept;
|
||||||
if (!numberparsing::parse_number(value, tape)) { iter.log_error("Invalid number"); return NUMBER_ERROR; }
|
SIMDJSON_WARN_UNUSED simdjson_really_inline error_code end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept;
|
||||||
return SUCCESS;
|
SIMDJSON_WARN_UNUSED simdjson_really_inline error_code empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept;
|
||||||
}
|
simdjson_really_inline uint8_t *on_start_string(json_iterator &iter) noexcept;
|
||||||
|
simdjson_really_inline void on_end_string(uint8_t *dst) noexcept;
|
||||||
|
}; // class tape_builder
|
||||||
|
|
||||||
SIMDJSON_WARN_UNUSED simdjson_really_inline error_code visit_root_number(json_iterator &iter, const uint8_t *value) {
|
template<bool STREAMING>
|
||||||
//
|
SIMDJSON_WARN_UNUSED simdjson_really_inline error_code tape_builder::parse_document(
|
||||||
// We need to make a copy to make sure that the string is space terminated.
|
dom_parser_implementation &dom_parser,
|
||||||
// This is not about padding the input, which should already be padded up
|
dom::document &doc) noexcept {
|
||||||
// to len + SIMDJSON_PADDING. However, we have no control at this stage
|
dom_parser.doc = &doc;
|
||||||
// on how the padding was done. What if the input string was padded with nulls?
|
json_iterator iter(dom_parser, STREAMING ? dom_parser.next_structural_index : 0);
|
||||||
// It is quite common for an input string to have an extra null character (C string).
|
tape_builder builder(doc);
|
||||||
// We do not want to allow 9\0 (where \0 is the null character) inside a JSON
|
return iter.walk_document<STREAMING>(builder);
|
||||||
// document, but the string "9\0" by itself is fine. So we make a copy and
|
}
|
||||||
// pad the input with spaces when we know that there is just one input element.
|
|
||||||
// This copy is relatively expensive, but it will almost never be called in
|
|
||||||
// practice unless you are in the strange scenario where you have many JSON
|
|
||||||
// documents made of single atoms.
|
|
||||||
//
|
|
||||||
uint8_t *copy = static_cast<uint8_t *>(malloc(iter.remaining_len() + SIMDJSON_PADDING));
|
|
||||||
if (copy == nullptr) {
|
|
||||||
return MEMALLOC;
|
|
||||||
}
|
|
||||||
memcpy(copy, value, iter.remaining_len());
|
|
||||||
memset(copy + iter.remaining_len(), ' ', SIMDJSON_PADDING);
|
|
||||||
error_code error = visit_number(iter, copy);
|
|
||||||
free(copy);
|
|
||||||
return error;
|
|
||||||
}
|
|
||||||
|
|
||||||
SIMDJSON_WARN_UNUSED simdjson_really_inline error_code visit_true_atom(json_iterator &iter, const uint8_t *value) {
|
SIMDJSON_WARN_UNUSED simdjson_really_inline error_code tape_builder::visit_root_primitive(json_iterator &iter, const uint8_t *value) noexcept {
|
||||||
iter.log_value("true");
|
switch (*value) {
|
||||||
if (!atomparsing::is_valid_true_atom(value)) { return T_ATOM_ERROR; }
|
case '"': return visit_string(iter, value);
|
||||||
tape.append(0, internal::tape_type::TRUE_VALUE);
|
case 't': return visit_root_true_atom(iter, value);
|
||||||
return SUCCESS;
|
case 'f': return visit_root_false_atom(iter, value);
|
||||||
|
case 'n': return visit_root_null_atom(iter, value);
|
||||||
|
case '-':
|
||||||
|
case '0': case '1': case '2': case '3': case '4':
|
||||||
|
case '5': case '6': case '7': case '8': case '9':
|
||||||
|
return visit_root_number(iter, value);
|
||||||
|
default:
|
||||||
|
iter.log_error("Document starts with a non-value character");
|
||||||
|
return TAPE_ERROR;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
// Dispatches a non-root primitive to the matching visit_* handler based on the
// first byte at value. Returns that handler's result, or logs an error and
// returns TAPE_ERROR when the byte does not start a string, true/false/null
// atom, or number.
SIMDJSON_WARN_UNUSED simdjson_really_inline error_code tape_builder::visit_primitive(json_iterator &iter, const uint8_t *value) noexcept {
|
||||||
|
switch (*value) {
|
||||||
|
case '"': return visit_string(iter, value);
|
||||||
|
case 't': return visit_true_atom(iter, value);
|
||||||
|
case 'f': return visit_false_atom(iter, value);
|
||||||
|
case 'n': return visit_null_atom(iter, value);
|
||||||
|
// A leading minus sign or any decimal digit is handled as a number.
case '-':
|
||||||
|
case '0': case '1': case '2': case '3': case '4':
|
||||||
|
case '5': case '6': case '7': case '8': case '9':
|
||||||
|
return visit_number(iter, value);
|
||||||
|
default:
|
||||||
|
iter.log_error("Non-value found when value was expected!");
|
||||||
|
return TAPE_ERROR;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Handles an empty object: logs it as a value and delegates to
// empty_container with the START_OBJECT / END_OBJECT tape types.
SIMDJSON_WARN_UNUSED simdjson_really_inline error_code tape_builder::visit_empty_object(json_iterator &iter) noexcept {
|
||||||
|
iter.log_value("empty object");
|
||||||
|
return empty_container(iter, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT);
|
||||||
|
}
|
||||||
|
// Handles an empty array: logs it as a value and delegates to
// empty_container with the START_ARRAY / END_ARRAY tape types.
SIMDJSON_WARN_UNUSED simdjson_really_inline error_code tape_builder::visit_empty_array(json_iterator &iter) noexcept {
|
||||||
|
iter.log_value("empty array");
|
||||||
|
return empty_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY);
|
||||||
|
}
|
||||||
|
|
||||||
SIMDJSON_WARN_UNUSED simdjson_really_inline error_code visit_root_true_atom(json_iterator &iter, const uint8_t *value) {
|
SIMDJSON_WARN_UNUSED simdjson_really_inline error_code tape_builder::visit_document_start(json_iterator &iter) noexcept {
|
||||||
iter.log_value("true");
|
iter.log_start_value("document");
|
||||||
if (!atomparsing::is_valid_true_atom(value, iter.remaining_len())) { return T_ATOM_ERROR; }
|
start_container(iter);
|
||||||
tape.append(0, internal::tape_type::TRUE_VALUE);
|
iter.dom_parser.is_array[iter.depth] = false;
|
||||||
return SUCCESS;
|
return SUCCESS;
|
||||||
}
|
}
|
||||||
|
// Called when an object opens: logs the start, calls start_container, and
// records in the parser's is_array table that the container at iter.depth is
// not an array. Always returns SUCCESS.
SIMDJSON_WARN_UNUSED simdjson_really_inline error_code tape_builder::visit_object_start(json_iterator &iter) noexcept {
|
||||||
|
iter.log_start_value("object");
|
||||||
|
start_container(iter);
|
||||||
|
iter.dom_parser.is_array[iter.depth] = false;
|
||||||
|
return SUCCESS;
|
||||||
|
}
|
||||||
|
// Called when an array opens: logs the start, calls start_container, and
// records in the parser's is_array table that the container at iter.depth is
// an array. Always returns SUCCESS.
SIMDJSON_WARN_UNUSED simdjson_really_inline error_code tape_builder::visit_array_start(json_iterator &iter) noexcept {
|
||||||
|
iter.log_start_value("array");
|
||||||
|
start_container(iter);
|
||||||
|
iter.dom_parser.is_array[iter.depth] = true;
|
||||||
|
return SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
SIMDJSON_WARN_UNUSED simdjson_really_inline error_code visit_false_atom(json_iterator &iter, const uint8_t *value) {
|
SIMDJSON_WARN_UNUSED simdjson_really_inline error_code tape_builder::visit_object_end(json_iterator &iter) noexcept {
|
||||||
iter.log_value("false");
|
iter.log_end_value("object");
|
||||||
if (!atomparsing::is_valid_false_atom(value)) { return F_ATOM_ERROR; }
|
return end_container(iter, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT);
|
||||||
tape.append(0, internal::tape_type::FALSE_VALUE);
|
}
|
||||||
return SUCCESS;
|
SIMDJSON_WARN_UNUSED simdjson_really_inline error_code tape_builder::visit_array_end(json_iterator &iter) noexcept {
|
||||||
}
|
iter.log_end_value("array");
|
||||||
|
return end_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY);
|
||||||
|
}
|
||||||
|
// Called when the document ends: logs the end, appends a ROOT tape element
// whose payload is the start index (0), then writes the ROOT element at tape
// index 0 with the current next tape index as its payload. Always returns
// SUCCESS.
SIMDJSON_WARN_UNUSED simdjson_really_inline error_code tape_builder::visit_document_end(json_iterator &iter) noexcept {
|
||||||
|
iter.log_end_value("document");
|
||||||
|
constexpr uint32_t start_tape_index = 0;
|
||||||
|
// Closing ROOT element, carrying the start index.
tape.append(start_tape_index, internal::tape_type::ROOT);
|
||||||
|
// Opening ROOT element at index 0, carrying the tape index after the append above.
tape_writer::write(iter.dom_parser.doc->tape[start_tape_index], next_tape_index(iter), internal::tape_type::ROOT);
|
||||||
|
return SUCCESS;
|
||||||
|
}
|
||||||
|
// Handles an object key by calling visit_string with its key flag set to true.
SIMDJSON_WARN_UNUSED simdjson_really_inline error_code tape_builder::visit_key(json_iterator &iter, const uint8_t *key) noexcept {
|
||||||
|
return visit_string(iter, key, true);
|
||||||
|
}
|
||||||
|
|
||||||
SIMDJSON_WARN_UNUSED simdjson_really_inline error_code visit_root_false_atom(json_iterator &iter, const uint8_t *value) {
|
SIMDJSON_WARN_UNUSED simdjson_really_inline error_code tape_builder::increment_count(json_iterator &iter) noexcept {
|
||||||
iter.log_value("false");
|
iter.dom_parser.open_containers[iter.depth].count++; // we have a key value pair in the object at parser.dom_parser.depth - 1
|
||||||
if (!atomparsing::is_valid_false_atom(value, iter.remaining_len())) { return F_ATOM_ERROR; }
|
return SUCCESS;
|
||||||
tape.append(0, internal::tape_type::FALSE_VALUE);
|
}
|
||||||
return SUCCESS;
|
simdjson_really_inline bool tape_builder::in_array(json_iterator &iter) noexcept {
|
||||||
}
|
return iter.dom_parser.is_array[iter.depth];
|
||||||
|
}
|
||||||
|
|
||||||
SIMDJSON_WARN_UNUSED simdjson_really_inline error_code visit_null_atom(json_iterator &iter, const uint8_t *value) {
|
simdjson_really_inline tape_builder::tape_builder(dom::document &doc) noexcept : tape{doc.tape.get()}, current_string_buf_loc{doc.string_buf.get()} {}
|
||||||
iter.log_value("null");
|
|
||||||
if (!atomparsing::is_valid_null_atom(value)) { return N_ATOM_ERROR; }
|
|
||||||
tape.append(0, internal::tape_type::NULL_VALUE);
|
|
||||||
return SUCCESS;
|
|
||||||
}
|
|
||||||
|
|
||||||
SIMDJSON_WARN_UNUSED simdjson_really_inline error_code visit_root_null_atom(json_iterator &iter, const uint8_t *value) {
|
SIMDJSON_WARN_UNUSED simdjson_really_inline error_code tape_builder::visit_string(json_iterator &iter, const uint8_t *value, bool key) noexcept {
|
||||||
iter.log_value("null");
|
iter.log_value(key ? "key" : "string");
|
||||||
if (!atomparsing::is_valid_null_atom(value, iter.remaining_len())) { return N_ATOM_ERROR; }
|
uint8_t *dst = on_start_string(iter);
|
||||||
tape.append(0, internal::tape_type::NULL_VALUE);
|
dst = stringparsing::parse_string(value, dst);
|
||||||
return SUCCESS;
|
if (dst == nullptr) {
|
||||||
|
iter.log_error("Invalid escape in string");
|
||||||
|
return STRING_ERROR;
|
||||||
}
|
}
|
||||||
|
on_end_string(dst);
|
||||||
|
return SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Handles a number: logs it, then hands value and the tape to
// numberparsing::parse_number. Returns SUCCESS when parse_number reports
// success; otherwise logs "Invalid number" and returns NUMBER_ERROR.
SIMDJSON_WARN_UNUSED simdjson_really_inline error_code tape_builder::visit_number(json_iterator &iter, const uint8_t *value) noexcept {
|
||||||
|
iter.log_value("number");
|
||||||
|
if (!numberparsing::parse_number(value, tape)) { iter.log_error("Invalid number"); return NUMBER_ERROR; }
|
||||||
|
return SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Handles a number that is the whole document. Copies the remaining input
// into a temporary heap buffer followed by SIMDJSON_PADDING space characters,
// runs visit_number on the copy, frees the buffer, and returns visit_number's
// result. Returns MEMALLOC if the temporary buffer cannot be allocated.
SIMDJSON_WARN_UNUSED simdjson_really_inline error_code tape_builder::visit_root_number(json_iterator &iter, const uint8_t *value) noexcept {
|
||||||
|
//
|
||||||
|
// We need to make a copy to make sure that the string is space terminated.
|
||||||
|
// This is not about padding the input, which should already be padded up
|
||||||
|
// to len + SIMDJSON_PADDING. However, we have no control at this stage
|
||||||
|
// on how the padding was done. What if the input string was padded with nulls?
|
||||||
|
// It is quite common for an input string to have an extra null character (C string).
|
||||||
|
// We do not want to allow 9\0 (where \0 is the null character) inside a JSON
|
||||||
|
// document, but the string "9\0" by itself is fine. So we make a copy and
|
||||||
|
// pad the input with spaces when we know that there is just one input element.
|
||||||
|
// This copy is relatively expensive, but it will almost never be called in
|
||||||
|
// practice unless you are in the strange scenario where you have many JSON
|
||||||
|
// documents made of single atoms.
|
||||||
|
//
|
||||||
|
// Room for the remaining input plus the space padding written below.
uint8_t *copy = static_cast<uint8_t *>(malloc(iter.remaining_len() + SIMDJSON_PADDING));
|
||||||
|
if (copy == nullptr) { return MEMALLOC; }
|
||||||
|
memcpy(copy, value, iter.remaining_len());
|
||||||
|
// Fill the tail with spaces so the number is space terminated.
memset(copy + iter.remaining_len(), ' ', SIMDJSON_PADDING);
|
||||||
|
error_code error = visit_number(iter, copy);
|
||||||
|
// The copy is released on both the success and the error path.
free(copy);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Handles a non-root `true` atom: logs it, checks it with
// atomparsing::is_valid_true_atom(value), and on success appends a TRUE_VALUE
// tape element with payload 0. Returns T_ATOM_ERROR when the check fails,
// SUCCESS otherwise.
SIMDJSON_WARN_UNUSED simdjson_really_inline error_code tape_builder::visit_true_atom(json_iterator &iter, const uint8_t *value) noexcept {
|
||||||
|
iter.log_value("true");
|
||||||
|
if (!atomparsing::is_valid_true_atom(value)) { return T_ATOM_ERROR; }
|
||||||
|
tape.append(0, internal::tape_type::TRUE_VALUE);
|
||||||
|
return SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Handles a `true` atom that is the whole document. Same as visit_true_atom,
// except that iter.remaining_len() is also passed to the validity check.
// NOTE(review): presumably the length lets the check avoid reading past the
// end of the input -- confirm in atomparsing.
SIMDJSON_WARN_UNUSED simdjson_really_inline error_code tape_builder::visit_root_true_atom(json_iterator &iter, const uint8_t *value) noexcept {
|
||||||
|
iter.log_value("true");
|
||||||
|
if (!atomparsing::is_valid_true_atom(value, iter.remaining_len())) { return T_ATOM_ERROR; }
|
||||||
|
tape.append(0, internal::tape_type::TRUE_VALUE);
|
||||||
|
return SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Handles a non-root `false` atom: logs it, checks it with
// atomparsing::is_valid_false_atom(value), and on success appends a
// FALSE_VALUE tape element with payload 0. Returns F_ATOM_ERROR when the check
// fails, SUCCESS otherwise.
SIMDJSON_WARN_UNUSED simdjson_really_inline error_code tape_builder::visit_false_atom(json_iterator &iter, const uint8_t *value) noexcept {
|
||||||
|
iter.log_value("false");
|
||||||
|
if (!atomparsing::is_valid_false_atom(value)) { return F_ATOM_ERROR; }
|
||||||
|
tape.append(0, internal::tape_type::FALSE_VALUE);
|
||||||
|
return SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Handles a `false` atom that is the whole document. Same as
// visit_false_atom, except that iter.remaining_len() is also passed to the
// validity check.
// NOTE(review): presumably the length lets the check avoid reading past the
// end of the input -- confirm in atomparsing.
SIMDJSON_WARN_UNUSED simdjson_really_inline error_code tape_builder::visit_root_false_atom(json_iterator &iter, const uint8_t *value) noexcept {
|
||||||
|
iter.log_value("false");
|
||||||
|
if (!atomparsing::is_valid_false_atom(value, iter.remaining_len())) { return F_ATOM_ERROR; }
|
||||||
|
tape.append(0, internal::tape_type::FALSE_VALUE);
|
||||||
|
return SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Handles a non-root `null` atom: logs it, checks it with
// atomparsing::is_valid_null_atom(value), and on success appends a NULL_VALUE
// tape element with payload 0. Returns N_ATOM_ERROR when the check fails,
// SUCCESS otherwise.
SIMDJSON_WARN_UNUSED simdjson_really_inline error_code tape_builder::visit_null_atom(json_iterator &iter, const uint8_t *value) noexcept {
|
||||||
|
iter.log_value("null");
|
||||||
|
if (!atomparsing::is_valid_null_atom(value)) { return N_ATOM_ERROR; }
|
||||||
|
tape.append(0, internal::tape_type::NULL_VALUE);
|
||||||
|
return SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Handles a `null` atom that is the whole document. Same as visit_null_atom,
// except that iter.remaining_len() is also passed to the validity check.
// NOTE(review): presumably the length lets the check avoid reading past the
// end of the input -- confirm in atomparsing.
SIMDJSON_WARN_UNUSED simdjson_really_inline error_code tape_builder::visit_root_null_atom(json_iterator &iter, const uint8_t *value) noexcept {
|
||||||
|
iter.log_value("null");
|
||||||
|
if (!atomparsing::is_valid_null_atom(value, iter.remaining_len())) { return N_ATOM_ERROR; }
|
||||||
|
tape.append(0, internal::tape_type::NULL_VALUE);
|
||||||
|
return SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
// private:
|
// private:
|
||||||
|
|
||||||
simdjson_really_inline uint32_t next_tape_index(json_iterator &iter) {
|
simdjson_really_inline uint32_t tape_builder::next_tape_index(json_iterator &iter) const noexcept {
|
||||||
return uint32_t(tape.next_tape_loc - iter.dom_parser.doc->tape.get());
|
return uint32_t(tape.next_tape_loc - iter.dom_parser.doc->tape.get());
|
||||||
}
|
}
|
||||||
|
|
||||||
SIMDJSON_WARN_UNUSED simdjson_really_inline error_code empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) {
|
SIMDJSON_WARN_UNUSED simdjson_really_inline error_code tape_builder::empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept {
|
||||||
auto start_index = next_tape_index(iter);
|
auto start_index = next_tape_index(iter);
|
||||||
tape.append(start_index+2, start);
|
tape.append(start_index+2, start);
|
||||||
tape.append(start_index, end);
|
tape.append(start_index, end);
|
||||||
return SUCCESS;
|
return SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
simdjson_really_inline void start_container(json_iterator &iter) {
|
simdjson_really_inline void tape_builder::start_container(json_iterator &iter) noexcept {
|
||||||
iter.dom_parser.open_containers[iter.depth].tape_index = next_tape_index(iter);
|
iter.dom_parser.open_containers[iter.depth].tape_index = next_tape_index(iter);
|
||||||
iter.dom_parser.open_containers[iter.depth].count = 0;
|
iter.dom_parser.open_containers[iter.depth].count = 0;
|
||||||
tape.skip(); // We don't actually *write* the start element until the end.
|
tape.skip(); // We don't actually *write* the start element until the end.
|
||||||
}
|
}
|
||||||
|
|
||||||
SIMDJSON_WARN_UNUSED simdjson_really_inline error_code end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept {
|
SIMDJSON_WARN_UNUSED simdjson_really_inline error_code tape_builder::end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept {
|
||||||
// Write the ending tape element, pointing at the start location
|
// Write the ending tape element, pointing at the start location
|
||||||
const uint32_t start_tape_index = iter.dom_parser.open_containers[iter.depth].tape_index;
|
const uint32_t start_tape_index = iter.dom_parser.open_containers[iter.depth].tape_index;
|
||||||
tape.append(start_tape_index, end);
|
tape.append(start_tape_index, end);
|
||||||
// Write the start tape element, pointing at the end location (and including count)
|
// Write the start tape element, pointing at the end location (and including count)
|
||||||
// count can overflow if it exceeds 24 bits... so we saturate
|
// count can overflow if it exceeds 24 bits... so we saturate
|
||||||
// the convention being that a cnt of 0xffffff or more is undetermined in value (>= 0xffffff).
|
// the convention being that a cnt of 0xffffff or more is undetermined in value (>= 0xffffff).
|
||||||
const uint32_t count = iter.dom_parser.open_containers[iter.depth].count;
|
const uint32_t count = iter.dom_parser.open_containers[iter.depth].count;
|
||||||
const uint32_t cntsat = count > 0xFFFFFF ? 0xFFFFFF : count;
|
const uint32_t cntsat = count > 0xFFFFFF ? 0xFFFFFF : count;
|
||||||
tape_writer::write(iter.dom_parser.doc->tape[start_tape_index], next_tape_index(iter) | (uint64_t(cntsat) << 32), start);
|
tape_writer::write(iter.dom_parser.doc->tape[start_tape_index], next_tape_index(iter) | (uint64_t(cntsat) << 32), start);
|
||||||
return SUCCESS;
|
return SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
simdjson_really_inline uint8_t *on_start_string(json_iterator &iter) noexcept {
|
simdjson_really_inline uint8_t *tape_builder::on_start_string(json_iterator &iter) noexcept {
|
||||||
// we advance the point, accounting for the fact that we have a NULL termination
|
// we advance the point, accounting for the fact that we have a NULL termination
|
||||||
tape.append(current_string_buf_loc - iter.dom_parser.doc->string_buf.get(), internal::tape_type::STRING);
|
tape.append(current_string_buf_loc - iter.dom_parser.doc->string_buf.get(), internal::tape_type::STRING);
|
||||||
return current_string_buf_loc + sizeof(uint32_t);
|
return current_string_buf_loc + sizeof(uint32_t);
|
||||||
}
|
}
|
||||||
|
|
||||||
simdjson_really_inline void on_end_string(uint8_t *dst) noexcept {
|
simdjson_really_inline void tape_builder::on_end_string(uint8_t *dst) noexcept {
|
||||||
uint32_t str_length = uint32_t(dst - (current_string_buf_loc + sizeof(uint32_t)));
|
uint32_t str_length = uint32_t(dst - (current_string_buf_loc + sizeof(uint32_t)));
|
||||||
// TODO check for overflow in case someone has a crazy string (>=4GB?)
|
// TODO check for overflow in case someone has a crazy string (>=4GB?)
|
||||||
// But only add the overflow check when the document itself exceeds 4GB
|
// But only add the overflow check when the document itself exceeds 4GB
|
||||||
// Currently unneeded because we refuse to parse docs larger or equal to 4GB.
|
// Currently unneeded because we refuse to parse docs larger or equal to 4GB.
|
||||||
memcpy(current_string_buf_loc, &str_length, sizeof(uint32_t));
|
memcpy(current_string_buf_loc, &str_length, sizeof(uint32_t));
|
||||||
// NULL termination is still handy if you expect all your strings to
|
// NULL termination is still handy if you expect all your strings to
|
||||||
// be NULL terminated? It comes at a small cost
|
// be NULL terminated? It comes at a small cost
|
||||||
*dst = 0;
|
*dst = 0;
|
||||||
current_string_buf_loc = dst + 1;
|
current_string_buf_loc = dst + 1;
|
||||||
}
|
}
|
||||||
}; // class tape_builder
|
|
||||||
|
|
||||||
} // namespace stage2
|
} // namespace stage2
|
||||||
} // namespace SIMDJSON_IMPLEMENTATION
|
} // namespace SIMDJSON_IMPLEMENTATION
|
||||||
|
|
Loading…
Reference in New Issue