diff --git a/include/simdjson/dom/array-inl.h b/include/simdjson/dom/array-inl.h index 39d51a83..adf94f78 100644 --- a/include/simdjson/dom/array-inl.h +++ b/include/simdjson/dom/array-inl.h @@ -61,6 +61,9 @@ inline array::iterator array::end() const noexcept { inline size_t array::size() const noexcept { return tape.scope_count(); } +inline size_t array::slots() const noexcept { + return tape.matching_brace_index() - tape.json_index; +} inline simdjson_result array::at_pointer(std::string_view json_pointer) const noexcept { if(json_pointer.empty()) { // an empty string means that we return the current node return element(this->tape); // copy the current node diff --git a/include/simdjson/dom/array.h b/include/simdjson/dom/array.h index 30b2dd99..0fc193e0 100644 --- a/include/simdjson/dom/array.h +++ b/include/simdjson/dom/array.h @@ -85,6 +85,17 @@ public: * is 0xFFFFFF then the size is 0xFFFFFF or greater. */ inline size_t size() const noexcept; + /** + * Get the total number of slots used by this array on the tape. + * + * Note that this is not the same thing as `size()`, which reports the + * number of actual elements within an array (not counting its children). + * + * Since an element can use 1 or 2 slots on the tape, you can only use this + * to figure out the total size of an array (including its children, + * recursively) if you know its structure ahead of time. + **/ + inline size_t slots() const noexcept; /** * Get the value associated with the given JSON pointer. We use the RFC 6901 * https://tools.ietf.org/html/rfc6901 standard, interpreting the current node diff --git a/singleheader/simdjson.cpp b/singleheader/simdjson.cpp index 39c18497..1b9897ef 100644 --- a/singleheader/simdjson.cpp +++ b/singleheader/simdjson.cpp @@ -1,4 +1,4 @@ -/* auto-generated on 2020-11-03 21:40:10 +0100. Do not edit! */ +/* auto-generated on Sun Oct 25 19:17:26 EDT 2020. Do not edit! */ /* begin file src/simdjson.cpp */ #include "simdjson.h" @@ -960,7 +960,7 @@ namespace simdjson { namespace internal { /** - * The code in the internal::from_chars function is meant to handle the floating-point number parsing + * The code in the internal::from_chars function is meant to handle the floating-point number parsing * when we have more than 19 digits in the decimal mantissa. This should only be seen * in adversarial scenarios: we do not expect production systems to even produce * such floating-point numbers. @@ -1020,6 +1020,7 @@ decimal parse_decimal(const char *&p) noexcept { decimal answer; answer.num_digits = 0; answer.decimal_point = 0; + answer.negative = false; answer.truncated = false; answer.negative = (*p == '-'); if ((*p == '-') || (*p == '+')) { @@ -1030,15 +1031,17 @@ decimal parse_decimal(const char *&p) noexcept { ++p; } while (is_integer(*p)) { - if (answer.num_digits < max_digits) { - answer.digits[answer.num_digits] = uint8_t(*p - '0'); + if (answer.num_digits + 1 < max_digits) { + answer.digits[answer.num_digits++] = uint8_t(*p - '0'); + } else { + answer.truncated = true; } - answer.num_digits++; ++p; } + const char *first_after_period{}; if (*p == '.') { ++p; - const char *first_after_period = p; + first_after_period = p; // if we have not yet encountered a zero, we have to skip it as well if (answer.num_digits == 0) { // skip zeros @@ -1047,10 +1050,11 @@ decimal parse_decimal(const char *&p) noexcept { } } while (is_integer(*p)) { - if (answer.num_digits < max_digits) { - answer.digits[answer.num_digits] = uint8_t(*p - '0'); + if (answer.num_digits + 1 < max_digits) { + answer.digits[answer.num_digits++] = uint8_t(*p - '0'); + } else { + answer.truncated = true; } - answer.num_digits++; ++p; } answer.decimal_point = int32_t(first_after_period - p); @@ -1076,10 +1080,6 @@ decimal parse_decimal(const char *&p) noexcept { answer.decimal_point += (neg_exp ? -exp_number : exp_number); } answer.decimal_point += answer.num_digits; - if(answer.num_digits > max_digits ) { - answer.num_digits = max_digits; - answer.truncated = true; - } return answer; } @@ -1314,21 +1314,21 @@ template adjusted_mantissa compute_float(decimal &d) { } // At this point, going further, we can assume that d.num_digits > 0. // We want to guard against excessive decimal point values because - // they can result in long running times. Indeed, we do + // they can result in long running times. Indeed, we do // shifts by at most 60 bits. We have that log(10**400)/log(2**60) ~= 22 // which is fine, but log(10**299995)/log(2**60) ~= 16609 which is not // fine (runs for a long time). // if(d.decimal_point < -324) { // We have something smaller than 1e-324 which is always zero - // in binary64 and binary32. + // in binary64 and binary32. // It should be zero. answer.power2 = 0; answer.mantissa = 0; return answer; } else if(d.decimal_point >= 310) { // We have something at least as large as 0.1e310 which is - // always infinite. + // always infinite. answer.power2 = binary::infinite_power(); answer.mantissa = 0; return answer; @@ -1690,7 +1690,7 @@ SIMDJSON_DLLIMPORTEXPORT const double power_of_ten[] = { * are not a concern since they can be represented * exactly using the binary notation, only the powers of five * affect the binary significand. - */ + */ // The truncated powers of five from 5^-342 all the way to 5^308 @@ -2353,7 +2353,7 @@ SIMDJSON_DLLIMPORTEXPORT const uint64_t power_of_five_128[]= { } // namespace simdjson /* end file src/internal/numberparsing_tables.cpp */ /* begin file src/internal/simdprune_tables.cpp */ -#if SIMDJSON_IMPLEMENTATION_ARM64 || SIMDJSON_IMPLEMENTATION_HASWELL || SIMDJSON_IMPLEMENTATION_WESTMERE || SIMDJSON_IMPLEMENTATION_PPC64 +#if SIMDJSON_IMPLEMENTATION_ARM64 || SIMDJSON_IMPLEMENTATION_HASWELL || SIMDJSON_IMPLEMENTATION_WESTMERE #include @@ -2378,16 +2378,16 @@ SIMDJSON_DLLIMPORTEXPORT const unsigned char BitsSetTable256mul2[256] = { SIMDJSON_DLLIMPORTEXPORT const uint8_t pshufb_combine_table[272] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x08, - 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0x00, 0x01, 0x02, 0x03, - 0x04, 0x05, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0xff, + 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x80, 0x00, 0x01, 0x02, 0x03, + 0x04, 0x05, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x80, 0x80, 0x00, 0x01, 0x02, 0x03, 0x04, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, - 0x0f, 0xff, 0xff, 0xff, 0x00, 0x01, 0x02, 0x03, 0x08, 0x09, 0x0a, 0x0b, - 0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff, 0x00, 0x01, 0x02, 0x08, - 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, - 0x00, 0x01, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0x00, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, - 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x08, 0x09, 0x0a, 0x0b, - 0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x0f, 0x80, 0x80, 0x80, 0x00, 0x01, 0x02, 0x03, 0x08, 0x09, 0x0a, 0x0b, + 0x0c, 0x0d, 0x0e, 0x0f, 0x80, 0x80, 0x80, 0x80, 0x00, 0x01, 0x02, 0x08, + 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x00, 0x01, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x80, 0x80, + 0x80, 0x80, 0x80, 0x80, 0x00, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, + 0x0f, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x08, 0x09, 0x0a, 0x0b, + 0x0c, 0x0d, 0x0e, 0x0f, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, }; // 256 * 8 bytes = 2kB, easily fits in cache. @@ -2481,9 +2481,9 @@ SIMDJSON_DLLIMPORTEXPORT const uint64_t thintable_epi8[256] = { }; //static uint64_t thintable_epi8[256] } // namespace internal -} // namespace simdjson +} // namespace simdjson -#endif // SIMDJSON_IMPLEMENTATION_ARM64 || SIMDJSON_IMPLEMENTATION_HASWELL || SIMDJSON_IMPLEMENTATION_WESTMERE || SIMDJSON_IMPLEMENTATION_PPC64 +#endif // SIMDJSON_IMPLEMENTATION_ARM64 || SIMDJSON_IMPLEMENTATION_HASWELL || SIMDJSON_IMPLEMENTATION_WESTMERE /* end file src/internal/simdprune_tables.cpp */ /* begin file src/implementation.cpp */ #include @@ -2510,9 +2510,6 @@ const westmere::implementation westmere_singleton{}; #if SIMDJSON_IMPLEMENTATION_ARM64 const arm64::implementation arm64_singleton{}; #endif // SIMDJSON_IMPLEMENTATION_ARM64 -#if SIMDJSON_IMPLEMENTATION_PPC64 -const ppc64::implementation ppc64_singleton{}; -#endif // SIMDJSON_IMPLEMENTATION_PPC64 #if SIMDJSON_IMPLEMENTATION_FALLBACK const fallback::implementation fallback_singleton{}; #endif // SIMDJSON_IMPLEMENTATION_FALLBACK @@ -2555,9 +2552,6 @@ const std::initializer_list available_implementation_poi #if SIMDJSON_IMPLEMENTATION_ARM64 &arm64_singleton, #endif -#if SIMDJSON_IMPLEMENTATION_PPC64 - &ppc64_singleton, -#endif #if SIMDJSON_IMPLEMENTATION_FALLBACK &fallback_singleton, #endif @@ -2676,7 +2670,7 @@ simdjson_warn_unused error_code implementation::create_dom_parser_implementation /* begin file include/simdjson/arm64/end.h */ #undef SIMDJSON_IMPLEMENTATION /* end file include/simdjson/arm64/end.h */ -/* end file src/arm64/implementation.cpp */ +/* end file include/simdjson/arm64/end.h */ /* begin file src/arm64/dom_parser_implementation.cpp */ /* begin file include/simdjson/arm64/begin.h */ #define SIMDJSON_IMPLEMENTATION arm64 @@ -2927,7 +2921,7 @@ using namespace simd; } // The only problem that can happen at EOF is that a multibyte character is too short - // or a byte value too large in the last bytes: check_special_cases only checks for bytes + // or a byte value too large in the last bytes: check_special_cases only checks for bytes // too large in the first of two bytes. simdjson_really_inline void check_eof() { // If the previous block had incomplete UTF-8 characters at the end, an ASCII block can't @@ -3229,7 +3223,7 @@ namespace stage1 { * We seek to identify pseudo-structural characters. Anything that is inside * a string must be omitted (hence & ~_string.string_tail()). * Otherwise, pseudo-structural characters come in two forms. - * 1. We have the structural characters ([,],{,},:, comma). The + * 1. We have the structural characters ([,],{,},:, comma). The * term 'structural character' is from the JSON RFC. * 2. We have the 'scalar pseudo-structural characters'. * Scalars are quotes, and any character except structural characters and white space. @@ -3439,7 +3433,7 @@ error_code json_minifier::minify(const uint8_t *buf, size_t len, uint8_t *dst, s // Index the last (remainder) block, padded with spaces uint8_t block[STEP_SIZE]; - size_t remaining_bytes = reader.get_remainder(block); + size_t remaining_bytes = reader.get_remainder(block); if (remaining_bytes > 0) { // We do not want to write directly to the output stream. Rather, we write // to a local buffer (for safety). @@ -3760,7 +3754,7 @@ simdjson_really_inline error_code json_structural_indexer::finish(dom_parser_imp } // unnamed namespace } // namespace SIMDJSON_IMPLEMENTATION } // namespace simdjson -/* end file src/generic/stage1/json_structural_indexer.h */ +/* end file src/generic/stage1/find_next_document_index.h */ /* begin file src/generic/stage1/utf8_validator.h */ namespace simdjson { namespace SIMDJSON_IMPLEMENTATION { @@ -4210,7 +4204,7 @@ simdjson_warn_unused simdjson_really_inline error_code json_iterator::visit_prim } // unnamed namespace } // namespace SIMDJSON_IMPLEMENTATION } // namespace simdjson -/* end file src/generic/stage2/json_iterator.h */ +/* end file src/generic/stage2/logger.h */ /* begin file src/generic/stage2/tape_writer.h */ namespace simdjson { namespace SIMDJSON_IMPLEMENTATION { @@ -4220,7 +4214,7 @@ namespace stage2 { struct tape_writer { /** The next place to write to tape */ uint64_t *next_tape_loc; - + /** Write a signed 64-bit value to tape. */ simdjson_really_inline void append_s64(int64_t value) noexcept; @@ -4600,7 +4594,7 @@ simdjson_really_inline void tape_builder::on_end_string(uint8_t *dst) noexcept { } // unnamed namespace } // namespace SIMDJSON_IMPLEMENTATION } // namespace simdjson -/* end file src/generic/stage2/tape_builder.h */ +/* end file src/generic/stage2/tape_writer.h */ // // Implementation-specific overrides @@ -4654,7 +4648,7 @@ simdjson_warn_unused error_code dom_parser_implementation::parse(const uint8_t * /* begin file include/simdjson/arm64/end.h */ #undef SIMDJSON_IMPLEMENTATION /* end file include/simdjson/arm64/end.h */ -/* end file src/arm64/dom_parser_implementation.cpp */ +/* end file include/simdjson/arm64/end.h */ #endif #if SIMDJSON_IMPLEMENTATION_FALLBACK /* begin file src/fallback/implementation.cpp */ @@ -4683,7 +4677,7 @@ simdjson_warn_unused error_code implementation::create_dom_parser_implementation /* begin file include/simdjson/fallback/end.h */ #undef SIMDJSON_IMPLEMENTATION /* end file include/simdjson/fallback/end.h */ -/* end file src/fallback/implementation.cpp */ +/* end file include/simdjson/fallback/end.h */ /* begin file src/fallback/dom_parser_implementation.cpp */ /* begin file include/simdjson/fallback/begin.h */ #define SIMDJSON_IMPLEMENTATION fallback @@ -5007,13 +5001,14 @@ simdjson_warn_unused error_code implementation::minify(const uint8_t *buf, size_ } // credit: based on code from Google Fuchsia (Apache Licensed) -simdjson_warn_unused bool implementation::validate_utf8(const char *buf, size_t len) const noexcept { +simdjson_warn_unused bool implementation::validate_utf8(const char *buf, size_t len) const noexcept { const uint8_t *data = (const uint8_t *)buf; uint64_t pos = 0; + uint64_t next_pos = 0; uint32_t code_point = 0; while (pos < len) { // check of the next 8 bytes are ascii. - uint64_t next_pos = pos + 16; + next_pos = pos + 16; if (next_pos <= len) { // if it is safe to read 8 more bytes, check that they are ascii uint64_t v1; memcpy(&v1, data + pos, sizeof(uint64_t)); @@ -5483,7 +5478,7 @@ simdjson_warn_unused simdjson_really_inline error_code json_iterator::visit_prim } // unnamed namespace } // namespace SIMDJSON_IMPLEMENTATION } // namespace simdjson -/* end file src/generic/stage2/json_iterator.h */ +/* end file src/generic/stage2/logger.h */ /* begin file src/generic/stage2/tape_writer.h */ namespace simdjson { namespace SIMDJSON_IMPLEMENTATION { @@ -5493,7 +5488,7 @@ namespace stage2 { struct tape_writer { /** The next place to write to tape */ uint64_t *next_tape_loc; - + /** Write a signed 64-bit value to tape. */ simdjson_really_inline void append_s64(int64_t value) noexcept; @@ -5873,7 +5868,7 @@ simdjson_really_inline void tape_builder::on_end_string(uint8_t *dst) noexcept { } // unnamed namespace } // namespace SIMDJSON_IMPLEMENTATION } // namespace simdjson -/* end file src/generic/stage2/tape_builder.h */ +/* end file src/generic/stage2/tape_writer.h */ namespace simdjson { namespace SIMDJSON_IMPLEMENTATION { @@ -5898,7 +5893,7 @@ simdjson_warn_unused error_code dom_parser_implementation::parse(const uint8_t * /* begin file include/simdjson/fallback/end.h */ #undef SIMDJSON_IMPLEMENTATION /* end file include/simdjson/fallback/end.h */ -/* end file src/fallback/dom_parser_implementation.cpp */ +/* end file include/simdjson/fallback/end.h */ #endif #if SIMDJSON_IMPLEMENTATION_HASWELL /* begin file src/haswell/implementation.cpp */ @@ -5930,7 +5925,7 @@ SIMDJSON_UNTARGET_REGION #undef SIMDJSON_IMPLEMENTATION /* end file include/simdjson/haswell/end.h */ -/* end file src/haswell/implementation.cpp */ +/* end file include/simdjson/haswell/end.h */ /* begin file src/haswell/dom_parser_implementation.cpp */ /* begin file include/simdjson/haswell/begin.h */ #define SIMDJSON_IMPLEMENTATION haswell @@ -6015,7 +6010,7 @@ simdjson_really_inline json_character_block json_character_block::classify(const _mm256_shuffle_epi8(op_table, in.chunks[0]), _mm256_shuffle_epi8(op_table, in.chunks[1]) }); - + return { whitespace, op }; } @@ -6186,7 +6181,7 @@ using namespace simd; } // The only problem that can happen at EOF is that a multibyte character is too short - // or a byte value too large in the last bytes: check_special_cases only checks for bytes + // or a byte value too large in the last bytes: check_special_cases only checks for bytes // too large in the first of two bytes. simdjson_really_inline void check_eof() { // If the previous block had incomplete UTF-8 characters at the end, an ASCII block can't @@ -6488,7 +6483,7 @@ namespace stage1 { * We seek to identify pseudo-structural characters. Anything that is inside * a string must be omitted (hence & ~_string.string_tail()). * Otherwise, pseudo-structural characters come in two forms. - * 1. We have the structural characters ([,],{,},:, comma). The + * 1. We have the structural characters ([,],{,},:, comma). The * term 'structural character' is from the JSON RFC. * 2. We have the 'scalar pseudo-structural characters'. * Scalars are quotes, and any character except structural characters and white space. @@ -6698,7 +6693,7 @@ error_code json_minifier::minify(const uint8_t *buf, size_t len, uint8_t *dst, s // Index the last (remainder) block, padded with spaces uint8_t block[STEP_SIZE]; - size_t remaining_bytes = reader.get_remainder(block); + size_t remaining_bytes = reader.get_remainder(block); if (remaining_bytes > 0) { // We do not want to write directly to the output stream. Rather, we write // to a local buffer (for safety). @@ -7019,7 +7014,7 @@ simdjson_really_inline error_code json_structural_indexer::finish(dom_parser_imp } // unnamed namespace } // namespace SIMDJSON_IMPLEMENTATION } // namespace simdjson -/* end file src/generic/stage1/json_structural_indexer.h */ +/* end file src/generic/stage1/find_next_document_index.h */ /* begin file src/generic/stage1/utf8_validator.h */ namespace simdjson { namespace SIMDJSON_IMPLEMENTATION { @@ -7468,7 +7463,7 @@ simdjson_warn_unused simdjson_really_inline error_code json_iterator::visit_prim } // unnamed namespace } // namespace SIMDJSON_IMPLEMENTATION } // namespace simdjson -/* end file src/generic/stage2/json_iterator.h */ +/* end file src/generic/stage2/logger.h */ /* begin file src/generic/stage2/tape_writer.h */ namespace simdjson { namespace SIMDJSON_IMPLEMENTATION { @@ -7478,7 +7473,7 @@ namespace stage2 { struct tape_writer { /** The next place to write to tape */ uint64_t *next_tape_loc; - + /** Write a signed 64-bit value to tape. */ simdjson_really_inline void append_s64(int64_t value) noexcept; @@ -7858,7 +7853,7 @@ simdjson_really_inline void tape_builder::on_end_string(uint8_t *dst) noexcept { } // unnamed namespace } // namespace SIMDJSON_IMPLEMENTATION } // namespace simdjson -/* end file src/generic/stage2/tape_builder.h */ +/* end file src/generic/stage2/tape_writer.h */ // // Implementation-specific overrides @@ -7911,1984 +7906,7 @@ simdjson_warn_unused error_code dom_parser_implementation::parse(const uint8_t * SIMDJSON_UNTARGET_REGION #undef SIMDJSON_IMPLEMENTATION /* end file include/simdjson/haswell/end.h */ -/* end file src/haswell/dom_parser_implementation.cpp */ -#endif -#if SIMDJSON_IMPLEMENTATION_PPC64 -/* begin file src/ppc64/implementation.cpp */ -/* begin file include/simdjson/ppc64/begin.h */ -#define SIMDJSON_IMPLEMENTATION ppc64 -/* end file include/simdjson/ppc64/begin.h */ - -namespace simdjson { -namespace SIMDJSON_IMPLEMENTATION { - -simdjson_warn_unused error_code implementation::create_dom_parser_implementation( - size_t capacity, - size_t max_depth, - std::unique_ptr& dst -) const noexcept { - dst.reset( new (std::nothrow) dom_parser_implementation() ); - if (!dst) { return MEMALLOC; } - dst->set_capacity(capacity); - dst->set_max_depth(max_depth); - return SUCCESS; -} - -} // namespace SIMDJSON_IMPLEMENTATION -} // namespace simdjson - -/* begin file include/simdjson/ppc64/end.h */ -#undef SIMDJSON_IMPLEMENTATION -/* end file include/simdjson/ppc64/end.h */ -/* end file src/ppc64/implementation.cpp */ -/* begin file src/ppc64/dom_parser_implementation.cpp */ -/* begin file include/simdjson/ppc64/begin.h */ -#define SIMDJSON_IMPLEMENTATION ppc64 -/* end file include/simdjson/ppc64/begin.h */ - -// -// Stage 1 -// -namespace simdjson { -namespace SIMDJSON_IMPLEMENTATION { -namespace { - -using namespace simd; - -struct json_character_block { - static simdjson_really_inline json_character_block classify(const simd::simd8x64& in); - - simdjson_really_inline uint64_t whitespace() const { return _whitespace; } - simdjson_really_inline uint64_t op() const { return _op; } - simdjson_really_inline uint64_t scalar() { return ~(op() | whitespace()); } - - uint64_t _whitespace; - uint64_t _op; -}; - -simdjson_really_inline json_character_block json_character_block::classify(const simd::simd8x64& in) { - const simd8 table1(16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0); - const simd8 table2(8, 0, 18, 4, 0, 1, 0, 1, 0, 0, 0, 3, 2, 1, 0, 0); - - simd8x64 v( - (in.chunks[0] & 0xf).lookup_16(table1) & (in.chunks[0].shr<4>()).lookup_16(table2), - (in.chunks[1] & 0xf).lookup_16(table1) & (in.chunks[1].shr<4>()).lookup_16(table2), - (in.chunks[2] & 0xf).lookup_16(table1) & (in.chunks[2].shr<4>()).lookup_16(table2), - (in.chunks[3] & 0xf).lookup_16(table1) & (in.chunks[3].shr<4>()).lookup_16(table2) - ); - - uint64_t op = simd8x64( - v.chunks[0].any_bits_set(0x7), - v.chunks[1].any_bits_set(0x7), - v.chunks[2].any_bits_set(0x7), - v.chunks[3].any_bits_set(0x7) - ).to_bitmask(); - - uint64_t whitespace = simd8x64( - v.chunks[0].any_bits_set(0x18), - v.chunks[1].any_bits_set(0x18), - v.chunks[2].any_bits_set(0x18), - v.chunks[3].any_bits_set(0x18) - ).to_bitmask(); - - return { whitespace, op }; -} - -simdjson_really_inline bool is_ascii(const simd8x64& input) { - return input.reduce_or().saturating_sub(0b10000000u).bits_not_set_anywhere(); -} - -simdjson_unused simdjson_really_inline simd8 must_be_continuation(const simd8 prev1, const simd8 prev2, const simd8 prev3) { - simd8 is_second_byte = prev1.saturating_sub(0b11000000u-1); // Only 11______ will be > 0 - simd8 is_third_byte = prev2.saturating_sub(0b11100000u-1); // Only 111_____ will be > 0 - simd8 is_fourth_byte = prev3.saturating_sub(0b11110000u-1); // Only 1111____ will be > 0 - // Caller requires a bool (all 1's). All values resulting from the subtraction will be <= 64, so signed comparison is fine. - return simd8(is_second_byte | is_third_byte | is_fourth_byte) > int8_t(0); -} - -simdjson_really_inline simd8 must_be_2_3_continuation(const simd8 prev2, const simd8 prev3) { - simd8 is_third_byte = prev2.saturating_sub(0b11100000u-1); // Only 111_____ will be > 0 - simd8 is_fourth_byte = prev3.saturating_sub(0b11110000u-1); // Only 1111____ will be > 0 - // Caller requires a bool (all 1's). All values resulting from the subtraction will be <= 64, so signed comparison is fine. - return simd8(is_third_byte | is_fourth_byte) > int8_t(0); -} - -} // unnamed namespace -} // namespace SIMDJSON_IMPLEMENTATION -} // namespace simdjson - -/* begin file src/generic/stage1/utf8_lookup4_algorithm.h */ -namespace simdjson { -namespace SIMDJSON_IMPLEMENTATION { -namespace { -namespace utf8_validation { - -using namespace simd; - - simdjson_really_inline simd8 check_special_cases(const simd8 input, const simd8 prev1) { -// Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) -// Bit 1 = Too Long (ASCII followed by continuation) -// Bit 2 = Overlong 3-byte -// Bit 4 = Surrogate -// Bit 5 = Overlong 2-byte -// Bit 7 = Two Continuations - constexpr const uint8_t TOO_SHORT = 1<<0; // 11______ 0_______ - // 11______ 11______ - constexpr const uint8_t TOO_LONG = 1<<1; // 0_______ 10______ - constexpr const uint8_t OVERLONG_3 = 1<<2; // 11100000 100_____ - constexpr const uint8_t SURROGATE = 1<<4; // 11101101 101_____ - constexpr const uint8_t OVERLONG_2 = 1<<5; // 1100000_ 10______ - constexpr const uint8_t TWO_CONTS = 1<<7; // 10______ 10______ - constexpr const uint8_t TOO_LARGE = 1<<3; // 11110100 1001____ - // 11110100 101_____ - // 11110101 1001____ - // 11110101 101_____ - // 1111011_ 1001____ - // 1111011_ 101_____ - // 11111___ 1001____ - // 11111___ 101_____ - constexpr const uint8_t TOO_LARGE_1000 = 1<<6; - // 11110101 1000____ - // 1111011_ 1000____ - // 11111___ 1000____ - constexpr const uint8_t OVERLONG_4 = 1<<6; // 11110000 1000____ - - const simd8 byte_1_high = prev1.shr<4>().lookup_16( - // 0_______ ________ - TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, - TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, - // 10______ ________ - TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS, - // 1100____ ________ - TOO_SHORT | OVERLONG_2, - // 1101____ ________ - TOO_SHORT, - // 1110____ ________ - TOO_SHORT | OVERLONG_3 | SURROGATE, - // 1111____ ________ - TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4 - ); - constexpr const uint8_t CARRY = TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 . - const simd8 byte_1_low = (prev1 & 0x0F).lookup_16( - // ____0000 ________ - CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4, - // ____0001 ________ - CARRY | OVERLONG_2, - // ____001_ ________ - CARRY, - CARRY, - - // ____0100 ________ - CARRY | TOO_LARGE, - // ____0101 ________ - CARRY | TOO_LARGE | TOO_LARGE_1000, - // ____011_ ________ - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - - // ____1___ ________ - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - // ____1101 ________ - CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000 - ); - const simd8 byte_2_high = input.shr<4>().lookup_16( - // ________ 0_______ - TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, - TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, - - // ________ 1000____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | OVERLONG_4, - // ________ 1001____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE, - // ________ 101_____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, - TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, - - // ________ 11______ - TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT - ); - return (byte_1_high & byte_1_low & byte_2_high); - } - simdjson_really_inline simd8 check_multibyte_lengths(const simd8 input, - const simd8 prev_input, const simd8 sc) { - simd8 prev2 = input.prev<2>(prev_input); - simd8 prev3 = input.prev<3>(prev_input); - simd8 must23 = simd8(must_be_2_3_continuation(prev2, prev3)); - simd8 must23_80 = must23 & uint8_t(0x80); - return must23_80 ^ sc; - } - - // - // Return nonzero if there are incomplete multibyte characters at the end of the block: - // e.g. if there is a 4-byte character, but it's 3 bytes from the end. - // - simdjson_really_inline simd8 is_incomplete(const simd8 input) { - // If the previous input's last 3 bytes match this, they're too short (they ended at EOF): - // ... 1111____ 111_____ 11______ - static const uint8_t max_array[32] = { - 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 0b11110000u-1, 0b11100000u-1, 0b11000000u-1 - }; - const simd8 max_value(&max_array[sizeof(max_array)-sizeof(simd8)]); - return input.gt_bits(max_value); - } - - struct utf8_checker { - // If this is nonzero, there has been a UTF-8 error. - simd8 error; - // The last input we received - simd8 prev_input_block; - // Whether the last input we received was incomplete (used for ASCII fast path) - simd8 prev_incomplete; - - // - // Check whether the current bytes are valid UTF-8. - // - simdjson_really_inline void check_utf8_bytes(const simd8 input, const simd8 prev_input) { - // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ lead bytes - // (2, 3, 4-byte leads become large positive numbers instead of small negative numbers) - simd8 prev1 = input.prev<1>(prev_input); - simd8 sc = check_special_cases(input, prev1); - this->error |= check_multibyte_lengths(input, prev_input, sc); - } - - // The only problem that can happen at EOF is that a multibyte character is too short - // or a byte value too large in the last bytes: check_special_cases only checks for bytes - // too large in the first of two bytes. - simdjson_really_inline void check_eof() { - // If the previous block had incomplete UTF-8 characters at the end, an ASCII block can't - // possibly finish them. - this->error |= this->prev_incomplete; - } - - simdjson_really_inline void check_next_input(const simd8x64& input) { - if(simdjson_likely(is_ascii(input))) { - this->error |= this->prev_incomplete; - } else { - // you might think that a for-loop would work, but under Visual Studio, it is not good enough. - static_assert((simd8x64::NUM_CHUNKS == 2) || (simd8x64::NUM_CHUNKS == 4), - "We support either two or four chunks per 64-byte block."); - if(simd8x64::NUM_CHUNKS == 2) { - this->check_utf8_bytes(input.chunks[0], this->prev_input_block); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - } else if(simd8x64::NUM_CHUNKS == 4) { - this->check_utf8_bytes(input.chunks[0], this->prev_input_block); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - this->check_utf8_bytes(input.chunks[2], input.chunks[1]); - this->check_utf8_bytes(input.chunks[3], input.chunks[2]); - } - this->prev_incomplete = is_incomplete(input.chunks[simd8x64::NUM_CHUNKS-1]); - this->prev_input_block = input.chunks[simd8x64::NUM_CHUNKS-1]; - - } - } - // do not forget to call check_eof! - simdjson_really_inline error_code errors() { - return this->error.any_bits_set_anywhere() ? error_code::UTF8_ERROR : error_code::SUCCESS; - } - - }; // struct utf8_checker -} // namespace utf8_validation - -using utf8_validation::utf8_checker; - -} // unnamed namespace -} // namespace SIMDJSON_IMPLEMENTATION -} // namespace simdjson -/* end file src/generic/stage1/utf8_lookup4_algorithm.h */ -/* begin file src/generic/stage1/json_structural_indexer.h */ -// This file contains the common code every implementation uses in stage1 -// It is intended to be included multiple times and compiled multiple times -// We assume the file in which it is included already includes -// "simdjson/stage1.h" (this simplifies amalgation) - -/* begin file src/generic/stage1/buf_block_reader.h */ -namespace simdjson { -namespace SIMDJSON_IMPLEMENTATION { -namespace { - -// Walks through a buffer in block-sized increments, loading the last part with spaces -template -struct buf_block_reader { -public: - simdjson_really_inline buf_block_reader(const uint8_t *_buf, size_t _len); - simdjson_really_inline size_t block_index(); - simdjson_really_inline bool has_full_block() const; - simdjson_really_inline const uint8_t *full_block() const; - /** - * Get the last block, padded with spaces. - * - * There will always be a last block, with at least 1 byte, unless len == 0 (in which case this - * function fills the buffer with spaces and returns 0. In particular, if len == STEP_SIZE there - * will be 0 full_blocks and 1 remainder block with STEP_SIZE bytes and no spaces for padding. - * - * @return the number of effective characters in the last block. - */ - simdjson_really_inline size_t get_remainder(uint8_t *dst) const; - simdjson_really_inline void advance(); -private: - const uint8_t *buf; - const size_t len; - const size_t lenminusstep; - size_t idx; -}; - -// Routines to print masks and text for debugging bitmask operations -simdjson_unused static char * format_input_text_64(const uint8_t *text) { - static char *buf = (char*)malloc(sizeof(simd8x64) + 1); - for (size_t i=0; i); i++) { - buf[i] = int8_t(text[i]) < ' ' ? '_' : int8_t(text[i]); - } - buf[sizeof(simd8x64)] = '\0'; - return buf; -} - -// Routines to print masks and text for debugging bitmask operations -simdjson_unused static char * format_input_text(const simd8x64& in) { - static char *buf = (char*)malloc(sizeof(simd8x64) + 1); - in.store((uint8_t*)buf); - for (size_t i=0; i); i++) { - if (buf[i] < ' ') { buf[i] = '_'; } - } - buf[sizeof(simd8x64)] = '\0'; - return buf; -} - -simdjson_unused static char * format_mask(uint64_t mask) { - static char *buf = (char*)malloc(64 + 1); - for (size_t i=0; i<64; i++) { - buf[i] = (mask & (size_t(1) << i)) ? 'X' : ' '; - } - buf[64] = '\0'; - return buf; -} - -template -simdjson_really_inline buf_block_reader::buf_block_reader(const uint8_t *_buf, size_t _len) : buf{_buf}, len{_len}, lenminusstep{len < STEP_SIZE ? 0 : len - STEP_SIZE}, idx{0} {} - -template -simdjson_really_inline size_t buf_block_reader::block_index() { return idx; } - -template -simdjson_really_inline bool buf_block_reader::has_full_block() const { - return idx < lenminusstep; -} - -template -simdjson_really_inline const uint8_t *buf_block_reader::full_block() const { - return &buf[idx]; -} - -template -simdjson_really_inline size_t buf_block_reader::get_remainder(uint8_t *dst) const { - if(len == idx) { return 0; } // memcpy(dst, null, 0) will trigger an error with some sanitizers - std::memset(dst, 0x20, STEP_SIZE); // std::memset STEP_SIZE because it's more efficient to write out 8 or 16 bytes at once. - std::memcpy(dst, buf + idx, len - idx); - return len - idx; -} - -template -simdjson_really_inline void buf_block_reader::advance() { - idx += STEP_SIZE; -} - -} // unnamed namespace -} // namespace SIMDJSON_IMPLEMENTATION -} // namespace simdjson -/* end file src/generic/stage1/buf_block_reader.h */ -/* begin file src/generic/stage1/json_string_scanner.h */ -namespace simdjson { -namespace SIMDJSON_IMPLEMENTATION { -namespace { -namespace stage1 { - -struct json_string_block { - // Escaped characters (characters following an escape() character) - simdjson_really_inline uint64_t escaped() const { return _escaped; } - // Escape characters (backslashes that are not escaped--i.e. in \\, includes only the first \) - simdjson_really_inline uint64_t escape() const { return _backslash & ~_escaped; } - // Real (non-backslashed) quotes - simdjson_really_inline uint64_t quote() const { return _quote; } - // Start quotes of strings - simdjson_really_inline uint64_t string_start() const { return _quote & _in_string; } - // End quotes of strings - simdjson_really_inline uint64_t string_end() const { return _quote & ~_in_string; } - // Only characters inside the string (not including the quotes) - simdjson_really_inline uint64_t string_content() const { return _in_string & ~_quote; } - // Return a mask of whether the given characters are inside a string (only works on non-quotes) - simdjson_really_inline uint64_t non_quote_inside_string(uint64_t mask) const { return mask & _in_string; } - // Return a mask of whether the given characters are inside a string (only works on non-quotes) - simdjson_really_inline uint64_t non_quote_outside_string(uint64_t mask) const { return mask & ~_in_string; } - // Tail of string (everything except the start quote) - simdjson_really_inline uint64_t string_tail() const { return _in_string ^ _quote; } - - // backslash characters - uint64_t _backslash; - // escaped characters (backslashed--does not include the hex characters after \u) - uint64_t _escaped; - // real quotes (non-backslashed ones) - uint64_t _quote; - // string characters (includes start quote but not end quote) - uint64_t _in_string; -}; - -// Scans blocks for string characters, storing the state necessary to do so -class json_string_scanner { -public: - simdjson_really_inline json_string_block next(const simd::simd8x64& in); - simdjson_really_inline error_code finish(bool streaming); - -private: - // Intended to be defined by the implementation - simdjson_really_inline uint64_t find_escaped(uint64_t escape); - simdjson_really_inline uint64_t find_escaped_branchless(uint64_t escape); - - // Whether the last iteration was still inside a string (all 1's = true, all 0's = false). - uint64_t prev_in_string = 0ULL; - // Whether the first character of the next iteration is escaped. - uint64_t prev_escaped = 0ULL; -}; - -// -// Finds escaped characters (characters following \). -// -// Handles runs of backslashes like \\\" and \\\\" correctly (yielding 0101 and 01010, respectively). -// -// Does this by: -// - Shift the escape mask to get potentially escaped characters (characters after backslashes). -// - Mask escaped sequences that start on *even* bits with 1010101010 (odd bits are escaped, even bits are not) -// - Mask escaped sequences that start on *odd* bits with 0101010101 (even bits are escaped, odd bits are not) -// -// To distinguish between escaped sequences starting on even/odd bits, it finds the start of all -// escape sequences, filters out the ones that start on even bits, and adds that to the mask of -// escape sequences. This causes the addition to clear out the sequences starting on odd bits (since -// the start bit causes a carry), and leaves even-bit sequences alone. -// -// Example: -// -// text | \\\ | \\\"\\\" \\\" \\"\\" | -// escape | xxx | xx xxx xxx xx xx | Removed overflow backslash; will | it into follows_escape -// odd_starts | x | x x x | escape & ~even_bits & ~follows_escape -// even_seq | c| cxxx c xx c | c = carry bit -- will be masked out later -// invert_mask | | cxxx c xx c| even_seq << 1 -// follows_escape | xx | x xx xxx xxx xx xx | Includes overflow bit -// escaped | x | x x x x x x x x | -// desired | x | x x x x x x x x | -// text | \\\ | \\\"\\\" \\\" \\"\\" | -// -simdjson_really_inline uint64_t json_string_scanner::find_escaped_branchless(uint64_t backslash) { - // If there was overflow, pretend the first character isn't a backslash - backslash &= ~prev_escaped; - uint64_t follows_escape = backslash << 1 | prev_escaped; - - // Get sequences starting on even bits by clearing out the odd series using + - const uint64_t even_bits = 0x5555555555555555ULL; - uint64_t odd_sequence_starts = backslash & ~even_bits & ~follows_escape; - uint64_t sequences_starting_on_even_bits; - prev_escaped = add_overflow(odd_sequence_starts, backslash, &sequences_starting_on_even_bits); - uint64_t invert_mask = sequences_starting_on_even_bits << 1; // The mask we want to return is the *escaped* bits, not escapes. - - // Mask every other backslashed character as an escaped character - // Flip the mask for sequences that start on even bits, to correct them - return (even_bits ^ invert_mask) & follows_escape; -} - -// -// Return a mask of all string characters plus end quotes. -// -// prev_escaped is overflow saying whether the next character is escaped. -// prev_in_string is overflow saying whether we're still in a string. -// -// Backslash sequences outside of quotes will be detected in stage 2. -// -simdjson_really_inline json_string_block json_string_scanner::next(const simd::simd8x64& in) { - const uint64_t backslash = in.eq('\\'); - const uint64_t escaped = find_escaped(backslash); - const uint64_t quote = in.eq('"') & ~escaped; - - // - // prefix_xor flips on bits inside the string (and flips off the end quote). - // - // Then we xor with prev_in_string: if we were in a string already, its effect is flipped - // (characters inside strings are outside, and characters outside strings are inside). - // - const uint64_t in_string = prefix_xor(quote) ^ prev_in_string; - - // - // Check if we're still in a string at the end of the box so the next block will know - // - // right shift of a signed value expected to be well-defined and standard - // compliant as of C++20, John Regher from Utah U. says this is fine code - // - prev_in_string = uint64_t(static_cast(in_string) >> 63); - - // Use ^ to turn the beginning quote off, and the end quote on. - return { - backslash, - escaped, - quote, - in_string - }; -} - -simdjson_really_inline error_code json_string_scanner::finish(bool streaming) { - if (prev_in_string and (not streaming)) { - return UNCLOSED_STRING; - } - return SUCCESS; -} - -} // namespace stage1 -} // unnamed namespace -} // namespace SIMDJSON_IMPLEMENTATION -} // namespace simdjson -/* end file src/generic/stage1/json_string_scanner.h */ -/* begin file src/generic/stage1/json_scanner.h */ -namespace simdjson { -namespace SIMDJSON_IMPLEMENTATION { -namespace { -namespace stage1 { - -/** - * A block of scanned json, with information on operators and scalars. - * - * We seek to identify pseudo-structural characters. Anything that is inside - * a string must be omitted (hence & ~_string.string_tail()). - * Otherwise, pseudo-structural characters come in two forms. - * 1. We have the structural characters ([,],{,},:, comma). The - * term 'structural character' is from the JSON RFC. - * 2. We have the 'scalar pseudo-structural characters'. - * Scalars are quotes, and any character except structural characters and white space. - * - * To identify the scalar pseudo-structural characters, we must look at what comes - * before them: it must be a space, a quote or a structural characters. - * Starting with simdjson v0.3, we identify them by - * negation: we identify everything that is followed by a non-quote scalar, - * and we negate that. Whatever remains must be a 'scalar pseudo-structural character'. - */ -struct json_block { -public: - /** - * The start of structurals. - * In simdjson prior to v0.3, these were called the pseudo-structural characters. - **/ - simdjson_really_inline uint64_t structural_start() { return potential_structural_start() & ~_string.string_tail(); } - /** All JSON whitespace (i.e. not in a string) */ - simdjson_really_inline uint64_t whitespace() { return non_quote_outside_string(_characters.whitespace()); } - - // Helpers - - /** Whether the given characters are inside a string (only works on non-quotes) */ - simdjson_really_inline uint64_t non_quote_inside_string(uint64_t mask) { return _string.non_quote_inside_string(mask); } - /** Whether the given characters are outside a string (only works on non-quotes) */ - simdjson_really_inline uint64_t non_quote_outside_string(uint64_t mask) { return _string.non_quote_outside_string(mask); } - - // string and escape characters - json_string_block _string; - // whitespace, structural characters ('operators'), scalars - json_character_block _characters; - // whether the previous character was a scalar - uint64_t _follows_potential_nonquote_scalar; -private: - // Potential structurals (i.e. disregarding strings) - - /** - * structural elements ([,],{,},:, comma) plus scalar starts like 123, true and "abc". - * They may reside inside a string. - **/ - simdjson_really_inline uint64_t potential_structural_start() { return _characters.op() | potential_scalar_start(); } - /** - * The start of non-operator runs, like 123, true and "abc". - * It main reside inside a string. - **/ - simdjson_really_inline uint64_t potential_scalar_start() { - // The term "scalar" refers to anything except structural characters and white space - // (so letters, numbers, quotes). - // Whenever it is preceded by something that is not a structural element ({,},[,],:, ") nor a white-space - // then we know that it is irrelevant structurally. - return _characters.scalar() & ~follows_potential_scalar(); - } - /** - * Whether the given character is immediately after a non-operator like 123, true. - * The characters following a quote are not included. - */ - simdjson_really_inline uint64_t follows_potential_scalar() { - // _follows_potential_nonquote_scalar: is defined as marking any character that follows a character - // that is not a structural element ({,},[,],:, comma) nor a quote (") and that is not a - // white space. - // It is understood that within quoted region, anything at all could be marked (irrelevant). - return _follows_potential_nonquote_scalar; - } -}; - -/** - * Scans JSON for important bits: structural characters or 'operators', strings, and scalars. - * - * The scanner starts by calculating two distinct things: - * - string characters (taking \" into account) - * - structural characters or 'operators' ([]{},:, comma) - * and scalars (runs of non-operators like 123, true and "abc") - * - * To minimize data dependency (a key component of the scanner's speed), it finds these in parallel: - * in particular, the operator/scalar bit will find plenty of things that are actually part of - * strings. When we're done, json_block will fuse the two together by masking out tokens that are - * part of a string. - */ -class json_scanner { -public: - json_scanner() {} - simdjson_really_inline json_block next(const simd::simd8x64& in); - simdjson_really_inline error_code finish(bool streaming); - -private: - // Whether the last character of the previous iteration is part of a scalar token - // (anything except whitespace or a structural character/'operator'). - uint64_t prev_scalar = 0ULL; - json_string_scanner string_scanner{}; -}; - - -// -// Check if the current character immediately follows a matching character. -// -// For example, this checks for quotes with backslashes in front of them: -// -// const uint64_t backslashed_quote = in.eq('"') & immediately_follows(in.eq('\'), prev_backslash); -// -simdjson_really_inline uint64_t follows(const uint64_t match, uint64_t &overflow) { - const uint64_t result = match << 1 | overflow; - overflow = match >> 63; - return result; -} - -simdjson_really_inline json_block json_scanner::next(const simd::simd8x64& in) { - json_string_block strings = string_scanner.next(in); - // identifies the white-space and the structurat characters - json_character_block characters = json_character_block::classify(in); - // The term "scalar" refers to anything except structural characters and white space - // (so letters, numbers, quotes). - // We want follows_scalar to mark anything that follows a non-quote scalar (so letters and numbers). - // - // A terminal quote should either be followed by a structural character (comma, brace, bracket, colon) - // or nothing. However, we still want ' "a string"true ' to mark the 't' of 'true' as a potential - // pseudo-structural character just like we would if we had ' "a string" true '; otherwise we - // may need to add an extra check when parsing strings. - // - // Performance: there are many ways to skin this cat. - const uint64_t nonquote_scalar = characters.scalar() & ~strings.quote(); - uint64_t follows_nonquote_scalar = follows(nonquote_scalar, prev_scalar); - return { - strings, - characters, - follows_nonquote_scalar - }; -} - -simdjson_really_inline error_code json_scanner::finish(bool streaming) { - return string_scanner.finish(streaming); -} - -} // namespace stage1 -} // unnamed namespace -} // namespace SIMDJSON_IMPLEMENTATION -} // namespace simdjson -/* end file src/generic/stage1/json_scanner.h */ -/* begin file src/generic/stage1/json_minifier.h */ -// This file contains the common code every implementation uses in stage1 -// It is intended to be included multiple times and compiled multiple times -// We assume the file in which it is included already includes -// "simdjson/stage1.h" (this simplifies amalgation) - -namespace simdjson { -namespace SIMDJSON_IMPLEMENTATION { -namespace { -namespace stage1 { - -class json_minifier { -public: - template - static error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) noexcept; - -private: - simdjson_really_inline json_minifier(uint8_t *_dst) - : dst{_dst} - {} - template - simdjson_really_inline void step(const uint8_t *block_buf, buf_block_reader &reader) noexcept; - simdjson_really_inline void next(const simd::simd8x64& in, json_block block); - simdjson_really_inline error_code finish(uint8_t *dst_start, size_t &dst_len); - json_scanner scanner{}; - uint8_t *dst; -}; - -simdjson_really_inline void json_minifier::next(const simd::simd8x64& in, json_block block) { - uint64_t mask = block.whitespace(); - in.compress(mask, dst); - dst += 64 - count_ones(mask); -} - -simdjson_really_inline error_code json_minifier::finish(uint8_t *dst_start, size_t &dst_len) { - error_code error = scanner.finish(false); - if (error) { dst_len = 0; return error; } - dst_len = dst - dst_start; - return SUCCESS; -} - -template<> -simdjson_really_inline void json_minifier::step<128>(const uint8_t *block_buf, buf_block_reader<128> &reader) noexcept { - simd::simd8x64 in_1(block_buf); - simd::simd8x64 in_2(block_buf+64); - json_block block_1 = scanner.next(in_1); - json_block block_2 = scanner.next(in_2); - this->next(in_1, block_1); - this->next(in_2, block_2); - reader.advance(); -} - -template<> -simdjson_really_inline void json_minifier::step<64>(const uint8_t *block_buf, buf_block_reader<64> &reader) noexcept { - simd::simd8x64 in_1(block_buf); - json_block block_1 = scanner.next(in_1); - this->next(block_buf, block_1); - reader.advance(); -} - -template -error_code json_minifier::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) noexcept { - buf_block_reader reader(buf, len); - json_minifier minifier(dst); - - // Index the first n-1 blocks - while (reader.has_full_block()) { - minifier.step(reader.full_block(), reader); - } - - // Index the last (remainder) block, padded with spaces - uint8_t block[STEP_SIZE]; - size_t remaining_bytes = reader.get_remainder(block); - if (remaining_bytes > 0) { - // We do not want to write directly to the output stream. Rather, we write - // to a local buffer (for safety). - uint8_t out_block[STEP_SIZE]; - uint8_t * const guarded_dst{minifier.dst}; - minifier.dst = out_block; - minifier.step(block, reader); - size_t to_write = minifier.dst - out_block; - // In some cases, we could be enticed to consider the padded spaces - // as part of the string. This is fine as long as we do not write more - // than we consumed. - if(to_write > remaining_bytes) { to_write = remaining_bytes; } - memcpy(guarded_dst, out_block, to_write); - minifier.dst = guarded_dst + to_write; - } - return minifier.finish(dst, dst_len); -} - -} // namespace stage1 -} // unnamed namespace -} // namespace SIMDJSON_IMPLEMENTATION -} // namespace simdjson -/* end file src/generic/stage1/json_minifier.h */ -/* begin file src/generic/stage1/find_next_document_index.h */ -namespace simdjson { -namespace SIMDJSON_IMPLEMENTATION { -namespace { - -/** - * This algorithm is used to quickly identify the last structural position that - * makes up a complete document. - * - * It does this by going backwards and finding the last *document boundary* (a - * place where one value follows another without a comma between them). If the - * last document (the characters after the boundary) has an equal number of - * start and end brackets, it is considered complete. - * - * Simply put, we iterate over the structural characters, starting from - * the end. We consider that we found the end of a JSON document when the - * first element of the pair is NOT one of these characters: '{' '[' ';' ',' - * and when the second element is NOT one of these characters: '}' '}' ';' ','. - * - * This simple comparison works most of the time, but it does not cover cases - * where the batch's structural indexes contain a perfect amount of documents. - * In such a case, we do not have access to the structural index which follows - * the last document, therefore, we do not have access to the second element in - * the pair, and that means we cannot identify the last document. To fix this - * issue, we keep a count of the open and closed curly/square braces we found - * while searching for the pair. When we find a pair AND the count of open and - * closed curly/square braces is the same, we know that we just passed a - * complete document, therefore the last json buffer location is the end of the - * batch. - */ -simdjson_really_inline uint32_t find_next_document_index(dom_parser_implementation &parser) { - // TODO don't count separately, just figure out depth - auto arr_cnt = 0; - auto obj_cnt = 0; - for (auto i = parser.n_structural_indexes - 1; i > 0; i--) { - auto idxb = parser.structural_indexes[i]; - switch (parser.buf[idxb]) { - case ':': - case ',': - continue; - case '}': - obj_cnt--; - continue; - case ']': - arr_cnt--; - continue; - case '{': - obj_cnt++; - break; - case '[': - arr_cnt++; - break; - } - auto idxa = parser.structural_indexes[i - 1]; - switch (parser.buf[idxa]) { - case '{': - case '[': - case ':': - case ',': - continue; - } - // Last document is complete, so the next document will appear after! - if (!arr_cnt && !obj_cnt) { - return parser.n_structural_indexes; - } - // Last document is incomplete; mark the document at i + 1 as the next one - return i; - } - return 0; -} - -} // unnamed namespace -} // namespace SIMDJSON_IMPLEMENTATION -} // namespace simdjson -/* end file src/generic/stage1/find_next_document_index.h */ - -namespace simdjson { -namespace SIMDJSON_IMPLEMENTATION { -namespace { -namespace stage1 { - -class bit_indexer { -public: - uint32_t *tail; - - simdjson_really_inline bit_indexer(uint32_t *index_buf) : tail(index_buf) {} - - // flatten out values in 'bits' assuming that they are are to have values of idx - // plus their position in the bitvector, and store these indexes at - // base_ptr[base] incrementing base as we go - // will potentially store extra values beyond end of valid bits, so base_ptr - // needs to be large enough to handle this - simdjson_really_inline void write(uint32_t idx, uint64_t bits) { - // In some instances, the next branch is expensive because it is mispredicted. - // Unfortunately, in other cases, - // it helps tremendously. - if (bits == 0) - return; - int cnt = static_cast(count_ones(bits)); - - // Do the first 8 all together - for (int i=0; i<8; i++) { - this->tail[i] = idx + trailing_zeroes(bits); - bits = clear_lowest_bit(bits); - } - - // Do the next 8 all together (we hope in most cases it won't happen at all - // and the branch is easily predicted). - if (simdjson_unlikely(cnt > 8)) { - for (int i=8; i<16; i++) { - this->tail[i] = idx + trailing_zeroes(bits); - bits = clear_lowest_bit(bits); - } - - // Most files don't have 16+ structurals per block, so we take several basically guaranteed - // branch mispredictions here. 16+ structurals per block means either punctuation ({} [] , :) - // or the start of a value ("abc" true 123) every four characters. - if (simdjson_unlikely(cnt > 16)) { - int i = 16; - do { - this->tail[i] = idx + trailing_zeroes(bits); - bits = clear_lowest_bit(bits); - i++; - } while (i < cnt); - } - } - - this->tail += cnt; - } -}; - -class json_structural_indexer { -public: - /** - * Find the important bits of JSON in a 128-byte chunk, and add them to structural_indexes. - * - * @param partial Setting the partial parameter to true allows the find_structural_bits to - * tolerate unclosed strings. The caller should still ensure that the input is valid UTF-8. If - * you are processing substrings, you may want to call on a function like trimmed_length_safe_utf8. - */ - template - static error_code index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, bool partial) noexcept; - -private: - simdjson_really_inline json_structural_indexer(uint32_t *structural_indexes); - template - simdjson_really_inline void step(const uint8_t *block, buf_block_reader &reader) noexcept; - simdjson_really_inline void next(const simd::simd8x64& in, json_block block, size_t idx); - simdjson_really_inline error_code finish(dom_parser_implementation &parser, size_t idx, size_t len, bool partial); - - json_scanner scanner{}; - utf8_checker checker{}; - bit_indexer indexer; - uint64_t prev_structurals = 0; - uint64_t unescaped_chars_error = 0; -}; - -simdjson_really_inline json_structural_indexer::json_structural_indexer(uint32_t *structural_indexes) : indexer{structural_indexes} {} - -// Skip the last character if it is partial -simdjson_really_inline size_t trim_partial_utf8(const uint8_t *buf, size_t len) { - if (simdjson_unlikely(len < 3)) { - switch (len) { - case 2: - if (buf[len-1] >= 0b11000000) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left - if (buf[len-2] >= 0b11100000) { return len-2; } // 3- and 4-byte characters with only 2 bytes left - return len; - case 1: - if (buf[len-1] >= 0b11000000) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left - return len; - case 0: - return len; - } - } - if (buf[len-1] >= 0b11000000) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left - if (buf[len-2] >= 0b11100000) { return len-2; } // 3- and 4-byte characters with only 1 byte left - if (buf[len-3] >= 0b11110000) { return len-3; } // 4-byte characters with only 3 bytes left - return len; -} - -// -// PERF NOTES: -// We pipe 2 inputs through these stages: -// 1. Load JSON into registers. This takes a long time and is highly parallelizable, so we load -// 2 inputs' worth at once so that by the time step 2 is looking for them input, it's available. -// 2. Scan the JSON for critical data: strings, scalars and operators. This is the critical path. -// The output of step 1 depends entirely on this information. These functions don't quite use -// up enough CPU: the second half of the functions is highly serial, only using 1 execution core -// at a time. The second input's scans has some dependency on the first ones finishing it, but -// they can make a lot of progress before they need that information. -// 3. Step 1 doesn't use enough capacity, so we run some extra stuff while we're waiting for that -// to finish: utf-8 checks and generating the output from the last iteration. -// -// The reason we run 2 inputs at a time, is steps 2 and 3 are *still* not enough to soak up all -// available capacity with just one input. Running 2 at a time seems to give the CPU a good enough -// workout. -// -template -error_code json_structural_indexer::index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, bool partial) noexcept { - if (simdjson_unlikely(len > parser.capacity())) { return CAPACITY; } - if (partial) { len = trim_partial_utf8(buf, len); } - - buf_block_reader reader(buf, len); - json_structural_indexer indexer(parser.structural_indexes.get()); - - // Read all but the last block - while (reader.has_full_block()) { - indexer.step(reader.full_block(), reader); - } - - // Take care of the last block (will always be there unless file is empty) - uint8_t block[STEP_SIZE]; - if (simdjson_unlikely(reader.get_remainder(block) == 0)) { return EMPTY; } - indexer.step(block, reader); - - return indexer.finish(parser, reader.block_index(), len, partial); -} - -template<> -simdjson_really_inline void json_structural_indexer::step<128>(const uint8_t *block, buf_block_reader<128> &reader) noexcept { - simd::simd8x64 in_1(block); - simd::simd8x64 in_2(block+64); - json_block block_1 = scanner.next(in_1); - json_block block_2 = scanner.next(in_2); - this->next(in_1, block_1, reader.block_index()); - this->next(in_2, block_2, reader.block_index()+64); - reader.advance(); -} - -template<> -simdjson_really_inline void json_structural_indexer::step<64>(const uint8_t *block, buf_block_reader<64> &reader) noexcept { - simd::simd8x64 in_1(block); - json_block block_1 = scanner.next(in_1); - this->next(in_1, block_1, reader.block_index()); - reader.advance(); -} - -simdjson_really_inline void json_structural_indexer::next(const simd::simd8x64& in, json_block block, size_t idx) { - uint64_t unescaped = in.lteq(0x1F); - checker.check_next_input(in); - indexer.write(uint32_t(idx-64), prev_structurals); // Output *last* iteration's structurals to the parser - prev_structurals = block.structural_start(); - unescaped_chars_error |= block.non_quote_inside_string(unescaped); -} - -simdjson_really_inline error_code json_structural_indexer::finish(dom_parser_implementation &parser, size_t idx, size_t len, bool partial) { - // Write out the final iteration's structurals - indexer.write(uint32_t(idx-64), prev_structurals); - - error_code error = scanner.finish(partial); - if (simdjson_unlikely(error != SUCCESS)) { return error; } - - if (unescaped_chars_error) { - return UNESCAPED_CHARS; - } - - parser.n_structural_indexes = uint32_t(indexer.tail - parser.structural_indexes.get()); - /*** - * This is related to https://github.com/simdjson/simdjson/issues/906 - * Basically, we want to make sure that if the parsing continues beyond the last (valid) - * structural character, it quickly stops. - * Only three structural characters can be repeated without triggering an error in JSON: [,] and }. - * We repeat the padding character (at 'len'). We don't know what it is, but if the parsing - * continues, then it must be [,] or }. - * Suppose it is ] or }. We backtrack to the first character, what could it be that would - * not trigger an error? It could be ] or } but no, because you can't start a document that way. - * It can't be a comma, a colon or any simple value. So the only way we could continue is - * if the repeated character is [. But if so, the document must start with [. But if the document - * starts with [, it should end with ]. If we enforce that rule, then we would get - * ][[ which is invalid. - **/ - parser.structural_indexes[parser.n_structural_indexes] = uint32_t(len); - parser.structural_indexes[parser.n_structural_indexes + 1] = uint32_t(len); - parser.structural_indexes[parser.n_structural_indexes + 2] = 0; - parser.next_structural_index = 0; - // a valid JSON file cannot have zero structural indexes - we should have found something - if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { - return EMPTY; - } - if (simdjson_unlikely(parser.structural_indexes[parser.n_structural_indexes - 1] > len)) { - return UNEXPECTED_ERROR; - } - if (partial) { - auto new_structural_indexes = find_next_document_index(parser); - if (new_structural_indexes == 0 && parser.n_structural_indexes > 0) { - return CAPACITY; // If the buffer is partial but the document is incomplete, it's too big to parse. - } - parser.n_structural_indexes = new_structural_indexes; - } - checker.check_eof(); - return checker.errors(); -} - -} // namespace stage1 -} // unnamed namespace -} // namespace SIMDJSON_IMPLEMENTATION -} // namespace simdjson -/* end file src/generic/stage1/json_structural_indexer.h */ -/* begin file src/generic/stage1/utf8_validator.h */ -namespace simdjson { -namespace SIMDJSON_IMPLEMENTATION { -namespace { -namespace stage1 { - -/** - * Validates that the string is actual UTF-8. - */ -template -bool generic_validate_utf8(const uint8_t * input, size_t length) { - checker c{}; - buf_block_reader<64> reader(input, length); - while (reader.has_full_block()) { - simd::simd8x64 in(reader.full_block()); - c.check_next_input(in); - reader.advance(); - } - uint8_t block[64]{}; - reader.get_remainder(block); - simd::simd8x64 in(block); - c.check_next_input(in); - reader.advance(); - c.check_eof(); - return c.errors() == error_code::SUCCESS; -} - -bool generic_validate_utf8(const char * input, size_t length) { - return generic_validate_utf8((const uint8_t *)input,length); -} - -} // namespace stage1 -} // unnamed namespace -} // namespace SIMDJSON_IMPLEMENTATION -} // namespace simdjson -/* end file src/generic/stage1/utf8_validator.h */ - -// -// Stage 2 -// - -/* begin file src/generic/stage2/tape_builder.h */ -/* begin file src/generic/stage2/json_iterator.h */ -/* begin file src/generic/stage2/logger.h */ -// This is for an internal-only stage 2 specific logger. -// Set LOG_ENABLED = true to log what stage 2 is doing! -namespace simdjson { -namespace SIMDJSON_IMPLEMENTATION { -namespace { -namespace logger { - - static constexpr const char * DASHES = "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------"; - -#if SIMDJSON_VERBOSE_LOGGING - static constexpr const bool LOG_ENABLED = true; -#else - static constexpr const bool LOG_ENABLED = false; -#endif - static constexpr const int LOG_EVENT_LEN = 20; - static constexpr const int LOG_BUFFER_LEN = 30; - static constexpr const int LOG_SMALL_BUFFER_LEN = 10; - static constexpr const int LOG_INDEX_LEN = 5; - - static int log_depth; // Not threadsafe. Log only. - - // Helper to turn unprintable or newline characters into spaces - static simdjson_really_inline char printable_char(char c) { - if (c >= 0x20) { - return c; - } else { - return ' '; - } - } - - // Print the header and set up log_start - static simdjson_really_inline void log_start() { - if (LOG_ENABLED) { - log_depth = 0; - printf("\n"); - printf("| %-*s | %-*s | %-*s | %-*s | Detail |\n", LOG_EVENT_LEN, "Event", LOG_BUFFER_LEN, "Buffer", LOG_SMALL_BUFFER_LEN, "Next", 5, "Next#"); - printf("|%.*s|%.*s|%.*s|%.*s|--------|\n", LOG_EVENT_LEN+2, DASHES, LOG_BUFFER_LEN+2, DASHES, LOG_SMALL_BUFFER_LEN+2, DASHES, 5+2, DASHES); - } - } - - simdjson_unused static simdjson_really_inline void log_string(const char *message) { - if (LOG_ENABLED) { - printf("%s\n", message); - } - } - - // Logs a single line from the stage 2 DOM parser - template - static simdjson_really_inline void log_line(S &structurals, const char *title_prefix, const char *title, const char *detail) { - if (LOG_ENABLED) { - printf("| %*s%s%-*s ", log_depth*2, "", title_prefix, LOG_EVENT_LEN - log_depth*2 - int(strlen(title_prefix)), title); - auto current_index = structurals.at_beginning() ? nullptr : structurals.next_structural-1; - auto next_index = structurals.next_structural; - auto current = current_index ? &structurals.buf[*current_index] : (const uint8_t*)" "; - auto next = &structurals.buf[*next_index]; - { - // Print the next N characters in the buffer. - printf("| "); - // Otherwise, print the characters starting from the buffer position. - // Print spaces for unprintable or newline characters. - for (int i=0;i - simdjson_warn_unused simdjson_really_inline error_code walk_document(V &visitor) noexcept; - - /** - * Create an iterator capable of walking a JSON document. - * - * The document must have already passed through stage 1. - */ - simdjson_really_inline json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index); - - /** - * Look at the next token. - * - * Tokens can be strings, numbers, booleans, null, or operators (`[{]},:`)). - * - * They may include invalid JSON as well (such as `1.2.3` or `ture`). - */ - simdjson_really_inline const uint8_t *peek() const noexcept; - /** - * Advance to the next token. - * - * Tokens can be strings, numbers, booleans, null, or operators (`[{]},:`)). - * - * They may include invalid JSON as well (such as `1.2.3` or `ture`). - */ - simdjson_really_inline const uint8_t *advance() noexcept; - /** - * Get the remaining length of the document, from the start of the current token. - */ - simdjson_really_inline size_t remaining_len() const noexcept; - /** - * Check if we are at the end of the document. - * - * If this is true, there are no more tokens. - */ - simdjson_really_inline bool at_eof() const noexcept; - /** - * Check if we are at the beginning of the document. - */ - simdjson_really_inline bool at_beginning() const noexcept; - simdjson_really_inline uint8_t last_structural() const noexcept; - - /** - * Log that a value has been found. - * - * Set ENABLE_LOGGING=true in logger.h to see logging. - */ - simdjson_really_inline void log_value(const char *type) const noexcept; - /** - * Log the start of a multipart value. - * - * Set ENABLE_LOGGING=true in logger.h to see logging. - */ - simdjson_really_inline void log_start_value(const char *type) const noexcept; - /** - * Log the end of a multipart value. - * - * Set ENABLE_LOGGING=true in logger.h to see logging. - */ - simdjson_really_inline void log_end_value(const char *type) const noexcept; - /** - * Log an error. - * - * Set ENABLE_LOGGING=true in logger.h to see logging. - */ - simdjson_really_inline void log_error(const char *error) const noexcept; - - template - simdjson_warn_unused simdjson_really_inline error_code visit_root_primitive(V &visitor, const uint8_t *value) noexcept; - template - simdjson_warn_unused simdjson_really_inline error_code visit_primitive(V &visitor, const uint8_t *value) noexcept; -}; - -template -simdjson_warn_unused simdjson_really_inline error_code json_iterator::walk_document(V &visitor) noexcept { - logger::log_start(); - - // - // Start the document - // - if (at_eof()) { return EMPTY; } - log_start_value("document"); - SIMDJSON_TRY( visitor.visit_document_start(*this) ); - - // - // Read first value - // - { - auto value = advance(); - - // Make sure the outer hash or array is closed before continuing; otherwise, there are ways we - // could get into memory corruption. See https://github.com/simdjson/simdjson/issues/906 - if (!STREAMING) { - switch (*value) { - case '{': if (last_structural() != '}') { return TAPE_ERROR; }; break; - case '[': if (last_structural() != ']') { return TAPE_ERROR; }; break; - } - } - - switch (*value) { - case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; - case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; - default: SIMDJSON_TRY( visitor.visit_root_primitive(*this, value) ); break; - } - } - goto document_end; - -// -// Object parser states -// -object_begin: - log_start_value("object"); - depth++; - if (depth >= dom_parser.max_depth()) { log_error("Exceeded max depth!"); return DEPTH_ERROR; } - dom_parser.is_array[depth] = false; - SIMDJSON_TRY( visitor.visit_object_start(*this) ); - - { - auto key = advance(); - if (*key != '"') { log_error("Object does not start with a key"); return TAPE_ERROR; } - SIMDJSON_TRY( visitor.increment_count(*this) ); - SIMDJSON_TRY( visitor.visit_key(*this, key) ); - } - -object_field: - if (simdjson_unlikely( *advance() != ':' )) { log_error("Missing colon after key in object"); return TAPE_ERROR; } - { - auto value = advance(); - switch (*value) { - case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; - case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; - default: SIMDJSON_TRY( visitor.visit_primitive(*this, value) ); break; - } - } - -object_continue: - switch (*advance()) { - case ',': - SIMDJSON_TRY( visitor.increment_count(*this) ); - { - auto key = advance(); - if (simdjson_unlikely( *key != '"' )) { log_error("Key string missing at beginning of field in object"); return TAPE_ERROR; } - SIMDJSON_TRY( visitor.visit_key(*this, key) ); - } - goto object_field; - case '}': log_end_value("object"); SIMDJSON_TRY( visitor.visit_object_end(*this) ); goto scope_end; - default: log_error("No comma between object fields"); return TAPE_ERROR; - } - -scope_end: - depth--; - if (depth == 0) { goto document_end; } - if (dom_parser.is_array[depth]) { goto array_continue; } - goto object_continue; - -// -// Array parser states -// -array_begin: - log_start_value("array"); - depth++; - if (depth >= dom_parser.max_depth()) { log_error("Exceeded max depth!"); return DEPTH_ERROR; } - dom_parser.is_array[depth] = true; - SIMDJSON_TRY( visitor.visit_array_start(*this) ); - SIMDJSON_TRY( visitor.increment_count(*this) ); - -array_value: - { - auto value = advance(); - switch (*value) { - case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; - case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; - default: SIMDJSON_TRY( visitor.visit_primitive(*this, value) ); break; - } - } - -array_continue: - switch (*advance()) { - case ',': SIMDJSON_TRY( visitor.increment_count(*this) ); goto array_value; - case ']': log_end_value("array"); SIMDJSON_TRY( visitor.visit_array_end(*this) ); goto scope_end; - default: log_error("Missing comma between array values"); return TAPE_ERROR; - } - -document_end: - log_end_value("document"); - SIMDJSON_TRY( visitor.visit_document_end(*this) ); - - dom_parser.next_structural_index = uint32_t(next_structural - &dom_parser.structural_indexes[0]); - - // If we didn't make it to the end, it's an error - if ( !STREAMING && dom_parser.next_structural_index != dom_parser.n_structural_indexes ) { - log_error("More than one JSON value at the root of the document, or extra characters at the end of the JSON!"); - return TAPE_ERROR; - } - - return SUCCESS; - -} // walk_document() - -simdjson_really_inline json_iterator::json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index) - : buf{_dom_parser.buf}, - next_structural{&_dom_parser.structural_indexes[start_structural_index]}, - dom_parser{_dom_parser} { -} - -simdjson_really_inline const uint8_t *json_iterator::peek() const noexcept { - return &buf[*(next_structural)]; -} -simdjson_really_inline const uint8_t *json_iterator::advance() noexcept { - return &buf[*(next_structural++)]; -} -simdjson_really_inline size_t json_iterator::remaining_len() const noexcept { - return dom_parser.len - *(next_structural-1); -} - -simdjson_really_inline bool json_iterator::at_eof() const noexcept { - return next_structural == &dom_parser.structural_indexes[dom_parser.n_structural_indexes]; -} -simdjson_really_inline bool json_iterator::at_beginning() const noexcept { - return next_structural == dom_parser.structural_indexes.get(); -} -simdjson_really_inline uint8_t json_iterator::last_structural() const noexcept { - return buf[dom_parser.structural_indexes[dom_parser.n_structural_indexes - 1]]; -} - -simdjson_really_inline void json_iterator::log_value(const char *type) const noexcept { - logger::log_line(*this, "", type, ""); -} - -simdjson_really_inline void json_iterator::log_start_value(const char *type) const noexcept { - logger::log_line(*this, "+", type, ""); - if (logger::LOG_ENABLED) { logger::log_depth++; } -} - -simdjson_really_inline void json_iterator::log_end_value(const char *type) const noexcept { - if (logger::LOG_ENABLED) { logger::log_depth--; } - logger::log_line(*this, "-", type, ""); -} - -simdjson_really_inline void json_iterator::log_error(const char *error) const noexcept { - logger::log_line(*this, "", "ERROR", error); -} - -template -simdjson_warn_unused simdjson_really_inline error_code json_iterator::visit_root_primitive(V &visitor, const uint8_t *value) noexcept { - switch (*value) { - case '"': return visitor.visit_root_string(*this, value); - case 't': return visitor.visit_root_true_atom(*this, value); - case 'f': return visitor.visit_root_false_atom(*this, value); - case 'n': return visitor.visit_root_null_atom(*this, value); - case '-': - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - return visitor.visit_root_number(*this, value); - default: - log_error("Document starts with a non-value character"); - return TAPE_ERROR; - } -} -template -simdjson_warn_unused simdjson_really_inline error_code json_iterator::visit_primitive(V &visitor, const uint8_t *value) noexcept { - switch (*value) { - case '"': return visitor.visit_string(*this, value); - case 't': return visitor.visit_true_atom(*this, value); - case 'f': return visitor.visit_false_atom(*this, value); - case 'n': return visitor.visit_null_atom(*this, value); - case '-': - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - return visitor.visit_number(*this, value); - default: - log_error("Non-value found when value was expected!"); - return TAPE_ERROR; - } -} - -} // namespace stage2 -} // unnamed namespace -} // namespace SIMDJSON_IMPLEMENTATION -} // namespace simdjson -/* end file src/generic/stage2/json_iterator.h */ -/* begin file src/generic/stage2/tape_writer.h */ -namespace simdjson { -namespace SIMDJSON_IMPLEMENTATION { -namespace { -namespace stage2 { - -struct tape_writer { - /** The next place to write to tape */ - uint64_t *next_tape_loc; - - /** Write a signed 64-bit value to tape. */ - simdjson_really_inline void append_s64(int64_t value) noexcept; - - /** Write an unsigned 64-bit value to tape. */ - simdjson_really_inline void append_u64(uint64_t value) noexcept; - - /** Write a double value to tape. */ - simdjson_really_inline void append_double(double value) noexcept; - - /** - * Append a tape entry (an 8-bit type,and 56 bits worth of value). - */ - simdjson_really_inline void append(uint64_t val, internal::tape_type t) noexcept; - - /** - * Skip the current tape entry without writing. - * - * Used to skip the start of the container, since we'll come back later to fill it in when the - * container ends. - */ - simdjson_really_inline void skip() noexcept; - - /** - * Skip the number of tape entries necessary to write a large u64 or i64. - */ - simdjson_really_inline void skip_large_integer() noexcept; - - /** - * Skip the number of tape entries necessary to write a double. - */ - simdjson_really_inline void skip_double() noexcept; - - /** - * Write a value to a known location on tape. - * - * Used to go back and write out the start of a container after the container ends. - */ - simdjson_really_inline static void write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept; - -private: - /** - * Append both the tape entry, and a supplementary value following it. Used for types that need - * all 64 bits, such as double and uint64_t. - */ - template - simdjson_really_inline void append2(uint64_t val, T val2, internal::tape_type t) noexcept; -}; // struct number_writer - -simdjson_really_inline void tape_writer::append_s64(int64_t value) noexcept { - append2(0, value, internal::tape_type::INT64); -} - -simdjson_really_inline void tape_writer::append_u64(uint64_t value) noexcept { - append(0, internal::tape_type::UINT64); - *next_tape_loc = value; - next_tape_loc++; -} - -/** Write a double value to tape. */ -simdjson_really_inline void tape_writer::append_double(double value) noexcept { - append2(0, value, internal::tape_type::DOUBLE); -} - -simdjson_really_inline void tape_writer::skip() noexcept { - next_tape_loc++; -} - -simdjson_really_inline void tape_writer::skip_large_integer() noexcept { - next_tape_loc += 2; -} - -simdjson_really_inline void tape_writer::skip_double() noexcept { - next_tape_loc += 2; -} - -simdjson_really_inline void tape_writer::append(uint64_t val, internal::tape_type t) noexcept { - *next_tape_loc = val | ((uint64_t(char(t))) << 56); - next_tape_loc++; -} - -template -simdjson_really_inline void tape_writer::append2(uint64_t val, T val2, internal::tape_type t) noexcept { - append(val, t); - static_assert(sizeof(val2) == sizeof(*next_tape_loc), "Type is not 64 bits!"); - memcpy(next_tape_loc, &val2, sizeof(val2)); - next_tape_loc++; -} - -simdjson_really_inline void tape_writer::write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept { - tape_loc = val | ((uint64_t(char(t))) << 56); -} - -} // namespace stage2 -} // unnamed namespace -} // namespace SIMDJSON_IMPLEMENTATION -} // namespace simdjson -/* end file src/generic/stage2/tape_writer.h */ - -namespace simdjson { -namespace SIMDJSON_IMPLEMENTATION { -namespace { -namespace stage2 { - -struct tape_builder { - template - simdjson_warn_unused static simdjson_really_inline error_code parse_document( - dom_parser_implementation &dom_parser, - dom::document &doc) noexcept; - - /** Called when a non-empty document starts. */ - simdjson_warn_unused simdjson_really_inline error_code visit_document_start(json_iterator &iter) noexcept; - /** Called when a non-empty document ends without error. */ - simdjson_warn_unused simdjson_really_inline error_code visit_document_end(json_iterator &iter) noexcept; - - /** Called when a non-empty array starts. */ - simdjson_warn_unused simdjson_really_inline error_code visit_array_start(json_iterator &iter) noexcept; - /** Called when a non-empty array ends. */ - simdjson_warn_unused simdjson_really_inline error_code visit_array_end(json_iterator &iter) noexcept; - /** Called when an empty array is found. */ - simdjson_warn_unused simdjson_really_inline error_code visit_empty_array(json_iterator &iter) noexcept; - - /** Called when a non-empty object starts. */ - simdjson_warn_unused simdjson_really_inline error_code visit_object_start(json_iterator &iter) noexcept; - /** - * Called when a key in a field is encountered. - * - * primitive, visit_object_start, visit_empty_object, visit_array_start, or visit_empty_array - * will be called after this with the field value. - */ - simdjson_warn_unused simdjson_really_inline error_code visit_key(json_iterator &iter, const uint8_t *key) noexcept; - /** Called when a non-empty object ends. */ - simdjson_warn_unused simdjson_really_inline error_code visit_object_end(json_iterator &iter) noexcept; - /** Called when an empty object is found. */ - simdjson_warn_unused simdjson_really_inline error_code visit_empty_object(json_iterator &iter) noexcept; - - /** - * Called when a string, number, boolean or null is found. - */ - simdjson_warn_unused simdjson_really_inline error_code visit_primitive(json_iterator &iter, const uint8_t *value) noexcept; - /** - * Called when a string, number, boolean or null is found at the top level of a document (i.e. - * when there is no array or object and the entire document is a single string, number, boolean or - * null. - * - * This is separate from primitive() because simdjson's normal primitive parsing routines assume - * there is at least one more token after the value, which is only true in an array or object. - */ - simdjson_warn_unused simdjson_really_inline error_code visit_root_primitive(json_iterator &iter, const uint8_t *value) noexcept; - - simdjson_warn_unused simdjson_really_inline error_code visit_string(json_iterator &iter, const uint8_t *value, bool key = false) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_number(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_true_atom(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_false_atom(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_null_atom(json_iterator &iter, const uint8_t *value) noexcept; - - simdjson_warn_unused simdjson_really_inline error_code visit_root_string(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_root_number(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_root_true_atom(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_root_false_atom(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_root_null_atom(json_iterator &iter, const uint8_t *value) noexcept; - - /** Called each time a new field or element in an array or object is found. */ - simdjson_warn_unused simdjson_really_inline error_code increment_count(json_iterator &iter) noexcept; - - /** Next location to write to tape */ - tape_writer tape; -private: - /** Next write location in the string buf for stage 2 parsing */ - uint8_t *current_string_buf_loc; - - simdjson_really_inline tape_builder(dom::document &doc) noexcept; - - simdjson_really_inline uint32_t next_tape_index(json_iterator &iter) const noexcept; - simdjson_really_inline void start_container(json_iterator &iter) noexcept; - simdjson_warn_unused simdjson_really_inline error_code end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept; - simdjson_warn_unused simdjson_really_inline error_code empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept; - simdjson_really_inline uint8_t *on_start_string(json_iterator &iter) noexcept; - simdjson_really_inline void on_end_string(uint8_t *dst) noexcept; -}; // class tape_builder - -template -simdjson_warn_unused simdjson_really_inline error_code tape_builder::parse_document( - dom_parser_implementation &dom_parser, - dom::document &doc) noexcept { - dom_parser.doc = &doc; - json_iterator iter(dom_parser, STREAMING ? dom_parser.next_structural_index : 0); - tape_builder builder(doc); - return iter.walk_document(builder); -} - -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_primitive(json_iterator &iter, const uint8_t *value) noexcept { - return iter.visit_root_primitive(*this, value); -} -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_primitive(json_iterator &iter, const uint8_t *value) noexcept { - return iter.visit_primitive(*this, value); -} -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_empty_object(json_iterator &iter) noexcept { - return empty_container(iter, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT); -} -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_empty_array(json_iterator &iter) noexcept { - return empty_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); -} - -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_document_start(json_iterator &iter) noexcept { - start_container(iter); - return SUCCESS; -} -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_object_start(json_iterator &iter) noexcept { - start_container(iter); - return SUCCESS; -} -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_array_start(json_iterator &iter) noexcept { - start_container(iter); - return SUCCESS; -} - -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_object_end(json_iterator &iter) noexcept { - return end_container(iter, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT); -} -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_array_end(json_iterator &iter) noexcept { - return end_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); -} -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_document_end(json_iterator &iter) noexcept { - constexpr uint32_t start_tape_index = 0; - tape.append(start_tape_index, internal::tape_type::ROOT); - tape_writer::write(iter.dom_parser.doc->tape[start_tape_index], next_tape_index(iter), internal::tape_type::ROOT); - return SUCCESS; -} -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_key(json_iterator &iter, const uint8_t *key) noexcept { - return visit_string(iter, key, true); -} - -simdjson_warn_unused simdjson_really_inline error_code tape_builder::increment_count(json_iterator &iter) noexcept { - iter.dom_parser.open_containers[iter.depth].count++; // we have a key value pair in the object at parser.dom_parser.depth - 1 - return SUCCESS; -} - -simdjson_really_inline tape_builder::tape_builder(dom::document &doc) noexcept : tape{doc.tape.get()}, current_string_buf_loc{doc.string_buf.get()} {} - -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_string(json_iterator &iter, const uint8_t *value, bool key) noexcept { - iter.log_value(key ? "key" : "string"); - uint8_t *dst = on_start_string(iter); - dst = stringparsing::parse_string(value+1, dst); - if (dst == nullptr) { - iter.log_error("Invalid escape in string"); - return STRING_ERROR; - } - on_end_string(dst); - return SUCCESS; -} - -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_string(json_iterator &iter, const uint8_t *value) noexcept { - return visit_string(iter, value); -} - -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_number(json_iterator &iter, const uint8_t *value) noexcept { - iter.log_value("number"); - return numberparsing::parse_number(value, tape); -} - -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_number(json_iterator &iter, const uint8_t *value) noexcept { - // - // We need to make a copy to make sure that the string is space terminated. - // This is not about padding the input, which should already padded up - // to len + SIMDJSON_PADDING. However, we have no control at this stage - // on how the padding was done. What if the input string was padded with nulls? - // It is quite common for an input string to have an extra null character (C string). - // We do not want to allow 9\0 (where \0 is the null character) inside a JSON - // document, but the string "9\0" by itself is fine. So we make a copy and - // pad the input with spaces when we know that there is just one input element. - // This copy is relatively expensive, but it will almost never be called in - // practice unless you are in the strange scenario where you have many JSON - // documents made of single atoms. - // - uint8_t *copy = static_cast(malloc(iter.remaining_len() + SIMDJSON_PADDING)); - if (copy == nullptr) { return MEMALLOC; } - std::memcpy(copy, value, iter.remaining_len()); - std::memset(copy + iter.remaining_len(), ' ', SIMDJSON_PADDING); - error_code error = visit_number(iter, copy); - free(copy); - return error; -} - -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_true_atom(json_iterator &iter, const uint8_t *value) noexcept { - iter.log_value("true"); - if (!atomparsing::is_valid_true_atom(value)) { return T_ATOM_ERROR; } - tape.append(0, internal::tape_type::TRUE_VALUE); - return SUCCESS; -} - -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_true_atom(json_iterator &iter, const uint8_t *value) noexcept { - iter.log_value("true"); - if (!atomparsing::is_valid_true_atom(value, iter.remaining_len())) { return T_ATOM_ERROR; } - tape.append(0, internal::tape_type::TRUE_VALUE); - return SUCCESS; -} - -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_false_atom(json_iterator &iter, const uint8_t *value) noexcept { - iter.log_value("false"); - if (!atomparsing::is_valid_false_atom(value)) { return F_ATOM_ERROR; } - tape.append(0, internal::tape_type::FALSE_VALUE); - return SUCCESS; -} - -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_false_atom(json_iterator &iter, const uint8_t *value) noexcept { - iter.log_value("false"); - if (!atomparsing::is_valid_false_atom(value, iter.remaining_len())) { return F_ATOM_ERROR; } - tape.append(0, internal::tape_type::FALSE_VALUE); - return SUCCESS; -} - -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_null_atom(json_iterator &iter, const uint8_t *value) noexcept { - iter.log_value("null"); - if (!atomparsing::is_valid_null_atom(value)) { return N_ATOM_ERROR; } - tape.append(0, internal::tape_type::NULL_VALUE); - return SUCCESS; -} - -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_null_atom(json_iterator &iter, const uint8_t *value) noexcept { - iter.log_value("null"); - if (!atomparsing::is_valid_null_atom(value, iter.remaining_len())) { return N_ATOM_ERROR; } - tape.append(0, internal::tape_type::NULL_VALUE); - return SUCCESS; -} - -// private: - -simdjson_really_inline uint32_t tape_builder::next_tape_index(json_iterator &iter) const noexcept { - return uint32_t(tape.next_tape_loc - iter.dom_parser.doc->tape.get()); -} - -simdjson_warn_unused simdjson_really_inline error_code tape_builder::empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept { - auto start_index = next_tape_index(iter); - tape.append(start_index+2, start); - tape.append(start_index, end); - return SUCCESS; -} - -simdjson_really_inline void tape_builder::start_container(json_iterator &iter) noexcept { - iter.dom_parser.open_containers[iter.depth].tape_index = next_tape_index(iter); - iter.dom_parser.open_containers[iter.depth].count = 0; - tape.skip(); // We don't actually *write* the start element until the end. -} - -simdjson_warn_unused simdjson_really_inline error_code tape_builder::end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept { - // Write the ending tape element, pointing at the start location - const uint32_t start_tape_index = iter.dom_parser.open_containers[iter.depth].tape_index; - tape.append(start_tape_index, end); - // Write the start tape element, pointing at the end location (and including count) - // count can overflow if it exceeds 24 bits... so we saturate - // the convention being that a cnt of 0xffffff or more is undetermined in value (>= 0xffffff). - const uint32_t count = iter.dom_parser.open_containers[iter.depth].count; - const uint32_t cntsat = count > 0xFFFFFF ? 0xFFFFFF : count; - tape_writer::write(iter.dom_parser.doc->tape[start_tape_index], next_tape_index(iter) | (uint64_t(cntsat) << 32), start); - return SUCCESS; -} - -simdjson_really_inline uint8_t *tape_builder::on_start_string(json_iterator &iter) noexcept { - // we advance the point, accounting for the fact that we have a NULL termination - tape.append(current_string_buf_loc - iter.dom_parser.doc->string_buf.get(), internal::tape_type::STRING); - return current_string_buf_loc + sizeof(uint32_t); -} - -simdjson_really_inline void tape_builder::on_end_string(uint8_t *dst) noexcept { - uint32_t str_length = uint32_t(dst - (current_string_buf_loc + sizeof(uint32_t))); - // TODO check for overflow in case someone has a crazy string (>=4GB?) - // But only add the overflow check when the document itself exceeds 4GB - // Currently unneeded because we refuse to parse docs larger or equal to 4GB. - memcpy(current_string_buf_loc, &str_length, sizeof(uint32_t)); - // NULL termination is still handy if you expect all your strings to - // be NULL terminated? It comes at a small cost - *dst = 0; - current_string_buf_loc = dst + 1; -} - -} // namespace stage2 -} // unnamed namespace -} // namespace SIMDJSON_IMPLEMENTATION -} // namespace simdjson -/* end file src/generic/stage2/tape_builder.h */ - -// -// Implementation-specific overrides -// -namespace simdjson { -namespace SIMDJSON_IMPLEMENTATION { -namespace { -namespace stage1 { - -simdjson_really_inline uint64_t json_string_scanner::find_escaped(uint64_t backslash) { - // On PPC, we don't short-circuit this if there are no backslashes, because the branch gives us no - // benefit and therefore makes things worse. - // if (!backslash) { uint64_t escaped = prev_escaped; prev_escaped = 0; return escaped; } - return find_escaped_branchless(backslash); -} - -} // namespace stage1 -} // unnamed namespace - -simdjson_warn_unused error_code implementation::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept { - return ppc64::stage1::json_minifier::minify<64>(buf, len, dst, dst_len); -} - -simdjson_warn_unused error_code dom_parser_implementation::stage1(const uint8_t *_buf, size_t _len, bool streaming) noexcept { - this->buf = _buf; - this->len = _len; - return ppc64::stage1::json_structural_indexer::index<64>(buf, len, *this, streaming); -} - -simdjson_warn_unused bool implementation::validate_utf8(const char *buf, size_t len) const noexcept { - return ppc64::stage1::generic_validate_utf8(buf,len); -} - -simdjson_warn_unused error_code dom_parser_implementation::stage2(dom::document &_doc) noexcept { - return stage2::tape_builder::parse_document(*this, _doc); -} - -simdjson_warn_unused error_code dom_parser_implementation::stage2_next(dom::document &_doc) noexcept { - return stage2::tape_builder::parse_document(*this, _doc); -} - -simdjson_warn_unused error_code dom_parser_implementation::parse(const uint8_t *_buf, size_t _len, dom::document &_doc) noexcept { - auto error = stage1(_buf, _len, false); - if (error) { return error; } - return stage2(_doc); -} - -} // namespace SIMDJSON_IMPLEMENTATION -} // namespace simdjson - -/* begin file include/simdjson/ppc64/end.h */ -#undef SIMDJSON_IMPLEMENTATION -/* end file include/simdjson/ppc64/end.h */ -/* end file src/ppc64/dom_parser_implementation.cpp */ +/* end file include/simdjson/haswell/end.h */ #endif #if SIMDJSON_IMPLEMENTATION_WESTMERE /* begin file src/westmere/implementation.cpp */ @@ -9919,7 +7937,7 @@ simdjson_warn_unused error_code implementation::create_dom_parser_implementation SIMDJSON_UNTARGET_REGION #undef SIMDJSON_IMPLEMENTATION /* end file include/simdjson/westmere/end.h */ -/* end file src/westmere/implementation.cpp */ +/* end file include/simdjson/westmere/end.h */ /* begin file src/westmere/dom_parser_implementation.cpp */ /* begin file include/simdjson/westmere/begin.h */ #define SIMDJSON_IMPLEMENTATION westmere @@ -10173,7 +8191,7 @@ using namespace simd; } // The only problem that can happen at EOF is that a multibyte character is too short - // or a byte value too large in the last bytes: check_special_cases only checks for bytes + // or a byte value too large in the last bytes: check_special_cases only checks for bytes // too large in the first of two bytes. simdjson_really_inline void check_eof() { // If the previous block had incomplete UTF-8 characters at the end, an ASCII block can't @@ -10475,7 +8493,7 @@ namespace stage1 { * We seek to identify pseudo-structural characters. Anything that is inside * a string must be omitted (hence & ~_string.string_tail()). * Otherwise, pseudo-structural characters come in two forms. - * 1. We have the structural characters ([,],{,},:, comma). The + * 1. We have the structural characters ([,],{,},:, comma). The * term 'structural character' is from the JSON RFC. * 2. We have the 'scalar pseudo-structural characters'. * Scalars are quotes, and any character except structural characters and white space. @@ -10685,7 +8703,7 @@ error_code json_minifier::minify(const uint8_t *buf, size_t len, uint8_t *dst, s // Index the last (remainder) block, padded with spaces uint8_t block[STEP_SIZE]; - size_t remaining_bytes = reader.get_remainder(block); + size_t remaining_bytes = reader.get_remainder(block); if (remaining_bytes > 0) { // We do not want to write directly to the output stream. Rather, we write // to a local buffer (for safety). @@ -11006,7 +9024,7 @@ simdjson_really_inline error_code json_structural_indexer::finish(dom_parser_imp } // unnamed namespace } // namespace SIMDJSON_IMPLEMENTATION } // namespace simdjson -/* end file src/generic/stage1/json_structural_indexer.h */ +/* end file src/generic/stage1/find_next_document_index.h */ /* begin file src/generic/stage1/utf8_validator.h */ namespace simdjson { namespace SIMDJSON_IMPLEMENTATION { @@ -11455,7 +9473,7 @@ simdjson_warn_unused simdjson_really_inline error_code json_iterator::visit_prim } // unnamed namespace } // namespace SIMDJSON_IMPLEMENTATION } // namespace simdjson -/* end file src/generic/stage2/json_iterator.h */ +/* end file src/generic/stage2/logger.h */ /* begin file src/generic/stage2/tape_writer.h */ namespace simdjson { namespace SIMDJSON_IMPLEMENTATION { @@ -11465,7 +9483,7 @@ namespace stage2 { struct tape_writer { /** The next place to write to tape */ uint64_t *next_tape_loc; - + /** Write a signed 64-bit value to tape. */ simdjson_really_inline void append_s64(int64_t value) noexcept; @@ -11845,7 +9863,7 @@ simdjson_really_inline void tape_builder::on_end_string(uint8_t *dst) noexcept { } // unnamed namespace } // namespace SIMDJSON_IMPLEMENTATION } // namespace simdjson -/* end file src/generic/stage2/tape_builder.h */ +/* end file src/generic/stage2/tape_writer.h */ // // Implementation-specific overrides @@ -11899,8 +9917,8 @@ simdjson_warn_unused error_code dom_parser_implementation::parse(const uint8_t * SIMDJSON_UNTARGET_REGION #undef SIMDJSON_IMPLEMENTATION /* end file include/simdjson/westmere/end.h */ -/* end file src/westmere/dom_parser_implementation.cpp */ +/* end file include/simdjson/westmere/end.h */ #endif SIMDJSON_POP_DISABLE_WARNINGS -/* end file src/simdjson.cpp */ +/* end file include/simdjson/westmere/end.h */ diff --git a/singleheader/simdjson.h b/singleheader/simdjson.h index 5b8a8b66..9e6de00a 100644 --- a/singleheader/simdjson.h +++ b/singleheader/simdjson.h @@ -1,4 +1,4 @@ -/* auto-generated on 2020-11-03 21:40:10 +0100. Do not edit! */ +/* auto-generated on Sun Oct 25 19:17:26 EDT 2020. Do not edit! */ /* begin file include/simdjson.h */ #ifndef SIMDJSON_H #define SIMDJSON_H @@ -7,7 +7,7 @@ * @mainpage * * Check the [README.md](https://github.com/lemire/simdjson/blob/master/README.md#simdjson--parsing-gigabytes-of-json-per-second). - * + * * Sample code. See https://github.com/simdjson/simdjson/blob/master/doc/basics.md for more examples. #include "simdjson.h" @@ -23,7 +23,7 @@ { "12345" : {"a":12.34, "b":56.78, "c": 9998877} }, { "12545" : {"a":11.44, "b":12.78, "c": 11111111} } ] )"_padded; - + for (simdjson::dom::object obj : parser.parse(abstract_json)) { for(const auto& key_value : obj) { cout << "key: " << key_value.key << " : "; @@ -88,7 +88,7 @@ #include #include #ifndef _WIN32 -// strcasecmp, strncasecmp +// strcasecmp, strncasecmp #include #endif @@ -98,7 +98,7 @@ * We want to differentiate carefully between * clang under visual studio and regular visual * studio. - * + * * Under clang for Windows, we enable: * * target pragmas so that part and only part of the * code gets compiled for advanced instructions. @@ -124,9 +124,7 @@ #define SIMDJSON_IS_X86_64 1 #elif defined(__aarch64__) || defined(_M_ARM64) #define SIMDJSON_IS_ARM64 1 -#elif defined(__PPC64__) || defined(_M_PPC64) -#define SIMDJSON_IS_PPC64 1 -#else +#else #define SIMDJSON_IS_32BITS 1 // We do not support 32-bit platforms, but it can be @@ -135,20 +133,16 @@ #define SIMDJSON_IS_X86_32BITS 1 #elif defined(__arm__) || defined(_M_ARM) #define SIMDJSON_IS_ARM_32BITS 1 -#elif defined(__PPC__) || defined(_M_PPC) -#define SIMDJSON_IS_PPC_32BITS 1 #endif #endif // defined(__x86_64__) || defined(_M_AMD64) #ifdef SIMDJSON_IS_32BITS -#ifndef SIMDJSON_NO_PORTABILITY_WARNING #pragma message("The simdjson library is designed \ for 64-bit processors and it seems that you are not \ compiling for a known 64-bit platform. All fast kernels \ will be disabled and performance may be poor. Please \ -use a 64-bit target such as x64, 64-bit ARM or 64-bit PPC.") -#endif // SIMDJSON_NO_PORTABILITY_WARNING +use a 64-bit target such as x64 or 64-bit ARM.") #endif // SIMDJSON_IS_32BITS // this is almost standard? @@ -159,12 +153,12 @@ use a 64-bit target such as x64, 64-bit ARM or 64-bit PPC.") // Our fast kernels require 64-bit systems. // -// On 32-bit x86, we lack 64-bit popcnt, lzcnt, blsr instructions. -// Furthermore, the number of SIMD registers is reduced. +// On 32-bit x86, we lack 64-bit popcnt, lzcnt, blsr instructions. +// Furthermore, the number of SIMD registers is reduced. // // On 32-bit ARM, we would have smaller registers. // -// The simdjson users should still have the fallback kernel. It is +// The simdjson users should still have the fallback kernel. It is // slower, but it should run everywhere. // @@ -2002,7 +1996,7 @@ namespace std { #endif // SIMDJSON_COMMON_DEFS_H -/* end file include/simdjson/common_defs.h */ +/* end file include/simdjson/nonstd/string_view.hpp */ SIMDJSON_PUSH_DISABLE_WARNINGS SIMDJSON_DISABLE_UNDESIRED_WARNINGS @@ -2289,7 +2283,6 @@ struct simdjson_result : public internal::simdjson_result_base { #endif // SIMDJSON_EXCEPTIONS }; // struct simdjson_result -#ifndef SIMDJSON_DISABLE_DEPRECATED_API /** * @deprecated This is an alias and will be removed, use error_code instead */ @@ -2300,7 +2293,7 @@ using ErrorValues [[deprecated("This is an alias and will be removed, use error_ */ [[deprecated("Error codes should be stored and returned as `error_code`, use `error_message()` instead.")]] inline const std::string error_message(int error) noexcept; -#endif // SIMDJSON_DISABLE_DEPRECATED_API + } // namespace simdjson #endif // SIMDJSON_ERROR_H @@ -2454,11 +2447,11 @@ inline simdjson::padded_string operator "" _padded(const char *str, size_t len) namespace simdjson { namespace internal { -// The allocate_padded_buffer function is a low-level function to allocate memory -// with padding so we can read past the "length" bytes safely. It is used by +// The allocate_padded_buffer function is a low-level function to allocate memory +// with padding so we can read past the "length" bytes safely. It is used by // the padded_string class automatically. It returns nullptr in case // of error: the caller should check for a null pointer. -// The length parameter is the maximum size in bytes of the string. +// The length parameter is the maximum size in bytes of the string. // The caller is responsible to free the memory (e.g., delete[] (...)). inline char *allocate_padded_buffer(size_t length) noexcept; @@ -2479,7 +2472,7 @@ namespace simdjson { * * Minify the input string assuming that it represents a JSON string, does not parse or validate. * This function is much faster than parsing a JSON string and then writing a minified version of it. - * However, it does not validate the input. It will merely return an error in simple cases (e.g., if + * However, it does not validate the input. It will merely return an error in simple cases (e.g., if * there is a string that was never terminated). * * @@ -2494,7 +2487,7 @@ simdjson_warn_unused error_code minify(const char *buf, size_t len, char *dst, s } // namespace simdjson #endif // SIMDJSON_MINIFY_H -/* end file include/simdjson/minify.h */ +/* end file include/simdjson/padded_string.h */ /* begin file include/simdjson/implementation.h */ #ifndef SIMDJSON_IMPLEMENTATION_H #define SIMDJSON_IMPLEMENTATION_H @@ -2526,7 +2519,7 @@ public: * @private For internal implementation use * * Run a full JSON parse on a single document (stage1 + stage2). - * + * * Guaranteed only to be called when capacity > document length. * * Overridden by each implementation. @@ -2541,7 +2534,7 @@ public: * @private For internal implementation use * * Stage 1 of the document parser. - * + * * Guaranteed only to be called when capacity > document length. * * Overridden by each implementation. @@ -2557,7 +2550,7 @@ public: * @private For internal implementation use * * Stage 2 of the document parser. - * + * * Called after stage1(). * * Overridden by each implementation. @@ -2582,7 +2575,7 @@ public: /** * Change the capacity of this parser. - * + * * Generally used for reallocation. * * @param capacity The new capacity. @@ -2760,17 +2753,10 @@ enum instruction_set { SSE42 = 0x8, PCLMULQDQ = 0x10, BMI1 = 0x20, - BMI2 = 0x40, - ALTIVEC = 0x80 + BMI2 = 0x40 }; -#if defined(__PPC64__) - -static inline uint32_t detect_supported_architectures() { - return instruction_set::ALTIVEC; -} - -#elif defined(__arm__) || defined(__aarch64__) // incl. armel, armhf, arm64 +#if defined(__arm__) || defined(__aarch64__) // incl. armel, armhf, arm64 #if defined(__ARM_NEON) @@ -2938,12 +2924,12 @@ public: * @return the name of the implementation, e.g. "haswell", "westmere", "arm64" */ virtual const std::string &description() const { return _description; } - + /** * The instruction sets this implementation is compiled against * and the current CPU match. This function may poll the current CPU/system * and should therefore not be called too often if performance is a concern. - * + * * * @return true if the implementation can be safely used on the current system (determined at runtime) */ @@ -2989,9 +2975,9 @@ public: * @return the error code, or SUCCESS if there was no error. */ simdjson_warn_unused virtual error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept = 0; - - - /** + + + /** * Validate the UTF-8 string. * * Overridden by each implementation. @@ -3120,7 +3106,7 @@ extern SIMDJSON_DLLIMPORTEXPORT internal::atomic_ptr activ } // namespace simdjson #endif // SIMDJSON_IMPLEMENTATION_H -/* end file include/simdjson/implementation.h */ +/* end file include/simdjson/internal/isadetection.h */ /* begin file include/simdjson/dom/array.h */ #ifndef SIMDJSON_DOM_ARRAY_H #define SIMDJSON_DOM_ARRAY_H @@ -3207,7 +3193,7 @@ public: } // namespace simdjson #endif // SIMDJSON_INTERNAL_TAPE_REF_H -/* end file include/simdjson/internal/tape_ref.h */ +/* end file include/simdjson/internal/tape_type.h */ namespace simdjson { @@ -3289,6 +3275,17 @@ public: * is 0xFFFFFF then the size is 0xFFFFFF or greater. */ inline size_t size() const noexcept; + /** + * Get the total number of slots used by this array on the tape. + * + * Note that this is not the same thing as `size()`, which reports the + * number of actual elements within an array (not counting its children). + * + * Since an element can use 1 or 2 slots on the tape, you can only use this + * to figure out the total size of an array (including its children, + * recursively) if you know its structure ahead of time. + **/ + inline size_t slots() const noexcept; /** * Get the value associated with the given JSON pointer. We use the RFC 6901 * https://tools.ietf.org/html/rfc6901 standard, interpreting the current node @@ -3310,7 +3307,7 @@ public: /** * Get the value at the given index. This function has linear-time complexity and * is equivalent to the following: - * + * * size_t i=0; * for (auto element : *this) { * if (i == index) { return element; } @@ -3319,7 +3316,7 @@ public: * return INDEX_OUT_OF_BOUNDS; * * Avoid calling the at() function repeatedly. - * + * * @return The value at the given index, or: * - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length */ @@ -3375,7 +3372,7 @@ inline constexpr bool enable_view parser(new dom::parser{}); - * auto error = parser->load(f).get(root); - * - * You can then move std::unique_ptr safely. - * * @note This is not thread safe: one parser cannot produce two documents at the same time! */ class parser { @@ -3530,20 +3518,16 @@ public: * * dom::parser parser; * const element doc = parser.load("jsonexamples/twitter.json"); - * + * * The function is eager: the file's content is loaded in memory inside the parser instance * and immediately parsed. The file can be deleted after the `parser.load` call. - * + * * ### IMPORTANT: Document Lifetime * * The JSON document still lives in the parser: this is the most efficient way to parse JSON * documents because it reuses the same buffers, but you *must* use the document before you * destroy the parser or call parse() again. * - * Moving the parser instance is safe, but it invalidates the element instances. You may store - * the parser instance without moving it by wrapping it inside an `unique_ptr` instance like - * so: `std::unique_ptr parser(new dom::parser{});`. - * * ### Parser Capacity * * If the parser's current capacity is less than the file length, it will allocate enough capacity @@ -3564,7 +3548,7 @@ public: * * dom::parser parser; * element doc = parser.parse(buf, len); - * + * * The function eagerly parses the input: the input can be modified and discarded after * the `parser.parse(buf, len)` call has completed. * @@ -3574,32 +3558,28 @@ public: * documents because it reuses the same buffers, but you *must* use the document before you * destroy the parser or call parse() again. * - * Moving the parser instance is safe, but it invalidates the element instances. You may store - * the parser instance without moving it by wrapping it inside an `unique_ptr` instance like - * so: `std::unique_ptr parser(new dom::parser{});`. - * * ### REQUIRED: Buffer Padding * * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what * those bytes are initialized to, as long as they are allocated. * * If realloc_if_needed is true (the default), it is assumed that the buffer does *not* have enough padding, - * and it is copied into an enlarged temporary buffer before parsing. Thus the following is safe: - * + * and it is copied into an enlarged temporary buffer before parsing. Thus the following is safe: + * * const char *json = R"({"key":"value"})"; * const size_t json_len = std::strlen(json); * simdjson::dom::parser parser; * simdjson::dom::element element = parser.parse(json, json_len); - * - * If you set realloc_if_needed to false (e.g., parser.parse(json, json_len, false)), + * + * If you set realloc_if_needed to false (e.g., parser.parse(json, json_len, false)), * you must provide a buffer with at least SIMDJSON_PADDING extra bytes at the end. * The benefit of setting realloc_if_needed to false is that you avoid a temporary * memory allocation and a copy. - * + * * The padded bytes may be read. It is not important how you initialize * these bytes though we recommend a sensible default like null character values or spaces. * For example, the following low-level code is safe: - * + * * const char *json = R"({"key":"value"})"; * const size_t json_len = std::strlen(json); * std::unique_ptr padded_json_copy{new char[json_len + SIMDJSON_PADDING]}; @@ -3649,11 +3629,11 @@ public: * * The file is loaded in memory and can be safely deleted after the `parser.load_many(path)` * function has returned. The memory is held by the `parser` instance. - * + * * The function is lazy: it may be that no more than one JSON document at a time is parsed. * And, possibly, no document many have been parsed when the `parser.load_many(path)` function * returned. - * + * * ### Format * * The file must contain a series of one or more JSON documents, concatenated into a single @@ -3664,7 +3644,7 @@ public: * Documents that consist of an object or array may omit the whitespace between them, concatenating * with no separator. documents that consist of a single primitive (i.e. documents that are not * arrays or objects) MUST be separated with whitespace. - * + * * The documents must not exceed batch_size bytes (by default 1MB) or they will fail to parse. * Setting batch_size to excessively large or excesively small values may impact negatively the * performance. @@ -3697,7 +3677,7 @@ public: * If the parser's current capacity is less than batch_size, it will allocate enough capacity * to handle it (up to max_capacity). * - * @param path File name pointing at the concatenated JSON to parse. + * @param path File name pointing at the concatenated JSON to parse. * @param batch_size The batch size to use. MUST be larger than the largest document. The sweet * spot is cache-related: small enough to fit in cache, yet big enough to * parse as many documents as possible in one tight loop. @@ -3724,25 +3704,25 @@ public: * The function is lazy: it may be that no more than one JSON document at a time is parsed. * And, possibly, no document many have been parsed when the `parser.load_many(path)` function * returned. - * + * * The caller is responsabile to ensure that the input string data remains unchanged and is * not deleted during the loop. In particular, the following is unsafe and will not compile: - * + * * auto docs = parser.parse_many("[\"temporary data\"]"_padded); * // here the string "[\"temporary data\"]" may no longer exist in memory * // the parser instance may not have even accessed the input yet * for (element doc : docs) { * cout << std::string(doc["title"]) << endl; * } - * - * The following is safe: - * + * + * The following is safe: + * * auto json = "[\"temporary data\"]"_padded; * auto docs = parser.parse_many(json); * for (element doc : docs) { * cout << std::string(doc["title"]) << endl; * } - * + * * ### Format * * The buffer must contain a series of one or more JSON documents, concatenated into a single @@ -3753,7 +3733,7 @@ public: * documents that consist of an object or array may omit the whitespace between them, concatenating * with no separator. documents that consist of a single primitive (i.e. documents that are not * arrays or objects) MUST be separated with whitespace. - * + * * The documents must not exceed batch_size bytes (by default 1MB) or they will fail to parse. * Setting batch_size to excessively large or excesively small values may impact negatively the * performance. @@ -3812,7 +3792,7 @@ public: /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ inline simdjson_result parse_many(const padded_string &s, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept; inline simdjson_result parse_many(const padded_string &&s, size_t batch_size) = delete;// unsafe - + /** @private We do not want to allow implicit conversion from C string to std::string. */ simdjson_result parse_many(const char *buf, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept = delete; @@ -3826,7 +3806,6 @@ public: */ simdjson_warn_unused inline error_code allocate(size_t capacity, size_t max_depth = DEFAULT_MAX_DEPTH) noexcept; -#ifndef SIMDJSON_DISABLE_DEPRECATED_API /** * @private deprecated because it returns bool instead of error_code, which is our standard for * failures. Use allocate() instead. @@ -3840,7 +3819,7 @@ public: */ [[deprecated("Use allocate() instead.")]] simdjson_warn_unused inline bool allocate_capacity(size_t capacity, size_t max_depth = DEFAULT_MAX_DEPTH) noexcept; -#endif // SIMDJSON_DISABLE_DEPRECATED_API + /** * The largest document this parser can support without reallocating. * @@ -3972,7 +3951,7 @@ private: } // namespace simdjson #endif // SIMDJSON_DOM_PARSER_H -/* end file include/simdjson/dom/parser.h */ +/* end file include/simdjson/dom/document.h */ #ifdef SIMDJSON_THREADS_ENABLED #include #include @@ -3991,13 +3970,13 @@ struct stage1_worker { stage1_worker(stage1_worker&&) = delete; stage1_worker operator=(const stage1_worker&) = delete; ~stage1_worker(); - /** + /** * We only start the thread when it is needed, not at object construction, this may throw. - * You should only call this once. + * You should only call this once. **/ void start_thread(); - /** - * Start a stage 1 job. You should first call 'run', then 'finish'. + /** + * Start a stage 1 job. You should first call 'run', then 'finish'. * You must call start_thread once before. */ void run(document_stream * ds, dom::parser * stage1, size_t next_batch_start); @@ -4006,10 +3985,10 @@ struct stage1_worker { private: - /** + /** * Normally, we would never stop the thread. But we do in the destructor. - * This function is only safe assuming that you are not waiting for results. You - * should have called run, then finish, and be done. + * This function is only safe assuming that you are not waiting for results. You + * should have called run, then finish, and be done. **/ void stop_thread(); @@ -4018,8 +3997,8 @@ private: dom::parser * stage1_thread_parser{}; size_t _next_batch_start{}; document_stream * owner{}; - /** - * We have two state variables. This could be streamlined to one variable in the future but + /** + * We have two state variables. This could be streamlined to one variable in the future but * we use two for clarity. */ bool has_work{false}; @@ -4077,7 +4056,7 @@ public: simdjson_really_inline bool operator!=(const iterator &other) const noexcept; /** * @private - * + * * Gives the current index in the input document in bytes. * * document_stream stream = parser.parse_many(json,window); @@ -4085,15 +4064,15 @@ public: * auto doc = *i; * size_t index = i.current_index(); * } - * + * * This function (current_index()) is experimental and the usage * may change in future versions of simdjson: we find the API somewhat - * awkward and we would like to offer something friendlier. + * awkward and we would like to offer something friendlier. */ simdjson_really_inline size_t current_index() const noexcept; /** * @private - * + * * Gives a view of the current document. * * document_stream stream = parser.parse_many(json,window); @@ -4101,14 +4080,14 @@ public: * auto doc = *i; * std::string_view v = i->source(); * } - * + * * The returned string_view instance is simply a map to the (unparsed) * source string: it may thus include white-space characters and all manner * of padding. - * + * * This function (source()) is experimental and the usage * may change in future versions of simdjson: we find the API somewhat - * awkward and we would like to offer something friendlier. + * awkward and we would like to offer something friendlier. */ simdjson_really_inline std::string_view source() const noexcept; @@ -4138,7 +4117,7 @@ private: /** * Construct a document_stream. Does not allocate or parse anything until the iterator is * used. - * + * * @param parser is a reference to the parser instance used to generate this document_stream * @param buf is the raw byte buffer we need to process * @param len is the length of the raw byte buffer in bytes @@ -4206,7 +4185,7 @@ private: #ifdef SIMDJSON_THREADS_ENABLED /** Indicates whether we use threads. Note that this needs to be a constant during the execution of the parsing. */ - bool use_thread; + bool use_thread; inline void load_from_stage1_thread() noexcept; @@ -4247,19 +4226,17 @@ public: simdjson_really_inline dom::document_stream::iterator begin() noexcept(false); simdjson_really_inline dom::document_stream::iterator end() noexcept(false); #else // SIMDJSON_EXCEPTIONS -#ifndef SIMDJSON_DISABLE_DEPRECATED_API [[deprecated("parse_many() and load_many() may return errors. Use document_stream stream; error = parser.parse_many().get(doc); instead.")]] simdjson_really_inline dom::document_stream::iterator begin() noexcept; [[deprecated("parse_many() and load_many() may return errors. Use document_stream stream; error = parser.parse_many().get(doc); instead.")]] simdjson_really_inline dom::document_stream::iterator end() noexcept; -#endif // SIMDJSON_DISABLE_DEPRECATED_API #endif // SIMDJSON_EXCEPTIONS }; // struct simdjson_result } // namespace simdjson #endif // SIMDJSON_DOCUMENT_STREAM_H -/* end file include/simdjson/dom/document_stream.h */ +/* end file include/simdjson/dom/document.h */ /* begin file include/simdjson/dom/element.h */ #ifndef SIMDJSON_DOM_ELEMENT_H #define SIMDJSON_DOM_ELEMENT_H @@ -4324,14 +4301,14 @@ public: */ inline simdjson_result get_object() const noexcept; /** - * Cast this element to a null-terminated C string. - * + * Cast this element to a null-terminated C string. + * * The string is guaranteed to be valid UTF-8. * * The get_c_str() function is equivalent to get(). - * + * * The length of the string is given by get_string_length(). Because JSON strings - * may contain null characters, it may be incorrect to use strlen to determine the + * may contain null characters, it may be incorrect to use strlen to determine the * string length. * * It is possible to get a single string_view instance which represents both the string @@ -4344,7 +4321,7 @@ public: inline simdjson_result get_c_str() const noexcept; /** * Gives the length in bytes of the string. - * + * * It is possible to get a single string_view instance which represents both the string * content and its length: see get_string(). * @@ -4353,8 +4330,8 @@ public: */ inline simdjson_result get_string_length() const noexcept; /** - * Cast this element to a string. - * + * Cast this element to a string. + * * The string is guaranteed to be valid UTF-8. * * Equivalent to get(). @@ -4539,7 +4516,7 @@ public: /** * Read this element as a null-terminated UTF-8 string. - * + * * Be mindful that JSON allows strings to contain null characters. * * Does *not* convert other types to a string; requires that the JSON type of the element was @@ -4662,7 +4639,7 @@ public: * dom::parser parser; * object obj = parser.parse(R"({ "": { "a": [ 10, 20, 30 ] }})"_padded); * obj.at_pointer("//a/1") == 20 - * + * * @return The value associated with the given JSON pointer, or: * - NO_SUCH_FIELD if a field does not exist in an object * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length @@ -4670,22 +4647,21 @@ public: * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed */ inline simdjson_result at_pointer(const std::string_view json_pointer) const noexcept; - -#ifndef SIMDJSON_DISABLE_DEPRECATED_API + /** - * + * * Version 0.4 of simdjson used an incorrect interpretation of the JSON Pointer standard * and allowed the following : - * + * * dom::parser parser; * element doc = parser.parse(R"({ "foo": { "a": [ 10, 20, 30 ] }})"_padded); * doc.at("foo/a/1") == 20 - * + * * Though it is intuitive, it is not compliant with RFC 6901 - * https://tools.ietf.org/html/rfc6901 - * + * https://tools.ietf.org/html/rfc6901 + * * For standard compliance, use the at_pointer function instead. - * + * * @return The value associated with the given JSON pointer, or: * - NO_SUCH_FIELD if a field does not exist in an object * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length @@ -4694,7 +4670,6 @@ public: */ [[deprecated("For standard compliance, use at_pointer instead, and prefix your pointers with a slash '/', see RFC6901 ")]] inline simdjson_result at(const std::string_view json_pointer) const noexcept; -#endif // SIMDJSON_DISABLE_DEPRECATED_API /** * Get the value at the given index. @@ -5093,7 +5068,7 @@ namespace simdjson { /** * The string_builder template and mini_formatter class - * are not part of our public API and are subject to change + * are not part of our public API and are subject to change * at any time! */ namespace internal { @@ -5107,7 +5082,7 @@ class mini_formatter; * the string_builder template could support both minification * and prettification, and various other tradeoffs. */ -template +template class string_builder { public: /** Construct an initially empty builder, would print the empty string **/ @@ -5120,12 +5095,12 @@ public: inline void append(simdjson::dom::object value); /** Reset the builder (so that it would print the empty string) **/ simdjson_really_inline void clear(); - /** + /** * Get access to the string. The string_view is owned by the builder - * and it is invalid to use it after the string_builder has been + * and it is invalid to use it after the string_builder has been * destroyed. * However you can make a copy of the string_view on memory that you - * own. + * own. */ simdjson_really_inline std::string_view str() const; /** Append a key_value_pair to the builder (to be printed) **/ @@ -5170,9 +5145,9 @@ public: simdjson_really_inline void string(std::string_view unescaped); /** Clears out the content. **/ simdjson_really_inline void clear(); - /** + /** * Get access to the buffer, it is own by the instance, but - * the user can make a copy. + * the user can make a copy. **/ simdjson_really_inline std::string_view str() const; @@ -5195,13 +5170,13 @@ namespace dom { * @param value The element. * @throw if there is an error with the underlying output stream. simdjson itself will not throw. */ -inline std::ostream& operator<<(std::ostream& out, simdjson::dom::element value) { +inline std::ostream& operator<<(std::ostream& out, simdjson::dom::element value) { simdjson::internal::string_builder<> sb; sb.append(value); return (out << sb.str()); } #if SIMDJSON_EXCEPTIONS -inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { if (x.error()) { throw simdjson::simdjson_error(x.error()); } return (out << x.value()); } @@ -5213,13 +5188,13 @@ inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result sb; sb.append(value); return (out << sb.str()); } #if SIMDJSON_EXCEPTIONS -inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { if (x.error()) { throw simdjson::simdjson_error(x.error()); } return (out << x.value()); } @@ -5231,17 +5206,17 @@ inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result sb; sb.append(value); return (out << sb.str()); } #if SIMDJSON_EXCEPTIONS -inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { if (x.error()) { throw simdjson::simdjson_error(x.error()); } return (out << x.value()); } -#endif +#endif } // namespace dom /** @@ -5252,10 +5227,10 @@ inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result +template std::string to_string(T x) { // in C++, to_string is standard: http://www.cplusplus.com/reference/string/to_string/ - // Currently minify and to_string are identical but in the future, they may + // Currently minify and to_string are identical but in the future, they may // differ. simdjson::internal::string_builder<> sb; sb.append(x); @@ -5263,12 +5238,12 @@ std::string to_string(T x) { return std::string(answer.data(), answer.size()); } #if SIMDJSON_EXCEPTIONS -template +template std::string to_string(simdjson_result x) { if (x.error()) { throw simdjson_error(x.error()); } return to_string(x.value()); } -#endif +#endif /** * Minifies a JSON element or document, printing the smallest possible valid JSON. @@ -5278,18 +5253,18 @@ std::string to_string(simdjson_result x) { * cout << minify(doc) << endl; // prints [1,2,3] * */ -template +template std::string minify(T x) { return to_string(x); } #if SIMDJSON_EXCEPTIONS -template +template std::string minify(simdjson_result x) { if (x.error()) { throw simdjson_error(x.error()); } return to_string(x.value()); } -#endif +#endif } // namespace simdjson @@ -5331,12 +5306,12 @@ using ParsedJson [[deprecated("Use dom::parser instead")]] = dom::parser; namespace simdjson { #if SIMDJSON_EXCEPTIONS -#ifndef SIMDJSON_DISABLE_DEPRECATED_API + [[deprecated("Use padded_string::load() instead")]] inline padded_string get_corpus(const char *path) { return padded_string::load(path); } -#endif // SIMDJSON_DISABLE_DEPRECATED_API + #endif // SIMDJSON_EXCEPTIONS } // namespace simdjson @@ -5350,7 +5325,6 @@ namespace simdjson { // C API (json_parse and build_parsed_json) declarations // -#ifndef SIMDJSON_DISABLE_DEPRECATED_API [[deprecated("Use parser.parse() instead")]] inline int json_parse(const uint8_t *buf, size_t len, dom::parser &parser, bool realloc_if_needed = true) noexcept { error_code code = parser.parse(buf, len, realloc_if_needed).error(); @@ -5444,7 +5418,6 @@ simdjson_warn_unused inline dom::parser build_parsed_json(const padded_string &s parser.error = code; return parser; } -#endif // SIMDJSON_DISABLE_DEPRECATED_API /** @private We do not want to allow implicit conversion from C string to std::string. */ int json_parse(const char *buf, dom::parser &parser) noexcept = delete; @@ -5454,7 +5427,7 @@ dom::parser build_parsed_json(const char *buf) noexcept = delete; } // namespace simdjson #endif // SIMDJSON_DOM_JSONPARSER_H -/* end file include/simdjson/dom/jsonparser.h */ +/* end file include/simdjson/jsonioutil.h */ /* begin file include/simdjson/dom/parsedjson_iterator.h */ // TODO Remove this -- deprecated API and files @@ -5536,9 +5509,8 @@ inline std::ostream& operator<<(std::ostream& out, const escape_json_string &une #endif // SIMDJSON_INTERNAL_JSONFORMATUTILS_H /* end file include/simdjson/internal/jsonformatutils.h */ -#ifndef SIMDJSON_DISABLE_DEPRECATED_API - namespace simdjson { + /** @private **/ class [[deprecated("Use the new DOM navigation API instead (see doc/basics.md)")]] dom::parser::Iterator { public: @@ -5789,10 +5761,9 @@ public: }; } // namespace simdjson -#endif // SIMDJSON_DISABLE_DEPRECATED_API #endif // SIMDJSON_DOM_PARSEDJSON_ITERATOR_H -/* end file include/simdjson/dom/parsedjson_iterator.h */ +/* end file include/simdjson/internal/jsonformatutils.h */ // Inline functions /* begin file include/simdjson/dom/array-inl.h */ @@ -5857,6 +5828,9 @@ inline array::iterator array::end() const noexcept { inline size_t array::size() const noexcept { return tape.scope_count(); } +inline size_t array::slots() const noexcept { + return tape.matching_brace_index() - tape.json_index; +} inline simdjson_result array::at_pointer(std::string_view json_pointer) const noexcept { if(json_pointer.empty()) { // an empty string means that we return the current node return element(this->tape); // copy the current node @@ -6062,7 +6036,6 @@ simdjson_really_inline simdjson_result simdjson_result simdjson_result::at(const std::string_view json_pointer) const noexcept { SIMDJSON_PUSH_DISABLE_WARNINGS @@ -6071,7 +6044,6 @@ SIMDJSON_DISABLE_DEPRECATED_WARNING return first.at(json_pointer); SIMDJSON_POP_DISABLE_WARNINGS } -#endif // SIMDJSON_DISABLE_DEPRECATED_API simdjson_really_inline simdjson_result simdjson_result::at(size_t index) const noexcept { if (error()) { return error(); } return first.at(index); @@ -6321,14 +6293,13 @@ inline simdjson_result element::at_pointer(std::string_view json_pointe } } } -#ifndef SIMDJSON_DISABLE_DEPRECATED_API + [[deprecated("For standard compliance, use at_pointer instead, and prefix your pointers with a slash '/', see RFC6901 ")]] inline simdjson_result element::at(std::string_view json_pointer) const noexcept { // version 0.4 of simdjson allowed non-compliant pointers auto std_pointer = (json_pointer.empty() ? "" : "/") + std::string(json_pointer.begin(), json_pointer.end()); return at_pointer(std_pointer); } -#endif // SIMDJSON_DISABLE_DEPRECATED_API inline simdjson_result element::at(size_t index) const noexcept { return get().at(index); @@ -6385,7 +6356,7 @@ static_assert(std::ranges::sized_range::begin() noexcept { first.error = error(); return first.begin(); @@ -6656,7 +6626,6 @@ simdjson_really_inline dom::document_stream::iterator simdjson_result= error_code::NUM_ERROR_CODES) { return internal::error_codes[UNEXPECTED_ERROR].message; } return internal::error_codes[error].message; } -#endif // SIMDJSON_DISABLE_DEPRECATED_API inline std::ostream& operator<<(std::ostream& out, error_code error) noexcept { return out << error_message(error); @@ -7234,11 +7201,11 @@ static_assert(std::ranges::sized_range padded_string::load(const std::string &fil #include -#ifndef SIMDJSON_DISABLE_DEPRECATED_API - namespace simdjson { // VS2017 reports deprecated warnings when you define a deprecated class's methods. @@ -7376,6 +7341,7 @@ SIMDJSON_PUSH_DISABLE_WARNINGS SIMDJSON_DISABLE_DEPRECATED_WARNING // Because of template weirdness, the actual class definition is inline in the document class + simdjson_warn_unused bool dom::parser::Iterator::is_ok() const { return location < tape_length; } @@ -7843,11 +7809,9 @@ bool dom::parser::Iterator::relative_move_to(const char *pointer, } SIMDJSON_POP_DISABLE_WARNINGS + } // namespace simdjson -#endif // SIMDJSON_DISABLE_DEPRECATED_API - - #endif // SIMDJSON_INLINE_PARSEDJSON_ITERATOR_H /* end file include/simdjson/dom/parsedjson_iterator-inl.h */ /* begin file include/simdjson/dom/parser-inl.h */ @@ -8008,12 +7972,11 @@ inline error_code parser::allocate(size_t capacity, size_t max_depth) noexcept { return SUCCESS; } -#ifndef SIMDJSON_DISABLE_DEPRECATED_API simdjson_warn_unused inline bool parser::allocate_capacity(size_t capacity, size_t max_depth) noexcept { return !allocate(capacity, max_depth); } -#endif // SIMDJSON_DISABLE_DEPRECATED_API + inline error_code parser::ensure_capacity(size_t desired_capacity) noexcept { // If we don't have enough capacity, (try to) automatically bump it. // If the document was taken, reallocate that too. @@ -8272,7 +8235,7 @@ simdjson_really_inline void mini_formatter::number(int64_t x) { simdjson_really_inline void mini_formatter::number(double x) { char number_buffer[24]; // Currently, passing the nullptr to the second argument is - // safe because our implementation does not check the second + // safe because our implementation does not check the second // argument. char *newp = internal::to_chars(number_buffer, nullptr, x); buffer.insert(buffer.end(), number_buffer, newp); @@ -8285,7 +8248,7 @@ simdjson_really_inline void mini_formatter::end_object() { one_char('}'); } simdjson_really_inline void mini_formatter::comma() { one_char(','); } -simdjson_really_inline void mini_formatter::true_atom() { +simdjson_really_inline void mini_formatter::true_atom() { const char * s = "true"; buffer.insert(buffer.end(), s, s + 4); } @@ -8307,29 +8270,29 @@ simdjson_really_inline void mini_formatter::string(std::string_view unescaped) { size_t i = 0; // Fast path for the case where we have no control character, no ", and no backslash. // This should include most keys. - constexpr static bool needs_escaping[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + constexpr static bool needs_escaping[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; - for(;i + 8 <= unescaped.length(); i += 8) { + for(;i + 8 <= unescaped.length(); i += 8) { // Poor's man vectorization. This could get much faster if we used SIMD. - if(needs_escaping[uint8_t(unescaped[i])] | needs_escaping[uint8_t(unescaped[i+1])] + if(needs_escaping[uint8_t(unescaped[i])] | needs_escaping[uint8_t(unescaped[i+1])] | needs_escaping[uint8_t(unescaped[i+2])] | needs_escaping[uint8_t(unescaped[i+3])] - | needs_escaping[uint8_t(unescaped[i+4])] | needs_escaping[uint8_t(unescaped[i+5])] + | needs_escaping[uint8_t(unescaped[i+4])] | needs_escaping[uint8_t(unescaped[i+5])] | needs_escaping[uint8_t(unescaped[i+6])] | needs_escaping[uint8_t(unescaped[i+7])] ) { break; } } - for(;i < unescaped.length(); i++) { + for(;i < unescaped.length(); i++) { if(needs_escaping[uint8_t(unescaped[i])]) { break; } } // The following is also possible and omits a 256-byte table, but it is slower: - // for (; (i < unescaped.length()) && (uint8_t(unescaped[i]) > 0x1F) + // for (; (i < unescaped.length()) && (uint8_t(unescaped[i]) > 0x1F) // && (unescaped[i] != '\"') && (unescaped[i] != '\\'); i++) {} // At least for long strings, the following should be fast. We could @@ -8612,15 +8575,15 @@ extern SIMDJSON_DLLIMPORTEXPORT const uint32_t digit_to_val32[886]; namespace simdjson { namespace internal { /** - * The smallest non-zero float (binary64) is 2^-1074. + * The smallest non-zero float (binary64) is 2^−1074. * We take as input numbers of the form w x 10^q where w < 2^64. * We have that w * 10^-343 < 2^(64-344) 5^-343 < 2^-1076. - * However, we have that - * (2^64-1) * 10^-342 = (2^64-1) * 2^-342 * 5^-342 > 2^-1074. - * Thus it is possible for a number of the form w * 10^-342 where + * However, we have that + * (2^64-1) * 10^-342 = (2^64-1) * 2^-342 * 5^-342 > 2^−1074. + * Thus it is possible for a number of the form w * 10^-342 where * w is a 64-bit value to be a non-zero floating-point number. ********* - * Any number of form w * 10^309 where w>= 1 is going to be + * Any number of form w * 10^309 where w>= 1 is going to be * infinite in binary64 so we never need to worry about powers * of 5 greater than 308. */ @@ -8652,7 +8615,7 @@ extern SIMDJSON_DLLIMPORTEXPORT const double power_of_ten[]; * are not a concern since they can be represented * exactly using the binary notation, only the powers of five * affect the binary significand. - */ + */ // The truncated powers of five from 5^-342 all the way to 5^308 @@ -8681,7 +8644,7 @@ extern SIMDJSON_DLLIMPORTEXPORT const uint8_t pshufb_combine_table[272]; extern SIMDJSON_DLLIMPORTEXPORT const uint64_t thintable_epi8[256]; } // namespace internal -} // namespace simdjson +} // namespace simdjson #endif // SIMDJSON_INTERNAL_SIMDPRUNE_TABLES_H /* end file include/simdjson/internal/simdprune_tables.h */ @@ -8849,7 +8812,7 @@ NO_SANITIZE_UNDEFINED simdjson_really_inline int trailing_zeroes(uint64_t input_num) { #ifdef SIMDJSON_REGULAR_VISUAL_STUDIO unsigned long ret; - // Search the mask data from least significant bit (LSB) + // Search the mask data from least significant bit (LSB) // to the most significant bit (MSB) for a set bit (1). _BitScanForward64(&ret, input_num); return (int)ret; @@ -8867,7 +8830,7 @@ simdjson_really_inline uint64_t clear_lowest_bit(uint64_t input_num) { simdjson_really_inline int leading_zeroes(uint64_t input_num) { #ifdef SIMDJSON_REGULAR_VISUAL_STUDIO unsigned long leading_zero = 0; - // Search the mask data from most significant bit (MSB) + // Search the mask data from most significant bit (MSB) // to least significant bit (LSB) for a set bit (1). if (_BitScanReverse64(&leading_zero, input_num)) return (int)(63 - leading_zero); @@ -8915,15 +8878,15 @@ namespace { simdjson_really_inline uint64_t prefix_xor(uint64_t bitmask) { ///////////// // We could do this with PMULL, but it is apparently slow. - // + // //#ifdef __ARM_FEATURE_CRYPTO // some ARM processors lack this extension //return vmull_p64(-1ULL, bitmask); //#else // Analysis by @sebpop: // When diffing the assembly for src/stage1_find_marks.cpp I see that the eors are all spread out - // in between other vector code, so effectively the extra cycles of the sequence do not matter + // in between other vector code, so effectively the extra cycles of the sequence do not matter // because the GPR units are idle otherwise and the critical path is on the FP side. - // Also the PMULL requires two extra fmovs: GPR->FP (3 cycles in N1, 5 cycles in A72 ) + // Also the PMULL requires two extra fmovs: GPR->FP (3 cycles in N1, 5 cycles in A72 ) // and FP->GPR (2 cycles on N1 and 5 cycles on A72.) /////////// bitmask ^= bitmask << 1; @@ -9364,7 +9327,7 @@ simdjson_really_inline int8x16_t make_int8x16_t(int8_t x1, int8_t x2, int8_t x simd8x64(const simd8x64& o) = delete; // no copy allowed simd8x64& operator=(const simd8 other) = delete; // no assignment allowed simd8x64() = delete; // no default constructor allowed - + simdjson_really_inline simd8x64(const simd8 chunk0, const simd8 chunk1, const simd8 chunk2, const simd8 chunk3) : chunks{chunk0, chunk1, chunk2, chunk3} {} simdjson_really_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr), simd8::load(ptr+16), simd8::load(ptr+32), simd8::load(ptr+48)} {} @@ -9808,7 +9771,7 @@ simdjson_unused simdjson_warn_unused simdjson_really_inline error_code parse_str /* end file include/simdjson/generic/stringparsing.h */ #endif // SIMDJSON_ARM64_STRINGPARSING_H -/* end file include/simdjson/arm64/stringparsing.h */ +/* end file include/simdjson/generic/stringparsing.h */ /* begin file include/simdjson/arm64/numberparsing.h */ #ifndef SIMDJSON_ARM64_NUMBERPARSING_H #define SIMDJSON_ARM64_NUMBERPARSING_H @@ -9821,7 +9784,7 @@ namespace { // credit: https://johnnylee-sde.github.io/Fast-numeric-string-to-int/ static simdjson_really_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { uint64_t val; - std::memcpy(&val, chars, sizeof(uint64_t)); + memcpy(&val, chars, sizeof(uint64_t)); val = (val & 0x0F0F0F0F0F0F0F0F) * 2561 >> 8; val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16; return uint32_t((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32); @@ -9860,13 +9823,13 @@ namespace numberparsing { namespace { // Convert a mantissa, an exponent and a sign bit into an ieee64 double. // The real_exponent needs to be in [0, 2046] (technically real_exponent = 2047 would be acceptable). -// The mantissa should be in [0,1<<53). The bit at index (1ULL << 52) while be zeroed. +// The mantissa should be in [0,1<<53). The bit at index (1ULL << 52) while be zeroed. simdjson_really_inline double to_double(uint64_t mantissa, uint64_t real_exponent, bool negative) { double d; mantissa &= ~(1ULL << 52); mantissa |= real_exponent << 52; mantissa |= (((uint64_t)negative) << 63); - std::memcpy(&d, &mantissa, sizeof(d)); + memcpy(&d, &mantissa, sizeof(d)); return d; } } @@ -9984,7 +9947,7 @@ simdjson_really_inline bool compute_float_64(int64_t power, uint64_t i, bool neg // We want the most significant 64 bits of the product. We know // this will be non-zero because the most significant bit of i is // 1. - const uint32_t index = 2 * uint32_t(power - simdjson::internal::smallest_power); + const uint32_t index = 2 * uint32_t(power - simdjson::internal::smallest_power); // Optimization: It may be that materializing the index as a variable might confuse some compilers and prevent effective complex-addressing loads. (Done for code clarity.) // // The full_multiplication function computes the 128-bit product of two 64-bit words @@ -9993,7 +9956,7 @@ simdjson_really_inline bool compute_float_64(int64_t power, uint64_t i, bool neg // to the 64-bit most significant bits of the product. simdjson::internal::value128 firstproduct = jsoncharutils::full_multiplication(i, simdjson::internal::power_of_five_128[index]); // Both i and power_of_five_128[index] have their most significant bit set to 1 which - // implies that the either the most or the second most significant bit of the product + // implies that the either the most or the second most significant bit of the product // is 1. We pack values in this manner for efficiency reasons: it maximizes the use // we make of the product. It also makes it easy to reason aboutthe product: there // 0 or 1 leading zero in the product. @@ -10008,17 +9971,17 @@ simdjson_really_inline bool compute_float_64(int64_t power, uint64_t i, bool neg // Consider the scenario where q>=0. Then 5^q may not fit in 64-bits. Doing // the full computation is wasteful. So we do what is called a "truncated // multiplication". - // We take the most significant 64-bits, and we put them in + // We take the most significant 64-bits, and we put them in // power_of_five_128[index]. Usually, that's good enough to approximate i * 5^q - // to the desired approximation using one multiplication. Sometimes it does not suffice. + // to the desired approximation using one multiplication. Sometimes it does not suffice. // Then we store the next most significant 64 bits in power_of_five_128[index + 1], and // then we get a better approximation to i * 5^q. In very rare cases, even that // will not suffice, though it is seemingly very hard to find such a scenario. - // + // // That's for when q>=0. The logic for q<0 is somewhat similar but it is somewhat // more complicated. // - // There is an extra layer of complexity in that we need more than 55 bits of + // There is an extra layer of complexity in that we need more than 55 bits of // accuracy in the round-to-even scenario. // // The full_multiplication function computes the 128-bit product of two 64-bit words @@ -10051,7 +10014,7 @@ simdjson_really_inline bool compute_float_64(int64_t power, uint64_t i, bool neg if(-real_exponent + 1 >= 64) { // if we have more than 64 bits below the minimum exponent, you have a zero for sure. d = 0.0; return true; - } + } // next line is safe because -real_exponent + 1 < 0 mantissa >>= -real_exponent + 1; // Thankfully, we can't have both "round-to-even" and subnormals because @@ -10064,7 +10027,7 @@ simdjson_really_inline bool compute_float_64(int64_t power, uint64_t i, bool neg // whereas 0x40000000000000 x 2^-1023-53 is normal. Now, we need to round // up 0x3fffffffffffff x 2^-1023-53 and once we do, we are no longer // subnormal, but we can only know this after rounding. - // So we only declare a subnormal if we are smaller than the threshold. + // So we only declare a subnormal if we are smaller than the threshold. real_exponent = (mantissa < (uint64_t(1) << 52)) ? 0 : 1; d = to_double(mantissa, real_exponent, negative); return true; @@ -10074,7 +10037,7 @@ simdjson_really_inline bool compute_float_64(int64_t power, uint64_t i, bool neg // which we guard against. // If we have lots of trailing zeros, we may fall right between two // floating-point values. - // + // // The round-to-even cases take the form of a number 2m+1 which is in (2^53,2^54] // times a power of two. That is, it is right between a number with binary significand // m and another number with binary significand m+1; and it must be the case @@ -10085,11 +10048,11 @@ simdjson_really_inline bool compute_float_64(int64_t power, uint64_t i, bool neg // When q >= 0, we must have that (2m+1) is divible by 5^q, so 5^q <= 2^54. We have that // 5^23 <= 2^54 and it is the last power of five to qualify, so q <= 23. // When q<0, we have w >= (2m+1) x 5^{-q}. We must have that w<2^{64} so - // (2m+1) x 5^{-q} < 2^{64}. We have that 2m+1>2^{53}. Hence, we must have + // (2m+1) x 5^{-q} < 2^{64}. We have that 2m+1>2^{53}. Hence, we must have // 2^{53} x 5^{-q} < 2^{64}. - // Hence we have 5^{-q} < 2^{11}$ or q>= -4. + // Hence we have 5^{-q} < 2^{11}$ or q>= -4. // - // We require lower <= 1 and not lower == 0 because we could not prove that + // We require lower <= 1 and not lower == 0 because we could not prove that // that lower == 0 is implied; but we could prove that lower <= 1 is a necessary and sufficient test. if (simdjson_unlikely((lower <= 1) && (power >= -4) && (power <= 23) && ((mantissa & 3) == 1))) { if((mantissa << (upperbit + 64 - 53 - 2)) == upper) { @@ -10142,7 +10105,7 @@ simdjson_really_inline bool is_made_of_eight_digits_fast(const uint8_t *chars) { // this can read up to 7 bytes beyond the buffer size, but we require // SIMDJSON_PADDING of padding static_assert(7 <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be bigger than 7"); - std::memcpy(&val, chars, 8); + memcpy(&val, chars, 8); // a branchy method might be faster: // return (( val & 0xF0F0F0F0F0F0F0F0 ) == 0x3030303030303030) // && (( (val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0 ) == @@ -10297,7 +10260,7 @@ simdjson_really_inline error_code write_float(const uint8_t *const src, bool neg // Observe that 18446744073709551615e-343 == 0, i.e. (2**64 - 1) e -343 is zero // so something x 10^-343 goes to zero, but not so with something x 10^-342. static_assert(simdjson::internal::smallest_power <= -342, "smallest_power is not small enough"); - // + // if((exponent < simdjson::internal::smallest_power) || (i == 0)) { WRITE_DOUBLE(0, src, writer); return SUCCESS; @@ -10569,12 +10532,11 @@ simdjson_unused simdjson_really_inline simdjson_result parse_double(cons if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } exponent += exp_neg ? 0-exp : exp; + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; } if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } - overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; - // // Assemble (or slow-parse) the float // @@ -10597,7 +10559,7 @@ simdjson_unused simdjson_really_inline simdjson_result parse_double(cons /* end file include/simdjson/generic/numberparsing.h */ #endif // SIMDJSON_ARM64_NUMBERPARSING_H -/* end file include/simdjson/arm64/numberparsing.h */ +/* end file include/simdjson/generic/numberparsing.h */ /* begin file include/simdjson/generic/implementation_simdjson_result_base.h */ namespace simdjson { namespace SIMDJSON_IMPLEMENTATION { @@ -10727,7 +10689,7 @@ namespace simdjson { namespace SIMDJSON_IMPLEMENTATION { /** * A fast, simple, DOM-like interface that parses JSON as you use it. - * + * * Designed for maximum speed and a lower memory profile. */ namespace ondemand { @@ -10778,11 +10740,11 @@ class parser; * * (In other words, a pointer to the beginning of a string, just after the start quote, inside a * JSON file.) - * + * * This class is deliberately simplistic and has little functionality. You can * compare two raw_json_string instances, or compare a raw_json_string with a string_view, but * that is pretty much all you can do. - * + * * They originate typically from field instance which in turn represent key-value pairs from * object instances. From a field instance, you get the raw_json_string instance by calling key(). * You can, if you want a more usable string_view instance, call the unescaped_key() method @@ -10792,7 +10754,7 @@ class raw_json_string { public: /** * Create a new invalid raw_json_string. - * + * * Exists so you can declare a variable and later assign to it before use. */ simdjson_really_inline raw_json_string() noexcept = default; @@ -10802,15 +10764,15 @@ public: /** * Create a new invalid raw_json_string pointed at the given location in the JSON. - * + * * The given location must be just *after* the beginning quote (") in the JSON file. - * + * * It *must* be terminated by a ", and be a valid JSON string. */ simdjson_really_inline raw_json_string(const uint8_t * _buf) noexcept; /** * Get the raw pointer to the beginning of the string in the JSON (just after the "). - * + * * It is possible for this function to return a null pointer if the instance * has outlived its existence. */ @@ -10830,11 +10792,11 @@ private: /** * Unescape this JSON string, replacing \\ with \, \n with newline, etc. - * + * * ## IMPORTANT: string_view lifetime - * + * * The string_view is only valid as long as the bytes in dst. - * + * * @param dst A pointer to a buffer at least large enough to write this string as well as a \0. * dst will be updated to the next unused location (just after the \0 written out at * the end of this string). @@ -10844,11 +10806,11 @@ private: simdjson_really_inline simdjson_warn_unused simdjson_result unescape(uint8_t *&dst) const noexcept; /** * Unescape this JSON string, replacing \\ with \, \n with newline, etc. - * + * * ## IMPORTANT: string_view lifetime - * + * * The string_view is only valid until the next parse() call on the parser. - * + * * @param iter A json_iterator, which contains a buffer where the string will be written. */ simdjson_really_inline simdjson_warn_unused simdjson_result unescape(json_iterator &iter) const noexcept; @@ -10904,7 +10866,7 @@ class token_iterator { public: /** * Create a new invalid token_iterator. - * + * * Exists so you can declare a variable and later assign to it before use. */ simdjson_really_inline token_iterator() noexcept = default; @@ -11004,7 +10966,7 @@ class json_iterator_ref; /** * Iterates through JSON, with structure-sensitive algorithms. - * + * * @private This is not intended for external use. */ class json_iterator : public token_iterator { @@ -11040,17 +11002,17 @@ public: * Start an object iteration after the user has already checked and moved past the {. * * Does not move the iterator. - * + * * @returns Whether the object had any fields (returns false for empty). */ simdjson_warn_unused simdjson_really_inline bool started_object() noexcept; /** * Moves to the next field in an object. - * + * * Looks for , and }. If } is found, the object is finished and the iterator advances past it. * Otherwise, it advances to the next value. - * + * * @return whether there is another field in the object. * @error TAPE_ERROR If there is a comma missing between fields. */ @@ -11070,7 +11032,7 @@ public: * Find the next field with the given key. * * Assumes you have called next_field() or otherwise matched the previous value. - * + * * Key is *raw JSON,* meaning it will be matched against the verbatim JSON without attempting to * unescape it. This works well for typical ASCII and UTF-8 keys (almost all of them), but may * fail to match some keys with escapes (\u, \n, etc.). @@ -11104,10 +11066,10 @@ public: /** * Moves to the next element in an array. - * + * * Looks for , and ]. If ] is found, the array is finished and the iterator advances past it. * Otherwise, it advances to the next value. - * + * * @return Whether there is another element in the array. * @error TAPE_ERROR If there is a comma missing between elements. */ @@ -11146,7 +11108,7 @@ public: /** * Skips to the end of a JSON object or array. - * + * * @return true if this was the end of an array, false if it was the end of an object. */ simdjson_warn_unused simdjson_really_inline error_code skip_container() noexcept; @@ -11168,7 +11130,7 @@ public: /** * Report an error, preventing further iteration. - * + * * @param error The error to report. Must not be SUCCESS, UNINITIALIZED, INCORRECT_TYPE, or NO_SUCH_FIELD. * @param message An error message to report with the error. */ @@ -11183,13 +11145,13 @@ protected: ondemand::parser *parser{}; /** * Next free location in the string buffer. - * + * * Used by raw_json_string::unescape() to have a place to unescape strings to. */ uint8_t *current_string_buf_loc{}; /** * JSON error, if there is one. - * + * * INCORRECT_TYPE and NO_SUCH_FIELD are *not* stored here, ever. * * PERF NOTE: we *hope* this will be elided into control flow, as it is only used (a) in the first @@ -11300,7 +11262,7 @@ class document; /** * A forward-only JSON array. - * + * * This is an input_iterator, meaning: * - It is forward-only * - * must be called exactly once per element. @@ -11320,7 +11282,7 @@ public: /** * Get the current element. - * + * * Part of the std::iterator interface. */ simdjson_really_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. @@ -11328,7 +11290,7 @@ public: * Check if we are at the end of the JSON. * * Part of the std::iterator interface. - * + * * @return true if there are no more elements in the JSON array. */ simdjson_really_inline bool operator==(const array_iterator &) noexcept; @@ -11336,13 +11298,13 @@ public: * Check if there are more elements in the JSON array. * * Part of the std::iterator interface. - * + * * @return true if there are more elements in the JSON array. */ simdjson_really_inline bool operator!=(const array_iterator &) noexcept; /** * Move to the next element. - * + * * Part of the std::iterator interface. */ simdjson_really_inline array_iterator &operator++() noexcept; @@ -11400,7 +11362,7 @@ class object_iterator { public: /** * Create a new invalid object_iterator. - * + * * Exists so you can declare a variable and later assign to it before use. */ simdjson_really_inline object_iterator() noexcept = default; @@ -11476,7 +11438,7 @@ class array { public: /** * Create a new invalid array. - * + * * Exists so you can declare a variable and later assign to it before use. */ simdjson_really_inline array() noexcept = default; @@ -11514,7 +11476,7 @@ protected: static simdjson_really_inline simdjson_result start(json_iterator_ref &&iter) noexcept; /** * Begin array iteration. - * + * * This version of the method should be called after the initial [ has been verified, and is * intended for use by switch statements that check the type of a value. * @@ -11541,7 +11503,7 @@ protected: /** * Iterator marking current position. - * + * * iter.is_alive() == false indicates iteration is complete. */ json_iterator_ref iter{}; @@ -11601,7 +11563,7 @@ public: /** * Create a new invalid document. - * + * * Exists so you can declare a variable and later assign to it before use. */ simdjson_really_inline document() noexcept = default; @@ -11649,7 +11611,7 @@ public: simdjson_really_inline simdjson_result get_double() noexcept; /** * Cast this JSON value to a string. - * + * * The string is guaranteed to be valid UTF-8. * * Equivalent to get(). @@ -11661,7 +11623,7 @@ public: simdjson_really_inline simdjson_result get_string() & noexcept; /** * Cast this JSON value to a raw_json_string. - * + * * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). * * @returns A pointer to the raw JSON for the given string. @@ -11677,7 +11639,7 @@ public: simdjson_really_inline simdjson_result get_bool() noexcept; /** * Checks if this JSON value is null. - * + * * @returns Whether the value is null. */ simdjson_really_inline bool is_null() noexcept; @@ -11686,7 +11648,7 @@ public: * Get this value as the given type. * * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool - * + * * @returns A value of the given type, parsed from the JSON. * @returns INCORRECT_TYPE If the JSON value is not the given type. */ @@ -11698,7 +11660,7 @@ public: * Get this value as the given type. * * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool - * + * * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. * @returns INCORRECT_TYPE If the JSON value is not an object. * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. @@ -11745,7 +11707,7 @@ public: simdjson_really_inline operator double() noexcept(false); /** * Cast this JSON value to a string. - * + * * The string is guaranteed to be valid UTF-8. * * Equivalent to get(). @@ -11757,7 +11719,7 @@ public: simdjson_really_inline operator std::string_view() & noexcept(false); /** * Cast this JSON value to a raw_json_string. - * + * * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). * * @returns A pointer to the raw JSON for the given string. @@ -11791,7 +11753,7 @@ public: * * This method may only be called once on a given value. If you want to look up multiple fields, * you must first get the object using value.get_object() or object(value). - * + * * @param key The key to look up. * @returns INCORRECT_TYPE If the JSON value is not an array. */ @@ -11801,7 +11763,7 @@ public: * * This method may only be called once on a given value. If you want to look up multiple fields, * you must first get the object using value.get_object() or object(value). - * + * * @param key The key to look up. * @returns INCORRECT_TYPE If the JSON value is not an array. */ @@ -11815,7 +11777,7 @@ protected: static simdjson_really_inline document start(ondemand::json_iterator &&iter) noexcept; /** * Set json to null if the result is successful. - * + * * Convenience function for value-getters. */ template @@ -11917,7 +11879,7 @@ class value { public: /** * Create a new invalid value. - * + * * Exists so you can declare a variable and later assign to it before use. */ simdjson_really_inline value() noexcept = default; @@ -11936,7 +11898,7 @@ public: * Get this value as the given type. * * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool - * + * * @returns A value of the given type, parsed from the JSON. * @returns INCORRECT_TYPE If the JSON value is not the given type. */ @@ -11948,7 +11910,7 @@ public: * Get this value as the given type. * * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool - * + * * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. * @returns INCORRECT_TYPE If the JSON value is not an object. * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. @@ -12009,7 +11971,7 @@ public: /** * Cast this JSON value to a string. - * + * * The string is guaranteed to be valid UTF-8. * * Equivalent to get(). @@ -12024,7 +11986,7 @@ public: /** * Cast this JSON value to a raw_json_string. - * + * * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). * * @returns A pointer to the raw JSON for the given string. @@ -12046,7 +12008,7 @@ public: /** * Checks if this JSON value is null. - * + * * @returns Whether the value is null. */ simdjson_really_inline bool is_null() && noexcept; @@ -12097,7 +12059,7 @@ public: simdjson_really_inline operator double() & noexcept(false); /** * Cast this JSON value to a string. - * + * * The string is guaranteed to be valid UTF-8. * * Equivalent to get(). @@ -12111,7 +12073,7 @@ public: simdjson_really_inline operator std::string_view() & noexcept(false); /** * Cast this JSON value to a raw_json_string. - * + * * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). * * @returns A pointer to the raw JSON for the given string. @@ -12135,7 +12097,7 @@ public: * Begin array iteration. * * Part of the std::iterable interface. - * + * * @returns INCORRECT_TYPE If the JSON value is not an array. */ simdjson_really_inline simdjson_result> begin() & noexcept; @@ -12272,16 +12234,16 @@ namespace ondemand { /** * A JSON field (key/value pair) in an object. - * + * * Returned from object iteration. - * + * * Extends from std::pair so you can use C++ algorithms that rely on pairs. */ class field : public std::pair { public: /** * Create a new invalid field. - * + * * Exists so you can declare a variable and later assign to it before use. */ simdjson_really_inline field() noexcept; @@ -12295,7 +12257,7 @@ public: * Get the key as a string_view (for higher speed, consider raw_key). * We deliberately use a more cumbersome name (unescaped_key) to force users * to think twice about using it. - * + * * This consumes the key: once you have called unescaped_key(), you cannot * call it again nor can you call key(). */ @@ -12358,7 +12320,7 @@ class object { public: /** * Create a new invalid object. - * + * * Exists so you can declare a variable and later assign to it before use. */ simdjson_really_inline object() noexcept = default; @@ -12404,7 +12366,7 @@ protected: json_iterator_ref iter{}; /** * Whether we are at the start. - * + * * PERF NOTE: this should be elided into inline control flow: it is only used for the first [] * or * call, and SSA optimizers commonly do first-iteration loop optimization. */ @@ -12473,17 +12435,17 @@ public: /** * Start iterating an on-demand JSON document. - * + * * ondemand::parser parser; * document doc = parser.iterate(json); - * + * * ### IMPORTANT: Buffer Lifetime - * + * * Because parsing is done while you iterate, you *must* keep the JSON buffer around at least as * long as the document iteration. - * + * * ### IMPORTANT: Document Lifetime - * + * * Only one iteration at a time can happen per parser, and the parser *must* be kept alive during * iteration to ensure intermediate buffers can be accessed. Any document must be destroyed before * you call parse() again or destroy the parser. @@ -12494,7 +12456,7 @@ public: * those bytes are initialized to, as long as they are allocated. * * @param json The JSON to parse. - * + * * @return The document, or an error: * - MEMALLOC if realloc_if_needed the parser does not have enough capacity, and memory * allocation fails. @@ -12507,19 +12469,19 @@ public: simdjson_warn_unused simdjson_result iterate(const std::string &json) & noexcept = delete; /** * @private - * + * * Start iterating an on-demand JSON document. - * + * * ondemand::parser parser; * json_iterator doc = parser.iterate(json); - * + * * ### IMPORTANT: Buffer Lifetime - * + * * Because parsing is done while you iterate, you *must* keep the JSON buffer around at least as * long as the document iteration. - * + * * ### IMPORTANT: Document Lifetime - * + * * Only one iteration at a time can happen per parser, and the parser *must* be kept alive during * iteration to ensure intermediate buffers can be accessed. Any document must be destroyed before * you call parse() again or destroy the parser. @@ -12530,7 +12492,7 @@ public: * those bytes are initialized to, as long as they are allocated. * * @param json The JSON to parse. - * + * * @return The iterator, or an error: * - MEMALLOC if realloc_if_needed the parser does not have enough capacity, and memory * allocation fails. @@ -12579,7 +12541,7 @@ public: } // namespace simdjson /* end file include/simdjson/generic/ondemand/parser.h */ -/* end file include/simdjson/generic/ondemand.h */ +/* end file include/simdjson/generic/ondemand/parser.h */ // Inline definitions /* begin file include/simdjson/generic/implementation_simdjson_result_base-inl.h */ @@ -12999,7 +12961,7 @@ simdjson_warn_unused simdjson_really_inline bool json_iterator::started_array() advance(); return false; } - logger::log_start_value(*this, "array"); + logger::log_start_value(*this, "array"); return true; } @@ -13086,7 +13048,7 @@ simdjson_warn_unused simdjson_really_inline bool json_iterator::copy_to_buffer(c } // Copy to the buffer. - std::memcpy(tmpbuf, json, len); + memcpy(tmpbuf, json, len); tmpbuf[len] = ' '; return true; } @@ -13105,7 +13067,7 @@ simdjson_warn_unused simdjson_result json_iterator::consume_root_uint6 return parse_root_uint64(advance()); } simdjson_warn_unused simdjson_result json_iterator::parse_root_int64(const uint8_t *json) noexcept { - uint8_t tmpbuf[20+1]; // -<19 digits> is the longest possible integer + uint8_t tmpbuf[20+1]; // -<19 digits> is the longest possible integer if (!copy_to_buffer(json, tmpbuf)) { logger::log_error(*this, "Root number more than 20 characters"); return NUMBER_ERROR; } logger::log_value(*this, "int64", ""); auto result = numberparsing::parse_integer(tmpbuf); @@ -13517,7 +13479,7 @@ namespace ondemand { // Next state. In this state, depth > iter->depth, at_start == false, and error == SUCCESS. // // ## Error States -// +// // In error states, we will yield exactly one more value before stopping. iter->depth == depth // and at_start is always false. We decrement after yielding the error, moving to the Finished // state. @@ -14387,7 +14349,7 @@ namespace ondemand { // Next state. In this state, depth > iter->depth, at_start == false, and error == SUCCESS. // // ## Error States -// +// // In error states, we will yield exactly one more value before stopping. iter->depth == depth // and at_start is always false. We decrement after yielding the error, moving to the Finished // state. @@ -14550,7 +14512,7 @@ simdjson_warn_unused simdjson_really_inline simdjson_result parser::it } // Run stage 1. - SIMDJSON_TRY( dom_parser.stage1((const uint8_t *)buf.data(), buf.size(), false) ); + SIMDJSON_TRY( dom_parser.stage1((const uint8_t *)buf.data(), buf.size(), false) ); return document::start(this); } @@ -14561,7 +14523,7 @@ simdjson_warn_unused simdjson_really_inline simdjson_result parse } // Run stage 1. - SIMDJSON_TRY( dom_parser.stage1((const uint8_t *)buf.data(), buf.size(), false) ); + SIMDJSON_TRY( dom_parser.stage1((const uint8_t *)buf.data(), buf.size(), false) ); return json_iterator(this); } @@ -14578,7 +14540,7 @@ simdjson_really_inline simdjson_result=0. Then 5^q may not fit in 64-bits. Doing // the full computation is wasteful. So we do what is called a "truncated // multiplication". - // We take the most significant 64-bits, and we put them in + // We take the most significant 64-bits, and we put them in // power_of_five_128[index]. Usually, that's good enough to approximate i * 5^q - // to the desired approximation using one multiplication. Sometimes it does not suffice. + // to the desired approximation using one multiplication. Sometimes it does not suffice. // Then we store the next most significant 64 bits in power_of_five_128[index + 1], and // then we get a better approximation to i * 5^q. In very rare cases, even that // will not suffice, though it is seemingly very hard to find such a scenario. - // + // // That's for when q>=0. The logic for q<0 is somewhat similar but it is somewhat // more complicated. // - // There is an extra layer of complexity in that we need more than 55 bits of + // There is an extra layer of complexity in that we need more than 55 bits of // accuracy in the round-to-even scenario. // // The full_multiplication function computes the 128-bit product of two 64-bit words @@ -15872,7 +15834,7 @@ simdjson_really_inline bool compute_float_64(int64_t power, uint64_t i, bool neg if(-real_exponent + 1 >= 64) { // if we have more than 64 bits below the minimum exponent, you have a zero for sure. d = 0.0; return true; - } + } // next line is safe because -real_exponent + 1 < 0 mantissa >>= -real_exponent + 1; // Thankfully, we can't have both "round-to-even" and subnormals because @@ -15885,7 +15847,7 @@ simdjson_really_inline bool compute_float_64(int64_t power, uint64_t i, bool neg // whereas 0x40000000000000 x 2^-1023-53 is normal. Now, we need to round // up 0x3fffffffffffff x 2^-1023-53 and once we do, we are no longer // subnormal, but we can only know this after rounding. - // So we only declare a subnormal if we are smaller than the threshold. + // So we only declare a subnormal if we are smaller than the threshold. real_exponent = (mantissa < (uint64_t(1) << 52)) ? 0 : 1; d = to_double(mantissa, real_exponent, negative); return true; @@ -15895,7 +15857,7 @@ simdjson_really_inline bool compute_float_64(int64_t power, uint64_t i, bool neg // which we guard against. // If we have lots of trailing zeros, we may fall right between two // floating-point values. - // + // // The round-to-even cases take the form of a number 2m+1 which is in (2^53,2^54] // times a power of two. That is, it is right between a number with binary significand // m and another number with binary significand m+1; and it must be the case @@ -15906,11 +15868,11 @@ simdjson_really_inline bool compute_float_64(int64_t power, uint64_t i, bool neg // When q >= 0, we must have that (2m+1) is divible by 5^q, so 5^q <= 2^54. We have that // 5^23 <= 2^54 and it is the last power of five to qualify, so q <= 23. // When q<0, we have w >= (2m+1) x 5^{-q}. We must have that w<2^{64} so - // (2m+1) x 5^{-q} < 2^{64}. We have that 2m+1>2^{53}. Hence, we must have + // (2m+1) x 5^{-q} < 2^{64}. We have that 2m+1>2^{53}. Hence, we must have // 2^{53} x 5^{-q} < 2^{64}. - // Hence we have 5^{-q} < 2^{11}$ or q>= -4. + // Hence we have 5^{-q} < 2^{11}$ or q>= -4. // - // We require lower <= 1 and not lower == 0 because we could not prove that + // We require lower <= 1 and not lower == 0 because we could not prove that // that lower == 0 is implied; but we could prove that lower <= 1 is a necessary and sufficient test. if (simdjson_unlikely((lower <= 1) && (power >= -4) && (power <= 23) && ((mantissa & 3) == 1))) { if((mantissa << (upperbit + 64 - 53 - 2)) == upper) { @@ -15963,7 +15925,7 @@ simdjson_really_inline bool is_made_of_eight_digits_fast(const uint8_t *chars) { // this can read up to 7 bytes beyond the buffer size, but we require // SIMDJSON_PADDING of padding static_assert(7 <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be bigger than 7"); - std::memcpy(&val, chars, 8); + memcpy(&val, chars, 8); // a branchy method might be faster: // return (( val & 0xF0F0F0F0F0F0F0F0 ) == 0x3030303030303030) // && (( (val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0 ) == @@ -16118,7 +16080,7 @@ simdjson_really_inline error_code write_float(const uint8_t *const src, bool neg // Observe that 18446744073709551615e-343 == 0, i.e. (2**64 - 1) e -343 is zero // so something x 10^-343 goes to zero, but not so with something x 10^-342. static_assert(simdjson::internal::smallest_power <= -342, "smallest_power is not small enough"); - // + // if((exponent < simdjson::internal::smallest_power) || (i == 0)) { WRITE_DOUBLE(0, src, writer); return SUCCESS; @@ -16390,12 +16352,11 @@ simdjson_unused simdjson_really_inline simdjson_result parse_double(cons if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } exponent += exp_neg ? 0-exp : exp; + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; } if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } - overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; - // // Assemble (or slow-parse) the float // @@ -16418,7 +16379,7 @@ simdjson_unused simdjson_really_inline simdjson_result parse_double(cons /* end file include/simdjson/generic/numberparsing.h */ #endif // SIMDJSON_HASWELL_NUMBERPARSING_H -/* end file include/simdjson/haswell/numberparsing.h */ +/* end file include/simdjson/generic/numberparsing.h */ /* begin file include/simdjson/generic/implementation_simdjson_result_base.h */ namespace simdjson { namespace SIMDJSON_IMPLEMENTATION { @@ -16548,7 +16509,7 @@ namespace simdjson { namespace SIMDJSON_IMPLEMENTATION { /** * A fast, simple, DOM-like interface that parses JSON as you use it. - * + * * Designed for maximum speed and a lower memory profile. */ namespace ondemand { @@ -16599,11 +16560,11 @@ class parser; * * (In other words, a pointer to the beginning of a string, just after the start quote, inside a * JSON file.) - * + * * This class is deliberately simplistic and has little functionality. You can * compare two raw_json_string instances, or compare a raw_json_string with a string_view, but * that is pretty much all you can do. - * + * * They originate typically from field instance which in turn represent key-value pairs from * object instances. From a field instance, you get the raw_json_string instance by calling key(). * You can, if you want a more usable string_view instance, call the unescaped_key() method @@ -16613,7 +16574,7 @@ class raw_json_string { public: /** * Create a new invalid raw_json_string. - * + * * Exists so you can declare a variable and later assign to it before use. */ simdjson_really_inline raw_json_string() noexcept = default; @@ -16623,15 +16584,15 @@ public: /** * Create a new invalid raw_json_string pointed at the given location in the JSON. - * + * * The given location must be just *after* the beginning quote (") in the JSON file. - * + * * It *must* be terminated by a ", and be a valid JSON string. */ simdjson_really_inline raw_json_string(const uint8_t * _buf) noexcept; /** * Get the raw pointer to the beginning of the string in the JSON (just after the "). - * + * * It is possible for this function to return a null pointer if the instance * has outlived its existence. */ @@ -16651,11 +16612,11 @@ private: /** * Unescape this JSON string, replacing \\ with \, \n with newline, etc. - * + * * ## IMPORTANT: string_view lifetime - * + * * The string_view is only valid as long as the bytes in dst. - * + * * @param dst A pointer to a buffer at least large enough to write this string as well as a \0. * dst will be updated to the next unused location (just after the \0 written out at * the end of this string). @@ -16665,11 +16626,11 @@ private: simdjson_really_inline simdjson_warn_unused simdjson_result unescape(uint8_t *&dst) const noexcept; /** * Unescape this JSON string, replacing \\ with \, \n with newline, etc. - * + * * ## IMPORTANT: string_view lifetime - * + * * The string_view is only valid until the next parse() call on the parser. - * + * * @param iter A json_iterator, which contains a buffer where the string will be written. */ simdjson_really_inline simdjson_warn_unused simdjson_result unescape(json_iterator &iter) const noexcept; @@ -16725,7 +16686,7 @@ class token_iterator { public: /** * Create a new invalid token_iterator. - * + * * Exists so you can declare a variable and later assign to it before use. */ simdjson_really_inline token_iterator() noexcept = default; @@ -16825,7 +16786,7 @@ class json_iterator_ref; /** * Iterates through JSON, with structure-sensitive algorithms. - * + * * @private This is not intended for external use. */ class json_iterator : public token_iterator { @@ -16861,17 +16822,17 @@ public: * Start an object iteration after the user has already checked and moved past the {. * * Does not move the iterator. - * + * * @returns Whether the object had any fields (returns false for empty). */ simdjson_warn_unused simdjson_really_inline bool started_object() noexcept; /** * Moves to the next field in an object. - * + * * Looks for , and }. If } is found, the object is finished and the iterator advances past it. * Otherwise, it advances to the next value. - * + * * @return whether there is another field in the object. * @error TAPE_ERROR If there is a comma missing between fields. */ @@ -16891,7 +16852,7 @@ public: * Find the next field with the given key. * * Assumes you have called next_field() or otherwise matched the previous value. - * + * * Key is *raw JSON,* meaning it will be matched against the verbatim JSON without attempting to * unescape it. This works well for typical ASCII and UTF-8 keys (almost all of them), but may * fail to match some keys with escapes (\u, \n, etc.). @@ -16925,10 +16886,10 @@ public: /** * Moves to the next element in an array. - * + * * Looks for , and ]. If ] is found, the array is finished and the iterator advances past it. * Otherwise, it advances to the next value. - * + * * @return Whether there is another element in the array. * @error TAPE_ERROR If there is a comma missing between elements. */ @@ -16967,7 +16928,7 @@ public: /** * Skips to the end of a JSON object or array. - * + * * @return true if this was the end of an array, false if it was the end of an object. */ simdjson_warn_unused simdjson_really_inline error_code skip_container() noexcept; @@ -16989,7 +16950,7 @@ public: /** * Report an error, preventing further iteration. - * + * * @param error The error to report. Must not be SUCCESS, UNINITIALIZED, INCORRECT_TYPE, or NO_SUCH_FIELD. * @param message An error message to report with the error. */ @@ -17004,13 +16965,13 @@ protected: ondemand::parser *parser{}; /** * Next free location in the string buffer. - * + * * Used by raw_json_string::unescape() to have a place to unescape strings to. */ uint8_t *current_string_buf_loc{}; /** * JSON error, if there is one. - * + * * INCORRECT_TYPE and NO_SUCH_FIELD are *not* stored here, ever. * * PERF NOTE: we *hope* this will be elided into control flow, as it is only used (a) in the first @@ -17121,7 +17082,7 @@ class document; /** * A forward-only JSON array. - * + * * This is an input_iterator, meaning: * - It is forward-only * - * must be called exactly once per element. @@ -17141,7 +17102,7 @@ public: /** * Get the current element. - * + * * Part of the std::iterator interface. */ simdjson_really_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. @@ -17149,7 +17110,7 @@ public: * Check if we are at the end of the JSON. * * Part of the std::iterator interface. - * + * * @return true if there are no more elements in the JSON array. */ simdjson_really_inline bool operator==(const array_iterator &) noexcept; @@ -17157,13 +17118,13 @@ public: * Check if there are more elements in the JSON array. * * Part of the std::iterator interface. - * + * * @return true if there are more elements in the JSON array. */ simdjson_really_inline bool operator!=(const array_iterator &) noexcept; /** * Move to the next element. - * + * * Part of the std::iterator interface. */ simdjson_really_inline array_iterator &operator++() noexcept; @@ -17221,7 +17182,7 @@ class object_iterator { public: /** * Create a new invalid object_iterator. - * + * * Exists so you can declare a variable and later assign to it before use. */ simdjson_really_inline object_iterator() noexcept = default; @@ -17297,7 +17258,7 @@ class array { public: /** * Create a new invalid array. - * + * * Exists so you can declare a variable and later assign to it before use. */ simdjson_really_inline array() noexcept = default; @@ -17335,7 +17296,7 @@ protected: static simdjson_really_inline simdjson_result start(json_iterator_ref &&iter) noexcept; /** * Begin array iteration. - * + * * This version of the method should be called after the initial [ has been verified, and is * intended for use by switch statements that check the type of a value. * @@ -17362,7 +17323,7 @@ protected: /** * Iterator marking current position. - * + * * iter.is_alive() == false indicates iteration is complete. */ json_iterator_ref iter{}; @@ -17422,7 +17383,7 @@ public: /** * Create a new invalid document. - * + * * Exists so you can declare a variable and later assign to it before use. */ simdjson_really_inline document() noexcept = default; @@ -17470,7 +17431,7 @@ public: simdjson_really_inline simdjson_result get_double() noexcept; /** * Cast this JSON value to a string. - * + * * The string is guaranteed to be valid UTF-8. * * Equivalent to get(). @@ -17482,7 +17443,7 @@ public: simdjson_really_inline simdjson_result get_string() & noexcept; /** * Cast this JSON value to a raw_json_string. - * + * * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). * * @returns A pointer to the raw JSON for the given string. @@ -17498,7 +17459,7 @@ public: simdjson_really_inline simdjson_result get_bool() noexcept; /** * Checks if this JSON value is null. - * + * * @returns Whether the value is null. */ simdjson_really_inline bool is_null() noexcept; @@ -17507,7 +17468,7 @@ public: * Get this value as the given type. * * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool - * + * * @returns A value of the given type, parsed from the JSON. * @returns INCORRECT_TYPE If the JSON value is not the given type. */ @@ -17519,7 +17480,7 @@ public: * Get this value as the given type. * * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool - * + * * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. * @returns INCORRECT_TYPE If the JSON value is not an object. * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. @@ -17566,7 +17527,7 @@ public: simdjson_really_inline operator double() noexcept(false); /** * Cast this JSON value to a string. - * + * * The string is guaranteed to be valid UTF-8. * * Equivalent to get(). @@ -17578,7 +17539,7 @@ public: simdjson_really_inline operator std::string_view() & noexcept(false); /** * Cast this JSON value to a raw_json_string. - * + * * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). * * @returns A pointer to the raw JSON for the given string. @@ -17612,7 +17573,7 @@ public: * * This method may only be called once on a given value. If you want to look up multiple fields, * you must first get the object using value.get_object() or object(value). - * + * * @param key The key to look up. * @returns INCORRECT_TYPE If the JSON value is not an array. */ @@ -17622,7 +17583,7 @@ public: * * This method may only be called once on a given value. If you want to look up multiple fields, * you must first get the object using value.get_object() or object(value). - * + * * @param key The key to look up. * @returns INCORRECT_TYPE If the JSON value is not an array. */ @@ -17636,7 +17597,7 @@ protected: static simdjson_really_inline document start(ondemand::json_iterator &&iter) noexcept; /** * Set json to null if the result is successful. - * + * * Convenience function for value-getters. */ template @@ -17738,7 +17699,7 @@ class value { public: /** * Create a new invalid value. - * + * * Exists so you can declare a variable and later assign to it before use. */ simdjson_really_inline value() noexcept = default; @@ -17757,7 +17718,7 @@ public: * Get this value as the given type. * * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool - * + * * @returns A value of the given type, parsed from the JSON. * @returns INCORRECT_TYPE If the JSON value is not the given type. */ @@ -17769,7 +17730,7 @@ public: * Get this value as the given type. * * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool - * + * * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. * @returns INCORRECT_TYPE If the JSON value is not an object. * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. @@ -17830,7 +17791,7 @@ public: /** * Cast this JSON value to a string. - * + * * The string is guaranteed to be valid UTF-8. * * Equivalent to get(). @@ -17845,7 +17806,7 @@ public: /** * Cast this JSON value to a raw_json_string. - * + * * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). * * @returns A pointer to the raw JSON for the given string. @@ -17867,7 +17828,7 @@ public: /** * Checks if this JSON value is null. - * + * * @returns Whether the value is null. */ simdjson_really_inline bool is_null() && noexcept; @@ -17918,7 +17879,7 @@ public: simdjson_really_inline operator double() & noexcept(false); /** * Cast this JSON value to a string. - * + * * The string is guaranteed to be valid UTF-8. * * Equivalent to get(). @@ -17932,7 +17893,7 @@ public: simdjson_really_inline operator std::string_view() & noexcept(false); /** * Cast this JSON value to a raw_json_string. - * + * * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). * * @returns A pointer to the raw JSON for the given string. @@ -17956,7 +17917,7 @@ public: * Begin array iteration. * * Part of the std::iterable interface. - * + * * @returns INCORRECT_TYPE If the JSON value is not an array. */ simdjson_really_inline simdjson_result> begin() & noexcept; @@ -18093,16 +18054,16 @@ namespace ondemand { /** * A JSON field (key/value pair) in an object. - * + * * Returned from object iteration. - * + * * Extends from std::pair so you can use C++ algorithms that rely on pairs. */ class field : public std::pair { public: /** * Create a new invalid field. - * + * * Exists so you can declare a variable and later assign to it before use. */ simdjson_really_inline field() noexcept; @@ -18116,7 +18077,7 @@ public: * Get the key as a string_view (for higher speed, consider raw_key). * We deliberately use a more cumbersome name (unescaped_key) to force users * to think twice about using it. - * + * * This consumes the key: once you have called unescaped_key(), you cannot * call it again nor can you call key(). */ @@ -18179,7 +18140,7 @@ class object { public: /** * Create a new invalid object. - * + * * Exists so you can declare a variable and later assign to it before use. */ simdjson_really_inline object() noexcept = default; @@ -18225,7 +18186,7 @@ protected: json_iterator_ref iter{}; /** * Whether we are at the start. - * + * * PERF NOTE: this should be elided into inline control flow: it is only used for the first [] * or * call, and SSA optimizers commonly do first-iteration loop optimization. */ @@ -18294,17 +18255,17 @@ public: /** * Start iterating an on-demand JSON document. - * + * * ondemand::parser parser; * document doc = parser.iterate(json); - * + * * ### IMPORTANT: Buffer Lifetime - * + * * Because parsing is done while you iterate, you *must* keep the JSON buffer around at least as * long as the document iteration. - * + * * ### IMPORTANT: Document Lifetime - * + * * Only one iteration at a time can happen per parser, and the parser *must* be kept alive during * iteration to ensure intermediate buffers can be accessed. Any document must be destroyed before * you call parse() again or destroy the parser. @@ -18315,7 +18276,7 @@ public: * those bytes are initialized to, as long as they are allocated. * * @param json The JSON to parse. - * + * * @return The document, or an error: * - MEMALLOC if realloc_if_needed the parser does not have enough capacity, and memory * allocation fails. @@ -18328,19 +18289,19 @@ public: simdjson_warn_unused simdjson_result iterate(const std::string &json) & noexcept = delete; /** * @private - * + * * Start iterating an on-demand JSON document. - * + * * ondemand::parser parser; * json_iterator doc = parser.iterate(json); - * + * * ### IMPORTANT: Buffer Lifetime - * + * * Because parsing is done while you iterate, you *must* keep the JSON buffer around at least as * long as the document iteration. - * + * * ### IMPORTANT: Document Lifetime - * + * * Only one iteration at a time can happen per parser, and the parser *must* be kept alive during * iteration to ensure intermediate buffers can be accessed. Any document must be destroyed before * you call parse() again or destroy the parser. @@ -18351,7 +18312,7 @@ public: * those bytes are initialized to, as long as they are allocated. * * @param json The JSON to parse. - * + * * @return The iterator, or an error: * - MEMALLOC if realloc_if_needed the parser does not have enough capacity, and memory * allocation fails. @@ -18400,7 +18361,7 @@ public: } // namespace simdjson /* end file include/simdjson/generic/ondemand/parser.h */ -/* end file include/simdjson/generic/ondemand.h */ +/* end file include/simdjson/generic/ondemand/parser.h */ // Inline definitions /* begin file include/simdjson/generic/implementation_simdjson_result_base-inl.h */ @@ -18820,7 +18781,7 @@ simdjson_warn_unused simdjson_really_inline bool json_iterator::started_array() advance(); return false; } - logger::log_start_value(*this, "array"); + logger::log_start_value(*this, "array"); return true; } @@ -18907,7 +18868,7 @@ simdjson_warn_unused simdjson_really_inline bool json_iterator::copy_to_buffer(c } // Copy to the buffer. - std::memcpy(tmpbuf, json, len); + memcpy(tmpbuf, json, len); tmpbuf[len] = ' '; return true; } @@ -18926,7 +18887,7 @@ simdjson_warn_unused simdjson_result json_iterator::consume_root_uint6 return parse_root_uint64(advance()); } simdjson_warn_unused simdjson_result json_iterator::parse_root_int64(const uint8_t *json) noexcept { - uint8_t tmpbuf[20+1]; // -<19 digits> is the longest possible integer + uint8_t tmpbuf[20+1]; // -<19 digits> is the longest possible integer if (!copy_to_buffer(json, tmpbuf)) { logger::log_error(*this, "Root number more than 20 characters"); return NUMBER_ERROR; } logger::log_value(*this, "int64", ""); auto result = numberparsing::parse_integer(tmpbuf); @@ -19338,7 +19299,7 @@ namespace ondemand { // Next state. In this state, depth > iter->depth, at_start == false, and error == SUCCESS. // // ## Error States -// +// // In error states, we will yield exactly one more value before stopping. iter->depth == depth // and at_start is always false. We decrement after yielding the error, moving to the Finished // state. @@ -20208,7 +20169,7 @@ namespace ondemand { // Next state. In this state, depth > iter->depth, at_start == false, and error == SUCCESS. // // ## Error States -// +// // In error states, we will yield exactly one more value before stopping. iter->depth == depth // and at_start is always false. We decrement after yielding the error, moving to the Finished // state. @@ -20371,7 +20332,7 @@ simdjson_warn_unused simdjson_really_inline simdjson_result parser::it } // Run stage 1. - SIMDJSON_TRY( dom_parser.stage1((const uint8_t *)buf.data(), buf.size(), false) ); + SIMDJSON_TRY( dom_parser.stage1((const uint8_t *)buf.data(), buf.size(), false) ); return document::start(this); } @@ -20382,7 +20343,7 @@ simdjson_warn_unused simdjson_really_inline simdjson_result parse } // Run stage 1. - SIMDJSON_TRY( dom_parser.stage1((const uint8_t *)buf.data(), buf.size(), false) ); + SIMDJSON_TRY( dom_parser.stage1((const uint8_t *)buf.data(), buf.size(), false) ); return json_iterator(this); } @@ -20399,7 +20360,7 @@ simdjson_really_inline simdjson_resultchunks[3].to_bitmask() ; return r0 | (r1 << 16) | (r2 << 32) | (r3 << 48); } - + simdjson_really_inline uint64_t eq(const T m) const { const simd8 mask = simd8::splat(m); return simd8x64( @@ -21395,7 +21356,7 @@ simdjson_unused simdjson_warn_unused simdjson_really_inline error_code parse_str /* end file include/simdjson/generic/stringparsing.h */ #endif // SIMDJSON_WESTMERE_STRINGPARSING_H -/* end file include/simdjson/westmere/stringparsing.h */ +/* end file include/simdjson/generic/stringparsing.h */ /* begin file include/simdjson/westmere/numberparsing.h */ #ifndef SIMDJSON_WESTMERE_NUMBERPARSING_H #define SIMDJSON_WESTMERE_NUMBERPARSING_H @@ -21455,13 +21416,13 @@ namespace numberparsing { namespace { // Convert a mantissa, an exponent and a sign bit into an ieee64 double. // The real_exponent needs to be in [0, 2046] (technically real_exponent = 2047 would be acceptable). -// The mantissa should be in [0,1<<53). The bit at index (1ULL << 52) while be zeroed. +// The mantissa should be in [0,1<<53). The bit at index (1ULL << 52) while be zeroed. simdjson_really_inline double to_double(uint64_t mantissa, uint64_t real_exponent, bool negative) { double d; mantissa &= ~(1ULL << 52); mantissa |= real_exponent << 52; mantissa |= (((uint64_t)negative) << 63); - std::memcpy(&d, &mantissa, sizeof(d)); + memcpy(&d, &mantissa, sizeof(d)); return d; } } @@ -21579,7 +21540,7 @@ simdjson_really_inline bool compute_float_64(int64_t power, uint64_t i, bool neg // We want the most significant 64 bits of the product. We know // this will be non-zero because the most significant bit of i is // 1. - const uint32_t index = 2 * uint32_t(power - simdjson::internal::smallest_power); + const uint32_t index = 2 * uint32_t(power - simdjson::internal::smallest_power); // Optimization: It may be that materializing the index as a variable might confuse some compilers and prevent effective complex-addressing loads. (Done for code clarity.) // // The full_multiplication function computes the 128-bit product of two 64-bit words @@ -21588,7 +21549,7 @@ simdjson_really_inline bool compute_float_64(int64_t power, uint64_t i, bool neg // to the 64-bit most significant bits of the product. simdjson::internal::value128 firstproduct = jsoncharutils::full_multiplication(i, simdjson::internal::power_of_five_128[index]); // Both i and power_of_five_128[index] have their most significant bit set to 1 which - // implies that the either the most or the second most significant bit of the product + // implies that the either the most or the second most significant bit of the product // is 1. We pack values in this manner for efficiency reasons: it maximizes the use // we make of the product. It also makes it easy to reason aboutthe product: there // 0 or 1 leading zero in the product. @@ -21603,17 +21564,17 @@ simdjson_really_inline bool compute_float_64(int64_t power, uint64_t i, bool neg // Consider the scenario where q>=0. Then 5^q may not fit in 64-bits. Doing // the full computation is wasteful. So we do what is called a "truncated // multiplication". - // We take the most significant 64-bits, and we put them in + // We take the most significant 64-bits, and we put them in // power_of_five_128[index]. Usually, that's good enough to approximate i * 5^q - // to the desired approximation using one multiplication. Sometimes it does not suffice. + // to the desired approximation using one multiplication. Sometimes it does not suffice. // Then we store the next most significant 64 bits in power_of_five_128[index + 1], and // then we get a better approximation to i * 5^q. In very rare cases, even that // will not suffice, though it is seemingly very hard to find such a scenario. - // + // // That's for when q>=0. The logic for q<0 is somewhat similar but it is somewhat // more complicated. // - // There is an extra layer of complexity in that we need more than 55 bits of + // There is an extra layer of complexity in that we need more than 55 bits of // accuracy in the round-to-even scenario. // // The full_multiplication function computes the 128-bit product of two 64-bit words @@ -21646,7 +21607,7 @@ simdjson_really_inline bool compute_float_64(int64_t power, uint64_t i, bool neg if(-real_exponent + 1 >= 64) { // if we have more than 64 bits below the minimum exponent, you have a zero for sure. d = 0.0; return true; - } + } // next line is safe because -real_exponent + 1 < 0 mantissa >>= -real_exponent + 1; // Thankfully, we can't have both "round-to-even" and subnormals because @@ -21659,7 +21620,7 @@ simdjson_really_inline bool compute_float_64(int64_t power, uint64_t i, bool neg // whereas 0x40000000000000 x 2^-1023-53 is normal. Now, we need to round // up 0x3fffffffffffff x 2^-1023-53 and once we do, we are no longer // subnormal, but we can only know this after rounding. - // So we only declare a subnormal if we are smaller than the threshold. + // So we only declare a subnormal if we are smaller than the threshold. real_exponent = (mantissa < (uint64_t(1) << 52)) ? 0 : 1; d = to_double(mantissa, real_exponent, negative); return true; @@ -21669,7 +21630,7 @@ simdjson_really_inline bool compute_float_64(int64_t power, uint64_t i, bool neg // which we guard against. // If we have lots of trailing zeros, we may fall right between two // floating-point values. - // + // // The round-to-even cases take the form of a number 2m+1 which is in (2^53,2^54] // times a power of two. That is, it is right between a number with binary significand // m and another number with binary significand m+1; and it must be the case @@ -21680,11 +21641,11 @@ simdjson_really_inline bool compute_float_64(int64_t power, uint64_t i, bool neg // When q >= 0, we must have that (2m+1) is divible by 5^q, so 5^q <= 2^54. We have that // 5^23 <= 2^54 and it is the last power of five to qualify, so q <= 23. // When q<0, we have w >= (2m+1) x 5^{-q}. We must have that w<2^{64} so - // (2m+1) x 5^{-q} < 2^{64}. We have that 2m+1>2^{53}. Hence, we must have + // (2m+1) x 5^{-q} < 2^{64}. We have that 2m+1>2^{53}. Hence, we must have // 2^{53} x 5^{-q} < 2^{64}. - // Hence we have 5^{-q} < 2^{11}$ or q>= -4. + // Hence we have 5^{-q} < 2^{11}$ or q>= -4. // - // We require lower <= 1 and not lower == 0 because we could not prove that + // We require lower <= 1 and not lower == 0 because we could not prove that // that lower == 0 is implied; but we could prove that lower <= 1 is a necessary and sufficient test. if (simdjson_unlikely((lower <= 1) && (power >= -4) && (power <= 23) && ((mantissa & 3) == 1))) { if((mantissa << (upperbit + 64 - 53 - 2)) == upper) { @@ -21737,7 +21698,7 @@ simdjson_really_inline bool is_made_of_eight_digits_fast(const uint8_t *chars) { // this can read up to 7 bytes beyond the buffer size, but we require // SIMDJSON_PADDING of padding static_assert(7 <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be bigger than 7"); - std::memcpy(&val, chars, 8); + memcpy(&val, chars, 8); // a branchy method might be faster: // return (( val & 0xF0F0F0F0F0F0F0F0 ) == 0x3030303030303030) // && (( (val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0 ) == @@ -21892,7 +21853,7 @@ simdjson_really_inline error_code write_float(const uint8_t *const src, bool neg // Observe that 18446744073709551615e-343 == 0, i.e. (2**64 - 1) e -343 is zero // so something x 10^-343 goes to zero, but not so with something x 10^-342. static_assert(simdjson::internal::smallest_power <= -342, "smallest_power is not small enough"); - // + // if((exponent < simdjson::internal::smallest_power) || (i == 0)) { WRITE_DOUBLE(0, src, writer); return SUCCESS; @@ -22164,12 +22125,11 @@ simdjson_unused simdjson_really_inline simdjson_result parse_double(cons if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } exponent += exp_neg ? 0-exp : exp; + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; } if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } - overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; - // // Assemble (or slow-parse) the float // @@ -22192,7 +22152,7 @@ simdjson_unused simdjson_really_inline simdjson_result parse_double(cons /* end file include/simdjson/generic/numberparsing.h */ #endif // SIMDJSON_WESTMERE_NUMBERPARSING_H -/* end file include/simdjson/westmere/numberparsing.h */ +/* end file include/simdjson/generic/numberparsing.h */ /* begin file include/simdjson/generic/implementation_simdjson_result_base.h */ namespace simdjson { namespace SIMDJSON_IMPLEMENTATION { @@ -22322,7 +22282,7 @@ namespace simdjson { namespace SIMDJSON_IMPLEMENTATION { /** * A fast, simple, DOM-like interface that parses JSON as you use it. - * + * * Designed for maximum speed and a lower memory profile. */ namespace ondemand { @@ -22373,11 +22333,11 @@ class parser; * * (In other words, a pointer to the beginning of a string, just after the start quote, inside a * JSON file.) - * + * * This class is deliberately simplistic and has little functionality. You can * compare two raw_json_string instances, or compare a raw_json_string with a string_view, but * that is pretty much all you can do. - * + * * They originate typically from field instance which in turn represent key-value pairs from * object instances. From a field instance, you get the raw_json_string instance by calling key(). * You can, if you want a more usable string_view instance, call the unescaped_key() method @@ -22387,7 +22347,7 @@ class raw_json_string { public: /** * Create a new invalid raw_json_string. - * + * * Exists so you can declare a variable and later assign to it before use. */ simdjson_really_inline raw_json_string() noexcept = default; @@ -22397,15 +22357,15 @@ public: /** * Create a new invalid raw_json_string pointed at the given location in the JSON. - * + * * The given location must be just *after* the beginning quote (") in the JSON file. - * + * * It *must* be terminated by a ", and be a valid JSON string. */ simdjson_really_inline raw_json_string(const uint8_t * _buf) noexcept; /** * Get the raw pointer to the beginning of the string in the JSON (just after the "). - * + * * It is possible for this function to return a null pointer if the instance * has outlived its existence. */ @@ -22425,11 +22385,11 @@ private: /** * Unescape this JSON string, replacing \\ with \, \n with newline, etc. - * + * * ## IMPORTANT: string_view lifetime - * + * * The string_view is only valid as long as the bytes in dst. - * + * * @param dst A pointer to a buffer at least large enough to write this string as well as a \0. * dst will be updated to the next unused location (just after the \0 written out at * the end of this string). @@ -22439,11 +22399,11 @@ private: simdjson_really_inline simdjson_warn_unused simdjson_result unescape(uint8_t *&dst) const noexcept; /** * Unescape this JSON string, replacing \\ with \, \n with newline, etc. - * + * * ## IMPORTANT: string_view lifetime - * + * * The string_view is only valid until the next parse() call on the parser. - * + * * @param iter A json_iterator, which contains a buffer where the string will be written. */ simdjson_really_inline simdjson_warn_unused simdjson_result unescape(json_iterator &iter) const noexcept; @@ -22499,7 +22459,7 @@ class token_iterator { public: /** * Create a new invalid token_iterator. - * + * * Exists so you can declare a variable and later assign to it before use. */ simdjson_really_inline token_iterator() noexcept = default; @@ -22599,7 +22559,7 @@ class json_iterator_ref; /** * Iterates through JSON, with structure-sensitive algorithms. - * + * * @private This is not intended for external use. */ class json_iterator : public token_iterator { @@ -22635,17 +22595,17 @@ public: * Start an object iteration after the user has already checked and moved past the {. * * Does not move the iterator. - * + * * @returns Whether the object had any fields (returns false for empty). */ simdjson_warn_unused simdjson_really_inline bool started_object() noexcept; /** * Moves to the next field in an object. - * + * * Looks for , and }. If } is found, the object is finished and the iterator advances past it. * Otherwise, it advances to the next value. - * + * * @return whether there is another field in the object. * @error TAPE_ERROR If there is a comma missing between fields. */ @@ -22665,7 +22625,7 @@ public: * Find the next field with the given key. * * Assumes you have called next_field() or otherwise matched the previous value. - * + * * Key is *raw JSON,* meaning it will be matched against the verbatim JSON without attempting to * unescape it. This works well for typical ASCII and UTF-8 keys (almost all of them), but may * fail to match some keys with escapes (\u, \n, etc.). @@ -22699,10 +22659,10 @@ public: /** * Moves to the next element in an array. - * + * * Looks for , and ]. If ] is found, the array is finished and the iterator advances past it. * Otherwise, it advances to the next value. - * + * * @return Whether there is another element in the array. * @error TAPE_ERROR If there is a comma missing between elements. */ @@ -22741,7 +22701,7 @@ public: /** * Skips to the end of a JSON object or array. - * + * * @return true if this was the end of an array, false if it was the end of an object. */ simdjson_warn_unused simdjson_really_inline error_code skip_container() noexcept; @@ -22763,7 +22723,7 @@ public: /** * Report an error, preventing further iteration. - * + * * @param error The error to report. Must not be SUCCESS, UNINITIALIZED, INCORRECT_TYPE, or NO_SUCH_FIELD. * @param message An error message to report with the error. */ @@ -22778,13 +22738,13 @@ protected: ondemand::parser *parser{}; /** * Next free location in the string buffer. - * + * * Used by raw_json_string::unescape() to have a place to unescape strings to. */ uint8_t *current_string_buf_loc{}; /** * JSON error, if there is one. - * + * * INCORRECT_TYPE and NO_SUCH_FIELD are *not* stored here, ever. * * PERF NOTE: we *hope* this will be elided into control flow, as it is only used (a) in the first @@ -22895,7 +22855,7 @@ class document; /** * A forward-only JSON array. - * + * * This is an input_iterator, meaning: * - It is forward-only * - * must be called exactly once per element. @@ -22915,7 +22875,7 @@ public: /** * Get the current element. - * + * * Part of the std::iterator interface. */ simdjson_really_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. @@ -22923,7 +22883,7 @@ public: * Check if we are at the end of the JSON. * * Part of the std::iterator interface. - * + * * @return true if there are no more elements in the JSON array. */ simdjson_really_inline bool operator==(const array_iterator &) noexcept; @@ -22931,13 +22891,13 @@ public: * Check if there are more elements in the JSON array. * * Part of the std::iterator interface. - * + * * @return true if there are more elements in the JSON array. */ simdjson_really_inline bool operator!=(const array_iterator &) noexcept; /** * Move to the next element. - * + * * Part of the std::iterator interface. */ simdjson_really_inline array_iterator &operator++() noexcept; @@ -22995,7 +22955,7 @@ class object_iterator { public: /** * Create a new invalid object_iterator. - * + * * Exists so you can declare a variable and later assign to it before use. */ simdjson_really_inline object_iterator() noexcept = default; @@ -23071,7 +23031,7 @@ class array { public: /** * Create a new invalid array. - * + * * Exists so you can declare a variable and later assign to it before use. */ simdjson_really_inline array() noexcept = default; @@ -23109,7 +23069,7 @@ protected: static simdjson_really_inline simdjson_result start(json_iterator_ref &&iter) noexcept; /** * Begin array iteration. - * + * * This version of the method should be called after the initial [ has been verified, and is * intended for use by switch statements that check the type of a value. * @@ -23136,7 +23096,7 @@ protected: /** * Iterator marking current position. - * + * * iter.is_alive() == false indicates iteration is complete. */ json_iterator_ref iter{}; @@ -23196,7 +23156,7 @@ public: /** * Create a new invalid document. - * + * * Exists so you can declare a variable and later assign to it before use. */ simdjson_really_inline document() noexcept = default; @@ -23244,7 +23204,7 @@ public: simdjson_really_inline simdjson_result get_double() noexcept; /** * Cast this JSON value to a string. - * + * * The string is guaranteed to be valid UTF-8. * * Equivalent to get(). @@ -23256,7 +23216,7 @@ public: simdjson_really_inline simdjson_result get_string() & noexcept; /** * Cast this JSON value to a raw_json_string. - * + * * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). * * @returns A pointer to the raw JSON for the given string. @@ -23272,7 +23232,7 @@ public: simdjson_really_inline simdjson_result get_bool() noexcept; /** * Checks if this JSON value is null. - * + * * @returns Whether the value is null. */ simdjson_really_inline bool is_null() noexcept; @@ -23281,7 +23241,7 @@ public: * Get this value as the given type. * * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool - * + * * @returns A value of the given type, parsed from the JSON. * @returns INCORRECT_TYPE If the JSON value is not the given type. */ @@ -23293,7 +23253,7 @@ public: * Get this value as the given type. * * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool - * + * * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. * @returns INCORRECT_TYPE If the JSON value is not an object. * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. @@ -23340,7 +23300,7 @@ public: simdjson_really_inline operator double() noexcept(false); /** * Cast this JSON value to a string. - * + * * The string is guaranteed to be valid UTF-8. * * Equivalent to get(). @@ -23352,7 +23312,7 @@ public: simdjson_really_inline operator std::string_view() & noexcept(false); /** * Cast this JSON value to a raw_json_string. - * + * * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). * * @returns A pointer to the raw JSON for the given string. @@ -23386,7 +23346,7 @@ public: * * This method may only be called once on a given value. If you want to look up multiple fields, * you must first get the object using value.get_object() or object(value). - * + * * @param key The key to look up. * @returns INCORRECT_TYPE If the JSON value is not an array. */ @@ -23396,7 +23356,7 @@ public: * * This method may only be called once on a given value. If you want to look up multiple fields, * you must first get the object using value.get_object() or object(value). - * + * * @param key The key to look up. * @returns INCORRECT_TYPE If the JSON value is not an array. */ @@ -23410,7 +23370,7 @@ protected: static simdjson_really_inline document start(ondemand::json_iterator &&iter) noexcept; /** * Set json to null if the result is successful. - * + * * Convenience function for value-getters. */ template @@ -23512,7 +23472,7 @@ class value { public: /** * Create a new invalid value. - * + * * Exists so you can declare a variable and later assign to it before use. */ simdjson_really_inline value() noexcept = default; @@ -23531,7 +23491,7 @@ public: * Get this value as the given type. * * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool - * + * * @returns A value of the given type, parsed from the JSON. * @returns INCORRECT_TYPE If the JSON value is not the given type. */ @@ -23543,7 +23503,7 @@ public: * Get this value as the given type. * * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool - * + * * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. * @returns INCORRECT_TYPE If the JSON value is not an object. * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. @@ -23604,7 +23564,7 @@ public: /** * Cast this JSON value to a string. - * + * * The string is guaranteed to be valid UTF-8. * * Equivalent to get(). @@ -23619,7 +23579,7 @@ public: /** * Cast this JSON value to a raw_json_string. - * + * * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). * * @returns A pointer to the raw JSON for the given string. @@ -23641,7 +23601,7 @@ public: /** * Checks if this JSON value is null. - * + * * @returns Whether the value is null. */ simdjson_really_inline bool is_null() && noexcept; @@ -23692,7 +23652,7 @@ public: simdjson_really_inline operator double() & noexcept(false); /** * Cast this JSON value to a string. - * + * * The string is guaranteed to be valid UTF-8. * * Equivalent to get(). @@ -23706,7 +23666,7 @@ public: simdjson_really_inline operator std::string_view() & noexcept(false); /** * Cast this JSON value to a raw_json_string. - * + * * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). * * @returns A pointer to the raw JSON for the given string. @@ -23730,7 +23690,7 @@ public: * Begin array iteration. * * Part of the std::iterable interface. - * + * * @returns INCORRECT_TYPE If the JSON value is not an array. */ simdjson_really_inline simdjson_result> begin() & noexcept; @@ -23867,16 +23827,16 @@ namespace ondemand { /** * A JSON field (key/value pair) in an object. - * + * * Returned from object iteration. - * + * * Extends from std::pair so you can use C++ algorithms that rely on pairs. */ class field : public std::pair { public: /** * Create a new invalid field. - * + * * Exists so you can declare a variable and later assign to it before use. */ simdjson_really_inline field() noexcept; @@ -23890,7 +23850,7 @@ public: * Get the key as a string_view (for higher speed, consider raw_key). * We deliberately use a more cumbersome name (unescaped_key) to force users * to think twice about using it. - * + * * This consumes the key: once you have called unescaped_key(), you cannot * call it again nor can you call key(). */ @@ -23953,7 +23913,7 @@ class object { public: /** * Create a new invalid object. - * + * * Exists so you can declare a variable and later assign to it before use. */ simdjson_really_inline object() noexcept = default; @@ -23999,7 +23959,7 @@ protected: json_iterator_ref iter{}; /** * Whether we are at the start. - * + * * PERF NOTE: this should be elided into inline control flow: it is only used for the first [] * or * call, and SSA optimizers commonly do first-iteration loop optimization. */ @@ -24068,17 +24028,17 @@ public: /** * Start iterating an on-demand JSON document. - * + * * ondemand::parser parser; * document doc = parser.iterate(json); - * + * * ### IMPORTANT: Buffer Lifetime - * + * * Because parsing is done while you iterate, you *must* keep the JSON buffer around at least as * long as the document iteration. - * + * * ### IMPORTANT: Document Lifetime - * + * * Only one iteration at a time can happen per parser, and the parser *must* be kept alive during * iteration to ensure intermediate buffers can be accessed. Any document must be destroyed before * you call parse() again or destroy the parser. @@ -24089,7 +24049,7 @@ public: * those bytes are initialized to, as long as they are allocated. * * @param json The JSON to parse. - * + * * @return The document, or an error: * - MEMALLOC if realloc_if_needed the parser does not have enough capacity, and memory * allocation fails. @@ -24102,19 +24062,19 @@ public: simdjson_warn_unused simdjson_result iterate(const std::string &json) & noexcept = delete; /** * @private - * + * * Start iterating an on-demand JSON document. - * + * * ondemand::parser parser; * json_iterator doc = parser.iterate(json); - * + * * ### IMPORTANT: Buffer Lifetime - * + * * Because parsing is done while you iterate, you *must* keep the JSON buffer around at least as * long as the document iteration. - * + * * ### IMPORTANT: Document Lifetime - * + * * Only one iteration at a time can happen per parser, and the parser *must* be kept alive during * iteration to ensure intermediate buffers can be accessed. Any document must be destroyed before * you call parse() again or destroy the parser. @@ -24125,7 +24085,7 @@ public: * those bytes are initialized to, as long as they are allocated. * * @param json The JSON to parse. - * + * * @return The iterator, or an error: * - MEMALLOC if realloc_if_needed the parser does not have enough capacity, and memory * allocation fails. @@ -24174,7 +24134,7 @@ public: } // namespace simdjson /* end file include/simdjson/generic/ondemand/parser.h */ -/* end file include/simdjson/generic/ondemand.h */ +/* end file include/simdjson/generic/ondemand/parser.h */ // Inline definitions /* begin file include/simdjson/generic/implementation_simdjson_result_base-inl.h */ @@ -24594,7 +24554,7 @@ simdjson_warn_unused simdjson_really_inline bool json_iterator::started_array() advance(); return false; } - logger::log_start_value(*this, "array"); + logger::log_start_value(*this, "array"); return true; } @@ -24681,7 +24641,7 @@ simdjson_warn_unused simdjson_really_inline bool json_iterator::copy_to_buffer(c } // Copy to the buffer. - std::memcpy(tmpbuf, json, len); + memcpy(tmpbuf, json, len); tmpbuf[len] = ' '; return true; } @@ -24700,7 +24660,7 @@ simdjson_warn_unused simdjson_result json_iterator::consume_root_uint6 return parse_root_uint64(advance()); } simdjson_warn_unused simdjson_result json_iterator::parse_root_int64(const uint8_t *json) noexcept { - uint8_t tmpbuf[20+1]; // -<19 digits> is the longest possible integer + uint8_t tmpbuf[20+1]; // -<19 digits> is the longest possible integer if (!copy_to_buffer(json, tmpbuf)) { logger::log_error(*this, "Root number more than 20 characters"); return NUMBER_ERROR; } logger::log_value(*this, "int64", ""); auto result = numberparsing::parse_integer(tmpbuf); @@ -25112,7 +25072,7 @@ namespace ondemand { // Next state. In this state, depth > iter->depth, at_start == false, and error == SUCCESS. // // ## Error States -// +// // In error states, we will yield exactly one more value before stopping. iter->depth == depth // and at_start is always false. We decrement after yielding the error, moving to the Finished // state. @@ -25982,7 +25942,7 @@ namespace ondemand { // Next state. In this state, depth > iter->depth, at_start == false, and error == SUCCESS. // // ## Error States -// +// // In error states, we will yield exactly one more value before stopping. iter->depth == depth // and at_start is always false. We decrement after yielding the error, moving to the Finished // state. @@ -26145,7 +26105,7 @@ simdjson_warn_unused simdjson_really_inline simdjson_result parser::it } // Run stage 1. - SIMDJSON_TRY( dom_parser.stage1((const uint8_t *)buf.data(), buf.size(), false) ); + SIMDJSON_TRY( dom_parser.stage1((const uint8_t *)buf.data(), buf.size(), false) ); return document::start(this); } @@ -26156,7 +26116,7 @@ simdjson_warn_unused simdjson_really_inline simdjson_result parse } // Run stage 1. - SIMDJSON_TRY( dom_parser.stage1((const uint8_t *)buf.data(), buf.size(), false) ); + SIMDJSON_TRY( dom_parser.stage1((const uint8_t *)buf.data(), buf.size(), false) ); return json_iterator(this); } @@ -26173,7 +26133,7 @@ simdjson_really_inline simdjson_result &dst) - const noexcept final; - simdjson_warn_unused error_code minify(const uint8_t *buf, size_t len, - uint8_t *dst, - size_t &dst_len) const noexcept final; - simdjson_warn_unused bool validate_utf8(const char *buf, - size_t len) const noexcept final; -}; - -} // namespace ppc64 -} // namespace simdjson - -#endif // SIMDJSON_PPC64_IMPLEMENTATION_H -/* end file include/simdjson/ppc64/implementation.h */ - -/* begin file include/simdjson/ppc64/begin.h */ -#define SIMDJSON_IMPLEMENTATION ppc64 -/* end file include/simdjson/ppc64/begin.h */ - -// Declarations -/* begin file include/simdjson/generic/dom_parser_implementation.h */ - -namespace simdjson { -namespace SIMDJSON_IMPLEMENTATION { - -// expectation: sizeof(open_container) = 64/8. -struct open_container { - uint32_t tape_index; // where, on the tape, does the scope ([,{) begins - uint32_t count; // how many elements in the scope -}; // struct open_container - -static_assert(sizeof(open_container) == 64/8, "Open container must be 64 bits"); - -class dom_parser_implementation final : public internal::dom_parser_implementation { -public: - /** Tape location of each open { or [ */ - std::unique_ptr open_containers{}; - /** Whether each open container is a [ or { */ - std::unique_ptr is_array{}; - /** Buffer passed to stage 1 */ - const uint8_t *buf{}; - /** Length passed to stage 1 */ - size_t len{0}; - /** Document passed to stage 2 */ - dom::document *doc{}; - - inline dom_parser_implementation() noexcept; - inline dom_parser_implementation(dom_parser_implementation &&other) noexcept; - inline dom_parser_implementation &operator=(dom_parser_implementation &&other) noexcept; - dom_parser_implementation(const dom_parser_implementation &) = delete; - dom_parser_implementation &operator=(const dom_parser_implementation &) = delete; - - simdjson_warn_unused error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept final; - simdjson_warn_unused error_code stage1(const uint8_t *buf, size_t len, bool partial) noexcept final; - simdjson_warn_unused error_code check_for_unclosed_array() noexcept; - simdjson_warn_unused error_code stage2(dom::document &doc) noexcept final; - simdjson_warn_unused error_code stage2_next(dom::document &doc) noexcept final; - inline simdjson_warn_unused error_code set_capacity(size_t capacity) noexcept final; - inline simdjson_warn_unused error_code set_max_depth(size_t max_depth) noexcept final; -private: - simdjson_really_inline simdjson_warn_unused error_code set_capacity_stage1(size_t capacity); - -}; - -} // namespace SIMDJSON_IMPLEMENTATION -} // namespace simdjson - -namespace simdjson { -namespace SIMDJSON_IMPLEMENTATION { - -inline dom_parser_implementation::dom_parser_implementation() noexcept = default; -inline dom_parser_implementation::dom_parser_implementation(dom_parser_implementation &&other) noexcept = default; -inline dom_parser_implementation &dom_parser_implementation::operator=(dom_parser_implementation &&other) noexcept = default; - -// Leaving these here so they can be inlined if so desired -inline simdjson_warn_unused error_code dom_parser_implementation::set_capacity(size_t capacity) noexcept { - // Stage 1 index output - size_t max_structures = SIMDJSON_ROUNDUP_N(capacity, 64) + 2 + 7; - structural_indexes.reset( new (std::nothrow) uint32_t[max_structures] ); - if (!structural_indexes) { _capacity = 0; return MEMALLOC; } - structural_indexes[0] = 0; - n_structural_indexes = 0; - - _capacity = capacity; - return SUCCESS; -} - -inline simdjson_warn_unused error_code dom_parser_implementation::set_max_depth(size_t max_depth) noexcept { - // Stage 2 stacks - open_containers.reset(new (std::nothrow) open_container[max_depth]); - is_array.reset(new (std::nothrow) bool[max_depth]); - if (!is_array || !open_containers) { _max_depth = 0; return MEMALLOC; } - - _max_depth = max_depth; - return SUCCESS; -} - -} // namespace SIMDJSON_IMPLEMENTATION -} // namespace simdjson -/* end file include/simdjson/generic/dom_parser_implementation.h */ -/* begin file include/simdjson/ppc64/intrinsics.h */ -#ifndef SIMDJSON_PPC64_INTRINSICS_H -#define SIMDJSON_PPC64_INTRINSICS_H - - -// This should be the correct header whether -// you use visual studio or other compilers. -#include - -// These are defined by altivec.h in GCC toolchain, it is safe to undef them. -#ifdef bool -#undef bool -#endif - -#ifdef vector -#undef vector -#endif - -#endif // SIMDJSON_PPC64_INTRINSICS_H -/* end file include/simdjson/ppc64/intrinsics.h */ -/* begin file include/simdjson/ppc64/bitmanipulation.h */ -#ifndef SIMDJSON_PPC64_BITMANIPULATION_H -#define SIMDJSON_PPC64_BITMANIPULATION_H - -namespace simdjson { -namespace SIMDJSON_IMPLEMENTATION { -namespace { - -// We sometimes call trailing_zero on inputs that are zero, -// but the algorithms do not end up using the returned value. -// Sadly, sanitizers are not smart enough to figure it out. -NO_SANITIZE_UNDEFINED -simdjson_really_inline int trailing_zeroes(uint64_t input_num) { -#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO - unsigned long ret; - // Search the mask data from least significant bit (LSB) - // to the most significant bit (MSB) for a set bit (1). - _BitScanForward64(&ret, input_num); - return (int)ret; -#else // SIMDJSON_REGULAR_VISUAL_STUDIO - return __builtin_ctzll(input_num); -#endif // SIMDJSON_REGULAR_VISUAL_STUDIO -} - -/* result might be undefined when input_num is zero */ -simdjson_really_inline uint64_t clear_lowest_bit(uint64_t input_num) { - return input_num & (input_num - 1); -} - -/* result might be undefined when input_num is zero */ -simdjson_really_inline int leading_zeroes(uint64_t input_num) { -#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO - unsigned long leading_zero = 0; - // Search the mask data from most significant bit (MSB) - // to least significant bit (LSB) for a set bit (1). - if (_BitScanReverse64(&leading_zero, input_num)) - return (int)(63 - leading_zero); - else - return 64; -#else - return __builtin_clzll(input_num); -#endif // SIMDJSON_REGULAR_VISUAL_STUDIO -} - -#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO -simdjson_really_inline int count_ones(uint64_t input_num) { - // note: we do not support legacy 32-bit Windows - return __popcnt64(input_num); // Visual Studio wants two underscores -} -#else -simdjson_really_inline int count_ones(uint64_t input_num) { - return __builtin_popcountll(input_num); -} -#endif - -simdjson_really_inline bool add_overflow(uint64_t value1, uint64_t value2, - uint64_t *result) { -#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO - *result = value1 + value2; - return *result < value1; -#else - return __builtin_uaddll_overflow(value1, value2, - (unsigned long long *)result); -#endif -} - -} // unnamed namespace -} // namespace SIMDJSON_IMPLEMENTATION -} // namespace simdjson - -#endif // SIMDJSON_PPC64_BITMANIPULATION_H -/* end file include/simdjson/ppc64/bitmanipulation.h */ -/* begin file include/simdjson/ppc64/bitmask.h */ -#ifndef SIMDJSON_PPC64_BITMASK_H -#define SIMDJSON_PPC64_BITMASK_H - -namespace simdjson { -namespace SIMDJSON_IMPLEMENTATION { -namespace { - -// -// Perform a "cumulative bitwise xor," flipping bits each time a 1 is -// encountered. -// -// For example, prefix_xor(00100100) == 00011100 -// -simdjson_really_inline uint64_t prefix_xor(uint64_t bitmask) { - // You can use the version below, however gcc sometimes miscompiles - // vec_pmsum_be, it happens somewhere around between 8 and 9th version. - // The performance boost was not noticeable, falling back to a usual - // implementation. - // __vector unsigned long long all_ones = {~0ull, ~0ull}; - // __vector unsigned long long mask = {bitmask, 0}; - // // Clang and GCC return different values for pmsum for ull so cast it to one. - // // Generally it is not specified by ALTIVEC ISA what is returned by - // // vec_pmsum_be. - // #if defined(__LITTLE_ENDIAN__) - // return (uint64_t)(((__vector unsigned long long)vec_pmsum_be(all_ones, mask))[0]); - // #else - // return (uint64_t)(((__vector unsigned long long)vec_pmsum_be(all_ones, mask))[1]); - // #endif - bitmask ^= bitmask << 1; - bitmask ^= bitmask << 2; - bitmask ^= bitmask << 4; - bitmask ^= bitmask << 8; - bitmask ^= bitmask << 16; - bitmask ^= bitmask << 32; - return bitmask; -} - -} // unnamed namespace -} // namespace SIMDJSON_IMPLEMENTATION -} // namespace simdjson - -#endif -/* end file include/simdjson/ppc64/bitmask.h */ -/* begin file include/simdjson/ppc64/simd.h */ -#ifndef SIMDJSON_PPC64_SIMD_H -#define SIMDJSON_PPC64_SIMD_H - -#include - -namespace simdjson { -namespace SIMDJSON_IMPLEMENTATION { -namespace { -namespace simd { - -using __m128i = __vector unsigned char; - -template struct base { - __m128i value; - - // Zero constructor - simdjson_really_inline base() : value{__m128i()} {} - - // Conversion from SIMD register - simdjson_really_inline base(const __m128i _value) : value(_value) {} - - // Conversion to SIMD register - simdjson_really_inline operator const __m128i &() const { - return this->value; - } - simdjson_really_inline operator __m128i &() { return this->value; } - - // Bit operations - simdjson_really_inline Child operator|(const Child other) const { - return vec_or(this->value, (__m128i)other); - } - simdjson_really_inline Child operator&(const Child other) const { - return vec_and(this->value, (__m128i)other); - } - simdjson_really_inline Child operator^(const Child other) const { - return vec_xor(this->value, (__m128i)other); - } - simdjson_really_inline Child bit_andnot(const Child other) const { - return vec_andc(this->value, (__m128i)other); - } - simdjson_really_inline Child &operator|=(const Child other) { - auto this_cast = (Child *)this; - *this_cast = *this_cast | other; - return *this_cast; - } - simdjson_really_inline Child &operator&=(const Child other) { - auto this_cast = (Child *)this; - *this_cast = *this_cast & other; - return *this_cast; - } - simdjson_really_inline Child &operator^=(const Child other) { - auto this_cast = (Child *)this; - *this_cast = *this_cast ^ other; - return *this_cast; - } -}; - -// Forward-declared so they can be used by splat and friends. -template struct simd8; - -template > -struct base8 : base> { - typedef uint16_t bitmask_t; - typedef uint32_t bitmask2_t; - - simdjson_really_inline base8() : base>() {} - simdjson_really_inline base8(const __m128i _value) : base>(_value) {} - - simdjson_really_inline Mask operator==(const simd8 other) const { - return (__m128i)vec_cmpeq(this->value, (__m128i)other); - } - - static const int SIZE = sizeof(base>::value); - - template - simdjson_really_inline simd8 prev(simd8 prev_chunk) const { - __m128i chunk = this->value; -#ifdef __LITTLE_ENDIAN__ - chunk = (__m128i)vec_reve(this->value); - prev_chunk = (__m128i)vec_reve((__m128i)prev_chunk); -#endif - chunk = (__m128i)vec_sld((__m128i)prev_chunk, (__m128i)chunk, 16 - N); -#ifdef __LITTLE_ENDIAN__ - chunk = (__m128i)vec_reve((__m128i)chunk); -#endif - return chunk; - } -}; - -// SIMD byte mask type (returned by things like eq and gt) -template <> struct simd8 : base8 { - static simdjson_really_inline simd8 splat(bool _value) { - return (__m128i)vec_splats((unsigned char)(-(!!_value))); - } - - simdjson_really_inline simd8() : base8() {} - simdjson_really_inline simd8(const __m128i _value) - : base8(_value) {} - // Splat constructor - simdjson_really_inline simd8(bool _value) - : base8(splat(_value)) {} - - simdjson_really_inline int to_bitmask() const { - __vector unsigned long long result; - const __m128i perm_mask = {0x78, 0x70, 0x68, 0x60, 0x58, 0x50, 0x48, 0x40, - 0x38, 0x30, 0x28, 0x20, 0x18, 0x10, 0x08, 0x00}; - - result = ((__vector unsigned long long)vec_vbpermq((__m128i)this->value, - (__m128i)perm_mask)); -#ifdef __LITTLE_ENDIAN__ - return static_cast(result[1]); -#else - return static_cast(result[0]); -#endif - } - simdjson_really_inline bool any() const { - return !vec_all_eq(this->value, (__m128i)vec_splats(0)); - } - simdjson_really_inline simd8 operator~() const { - return this->value ^ (__m128i)splat(true); - } -}; - -template struct base8_numeric : base8 { - static simdjson_really_inline simd8 splat(T value) { - (void)value; - return (__m128i)vec_splats(value); - } - static simdjson_really_inline simd8 zero() { return splat(0); } - static simdjson_really_inline simd8 load(const T values[16]) { - return (__m128i)(vec_vsx_ld(0, (const uint8_t *)values)); - } - // Repeat 16 values as many times as necessary (usually for lookup tables) - static simdjson_really_inline simd8 repeat_16(T v0, T v1, T v2, T v3, T v4, - T v5, T v6, T v7, T v8, T v9, - T v10, T v11, T v12, T v13, - T v14, T v15) { - return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, - v14, v15); - } - - simdjson_really_inline base8_numeric() : base8() {} - simdjson_really_inline base8_numeric(const __m128i _value) - : base8(_value) {} - - // Store to array - simdjson_really_inline void store(T dst[16]) const { - vec_vsx_st(this->value, 0, reinterpret_cast<__m128i *>(dst)); - } - - // Override to distinguish from bool version - simdjson_really_inline simd8 operator~() const { return *this ^ 0xFFu; } - - // Addition/subtraction are the same for signed and unsigned - simdjson_really_inline simd8 operator+(const simd8 other) const { - return (__m128i)((__m128i)this->value + (__m128i)other); - } - simdjson_really_inline simd8 operator-(const simd8 other) const { - return (__m128i)((__m128i)this->value - (__m128i)other); - } - simdjson_really_inline simd8 &operator+=(const simd8 other) { - *this = *this + other; - return *(simd8 *)this; - } - simdjson_really_inline simd8 &operator-=(const simd8 other) { - *this = *this - other; - return *(simd8 *)this; - } - - // Perform a lookup assuming the value is between 0 and 16 (undefined behavior - // for out of range values) - template - simdjson_really_inline simd8 lookup_16(simd8 lookup_table) const { - return (__m128i)vec_perm((__m128i)lookup_table, (__m128i)lookup_table, this->value); - } - - // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted - // as a bitset). Passing a 0 value for mask would be equivalent to writing out - // every byte to output. Only the first 16 - count_ones(mask) bytes of the - // result are significant but 16 bytes get written. Design consideration: it - // seems like a function with the signature simd8 compress(uint32_t mask) - // would be sensible, but the AVX ISA makes this kind of approach difficult. - template - simdjson_really_inline void compress(uint16_t mask, L *output) const { - using internal::BitsSetTable256mul2; - using internal::pshufb_combine_table; - using internal::thintable_epi8; - // this particular implementation was inspired by work done by @animetosho - // we do it in two steps, first 8 bytes and then second 8 bytes - uint8_t mask1 = uint8_t(mask); // least significant 8 bits - uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits - // next line just loads the 64-bit values thintable_epi8[mask1] and - // thintable_epi8[mask2] into a 128-bit register, using only - // two instructions on most compilers. -#ifdef __LITTLE_ENDIAN__ - __m128i shufmask = (__m128i)(__vector unsigned long long){ - thintable_epi8[mask1], thintable_epi8[mask2]}; -#else - __m128i shufmask = (__m128i)(__vector unsigned long long){ - thintable_epi8[mask2], thintable_epi8[mask1]}; - shufmask = (__m128i)vec_reve((__m128i)shufmask); -#endif - // we increment by 0x08 the second half of the mask - shufmask = ((__m128i)shufmask) + - ((__m128i)(__vector int){0, 0, 0x08080808, 0x08080808}); - - // this is the version "nearly pruned" - __m128i pruned = vec_perm(this->value, this->value, shufmask); - // we still need to put the two halves together. - // we compute the popcount of the first half: - int pop1 = BitsSetTable256mul2[mask1]; - // then load the corresponding mask, what it does is to write - // only the first pop1 bytes from the first 8 bytes, and then - // it fills in with the bytes from the second 8 bytes + some filling - // at the end. - __m128i compactmask = - vec_vsx_ld(0, (const uint8_t *)(pshufb_combine_table + pop1 * 8)); - __m128i answer = vec_perm(pruned, (__m128i)vec_splats(0), compactmask); - vec_vsx_st(answer, 0, (__m128i *)(output)); - } - - template - simdjson_really_inline simd8 - lookup_16(L replace0, L replace1, L replace2, L replace3, L replace4, - L replace5, L replace6, L replace7, L replace8, L replace9, - L replace10, L replace11, L replace12, L replace13, L replace14, - L replace15) const { - return lookup_16(simd8::repeat_16( - replace0, replace1, replace2, replace3, replace4, replace5, replace6, - replace7, replace8, replace9, replace10, replace11, replace12, - replace13, replace14, replace15)); - } -}; - -// Signed bytes -template <> struct simd8 : base8_numeric { - simdjson_really_inline simd8() : base8_numeric() {} - simdjson_really_inline simd8(const __m128i _value) - : base8_numeric(_value) {} - // Splat constructor - simdjson_really_inline simd8(int8_t _value) : simd8(splat(_value)) {} - // Array constructor - simdjson_really_inline simd8(const int8_t *values) : simd8(load(values)) {} - // Member-by-member initialization - simdjson_really_inline simd8(int8_t v0, int8_t v1, int8_t v2, int8_t v3, - int8_t v4, int8_t v5, int8_t v6, int8_t v7, - int8_t v8, int8_t v9, int8_t v10, int8_t v11, - int8_t v12, int8_t v13, int8_t v14, int8_t v15) - : simd8((__m128i)(__vector signed char){v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10, v11, v12, v13, v14, - v15}) {} - // Repeat 16 values as many times as necessary (usually for lookup tables) - simdjson_really_inline static simd8 - repeat_16(int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, - int8_t v6, int8_t v7, int8_t v8, int8_t v9, int8_t v10, int8_t v11, - int8_t v12, int8_t v13, int8_t v14, int8_t v15) { - return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, - v13, v14, v15); - } - - // Order-sensitive comparisons - simdjson_really_inline simd8 - max_val(const simd8 other) const { - return (__m128i)vec_max((__vector signed char)this->value, - (__vector signed char)(__m128i)other); - } - simdjson_really_inline simd8 - min_val(const simd8 other) const { - return (__m128i)vec_min((__vector signed char)this->value, - (__vector signed char)(__m128i)other); - } - simdjson_really_inline simd8 - operator>(const simd8 other) const { - return (__m128i)vec_cmpgt((__vector signed char)this->value, - (__vector signed char)(__m128i)other); - } - simdjson_really_inline simd8 - operator<(const simd8 other) const { - return (__m128i)vec_cmplt((__vector signed char)this->value, - (__vector signed char)(__m128i)other); - } -}; - -// Unsigned bytes -template <> struct simd8 : base8_numeric { - simdjson_really_inline simd8() : base8_numeric() {} - simdjson_really_inline simd8(const __m128i _value) - : base8_numeric(_value) {} - // Splat constructor - simdjson_really_inline simd8(uint8_t _value) : simd8(splat(_value)) {} - // Array constructor - simdjson_really_inline simd8(const uint8_t *values) : simd8(load(values)) {} - // Member-by-member initialization - simdjson_really_inline - simd8(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, - uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10, - uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15) - : simd8((__m128i){v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, - v13, v14, v15}) {} - // Repeat 16 values as many times as necessary (usually for lookup tables) - simdjson_really_inline static simd8 - repeat_16(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, - uint8_t v5, uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, - uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, - uint8_t v15) { - return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, - v13, v14, v15); - } - - // Saturated math - simdjson_really_inline simd8 - saturating_add(const simd8 other) const { - return (__m128i)vec_adds(this->value, (__m128i)other); - } - simdjson_really_inline simd8 - saturating_sub(const simd8 other) const { - return (__m128i)vec_subs(this->value, (__m128i)other); - } - - // Order-specific operations - simdjson_really_inline simd8 - max_val(const simd8 other) const { - return (__m128i)vec_max(this->value, (__m128i)other); - } - simdjson_really_inline simd8 - min_val(const simd8 other) const { - return (__m128i)vec_min(this->value, (__m128i)other); - } - // Same as >, but only guarantees true is nonzero (< guarantees true = -1) - simdjson_really_inline simd8 - gt_bits(const simd8 other) const { - return this->saturating_sub(other); - } - // Same as <, but only guarantees true is nonzero (< guarantees true = -1) - simdjson_really_inline simd8 - lt_bits(const simd8 other) const { - return other.saturating_sub(*this); - } - simdjson_really_inline simd8 - operator<=(const simd8 other) const { - return other.max_val(*this) == other; - } - simdjson_really_inline simd8 - operator>=(const simd8 other) const { - return other.min_val(*this) == other; - } - simdjson_really_inline simd8 - operator>(const simd8 other) const { - return this->gt_bits(other).any_bits_set(); - } - simdjson_really_inline simd8 - operator<(const simd8 other) const { - return this->gt_bits(other).any_bits_set(); - } - - // Bit-specific operations - simdjson_really_inline simd8 bits_not_set() const { - return (__m128i)vec_cmpeq(this->value, (__m128i)vec_splats(uint8_t(0))); - } - simdjson_really_inline simd8 bits_not_set(simd8 bits) const { - return (*this & bits).bits_not_set(); - } - simdjson_really_inline simd8 any_bits_set() const { - return ~this->bits_not_set(); - } - simdjson_really_inline simd8 any_bits_set(simd8 bits) const { - return ~this->bits_not_set(bits); - } - simdjson_really_inline bool bits_not_set_anywhere() const { - return vec_all_eq(this->value, (__m128i)vec_splats(0)); - } - simdjson_really_inline bool any_bits_set_anywhere() const { - return !bits_not_set_anywhere(); - } - simdjson_really_inline bool bits_not_set_anywhere(simd8 bits) const { - return vec_all_eq(vec_and(this->value, (__m128i)bits), - (__m128i)vec_splats(0)); - } - simdjson_really_inline bool any_bits_set_anywhere(simd8 bits) const { - return !bits_not_set_anywhere(bits); - } - template simdjson_really_inline simd8 shr() const { - return simd8( - (__m128i)vec_sr(this->value, (__m128i)vec_splat_u8(N))); - } - template simdjson_really_inline simd8 shl() const { - return simd8( - (__m128i)vec_sl(this->value, (__m128i)vec_splat_u8(N))); - } -}; - -template struct simd8x64 { - static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); - static_assert(NUM_CHUNKS == 4, - "Westmere kernel should use four registers per 64-byte block."); - const simd8 chunks[NUM_CHUNKS]; - - simd8x64(const simd8x64 &o) = delete; // no copy allowed - simd8x64 & - operator=(const simd8 other) = delete; // no assignment allowed - simd8x64() = delete; // no default constructor allowed - - simdjson_really_inline simd8x64(const simd8 chunk0, const simd8 chunk1, - const simd8 chunk2, const simd8 chunk3) - : chunks{chunk0, chunk1, chunk2, chunk3} {} - simdjson_really_inline simd8x64(const T ptr[64]) - : chunks{simd8::load(ptr), simd8::load(ptr + 16), - simd8::load(ptr + 32), simd8::load(ptr + 48)} {} - - simdjson_really_inline void store(T ptr[64]) const { - this->chunks[0].store(ptr + sizeof(simd8) * 0); - this->chunks[1].store(ptr + sizeof(simd8) * 1); - this->chunks[2].store(ptr + sizeof(simd8) * 2); - this->chunks[3].store(ptr + sizeof(simd8) * 3); - } - - simdjson_really_inline simd8 reduce_or() const { - return (this->chunks[0] | this->chunks[1]) | - (this->chunks[2] | this->chunks[3]); - } - - simdjson_really_inline void compress(uint64_t mask, T *output) const { - this->chunks[0].compress(uint16_t(mask), output); - this->chunks[1].compress(uint16_t(mask >> 16), - output + 16 - count_ones(mask & 0xFFFF)); - this->chunks[2].compress(uint16_t(mask >> 32), - output + 32 - count_ones(mask & 0xFFFFFFFF)); - this->chunks[3].compress(uint16_t(mask >> 48), - output + 48 - count_ones(mask & 0xFFFFFFFFFFFF)); - } - - simdjson_really_inline uint64_t to_bitmask() const { - uint64_t r0 = uint32_t(this->chunks[0].to_bitmask()); - uint64_t r1 = this->chunks[1].to_bitmask(); - uint64_t r2 = this->chunks[2].to_bitmask(); - uint64_t r3 = this->chunks[3].to_bitmask(); - return r0 | (r1 << 16) | (r2 << 32) | (r3 << 48); - } - - simdjson_really_inline uint64_t eq(const T m) const { - const simd8 mask = simd8::splat(m); - return simd8x64(this->chunks[0] == mask, this->chunks[1] == mask, - this->chunks[2] == mask, this->chunks[3] == mask) - .to_bitmask(); - } - - simdjson_really_inline uint64_t eq(const simd8x64 &other) const { - return simd8x64(this->chunks[0] == other.chunks[0], - this->chunks[1] == other.chunks[1], - this->chunks[2] == other.chunks[2], - this->chunks[3] == other.chunks[3]) - .to_bitmask(); - } - - simdjson_really_inline uint64_t lteq(const T m) const { - const simd8 mask = simd8::splat(m); - return simd8x64(this->chunks[0] <= mask, this->chunks[1] <= mask, - this->chunks[2] <= mask, this->chunks[3] <= mask) - .to_bitmask(); - } -}; // struct simd8x64 - -} // namespace simd -} // unnamed namespace -} // namespace SIMDJSON_IMPLEMENTATION -} // namespace simdjson - -#endif // SIMDJSON_PPC64_SIMD_INPUT_H -/* end file include/simdjson/ppc64/simd.h */ -/* begin file include/simdjson/generic/jsoncharutils.h */ - -namespace simdjson { -namespace SIMDJSON_IMPLEMENTATION { -namespace { -namespace jsoncharutils { - -// return non-zero if not a structural or whitespace char -// zero otherwise -simdjson_really_inline uint32_t is_not_structural_or_whitespace(uint8_t c) { - return internal::structural_or_whitespace_negated[c]; -} - -simdjson_really_inline uint32_t is_structural_or_whitespace(uint8_t c) { - return internal::structural_or_whitespace[c]; -} - -// returns a value with the high 16 bits set if not valid -// otherwise returns the conversion of the 4 hex digits at src into the bottom -// 16 bits of the 32-bit return register -// -// see -// https://lemire.me/blog/2019/04/17/parsing-short-hexadecimal-strings-efficiently/ -static inline uint32_t hex_to_u32_nocheck( - const uint8_t *src) { // strictly speaking, static inline is a C-ism - uint32_t v1 = internal::digit_to_val32[630 + src[0]]; - uint32_t v2 = internal::digit_to_val32[420 + src[1]]; - uint32_t v3 = internal::digit_to_val32[210 + src[2]]; - uint32_t v4 = internal::digit_to_val32[0 + src[3]]; - return v1 | v2 | v3 | v4; -} - -// given a code point cp, writes to c -// the utf-8 code, outputting the length in -// bytes, if the length is zero, the code point -// is invalid -// -// This can possibly be made faster using pdep -// and clz and table lookups, but JSON documents -// have few escaped code points, and the following -// function looks cheap. -// -// Note: we assume that surrogates are treated separately -// -simdjson_really_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) { - if (cp <= 0x7F) { - c[0] = uint8_t(cp); - return 1; // ascii - } - if (cp <= 0x7FF) { - c[0] = uint8_t((cp >> 6) + 192); - c[1] = uint8_t((cp & 63) + 128); - return 2; // universal plane - // Surrogates are treated elsewhere... - //} //else if (0xd800 <= cp && cp <= 0xdfff) { - // return 0; // surrogates // could put assert here - } else if (cp <= 0xFFFF) { - c[0] = uint8_t((cp >> 12) + 224); - c[1] = uint8_t(((cp >> 6) & 63) + 128); - c[2] = uint8_t((cp & 63) + 128); - return 3; - } else if (cp <= 0x10FFFF) { // if you know you have a valid code point, this - // is not needed - c[0] = uint8_t((cp >> 18) + 240); - c[1] = uint8_t(((cp >> 12) & 63) + 128); - c[2] = uint8_t(((cp >> 6) & 63) + 128); - c[3] = uint8_t((cp & 63) + 128); - return 4; - } - // will return 0 when the code point was too large. - return 0; // bad r -} - -#ifdef SIMDJSON_IS_32BITS // _umul128 for x86, arm -// this is a slow emulation routine for 32-bit -// -static simdjson_really_inline uint64_t __emulu(uint32_t x, uint32_t y) { - return x * (uint64_t)y; -} -static simdjson_really_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { - uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd); - uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd); - uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32)); - uint64_t adbc_carry = !!(adbc < ad); - uint64_t lo = bd + (adbc << 32); - *hi = __emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) + - (adbc_carry << 32) + !!(lo < bd); - return lo; -} -#endif - -using internal::value128; - -simdjson_really_inline value128 full_multiplication(uint64_t value1, uint64_t value2) { - value128 answer; -#if defined(SIMDJSON_REGULAR_VISUAL_STUDIO) || defined(SIMDJSON_IS_32BITS) -#ifdef _M_ARM64 - // ARM64 has native support for 64-bit multiplications, no need to emultate - answer.high = __umulh(value1, value2); - answer.low = value1 * value2; -#else - answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 -#endif // _M_ARM64 -#else // defined(SIMDJSON_REGULAR_VISUAL_STUDIO) || defined(SIMDJSON_IS_32BITS) - __uint128_t r = ((__uint128_t)value1) * value2; - answer.low = uint64_t(r); - answer.high = uint64_t(r >> 64); -#endif - return answer; -} - -} // namespace jsoncharutils -} // unnamed namespace -} // namespace SIMDJSON_IMPLEMENTATION -} // namespace simdjson -/* end file include/simdjson/generic/jsoncharutils.h */ -/* begin file include/simdjson/generic/atomparsing.h */ -namespace simdjson { -namespace SIMDJSON_IMPLEMENTATION { -namespace { -/// @private -namespace atomparsing { - -// The string_to_uint32 is exclusively used to map literal strings to 32-bit values. -// We use memcpy instead of a pointer cast to avoid undefined behaviors since we cannot -// be certain that the character pointer will be properly aligned. -// You might think that using memcpy makes this function expensive, but you'd be wrong. -// All decent optimizing compilers (GCC, clang, Visual Studio) will compile string_to_uint32("false"); -// to the compile-time constant 1936482662. -simdjson_really_inline uint32_t string_to_uint32(const char* str) { uint32_t val; std::memcpy(&val, str, sizeof(uint32_t)); return val; } - - -// Again in str4ncmp we use a memcpy to avoid undefined behavior. The memcpy may appear expensive. -// Yet all decent optimizing compilers will compile memcpy to a single instruction, just about. -simdjson_warn_unused -simdjson_really_inline uint32_t str4ncmp(const uint8_t *src, const char* atom) { - uint32_t srcval; // we want to avoid unaligned 32-bit loads (undefined in C/C++) - static_assert(sizeof(uint32_t) <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be larger than 4 bytes"); - std::memcpy(&srcval, src, sizeof(uint32_t)); - return srcval ^ string_to_uint32(atom); -} - -simdjson_warn_unused -simdjson_really_inline bool is_valid_true_atom(const uint8_t *src) { - return (str4ncmp(src, "true") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; -} - -simdjson_warn_unused -simdjson_really_inline bool is_valid_true_atom(const uint8_t *src, size_t len) { - if (len > 4) { return is_valid_true_atom(src); } - else if (len == 4) { return !str4ncmp(src, "true"); } - else { return false; } -} - -simdjson_warn_unused -simdjson_really_inline bool is_valid_false_atom(const uint8_t *src) { - return (str4ncmp(src+1, "alse") | jsoncharutils::is_not_structural_or_whitespace(src[5])) == 0; -} - -simdjson_warn_unused -simdjson_really_inline bool is_valid_false_atom(const uint8_t *src, size_t len) { - if (len > 5) { return is_valid_false_atom(src); } - else if (len == 5) { return !str4ncmp(src+1, "alse"); } - else { return false; } -} - -simdjson_warn_unused -simdjson_really_inline bool is_valid_null_atom(const uint8_t *src) { - return (str4ncmp(src, "null") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; -} - -simdjson_warn_unused -simdjson_really_inline bool is_valid_null_atom(const uint8_t *src, size_t len) { - if (len > 4) { return is_valid_null_atom(src); } - else if (len == 4) { return !str4ncmp(src, "null"); } - else { return false; } -} - -} // namespace atomparsing -} // unnamed namespace -} // namespace SIMDJSON_IMPLEMENTATION -} // namespace simdjson -/* end file include/simdjson/generic/atomparsing.h */ -/* begin file include/simdjson/ppc64/stringparsing.h */ -#ifndef SIMDJSON_PPC64_STRINGPARSING_H -#define SIMDJSON_PPC64_STRINGPARSING_H - - -namespace simdjson { -namespace SIMDJSON_IMPLEMENTATION { -namespace { - -using namespace simd; - -// Holds backslashes and quotes locations. -struct backslash_and_quote { -public: - static constexpr uint32_t BYTES_PROCESSED = 32; - simdjson_really_inline static backslash_and_quote - copy_and_find(const uint8_t *src, uint8_t *dst); - - simdjson_really_inline bool has_quote_first() { - return ((bs_bits - 1) & quote_bits) != 0; - } - simdjson_really_inline bool has_backslash() { return bs_bits != 0; } - simdjson_really_inline int quote_index() { - return trailing_zeroes(quote_bits); - } - simdjson_really_inline int backslash_index() { - return trailing_zeroes(bs_bits); - } - - uint32_t bs_bits; - uint32_t quote_bits; -}; // struct backslash_and_quote - -simdjson_really_inline backslash_and_quote -backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { - // this can read up to 31 bytes beyond the buffer size, but we require - // SIMDJSON_PADDING of padding - static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), - "backslash and quote finder must process fewer than " - "SIMDJSON_PADDING bytes"); - simd8 v0(src); - simd8 v1(src + sizeof(v0)); - v0.store(dst); - v1.store(dst + sizeof(v0)); - - // Getting a 64-bit bitmask is much cheaper than multiple 16-bit bitmasks on - // PPC; therefore, we smash them together into a 64-byte mask and get the - // bitmask from there. - uint64_t bs_and_quote = - simd8x64(v0 == '\\', v1 == '\\', v0 == '"', v1 == '"').to_bitmask(); - return { - uint32_t(bs_and_quote), // bs_bits - uint32_t(bs_and_quote >> 32) // quote_bits - }; -} - -} // unnamed namespace -} // namespace SIMDJSON_IMPLEMENTATION -} // namespace simdjson - -/* begin file include/simdjson/generic/stringparsing.h */ -// This file contains the common code every implementation uses -// It is intended to be included multiple times and compiled multiple times - -namespace simdjson { -namespace SIMDJSON_IMPLEMENTATION { -namespace { -/// @private -namespace stringparsing { - -// begin copypasta -// These chars yield themselves: " \ / -// b -> backspace, f -> formfeed, n -> newline, r -> cr, t -> horizontal tab -// u not handled in this table as it's complex -static const uint8_t escape_map[256] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x0. - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0x22, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x2f, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x4. - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x5c, 0, 0, 0, // 0x5. - 0, 0, 0x08, 0, 0, 0, 0x0c, 0, 0, 0, 0, 0, 0, 0, 0x0a, 0, // 0x6. - 0, 0, 0x0d, 0, 0x09, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x7. - - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -}; - -// handle a unicode codepoint -// write appropriate values into dest -// src will advance 6 bytes or 12 bytes -// dest will advance a variable amount (return via pointer) -// return true if the unicode codepoint was valid -// We work in little-endian then swap at write time -simdjson_warn_unused -simdjson_really_inline bool handle_unicode_codepoint(const uint8_t **src_ptr, - uint8_t **dst_ptr) { - // jsoncharutils::hex_to_u32_nocheck fills high 16 bits of the return value with 1s if the - // conversion isn't valid; we defer the check for this to inside the - // multilingual plane check - uint32_t code_point = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); - *src_ptr += 6; - // check for low surrogate for characters outside the Basic - // Multilingual Plane. - if (code_point >= 0xd800 && code_point < 0xdc00) { - if (((*src_ptr)[0] != '\\') || (*src_ptr)[1] != 'u') { - return false; - } - uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); - - // if the first code point is invalid we will get here, as we will go past - // the check for being outside the Basic Multilingual plane. If we don't - // find a \u immediately afterwards we fail out anyhow, but if we do, - // this check catches both the case of the first code point being invalid - // or the second code point being invalid. - if ((code_point | code_point_2) >> 16) { - return false; - } - - code_point = - (((code_point - 0xd800) << 10) | (code_point_2 - 0xdc00)) + 0x10000; - *src_ptr += 6; - } - size_t offset = jsoncharutils::codepoint_to_utf8(code_point, *dst_ptr); - *dst_ptr += offset; - return offset > 0; -} - -/** - * Unescape a string from src to dst, stopping at a final unescaped quote. E.g., if src points at 'joe"', then - * dst needs to have four free bytes. - */ -simdjson_warn_unused simdjson_really_inline uint8_t *parse_string(const uint8_t *src, uint8_t *dst) { - while (1) { - // Copy the next n bytes, and find the backslash and quote in them. - auto bs_quote = backslash_and_quote::copy_and_find(src, dst); - // If the next thing is the end quote, copy and return - if (bs_quote.has_quote_first()) { - // we encountered quotes first. Move dst to point to quotes and exit - return dst + bs_quote.quote_index(); - } - if (bs_quote.has_backslash()) { - /* find out where the backspace is */ - auto bs_dist = bs_quote.backslash_index(); - uint8_t escape_char = src[bs_dist + 1]; - /* we encountered backslash first. Handle backslash */ - if (escape_char == 'u') { - /* move src/dst up to the start; they will be further adjusted - within the unicode codepoint handling code. */ - src += bs_dist; - dst += bs_dist; - if (!handle_unicode_codepoint(&src, &dst)) { - return nullptr; - } - } else { - /* simple 1:1 conversion. Will eat bs_dist+2 characters in input and - * write bs_dist+1 characters to output - * note this may reach beyond the part of the buffer we've actually - * seen. I think this is ok */ - uint8_t escape_result = escape_map[escape_char]; - if (escape_result == 0u) { - return nullptr; /* bogus escape value is an error */ - } - dst[bs_dist] = escape_result; - src += bs_dist + 2; - dst += bs_dist + 1; - } - } else { - /* they are the same. Since they can't co-occur, it means we - * encountered neither. */ - src += backslash_and_quote::BYTES_PROCESSED; - dst += backslash_and_quote::BYTES_PROCESSED; - } - } - /* can't be reached */ - return nullptr; -} - -simdjson_unused simdjson_warn_unused simdjson_really_inline error_code parse_string_to_buffer(const uint8_t *src, uint8_t *¤t_string_buf_loc, std::string_view &s) { - if (*(src++) != '"') { return STRING_ERROR; } - auto end = stringparsing::parse_string(src, current_string_buf_loc); - if (!end) { return STRING_ERROR; } - s = std::string_view((const char *)current_string_buf_loc, end-current_string_buf_loc); - current_string_buf_loc = end; - return SUCCESS; -} - -} // namespace stringparsing -} // unnamed namespace -} // namespace SIMDJSON_IMPLEMENTATION -} // namespace simdjson -/* end file include/simdjson/generic/stringparsing.h */ - -#endif // SIMDJSON_PPC64_STRINGPARSING_H -/* end file include/simdjson/ppc64/stringparsing.h */ -/* begin file include/simdjson/ppc64/numberparsing.h */ -#ifndef SIMDJSON_PPC64_NUMBERPARSING_H -#define SIMDJSON_PPC64_NUMBERPARSING_H - -#include - -namespace simdjson { -namespace SIMDJSON_IMPLEMENTATION { -namespace { - -// we don't have appropriate instructions, so let us use a scalar function -// credit: https://johnnylee-sde.github.io/Fast-numeric-string-to-int/ -static simdjson_really_inline uint32_t -parse_eight_digits_unrolled(const uint8_t *chars) { - uint64_t val; - std::memcpy(&val, chars, sizeof(uint64_t)); -#ifdef __BIG_ENDIAN__ - val = bswap_64(val); -#endif - val = (val & 0x0F0F0F0F0F0F0F0F) * 2561 >> 8; - val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16; - return uint32_t((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32); -} - -} // unnamed namespace -} // namespace SIMDJSON_IMPLEMENTATION -} // namespace simdjson - -#define SWAR_NUMBER_PARSING - -/* begin file include/simdjson/generic/numberparsing.h */ -#include -#include - -namespace simdjson { -namespace SIMDJSON_IMPLEMENTATION { -namespace { -/// @private -namespace numberparsing { - - - -#ifdef JSON_TEST_NUMBERS -#define INVALID_NUMBER(SRC) (found_invalid_number((SRC)), NUMBER_ERROR) -#define WRITE_INTEGER(VALUE, SRC, WRITER) (found_integer((VALUE), (SRC)), (WRITER).append_s64((VALUE))) -#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (found_unsigned_integer((VALUE), (SRC)), (WRITER).append_u64((VALUE))) -#define WRITE_DOUBLE(VALUE, SRC, WRITER) (found_float((VALUE), (SRC)), (WRITER).append_double((VALUE))) -#else -#define INVALID_NUMBER(SRC) (NUMBER_ERROR) -#define WRITE_INTEGER(VALUE, SRC, WRITER) (WRITER).append_s64((VALUE)) -#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (WRITER).append_u64((VALUE)) -#define WRITE_DOUBLE(VALUE, SRC, WRITER) (WRITER).append_double((VALUE)) -#endif - -namespace { -// Convert a mantissa, an exponent and a sign bit into an ieee64 double. -// The real_exponent needs to be in [0, 2046] (technically real_exponent = 2047 would be acceptable). -// The mantissa should be in [0,1<<53). The bit at index (1ULL << 52) while be zeroed. -simdjson_really_inline double to_double(uint64_t mantissa, uint64_t real_exponent, bool negative) { - double d; - mantissa &= ~(1ULL << 52); - mantissa |= real_exponent << 52; - mantissa |= (((uint64_t)negative) << 63); - std::memcpy(&d, &mantissa, sizeof(d)); - return d; -} -} -// Attempts to compute i * 10^(power) exactly; and if "negative" is -// true, negate the result. -// This function will only work in some cases, when it does not work, success is -// set to false. This should work *most of the time* (like 99% of the time). -// We assume that power is in the [smallest_power, -// largest_power] interval: the caller is responsible for this check. -simdjson_really_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, double &d) { - // we start with a fast path - // It was described in - // Clinger WD. How to read floating point numbers accurately. - // ACM SIGPLAN Notices. 1990 -#ifndef FLT_EVAL_METHOD -#error "FLT_EVAL_METHOD should be defined, please include cfloat." -#endif -#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0) - // We cannot be certain that x/y is rounded to nearest. - if (0 <= power && power <= 22 && i <= 9007199254740991) { -#else - if (-22 <= power && power <= 22 && i <= 9007199254740991) { -#endif - // convert the integer into a double. This is lossless since - // 0 <= i <= 2^53 - 1. - d = double(i); - // - // The general idea is as follows. - // If 0 <= s < 2^53 and if 10^0 <= p <= 10^22 then - // 1) Both s and p can be represented exactly as 64-bit floating-point - // values - // (binary64). - // 2) Because s and p can be represented exactly as floating-point values, - // then s * p - // and s / p will produce correctly rounded values. - // - if (power < 0) { - d = d / simdjson::internal::power_of_ten[-power]; - } else { - d = d * simdjson::internal::power_of_ten[power]; - } - if (negative) { - d = -d; - } - return true; - } - // When 22 < power && power < 22 + 16, we could - // hope for another, secondary fast path. It was - // described by David M. Gay in "Correctly rounded - // binary-decimal and decimal-binary conversions." (1990) - // If you need to compute i * 10^(22 + x) for x < 16, - // first compute i * 10^x, if you know that result is exact - // (e.g., when i * 10^x < 2^53), - // then you can still proceed and do (i * 10^x) * 10^22. - // Is this worth your time? - // You need 22 < power *and* power < 22 + 16 *and* (i * 10^(x-22) < 2^53) - // for this second fast path to work. - // If you you have 22 < power *and* power < 22 + 16, and then you - // optimistically compute "i * 10^(x-22)", there is still a chance that you - // have wasted your time if i * 10^(x-22) >= 2^53. It makes the use cases of - // this optimization maybe less common than we would like. Source: - // http://www.exploringbinary.com/fast-path-decimal-to-floating-point-conversion/ - // also used in RapidJSON: https://rapidjson.org/strtod_8h_source.html - - // The fast path has now failed, so we are failing back on the slower path. - - // In the slow path, we need to adjust i so that it is > 1<<63 which is always - // possible, except if i == 0, so we handle i == 0 separately. - if(i == 0) { - d = 0.0; - return true; - } - - - // The exponent is 1024 + 63 + power - // + floor(log(5**power)/log(2)). - // The 1024 comes from the ieee64 standard. - // The 63 comes from the fact that we use a 64-bit word. - // - // Computing floor(log(5**power)/log(2)) could be - // slow. Instead we use a fast function. - // - // For power in (-400,350), we have that - // (((152170 + 65536) * power ) >> 16); - // is equal to - // floor(log(5**power)/log(2)) + power - // - // The 65536 is (1<<16) and corresponds to - // (65536 * power) >> 16 ---> power - // - // ((152170 * power ) >> 16) is equal to - // floor(log(5**power)/log(2)) - // - // Note that this is not magic: 152170/(1<<16) is - // approximatively equal to log(5)/log(2). - // The 1<<16 value is a power of two; we could use a - // larger power of 2 if we wanted to. - // - int64_t exponent = (((152170 + 65536) * power) >> 16) + 1024 + 63; - - - // We want the most significant bit of i to be 1. Shift if needed. - int lz = leading_zeroes(i); - i <<= lz; - - - // We are going to need to do some 64-bit arithmetic to get a precise product. - // We use a table lookup approach. - // It is safe because - // power >= smallest_power - // and power <= largest_power - // We recover the mantissa of the power, it has a leading 1. It is always - // rounded down. - // - // We want the most significant 64 bits of the product. We know - // this will be non-zero because the most significant bit of i is - // 1. - const uint32_t index = 2 * uint32_t(power - simdjson::internal::smallest_power); - // Optimization: It may be that materializing the index as a variable might confuse some compilers and prevent effective complex-addressing loads. (Done for code clarity.) - // - // The full_multiplication function computes the 128-bit product of two 64-bit words - // with a returned value of type value128 with a "low component" corresponding to the - // 64-bit least significant bits of the product and with a "high component" corresponding - // to the 64-bit most significant bits of the product. - simdjson::internal::value128 firstproduct = jsoncharutils::full_multiplication(i, simdjson::internal::power_of_five_128[index]); - // Both i and power_of_five_128[index] have their most significant bit set to 1 which - // implies that the either the most or the second most significant bit of the product - // is 1. We pack values in this manner for efficiency reasons: it maximizes the use - // we make of the product. It also makes it easy to reason aboutthe product: there - // 0 or 1 leading zero in the product. - - // Unless the least significant 9 bits of the high (64-bit) part of the full - // product are all 1s, then we know that the most significant 55 bits are - // exact and no further work is needed. Having 55 bits is necessary because - // we need 53 bits for the mantissa but we have to have one rounding bit and - // we can waste a bit if the most significant bit of the product is zero. - if((firstproduct.high & 0x1FF) == 0x1FF) { - // We want to compute i * 5^q, but only care about the top 55 bits at most. - // Consider the scenario where q>=0. Then 5^q may not fit in 64-bits. Doing - // the full computation is wasteful. So we do what is called a "truncated - // multiplication". - // We take the most significant 64-bits, and we put them in - // power_of_five_128[index]. Usually, that's good enough to approximate i * 5^q - // to the desired approximation using one multiplication. Sometimes it does not suffice. - // Then we store the next most significant 64 bits in power_of_five_128[index + 1], and - // then we get a better approximation to i * 5^q. In very rare cases, even that - // will not suffice, though it is seemingly very hard to find such a scenario. - // - // That's for when q>=0. The logic for q<0 is somewhat similar but it is somewhat - // more complicated. - // - // There is an extra layer of complexity in that we need more than 55 bits of - // accuracy in the round-to-even scenario. - // - // The full_multiplication function computes the 128-bit product of two 64-bit words - // with a returned value of type value128 with a "low component" corresponding to the - // 64-bit least significant bits of the product and with a "high component" corresponding - // to the 64-bit most significant bits of the product. - simdjson::internal::value128 secondproduct = jsoncharutils::full_multiplication(i, simdjson::internal::power_of_five_128[index + 1]); - firstproduct.low += secondproduct.high; - if(secondproduct.high > firstproduct.low) { firstproduct.high++; } - // At this point, we might need to add at most one to firstproduct, but this - // can only change the value of firstproduct.high if firstproduct.low is maximal. - if(simdjson_unlikely(firstproduct.low == 0xFFFFFFFFFFFFFFFF)) { - // This is very unlikely, but if so, we need to do much more work! - return false; - } - } - uint64_t lower = firstproduct.low; - uint64_t upper = firstproduct.high; - // The final mantissa should be 53 bits with a leading 1. - // We shift it so that it occupies 54 bits with a leading 1. - /////// - uint64_t upperbit = upper >> 63; - uint64_t mantissa = upper >> (upperbit + 9); - lz += int(1 ^ upperbit); - - // Here we have mantissa < (1<<54). - int64_t real_exponent = exponent - lz; - if (simdjson_unlikely(real_exponent <= 0)) { // we have a subnormal? - // Here have that real_exponent <= 0 so -real_exponent >= 0 - if(-real_exponent + 1 >= 64) { // if we have more than 64 bits below the minimum exponent, you have a zero for sure. - d = 0.0; - return true; - } - // next line is safe because -real_exponent + 1 < 0 - mantissa >>= -real_exponent + 1; - // Thankfully, we can't have both "round-to-even" and subnormals because - // "round-to-even" only occurs for powers close to 0. - mantissa += (mantissa & 1); // round up - mantissa >>= 1; - // There is a weird scenario where we don't have a subnormal but just. - // Suppose we start with 2.2250738585072013e-308, we end up - // with 0x3fffffffffffff x 2^-1023-53 which is technically subnormal - // whereas 0x40000000000000 x 2^-1023-53 is normal. Now, we need to round - // up 0x3fffffffffffff x 2^-1023-53 and once we do, we are no longer - // subnormal, but we can only know this after rounding. - // So we only declare a subnormal if we are smaller than the threshold. - real_exponent = (mantissa < (uint64_t(1) << 52)) ? 0 : 1; - d = to_double(mantissa, real_exponent, negative); - return true; - } - // We have to round to even. The "to even" part - // is only a problem when we are right in between two floats - // which we guard against. - // If we have lots of trailing zeros, we may fall right between two - // floating-point values. - // - // The round-to-even cases take the form of a number 2m+1 which is in (2^53,2^54] - // times a power of two. That is, it is right between a number with binary significand - // m and another number with binary significand m+1; and it must be the case - // that it cannot be represented by a float itself. - // - // We must have that w * 10 ^q == (2m+1) * 2^p for some power of two 2^p. - // Recall that 10^q = 5^q * 2^q. - // When q >= 0, we must have that (2m+1) is divible by 5^q, so 5^q <= 2^54. We have that - // 5^23 <= 2^54 and it is the last power of five to qualify, so q <= 23. - // When q<0, we have w >= (2m+1) x 5^{-q}. We must have that w<2^{64} so - // (2m+1) x 5^{-q} < 2^{64}. We have that 2m+1>2^{53}. Hence, we must have - // 2^{53} x 5^{-q} < 2^{64}. - // Hence we have 5^{-q} < 2^{11}$ or q>= -4. - // - // We require lower <= 1 and not lower == 0 because we could not prove that - // that lower == 0 is implied; but we could prove that lower <= 1 is a necessary and sufficient test. - if (simdjson_unlikely((lower <= 1) && (power >= -4) && (power <= 23) && ((mantissa & 3) == 1))) { - if((mantissa << (upperbit + 64 - 53 - 2)) == upper) { - mantissa &= ~1; // flip it so that we do not round up - } - } - - mantissa += mantissa & 1; - mantissa >>= 1; - - // Here we have mantissa < (1<<53), unless there was an overflow - if (mantissa >= (1ULL << 53)) { - ////////// - // This will happen when parsing values such as 7.2057594037927933e+16 - //////// - mantissa = (1ULL << 52); - real_exponent++; - } - mantissa &= ~(1ULL << 52); - // we have to check that real_exponent is in range, otherwise we bail out - if (simdjson_unlikely(real_exponent > 2046)) { - // We have an infinte value!!! We could actually throw an error here if we could. - return false; - } - d = to_double(mantissa, real_exponent, negative); - return true; -} - -// We call a fallback floating-point parser that might be slow. Note -// it will accept JSON numbers, but the JSON spec. is more restrictive so -// before you call parse_float_fallback, you need to have validated the input -// string with the JSON grammar. -// It will return an error (false) if the parsed number is infinite. -// The string parsing itself always succeeds. We know that there is at least -// one digit. -static bool parse_float_fallback(const uint8_t *ptr, double *outDouble) { - *outDouble = simdjson::internal::from_chars((const char *)ptr); - // We do not accept infinite values. - if (!std::isfinite(*outDouble)) { - return false; - } - return true; -} - -// check quickly whether the next 8 chars are made of digits -// at a glance, it looks better than Mula's -// http://0x80.pl/articles/swar-digits-validate.html -simdjson_really_inline bool is_made_of_eight_digits_fast(const uint8_t *chars) { - uint64_t val; - // this can read up to 7 bytes beyond the buffer size, but we require - // SIMDJSON_PADDING of padding - static_assert(7 <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be bigger than 7"); - std::memcpy(&val, chars, 8); - // a branchy method might be faster: - // return (( val & 0xF0F0F0F0F0F0F0F0 ) == 0x3030303030303030) - // && (( (val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0 ) == - // 0x3030303030303030); - return (((val & 0xF0F0F0F0F0F0F0F0) | - (((val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4)) == - 0x3333333333333333); -} - -template -error_code slow_float_parsing(simdjson_unused const uint8_t * src, W writer) { - double d; - if (parse_float_fallback(src, &d)) { - writer.append_double(d); - return SUCCESS; - } - return INVALID_NUMBER(src); -} - -template -NO_SANITIZE_UNDEFINED // We deliberately allow overflow here and check later -simdjson_really_inline bool parse_digit(const uint8_t c, I &i) { - const uint8_t digit = static_cast(c - '0'); - if (digit > 9) { - return false; - } - // PERF NOTE: multiplication by 10 is cheaper than arbitrary integer multiplication - i = 10 * i + digit; // might overflow, we will handle the overflow later - return true; -} - -simdjson_really_inline error_code parse_decimal(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { - // we continue with the fiction that we have an integer. If the - // floating point number is representable as x * 10^z for some integer - // z that fits in 53 bits, then we will be able to convert back the - // the integer into a float in a lossless manner. - const uint8_t *const first_after_period = p; - -#ifdef SWAR_NUMBER_PARSING - // this helps if we have lots of decimals! - // this turns out to be frequent enough. - if (is_made_of_eight_digits_fast(p)) { - i = i * 100000000 + parse_eight_digits_unrolled(p); - p += 8; - } -#endif - // Unrolling the first digit makes a small difference on some implementations (e.g. westmere) - if (parse_digit(*p, i)) { ++p; } - while (parse_digit(*p, i)) { p++; } - exponent = first_after_period - p; - // Decimal without digits (123.) is illegal - if (exponent == 0) { - return INVALID_NUMBER(src); - } - return SUCCESS; -} - -simdjson_really_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { - // Exp Sign: -123.456e[-]78 - bool neg_exp = ('-' == *p); - if (neg_exp || '+' == *p) { p++; } // Skip + as well - - // Exponent: -123.456e-[78] - auto start_exp = p; - int64_t exp_number = 0; - while (parse_digit(*p, exp_number)) { ++p; } - // It is possible for parse_digit to overflow. - // In particular, it could overflow to INT64_MIN, and we cannot do - INT64_MIN. - // Thus we *must* check for possible overflow before we negate exp_number. - - // Performance notes: it may seem like combining the two "simdjson_unlikely checks" below into - // a single simdjson_unlikely path would be faster. The reasoning is sound, but the compiler may - // not oblige and may, in fact, generate two distinct paths in any case. It might be - // possible to do uint64_t(p - start_exp - 1) >= 18 but it could end up trading off - // instructions for a simdjson_likely branch, an unconclusive gain. - - // If there were no digits, it's an error. - if (simdjson_unlikely(p == start_exp)) { - return INVALID_NUMBER(src); - } - // We have a valid positive exponent in exp_number at this point, except that - // it may have overflowed. - - // If there were more than 18 digits, we may have overflowed the integer. We have to do - // something!!!! - if (simdjson_unlikely(p > start_exp+18)) { - // Skip leading zeroes: 1e000000000000000000001 is technically valid and doesn't overflow - while (*start_exp == '0') { start_exp++; } - // 19 digits could overflow int64_t and is kind of absurd anyway. We don't - // support exponents smaller than -999,999,999,999,999,999 and bigger - // than 999,999,999,999,999,999. - // We can truncate. - // Note that 999999999999999999 is assuredly too large. The maximal ieee64 value before - // infinity is ~1.8e308. The smallest subnormal is ~5e-324. So, actually, we could - // truncate at 324. - // Note that there is no reason to fail per se at this point in time. - // E.g., 0e999999999999999999999 is a fine number. - if (p > start_exp+18) { exp_number = 999999999999999999; } - } - // At this point, we know that exp_number is a sane, positive, signed integer. - // It is <= 999,999,999,999,999,999. As long as 'exponent' is in - // [-8223372036854775808, 8223372036854775808], we won't overflow. Because 'exponent' - // is bounded in magnitude by the size of the JSON input, we are fine in this universe. - // To sum it up: the next line should never overflow. - exponent += (neg_exp ? -exp_number : exp_number); - return SUCCESS; -} - -simdjson_really_inline int significant_digits(const uint8_t * start_digits, int digit_count) { - // It is possible that the integer had an overflow. - // We have to handle the case where we have 0.0000somenumber. - const uint8_t *start = start_digits; - while ((*start == '0') || (*start == '.')) { - start++; - } - // we over-decrement by one when there is a '.' - return digit_count - int(start - start_digits); -} - -template -simdjson_really_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, int digit_count, int64_t exponent, W &writer) { - // If we frequently had to deal with long strings of digits, - // we could extend our code by using a 128-bit integer instead - // of a 64-bit integer. However, this is uncommon in practice. - // - // 9999999999999999999 < 2**64 so we can accomodate 19 digits. - // If we have a decimal separator, then digit_count - 1 is the number of digits, but we - // may not have a decimal separator! - if (simdjson_unlikely(digit_count > 19 && significant_digits(start_digits, digit_count) > 19)) { - // Ok, chances are good that we had an overflow! - // this is almost never going to get called!!! - // we start anew, going slowly!!! - // This will happen in the following examples: - // 10000000000000000000000000000000000000000000e+308 - // 3.1415926535897932384626433832795028841971693993751 - // - // NOTE: This makes a *copy* of the writer and passes it to slow_float_parsing. This happens - // because slow_float_parsing is a non-inlined function. If we passed our writer reference to - // it, it would force it to be stored in memory, preventing the compiler from picking it apart - // and putting into registers. i.e. if we pass it as reference, it gets slow. - // This is what forces the skip_double, as well. - error_code error = slow_float_parsing(src, writer); - writer.skip_double(); - return error; - } - // NOTE: it's weird that the simdjson_unlikely() only wraps half the if, but it seems to get slower any other - // way we've tried: https://github.com/simdjson/simdjson/pull/990#discussion_r448497331 - // To future reader: we'd love if someone found a better way, or at least could explain this result! - if (simdjson_unlikely(exponent < simdjson::internal::smallest_power) || (exponent > simdjson::internal::largest_power)) { - // - // Important: smallest_power is such that it leads to a zero value. - // Observe that 18446744073709551615e-343 == 0, i.e. (2**64 - 1) e -343 is zero - // so something x 10^-343 goes to zero, but not so with something x 10^-342. - static_assert(simdjson::internal::smallest_power <= -342, "smallest_power is not small enough"); - // - if((exponent < simdjson::internal::smallest_power) || (i == 0)) { - WRITE_DOUBLE(0, src, writer); - return SUCCESS; - } else { // (exponent > largest_power) and (i != 0) - // We have, for sure, an infinite value and simdjson refuses to parse infinite values. - return INVALID_NUMBER(src); - } - } - double d; - if (!compute_float_64(exponent, i, negative, d)) { - // we are almost never going to get here. - if (!parse_float_fallback(src, &d)) { return INVALID_NUMBER(src); } - } - WRITE_DOUBLE(d, src, writer); - return SUCCESS; -} - -// for performance analysis, it is sometimes useful to skip parsing -#ifdef SIMDJSON_SKIPNUMBERPARSING - -template -simdjson_really_inline error_code parse_number(const uint8_t *const, W &writer) { - writer.append_s64(0); // always write zero - return SUCCESS; // always succeeds -} - -simdjson_unused simdjson_really_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_really_inline simdjson_result parse_integer(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_really_inline simdjson_result parse_double(const uint8_t * const src) noexcept { return 0; } - -#else - -// parse the number at src -// define JSON_TEST_NUMBERS for unit testing -// -// It is assumed that the number is followed by a structural ({,},],[) character -// or a white space character. If that is not the case (e.g., when the JSON -// document is made of a single number), then it is necessary to copy the -// content and append a space before calling this function. -// -// Our objective is accurate parsing (ULP of 0) at high speed. -template -simdjson_really_inline error_code parse_number(const uint8_t *const src, W &writer) { - - // - // Check for minus sign - // - bool negative = (*src == '-'); - const uint8_t *p = src + negative; - - // - // Parse the integer part. - // - // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t *const start_digits = p; - uint64_t i = 0; - while (parse_digit(*p, i)) { p++; } - - // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. - int digit_count = int(p - start_digits); - if (digit_count == 0 || ('0' == *start_digits && digit_count > 1)) { return INVALID_NUMBER(src); } - - // - // Handle floats if there is a . or e (or both) - // - int64_t exponent = 0; - bool is_float = false; - if ('.' == *p) { - is_float = true; - ++p; - SIMDJSON_TRY( parse_decimal(src, p, i, exponent) ); - digit_count = int(p - start_digits); // used later to guard against overflows - } - if (('e' == *p) || ('E' == *p)) { - is_float = true; - ++p; - SIMDJSON_TRY( parse_exponent(src, p, exponent) ); - } - if (is_float) { - const bool clean_end = jsoncharutils::is_structural_or_whitespace(*p); - SIMDJSON_TRY( write_float(src, negative, i, start_digits, digit_count, exponent, writer) ); - if (!clean_end) { return INVALID_NUMBER(src); } - return SUCCESS; - } - - // The longest negative 64-bit number is 19 digits. - // The longest positive 64-bit number is 20 digits. - // We do it this way so we don't trigger this branch unless we must. - int longest_digit_count = negative ? 19 : 20; - if (digit_count > longest_digit_count) { return INVALID_NUMBER(src); } - if (digit_count == longest_digit_count) { - if (negative) { - // Anything negative above INT64_MAX+1 is invalid - if (i > uint64_t(INT64_MAX)+1) { return INVALID_NUMBER(src); } - WRITE_INTEGER(~i+1, src, writer); - if (!jsoncharutils::is_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } - return SUCCESS; - // Positive overflow check: - // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the - // biggest uint64_t. - // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. - // If we got here, it's a 20 digit number starting with the digit "1". - // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller - // than 1,553,255,926,290,448,384. - // - That is smaller than the smallest possible 20-digit number the user could write: - // 10,000,000,000,000,000,000. - // - Therefore, if the number is positive and lower than that, it's overflow. - // - The value we are looking at is less than or equal to 9,223,372,036,854,775,808 (INT64_MAX). - // - } else if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INVALID_NUMBER(src); } - } - - // Write unsigned if it doesn't fit in a signed integer. - if (i > uint64_t(INT64_MAX)) { - WRITE_UNSIGNED(i, src, writer); - } else { - WRITE_INTEGER(negative ? (~i+1) : i, src, writer); - } - if (!jsoncharutils::is_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } - return SUCCESS; -} - -// SAX functions -namespace { -// Parse any number from 0 to 18,446,744,073,709,551,615 -simdjson_unused simdjson_really_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { - const uint8_t *p = src; - - // - // Parse the integer part. - // - // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t *const start_digits = p; - uint64_t i = 0; - while (parse_digit(*p, i)) { p++; } - - // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. - int digit_count = int(p - start_digits); - if (digit_count == 0 || ('0' == *start_digits && digit_count > 1)) { return NUMBER_ERROR; } - if (!jsoncharutils::is_structural_or_whitespace(*p)) { return NUMBER_ERROR; } - - // The longest positive 64-bit number is 20 digits. - // We do it this way so we don't trigger this branch unless we must. - if (digit_count > 20) { return NUMBER_ERROR; } - if (digit_count == 20) { - // Positive overflow check: - // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the - // biggest uint64_t. - // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. - // If we got here, it's a 20 digit number starting with the digit "1". - // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller - // than 1,553,255,926,290,448,384. - // - That is smaller than the smallest possible 20-digit number the user could write: - // 10,000,000,000,000,000,000. - // - Therefore, if the number is positive and lower than that, it's overflow. - // - The value we are looking at is less than or equal to 9,223,372,036,854,775,808 (INT64_MAX). - // - if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return NUMBER_ERROR; } - } - - return i; -} - -// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 -simdjson_unused simdjson_really_inline simdjson_result parse_integer(const uint8_t *src) noexcept { - // - // Check for minus sign - // - bool negative = (*src == '-'); - const uint8_t *p = src + negative; - - // - // Parse the integer part. - // - // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t *const start_digits = p; - uint64_t i = 0; - while (parse_digit(*p, i)) { p++; } - - // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. - int digit_count = int(p - start_digits); - if (digit_count == 0 || ('0' == *start_digits && digit_count > 1)) { return NUMBER_ERROR; } - if (!jsoncharutils::is_structural_or_whitespace(*p)) { return NUMBER_ERROR; } - - // The longest negative 64-bit number is 19 digits. - // The longest positive 64-bit number is 20 digits. - // We do it this way so we don't trigger this branch unless we must. - int longest_digit_count = negative ? 19 : 20; - if (digit_count > longest_digit_count) { return NUMBER_ERROR; } - if (digit_count == longest_digit_count) { - if(negative) { - // Anything negative above INT64_MAX+1 is invalid - if (i > uint64_t(INT64_MAX)+1) { return NUMBER_ERROR; } - return ~i+1; - - // Positive overflow check: - // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the - // biggest uint64_t. - // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. - // If we got here, it's a 20 digit number starting with the digit "1". - // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller - // than 1,553,255,926,290,448,384. - // - That is smaller than the smallest possible 20-digit number the user could write: - // 10,000,000,000,000,000,000. - // - Therefore, if the number is positive and lower than that, it's overflow. - // - The value we are looking at is less than or equal to 9,223,372,036,854,775,808 (INT64_MAX). - // - } else if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return NUMBER_ERROR; } - } - - return negative ? (~i+1) : i; -} - -simdjson_unused simdjson_really_inline simdjson_result parse_double(const uint8_t * src) noexcept { - // - // Check for minus sign - // - bool negative = (*src == '-'); - src += negative; - - // - // Parse the integer part. - // - uint64_t i = 0; - const uint8_t *p = src; - p += parse_digit(*p, i); - bool leading_zero = (i == 0); - while (parse_digit(*p, i)) { p++; } - // no integer digits, or 0123 (zero must be solo) - if ( p == src || (leading_zero && p != src+1)) { return NUMBER_ERROR; } - - // - // Parse the decimal part. - // - int64_t exponent = 0; - bool overflow; - if (simdjson_likely(*p == '.')) { - p++; - const uint8_t *start_decimal_digits = p; - if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits - p++; - while (parse_digit(*p, i)) { p++; } - exponent = -(p - start_decimal_digits); - - // Overflow check. More than 19 digits (minus the decimal) may be overflow. - overflow = p-src-1 > 19; - if (simdjson_unlikely(overflow && leading_zero)) { - // Skip leading 0.00000 and see if it still overflows - const uint8_t *start_digits = src + 2; - while (*start_digits == '0') { start_digits++; } - overflow = start_digits-src > 19; - } - } else { - overflow = p-src > 19; - } - - // - // Parse the exponent - // - if (*p == 'e' || *p == 'E') { - p++; - bool exp_neg = *p == '-'; - p += exp_neg || *p == '+'; - - uint64_t exp = 0; - const uint8_t *start_exp_digits = p; - while (parse_digit(*p, exp)) { p++; } - // no exp digits, or 20+ exp digits - if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } - - exponent += exp_neg ? 0-exp : exp; - } - - if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } - - overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; - - // - // Assemble (or slow-parse) the float - // - double d; - if (simdjson_likely(!overflow)) { - if (compute_float_64(exponent, i, negative, d)) { return d; } - } - if (!parse_float_fallback(src-negative, &d)) { - return NUMBER_ERROR; - } - return d; -} -} //namespace {} -#endif // SIMDJSON_SKIPNUMBERPARSING - -} // namespace numberparsing -} // unnamed namespace -} // namespace SIMDJSON_IMPLEMENTATION -} // namespace simdjson -/* end file include/simdjson/generic/numberparsing.h */ - -#endif // SIMDJSON_PPC64_NUMBERPARSING_H -/* end file include/simdjson/ppc64/numberparsing.h */ -/* begin file include/simdjson/generic/implementation_simdjson_result_base.h */ -namespace simdjson { -namespace SIMDJSON_IMPLEMENTATION { - -// This is a near copy of include/error.h's implementation_simdjson_result_base, except it doesn't use std::pair -// so we can avoid inlining errors -// TODO reconcile these! -/** - * The result of a simdjson operation that could fail. - * - * Gives the option of reading error codes, or throwing an exception by casting to the desired result. - * - * This is a base class for implementations that want to add functions to the result type for - * chaining. - * - * Override like: - * - * struct simdjson_result : public internal::implementation_simdjson_result_base { - * simdjson_result() noexcept : internal::implementation_simdjson_result_base() {} - * simdjson_result(error_code error) noexcept : internal::implementation_simdjson_result_base(error) {} - * simdjson_result(T &&value) noexcept : internal::implementation_simdjson_result_base(std::forward(value)) {} - * simdjson_result(T &&value, error_code error) noexcept : internal::implementation_simdjson_result_base(value, error) {} - * // Your extra methods here - * } - * - * Then any method returning simdjson_result will be chainable with your methods. - */ -template -struct implementation_simdjson_result_base { - - /** - * Create a new empty result with error = UNINITIALIZED. - */ - simdjson_really_inline implementation_simdjson_result_base() noexcept; - - /** - * Create a new error result. - */ - simdjson_really_inline implementation_simdjson_result_base(error_code error) noexcept; - - /** - * Create a new successful result. - */ - simdjson_really_inline implementation_simdjson_result_base(T &&value) noexcept; - - /** - * Create a new result with both things (use if you don't want to branch when creating the result). - */ - simdjson_really_inline implementation_simdjson_result_base(T &&value, error_code error) noexcept; - - /** - * Move a result. - */ - simdjson_really_inline implementation_simdjson_result_base(implementation_simdjson_result_base &&value) noexcept = default; - - /** - * Copy a result. - */ - simdjson_really_inline implementation_simdjson_result_base(const implementation_simdjson_result_base &value) = default; - - /** - * Create a new empty result with error = UNINITIALIZED. - */ - simdjson_really_inline ~implementation_simdjson_result_base() noexcept; - - /** - * Move the value and the error to the provided variables. - * - * @param value The variable to assign the value to. May not be set if there is an error. - * @param error The variable to assign the error to. Set to SUCCESS if there is no error. - */ - simdjson_really_inline void tie(T &value, error_code &error) && noexcept; - - /** - * Move the value to the provided variable. - * - * @param value The variable to assign the value to. May not be set if there is an error. - */ - simdjson_really_inline error_code get(T &value) && noexcept; - - /** - * The error. - */ - simdjson_really_inline error_code error() const noexcept; - -#if SIMDJSON_EXCEPTIONS - - /** - * Get the result value. - * - * @throw simdjson_error if there was an error. - */ - simdjson_really_inline T& value() & noexcept(false); - - /** - * Take the result value (move it). - * - * @throw simdjson_error if there was an error. - */ - simdjson_really_inline T&& value() && noexcept(false); - - /** - * Take the result value (move it). - * - * @throw simdjson_error if there was an error. - */ - simdjson_really_inline T&& take_value() && noexcept(false); - - /** - * Cast to the value (will throw on error). - * - * @throw simdjson_error if there was an error. - */ - simdjson_really_inline operator T&&() && noexcept(false); - -#endif // SIMDJSON_EXCEPTIONS - - T first; - error_code second; -}; // struct implementation_simdjson_result_base - -} // namespace SIMDJSON_IMPLEMENTATION -} // namespace simdjson -/* end file include/simdjson/generic/implementation_simdjson_result_base.h */ -/* begin file include/simdjson/generic/ondemand.h */ -namespace simdjson { -namespace SIMDJSON_IMPLEMENTATION { -/** - * A fast, simple, DOM-like interface that parses JSON as you use it. - * - * Designed for maximum speed and a lower memory profile. - */ -namespace ondemand { -} // namespace ondemand -} // namespace SIMDJSON_IMPLEMENTATION -} // namespace simdjson - -/* begin file include/simdjson/generic/ondemand/logger.h */ -namespace simdjson { -namespace SIMDJSON_IMPLEMENTATION { -namespace ondemand { - -class json_iterator; - -namespace logger { - -#if SIMDJSON_VERBOSE_LOGGING - static constexpr const bool LOG_ENABLED = true; -#else - static constexpr const bool LOG_ENABLED = false; -#endif - -static simdjson_really_inline void log_headers() noexcept; -static simdjson_really_inline void log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta) noexcept; -static simdjson_really_inline void log_event(const json_iterator &iter, const char *type, std::string_view detail="", int delta=-1, int depth_delta=0) noexcept; -static simdjson_really_inline void log_value(const json_iterator &iter, const char *type, std::string_view detail="", int delta=-1, int depth_delta=0) noexcept; -static simdjson_really_inline void log_start_value(const json_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept; -static simdjson_really_inline void log_end_value(const json_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept; -static simdjson_really_inline void log_error(const json_iterator &iter, const char *error, const char *detail="", int delta=-1, int depth_delta=0) noexcept; - -} // namespace logger -} // namespace ondemand -} // namespace SIMDJSON_IMPLEMENTATION -} // namespace simdjson -/* end file include/simdjson/generic/ondemand/logger.h */ -/* begin file include/simdjson/generic/ondemand/raw_json_string.h */ - -namespace simdjson { -namespace SIMDJSON_IMPLEMENTATION { -namespace ondemand { - -class object; -class parser; - -/** - * A string escaped per JSON rules, terminated with quote ("). They are used to represent - * unescaped keys inside JSON documents. - * - * (In other words, a pointer to the beginning of a string, just after the start quote, inside a - * JSON file.) - * - * This class is deliberately simplistic and has little functionality. You can - * compare two raw_json_string instances, or compare a raw_json_string with a string_view, but - * that is pretty much all you can do. - * - * They originate typically from field instance which in turn represent key-value pairs from - * object instances. From a field instance, you get the raw_json_string instance by calling key(). - * You can, if you want a more usable string_view instance, call the unescaped_key() method - * on the field instance. - */ -class raw_json_string { -public: - /** - * Create a new invalid raw_json_string. - * - * Exists so you can declare a variable and later assign to it before use. - */ - simdjson_really_inline raw_json_string() noexcept = default; - - simdjson_really_inline raw_json_string(const raw_json_string &other) noexcept = default; - simdjson_really_inline raw_json_string &operator=(const raw_json_string &other) noexcept = default; - - /** - * Create a new invalid raw_json_string pointed at the given location in the JSON. - * - * The given location must be just *after* the beginning quote (") in the JSON file. - * - * It *must* be terminated by a ", and be a valid JSON string. - */ - simdjson_really_inline raw_json_string(const uint8_t * _buf) noexcept; - /** - * Get the raw pointer to the beginning of the string in the JSON (just after the "). - * - * It is possible for this function to return a null pointer if the instance - * has outlived its existence. - */ - simdjson_really_inline const char * raw() const noexcept; - -private: - /** - * This will set the inner pointer to zero, effectively making - * this instance unusable. - */ - simdjson_really_inline void consume() noexcept { buf = nullptr; } - - /** - * Checks whether the inner pointer is non-null and thus usable. - */ - simdjson_really_inline simdjson_warn_unused bool alive() const noexcept { return buf != nullptr; } - - /** - * Unescape this JSON string, replacing \\ with \, \n with newline, etc. - * - * ## IMPORTANT: string_view lifetime - * - * The string_view is only valid as long as the bytes in dst. - * - * @param dst A pointer to a buffer at least large enough to write this string as well as a \0. - * dst will be updated to the next unused location (just after the \0 written out at - * the end of this string). - * @return A string_view pointing at the unescaped string in dst - * @error STRING_ERROR if escapes are incorrect. - */ - simdjson_really_inline simdjson_warn_unused simdjson_result unescape(uint8_t *&dst) const noexcept; - /** - * Unescape this JSON string, replacing \\ with \, \n with newline, etc. - * - * ## IMPORTANT: string_view lifetime - * - * The string_view is only valid until the next parse() call on the parser. - * - * @param iter A json_iterator, which contains a buffer where the string will be written. - */ - simdjson_really_inline simdjson_warn_unused simdjson_result unescape(json_iterator &iter) const noexcept; - - const uint8_t * buf{}; - friend class object; - friend class field; - friend struct simdjson_result; -}; - -simdjson_unused simdjson_really_inline bool operator==(const raw_json_string &a, std::string_view b) noexcept; -simdjson_unused simdjson_really_inline bool operator==(std::string_view a, const raw_json_string &b) noexcept; -simdjson_unused simdjson_really_inline bool operator!=(const raw_json_string &a, std::string_view b) noexcept; -simdjson_unused simdjson_really_inline bool operator!=(std::string_view a, const raw_json_string &b) noexcept; - -simdjson_unused simdjson_really_inline std::ostream &operator<<(std::ostream &, const raw_json_string &) noexcept; - -} // namespace ondemand -} // namespace SIMDJSON_IMPLEMENTATION -} // namespace simdjson - -namespace simdjson { - -template<> -struct simdjson_result : public SIMDJSON_IMPLEMENTATION::implementation_simdjson_result_base { -public: - simdjson_really_inline simdjson_result(SIMDJSON_IMPLEMENTATION::ondemand::raw_json_string &&value) noexcept; ///< @private - simdjson_really_inline simdjson_result(error_code error) noexcept; ///< @private - - simdjson_really_inline simdjson_result() noexcept = default; - simdjson_really_inline simdjson_result(const simdjson_result &a) noexcept = default; - simdjson_really_inline ~simdjson_result() noexcept = default; ///< @private - - simdjson_really_inline simdjson_result raw() const noexcept; - simdjson_really_inline simdjson_warn_unused simdjson_result unescape(uint8_t *&dst) const noexcept; - simdjson_really_inline simdjson_warn_unused simdjson_result unescape(SIMDJSON_IMPLEMENTATION::ondemand::json_iterator &iter) const noexcept; -}; - -} // namespace simdjson -/* end file include/simdjson/generic/ondemand/raw_json_string.h */ -/* begin file include/simdjson/generic/ondemand/token_iterator.h */ -namespace simdjson { -namespace SIMDJSON_IMPLEMENTATION { -namespace ondemand { - -/** - * Iterates through JSON tokens (`{` `}` `[` `]` `,` `:` `""` `123` `true` `false` `null`) - * detected by stage 1. - * - * @private This is not intended for external use. - */ -class token_iterator { -public: - /** - * Create a new invalid token_iterator. - * - * Exists so you can declare a variable and later assign to it before use. - */ - simdjson_really_inline token_iterator() noexcept = default; - - simdjson_really_inline token_iterator(token_iterator &&other) noexcept = default; - simdjson_really_inline token_iterator &operator=(token_iterator &&other) noexcept = default; - simdjson_really_inline token_iterator(const token_iterator &other) noexcept = delete; - simdjson_really_inline token_iterator &operator=(const token_iterator &other) noexcept = delete; - - /** - * Get the JSON text for a given token (relative). - * - * This is not null-terminated; it is a view into the JSON. - * - * @param delta The relative position of the token to retrieve. e.g. 0 = current token, - * 1 = next token, -1 = prev token. - * - * TODO consider a string_view, assuming the length will get stripped out by the optimizer when - * it isn't used ... - */ - simdjson_really_inline const uint8_t *peek(int32_t delta=0) const noexcept; - /** - * Get the maximum length of the JSON text for a given token. - * - * The length will include any whitespace at the end of the token. - * - * @param delta The relative position of the token to retrieve. e.g. 0 = current token, - * 1 = next token, -1 = prev token. - */ - simdjson_really_inline uint32_t peek_length(int32_t delta=0) const noexcept; - /** - * Advance to the next token (returning the current one). - * - * Does not check or update depth/expect_value. Caller is responsible for that. - */ - simdjson_really_inline const uint8_t *advance() noexcept; - - // NOTE: we don't support a full C++ iterator interface, because we expect people to make - // different calls to advance the iterator based on *their own* state. - - simdjson_really_inline bool operator==(const token_iterator &other) const noexcept; - simdjson_really_inline bool operator!=(const token_iterator &other) const noexcept; - simdjson_really_inline bool operator>(const token_iterator &other) const noexcept; - simdjson_really_inline bool operator>=(const token_iterator &other) const noexcept; - simdjson_really_inline bool operator<(const token_iterator &other) const noexcept; - simdjson_really_inline bool operator<=(const token_iterator &other) const noexcept; - -protected: - simdjson_really_inline token_iterator(const uint8_t *buf, uint32_t *index) noexcept; - - /** - * Get the index of the JSON text for a given token (relative). - * - * This is not null-terminated; it is a view into the JSON. - * - * @param delta The relative position of the token to retrieve. e.g. 0 = current token, - * 1 = next token, -1 = prev token. - * - */ - simdjson_really_inline uint32_t peek_index(int32_t delta=0) const noexcept; - - const uint8_t *buf{}; - const uint32_t *index{}; -}; - -} // namespace ondemand -} // namespace SIMDJSON_IMPLEMENTATION -} // namespace simdjson - -namespace simdjson { - -template<> -struct simdjson_result : public SIMDJSON_IMPLEMENTATION::implementation_simdjson_result_base { -public: - simdjson_really_inline simdjson_result(SIMDJSON_IMPLEMENTATION::ondemand::token_iterator &&value) noexcept; ///< @private - simdjson_really_inline simdjson_result(error_code error) noexcept; ///< @private - - simdjson_really_inline simdjson_result() noexcept = default; - simdjson_really_inline simdjson_result(simdjson_result &&a) noexcept = default; - simdjson_really_inline ~simdjson_result() noexcept = default; ///< @private -}; - -} // namespace simdjson -/* end file include/simdjson/generic/ondemand/token_iterator.h */ -/* begin file include/simdjson/generic/ondemand/json_iterator.h */ -namespace simdjson { -namespace SIMDJSON_IMPLEMENTATION { -namespace ondemand { - -class document; -class object; -class array; -class value; -class raw_json_string; -class parser; -class json_iterator_ref; - -/** - * Iterates through JSON, with structure-sensitive algorithms. - * - * @private This is not intended for external use. - */ -class json_iterator : public token_iterator { -public: - simdjson_really_inline json_iterator() noexcept = default; - simdjson_really_inline json_iterator(json_iterator &&other) noexcept; - simdjson_really_inline json_iterator &operator=(json_iterator &&other) noexcept; -#ifdef SIMDJSON_ONDEMAND_SAFETY_RAILS - simdjson_really_inline ~json_iterator() noexcept; -#else - simdjson_really_inline ~json_iterator() noexcept = default; -#endif - simdjson_really_inline json_iterator(const json_iterator &other) noexcept = delete; - simdjson_really_inline json_iterator &operator=(const json_iterator &other) noexcept = delete; - - /** - * Check for an opening { and start an object iteration. - * - * @param json A pointer to the potential { - * @returns Whether the object had any fields (returns false for empty). - * @error INCORRECT_TYPE if there is no opening { - */ - simdjson_warn_unused simdjson_really_inline simdjson_result start_object(const uint8_t *json) noexcept; - /** - * Check for an opening { and start an object iteration. - * - * @returns Whether the object had any fields (returns false for empty). - * @error INCORRECT_TYPE if there is no opening { - */ - simdjson_warn_unused simdjson_really_inline simdjson_result start_object() noexcept; - - /** - * Start an object iteration after the user has already checked and moved past the {. - * - * Does not move the iterator. - * - * @returns Whether the object had any fields (returns false for empty). - */ - simdjson_warn_unused simdjson_really_inline bool started_object() noexcept; - - /** - * Moves to the next field in an object. - * - * Looks for , and }. If } is found, the object is finished and the iterator advances past it. - * Otherwise, it advances to the next value. - * - * @return whether there is another field in the object. - * @error TAPE_ERROR If there is a comma missing between fields. - */ - simdjson_warn_unused simdjson_really_inline simdjson_result has_next_field() noexcept; - - /** - * Get the current field's key. - */ - simdjson_warn_unused simdjson_really_inline simdjson_result field_key() noexcept; - - /** - * Pass the : in the field and move to its value. - */ - simdjson_warn_unused simdjson_really_inline error_code field_value() noexcept; - - /** - * Find the next field with the given key. - * - * Assumes you have called next_field() or otherwise matched the previous value. - * - * Key is *raw JSON,* meaning it will be matched against the verbatim JSON without attempting to - * unescape it. This works well for typical ASCII and UTF-8 keys (almost all of them), but may - * fail to match some keys with escapes (\u, \n, etc.). - */ - simdjson_warn_unused simdjson_really_inline simdjson_result find_field_raw(const char *key) noexcept; - - /** - * Check for an opening [ and start an array iteration. - * - * @param json A pointer to the potential [. - * @returns Whether the array had any elements (returns false for empty). - * @error INCORRECT_TYPE If there is no [. - */ - simdjson_warn_unused simdjson_really_inline simdjson_result start_array(const uint8_t *json) noexcept; - /** - * Check for an opening [ and start an array iteration. - * - * @returns Whether the array had any elements (returns false for empty). - * @error INCORRECT_TYPE If there is no [. - */ - simdjson_warn_unused simdjson_really_inline simdjson_result start_array() noexcept; - - /** - * Start an array iteration after the user has already checked and moved past the [. - * - * Does not move the iterator. - * - * @returns Whether the array had any elements (returns false for empty). - */ - simdjson_warn_unused simdjson_really_inline bool started_array() noexcept; - - /** - * Moves to the next element in an array. - * - * Looks for , and ]. If ] is found, the array is finished and the iterator advances past it. - * Otherwise, it advances to the next value. - * - * @return Whether there is another element in the array. - * @error TAPE_ERROR If there is a comma missing between elements. - */ - simdjson_warn_unused simdjson_really_inline simdjson_result has_next_element() noexcept; - - simdjson_warn_unused simdjson_really_inline simdjson_result parse_string(const uint8_t *json) noexcept; - simdjson_warn_unused simdjson_really_inline simdjson_result consume_string() noexcept; - simdjson_warn_unused simdjson_really_inline simdjson_result parse_raw_json_string(const uint8_t *json) noexcept; - simdjson_warn_unused simdjson_really_inline simdjson_result consume_raw_json_string() noexcept; - simdjson_warn_unused simdjson_really_inline simdjson_result parse_uint64(const uint8_t *json) noexcept; - simdjson_warn_unused simdjson_really_inline simdjson_result consume_uint64() noexcept; - simdjson_warn_unused simdjson_really_inline simdjson_result parse_int64(const uint8_t *json) noexcept; - simdjson_warn_unused simdjson_really_inline simdjson_result consume_int64() noexcept; - simdjson_warn_unused simdjson_really_inline simdjson_result parse_double(const uint8_t *json) noexcept; - simdjson_warn_unused simdjson_really_inline simdjson_result consume_double() noexcept; - simdjson_warn_unused simdjson_really_inline simdjson_result parse_bool(const uint8_t *json) noexcept; - simdjson_warn_unused simdjson_really_inline simdjson_result consume_bool() noexcept; - simdjson_really_inline bool is_null(const uint8_t *json) noexcept; - simdjson_really_inline bool is_null() noexcept; - - simdjson_warn_unused simdjson_really_inline simdjson_result parse_root_uint64(const uint8_t *json) noexcept; - simdjson_warn_unused simdjson_really_inline simdjson_result consume_root_uint64() noexcept; - simdjson_warn_unused simdjson_really_inline simdjson_result parse_root_int64(const uint8_t *json) noexcept; - simdjson_warn_unused simdjson_really_inline simdjson_result consume_root_int64() noexcept; - simdjson_warn_unused simdjson_really_inline simdjson_result parse_root_double(const uint8_t *json) noexcept; - simdjson_warn_unused simdjson_really_inline simdjson_result consume_root_double() noexcept; - simdjson_warn_unused simdjson_really_inline simdjson_result parse_root_bool(const uint8_t *json) noexcept; - simdjson_warn_unused simdjson_really_inline simdjson_result consume_root_bool() noexcept; - simdjson_really_inline bool root_is_null(const uint8_t *json) noexcept; - simdjson_really_inline bool root_is_null() noexcept; - - /** - * Skips a JSON value, whether it is a scalar, array or object. - */ - simdjson_warn_unused simdjson_really_inline error_code skip() noexcept; - - /** - * Skips to the end of a JSON object or array. - * - * @return true if this was the end of an array, false if it was the end of an object. - */ - simdjson_warn_unused simdjson_really_inline error_code skip_container() noexcept; - - /** - * Tell whether the iterator is still at the start - */ - simdjson_really_inline bool at_start() const noexcept; - - /** - * Tell whether the iterator is at the EOF mark - */ - simdjson_really_inline bool at_eof() const noexcept; - - /** - * Tell whether the iterator is live (has not been moved). - */ - simdjson_really_inline bool is_alive() const noexcept; - - /** - * Report an error, preventing further iteration. - * - * @param error The error to report. Must not be SUCCESS, UNINITIALIZED, INCORRECT_TYPE, or NO_SUCH_FIELD. - * @param message An error message to report with the error. - */ - simdjson_really_inline error_code report_error(error_code error, const char *message) noexcept; - - /** - * Get the error (if any). - */ - simdjson_really_inline error_code error() const noexcept; - -protected: - ondemand::parser *parser{}; - /** - * Next free location in the string buffer. - * - * Used by raw_json_string::unescape() to have a place to unescape strings to. - */ - uint8_t *current_string_buf_loc{}; - /** - * JSON error, if there is one. - * - * INCORRECT_TYPE and NO_SUCH_FIELD are *not* stored here, ever. - * - * PERF NOTE: we *hope* this will be elided into control flow, as it is only used (a) in the first - * iteration of the loop, or (b) for the final iteration after a missing comma is found in ++. If - * this is not elided, we should make sure it's at least not using up a register. Failing that, - * we should store it in document so there's only one of them. - */ - error_code _error{}; -#ifdef SIMDJSON_ONDEMAND_SAFETY_RAILS - uint32_t active_lease_depth{}; -#endif - - simdjson_really_inline json_iterator(ondemand::parser *parser) noexcept; - template - simdjson_warn_unused simdjson_really_inline bool copy_to_buffer(const uint8_t *json, uint8_t (&buf)[N]) noexcept; - - simdjson_really_inline json_iterator_ref borrow() noexcept; - - friend class document; - friend class object; - friend class array; - friend class value; - friend class raw_json_string; - friend class parser; - friend class json_iterator_ref; - friend simdjson_really_inline void logger::log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta) noexcept; -}; // json_iterator - -class json_iterator_ref { -public: - simdjson_really_inline json_iterator_ref() noexcept = default; - simdjson_really_inline json_iterator_ref(json_iterator_ref &&other) noexcept; - simdjson_really_inline json_iterator_ref &operator=(json_iterator_ref &&other) noexcept; - -#ifdef SIMDJSON_ONDEMAND_SAFETY_RAILS - simdjson_really_inline ~json_iterator_ref() noexcept; -#else - simdjson_really_inline ~json_iterator_ref() noexcept = default; -#endif // SIMDJSON_ONDEMAND_SAFETY_RAILS - - simdjson_really_inline json_iterator_ref(const json_iterator_ref &other) noexcept = delete; - simdjson_really_inline json_iterator_ref &operator=(const json_iterator_ref &other) noexcept = delete; - - simdjson_really_inline json_iterator_ref borrow() noexcept; - simdjson_really_inline void release() noexcept; - - simdjson_really_inline json_iterator *operator->() noexcept; - simdjson_really_inline json_iterator &operator*() noexcept; - simdjson_really_inline const json_iterator &operator*() const noexcept; - - simdjson_really_inline bool is_alive() const noexcept; - simdjson_really_inline bool is_active() const noexcept; - - simdjson_really_inline void assert_is_active() const noexcept; - simdjson_really_inline void assert_is_not_active() const noexcept; - -private: - json_iterator *iter{}; -#ifdef SIMDJSON_ONDEMAND_SAFETY_RAILS - uint32_t lease_depth{}; - simdjson_really_inline json_iterator_ref(json_iterator *iter, uint32_t lease_depth) noexcept; -#else - simdjson_really_inline json_iterator_ref(json_iterator *iter) noexcept; -#endif - - friend class json_iterator; -}; // class json_iterator_ref - -} // namespace ondemand -} // namespace SIMDJSON_IMPLEMENTATION -} // namespace simdjson - -namespace simdjson { - -template<> -struct simdjson_result : public SIMDJSON_IMPLEMENTATION::implementation_simdjson_result_base { -public: - simdjson_really_inline simdjson_result(SIMDJSON_IMPLEMENTATION::ondemand::json_iterator &&value) noexcept; ///< @private - simdjson_really_inline simdjson_result(error_code error) noexcept; ///< @private - - simdjson_really_inline simdjson_result() noexcept = default; - simdjson_really_inline simdjson_result(simdjson_result &&a) noexcept = default; - simdjson_really_inline ~simdjson_result() noexcept = default; ///< @private -}; - -template<> -struct simdjson_result : public SIMDJSON_IMPLEMENTATION::implementation_simdjson_result_base { -public: - simdjson_really_inline simdjson_result(SIMDJSON_IMPLEMENTATION::ondemand::json_iterator_ref &&value) noexcept; ///< @private - simdjson_really_inline simdjson_result(error_code error) noexcept; ///< @private - - simdjson_really_inline simdjson_result() noexcept = default; - simdjson_really_inline simdjson_result(simdjson_result &&a) noexcept = default; - simdjson_really_inline ~simdjson_result() noexcept = default; ///< @private -}; - -} // namespace simdjson -/* end file include/simdjson/generic/ondemand/json_iterator.h */ -/* begin file include/simdjson/generic/ondemand/array_iterator.h */ - -namespace simdjson { -namespace SIMDJSON_IMPLEMENTATION { -namespace ondemand { - -class array; -class value; -class document; - -/** - * A forward-only JSON array. - * - * This is an input_iterator, meaning: - * - It is forward-only - * - * must be called exactly once per element. - * - ++ must be called exactly once in between each * (*, ++, *, ++, * ...) - */ -template -class array_iterator { -public: - /** Create a new, invalid array iterator. */ - simdjson_really_inline array_iterator() noexcept = default; - simdjson_really_inline array_iterator(const array_iterator &a) noexcept = default; - simdjson_really_inline array_iterator &operator=(const array_iterator &a) noexcept = default; - - // - // Iterator interface - // - - /** - * Get the current element. - * - * Part of the std::iterator interface. - */ - simdjson_really_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. - /** - * Check if we are at the end of the JSON. - * - * Part of the std::iterator interface. - * - * @return true if there are no more elements in the JSON array. - */ - simdjson_really_inline bool operator==(const array_iterator &) noexcept; - /** - * Check if there are more elements in the JSON array. - * - * Part of the std::iterator interface. - * - * @return true if there are more elements in the JSON array. - */ - simdjson_really_inline bool operator!=(const array_iterator &) noexcept; - /** - * Move to the next element. - * - * Part of the std::iterator interface. - */ - simdjson_really_inline array_iterator &operator++() noexcept; - -private: - T *iter{}; - - simdjson_really_inline array_iterator(T &iter) noexcept; - - static simdjson_really_inline simdjson_result> start(T &iter, const uint8_t *json) noexcept; - - friend T; - friend class array; - friend class value; - friend struct simdjson_result>; -}; - -} // namespace ondemand -} // namespace SIMDJSON_IMPLEMENTATION -} // namespace simdjson - -namespace simdjson { - -template -struct simdjson_result> : public SIMDJSON_IMPLEMENTATION::implementation_simdjson_result_base> { -public: - simdjson_really_inline simdjson_result(SIMDJSON_IMPLEMENTATION::ondemand::array_iterator &&value) noexcept; ///< @private - simdjson_really_inline simdjson_result(error_code error) noexcept; ///< @private - - simdjson_really_inline simdjson_result() noexcept = default; - simdjson_really_inline simdjson_result(simdjson_result> &&a) noexcept = default; - simdjson_really_inline ~simdjson_result() noexcept = default; ///< @private - - // - // Iterator interface - // - - simdjson_really_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. - simdjson_really_inline bool operator==(const simdjson_result> &) noexcept; - simdjson_really_inline bool operator!=(const simdjson_result> &) noexcept; - simdjson_really_inline simdjson_result> &operator++() noexcept; -}; - -} // namespace simdjson -/* end file include/simdjson/generic/ondemand/array_iterator.h */ -/* begin file include/simdjson/generic/ondemand/object_iterator.h */ - -namespace simdjson { -namespace SIMDJSON_IMPLEMENTATION { -namespace ondemand { - -class field; - -class object_iterator { -public: - /** - * Create a new invalid object_iterator. - * - * Exists so you can declare a variable and later assign to it before use. - */ - simdjson_really_inline object_iterator() noexcept = default; - - simdjson_really_inline object_iterator(const object_iterator &o) noexcept = default; - simdjson_really_inline object_iterator &operator=(const object_iterator &o) noexcept = default; - - // - // Iterator interface - // - - // Reads key and value, yielding them to the user. - // MUST ONLY BE CALLED ONCE PER ITERATION. - simdjson_really_inline simdjson_result operator*() noexcept; - // Assumes it's being compared with the end. true if depth < iter->depth. - simdjson_really_inline bool operator==(const object_iterator &) noexcept; - // Assumes it's being compared with the end. true if depth >= iter->depth. - simdjson_really_inline bool operator!=(const object_iterator &) noexcept; - // Checks for ']' and ',' - simdjson_really_inline object_iterator &operator++() noexcept; -private: - json_iterator_ref *iter{}; - simdjson_really_inline object_iterator(json_iterator_ref &iter) noexcept; - friend struct simdjson_result; - friend class object; -}; - -} // namespace ondemand -} // namespace SIMDJSON_IMPLEMENTATION -} // namespace simdjson - -namespace simdjson { - -template<> -struct simdjson_result : public SIMDJSON_IMPLEMENTATION::implementation_simdjson_result_base { -public: - simdjson_really_inline simdjson_result(SIMDJSON_IMPLEMENTATION::ondemand::object_iterator &&value) noexcept; ///< @private - simdjson_really_inline simdjson_result(error_code error) noexcept; ///< @private - - simdjson_really_inline simdjson_result() noexcept = default; - simdjson_really_inline simdjson_result(simdjson_result &&a) noexcept = default; - simdjson_really_inline ~simdjson_result() noexcept = default; ///< @private - - // - // Iterator interface - // - - // Reads key and value, yielding them to the user. - simdjson_really_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. - // Assumes it's being compared with the end. true if depth < iter->depth. - simdjson_really_inline bool operator==(const simdjson_result &) noexcept; - // Assumes it's being compared with the end. true if depth >= iter->depth. - simdjson_really_inline bool operator!=(const simdjson_result &) noexcept; - // Checks for ']' and ',' - simdjson_really_inline simdjson_result &operator++() noexcept; -}; - -} // namespace simdjson -/* end file include/simdjson/generic/ondemand/object_iterator.h */ -/* begin file include/simdjson/generic/ondemand/array.h */ - -namespace simdjson { -namespace SIMDJSON_IMPLEMENTATION { -namespace ondemand { - -class value; -class document; - -/** - * A forward-only JSON array. - */ -class array { -public: - /** - * Create a new invalid array. - * - * Exists so you can declare a variable and later assign to it before use. - */ - simdjson_really_inline array() noexcept = default; - simdjson_really_inline array(array &&other) noexcept = default; - simdjson_really_inline array &operator=(array &&other) noexcept = default; - array(const array &) = delete; - array &operator=(const array &) = delete; - - /** - * Finishes iterating the array if it is not already fully iterated. - */ - simdjson_really_inline ~array() noexcept; - - /** - * Begin array iteration. - * - * Part of the std::iterable interface. - */ - simdjson_really_inline array_iterator begin() & noexcept; - /** - * Sentinel representing the end of the array. - * - * Part of the std::iterable interface. - */ - simdjson_really_inline array_iterator end() & noexcept; - -protected: - /** - * Begin array iteration. - * - * @param iter The iterator. Must be where the initial [ is expected. Will be *moved* into the - * resulting array. - * @error INCORRECT_TYPE if the iterator is not at [. - */ - static simdjson_really_inline simdjson_result start(json_iterator_ref &&iter) noexcept; - /** - * Begin array iteration. - * - * This version of the method should be called after the initial [ has been verified, and is - * intended for use by switch statements that check the type of a value. - * - * @param iter The iterator. Must be after the initial [. Will be *moved* into the resulting array. - */ - static simdjson_really_inline array started(json_iterator_ref &&iter) noexcept; - - /** - * Create an array at the given Internal array creation. Call array::start() or array::started() instead of this. - * - * @param iter The iterator. Must either be at the start of the first element with iter.is_alive() - * == true, or past the [] with is_alive() == false if the array is empty. Will be *moved* - * into the resulting array. - */ - simdjson_really_inline array(json_iterator_ref &&iter) noexcept; - - // - // For array_iterator - // - simdjson_really_inline json_iterator &get_iterator() noexcept; - simdjson_really_inline json_iterator_ref borrow_iterator() noexcept; - simdjson_really_inline bool is_iterator_alive() const noexcept; - simdjson_really_inline void iteration_finished() noexcept; - - /** - * Iterator marking current position. - * - * iter.is_alive() == false indicates iteration is complete. - */ - json_iterator_ref iter{}; - - friend class value; - friend struct simdjson_result; - friend struct simdjson_result; - friend class array_iterator; -}; - -} // namespace ondemand -} // namespace SIMDJSON_IMPLEMENTATION -} // namespace simdjson - -namespace simdjson { - -template<> -struct simdjson_result : public SIMDJSON_IMPLEMENTATION::implementation_simdjson_result_base { -public: - simdjson_really_inline simdjson_result(SIMDJSON_IMPLEMENTATION::ondemand::array &&value) noexcept; ///< @private - simdjson_really_inline simdjson_result(error_code error) noexcept; ///< @private - - simdjson_really_inline simdjson_result() noexcept = default; - simdjson_really_inline simdjson_result(simdjson_result &&a) noexcept = default; - simdjson_really_inline ~simdjson_result() noexcept = default; ///< @private - - simdjson_really_inline simdjson_result> begin() & noexcept; - simdjson_really_inline simdjson_result> end() & noexcept; -}; - -} // namespace simdjson -/* end file include/simdjson/generic/ondemand/array.h */ -/* begin file include/simdjson/generic/ondemand/document.h */ - -namespace simdjson { -namespace SIMDJSON_IMPLEMENTATION { -namespace ondemand { - -class parser; -class array; -class object; -class value; -class raw_json_string; -template class array_iterator; - -/** - * A JSON document iteration. - * - * Used by tokens to get text, and string buffer location. - * - * You must keep the document around during iteration. - */ -class document { -public: - simdjson_really_inline document(document &&other) noexcept = default; - simdjson_really_inline document &operator=(document &&other) noexcept = default; - - /** - * Create a new invalid document. - * - * Exists so you can declare a variable and later assign to it before use. - */ - simdjson_really_inline document() noexcept = default; - simdjson_really_inline document(const document &other) = delete; - simdjson_really_inline document &operator=(const document &other) = delete; - /** - * Finishes logging (if logging is enabled). - */ - simdjson_really_inline ~document() noexcept; - - /** - * Cast this JSON value to an array. - * - * @returns An object that can be used to iterate the array. - * @returns INCORRECT_TYPE If the JSON value is not an array. - */ - simdjson_really_inline simdjson_result get_array() & noexcept; - /** - * Cast this JSON value to an object. - * - * @returns An object that can be used to look up or iterate fields. - * @returns INCORRECT_TYPE If the JSON value is not an object. - */ - simdjson_really_inline simdjson_result get_object() & noexcept; - /** - * Cast this JSON value to an unsigned integer. - * - * @returns A signed 64-bit integer. - * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer. - */ - simdjson_really_inline simdjson_result get_uint64() noexcept; - /** - * Cast this JSON value to a signed integer. - * - * @returns A signed 64-bit integer. - * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. - */ - simdjson_really_inline simdjson_result get_int64() noexcept; - /** - * Cast this JSON value to a double. - * - * @returns A double. - * @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number. - */ - simdjson_really_inline simdjson_result get_double() noexcept; - /** - * Cast this JSON value to a string. - * - * The string is guaranteed to be valid UTF-8. - * - * Equivalent to get(). - * - * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next - * time it parses a document or when it is destroyed. - * @returns INCORRECT_TYPE if the JSON value is not a string. - */ - simdjson_really_inline simdjson_result get_string() & noexcept; - /** - * Cast this JSON value to a raw_json_string. - * - * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). - * - * @returns A pointer to the raw JSON for the given string. - * @returns INCORRECT_TYPE if the JSON value is not a string. - */ - simdjson_really_inline simdjson_result get_raw_json_string() & noexcept; - /** - * Cast this JSON value to a bool. - * - * @returns A bool value. - * @returns INCORRECT_TYPE if the JSON value is not true or false. - */ - simdjson_really_inline simdjson_result get_bool() noexcept; - /** - * Checks if this JSON value is null. - * - * @returns Whether the value is null. - */ - simdjson_really_inline bool is_null() noexcept; - - /** - * Get this value as the given type. - * - * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool - * - * @returns A value of the given type, parsed from the JSON. - * @returns INCORRECT_TYPE If the JSON value is not the given type. - */ - template simdjson_really_inline simdjson_result get() & noexcept; - /** @overload template simdjson_result get() & noexcept */ - template simdjson_really_inline simdjson_result get() && noexcept; - - /** - * Get this value as the given type. - * - * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool - * - * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. - * @returns INCORRECT_TYPE If the JSON value is not an object. - * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. - */ - template simdjson_really_inline error_code get(T &out) & noexcept; - /** @overload template error_code get(T &out) & noexcept */ - template simdjson_really_inline error_code get(T &out) && noexcept; - -#if SIMDJSON_EXCEPTIONS - /** - * Cast this JSON value to an array. - * - * @returns An object that can be used to iterate the array. - * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an array. - */ - simdjson_really_inline operator array() & noexcept(false); - /** - * Cast this JSON value to an object. - * - * @returns An object that can be used to look up or iterate fields. - * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an object. - */ - simdjson_really_inline operator object() & noexcept(false); - /** - * Cast this JSON value to an unsigned integer. - * - * @returns A signed 64-bit integer. - * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit unsigned integer. - */ - simdjson_really_inline operator uint64_t() noexcept(false); - /** - * Cast this JSON value to a signed integer. - * - * @returns A signed 64-bit integer. - * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit integer. - */ - simdjson_really_inline operator int64_t() noexcept(false); - /** - * Cast this JSON value to a double. - * - * @returns A double. - * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a valid floating-point number. - */ - simdjson_really_inline operator double() noexcept(false); - /** - * Cast this JSON value to a string. - * - * The string is guaranteed to be valid UTF-8. - * - * Equivalent to get(). - * - * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next - * time it parses a document or when it is destroyed. - * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. - */ - simdjson_really_inline operator std::string_view() & noexcept(false); - /** - * Cast this JSON value to a raw_json_string. - * - * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). - * - * @returns A pointer to the raw JSON for the given string. - * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. - */ - simdjson_really_inline operator raw_json_string() & noexcept(false); - /** - * Cast this JSON value to a bool. - * - * @returns A bool value. - * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not true or false. - */ - simdjson_really_inline operator bool() noexcept(false); -#endif - - /** - * Begin array iteration. - * - * Part of the std::iterable interface. - */ - simdjson_really_inline simdjson_result> begin() & noexcept; - /** - * Sentinel representing the end of the array. - * - * Part of the std::iterable interface. - */ - simdjson_really_inline simdjson_result> end() & noexcept; - - /** - * Look up a field by name on an object. - * - * This method may only be called once on a given value. If you want to look up multiple fields, - * you must first get the object using value.get_object() or object(value). - * - * @param key The key to look up. - * @returns INCORRECT_TYPE If the JSON value is not an array. - */ - simdjson_really_inline simdjson_result operator[](std::string_view key) & noexcept; - /** - * Look up a field by name on an object. - * - * This method may only be called once on a given value. If you want to look up multiple fields, - * you must first get the object using value.get_object() or object(value). - * - * @param key The key to look up. - * @returns INCORRECT_TYPE If the JSON value is not an array. - */ - simdjson_really_inline simdjson_result operator[](const char *key) & noexcept; - -protected: - simdjson_really_inline document(ondemand::json_iterator &&iter, const uint8_t *json) noexcept; - simdjson_really_inline const uint8_t *text(uint32_t idx) const noexcept; - - simdjson_really_inline value as_value() noexcept; - static simdjson_really_inline document start(ondemand::json_iterator &&iter) noexcept; - /** - * Set json to null if the result is successful. - * - * Convenience function for value-getters. - */ - template - simdjson_result consume_if_success(simdjson_result &&result) noexcept; - - simdjson_really_inline void assert_at_start() const noexcept; - - // - // For array_iterator - // - simdjson_really_inline json_iterator &get_iterator() noexcept; - simdjson_really_inline json_iterator_ref borrow_iterator() noexcept; - simdjson_really_inline bool is_iterator_alive() const noexcept; - simdjson_really_inline void iteration_finished() noexcept; - - // - // Fields - // - json_iterator iter{}; ///< Current position in the document - const uint8_t *json{}; ///< JSON for the value in the document (nullptr if value has been consumed) - - friend struct simdjson_result; - friend class array_iterator; - friend class value; - friend class ondemand::parser; - friend class object; - friend class array; - friend class field; - friend class token; -}; - -} // namespace ondemand -} // namespace SIMDJSON_IMPLEMENTATION -} // namespace simdjson - -namespace simdjson { - -template<> -struct simdjson_result : public SIMDJSON_IMPLEMENTATION::implementation_simdjson_result_base { -public: - simdjson_really_inline simdjson_result(SIMDJSON_IMPLEMENTATION::ondemand::document &&value) noexcept; ///< @private - simdjson_really_inline simdjson_result(error_code error) noexcept; ///< @private - - simdjson_really_inline simdjson_result() noexcept = default; - simdjson_really_inline simdjson_result(simdjson_result &&a) noexcept = default; - simdjson_really_inline ~simdjson_result() noexcept = default; ///< @private - - simdjson_really_inline simdjson_result get_array() & noexcept; - simdjson_really_inline simdjson_result get_object() & noexcept; - simdjson_really_inline simdjson_result get_uint64() noexcept; - simdjson_really_inline simdjson_result get_int64() noexcept; - simdjson_really_inline simdjson_result get_double() noexcept; - simdjson_really_inline simdjson_result get_string() & noexcept; - simdjson_really_inline simdjson_result get_raw_json_string() & noexcept; - simdjson_really_inline simdjson_result get_bool() noexcept; - simdjson_really_inline bool is_null() noexcept; - - template simdjson_really_inline simdjson_result get() & noexcept; - template simdjson_really_inline simdjson_result get() && noexcept; - - template simdjson_really_inline error_code get(T &out) & noexcept; - template simdjson_really_inline error_code get(T &out) && noexcept; - -#if SIMDJSON_EXCEPTIONS - simdjson_really_inline operator SIMDJSON_IMPLEMENTATION::ondemand::array() & noexcept(false); - simdjson_really_inline operator SIMDJSON_IMPLEMENTATION::ondemand::object() & noexcept(false); - simdjson_really_inline operator uint64_t() noexcept(false); - simdjson_really_inline operator int64_t() noexcept(false); - simdjson_really_inline operator double() noexcept(false); - simdjson_really_inline operator std::string_view() & noexcept(false); - simdjson_really_inline operator SIMDJSON_IMPLEMENTATION::ondemand::raw_json_string() & noexcept(false); - simdjson_really_inline operator bool() noexcept(false); -#endif - - simdjson_really_inline simdjson_result> begin() & noexcept; - simdjson_really_inline simdjson_result> end() & noexcept; - simdjson_really_inline simdjson_result operator[](std::string_view key) & noexcept; - simdjson_really_inline simdjson_result operator[](const char *key) & noexcept; -}; - -} // namespace simdjson -/* end file include/simdjson/generic/ondemand/document.h */ -/* begin file include/simdjson/generic/ondemand/value.h */ - -namespace simdjson { -namespace SIMDJSON_IMPLEMENTATION { -namespace ondemand { - -class array; -class document; -class field; -class object; -class raw_json_string; - -/** - * An ephemeral JSON value returned during iteration. - */ -class value { -public: - /** - * Create a new invalid value. - * - * Exists so you can declare a variable and later assign to it before use. - */ - simdjson_really_inline value() noexcept = default; - - simdjson_really_inline value(value &&other) noexcept = default; - simdjson_really_inline value &operator=(value && other) noexcept = default; - simdjson_really_inline value(const value &) noexcept = delete; - simdjson_really_inline value &operator=(const value &) noexcept = delete; - - /** - * Skips the value if the value was not successfully parsed or used. - */ - simdjson_really_inline ~value() noexcept; - - /** - * Get this value as the given type. - * - * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool - * - * @returns A value of the given type, parsed from the JSON. - * @returns INCORRECT_TYPE If the JSON value is not the given type. - */ - template simdjson_really_inline simdjson_result get() & noexcept; - /** @overload template simdjson_result get() & noexcept */ - template simdjson_really_inline simdjson_result get() && noexcept; - - /** - * Get this value as the given type. - * - * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool - * - * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. - * @returns INCORRECT_TYPE If the JSON value is not an object. - * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. - */ - template simdjson_really_inline error_code get(T &out) & noexcept; - /** @overload template error_code get(T &out) & noexcept */ - template simdjson_really_inline error_code get(T &out) && noexcept; - - /** - * Cast this JSON value to an array. - * - * @returns An object that can be used to iterate the array. - * @returns INCORRECT_TYPE If the JSON value is not an array. - */ - simdjson_really_inline simdjson_result get_array() noexcept; - - /** - * Cast this JSON value to an object. - * - * @returns An object that can be used to look up or iterate fields. - * @returns INCORRECT_TYPE If the JSON value is not an object. - */ - simdjson_really_inline simdjson_result get_object() noexcept; - - // PERF NOTE: get_XXX() methods generally have both && and & variants because performance is demonstrably better on clang. - // Specifically, in typical cases where you use a temporary value (like doc["x"].get_double()) the && version is faster - // because the & version has to branch to check whether the parse failed or not before deciding whether the value was consumed. - - /** - * Cast this JSON value to an unsigned integer. - * - * @returns A signed 64-bit integer. - * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer. - */ - simdjson_really_inline simdjson_result get_uint64() && noexcept; - /** @overload simdjson_really_inline simdjson_result get_uint64() && noexcept */ - simdjson_really_inline simdjson_result get_uint64() & noexcept; - - /** - * Cast this JSON value to a signed integer. - * - * @returns A signed 64-bit integer. - * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. - */ - simdjson_really_inline simdjson_result get_int64() && noexcept; - /** @overload simdjson_really_inline simdjson_result get_int64() && noexcept */ - simdjson_really_inline simdjson_result get_int64() & noexcept; - - /** - * Cast this JSON value to a double. - * - * @returns A double. - * @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number. - */ - simdjson_really_inline simdjson_result get_double() && noexcept; - /** @overload simdjson_really_inline simdjson_result get_double() && noexcept */ - simdjson_really_inline simdjson_result get_double() & noexcept; - - /** - * Cast this JSON value to a string. - * - * The string is guaranteed to be valid UTF-8. - * - * Equivalent to get(). - * - * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next - * time it parses a document or when it is destroyed. - * @returns INCORRECT_TYPE if the JSON value is not a string. - */ - simdjson_really_inline simdjson_result get_string() && noexcept; - /** @overload simdjson_really_inline simdjson_result get_string() && noexcept */ - simdjson_really_inline simdjson_result get_string() & noexcept; - - /** - * Cast this JSON value to a raw_json_string. - * - * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). - * - * @returns A pointer to the raw JSON for the given string. - * @returns INCORRECT_TYPE if the JSON value is not a string. - */ - simdjson_really_inline simdjson_result get_raw_json_string() && noexcept; - /** @overload simdjson_really_inline simdjson_result get_raw_json_string() && noexcept */ - simdjson_really_inline simdjson_result get_raw_json_string() & noexcept; - - /** - * Cast this JSON value to a bool. - * - * @returns A bool value. - * @returns INCORRECT_TYPE if the JSON value is not true or false. - */ - simdjson_really_inline simdjson_result get_bool() && noexcept; - /** @overload simdjson_really_inline simdjson_result get_bool() && noexcept */ - simdjson_really_inline simdjson_result get_bool() & noexcept; - - /** - * Checks if this JSON value is null. - * - * @returns Whether the value is null. - */ - simdjson_really_inline bool is_null() && noexcept; - /** @overload simdjson_really_inline bool is_null() && noexcept */ - simdjson_really_inline bool is_null() & noexcept; - -#if SIMDJSON_EXCEPTIONS - /** - * Cast this JSON value to an array. - * - * @returns An object that can be used to iterate the array. - * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an array. - */ - simdjson_really_inline operator array() noexcept(false); - /** - * Cast this JSON value to an object. - * - * @returns An object that can be used to look up or iterate fields. - * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an object. - */ - simdjson_really_inline operator object() noexcept(false); - /** - * Cast this JSON value to an unsigned integer. - * - * @returns A signed 64-bit integer. - * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit unsigned integer. - */ - simdjson_really_inline operator uint64_t() && noexcept(false); - /** @overload simdjson_really_inline operator uint64_t() && noexcept(false); */ - simdjson_really_inline operator uint64_t() & noexcept(false); - /** - * Cast this JSON value to a signed integer. - * - * @returns A signed 64-bit integer. - * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit integer. - */ - simdjson_really_inline operator int64_t() && noexcept(false); - /** @overload simdjson_really_inline operator int64_t() && noexcept(false); */ - simdjson_really_inline operator int64_t() & noexcept(false); - /** - * Cast this JSON value to a double. - * - * @returns A double. - * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a valid floating-point number. - */ - simdjson_really_inline operator double() && noexcept(false); - /** @overload simdjson_really_inline operator double() && noexcept(false); */ - simdjson_really_inline operator double() & noexcept(false); - /** - * Cast this JSON value to a string. - * - * The string is guaranteed to be valid UTF-8. - * - * Equivalent to get(). - * - * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next - * time it parses a document or when it is destroyed. - * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. - */ - simdjson_really_inline operator std::string_view() && noexcept(false); - /** @overload simdjson_really_inline operator std::string_view() && noexcept(false); */ - simdjson_really_inline operator std::string_view() & noexcept(false); - /** - * Cast this JSON value to a raw_json_string. - * - * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). - * - * @returns A pointer to the raw JSON for the given string. - * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. - */ - simdjson_really_inline operator raw_json_string() && noexcept(false); - /** @overload simdjson_really_inline operator raw_json_string() && noexcept(false); */ - simdjson_really_inline operator raw_json_string() & noexcept(false); - /** - * Cast this JSON value to a bool. - * - * @returns A bool value. - * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not true or false. - */ - simdjson_really_inline operator bool() && noexcept(false); - /** @overload simdjson_really_inline operator bool() && noexcept(false); */ - simdjson_really_inline operator bool() & noexcept(false); -#endif - - /** - * Begin array iteration. - * - * Part of the std::iterable interface. - * - * @returns INCORRECT_TYPE If the JSON value is not an array. - */ - simdjson_really_inline simdjson_result> begin() & noexcept; - /** - * Sentinel representing the end of the array. - * - * Part of the std::iterable interface. - */ - simdjson_really_inline simdjson_result> end() & noexcept; - -protected: - /** - * Create a value. - * - * Use value::read() instead of this. - */ - simdjson_really_inline value(json_iterator_ref &&iter, const uint8_t *json) noexcept; - - /** - * Read a value. - * - * If the value is an array or object, only the opening brace will be consumed. - * - * @param doc The document containing the value. Iterator must be at the value start position. - */ - static simdjson_really_inline value start(json_iterator_ref &&iter) noexcept; - - /** - * Skip this value, allowing iteration to continue. - */ - simdjson_really_inline void skip() noexcept; - - simdjson_really_inline void log_value(const char *type) const noexcept; - simdjson_really_inline void log_error(const char *message) const noexcept; - - // - // For array_iterator - // - simdjson_really_inline json_iterator &get_iterator() noexcept; - simdjson_really_inline json_iterator_ref borrow_iterator() noexcept; - simdjson_really_inline bool is_iterator_alive() const noexcept; - simdjson_really_inline void iteration_finished() noexcept; - simdjson_really_inline const uint8_t *consume() noexcept; - template - simdjson_really_inline simdjson_result consume_if_success(simdjson_result &&result) noexcept; - - json_iterator_ref iter{}; - const uint8_t *json{}; // The JSON text of the value - - friend class document; - template friend class array_iterator; - friend class field; - friend class object; - friend struct simdjson_result; - friend struct simdjson_result; - friend struct simdjson_result; -}; - -} // namespace ondemand -} // namespace SIMDJSON_IMPLEMENTATION -} // namespace simdjson - -namespace simdjson { - -template<> -struct simdjson_result : public SIMDJSON_IMPLEMENTATION::implementation_simdjson_result_base { -public: - simdjson_really_inline simdjson_result(SIMDJSON_IMPLEMENTATION::ondemand::value &&value) noexcept; ///< @private - simdjson_really_inline simdjson_result(error_code error) noexcept; ///< @private - - simdjson_really_inline simdjson_result() noexcept = default; - simdjson_really_inline simdjson_result(simdjson_result &&a) noexcept = default; - simdjson_really_inline ~simdjson_result() noexcept = default; ///< @private - - simdjson_really_inline simdjson_result get_array() noexcept; - - simdjson_really_inline simdjson_result get_object() noexcept; - - simdjson_really_inline simdjson_result get_uint64() && noexcept; - simdjson_really_inline simdjson_result get_uint64() & noexcept; - - simdjson_really_inline simdjson_result get_int64() && noexcept; - simdjson_really_inline simdjson_result get_int64() & noexcept; - - simdjson_really_inline simdjson_result get_double() && noexcept; - simdjson_really_inline simdjson_result get_double() & noexcept; - - simdjson_really_inline simdjson_result get_string() && noexcept; - simdjson_really_inline simdjson_result get_string() & noexcept; - - simdjson_really_inline simdjson_result get_raw_json_string() && noexcept; - simdjson_really_inline simdjson_result get_raw_json_string() & noexcept; - - simdjson_really_inline simdjson_result get_bool() && noexcept; - simdjson_really_inline simdjson_result get_bool() & noexcept; - - simdjson_really_inline bool is_null() && noexcept; - simdjson_really_inline bool is_null() & noexcept; - - template simdjson_really_inline simdjson_result get() & noexcept; - template simdjson_really_inline simdjson_result get() && noexcept; - - template simdjson_really_inline error_code get(T &out) & noexcept; - template simdjson_really_inline error_code get(T &out) && noexcept; - -#if SIMDJSON_EXCEPTIONS - simdjson_really_inline operator SIMDJSON_IMPLEMENTATION::ondemand::array() noexcept(false); - simdjson_really_inline operator SIMDJSON_IMPLEMENTATION::ondemand::object() noexcept(false); - simdjson_really_inline operator uint64_t() && noexcept(false); - simdjson_really_inline operator uint64_t() & noexcept(false); - simdjson_really_inline operator int64_t() && noexcept(false); - simdjson_really_inline operator int64_t() & noexcept(false); - simdjson_really_inline operator double() && noexcept(false); - simdjson_really_inline operator double() & noexcept(false); - simdjson_really_inline operator std::string_view() && noexcept(false); - simdjson_really_inline operator std::string_view() & noexcept(false); - simdjson_really_inline operator SIMDJSON_IMPLEMENTATION::ondemand::raw_json_string() && noexcept(false); - simdjson_really_inline operator SIMDJSON_IMPLEMENTATION::ondemand::raw_json_string() & noexcept(false); - simdjson_really_inline operator bool() && noexcept(false); - simdjson_really_inline operator bool() & noexcept(false); -#endif - - simdjson_really_inline simdjson_result> begin() & noexcept; - simdjson_really_inline simdjson_result> end() & noexcept; -}; - -} // namespace simdjson -/* end file include/simdjson/generic/ondemand/value.h */ -/* begin file include/simdjson/generic/ondemand/field.h */ - -namespace simdjson { -namespace SIMDJSON_IMPLEMENTATION { -namespace ondemand { - -/** - * A JSON field (key/value pair) in an object. - * - * Returned from object iteration. - * - * Extends from std::pair so you can use C++ algorithms that rely on pairs. - */ -class field : public std::pair { -public: - /** - * Create a new invalid field. - * - * Exists so you can declare a variable and later assign to it before use. - */ - simdjson_really_inline field() noexcept; - - simdjson_really_inline field(field &&other) noexcept = default; - simdjson_really_inline field &operator=(field &&other) noexcept = default; - simdjson_really_inline field(const field &other) noexcept = delete; - simdjson_really_inline field &operator=(const field &other) noexcept = delete; - - /** - * Get the key as a string_view (for higher speed, consider raw_key). - * We deliberately use a more cumbersome name (unescaped_key) to force users - * to think twice about using it. - * - * This consumes the key: once you have called unescaped_key(), you cannot - * call it again nor can you call key(). - */ - simdjson_really_inline simdjson_warn_unused simdjson_result unescaped_key() noexcept; - /** - * Get the key as a raw_json_string: this is fast and allows straight comparisons. - * We want this to be the default for most users. - */ - simdjson_really_inline raw_json_string key() const noexcept; - /** - * Get the field value. - */ - simdjson_really_inline ondemand::value &value() & noexcept; - /** - * @overload ondemand::value &ondemand::value() & noexcept - */ - simdjson_really_inline ondemand::value value() && noexcept; - -protected: - simdjson_really_inline field(raw_json_string key, ondemand::value &&value) noexcept; - static simdjson_really_inline simdjson_result start(json_iterator_ref &iter) noexcept; - static simdjson_really_inline simdjson_result start(json_iterator_ref &&iter, raw_json_string key) noexcept; - friend struct simdjson_result; - friend class object_iterator; -}; - -} // namespace ondemand -} // namespace SIMDJSON_IMPLEMENTATION -} // namespace simdjson - -namespace simdjson { - -template<> -struct simdjson_result : public SIMDJSON_IMPLEMENTATION::implementation_simdjson_result_base { -public: - simdjson_really_inline simdjson_result(SIMDJSON_IMPLEMENTATION::ondemand::field &&value) noexcept; ///< @private - simdjson_really_inline simdjson_result(error_code error) noexcept; ///< @private - - simdjson_really_inline simdjson_result() noexcept = default; - simdjson_really_inline simdjson_result(simdjson_result &&a) noexcept = default; - simdjson_really_inline ~simdjson_result() noexcept = default; ///< @private - - simdjson_really_inline simdjson_result unescaped_key() noexcept; - simdjson_really_inline simdjson_result key() noexcept; - simdjson_really_inline simdjson_result value() noexcept; -}; - -} // namespace simdjson -/* end file include/simdjson/generic/ondemand/field.h */ -/* begin file include/simdjson/generic/ondemand/object.h */ - -namespace simdjson { -namespace SIMDJSON_IMPLEMENTATION { -namespace ondemand { - -/** - * A forward-only JSON object field iterator. - */ -class object { -public: - /** - * Create a new invalid object. - * - * Exists so you can declare a variable and later assign to it before use. - */ - simdjson_really_inline object() noexcept = default; - - simdjson_really_inline object(object &&other) noexcept = default; - simdjson_really_inline object &operator=(object &&other) noexcept = default; - object(const object &) = delete; - object &operator=(const object &) = delete; - - simdjson_really_inline ~object() noexcept; - - simdjson_really_inline object_iterator begin() noexcept; - simdjson_really_inline object_iterator end() noexcept; - simdjson_really_inline simdjson_result operator[](const std::string_view key) & noexcept; - simdjson_really_inline simdjson_result operator[](const std::string_view key) && noexcept; - -protected: - /** - * Begin object iteration. - * - * @param doc The document containing the object. The iterator must be just after the opening `{`. - * @param error If this is not SUCCESS, creates an error chained object. - */ - static simdjson_really_inline simdjson_result start(json_iterator_ref &&iter) noexcept; - static simdjson_really_inline object started(json_iterator_ref &&iter) noexcept; - - /** - * Internal object creation. Call object::begin(doc) instead of this. - * - * @param doc The document containing the object. doc->depth must already be incremented to - * reflect the object's depth. The iterator must be just after the opening `{`. - */ - simdjson_really_inline object(json_iterator_ref &&_iter) noexcept; - - simdjson_really_inline error_code find_field(const std::string_view key) noexcept; - - /** - * Document containing the primary iterator. - * - * PERF NOTE: expected to be elided in favor of the parent document: this is set when the object - * is first used, and never changes afterwards. - */ - json_iterator_ref iter{}; - /** - * Whether we are at the start. - * - * PERF NOTE: this should be elided into inline control flow: it is only used for the first [] - * or * call, and SSA optimizers commonly do first-iteration loop optimization. - */ - bool at_start{}; - - friend class value; - friend class document; - friend struct simdjson_result; -}; - -} // namespace ondemand -} // namespace SIMDJSON_IMPLEMENTATION -} // namespace simdjson - -namespace simdjson { - -template<> -struct simdjson_result : public SIMDJSON_IMPLEMENTATION::implementation_simdjson_result_base { -public: - simdjson_really_inline simdjson_result(SIMDJSON_IMPLEMENTATION::ondemand::object &&value) noexcept; ///< @private - simdjson_really_inline simdjson_result(error_code error) noexcept; ///< @private - - simdjson_really_inline simdjson_result() noexcept = default; - simdjson_really_inline simdjson_result(simdjson_result &&a) noexcept = default; - simdjson_really_inline ~simdjson_result() noexcept = default; ///< @private - - simdjson_really_inline simdjson_result begin() noexcept; - simdjson_really_inline simdjson_result end() noexcept; - simdjson_really_inline simdjson_result operator[](std::string_view key) & noexcept; - simdjson_really_inline simdjson_result operator[](std::string_view key) && noexcept; -}; - -} // namespace simdjson -/* end file include/simdjson/generic/ondemand/object.h */ -/* begin file include/simdjson/generic/ondemand/parser.h */ - -namespace simdjson { -namespace SIMDJSON_IMPLEMENTATION { -namespace ondemand { - -class array; -class object; -class value; -class raw_json_string; - -/** - * A JSON fragment iterator. - * - * This holds the actual iterator as well as the buffer for writing strings. - */ -class parser { -public: - /** - * Create a JSON parser. - * - * The new parser will have zero capacity. - */ - inline parser() noexcept = default; - - inline parser(parser &&other) noexcept = default; - simdjson_really_inline parser(const parser &other) = delete; - simdjson_really_inline parser &operator=(const parser &other) = delete; - - /** Deallocate the JSON parser. */ - inline ~parser() noexcept = default; - - /** - * Start iterating an on-demand JSON document. - * - * ondemand::parser parser; - * document doc = parser.iterate(json); - * - * ### IMPORTANT: Buffer Lifetime - * - * Because parsing is done while you iterate, you *must* keep the JSON buffer around at least as - * long as the document iteration. - * - * ### IMPORTANT: Document Lifetime - * - * Only one iteration at a time can happen per parser, and the parser *must* be kept alive during - * iteration to ensure intermediate buffers can be accessed. Any document must be destroyed before - * you call parse() again or destroy the parser. - * - * ### REQUIRED: Buffer Padding - * - * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what - * those bytes are initialized to, as long as they are allocated. - * - * @param json The JSON to parse. - * - * @return The document, or an error: - * - MEMALLOC if realloc_if_needed the parser does not have enough capacity, and memory - * allocation fails. - * - EMPTY if the document is all whitespace. - * - UTF8_ERROR if the document is not valid UTF-8. - * - UNESCAPED_CHARS if a string contains control characters that must be escaped - * - UNCLOSED_STRING if there is an unclosed string in the document. - */ - simdjson_warn_unused simdjson_result iterate(const padded_string &json) & noexcept; - simdjson_warn_unused simdjson_result iterate(const std::string &json) & noexcept = delete; - /** - * @private - * - * Start iterating an on-demand JSON document. - * - * ondemand::parser parser; - * json_iterator doc = parser.iterate(json); - * - * ### IMPORTANT: Buffer Lifetime - * - * Because parsing is done while you iterate, you *must* keep the JSON buffer around at least as - * long as the document iteration. - * - * ### IMPORTANT: Document Lifetime - * - * Only one iteration at a time can happen per parser, and the parser *must* be kept alive during - * iteration to ensure intermediate buffers can be accessed. Any document must be destroyed before - * you call parse() again or destroy the parser. - * - * ### REQUIRED: Buffer Padding - * - * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what - * those bytes are initialized to, as long as they are allocated. - * - * @param json The JSON to parse. - * - * @return The iterator, or an error: - * - MEMALLOC if realloc_if_needed the parser does not have enough capacity, and memory - * allocation fails. - * - EMPTY if the document is all whitespace. - * - UTF8_ERROR if the document is not valid UTF-8. - * - UNESCAPED_CHARS if a string contains control characters that must be escaped - * - UNCLOSED_STRING if there is an unclosed string in the document. - */ - simdjson_warn_unused simdjson_result iterate_raw(const padded_string &json) & noexcept; - -private: - dom_parser_implementation dom_parser{}; - size_t _capacity{0}; - size_t _max_depth{0}; - std::unique_ptr string_buf{}; - - /** - * Ensure this parser has enough memory to process JSON documents up to `capacity` bytes in length - * and `max_depth` depth. - * - * @param capacity The new capacity. - * @param max_depth The new max_depth. Defaults to DEFAULT_MAX_DEPTH. - * @return The error, if there is one. - */ - simdjson_warn_unused error_code allocate(size_t capacity, size_t max_depth=DEFAULT_MAX_DEPTH) noexcept; - - friend class json_iterator; -}; - -} // namespace ondemand -} // namespace SIMDJSON_IMPLEMENTATION -} // namespace simdjson - -namespace simdjson { - -template<> -struct simdjson_result : public SIMDJSON_IMPLEMENTATION::implementation_simdjson_result_base { -public: - simdjson_really_inline simdjson_result(SIMDJSON_IMPLEMENTATION::ondemand::parser &&value) noexcept; ///< @private - simdjson_really_inline simdjson_result(error_code error) noexcept; ///< @private - - simdjson_really_inline simdjson_result() noexcept = default; - simdjson_really_inline simdjson_result(simdjson_result &&a) noexcept = default; - simdjson_really_inline ~simdjson_result() noexcept = default; ///< @private -}; - -} // namespace simdjson -/* end file include/simdjson/generic/ondemand/parser.h */ -/* end file include/simdjson/generic/ondemand.h */ - -// Inline definitions -/* begin file include/simdjson/generic/implementation_simdjson_result_base-inl.h */ -namespace simdjson { -namespace SIMDJSON_IMPLEMENTATION { - -// -// internal::implementation_simdjson_result_base inline implementation -// - -/** - * Create a new empty result with error = UNINITIALIZED. - */ -template -simdjson_really_inline implementation_simdjson_result_base::~implementation_simdjson_result_base() noexcept { -} - -template -simdjson_really_inline void implementation_simdjson_result_base::tie(T &value, error_code &error) && noexcept { - // on the clang compiler that comes with current macOS (Apple clang version 11.0.0), - // tie(width, error) = size["w"].get(); - // fails with "error: no viable overloaded '='"" - error = this->second; - if (!error) { - value = std::forward>(*this).first; - } -} - -template -simdjson_warn_unused simdjson_really_inline error_code implementation_simdjson_result_base::get(T &value) && noexcept { - error_code error; - std::forward>(*this).tie(value, error); - return error; -} - -template -simdjson_really_inline error_code implementation_simdjson_result_base::error() const noexcept { - return this->second; -} - -#if SIMDJSON_EXCEPTIONS - -template -simdjson_really_inline T& implementation_simdjson_result_base::value() & noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return this->first; -} - -template -simdjson_really_inline T&& implementation_simdjson_result_base::value() && noexcept(false) { - return std::forward>(*this).take_value(); -} - -template -simdjson_really_inline T&& implementation_simdjson_result_base::take_value() && noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return std::forward(this->first); -} - -template -simdjson_really_inline implementation_simdjson_result_base::operator T&&() && noexcept(false) { - return std::forward>(*this).take_value(); -} - -#endif // SIMDJSON_EXCEPTIONS - -template -simdjson_really_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value, error_code error) noexcept - : first{std::forward(value)}, second{error} {} -template -simdjson_really_inline implementation_simdjson_result_base::implementation_simdjson_result_base(error_code error) noexcept - : implementation_simdjson_result_base(T{}, error) {} -template -simdjson_really_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value) noexcept - : implementation_simdjson_result_base(std::forward(value), SUCCESS) {} -template -simdjson_really_inline implementation_simdjson_result_base::implementation_simdjson_result_base() noexcept - : implementation_simdjson_result_base(T{}, UNINITIALIZED) {} - -} // namespace SIMDJSON_IMPLEMENTATION -} // namespace simdjson -/* end file include/simdjson/generic/implementation_simdjson_result_base-inl.h */ -/* begin file include/simdjson/generic/ondemand-inl.h */ -/* begin file include/simdjson/generic/ondemand/logger-inl.h */ -namespace simdjson { -namespace SIMDJSON_IMPLEMENTATION { -namespace ondemand { -namespace logger { - -static constexpr const char * DASHES = "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------"; -static constexpr const int LOG_EVENT_LEN = 20; -static constexpr const int LOG_BUFFER_LEN = 30; -static constexpr const int LOG_SMALL_BUFFER_LEN = 10; -static int log_depth = 0; // Not threadsafe. Log only. - -// Helper to turn unprintable or newline characters into spaces -static simdjson_really_inline char printable_char(char c) { - if (c >= 0x20) { - return c; - } else { - return ' '; - } -} - -simdjson_really_inline void log_event(const json_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { - log_line(iter, "", type, detail, delta, depth_delta); -} -simdjson_really_inline void log_value(const json_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { - log_line(iter, "", type, detail, delta, depth_delta); -} -simdjson_really_inline void log_start_value(const json_iterator &iter, const char *type, int delta, int depth_delta) noexcept { - log_line(iter, "+", type, "", delta, depth_delta); - log_depth++; -} -simdjson_really_inline void log_end_value(const json_iterator &iter, const char *type, int delta, int depth_delta) noexcept { - log_depth--; - log_line(iter, "-", type, "", delta, depth_delta); -} -simdjson_really_inline void log_error(const json_iterator &iter, const char *error, const char *detail, int delta, int depth_delta) noexcept { - log_line(iter, "ERROR: ", error, detail, delta, depth_delta); -} - -simdjson_really_inline void log_headers() noexcept { - log_depth = 0; - if (LOG_ENABLED) { - printf("\n"); - printf("| %-*s | %-*s | %-*s | %-*s | Detail |\n", LOG_EVENT_LEN, "Event", LOG_BUFFER_LEN, "Buffer", LOG_SMALL_BUFFER_LEN, "Next", 5, "Next#"); - printf("|%.*s|%.*s|%.*s|%.*s|--------|\n", LOG_EVENT_LEN+2, DASHES, LOG_BUFFER_LEN+2, DASHES, LOG_SMALL_BUFFER_LEN+2, DASHES, 5+2, DASHES); - fflush(stdout); - } -} - -simdjson_really_inline void log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta) noexcept { - if (LOG_ENABLED) { - const int indent = (log_depth+depth_delta)*2; - printf("| %*s%s%-*s ", - indent, "", - title_prefix, - LOG_EVENT_LEN - indent - int(strlen(title_prefix)), title - ); - { - // Print the current structural. - printf("| "); - for (int i=0;i raw_json_string::unescape(uint8_t *&dst) const noexcept { - uint8_t *end = stringparsing::parse_string(buf, dst); - if (!end) { return STRING_ERROR; } - std::string_view result((const char *)dst, end-dst); - dst = end; - return result; -} - -simdjson_really_inline simdjson_warn_unused simdjson_result raw_json_string::unescape(json_iterator &iter) const noexcept { - return unescape(iter.current_string_buf_loc); -} - -simdjson_unused simdjson_really_inline bool operator==(const raw_json_string &a, std::string_view b) noexcept { - return !memcmp(a.raw(), b.data(), b.size()); -} - -simdjson_unused simdjson_really_inline bool operator==(std::string_view a, const raw_json_string &b) noexcept { - return b == a; -} - -simdjson_unused simdjson_really_inline bool operator!=(const raw_json_string &a, std::string_view b) noexcept { - return !(a == b); -} - -simdjson_unused simdjson_really_inline bool operator!=(std::string_view a, const raw_json_string &b) noexcept { - return !(a == b); -} - -simdjson_unused simdjson_really_inline std::ostream &operator<<(std::ostream &out, const raw_json_string &str) noexcept { - bool in_escape = false; - const char *s = str.raw(); - while (true) { - switch (*s) { - case '\\': in_escape = !in_escape; break; - case '"': if (in_escape) { in_escape = false; } else { return out; } break; - default: if (in_escape) { in_escape = false; } - } - out << *s; - s++; - } -} - -} // namespace ondemand -} // namespace SIMDJSON_IMPLEMENTATION -} // namespace simdjson - -namespace simdjson { - -simdjson_really_inline simdjson_result::simdjson_result(SIMDJSON_IMPLEMENTATION::ondemand::raw_json_string &&value) noexcept - : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_really_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base(error) {} - -simdjson_really_inline simdjson_result simdjson_result::raw() const noexcept { - if (error()) { return error(); } - return first.raw(); -} -simdjson_really_inline simdjson_warn_unused simdjson_result simdjson_result::unescape(uint8_t *&dst) const noexcept { - if (error()) { return error(); } - return first.unescape(dst); -} -simdjson_really_inline simdjson_warn_unused simdjson_result simdjson_result::unescape(SIMDJSON_IMPLEMENTATION::ondemand::json_iterator &iter) const noexcept { - if (error()) { return error(); } - return first.unescape(iter); -} - -} // namespace simdjson -/* end file include/simdjson/generic/ondemand/raw_json_string-inl.h */ -/* begin file include/simdjson/generic/ondemand/token_iterator-inl.h */ -namespace simdjson { -namespace SIMDJSON_IMPLEMENTATION { -namespace ondemand { - -simdjson_really_inline token_iterator::token_iterator(const uint8_t *_buf, uint32_t *_index) noexcept - : buf{_buf}, index{_index} -{ -} - -simdjson_really_inline const uint8_t *token_iterator::peek(int32_t delta) const noexcept { - return &buf[*(index+delta)]; -} -simdjson_really_inline const uint8_t *token_iterator::advance() noexcept { - return &buf[*(index++)]; -} -simdjson_really_inline uint32_t token_iterator::peek_index(int32_t delta) const noexcept { - return *(index+delta); -} -simdjson_really_inline uint32_t token_iterator::peek_length(int32_t delta) const noexcept { - return *(index+delta+1) - *(index+delta); -} - -simdjson_really_inline bool token_iterator::operator==(const token_iterator &other) const noexcept { - return index == other.index; -} -simdjson_really_inline bool token_iterator::operator!=(const token_iterator &other) const noexcept { - return index != other.index; -} -simdjson_really_inline bool token_iterator::operator>(const token_iterator &other) const noexcept { - return index > other.index; -} -simdjson_really_inline bool token_iterator::operator>=(const token_iterator &other) const noexcept { - return index >= other.index; -} -simdjson_really_inline bool token_iterator::operator<(const token_iterator &other) const noexcept { - return index < other.index; -} -simdjson_really_inline bool token_iterator::operator<=(const token_iterator &other) const noexcept { - return index <= other.index; -} - -} // namespace ondemand -} // namespace SIMDJSON_IMPLEMENTATION -} // namespace simdjson - -namespace simdjson { - -simdjson_really_inline simdjson_result::simdjson_result(SIMDJSON_IMPLEMENTATION::ondemand::token_iterator &&value) noexcept - : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_really_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base(error) {} - -} // namespace simdjson -/* end file include/simdjson/generic/ondemand/token_iterator-inl.h */ -/* begin file include/simdjson/generic/ondemand/json_iterator-inl.h */ -namespace simdjson { -namespace SIMDJSON_IMPLEMENTATION { -namespace ondemand { - -simdjson_really_inline json_iterator::json_iterator(json_iterator &&other) noexcept - : token_iterator(std::forward(other)), - parser{other.parser}, - current_string_buf_loc{other.current_string_buf_loc} -#ifdef SIMDJSON_ONDEMAND_SAFETY_RAILS - , active_lease_depth{other.active_lease_depth} -#endif -{ - other.parser = nullptr; -} -simdjson_really_inline json_iterator &json_iterator::operator=(json_iterator &&other) noexcept { - buf = other.buf; - index = other.index; - parser = other.parser; - current_string_buf_loc = other.current_string_buf_loc; -#ifdef SIMDJSON_ONDEMAND_SAFETY_RAILS - active_lease_depth = other.active_lease_depth; -#endif - other.parser = nullptr; - return *this; -} - -simdjson_really_inline json_iterator::json_iterator(ondemand::parser *_parser) noexcept - : token_iterator(_parser->dom_parser.buf, _parser->dom_parser.structural_indexes.get()), - parser{_parser}, - current_string_buf_loc{parser->string_buf.get()} -#ifdef SIMDJSON_ONDEMAND_SAFETY_RAILS - , active_lease_depth{0} -#endif -{ - // Release the string buf so it can be reused by the next document - logger::log_headers(); -} -#ifdef SIMDJSON_ONDEMAND_SAFETY_RAILS -simdjson_really_inline json_iterator::~json_iterator() noexcept { - // If we have any leases out when we die, it's an error - SIMDJSON_ASSUME(active_lease_depth == 0); -} -#endif - -simdjson_warn_unused simdjson_really_inline simdjson_result json_iterator::start_object(const uint8_t *json) noexcept { - if (*json != '{') { logger::log_error(*this, "Not an object"); return INCORRECT_TYPE; } - return started_object(); -} -simdjson_warn_unused simdjson_really_inline simdjson_result json_iterator::start_object() noexcept { - return start_object(advance()); -} - -simdjson_warn_unused simdjson_really_inline bool json_iterator::started_object() noexcept { - if (*peek() == '}') { - logger::log_value(*this, "empty object"); - advance(); - return false; - } - logger::log_start_value(*this, "object"); - return true; -} - -simdjson_warn_unused simdjson_really_inline simdjson_result json_iterator::has_next_field() noexcept { - switch (*advance()) { - case '}': - logger::log_end_value(*this, "object"); - return false; - case ',': - return true; - default: - return report_error(TAPE_ERROR, "Missing comma between object fields"); - } -} - -simdjson_warn_unused simdjson_really_inline simdjson_result json_iterator::find_field_raw(const char *key) noexcept { - bool has_next; - do { - raw_json_string actual_key; - SIMDJSON_TRY( consume_raw_json_string().get(actual_key) ); - if (*advance() != ':') { return report_error(TAPE_ERROR, "Missing colon in object field"); } - if (actual_key == key) { - logger::log_event(*this, "match", key); - return true; - } - logger::log_event(*this, "non-match", key); - SIMDJSON_TRY( skip() ); // Skip the value so we can look at the next key - - SIMDJSON_TRY( has_next_field().get(has_next) ); - } while (has_next); - logger::log_event(*this, "no matches", key); - return false; -} - -simdjson_warn_unused simdjson_really_inline simdjson_result json_iterator::field_key() noexcept { - const uint8_t *key = advance(); - if (*(key++) != '"') { return report_error(TAPE_ERROR, "Object key is not a string"); } - return raw_json_string(key); -} - -simdjson_warn_unused simdjson_really_inline error_code json_iterator::field_value() noexcept { - if (*advance() != ':') { return report_error(TAPE_ERROR, "Missing colon in object field"); } - return SUCCESS; -} - -simdjson_warn_unused simdjson_really_inline simdjson_result json_iterator::start_array(const uint8_t *json) noexcept { - if (*json != '[') { logger::log_error(*this, "Not an array"); return INCORRECT_TYPE; } - return started_array(); -} - -simdjson_warn_unused simdjson_really_inline simdjson_result json_iterator::start_array() noexcept { - return start_array(advance()); -} - -simdjson_warn_unused simdjson_really_inline bool json_iterator::started_array() noexcept { - if (*peek() == ']') { - logger::log_value(*this, "empty array"); - advance(); - return false; - } - logger::log_start_value(*this, "array"); - return true; -} - -simdjson_warn_unused simdjson_really_inline simdjson_result json_iterator::has_next_element() noexcept { - switch (*advance()) { - case ']': - logger::log_end_value(*this, "array"); - return false; - case ',': - return true; - default: - return report_error(TAPE_ERROR, "Missing comma between array elements"); - } -} - -simdjson_warn_unused simdjson_result json_iterator::parse_string(const uint8_t *json) noexcept { - return parse_raw_json_string(json).unescape(current_string_buf_loc); -} -simdjson_warn_unused simdjson_result json_iterator::consume_string() noexcept { - return parse_string(advance()); -} -simdjson_warn_unused simdjson_result json_iterator::parse_raw_json_string(const uint8_t *json) noexcept { - logger::log_value(*this, "string", ""); - if (*json != '"') { logger::log_error(*this, "Not a string"); return INCORRECT_TYPE; } - return raw_json_string(json+1); -} -simdjson_warn_unused simdjson_result json_iterator::consume_raw_json_string() noexcept { - return parse_raw_json_string(advance()); -} -simdjson_warn_unused simdjson_result json_iterator::parse_uint64(const uint8_t *json) noexcept { - logger::log_value(*this, "uint64", ""); - return numberparsing::parse_unsigned(json); -} -simdjson_warn_unused simdjson_result json_iterator::consume_uint64() noexcept { - return parse_uint64(advance()); -} -simdjson_warn_unused simdjson_result json_iterator::parse_int64(const uint8_t *json) noexcept { - logger::log_value(*this, "int64", ""); - return numberparsing::parse_integer(json); -} -simdjson_warn_unused simdjson_result json_iterator::consume_int64() noexcept { - return parse_int64(advance()); -} -simdjson_warn_unused simdjson_result json_iterator::parse_double(const uint8_t *json) noexcept { - logger::log_value(*this, "double", ""); - return numberparsing::parse_double(json); -} -simdjson_warn_unused simdjson_result json_iterator::consume_double() noexcept { - return parse_double(advance()); -} -simdjson_warn_unused simdjson_result json_iterator::parse_bool(const uint8_t *json) noexcept { - logger::log_value(*this, "bool", ""); - auto not_true = atomparsing::str4ncmp(json, "true"); - auto not_false = atomparsing::str4ncmp(json, "fals") | (json[4] ^ 'e'); - bool error = (not_true && not_false) || jsoncharutils::is_not_structural_or_whitespace(json[not_true ? 5 : 4]); - if (error) { logger::log_error(*this, "Not a boolean"); return INCORRECT_TYPE; } - return simdjson_result(!not_true); -} -simdjson_warn_unused simdjson_result json_iterator::consume_bool() noexcept { - return parse_bool(advance()); -} -simdjson_really_inline bool json_iterator::is_null(const uint8_t *json) noexcept { - if (!atomparsing::str4ncmp(json, "null")) { - logger::log_value(*this, "null", ""); - return true; - } - return false; -} -simdjson_really_inline bool json_iterator::is_null() noexcept { - if (is_null(peek())) { - advance(); - return true; - } - return false; -} - -template -simdjson_warn_unused simdjson_really_inline bool json_iterator::copy_to_buffer(const uint8_t *json, uint8_t (&tmpbuf)[N]) noexcept { - // Truncate whitespace to fit the buffer. - auto len = peek_length(-1); - if (len > N-1) { - if (jsoncharutils::is_not_structural_or_whitespace(json[N])) { return false; } - len = N-1; - } - - // Copy to the buffer. - std::memcpy(tmpbuf, json, len); - tmpbuf[len] = ' '; - return true; -} - -constexpr const uint32_t MAX_INT_LENGTH = 1024; - -simdjson_warn_unused simdjson_result json_iterator::parse_root_uint64(const uint8_t *json) noexcept { - uint8_t tmpbuf[20+1]; // <20 digits> is the longest possible unsigned integer - if (!copy_to_buffer(json, tmpbuf)) { logger::log_error(*this, "Root number more than 20 characters"); return NUMBER_ERROR; } - logger::log_value(*this, "uint64", ""); - auto result = numberparsing::parse_unsigned(tmpbuf); - if (result.error()) { logger::log_error(*this, "Error parsing unsigned integer"); return result.error(); } - return result; -} -simdjson_warn_unused simdjson_result json_iterator::consume_root_uint64() noexcept { - return parse_root_uint64(advance()); -} -simdjson_warn_unused simdjson_result json_iterator::parse_root_int64(const uint8_t *json) noexcept { - uint8_t tmpbuf[20+1]; // -<19 digits> is the longest possible integer - if (!copy_to_buffer(json, tmpbuf)) { logger::log_error(*this, "Root number more than 20 characters"); return NUMBER_ERROR; } - logger::log_value(*this, "int64", ""); - auto result = numberparsing::parse_integer(tmpbuf); - if (result.error()) { report_error(result.error(), "Error parsing integer"); } - return result; -} -simdjson_warn_unused simdjson_result json_iterator::consume_root_int64() noexcept { - return parse_root_int64(advance()); -} -simdjson_warn_unused simdjson_result json_iterator::parse_root_double(const uint8_t *json) noexcept { - // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest number: -0.e-308. - uint8_t tmpbuf[1074+8+1]; - if (!copy_to_buffer(json, tmpbuf)) { logger::log_error(*this, "Root number more than 1082 characters"); return NUMBER_ERROR; } - logger::log_value(*this, "double", ""); - auto result = numberparsing::parse_double(tmpbuf); - if (result.error()) { report_error(result.error(), "Error parsing double"); } - return result; -} -simdjson_warn_unused simdjson_result json_iterator::consume_root_double() noexcept { - return parse_root_double(advance()); -} -simdjson_warn_unused simdjson_result json_iterator::parse_root_bool(const uint8_t *json) noexcept { - uint8_t tmpbuf[5+1]; - if (!copy_to_buffer(json, tmpbuf)) { logger::log_error(*this, "Not a boolean"); return INCORRECT_TYPE; } - return parse_bool(tmpbuf); -} -simdjson_warn_unused simdjson_result json_iterator::consume_root_bool() noexcept { - return parse_root_bool(advance()); -} -simdjson_really_inline bool json_iterator::root_is_null(const uint8_t *json) noexcept { - uint8_t tmpbuf[4+1]; - if (!copy_to_buffer(json, tmpbuf)) { return false; } - return is_null(tmpbuf); -} - -simdjson_warn_unused simdjson_really_inline error_code json_iterator::skip() noexcept { - switch (*advance()) { - // PERF TODO does it skip the depth check when we don't decrement depth? - case '[': case '{': - logger::log_start_value(*this, "skip"); - return skip_container(); - default: - logger::log_value(*this, "skip", ""); - return SUCCESS; - } -} - -simdjson_warn_unused simdjson_really_inline error_code json_iterator::skip_container() noexcept { - uint32_t depth = 1; - // The loop breaks only when depth-- happens. - auto end = &parser->dom_parser.structural_indexes[parser->dom_parser.n_structural_indexes]; - while (index <= end) { - uint8_t ch = *advance(); - switch (ch) { - // TODO consider whether matching braces is a requirement: if non-matching braces indicates - // *missing* braces, then future lookups are not in the object/arrays they think they are, - // violating the rule "validate enough structure that the user can be confident they are - // looking at the right values." - case ']': case '}': - logger::log_end_value(*this, "skip"); - depth--; - if (depth == 0) { logger::log_event(*this, "end skip", ""); return SUCCESS; } - break; - // PERF TODO does it skip the depth check when we don't decrement depth? - case '[': case '{': - logger::log_start_value(*this, "skip"); - depth++; - break; - default: - logger::log_value(*this, "skip", ""); - break; - } - } - - return report_error(TAPE_ERROR, "not enough close braces"); -} - -simdjson_really_inline bool json_iterator::at_start() const noexcept { - return index == parser->dom_parser.structural_indexes.get(); -} - -simdjson_really_inline bool json_iterator::at_eof() const noexcept { - return index == &parser->dom_parser.structural_indexes[parser->dom_parser.n_structural_indexes]; -} - -simdjson_really_inline bool json_iterator::is_alive() const noexcept { - return parser; -} - - -simdjson_really_inline json_iterator_ref json_iterator::borrow() noexcept { -#ifdef SIMDJSON_ONDEMAND_SAFETY_RAILS - SIMDJSON_ASSUME(active_lease_depth == 0); - const uint32_t child_depth = 1; - active_lease_depth = child_depth; - return json_iterator_ref(this, child_depth); -#else - return json_iterator_ref(this); -#endif -} - -simdjson_really_inline error_code json_iterator::report_error(error_code error, const char *message) noexcept { - SIMDJSON_ASSUME(error != SUCCESS && error != UNINITIALIZED && error != INCORRECT_TYPE && error != NO_SUCH_FIELD); - logger::log_error(*this, message); - _error = error; - return error; -} -simdjson_really_inline error_code json_iterator::error() const noexcept { - return _error; -} - -// -// json_iterator_ref -// -simdjson_really_inline json_iterator_ref::json_iterator_ref(json_iterator_ref &&other) noexcept - : iter{other.iter} -#ifdef SIMDJSON_ONDEMAND_SAFETY_RAILS - , lease_depth{other.lease_depth} -#endif // SIMDJSON_ONDEMAND_SAFETY_RAILS -{ - other.iter = nullptr; -} -simdjson_really_inline json_iterator_ref &json_iterator_ref::operator=(json_iterator_ref &&other) noexcept { - assert_is_not_active(); - iter = other.iter; -#ifdef SIMDJSON_ONDEMAND_SAFETY_RAILS - lease_depth = other.lease_depth; -#endif // SIMDJSON_ONDEMAND_SAFETY_RAILS - other.iter = nullptr; - return *this; -} - -#ifdef SIMDJSON_ONDEMAND_SAFETY_RAILS -simdjson_really_inline json_iterator_ref::~json_iterator_ref() noexcept { - // The caller MUST consume their value and release the iterator before they die - assert_is_not_active(); -} -simdjson_really_inline json_iterator_ref::json_iterator_ref( - json_iterator *_iter, - uint32_t _lease_depth -) noexcept : iter{_iter}, lease_depth{_lease_depth} -{ - assert_is_active(); -} -#else -simdjson_really_inline json_iterator_ref::json_iterator_ref( - json_iterator *_iter -) noexcept : iter{_iter} -{ - assert_is_active(); -} -#endif // SIMDJSON_ONDEMAND_SAFETY_RAILS - -simdjson_really_inline json_iterator_ref json_iterator_ref::borrow() noexcept { - assert_is_active(); -#ifdef SIMDJSON_ONDEMAND_SAFETY_RAILS - const uint32_t child_depth = lease_depth + 1; - iter->active_lease_depth = child_depth; - return json_iterator_ref(iter, child_depth); -#else - return json_iterator_ref(iter); -#endif -} -simdjson_really_inline void json_iterator_ref::release() noexcept { - assert_is_active(); -#ifdef SIMDJSON_ONDEMAND_SAFETY_RAILS - iter->active_lease_depth = lease_depth - 1; -#endif - iter = nullptr; -} - -simdjson_really_inline json_iterator *json_iterator_ref::operator->() noexcept { - assert_is_active(); - return iter; -} -simdjson_really_inline json_iterator &json_iterator_ref::operator*() noexcept { - assert_is_active(); - return *iter; -} -simdjson_really_inline const json_iterator &json_iterator_ref::operator*() const noexcept { - assert_is_active(); - return *iter; -} - -simdjson_really_inline bool json_iterator_ref::is_alive() const noexcept { - return iter != nullptr; -} -simdjson_really_inline bool json_iterator_ref::is_active() const noexcept { -#ifdef SIMDJSON_ONDEMAND_SAFETY_RAILS - return is_alive() && lease_depth == iter->active_lease_depth; -#else - return is_alive(); -#endif -} -simdjson_really_inline void json_iterator_ref::assert_is_active() const noexcept { -// We don't call const functions because VC++ is worried they might have side effects in __assume -#ifdef SIMDJSON_ONDEMAND_SAFETY_RAILS - SIMDJSON_ASSUME(iter != nullptr && lease_depth == iter->active_lease_depth); -#else - SIMDJSON_ASSUME(iter != nullptr); -#endif -} -simdjson_really_inline void json_iterator_ref::assert_is_not_active() const noexcept { -// We don't call const functions because VC++ is worried they might have side effects in __assume -#ifdef SIMDJSON_ONDEMAND_SAFETY_RAILS - SIMDJSON_ASSUME(!(iter != nullptr && lease_depth == iter->active_lease_depth)); -#else - SIMDJSON_ASSUME(!(iter != nullptr)); -#endif -} - - - -} // namespace ondemand -} // namespace SIMDJSON_IMPLEMENTATION -} // namespace simdjson - -namespace simdjson { - -simdjson_really_inline simdjson_result::simdjson_result(SIMDJSON_IMPLEMENTATION::ondemand::json_iterator &&value) noexcept - : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_really_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base(error) {} - -simdjson_really_inline simdjson_result::simdjson_result(SIMDJSON_IMPLEMENTATION::ondemand::json_iterator_ref &&value) noexcept - : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_really_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base(error) {} - -} // namespace simdjson -/* end file include/simdjson/generic/ondemand/json_iterator-inl.h */ -/* begin file include/simdjson/generic/ondemand/array_iterator-inl.h */ -namespace simdjson { -namespace SIMDJSON_IMPLEMENTATION { -namespace ondemand { - -template -simdjson_really_inline array_iterator::array_iterator(T &_iter) noexcept : iter{&_iter} {} - -template -simdjson_really_inline simdjson_result> array_iterator::start(T &iter, const uint8_t *json) noexcept { - bool has_value; - SIMDJSON_TRY( iter.get_iterator().start_array(json).get(has_value) ); - if (!has_value) { iter.iteration_finished(); } - return array_iterator(iter); -} -template -simdjson_really_inline simdjson_result array_iterator::operator*() noexcept { - error_code error = iter->get_iterator().error(); - if (error) { iter->iteration_finished(); return error; } - return value::start(iter->borrow_iterator()); -} -template -simdjson_really_inline bool array_iterator::operator==(const array_iterator &other) noexcept { - return !(*this != other); -} -template -simdjson_really_inline bool array_iterator::operator!=(const array_iterator &) noexcept { - return iter->is_iterator_alive(); -} -template -simdjson_really_inline array_iterator &array_iterator::operator++() noexcept { - // TODO this is a safety rail ... users should exit loops as soon as they receive an error. - // Nonetheless, let's see if performance is OK with this if statement--the compiler may give it to us for free. - if (!iter->is_iterator_alive()) { return *this; } // Iterator will be released if there is an error - bool has_value; - error_code error = iter->get_iterator().has_next_element().get(has_value); // If there's an error, has_next stays true. - if (!(error || has_value)) { iter->iteration_finished(); } - return *this; -} - -} // namespace ondemand -} // namespace SIMDJSON_IMPLEMENTATION -} // namespace simdjson - -namespace simdjson { - -template -simdjson_really_inline simdjson_result>::simdjson_result( - SIMDJSON_IMPLEMENTATION::ondemand::array_iterator &&value -) noexcept - : SIMDJSON_IMPLEMENTATION::implementation_simdjson_result_base>(std::forward>(value)) -{ -} -template -simdjson_really_inline simdjson_result>::simdjson_result(error_code error) noexcept - : SIMDJSON_IMPLEMENTATION::implementation_simdjson_result_base>({}, error) -{ -} - -template -simdjson_really_inline simdjson_result simdjson_result>::operator*() noexcept { - if (this->error()) { this->second = SUCCESS; return this->error(); } - return *this->first; -} -template -simdjson_really_inline bool simdjson_result>::operator==(const simdjson_result> &other) noexcept { - if (this->error()) { return true; } - return this->first == other.first; -} -template -simdjson_really_inline bool simdjson_result>::operator!=(const simdjson_result> &other) noexcept { - if (this->error()) { return false; } - return this->first != other.first; -} -template -simdjson_really_inline simdjson_result> &simdjson_result>::operator++() noexcept { - if (this->error()) { return *this; } - ++(this->first); - return *this; -} - -} // namespace simdjson -/* end file include/simdjson/generic/ondemand/array_iterator-inl.h */ -/* begin file include/simdjson/generic/ondemand/object_iterator-inl.h */ -namespace simdjson { -namespace SIMDJSON_IMPLEMENTATION { -namespace ondemand { - -// -// object_iterator -// - -simdjson_really_inline object_iterator::object_iterator(json_iterator_ref &_iter) noexcept : iter{&_iter} {} - -simdjson_really_inline simdjson_result object_iterator::operator*() noexcept { - error_code error = (*iter)->error(); - if (error) { iter->release(); return error; } - auto result = field::start(*iter); - // TODO this is a safety rail ... users should exit loops as soon as they receive an error. - // Nonetheless, let's see if performance is OK with this if statement--the compiler may give it to us for free. - if (result.error()) { iter->release(); } - return result; -} -simdjson_really_inline bool object_iterator::operator==(const object_iterator &other) noexcept { - return !(*this != other); -} -simdjson_really_inline bool object_iterator::operator!=(const object_iterator &) noexcept { - return iter->is_alive(); -} -simdjson_really_inline object_iterator &object_iterator::operator++() noexcept { - // TODO this is a safety rail ... users should exit loops as soon as they receive an error. - // Nonetheless, let's see if performance is OK with this if statement--the compiler may give it to us for free. - if (!iter->is_alive()) { return *this; } // Iterator will be released if there is an error - bool has_value; - error_code error = (*iter)->has_next_field().get(has_value); - if (!(error || has_value)) { iter->release(); } - return *this; -} - -} // namespace ondemand -} // namespace SIMDJSON_IMPLEMENTATION -} // namespace simdjson - -namespace simdjson { - -simdjson_really_inline simdjson_result::simdjson_result( - SIMDJSON_IMPLEMENTATION::ondemand::object_iterator &&value -) noexcept - : implementation_simdjson_result_base(std::forward(value)) -{ -} -simdjson_really_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base({}, error) -{ -} - -simdjson_really_inline simdjson_result simdjson_result::operator*() noexcept { - if (error()) { second = SUCCESS; return error(); } - return *first; -} -// Assumes it's being compared with the end. true if depth < iter->depth. -simdjson_really_inline bool simdjson_result::operator==(const simdjson_result &other) noexcept { - if (error()) { return true; } - return first == other.first; -} -// Assumes it's being compared with the end. true if depth >= iter->depth. -simdjson_really_inline bool simdjson_result::operator!=(const simdjson_result &other) noexcept { - if (error()) { return false; } - return first != other.first; -} -// Checks for ']' and ',' -simdjson_really_inline simdjson_result &simdjson_result::operator++() noexcept { - if (error()) { return *this; } - ++first; - return *this; -} - -} // namespace simdjson -/* end file include/simdjson/generic/ondemand/object_iterator-inl.h */ -/* begin file include/simdjson/generic/ondemand/array-inl.h */ -namespace simdjson { -namespace SIMDJSON_IMPLEMENTATION { -namespace ondemand { - -// -// ### Live States -// -// While iterating or looking up values, depth >= iter->depth. at_start may vary. Error is -// always SUCCESS: -// -// - Start: This is the state when the array is first found and the iterator is just past the `{`. -// In this state, at_start == true. -// - Next: After we hand a scalar value to the user, or an array/object which they then fully -// iterate over, the iterator is at the `,` before the next value (or `]`). In this state, -// depth == iter->depth, at_start == false, and error == SUCCESS. -// - Unfinished Business: When we hand an array/object to the user which they do not fully -// iterate over, we need to finish that iteration by skipping child values until we reach the -// Next state. In this state, depth > iter->depth, at_start == false, and error == SUCCESS. -// -// ## Error States -// -// In error states, we will yield exactly one more value before stopping. iter->depth == depth -// and at_start is always false. We decrement after yielding the error, moving to the Finished -// state. -// -// - Chained Error: When the array iterator is part of an error chain--for example, in -// `for (auto tweet : doc["tweets"])`, where the tweet element may be missing or not be an -// array--we yield that error in the loop, exactly once. In this state, error != SUCCESS and -// iter->depth == depth, and at_start == false. We decrement depth when we yield the error. -// - Missing Comma Error: When the iterator ++ method discovers there is no comma between elements, -// we flag that as an error and treat it exactly the same as a Chained Error. In this state, -// error == TAPE_ERROR, iter->depth == depth, and at_start == false. -// -// ## Terminal State -// -// The terminal state has iter->depth < depth. at_start is always false. -// -// - Finished: When we have reached a `]` or have reported an error, we are finished. We signal this -// by decrementing depth. In this state, iter->depth < depth, at_start == false, and -// error == SUCCESS. -// - -simdjson_really_inline array::array(json_iterator_ref &&_iter) noexcept - : iter{std::forward(_iter)} -{ -} - -simdjson_really_inline array::~array() noexcept { - if (iter.is_alive()) { - logger::log_event(*iter, "unfinished", "array"); - simdjson_unused auto _err = iter->skip_container(); - iter.release(); - } -} - -simdjson_really_inline simdjson_result array::start(json_iterator_ref &&iter) noexcept { - bool has_value; - SIMDJSON_TRY( iter->start_array().get(has_value) ); - if (!has_value) { iter.release(); } - return array(std::forward(iter)); -} -simdjson_really_inline array array::started(json_iterator_ref &&iter) noexcept { - if (!iter->started_array()) { iter.release(); } - return array(std::forward(iter)); -} - -// -// For array_iterator -// -simdjson_really_inline json_iterator &array::get_iterator() noexcept { - return *iter; -} -simdjson_really_inline json_iterator_ref array::borrow_iterator() noexcept { - return iter.borrow(); -} -simdjson_really_inline bool array::is_iterator_alive() const noexcept { - return iter.is_alive(); -} -simdjson_really_inline void array::iteration_finished() noexcept { - iter.release(); -} - -simdjson_really_inline array_iterator array::begin() & noexcept { - return *this; -} -simdjson_really_inline array_iterator array::end() & noexcept { - return {}; -} - -} // namespace ondemand -} // namespace SIMDJSON_IMPLEMENTATION -} // namespace simdjson - -namespace simdjson { - -simdjson_really_inline simdjson_result::simdjson_result( - SIMDJSON_IMPLEMENTATION::ondemand::array &&value -) noexcept - : implementation_simdjson_result_base( - std::forward(value) - ) -{ -} -simdjson_really_inline simdjson_result::simdjson_result( - error_code error -) noexcept - : implementation_simdjson_result_base(error) -{ -} - -simdjson_really_inline simdjson_result> simdjson_result::begin() & noexcept { - if (error()) { return error(); } - return first.begin(); -} -simdjson_really_inline simdjson_result> simdjson_result::end() & noexcept { - if (error()) { return error(); } - return first.end(); -} - -} // namespace simdjson -/* end file include/simdjson/generic/ondemand/array-inl.h */ -/* begin file include/simdjson/generic/ondemand/document-inl.h */ -namespace simdjson { -namespace SIMDJSON_IMPLEMENTATION { -namespace ondemand { - -simdjson_really_inline document::document(ondemand::json_iterator &&_iter, const uint8_t *_json) noexcept - : iter{std::forward(_iter)}, - json{_json} -{ - logger::log_start_value(iter, "document"); -} -simdjson_really_inline document::~document() noexcept { - if (iter.is_alive()) { - logger::log_end_value(iter, "document"); - } -} - -simdjson_really_inline void document::assert_at_start() const noexcept { - SIMDJSON_ASSUME(json != nullptr); -} -simdjson_really_inline document document::start(json_iterator &&iter) noexcept { - auto json = iter.advance(); - return document(std::forward(iter), json); -} - -simdjson_really_inline value document::as_value() noexcept { - assert_at_start(); - return { iter.borrow(), json }; -} - -template -simdjson_result document::consume_if_success(simdjson_result &&result) noexcept { - if (result.error()) { json = nullptr; } - return std::forward>(result); -} - -simdjson_really_inline simdjson_result document::get_array() & noexcept { - assert_at_start(); - return consume_if_success( as_value().get_array() ); -} -simdjson_really_inline simdjson_result document::get_object() & noexcept { - assert_at_start(); - return consume_if_success( as_value().get_object() ); -} -simdjson_really_inline simdjson_result document::get_uint64() noexcept { - assert_at_start(); - return consume_if_success( iter.parse_root_uint64(json) ); -} -simdjson_really_inline simdjson_result document::get_int64() noexcept { - assert_at_start(); - return consume_if_success( iter.parse_root_int64(json) ); -} -simdjson_really_inline simdjson_result document::get_double() noexcept { - assert_at_start(); - return consume_if_success( iter.parse_root_double(json) ); -} -simdjson_really_inline simdjson_result document::get_string() & noexcept { - return consume_if_success( as_value().get_string() ); -} -simdjson_really_inline simdjson_result document::get_raw_json_string() & noexcept { - return consume_if_success( as_value().get_raw_json_string() ); -} -simdjson_really_inline simdjson_result document::get_bool() noexcept { - assert_at_start(); - return consume_if_success( iter.parse_root_bool(json) ); -} -simdjson_really_inline bool document::is_null() noexcept { - assert_at_start(); - if (iter.root_is_null(json)) { json = nullptr; return true; } - return false; -} - -template<> simdjson_really_inline simdjson_result document::get() & noexcept { return get_array(); } -template<> simdjson_really_inline simdjson_result document::get() & noexcept { return get_object(); } -template<> simdjson_really_inline simdjson_result document::get() & noexcept { return get_raw_json_string(); } -template<> simdjson_really_inline simdjson_result document::get() & noexcept { return get_string(); } -template<> simdjson_really_inline simdjson_result document::get() & noexcept { return get_double(); } -template<> simdjson_really_inline simdjson_result document::get() & noexcept { return get_uint64(); } -template<> simdjson_really_inline simdjson_result document::get() & noexcept { return get_int64(); } -template<> simdjson_really_inline simdjson_result document::get() & noexcept { return get_bool(); } - -template<> simdjson_really_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_double(); } -template<> simdjson_really_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_uint64(); } -template<> simdjson_really_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_int64(); } -template<> simdjson_really_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_bool(); } - -template simdjson_really_inline error_code document::get(T &out) & noexcept { - return get().get(out); -} -template simdjson_really_inline error_code document::get(T &out) && noexcept { - return std::forward(*this).get().get(out); -} - -#if SIMDJSON_EXCEPTIONS -simdjson_really_inline document::operator array() & noexcept(false) { return get_array(); } -simdjson_really_inline document::operator object() & noexcept(false) { return get_object(); } -simdjson_really_inline document::operator uint64_t() noexcept(false) { return get_uint64(); } -simdjson_really_inline document::operator int64_t() noexcept(false) { return get_int64(); } -simdjson_really_inline document::operator double() noexcept(false) { return get_double(); } -simdjson_really_inline document::operator std::string_view() & noexcept(false) { return get_string(); } -simdjson_really_inline document::operator raw_json_string() & noexcept(false) { return get_raw_json_string(); } -simdjson_really_inline document::operator bool() noexcept(false) { return get_bool(); } -#endif - -simdjson_really_inline simdjson_result> document::begin() & noexcept { - return array_iterator::start(*this, json); -} -simdjson_really_inline simdjson_result> document::end() & noexcept { - return {}; -} -simdjson_really_inline simdjson_result document::operator[](std::string_view key) & noexcept { - return get_object()[key]; -} -simdjson_really_inline simdjson_result document::operator[](const char *key) & noexcept { - return get_object()[key]; -} - -// -// For array_iterator -// -simdjson_really_inline json_iterator &document::get_iterator() noexcept { - return iter; -} -simdjson_really_inline json_iterator_ref document::borrow_iterator() noexcept { - return iter.borrow(); -} -simdjson_really_inline bool document::is_iterator_alive() const noexcept { - return json; -} -simdjson_really_inline void document::iteration_finished() noexcept { - json = nullptr; -} - -} // namespace ondemand -} // namespace SIMDJSON_IMPLEMENTATION -} // namespace simdjson - -namespace simdjson { - -simdjson_really_inline simdjson_result::simdjson_result( - SIMDJSON_IMPLEMENTATION::ondemand::document &&value -) noexcept : - implementation_simdjson_result_base( - std::forward(value) - ) -{ -} -simdjson_really_inline simdjson_result::simdjson_result( - error_code error -) noexcept : - implementation_simdjson_result_base( - error - ) -{ -} - -simdjson_really_inline simdjson_result> simdjson_result::begin() & noexcept { - if (error()) { return error(); } - return first.begin(); -} -simdjson_really_inline simdjson_result> simdjson_result::end() & noexcept { - return {}; -} -simdjson_really_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { - if (error()) { return error(); } - return first[key]; -} -simdjson_really_inline simdjson_result simdjson_result::operator[](const char *key) & noexcept { - if (error()) { return error(); } - return first[key]; -} -simdjson_really_inline simdjson_result simdjson_result::get_array() & noexcept { - if (error()) { return error(); } - return first.get_array(); -} -simdjson_really_inline simdjson_result simdjson_result::get_object() & noexcept { - if (error()) { return error(); } - return first.get_object(); -} -simdjson_really_inline simdjson_result simdjson_result::get_uint64() noexcept { - if (error()) { return error(); } - return first.get_uint64(); -} -simdjson_really_inline simdjson_result simdjson_result::get_int64() noexcept { - if (error()) { return error(); } - return first.get_int64(); -} -simdjson_really_inline simdjson_result simdjson_result::get_double() noexcept { - if (error()) { return error(); } - return first.get_double(); -} -simdjson_really_inline simdjson_result simdjson_result::get_string() & noexcept { - if (error()) { return error(); } - return first.get_string(); -} -simdjson_really_inline simdjson_result simdjson_result::get_raw_json_string() & noexcept { - if (error()) { return error(); } - return first.get_raw_json_string(); -} -simdjson_really_inline simdjson_result simdjson_result::get_bool() noexcept { - if (error()) { return error(); } - return first.get_bool(); -} -simdjson_really_inline bool simdjson_result::is_null() noexcept { - if (error()) { return error(); } - return first.is_null(); -} - -template -simdjson_really_inline simdjson_result simdjson_result::get() & noexcept { - if (error()) { return error(); } - return first.get(); -} -template -simdjson_really_inline simdjson_result simdjson_result::get() && noexcept { - if (error()) { return error(); } - return std::forward(first).get(); -} -template -simdjson_really_inline error_code simdjson_result::get(T &out) & noexcept { - if (error()) { return error(); } - return first.get(out); -} -template -simdjson_really_inline error_code simdjson_result::get(T &out) && noexcept { - if (error()) { return error(); } - return std::forward(first).get(out); -} - -template<> simdjson_really_inline simdjson_result simdjson_result::get() & noexcept = delete; -template<> simdjson_really_inline simdjson_result simdjson_result::get() && noexcept { - if (error()) { return error(); } - return std::forward(first); -} -template<> simdjson_really_inline error_code simdjson_result::get(SIMDJSON_IMPLEMENTATION::ondemand::document &out) & noexcept = delete; -template<> simdjson_really_inline error_code simdjson_result::get(SIMDJSON_IMPLEMENTATION::ondemand::document &out) && noexcept { - if (error()) { return error(); } - out = std::forward(first); - return SUCCESS; -} - -#if SIMDJSON_EXCEPTIONS -simdjson_really_inline simdjson_result::operator SIMDJSON_IMPLEMENTATION::ondemand::array() & noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -simdjson_really_inline simdjson_result::operator SIMDJSON_IMPLEMENTATION::ondemand::object() & noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -simdjson_really_inline simdjson_result::operator uint64_t() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -simdjson_really_inline simdjson_result::operator int64_t() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -simdjson_really_inline simdjson_result::operator double() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -simdjson_really_inline simdjson_result::operator std::string_view() & noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -simdjson_really_inline simdjson_result::operator SIMDJSON_IMPLEMENTATION::ondemand::raw_json_string() & noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -simdjson_really_inline simdjson_result::operator bool() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -#endif - -} // namespace simdjson -/* end file include/simdjson/generic/ondemand/document-inl.h */ -/* begin file include/simdjson/generic/ondemand/value-inl.h */ -namespace simdjson { -namespace SIMDJSON_IMPLEMENTATION { -namespace ondemand { - -simdjson_really_inline value::value(json_iterator_ref && _iter, const uint8_t *_json) noexcept - : iter{std::forward(_iter)}, - json{_json} -{ - iter.assert_is_active(); - SIMDJSON_ASSUME(json != nullptr); -} - -simdjson_really_inline value::~value() noexcept { - // If the user didn't actually use the value, we need to check if it's an array/object and bump - // depth so that the array/object iteration routines will work correctly. - // PERF TODO this better be elided entirely when people actually use the value. Don't care if it - // gets bumped on the error path unless that's costing us something important. - if (iter.is_alive()) { - if (*json == '[' || *json == '{') { - logger::log_start_value(*iter, "unused"); - simdjson_unused auto _err = iter->skip_container(); - } else { - logger::log_value(*iter, "unused"); - } - iter.release(); - } -} - -simdjson_really_inline value value::start(json_iterator_ref &&iter) noexcept { - return { std::forward(iter), iter->advance() }; -} - -simdjson_really_inline const uint8_t *value::consume() noexcept { - iter.release(); - return json; -} -template -simdjson_really_inline simdjson_result value::consume_if_success(simdjson_result &&result) noexcept { - if (!result.error()) { consume(); } - return std::forward>(result); -} - -simdjson_really_inline simdjson_result value::get_array() noexcept { - bool has_value; - SIMDJSON_TRY( iter->start_array(json).get(has_value) ); - if (!has_value) { iter.release(); } - return array(std::move(iter)); -} -simdjson_really_inline simdjson_result value::get_object() noexcept { - bool has_value; - SIMDJSON_TRY( iter->start_object(json).get(has_value) ); - if (!has_value) { iter.release(); } - return object(std::move(iter)); -} -simdjson_really_inline simdjson_result value::get_raw_json_string() && noexcept { - return iter->consume_raw_json_string(); -} -simdjson_really_inline simdjson_result value::get_raw_json_string() & noexcept { - return consume_if_success( iter->parse_raw_json_string(json) ); -} -simdjson_really_inline simdjson_result value::get_string() && noexcept { - auto result = iter->parse_string(json); - consume(); - return result; -} -simdjson_really_inline simdjson_result value::get_string() & noexcept { - return consume_if_success( iter->parse_string(json) ); -} -simdjson_really_inline simdjson_result value::get_double() && noexcept { - return iter->parse_double(consume()); -} -simdjson_really_inline simdjson_result value::get_double() & noexcept { - return consume_if_success( iter->parse_double(json) ); -} -simdjson_really_inline simdjson_result value::get_uint64() && noexcept { - return iter->parse_uint64(consume()); -} -simdjson_really_inline simdjson_result value::get_uint64() & noexcept { - return consume_if_success( iter->parse_uint64(json) ); -} -simdjson_really_inline simdjson_result value::get_int64() && noexcept { - return iter->parse_int64(consume()); -} -simdjson_really_inline simdjson_result value::get_int64() & noexcept { - return consume_if_success( iter->parse_int64(json) ); -} -simdjson_really_inline simdjson_result value::get_bool() && noexcept { - return iter->parse_bool(consume()); -} -simdjson_really_inline simdjson_result value::get_bool() & noexcept { - return consume_if_success( iter->parse_bool(json) ); -} -simdjson_really_inline bool value::is_null() && noexcept { - return iter->is_null(consume()); -} -simdjson_really_inline bool value::is_null() & noexcept { - if (!iter->is_null(json)) { return false; } - consume(); - return true; -} - -template<> simdjson_really_inline simdjson_result value::get() & noexcept { return get_array(); } -template<> simdjson_really_inline simdjson_result value::get() & noexcept { return get_object(); } -template<> simdjson_really_inline simdjson_result value::get() & noexcept { return get_raw_json_string(); } -template<> simdjson_really_inline simdjson_result value::get() & noexcept { return get_string(); } -template<> simdjson_really_inline simdjson_result value::get() & noexcept { return get_double(); } -template<> simdjson_really_inline simdjson_result value::get() & noexcept { return get_uint64(); } -template<> simdjson_really_inline simdjson_result value::get() & noexcept { return get_int64(); } -template<> simdjson_really_inline simdjson_result value::get() & noexcept { return get_bool(); } - -template<> simdjson_really_inline simdjson_result value::get() && noexcept { return std::forward(*this); } -template<> simdjson_really_inline simdjson_result value::get() && noexcept { return std::forward(*this).get_array(); } -template<> simdjson_really_inline simdjson_result value::get() && noexcept { return std::forward(*this).get_object(); } -template<> simdjson_really_inline simdjson_result value::get() && noexcept { return std::forward(*this).get_raw_json_string(); } -template<> simdjson_really_inline simdjson_result value::get() && noexcept { return std::forward(*this).get_string(); } -template<> simdjson_really_inline simdjson_result value::get() && noexcept { return std::forward(*this).get_double(); } -template<> simdjson_really_inline simdjson_result value::get() && noexcept { return std::forward(*this).get_uint64(); } -template<> simdjson_really_inline simdjson_result value::get() && noexcept { return std::forward(*this).get_int64(); } -template<> simdjson_really_inline simdjson_result value::get() && noexcept { return std::forward(*this).get_bool(); } - -template simdjson_really_inline error_code value::get(T &out) & noexcept { - return get().get(out); -} -template simdjson_really_inline error_code value::get(T &out) && noexcept { - return std::forward(*this).get().get(out); -} - -#if SIMDJSON_EXCEPTIONS -simdjson_really_inline value::operator array() noexcept(false) { - return std::forward(*this).get_array(); -} -simdjson_really_inline value::operator object() noexcept(false) { - return std::forward(*this).get_object(); -} -simdjson_really_inline value::operator uint64_t() && noexcept(false) { - return std::forward(*this).get_uint64(); -} -simdjson_really_inline value::operator uint64_t() & noexcept(false) { - return std::forward(*this).get_uint64(); -} -simdjson_really_inline value::operator int64_t() && noexcept(false) { - return std::forward(*this).get_int64(); -} -simdjson_really_inline value::operator int64_t() & noexcept(false) { - return std::forward(*this).get_int64(); -} -simdjson_really_inline value::operator double() && noexcept(false) { - return std::forward(*this).get_double(); -} -simdjson_really_inline value::operator double() & noexcept(false) { - return std::forward(*this).get_double(); -} -simdjson_really_inline value::operator std::string_view() && noexcept(false) { - return std::forward(*this).get_string(); -} -simdjson_really_inline value::operator std::string_view() & noexcept(false) { - return std::forward(*this).get_string(); -} -simdjson_really_inline value::operator raw_json_string() && noexcept(false) { - return std::forward(*this).get_raw_json_string(); -} -simdjson_really_inline value::operator raw_json_string() & noexcept(false) { - return std::forward(*this).get_raw_json_string(); -} -simdjson_really_inline value::operator bool() && noexcept(false) { - return std::forward(*this).get_bool(); -} -simdjson_really_inline value::operator bool() & noexcept(false) { - return std::forward(*this).get_bool(); -} -#endif - -simdjson_really_inline simdjson_result> value::begin() & noexcept { - return array_iterator::start(*this, json); -} -simdjson_really_inline simdjson_result> value::end() & noexcept { - return {}; -} - -simdjson_really_inline void value::log_value(const char *type) const noexcept { - char json_char[]{char(json[0]), '\0'}; - logger::log_value(*iter, type, json_char); -} -simdjson_really_inline void value::log_error(const char *message) const noexcept { - char json_char[]{char(json[0]), '\0'}; - logger::log_error(*iter, message, json_char); -} - -// -// For array_iterator -// -simdjson_really_inline json_iterator &value::get_iterator() noexcept { - return *iter; -} -simdjson_really_inline json_iterator_ref value::borrow_iterator() noexcept { - return iter.borrow(); -} -simdjson_really_inline bool value::is_iterator_alive() const noexcept { - return iter.is_alive(); -} -simdjson_really_inline void value::iteration_finished() noexcept { - iter.release(); -} - -} // namespace ondemand -} // namespace SIMDJSON_IMPLEMENTATION -} // namespace simdjson - -namespace simdjson { - -simdjson_really_inline simdjson_result::simdjson_result( - SIMDJSON_IMPLEMENTATION::ondemand::value &&value -) noexcept : - implementation_simdjson_result_base( - std::forward(value) - ) -{ -} -simdjson_really_inline simdjson_result::simdjson_result( - error_code error -) noexcept : - implementation_simdjson_result_base(error) -{ -} - -simdjson_really_inline simdjson_result> simdjson_result::begin() & noexcept { - if (error()) { return error(); } - return first.begin(); -} -simdjson_really_inline simdjson_result> simdjson_result::end() & noexcept { - if (error()) { return error(); } - return {}; -} - -simdjson_really_inline simdjson_result simdjson_result::get_array() noexcept { - if (error()) { return error(); } - return std::forward(first).get_array(); -} -simdjson_really_inline simdjson_result simdjson_result::get_object() noexcept { - if (error()) { return error(); } - return std::forward(first).get_object(); -} -simdjson_really_inline simdjson_result simdjson_result::get_uint64() && noexcept { - if (error()) { return error(); } - return std::forward(first).get_uint64(); -} -simdjson_really_inline simdjson_result simdjson_result::get_uint64() & noexcept { - if (error()) { return error(); } - return std::forward(first).get_uint64(); -} -simdjson_really_inline simdjson_result simdjson_result::get_int64() && noexcept { - if (error()) { return error(); } - return std::forward(first).get_int64(); -} -simdjson_really_inline simdjson_result simdjson_result::get_int64() & noexcept { - if (error()) { return error(); } - return std::forward(first).get_int64(); -} -simdjson_really_inline simdjson_result simdjson_result::get_double() && noexcept { - if (error()) { return error(); } - return std::forward(first).get_double(); -} -simdjson_really_inline simdjson_result simdjson_result::get_double() & noexcept { - if (error()) { return error(); } - return std::forward(first).get_double(); -} -simdjson_really_inline simdjson_result simdjson_result::get_string() && noexcept { - if (error()) { return error(); } - return std::forward(first).get_string(); -} -simdjson_really_inline simdjson_result simdjson_result::get_string() & noexcept { - if (error()) { return error(); } - return std::forward(first).get_string(); -} -simdjson_really_inline simdjson_result simdjson_result::get_raw_json_string() && noexcept { - if (error()) { return error(); } - return std::forward(first).get_raw_json_string(); -} -simdjson_really_inline simdjson_result simdjson_result::get_raw_json_string() & noexcept { - if (error()) { return error(); } - return std::forward(first).get_raw_json_string(); -} -simdjson_really_inline simdjson_result simdjson_result::get_bool() && noexcept { - if (error()) { return error(); } - return std::forward(first).get_bool(); -} -simdjson_really_inline simdjson_result simdjson_result::get_bool() & noexcept { - if (error()) { return error(); } - return std::forward(first).get_bool(); -} -simdjson_really_inline bool simdjson_result::is_null() && noexcept { - if (error()) { return false; } - return std::forward(first).is_null(); -} -simdjson_really_inline bool simdjson_result::is_null() & noexcept { - if (error()) { return false; } - return std::forward(first).is_null(); -} - -template simdjson_really_inline simdjson_result simdjson_result::get() & noexcept { - if (error()) { return error(); } - return first.get(); -} -template simdjson_really_inline simdjson_result simdjson_result::get() && noexcept { - if (error()) { return error(); } - return std::forward(first).get(); -} -template simdjson_really_inline error_code simdjson_result::get(T &out) & noexcept { - if (error()) { return error(); } - return first.get(out); -} -template simdjson_really_inline error_code simdjson_result::get(T &out) && noexcept { - if (error()) { return error(); } - return std::forward(first).get(out); -} - -template<> simdjson_really_inline simdjson_result simdjson_result::get() & noexcept = delete; -template<> simdjson_really_inline simdjson_result simdjson_result::get() && noexcept { - if (error()) { return error(); } - return std::forward(first); -} -template<> simdjson_really_inline error_code simdjson_result::get(SIMDJSON_IMPLEMENTATION::ondemand::value &out) & noexcept = delete; -template<> simdjson_really_inline error_code simdjson_result::get(SIMDJSON_IMPLEMENTATION::ondemand::value &out) && noexcept { - if (error()) { return error(); } - out = std::forward(first); - return SUCCESS; -} - -#if SIMDJSON_EXCEPTIONS -simdjson_really_inline simdjson_result::operator SIMDJSON_IMPLEMENTATION::ondemand::array() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return std::forward(first); -} -simdjson_really_inline simdjson_result::operator SIMDJSON_IMPLEMENTATION::ondemand::object() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return std::forward(first); -} -simdjson_really_inline simdjson_result::operator uint64_t() && noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return std::forward(first); -} -simdjson_really_inline simdjson_result::operator uint64_t() & noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return std::forward(first); -} -simdjson_really_inline simdjson_result::operator int64_t() && noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return std::forward(first); -} -simdjson_really_inline simdjson_result::operator int64_t() & noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return std::forward(first); -} -simdjson_really_inline simdjson_result::operator double() && noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return std::forward(first); -} -simdjson_really_inline simdjson_result::operator double() & noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return std::forward(first); -} -simdjson_really_inline simdjson_result::operator std::string_view() && noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return std::forward(first); -} -simdjson_really_inline simdjson_result::operator std::string_view() & noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return std::forward(first); -} -simdjson_really_inline simdjson_result::operator SIMDJSON_IMPLEMENTATION::ondemand::raw_json_string() && noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return std::forward(first); -} -simdjson_really_inline simdjson_result::operator SIMDJSON_IMPLEMENTATION::ondemand::raw_json_string() & noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return std::forward(first); -} -simdjson_really_inline simdjson_result::operator bool() && noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return std::forward(first); -} -simdjson_really_inline simdjson_result::operator bool() & noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return std::forward(first); -} -#endif - -} // namespace simdjson -/* end file include/simdjson/generic/ondemand/value-inl.h */ -/* begin file include/simdjson/generic/ondemand/field-inl.h */ -namespace simdjson { -namespace SIMDJSON_IMPLEMENTATION { -namespace ondemand { - -// clang 6 doesn't think the default constructor can be noexcept, so we make it explicit -simdjson_really_inline field::field() noexcept : std::pair() {} - -simdjson_really_inline field::field(raw_json_string key, ondemand::value &&value) noexcept - : std::pair(key, std::forward(value)) -{ -} - -simdjson_really_inline simdjson_result field::start(json_iterator_ref &parent_iter) noexcept { - raw_json_string key; - SIMDJSON_TRY( parent_iter->field_key().get(key) ); - SIMDJSON_TRY( parent_iter->field_value() ); - return field::start(parent_iter.borrow(), key); -} - -simdjson_really_inline simdjson_result field::start(json_iterator_ref &&iter, raw_json_string key) noexcept { - return field(key, value::start(std::forward(iter))); -} - -simdjson_really_inline simdjson_warn_unused simdjson_result field::unescaped_key() noexcept { - SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() by Visual Studio won't let us. - simdjson_result answer = first.unescape(second.get_iterator()); - first.consume(); - return answer; -} - -simdjson_really_inline raw_json_string field::key() const noexcept { - SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() by Visual Studio won't let us. - return first; -} - -simdjson_really_inline value &field::value() & noexcept { - return second; -} - -simdjson_really_inline value field::value() && noexcept { - return std::forward(*this).second; -} - -} // namespace ondemand -} // namespace SIMDJSON_IMPLEMENTATION -} // namespace simdjson - -namespace simdjson { - -simdjson_really_inline simdjson_result::simdjson_result( - SIMDJSON_IMPLEMENTATION::ondemand::field &&value -) noexcept : - implementation_simdjson_result_base( - std::forward(value) - ) -{ -} -simdjson_really_inline simdjson_result::simdjson_result( - error_code error -) noexcept : - implementation_simdjson_result_base(error) -{ -} - -simdjson_really_inline simdjson_result simdjson_result::key() noexcept { - if (error()) { return error(); } - return first.key(); -} -simdjson_really_inline simdjson_result simdjson_result::unescaped_key() noexcept { - if (error()) { return error(); } - return first.unescaped_key(); -} -simdjson_really_inline simdjson_result simdjson_result::value() noexcept { - if (error()) { return error(); } - return std::move(first.value()); -} - -} // namespace simdjson -/* end file include/simdjson/generic/ondemand/field-inl.h */ -/* begin file include/simdjson/generic/ondemand/object-inl.h */ -namespace simdjson { -namespace SIMDJSON_IMPLEMENTATION { -namespace ondemand { - -// -// ### Live States -// -// While iterating or looking up values, depth >= iter->depth. at_start may vary. Error is -// always SUCCESS: -// -// - Start: This is the state when the object is first found and the iterator is just past the {. -// In this state, at_start == true. -// - Next: After we hand a scalar value to the user, or an array/object which they then fully -// iterate over, the iterator is at the , or } before the next value. In this state, -// depth == iter->depth, at_start == false, and error == SUCCESS. -// - Unfinished Business: When we hand an array/object to the user which they do not fully -// iterate over, we need to finish that iteration by skipping child values until we reach the -// Next state. In this state, depth > iter->depth, at_start == false, and error == SUCCESS. -// -// ## Error States -// -// In error states, we will yield exactly one more value before stopping. iter->depth == depth -// and at_start is always false. We decrement after yielding the error, moving to the Finished -// state. -// -// - Chained Error: When the object iterator is part of an error chain--for example, in -// `for (auto tweet : doc["tweets"])`, where the tweet field may be missing or not be an -// object--we yield that error in the loop, exactly once. In this state, error != SUCCESS and -// iter->depth == depth, and at_start == false. We decrement depth when we yield the error. -// - Missing Comma Error: When the iterator ++ method discovers there is no comma between fields, -// we flag that as an error and treat it exactly the same as a Chained Error. In this state, -// error == TAPE_ERROR, iter->depth == depth, and at_start == false. -// -// Errors that occur while reading a field to give to the user (such as when the key is not a -// string or the field is missing a colon) are yielded immediately. Depth is then decremented, -// moving to the Finished state without transitioning through an Error state at all. -// -// ## Terminal State -// -// The terminal state has iter->depth < depth. at_start is always false. -// -// - Finished: When we have reached a }, we are finished. We signal this by decrementing depth. -// In this state, iter->depth < depth, at_start == false, and error == SUCCESS. -// - -simdjson_really_inline object::object(json_iterator_ref &&_iter) noexcept - : iter{std::forward(_iter)}, - at_start{iter.is_alive()} -{ -} - - -simdjson_really_inline object::~object() noexcept { - if (iter.is_alive()) { - logger::log_event(*iter, "unfinished", "object"); - simdjson_unused auto _err = iter->skip_container(); - iter.release(); - } -} - -simdjson_really_inline error_code object::find_field(const std::string_view key) noexcept { - if (!iter.is_alive()) { return NO_SUCH_FIELD; } - - // Unless this is the first field, we need to advance past the , and check for } - error_code error; - bool has_value; - if (at_start) { - at_start = false; - has_value = true; - } else { - if ((error = iter->has_next_field().get(has_value) )) { iter.release(); return error; } - } - while (has_value) { - // Get the key - raw_json_string actual_key; - if ((error = iter->field_key().get(actual_key) )) { iter.release(); return error; }; - if ((error = iter->field_value() )) { iter.release(); return error; } - - // Check if it matches - if (actual_key == key) { - logger::log_event(*iter, "match", key, -2); - return SUCCESS; - } - logger::log_event(*iter, "no match", key, -2); - SIMDJSON_TRY( iter->skip() ); // Skip the value entirely - if ((error = iter->has_next_field().get(has_value) )) { iter.release(); return error; } - } - - // If the loop ended, we're out of fields to look at. - iter.release(); - return NO_SUCH_FIELD; -} - -simdjson_really_inline simdjson_result object::operator[](const std::string_view key) & noexcept { - SIMDJSON_TRY( find_field(key) ); - return value::start(iter.borrow()); -} - -simdjson_really_inline simdjson_result object::operator[](const std::string_view key) && noexcept { - SIMDJSON_TRY( find_field(key) ); - return value::start(std::forward(iter)); -} - -simdjson_really_inline simdjson_result object::start(json_iterator_ref &&iter) noexcept { - bool has_value; - SIMDJSON_TRY( iter->start_object().get(has_value) ); - if (!has_value) { iter.release(); } - return object(std::forward(iter)); -} -simdjson_really_inline object object::started(json_iterator_ref &&iter) noexcept { - if (!iter->started_object()) { iter.release(); } - return object(std::forward(iter)); -} -simdjson_really_inline object_iterator object::begin() noexcept { - if (at_start) { - iter.assert_is_active(); - } else { - iter.assert_is_not_active(); - } - at_start = false; - return iter; -} -simdjson_really_inline object_iterator object::end() noexcept { - return {}; -} - -} // namespace ondemand -} // namespace SIMDJSON_IMPLEMENTATION -} // namespace simdjson - -namespace simdjson { - -simdjson_really_inline simdjson_result::simdjson_result(SIMDJSON_IMPLEMENTATION::ondemand::object &&value) noexcept - : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_really_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base(error) {} - -simdjson_really_inline simdjson_result simdjson_result::begin() noexcept { - if (error()) { return error(); } - return first.begin(); -} -simdjson_really_inline simdjson_result simdjson_result::end() noexcept { - if (error()) { return error(); } - return first.end(); -} -simdjson_really_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { - if (error()) { return error(); } - return first[key]; -} -simdjson_really_inline simdjson_result simdjson_result::operator[](std::string_view key) && noexcept { - if (error()) { return error(); } - return std::forward(first)[key]; -} - -} // namespace simdjson -/* end file include/simdjson/generic/ondemand/object-inl.h */ -/* begin file include/simdjson/generic/ondemand/parser-inl.h */ -namespace simdjson { -namespace SIMDJSON_IMPLEMENTATION { -namespace ondemand { - -simdjson_warn_unused simdjson_really_inline error_code parser::allocate(size_t new_capacity, size_t new_max_depth) noexcept { - if (string_buf && new_capacity == _capacity && new_max_depth == _max_depth) { return SUCCESS; } - - // string_capacity copied from document::allocate - _capacity = 0; - _max_depth = 0; - size_t string_capacity = SIMDJSON_ROUNDUP_N(5 * new_capacity / 3 + SIMDJSON_PADDING, 64); - string_buf.reset(new (std::nothrow) uint8_t[string_capacity]); - SIMDJSON_TRY( dom_parser.set_capacity(new_capacity) ); - SIMDJSON_TRY( dom_parser.set_max_depth(DEFAULT_MAX_DEPTH) ); - _capacity = new_capacity; - _max_depth = new_max_depth; - return SUCCESS; -} - -simdjson_warn_unused simdjson_really_inline simdjson_result parser::iterate(const padded_string &buf) & noexcept { - // Allocate if needed - if (_capacity < buf.size() || !string_buf) { - SIMDJSON_TRY( allocate(buf.size(), _max_depth) ); - } - - // Run stage 1. - SIMDJSON_TRY( dom_parser.stage1((const uint8_t *)buf.data(), buf.size(), false) ); - return document::start(this); -} - -simdjson_warn_unused simdjson_really_inline simdjson_result parser::iterate_raw(const padded_string &buf) & noexcept { - // Allocate if needed - if (_capacity < buf.size()) { - SIMDJSON_TRY( allocate(buf.size(), _max_depth) ); - } - - // Run stage 1. - SIMDJSON_TRY( dom_parser.stage1((const uint8_t *)buf.data(), buf.size(), false) ); - return json_iterator(this); -} - -} // namespace ondemand -} // namespace SIMDJSON_IMPLEMENTATION -} // namespace simdjson - -namespace simdjson { - -simdjson_really_inline simdjson_result::simdjson_result(SIMDJSON_IMPLEMENTATION::ondemand::parser &&value) noexcept - : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_really_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base(error) {} - -} // namespace simdjson -/* end file include/simdjson/generic/ondemand/parser-inl.h */ -/* end file include/simdjson/generic/ondemand-inl.h */ -/* begin file include/simdjson/ppc64/end.h */ -#undef SIMDJSON_IMPLEMENTATION -/* end file include/simdjson/ppc64/end.h */ - -#endif // SIMDJSON_IMPLEMENTATION_PPC64 - -#endif // SIMDJSON_PPC64_H -/* end file include/simdjson/ppc64.h */ +/* end file include/simdjson/westmere/end.h */ /* begin file include/simdjson/fallback.h */ #ifndef SIMDJSON_FALLBACK_H #define SIMDJSON_FALLBACK_H @@ -32106,7 +26150,7 @@ simdjson_really_inline simdjson_result> 8; - val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16; - return uint32_t((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32); + uint32_t result = 0; + for (int i=0;i<8;i++) { + result = result*10 + (chars[i] - '0'); + } + return result; } static simdjson_really_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { return parse_eight_digits_unrolled((const char *)chars); @@ -32705,13 +26748,13 @@ namespace numberparsing { namespace { // Convert a mantissa, an exponent and a sign bit into an ieee64 double. // The real_exponent needs to be in [0, 2046] (technically real_exponent = 2047 would be acceptable). -// The mantissa should be in [0,1<<53). The bit at index (1ULL << 52) while be zeroed. +// The mantissa should be in [0,1<<53). The bit at index (1ULL << 52) while be zeroed. simdjson_really_inline double to_double(uint64_t mantissa, uint64_t real_exponent, bool negative) { double d; mantissa &= ~(1ULL << 52); mantissa |= real_exponent << 52; mantissa |= (((uint64_t)negative) << 63); - std::memcpy(&d, &mantissa, sizeof(d)); + memcpy(&d, &mantissa, sizeof(d)); return d; } } @@ -32829,7 +26872,7 @@ simdjson_really_inline bool compute_float_64(int64_t power, uint64_t i, bool neg // We want the most significant 64 bits of the product. We know // this will be non-zero because the most significant bit of i is // 1. - const uint32_t index = 2 * uint32_t(power - simdjson::internal::smallest_power); + const uint32_t index = 2 * uint32_t(power - simdjson::internal::smallest_power); // Optimization: It may be that materializing the index as a variable might confuse some compilers and prevent effective complex-addressing loads. (Done for code clarity.) // // The full_multiplication function computes the 128-bit product of two 64-bit words @@ -32838,7 +26881,7 @@ simdjson_really_inline bool compute_float_64(int64_t power, uint64_t i, bool neg // to the 64-bit most significant bits of the product. simdjson::internal::value128 firstproduct = jsoncharutils::full_multiplication(i, simdjson::internal::power_of_five_128[index]); // Both i and power_of_five_128[index] have their most significant bit set to 1 which - // implies that the either the most or the second most significant bit of the product + // implies that the either the most or the second most significant bit of the product // is 1. We pack values in this manner for efficiency reasons: it maximizes the use // we make of the product. It also makes it easy to reason aboutthe product: there // 0 or 1 leading zero in the product. @@ -32853,17 +26896,17 @@ simdjson_really_inline bool compute_float_64(int64_t power, uint64_t i, bool neg // Consider the scenario where q>=0. Then 5^q may not fit in 64-bits. Doing // the full computation is wasteful. So we do what is called a "truncated // multiplication". - // We take the most significant 64-bits, and we put them in + // We take the most significant 64-bits, and we put them in // power_of_five_128[index]. Usually, that's good enough to approximate i * 5^q - // to the desired approximation using one multiplication. Sometimes it does not suffice. + // to the desired approximation using one multiplication. Sometimes it does not suffice. // Then we store the next most significant 64 bits in power_of_five_128[index + 1], and // then we get a better approximation to i * 5^q. In very rare cases, even that // will not suffice, though it is seemingly very hard to find such a scenario. - // + // // That's for when q>=0. The logic for q<0 is somewhat similar but it is somewhat // more complicated. // - // There is an extra layer of complexity in that we need more than 55 bits of + // There is an extra layer of complexity in that we need more than 55 bits of // accuracy in the round-to-even scenario. // // The full_multiplication function computes the 128-bit product of two 64-bit words @@ -32896,7 +26939,7 @@ simdjson_really_inline bool compute_float_64(int64_t power, uint64_t i, bool neg if(-real_exponent + 1 >= 64) { // if we have more than 64 bits below the minimum exponent, you have a zero for sure. d = 0.0; return true; - } + } // next line is safe because -real_exponent + 1 < 0 mantissa >>= -real_exponent + 1; // Thankfully, we can't have both "round-to-even" and subnormals because @@ -32909,7 +26952,7 @@ simdjson_really_inline bool compute_float_64(int64_t power, uint64_t i, bool neg // whereas 0x40000000000000 x 2^-1023-53 is normal. Now, we need to round // up 0x3fffffffffffff x 2^-1023-53 and once we do, we are no longer // subnormal, but we can only know this after rounding. - // So we only declare a subnormal if we are smaller than the threshold. + // So we only declare a subnormal if we are smaller than the threshold. real_exponent = (mantissa < (uint64_t(1) << 52)) ? 0 : 1; d = to_double(mantissa, real_exponent, negative); return true; @@ -32919,7 +26962,7 @@ simdjson_really_inline bool compute_float_64(int64_t power, uint64_t i, bool neg // which we guard against. // If we have lots of trailing zeros, we may fall right between two // floating-point values. - // + // // The round-to-even cases take the form of a number 2m+1 which is in (2^53,2^54] // times a power of two. That is, it is right between a number with binary significand // m and another number with binary significand m+1; and it must be the case @@ -32930,11 +26973,11 @@ simdjson_really_inline bool compute_float_64(int64_t power, uint64_t i, bool neg // When q >= 0, we must have that (2m+1) is divible by 5^q, so 5^q <= 2^54. We have that // 5^23 <= 2^54 and it is the last power of five to qualify, so q <= 23. // When q<0, we have w >= (2m+1) x 5^{-q}. We must have that w<2^{64} so - // (2m+1) x 5^{-q} < 2^{64}. We have that 2m+1>2^{53}. Hence, we must have + // (2m+1) x 5^{-q} < 2^{64}. We have that 2m+1>2^{53}. Hence, we must have // 2^{53} x 5^{-q} < 2^{64}. - // Hence we have 5^{-q} < 2^{11}$ or q>= -4. + // Hence we have 5^{-q} < 2^{11}$ or q>= -4. // - // We require lower <= 1 and not lower == 0 because we could not prove that + // We require lower <= 1 and not lower == 0 because we could not prove that // that lower == 0 is implied; but we could prove that lower <= 1 is a necessary and sufficient test. if (simdjson_unlikely((lower <= 1) && (power >= -4) && (power <= 23) && ((mantissa & 3) == 1))) { if((mantissa << (upperbit + 64 - 53 - 2)) == upper) { @@ -32987,7 +27030,7 @@ simdjson_really_inline bool is_made_of_eight_digits_fast(const uint8_t *chars) { // this can read up to 7 bytes beyond the buffer size, but we require // SIMDJSON_PADDING of padding static_assert(7 <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be bigger than 7"); - std::memcpy(&val, chars, 8); + memcpy(&val, chars, 8); // a branchy method might be faster: // return (( val & 0xF0F0F0F0F0F0F0F0 ) == 0x3030303030303030) // && (( (val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0 ) == @@ -33142,7 +27185,7 @@ simdjson_really_inline error_code write_float(const uint8_t *const src, bool neg // Observe that 18446744073709551615e-343 == 0, i.e. (2**64 - 1) e -343 is zero // so something x 10^-343 goes to zero, but not so with something x 10^-342. static_assert(simdjson::internal::smallest_power <= -342, "smallest_power is not small enough"); - // + // if((exponent < simdjson::internal::smallest_power) || (i == 0)) { WRITE_DOUBLE(0, src, writer); return SUCCESS; @@ -33414,12 +27457,11 @@ simdjson_unused simdjson_really_inline simdjson_result parse_double(cons if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } exponent += exp_neg ? 0-exp : exp; + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; } if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } - overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; - // // Assemble (or slow-parse) the float // @@ -33442,7 +27484,7 @@ simdjson_unused simdjson_really_inline simdjson_result parse_double(cons /* end file include/simdjson/generic/numberparsing.h */ #endif // SIMDJSON_FALLBACK_NUMBERPARSING_H -/* end file include/simdjson/fallback/numberparsing.h */ +/* end file include/simdjson/generic/numberparsing.h */ /* begin file include/simdjson/generic/implementation_simdjson_result_base.h */ namespace simdjson { namespace SIMDJSON_IMPLEMENTATION { @@ -33572,7 +27614,7 @@ namespace simdjson { namespace SIMDJSON_IMPLEMENTATION { /** * A fast, simple, DOM-like interface that parses JSON as you use it. - * + * * Designed for maximum speed and a lower memory profile. */ namespace ondemand { @@ -33623,11 +27665,11 @@ class parser; * * (In other words, a pointer to the beginning of a string, just after the start quote, inside a * JSON file.) - * + * * This class is deliberately simplistic and has little functionality. You can * compare two raw_json_string instances, or compare a raw_json_string with a string_view, but * that is pretty much all you can do. - * + * * They originate typically from field instance which in turn represent key-value pairs from * object instances. From a field instance, you get the raw_json_string instance by calling key(). * You can, if you want a more usable string_view instance, call the unescaped_key() method @@ -33637,7 +27679,7 @@ class raw_json_string { public: /** * Create a new invalid raw_json_string. - * + * * Exists so you can declare a variable and later assign to it before use. */ simdjson_really_inline raw_json_string() noexcept = default; @@ -33647,15 +27689,15 @@ public: /** * Create a new invalid raw_json_string pointed at the given location in the JSON. - * + * * The given location must be just *after* the beginning quote (") in the JSON file. - * + * * It *must* be terminated by a ", and be a valid JSON string. */ simdjson_really_inline raw_json_string(const uint8_t * _buf) noexcept; /** * Get the raw pointer to the beginning of the string in the JSON (just after the "). - * + * * It is possible for this function to return a null pointer if the instance * has outlived its existence. */ @@ -33675,11 +27717,11 @@ private: /** * Unescape this JSON string, replacing \\ with \, \n with newline, etc. - * + * * ## IMPORTANT: string_view lifetime - * + * * The string_view is only valid as long as the bytes in dst. - * + * * @param dst A pointer to a buffer at least large enough to write this string as well as a \0. * dst will be updated to the next unused location (just after the \0 written out at * the end of this string). @@ -33689,11 +27731,11 @@ private: simdjson_really_inline simdjson_warn_unused simdjson_result unescape(uint8_t *&dst) const noexcept; /** * Unescape this JSON string, replacing \\ with \, \n with newline, etc. - * + * * ## IMPORTANT: string_view lifetime - * + * * The string_view is only valid until the next parse() call on the parser. - * + * * @param iter A json_iterator, which contains a buffer where the string will be written. */ simdjson_really_inline simdjson_warn_unused simdjson_result unescape(json_iterator &iter) const noexcept; @@ -33749,7 +27791,7 @@ class token_iterator { public: /** * Create a new invalid token_iterator. - * + * * Exists so you can declare a variable and later assign to it before use. */ simdjson_really_inline token_iterator() noexcept = default; @@ -33849,7 +27891,7 @@ class json_iterator_ref; /** * Iterates through JSON, with structure-sensitive algorithms. - * + * * @private This is not intended for external use. */ class json_iterator : public token_iterator { @@ -33885,17 +27927,17 @@ public: * Start an object iteration after the user has already checked and moved past the {. * * Does not move the iterator. - * + * * @returns Whether the object had any fields (returns false for empty). */ simdjson_warn_unused simdjson_really_inline bool started_object() noexcept; /** * Moves to the next field in an object. - * + * * Looks for , and }. If } is found, the object is finished and the iterator advances past it. * Otherwise, it advances to the next value. - * + * * @return whether there is another field in the object. * @error TAPE_ERROR If there is a comma missing between fields. */ @@ -33915,7 +27957,7 @@ public: * Find the next field with the given key. * * Assumes you have called next_field() or otherwise matched the previous value. - * + * * Key is *raw JSON,* meaning it will be matched against the verbatim JSON without attempting to * unescape it. This works well for typical ASCII and UTF-8 keys (almost all of them), but may * fail to match some keys with escapes (\u, \n, etc.). @@ -33949,10 +27991,10 @@ public: /** * Moves to the next element in an array. - * + * * Looks for , and ]. If ] is found, the array is finished and the iterator advances past it. * Otherwise, it advances to the next value. - * + * * @return Whether there is another element in the array. * @error TAPE_ERROR If there is a comma missing between elements. */ @@ -33991,7 +28033,7 @@ public: /** * Skips to the end of a JSON object or array. - * + * * @return true if this was the end of an array, false if it was the end of an object. */ simdjson_warn_unused simdjson_really_inline error_code skip_container() noexcept; @@ -34013,7 +28055,7 @@ public: /** * Report an error, preventing further iteration. - * + * * @param error The error to report. Must not be SUCCESS, UNINITIALIZED, INCORRECT_TYPE, or NO_SUCH_FIELD. * @param message An error message to report with the error. */ @@ -34028,13 +28070,13 @@ protected: ondemand::parser *parser{}; /** * Next free location in the string buffer. - * + * * Used by raw_json_string::unescape() to have a place to unescape strings to. */ uint8_t *current_string_buf_loc{}; /** * JSON error, if there is one. - * + * * INCORRECT_TYPE and NO_SUCH_FIELD are *not* stored here, ever. * * PERF NOTE: we *hope* this will be elided into control flow, as it is only used (a) in the first @@ -34145,7 +28187,7 @@ class document; /** * A forward-only JSON array. - * + * * This is an input_iterator, meaning: * - It is forward-only * - * must be called exactly once per element. @@ -34165,7 +28207,7 @@ public: /** * Get the current element. - * + * * Part of the std::iterator interface. */ simdjson_really_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. @@ -34173,7 +28215,7 @@ public: * Check if we are at the end of the JSON. * * Part of the std::iterator interface. - * + * * @return true if there are no more elements in the JSON array. */ simdjson_really_inline bool operator==(const array_iterator &) noexcept; @@ -34181,13 +28223,13 @@ public: * Check if there are more elements in the JSON array. * * Part of the std::iterator interface. - * + * * @return true if there are more elements in the JSON array. */ simdjson_really_inline bool operator!=(const array_iterator &) noexcept; /** * Move to the next element. - * + * * Part of the std::iterator interface. */ simdjson_really_inline array_iterator &operator++() noexcept; @@ -34245,7 +28287,7 @@ class object_iterator { public: /** * Create a new invalid object_iterator. - * + * * Exists so you can declare a variable and later assign to it before use. */ simdjson_really_inline object_iterator() noexcept = default; @@ -34321,7 +28363,7 @@ class array { public: /** * Create a new invalid array. - * + * * Exists so you can declare a variable and later assign to it before use. */ simdjson_really_inline array() noexcept = default; @@ -34359,7 +28401,7 @@ protected: static simdjson_really_inline simdjson_result start(json_iterator_ref &&iter) noexcept; /** * Begin array iteration. - * + * * This version of the method should be called after the initial [ has been verified, and is * intended for use by switch statements that check the type of a value. * @@ -34386,7 +28428,7 @@ protected: /** * Iterator marking current position. - * + * * iter.is_alive() == false indicates iteration is complete. */ json_iterator_ref iter{}; @@ -34446,7 +28488,7 @@ public: /** * Create a new invalid document. - * + * * Exists so you can declare a variable and later assign to it before use. */ simdjson_really_inline document() noexcept = default; @@ -34494,7 +28536,7 @@ public: simdjson_really_inline simdjson_result get_double() noexcept; /** * Cast this JSON value to a string. - * + * * The string is guaranteed to be valid UTF-8. * * Equivalent to get(). @@ -34506,7 +28548,7 @@ public: simdjson_really_inline simdjson_result get_string() & noexcept; /** * Cast this JSON value to a raw_json_string. - * + * * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). * * @returns A pointer to the raw JSON for the given string. @@ -34522,7 +28564,7 @@ public: simdjson_really_inline simdjson_result get_bool() noexcept; /** * Checks if this JSON value is null. - * + * * @returns Whether the value is null. */ simdjson_really_inline bool is_null() noexcept; @@ -34531,7 +28573,7 @@ public: * Get this value as the given type. * * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool - * + * * @returns A value of the given type, parsed from the JSON. * @returns INCORRECT_TYPE If the JSON value is not the given type. */ @@ -34543,7 +28585,7 @@ public: * Get this value as the given type. * * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool - * + * * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. * @returns INCORRECT_TYPE If the JSON value is not an object. * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. @@ -34590,7 +28632,7 @@ public: simdjson_really_inline operator double() noexcept(false); /** * Cast this JSON value to a string. - * + * * The string is guaranteed to be valid UTF-8. * * Equivalent to get(). @@ -34602,7 +28644,7 @@ public: simdjson_really_inline operator std::string_view() & noexcept(false); /** * Cast this JSON value to a raw_json_string. - * + * * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). * * @returns A pointer to the raw JSON for the given string. @@ -34636,7 +28678,7 @@ public: * * This method may only be called once on a given value. If you want to look up multiple fields, * you must first get the object using value.get_object() or object(value). - * + * * @param key The key to look up. * @returns INCORRECT_TYPE If the JSON value is not an array. */ @@ -34646,7 +28688,7 @@ public: * * This method may only be called once on a given value. If you want to look up multiple fields, * you must first get the object using value.get_object() or object(value). - * + * * @param key The key to look up. * @returns INCORRECT_TYPE If the JSON value is not an array. */ @@ -34660,7 +28702,7 @@ protected: static simdjson_really_inline document start(ondemand::json_iterator &&iter) noexcept; /** * Set json to null if the result is successful. - * + * * Convenience function for value-getters. */ template @@ -34762,7 +28804,7 @@ class value { public: /** * Create a new invalid value. - * + * * Exists so you can declare a variable and later assign to it before use. */ simdjson_really_inline value() noexcept = default; @@ -34781,7 +28823,7 @@ public: * Get this value as the given type. * * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool - * + * * @returns A value of the given type, parsed from the JSON. * @returns INCORRECT_TYPE If the JSON value is not the given type. */ @@ -34793,7 +28835,7 @@ public: * Get this value as the given type. * * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool - * + * * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. * @returns INCORRECT_TYPE If the JSON value is not an object. * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. @@ -34854,7 +28896,7 @@ public: /** * Cast this JSON value to a string. - * + * * The string is guaranteed to be valid UTF-8. * * Equivalent to get(). @@ -34869,7 +28911,7 @@ public: /** * Cast this JSON value to a raw_json_string. - * + * * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). * * @returns A pointer to the raw JSON for the given string. @@ -34891,7 +28933,7 @@ public: /** * Checks if this JSON value is null. - * + * * @returns Whether the value is null. */ simdjson_really_inline bool is_null() && noexcept; @@ -34942,7 +28984,7 @@ public: simdjson_really_inline operator double() & noexcept(false); /** * Cast this JSON value to a string. - * + * * The string is guaranteed to be valid UTF-8. * * Equivalent to get(). @@ -34956,7 +28998,7 @@ public: simdjson_really_inline operator std::string_view() & noexcept(false); /** * Cast this JSON value to a raw_json_string. - * + * * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). * * @returns A pointer to the raw JSON for the given string. @@ -34980,7 +29022,7 @@ public: * Begin array iteration. * * Part of the std::iterable interface. - * + * * @returns INCORRECT_TYPE If the JSON value is not an array. */ simdjson_really_inline simdjson_result> begin() & noexcept; @@ -35117,16 +29159,16 @@ namespace ondemand { /** * A JSON field (key/value pair) in an object. - * + * * Returned from object iteration. - * + * * Extends from std::pair so you can use C++ algorithms that rely on pairs. */ class field : public std::pair { public: /** * Create a new invalid field. - * + * * Exists so you can declare a variable and later assign to it before use. */ simdjson_really_inline field() noexcept; @@ -35140,7 +29182,7 @@ public: * Get the key as a string_view (for higher speed, consider raw_key). * We deliberately use a more cumbersome name (unescaped_key) to force users * to think twice about using it. - * + * * This consumes the key: once you have called unescaped_key(), you cannot * call it again nor can you call key(). */ @@ -35203,7 +29245,7 @@ class object { public: /** * Create a new invalid object. - * + * * Exists so you can declare a variable and later assign to it before use. */ simdjson_really_inline object() noexcept = default; @@ -35249,7 +29291,7 @@ protected: json_iterator_ref iter{}; /** * Whether we are at the start. - * + * * PERF NOTE: this should be elided into inline control flow: it is only used for the first [] * or * call, and SSA optimizers commonly do first-iteration loop optimization. */ @@ -35318,17 +29360,17 @@ public: /** * Start iterating an on-demand JSON document. - * + * * ondemand::parser parser; * document doc = parser.iterate(json); - * + * * ### IMPORTANT: Buffer Lifetime - * + * * Because parsing is done while you iterate, you *must* keep the JSON buffer around at least as * long as the document iteration. - * + * * ### IMPORTANT: Document Lifetime - * + * * Only one iteration at a time can happen per parser, and the parser *must* be kept alive during * iteration to ensure intermediate buffers can be accessed. Any document must be destroyed before * you call parse() again or destroy the parser. @@ -35339,7 +29381,7 @@ public: * those bytes are initialized to, as long as they are allocated. * * @param json The JSON to parse. - * + * * @return The document, or an error: * - MEMALLOC if realloc_if_needed the parser does not have enough capacity, and memory * allocation fails. @@ -35352,19 +29394,19 @@ public: simdjson_warn_unused simdjson_result iterate(const std::string &json) & noexcept = delete; /** * @private - * + * * Start iterating an on-demand JSON document. - * + * * ondemand::parser parser; * json_iterator doc = parser.iterate(json); - * + * * ### IMPORTANT: Buffer Lifetime - * + * * Because parsing is done while you iterate, you *must* keep the JSON buffer around at least as * long as the document iteration. - * + * * ### IMPORTANT: Document Lifetime - * + * * Only one iteration at a time can happen per parser, and the parser *must* be kept alive during * iteration to ensure intermediate buffers can be accessed. Any document must be destroyed before * you call parse() again or destroy the parser. @@ -35375,7 +29417,7 @@ public: * those bytes are initialized to, as long as they are allocated. * * @param json The JSON to parse. - * + * * @return The iterator, or an error: * - MEMALLOC if realloc_if_needed the parser does not have enough capacity, and memory * allocation fails. @@ -35424,7 +29466,7 @@ public: } // namespace simdjson /* end file include/simdjson/generic/ondemand/parser.h */ -/* end file include/simdjson/generic/ondemand.h */ +/* end file include/simdjson/generic/ondemand/parser.h */ // Inline definitions /* begin file include/simdjson/generic/implementation_simdjson_result_base-inl.h */ @@ -35844,7 +29886,7 @@ simdjson_warn_unused simdjson_really_inline bool json_iterator::started_array() advance(); return false; } - logger::log_start_value(*this, "array"); + logger::log_start_value(*this, "array"); return true; } @@ -35931,7 +29973,7 @@ simdjson_warn_unused simdjson_really_inline bool json_iterator::copy_to_buffer(c } // Copy to the buffer. - std::memcpy(tmpbuf, json, len); + memcpy(tmpbuf, json, len); tmpbuf[len] = ' '; return true; } @@ -35950,7 +29992,7 @@ simdjson_warn_unused simdjson_result json_iterator::consume_root_uint6 return parse_root_uint64(advance()); } simdjson_warn_unused simdjson_result json_iterator::parse_root_int64(const uint8_t *json) noexcept { - uint8_t tmpbuf[20+1]; // -<19 digits> is the longest possible integer + uint8_t tmpbuf[20+1]; // -<19 digits> is the longest possible integer if (!copy_to_buffer(json, tmpbuf)) { logger::log_error(*this, "Root number more than 20 characters"); return NUMBER_ERROR; } logger::log_value(*this, "int64", ""); auto result = numberparsing::parse_integer(tmpbuf); @@ -36362,7 +30404,7 @@ namespace ondemand { // Next state. In this state, depth > iter->depth, at_start == false, and error == SUCCESS. // // ## Error States -// +// // In error states, we will yield exactly one more value before stopping. iter->depth == depth // and at_start is always false. We decrement after yielding the error, moving to the Finished // state. @@ -37232,7 +31274,7 @@ namespace ondemand { // Next state. In this state, depth > iter->depth, at_start == false, and error == SUCCESS. // // ## Error States -// +// // In error states, we will yield exactly one more value before stopping. iter->depth == depth // and at_start is always false. We decrement after yielding the error, moving to the Finished // state. @@ -37395,7 +31437,7 @@ simdjson_warn_unused simdjson_really_inline simdjson_result parser::it } // Run stage 1. - SIMDJSON_TRY( dom_parser.stage1((const uint8_t *)buf.data(), buf.size(), false) ); + SIMDJSON_TRY( dom_parser.stage1((const uint8_t *)buf.data(), buf.size(), false) ); return document::start(this); } @@ -37406,7 +31448,7 @@ simdjson_warn_unused simdjson_really_inline simdjson_result parse } // Run stage 1. - SIMDJSON_TRY( dom_parser.stage1((const uint8_t *)buf.data(), buf.size(), false) ); + SIMDJSON_TRY( dom_parser.stage1((const uint8_t *)buf.data(), buf.size(), false) ); return json_iterator(this); } @@ -37423,7 +31465,7 @@ simdjson_really_inline simdjson_result