/* auto-generated on Fri Aug 23 10:23:28 DST 2019. Do not edit! */ #include "simdjson.h" /* used for http://dmalloc.com/ Dmalloc - Debug Malloc Library */ #ifdef DMALLOC #include "dmalloc.h" #endif /* begin file src/simdjson.cpp */ #include namespace simdjson { const std::map error_strings = { {SUCCESS, "No errors"}, {CAPACITY, "This ParsedJson can't support a document that big"}, {MEMALLOC, "Error allocating memory, we're most likely out of memory"}, {TAPE_ERROR, "Something went wrong while writing to the tape"}, {STRING_ERROR, "Problem while parsing a string"}, {T_ATOM_ERROR, "Problem while parsing an atom starting with the letter 't'"}, {F_ATOM_ERROR, "Problem while parsing an atom starting with the letter 'f'"}, {N_ATOM_ERROR, "Problem while parsing an atom starting with the letter 'n'"}, {NUMBER_ERROR, "Problem while parsing a number"}, {UTF8_ERROR, "The input is not valid UTF-8"}, {UNITIALIZED, "Unitialized"}, {EMPTY, "Empty"}, {UNESCAPED_CHARS, "Within strings, some characters must be escapted, we " "found unescapted characters"}, {UNEXPECTED_ERROR, "Unexpected error, consider reporting this problem as " "you may have found a bug in simdjson"}, }; const std::string &error_message(const int error_code) { return error_strings.at(error_code); } } // namespace simdjson /* end file src/simdjson.cpp */ /* begin file src/jsonioutil.cpp */ #include #include namespace simdjson { char *allocate_padded_buffer(size_t length) { // we could do a simple malloc // return (char *) malloc(length + SIMDJSON_PADDING); // However, we might as well align to cache lines... size_t totalpaddedlength = length + SIMDJSON_PADDING; char *padded_buffer = aligned_malloc_char(64, totalpaddedlength); return padded_buffer; } padded_string get_corpus(const std::string &filename) { std::FILE *fp = std::fopen(filename.c_str(), "rb"); if (fp != nullptr) { std::fseek(fp, 0, SEEK_END); size_t len = std::ftell(fp); padded_string s(len); if (s.data() == nullptr) { std::fclose(fp); throw std::runtime_error("could not allocate memory"); } std::rewind(fp); size_t readb = std::fread(s.data(), 1, len, fp); std::fclose(fp); if (readb != len) { throw std::runtime_error("could not read the data"); } return s; } throw std::runtime_error("could not load corpus"); } } // namespace simdjson /* end file src/jsonioutil.cpp */ /* begin file src/jsonminifier.cpp */ #include #ifndef __AVX2__ namespace simdjson { static uint8_t jump_table[256 * 3] = { 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 
1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, }; size_t json_minify(const unsigned char *bytes, size_t how_many, unsigned char *out) { size_t i = 0, pos = 0; uint8_t quote = 0; uint8_t nonescape = 1; while (i < how_many) { unsigned char c = bytes[i]; uint8_t *meta = jump_table + 3 * c; quote = quote ^ (meta[0] & nonescape); out[pos] = c; pos += meta[2] | quote; i += 1; nonescape = (~nonescape) | (meta[1]); } return pos; } } // namespace simdjson #else #include namespace simdjson { // some intrinsics are missing under GCC? #ifndef __clang__ #ifndef _MSC_VER static __m256i inline _mm256_loadu2_m128i(__m128i const *__addr_hi, __m128i const *__addr_lo) { __m256i __v256 = _mm256_castsi128_si256(_mm_loadu_si128(__addr_lo)); return _mm256_insertf128_si256(__v256, _mm_loadu_si128(__addr_hi), 1); } static inline void _mm256_storeu2_m128i(__m128i *__addr_hi, __m128i *__addr_lo, __m256i __a) { __m128i __v128; __v128 = _mm256_castsi256_si128(__a); _mm_storeu_si128(__addr_lo, __v128); __v128 = _mm256_extractf128_si256(__a, 1); _mm_storeu_si128(__addr_hi, __v128); } #endif #endif // a straightforward comparison of a mask against input. 
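// The helper below compares both 32-byte halves of a 64-byte block against a
// broadcast byte and fuses the two 32-lane movemask results into a single
// 64-bit mask (low half in bits 0..31, high half in bits 32..63). Note that
// _mm256_movemask_epi8 returns a signed int, so the low half has to pass
// through an unsigned 32-bit value before widening, otherwise sign extension
// would pollute the upper 32 bits. A scalar model of the fusion, for
// illustration only:
//
//   uint64_t fuse_masks(uint32_t lo, uint32_t hi) {
//     return static_cast<uint64_t>(lo) | (static_cast<uint64_t>(hi) << 32);
//   }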
static uint64_t cmp_mask_against_input_mini(__m256i input_lo, __m256i input_hi, __m256i mask) { __m256i cmp_res_0 = _mm256_cmpeq_epi8(input_lo, mask); uint64_t res_0 = static_cast(_mm256_movemask_epi8(cmp_res_0)); __m256i cmp_res_1 = _mm256_cmpeq_epi8(input_hi, mask); uint64_t res_1 = _mm256_movemask_epi8(cmp_res_1); return res_0 | (res_1 << 32); } // take input from buf and remove useless whitespace, input and output can be // the same, result is null terminated, return the string length (minus the null // termination) size_t json_minify(const uint8_t *buf, size_t len, uint8_t *out) { // Useful constant masks const uint64_t even_bits = 0x5555555555555555ULL; const uint64_t odd_bits = ~even_bits; uint8_t *initout(out); uint64_t prev_iter_ends_odd_backslash = 0ULL; // either 0 or 1, but a 64-bit value uint64_t prev_iter_inside_quote = 0ULL; // either all zeros or all ones size_t idx = 0; if (len >= 64) { size_t avx_len = len - 63; for (; idx < avx_len; idx += 64) { __m256i input_lo = _mm256_loadu_si256(reinterpret_cast(buf + idx + 0)); __m256i input_hi = _mm256_loadu_si256(reinterpret_cast(buf + idx + 32)); uint64_t bs_bits = cmp_mask_against_input_mini(input_lo, input_hi, _mm256_set1_epi8('\\')); uint64_t start_edges = bs_bits & ~(bs_bits << 1); uint64_t even_start_mask = even_bits ^ prev_iter_ends_odd_backslash; uint64_t even_starts = start_edges & even_start_mask; uint64_t odd_starts = start_edges & ~even_start_mask; uint64_t even_carries = bs_bits + even_starts; uint64_t odd_carries; bool iter_ends_odd_backslash = add_overflow(bs_bits, odd_starts, &odd_carries); odd_carries |= prev_iter_ends_odd_backslash; prev_iter_ends_odd_backslash = iter_ends_odd_backslash ? 0x1ULL : 0x0ULL; uint64_t even_carry_ends = even_carries & ~bs_bits; uint64_t odd_carry_ends = odd_carries & ~bs_bits; uint64_t even_start_odd_end = even_carry_ends & odd_bits; uint64_t odd_start_even_end = odd_carry_ends & even_bits; uint64_t odd_ends = even_start_odd_end | odd_start_even_end; uint64_t quote_bits = cmp_mask_against_input_mini(input_lo, input_hi, _mm256_set1_epi8('"')); quote_bits = quote_bits & ~odd_ends; uint64_t quote_mask = _mm_cvtsi128_si64(_mm_clmulepi64_si128( _mm_set_epi64x(0ULL, quote_bits), _mm_set1_epi8(0xFF), 0)); quote_mask ^= prev_iter_inside_quote; prev_iter_inside_quote = static_cast( static_cast(quote_mask) >> 63); // might be undefined behavior, should be fully defined in C++20, // ok according to John Regher from Utah University const __m256i low_nibble_mask = _mm256_setr_epi8( // 0 9 a b c d 16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0); const __m256i high_nibble_mask = _mm256_setr_epi8( // 0 2 3 5 7 8, 0, 18, 4, 0, 1, 0, 1, 0, 0, 0, 3, 2, 1, 0, 0, 8, 0, 18, 4, 0, 1, 0, 1, 0, 0, 0, 3, 2, 1, 0, 0); __m256i whitespace_shufti_mask = _mm256_set1_epi8(0x18); __m256i v_lo = _mm256_and_si256( _mm256_shuffle_epi8(low_nibble_mask, input_lo), _mm256_shuffle_epi8(high_nibble_mask, _mm256_and_si256(_mm256_srli_epi32(input_lo, 4), _mm256_set1_epi8(0x7f)))); __m256i v_hi = _mm256_and_si256( _mm256_shuffle_epi8(low_nibble_mask, input_hi), _mm256_shuffle_epi8(high_nibble_mask, _mm256_and_si256(_mm256_srli_epi32(input_hi, 4), _mm256_set1_epi8(0x7f)))); __m256i tmp_ws_lo = _mm256_cmpeq_epi8( _mm256_and_si256(v_lo, whitespace_shufti_mask), _mm256_set1_epi8(0)); __m256i tmp_ws_hi = _mm256_cmpeq_epi8( _mm256_and_si256(v_hi, whitespace_shufti_mask), _mm256_set1_epi8(0)); uint64_t ws_res_0 = static_cast(_mm256_movemask_epi8(tmp_ws_lo)); uint64_t ws_res_1 = 
_mm256_movemask_epi8(tmp_ws_hi); uint64_t whitespace = ~(ws_res_0 | (ws_res_1 << 32)); whitespace &= ~quote_mask; int mask1 = whitespace & 0xFFFF; int mask2 = (whitespace >> 16) & 0xFFFF; int mask3 = (whitespace >> 32) & 0xFFFF; int mask4 = (whitespace >> 48) & 0xFFFF; int pop1 = hamming((~whitespace) & 0xFFFF); int pop2 = hamming((~whitespace) & UINT64_C(0xFFFFFFFF)); int pop3 = hamming((~whitespace) & UINT64_C(0xFFFFFFFFFFFF)); int pop4 = hamming((~whitespace)); __m256i vmask1 = _mm256_loadu2_m128i( reinterpret_cast(mask128_epi8) + (mask2 & 0x7FFF), reinterpret_cast(mask128_epi8) + (mask1 & 0x7FFF)); __m256i vmask2 = _mm256_loadu2_m128i( reinterpret_cast(mask128_epi8) + (mask4 & 0x7FFF), reinterpret_cast(mask128_epi8) + (mask3 & 0x7FFF)); __m256i result1 = _mm256_shuffle_epi8(input_lo, vmask1); __m256i result2 = _mm256_shuffle_epi8(input_hi, vmask2); _mm256_storeu2_m128i(reinterpret_cast<__m128i *>(out + pop1), reinterpret_cast<__m128i *>(out), result1); _mm256_storeu2_m128i(reinterpret_cast<__m128i *>(out + pop3), reinterpret_cast<__m128i *>(out + pop2), result2); out += pop4; } } // we finish off the job... copying and pasting the code is not ideal here, // but it gets the job done. if (idx < len) { uint8_t buffer[64]; memset(buffer, 0, 64); memcpy(buffer, buf + idx, len - idx); __m256i input_lo = _mm256_loadu_si256(reinterpret_cast(buffer)); __m256i input_hi = _mm256_loadu_si256(reinterpret_cast(buffer + 32)); uint64_t bs_bits = cmp_mask_against_input_mini(input_lo, input_hi, _mm256_set1_epi8('\\')); uint64_t start_edges = bs_bits & ~(bs_bits << 1); uint64_t even_start_mask = even_bits ^ prev_iter_ends_odd_backslash; uint64_t even_starts = start_edges & even_start_mask; uint64_t odd_starts = start_edges & ~even_start_mask; uint64_t even_carries = bs_bits + even_starts; uint64_t odd_carries; // bool iter_ends_odd_backslash = add_overflow(bs_bits, odd_starts, &odd_carries); odd_carries |= prev_iter_ends_odd_backslash; // prev_iter_ends_odd_backslash = iter_ends_odd_backslash ? 
0x1ULL : 0x0ULL; // // we never use it uint64_t even_carry_ends = even_carries & ~bs_bits; uint64_t odd_carry_ends = odd_carries & ~bs_bits; uint64_t even_start_odd_end = even_carry_ends & odd_bits; uint64_t odd_start_even_end = odd_carry_ends & even_bits; uint64_t odd_ends = even_start_odd_end | odd_start_even_end; uint64_t quote_bits = cmp_mask_against_input_mini(input_lo, input_hi, _mm256_set1_epi8('"')); quote_bits = quote_bits & ~odd_ends; uint64_t quote_mask = _mm_cvtsi128_si64(_mm_clmulepi64_si128( _mm_set_epi64x(0ULL, quote_bits), _mm_set1_epi8(0xFF), 0)); quote_mask ^= prev_iter_inside_quote; // prev_iter_inside_quote = (uint64_t)((int64_t)quote_mask >> 63);// we // don't need this anymore __m256i mask_20 = _mm256_set1_epi8(0x20); // c==32 __m256i mask_70 = _mm256_set1_epi8(0x70); // adding 0x70 does not check low 4-bits // but moves any value >= 16 above 128 __m256i lut_cntrl = _mm256_setr_epi8( 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00); __m256i tmp_ws_lo = _mm256_or_si256( _mm256_cmpeq_epi8(mask_20, input_lo), _mm256_shuffle_epi8(lut_cntrl, _mm256_adds_epu8(mask_70, input_lo))); __m256i tmp_ws_hi = _mm256_or_si256( _mm256_cmpeq_epi8(mask_20, input_hi), _mm256_shuffle_epi8(lut_cntrl, _mm256_adds_epu8(mask_70, input_hi))); uint64_t ws_res_0 = static_cast(_mm256_movemask_epi8(tmp_ws_lo)); uint64_t ws_res_1 = _mm256_movemask_epi8(tmp_ws_hi); uint64_t whitespace = (ws_res_0 | (ws_res_1 << 32)); whitespace &= ~quote_mask; if (len - idx < 64) { whitespace |= UINT64_C(0xFFFFFFFFFFFFFFFF) << (len - idx); } int mask1 = whitespace & 0xFFFF; int mask2 = (whitespace >> 16) & 0xFFFF; int mask3 = (whitespace >> 32) & 0xFFFF; int mask4 = (whitespace >> 48) & 0xFFFF; int pop1 = hamming((~whitespace) & 0xFFFF); int pop2 = hamming((~whitespace) & UINT64_C(0xFFFFFFFF)); int pop3 = hamming((~whitespace) & UINT64_C(0xFFFFFFFFFFFF)); int pop4 = hamming((~whitespace)); __m256i vmask1 = _mm256_loadu2_m128i( reinterpret_cast(mask128_epi8) + (mask2 & 0x7FFF), reinterpret_cast(mask128_epi8) + (mask1 & 0x7FFF)); __m256i vmask2 = _mm256_loadu2_m128i( reinterpret_cast(mask128_epi8) + (mask4 & 0x7FFF), reinterpret_cast(mask128_epi8) + (mask3 & 0x7FFF)); __m256i result1 = _mm256_shuffle_epi8(input_lo, vmask1); __m256i result2 = _mm256_shuffle_epi8(input_hi, vmask2); _mm256_storeu2_m128i(reinterpret_cast<__m128i *>(buffer + pop1), reinterpret_cast<__m128i *>(buffer), result1); _mm256_storeu2_m128i(reinterpret_cast<__m128i *>(buffer + pop3), reinterpret_cast<__m128i *>(buffer + pop2), result2); memcpy(out, buffer, pop4); out += pop4; } *out = '\0'; // NULL termination return out - initout; } } // namespace simdjson #endif /* end file src/jsonminifier.cpp */ /* begin file src/jsonparser.cpp */ #include namespace simdjson { // The function that users are expected to call is json_parse. // We have more than one such function because we want to support several // instruction sets. 
// instruction sets.

// function pointer type for json_parse
using json_parse_functype = int(const uint8_t *buf, size_t len, ParsedJson &pj,
                                bool realloc);

// Pointer that holds the json_parse implementation corresponding to the
// available SIMD instruction set
extern std::atomic<json_parse_functype *> json_parse_ptr;

int json_parse(const uint8_t *buf, size_t len, ParsedJson &pj, bool realloc) {
  return json_parse_ptr.load(std::memory_order_relaxed)(buf, len, pj, realloc);
}

int json_parse(const char *buf, size_t len, ParsedJson &pj, bool realloc) {
  return json_parse_ptr.load(std::memory_order_relaxed)(
      reinterpret_cast<const uint8_t *>(buf), len, pj, realloc);
}

Architecture find_best_supported_implementation() {
  constexpr uint32_t haswell_flags =
      instruction_set::AVX2 | instruction_set::PCLMULQDQ |
      instruction_set::BMI1 | instruction_set::BMI2;
  constexpr uint32_t westmere_flags =
      instruction_set::SSE42 | instruction_set::PCLMULQDQ;
  uint32_t supports = detect_supported_architectures();
  // Order from best to worst (within architecture)
  if ((haswell_flags & supports) == haswell_flags)
    return Architecture::HASWELL;
  if ((westmere_flags & supports) == westmere_flags)
    return Architecture::WESTMERE;
  if (instruction_set::NEON)
    return Architecture::ARM64;
  return Architecture::NONE;
}

// Responsible for selecting the best json_parse implementation
int json_parse_dispatch(const uint8_t *buf, size_t len, ParsedJson &pj,
                        bool realloc) {
  Architecture best_implementation = find_best_supported_implementation();
  // Selecting the best implementation
  switch (best_implementation) {
#ifdef IS_X86_64
  case Architecture::HASWELL:
    json_parse_ptr.store(&json_parse_implementation<Architecture::HASWELL>,
                         std::memory_order_relaxed);
    break;
  case Architecture::WESTMERE:
    json_parse_ptr.store(&json_parse_implementation<Architecture::WESTMERE>,
                         std::memory_order_relaxed);
    break;
#endif
#ifdef IS_ARM64
  case Architecture::ARM64:
    json_parse_ptr.store(&json_parse_implementation<Architecture::ARM64>,
                         std::memory_order_relaxed);
    break;
#endif
  default:
    std::cerr << "The processor is not supported by simdjson." << std::endl;
    return simdjson::UNEXPECTED_ERROR;
  }
  return json_parse_ptr.load(std::memory_order_relaxed)(buf, len, pj, realloc);
}

std::atomic<json_parse_functype *> json_parse_ptr = &json_parse_dispatch;

WARN_UNUSED
ParsedJson build_parsed_json(const uint8_t *buf, size_t len, bool realloc) {
  ParsedJson pj;
  bool ok = pj.allocate_capacity(len);
  if (ok) {
    json_parse(buf, len, pj, realloc);
  } else {
    std::cerr << "failure during memory allocation" << std::endl;
  }
  return pj;
}
} // namespace simdjson
/* end file src/jsonparser.cpp */
/* begin file src/simd_input.h */
#ifndef SIMDJSON_SIMD_INPUT_H
#define SIMDJSON_SIMD_INPUT_H

#include <cstdint>

namespace simdjson {

template <Architecture T> struct simd_input {
  simd_input(const uint8_t *ptr);
  // a straightforward comparison of a mask against input.
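  // Contract sketch (example values, not part of the interface): if the 64-byte
  // block starts with the bytes 'a', '\', '\', '"', then the low bits of
  // eq('\\') are 0b0110 (bits 1 and 2, the two backslashes) and the low bits of
  // eq('"') are 0b1000. lteq(0x1F) uses an unsigned comparison, so it flags
  // control characters (0x00..0x1F) but not bytes in the 0x80..0xFF range.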
uint64_t eq(uint8_t m); // find all values less than or equal than the content of maxval (using unsigned arithmetic) uint64_t lteq(uint8_t m); }; // struct simd_input } // namespace simdjson #endif /* end file src/simd_input.h */ /* begin file src/arm64/architecture.h */ #ifndef SIMDJSON_ARM64_ARCHITECTURE_H #define SIMDJSON_ARM64_ARCHITECTURE_H #ifdef IS_ARM64 namespace simdjson::arm64 { static const Architecture ARCHITECTURE = Architecture::ARM64; } // namespace simdjson::arm64 #endif // IS_ARM64 #endif // SIMDJSON_ARM64_ARCHITECTURE_H /* end file src/arm64/architecture.h */ /* begin file src/haswell/architecture.h */ #ifndef SIMDJSON_HASWELL_ARCHITECTURE_H #define SIMDJSON_HASWELL_ARCHITECTURE_H #ifdef IS_X86_64 namespace simdjson::haswell { static const Architecture ARCHITECTURE = Architecture::HASWELL; } // namespace simdjson::haswell #endif // IS_X86_64 #endif // SIMDJSON_HASWELL_ARCHITECTURE_H /* end file src/haswell/architecture.h */ /* begin file src/westmere/architecture.h */ #ifndef SIMDJSON_WESTMERE_ARCHITECTURE_H #define SIMDJSON_WESTMERE_ARCHITECTURE_H #ifdef IS_X86_64 namespace simdjson::westmere { static const Architecture ARCHITECTURE = Architecture::WESTMERE; } // namespace simdjson::westmere #endif // IS_X86_64 #endif // SIMDJSON_WESTMERE_ARCHITECTURE_H /* end file src/westmere/architecture.h */ /* begin file src/arm64/simd_input.h */ #ifndef SIMDJSON_ARM64_SIMD_INPUT_H #define SIMDJSON_ARM64_SIMD_INPUT_H #ifdef IS_ARM64 namespace simdjson { really_inline uint16_t neon_movemask(uint8x16_t input) { const uint8x16_t bit_mask = {0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80}; uint8x16_t minput = vandq_u8(input, bit_mask); uint8x16_t tmp = vpaddq_u8(minput, minput); tmp = vpaddq_u8(tmp, tmp); tmp = vpaddq_u8(tmp, tmp); return vgetq_lane_u16(vreinterpretq_u16_u8(tmp), 0); } really_inline uint64_t neon_movemask_bulk(uint8x16_t p0, uint8x16_t p1, uint8x16_t p2, uint8x16_t p3) { const uint8x16_t bit_mask = {0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80}; uint8x16_t t0 = vandq_u8(p0, bit_mask); uint8x16_t t1 = vandq_u8(p1, bit_mask); uint8x16_t t2 = vandq_u8(p2, bit_mask); uint8x16_t t3 = vandq_u8(p3, bit_mask); uint8x16_t sum0 = vpaddq_u8(t0, t1); uint8x16_t sum1 = vpaddq_u8(t2, t3); sum0 = vpaddq_u8(sum0, sum1); sum0 = vpaddq_u8(sum0, sum0); return vgetq_lane_u64(vreinterpretq_u64_u8(sum0), 0); } template <> struct simd_input { uint8x16_t i0; uint8x16_t i1; uint8x16_t i2; uint8x16_t i3; really_inline simd_input(const uint8_t *ptr) { this->i0 = vld1q_u8(ptr + 0); this->i1 = vld1q_u8(ptr + 16); this->i2 = vld1q_u8(ptr + 32); this->i3 = vld1q_u8(ptr + 48); } really_inline uint64_t eq(uint8_t m) { const uint8x16_t mask = vmovq_n_u8(m); uint8x16_t cmp_res_0 = vceqq_u8(this->i0, mask); uint8x16_t cmp_res_1 = vceqq_u8(this->i1, mask); uint8x16_t cmp_res_2 = vceqq_u8(this->i2, mask); uint8x16_t cmp_res_3 = vceqq_u8(this->i3, mask); return neon_movemask_bulk(cmp_res_0, cmp_res_1, cmp_res_2, cmp_res_3); } really_inline uint64_t lteq(uint8_t m) { const uint8x16_t mask = vmovq_n_u8(m); uint8x16_t cmp_res_0 = vcleq_u8(this->i0, mask); uint8x16_t cmp_res_1 = vcleq_u8(this->i1, mask); uint8x16_t cmp_res_2 = vcleq_u8(this->i2, mask); uint8x16_t cmp_res_3 = vcleq_u8(this->i3, mask); return neon_movemask_bulk(cmp_res_0, cmp_res_1, cmp_res_2, cmp_res_3); } }; // struct simd_input } // namespace simdjson #endif // IS_ARM64 #endif // SIMDJSON_ARM64_SIMD_INPUT_H /* end file src/arm64/simd_input.h */ /* begin 
file src/haswell/simd_input.h */
#ifndef SIMDJSON_HASWELL_SIMD_INPUT_H
#define SIMDJSON_HASWELL_SIMD_INPUT_H

#ifdef IS_X86_64

TARGET_HASWELL
namespace simdjson {

template <> struct simd_input<Architecture::HASWELL> {
  __m256i lo;
  __m256i hi;

  really_inline simd_input(const uint8_t *ptr) {
    this->lo = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(ptr + 0));
    this->hi = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(ptr + 32));
  }

  template <typename F>
  really_inline uint64_t build_bitmask(F const &chunk_to_mask) {
    uint64_t r0 =
        static_cast<uint32_t>(_mm256_movemask_epi8(chunk_to_mask(this->lo)));
    uint64_t r1 = _mm256_movemask_epi8(chunk_to_mask(this->hi));
    return r0 | (r1 << 32);
  }

  really_inline uint64_t eq(uint8_t m) {
    const __m256i mask = _mm256_set1_epi8(m);
    return this->build_bitmask(
        [&](auto chunk) { return _mm256_cmpeq_epi8(chunk, mask); });
  }

  really_inline uint64_t lteq(uint8_t m) {
    const __m256i maxval = _mm256_set1_epi8(m);
    return this->build_bitmask([&](auto chunk) {
      return _mm256_cmpeq_epi8(_mm256_max_epu8(maxval, chunk), maxval);
    });
  }
}; // struct simd_input

} // namespace simdjson
UNTARGET_REGION

#endif // IS_X86_64
#endif // SIMDJSON_HASWELL_SIMD_INPUT_H
/* end file src/haswell/simd_input.h */
/* begin file src/westmere/simd_input.h */
#ifndef SIMDJSON_WESTMERE_SIMD_INPUT_H
#define SIMDJSON_WESTMERE_SIMD_INPUT_H

#ifdef IS_X86_64

TARGET_WESTMERE
namespace simdjson {

template <> struct simd_input<Architecture::WESTMERE> {
  __m128i v0;
  __m128i v1;
  __m128i v2;
  __m128i v3;

  really_inline simd_input(const uint8_t *ptr) {
    this->v0 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(ptr + 0));
    this->v1 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(ptr + 16));
    this->v2 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(ptr + 32));
    this->v3 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(ptr + 48));
  }

  template <typename F>
  really_inline uint64_t build_bitmask(F const &chunk_to_mask) {
    uint64_t r0 =
        static_cast<uint32_t>(_mm_movemask_epi8(chunk_to_mask(this->v0)));
    uint64_t r1 = _mm_movemask_epi8(chunk_to_mask(this->v1));
    uint64_t r2 = _mm_movemask_epi8(chunk_to_mask(this->v2));
    uint64_t r3 = _mm_movemask_epi8(chunk_to_mask(this->v3));
    return r0 | (r1 << 16) | (r2 << 32) | (r3 << 48);
  }

  really_inline uint64_t eq(uint8_t m) {
    const __m128i mask = _mm_set1_epi8(m);
    return this->build_bitmask(
        [&](auto chunk) { return _mm_cmpeq_epi8(chunk, mask); });
  }

  really_inline uint64_t lteq(uint8_t m) {
    const __m128i maxval = _mm_set1_epi8(m);
    return this->build_bitmask([&](auto chunk) {
      return _mm_cmpeq_epi8(_mm_max_epu8(maxval, chunk), maxval);
    });
  }
}; // struct simd_input

} // namespace simdjson
UNTARGET_REGION

#endif // IS_X86_64
#endif // SIMDJSON_WESTMERE_SIMD_INPUT_H
/* end file src/westmere/simd_input.h */
/* begin file src/simdutf8check.h */
#ifndef SIMDJSON_SIMDUTF8CHECK_H
#define SIMDJSON_SIMDUTF8CHECK_H

namespace simdjson {

// Checks UTF8, chunk by chunk.
template <Architecture T> struct utf8_checker {
  // Process the next chunk of input.
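  // Typical driving pattern, as used by stage 1 further below (sketch only;
  // the real loop also pads the final partial block to 64 bytes before
  // checking it):
  //
  //   utf8_checker<ARCHITECTURE> checker;
  //   for (size_t block = 0; block + 64 <= len; block += 64) {
  //     checker.check_next_input(simd_input<ARCHITECTURE>(buf + block));
  //   }
  //   // ...pad and check the tail, then:
  //   if (checker.errors() != simdjson::SUCCESS) { /* not valid UTF-8 */ }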
void check_next_input(simd_input in); // Find out what (if any) errors have occurred ErrorValues errors(); }; } // namespace simdjson #endif // SIMDJSON_SIMDUTF8CHECK_H /* end file src/simdutf8check.h */ /* begin file src/arm64/simdutf8check.h */ // From https://github.com/cyb70289/utf8/blob/master/lemire-neon.c // Adapted from https://github.com/lemire/fastvalidate-utf-8 #ifndef SIMDJSON_ARM64_SIMDUTF8CHECK_H #define SIMDJSON_ARM64_SIMDUTF8CHECK_H #if defined(_ARM_NEON) || defined(__aarch64__) || \ (defined(_MSC_VER) && defined(_M_ARM64)) #include #include #include #include #include #include /* * legal utf-8 byte sequence * http://www.unicode.org/versions/Unicode6.0.0/ch03.pdf - page 94 * * Code Points 1st 2s 3s 4s * U+0000..U+007F 00..7F * U+0080..U+07FF C2..DF 80..BF * U+0800..U+0FFF E0 A0..BF 80..BF * U+1000..U+CFFF E1..EC 80..BF 80..BF * U+D000..U+D7FF ED 80..9F 80..BF * U+E000..U+FFFF EE..EF 80..BF 80..BF * U+10000..U+3FFFF F0 90..BF 80..BF 80..BF * U+40000..U+FFFFF F1..F3 80..BF 80..BF 80..BF * U+100000..U+10FFFF F4 80..8F 80..BF 80..BF * */ namespace simdjson::arm64 { // all byte values must be no larger than 0xF4 static inline void check_smaller_than_0xF4(int8x16_t current_bytes, int8x16_t *has_error) { // unsigned, saturates to 0 below max *has_error = vorrq_s8( *has_error, vreinterpretq_s8_u8(vqsubq_u8( vreinterpretq_u8_s8(current_bytes), vdupq_n_u8(0xF4)))); } static const int8_t _nibbles[] = { 1, 1, 1, 1, 1, 1, 1, 1, // 0xxx (ASCII) 0, 0, 0, 0, // 10xx (continuation) 2, 2, // 110x 3, // 1110 4, // 1111, next should be 0 (not checked here) }; static inline int8x16_t continuation_lengths(int8x16_t high_nibbles) { return vqtbl1q_s8(vld1q_s8(_nibbles), vreinterpretq_u8_s8(high_nibbles)); } static inline int8x16_t carry_continuations(int8x16_t initial_lengths, int8x16_t previous_carries) { int8x16_t right1 = vreinterpretq_s8_u8(vqsubq_u8( vreinterpretq_u8_s8(vextq_s8(previous_carries, initial_lengths, 16 - 1)), vdupq_n_u8(1))); int8x16_t sum = vaddq_s8(initial_lengths, right1); int8x16_t right2 = vreinterpretq_s8_u8( vqsubq_u8(vreinterpretq_u8_s8(vextq_s8(previous_carries, sum, 16 - 2)), vdupq_n_u8(2))); return vaddq_s8(sum, right2); } static inline void check_continuations(int8x16_t initial_lengths, int8x16_t carries, int8x16_t *has_error) { // overlap || underlap // carry > length && length > 0 || !(carry > length) && !(length > 0) // (carries > length) == (lengths > 0) uint8x16_t overunder = vceqq_u8(vcgtq_s8(carries, initial_lengths), vcgtq_s8(initial_lengths, vdupq_n_s8(0))); *has_error = vorrq_s8(*has_error, vreinterpretq_s8_u8(overunder)); } // when 0xED is found, next byte must be no larger than 0x9F // when 0xF4 is found, next byte must be no larger than 0x8F // next byte must be continuation, ie sign bit is set, so signed < is ok static inline void check_first_continuation_max(int8x16_t current_bytes, int8x16_t off1_current_bytes, int8x16_t *has_error) { uint8x16_t maskED = vceqq_s8(off1_current_bytes, vdupq_n_s8(0xED)); uint8x16_t maskF4 = vceqq_s8(off1_current_bytes, vdupq_n_s8(0xF4)); uint8x16_t badfollowED = vandq_u8(vcgtq_s8(current_bytes, vdupq_n_s8(0x9F)), maskED); uint8x16_t badfollowF4 = vandq_u8(vcgtq_s8(current_bytes, vdupq_n_s8(0x8F)), maskF4); *has_error = vorrq_s8( *has_error, vreinterpretq_s8_u8(vorrq_u8(badfollowED, badfollowF4))); } static const int8_t _initial_mins[] = { -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, // 10xx => false (int8_t)0xC2, -128, // 110x (int8_t)0xE1, // 1110 (int8_t)0xF1, }; static const int8_t 
_second_mins[] = { -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, // 10xx => false 127, 127, // 110x => true (int8_t)0xA0, // 1110 (int8_t)0x90, }; // map off1_hibits => error condition // hibits off1 cur // C => < C2 && true // E => < E1 && < A0 // F => < F1 && < 90 // else false && false static inline void check_overlong(int8x16_t current_bytes, int8x16_t off1_current_bytes, int8x16_t hibits, int8x16_t previous_hibits, int8x16_t *has_error) { int8x16_t off1_hibits = vextq_s8(previous_hibits, hibits, 16 - 1); int8x16_t initial_mins = vqtbl1q_s8(vld1q_s8(_initial_mins), vreinterpretq_u8_s8(off1_hibits)); uint8x16_t initial_under = vcgtq_s8(initial_mins, off1_current_bytes); int8x16_t second_mins = vqtbl1q_s8(vld1q_s8(_second_mins), vreinterpretq_u8_s8(off1_hibits)); uint8x16_t second_under = vcgtq_s8(second_mins, current_bytes); *has_error = vorrq_s8( *has_error, vreinterpretq_s8_u8(vandq_u8(initial_under, second_under))); } struct processed_utf_bytes { int8x16_t raw_bytes; int8x16_t high_nibbles; int8x16_t carried_continuations; }; static inline void count_nibbles(int8x16_t bytes, struct processed_utf_bytes *answer) { answer->raw_bytes = bytes; answer->high_nibbles = vreinterpretq_s8_u8(vshrq_n_u8(vreinterpretq_u8_s8(bytes), 4)); } // check whether the current bytes are valid UTF-8 // at the end of the function, previous gets updated static inline struct processed_utf_bytes check_utf8_bytes(int8x16_t current_bytes, struct processed_utf_bytes *previous, int8x16_t *has_error) { struct processed_utf_bytes pb; count_nibbles(current_bytes, &pb); check_smaller_than_0xF4(current_bytes, has_error); int8x16_t initial_lengths = continuation_lengths(pb.high_nibbles); pb.carried_continuations = carry_continuations(initial_lengths, previous->carried_continuations); check_continuations(initial_lengths, pb.carried_continuations, has_error); int8x16_t off1_current_bytes = vextq_s8(previous->raw_bytes, pb.raw_bytes, 16 - 1); check_first_continuation_max(current_bytes, off1_current_bytes, has_error); check_overlong(current_bytes, off1_current_bytes, pb.high_nibbles, previous->high_nibbles, has_error); return pb; } // Checks that all bytes are ascii really_inline bool check_ascii_neon(simd_input in) { // checking if the most significant bit is always equal to 0. uint8x16_t high_bit = vdupq_n_u8(0x80); uint8x16_t t0 = vorrq_u8(in.i0, in.i1); uint8x16_t t1 = vorrq_u8(in.i2, in.i3); uint8x16_t t3 = vorrq_u8(t0, t1); uint8x16_t t4 = vandq_u8(t3, high_bit); uint64x2_t v64 = vreinterpretq_u64_u8(t4); uint32x2_t v32 = vqmovn_u64(v64); uint64x1_t result = vreinterpret_u64_u32(v32); return vget_lane_u64(result, 0) == 0; } } // namespace simdjson::arm64 namespace simdjson { using namespace simdjson::arm64; template <> struct utf8_checker { int8x16_t has_error{}; processed_utf_bytes previous{}; really_inline void check_next_input(simd_input in) { if (check_ascii_neon(in)) { // All bytes are ascii. Therefore the byte that was just before must be // ascii too. We only check the byte that was just before simd_input. Nines // are arbitrary values. 
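// Worked example of this fast path: if the previous 64-byte block ended with
// the lead byte of a three-byte sequence (e.g. 0xE2), its final carried
// continuation value is 3, so comparing the last lane against 1 below flags
// the dangling sequence even though the current block is pure ASCII. The other
// lanes are compared against 9, a value the carried counters never reach for
// well-formed input, so a block that ended on a character boundary (final
// carry of at most 1) passes untouched.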
const int8x16_t verror = (int8x16_t){9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 1}; this->has_error = vorrq_s8(vreinterpretq_s8_u8( vcgtq_s8(this->previous.carried_continuations, verror)), this->has_error); } else { // it is not ascii so we have to do heavy work this->previous = check_utf8_bytes(vreinterpretq_s8_u8(in.i0), &(this->previous), &(this->has_error)); this->previous = check_utf8_bytes(vreinterpretq_s8_u8(in.i1), &(this->previous), &(this->has_error)); this->previous = check_utf8_bytes(vreinterpretq_s8_u8(in.i2), &(this->previous), &(this->has_error)); this->previous = check_utf8_bytes(vreinterpretq_s8_u8(in.i3), &(this->previous), &(this->has_error)); } } really_inline ErrorValues errors() { uint64x2_t v64 = vreinterpretq_u64_s8(this->has_error); uint32x2_t v32 = vqmovn_u64(v64); uint64x1_t result = vreinterpret_u64_u32(v32); return vget_lane_u64(result, 0) != 0 ? simdjson::UTF8_ERROR : simdjson::SUCCESS; } }; // struct utf8_checker } // namespace simdjson #endif #endif /* end file src/arm64/simdutf8check.h */ /* begin file src/haswell/simdutf8check.h */ #ifndef SIMDJSON_HASWELL_SIMDUTF8CHECK_H #define SIMDJSON_HASWELL_SIMDUTF8CHECK_H #include #include #include #ifdef IS_X86_64 /* * legal utf-8 byte sequence * http://www.unicode.org/versions/Unicode6.0.0/ch03.pdf - page 94 * * Code Points 1st 2s 3s 4s * U+0000..U+007F 00..7F * U+0080..U+07FF C2..DF 80..BF * U+0800..U+0FFF E0 A0..BF 80..BF * U+1000..U+CFFF E1..EC 80..BF 80..BF * U+D000..U+D7FF ED 80..9F 80..BF * U+E000..U+FFFF EE..EF 80..BF 80..BF * U+10000..U+3FFFF F0 90..BF 80..BF 80..BF * U+40000..U+FFFFF F1..F3 80..BF 80..BF 80..BF * U+100000..U+10FFFF F4 80..8F 80..BF 80..BF * */ // all byte values must be no larger than 0xF4 TARGET_HASWELL namespace simdjson::haswell { static inline __m256i push_last_byte_of_a_to_b(__m256i a, __m256i b) { return _mm256_alignr_epi8(b, _mm256_permute2x128_si256(a, b, 0x21), 15); } static inline __m256i push_last_2bytes_of_a_to_b(__m256i a, __m256i b) { return _mm256_alignr_epi8(b, _mm256_permute2x128_si256(a, b, 0x21), 14); } // all byte values must be no larger than 0xF4 static inline void avx_check_smaller_than_0xF4(__m256i current_bytes, __m256i *has_error) { // unsigned, saturates to 0 below max *has_error = _mm256_or_si256( *has_error, _mm256_subs_epu8(current_bytes, _mm256_set1_epi8(0xF4u))); } static inline __m256i avx_continuation_lengths(__m256i high_nibbles) { return _mm256_shuffle_epi8( _mm256_setr_epi8(1, 1, 1, 1, 1, 1, 1, 1, // 0xxx (ASCII) 0, 0, 0, 0, // 10xx (continuation) 2, 2, // 110x 3, // 1110 4, // 1111, next should be 0 (not checked here) 1, 1, 1, 1, 1, 1, 1, 1, // 0xxx (ASCII) 0, 0, 0, 0, // 10xx (continuation) 2, 2, // 110x 3, // 1110 4 // 1111, next should be 0 (not checked here) ), high_nibbles); } static inline __m256i avx_carry_continuations(__m256i initial_lengths, __m256i previous_carries) { __m256i right1 = _mm256_subs_epu8( push_last_byte_of_a_to_b(previous_carries, initial_lengths), _mm256_set1_epi8(1)); __m256i sum = _mm256_add_epi8(initial_lengths, right1); __m256i right2 = _mm256_subs_epu8( push_last_2bytes_of_a_to_b(previous_carries, sum), _mm256_set1_epi8(2)); return _mm256_add_epi8(sum, right2); } static inline void avx_check_continuations(__m256i initial_lengths, __m256i carries, __m256i *has_error) { // overlap || underlap // carry > length && length > 0 || !(carry > length) && !(length > 0) // (carries > length) == (lengths > 0) __m256i overunder = _mm256_cmpeq_epi8( _mm256_cmpgt_epi8(carries, initial_lengths), _mm256_cmpgt_epi8(initial_lengths, 
_mm256_setzero_si256())); *has_error = _mm256_or_si256(*has_error, overunder); } // when 0xED is found, next byte must be no larger than 0x9F // when 0xF4 is found, next byte must be no larger than 0x8F // next byte must be continuation, ie sign bit is set, so signed < is ok static inline void avx_check_first_continuation_max(__m256i current_bytes, __m256i off1_current_bytes, __m256i *has_error) { __m256i maskED = _mm256_cmpeq_epi8(off1_current_bytes, _mm256_set1_epi8(0xEDu)); __m256i maskF4 = _mm256_cmpeq_epi8(off1_current_bytes, _mm256_set1_epi8(0xF4u)); __m256i badfollowED = _mm256_and_si256( _mm256_cmpgt_epi8(current_bytes, _mm256_set1_epi8(0x9Fu)), maskED); __m256i badfollowF4 = _mm256_and_si256( _mm256_cmpgt_epi8(current_bytes, _mm256_set1_epi8(0x8Fu)), maskF4); *has_error = _mm256_or_si256(*has_error, _mm256_or_si256(badfollowED, badfollowF4)); } // map off1_hibits => error condition // hibits off1 cur // C => < C2 && true // E => < E1 && < A0 // F => < F1 && < 90 // else false && false static inline void avx_check_overlong(__m256i current_bytes, __m256i off1_current_bytes, __m256i hibits, __m256i previous_hibits, __m256i *has_error) { __m256i off1_hibits = push_last_byte_of_a_to_b(previous_hibits, hibits); __m256i initial_mins = _mm256_shuffle_epi8( _mm256_setr_epi8(-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, // 10xx => false 0xC2u, -128, // 110x 0xE1u, // 1110 0xF1u, // 1111 -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, // 10xx => false 0xC2u, -128, // 110x 0xE1u, // 1110 0xF1u), // 1111 off1_hibits); __m256i initial_under = _mm256_cmpgt_epi8(initial_mins, off1_current_bytes); __m256i second_mins = _mm256_shuffle_epi8( _mm256_setr_epi8(-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, // 10xx => false 127, 127, // 110x => true 0xA0u, // 1110 0x90u, // 1111 -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, // 10xx => false 127, 127, // 110x => true 0xA0u, // 1110 0x90u), // 1111 off1_hibits); __m256i second_under = _mm256_cmpgt_epi8(second_mins, current_bytes); *has_error = _mm256_or_si256(*has_error, _mm256_and_si256(initial_under, second_under)); } struct avx_processed_utf_bytes { __m256i raw_bytes; __m256i high_nibbles; __m256i carried_continuations; }; static inline void avx_count_nibbles(__m256i bytes, struct avx_processed_utf_bytes *answer) { answer->raw_bytes = bytes; answer->high_nibbles = _mm256_and_si256(_mm256_srli_epi16(bytes, 4), _mm256_set1_epi8(0x0F)); } // check whether the current bytes are valid UTF-8 // at the end of the function, previous gets updated static inline struct avx_processed_utf_bytes avx_check_utf8_bytes(__m256i current_bytes, struct avx_processed_utf_bytes *previous, __m256i *has_error) { struct avx_processed_utf_bytes pb {}; avx_count_nibbles(current_bytes, &pb); avx_check_smaller_than_0xF4(current_bytes, has_error); __m256i initial_lengths = avx_continuation_lengths(pb.high_nibbles); pb.carried_continuations = avx_carry_continuations(initial_lengths, previous->carried_continuations); avx_check_continuations(initial_lengths, pb.carried_continuations, has_error); __m256i off1_current_bytes = push_last_byte_of_a_to_b(previous->raw_bytes, pb.raw_bytes); avx_check_first_continuation_max(current_bytes, off1_current_bytes, has_error); avx_check_overlong(current_bytes, off1_current_bytes, pb.high_nibbles, previous->high_nibbles, has_error); return pb; } }; // namespace simdjson::haswell UNTARGET_REGION // haswell TARGET_HASWELL namespace simdjson { using 
namespace simdjson::haswell; template <> struct utf8_checker { __m256i has_error; avx_processed_utf_bytes previous; utf8_checker() { has_error = _mm256_setzero_si256(); previous.raw_bytes = _mm256_setzero_si256(); previous.high_nibbles = _mm256_setzero_si256(); previous.carried_continuations = _mm256_setzero_si256(); } really_inline void check_next_input(simd_input in) { __m256i high_bit = _mm256_set1_epi8(0x80u); if ((_mm256_testz_si256(_mm256_or_si256(in.lo, in.hi), high_bit)) == 1) { // it is ascii, we just check continuation this->has_error = _mm256_or_si256( _mm256_cmpgt_epi8(this->previous.carried_continuations, _mm256_setr_epi8(9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 1)), this->has_error); } else { // it is not ascii so we have to do heavy work this->previous = avx_check_utf8_bytes(in.lo, &(this->previous), &(this->has_error)); this->previous = avx_check_utf8_bytes(in.hi, &(this->previous), &(this->has_error)); } } really_inline ErrorValues errors() { return _mm256_testz_si256(this->has_error, this->has_error) == 0 ? simdjson::UTF8_ERROR : simdjson::SUCCESS; } }; // struct utf8_checker }; // namespace simdjson UNTARGET_REGION // haswell #endif // IS_X86_64 #endif /* end file src/haswell/simdutf8check.h */ /* begin file src/westmere/simdutf8check.h */ #ifndef SIMDJSON_WESTMERE_SIMDUTF8CHECK_H #define SIMDJSON_WESTMERE_SIMDUTF8CHECK_H #include #include #include #ifdef IS_X86_64 /* * legal utf-8 byte sequence * http://www.unicode.org/versions/Unicode6.0.0/ch03.pdf - page 94 * * Code Points 1st 2s 3s 4s * U+0000..U+007F 00..7F * U+0080..U+07FF C2..DF 80..BF * U+0800..U+0FFF E0 A0..BF 80..BF * U+1000..U+CFFF E1..EC 80..BF 80..BF * U+D000..U+D7FF ED 80..9F 80..BF * U+E000..U+FFFF EE..EF 80..BF 80..BF * U+10000..U+3FFFF F0 90..BF 80..BF 80..BF * U+40000..U+FFFFF F1..F3 80..BF 80..BF 80..BF * U+100000..U+10FFFF F4 80..8F 80..BF 80..BF * */ // all byte values must be no larger than 0xF4 /********** sse code **********/ TARGET_WESTMERE namespace simdjson::westmere { // all byte values must be no larger than 0xF4 static inline void check_smaller_than_0xF4(__m128i current_bytes, __m128i *has_error) { // unsigned, saturates to 0 below max *has_error = _mm_or_si128(*has_error, _mm_subs_epu8(current_bytes, _mm_set1_epi8(0xF4u))); } static inline __m128i continuation_lengths(__m128i high_nibbles) { return _mm_shuffle_epi8( _mm_setr_epi8(1, 1, 1, 1, 1, 1, 1, 1, // 0xxx (ASCII) 0, 0, 0, 0, // 10xx (continuation) 2, 2, // 110x 3, // 1110 4), // 1111, next should be 0 (not checked here) high_nibbles); } static inline __m128i carry_continuations(__m128i initial_lengths, __m128i previous_carries) { __m128i right1 = _mm_subs_epu8(_mm_alignr_epi8(initial_lengths, previous_carries, 16 - 1), _mm_set1_epi8(1)); __m128i sum = _mm_add_epi8(initial_lengths, right1); __m128i right2 = _mm_subs_epu8(_mm_alignr_epi8(sum, previous_carries, 16 - 2), _mm_set1_epi8(2)); return _mm_add_epi8(sum, right2); } static inline void check_continuations(__m128i initial_lengths, __m128i carries, __m128i *has_error) { // overlap || underlap // carry > length && length > 0 || !(carry > length) && !(length > 0) // (carries > length) == (lengths > 0) __m128i overunder = _mm_cmpeq_epi8(_mm_cmpgt_epi8(carries, initial_lengths), _mm_cmpgt_epi8(initial_lengths, _mm_setzero_si128())); *has_error = _mm_or_si128(*has_error, overunder); } // when 0xED is found, next byte must be no larger than 0x9F // when 0xF4 is found, next byte must be no larger than 0x8F // next byte must be 
continuation, ie sign bit is set, so signed < is ok static inline void check_first_continuation_max(__m128i current_bytes, __m128i off1_current_bytes, __m128i *has_error) { __m128i maskED = _mm_cmpeq_epi8(off1_current_bytes, _mm_set1_epi8(0xEDu)); __m128i maskF4 = _mm_cmpeq_epi8(off1_current_bytes, _mm_set1_epi8(0xF4u)); __m128i badfollowED = _mm_and_si128( _mm_cmpgt_epi8(current_bytes, _mm_set1_epi8(0x9Fu)), maskED); __m128i badfollowF4 = _mm_and_si128( _mm_cmpgt_epi8(current_bytes, _mm_set1_epi8(0x8Fu)), maskF4); *has_error = _mm_or_si128(*has_error, _mm_or_si128(badfollowED, badfollowF4)); } // map off1_hibits => error condition // hibits off1 cur // C => < C2 && true // E => < E1 && < A0 // F => < F1 && < 90 // else false && false static inline void check_overlong(__m128i current_bytes, __m128i off1_current_bytes, __m128i hibits, __m128i previous_hibits, __m128i *has_error) { __m128i off1_hibits = _mm_alignr_epi8(hibits, previous_hibits, 16 - 1); __m128i initial_mins = _mm_shuffle_epi8( _mm_setr_epi8(-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, // 10xx => false 0xC2u, -128, // 110x 0xE1u, // 1110 0xF1u), off1_hibits); __m128i initial_under = _mm_cmpgt_epi8(initial_mins, off1_current_bytes); __m128i second_mins = _mm_shuffle_epi8( _mm_setr_epi8(-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, // 10xx => false 127, 127, // 110x => true 0xA0u, // 1110 0x90u), off1_hibits); __m128i second_under = _mm_cmpgt_epi8(second_mins, current_bytes); *has_error = _mm_or_si128(*has_error, _mm_and_si128(initial_under, second_under)); } struct processed_utf_bytes { __m128i raw_bytes; __m128i high_nibbles; __m128i carried_continuations; }; static inline void count_nibbles(__m128i bytes, struct processed_utf_bytes *answer) { answer->raw_bytes = bytes; answer->high_nibbles = _mm_and_si128(_mm_srli_epi16(bytes, 4), _mm_set1_epi8(0x0F)); } // check whether the current bytes are valid UTF-8 // at the end of the function, previous gets updated static struct processed_utf_bytes check_utf8_bytes(__m128i current_bytes, struct processed_utf_bytes *previous, __m128i *has_error) { struct processed_utf_bytes pb; count_nibbles(current_bytes, &pb); check_smaller_than_0xF4(current_bytes, has_error); __m128i initial_lengths = continuation_lengths(pb.high_nibbles); pb.carried_continuations = carry_continuations(initial_lengths, previous->carried_continuations); check_continuations(initial_lengths, pb.carried_continuations, has_error); __m128i off1_current_bytes = _mm_alignr_epi8(pb.raw_bytes, previous->raw_bytes, 16 - 1); check_first_continuation_max(current_bytes, off1_current_bytes, has_error); check_overlong(current_bytes, off1_current_bytes, pb.high_nibbles, previous->high_nibbles, has_error); return pb; } } // namespace simdjson::westmere UNTARGET_REGION // westmere TARGET_WESTMERE namespace simdjson { using namespace simdjson::westmere; template <> struct utf8_checker { __m128i has_error = _mm_setzero_si128(); processed_utf_bytes previous{ _mm_setzero_si128(), // raw_bytes _mm_setzero_si128(), // high_nibbles _mm_setzero_si128() // carried_continuations }; really_inline void check_next_input(simd_input in) { __m128i high_bit = _mm_set1_epi8(0x80u); if ((_mm_testz_si128(_mm_or_si128(in.v0, in.v1), high_bit)) == 1) { // it is ascii, we just check continuation this->has_error = _mm_or_si128(_mm_cmpgt_epi8(this->previous.carried_continuations, _mm_setr_epi8(9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 1)), this->has_error); } else { // it is not ascii so we have to do 
heavy work this->previous = check_utf8_bytes(in.v0, &(this->previous), &(this->has_error)); this->previous = check_utf8_bytes(in.v1, &(this->previous), &(this->has_error)); } if ((_mm_testz_si128(_mm_or_si128(in.v2, in.v3), high_bit)) == 1) { // it is ascii, we just check continuation this->has_error = _mm_or_si128(_mm_cmpgt_epi8(this->previous.carried_continuations, _mm_setr_epi8(9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 1)), this->has_error); } else { // it is not ascii so we have to do heavy work this->previous = check_utf8_bytes(in.v2, &(this->previous), &(this->has_error)); this->previous = check_utf8_bytes(in.v3, &(this->previous), &(this->has_error)); } } really_inline ErrorValues errors() { return _mm_testz_si128(this->has_error, this->has_error) == 0 ? simdjson::UTF8_ERROR : simdjson::SUCCESS; } }; // struct utf8_checker } // namespace simdjson UNTARGET_REGION // westmere #endif // IS_X86_64 #endif /* end file src/westmere/simdutf8check.h */ /* begin file src/arm64/stage1_find_marks.h */ #ifndef SIMDJSON_ARM64_STAGE1_FIND_MARKS_H #define SIMDJSON_ARM64_STAGE1_FIND_MARKS_H #ifdef IS_ARM64 namespace simdjson::arm64 { really_inline uint64_t compute_quote_mask(uint64_t quote_bits) { #ifdef __ARM_FEATURE_CRYPTO // some ARM processors lack this extension return vmull_p64(-1ULL, quote_bits); #else return portable_compute_quote_mask(quote_bits); #endif } really_inline void find_whitespace_and_structurals( simd_input in, uint64_t &whitespace, uint64_t &structurals) { const uint8x16_t low_nibble_mask = (uint8x16_t){16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0}; const uint8x16_t high_nibble_mask = (uint8x16_t){8, 0, 18, 4, 0, 1, 0, 1, 0, 0, 0, 3, 2, 1, 0, 0}; const uint8x16_t structural_shufti_mask = vmovq_n_u8(0x7); const uint8x16_t whitespace_shufti_mask = vmovq_n_u8(0x18); const uint8x16_t low_nib_and_mask = vmovq_n_u8(0xf); uint8x16_t nib_0_lo = vandq_u8(in.i0, low_nib_and_mask); uint8x16_t nib_0_hi = vshrq_n_u8(in.i0, 4); uint8x16_t shuf_0_lo = vqtbl1q_u8(low_nibble_mask, nib_0_lo); uint8x16_t shuf_0_hi = vqtbl1q_u8(high_nibble_mask, nib_0_hi); uint8x16_t v_0 = vandq_u8(shuf_0_lo, shuf_0_hi); uint8x16_t nib_1_lo = vandq_u8(in.i1, low_nib_and_mask); uint8x16_t nib_1_hi = vshrq_n_u8(in.i1, 4); uint8x16_t shuf_1_lo = vqtbl1q_u8(low_nibble_mask, nib_1_lo); uint8x16_t shuf_1_hi = vqtbl1q_u8(high_nibble_mask, nib_1_hi); uint8x16_t v_1 = vandq_u8(shuf_1_lo, shuf_1_hi); uint8x16_t nib_2_lo = vandq_u8(in.i2, low_nib_and_mask); uint8x16_t nib_2_hi = vshrq_n_u8(in.i2, 4); uint8x16_t shuf_2_lo = vqtbl1q_u8(low_nibble_mask, nib_2_lo); uint8x16_t shuf_2_hi = vqtbl1q_u8(high_nibble_mask, nib_2_hi); uint8x16_t v_2 = vandq_u8(shuf_2_lo, shuf_2_hi); uint8x16_t nib_3_lo = vandq_u8(in.i3, low_nib_and_mask); uint8x16_t nib_3_hi = vshrq_n_u8(in.i3, 4); uint8x16_t shuf_3_lo = vqtbl1q_u8(low_nibble_mask, nib_3_lo); uint8x16_t shuf_3_hi = vqtbl1q_u8(high_nibble_mask, nib_3_hi); uint8x16_t v_3 = vandq_u8(shuf_3_lo, shuf_3_hi); uint8x16_t tmp_0 = vtstq_u8(v_0, structural_shufti_mask); uint8x16_t tmp_1 = vtstq_u8(v_1, structural_shufti_mask); uint8x16_t tmp_2 = vtstq_u8(v_2, structural_shufti_mask); uint8x16_t tmp_3 = vtstq_u8(v_3, structural_shufti_mask); structurals = neon_movemask_bulk(tmp_0, tmp_1, tmp_2, tmp_3); uint8x16_t tmp_ws_0 = vtstq_u8(v_0, whitespace_shufti_mask); uint8x16_t tmp_ws_1 = vtstq_u8(v_1, whitespace_shufti_mask); uint8x16_t tmp_ws_2 = vtstq_u8(v_2, whitespace_shufti_mask); uint8x16_t tmp_ws_3 = vtstq_u8(v_3, whitespace_shufti_mask); whitespace = neon_movemask_bulk(tmp_ws_0, tmp_ws_1, 
tmp_ws_2, tmp_ws_3); } // This file contains a non-architecture-specific version of "flatten" used in stage1. // It is intended to be included multiple times and compiled multiple times // We assume the file in which it is include already includes // "simdjson/stage1_find_marks.h" (this simplifies amalgation) #ifdef SIMDJSON_NAIVE_FLATTEN // useful for benchmarking // This is just a naive implementation. It should be normally // disable, but can be used for research purposes to compare // again our optimized version. really_inline void flatten_bits(uint32_t *base_ptr, uint32_t &base, uint32_t idx, uint64_t bits) { uint32_t *out_ptr = base_ptr + base; idx -= 64; while (bits != 0) { out_ptr[0] = idx + trailing_zeroes(bits); bits = bits & (bits - 1); out_ptr++; } base = (out_ptr - base_ptr); } #else // SIMDJSON_NAIVE_FLATTEN // flatten out values in 'bits' assuming that they are are to have values of idx // plus their position in the bitvector, and store these indexes at // base_ptr[base] incrementing base as we go // will potentially store extra values beyond end of valid bits, so base_ptr // needs to be large enough to handle this really_inline void flatten_bits(uint32_t *base_ptr, uint32_t &base, uint32_t idx, uint64_t bits) { // In some instances, the next branch is expensive because it is mispredicted. // Unfortunately, in other cases, // it helps tremendously. if (bits == 0) return; uint32_t cnt = hamming(bits); uint32_t next_base = base + cnt; idx -= 64; base_ptr += base; { base_ptr[0] = idx + trailing_zeroes(bits); bits = bits & (bits - 1); base_ptr[1] = idx + trailing_zeroes(bits); bits = bits & (bits - 1); base_ptr[2] = idx + trailing_zeroes(bits); bits = bits & (bits - 1); base_ptr[3] = idx + trailing_zeroes(bits); bits = bits & (bits - 1); base_ptr[4] = idx + trailing_zeroes(bits); bits = bits & (bits - 1); base_ptr[5] = idx + trailing_zeroes(bits); bits = bits & (bits - 1); base_ptr[6] = idx + trailing_zeroes(bits); bits = bits & (bits - 1); base_ptr[7] = idx + trailing_zeroes(bits); bits = bits & (bits - 1); base_ptr += 8; } // We hope that the next branch is easily predicted. if (cnt > 8) { base_ptr[0] = idx + trailing_zeroes(bits); bits = bits & (bits - 1); base_ptr[1] = idx + trailing_zeroes(bits); bits = bits & (bits - 1); base_ptr[2] = idx + trailing_zeroes(bits); bits = bits & (bits - 1); base_ptr[3] = idx + trailing_zeroes(bits); bits = bits & (bits - 1); base_ptr[4] = idx + trailing_zeroes(bits); bits = bits & (bits - 1); base_ptr[5] = idx + trailing_zeroes(bits); bits = bits & (bits - 1); base_ptr[6] = idx + trailing_zeroes(bits); bits = bits & (bits - 1); base_ptr[7] = idx + trailing_zeroes(bits); bits = bits & (bits - 1); base_ptr += 8; } if (cnt > 16) { // unluckly: we rarely get here // since it means having one structural or pseudo-structral element // every 4 characters (possible with inputs like "","","",...). do { base_ptr[0] = idx + trailing_zeroes(bits); bits = bits & (bits - 1); base_ptr++; } while (bits != 0); } base = next_base; } #endif // SIMDJSON_NAIVE_FLATTEN // This file contains the common code every implementation uses in stage1 // It is intended to be included multiple times and compiled multiple times // We assume the file in which it is included already includes // "simdjson/stage1_find_marks.h" (this simplifies amalgation) // return a bitvector indicating where we have characters that end an odd-length // sequence of backslashes (and thus change the behavior of the next character // to follow). 
A even-length sequence of backslashes, and, for that matter, the // largest even-length prefix of our odd-length sequence of backslashes, simply // modify the behavior of the backslashes themselves. // We also update the prev_iter_ends_odd_backslash reference parameter to // indicate whether we end an iteration on an odd-length sequence of // backslashes, which modifies our subsequent search for odd-length // sequences of backslashes in an obvious way. really_inline uint64_t find_odd_backslash_sequences( simd_input in, uint64_t &prev_iter_ends_odd_backslash) { const uint64_t even_bits = 0x5555555555555555ULL; const uint64_t odd_bits = ~even_bits; uint64_t bs_bits = in.eq('\\'); uint64_t start_edges = bs_bits & ~(bs_bits << 1); /* flip lowest if we have an odd-length run at the end of the prior * iteration */ uint64_t even_start_mask = even_bits ^ prev_iter_ends_odd_backslash; uint64_t even_starts = start_edges & even_start_mask; uint64_t odd_starts = start_edges & ~even_start_mask; uint64_t even_carries = bs_bits + even_starts; uint64_t odd_carries; /* must record the carry-out of our odd-carries out of bit 63; this * indicates whether the sense of any edge going to the next iteration * should be flipped */ bool iter_ends_odd_backslash = add_overflow(bs_bits, odd_starts, &odd_carries); odd_carries |= prev_iter_ends_odd_backslash; /* push in bit zero as a * potential end if we had an * odd-numbered run at the * end of the previous * iteration */ prev_iter_ends_odd_backslash = iter_ends_odd_backslash ? 0x1ULL : 0x0ULL; uint64_t even_carry_ends = even_carries & ~bs_bits; uint64_t odd_carry_ends = odd_carries & ~bs_bits; uint64_t even_start_odd_end = even_carry_ends & odd_bits; uint64_t odd_start_even_end = odd_carry_ends & even_bits; uint64_t odd_ends = even_start_odd_end | odd_start_even_end; return odd_ends; } // return both the quote mask (which is a half-open mask that covers the first // quote // in an unescaped quote pair and everything in the quote pair) and the quote // bits, which are the simple // unescaped quoted bits. We also update the prev_iter_inside_quote value to // tell the next iteration // whether we finished the final iteration inside a quote pair; if so, this // inverts our behavior of // whether we're inside quotes for the next iteration. // Note that we don't do any error checking to see if we have backslash // sequences outside quotes; these // backslash sequences (of any length) will be detected elsewhere. really_inline uint64_t find_quote_mask_and_bits( simd_input in, uint64_t odd_ends, uint64_t &prev_iter_inside_quote, uint64_t "e_bits, uint64_t &error_mask) { quote_bits = in.eq('"'); quote_bits = quote_bits & ~odd_ends; uint64_t quote_mask = compute_quote_mask(quote_bits); quote_mask ^= prev_iter_inside_quote; /* All Unicode characters may be placed within the * quotation marks, except for the characters that MUST be escaped: * quotation mark, reverse solidus, and the control characters (U+0000 * through U+001F). * https://tools.ietf.org/html/rfc8259 */ uint64_t unescaped = in.lteq(0x1F); error_mask |= quote_mask & unescaped; /* right shift of a signed value expected to be well-defined and standard * compliant as of C++20, * John Regher from Utah U. 
says this is fine code */ prev_iter_inside_quote = static_cast(static_cast(quote_mask) >> 63); return quote_mask; } really_inline uint64_t finalize_structurals( uint64_t structurals, uint64_t whitespace, uint64_t quote_mask, uint64_t quote_bits, uint64_t &prev_iter_ends_pseudo_pred) { // mask off anything inside quotes structurals &= ~quote_mask; // add the real quote bits back into our bit_mask as well, so we can // quickly traverse the strings we've spent all this trouble gathering structurals |= quote_bits; // Now, establish "pseudo-structural characters". These are non-whitespace // characters that are (a) outside quotes and (b) have a predecessor that's // either whitespace or a structural character. This means that subsequent // passes will get a chance to encounter the first character of every string // of non-whitespace and, if we're parsing an atom like true/false/null or a // number we can stop at the first whitespace or structural character // following it. // a qualified predecessor is something that can happen 1 position before an // pseudo-structural character uint64_t pseudo_pred = structurals | whitespace; uint64_t shifted_pseudo_pred = (pseudo_pred << 1) | prev_iter_ends_pseudo_pred; prev_iter_ends_pseudo_pred = pseudo_pred >> 63; uint64_t pseudo_structurals = shifted_pseudo_pred & (~whitespace) & (~quote_mask); structurals |= pseudo_structurals; // now, we've used our close quotes all we need to. So let's switch them off // they will be off in the quote mask and on in quote bits. structurals &= ~(quote_bits & ~quote_mask); return structurals; } // Find structural bits in a 64-byte chunk. really_inline void find_structural_bits_64( const uint8_t *buf, size_t idx, uint32_t *base_ptr, uint32_t &base, uint64_t &prev_iter_ends_odd_backslash, uint64_t &prev_iter_inside_quote, uint64_t &prev_iter_ends_pseudo_pred, uint64_t &structurals, uint64_t &error_mask, utf8_checker &utf8_state) { simd_input in(buf); utf8_state.check_next_input(in); /* detect odd sequences of backslashes */ uint64_t odd_ends = find_odd_backslash_sequences( in, prev_iter_ends_odd_backslash); /* detect insides of quote pairs ("quote_mask") and also our quote_bits * themselves */ uint64_t quote_bits; uint64_t quote_mask = find_quote_mask_and_bits( in, odd_ends, prev_iter_inside_quote, quote_bits, error_mask); /* take the previous iterations structural bits, not our current * iteration, * and flatten */ flatten_bits(base_ptr, base, idx, structurals); uint64_t whitespace; find_whitespace_and_structurals(in, whitespace, structurals); /* fixup structurals to reflect quotes and add pseudo-structural * characters */ structurals = finalize_structurals(structurals, whitespace, quote_mask, quote_bits, prev_iter_ends_pseudo_pred); } int find_structural_bits(const uint8_t *buf, size_t len, simdjson::ParsedJson &pj) { if (len > pj.byte_capacity) { std::cerr << "Your ParsedJson object only supports documents up to " << pj.byte_capacity << " bytes but you are trying to process " << len << " bytes" << std::endl; return simdjson::CAPACITY; } uint32_t *base_ptr = pj.structural_indexes; uint32_t base = 0; utf8_checker utf8_state; /* we have padded the input out to 64 byte multiple with the remainder * being zeros persistent state across loop does the last iteration end * with an odd-length sequence of backslashes? */ /* either 0 or 1, but a 64-bit value */ uint64_t prev_iter_ends_odd_backslash = 0ULL; /* does the previous iteration end inside a double-quote pair? 
*/ uint64_t prev_iter_inside_quote = 0ULL; /* either all zeros or all ones * does the previous iteration end on something that is a * predecessor of a pseudo-structural character - i.e. * whitespace or a structural character effectively the very * first char is considered to follow "whitespace" for the * purposes of pseudo-structural character detection so we * initialize to 1 */ uint64_t prev_iter_ends_pseudo_pred = 1ULL; /* structurals are persistent state across loop as we flatten them on the * subsequent iteration into our array pointed to be base_ptr. * This is harmless on the first iteration as structurals==0 * and is done for performance reasons; we can hide some of the latency of * the * expensive carryless multiply in the previous step with this work */ uint64_t structurals = 0; size_t lenminus64 = len < 64 ? 0 : len - 64; size_t idx = 0; uint64_t error_mask = 0; /* for unescaped characters within strings (ASCII code points < 0x20) */ for (; idx < lenminus64; idx += 64) { find_structural_bits_64(&buf[idx], idx, base_ptr, base, prev_iter_ends_odd_backslash, prev_iter_inside_quote, prev_iter_ends_pseudo_pred, structurals, error_mask, utf8_state); } /* If we have a final chunk of less than 64 bytes, pad it to 64 with * spaces before processing it (otherwise, we risk invalidating the UTF-8 * checks). */ if (idx < len) { uint8_t tmp_buf[64]; memset(tmp_buf, 0x20, 64); memcpy(tmp_buf, buf + idx, len - idx); find_structural_bits_64(&tmp_buf[0], idx, base_ptr, base, prev_iter_ends_odd_backslash, prev_iter_inside_quote, prev_iter_ends_pseudo_pred, structurals, error_mask, utf8_state); idx += 64; } /* is last string quote closed? */ if (prev_iter_inside_quote) { return simdjson::UNCLOSED_STRING; } /* finally, flatten out the remaining structurals from the last iteration */ flatten_bits(base_ptr, base, idx, structurals); pj.n_structural_indexes = base; /* a valid JSON file cannot have zero structural indexes - we should have * found something */ if (pj.n_structural_indexes == 0u) { return simdjson::EMPTY; } if (base_ptr[pj.n_structural_indexes - 1] > len) { return simdjson::UNEXPECTED_ERROR; } if (len != base_ptr[pj.n_structural_indexes - 1]) { /* the string might not be NULL terminated, but we add a virtual NULL * ending character. */ base_ptr[pj.n_structural_indexes++] = len; } /* make it safe to dereference one beyond this array */ base_ptr[pj.n_structural_indexes] = 0; if (error_mask) { return simdjson::UNESCAPED_CHARS; } return utf8_state.errors(); } } // namespace simdjson::arm64 namespace simdjson { template <> int find_structural_bits(const uint8_t *buf, size_t len, simdjson::ParsedJson &pj) { return arm64::find_structural_bits(buf, len, pj); } } // namespace simdjson #endif // IS_ARM64 #endif // SIMDJSON_ARM64_STAGE1_FIND_MARKS_H /* end file src/arm64/stage1_find_marks.h */ /* begin file src/haswell/stage1_find_marks.h */ #ifndef SIMDJSON_HASWELL_STAGE1_FIND_MARKS_H #define SIMDJSON_HASWELL_STAGE1_FIND_MARKS_H #ifdef IS_X86_64 TARGET_HASWELL namespace simdjson::haswell { really_inline uint64_t compute_quote_mask(uint64_t quote_bits) { // There should be no such thing with a processing supporting avx2 // but not clmul. 
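  /* Illustrative sketch (added for exposition, not from the simdjson sources):
   * carry-less multiplication by an all-ones operand, as used just below,
   * computes a prefix XOR, so bit i of the result is the XOR of quote bits 0
   * through i ("are we inside a string at position i"). A scalar equivalent,
   * kept inert under #if 0: */
#if 0
  uint64_t reference_mask = 0, parity = 0;
  for (int bit = 0; bit < 64; bit++) {
    parity ^= (quote_bits >> bit) & 1ULL;
    reference_mask |= parity << bit;
  }
  /* e.g. quote_bits = 0x24 (quotes at positions 2 and 5) gives
   * reference_mask = 0x1c: the opening quote and the bytes between the
   * quotes, but not the closing quote. */
#endif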
uint64_t quote_mask = _mm_cvtsi128_si64(_mm_clmulepi64_si128( _mm_set_epi64x(0ULL, quote_bits), _mm_set1_epi8(0xFFu), 0)); return quote_mask; } really_inline void find_whitespace_and_structurals(simd_input in, uint64_t &whitespace, uint64_t &structurals) { #ifdef SIMDJSON_NAIVE_STRUCTURAL // You should never need this naive approach, but it can be useful // for research purposes const __m256i mask_open_brace = _mm256_set1_epi8(0x7b); const __m256i mask_close_brace = _mm256_set1_epi8(0x7d); const __m256i mask_open_bracket = _mm256_set1_epi8(0x5b); const __m256i mask_close_bracket = _mm256_set1_epi8(0x5d); const __m256i mask_column = _mm256_set1_epi8(0x3a); const __m256i mask_comma = _mm256_set1_epi8(0x2c); structurals = in.build_bitmask([&](auto in) { __m256i structurals = _mm256_cmpeq_epi8(in, mask_open_brace); structurals = _mm256_or_si256(structurals, _mm256_cmpeq_epi8(in, mask_close_brace)); structurals = _mm256_or_si256(structurals, _mm256_cmpeq_epi8(in, mask_open_bracket)); structurals = _mm256_or_si256(structurals, _mm256_cmpeq_epi8(in, mask_close_bracket)); structurals = _mm256_or_si256(structurals, _mm256_cmpeq_epi8(in, mask_column)); structurals = _mm256_or_si256(structurals, _mm256_cmpeq_epi8(in, mask_comma)); return structurals; }); const __m256i mask_space = _mm256_set1_epi8(0x20); const __m256i mask_linefeed = _mm256_set1_epi8(0x0a); const __m256i mask_tab = _mm256_set1_epi8(0x09); const __m256i mask_carriage = _mm256_set1_epi8(0x0d); whitespace = in.build_bitmask([&](auto in) { __m256i space = _mm256_cmpeq_epi8(in, mask_space); space = _mm256_or_si256(space, _mm256_cmpeq_epi8(in, mask_linefeed)); space = _mm256_or_si256(space, _mm256_cmpeq_epi8(in, mask_tab)); space = _mm256_or_si256(space, _mm256_cmpeq_epi8(in, mask_carriage)); return space; }); // end of naive approach #else // SIMDJSON_NAIVE_STRUCTURAL // clang-format off const __m256i structural_table = _mm256_setr_epi8(44, 125, 0, 0, 0xc0u, 0, 0, 0, 0, 0, 0, 0, 0, 0, 58, 123, 44, 125, 0, 0, 0xc0u, 0, 0, 0, 0, 0, 0, 0, 0, 0, 58, 123); const __m256i white_table = _mm256_setr_epi8( 32, 100, 100, 100, 17, 100, 113, 2, 100, 9, 10, 112, 100, 13, 100, 100, 32, 100, 100, 100, 17, 100, 113, 2, 100, 9, 10, 112, 100, 13, 100, 100); // clang-format on const __m256i struct_offset = _mm256_set1_epi8(0xd4u); const __m256i struct_mask = _mm256_set1_epi8(32); whitespace = in.build_bitmask([&](auto chunk) { return _mm256_cmpeq_epi8(chunk, _mm256_shuffle_epi8(white_table, chunk)); }); structurals = in.build_bitmask([&](auto chunk) { __m256i struct_r1 = _mm256_add_epi8(struct_offset, chunk); __m256i struct_r2 = _mm256_or_si256(chunk, struct_mask); __m256i struct_r3 = _mm256_shuffle_epi8(structural_table, struct_r1); return _mm256_cmpeq_epi8(struct_r2, struct_r3); }); #endif // else SIMDJSON_NAIVE_STRUCTURAL } // flatten out values in 'bits' assuming that they are to have values of idx // plus their position in the bitvector, and store these indexes at // base_ptr[base] incrementing base as we go // will potentially store extra values beyond end of valid bits, so base_ptr // needs to be large enough to handle this really_inline void flatten_bits(uint32_t *base_ptr, uint32_t &base, uint32_t idx, uint64_t bits) { // In some instances, the next branch is expensive because it is mispredicted. // Unfortunately, in other cases, // it helps tremendously.
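  /* Illustrative sketch (added for exposition, not from the simdjson sources):
   * the unrolled blocks below are equivalent to this simple loop; unrolling in
   * groups of eight lets the common case of only a few set bits per 64-byte
   * block proceed without a hard-to-predict per-bit branch, at the price of
   * writing a few extra indexes past the last valid one. Kept inert under
   * #if 0: */
#if 0
  uint32_t sketch_base = base;
  uint64_t remaining = bits;
  while (remaining != 0) {
    base_ptr[sketch_base++] = (idx - 64) + trailing_zeroes(remaining);
    remaining = remaining & (remaining - 1); /* clear lowest set bit, like _blsr_u64 */
  }
  base = sketch_base;
#endif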
if (bits == 0) return; uint32_t cnt = _mm_popcnt_u64(bits); uint32_t next_base = base + cnt; idx -= 64; base_ptr += base; { base_ptr[0] = idx + trailing_zeroes(bits); bits = _blsr_u64(bits); base_ptr[1] = idx + trailing_zeroes(bits); bits = _blsr_u64(bits); base_ptr[2] = idx + trailing_zeroes(bits); bits = _blsr_u64(bits); base_ptr[3] = idx + trailing_zeroes(bits); bits = _blsr_u64(bits); base_ptr[4] = idx + trailing_zeroes(bits); bits = _blsr_u64(bits); base_ptr[5] = idx + trailing_zeroes(bits); bits = _blsr_u64(bits); base_ptr[6] = idx + trailing_zeroes(bits); bits = _blsr_u64(bits); base_ptr[7] = idx + trailing_zeroes(bits); bits = _blsr_u64(bits); base_ptr += 8; } // We hope that the next branch is easily predicted. if (cnt > 8) { base_ptr[0] = idx + trailing_zeroes(bits); bits = _blsr_u64(bits); base_ptr[1] = idx + trailing_zeroes(bits); bits = _blsr_u64(bits); base_ptr[2] = idx + trailing_zeroes(bits); bits = _blsr_u64(bits); base_ptr[3] = idx + trailing_zeroes(bits); bits = _blsr_u64(bits); base_ptr[4] = idx + trailing_zeroes(bits); bits = _blsr_u64(bits); base_ptr[5] = idx + trailing_zeroes(bits); bits = _blsr_u64(bits); base_ptr[6] = idx + trailing_zeroes(bits); bits = _blsr_u64(bits); base_ptr[7] = idx + trailing_zeroes(bits); bits = _blsr_u64(bits); base_ptr += 8; } if (cnt > 16) { // unluckly: we rarely get here // since it means having one structural or pseudo-structral element // every 4 characters (possible with inputs like "","","",...). do { base_ptr[0] = idx + trailing_zeroes(bits); bits = _blsr_u64(bits); base_ptr++; } while (bits != 0); } base = next_base; } // This file contains the common code every implementation uses in stage1 // It is intended to be included multiple times and compiled multiple times // We assume the file in which it is included already includes // "simdjson/stage1_find_marks.h" (this simplifies amalgation) // return a bitvector indicating where we have characters that end an odd-length // sequence of backslashes (and thus change the behavior of the next character // to follow). A even-length sequence of backslashes, and, for that matter, the // largest even-length prefix of our odd-length sequence of backslashes, simply // modify the behavior of the backslashes themselves. // We also update the prev_iter_ends_odd_backslash reference parameter to // indicate whether we end an iteration on an odd-length sequence of // backslashes, which modifies our subsequent search for odd-length // sequences of backslashes in an obvious way. 
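// As a concrete illustration (added for exposition, not from the simdjson
// sources): suppose positions 1, 2 and 3 of a word hold backslashes and
// position 4 holds a quote. Adding the run's start edge back into the
// backslash bits makes the carry pop out at position 4, one past the run;
// the run started on an odd position and the carry landed on an even one,
// so the run length is odd and position 4 (the quote) is marked as escaped.
// Kept inert under #if 0:
#if 0
static void odd_backslash_run_example() {
  uint8_t bs_bits = 0x0e;                          /* backslashes at 1, 2, 3 */
  uint8_t even_bits = 0x55;
  uint8_t start_edges = bs_bits & ~(bs_bits << 1); /* 0x02: run starts at 1  */
  uint8_t carries = bs_bits + start_edges;         /* 0x10: carry at 4       */
  uint8_t run_ends = carries & ~bs_bits;           /* 0x10                   */
  uint8_t odd_ends = run_ends & even_bits;         /* 0x10: odd-length run   */
}
#endif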
really_inline uint64_t find_odd_backslash_sequences( simd_input in, uint64_t &prev_iter_ends_odd_backslash) { const uint64_t even_bits = 0x5555555555555555ULL; const uint64_t odd_bits = ~even_bits; uint64_t bs_bits = in.eq('\\'); uint64_t start_edges = bs_bits & ~(bs_bits << 1); /* flip lowest if we have an odd-length run at the end of the prior * iteration */ uint64_t even_start_mask = even_bits ^ prev_iter_ends_odd_backslash; uint64_t even_starts = start_edges & even_start_mask; uint64_t odd_starts = start_edges & ~even_start_mask; uint64_t even_carries = bs_bits + even_starts; uint64_t odd_carries; /* must record the carry-out of our odd-carries out of bit 63; this * indicates whether the sense of any edge going to the next iteration * should be flipped */ bool iter_ends_odd_backslash = add_overflow(bs_bits, odd_starts, &odd_carries); odd_carries |= prev_iter_ends_odd_backslash; /* push in bit zero as a * potential end if we had an * odd-numbered run at the * end of the previous * iteration */ prev_iter_ends_odd_backslash = iter_ends_odd_backslash ? 0x1ULL : 0x0ULL; uint64_t even_carry_ends = even_carries & ~bs_bits; uint64_t odd_carry_ends = odd_carries & ~bs_bits; uint64_t even_start_odd_end = even_carry_ends & odd_bits; uint64_t odd_start_even_end = odd_carry_ends & even_bits; uint64_t odd_ends = even_start_odd_end | odd_start_even_end; return odd_ends; } // return both the quote mask (which is a half-open mask that covers the first // quote // in an unescaped quote pair and everything in the quote pair) and the quote // bits, which are the simple // unescaped quoted bits. We also update the prev_iter_inside_quote value to // tell the next iteration // whether we finished the final iteration inside a quote pair; if so, this // inverts our behavior of // whether we're inside quotes for the next iteration. // Note that we don't do any error checking to see if we have backslash // sequences outside quotes; these // backslash sequences (of any length) will be detected elsewhere. really_inline uint64_t find_quote_mask_and_bits( simd_input in, uint64_t odd_ends, uint64_t &prev_iter_inside_quote, uint64_t "e_bits, uint64_t &error_mask) { quote_bits = in.eq('"'); quote_bits = quote_bits & ~odd_ends; uint64_t quote_mask = compute_quote_mask(quote_bits); quote_mask ^= prev_iter_inside_quote; /* All Unicode characters may be placed within the * quotation marks, except for the characters that MUST be escaped: * quotation mark, reverse solidus, and the control characters (U+0000 * through U+001F). * https://tools.ietf.org/html/rfc8259 */ uint64_t unescaped = in.lteq(0x1F); error_mask |= quote_mask & unescaped; /* right shift of a signed value expected to be well-defined and standard * compliant as of C++20, * John Regher from Utah U. says this is fine code */ prev_iter_inside_quote = static_cast(static_cast(quote_mask) >> 63); return quote_mask; } really_inline uint64_t finalize_structurals( uint64_t structurals, uint64_t whitespace, uint64_t quote_mask, uint64_t quote_bits, uint64_t &prev_iter_ends_pseudo_pred) { // mask off anything inside quotes structurals &= ~quote_mask; // add the real quote bits back into our bit_mask as well, so we can // quickly traverse the strings we've spent all this trouble gathering structurals |= quote_bits; // Now, establish "pseudo-structural characters". These are non-whitespace // characters that are (a) outside quotes and (b) have a predecessor that's // either whitespace or a structural character. 
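// (Illustrative aside, added for exposition and not from the simdjson
// sources: for the six bytes "[ true", the '[' is structural at bit 0 and the
// space is whitespace at bit 1; shifting those candidates left by one and
// clearing whitespace and quoted regions leaves bit 2, so the 't' that starts
// the atom becomes pseudo-structural. Kept inert under #if 0.)
#if 0
  uint64_t example_structurals = 0x01;          /* '[' at bit 0 */
  uint64_t example_whitespace = 0x02;           /* ' ' at bit 1 */
  uint64_t example_shifted = (example_structurals | example_whitespace) << 1;
  uint64_t example_pseudo = example_shifted & ~example_whitespace; /* bit 2: 't' */
#endif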
This means that subsequent // passes will get a chance to encounter the first character of every string // of non-whitespace and, if we're parsing an atom like true/false/null or a // number we can stop at the first whitespace or structural character // following it. // a qualified predecessor is something that can happen 1 position before an // pseudo-structural character uint64_t pseudo_pred = structurals | whitespace; uint64_t shifted_pseudo_pred = (pseudo_pred << 1) | prev_iter_ends_pseudo_pred; prev_iter_ends_pseudo_pred = pseudo_pred >> 63; uint64_t pseudo_structurals = shifted_pseudo_pred & (~whitespace) & (~quote_mask); structurals |= pseudo_structurals; // now, we've used our close quotes all we need to. So let's switch them off // they will be off in the quote mask and on in quote bits. structurals &= ~(quote_bits & ~quote_mask); return structurals; } // Find structural bits in a 64-byte chunk. really_inline void find_structural_bits_64( const uint8_t *buf, size_t idx, uint32_t *base_ptr, uint32_t &base, uint64_t &prev_iter_ends_odd_backslash, uint64_t &prev_iter_inside_quote, uint64_t &prev_iter_ends_pseudo_pred, uint64_t &structurals, uint64_t &error_mask, utf8_checker &utf8_state) { simd_input in(buf); utf8_state.check_next_input(in); /* detect odd sequences of backslashes */ uint64_t odd_ends = find_odd_backslash_sequences( in, prev_iter_ends_odd_backslash); /* detect insides of quote pairs ("quote_mask") and also our quote_bits * themselves */ uint64_t quote_bits; uint64_t quote_mask = find_quote_mask_and_bits( in, odd_ends, prev_iter_inside_quote, quote_bits, error_mask); /* take the previous iterations structural bits, not our current * iteration, * and flatten */ flatten_bits(base_ptr, base, idx, structurals); uint64_t whitespace; find_whitespace_and_structurals(in, whitespace, structurals); /* fixup structurals to reflect quotes and add pseudo-structural * characters */ structurals = finalize_structurals(structurals, whitespace, quote_mask, quote_bits, prev_iter_ends_pseudo_pred); } int find_structural_bits(const uint8_t *buf, size_t len, simdjson::ParsedJson &pj) { if (len > pj.byte_capacity) { std::cerr << "Your ParsedJson object only supports documents up to " << pj.byte_capacity << " bytes but you are trying to process " << len << " bytes" << std::endl; return simdjson::CAPACITY; } uint32_t *base_ptr = pj.structural_indexes; uint32_t base = 0; utf8_checker utf8_state; /* we have padded the input out to 64 byte multiple with the remainder * being zeros persistent state across loop does the last iteration end * with an odd-length sequence of backslashes? */ /* either 0 or 1, but a 64-bit value */ uint64_t prev_iter_ends_odd_backslash = 0ULL; /* does the previous iteration end inside a double-quote pair? */ uint64_t prev_iter_inside_quote = 0ULL; /* either all zeros or all ones * does the previous iteration end on something that is a * predecessor of a pseudo-structural character - i.e. * whitespace or a structural character effectively the very * first char is considered to follow "whitespace" for the * purposes of pseudo-structural character detection so we * initialize to 1 */ uint64_t prev_iter_ends_pseudo_pred = 1ULL; /* structurals are persistent state across loop as we flatten them on the * subsequent iteration into our array pointed to be base_ptr. 
* This is harmless on the first iteration as structurals==0 * and is done for performance reasons; we can hide some of the latency of * the * expensive carryless multiply in the previous step with this work */ uint64_t structurals = 0; size_t lenminus64 = len < 64 ? 0 : len - 64; size_t idx = 0; uint64_t error_mask = 0; /* for unescaped characters within strings (ASCII code points < 0x20) */ for (; idx < lenminus64; idx += 64) { find_structural_bits_64(&buf[idx], idx, base_ptr, base, prev_iter_ends_odd_backslash, prev_iter_inside_quote, prev_iter_ends_pseudo_pred, structurals, error_mask, utf8_state); } /* If we have a final chunk of less than 64 bytes, pad it to 64 with * spaces before processing it (otherwise, we risk invalidating the UTF-8 * checks). */ if (idx < len) { uint8_t tmp_buf[64]; memset(tmp_buf, 0x20, 64); memcpy(tmp_buf, buf + idx, len - idx); find_structural_bits_64(&tmp_buf[0], idx, base_ptr, base, prev_iter_ends_odd_backslash, prev_iter_inside_quote, prev_iter_ends_pseudo_pred, structurals, error_mask, utf8_state); idx += 64; } /* is last string quote closed? */ if (prev_iter_inside_quote) { return simdjson::UNCLOSED_STRING; } /* finally, flatten out the remaining structurals from the last iteration */ flatten_bits(base_ptr, base, idx, structurals); pj.n_structural_indexes = base; /* a valid JSON file cannot have zero structural indexes - we should have * found something */ if (pj.n_structural_indexes == 0u) { return simdjson::EMPTY; } if (base_ptr[pj.n_structural_indexes - 1] > len) { return simdjson::UNEXPECTED_ERROR; } if (len != base_ptr[pj.n_structural_indexes - 1]) { /* the string might not be NULL terminated, but we add a virtual NULL * ending character. */ base_ptr[pj.n_structural_indexes++] = len; } /* make it safe to dereference one beyond this array */ base_ptr[pj.n_structural_indexes] = 0; if (error_mask) { return simdjson::UNESCAPED_CHARS; } return utf8_state.errors(); } } // namespace haswell UNTARGET_REGION TARGET_HASWELL namespace simdjson { template <> int find_structural_bits(const uint8_t *buf, size_t len, simdjson::ParsedJson &pj) { return haswell::find_structural_bits(buf, len, pj); } } // namespace simdjson UNTARGET_REGION #endif // IS_X86_64 #endif // SIMDJSON_HASWELL_STAGE1_FIND_MARKS_H /* end file src/haswell/stage1_find_marks.h */ /* begin file src/westmere/stage1_find_marks.h */ #ifndef SIMDJSON_WESTMERE_STAGE1_FIND_MARKS_H #define SIMDJSON_WESTMERE_STAGE1_FIND_MARKS_H #ifdef IS_X86_64 TARGET_WESTMERE namespace simdjson::westmere { really_inline uint64_t compute_quote_mask(uint64_t quote_bits) { return _mm_cvtsi128_si64(_mm_clmulepi64_si128( _mm_set_epi64x(0ULL, quote_bits), _mm_set1_epi8(0xFFu), 0)); } really_inline void find_whitespace_and_structurals(simd_input in, uint64_t &whitespace, uint64_t &structurals) { const __m128i structural_table = _mm_setr_epi8(44, 125, 0, 0, 0xc0u, 0, 0, 0, 0, 0, 0, 0, 0, 0, 58, 123); const __m128i white_table = _mm_setr_epi8(32, 100, 100, 100, 17, 100, 113, 2, 100, 9, 10, 112, 100, 13, 100, 100); const __m128i struct_offset = _mm_set1_epi8(0xd4u); const __m128i struct_mask = _mm_set1_epi8(32); whitespace = in.build_bitmask([&](auto chunk) { return _mm_cmpeq_epi8(chunk, _mm_shuffle_epi8(white_table, chunk)); }); structurals = in.build_bitmask([&](auto chunk) { __m128i struct_r1 = _mm_add_epi8(struct_offset, chunk); __m128i struct_r2 = _mm_or_si128(chunk, struct_mask); __m128i struct_r3 = _mm_shuffle_epi8(structural_table, struct_r1); return _mm_cmpeq_epi8(struct_r2, struct_r3); }); } // This file contains a 
non-architecture-specific version of "flatten" used in stage1. // It is intended to be included multiple times and compiled multiple times // We assume the file in which it is include already includes // "simdjson/stage1_find_marks.h" (this simplifies amalgation) #ifdef SIMDJSON_NAIVE_FLATTEN // useful for benchmarking // This is just a naive implementation. It should be normally // disable, but can be used for research purposes to compare // again our optimized version. really_inline void flatten_bits(uint32_t *base_ptr, uint32_t &base, uint32_t idx, uint64_t bits) { uint32_t *out_ptr = base_ptr + base; idx -= 64; while (bits != 0) { out_ptr[0] = idx + trailing_zeroes(bits); bits = bits & (bits - 1); out_ptr++; } base = (out_ptr - base_ptr); } #else // SIMDJSON_NAIVE_FLATTEN // flatten out values in 'bits' assuming that they are are to have values of idx // plus their position in the bitvector, and store these indexes at // base_ptr[base] incrementing base as we go // will potentially store extra values beyond end of valid bits, so base_ptr // needs to be large enough to handle this really_inline void flatten_bits(uint32_t *base_ptr, uint32_t &base, uint32_t idx, uint64_t bits) { // In some instances, the next branch is expensive because it is mispredicted. // Unfortunately, in other cases, // it helps tremendously. if (bits == 0) return; uint32_t cnt = hamming(bits); uint32_t next_base = base + cnt; idx -= 64; base_ptr += base; { base_ptr[0] = idx + trailing_zeroes(bits); bits = bits & (bits - 1); base_ptr[1] = idx + trailing_zeroes(bits); bits = bits & (bits - 1); base_ptr[2] = idx + trailing_zeroes(bits); bits = bits & (bits - 1); base_ptr[3] = idx + trailing_zeroes(bits); bits = bits & (bits - 1); base_ptr[4] = idx + trailing_zeroes(bits); bits = bits & (bits - 1); base_ptr[5] = idx + trailing_zeroes(bits); bits = bits & (bits - 1); base_ptr[6] = idx + trailing_zeroes(bits); bits = bits & (bits - 1); base_ptr[7] = idx + trailing_zeroes(bits); bits = bits & (bits - 1); base_ptr += 8; } // We hope that the next branch is easily predicted. if (cnt > 8) { base_ptr[0] = idx + trailing_zeroes(bits); bits = bits & (bits - 1); base_ptr[1] = idx + trailing_zeroes(bits); bits = bits & (bits - 1); base_ptr[2] = idx + trailing_zeroes(bits); bits = bits & (bits - 1); base_ptr[3] = idx + trailing_zeroes(bits); bits = bits & (bits - 1); base_ptr[4] = idx + trailing_zeroes(bits); bits = bits & (bits - 1); base_ptr[5] = idx + trailing_zeroes(bits); bits = bits & (bits - 1); base_ptr[6] = idx + trailing_zeroes(bits); bits = bits & (bits - 1); base_ptr[7] = idx + trailing_zeroes(bits); bits = bits & (bits - 1); base_ptr += 8; } if (cnt > 16) { // unluckly: we rarely get here // since it means having one structural or pseudo-structral element // every 4 characters (possible with inputs like "","","",...). do { base_ptr[0] = idx + trailing_zeroes(bits); bits = bits & (bits - 1); base_ptr++; } while (bits != 0); } base = next_base; } #endif // SIMDJSON_NAIVE_FLATTEN // This file contains the common code every implementation uses in stage1 // It is intended to be included multiple times and compiled multiple times // We assume the file in which it is included already includes // "simdjson/stage1_find_marks.h" (this simplifies amalgation) // return a bitvector indicating where we have characters that end an odd-length // sequence of backslashes (and thus change the behavior of the next character // to follow). 
A even-length sequence of backslashes, and, for that matter, the // largest even-length prefix of our odd-length sequence of backslashes, simply // modify the behavior of the backslashes themselves. // We also update the prev_iter_ends_odd_backslash reference parameter to // indicate whether we end an iteration on an odd-length sequence of // backslashes, which modifies our subsequent search for odd-length // sequences of backslashes in an obvious way. really_inline uint64_t find_odd_backslash_sequences( simd_input in, uint64_t &prev_iter_ends_odd_backslash) { const uint64_t even_bits = 0x5555555555555555ULL; const uint64_t odd_bits = ~even_bits; uint64_t bs_bits = in.eq('\\'); uint64_t start_edges = bs_bits & ~(bs_bits << 1); /* flip lowest if we have an odd-length run at the end of the prior * iteration */ uint64_t even_start_mask = even_bits ^ prev_iter_ends_odd_backslash; uint64_t even_starts = start_edges & even_start_mask; uint64_t odd_starts = start_edges & ~even_start_mask; uint64_t even_carries = bs_bits + even_starts; uint64_t odd_carries; /* must record the carry-out of our odd-carries out of bit 63; this * indicates whether the sense of any edge going to the next iteration * should be flipped */ bool iter_ends_odd_backslash = add_overflow(bs_bits, odd_starts, &odd_carries); odd_carries |= prev_iter_ends_odd_backslash; /* push in bit zero as a * potential end if we had an * odd-numbered run at the * end of the previous * iteration */ prev_iter_ends_odd_backslash = iter_ends_odd_backslash ? 0x1ULL : 0x0ULL; uint64_t even_carry_ends = even_carries & ~bs_bits; uint64_t odd_carry_ends = odd_carries & ~bs_bits; uint64_t even_start_odd_end = even_carry_ends & odd_bits; uint64_t odd_start_even_end = odd_carry_ends & even_bits; uint64_t odd_ends = even_start_odd_end | odd_start_even_end; return odd_ends; } // return both the quote mask (which is a half-open mask that covers the first // quote // in an unescaped quote pair and everything in the quote pair) and the quote // bits, which are the simple // unescaped quoted bits. We also update the prev_iter_inside_quote value to // tell the next iteration // whether we finished the final iteration inside a quote pair; if so, this // inverts our behavior of // whether we're inside quotes for the next iteration. // Note that we don't do any error checking to see if we have backslash // sequences outside quotes; these // backslash sequences (of any length) will be detected elsewhere. really_inline uint64_t find_quote_mask_and_bits( simd_input in, uint64_t odd_ends, uint64_t &prev_iter_inside_quote, uint64_t "e_bits, uint64_t &error_mask) { quote_bits = in.eq('"'); quote_bits = quote_bits & ~odd_ends; uint64_t quote_mask = compute_quote_mask(quote_bits); quote_mask ^= prev_iter_inside_quote; /* All Unicode characters may be placed within the * quotation marks, except for the characters that MUST be escaped: * quotation mark, reverse solidus, and the control characters (U+0000 * through U+001F). * https://tools.ietf.org/html/rfc8259 */ uint64_t unescaped = in.lteq(0x1F); error_mask |= quote_mask & unescaped; /* right shift of a signed value expected to be well-defined and standard * compliant as of C++20, * John Regher from Utah U. 
says this is fine code */ prev_iter_inside_quote = static_cast(static_cast(quote_mask) >> 63); return quote_mask; } really_inline uint64_t finalize_structurals( uint64_t structurals, uint64_t whitespace, uint64_t quote_mask, uint64_t quote_bits, uint64_t &prev_iter_ends_pseudo_pred) { // mask off anything inside quotes structurals &= ~quote_mask; // add the real quote bits back into our bit_mask as well, so we can // quickly traverse the strings we've spent all this trouble gathering structurals |= quote_bits; // Now, establish "pseudo-structural characters". These are non-whitespace // characters that are (a) outside quotes and (b) have a predecessor that's // either whitespace or a structural character. This means that subsequent // passes will get a chance to encounter the first character of every string // of non-whitespace and, if we're parsing an atom like true/false/null or a // number we can stop at the first whitespace or structural character // following it. // a qualified predecessor is something that can happen 1 position before an // pseudo-structural character uint64_t pseudo_pred = structurals | whitespace; uint64_t shifted_pseudo_pred = (pseudo_pred << 1) | prev_iter_ends_pseudo_pred; prev_iter_ends_pseudo_pred = pseudo_pred >> 63; uint64_t pseudo_structurals = shifted_pseudo_pred & (~whitespace) & (~quote_mask); structurals |= pseudo_structurals; // now, we've used our close quotes all we need to. So let's switch them off // they will be off in the quote mask and on in quote bits. structurals &= ~(quote_bits & ~quote_mask); return structurals; } // Find structural bits in a 64-byte chunk. really_inline void find_structural_bits_64( const uint8_t *buf, size_t idx, uint32_t *base_ptr, uint32_t &base, uint64_t &prev_iter_ends_odd_backslash, uint64_t &prev_iter_inside_quote, uint64_t &prev_iter_ends_pseudo_pred, uint64_t &structurals, uint64_t &error_mask, utf8_checker &utf8_state) { simd_input in(buf); utf8_state.check_next_input(in); /* detect odd sequences of backslashes */ uint64_t odd_ends = find_odd_backslash_sequences( in, prev_iter_ends_odd_backslash); /* detect insides of quote pairs ("quote_mask") and also our quote_bits * themselves */ uint64_t quote_bits; uint64_t quote_mask = find_quote_mask_and_bits( in, odd_ends, prev_iter_inside_quote, quote_bits, error_mask); /* take the previous iterations structural bits, not our current * iteration, * and flatten */ flatten_bits(base_ptr, base, idx, structurals); uint64_t whitespace; find_whitespace_and_structurals(in, whitespace, structurals); /* fixup structurals to reflect quotes and add pseudo-structural * characters */ structurals = finalize_structurals(structurals, whitespace, quote_mask, quote_bits, prev_iter_ends_pseudo_pred); } int find_structural_bits(const uint8_t *buf, size_t len, simdjson::ParsedJson &pj) { if (len > pj.byte_capacity) { std::cerr << "Your ParsedJson object only supports documents up to " << pj.byte_capacity << " bytes but you are trying to process " << len << " bytes" << std::endl; return simdjson::CAPACITY; } uint32_t *base_ptr = pj.structural_indexes; uint32_t base = 0; utf8_checker utf8_state; /* we have padded the input out to 64 byte multiple with the remainder * being zeros persistent state across loop does the last iteration end * with an odd-length sequence of backslashes? */ /* either 0 or 1, but a 64-bit value */ uint64_t prev_iter_ends_odd_backslash = 0ULL; /* does the previous iteration end inside a double-quote pair? 
*/ uint64_t prev_iter_inside_quote = 0ULL; /* either all zeros or all ones * does the previous iteration end on something that is a * predecessor of a pseudo-structural character - i.e. * whitespace or a structural character effectively the very * first char is considered to follow "whitespace" for the * purposes of pseudo-structural character detection so we * initialize to 1 */ uint64_t prev_iter_ends_pseudo_pred = 1ULL; /* structurals are persistent state across loop as we flatten them on the * subsequent iteration into our array pointed to be base_ptr. * This is harmless on the first iteration as structurals==0 * and is done for performance reasons; we can hide some of the latency of * the * expensive carryless multiply in the previous step with this work */ uint64_t structurals = 0; size_t lenminus64 = len < 64 ? 0 : len - 64; size_t idx = 0; uint64_t error_mask = 0; /* for unescaped characters within strings (ASCII code points < 0x20) */ for (; idx < lenminus64; idx += 64) { find_structural_bits_64(&buf[idx], idx, base_ptr, base, prev_iter_ends_odd_backslash, prev_iter_inside_quote, prev_iter_ends_pseudo_pred, structurals, error_mask, utf8_state); } /* If we have a final chunk of less than 64 bytes, pad it to 64 with * spaces before processing it (otherwise, we risk invalidating the UTF-8 * checks). */ if (idx < len) { uint8_t tmp_buf[64]; memset(tmp_buf, 0x20, 64); memcpy(tmp_buf, buf + idx, len - idx); find_structural_bits_64(&tmp_buf[0], idx, base_ptr, base, prev_iter_ends_odd_backslash, prev_iter_inside_quote, prev_iter_ends_pseudo_pred, structurals, error_mask, utf8_state); idx += 64; } /* is last string quote closed? */ if (prev_iter_inside_quote) { return simdjson::UNCLOSED_STRING; } /* finally, flatten out the remaining structurals from the last iteration */ flatten_bits(base_ptr, base, idx, structurals); pj.n_structural_indexes = base; /* a valid JSON file cannot have zero structural indexes - we should have * found something */ if (pj.n_structural_indexes == 0u) { return simdjson::EMPTY; } if (base_ptr[pj.n_structural_indexes - 1] > len) { return simdjson::UNEXPECTED_ERROR; } if (len != base_ptr[pj.n_structural_indexes - 1]) { /* the string might not be NULL terminated, but we add a virtual NULL * ending character. 
*/ base_ptr[pj.n_structural_indexes++] = len; } /* make it safe to dereference one beyond this array */ base_ptr[pj.n_structural_indexes] = 0; if (error_mask) { return simdjson::UNESCAPED_CHARS; } return utf8_state.errors(); } } // namespace westmere UNTARGET_REGION TARGET_WESTMERE namespace simdjson { template <> int find_structural_bits(const uint8_t *buf, size_t len, simdjson::ParsedJson &pj) { return westmere::find_structural_bits(buf, len, pj); } } // namespace simdjson UNTARGET_REGION #endif // IS_X86_64 #endif // SIMDJSON_WESTMERE_STAGE1_FIND_MARKS_H /* end file src/westmere/stage1_find_marks.h */ /* begin file src/stage1_find_marks.cpp */ namespace { // for when clmul is unavailable [[maybe_unused]] uint64_t portable_compute_quote_mask(uint64_t quote_bits) { uint64_t quote_mask = quote_bits ^ (quote_bits << 1); quote_mask = quote_mask ^ (quote_mask << 2); quote_mask = quote_mask ^ (quote_mask << 4); quote_mask = quote_mask ^ (quote_mask << 8); quote_mask = quote_mask ^ (quote_mask << 16); quote_mask = quote_mask ^ (quote_mask << 32); return quote_mask; } } // namespace /* end file src/stage1_find_marks.cpp */ /* begin file src/stringparsing.h */ #ifndef SIMDJSON_STRINGPARSING_H #define SIMDJSON_STRINGPARSING_H #ifdef JSON_TEST_STRINGS void found_string(const uint8_t *buf, const uint8_t *parsed_begin, const uint8_t *parsed_end); void found_bad_string(const uint8_t *buf); #endif namespace simdjson { // begin copypasta // These chars yield themselves: " \ / // b -> backspace, f -> formfeed, n -> newline, r -> cr, t -> horizontal tab // u not handled in this table as it's complex static const uint8_t escape_map[256] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x0. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x22, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x2f, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x4. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x5c, 0, 0, 0, // 0x5. 0, 0, 0x08, 0, 0, 0, 0x0c, 0, 0, 0, 0, 0, 0, 0, 0x0a, 0, // 0x6. 0, 0, 0x0d, 0, 0x09, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x7. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; // handle a unicode codepoint // write appropriate values into dest // src will advance 6 bytes or 12 bytes // dest will advance a variable amount (return via pointer) // return true if the unicode codepoint was valid // We work in little-endian then swap at write time WARN_UNUSED really_inline bool handle_unicode_codepoint(const uint8_t **src_ptr, uint8_t **dst_ptr) { // hex_to_u32_nocheck fills high 16 bits of the return value with 1s if the // conversion isn't valid; we defer the check for this to inside the // multilingual plane check uint32_t code_point = hex_to_u32_nocheck(*src_ptr + 2); *src_ptr += 6; // check for low surrogate for characters outside the Basic // Multilingual Plane. if (code_point >= 0xd800 && code_point < 0xdc00) { if (((*src_ptr)[0] != '\\') || (*src_ptr)[1] != 'u') { return false; } uint32_t code_point_2 = hex_to_u32_nocheck(*src_ptr + 2); // if the first code point is invalid we will get here, as we will go past // the check for being outside the Basic Multilingual plane. 
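// (Illustrative worked example, added for exposition and not from the simdjson
// sources: for the escape sequence "\ud83d\ude00" the two surrogates combine
// just below as (((0xd83d - 0xd800) << 10) | (0xde00 - 0xdc00)) + 0x10000 =
// 0x1f600, and codepoint_to_utf8 then emits the four bytes F0 9F 98 80.)
#if 0
  uint32_t example_hi = 0xd83d, example_lo = 0xde00;
  uint32_t example_cp =
      (((example_hi - 0xd800) << 10) | (example_lo - 0xdc00)) + 0x10000; /* 0x1f600 */
#endif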
If we don't // find a \u immediately afterwards we fail out anyhow, but if we do, // this check catches both the case of the first code point being invalid // or the second code point being invalid. if ((code_point | code_point_2) >> 16) { return false; } code_point = (((code_point - 0xd800) << 10) | (code_point_2 - 0xdc00)) + 0x10000; *src_ptr += 6; } size_t offset = codepoint_to_utf8(code_point, *dst_ptr); *dst_ptr += offset; return offset > 0; } // Holds backslashes and quotes locations. struct parse_string_helper { uint32_t bs_bits; uint32_t quote_bits; }; } // namespace simdjson #endif // SIMDJSON_STRINGPARSING_H /* end file src/stringparsing.h */ /* begin file src/arm64/stringparsing.h */ #ifndef SIMDJSON_ARM64_STRINGPARSING_H #define SIMDJSON_ARM64_STRINGPARSING_H #ifdef IS_ARM64 namespace simdjson::arm64 { really_inline parse_string_helper find_bs_bits_and_quote_bits(const uint8_t *src, uint8_t *dst) { // this can read up to 31 bytes beyond the buffer size, but we require // SIMDJSON_PADDING of padding static_assert(2 * sizeof(uint8x16_t) - 1 <= SIMDJSON_PADDING); uint8x16_t v0 = vld1q_u8(src); uint8x16_t v1 = vld1q_u8(src + 16); vst1q_u8(dst, v0); vst1q_u8(dst + 16, v1); uint8x16_t bs_mask = vmovq_n_u8('\\'); uint8x16_t qt_mask = vmovq_n_u8('"'); const uint8x16_t bit_mask = {0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80}; uint8x16_t cmp_bs_0 = vceqq_u8(v0, bs_mask); uint8x16_t cmp_bs_1 = vceqq_u8(v1, bs_mask); uint8x16_t cmp_qt_0 = vceqq_u8(v0, qt_mask); uint8x16_t cmp_qt_1 = vceqq_u8(v1, qt_mask); cmp_bs_0 = vandq_u8(cmp_bs_0, bit_mask); cmp_bs_1 = vandq_u8(cmp_bs_1, bit_mask); cmp_qt_0 = vandq_u8(cmp_qt_0, bit_mask); cmp_qt_1 = vandq_u8(cmp_qt_1, bit_mask); uint8x16_t sum0 = vpaddq_u8(cmp_bs_0, cmp_bs_1); uint8x16_t sum1 = vpaddq_u8(cmp_qt_0, cmp_qt_1); sum0 = vpaddq_u8(sum0, sum1); sum0 = vpaddq_u8(sum0, sum0); return { vgetq_lane_u32(vreinterpretq_u32_u8(sum0), 0), // bs_bits vgetq_lane_u32(vreinterpretq_u32_u8(sum0), 1) // quote_bits }; } // This file contains the common code every implementation uses // It is intended to be included multiple times and compiled multiple times // We assume the file in which it is include already includes // "stringparsing.h" (this simplifies amalgation) WARN_UNUSED really_inline bool parse_string(UNUSED const uint8_t *buf, UNUSED size_t len, ParsedJson &pj, UNUSED const uint32_t depth, UNUSED uint32_t offset) { pj.write_tape(pj.current_string_buf_loc - pj.string_buf, '"'); const uint8_t *src = &buf[offset + 1]; /* we know that buf at offset is a " */ uint8_t *dst = pj.current_string_buf_loc + sizeof(uint32_t); const uint8_t *const start_of_string = dst; while (1) { parse_string_helper helper = find_bs_bits_and_quote_bits(src, dst); if (((helper.bs_bits - 1) & helper.quote_bits) != 0) { /* we encountered quotes first. Move dst to point to quotes and exit */ /* find out where the quote is... */ uint32_t quote_dist = trailing_zeroes(helper.quote_bits); /* NULL termination is still handy if you expect all your strings to * be NULL terminated? */ /* It comes at a small cost */ dst[quote_dist] = 0; uint32_t str_length = (dst - start_of_string) + quote_dist; memcpy(pj.current_string_buf_loc, &str_length, sizeof(uint32_t)); /***************************** * Above, check for overflow in case someone has a crazy string * (>=4GB?) _ * But only add the overflow check when the document itself exceeds * 4GB * Currently unneeded because we refuse to parse docs larger or equal * to 4GB. 
****************************/ /* we advance the pointer, accounting for the fact that we have a NULL * termination */ pj.current_string_buf_loc = dst + quote_dist + 1; return true; } if (((helper.quote_bits - 1) & helper.bs_bits) != 0) { /* find out where the backslash is */ uint32_t bs_dist = trailing_zeroes(helper.bs_bits); uint8_t escape_char = src[bs_dist + 1]; /* we encountered backslash first. Handle backslash */ if (escape_char == 'u') { /* move src/dst up to the start; they will be further adjusted within the unicode codepoint handling code. */ src += bs_dist; dst += bs_dist; if (!handle_unicode_codepoint(&src, &dst)) { return false; } } else { /* simple 1:1 conversion. Will eat bs_dist+2 characters in input and * write bs_dist+1 characters to output * note this may reach beyond the part of the buffer we've actually * seen. I think this is ok */ uint8_t escape_result = escape_map[escape_char]; if (escape_result == 0u) { return false; /* bogus escape value is an error */ } dst[bs_dist] = escape_result; src += bs_dist + 2; dst += bs_dist + 1; } } else { /* they are the same. Since they can't co-occur, it means we * encountered neither. */ if constexpr (ARCHITECTURE == Architecture::WESTMERE) { src += 16; dst += 16; } else { src += 32; dst += 32; } } } /* can't be reached */ return true; } } // namespace simdjson::arm64 #endif // IS_ARM64 #endif /* end file src/arm64/stringparsing.h */ /* begin file src/haswell/stringparsing.h */ #ifndef SIMDJSON_HASWELL_STRINGPARSING_H #define SIMDJSON_HASWELL_STRINGPARSING_H #ifdef IS_X86_64 TARGET_HASWELL namespace simdjson::haswell { really_inline parse_string_helper find_bs_bits_and_quote_bits(const uint8_t *src, uint8_t *dst) { // this can read up to 31 bytes beyond the buffer size, but we require // SIMDJSON_PADDING of padding static_assert(sizeof(__m256i) - 1 <= SIMDJSON_PADDING); __m256i v = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(src)); // store to dest unconditionally - we can overwrite the bits we don't like // later _mm256_storeu_si256(reinterpret_cast<__m256i *>(dst), v); auto quote_mask = _mm256_cmpeq_epi8(v, _mm256_set1_epi8('"')); return { static_cast<uint32_t>(_mm256_movemask_epi8( _mm256_cmpeq_epi8(v, _mm256_set1_epi8('\\')))), // bs_bits static_cast<uint32_t>(_mm256_movemask_epi8(quote_mask)) // quote_bits }; } // This file contains the common code every implementation uses // It is intended to be included multiple times and compiled multiple times // We assume the file in which it is included already includes // "stringparsing.h" (this simplifies amalgamation) WARN_UNUSED really_inline bool parse_string(UNUSED const uint8_t *buf, UNUSED size_t len, ParsedJson &pj, UNUSED const uint32_t depth, UNUSED uint32_t offset) { pj.write_tape(pj.current_string_buf_loc - pj.string_buf, '"'); const uint8_t *src = &buf[offset + 1]; /* we know that buf at offset is a " */ uint8_t *dst = pj.current_string_buf_loc + sizeof(uint32_t); const uint8_t *const start_of_string = dst; while (1) { parse_string_helper helper = find_bs_bits_and_quote_bits(src, dst); if (((helper.bs_bits - 1) & helper.quote_bits) != 0) { /* we encountered quotes first. Move dst to point to quotes and exit */ /* find out where the quote is... */ uint32_t quote_dist = trailing_zeroes(helper.quote_bits); /* NULL termination is still handy if you expect all your strings to * be NULL terminated?
*/ /* It comes at a small cost */ dst[quote_dist] = 0; uint32_t str_length = (dst - start_of_string) + quote_dist; memcpy(pj.current_string_buf_loc, &str_length, sizeof(uint32_t)); /***************************** * Above, check for overflow in case someone has a crazy string * (>=4GB?) _ * But only add the overflow check when the document itself exceeds * 4GB * Currently unneeded because we refuse to parse docs larger or equal * to 4GB. ****************************/ /* we advance the point, accounting for the fact that we have a NULL * termination */ pj.current_string_buf_loc = dst + quote_dist + 1; return true; } if (((helper.quote_bits - 1) & helper.bs_bits) != 0) { /* find out where the backspace is */ uint32_t bs_dist = trailing_zeroes(helper.bs_bits); uint8_t escape_char = src[bs_dist + 1]; /* we encountered backslash first. Handle backslash */ if (escape_char == 'u') { /* move src/dst up to the start; they will be further adjusted within the unicode codepoint handling code. */ src += bs_dist; dst += bs_dist; if (!handle_unicode_codepoint(&src, &dst)) { return false; } } else { /* simple 1:1 conversion. Will eat bs_dist+2 characters in input and * write bs_dist+1 characters to output * note this may reach beyond the part of the buffer we've actually * seen. I think this is ok */ uint8_t escape_result = escape_map[escape_char]; if (escape_result == 0u) { return false; /* bogus escape value is an error */ } dst[bs_dist] = escape_result; src += bs_dist + 2; dst += bs_dist + 1; } } else { /* they are the same. Since they can't co-occur, it means we * encountered neither. */ if constexpr (ARCHITECTURE == Architecture::WESTMERE) { src += 16; dst += 16; } else { src += 32; dst += 32; } } } /* can't be reached */ return true; } } // namespace simdjson::haswell UNTARGET_REGION #endif // IS_X86_64 #endif /* end file src/haswell/stringparsing.h */ /* begin file src/westmere/stringparsing.h */ #ifndef SIMDJSON_WESTMERE_STRINGPARSING_H #define SIMDJSON_WESTMERE_STRINGPARSING_H #ifdef IS_X86_64 TARGET_WESTMERE namespace simdjson::westmere { really_inline parse_string_helper find_bs_bits_and_quote_bits(const uint8_t *src, uint8_t *dst) { // this can read up to 31 bytes beyond the buffer size, but we require // SIMDJSON_PADDING of padding __m128i v = _mm_loadu_si128(reinterpret_cast(src)); // store to dest unconditionally - we can overwrite the bits we don't like // later _mm_storeu_si128(reinterpret_cast<__m128i *>(dst), v); auto quote_mask = _mm_cmpeq_epi8(v, _mm_set1_epi8('"')); return { static_cast( _mm_movemask_epi8(_mm_cmpeq_epi8(v, _mm_set1_epi8('\\')))), // bs_bits static_cast(_mm_movemask_epi8(quote_mask)) // quote_bits }; } // This file contains the common code every implementation uses // It is intended to be included multiple times and compiled multiple times // We assume the file in which it is include already includes // "stringparsing.h" (this simplifies amalgation) WARN_UNUSED really_inline bool parse_string(UNUSED const uint8_t *buf, UNUSED size_t len, ParsedJson &pj, UNUSED const uint32_t depth, UNUSED uint32_t offset) { pj.write_tape(pj.current_string_buf_loc - pj.string_buf, '"'); const uint8_t *src = &buf[offset + 1]; /* we know that buf at offset is a " */ uint8_t *dst = pj.current_string_buf_loc + sizeof(uint32_t); const uint8_t *const start_of_string = dst; while (1) { parse_string_helper helper = find_bs_bits_and_quote_bits(src, dst); if (((helper.bs_bits - 1) & helper.quote_bits) != 0) { /* we encountered quotes first. 
Move dst to point to quotes and exit */ /* find out where the quote is... */ uint32_t quote_dist = trailing_zeroes(helper.quote_bits); /* NULL termination is still handy if you expect all your strings to * be NULL terminated? */ /* It comes at a small cost */ dst[quote_dist] = 0; uint32_t str_length = (dst - start_of_string) + quote_dist; memcpy(pj.current_string_buf_loc, &str_length, sizeof(uint32_t)); /***************************** * Above, check for overflow in case someone has a crazy string * (>=4GB?) _ * But only add the overflow check when the document itself exceeds * 4GB * Currently unneeded because we refuse to parse docs larger or equal * to 4GB. ****************************/ /* we advance the point, accounting for the fact that we have a NULL * termination */ pj.current_string_buf_loc = dst + quote_dist + 1; return true; } if (((helper.quote_bits - 1) & helper.bs_bits) != 0) { /* find out where the backspace is */ uint32_t bs_dist = trailing_zeroes(helper.bs_bits); uint8_t escape_char = src[bs_dist + 1]; /* we encountered backslash first. Handle backslash */ if (escape_char == 'u') { /* move src/dst up to the start; they will be further adjusted within the unicode codepoint handling code. */ src += bs_dist; dst += bs_dist; if (!handle_unicode_codepoint(&src, &dst)) { return false; } } else { /* simple 1:1 conversion. Will eat bs_dist+2 characters in input and * write bs_dist+1 characters to output * note this may reach beyond the part of the buffer we've actually * seen. I think this is ok */ uint8_t escape_result = escape_map[escape_char]; if (escape_result == 0u) { return false; /* bogus escape value is an error */ } dst[bs_dist] = escape_result; src += bs_dist + 2; dst += bs_dist + 1; } } else { /* they are the same. Since they can't co-occur, it means we * encountered neither. */ if constexpr (ARCHITECTURE == Architecture::WESTMERE) { src += 16; dst += 16; } else { src += 32; dst += 32; } } } /* can't be reached */ return true; } } // namespace simdjson::westmere UNTARGET_REGION #endif // IS_X86_64 #endif /* end file src/westmere/stringparsing.h */ /* begin file src/arm64/stage2_build_tape.h */ #ifndef SIMDJSON_ARM64_STAGE2_BUILD_TAPE_H #define SIMDJSON_ARM64_STAGE2_BUILD_TAPE_H #ifdef IS_ARM64 namespace simdjson::arm64 { // This file contains the common code every implementation uses for stage2 // It is intended to be included multiple times and compiled multiple times // We assume the file in which it is include already includes // "simdjson/stage2_build_tape.h" (this simplifies amalgation) // this macro reads the next structural character, updating idx, i and c. #define UPDATE_CHAR() \ { \ idx = pj.structural_indexes[i++]; \ c = buf[idx]; \ } #ifdef SIMDJSON_USE_COMPUTED_GOTO #define SET_GOTO_ARRAY_CONTINUE() pj.ret_address[depth] = &&array_continue; #define SET_GOTO_OBJECT_CONTINUE() pj.ret_address[depth] = &&object_continue; #define SET_GOTO_START_CONTINUE() pj.ret_address[depth] = &&start_continue; #define GOTO_CONTINUE() goto *pj.ret_address[depth]; #else #define SET_GOTO_ARRAY_CONTINUE() pj.ret_address[depth] = 'a'; #define SET_GOTO_OBJECT_CONTINUE() pj.ret_address[depth] = 'o'; #define SET_GOTO_START_CONTINUE() pj.ret_address[depth] = 's'; #define GOTO_CONTINUE() \ { \ if (pj.ret_address[depth] == 'a') { \ goto array_continue; \ } else if (pj.ret_address[depth] == 'o') { \ goto object_continue; \ } else { \ goto start_continue; \ } \ } #endif /************ * The JSON is parsed to a tape, see the accompanying tape.md file * for documentation. 
***********/ WARN_UNUSED int unified_machine(const uint8_t *buf, size_t len, ParsedJson &pj) { uint32_t i = 0; /* index of the structural character (0,1,2,3...) */ uint32_t idx; /* location of the structural character in the input (buf) */ uint8_t c; /* used to track the (structural) character we are looking at, updated */ /* by UPDATE_CHAR macro */ uint32_t depth = 0; /* could have an arbitrary starting depth */ pj.init(); /* sets is_valid to false */ if (pj.byte_capacity < len) { pj.error_code = simdjson::CAPACITY; return pj.error_code; } /*//////////////////////////// START STATE ///////////////////////////// */ SET_GOTO_START_CONTINUE() pj.containing_scope_offset[depth] = pj.get_current_loc(); pj.write_tape(0, 'r'); /* r for root, 0 is going to get overwritten */ /* the root is used, if nothing else, to capture the size of the tape */ depth++; /* everything starts at depth = 1, depth = 0 is just for the root, the root may contain an object, an array or something else. */ if (depth >= pj.depth_capacity) { goto fail; } UPDATE_CHAR(); switch (c) { case '{': pj.containing_scope_offset[depth] = pj.get_current_loc(); SET_GOTO_START_CONTINUE(); depth++; if (depth >= pj.depth_capacity) { goto fail; } pj.write_tape( 0, c); /* strangely, moving this to object_begin slows things down */ goto object_begin; case '[': pj.containing_scope_offset[depth] = pj.get_current_loc(); SET_GOTO_START_CONTINUE(); depth++; if (depth >= pj.depth_capacity) { goto fail; } pj.write_tape(0, c); goto array_begin; /* #define SIMDJSON_ALLOWANYTHINGINROOT * A JSON text is a serialized value. Note that certain previous * specifications of JSON constrained a JSON text to be an object or an * array. Implementations that generate only objects or arrays where a * JSON text is called for will be interoperable in the sense that all * implementations will accept these as conforming JSON texts. * https://tools.ietf.org/html/rfc8259 * #ifdef SIMDJSON_ALLOWANYTHINGINROOT */ case '"': { if (!parse_string(buf, len, pj, depth, idx)) { goto fail; } break; } case 't': { /* we need to make a copy to make sure that the string is space * terminated. * this only applies to the JSON document made solely of the true value. * this will almost never be called in practice */ char *copy = static_cast(malloc(len + SIMDJSON_PADDING)); if (copy == nullptr) { goto fail; } memcpy(copy, buf, len); copy[len] = ' '; if (!is_valid_true_atom(reinterpret_cast(copy) + idx)) { free(copy); goto fail; } free(copy); pj.write_tape(0, c); break; } case 'f': { /* we need to make a copy to make sure that the string is space * terminated. * this only applies to the JSON document made solely of the false * value. * this will almost never be called in practice */ char *copy = static_cast(malloc(len + SIMDJSON_PADDING)); if (copy == nullptr) { goto fail; } memcpy(copy, buf, len); copy[len] = ' '; if (!is_valid_false_atom(reinterpret_cast(copy) + idx)) { free(copy); goto fail; } free(copy); pj.write_tape(0, c); break; } case 'n': { /* we need to make a copy to make sure that the string is space * terminated. * this only applies to the JSON document made solely of the null value. 
* this will almost never be called in practice */ char *copy = static_cast(malloc(len + SIMDJSON_PADDING)); if (copy == nullptr) { goto fail; } memcpy(copy, buf, len); copy[len] = ' '; if (!is_valid_null_atom(reinterpret_cast(copy) + idx)) { free(copy); goto fail; } free(copy); pj.write_tape(0, c); break; } case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': { /* we need to make a copy to make sure that the string is space * terminated. * this is done only for JSON documents made of a sole number * this will almost never be called in practice. We terminate with a * space * because we do not want to allow NULLs in the middle of a number * (whereas a * space in the middle of a number would be identified in stage 1). */ char *copy = static_cast(malloc(len + SIMDJSON_PADDING)); if (copy == nullptr) { goto fail; } memcpy(copy, buf, len); copy[len] = ' '; if (!parse_number(reinterpret_cast(copy), pj, idx, false)) { free(copy); goto fail; } free(copy); break; } case '-': { /* we need to make a copy to make sure that the string is NULL * terminated. * this is done only for JSON documents made of a sole number * this will almost never be called in practice */ char *copy = static_cast(malloc(len + SIMDJSON_PADDING)); if (copy == nullptr) { goto fail; } memcpy(copy, buf, len); copy[len] = ' '; if (!parse_number(reinterpret_cast(copy), pj, idx, true)) { free(copy); goto fail; } free(copy); break; } default: goto fail; } start_continue: /* the string might not be NULL terminated. */ if (i + 1 == pj.n_structural_indexes) { goto succeed; } else { goto fail; } /*//////////////////////////// OBJECT STATES ///////////////////////////*/ object_begin: UPDATE_CHAR(); switch (c) { case '"': { if (!parse_string(buf, len, pj, depth, idx)) { goto fail; } goto object_key_state; } case '}': goto scope_end; /* could also go to object_continue */ default: goto fail; } object_key_state: UPDATE_CHAR(); if (c != ':') { goto fail; } UPDATE_CHAR(); switch (c) { case '"': { if (!parse_string(buf, len, pj, depth, idx)) { goto fail; } break; } case 't': if (!is_valid_true_atom(buf + idx)) { goto fail; } pj.write_tape(0, c); break; case 'f': if (!is_valid_false_atom(buf + idx)) { goto fail; } pj.write_tape(0, c); break; case 'n': if (!is_valid_null_atom(buf + idx)) { goto fail; } pj.write_tape(0, c); break; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': { if (!parse_number(buf, pj, idx, false)) { goto fail; } break; } case '-': { if (!parse_number(buf, pj, idx, true)) { goto fail; } break; } case '{': { pj.containing_scope_offset[depth] = pj.get_current_loc(); pj.write_tape(0, c); /* here the compilers knows what c is so this gets optimized */ /* we have not yet encountered } so we need to come back for it */ SET_GOTO_OBJECT_CONTINUE() /* we found an object inside an object, so we need to increment the * depth */ depth++; if (depth >= pj.depth_capacity) { goto fail; } goto object_begin; } case '[': { pj.containing_scope_offset[depth] = pj.get_current_loc(); pj.write_tape(0, c); /* here the compilers knows what c is so this gets optimized */ /* we have not yet encountered } so we need to come back for it */ SET_GOTO_OBJECT_CONTINUE() /* we found an array inside an object, so we need to increment the depth */ depth++; if (depth >= pj.depth_capacity) { goto fail; } goto array_begin; } default: goto fail; } object_continue: UPDATE_CHAR(); switch (c) { case ',': UPDATE_CHAR(); if (c != '"') { goto fail; } else { if 
(!parse_string(buf, len, pj, depth, idx)) { goto fail; } goto object_key_state; } case '}': goto scope_end; default: goto fail; } /*//////////////////////////// COMMON STATE ///////////////////////////*/ scope_end: /* write our tape location to the header scope */ depth--; pj.write_tape(pj.containing_scope_offset[depth], c); pj.annotate_previous_loc(pj.containing_scope_offset[depth], pj.get_current_loc()); /* goto saved_state */ GOTO_CONTINUE() /*//////////////////////////// ARRAY STATES ///////////////////////////*/ array_begin: UPDATE_CHAR(); if (c == ']') { goto scope_end; /* could also go to array_continue */ } main_array_switch: /* we call update char on all paths in, so we can peek at c on the * on paths that can accept a close square brace (post-, and at start) */ switch (c) { case '"': { if (!parse_string(buf, len, pj, depth, idx)) { goto fail; } break; } case 't': if (!is_valid_true_atom(buf + idx)) { goto fail; } pj.write_tape(0, c); break; case 'f': if (!is_valid_false_atom(buf + idx)) { goto fail; } pj.write_tape(0, c); break; case 'n': if (!is_valid_null_atom(buf + idx)) { goto fail; } pj.write_tape(0, c); break; /* goto array_continue; */ case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': { if (!parse_number(buf, pj, idx, false)) { goto fail; } break; /* goto array_continue; */ } case '-': { if (!parse_number(buf, pj, idx, true)) { goto fail; } break; /* goto array_continue; */ } case '{': { /* we have not yet encountered ] so we need to come back for it */ pj.containing_scope_offset[depth] = pj.get_current_loc(); pj.write_tape(0, c); /* here the compilers knows what c is so this gets optimized */ SET_GOTO_ARRAY_CONTINUE() /* we found an object inside an array, so we need to increment the depth */ depth++; if (depth >= pj.depth_capacity) { goto fail; } goto object_begin; } case '[': { /* we have not yet encountered ] so we need to come back for it */ pj.containing_scope_offset[depth] = pj.get_current_loc(); pj.write_tape(0, c); /* here the compilers knows what c is so this gets optimized */ SET_GOTO_ARRAY_CONTINUE() /* we found an array inside an array, so we need to increment the depth */ depth++; if (depth >= pj.depth_capacity) { goto fail; } goto array_begin; } default: goto fail; } array_continue: UPDATE_CHAR(); switch (c) { case ',': UPDATE_CHAR(); goto main_array_switch; case ']': goto scope_end; default: goto fail; } /*//////////////////////////// FINAL STATES ///////////////////////////*/ succeed: depth--; if (depth != 0) { fprintf(stderr, "internal bug\n"); abort(); } if (pj.containing_scope_offset[depth] != 0) { fprintf(stderr, "internal bug\n"); abort(); } pj.annotate_previous_loc(pj.containing_scope_offset[depth], pj.get_current_loc()); pj.write_tape(pj.containing_scope_offset[depth], 'r'); /* r is root */ pj.valid = true; pj.error_code = simdjson::SUCCESS; return pj.error_code; fail: /* we do not need the next line because this is done by pj.init(), * pessimistically. * pj.is_valid = false; * At this point in the code, we have all the time in the world. * Note that we know exactly where we are in the document so we could, * without any overhead on the processing code, report a specific * location. * We could even trigger special code paths to assess what happened * carefully, * all without any added cost. 
 */
  if (depth >= pj.depth_capacity) {
    pj.error_code = simdjson::DEPTH_ERROR;
    return pj.error_code;
  }
  switch (c) {
  case '"':
    pj.error_code = simdjson::STRING_ERROR;
    return pj.error_code;
  case '0': case '1': case '2': case '3': case '4':
  case '5': case '6': case '7': case '8': case '9':
  case '-':
    pj.error_code = simdjson::NUMBER_ERROR;
    return pj.error_code;
  case 't':
    pj.error_code = simdjson::T_ATOM_ERROR;
    return pj.error_code;
  case 'n':
    pj.error_code = simdjson::N_ATOM_ERROR;
    return pj.error_code;
  case 'f':
    pj.error_code = simdjson::F_ATOM_ERROR;
    return pj.error_code;
  default:
    break;
  }
  pj.error_code = simdjson::TAPE_ERROR;
  return pj.error_code;
}
} // namespace simdjson::arm64

namespace simdjson {
template <>
WARN_UNUSED int unified_machine<Architecture::ARM64>(const uint8_t *buf,
                                                     size_t len,
                                                     ParsedJson &pj) {
  return arm64::unified_machine(buf, len, pj);
}
} // namespace simdjson
#endif // IS_ARM64
#endif // SIMDJSON_ARM64_STAGE2_BUILD_TAPE_H
/* end file src/arm64/stage2_build_tape.h */
/* begin file src/haswell/stage2_build_tape.h */
#ifndef SIMDJSON_HASWELL_STAGE2_BUILD_TAPE_H
#define SIMDJSON_HASWELL_STAGE2_BUILD_TAPE_H

#ifdef IS_X86_64

TARGET_HASWELL
namespace simdjson::haswell {
// This file contains the common code every implementation uses for stage2
// It is intended to be included multiple times and compiled multiple times
// We assume the file in which it is included already includes
// "simdjson/stage2_build_tape.h" (this simplifies amalgamation)

// this macro reads the next structural character, updating idx, i and c.
#define UPDATE_CHAR()                                                          \
  {                                                                            \
    idx = pj.structural_indexes[i++];                                          \
    c = buf[idx];                                                              \
  }

#ifdef SIMDJSON_USE_COMPUTED_GOTO
#define SET_GOTO_ARRAY_CONTINUE() pj.ret_address[depth] = &&array_continue;
#define SET_GOTO_OBJECT_CONTINUE() pj.ret_address[depth] = &&object_continue;
#define SET_GOTO_START_CONTINUE() pj.ret_address[depth] = &&start_continue;
#define GOTO_CONTINUE() goto *pj.ret_address[depth];
#else
#define SET_GOTO_ARRAY_CONTINUE() pj.ret_address[depth] = 'a';
#define SET_GOTO_OBJECT_CONTINUE() pj.ret_address[depth] = 'o';
#define SET_GOTO_START_CONTINUE() pj.ret_address[depth] = 's';
#define GOTO_CONTINUE()                                                        \
  {                                                                            \
    if (pj.ret_address[depth] == 'a') {                                        \
      goto array_continue;                                                     \
    } else if (pj.ret_address[depth] == 'o') {                                 \
      goto object_continue;                                                    \
    } else {                                                                   \
      goto start_continue;                                                     \
    }                                                                          \
  }
#endif

/************
 * The JSON is parsed to a tape, see the accompanying tape.md file
 * for documentation.
 ***********/
WARN_UNUSED int unified_machine(const uint8_t *buf, size_t len,
                                ParsedJson &pj) {
  uint32_t i = 0; /* index of the structural character (0,1,2,3...) */
  uint32_t idx;   /* location of the structural character in the input (buf) */
  uint8_t c;      /* used to track the (structural) character we are looking
                     at, updated by UPDATE_CHAR macro */
  uint32_t depth = 0; /* could have an arbitrary starting depth */
  pj.init();          /* sets is_valid to false */
  if (pj.byte_capacity < len) {
    pj.error_code = simdjson::CAPACITY;
    return pj.error_code;
  }

  /*//////////////////////////// START STATE /////////////////////////////*/
  SET_GOTO_START_CONTINUE()
  pj.containing_scope_offset[depth] = pj.get_current_loc();
  pj.write_tape(0, 'r'); /* r for root, 0 is going to get overwritten */
  /* the root is used, if nothing else, to capture the size of the tape */
  depth++; /* everything starts at depth = 1, depth = 0 is just for the root,
              the root may contain an object, an array or something else. */
  if (depth >= pj.depth_capacity) { goto fail; }
  UPDATE_CHAR();
  switch (c) {
  case '{':
    pj.containing_scope_offset[depth] = pj.get_current_loc();
    SET_GOTO_START_CONTINUE();
    depth++;
    if (depth >= pj.depth_capacity) { goto fail; }
    pj.write_tape(0, c); /* strangely, moving this to object_begin slows things down */
    goto object_begin;
  case '[':
    pj.containing_scope_offset[depth] = pj.get_current_loc();
    SET_GOTO_START_CONTINUE();
    depth++;
    if (depth >= pj.depth_capacity) { goto fail; }
    pj.write_tape(0, c);
    goto array_begin;
  /* #define SIMDJSON_ALLOWANYTHINGINROOT
   * A JSON text is a serialized value. Note that certain previous
   * specifications of JSON constrained a JSON text to be an object or an
   * array. Implementations that generate only objects or arrays where a
   * JSON text is called for will be interoperable in the sense that all
   * implementations will accept these as conforming JSON texts.
   * https://tools.ietf.org/html/rfc8259
   * #ifdef SIMDJSON_ALLOWANYTHINGINROOT */
  case '"': {
    if (!parse_string(buf, len, pj, depth, idx)) { goto fail; }
    break;
  }
  case 't': {
    /* we need to make a copy to make sure that the string is space
     * terminated.
     * this only applies to the JSON document made solely of the true value.
     * this will almost never be called in practice */
    char *copy = static_cast<char *>(malloc(len + SIMDJSON_PADDING));
    if (copy == nullptr) { goto fail; }
    memcpy(copy, buf, len);
    copy[len] = ' ';
    if (!is_valid_true_atom(reinterpret_cast<const uint8_t *>(copy) + idx)) {
      free(copy);
      goto fail;
    }
    free(copy);
    pj.write_tape(0, c);
    break;
  }
  case 'f': {
    /* we need to make a copy to make sure that the string is space
     * terminated.
     * this only applies to the JSON document made solely of the false value.
     * this will almost never be called in practice */
    char *copy = static_cast<char *>(malloc(len + SIMDJSON_PADDING));
    if (copy == nullptr) { goto fail; }
    memcpy(copy, buf, len);
    copy[len] = ' ';
    if (!is_valid_false_atom(reinterpret_cast<const uint8_t *>(copy) + idx)) {
      free(copy);
      goto fail;
    }
    free(copy);
    pj.write_tape(0, c);
    break;
  }
  case 'n': {
    /* we need to make a copy to make sure that the string is space
     * terminated.
     * this only applies to the JSON document made solely of the null value.
     * this will almost never be called in practice */
    char *copy = static_cast<char *>(malloc(len + SIMDJSON_PADDING));
    if (copy == nullptr) { goto fail; }
    memcpy(copy, buf, len);
    copy[len] = ' ';
    if (!is_valid_null_atom(reinterpret_cast<const uint8_t *>(copy) + idx)) {
      free(copy);
      goto fail;
    }
    free(copy);
    pj.write_tape(0, c);
    break;
  }
  case '0': case '1': case '2': case '3': case '4':
  case '5': case '6': case '7': case '8': case '9': {
    /* we need to make a copy to make sure that the string is space
     * terminated.
     * this is done only for JSON documents made of a sole number
     * this will almost never be called in practice. We terminate with a
     * space because we do not want to allow NULLs in the middle of a number
     * (whereas a space in the middle of a number would be identified in
     * stage 1). */
    char *copy = static_cast<char *>(malloc(len + SIMDJSON_PADDING));
    if (copy == nullptr) { goto fail; }
    memcpy(copy, buf, len);
    copy[len] = ' ';
    if (!parse_number(reinterpret_cast<const uint8_t *>(copy), pj, idx, false)) {
      free(copy);
      goto fail;
    }
    free(copy);
    break;
  }
  case '-': {
    /* we need to make a copy to make sure that the string is space
     * terminated.
     * this is done only for JSON documents made of a sole number
     * this will almost never be called in practice */
    char *copy = static_cast<char *>(malloc(len + SIMDJSON_PADDING));
    if (copy == nullptr) { goto fail; }
    memcpy(copy, buf, len);
    copy[len] = ' ';
    if (!parse_number(reinterpret_cast<const uint8_t *>(copy), pj, idx, true)) {
      free(copy);
      goto fail;
    }
    free(copy);
    break;
  }
  default:
    goto fail;
  }

start_continue:
  /* the string might not be NULL terminated. */
  if (i + 1 == pj.n_structural_indexes) {
    goto succeed;
  } else {
    goto fail;
  }

  /*//////////////////////////// OBJECT STATES ///////////////////////////*/
object_begin:
  UPDATE_CHAR();
  switch (c) {
  case '"': {
    if (!parse_string(buf, len, pj, depth, idx)) { goto fail; }
    goto object_key_state;
  }
  case '}':
    goto scope_end; /* could also go to object_continue */
  default:
    goto fail;
  }

object_key_state:
  UPDATE_CHAR();
  if (c != ':') { goto fail; }
  UPDATE_CHAR();
  switch (c) {
  case '"': {
    if (!parse_string(buf, len, pj, depth, idx)) { goto fail; }
    break;
  }
  case 't':
    if (!is_valid_true_atom(buf + idx)) { goto fail; }
    pj.write_tape(0, c);
    break;
  case 'f':
    if (!is_valid_false_atom(buf + idx)) { goto fail; }
    pj.write_tape(0, c);
    break;
  case 'n':
    if (!is_valid_null_atom(buf + idx)) { goto fail; }
    pj.write_tape(0, c);
    break;
  case '0': case '1': case '2': case '3': case '4':
  case '5': case '6': case '7': case '8': case '9': {
    if (!parse_number(buf, pj, idx, false)) { goto fail; }
    break;
  }
  case '-': {
    if (!parse_number(buf, pj, idx, true)) { goto fail; }
    break;
  }
  case '{': {
    pj.containing_scope_offset[depth] = pj.get_current_loc();
    pj.write_tape(0, c); /* here the compiler knows what c is so this gets optimized */
    /* we have not yet encountered } so we need to come back for it */
    SET_GOTO_OBJECT_CONTINUE()
    /* we found an object inside an object, so we need to increment the
     * depth */
    depth++;
    if (depth >= pj.depth_capacity) { goto fail; }
    goto object_begin;
  }
  case '[': {
    pj.containing_scope_offset[depth] = pj.get_current_loc();
    pj.write_tape(0, c); /* here the compiler knows what c is so this gets optimized */
    /* we have not yet encountered } so we need to come back for it */
    SET_GOTO_OBJECT_CONTINUE()
    /* we found an array inside an object, so we need to increment the depth */
    depth++;
    if (depth >= pj.depth_capacity) { goto fail; }
    goto array_begin;
  }
  default:
    goto fail;
  }

object_continue:
  UPDATE_CHAR();
  switch (c) {
  case ',':
    UPDATE_CHAR();
    if (c != '"') {
      goto fail;
    } else {
      if (!parse_string(buf, len, pj, depth, idx)) { goto fail; }
      goto object_key_state;
    }
  case '}':
    goto scope_end;
  default:
    goto fail;
  }

  /*//////////////////////////// COMMON STATE ///////////////////////////*/
scope_end:
  /* write our tape location to the header scope */
  depth--;
  pj.write_tape(pj.containing_scope_offset[depth], c);
  pj.annotate_previous_loc(pj.containing_scope_offset[depth],
                           pj.get_current_loc());
  /* goto saved_state */
  GOTO_CONTINUE()

  /*//////////////////////////// ARRAY STATES ///////////////////////////*/
array_begin:
  UPDATE_CHAR();
  if (c == ']') {
    goto scope_end; /* could also go to array_continue */
  }

main_array_switch:
  /* we call update char on all paths in, so we can peek at c on the
   * paths that can accept a close square brace (post-comma, and at start) */
  switch (c) {
  case '"': {
    if (!parse_string(buf, len, pj, depth, idx)) { goto fail; }
    break;
  }
  case 't':
    if (!is_valid_true_atom(buf + idx)) { goto fail; }
    pj.write_tape(0, c);
    break;
  case 'f':
    if (!is_valid_false_atom(buf + idx)) { goto fail; }
    pj.write_tape(0, c);
    break;
  case 'n':
    if (!is_valid_null_atom(buf + idx)) { goto fail; }
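    /* Note: pj.write_tape(0, c) below stores one 64-bit tape word whose top
     * byte is the type character ('t', 'f' or 'n') and whose 56-bit payload
     * is left at zero; the layout is documented in the accompanying
     * tape.md. */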
pj.write_tape(0, c); break; /* goto array_continue; */ case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': { if (!parse_number(buf, pj, idx, false)) { goto fail; } break; /* goto array_continue; */ } case '-': { if (!parse_number(buf, pj, idx, true)) { goto fail; } break; /* goto array_continue; */ } case '{': { /* we have not yet encountered ] so we need to come back for it */ pj.containing_scope_offset[depth] = pj.get_current_loc(); pj.write_tape(0, c); /* here the compilers knows what c is so this gets optimized */ SET_GOTO_ARRAY_CONTINUE() /* we found an object inside an array, so we need to increment the depth */ depth++; if (depth >= pj.depth_capacity) { goto fail; } goto object_begin; } case '[': { /* we have not yet encountered ] so we need to come back for it */ pj.containing_scope_offset[depth] = pj.get_current_loc(); pj.write_tape(0, c); /* here the compilers knows what c is so this gets optimized */ SET_GOTO_ARRAY_CONTINUE() /* we found an array inside an array, so we need to increment the depth */ depth++; if (depth >= pj.depth_capacity) { goto fail; } goto array_begin; } default: goto fail; } array_continue: UPDATE_CHAR(); switch (c) { case ',': UPDATE_CHAR(); goto main_array_switch; case ']': goto scope_end; default: goto fail; } /*//////////////////////////// FINAL STATES ///////////////////////////*/ succeed: depth--; if (depth != 0) { fprintf(stderr, "internal bug\n"); abort(); } if (pj.containing_scope_offset[depth] != 0) { fprintf(stderr, "internal bug\n"); abort(); } pj.annotate_previous_loc(pj.containing_scope_offset[depth], pj.get_current_loc()); pj.write_tape(pj.containing_scope_offset[depth], 'r'); /* r is root */ pj.valid = true; pj.error_code = simdjson::SUCCESS; return pj.error_code; fail: /* we do not need the next line because this is done by pj.init(), * pessimistically. * pj.is_valid = false; * At this point in the code, we have all the time in the world. * Note that we know exactly where we are in the document so we could, * without any overhead on the processing code, report a specific * location. * We could even trigger special code paths to assess what happened * carefully, * all without any added cost. 
 */
  if (depth >= pj.depth_capacity) {
    pj.error_code = simdjson::DEPTH_ERROR;
    return pj.error_code;
  }
  switch (c) {
  case '"':
    pj.error_code = simdjson::STRING_ERROR;
    return pj.error_code;
  case '0': case '1': case '2': case '3': case '4':
  case '5': case '6': case '7': case '8': case '9':
  case '-':
    pj.error_code = simdjson::NUMBER_ERROR;
    return pj.error_code;
  case 't':
    pj.error_code = simdjson::T_ATOM_ERROR;
    return pj.error_code;
  case 'n':
    pj.error_code = simdjson::N_ATOM_ERROR;
    return pj.error_code;
  case 'f':
    pj.error_code = simdjson::F_ATOM_ERROR;
    return pj.error_code;
  default:
    break;
  }
  pj.error_code = simdjson::TAPE_ERROR;
  return pj.error_code;
}
} // namespace simdjson::haswell
UNTARGET_REGION

TARGET_HASWELL
namespace simdjson {
template <>
WARN_UNUSED int unified_machine<Architecture::HASWELL>(const uint8_t *buf,
                                                       size_t len,
                                                       ParsedJson &pj) {
  return haswell::unified_machine(buf, len, pj);
}
} // namespace simdjson
UNTARGET_REGION

#endif // IS_X86_64
#endif // SIMDJSON_HASWELL_STAGE2_BUILD_TAPE_H
/* end file src/haswell/stage2_build_tape.h */
/* begin file src/westmere/stage2_build_tape.h */
#ifndef SIMDJSON_WESTMERE_STAGE2_BUILD_TAPE_H
#define SIMDJSON_WESTMERE_STAGE2_BUILD_TAPE_H

#ifdef IS_X86_64

TARGET_WESTMERE
namespace simdjson::westmere {
// This file contains the common code every implementation uses for stage2
// It is intended to be included multiple times and compiled multiple times
// We assume the file in which it is included already includes
// "simdjson/stage2_build_tape.h" (this simplifies amalgamation)

// this macro reads the next structural character, updating idx, i and c.
#define UPDATE_CHAR()                                                          \
  {                                                                            \
    idx = pj.structural_indexes[i++];                                          \
    c = buf[idx];                                                              \
  }

#ifdef SIMDJSON_USE_COMPUTED_GOTO
#define SET_GOTO_ARRAY_CONTINUE() pj.ret_address[depth] = &&array_continue;
#define SET_GOTO_OBJECT_CONTINUE() pj.ret_address[depth] = &&object_continue;
#define SET_GOTO_START_CONTINUE() pj.ret_address[depth] = &&start_continue;
#define GOTO_CONTINUE() goto *pj.ret_address[depth];
#else
#define SET_GOTO_ARRAY_CONTINUE() pj.ret_address[depth] = 'a';
#define SET_GOTO_OBJECT_CONTINUE() pj.ret_address[depth] = 'o';
#define SET_GOTO_START_CONTINUE() pj.ret_address[depth] = 's';
#define GOTO_CONTINUE()                                                        \
  {                                                                            \
    if (pj.ret_address[depth] == 'a') {                                        \
      goto array_continue;                                                     \
    } else if (pj.ret_address[depth] == 'o') {                                 \
      goto object_continue;                                                    \
    } else {                                                                   \
      goto start_continue;                                                     \
    }                                                                          \
  }
#endif

/************
 * The JSON is parsed to a tape, see the accompanying tape.md file
 * for documentation.
 ***********/
WARN_UNUSED int unified_machine(const uint8_t *buf, size_t len,
                                ParsedJson &pj) {
  uint32_t i = 0; /* index of the structural character (0,1,2,3...) */
  uint32_t idx;   /* location of the structural character in the input (buf) */
  uint8_t c;      /* used to track the (structural) character we are looking
                     at, updated by UPDATE_CHAR macro */
  uint32_t depth = 0; /* could have an arbitrary starting depth */
  pj.init();          /* sets is_valid to false */
  if (pj.byte_capacity < len) {
    pj.error_code = simdjson::CAPACITY;
    return pj.error_code;
  }

  /*//////////////////////////// START STATE /////////////////////////////*/
  SET_GOTO_START_CONTINUE()
  pj.containing_scope_offset[depth] = pj.get_current_loc();
  pj.write_tape(0, 'r'); /* r for root, 0 is going to get overwritten */
  /* the root is used, if nothing else, to capture the size of the tape */
  depth++; /* everything starts at depth = 1, depth = 0 is just for the root,
              the root may contain an object, an array or something else. */
  if (depth >= pj.depth_capacity) { goto fail; }
  UPDATE_CHAR();
  switch (c) {
  case '{':
    pj.containing_scope_offset[depth] = pj.get_current_loc();
    SET_GOTO_START_CONTINUE();
    depth++;
    if (depth >= pj.depth_capacity) { goto fail; }
    pj.write_tape(0, c); /* strangely, moving this to object_begin slows things down */
    goto object_begin;
  case '[':
    pj.containing_scope_offset[depth] = pj.get_current_loc();
    SET_GOTO_START_CONTINUE();
    depth++;
    if (depth >= pj.depth_capacity) { goto fail; }
    pj.write_tape(0, c);
    goto array_begin;
  /* #define SIMDJSON_ALLOWANYTHINGINROOT
   * A JSON text is a serialized value. Note that certain previous
   * specifications of JSON constrained a JSON text to be an object or an
   * array. Implementations that generate only objects or arrays where a
   * JSON text is called for will be interoperable in the sense that all
   * implementations will accept these as conforming JSON texts.
   * https://tools.ietf.org/html/rfc8259
   * #ifdef SIMDJSON_ALLOWANYTHINGINROOT */
  case '"': {
    if (!parse_string(buf, len, pj, depth, idx)) { goto fail; }
    break;
  }
  case 't': {
    /* we need to make a copy to make sure that the string is space
     * terminated.
     * this only applies to the JSON document made solely of the true value.
     * this will almost never be called in practice */
    char *copy = static_cast<char *>(malloc(len + SIMDJSON_PADDING));
    if (copy == nullptr) { goto fail; }
    memcpy(copy, buf, len);
    copy[len] = ' ';
    if (!is_valid_true_atom(reinterpret_cast<const uint8_t *>(copy) + idx)) {
      free(copy);
      goto fail;
    }
    free(copy);
    pj.write_tape(0, c);
    break;
  }
  case 'f': {
    /* we need to make a copy to make sure that the string is space
     * terminated.
     * this only applies to the JSON document made solely of the false value.
     * this will almost never be called in practice */
    char *copy = static_cast<char *>(malloc(len + SIMDJSON_PADDING));
    if (copy == nullptr) { goto fail; }
    memcpy(copy, buf, len);
    copy[len] = ' ';
    if (!is_valid_false_atom(reinterpret_cast<const uint8_t *>(copy) + idx)) {
      free(copy);
      goto fail;
    }
    free(copy);
    pj.write_tape(0, c);
    break;
  }
  case 'n': {
    /* we need to make a copy to make sure that the string is space
     * terminated.
     * this only applies to the JSON document made solely of the null value.
     * this will almost never be called in practice */
    char *copy = static_cast<char *>(malloc(len + SIMDJSON_PADDING));
    if (copy == nullptr) { goto fail; }
    memcpy(copy, buf, len);
    copy[len] = ' ';
    if (!is_valid_null_atom(reinterpret_cast<const uint8_t *>(copy) + idx)) {
      free(copy);
      goto fail;
    }
    free(copy);
    pj.write_tape(0, c);
    break;
  }
  case '0': case '1': case '2': case '3': case '4':
  case '5': case '6': case '7': case '8': case '9': {
    /* we need to make a copy to make sure that the string is space
     * terminated.
     * this is done only for JSON documents made of a sole number
     * this will almost never be called in practice. We terminate with a
     * space because we do not want to allow NULLs in the middle of a number
     * (whereas a space in the middle of a number would be identified in
     * stage 1). */
    char *copy = static_cast<char *>(malloc(len + SIMDJSON_PADDING));
    if (copy == nullptr) { goto fail; }
    memcpy(copy, buf, len);
    copy[len] = ' ';
    if (!parse_number(reinterpret_cast<const uint8_t *>(copy), pj, idx, false)) {
      free(copy);
      goto fail;
    }
    free(copy);
    break;
  }
  case '-': {
    /* we need to make a copy to make sure that the string is space
     * terminated.
     * this is done only for JSON documents made of a sole number
     * this will almost never be called in practice */
    char *copy = static_cast<char *>(malloc(len + SIMDJSON_PADDING));
    if (copy == nullptr) { goto fail; }
    memcpy(copy, buf, len);
    copy[len] = ' ';
    if (!parse_number(reinterpret_cast<const uint8_t *>(copy), pj, idx, true)) {
      free(copy);
      goto fail;
    }
    free(copy);
    break;
  }
  default:
    goto fail;
  }

start_continue:
  /* the string might not be NULL terminated. */
  if (i + 1 == pj.n_structural_indexes) {
    goto succeed;
  } else {
    goto fail;
  }

  /*//////////////////////////// OBJECT STATES ///////////////////////////*/
object_begin:
  UPDATE_CHAR();
  switch (c) {
  case '"': {
    if (!parse_string(buf, len, pj, depth, idx)) { goto fail; }
    goto object_key_state;
  }
  case '}':
    goto scope_end; /* could also go to object_continue */
  default:
    goto fail;
  }

object_key_state:
  UPDATE_CHAR();
  if (c != ':') { goto fail; }
  UPDATE_CHAR();
  switch (c) {
  case '"': {
    if (!parse_string(buf, len, pj, depth, idx)) { goto fail; }
    break;
  }
  case 't':
    if (!is_valid_true_atom(buf + idx)) { goto fail; }
    pj.write_tape(0, c);
    break;
  case 'f':
    if (!is_valid_false_atom(buf + idx)) { goto fail; }
    pj.write_tape(0, c);
    break;
  case 'n':
    if (!is_valid_null_atom(buf + idx)) { goto fail; }
    pj.write_tape(0, c);
    break;
  case '0': case '1': case '2': case '3': case '4':
  case '5': case '6': case '7': case '8': case '9': {
    if (!parse_number(buf, pj, idx, false)) { goto fail; }
    break;
  }
  case '-': {
    if (!parse_number(buf, pj, idx, true)) { goto fail; }
    break;
  }
  case '{': {
    pj.containing_scope_offset[depth] = pj.get_current_loc();
    pj.write_tape(0, c); /* here the compiler knows what c is so this gets optimized */
    /* we have not yet encountered } so we need to come back for it */
    SET_GOTO_OBJECT_CONTINUE()
    /* we found an object inside an object, so we need to increment the
     * depth */
    depth++;
    if (depth >= pj.depth_capacity) { goto fail; }
    goto object_begin;
  }
  case '[': {
    pj.containing_scope_offset[depth] = pj.get_current_loc();
    pj.write_tape(0, c); /* here the compiler knows what c is so this gets optimized */
    /* we have not yet encountered } so we need to come back for it */
    SET_GOTO_OBJECT_CONTINUE()
    /* we found an array inside an object, so we need to increment the depth */
    depth++;
    if (depth >= pj.depth_capacity) { goto fail; }
    goto array_begin;
  }
  default:
    goto fail;
  }

object_continue:
  UPDATE_CHAR();
  switch (c) {
  case ',':
    UPDATE_CHAR();
    if (c != '"') {
      goto fail;
    } else {
      if (!parse_string(buf, len, pj, depth, idx)) { goto fail; }
      goto object_key_state;
    }
  case '}':
    goto scope_end;
  default:
    goto fail;
  }

  /*//////////////////////////// COMMON STATE ///////////////////////////*/
scope_end:
  /* write our tape location to the header scope */
  depth--;
  pj.write_tape(pj.containing_scope_offset[depth], c);
  pj.annotate_previous_loc(pj.containing_scope_offset[depth],
                           pj.get_current_loc());
  /* goto saved_state */
  GOTO_CONTINUE()

  /*//////////////////////////// ARRAY STATES ///////////////////////////*/
array_begin:
  UPDATE_CHAR();
  if (c == ']') {
    goto scope_end; /* could also go to array_continue */
  }

main_array_switch:
  /* we call update char on all paths in, so we can peek at c on the
   * paths that can accept a close square brace (post-comma, and at start) */
  switch (c) {
  case '"': {
    if (!parse_string(buf, len, pj, depth, idx)) { goto fail; }
    break;
  }
  case 't':
    if (!is_valid_true_atom(buf + idx)) { goto fail; }
    pj.write_tape(0, c);
    break;
  case 'f':
    if (!is_valid_false_atom(buf + idx)) { goto fail; }
    pj.write_tape(0, c);
    break;
  case 'n':
    if (!is_valid_null_atom(buf + idx)) { goto fail; }
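    /* Note: the is_valid_*_atom helpers compare a full machine word starting
     * at buf + idx; this in-place check (no copy) is safe because the input
     * buffer is required to carry SIMDJSON_PADDING bytes past the document.
     * Only the bare-root cases near the top of this function fall back to a
     * space-terminated copy of the whole input. */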
pj.write_tape(0, c); break; /* goto array_continue; */ case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': { if (!parse_number(buf, pj, idx, false)) { goto fail; } break; /* goto array_continue; */ } case '-': { if (!parse_number(buf, pj, idx, true)) { goto fail; } break; /* goto array_continue; */ } case '{': { /* we have not yet encountered ] so we need to come back for it */ pj.containing_scope_offset[depth] = pj.get_current_loc(); pj.write_tape(0, c); /* here the compilers knows what c is so this gets optimized */ SET_GOTO_ARRAY_CONTINUE() /* we found an object inside an array, so we need to increment the depth */ depth++; if (depth >= pj.depth_capacity) { goto fail; } goto object_begin; } case '[': { /* we have not yet encountered ] so we need to come back for it */ pj.containing_scope_offset[depth] = pj.get_current_loc(); pj.write_tape(0, c); /* here the compilers knows what c is so this gets optimized */ SET_GOTO_ARRAY_CONTINUE() /* we found an array inside an array, so we need to increment the depth */ depth++; if (depth >= pj.depth_capacity) { goto fail; } goto array_begin; } default: goto fail; } array_continue: UPDATE_CHAR(); switch (c) { case ',': UPDATE_CHAR(); goto main_array_switch; case ']': goto scope_end; default: goto fail; } /*//////////////////////////// FINAL STATES ///////////////////////////*/ succeed: depth--; if (depth != 0) { fprintf(stderr, "internal bug\n"); abort(); } if (pj.containing_scope_offset[depth] != 0) { fprintf(stderr, "internal bug\n"); abort(); } pj.annotate_previous_loc(pj.containing_scope_offset[depth], pj.get_current_loc()); pj.write_tape(pj.containing_scope_offset[depth], 'r'); /* r is root */ pj.valid = true; pj.error_code = simdjson::SUCCESS; return pj.error_code; fail: /* we do not need the next line because this is done by pj.init(), * pessimistically. * pj.is_valid = false; * At this point in the code, we have all the time in the world. * Note that we know exactly where we are in the document so we could, * without any overhead on the processing code, report a specific * location. * We could even trigger special code paths to assess what happened * carefully, * all without any added cost. 
 */
  if (depth >= pj.depth_capacity) {
    pj.error_code = simdjson::DEPTH_ERROR;
    return pj.error_code;
  }
  switch (c) {
  case '"':
    pj.error_code = simdjson::STRING_ERROR;
    return pj.error_code;
  case '0': case '1': case '2': case '3': case '4':
  case '5': case '6': case '7': case '8': case '9':
  case '-':
    pj.error_code = simdjson::NUMBER_ERROR;
    return pj.error_code;
  case 't':
    pj.error_code = simdjson::T_ATOM_ERROR;
    return pj.error_code;
  case 'n':
    pj.error_code = simdjson::N_ATOM_ERROR;
    return pj.error_code;
  case 'f':
    pj.error_code = simdjson::F_ATOM_ERROR;
    return pj.error_code;
  default:
    break;
  }
  pj.error_code = simdjson::TAPE_ERROR;
  return pj.error_code;
}
} // namespace simdjson::westmere
UNTARGET_REGION

TARGET_WESTMERE
namespace simdjson {
template <>
WARN_UNUSED int unified_machine<Architecture::WESTMERE>(const uint8_t *buf,
                                                        size_t len,
                                                        ParsedJson &pj) {
  return westmere::unified_machine(buf, len, pj);
}
} // namespace simdjson
UNTARGET_REGION

#endif // IS_X86_64
#endif // SIMDJSON_WESTMERE_STAGE2_BUILD_TAPE_H
/* end file src/westmere/stage2_build_tape.h */
/* begin file src/stage2_build_tape.cpp */
/* end file src/stage2_build_tape.cpp */
/* begin file src/parsedjson.cpp */
namespace simdjson {
ParsedJson::ParsedJson()
    : structural_indexes(nullptr), tape(nullptr),
      containing_scope_offset(nullptr), ret_address(nullptr),
      string_buf(nullptr), current_string_buf_loc(nullptr) {}

ParsedJson::~ParsedJson() { deallocate(); }

ParsedJson::ParsedJson(ParsedJson &&p)
    : byte_capacity(p.byte_capacity), depth_capacity(p.depth_capacity),
      tape_capacity(p.tape_capacity), string_capacity(p.string_capacity),
      current_loc(p.current_loc), n_structural_indexes(p.n_structural_indexes),
      structural_indexes(p.structural_indexes), tape(p.tape),
      containing_scope_offset(p.containing_scope_offset),
      ret_address(p.ret_address), string_buf(p.string_buf),
      current_string_buf_loc(p.current_string_buf_loc), valid(p.valid) {
  p.structural_indexes = nullptr;
  p.tape = nullptr;
  p.containing_scope_offset = nullptr;
  p.ret_address = nullptr;
  p.string_buf = nullptr;
  p.current_string_buf_loc = nullptr;
}

WARN_UNUSED bool ParsedJson::allocate_capacity(size_t len, size_t max_depth) {
  if (max_depth <= 0) {
    max_depth = 1; // don't let the user allocate nothing
  }
  if (len <= 0) {
    len = 64; // allocating 0 bytes is wasteful.
  }
  if (len > SIMDJSON_MAXSIZE_BYTES) {
    return false;
  }
  if ((len <= byte_capacity) && (max_depth <= depth_capacity)) {
    return true;
  }
  deallocate();
  valid = false;
  byte_capacity = 0; // will only set it to len after allocations are a success
  n_structural_indexes = 0;
  uint32_t max_structures = ROUNDUP_N(len, 64) + 2 + 7;
  structural_indexes = new (std::nothrow) uint32_t[max_structures];
  // a pathological input like "[[[[..." would generate len tape elements, so
  // need a capacity of len + 1
  size_t local_tape_capacity = ROUNDUP_N(len + 1, 64);
  // a document with only zero-length strings...
could have len/3 string // and we would need len/3 * 5 bytes on the string buffer size_t local_string_capacity = ROUNDUP_N(5 * len / 3 + 32, 64); string_buf = new (std::nothrow) uint8_t[local_string_capacity]; tape = new (std::nothrow) uint64_t[local_tape_capacity]; containing_scope_offset = new (std::nothrow) uint32_t[max_depth]; #ifdef SIMDJSON_USE_COMPUTED_GOTO ret_address = new (std::nothrow) void *[max_depth]; #else ret_address = new (std::nothrow) char[max_depth]; #endif if ((string_buf == nullptr) || (tape == nullptr) || (containing_scope_offset == nullptr) || (ret_address == nullptr) || (structural_indexes == nullptr)) { std::cerr << "Could not allocate memory" << std::endl; delete[] ret_address; delete[] containing_scope_offset; delete[] tape; delete[] string_buf; delete[] structural_indexes; return false; } /* // We do not need to initialize this content for parsing, though we could // need to initialize it for safety. memset(string_buf, 0 , local_string_capacity); memset(structural_indexes, 0, max_structures * sizeof(uint32_t)); memset(tape, 0, local_tape_capacity * sizeof(uint64_t)); */ byte_capacity = len; depth_capacity = max_depth; tape_capacity = local_tape_capacity; string_capacity = local_string_capacity; return true; } bool ParsedJson::is_valid() const { return valid; } int ParsedJson::get_error_code() const { return error_code; } std::string ParsedJson::get_error_message() const { return error_message(error_code); } void ParsedJson::deallocate() { byte_capacity = 0; depth_capacity = 0; tape_capacity = 0; string_capacity = 0; delete[] ret_address; delete[] containing_scope_offset; delete[] tape; delete[] string_buf; delete[] structural_indexes; valid = false; } void ParsedJson::init() { current_string_buf_loc = string_buf; current_loc = 0; valid = false; } WARN_UNUSED bool ParsedJson::print_json(std::ostream &os) { if (!valid) { return false; } uint32_t string_length; size_t tape_idx = 0; uint64_t tape_val = tape[tape_idx]; uint8_t type = (tape_val >> 56); size_t how_many = 0; if (type == 'r') { how_many = tape_val & JSON_VALUE_MASK; } else { fprintf(stderr, "Error: no starting root node?"); return false; } if (how_many > tape_capacity) { fprintf( stderr, "We may be exceeding the tape capacity. 
Is this a valid document?\n");
    return false;
  }
  tape_idx++;
  bool *in_object = new bool[depth_capacity];
  auto *in_object_idx = new size_t[depth_capacity];
  int depth = 1; // only root at level 0
  in_object_idx[depth] = 0;
  in_object[depth] = false;
  for (; tape_idx < how_many; tape_idx++) {
    tape_val = tape[tape_idx];
    uint64_t payload = tape_val & JSON_VALUE_MASK;
    type = (tape_val >> 56);
    if (!in_object[depth]) {
      if ((in_object_idx[depth] > 0) && (type != ']')) {
        os << ",";
      }
      in_object_idx[depth]++;
    } else { // if (in_object) {
      if ((in_object_idx[depth] > 0) && ((in_object_idx[depth] & 1) == 0) &&
          (type != '}')) {
        os << ",";
      }
      if (((in_object_idx[depth] & 1) == 1)) {
        os << ":";
      }
      in_object_idx[depth]++;
    }
    switch (type) {
    case '"': // we have a string
      os << '"';
      memcpy(&string_length, string_buf + payload, sizeof(uint32_t));
      print_with_escapes(
          (const unsigned char *)(string_buf + payload + sizeof(uint32_t)),
          string_length);
      os << '"';
      break;
    case 'l': // we have a long int
      if (tape_idx + 1 >= how_many) {
        delete[] in_object;
        delete[] in_object_idx;
        return false;
      }
      os << static_cast<int64_t>(tape[++tape_idx]);
      break;
    case 'd': // we have a double
      if (tape_idx + 1 >= how_many) {
        delete[] in_object;
        delete[] in_object_idx;
        return false;
      }
      double answer;
      memcpy(&answer, &tape[++tape_idx], sizeof(answer));
      os << answer;
      break;
    case 'n': // we have a null
      os << "null";
      break;
    case 't': // we have a true
      os << "true";
      break;
    case 'f': // we have a false
      os << "false";
      break;
    case '{': // we have an object
      os << '{';
      depth++;
      in_object[depth] = true;
      in_object_idx[depth] = 0;
      break;
    case '}': // we end an object
      depth--;
      os << '}';
      break;
    case '[': // we start an array
      os << '[';
      depth++;
      in_object[depth] = false;
      in_object_idx[depth] = 0;
      break;
    case ']': // we end an array
      depth--;
      os << ']';
      break;
    case 'r': // we start and end with the root node
      fprintf(stderr, "should we be hitting the root node?\n");
      delete[] in_object;
      delete[] in_object_idx;
      return false;
    default:
      fprintf(stderr, "bug %c\n", type);
      delete[] in_object;
      delete[] in_object_idx;
      return false;
    }
  }
  delete[] in_object;
  delete[] in_object_idx;
  return true;
}

WARN_UNUSED bool ParsedJson::dump_raw_tape(std::ostream &os) {
  if (!valid) {
    return false;
  }
  uint32_t string_length;
  size_t tape_idx = 0;
  uint64_t tape_val = tape[tape_idx];
  uint8_t type = (tape_val >> 56);
  os << tape_idx << " : " << type;
  tape_idx++;
  size_t how_many = 0;
  if (type == 'r') {
    how_many = tape_val & JSON_VALUE_MASK;
  } else {
    fprintf(stderr, "Error: no starting root node?");
    return false;
  }
  os << "\t// pointing to " << how_many << " (right after last node)\n";
  uint64_t payload;
  for (; tape_idx < how_many; tape_idx++) {
    os << tape_idx << " : ";
    tape_val = tape[tape_idx];
    payload = tape_val & JSON_VALUE_MASK;
    type = (tape_val >> 56);
    switch (type) {
    case '"': // we have a string
      os << "string \"";
      memcpy(&string_length, string_buf + payload, sizeof(uint32_t));
      print_with_escapes(
          (const unsigned char *)(string_buf + payload + sizeof(uint32_t)),
          string_length);
      os << '"';
      os << '\n';
      break;
    case 'l': // we have a long int
      if (tape_idx + 1 >= how_many) {
        return false;
      }
      os << "integer " << static_cast<int64_t>(tape[++tape_idx]) << "\n";
      break;
    case 'd': // we have a double
      os << "float ";
      if (tape_idx + 1 >= how_many) {
        return false;
      }
      double answer;
      memcpy(&answer, &tape[++tape_idx], sizeof(answer));
      os << answer << '\n';
      break;
    case 'n': // we have a null
      os << "null\n";
      break;
    case 't': // we have a true
      os << "true\n";
      break;
    case 'f': // we have a false
      os << "false\n";
      break;
    case '{': // we have an object
      os << "{\t// pointing to next tape location " << payload
         << " (first node after the scope) \n";
      break;
    case '}': // we end an object
      os << "}\t// pointing to previous tape location " << payload
         << " (start of the scope) \n";
      break;
    case '[': // we start an array
      os << "[\t// pointing to next tape location " << payload
         << " (first node after the scope) \n";
      break;
    case ']': // we end an array
      os << "]\t// pointing to previous tape location " << payload
         << " (start of the scope) \n";
      break;
    case 'r': // we start and end with the root node
      printf("end of root\n");
      return false;
    default:
      return false;
    }
  }
  tape_val = tape[tape_idx];
  payload = tape_val & JSON_VALUE_MASK;
  type = (tape_val >> 56);
  os << tape_idx << " : " << type << "\t// pointing to " << payload
     << " (start root)\n";
  return true;
}
} // namespace simdjson
/* end file src/parsedjson.cpp */
/* begin file src/parsedjsoniterator.cpp */
namespace simdjson {
template class ParsedJson::BasicIterator<DEFAULT_MAX_DEPTH>;
} // namespace simdjson
/* end file src/parsedjsoniterator.cpp */
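
/* Illustrative usage sketch (not part of the generated sources above): one way
 * the pieces defined in this amalgamation fit together. It assumes the public
 * json_parse() entry point declared in simdjson.h, which runs stage 1
 * (structural indexing) and then dispatches to the architecture-specific
 * stage 2 unified_machine() shown above on a padded input buffer.
 *
 *   #include <iostream>
 *   #include "simdjson.h"
 *
 *   int main() {
 *     simdjson::padded_string doc = simdjson::get_corpus("example.json");
 *     simdjson::ParsedJson pj;
 *     if (!pj.allocate_capacity(doc.size())) { // tape, string buffer, depth
 *       std::cerr << "allocation failed" << std::endl;
 *       return 1;
 *     }
 *     const int res = simdjson::json_parse(doc, pj); // stage 1 + stage 2
 *     if (res != simdjson::SUCCESS) {
 *       std::cerr << pj.get_error_message() << std::endl;
 *       return 1;
 *     }
 *     if (!pj.print_json(std::cout)) { // walk the tape, re-serialize the doc
 *       return 1;
 *     }
 *     return 0;
 *   }
 */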