1653 lines
80 KiB
C++
1653 lines
80 KiB
C++
/* auto-generated on Sun Aug 4 15:43:41 EDT 2019. Do not edit! */
|
|
#include "simdjson.h"
|
|
|
|
/* used for http://dmalloc.com/ Dmalloc - Debug Malloc Library */
|
|
#ifdef DMALLOC
|
|
#include "dmalloc.h"
|
|
#endif
|
|
|
|
/* begin file src/simdjson.cpp */
|
|
#include <map>
|
|
|
|
namespace simdjson {
|
|
const std::map<int, const std::string> error_strings = {
|
|
{SUCCESS, "No errors"},
|
|
{CAPACITY, "This ParsedJson can't support a document that big"},
|
|
{MEMALLOC, "Error allocating memory, we're most likely out of memory"},
|
|
{TAPE_ERROR, "Something went wrong while writing to the tape"},
|
|
{STRING_ERROR, "Problem while parsing a string"},
|
|
{T_ATOM_ERROR,
|
|
"Problem while parsing an atom starting with the letter 't'"},
|
|
{F_ATOM_ERROR,
|
|
"Problem while parsing an atom starting with the letter 'f'"},
|
|
{N_ATOM_ERROR,
|
|
"Problem while parsing an atom starting with the letter 'n'"},
|
|
{NUMBER_ERROR, "Problem while parsing a number"},
|
|
{UTF8_ERROR, "The input is not valid UTF-8"},
|
|
{UNITIALIZED, "Unitialized"},
|
|
{EMPTY, "Empty"},
|
|
{UNESCAPED_CHARS, "Within strings, some characters must be escapted, we "
|
|
"found unescapted characters"},
|
|
{UNEXPECTED_ERROR, "Unexpected error, consider reporting this problem as "
|
|
"you may have found a bug in simdjson"},
|
|
};
|
|
|
|
const std::string &error_message(const int error_code) {
|
|
return error_strings.at(error_code);
|
|
}
|
|
} // namespace simdjson
|
|
/* end file src/simdjson.cpp */
|
|
/* begin file src/jsonioutil.cpp */
|
|
#include <cstdlib>
|
|
#include <cstring>
|
|
|
|
namespace simdjson {
|
|
char *allocate_padded_buffer(size_t length) {
|
|
// we could do a simple malloc
|
|
// return (char *) malloc(length + SIMDJSON_PADDING);
|
|
// However, we might as well align to cache lines...
|
|
size_t totalpaddedlength = length + SIMDJSON_PADDING;
|
|
char *padded_buffer = aligned_malloc_char(64, totalpaddedlength);
|
|
return padded_buffer;
|
|
}
|
|
|
|
// Loads the whole file `filename` into a padded_string and returns it.
//
// Throws std::runtime_error when the file cannot be opened, when its size
// cannot be determined (seek/tell failure), when the buffer cannot be
// allocated, or when fewer bytes than expected are read. The FILE handle is
// closed on every path.
padded_string get_corpus(const std::string &filename) {
  std::FILE *fp = std::fopen(filename.c_str(), "rb");
  if (fp == nullptr) {
    throw std::runtime_error("could not load corpus");
  }
  if (std::fseek(fp, 0, SEEK_END) != 0) {
    std::fclose(fp);
    throw std::runtime_error("cannot seek in the file");
  }
  // ftell reports failure with -1L; storing that directly into a size_t would
  // turn it into a huge length, so validate it while it is still signed.
  const long file_size = std::ftell(fp);
  if (file_size < 0) {
    std::fclose(fp);
    throw std::runtime_error("cannot tell where we are in the file");
  }
  const size_t len = static_cast<size_t>(file_size);
  padded_string s(len);
  if (s.data() == nullptr) {
    std::fclose(fp);
    throw std::runtime_error("could not allocate memory");
  }
  std::rewind(fp);
  const size_t readb = std::fread(s.data(), 1, len, fp);
  std::fclose(fp);
  if (readb != len) {
    throw std::runtime_error("could not read the data");
  }
  return s;
}
|
|
} // namespace simdjson
|
|
/* end file src/jsonioutil.cpp */
|
|
/* begin file src/jsonminifier.cpp */
|
|
#include <cstdint>
|
|
|
|
#ifndef __AVX2__
|
|
|
|
namespace simdjson {
|
|
// Per-byte classification table consumed by the scalar json_minify below.
// Byte value c owns the three consecutive entries starting at index 3 * c:
//   [0] is 1 only for the double quote (byte 34),
//   [1] is 0 only for the backslash (byte 92),
//   [2] is 0 only for tab (9), line feed (10), carriage return (13) and
//       space (32).
// Every other byte has the triple 0, 1, 1.
static uint8_t jump_table[256 * 3] = {
|
|
0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0,
|
|
1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1,
|
|
1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1,
|
|
0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0,
|
|
1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1,
|
|
1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1,
|
|
0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0,
|
|
1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1,
|
|
1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1,
|
|
0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0,
|
|
1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1,
|
|
1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1,
|
|
0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0,
|
|
1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1,
|
|
1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1,
|
|
0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0,
|
|
1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1,
|
|
1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1,
|
|
0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0,
|
|
1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1,
|
|
1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1,
|
|
0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0,
|
|
1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1,
|
|
1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1,
|
|
0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0,
|
|
1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1,
|
|
1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1,
|
|
0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0,
|
|
1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1,
|
|
1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1,
|
|
0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1,
|
|
};
|
|
|
|
size_t json_minify(const unsigned char *bytes, size_t how_many,
|
|
unsigned char *out) {
|
|
size_t i = 0, pos = 0;
|
|
uint8_t quote = 0;
|
|
uint8_t nonescape = 1;
|
|
|
|
while (i < how_many) {
|
|
unsigned char c = bytes[i];
|
|
uint8_t *meta = jump_table + 3 * c;
|
|
|
|
quote = quote ^ (meta[0] & nonescape);
|
|
out[pos] = c;
|
|
pos += meta[2] | quote;
|
|
|
|
i += 1;
|
|
nonescape = (~nonescape) | (meta[1]);
|
|
}
|
|
return pos;
|
|
}
|
|
} // namespace simdjson
|
|
#else
|
|
#include <cstring>
|
|
|
|
namespace simdjson {
|
|
|
|
// some intrinsics are missing under GCC?
|
|
#ifndef __clang__
|
|
#ifndef _MSC_VER
|
|
static __m256i inline _mm256_loadu2_m128i(__m128i const *__addr_hi,
|
|
__m128i const *__addr_lo) {
|
|
__m256i __v256 = _mm256_castsi128_si256(_mm_loadu_si128(__addr_lo));
|
|
return _mm256_insertf128_si256(__v256, _mm_loadu_si128(__addr_hi), 1);
|
|
}
|
|
|
|
static inline void _mm256_storeu2_m128i(__m128i *__addr_hi, __m128i *__addr_lo,
|
|
__m256i __a) {
|
|
__m128i __v128;
|
|
__v128 = _mm256_castsi256_si128(__a);
|
|
_mm_storeu_si128(__addr_lo, __v128);
|
|
__v128 = _mm256_extractf128_si256(__a, 1);
|
|
_mm_storeu_si128(__addr_hi, __v128);
|
|
}
|
|
#endif
|
|
#endif
|
|
|
|
// a straightforward comparison of a mask against input.
|
|
// Compares each of the 64 input bytes (input_lo holds bytes 0-31, input_hi
// bytes 32-63) against the corresponding byte of `mask` and returns a 64-bit
// bitmap in which bit k is set when byte k is equal.
static uint64_t cmp_mask_against_input_mini(__m256i input_lo, __m256i input_hi,
                                            __m256i mask) {
  const __m256i equal_lo = _mm256_cmpeq_epi8(input_lo, mask);
  const __m256i equal_hi = _mm256_cmpeq_epi8(input_hi, mask);
  // The low half goes through uint32_t so it is not sign-extended; the high
  // half may be sign-extended, but the shift pushes those extra bits out.
  const uint64_t low_bits =
      static_cast<uint32_t>(_mm256_movemask_epi8(equal_lo));
  const uint64_t high_bits = _mm256_movemask_epi8(equal_hi);
  return (high_bits << 32) | low_bits;
}
|
|
|
|
// take input from buf and remove useless whitespace, input and output can be
|
|
// the same, result is null terminated, return the string length (minus the null
|
|
// termination)
|
|
// AVX2 minifier. Copies `len` bytes from `buf` to `out`, dropping whitespace
// bytes that are not inside a quoted string, writes a terminating '\0' after
// the kept bytes and returns the number of kept bytes (terminator excluded).
//
// The input is handled 64 bytes at a time; each block is summarised as 64-bit
// bitmaps (one bit per byte) for backslashes, quotes and whitespace. A final
// partial block is processed from a zero-padded stack buffer.
//
// Relies on helpers defined outside this section: add_overflow, hamming and
// the mask128_epi8 table (presumably a popcount and a table of byte-shuffle
// masks that compact kept bytes -- confirm against their definitions).
size_t json_minify(const uint8_t *buf, size_t len, uint8_t *out) {
|
|
// Useful constant masks
|
|
const uint64_t even_bits = 0x5555555555555555ULL;
|
|
const uint64_t odd_bits = ~even_bits;
|
|
// remember where the output starts so the final length can be computed
uint8_t *initout(out);
|
|
uint64_t prev_iter_ends_odd_backslash =
|
|
0ULL; // either 0 or 1, but a 64-bit value
|
|
uint64_t prev_iter_inside_quote = 0ULL; // either all zeros or all ones
|
|
size_t idx = 0;
|
|
// full 64-byte blocks; skipped entirely when fewer than 64 bytes are given
if (len >= 64) {
|
|
size_t avx_len = len - 63;
|
|
|
|
for (; idx < avx_len; idx += 64) {
|
|
__m256i input_lo =
|
|
_mm256_loadu_si256(reinterpret_cast<const __m256i *>(buf + idx + 0));
|
|
__m256i input_hi =
|
|
_mm256_loadu_si256(reinterpret_cast<const __m256i *>(buf + idx + 32));
|
|
// bit k set <=> byte k of this block is a backslash
uint64_t bs_bits = cmp_mask_against_input_mini(input_lo, input_hi,
|
|
_mm256_set1_epi8('\\'));
|
|
// first backslash of every run of consecutive backslashes
uint64_t start_edges = bs_bits & ~(bs_bits << 1);
|
|
// the parity of positions is flipped when the previous block carried an
// unfinished backslash run into this one
uint64_t even_start_mask = even_bits ^ prev_iter_ends_odd_backslash;
|
|
uint64_t even_starts = start_edges & even_start_mask;
|
|
uint64_t odd_starts = start_edges & ~even_start_mask;
|
|
uint64_t even_carries = bs_bits + even_starts;
|
|
uint64_t odd_carries;
|
|
// the overflow flag is carried over to the next block
bool iter_ends_odd_backslash =
|
|
add_overflow(bs_bits, odd_starts, &odd_carries);
|
|
odd_carries |= prev_iter_ends_odd_backslash;
|
|
prev_iter_ends_odd_backslash = iter_ends_odd_backslash ? 0x1ULL : 0x0ULL;
|
|
uint64_t even_carry_ends = even_carries & ~bs_bits;
|
|
uint64_t odd_carry_ends = odd_carries & ~bs_bits;
|
|
uint64_t even_start_odd_end = even_carry_ends & odd_bits;
|
|
uint64_t odd_start_even_end = odd_carry_ends & even_bits;
|
|
// positions that directly follow a backslash run of odd length
uint64_t odd_ends = even_start_odd_end | odd_start_even_end;
|
|
uint64_t quote_bits = cmp_mask_against_input_mini(input_lo, input_hi,
|
|
_mm256_set1_epi8('"'));
|
|
// discard quotes that sit at such a position (escaped quotes)
quote_bits = quote_bits & ~odd_ends;
|
|
// carry-less multiplication by an all-ones operand: bit k of the result is
// the XOR of quote bits 0..k
uint64_t quote_mask = _mm_cvtsi128_si64(_mm_clmulepi64_si128(
|
|
_mm_set_epi64x(0ULL, quote_bits), _mm_set1_epi8(0xFF), 0));
|
|
quote_mask ^= prev_iter_inside_quote;
|
|
// broadcast the top bit of quote_mask to all 64 bits for the next block
prev_iter_inside_quote = static_cast<uint64_t>(
|
|
static_cast<int64_t>(quote_mask) >>
|
|
63); // right shift of a negative value: implementation-defined before
|
|
// C++20, arithmetic from C++20 on; ok according to John Regehr
|
|
// nibble lookup tables; a byte is classified by AND-ing the entry picked by
// its low nibble with the entry picked by its high nibble
const __m256i low_nibble_mask = _mm256_setr_epi8(
|
|
// 0 9 a b c d
|
|
16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0, 16, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 8, 12, 1, 2, 9, 0, 0);
|
|
const __m256i high_nibble_mask = _mm256_setr_epi8(
|
|
// 0 2 3 5 7
|
|
8, 0, 18, 4, 0, 1, 0, 1, 0, 0, 0, 3, 2, 1, 0, 0, 8, 0, 18, 4, 0, 1, 0,
|
|
1, 0, 0, 0, 3, 2, 1, 0, 0);
|
|
// bits 0x18 of the classification are the ones tested for whitespace
__m256i whitespace_shufti_mask = _mm256_set1_epi8(0x18);
|
|
__m256i v_lo = _mm256_and_si256(
|
|
_mm256_shuffle_epi8(low_nibble_mask, input_lo),
|
|
_mm256_shuffle_epi8(high_nibble_mask,
|
|
_mm256_and_si256(_mm256_srli_epi32(input_lo, 4),
|
|
_mm256_set1_epi8(0x7f))));
|
|
|
|
__m256i v_hi = _mm256_and_si256(
|
|
_mm256_shuffle_epi8(low_nibble_mask, input_hi),
|
|
_mm256_shuffle_epi8(high_nibble_mask,
|
|
_mm256_and_si256(_mm256_srli_epi32(input_hi, 4),
|
|
_mm256_set1_epi8(0x7f))));
|
|
// tmp_ws_* is all-ones for bytes whose whitespace bits are clear
__m256i tmp_ws_lo = _mm256_cmpeq_epi8(
|
|
_mm256_and_si256(v_lo, whitespace_shufti_mask), _mm256_set1_epi8(0));
|
|
__m256i tmp_ws_hi = _mm256_cmpeq_epi8(
|
|
_mm256_and_si256(v_hi, whitespace_shufti_mask), _mm256_set1_epi8(0));
|
|
|
|
uint64_t ws_res_0 =
|
|
static_cast<uint32_t>(_mm256_movemask_epi8(tmp_ws_lo));
|
|
uint64_t ws_res_1 = _mm256_movemask_epi8(tmp_ws_hi);
|
|
// invert: bit k set <=> byte k is whitespace
uint64_t whitespace = ~(ws_res_0 | (ws_res_1 << 32));
|
|
// whitespace inside a quoted string is kept
whitespace &= ~quote_mask;
|
|
// one 16-bit slice of the bitmap per 16-byte lane
int mask1 = whitespace & 0xFFFF;
|
|
int mask2 = (whitespace >> 16) & 0xFFFF;
|
|
int mask3 = (whitespace >> 32) & 0xFFFF;
|
|
int mask4 = (whitespace >> 48) & 0xFFFF;
|
|
// hamming() of the kept-byte bits of the first 16 / 32 / 48 / 64 bytes;
// used below as output offsets for lanes 1-3 and for the whole block
int pop1 = hamming((~whitespace) & 0xFFFF);
|
|
int pop2 = hamming((~whitespace) & UINT64_C(0xFFFFFFFF));
|
|
int pop3 = hamming((~whitespace) & UINT64_C(0xFFFFFFFFFFFF));
|
|
int pop4 = hamming((~whitespace));
|
|
// per-lane shuffle masks looked up in mask128_epi8 by the slice value
__m256i vmask1 = _mm256_loadu2_m128i(
|
|
reinterpret_cast<const __m128i *>(mask128_epi8) + (mask2 & 0x7FFF),
|
|
reinterpret_cast<const __m128i *>(mask128_epi8) + (mask1 & 0x7FFF));
|
|
__m256i vmask2 = _mm256_loadu2_m128i(
|
|
reinterpret_cast<const __m128i *>(mask128_epi8) + (mask4 & 0x7FFF),
|
|
reinterpret_cast<const __m128i *>(mask128_epi8) + (mask3 & 0x7FFF));
|
|
__m256i result1 = _mm256_shuffle_epi8(input_lo, vmask1);
|
|
__m256i result2 = _mm256_shuffle_epi8(input_hi, vmask2);
|
|
// each lane is stored in full (16 bytes) at its offset, in increasing
// offset order
_mm256_storeu2_m128i(reinterpret_cast<__m128i *>(out + pop1),
|
|
reinterpret_cast<__m128i *>(out), result1);
|
|
_mm256_storeu2_m128i(reinterpret_cast<__m128i *>(out + pop3),
|
|
reinterpret_cast<__m128i *>(out + pop2), result2);
|
|
out += pop4;
|
|
}
|
|
}
|
|
// we finish off the job... copying and pasting the code is not ideal here,
|
|
// but it gets the job done.
|
|
if (idx < len) {
|
|
// remaining bytes (fewer than 64) are copied into a zero-filled block
uint8_t buffer[64];
|
|
memset(buffer, 0, 64);
|
|
memcpy(buffer, buf + idx, len - idx);
|
|
__m256i input_lo =
|
|
_mm256_loadu_si256(reinterpret_cast<const __m256i *>(buffer));
|
|
__m256i input_hi =
|
|
_mm256_loadu_si256(reinterpret_cast<const __m256i *>(buffer + 32));
|
|
uint64_t bs_bits =
|
|
cmp_mask_against_input_mini(input_lo, input_hi, _mm256_set1_epi8('\\'));
|
|
uint64_t start_edges = bs_bits & ~(bs_bits << 1);
|
|
uint64_t even_start_mask = even_bits ^ prev_iter_ends_odd_backslash;
|
|
uint64_t even_starts = start_edges & even_start_mask;
|
|
uint64_t odd_starts = start_edges & ~even_start_mask;
|
|
uint64_t even_carries = bs_bits + even_starts;
|
|
uint64_t odd_carries;
|
|
// bool iter_ends_odd_backslash =
|
|
add_overflow(bs_bits, odd_starts, &odd_carries);
|
|
odd_carries |= prev_iter_ends_odd_backslash;
|
|
// prev_iter_ends_odd_backslash = iter_ends_odd_backslash ? 0x1ULL : 0x0ULL;
|
|
// // we never use it
|
|
uint64_t even_carry_ends = even_carries & ~bs_bits;
|
|
uint64_t odd_carry_ends = odd_carries & ~bs_bits;
|
|
uint64_t even_start_odd_end = even_carry_ends & odd_bits;
|
|
uint64_t odd_start_even_end = odd_carry_ends & even_bits;
|
|
uint64_t odd_ends = even_start_odd_end | odd_start_even_end;
|
|
uint64_t quote_bits =
|
|
cmp_mask_against_input_mini(input_lo, input_hi, _mm256_set1_epi8('"'));
|
|
quote_bits = quote_bits & ~odd_ends;
|
|
uint64_t quote_mask = _mm_cvtsi128_si64(_mm_clmulepi64_si128(
|
|
_mm_set_epi64x(0ULL, quote_bits), _mm_set1_epi8(0xFF), 0));
|
|
quote_mask ^= prev_iter_inside_quote;
|
|
// prev_iter_inside_quote = (uint64_t)((int64_t)quote_mask >> 63);// we
|
|
// don't need this anymore
|
|
|
|
// NOTE: the tail detects whitespace differently from the main loop: an
// equality test against 0x20 OR-ed with a 16-entry lookup that flags the
// byte values 0x09, 0x0a and 0x0d
__m256i mask_20 = _mm256_set1_epi8(0x20); // c==32
|
|
__m256i mask_70 =
|
|
_mm256_set1_epi8(0x70); // adding 0x70 does not check low 4-bits
|
|
// but moves any value >= 16 above 128
|
|
|
|
__m256i lut_cntrl = _mm256_setr_epi8(
|
|
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0x00,
|
|
0x00, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
|
0x00, 0xFF, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00);
|
|
|
|
__m256i tmp_ws_lo = _mm256_or_si256(
|
|
_mm256_cmpeq_epi8(mask_20, input_lo),
|
|
_mm256_shuffle_epi8(lut_cntrl, _mm256_adds_epu8(mask_70, input_lo)));
|
|
__m256i tmp_ws_hi = _mm256_or_si256(
|
|
_mm256_cmpeq_epi8(mask_20, input_hi),
|
|
_mm256_shuffle_epi8(lut_cntrl, _mm256_adds_epu8(mask_70, input_hi)));
|
|
uint64_t ws_res_0 = static_cast<uint32_t>(_mm256_movemask_epi8(tmp_ws_lo));
|
|
uint64_t ws_res_1 = _mm256_movemask_epi8(tmp_ws_hi);
|
|
// here a set bit already means "whitespace", so no inversion is needed
uint64_t whitespace = (ws_res_0 | (ws_res_1 << 32));
|
|
whitespace &= ~quote_mask;
|
|
|
|
// mark the zero padding past the real input as whitespace so that it is
// dropped
if (len - idx < 64) {
|
|
whitespace |= UINT64_C(0xFFFFFFFFFFFFFFFF) << (len - idx);
|
|
}
|
|
int mask1 = whitespace & 0xFFFF;
|
|
int mask2 = (whitespace >> 16) & 0xFFFF;
|
|
int mask3 = (whitespace >> 32) & 0xFFFF;
|
|
int mask4 = (whitespace >> 48) & 0xFFFF;
|
|
int pop1 = hamming((~whitespace) & 0xFFFF);
|
|
int pop2 = hamming((~whitespace) & UINT64_C(0xFFFFFFFF));
|
|
int pop3 = hamming((~whitespace) & UINT64_C(0xFFFFFFFFFFFF));
|
|
int pop4 = hamming((~whitespace));
|
|
__m256i vmask1 = _mm256_loadu2_m128i(
|
|
reinterpret_cast<const __m128i *>(mask128_epi8) + (mask2 & 0x7FFF),
|
|
reinterpret_cast<const __m128i *>(mask128_epi8) + (mask1 & 0x7FFF));
|
|
__m256i vmask2 = _mm256_loadu2_m128i(
|
|
reinterpret_cast<const __m128i *>(mask128_epi8) + (mask4 & 0x7FFF),
|
|
reinterpret_cast<const __m128i *>(mask128_epi8) + (mask3 & 0x7FFF));
|
|
__m256i result1 = _mm256_shuffle_epi8(input_lo, vmask1);
|
|
__m256i result2 = _mm256_shuffle_epi8(input_hi, vmask2);
|
|
// compact inside the stack buffer, then copy only the pop4 kept bytes out
_mm256_storeu2_m128i(reinterpret_cast<__m128i *>(buffer + pop1),
|
|
reinterpret_cast<__m128i *>(buffer), result1);
|
|
_mm256_storeu2_m128i(reinterpret_cast<__m128i *>(buffer + pop3),
|
|
reinterpret_cast<__m128i *>(buffer + pop2), result2);
|
|
memcpy(out, buffer, pop4);
|
|
out += pop4;
|
|
}
|
|
*out = '\0'; // NULL termination
|
|
return out - initout;
|
|
}
|
|
} // namespace simdjson
|
|
#endif
|
|
/* end file src/jsonminifier.cpp */
|
|
/* begin file src/jsonparser.cpp */
|
|
|
|
namespace simdjson {
|
|
|
|
// The function that users are expected to call is json_parse.
|
|
// We have more than one such function because we want to support several
|
|
// instruction sets.
|
|
|
|
// function pointer type for json_parse
|
|
// Function type shared by every json_parse implementation: takes the input
// buffer and its length, the ParsedJson to fill and the realloc_if_needed
// flag, and returns an int (the dispatcher below returns simdjson error
// codes through it).
using json_parse_functype = int(const uint8_t *buf, size_t len, ParsedJson &pj,
|
|
bool realloc_if_needed);
|
|
|
|
// Pointer that holds the json_parse implementation corresponding to the
|
|
// available SIMD instruction set
|
|
// (declared here, defined after json_parse_dispatch)
extern json_parse_functype *json_parse_ptr;
|
|
|
|
int json_parse(const uint8_t *buf, size_t len, ParsedJson &pj,
|
|
bool realloc_if_needed) {
|
|
return json_parse_ptr(buf, len, pj, realloc_if_needed);
|
|
}
|
|
|
|
int json_parse(const char *buf, size_t len, ParsedJson &pj,
|
|
bool realloc_if_needed) {
|
|
return json_parse_ptr(reinterpret_cast<const uint8_t *>(buf), len, pj,
|
|
realloc_if_needed);
|
|
}
|
|
|
|
// Picks the most capable Architecture whose required instruction sets are all
// present in the bitmask returned by detect_supported_architectures().
// Returns Architecture::NONE when no implementation's requirements are met.
Architecture find_best_supported_implementation() {
  constexpr uint32_t haswell_flags =
      instruction_set::AVX2 | instruction_set::PCLMULQDQ |
      instruction_set::BMI1 | instruction_set::BMI2;
  constexpr uint32_t westmere_flags =
      instruction_set::SSE42 | instruction_set::PCLMULQDQ;

  const uint32_t supports = detect_supported_architectures();
  // Order from best to worst (within architecture)
  if ((haswell_flags & supports) == haswell_flags) {
    return Architecture::HASWELL;
  }
  if ((westmere_flags & supports) == westmere_flags) {
    return Architecture::WESTMERE;
  }
  // Test the detected feature bits, not the NEON constant itself: the bare
  // constant is truthy whenever its value is non-zero, regardless of the CPU.
  if ((supports & instruction_set::NEON) != 0) {
    return Architecture::ARM64;
  }

  return Architecture::NONE;
}
|
|
|
|
// Responsible to select the best json_parse implementation
|
|
int json_parse_dispatch(const uint8_t *buf, size_t len, ParsedJson &pj,
|
|
bool realloc_if_needed) {
|
|
Architecture best_implementation = find_best_supported_implementation();
|
|
// Selecting the best implementation
|
|
switch (best_implementation) {
|
|
#ifdef IS_X86_64
|
|
case Architecture::HASWELL:
|
|
json_parse_ptr = &json_parse_implementation<Architecture::HASWELL>;
|
|
break;
|
|
case Architecture::WESTMERE:
|
|
json_parse_ptr = &json_parse_implementation<Architecture::WESTMERE>;
|
|
break;
|
|
#endif
|
|
#ifdef IS_ARM64
|
|
case Architecture::ARM64:
|
|
json_parse_ptr = &json_parse_implementation<Architecture::ARM64>;
|
|
break;
|
|
#endif
|
|
default:
|
|
std::cerr << "The processor is not supported by simdjson." << std::endl;
|
|
return simdjson::UNEXPECTED_ERROR;
|
|
}
|
|
|
|
return json_parse_ptr(buf, len, pj, realloc_if_needed);
|
|
}
|
|
|
|
// Starts out pointing at json_parse_dispatch, which replaces it with the
// selected implementation the first time it succeeds.
json_parse_functype *json_parse_ptr = &json_parse_dispatch;
|
|
|
|
// Convenience wrapper: creates a ParsedJson, sizes it for `len` bytes via
// allocate_capacity and, if that worked, parses `buf` into it with
// json_parse. If the allocation fails a message is written to std::cerr and
// the ParsedJson is returned without parsing. The value returned by
// json_parse is not inspected here.
WARN_UNUSED
ParsedJson build_parsed_json(const uint8_t *buf, size_t len,
                             bool realloc_if_needed) {
  ParsedJson pj;
  const bool allocation_failed = !pj.allocate_capacity(len);
  if (allocation_failed) {
    std::cerr << "failure during memory allocation " << std::endl;
  } else {
    json_parse(buf, len, pj, realloc_if_needed);
  }
  return pj;
}
|
|
} // namespace simdjson
|
|
/* end file src/jsonparser.cpp */
|
|
/* begin file src/stage1_find_marks.cpp */
|
|
|
|
#ifdef IS_X86_64
|
|
|
|
TARGET_HASWELL
|
|
namespace simdjson {
|
|
// Haswell specialization of find_structural_bits (stage 1). The whole body,
// including its return statements, comes from the FIND_STRUCTURAL_BITS macro
// (defined outside this section), which is handed
// simdjson::haswell::flatten_bits.
template <>
|
|
int find_structural_bits<Architecture::HASWELL>(const uint8_t *buf, size_t len,
|
|
ParsedJson &pj) {
|
|
FIND_STRUCTURAL_BITS(Architecture::HASWELL, buf, len, pj,
|
|
simdjson::haswell::flatten_bits);
|
|
}
|
|
} // namespace simdjson
|
|
UNTARGET_REGION
|
|
|
|
TARGET_WESTMERE
|
|
namespace simdjson {
|
|
// Westmere specialization of find_structural_bits (stage 1). The whole body,
// including its return statements, comes from the FIND_STRUCTURAL_BITS macro
// (defined outside this section), which is handed simdjson::flatten_bits.
template <>
|
|
int find_structural_bits<Architecture::WESTMERE>(const uint8_t *buf, size_t len,
|
|
ParsedJson &pj) {
|
|
FIND_STRUCTURAL_BITS(Architecture::WESTMERE, buf, len, pj,
|
|
simdjson::flatten_bits);
|
|
}
|
|
} // namespace simdjson
|
|
UNTARGET_REGION
|
|
|
|
#endif
|
|
|
|
#ifdef IS_ARM64
|
|
namespace simdjson {
|
|
// ARM64 specialization of find_structural_bits (stage 1). The whole body,
// including its return statements, comes from the FIND_STRUCTURAL_BITS macro
// (defined outside this section), which is handed simdjson::flatten_bits.
template <>
|
|
int find_structural_bits<Architecture::ARM64>(const uint8_t *buf, size_t len,
|
|
ParsedJson &pj) {
|
|
FIND_STRUCTURAL_BITS(Architecture::ARM64, buf, len, pj,
|
|
simdjson::flatten_bits);
|
|
}
|
|
} // namespace simdjson
|
|
#endif
|
|
/* end file src/stage1_find_marks.cpp */
|
|
/* begin file src/stage2_build_tape.cpp */
|
|
|
|
namespace simdjson {
|
|
|
|
// this macro reads the next structural character, updating idx, i and c.
|
|
// Expects `i`, `idx`, `c`, `buf` and `pj` to be in scope at the point of use:
// loads the next entry of pj.structural_indexes into idx (post-incrementing i)
// and the byte found at that position of buf into c.
#define UPDATE_CHAR() \
|
|
{ \
|
|
idx = pj.structural_indexes[i++]; \
|
|
c = buf[idx]; \
|
|
}
|
|
|
|
// "Return address" helpers for the state machine. SET_GOTO_*_CONTINUE records,
// in pj.ret_address[depth], which *_continue label to resume at for the
// current depth; GOTO_CONTINUE jumps to the recorded label.
// With SIMDJSON_USE_COMPUTED_GOTO the label address itself is stored
// (&&label / goto *), otherwise a tag character ('a', 'o' or 's') is stored
// and GOTO_CONTINUE dispatches on it, falling back to start_continue.
#ifdef SIMDJSON_USE_COMPUTED_GOTO
|
|
#define SET_GOTO_ARRAY_CONTINUE() pj.ret_address[depth] = &&array_continue;
|
|
#define SET_GOTO_OBJECT_CONTINUE() pj.ret_address[depth] = &&object_continue;
|
|
#define SET_GOTO_START_CONTINUE() pj.ret_address[depth] = &&start_continue;
|
|
#define GOTO_CONTINUE() goto *pj.ret_address[depth];
|
|
#else
|
|
#define SET_GOTO_ARRAY_CONTINUE() pj.ret_address[depth] = 'a';
|
|
#define SET_GOTO_OBJECT_CONTINUE() pj.ret_address[depth] = 'o';
|
|
#define SET_GOTO_START_CONTINUE() pj.ret_address[depth] = 's';
|
|
#define GOTO_CONTINUE() \
|
|
{ \
|
|
if (pj.ret_address[depth] == 'a') { \
|
|
goto array_continue; \
|
|
} else if (pj.ret_address[depth] == 'o') { \
|
|
goto object_continue; \
|
|
} else { \
|
|
goto start_continue; \
|
|
} \
|
|
}
|
|
#endif
|
|
|
|
/************
|
|
* The JSON is parsed to a tape, see the accompanying tape.md file
|
|
* for documentation.
|
|
***********/
|
|
// We need to compile that code for multiple architectures. However, target
// attributes can be used only once per function definition. A huge macro
// seemed better than huge code duplication. The macro expands to the body of:
//   int unified_machine(const uint8_t *buf, size_t len, ParsedJson &pj)
|
|
// Body of the stage 2 state machine; it is expanded inside each
// unified_machine<Architecture> specialization, so every `return` in it
// returns from that function.
//
// Parameters: T is the Architecture handed to parse_string<T>; buf/len are the
// input bytes; pj is the ParsedJson whose structural_indexes are consumed (via
// UPDATE_CHAR) and whose tape is written (via pj.write_tape).
//
// Control flow is a set of goto labels: start_continue, object_begin,
// object_key_state, object_continue, scope_end, array_begin,
// main_array_switch, array_continue, succeed and fail. Entering an object or
// array records the resume label with SET_GOTO_*_CONTINUE and increments
// depth; scope_end decrements depth and resumes through GOTO_CONTINUE.
//
// A document whose root is a bare atom or number is first copied into a
// malloc'ed buffer terminated by a space before being validated.
//
// Result: pj.error_code is set and returned -- CAPACITY when pj.byte_capacity
// is smaller than len, SUCCESS (with pj.valid = true) on the succeed path,
// and on the fail path DEPTH_ERROR, STRING_ERROR, NUMBER_ERROR, T_ATOM_ERROR,
// N_ATOM_ERROR, F_ATOM_ERROR or TAPE_ERROR depending on depth and on the last
// character examined.
#define UNIFIED_MACHINE(T, buf, len, pj) \
|
|
{ \
|
|
if (ALLOW_SAME_PAGE_BUFFER_OVERRUN) { \
|
|
memset((uint8_t *)buf + len, 0, \
|
|
SIMDJSON_PADDING); /* to please valgrind */ \
|
|
} \
|
|
uint32_t i = 0; /* index of the structural character (0,1,2,3...) */ \
|
|
uint32_t \
|
|
idx; /* location of the structural character in the input (buf) */ \
|
|
uint8_t c; /* used to track the (structural) character we are looking at, \
|
|
updated */ \
|
|
/* by UPDATE_CHAR macro */ \
|
|
uint32_t depth = 0; /* could have an arbitrary starting depth */ \
|
|
pj.init(); /* sets is_valid to false */ \
|
|
if (pj.byte_capacity < len) { \
|
|
pj.error_code = simdjson::CAPACITY; \
|
|
return pj.error_code; \
|
|
} \
|
|
\
|
|
/*//////////////////////////// START STATE ///////////////////////////// \
|
|
*/ \
|
|
SET_GOTO_START_CONTINUE() \
|
|
pj.containing_scope_offset[depth] = pj.get_current_loc(); \
|
|
pj.write_tape(0, 'r'); /* r for root, 0 is going to get overwritten */ \
|
|
/* the root is used, if nothing else, to capture the size of the tape */ \
|
|
depth++; /* everything starts at depth = 1, depth = 0 is just for the \
|
|
root, the root may contain an object, an array or something \
|
|
else. */ \
|
|
if (depth >= pj.depth_capacity) { \
|
|
goto fail; \
|
|
} \
|
|
\
|
|
UPDATE_CHAR(); \
|
|
switch (c) { \
|
|
case '{': \
|
|
pj.containing_scope_offset[depth] = pj.get_current_loc(); \
|
|
SET_GOTO_START_CONTINUE(); \
|
|
depth++; \
|
|
if (depth >= pj.depth_capacity) { \
|
|
goto fail; \
|
|
} \
|
|
pj.write_tape( \
|
|
0, \
|
|
c); /* strangely, moving this to object_begin slows things down */ \
|
|
goto object_begin; \
|
|
case '[': \
|
|
pj.containing_scope_offset[depth] = pj.get_current_loc(); \
|
|
SET_GOTO_START_CONTINUE(); \
|
|
depth++; \
|
|
if (depth >= pj.depth_capacity) { \
|
|
goto fail; \
|
|
} \
|
|
pj.write_tape(0, c); \
|
|
goto array_begin; \
|
|
/* #define SIMDJSON_ALLOWANYTHINGINROOT \
|
|
* A JSON text is a serialized value. Note that certain previous \
|
|
* specifications of JSON constrained a JSON text to be an object or an \
|
|
* array. Implementations that generate only objects or arrays where a \
|
|
* JSON text is called for will be interoperable in the sense that all \
|
|
* implementations will accept these as conforming JSON texts. \
|
|
* https://tools.ietf.org/html/rfc8259 \
|
|
* #ifdef SIMDJSON_ALLOWANYTHINGINROOT */ \
|
|
case '"': { \
|
|
if (!parse_string<T>(buf, len, pj, depth, idx)) { \
|
|
goto fail; \
|
|
} \
|
|
break; \
|
|
} \
|
|
case 't': { \
|
|
/* we need to make a copy to make sure that the string is space \
|
|
* terminated. \
|
|
* this only applies to the JSON document made solely of the true value. \
|
|
* this will almost never be called in practice */ \
|
|
char *copy = static_cast<char *>(malloc(len + SIMDJSON_PADDING)); \
|
|
if (copy == nullptr) { \
|
|
goto fail; \
|
|
} \
|
|
memcpy(copy, buf, len); \
|
|
copy[len] = ' '; \
|
|
if (!is_valid_true_atom(reinterpret_cast<const uint8_t *>(copy) + \
|
|
idx)) { \
|
|
free(copy); \
|
|
goto fail; \
|
|
} \
|
|
free(copy); \
|
|
pj.write_tape(0, c); \
|
|
break; \
|
|
} \
|
|
case 'f': { \
|
|
/* we need to make a copy to make sure that the string is space \
|
|
* terminated. \
|
|
* this only applies to the JSON document made solely of the false \
|
|
* value. \
|
|
* this will almost never be called in practice */ \
|
|
char *copy = static_cast<char *>(malloc(len + SIMDJSON_PADDING)); \
|
|
if (copy == nullptr) { \
|
|
goto fail; \
|
|
} \
|
|
memcpy(copy, buf, len); \
|
|
copy[len] = ' '; \
|
|
if (!is_valid_false_atom(reinterpret_cast<const uint8_t *>(copy) + \
|
|
idx)) { \
|
|
free(copy); \
|
|
goto fail; \
|
|
} \
|
|
free(copy); \
|
|
pj.write_tape(0, c); \
|
|
break; \
|
|
} \
|
|
case 'n': { \
|
|
/* we need to make a copy to make sure that the string is space \
|
|
* terminated. \
|
|
* this only applies to the JSON document made solely of the null value. \
|
|
* this will almost never be called in practice */ \
|
|
char *copy = static_cast<char *>(malloc(len + SIMDJSON_PADDING)); \
|
|
if (copy == nullptr) { \
|
|
goto fail; \
|
|
} \
|
|
memcpy(copy, buf, len); \
|
|
copy[len] = ' '; \
|
|
if (!is_valid_null_atom(reinterpret_cast<const uint8_t *>(copy) + \
|
|
idx)) { \
|
|
free(copy); \
|
|
goto fail; \
|
|
} \
|
|
free(copy); \
|
|
pj.write_tape(0, c); \
|
|
break; \
|
|
} \
|
|
case '0': \
|
|
case '1': \
|
|
case '2': \
|
|
case '3': \
|
|
case '4': \
|
|
case '5': \
|
|
case '6': \
|
|
case '7': \
|
|
case '8': \
|
|
case '9': { \
|
|
/* we need to make a copy to make sure that the string is space \
|
|
* terminated. \
|
|
* this is done only for JSON documents made of a sole number \
|
|
* this will almost never be called in practice. We terminate with a \
|
|
* space \
|
|
* because we do not want to allow NULLs in the middle of a number \
|
|
* (whereas a \
|
|
* space in the middle of a number would be identified in stage 1). */ \
|
|
char *copy = static_cast<char *>(malloc(len + SIMDJSON_PADDING)); \
|
|
if (copy == nullptr) { \
|
|
goto fail; \
|
|
} \
|
|
memcpy(copy, buf, len); \
|
|
copy[len] = ' '; \
|
|
if (!parse_number(reinterpret_cast<const uint8_t *>(copy), pj, idx, \
|
|
false)) { \
|
|
free(copy); \
|
|
goto fail; \
|
|
} \
|
|
free(copy); \
|
|
break; \
|
|
} \
|
|
case '-': { \
|
|
/* we need to make a copy to make sure that the string is NULL \
|
|
* terminated. \
|
|
* this is done only for JSON documents made of a sole number \
|
|
* this will almost never be called in practice */ \
|
|
char *copy = static_cast<char *>(malloc(len + SIMDJSON_PADDING)); \
|
|
if (copy == nullptr) { \
|
|
goto fail; \
|
|
} \
|
|
memcpy(copy, buf, len); \
|
|
copy[len] = ' '; \
|
|
if (!parse_number(reinterpret_cast<const uint8_t *>(copy), pj, idx, \
|
|
true)) { \
|
|
free(copy); \
|
|
goto fail; \
|
|
} \
|
|
free(copy); \
|
|
break; \
|
|
} \
|
|
default: \
|
|
goto fail; \
|
|
} \
|
|
start_continue: \
|
|
/* the string might not be NULL terminated. */ \
|
|
if (i + 1 == pj.n_structural_indexes) { \
|
|
goto succeed; \
|
|
} else { \
|
|
goto fail; \
|
|
} \
|
|
/*//////////////////////////// OBJECT STATES ///////////////////////////*/ \
|
|
\
|
|
object_begin: \
|
|
UPDATE_CHAR(); \
|
|
switch (c) { \
|
|
case '"': { \
|
|
if (!parse_string<T>(buf, len, pj, depth, idx)) { \
|
|
goto fail; \
|
|
} \
|
|
goto object_key_state; \
|
|
} \
|
|
case '}': \
|
|
goto scope_end; /* could also go to object_continue */ \
|
|
default: \
|
|
goto fail; \
|
|
} \
|
|
\
|
|
object_key_state: \
|
|
UPDATE_CHAR(); \
|
|
if (c != ':') { \
|
|
goto fail; \
|
|
} \
|
|
UPDATE_CHAR(); \
|
|
switch (c) { \
|
|
case '"': { \
|
|
if (!parse_string<T>(buf, len, pj, depth, idx)) { \
|
|
goto fail; \
|
|
} \
|
|
break; \
|
|
} \
|
|
case 't': \
|
|
if (!is_valid_true_atom(buf + idx)) { \
|
|
goto fail; \
|
|
} \
|
|
pj.write_tape(0, c); \
|
|
break; \
|
|
case 'f': \
|
|
if (!is_valid_false_atom(buf + idx)) { \
|
|
goto fail; \
|
|
} \
|
|
pj.write_tape(0, c); \
|
|
break; \
|
|
case 'n': \
|
|
if (!is_valid_null_atom(buf + idx)) { \
|
|
goto fail; \
|
|
} \
|
|
pj.write_tape(0, c); \
|
|
break; \
|
|
case '0': \
|
|
case '1': \
|
|
case '2': \
|
|
case '3': \
|
|
case '4': \
|
|
case '5': \
|
|
case '6': \
|
|
case '7': \
|
|
case '8': \
|
|
case '9': { \
|
|
if (!parse_number(buf, pj, idx, false)) { \
|
|
goto fail; \
|
|
} \
|
|
break; \
|
|
} \
|
|
case '-': { \
|
|
if (!parse_number(buf, pj, idx, true)) { \
|
|
goto fail; \
|
|
} \
|
|
break; \
|
|
} \
|
|
case '{': { \
|
|
pj.containing_scope_offset[depth] = pj.get_current_loc(); \
|
|
pj.write_tape(0, c); /* here the compilers knows what c is so this gets \
|
|
optimized */ \
|
|
/* we have not yet encountered } so we need to come back for it */ \
|
|
SET_GOTO_OBJECT_CONTINUE() \
|
|
/* we found an object inside an object, so we need to increment the \
|
|
* depth */ \
|
|
depth++; \
|
|
if (depth >= pj.depth_capacity) { \
|
|
goto fail; \
|
|
} \
|
|
\
|
|
goto object_begin; \
|
|
} \
|
|
case '[': { \
|
|
pj.containing_scope_offset[depth] = pj.get_current_loc(); \
|
|
pj.write_tape(0, c); /* here the compilers knows what c is so this gets \
|
|
optimized */ \
|
|
/* we have not yet encountered } so we need to come back for it */ \
|
|
SET_GOTO_OBJECT_CONTINUE() \
|
|
/* we found an array inside an object, so we need to increment the depth \
|
|
*/ \
|
|
depth++; \
|
|
if (depth >= pj.depth_capacity) { \
|
|
goto fail; \
|
|
} \
|
|
goto array_begin; \
|
|
} \
|
|
default: \
|
|
goto fail; \
|
|
} \
|
|
\
|
|
object_continue: \
|
|
UPDATE_CHAR(); \
|
|
switch (c) { \
|
|
case ',': \
|
|
UPDATE_CHAR(); \
|
|
if (c != '"') { \
|
|
goto fail; \
|
|
} else { \
|
|
if (!parse_string<T>(buf, len, pj, depth, idx)) { \
|
|
goto fail; \
|
|
} \
|
|
goto object_key_state; \
|
|
} \
|
|
case '}': \
|
|
goto scope_end; \
|
|
default: \
|
|
goto fail; \
|
|
} \
|
|
\
|
|
/*//////////////////////////// COMMON STATE ///////////////////////////*/ \
|
|
\
|
|
scope_end: \
|
|
/* write our tape location to the header scope */ \
|
|
depth--; \
|
|
pj.write_tape(pj.containing_scope_offset[depth], c); \
|
|
pj.annotate_previous_loc(pj.containing_scope_offset[depth], \
|
|
pj.get_current_loc()); \
|
|
/* goto saved_state */ \
|
|
GOTO_CONTINUE() \
|
|
\
|
|
/*//////////////////////////// ARRAY STATES ///////////////////////////*/ \
|
|
array_begin: \
|
|
UPDATE_CHAR(); \
|
|
if (c == ']') { \
|
|
goto scope_end; /* could also go to array_continue */ \
|
|
} \
|
|
\
|
|
main_array_switch: \
|
|
/* we call update char on all paths in, so we can peek at c on the \
|
|
* on paths that can accept a close square brace (post-, and at start) */ \
|
|
switch (c) { \
|
|
case '"': { \
|
|
if (!parse_string<T>(buf, len, pj, depth, idx)) { \
|
|
goto fail; \
|
|
} \
|
|
break; \
|
|
} \
|
|
case 't': \
|
|
if (!is_valid_true_atom(buf + idx)) { \
|
|
goto fail; \
|
|
} \
|
|
pj.write_tape(0, c); \
|
|
break; \
|
|
case 'f': \
|
|
if (!is_valid_false_atom(buf + idx)) { \
|
|
goto fail; \
|
|
} \
|
|
pj.write_tape(0, c); \
|
|
break; \
|
|
case 'n': \
|
|
if (!is_valid_null_atom(buf + idx)) { \
|
|
goto fail; \
|
|
} \
|
|
pj.write_tape(0, c); \
|
|
break; /* goto array_continue; */ \
|
|
\
|
|
case '0': \
|
|
case '1': \
|
|
case '2': \
|
|
case '3': \
|
|
case '4': \
|
|
case '5': \
|
|
case '6': \
|
|
case '7': \
|
|
case '8': \
|
|
case '9': { \
|
|
if (!parse_number(buf, pj, idx, false)) { \
|
|
goto fail; \
|
|
} \
|
|
break; /* goto array_continue; */ \
|
|
} \
|
|
case '-': { \
|
|
if (!parse_number(buf, pj, idx, true)) { \
|
|
goto fail; \
|
|
} \
|
|
break; /* goto array_continue; */ \
|
|
} \
|
|
case '{': { \
|
|
/* we have not yet encountered ] so we need to come back for it */ \
|
|
pj.containing_scope_offset[depth] = pj.get_current_loc(); \
|
|
pj.write_tape(0, c); /* here the compilers knows what c is so this gets \
|
|
optimized */ \
|
|
SET_GOTO_ARRAY_CONTINUE() \
|
|
/* we found an object inside an array, so we need to increment the depth \
|
|
*/ \
|
|
depth++; \
|
|
if (depth >= pj.depth_capacity) { \
|
|
goto fail; \
|
|
} \
|
|
\
|
|
goto object_begin; \
|
|
} \
|
|
case '[': { \
|
|
/* we have not yet encountered ] so we need to come back for it */ \
|
|
pj.containing_scope_offset[depth] = pj.get_current_loc(); \
|
|
pj.write_tape(0, c); /* here the compilers knows what c is so this gets \
|
|
optimized */ \
|
|
SET_GOTO_ARRAY_CONTINUE() \
|
|
/* we found an array inside an array, so we need to increment the depth \
|
|
*/ \
|
|
depth++; \
|
|
if (depth >= pj.depth_capacity) { \
|
|
goto fail; \
|
|
} \
|
|
goto array_begin; \
|
|
} \
|
|
default: \
|
|
goto fail; \
|
|
} \
|
|
\
|
|
array_continue: \
|
|
UPDATE_CHAR(); \
|
|
switch (c) { \
|
|
case ',': \
|
|
UPDATE_CHAR(); \
|
|
goto main_array_switch; \
|
|
case ']': \
|
|
goto scope_end; \
|
|
default: \
|
|
goto fail; \
|
|
} \
|
|
\
|
|
/*//////////////////////////// FINAL STATES ///////////////////////////*/ \
|
|
\
|
|
succeed: \
|
|
depth--; \
|
|
if (depth != 0) { \
|
|
fprintf(stderr, "internal bug\n"); \
|
|
abort(); \
|
|
} \
|
|
if (pj.containing_scope_offset[depth] != 0) { \
|
|
fprintf(stderr, "internal bug\n"); \
|
|
abort(); \
|
|
} \
|
|
pj.annotate_previous_loc(pj.containing_scope_offset[depth], \
|
|
pj.get_current_loc()); \
|
|
pj.write_tape(pj.containing_scope_offset[depth], 'r'); /* r is root */ \
|
|
\
|
|
pj.valid = true; \
|
|
pj.error_code = simdjson::SUCCESS; \
|
|
return pj.error_code; \
|
|
fail: \
|
|
/* we do not need the next line because this is done by pj.init(), \
|
|
* pessimistically. \
|
|
* pj.is_valid = false; \
|
|
* At this point in the code, we have all the time in the world. \
|
|
* Note that we know exactly where we are in the document so we could, \
|
|
* without any overhead on the processing code, report a specific \
|
|
* location. \
|
|
* We could even trigger special code paths to assess what happened \
|
|
* carefully, \
|
|
* all without any added cost. */ \
|
|
if (depth >= pj.depth_capacity) { \
|
|
pj.error_code = simdjson::DEPTH_ERROR; \
|
|
return pj.error_code; \
|
|
} \
|
|
switch (c) { \
|
|
case '"': \
|
|
pj.error_code = simdjson::STRING_ERROR; \
|
|
return pj.error_code; \
|
|
case '0': \
|
|
case '1': \
|
|
case '2': \
|
|
case '3': \
|
|
case '4': \
|
|
case '5': \
|
|
case '6': \
|
|
case '7': \
|
|
case '8': \
|
|
case '9': \
|
|
case '-': \
|
|
pj.error_code = simdjson::NUMBER_ERROR; \
|
|
return pj.error_code; \
|
|
case 't': \
|
|
pj.error_code = simdjson::T_ATOM_ERROR; \
|
|
return pj.error_code; \
|
|
case 'n': \
|
|
pj.error_code = simdjson::N_ATOM_ERROR; \
|
|
return pj.error_code; \
|
|
case 'f': \
|
|
pj.error_code = simdjson::F_ATOM_ERROR; \
|
|
return pj.error_code; \
|
|
default: \
|
|
break; \
|
|
} \
|
|
pj.error_code = simdjson::TAPE_ERROR; \
|
|
return pj.error_code; \
|
|
}
|
|
|
|
} // namespace simdjson
|
|
|
|
#ifdef IS_X86_64
|
|
TARGET_HASWELL
|
|
namespace simdjson {
|
|
template <>
|
|
WARN_UNUSED ALLOW_SAME_PAGE_BUFFER_OVERRUN_QUALIFIER LENIENT_MEM_SANITIZER int
|
|
unified_machine<Architecture::HASWELL>(const uint8_t *buf, size_t len,
|
|
ParsedJson &pj) {
|
|
UNIFIED_MACHINE(Architecture::HASWELL, buf, len, pj);
|
|
}
|
|
} // namespace simdjson
|
|
UNTARGET_REGION
|
|
|
|
TARGET_WESTMERE
|
|
namespace simdjson {
|
|
template <>
|
|
WARN_UNUSED ALLOW_SAME_PAGE_BUFFER_OVERRUN_QUALIFIER LENIENT_MEM_SANITIZER int
|
|
unified_machine<Architecture::WESTMERE>(const uint8_t *buf, size_t len,
|
|
ParsedJson &pj) {
|
|
UNIFIED_MACHINE(Architecture::WESTMERE, buf, len, pj);
|
|
}
|
|
} // namespace simdjson
|
|
UNTARGET_REGION
|
|
#endif // IS_X86_64
|
|
|
|
#ifdef IS_ARM64
|
|
namespace simdjson {
|
|
template <>
|
|
WARN_UNUSED ALLOW_SAME_PAGE_BUFFER_OVERRUN_QUALIFIER LENIENT_MEM_SANITIZER int
|
|
unified_machine<Architecture::ARM64>(const uint8_t *buf, size_t len,
|
|
ParsedJson &pj) {
|
|
UNIFIED_MACHINE(Architecture::ARM64, buf, len, pj);
|
|
}
|
|
} // namespace simdjson
|
|
#endif
|
|
/* end file src/stage2_build_tape.cpp */
|
|
/* begin file src/parsedjson.cpp */
|
|
|
|
namespace simdjson {
|
|
ParsedJson::ParsedJson()
|
|
: structural_indexes(nullptr), tape(nullptr),
|
|
containing_scope_offset(nullptr), ret_address(nullptr),
|
|
string_buf(nullptr), current_string_buf_loc(nullptr) {}
|
|
|
|
// Frees every buffer owned by this object by delegating to deallocate().
ParsedJson::~ParsedJson() {
  deallocate();
}
|
|
|
|
// Move constructor: takes over the buffers, capacities and parse state of p.
//
// Compared with a plain member-wise steal this also
//  * carries over error_code, so get_error_code()/get_error_message() keep
//    describing the outcome of the parse after the object has been moved
//    (e.g. when it is returned by value), and
//  * leaves p empty and invalid, so that a moved-from object never claims to
//    hold a valid document while its buffers are null.
ParsedJson::ParsedJson(ParsedJson &&p)
    : byte_capacity(p.byte_capacity), depth_capacity(p.depth_capacity),
      tape_capacity(p.tape_capacity), string_capacity(p.string_capacity),
      current_loc(p.current_loc), n_structural_indexes(p.n_structural_indexes),
      structural_indexes(p.structural_indexes), tape(p.tape),
      containing_scope_offset(p.containing_scope_offset),
      ret_address(p.ret_address), string_buf(p.string_buf),
      current_string_buf_loc(p.current_string_buf_loc), valid(p.valid) {
  // assigned in the body so that this does not depend on where error_code is
  // declared relative to the other members
  error_code = p.error_code;

  // p no longer owns anything: its destructor must not free our buffers
  p.structural_indexes = nullptr;
  p.tape = nullptr;
  p.containing_scope_offset = nullptr;
  p.ret_address = nullptr;
  p.string_buf = nullptr;
  p.current_string_buf_loc = nullptr;

  // make the moved-from state consistent with "nothing allocated"
  p.byte_capacity = 0;
  p.depth_capacity = 0;
  p.tape_capacity = 0;
  p.string_capacity = 0;
  p.current_loc = 0;
  p.n_structural_indexes = 0;
  p.valid = false;
}
|
|
|
|
// Makes sure this object can hold a document of `len` bytes nested up to
// `max_depth` levels.
//
// len        requested byte capacity; 0 is bumped to 64, and anything above
//            SIMDJSON_MAXSIZE_BYTES is refused.
// max_depth  requested depth capacity; 0 is bumped to 1.
//
// Returns true when the existing buffers are already large enough or when
// fresh buffers were allocated; returns false when the request is too large
// or an allocation failed. On failure the object owns no buffers and all
// capacities are zero.
WARN_UNUSED
bool ParsedJson::allocate_capacity(size_t len, size_t max_depth) {
  if (max_depth == 0) {
    max_depth = 1; // don't let the user allocate nothing
  }
  if (len == 0) {
    len = 64; // allocating 0 bytes is wasteful.
  }
  if (len > SIMDJSON_MAXSIZE_BYTES) {
    return false;
  }
  // Nothing to do only if BOTH existing capacities cover the request. (The
  // depth test used to be inverted, which kept undersized depth buffers and
  // needlessly reallocated sufficiently deep ones.)
  if ((len <= byte_capacity) && (max_depth <= depth_capacity)) {
    return true;
  }
  deallocate();
  valid = false;
  byte_capacity = 0; // will only set it to len after allocations are a success
  n_structural_indexes = 0;
  uint32_t max_structures = ROUNDUP_N(len, 64) + 2 + 7;
  structural_indexes = new (std::nothrow) uint32_t[max_structures];
  // a pathological input like "[[[[..." would generate len tape elements, so
  // need a capacity of len + 1
  size_t local_tape_capacity = ROUNDUP_N(len + 1, 64);
  // a document with only zero-length strings... could have len/3 string
  // and we would need len/3 * 5 bytes on the string buffer
  size_t local_string_capacity = ROUNDUP_N(5 * len / 3 + 32, 64);
  string_buf = new (std::nothrow) uint8_t[local_string_capacity];
  tape = new (std::nothrow) uint64_t[local_tape_capacity];
  containing_scope_offset = new (std::nothrow) uint32_t[max_depth];
#ifdef SIMDJSON_USE_COMPUTED_GOTO
  ret_address = new (std::nothrow) void *[max_depth];
#else
  ret_address = new (std::nothrow) char[max_depth];
#endif
  if ((string_buf == nullptr) || (tape == nullptr) ||
      (containing_scope_offset == nullptr) || (ret_address == nullptr) ||
      (structural_indexes == nullptr)) {
    std::cerr << "Could not allocate memory" << std::endl;
    // Release whatever was obtained and null every pointer: the destructor
    // calls deallocate(), which would otherwise delete[] these a second time.
    delete[] ret_address;
    ret_address = nullptr;
    delete[] containing_scope_offset;
    containing_scope_offset = nullptr;
    delete[] tape;
    tape = nullptr;
    delete[] string_buf;
    string_buf = nullptr;
    delete[] structural_indexes;
    structural_indexes = nullptr;
    return false;
  }
  /*
  // We do not need to initialize this content for parsing, though we could
  // need to initialize it for safety.
  memset(string_buf, 0 , local_string_capacity);
  memset(structural_indexes, 0, max_structures * sizeof(uint32_t));
  memset(tape, 0, local_tape_capacity * sizeof(uint64_t));
  */
  byte_capacity = len;
  depth_capacity = max_depth;
  tape_capacity = local_tape_capacity;
  string_capacity = local_string_capacity;
  return true;
}
|
|
|
|
// Returns the validity flag. The parser sets it to true on success; init()
// and deallocate() reset it to false.
bool ParsedJson::is_valid() const {
  return valid;
}
|
|
|
|
// Returns the status code stored by the last parse (simdjson::SUCCESS when the
// document was accepted).
int ParsedJson::get_error_code() const {
  return error_code;
}
|
|
|
|
std::string ParsedJson::get_error_message() const {
|
|
return error_message(error_code);
|
|
}
|
|
|
|
void ParsedJson::deallocate() {
|
|
byte_capacity = 0;
|
|
depth_capacity = 0;
|
|
tape_capacity = 0;
|
|
string_capacity = 0;
|
|
delete[] ret_address;
|
|
delete[] containing_scope_offset;
|
|
delete[] tape;
|
|
delete[] string_buf;
|
|
delete[] structural_indexes;
|
|
valid = false;
|
|
}
|
|
|
|
// Prepares the object for a new parse: the document is pessimistically marked
// invalid, the tape position goes back to 0 and the string write cursor goes
// back to the start of string_buf. No memory is allocated or released.
void ParsedJson::init() {
  valid = false;
  current_loc = 0;
  current_string_buf_loc = string_buf;
}
|
|
|
|
// Writes the parsed document to `os` as minified JSON by walking the tape.
//
// Returns false (possibly after partial output) when the object is not valid,
// the tape does not start with a root node, the root length exceeds the tape
// capacity, a number node is missing its payload word, an unexpected node
// type is met, or the scratch arrays cannot be allocated. Returns true
// otherwise.
//
// Changes relative to the previous implementation:
//  * string contents are now sent to `os`; they used to go through the
//    print_with_escapes overload that takes no stream, so they never reached
//    the requested stream. This uses the (pointer, stream, length) form that
//    Iterator::print already relies on.
//  * the scratch arrays are released in one place, and a failed allocation
//    no longer leaks the other array.
WARN_UNUSED
bool ParsedJson::print_json(std::ostream &os) {
  if (!valid) {
    return false;
  }
  uint32_t string_length;
  size_t tape_idx = 0;
  uint64_t tape_val = tape[tape_idx];
  uint8_t type = (tape_val >> 56);
  if (type != 'r') {
    fprintf(stderr, "Error: no starting root node?");
    return false;
  }
  // the root node stores the index right after the last node
  const size_t how_many = tape_val & JSON_VALUE_MASK;
  if (how_many > tape_capacity) {
    fprintf(
        stderr,
        "We may be exceeding the tape capacity. Is this a valid document?\n");
    return false;
  }
  tape_idx++;

  // Per-depth bookkeeping: whether the scope at that depth is an object, and
  // how many nodes have been emitted in it so far. One extra slot is
  // allocated so that index 1 exists even for the smallest depth_capacity.
  bool *in_object = new (std::nothrow) bool[depth_capacity + 1];
  size_t *in_object_idx = new (std::nothrow) size_t[depth_capacity + 1];
  if ((in_object == nullptr) || (in_object_idx == nullptr)) {
    delete[] in_object;
    delete[] in_object_idx;
    return false;
  }

  int depth = 1; // only root at level 0
  in_object_idx[depth] = 0;
  in_object[depth] = false;

  bool ok = true; // cleared on the first malformed node; ends the walk
  for (; ok && (tape_idx < how_many); tape_idx++) {
    tape_val = tape[tape_idx];
    const uint64_t payload = tape_val & JSON_VALUE_MASK;
    type = (tape_val >> 56);

    // separators: ',' between siblings, ':' between a key and its value
    if (!in_object[depth]) {
      if ((in_object_idx[depth] > 0) && (type != ']')) {
        os << ",";
      }
      in_object_idx[depth]++;
    } else {
      // inside an object even positions are keys and odd positions are values
      if ((in_object_idx[depth] > 0) && ((in_object_idx[depth] & 1) == 0) &&
          (type != '}')) {
        os << ",";
      }
      if ((in_object_idx[depth] & 1) == 1) {
        os << ":";
      }
      in_object_idx[depth]++;
    }

    switch (type) {
    case '"': // we have a string
      os << '"';
      // the string buffer holds a 32-bit length followed by the characters
      memcpy(&string_length, string_buf + payload, sizeof(uint32_t));
      print_with_escapes(reinterpret_cast<const unsigned char *>(
                             string_buf + payload + sizeof(uint32_t)),
                         os, string_length);
      os << '"';
      break;
    case 'l': // we have a long int
      if (tape_idx + 1 >= how_many) {
        ok = false; // the value word is missing
        break;
      }
      os << static_cast<int64_t>(tape[++tape_idx]);
      break;
    case 'd': { // we have a double
      if (tape_idx + 1 >= how_many) {
        ok = false; // the value word is missing
        break;
      }
      double answer;
      memcpy(&answer, &tape[++tape_idx], sizeof(answer));
      os << answer;
      break;
    }
    case 'n': // we have a null
      os << "null";
      break;
    case 't': // we have a true
      os << "true";
      break;
    case 'f': // we have a false
      os << "false";
      break;
    case '{': // we have an object
      os << '{';
      depth++;
      in_object[depth] = true;
      in_object_idx[depth] = 0;
      break;
    case '}': // we end an object
      depth--;
      os << '}';
      break;
    case '[': // we start an array
      os << '[';
      depth++;
      in_object[depth] = false;
      in_object_idx[depth] = 0;
      break;
    case ']': // we end an array
      depth--;
      os << ']';
      break;
    case 'r': // we start and end with the root node
      fprintf(stderr, "should we be hitting the root node?\n");
      ok = false;
      break;
    default:
      fprintf(stderr, "bug %c\n", type);
      ok = false;
      break;
    }
  }

  delete[] in_object;
  delete[] in_object_idx;
  return ok;
}
|
|
|
|
// Writes a line-by-line dump of the tape to `os`: one "index : node" line per
// tape entry, annotated with where scope nodes point.
//
// Returns false (possibly after partial output) when the object is not valid,
// the tape does not start with a root node, a number node is missing its
// payload word, or an unexpected node type is met. Returns true otherwise.
//
// Changes relative to the previous implementation:
//  * string contents are now sent to `os`; they used to go through the
//    print_with_escapes overload that takes no stream. This uses the
//    (pointer, stream, length) form that Iterator::print already relies on.
//  * the "end of root" diagnostic goes to stderr, like the other diagnostic
//    in this function, rather than being mixed into stdout.
WARN_UNUSED
bool ParsedJson::dump_raw_tape(std::ostream &os) {
  if (!valid) {
    return false;
  }
  uint32_t string_length;
  size_t tape_idx = 0;
  uint64_t tape_val = tape[tape_idx];
  uint8_t type = (tape_val >> 56);
  os << tape_idx << " : " << type;
  tape_idx++;
  size_t how_many = 0;
  if (type == 'r') {
    how_many = tape_val & JSON_VALUE_MASK;
  } else {
    fprintf(stderr, "Error: no starting root node?");
    return false;
  }
  os << "\t// pointing to " << how_many << " (right after last node)\n";
  uint64_t payload;
  for (; tape_idx < how_many; tape_idx++) {
    os << tape_idx << " : ";
    tape_val = tape[tape_idx];
    payload = tape_val & JSON_VALUE_MASK;
    type = (tape_val >> 56);
    switch (type) {
    case '"': // we have a string
      os << "string \"";
      // the string buffer holds a 32-bit length followed by the characters
      memcpy(&string_length, string_buf + payload, sizeof(uint32_t));
      print_with_escapes(reinterpret_cast<const unsigned char *>(
                             string_buf + payload + sizeof(uint32_t)),
                         os, string_length);
      os << '"';
      os << '\n';
      break;
    case 'l': // we have a long int
      if (tape_idx + 1 >= how_many) {
        return false; // the value word is missing
      }
      os << "integer " << static_cast<int64_t>(tape[++tape_idx]) << "\n";
      break;
    case 'd': { // we have a double
      os << "float ";
      if (tape_idx + 1 >= how_many) {
        return false; // the value word is missing
      }
      double answer;
      memcpy(&answer, &tape[++tape_idx], sizeof(answer));
      os << answer << '\n';
      break;
    }
    case 'n': // we have a null
      os << "null\n";
      break;
    case 't': // we have a true
      os << "true\n";
      break;
    case 'f': // we have a false
      os << "false\n";
      break;
    case '{': // we have an object
      os << "{\t// pointing to next tape location " << payload
         << " (first node after the scope) \n";
      break;
    case '}': // we end an object
      os << "}\t// pointing to previous tape location " << payload
         << " (start of the scope) \n";
      break;
    case '[': // we start an array
      os << "[\t// pointing to next tape location " << payload
         << " (first node after the scope) \n";
      break;
    case ']': // we end an array
      os << "]\t// pointing to previous tape location " << payload
         << " (start of the scope) \n";
      break;
    case 'r': // we start and end with the root node
      fprintf(stderr, "end of root\n");
      return false;
    default:
      return false;
    }
  }
  // closing root node, located right after the last document node
  tape_val = tape[tape_idx];
  payload = tape_val & JSON_VALUE_MASK;
  type = (tape_val >> 56);
  os << tape_idx << " : " << type << "\t// pointing to " << payload
     << " (start root)\n";
  return true;
}
|
|
} // namespace simdjson
|
|
/* end file src/parsedjson.cpp */
|
|
/* begin file src/parsedjsoniterator.cpp */
|
|
#include <iterator>
|
|
|
|
namespace simdjson {
|
|
// Creates an iterator positioned on the first node after the root of pj_.
//
// Throws InvalidJSON when pj_ is not valid or when its tape does not start
// with a root node. In the latter case the scope stack allocated below is
// released before throwing: the destructor does not run for an object whose
// constructor throws, so the array used to leak.
ParsedJson::Iterator::Iterator(ParsedJson &pj_)
    : pj(pj_), depth(0), location(0), tape_length(0), depth_index(nullptr) {
  if (!pj.is_valid()) {
    throw InvalidJSON();
  }
  // we overallocate by "1" to silence a warning in Visual Studio
  depth_index = new scopeindex_t[pj.depth_capacity + 1];
  // memory allocation would throw, so there is no null check here

  // level 0 is the root node itself
  depth_index[0].start_of_scope = location;
  current_val = pj.tape[location++];
  current_type = (current_val >> 56);
  depth_index[0].scope_type = current_type;
  if (current_type != 'r') {
    // should never happen
    delete[] depth_index;
    depth_index = nullptr;
    throw InvalidJSON();
  }
  // the root node stores the index right after the last node
  tape_length = current_val & JSON_VALUE_MASK;
  if (location < tape_length) {
    // If we make it here, then depth_capacity must >=2, but the compiler
    // may not know this.
    current_val = pj.tape[location];
    current_type = (current_val >> 56);
    depth++;
    depth_index[depth].start_of_scope = location;
    depth_index[depth].scope_type = current_type;
  }
}
|
|
|
|
// Releases the per-depth scope array (null after the iterator was moved from,
// in which case delete[] does nothing).
ParsedJson::Iterator::~Iterator() {
  delete[] depth_index;
}
|
|
|
|
// Copy constructor: the new iterator refers to the same ParsedJson and the
// same position as `o`, but owns its own copy of the scope array.
//
// NOTE(review): the function is declared noexcept although the allocation
// below may throw; a failed allocation would therefore terminate the program.
ParsedJson::Iterator::Iterator(const Iterator &o) noexcept
    : pj(o.pj), depth(o.depth), location(o.location),
      tape_length(o.tape_length), current_type(o.current_type),
      current_val(o.current_val),
      depth_index(new scopeindex_t[o.pj.depth_capacity]) {
  // raw byte copy of depth_capacity entries, used or not
  const size_t scope_bytes = pj.depth_capacity * sizeof(depth_index[0]);
  memcpy(depth_index, o.depth_index, scope_bytes);
}
|
|
|
|
// Move constructor: copies the position of `o` and takes over its scope array.
ParsedJson::Iterator::Iterator(Iterator &&o) noexcept
    : pj(o.pj), depth(o.depth), location(o.location),
      tape_length(o.tape_length), current_type(o.current_type),
      current_val(o.current_val), depth_index(o.depth_index) {
  // the array now belongs to *this; o's destructor must not free it
  o.depth_index = nullptr;
}
|
|
|
|
bool ParsedJson::Iterator::print(std::ostream &os, bool escape_strings) const {
|
|
if (!is_ok()) {
|
|
return false;
|
|
}
|
|
switch (current_type) {
|
|
case '"': // we have a string
|
|
os << '"';
|
|
if (escape_strings) {
|
|
print_with_escapes(get_string(), os, get_string_length());
|
|
} else {
|
|
// was: os << get_string();, but given that we can include null chars, we
|
|
// have to do something crazier:
|
|
std::copy(get_string(), get_string() + get_string_length(),
|
|
std::ostream_iterator<char>(os));
|
|
}
|
|
os << '"';
|
|
break;
|
|
case 'l': // we have a long int
|
|
os << get_integer();
|
|
break;
|
|
case 'd':
|
|
os << get_double();
|
|
break;
|
|
case 'n': // we have a null
|
|
os << "null";
|
|
break;
|
|
case 't': // we have a true
|
|
os << "true";
|
|
break;
|
|
case 'f': // we have a false
|
|
os << "false";
|
|
break;
|
|
case '{': // we have an object
|
|
case '}': // we end an object
|
|
case '[': // we start an array
|
|
case ']': // we end an array
|
|
os << static_cast<char>(current_type);
|
|
break;
|
|
default:
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
bool ParsedJson::Iterator::move_to(const char *pointer, uint32_t length) {
|
|
char *new_pointer = nullptr;
|
|
if (pointer[0] == '#') {
|
|
// Converting fragment representation to string representation
|
|
new_pointer = new char[length];
|
|
uint32_t new_length = 0;
|
|
for (uint32_t i = 1; i < length; i++) {
|
|
if (pointer[i] == '%' && pointer[i + 1] == 'x') {
|
|
try {
|
|
int fragment =
|
|
std::stoi(std::string(&pointer[i + 2], 2), nullptr, 16);
|
|
if (fragment == '\\' || fragment == '"' || (fragment <= 0x1F)) {
|
|
// escaping the character
|
|
new_pointer[new_length] = '\\';
|
|
new_length++;
|
|
}
|
|
new_pointer[new_length] = fragment;
|
|
i += 3;
|
|
} catch (std::invalid_argument &) {
|
|
delete[] new_pointer;
|
|
return false; // the fragment is invalid
|
|
}
|
|
} else {
|
|
new_pointer[new_length] = pointer[i];
|
|
}
|
|
new_length++;
|
|
}
|
|
length = new_length;
|
|
pointer = new_pointer;
|
|
}
|
|
|
|
// saving the current state
|
|
size_t depth_s = depth;
|
|
size_t location_s = location;
|
|
uint8_t current_type_s = current_type;
|
|
uint64_t current_val_s = current_val;
|
|
scopeindex_t *depth_index_s = depth_index;
|
|
|
|
rewind(); // The json pointer is used from the root of the document.
|
|
|
|
bool found = relative_move_to(pointer, length);
|
|
delete[] new_pointer;
|
|
|
|
if (!found) {
|
|
// since the pointer has found nothing, we get back to the original
|
|
// position.
|
|
depth = depth_s;
|
|
location = location_s;
|
|
current_type = current_type_s;
|
|
current_val = current_val_s;
|
|
depth_index = depth_index_s;
|
|
}
|
|
|
|
return found;
|
|
}
|
|
|
|
bool ParsedJson::Iterator::relative_move_to(const char *pointer,
|
|
uint32_t length) {
|
|
if (length == 0) {
|
|
// returns the whole document
|
|
return true;
|
|
}
|
|
|
|
if (pointer[0] != '/') {
|
|
// '/' must be the first character
|
|
return false;
|
|
}
|
|
|
|
// finding the key in an object or the index in an array
|
|
std::string key_or_index;
|
|
uint32_t offset = 1;
|
|
|
|
// checking for the "-" case
|
|
if (is_array() && pointer[1] == '-') {
|
|
if (length != 2) {
|
|
// the pointer must be exactly "/-"
|
|
// there can't be anything more after '-' as an index
|
|
return false;
|
|
}
|
|
key_or_index = '-';
|
|
offset = length; // will skip the loop coming right after
|
|
}
|
|
|
|
// We either transform the first reference token to a valid json key
|
|
// or we make sure it is a valid index in an array.
|
|
for (; offset < length; offset++) {
|
|
if (pointer[offset] == '/') {
|
|
// beginning of the next key or index
|
|
break;
|
|
}
|
|
if (is_array() && (pointer[offset] < '0' || pointer[offset] > '9')) {
|
|
// the index of an array must be an integer
|
|
// we also make sure std::stoi won't discard whitespaces later
|
|
return false;
|
|
}
|
|
if (pointer[offset] == '~') {
|
|
// "~1" represents "/"
|
|
if (pointer[offset + 1] == '1') {
|
|
key_or_index += '/';
|
|
offset++;
|
|
continue;
|
|
}
|
|
// "~0" represents "~"
|
|
if (pointer[offset + 1] == '0') {
|
|
key_or_index += '~';
|
|
offset++;
|
|
continue;
|
|
}
|
|
}
|
|
if (pointer[offset] == '\\') {
|
|
if (pointer[offset + 1] == '\\' || pointer[offset + 1] == '"' ||
|
|
(pointer[offset + 1] <= 0x1F)) {
|
|
key_or_index += pointer[offset + 1];
|
|
offset++;
|
|
continue;
|
|
}
|
|
return false; // invalid escaped character
|
|
}
|
|
if (pointer[offset] == '\"') {
|
|
// unescaped quote character. this is an invalid case.
|
|
// lets do nothing and assume most pointers will be valid.
|
|
// it won't find any corresponding json key anyway.
|
|
// return false;
|
|
}
|
|
key_or_index += pointer[offset];
|
|
}
|
|
|
|
bool found = false;
|
|
if (is_object()) {
|
|
if (move_to_key(key_or_index.c_str(), key_or_index.length())) {
|
|
found = relative_move_to(pointer + offset, length - offset);
|
|
}
|
|
} else if (is_array()) {
|
|
if (key_or_index == "-") { // handling "-" case first
|
|
if (down()) {
|
|
while (next())
|
|
; // moving to the end of the array
|
|
// moving to the nonexistent value right after...
|
|
size_t npos;
|
|
if ((current_type == '[') || (current_type == '{')) {
|
|
// we need to jump
|
|
npos = (current_val & JSON_VALUE_MASK);
|
|
} else {
|
|
npos =
|
|
location + ((current_type == 'd' || current_type == 'l') ? 2 : 1);
|
|
}
|
|
location = npos;
|
|
current_val = pj.tape[npos];
|
|
current_type = (current_val >> 56);
|
|
return true; // how could it fail ?
|
|
}
|
|
} else { // regular numeric index
|
|
// The index can't have a leading '0'
|
|
if (key_or_index[0] == '0' && key_or_index.length() > 1) {
|
|
return false;
|
|
}
|
|
// it cannot be empty
|
|
if (key_or_index.length() == 0) {
|
|
return false;
|
|
}
|
|
// we already checked the index contains only valid digits
|
|
uint32_t index = std::stoi(key_or_index);
|
|
if (move_to_index(index)) {
|
|
found = relative_move_to(pointer + offset, length - offset);
|
|
}
|
|
}
|
|
}
|
|
|
|
return found;
|
|
}
|
|
} // namespace simdjson
|
|
/* end file src/parsedjsoniterator.cpp */
|