AVX minifier coded (first version).

This commit is contained in:
Daniel Lemire 2018-05-27 21:15:11 -04:00
parent fcc0391b58
commit 9cc00ab584
5 changed files with 67046 additions and 74 deletions

View File

@ -1,6 +1,6 @@
HEADERS:=include/avxprocessing.h include/benchmark.h include/common_defs.h include/jsonstruct.h include/scalarprocessing.h include/util.h
bench: benchmarks/bench.cpp rapidjson/license.txt $(HEADERS)
$(CXX) -std=c++11 -O3 -o $@ benchmarks/bench.cpp -Irapidjson/include -Iinclude -march=native -lm -Wall -Wextra
$(CXX) -std=c++11 -O3 -o $@ benchmarks/bench.cpp -Irapidjson/include -Iinclude -march=native -lm -Wall -Wextra -Wno-narrowing
rapidjson/license.txt:
git submodule update --init --recursive

View File

@ -1,4 +1,6 @@
#include "avxprocessing.h"
#include "avxminifier.h"
#include "benchmark.h"
#include "jsonstruct.h"
// #define RAPIDJSON_SSE2 // bad
@ -79,9 +81,11 @@ int main(int argc, char *argv[]) {
true);
rapidjson::Document d;
char * buffer = (char *) malloc(p.second);
memcpy(buffer, p.first, p.second);
buffer[p.second] = '\0';
BEST_TIME(d.Parse<kParseValidateEncodingFlag>((const char *)buffer).HasParseError(), false,
memcpy(buffer, p.first, p.second), repeat, volume, true);
BEST_TIME(d.Parse((const char *)buffer).HasParseError(), false,
@ -92,5 +96,15 @@ int main(int argc, char *argv[]) {
std::cout << "input length is "<< p.second << " stringified length is " << strlength << std::endl;
BEST_TIME_NOCHECK(rapidstringme((char*) p.first), , repeat, volume,
true);
memcpy(buffer, p.first, p.second);
size_t outlength = copy_without_useless_spaces((const uint8_t *)buffer, p.second,(uint8_t *) buffer);
printf("these should match: %zu %zu \n", strlength, outlength);
uint8_t * cbuffer = (uint8_t *)buffer;
BEST_TIME(copy_without_useless_spaces(cbuffer, p.second,cbuffer), outlength,
memcpy(buffer, p.first, p.second), repeat, volume, true);
buffer[outlength] = '\0';
free(buffer);
}

View File

@ -0,0 +1,197 @@
#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include <x86intrin.h>
#include "simdprune_tables.h"
// A straightforward comparison of a mask against input.
// The 64-byte block is passed as two 32-byte halves; `mask` holds the byte to
// look for, broadcast to every lane.
// Returns a 64-bit bitmap in which bit i is set when byte i of the block equals
// the mask byte (input_lo supplies bits 0-31, input_hi supplies bits 32-63).
static uint64_t cmp_mask_against_input_mini(__m256i input_lo, __m256i input_hi,
                                            __m256i mask) {
  __m256i cmp_res_0 = _mm256_cmpeq_epi8(input_lo, mask);
  // movemask yields a signed int: go through uint32_t so that the low half is
  // not sign-extended into bits 32-63 of the result.
  uint64_t res_0 = (uint32_t)_mm256_movemask_epi8(cmp_res_0);
  // Integer compare results are __m256i; the float vector type __m256 does not
  // convert implicitly.
  __m256i cmp_res_1 = _mm256_cmpeq_epi8(input_hi, mask);
  uint64_t res_1 = (uint32_t)_mm256_movemask_epi8(cmp_res_1);
  return res_0 | (res_1 << 32);
}
// take input from buf and remove useless whitespace, input and output can be
// the same
static inline size_t copy_without_useless_spaces(const uint8_t *buf, size_t len,
uint8_t *out) {
// Useful constant masks
const uint64_t even_bits = 0x5555555555555555ULL;
const uint64_t odd_bits = ~even_bits;
uint8_t *initout(out);
uint64_t prev_iter_ends_odd_backslash =
0ULL; // either 0 or 1, but a 64-bit value
uint64_t prev_iter_inside_quote = 0ULL; // either all zeros or all ones
size_t idx = 0;
if (len >= 64) {
size_t avxlen = len - 63;
for (; idx < avxlen; idx += 64) {
__m256i input_lo = _mm256_load_si256((const __m256i *)(buf + idx + 0));
__m256i input_hi = _mm256_load_si256((const __m256i *)(buf + idx + 32));
uint64_t bs_bits = cmp_mask_against_input_mini(input_lo, input_hi,
_mm256_set1_epi8('\\'));
uint64_t start_edges = bs_bits & ~(bs_bits << 1);
uint64_t even_start_mask = even_bits ^ prev_iter_ends_odd_backslash;
uint64_t even_starts = start_edges & even_start_mask;
uint64_t odd_starts = start_edges & ~even_start_mask;
uint64_t even_carries = bs_bits + even_starts;
uint64_t odd_carries;
bool iter_ends_odd_backslash =
__builtin_uaddll_overflow(bs_bits, odd_starts, &odd_carries);
odd_carries |= prev_iter_ends_odd_backslash;
prev_iter_ends_odd_backslash = iter_ends_odd_backslash ? 0x1ULL : 0x0ULL;
uint64_t even_carry_ends = even_carries & ~bs_bits;
uint64_t odd_carry_ends = odd_carries & ~bs_bits;
uint64_t even_start_odd_end = even_carry_ends & odd_bits;
uint64_t odd_start_even_end = odd_carry_ends & even_bits;
uint64_t odd_ends = even_start_odd_end | odd_start_even_end;
uint64_t quote_bits = cmp_mask_against_input_mini(input_lo, input_hi,
_mm256_set1_epi8('"'));
quote_bits = quote_bits & ~odd_ends;
uint64_t quote_mask = _mm_cvtsi128_si64(_mm_clmulepi64_si128(
_mm_set_epi64x(0ULL, quote_bits), _mm_set1_epi8(0xFF), 0));
quote_mask ^= prev_iter_inside_quote;
prev_iter_inside_quote = (uint64_t)((s64)quote_mask >> 63);
const __m256 low_nibble_mask = _mm256_setr_epi8(
// 0 9 a b c d
16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0, 16, 0, 0, 0, 0, 0,
0, 0, 0, 8, 12, 1, 2, 9, 0, 0);
const __m256 high_nibble_mask = _mm256_setr_epi8(
// 0 2 3 5 7
8, 0, 18, 4, 0, 1, 0, 1, 0, 0, 0, 3, 2, 1, 0, 0, 8, 0, 18, 4, 0, 1, 0,
1, 0, 0, 0, 3, 2, 1, 0, 0);
__m256 whitespace_shufti_mask = _mm256_set1_epi8(0x18);
__m256 v_lo = _mm256_and_si256(
_mm256_shuffle_epi8(low_nibble_mask, input_lo),
_mm256_shuffle_epi8(high_nibble_mask,
_mm256_and_si256(_mm256_srli_epi32(input_lo, 4),
_mm256_set1_epi8(0x7f))));
__m256 v_hi = _mm256_and_si256(
_mm256_shuffle_epi8(low_nibble_mask, input_hi),
_mm256_shuffle_epi8(high_nibble_mask,
_mm256_and_si256(_mm256_srli_epi32(input_hi, 4),
_mm256_set1_epi8(0x7f))));
__m256 tmp_ws_lo = _mm256_cmpeq_epi8(
_mm256_and_si256(v_lo, whitespace_shufti_mask), _mm256_set1_epi8(0));
__m256 tmp_ws_hi = _mm256_cmpeq_epi8(
_mm256_and_si256(v_hi, whitespace_shufti_mask), _mm256_set1_epi8(0));
uint64_t ws_res_0 = (uint32_t)_mm256_movemask_epi8(tmp_ws_lo);
uint64_t ws_res_1 = _mm256_movemask_epi8(tmp_ws_hi);
uint64_t whitespace = ~(ws_res_0 | (ws_res_1 << 32));
whitespace &= ~quote_mask;
int mask1 = whitespace & 0xFFFF;
int mask2 = (whitespace >> 16) & 0xFFFF;
int mask3 = (whitespace >> 32) & 0xFFFF;
int mask4 = (whitespace >> 48) & 0xFFFF;
// dumpbits(whitespace,"whitespace");
int pop1 = _popcnt64((~whitespace) & 0xFFFF);
int pop2 = _popcnt64((~whitespace) & UINT64_C(0xFFFFFFFF));
int pop3 = _popcnt64((~whitespace) & UINT64_C(0xFFFFFFFFFFFF));
int pop4 = _popcnt64((~whitespace));
__m256i vmask1 =
_mm256_loadu2_m128i((const __m128i *)mask128_epi8 + mask2,
(const __m128i *)mask128_epi8 + mask1);
__m256i vmask2 =
_mm256_loadu2_m128i((const __m128i *)mask128_epi8 + mask4,
(const __m128i *)mask128_epi8 + mask3);
__m256i result1 = _mm256_shuffle_epi8(input_lo, vmask1);
__m256i result2 = _mm256_shuffle_epi8(input_hi, vmask2);
_mm256_storeu2_m128i((__m128i *)(out + pop1), (__m128i *)out, result1);
_mm256_storeu2_m128i((__m128i *)(out + pop3), (__m128i *)(out + pop2),
result2);
out += pop4;
}
}
// we finish off the job... copying and pasting the code is not ideal here,
// but it gets the job done.
if (idx < len) {
uint8_t buffer[64];
memset(buffer, 0, 64);
memcpy(buffer, buf + idx, 64);
__m256i input_lo = _mm256_load_si256((const __m256i *)(buffer));
__m256i input_hi = _mm256_load_si256((const __m256i *)(buffer + 32));
uint64_t bs_bits =
cmp_mask_against_input_mini(input_lo, input_hi, _mm256_set1_epi8('\\'));
uint64_t start_edges = bs_bits & ~(bs_bits << 1);
uint64_t even_start_mask = even_bits ^ prev_iter_ends_odd_backslash;
uint64_t even_starts = start_edges & even_start_mask;
uint64_t odd_starts = start_edges & ~even_start_mask;
uint64_t even_carries = bs_bits + even_starts;
uint64_t odd_carries;
bool iter_ends_odd_backslash =
__builtin_uaddll_overflow(bs_bits, odd_starts, &odd_carries);
odd_carries |= prev_iter_ends_odd_backslash;
prev_iter_ends_odd_backslash = iter_ends_odd_backslash ? 0x1ULL : 0x0ULL;
uint64_t even_carry_ends = even_carries & ~bs_bits;
uint64_t odd_carry_ends = odd_carries & ~bs_bits;
uint64_t even_start_odd_end = even_carry_ends & odd_bits;
uint64_t odd_start_even_end = odd_carry_ends & even_bits;
uint64_t odd_ends = even_start_odd_end | odd_start_even_end;
uint64_t quote_bits =
cmp_mask_against_input_mini(input_lo, input_hi, _mm256_set1_epi8('"'));
quote_bits = quote_bits & ~odd_ends;
uint64_t quote_mask = _mm_cvtsi128_si64(_mm_clmulepi64_si128(
_mm_set_epi64x(0ULL, quote_bits), _mm_set1_epi8(0xFF), 0));
quote_mask ^= prev_iter_inside_quote;
prev_iter_inside_quote = (uint64_t)((s64)quote_mask >> 63);
const __m256 low_nibble_mask = _mm256_setr_epi8(
// 0 9 a b c d
16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0, 16, 0, 0, 0, 0, 0, 0,
0, 0, 8, 12, 1, 2, 9, 0, 0);
const __m256 high_nibble_mask = _mm256_setr_epi8(
// 0 2 3 5 7
8, 0, 18, 4, 0, 1, 0, 1, 0, 0, 0, 3, 2, 1, 0, 0, 8, 0, 18, 4, 0, 1, 0,
1, 0, 0, 0, 3, 2, 1, 0, 0);
__m256 whitespace_shufti_mask = _mm256_set1_epi8(0x18);
__m256 v_lo = _mm256_and_si256(
_mm256_shuffle_epi8(low_nibble_mask, input_lo),
_mm256_shuffle_epi8(high_nibble_mask,
_mm256_and_si256(_mm256_srli_epi32(input_lo, 4),
_mm256_set1_epi8(0x7f))));
__m256 v_hi = _mm256_and_si256(
_mm256_shuffle_epi8(low_nibble_mask, input_hi),
_mm256_shuffle_epi8(high_nibble_mask,
_mm256_and_si256(_mm256_srli_epi32(input_hi, 4),
_mm256_set1_epi8(0x7f))));
__m256 tmp_ws_lo = _mm256_cmpeq_epi8(
_mm256_and_si256(v_lo, whitespace_shufti_mask), _mm256_set1_epi8(0));
__m256 tmp_ws_hi = _mm256_cmpeq_epi8(
_mm256_and_si256(v_hi, whitespace_shufti_mask), _mm256_set1_epi8(0));
uint64_t ws_res_0 = (uint32_t)_mm256_movemask_epi8(tmp_ws_lo);
uint64_t ws_res_1 = _mm256_movemask_epi8(tmp_ws_hi);
uint64_t whitespace = ~(ws_res_0 | (ws_res_1 << 32));
whitespace &= ~quote_mask;
//
if (len - idx < 64) {
whitespace |= UINT64_C(0xFFFFFFFFFFFFFFFF) << (len - idx);
}
int mask1 = whitespace & 0xFFFF;
int mask2 = (whitespace >> 16) & 0xFFFF;
int mask3 = (whitespace >> 32) & 0xFFFF;
int mask4 = (whitespace >> 48) & 0xFFFF;
// dumpbits(whitespace,"whitespace");
int pop1 = _popcnt64((~whitespace) & 0xFFFF);
int pop2 = _popcnt64((~whitespace) & UINT64_C(0xFFFFFFFF));
int pop3 = _popcnt64((~whitespace) & UINT64_C(0xFFFFFFFFFFFF));
int pop4 = _popcnt64((~whitespace));
__m256i vmask1 = _mm256_loadu2_m128i((const __m128i *)mask128_epi8 + mask2,
(const __m128i *)mask128_epi8 + mask1);
__m256i vmask2 = _mm256_loadu2_m128i((const __m128i *)mask128_epi8 + mask4,
(const __m128i *)mask128_epi8 + mask3);
__m256i result1 = _mm256_shuffle_epi8(input_lo, vmask1);
__m256i result2 = _mm256_shuffle_epi8(input_hi, vmask2);
_mm256_storeu2_m128i((__m128i *)(out + pop1), (__m128i *)out, result1);
_mm256_storeu2_m128i((__m128i *)(out + pop3), (__m128i *)(out + pop2),
result2);
out += pop4;
}
return out - initout;
}

View File

@ -15,10 +15,8 @@
#include "jsonstruct.h"
using namespace std;
// a straightforward comparison of a mask against input. 5 uops; would be cheaper in AVX512.
static inline u64 cmp_mask_against_input(m256 input_lo, m256 input_hi, m256 mask) {
static u64 cmp_mask_against_input(m256 input_lo, m256 input_hi, m256 mask) {
m256 cmp_res_0 = _mm256_cmpeq_epi8(input_lo, mask);
u64 res_0 = (u32)_mm256_movemask_epi8(cmp_res_0);
m256 cmp_res_1 = _mm256_cmpeq_epi8(input_hi, mask);
@ -26,7 +24,7 @@ static inline u64 cmp_mask_against_input(m256 input_lo, m256 input_hi, m256 mask
return res_0 | (res_1 << 32);
}
static bool find_structural_bits(const u8 * buf, size_t len, ParsedJson & pj) {
static bool find_structural_bits(const u8 * buf, size_t len, ParsedJson & pj) {
// Useful constant masks
const u64 even_bits = 0x5555555555555555ULL;
const u64 odd_bits = ~even_bits;
@ -37,9 +35,10 @@ static bool find_structural_bits(const u8 * buf, size_t len, ParsedJson & pj) {
// persistent state across loop
u64 prev_iter_ends_odd_backslash = 0ULL; // either 0 or 1, but a 64-bit value
u64 prev_iter_inside_quote = 0ULL; // either all zeros or all ones
u64 prev_iter_pseudo_structural_carry = 0ULL;
u64 prev_iter_ends_pseudo_pred = 0ULL;
for (size_t idx = 0; idx < len; idx+=64) {
__builtin_prefetch(buf + idx + 128);
m256 input_lo = _mm256_load_si256((const m256 *)(buf + idx + 0));
m256 input_hi = _mm256_load_si256((const m256 *)(buf + idx + 32));
@ -66,10 +65,8 @@ static bool find_structural_bits(const u8 * buf, size_t len, ParsedJson & pj) {
// if we had an odd-numbered run at the end of
// the previous iteration
prev_iter_ends_odd_backslash = iter_ends_odd_backslash ? 0x1ULL : 0x0ULL;
u64 even_carry_ends = even_carries & ~bs_bits;
u64 odd_carry_ends = odd_carries & ~bs_bits;
u64 even_start_odd_end = even_carry_ends & odd_bits;
u64 odd_start_even_end = odd_carry_ends & even_bits;
@ -136,40 +133,30 @@ static bool find_structural_bits(const u8 * buf, size_t len, ParsedJson & pj) {
u64 ws_res_1 = _mm256_movemask_epi8(tmp_ws_hi);
u64 whitespace = ~(ws_res_0 | (ws_res_1 << 32));
// mask off anything inside quotes
structurals &= ~quote_mask;
// whitespace inside our quotes also doesn't count; otherwise " foo" would generate a spurious
// pseudo-structural-character at 'foo'
whitespace &= ~quote_mask;
// add the real quote bits back into our bitmask as well, so we can
// quickly traverse the strings we've spent all this trouble gathering
structurals |= quote_bits;
// Now, establish "pseudo-structural characters". These are characters that follow a structural
// character followed by zero or more whitespace
// this allows us to discover true/false/null and numbers in any location where they might legally
// occur; it will also create another 'checkpoint' where if a non-quoted region of our input
// has whitespace after a structural character fullowed by a syntax error, we can detect this
// and get an error in a later stage (i.e. the state machine)
// Now, establish "pseudo-structural characters". These are non-whitespace characters
// that are (a) outside quotes and (b) have a predecessor that's either whitespace or a structural
// character. This means that subsequent passes will get a chance to encounter the first character
// of every string of non-whitespace and, if we're parsing an atom like true/false/null or a number
// we can stop at the first whitespace or structural character following it.
// Slightly more painful than it would seem. It's possible that either structurals or whitespace are
// all 1s (e.g. {{{{{{{....{{{{x64, or a really long whitespace). As such there is no safe place
// to add a '1' from the previous iteration without *that* triggering the carry we are looking
// out for, so we must check both carries for overflow
u64 tmp = structurals | whitespace;
u64 tmp2;
bool ps_carry = __builtin_uaddll_overflow(tmp, structurals, &tmp2);
u64 tmp3;
ps_carry = ps_carry | __builtin_uaddll_overflow(tmp2, prev_iter_pseudo_structural_carry, &tmp3);
prev_iter_pseudo_structural_carry = ps_carry ? 0x1ULL : 0x0ULL;
tmp3 &= ~quote_mask;
tmp3 &= ~whitespace;
structurals |= tmp3;
// a qualified predecessor is something that can happen 1 position before an
// pseudo-structural character
u64 pseudo_pred = structurals | whitespace;
u64 shifted_pseudo_pred = (pseudo_pred << 1) | prev_iter_ends_pseudo_pred;
prev_iter_ends_pseudo_pred = pseudo_pred >> 63;
u64 pseudo_structurals = shifted_pseudo_pred & (~whitespace) & (~quote_mask);
structurals |= pseudo_structurals;
// now, we've used our close quotes all we need to. So let's switch them off
// they will be off in the quote mask and on in quote bits.
structurals &= ~(quote_bits & ~quote_mask);
*(u64 *)(pj.structurals + idx/8) = structurals;
}
return true;
@ -191,7 +178,7 @@ static bool flatten_indexes(size_t len, ParsedJson & pj) {
u32 cnt = __builtin_popcountll(s);
u32 next_base = base + cnt;
while (s) {
// spoil the suspense
// spoil the suspense by reducing dependency chains; actually a win even with cost of pdep
u64 s3 = _pdep_u64(~0x7ULL, s); // s3 will have bottom 3 1-bits unset
u64 s5 = _pdep_u64(~0x1fULL, s); // s5 will have bottom 5 1-bits unset
@ -202,63 +189,664 @@ static bool flatten_indexes(size_t len, ParsedJson & pj) {
base_ptr[base+4] = (u32)idx + __builtin_ctzll(s4); //u64 s5 = s4 & (s4 - 1ULL);
base_ptr[base+5] = (u32)idx + __builtin_ctzll(s5); u64 s6 = s5 & (s5 - 1ULL);
base_ptr[base+6] = (u32)idx + __builtin_ctzll(s6); u64 s7 = s6 & (s6 - 1ULL);
s = s7;
base += 7;
s = s6;
base += 6;
}
base = next_base;
}
pj.n_structural_indexes = base;
base_ptr[pj.n_structural_indexes] = 0; // make it safe to dereference one beyond this array
return true;
}
// Parse our json given a big array of 32-bit integers telling us where
// the interesting stuff is
static bool json_parse(const u8 * buf, UNUSED size_t len, ParsedJson & pj) {
u32 last; // index of previous structure at this level or 0 if none
u32 up; // index of structure that contains this one
JsonNode * nodes = pj.nodes;
const u32 MAX_DEPTH = 256;
JsonNode & dummy = nodes[DUMMY_NODE];
JsonNode & root = nodes[ROOT_NODE];
dummy.prev = dummy.up = DUMMY_NODE;
root.prev = DUMMY_NODE;
root.up = ROOT_NODE;
last = up = ROOT_NODE;
// the ape machine consists of two parts:
//
// 1) The "state machine", which is a multiple channel per-level state machine
// It is a conventional DFA except in that it 'changes track' on {}[] characters
//
// 2) The "tape machine": this records offsets of various structures as they go by
// These structures are either u32 offsets of other tapes or u32 offsets into our input
// or structures.
//
// The state machine doesn't record output.
// The tape machine doesn't validate.
//
// The output of the tape machine is meaningful only if the state machine is in non-error states.
// depth adjustment is strictly based on whether we are {[ or }]
// depth adjustment is a pre-increment which, in effect, means that a {[ contained in an object
// is in the level one deeper, while the corresponding }] is at the level
// TAPE MACHINE DEFINITIONS
// A "control" word packs two fields (see get_depth_adjust/get_write_size):
//   bits 24-31: signed depth adjustment (+1, 0 or -1)
//   bits 0-7  : amount by which the tape location advances after the write
const u32 DEPTH_PLUS_ONE = 0x01000000;
const u32 DEPTH_ZERO = 0x00000000;
const u32 DEPTH_MINUS_ONE = 0xff000000; // 0xff in the top byte reads back as -1
const u32 WRITE_ZERO = 0x0;
// NOTE(review): the value is 1, not 4 - presumably one u32 tape entry
// (4 bytes); confirm the intended unit.
const u32 WRITE_FOUR = 0x1;
const u32 CDF = DEPTH_ZERO | WRITE_ZERO; // default 'control'
const u32 C04 = DEPTH_ZERO | WRITE_FOUR; // same depth, one write
const u32 CP4 = DEPTH_PLUS_ONE | WRITE_FOUR; // depth + 1, one write
const u32 CM4 = DEPTH_MINUS_ONE | WRITE_FOUR; // depth - 1, one write
// Extract the signed depth adjustment stored in the top byte of a control word.
inline s8 get_depth_adjust(u32 control) {
    // view the word as signed so that the shift brings the top byte down with
    // its sign
    const s32 signed_control = (s32)control;
    const s32 top_byte = signed_control >> 24;
    return (s8)top_byte;
}
// Extract the write size stored in the low byte of a control word.
inline size_t get_write_size(u32 control) {
    const u32 low_byte_mask = 0xff;
    return control & low_byte_mask;
}
// Control word for every possible input byte, 16 entries per row.
// CDF: nothing to do; C04: write at the current depth; CP4: go one level
// deeper and write; CM4: go one level up and write.
const u32 char_control[256] = {
// nothing interesting from 0x00-0x1f
CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF,
CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF,
// " is 0x22, - is 0x2d
CDF,CDF,C04,CDF, CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF, CDF,C04,CDF,CDF,
// numbers are 0x30-0x39
C04,C04,C04,C04, C04,C04,C04,C04, C04,C04,CDF,CDF, CDF,CDF,CDF,CDF,
// nothing interesting from 0x40-0x4f
CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF,
// 0x5b/5d are []
CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF, CDF,CDF,CDF,CP4, CDF,CM4,CDF,CDF,
// f is 0x66 n is 0x6e
CDF,CDF,CDF,CDF, CDF,CDF,C04,CDF, CDF,CDF,CDF,CDF, CDF,CDF,C04,CDF,
// 0x7b/7d are {}, 74 is t
CDF,CDF,CDF,CDF, C04,CDF,CDF,CDF, CDF,CDF,CDF,CP4, CDF,CM4,CDF,CDF,
// nothing interesting from 0x80-0xff
CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF,
CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF,
CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF,
CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF,
CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF,
CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF,
CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF,
CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF, CDF,CDF,CDF,CDF
};
// Number of u32 tape entries reserved for each depth level.
const size_t MAX_TAPE_ENTRIES = 127*1024;
// Total number of tape entries across all depth levels.
const size_t MAX_TAPE = MAX_DEPTH * MAX_TAPE_ENTRIES;
// all of this stuff needs to get moved somewhere reasonable
// like our ParsedJson structure
// One contiguous tape shared by all depth levels.
u32 tape[MAX_TAPE];
// Current write position in `tape` for each depth level.
u32 tape_locs[MAX_DEPTH];
// Output area for decoded string contents.
u8 string_buf[512*1024];
// Next free byte in string_buf.
u8 * current_string_buf_loc;
u8 number_buf[512*1024]; // holds either doubles or longs, really
// Next free byte in number_buf.
u8 * current_number_buf_loc;
// STATE MACHINE DECLARATIONS
const u32 MAX_STATES = 16;
// Transition table: trans[state][input byte] is the next state. Entries that
// are never assigned stay 0.
u32 trans[MAX_STATES][256];
// One state slot per depth level.
u32 states[MAX_DEPTH];
const int START_STATE = 1;
// Fill in the non-zero entries of the transition table `trans`.
// Every entry that is not assigned here is left untouched.
never_inline void init_state_machine() {
    // states 10 and 6 eliminated

    // Characters on which 5->7, 9->11 and 12->11 are taken:
    // the closers, the first character of true/false/null, digits, minus
    // and the opening quote of a string.
    static const char unaries[] = "}]ftn0123456789-\"";
    for (const char * p = unaries; *p != '\0'; p++) {
        const u32 ch = (u32)*p;
        trans[ 5][ch] = 7;
        trans[ 9][ch] = 11;
        trans[12][ch] = 11;
    }

    // back transitions when new things are open: from any of these states
    // '{' leads to state 2 and '[' leads to state 9
    static const u32 reopen_from[] = { 2, 7, 9, 11 };
    for (u32 k = 0; k < sizeof(reopen_from) / sizeof(reopen_from[0]); k++) {
        const u32 from = reopen_from[k];
        trans[from]['{'] = 2;
        trans[from]['['] = 9;
    }

    // transitions out of the start state
    trans[ 1]['{'] = 2;
    trans[ 1]['['] = 9;

    // key / colon / comma sequencing
    trans[ 2]['"'] = 4;
    trans[ 4][':'] = 5;
    trans[ 7][','] = 8;
    trans[ 8]['"'] = 4;
    trans[11][','] = 12;
}
static bool ape_machine(const u8 * buf, UNUSED size_t len, ParsedJson & pj) {
// NOTE - our depth is used by both the tape machine and the state machine
// Further, in production we will set it to a largish value in a generous buffer as a rogue input
// could consist of many {[ characters or many }] characters. We aren't busily checking errors
// (and in fact, a aggressive sequence of [ characters is actually valid input!) so something that
// blows out maximum depth will need to be periodically checked for, as will something that tries
// to set depth very low. If we set our starting depth, say, to 256, we can tolerate 256 bogus close brace
// characters without aggressively going wrong and writing to bad memory
// Note that any specious depth can have a specious tape associated with and all these specious depths
// can share a region of the tape - it's harmless. Since tape is one-way, any movement in a specious tape
// is an error (so we can detect max_depth violations by making sure that specious tape locations haven't
// moved from their starting values)
u32 depth = 1;
for (u32 i = 0; i < MAX_DEPTH; i++) {
tape_locs[i] = i*MAX_TAPE_ENTRIES;
states[i] = START_STATE;
}
current_string_buf_loc = string_buf;
current_number_buf_loc = number_buf;
u32 error_sump = 0;
u32 old_tape_loc = tape_locs[depth]; // need to initialize for first write
u32 next_idx = pj.structural_indexes[0];
u8 next_c = buf[next_idx];
u32 next_control = char_control[next_c];
for (u32 i = NUM_RESERVED_NODES; i < pj.n_structural_indexes; i++) {
u32 idx = pj.structural_indexes[i];
JsonNode & n = nodes[i];
u8 c = buf[idx];
if (unlikely((c & 0xdf) == 0x5b)) { // meaning 7b or 5b, { or [
// open a scope
n.prev = last;
n.up = up;
up = i;
last = 0;
} else if (unlikely((c & 0xdf) == 0x5d)) { // meaning 7d or 5d, } or ]
// close a scope
n.prev = up;
n.up = pj.nodes[up].up;
up = pj.nodes[up].up;
last = i;
} else {
n.prev = last;
n.up = up;
last = i;
}
n.next = 0;
nodes[n.prev].next = i;
u32 idx = next_idx;
u8 c = next_c;
u32 control = next_control;
next_idx = pj.structural_indexes[i+1];
next_c = buf[next_idx];
next_control = char_control[next_c];
// TAPE MACHINE
s8 depth_adjust = get_depth_adjust(control);
u8 write_size = get_write_size(control);
u32 write_val = (depth_adjust != 0) ? old_tape_loc : idx;
depth += depth_adjust;
//states[depth] = trans[states[depth]][c];
// TAPE MACHINE, again
tape[tape_locs[depth]] = write_val | (c << 24); // hack. Assumes no more than 2^24 tape items and buffer size for now
old_tape_loc = tape_locs[depth] += write_size;
}
if (error_sump) {
return false;
}
dummy.next = DUMMY_NODE; // dummy.next is a sump for meaningless 'nexts', clear it
return true;
}
// they are { 0x7b } 0x7d : 0x3a [ 0x5b ] 0x5d , 0x2c
// these go into the first 3 buckets of the comparison (1/2/4)
// we are also interested in the four whitespace characters
// space 0x20, linefeed 0x0a, horizontal tab 0x09 and carriage return 0x0d
//
// Lookup table indexed by byte value, 16 entries per row: 0 for the six
// structural characters and the four whitespace characters listed above,
// 1 for every other byte.
const u32 structural_or_whitespace_negated[256] = {
1,1,1,1, 1,1,1,1, 1,0,0,1, 1,0,1,1,
1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
0,1,1,1, 1,1,1,1, 1,1,1,1, 0,1,1,1,
1,1,1,1, 1,1,1,1, 1,1,0,1, 1,1,1,1,
1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
1,1,1,1, 1,1,1,1, 1,1,1,0, 1,0,1,1,
1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
1,1,1,1, 1,1,1,1, 1,1,1,0, 1,0,1,1,
1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1
};
// Table lookup on the byte value:
// yields zero for a structural or whitespace character,
// non-zero for anything else
really_inline u32 is_not_structural_or_whitespace(u8 c) {
    const u32 * const lookup = structural_or_whitespace_negated;
    return lookup[c];
}
// Maps the character that follows a backslash to the byte it stands for;
// 0 marks an invalid escape. Indexed by byte value, 16 entries per row.
// These chars yield themselves: " \ /
// b -> backspace (0x08), f -> formfeed (0x0c), n -> newline (0x0a),
// r -> cr (0x0d), t -> horizontal tab (0x09)
// u not handled in this table as it's complex
const u8 escape_map[256] = {
    0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, //0x0.
    0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
    0,0,0x22,0, 0,0,0,0, 0,0,0,0, 0,0,0,0x2f, //0x2. " and /
    0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,

    0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, //0x4.
    0,0,0,0, 0,0,0,0, 0,0,0,0, 0x5c,0,0,0, //0x5. backslash
    // formfeed is 0x0c (decimal 12), not 0x12
    0,0,0x08,0, 0,0,0x0c,0, 0,0,0,0, 0,0,0x0a,0, //0x6. b, f, n
    0,0,0x0d,0, 0x09,0,0,0, 0,0,0,0, 0,0,0,0, //0x7. r, t

    0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
    0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
    0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
    0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,

    0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
    0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
    0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
    0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
};
// Number of UTF-8 bytes needed for a code point; 0 marks an error.
// NOTE(review): despite the name, the entries are laid out by the number of
// significant bits of the code point (entry 0 = zero bits, entries 1-7 = up to
// 7 bits, ...), i.e. by 32 minus the leading-zero count. Confirm that callers
// index it accordingly.
const u32 leading_zeros_to_utf_bytes[33] = {
1,
1, 1, 1, 1, 1, 1, 1, // 7 bits for first one
2, 2, 2, 2, // 11 bits for next
3, 3, 3, 3, 3, // 16 bits for next
4, 4, 4, 4, 4, // 21 bits for next
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; // error
// Indexed by UTF-8 byte count: the payload bit positions of the encoded form
// (7 bits; 5+6 bits; 4+6+6 bits; 3+6+6+6 bits), used as a pdep mask.
const u32 UTF_PDEP_MASK[5] = {
0x00, // error
0x7f,
0x1f3f,
0x0f3f3f,
0x073f3f3f
};
// Indexed by UTF-8 byte count: the fixed marker bits of the lead byte
// (110/1110/11110 prefix) and of each continuation byte (10 prefix).
const u32 UTF_OR_MASK[5] = {
0x00, // error
0x00,
0xc080,
0xe08080,
0xf0808080
};
// Returns true when v is an ASCII hexadecimal digit (0-9, a-f or A-F).
bool is_hex_digit(u8 v) {
    if (v >= '0' && v <= '9')
        return true;
    v &= 0xdf; // clear bit 5 so that 'a'..'f' fold onto 'A'..'F'
    if (v >= 'A' && v <= 'F')
        return true;
    return false;
}

// Numeric value (0-15) of a hexadecimal digit.
// The result is only meaningful when is_hex_digit(v) is true.
u8 digit_to_val(u8 v) {
    if (v >= '0' && v <= '9')
        return v - '0';
    v &= 0xdf; // fold lower case onto upper case
    return v - 'A' + 10;
}

// Parse exactly four hexadecimal digits starting at src (most significant
// digit first) into *res.
// Returns false, leaving *res untouched, if any of the four bytes is not a
// hexadecimal digit.
bool hex_to_u32(const u8 * src, u32 * res) {
    u8 v1 = src[0];
    u8 v2 = src[1];
    u8 v3 = src[2];
    u8 v4 = src[3];
    if (!is_hex_digit(v1) || !is_hex_digit(v2) || !is_hex_digit(v3) || !is_hex_digit(v4)) {
        return false;
    }
    // each hex digit contributes 4 bits, so the digits sit 4 bits apart
    *res = (u32)digit_to_val(v1) << 12
         | (u32)digit_to_val(v2) << 8
         | (u32)digit_to_val(v3) << 4
         | (u32)digit_to_val(v4);
    return true;
}
// handle a unicode codepoint
// write appropriate values into dest
// src will always advance 6 bytes
// dest will advance a variable amount (return via pointer)
// return true if the unicode codepoint was valid
// We work in little-endian then swap at write time
static bool handle_unicode_codepoint(const u8 ** src_ptr, u8 ** dst_ptr) {
u32 code_point = 0; // read the hex, potentially reading another \u beyond if it's a // wacky one
if (!hex_to_u32(*src_ptr + 2, &code_point)) {
return false;
}
*src_ptr += 6;
// check for the weirdo double-UTF-16 nonsense for things outside Basic Multilingual Plane.
if (code_point >= 0xd800 && code_point < 0xdc00) {
// TODO: sanity check and clean up; snippeted from RapidJSON and poorly understood at the moment
if (( (*src_ptr)[0] != '\\') || (*src_ptr)[1] != 'u') {
return false;
}
u32 code_point_2 = 0;
if (!hex_to_u32(*src_ptr + 2, &code_point_2)) {
return false;
}
if (code_point_2 < 0xdc00 || code_point_2 > 0xdfff) {
return false;
}
code_point = (((code_point - 0xd800) << 10) | (code_point_2 - 0xdc00)) + 0x10000;
*src_ptr += 6;
}
// TODO: check to see whether the below code is nonsense (it's really only a sketch at this point)
u32 lz = __builtin_clz(code_point);
u32 utf_bytes = leading_zeros_to_utf_bytes[lz];
u32 tmp = _pdep_u32(code_point, UTF_PDEP_MASK[utf_bytes]) | UTF_OR_MASK[utf_bytes];
// swap and move to the other side of the register
tmp = __builtin_bswap32(tmp);
tmp >>= (4 - utf_bytes) * 8;
**(u32 **)dst_ptr = tmp;
*dst_ptr += utf_bytes;
return true;
}
// Decode the JSON string whose opening quote is at the buffer offset held in
// the low 24 bits of tape[tape_loc]. The decoded, NUL-terminated bytes are
// appended to string_buf and tape[tape_loc] is rewritten as a '"' tag in the
// top byte plus an offset into string_buf.
// Returns false on an invalid escape sequence.
// The input is scanned 32 bytes at a time and every window is stored to the
// destination unconditionally, so both buffers must tolerate 32-byte accesses
// that reach past the end of the string.
//
// later extensions -
// if \\ we could detect whether it's a substantial run of \ or just eat 2 chars and write 1
// handle anything short of \u or \\\ (as a prefix) with clever PSHUFB stuff and don't leave SIMD
static bool parse_string(const u8 * buf, UNUSED size_t len, UNUSED ParsedJson & pj, u32 tape_loc) {
    u32 offset = tape[tape_loc] & 0xffffff;
    const u8 * src = &buf[offset+1]; // we know that buf at offset is a "
    u8 * dst = current_string_buf_loc;
    // basic non-sexy parsing code
    while (1) {
        m256 v = _mm256_loadu_si256((const m256 *)(src));
        u32 bs_bits = (u32)_mm256_movemask_epi8(_mm256_cmpeq_epi8(v, _mm256_set1_epi8('\\')));
        u32 quote_bits = (u32)_mm256_movemask_epi8(_mm256_cmpeq_epi8(v, _mm256_set1_epi8('"')));
        // Counting trailing zeros of 0 is undefined, so plant a sentinel at
        // bit 32: the distance is 32 when the character is not in the window.
        u32 quote_dist = (u32)__builtin_ctzll((u64)quote_bits | (1ULL << 32));
        u32 bs_dist = (u32)__builtin_ctzll((u64)bs_bits | (1ULL << 32));
        // store to dest unconditionally - we can overwrite the bits we don't like later
        _mm256_storeu_si256((m256 *)(dst), v);
        if (quote_dist < bs_dist) {
            // we encountered quotes first. Move dst to point to quotes and exit
            dst[quote_dist] = 0; // null terminate and get out
            current_string_buf_loc = dst + quote_dist + 1;
            // NOTE(review): this records the offset just past the terminator,
            // because current_string_buf_loc has already been advanced -
            // confirm whether the start offset was intended.
            tape[tape_loc] = ((u32)'"') << 24 | (current_string_buf_loc - string_buf); // assume 2^24 will hold all strings for now
            return true;
        } else if (quote_dist > bs_dist) {
            u8 escape_char = src[bs_dist+1];
            // we encountered backslash first. Handle backslash
            if (escape_char == 'u') {
                // move src/dst up to the start; they will be further adjusted
                // within the unicode codepoint handling code.
                src += bs_dist;
                dst += bs_dist;
                if (!handle_unicode_codepoint(&src, &dst)) {
                    return false;
                }
            } else {
                // simple 1:1 conversion. Will eat bs_dist+2 characters in input and
                // write bs_dist+1 characters to output
                // note this may reach beyond the part of the buffer we've actually seen.
                // I think this is ok
                u8 escape_result = escape_map[escape_char];
                if (!escape_result)
                    return false; // bogus escape value is an error
                dst[bs_dist] = escape_result;
                src += bs_dist+2;
                dst += bs_dist+1;
            }
        } else {
            // they are the same. Since they can't co-occur, it means we encountered neither.
            src+=32;
            dst+=32;
        }
        // keep scanning from the updated position until the closing quote
    }
}
// put a parsed version of number (either as a double or a signed long) into the number buffer,
// put a 'tag' indicating which type and where it is back onto the tape at that location
// return false if we can't parse the number which means either
// (a) the number isn't valid, or (b) the number is followed by something that isn't whitespace, comma or a close }] character
// which are the only things that should follow a number at this stage
// bools to detect what we found in our initial character already here - we are already
// switching on 0 vs 1-9 vs - so we may as well keep separate paths where that's useful
// TODO: see if we really need a separate number_buf or whether we should just
// have a generic scratch - would need to align before using for this
static bool parse_number(const u8 * buf, UNUSED size_t len, UNUSED ParsedJson & pj, u32 tape_loc, UNUSED bool found_zero, bool found_minus) {
u32 offset = tape[tape_loc] & 0xffffff;
if (found_minus) {
offset++;
}
const u8 * src = &buf[offset];
m256 v = _mm256_loadu_si256((const m256 *)(src));
u64 error_sump = 0;
// categories to extract
// Digits:
// 0 (0x30) - bucket 0
// 1-9 (never any distinction except if we didn't get the free kick at 0 due to the leading minus) (0x31-0x39) - bucket 1
// . (0x2e) - bucket 2
// E or e - no distinction (0x45/0x65) - bucket 3
// + (0x2b) - bucket 4
// - (0x2d) - bucket 4
// Terminators
// Whitespace: 0x20, 0x09, 0x0a, 0x0d - bucket 5+6
// Comma and the closes: 0x2c is comma, } is 0x7d, ] is 0x5d - bucket 5+7
// Another shufti - also a bit hand-hacked. Need to make a better construction
const m256 low_nibble_mask = _mm256_setr_epi8(
// 0 1 2 3 4 5 6 7 8 9 a b c d e f
33, 2, 2, 2, 2, 10, 2, 2, 2, 66, 64, 16, 32,208, 4, 0,
33, 2, 2, 2, 2, 10, 2, 2, 2, 66, 64, 16, 32,208, 4, 0
);
const m256 high_nibble_mask = _mm256_setr_epi8(
// 0 1 2 3 4 5 6 7 8 9 a b c d e f
64, 0, 52, 3, 8,128, 8,128, 0, 0, 0, 0, 0, 0, 0, 0,
64, 0, 52, 3, 8,128, 8,128, 0, 0, 0, 0, 0, 0, 0, 0
);
m256 tmp = _mm256_and_si256(
_mm256_shuffle_epi8(low_nibble_mask, v),
_mm256_shuffle_epi8(high_nibble_mask,
_mm256_and_si256(_mm256_srli_epi32(v, 4), _mm256_set1_epi8(0x7f))));
m256 enders_mask = _mm256_set1_epi8(0xe0);
m256 tmp_enders = _mm256_cmpeq_epi8(_mm256_and_si256(tmp, enders_mask),
_mm256_set1_epi8(0));
u32 enders = ~(u32)_mm256_movemask_epi8(tmp_enders);
if (enders == 0) {
// TODO: scream for help if enders == 0 which means we have
// a heroically long number string or some garbage
}
// TODO: make a mask that indicates where our digits are
u32 number_mask = ~enders & (enders-1);
m256 n_mask = _mm256_set1_epi8(0x1f);
m256 tmp_n = _mm256_cmpeq_epi8(_mm256_and_si256(tmp, n_mask),
_mm256_set1_epi8(0));
u32 number_characters = ~(u32)_mm256_movemask_epi8(tmp_n);
// put something into our error sump if we have something
// before our ending characters that isn't a valid character
// for the inside of our JSON
number_characters &= number_mask;
error_sump |= number_characters ^ number_mask;
m256 d_mask = _mm256_set1_epi8(0x03);
m256 tmp_d = _mm256_cmpeq_epi8(_mm256_and_si256(tmp, d_mask),
_mm256_set1_epi8(0));
u32 digit_characters = ~(u32)_mm256_movemask_epi8(tmp_d);
digit_characters &= number_mask;
m256 p_mask = _mm256_set1_epi8(0x04);
m256 tmp_p = _mm256_cmpeq_epi8(_mm256_and_si256(tmp, p_mask),
_mm256_set1_epi8(0));
u32 decimal_characters = ~(u32)_mm256_movemask_epi8(tmp_p);
decimal_characters &= number_mask;
m256 e_mask = _mm256_set1_epi8(0x08);
m256 tmp_e = _mm256_cmpeq_epi8(_mm256_and_si256(tmp, e_mask),
_mm256_set1_epi8(0));
u32 exponent_characters = ~(u32)_mm256_movemask_epi8(tmp_e);
exponent_characters &= number_mask;
m256 s_mask = _mm256_set1_epi8(0x10);
m256 tmp_s = _mm256_cmpeq_epi8(_mm256_and_si256(tmp, s_mask),
_mm256_set1_epi8(0));
u32 sign_characters = ~(u32)_mm256_movemask_epi8(tmp_s);
sign_characters &= number_mask;
u32 digit_edges = ~(digit_characters << 1) & digit_characters;
// check that we have 1-3 'edges' only
u32 t = digit_edges;
t &= t-1; t &= t-1; t &= t-1;
error_sump |= t;
// check that we start with a digit
error_sump |= ~digit_characters & 0x1;
// having done some checks, get lazy and fall back
// to strtoll or strtod
// TODO: handle the easy cases ourselves; these are
// expensive and we've done a lot of the prepwork.
// return errors if strto* fail, otherwise fill in a code on the tape
// 'd' for floating point and 'l' for long and put a pointer to the
// spot in the buffer.
if (__builtin_popcount(digit_edges) == 1) {
// try a strtoll
char * end;
u64 result = strtoll((const char *)src, &end, 10);
if ((errno != 0) || (end == (const char *)src)) {
error_sump |= 1;
}
error_sump |= is_not_structural_or_whitespace(*end);
if (found_minus) {
result = -result;
}
*((u64 *)current_number_buf_loc) = result;
tape[tape_loc] = ((u32)'l') << 24 | (current_number_buf_loc - number_buf); // assume 2^24 will hold all numbers for now
current_number_buf_loc += 8;
} else {
// try a strtod
char * end;
double result = strtod((const char *)src, &end);
if ((errno != 0) || (end == (const char *)src)) {
error_sump |= 1;
}
error_sump |= is_not_structural_or_whitespace(*end);
if (found_minus) {
result = -result;
}
*((double *)current_number_buf_loc) = result;
tape[tape_loc] = ((u32)'d') << 24 | (current_number_buf_loc - number_buf); // assume 2^24 will hold all numbers for now
current_number_buf_loc += 8;
}
// TODO: check the MSB element is a digit
// TODO: a whole bunch of checks
// TODO: <=1 decimal point, eE mark, +- construct
// TODO: first and last character in mask region must be
// digit
// TODO: if it exists,
// Decimal point is after the first cluster of numbers only
// and before the second cluster of numbers only. It must
// be digit_or_zero . digit_or_zero strictly
// TODO: eE mark and +- construct are adjacent with eE first
// eE mark preceeds final cluster of numbers only
// and immediately follows second-last cluster of numbers only (not
// necessarily second, as we may have 4e10).
// it may suffice to insist that eE is preceeded immediately
// by a digit of any kind and that it's followed locally by
// a digit immediately or a +- construct then a digit.
// TODO: if we have both . and the eE mark then the . must
// precede the eE mark
// TODO: if first character is a zero (we know in advance except for -0)
// second char must be . or eE.
if (error_sump)
return true;
return true;
}
static bool shovel_machine(const u8 * buf, size_t len, ParsedJson & pj) {
// fixup the mess made by the ape_machine
// as such it does a bunch of miscellaneous things on the tapes
u32 error_sump = 0;
u64 tv = *(const u64 *)"true ";
u64 nv = *(const u64 *)"null ";
u64 fv = *(const u64 *)"false ";
u64 mask4 = 0x00000000ffffffff;
u64 mask5 = 0x000000ffffffffff;
// walk over each tape
for (u32 i = 0; i < MAX_DEPTH; i++) {
u32 start_loc = i*MAX_TAPE_ENTRIES;
u32 end_loc = tape_locs[i];
for (u32 j = start_loc; j < end_loc; j++) {
switch (tape[j]>>24) {
case '{': case '[': {
// pivot our tapes
// point the enclosing structural char (}]) to the head marker ({[) and
// put the end of the sequence on the tape at the head marker
// we start with head marker pointing at the enclosing structural char
// and the enclosing structural char pointing at the end. Just swap them.
// also check the balanced-{} or [] property here
u8 head_marker_c = tape[j] >> 24;
u32 head_marker_loc = tape[j] & 0xffffff;
u32 tape_enclosing = tape[head_marker_loc];
u8 enclosing_c = tape_enclosing >> 24;
tape[head_marker_loc] = tape[j];
tape[j] = tape_enclosing;
error_sump |= (enclosing_c - head_marker_c - 2); // [] and {} only differ by 2 chars
break;
}
case '"': {
error_sump |= !parse_string(buf, len, pj, j);
break;
}
case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
error_sump |= !parse_number(buf, len, pj, j, false, false);
break;
case '0':
error_sump |= !parse_number(buf, len, pj, j, true, false);
break;
case '-':
error_sump |= !parse_number(buf, len, pj, j, false, true);
break;
case 't': {
u32 offset = tape[j] & 0xffffff;
const u8 * loc = buf + offset;
error_sump |= ((*(const u64 *)loc) & mask4) ^ tv;
error_sump |= is_not_structural_or_whitespace(loc[4]);
break;
}
case 'f': {
u32 offset = tape[j] & 0xffffff;
const u8 * loc = buf + offset;
error_sump |= ((*(const u64 *)loc) & mask5) ^ fv;
error_sump |= is_not_structural_or_whitespace(loc[5]);
break;
}
case 'n': {
u32 offset = tape[j] & 0xffffff;
const u8 * loc = buf + offset;
error_sump |= ((*(const u64 *)loc) & mask4) ^ nv;
error_sump |= is_not_structural_or_whitespace(loc[4]);
break;
}
default:
break;
}
}
}
if (error_sump) {
return false;
}
return true;
}
// Top-level entry point: runs the parsing stages over buf in a fixed order -
// find_structural_bits, then flatten_indexes, then json_parse - all working
// on the same ParsedJson, and then reports success unconditionally.
//
// buf / len : the input and its length, forwarded to the stages.
// pj        : the parse state shared by the stages.
// Returns true in every case.
//
// NOTE(review): nothing returned by json_parse is looked at here, so a parse
// failure cannot reach the caller through this function - confirm whether
// json_parse reports errors some other way.
static bool avx_json_parse(const u8 * buf, size_t len, ParsedJson & pj) {
find_structural_bits(buf, len, pj);
flatten_indexes(len, pj);
json_parse(buf, len, pj);
return true;
// Unreachable: the return above always fires first, so ape_machine and
// shovel_machine are never invoked from this function.
return ape_machine(buf, len, pj) && shovel_machine(buf, len, pj);
}

File diff suppressed because it is too large Load Diff