625 lines
24 KiB
C++
625 lines
24 KiB
C++
#include <iostream>
|
|
#include <iomanip>
|
|
#include <chrono>
|
|
#include <fstream>
|
|
#include <sstream>
|
|
#include <string>
|
|
#include <cstring>
|
|
#include <vector>
|
|
#include <set>
|
|
#include <map>
|
|
#include <algorithm>
|
|
#include <x86intrin.h>
|
|
#include <assert.h>
|
|
#include "common_defs.h"
|
|
|
|
using namespace std;
|
|
|
|
//#define DEBUG
|
|
|
|
#ifdef DEBUG
|
|
// Debug helper: prints the 32 bytes of a 256-bit vector, lowest address first, as
// width-3 decimal numbers. Every 8th byte is followed by '|', every other 4th byte by
// ':', and all remaining bytes by a space. The label `msg` ends the line.
inline void dump256(m256 d, string msg) {
    const u8 * bytes = (const u8 *)(&d);
    for (u32 pos = 1; pos <= 32; pos++) {
        cout << setw(3) << (int)bytes[pos - 1];
        const char * separator = " ";
        if (pos % 8 == 0) {
            separator = "|";
        } else if (pos % 4 == 0) {
            separator = ":";
        }
        cout << separator;
    }
    cout << " " << msg << "\n";
}
|
|
|
|
// dump bits low to high
|
|
void dumpbits(u64 v, string msg) {
|
|
for (u32 i = 0; i < 64; i++) {
|
|
std::cout << (((v>>(u64)i) & 0x1ULL) ? "1" : "_");
|
|
}
|
|
cout << " " << msg << "\n";
|
|
}
|
|
#else
|
|
// Non-debug builds: both dump helpers expand to an empty statement, so their
// arguments are never evaluated.
#define dump256(a, b) ;
#define dumpbits(a, b) ;
|
|
#endif
|
|
|
|
// get a corpus; pad out to cache line so we can always use SIMD
|
|
pair<u8 *, size_t> get_corpus(string filename) {
|
|
ifstream is(filename, ios::binary);
|
|
if (is) {
|
|
stringstream buffer;
|
|
buffer << is.rdbuf();
|
|
size_t length = buffer.str().size();
|
|
char * aligned_buffer;
|
|
if (posix_memalign( (void **)&aligned_buffer, 64, ROUNDUP_N(length, 64))) {
|
|
throw "Allocation failed";
|
|
};
|
|
memset(aligned_buffer, 0x20, ROUNDUP_N(length, 64));
|
|
memcpy(aligned_buffer, buffer.str().c_str(), length);
|
|
is.close();
|
|
return make_pair((u8 *)aligned_buffer, length);
|
|
}
|
|
throw "No corpus";
|
|
return make_pair((u8 *)0, (size_t)0);
|
|
}
|
|
|
|
struct JsonNode {
|
|
u32 next;
|
|
u32 next_type;
|
|
u64 payload; // a freeform 'payload' holding a parsed representation of *something*
|
|
};
|
|
|
|
struct ParsedJson {
|
|
u8 * structurals;
|
|
u32 n_structural_indexes;
|
|
u32 * structural_indexes;
|
|
JsonNode * nodes;
|
|
};
|
|
|
|
// a straightforward comparison of a mask against input. 5 uops; would be cheaper in AVX512.
|
|
really_inline u64 cmp_mask_against_input(m256 input_lo, m256 input_hi, m256 mask) {
|
|
m256 cmp_res_0 = _mm256_cmpeq_epi8(input_lo, mask);
|
|
u64 res_0 = (u32)_mm256_movemask_epi8(cmp_res_0);
|
|
m256 cmp_res_1 = _mm256_cmpeq_epi8(input_hi, mask);
|
|
u64 res_1 = _mm256_movemask_epi8(cmp_res_1);
|
|
return res_0 | (res_1 << 32);
|
|
}
|
|
|
|
// Stage 1: walks the input in 64-byte chunks and, for each chunk, stores a 64-bit mask
// into pj.structurals (at byte offset idx/8) whose set bits mark
//   - structural characters { } [ ] : , outside of strings,
//   - unescaped quote characters, and
//   - "pseudo-structural" characters: the first non-whitespace, non-quoted character
//     following a structural character or whitespace.
//
// buf : input; read with aligned 256-bit loads in whole 64-byte chunks, so it must be
//       suitably aligned and readable up to the next multiple of 64 past `len`
//       (get_corpus() provides this, padding the tail with spaces, 0x20)
// len : number of input bytes
// pj  : pj.structurals must have room for one u64 per 64-byte chunk
//
// Always returns true.
never_inline bool find_structural_bits(const u8 * buf, size_t len, ParsedJson & pj) {
    // Useful constant masks
    const u64 even_bits = 0x5555555555555555ULL;
    const u64 odd_bits = ~even_bits;

    // Loop-invariant vector constants, built once instead of once per chunk
    const m256 backslash_vec = _mm256_set1_epi8('\\');
    const m256 quote_vec = _mm256_set1_epi8('"');
    const m256 zero_vec = _mm256_set1_epi8(0);
    const m256 shifted_nibble_keep = _mm256_set1_epi8(0x7f);

    // 'shufti' tables used to detect structural JSON characters
    // they are { 0x7b } 0x7d : 0x3a [ 0x5b ] 0x5d , 0x2c
    // these go into the first 3 buckets of the comparison (1/2/4)

    // we are also interested in the four whitespace characters
    // space 0x20, linefeed 0x0a, horizontal tab 0x09 and carriage return 0x0d
    // these go into the next 2 buckets of the comparison (8/16)
    const m256 low_nibble_mask = _mm256_setr_epi8(
        // 0                          9   a  b  c  d
        16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0,
        16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0
    );
    const m256 high_nibble_mask = _mm256_setr_epi8(
        // 0     2   3     5     7
        8, 0, 18, 4, 0, 1, 0, 1, 0, 0, 0, 3, 2, 1, 0, 0,
        8, 0, 18, 4, 0, 1, 0, 1, 0, 0, 0, 3, 2, 1, 0, 0
    );
    const m256 structural_shufti_mask = _mm256_set1_epi8(0x7);
    const m256 whitespace_shufti_mask = _mm256_set1_epi8(0x18);

    // for now, just work in 64-byte chunks
    // the caller has padded the input out to a 64 byte multiple (get_corpus pads with spaces)

    // persistent state across loop
    u64 prev_iter_ends_odd_backslash = 0ULL; // either 0 or 1, but a 64-bit value
    u64 prev_iter_inside_quote = 0ULL; // either all zeros or all ones
    u64 prev_iter_pseudo_structural_carry = 0ULL;

    for (size_t idx = 0; idx < len; idx += 64) {
#ifdef DEBUG
        cout << "Idx is " << idx << "\n";
        for (u32 j = 0; j < 64; j++) {
            char c = *(buf+idx+j);
            // isprint() is undefined for negative values, so go through unsigned char
            if (isprint((unsigned char)c)) {
                cout << c;
            } else {
                cout << '_';
            }
        }
        cout << "| ... input\n";
#endif
        m256 input_lo = _mm256_load_si256((const m256 *)(buf + idx + 0));
        m256 input_hi = _mm256_load_si256((const m256 *)(buf + idx + 32));

        ////////////////////////////////////////////////////////////////////////////////////////////
        //     Step 1: detect odd sequences of backslashes
        ////////////////////////////////////////////////////////////////////////////////////////////

        u64 bs_bits = cmp_mask_against_input(input_lo, input_hi, backslash_vec);
        dumpbits(bs_bits, "backslash bits");
        u64 start_edges = bs_bits & ~(bs_bits << 1);
        dumpbits(start_edges, "start_edges");

        // flip lowest if we have an odd-length run at the end of the prior iteration
        u64 even_start_mask = even_bits ^ prev_iter_ends_odd_backslash;
        u64 even_starts = start_edges & even_start_mask;
        u64 odd_starts = start_edges & ~even_start_mask;

        dumpbits(even_starts, "even_starts");
        dumpbits(odd_starts, "odd_starts");

        u64 even_carries = bs_bits + even_starts;

        u64 odd_carries;
        // must record the carry-out of our odd-carries out of bit 63; this indicates whether the
        // sense of any edge going to the next iteration should be flipped
        bool iter_ends_odd_backslash = __builtin_uaddll_overflow(bs_bits, odd_starts, &odd_carries);

        odd_carries |= prev_iter_ends_odd_backslash; // push in bit zero as a potential end
                                                     // if we had an odd-numbered run at the end of
                                                     // the previous iteration
        prev_iter_ends_odd_backslash = iter_ends_odd_backslash ? 0x1ULL : 0x0ULL;

        dumpbits(even_carries, "even_carries");
        dumpbits(odd_carries, "odd_carries");

        u64 even_carry_ends = even_carries & ~bs_bits;
        u64 odd_carry_ends = odd_carries & ~bs_bits;
        dumpbits(even_carry_ends, "even_carry_ends");
        dumpbits(odd_carry_ends, "odd_carry_ends");

        u64 even_start_odd_end = even_carry_ends & odd_bits;
        u64 odd_start_even_end = odd_carry_ends & even_bits;
        dumpbits(even_start_odd_end, "esoe");
        dumpbits(odd_start_even_end, "osee");

        u64 odd_ends = even_start_odd_end | odd_start_even_end;
        dumpbits(odd_ends, "odd_ends");

        ////////////////////////////////////////////////////////////////////////////////////////////
        //     Step 2: detect insides of quote pairs
        ////////////////////////////////////////////////////////////////////////////////////////////

        u64 quote_bits = cmp_mask_against_input(input_lo, input_hi, quote_vec);
        // a quote directly after an odd-length backslash run is escaped and does not count
        quote_bits = quote_bits & ~odd_ends;
        dumpbits(quote_bits, "quote_bits");
        // carry-less multiply by all-ones is a prefix XOR: every bit from an opening quote
        // up to (not including) its closing quote ends up set
        u64 quote_mask = _mm_cvtsi128_si64(_mm_clmulepi64_si128(_mm_set_epi64x(0ULL, quote_bits),
                                                                _mm_set1_epi8(0xFF), 0));
        quote_mask ^= prev_iter_inside_quote;
        // arithmetic shift smears bit 63 over the whole word: all ones if still inside a string
        prev_iter_inside_quote = (u64)((s64)quote_mask>>63);
        dumpbits(quote_mask, "quote_mask");

        // How do we build up a user traversable data structure
        // first, do a 'shufti' to detect structural JSON characters and whitespace
        // (tables are set up before the loop)
        m256 v_lo = _mm256_and_si256(
            _mm256_shuffle_epi8(low_nibble_mask, input_lo),
            _mm256_shuffle_epi8(high_nibble_mask,
                                _mm256_and_si256(_mm256_srli_epi32(input_lo, 4), shifted_nibble_keep)));

        m256 v_hi = _mm256_and_si256(
            _mm256_shuffle_epi8(low_nibble_mask, input_hi),
            _mm256_shuffle_epi8(high_nibble_mask,
                                _mm256_and_si256(_mm256_srli_epi32(input_hi, 4), shifted_nibble_keep)));

        m256 tmp_lo = _mm256_cmpeq_epi8(_mm256_and_si256(v_lo, structural_shufti_mask), zero_vec);
        m256 tmp_hi = _mm256_cmpeq_epi8(_mm256_and_si256(v_hi, structural_shufti_mask), zero_vec);

        u64 structural_res_0 = (u32)_mm256_movemask_epi8(tmp_lo);
        u64 structural_res_1 = _mm256_movemask_epi8(tmp_hi);
        // the compare above flags bytes that are NOT in a bucket, hence the inversion
        u64 structurals = ~(structural_res_0 | (structural_res_1 << 32));

        // this additional mask and transfer is non-trivially expensive, unfortunately
        m256 tmp_ws_lo = _mm256_cmpeq_epi8(_mm256_and_si256(v_lo, whitespace_shufti_mask), zero_vec);
        m256 tmp_ws_hi = _mm256_cmpeq_epi8(_mm256_and_si256(v_hi, whitespace_shufti_mask), zero_vec);

        u64 ws_res_0 = (u32)_mm256_movemask_epi8(tmp_ws_lo);
        u64 ws_res_1 = _mm256_movemask_epi8(tmp_ws_hi);
        u64 whitespace = ~(ws_res_0 | (ws_res_1 << 32));

        dumpbits(structurals, "structurals");
        dumpbits(whitespace, "whitespace");

        // mask off anything inside quotes
        structurals &= ~quote_mask;

        // whitespace inside our quotes also doesn't count; otherwise "  foo" would generate a spurious
        // pseudo-structural-character at 'foo'
        whitespace &= ~quote_mask;

        // add the real quote bits back into our bitmask as well, so we can
        // quickly traverse the strings we've spent all this trouble gathering
        structurals |= quote_bits;

        // Now, establish "pseudo-structural characters". These are characters that follow a structural
        // character followed by zero or more whitespace
        // this allows us to discover true/false/null and numbers in any location where they might legally
        // occur; it will also create another 'checkpoint' where if a non-quoted region of our input
        // has whitespace after a structural character followed by a syntax error, we can detect this
        // and get an error in a later stage (i.e. the state machine)

        // Slightly more painful than it would seem. It's possible that either structurals or whitespace are
        // all 1s (e.g. {{{{{{{....{{{{x64, or a really long whitespace). As such there is no safe place
        // to add a '1' from the previous iteration without *that* triggering the carry we are looking
        // out for, so we must check both carries for overflow

        u64 tmp = structurals | whitespace;
        u64 tmp2;
        bool ps_carry = __builtin_uaddll_overflow(tmp, structurals, &tmp2);
        dumpbits(tmp2, "pseudo_structural add calculation first part");
        u64 tmp3;
        ps_carry = ps_carry | __builtin_uaddll_overflow(tmp2, prev_iter_pseudo_structural_carry, &tmp3);
        prev_iter_pseudo_structural_carry = ps_carry ? 0x1ULL : 0x0ULL;
        dumpbits(tmp3, "pseudo_structural add calculation after adding carry");
        tmp3 &= ~quote_mask;
        tmp3 &= ~whitespace;
        dumpbits(tmp3, "pseudo_structural add calculation without quotes and whitespace");
        dumpbits(structurals, "final structurals without quotes");
        structurals |= tmp3;
        dumpbits(structurals, "final structurals and pseudo structurals");

        // one u64 of output per 64 bytes of input
        *(u64 *)(pj.structurals + idx/8) = structurals;
    }
    return true;
}
|
|
|
|
// The structural index array starts with two reserved slots. flatten_indexes() zeroes
// both and writes real offsets from slot NUM_RESERVED_NODES onwards.
constexpr u32 DUMMY_NODE = 0;
constexpr u32 ROOT_NODE = 1;
constexpr u32 NUM_RESERVED_NODES = 2;
|
|
|
|
// just transform the bitmask to a big list of 32-bit integers for now
|
|
// that's all; the type of character the offset points to will
|
|
// tell us exactly what we need to know. Naive but straightforward implementation
|
|
never_inline bool flatten_indexes(size_t len, ParsedJson & pj) {
|
|
u32 base = NUM_RESERVED_NODES;
|
|
u32 * base_ptr = pj.structural_indexes;
|
|
base_ptr[DUMMY_NODE] = base_ptr[ROOT_NODE] = 0; // really shouldn't matter
|
|
for (size_t idx = 0; idx < len; idx+=64) {
|
|
u64 s = *(u64 *)(pj.structurals + idx/8);
|
|
#ifdef SUPPRESS_CHEESY_FLATTEN
|
|
while (s) {
|
|
base_ptr[base++] = (u32)idx + __builtin_ctzll(s); s &= s - 1ULL;
|
|
}
|
|
#else
|
|
u32 cnt = __builtin_popcountll(s);
|
|
u32 next_base = base + cnt;
|
|
while (s) {
|
|
// spoil the suspense
|
|
u64 s3 = _pdep_u64(~0x7ULL, s); // s3 will have bottom 3 1-bits unset
|
|
u64 s5 = _pdep_u64(~0x1fULL, s); // s5 will have bottom 5 1-bits unset
|
|
|
|
base_ptr[base+0] = (u32)idx + __builtin_ctzll(s); u64 s1 = s & (s - 1ULL);
|
|
base_ptr[base+1] = (u32)idx + __builtin_ctzll(s1); u64 s2 = s1 & (s1 - 1ULL);
|
|
base_ptr[base+2] = (u32)idx + __builtin_ctzll(s2); //u64 s3 = s2 & (s2 - 1ULL);
|
|
base_ptr[base+3] = (u32)idx + __builtin_ctzll(s3); u64 s4 = s3 & (s3 - 1ULL);
|
|
|
|
base_ptr[base+4] = (u32)idx + __builtin_ctzll(s4); //u64 s5 = s4 & (s4 - 1ULL);
|
|
base_ptr[base+5] = (u32)idx + __builtin_ctzll(s5); u64 s6 = s5 & (s5 - 1ULL);
|
|
base_ptr[base+6] = (u32)idx + __builtin_ctzll(s6); u64 s7 = s6 & (s6 - 1ULL);
|
|
s = s7;
|
|
base += 7;
|
|
}
|
|
base = next_base;
|
|
#endif
|
|
}
|
|
pj.n_structural_indexes = base;
|
|
return true;
|
|
}
|
|
|
|
|
|
// Number of nesting levels for which the ape machine keeps a tape cursor and a state.
constexpr u32 MAX_DEPTH = 256;
|
|
|
|
// the ape machine consists of two parts:
|
|
//
|
|
// 1) The "state machine", which is a multiple channel per-level state machine
|
|
// It is a conventional DFA except in that it 'changes track' on {}[] characters
|
|
//
|
|
// 2) The "tape machine": this records offsets of various structures as they go by
|
|
// These structures are either u32 offsets of other tapes or u32 offsets into our input
|
|
// or structures.
|
|
//
|
|
// The state machine doesn't record output.
|
|
// The tape machine doesn't validate.
|
|
//
|
|
// The output of the tape machine is meaningful only if the state machine is in non-error states.
|
|
|
|
// depth adjustment is strictly based on whether we are {[ or }]
|
|
|
|
// depth adjustment is a pre-increment which, in effect, means that a {[ contained in an object
// is in the level one deeper, while the corresponding }] is at the shallower level (that of the enclosing container)
|
|
|
|
|
|
// TAPE MACHINE DEFINITIONS
|
|
|
|
const u32 DEPTH_PLUS_ONE = 0x2;
|
|
const u32 DEPTH_ZERO = 0x1;
|
|
const u32 DEPTH_MINUS_ONE = 0x0;
|
|
const u32 TAKE_UPTAPE = 0x80000000;
|
|
const u32 TAKE_INDEX = 0x0;
|
|
const u32 WRITE_ZERO = 0x0;
|
|
const u32 WRITE_FOUR = 0x4;
|
|
const u32 WRITE_EIGHT = 0x8;
|
|
|
|
const u32 CDEF = DEPTH_ZERO | TAKE_INDEX | WRITE_ZERO;
|
|
const u32 C0I4 = DEPTH_ZERO | TAKE_INDEX | WRITE_FOUR;
|
|
const u32 C0I8 = DEPTH_ZERO | TAKE_INDEX | WRITE_FOUR;
|
|
const u32 CPI0 = DEPTH_PLUS_ONE | TAKE_INDEX | WRITE_ZERO;
|
|
const u32 CMU8 = DEPTH_MINUS_ONE | TAKE_UPTAPE | WRITE_EIGHT;
|
|
|
|
// Decodes bits 0-1 of a control word: the stored values 0/1/2 mean -1/0/+1.
inline s8 get_depth_adjust(u32 control) {
    const s8 encoded = (s8)(control & 0x3);
    return encoded - 1;
}
|
|
// True when the control word has the TAKE_UPTAPE bit set.
inline bool is_uptape(u32 control) {
    return (control & TAKE_UPTAPE) != 0;
}
|
|
// Extracts the write-size field (bits 2-3) of a control word: 0, 4 or 8 bytes.
inline size_t get_write_size(u32 control) {
    return control & (WRITE_FOUR | WRITE_EIGHT);
}
|
|
|
|
// Tape-machine control word for every possible input byte, one row per high nibble.
// Only '"', '-', the digits and the four bracket characters differ from CDEF.
const u32 char_control[256] = {
    /* 0x00-0x0f */ CDEF,CDEF,CDEF,CDEF,CDEF,CDEF,CDEF,CDEF,  CDEF,CDEF,CDEF,CDEF,CDEF,CDEF,CDEF,CDEF,
    /* 0x10-0x1f */ CDEF,CDEF,CDEF,CDEF,CDEF,CDEF,CDEF,CDEF,  CDEF,CDEF,CDEF,CDEF,CDEF,CDEF,CDEF,CDEF,

    // " is 0x22, - is 0x2d
    /* 0x20-0x2f */ CDEF,CDEF,C0I4,CDEF,CDEF,CDEF,CDEF,CDEF,  CDEF,CDEF,CDEF,CDEF,CDEF,C0I8,CDEF,CDEF,

    // digits are 0x30-0x39
    /* 0x30-0x3f */ C0I8,C0I8,C0I8,C0I8,C0I8,C0I8,C0I8,C0I8,  C0I8,C0I8,CDEF,CDEF,CDEF,CDEF,CDEF,CDEF,

    /* 0x40-0x4f */ CDEF,CDEF,CDEF,CDEF,CDEF,CDEF,CDEF,CDEF,  CDEF,CDEF,CDEF,CDEF,CDEF,CDEF,CDEF,CDEF,

    // [ is 0x5b, ] is 0x5d
    /* 0x50-0x5f */ CDEF,CDEF,CDEF,CDEF,CDEF,CDEF,CDEF,CDEF,  CDEF,CDEF,CDEF,CPI0,CDEF,CMU8,CDEF,CDEF,

    /* 0x60-0x6f */ CDEF,CDEF,CDEF,CDEF,CDEF,CDEF,CDEF,CDEF,  CDEF,CDEF,CDEF,CDEF,CDEF,CDEF,CDEF,CDEF,

    // { is 0x7b, } is 0x7d
    /* 0x70-0x7f */ CDEF,CDEF,CDEF,CDEF,CDEF,CDEF,CDEF,CDEF,  CDEF,CDEF,CDEF,CPI0,CDEF,CMU8,CDEF,CDEF,

    // nothing interesting from 0x80-0xff
    /* 0x80-0x8f */ CDEF,CDEF,CDEF,CDEF,CDEF,CDEF,CDEF,CDEF,  CDEF,CDEF,CDEF,CDEF,CDEF,CDEF,CDEF,CDEF,
    /* 0x90-0x9f */ CDEF,CDEF,CDEF,CDEF,CDEF,CDEF,CDEF,CDEF,  CDEF,CDEF,CDEF,CDEF,CDEF,CDEF,CDEF,CDEF,
    /* 0xa0-0xaf */ CDEF,CDEF,CDEF,CDEF,CDEF,CDEF,CDEF,CDEF,  CDEF,CDEF,CDEF,CDEF,CDEF,CDEF,CDEF,CDEF,
    /* 0xb0-0xbf */ CDEF,CDEF,CDEF,CDEF,CDEF,CDEF,CDEF,CDEF,  CDEF,CDEF,CDEF,CDEF,CDEF,CDEF,CDEF,CDEF,
    /* 0xc0-0xcf */ CDEF,CDEF,CDEF,CDEF,CDEF,CDEF,CDEF,CDEF,  CDEF,CDEF,CDEF,CDEF,CDEF,CDEF,CDEF,CDEF,
    /* 0xd0-0xdf */ CDEF,CDEF,CDEF,CDEF,CDEF,CDEF,CDEF,CDEF,  CDEF,CDEF,CDEF,CDEF,CDEF,CDEF,CDEF,CDEF,
    /* 0xe0-0xef */ CDEF,CDEF,CDEF,CDEF,CDEF,CDEF,CDEF,CDEF,  CDEF,CDEF,CDEF,CDEF,CDEF,CDEF,CDEF,CDEF,
    /* 0xf0-0xff */ CDEF,CDEF,CDEF,CDEF,CDEF,CDEF,CDEF,CDEF,  CDEF,CDEF,CDEF,CDEF,CDEF,CDEF,CDEF,CDEF
};
|
|
|
|
const size_t MAX_TAPE_ENTRIES = 1024*1024;
|
|
const size_t MAX_TAPE = MAX_DEPTH * MAX_TAPE_ENTRIES;
|
|
u32 tape[MAX_TAPE];
|
|
|
|
// STATE MACHINE DECLARATIONS
|
|
|
|
const u32 MAX_STATES = 16;
|
|
|
|
|
|
u32 trans[MAX_STATES][256];
|
|
u32 disallow_exit[MAX_STATES][256];
|
|
|
|
u32 states[MAX_DEPTH];
|
|
const int START_STATE = 1;
|
|
// Fills in the global transition table `trans` and the `disallow_exit` table.
// Both are zero-initialised globals; this function only sets the non-zero entries.
// The indices are the raw input bytes found at structural positions.
never_inline void init_state_machine() {
    struct Edge {
        u32 from;
        char on;
        u32 to;
    };
    static const Edge edges[] = {
        // object track: { "key" : "value" , "key" ...
        { 1, '{',  2}, { 2, '"',  3}, { 3, '"',  4}, { 4, ':',  5},
        { 5, '"',  6}, { 6, '"',  7}, { 7, ',',  8}, { 8, '"',  3},
        // array track: [ "value" , "value" ...
        { 1, '[',  9}, { 9, '"', 10}, {10, '"', 11}, {11, ',', 12}, {12, '"', 10},
        // back transitions when new things are open
        { 2, '{',  2}, { 7, '{',  2}, { 9, '{',  2}, {11, '{',  2},
        { 2, '[',  9}, { 7, '[',  9}, { 9, '[',  9}, {11, '[',  9},
    };
    for (const Edge & edge : edges) {
        trans[edge.from][(u32)edge.on] = edge.to;
    }

    // unary values (first character of true/false/null or a number, plus a closing
    // bracket) take 5->7, 9->11 and 12->11
    for (const char * unary = "}]ftn0123456789-"; *unary; unary++) {
        const u32 c = (u32)*unary;
        trans[ 5][c] = 7;
        trans[ 9][c] = 11;
        trans[12][c] = 11;
    }

    // note - extra-linguistic stuff in the DFA
    // when we are in 2/7 we are OK to see a } at the shallower depth
    // when we are in 9/11 we are OK to see a ] at the shallower depth
    // nothing else should be illegal through this mechanism
    for (u32 state = 0; state < MAX_STATES; state++) {
        const bool may_close_object = (state == 2) || (state == 7);
        const bool may_close_array = (state == 9) || (state == 11);
        if (!may_close_object) {
            disallow_exit[state]['}'] = 1;
        }
        if (!may_close_array) {
            disallow_exit[state][']'] = 1;
        }
    }
}
|
|
|
|
// Stage 3: runs the tape machine and the state machine over every structural index.
//
// buf : the input bytes
// len : unused
// pj  : supplies structural_indexes / n_structural_indexes (the reserved leading
//       entries are skipped)
//
// For each structural character the tape machine adjusts the depth, writes one u32 to
// the current level's region of the global `tape` (either the input index or the deeper
// level's tape cursor) and advances that level's cursor. The state machine then steps
// the current level's state through `trans` and accumulates errors from `disallow_exit`.
//
// Returns false if a bad closing character was seen (error_sump) or if the nesting
// depth leaves the range the per-level arrays can hold; true otherwise.
// The tape contents are only meaningful when true is returned.
never_inline bool ape_machine(const u8 * buf, UNUSED size_t len, ParsedJson & pj) {

    // NOTE - our depth is used by both the tape machine and the state machine.
    // A rogue input could consist of many {[ characters or many }] characters (and an
    // aggressive sequence of [ characters is actually valid input!), so depth is range
    // checked after every adjustment; without the check it would index past the ends of
    // tape_locs and states.
    // NOTE(review): the cursor within one level is still unchecked; a level that receives
    // more than MAX_TAPE_ENTRIES slots runs on into the next level's tape region.

    u32 depth = 1;
    u32 tape_locs[MAX_DEPTH];

    for (u32 i = 0; i < MAX_DEPTH; i++) {
        tape_locs[i] = i*MAX_TAPE_ENTRIES;
        states[i] = START_STATE;
    }

    u32 error_sump = 0;
    u32 old_state = 0; // experimental
    for (u32 i = NUM_RESERVED_NODES; i < pj.n_structural_indexes; i++) {
        u32 idx = pj.structural_indexes[i];
        u8 c = buf[idx];
#ifdef DEBUG
        cout << "i: " << i << " idx: " << idx << " c " << c << "\n";
#endif
        // TAPE MACHINE

        u32 control = char_control[c];
        s8 depth_adjust = get_depth_adjust(control);
        bool take_uptape = is_uptape(control);
        u8 write_size = get_write_size(control)/4; // cursor advance in u32 slots
        depth += depth_adjust;

        // tape_locs[depth + 1] is read below, so the deepest usable level is MAX_DEPTH - 2.
        // depth is unsigned: a close bracket at depth 0 wraps to a huge value and is
        // rejected by the same comparison.
        if (depth >= MAX_DEPTH - 1) {
            return false;
        }
#ifdef DEBUG
        cout << "TAPE MACHINE: depth change " << (s32)depth_adjust << " take_uptape: " << (u32)take_uptape
             << " write_size " << (u32)write_size << " current_depth: " << depth << "\n";
#endif
        u32 uptape = tape_locs[depth+1];
        tape[tape_locs[depth]] = take_uptape ? uptape : idx;
        tape_locs[depth] += write_size;

        // STATE MACHINE
#ifdef DEBUG
        cout << "STATE MACHINE: error_sump: " << error_sump << " old state " << old_state << " disallowed_exit[old_state][c]: " << disallow_exit[old_state][c] << "\n";
        cout << "STATE MACHINE: state[depth] pre " << states[depth] << " ";
#endif
        // the check uses the state reached by the previous character, whatever its level
        error_sump |= disallow_exit[old_state][c];
        old_state = states[depth] = trans[states[depth]][c];
#ifdef DEBUG
        cout << "post " << states[depth] << "\n";
#endif
    }

#ifdef DEBUG
    for (u32 i = 0; i < MAX_DEPTH; i++) {
        u32 start_loc = i*MAX_TAPE_ENTRIES;
        cout << " tape section i " << i << " from: " << start_loc
             << " to: " << tape_locs[i] << " "
             << " size: " << (tape_locs[i]-start_loc) << "\n";
        cout << " state: " << states[i] << "\n";
        /*
        for (u32 j = start_loc; j < tape_locs[i]; j++) {
            cout << "j: " << j << " tape[j]: " << tape[j] << "\n";
        }
        */
    }
#endif
    if (error_sump) {
        return false;
    }
    return true;
}
|
|
|
|
// Terminal colours via ANSI escape sequences; see
// https://stackoverflow.com/questions/2616906/how-do-i-output-coloured-text-to-a-linux-terminal
namespace Color {
    // Numeric codes placed between "\033[" and "m" when a Modifier is streamed.
    enum Code {
        // foreground
        FG_BLACK         = 30,
        FG_RED           = 31,
        FG_GREEN         = 32,
        FG_YELLOW        = 33,
        FG_BLUE          = 34,
        FG_MAGENTA       = 35,
        FG_CYAN          = 36,
        FG_LIGHT_GRAY    = 37,
        FG_DEFAULT       = 39,
        FG_DARK_GRAY     = 90,
        FG_LIGHT_RED     = 91,
        FG_LIGHT_GREEN   = 92,
        FG_LIGHT_YELLOW  = 93,
        FG_LIGHT_BLUE    = 94,
        FG_LIGHT_MAGENTA = 95,
        FG_LIGHT_CYAN    = 96,
        FG_WHITE         = 97,
        // background
        BG_RED           = 41,
        BG_GREEN         = 42,
        BG_BLUE          = 44,
        BG_DEFAULT       = 49
    };

    // Stream manipulator: writing a Modifier to an ostream emits the escape sequence
    // for its code.
    class Modifier {
    public:
        Modifier(Code pCode) : code(pCode) {}

        friend std::ostream & operator<<(std::ostream & os, const Modifier & mod) {
            os << "\033[" << mod.code << "m";
            return os;
        }

    private:
        Code code;
    };
}
|
|
|
|
// Echoes the input to stdout with every structural character coloured: green for the
// four bracket characters { } [ ], yellow for everything else. The text between two
// consecutive structural offsets is printed uncoloured; nothing after the last
// structural character is printed. Ends with a newline and flush.
void colorfuldisplay(ParsedJson & pj, const u8 * buf) {
    const Color::Modifier bracket_colour(Color::FG_GREEN);
    const Color::Modifier other_colour(Color::FG_YELLOW);
    const Color::Modifier reset_colour(Color::FG_DEFAULT);

    const u32 * const offsets = pj.structural_indexes;
    const size_t count = pj.n_structural_indexes;

    // skip initial fluff: leading entries that repeat the same offset
    size_t cur = 0;
    while ((cur + 1 < count) && (offsets[cur] == offsets[cur + 1])) {
        ++cur;
    }

    for (; cur < count; ++cur) {
        const u32 here = offsets[cur];
        // clearing bit 5 folds { (0x7b) onto [ (0x5b) and } (0x7d) onto ] (0x5d)
        const u8 folded = buf[here] & 0xdf;
        const bool is_bracket = (folded == 0x5b) || (folded == 0x5d);
        std::cout << (is_bracket ? bracket_colour : other_colour) << buf[here] << reset_colour;

        if (cur + 1 == count) {
            continue; // last structural: there is no following offset to print up to
        }
        const u32 next = offsets[cur + 1];
        for (u32 pos = here + 1; pos < next; pos++) {
            std::cout << buf[pos];
        }
    }
    std::cout << std::endl;
}
|
|
int main(int argc, char * argv[]) {
|
|
if (argc != 2) {
|
|
cerr << "Usage: " << argv[0] << " <jsonfile>\n";
|
|
exit(1);
|
|
}
|
|
pair<u8 *, size_t> p = get_corpus(argv[1]);
|
|
ParsedJson pj;
|
|
|
|
if (posix_memalign( (void **)&pj.structurals, 8, ROUNDUP_N(p.second, 64)/8)) {
|
|
throw "Allocation failed";
|
|
};
|
|
|
|
init_state_machine();
|
|
|
|
pj.n_structural_indexes = 0;
|
|
// we have potentially 1 structure per byte of input
|
|
// as well as a dummy structure and a root structure
|
|
// we also potentially write up to 7 iterations beyond
|
|
// in our 'cheesy flatten', so make some worst-case
|
|
// sapce for that too
|
|
u32 max_structures = ROUNDUP_N(p.second, 64) + 2 + 7;
|
|
pj.structural_indexes = new u32[max_structures];
|
|
pj.nodes = new JsonNode[max_structures];
|
|
|
|
#if defined(DEBUG) || defined(DEBUG_FSM)
|
|
const u32 iterations = 1;
|
|
#else
|
|
const u32 iterations = 1000;
|
|
#endif
|
|
vector<double> res;
|
|
res.resize(iterations);
|
|
for (u32 i = 0; i < iterations; i++) {
|
|
find_structural_bits(p.first, p.second, pj);
|
|
flatten_indexes(p.second, pj);
|
|
auto start = std::chrono::steady_clock::now();
|
|
ape_machine(p.first, p.second, pj);
|
|
auto end = std::chrono::steady_clock::now();
|
|
std::chrono::duration<double> secs = end - start;
|
|
res[i] = secs.count();
|
|
}
|
|
colorfuldisplay(pj, p.first);
|
|
double min_result = *min_element(res.begin(), res.end());
|
|
cout << "Min: " << min_result << " bytes read: " << p.second << " Gigabytes/second: " << (p.second) / (min_result * 1000000000.0) << "\n";
|
|
return 0;
|
|
}
|