Remove old 4-stage path.
This commit is contained in:
parent
b9706d462c
commit
9f91650e72
9
Makefile
9
Makefile
|
@ -8,11 +8,11 @@
|
|||
|
||||
CXXFLAGS = -std=c++11 -g2 -O2 -march=native -Wall -Wextra -Wshadow -Iinclude -Ibenchmark/linux -Idependencies/double-conversion -Idependencies/rapidjson/include -Ldependencies/double-conversion/release
|
||||
LIBFLAGS = -ldouble-conversion
|
||||
EXECUTABLES=parse jsoncheck minifiercompetition parsingcompetition parseunified
|
||||
EXECUTABLES=parse jsoncheck minifiercompetition parsingcompetition
|
||||
DOUBLEEXECUTABLES=parsedouble jsoncheckdouble parsingcompetitiondouble
|
||||
|
||||
HEADERS=include/jsonparser/jsonparser.h include/jsonparser/common_defs.h include/jsonparser/jsonioutil.h benchmark/benchmark.h benchmark/linux/linux-perf-events.h include/jsonparser/simdjson_internal.h include/jsonparser/stage1_find_marks.h include/jsonparser/stage2_flatten.h include/jsonparser/stage3_ape_machine.h include/jsonparser/stage4_shovel_machine.h include/jsonparser/stage34_unified.h
|
||||
LIBFILES=src/jsonioutil.cpp src/jsonparser.cpp src/stage1_find_marks.cpp src/stage2_flatten.cpp src/stage3_ape_machine.cpp src/stage4_shovel_machine.cpp src/stage34_unified.cpp
|
||||
HEADERS=include/jsonparser/jsonparser.h include/jsonparser/common_defs.h include/jsonparser/jsonioutil.h benchmark/benchmark.h benchmark/linux/linux-perf-events.h include/jsonparser/simdjson_internal.h include/jsonparser/stage1_find_marks.h include/jsonparser/stage2_flatten.h include/jsonparser/stage34_unified.h
|
||||
LIBFILES=src/jsonioutil.cpp src/jsonparser.cpp src/stage1_find_marks.cpp src/stage2_flatten.cpp src/stage34_unified.cpp
|
||||
MINIFIERHEADERS=include/jsonparser/jsonminifier.h include/jsonparser/simdprune_tables.h
|
||||
MINIFIERLIBFILES=src/jsonminifier.cpp
|
||||
|
||||
|
@ -39,9 +39,6 @@ bench: benchmarks/bench.cpp $(RAPIDJSON_INCLUDE) $(HEADERS)
|
|||
$(CXX) -std=c++11 -O3 -o $@ benchmarks/bench.cpp -I$(RAPIDJSON_INCLUDE) -Iinclude -march=native -lm -Wall -Wextra -Wno-narrowing
|
||||
|
||||
|
||||
parseunified: benchmark/parse.cpp $(HEADERS) $(LIBFILES)
|
||||
$(CXX) $(CXXFLAGS) -o parseunified $(LIBFILES) benchmark/parse.cpp $(LIBFLAGS) -DTEST_UNIFIED
|
||||
|
||||
parse: benchmark/parse.cpp $(HEADERS) $(LIBFILES)
|
||||
$(CXX) $(CXXFLAGS) -o parse $(LIBFILES) benchmark/parse.cpp $(LIBFLAGS)
|
||||
|
||||
|
|
|
@ -22,8 +22,6 @@
|
|||
#include <vector>
|
||||
#include <x86intrin.h>
|
||||
|
||||
//#define TEST_UNIFIED
|
||||
|
||||
/// FIXME: enable double conversion
|
||||
// #define DOUBLECONV
|
||||
#ifdef DOUBLECONV
|
||||
|
@ -39,8 +37,6 @@ using namespace double_conversion;
|
|||
#include "jsonparser/simdjson_internal.h"
|
||||
#include "jsonparser/stage1_find_marks.h"
|
||||
#include "jsonparser/stage2_flatten.h"
|
||||
#include "jsonparser/stage3_ape_machine.h"
|
||||
#include "jsonparser/stage4_shovel_machine.h"
|
||||
#include "jsonparser/stage34_unified.h"
|
||||
using namespace std;
|
||||
|
||||
|
@ -129,7 +125,6 @@ int main(int argc, char *argv[]) {
|
|||
cerr << "Currently only support JSON files < 16MB\n";
|
||||
exit(1);
|
||||
}
|
||||
init_state_machine();
|
||||
|
||||
pj.n_structural_indexes = 0;
|
||||
// we have potentially 1 structure per byte of input
|
||||
|
@ -159,8 +154,8 @@ int main(int argc, char *argv[]) {
|
|||
LinuxEvents<PERF_TYPE_HARDWARE> unified(evts);
|
||||
vector<u64> results;
|
||||
results.resize(evts.size());
|
||||
unsigned long cy1 = 0, cy2 = 0, cy3 = 0, cy4 = 0;
|
||||
unsigned long cl1 = 0, cl2 = 0, cl3 = 0, cl4 = 0;
|
||||
unsigned long cy1 = 0, cy2 = 0, cy3 = 0;
|
||||
unsigned long cl1 = 0, cl2 = 0, cl3 = 0;
|
||||
#endif
|
||||
bool isok = true;
|
||||
for (u32 i = 0; i < iterations; i++) {
|
||||
|
@ -191,31 +186,6 @@ int main(int argc, char *argv[]) {
|
|||
unified.start();
|
||||
#endif
|
||||
|
||||
#ifndef TEST_UNIFIED
|
||||
|
||||
isok = ape_machine(p.first, p.second, pj);
|
||||
#ifndef SQUASH_COUNTERS
|
||||
unified.end(results);
|
||||
cy3 += results[0];
|
||||
cl3 += results[1];
|
||||
if (!isok) {
|
||||
cout << "Failed out during stage 3\n";
|
||||
break;
|
||||
}
|
||||
unified.start();
|
||||
#endif
|
||||
isok = shovel_machine(p.first, p.second, pj);
|
||||
#ifndef SQUASH_COUNTERS
|
||||
unified.end(results);
|
||||
cy4 += results[0];
|
||||
cl4 += results[1];
|
||||
#endif
|
||||
if (!isok) {
|
||||
cout << "Failed out during stage 4\n";
|
||||
break;
|
||||
}
|
||||
#else
|
||||
|
||||
isok = unified_machine(p.first, p.second, pj);
|
||||
#ifndef SQUASH_COUNTERS
|
||||
unified.end(results);
|
||||
|
@ -227,7 +197,6 @@ int main(int argc, char *argv[]) {
|
|||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
auto end = std::chrono::steady_clock::now();
|
||||
std::chrono::duration<double> secs = end - start;
|
||||
res[i] = secs.count();
|
||||
|
@ -237,7 +206,7 @@ int main(int argc, char *argv[]) {
|
|||
printf("number of bytes %ld number of structural chars %d ratio %.3f\n",
|
||||
p.second, pj.n_structural_indexes,
|
||||
(double)pj.n_structural_indexes / p.second);
|
||||
unsigned long total = cy1 + cy2 + cy3 + cy4;
|
||||
unsigned long total = cy1 + cy2 + cy3;
|
||||
|
||||
printf(
|
||||
"stage 1 instructions: %10lu cycles: %10lu (%.2f %%) ins/cycles: %.2f \n",
|
||||
|
@ -261,14 +230,6 @@ int main(int argc, char *argv[]) {
|
|||
printf("%.2f cycles per structural character.\n",
|
||||
(double)cy3 / (iterations * pj.n_structural_indexes));
|
||||
|
||||
printf(
|
||||
"stage 4 instructions: %10lu cycles: %10lu (%.2f %%) ins/cycles: %.2f \n",
|
||||
cl4, cy4, 100. * cy4 / total, (double)cl4 / cy4);
|
||||
printf(" stage 4 runs at %.2f cycles per input byte and ",
|
||||
(double)cy4 / (iterations * p.second));
|
||||
printf("%.2f cycles per structural character.\n",
|
||||
(double)cy4 / (iterations * pj.n_structural_indexes));
|
||||
|
||||
printf(" all stages: %.2f cycles per input byte.\n",
|
||||
(double)total / (iterations * p.second));
|
||||
#endif
|
||||
|
|
|
@ -46,7 +46,6 @@ int main(int argc, char *argv[]) {
|
|||
int repeat = 10;
|
||||
int volume = p.second;
|
||||
BEST_TIME(json_parse(p.first, p.second, pj), true, , repeat, volume, true);
|
||||
BEST_TIME(json_parse_4stages(p.first, p.second, pj), true, , repeat, volume, true);
|
||||
|
||||
rapidjson::Document d;
|
||||
|
||||
|
|
|
@ -5,8 +5,6 @@
|
|||
#include "simdjson_internal.h"
|
||||
#include "stage1_find_marks.h"
|
||||
#include "stage2_flatten.h"
|
||||
#include "stage3_ape_machine.h"
|
||||
#include "stage4_shovel_machine.h"
|
||||
#include "stage34_unified.h"
|
||||
|
||||
// Allocate a ParsedJson structure that can support document
|
||||
|
@ -22,6 +20,3 @@ void deallocate_ParsedJson(ParsedJson *pj_ptr);
|
|||
// Parse a document found in buf, need to preallocate ParsedJson.
|
||||
// Return false in case of a failure.
|
||||
bool json_parse(const u8 *buf, size_t len, ParsedJson &pj);
|
||||
|
||||
// Like json_parse but uses 4 stages; slower.
|
||||
bool json_parse_4stages(const u8 *buf, size_t len, ParsedJson &pj);
|
||||
|
|
|
@ -1,7 +0,0 @@
|
|||
#pragma once
|
||||
|
||||
#include "common_defs.h"
|
||||
#include "simdjson_internal.h"
|
||||
|
||||
void init_state_machine();
|
||||
bool ape_machine(const u8 *buf, size_t len, ParsedJson &pj);
|
|
@ -1,6 +0,0 @@
|
|||
#pragma once
|
||||
|
||||
#include "common_defs.h"
|
||||
#include "simdjson_internal.h"
|
||||
|
||||
bool shovel_machine(const u8 *buf, size_t len, ParsedJson &pj);
|
|
@ -44,27 +44,6 @@ void deallocate_ParsedJson(ParsedJson *pj_ptr) {
|
|||
delete pj_ptr;
|
||||
}
|
||||
|
||||
// parse a document found in buf, need to preallocate ParsedJson.
|
||||
// this can probably be considered a legacy function at this point.
|
||||
bool json_parse_4stages(const u8 *buf, size_t len, ParsedJson &pj) {
|
||||
if (pj.bytecapacity < len) {
|
||||
std::cerr << "Your ParsedJson cannot support documents that big: " << len
|
||||
<< std::endl;
|
||||
return false;
|
||||
}
|
||||
bool isok = find_structural_bits(buf, len, pj);
|
||||
if (isok) {
|
||||
isok = flatten_indexes(len, pj);
|
||||
}
|
||||
if (isok) {
|
||||
isok = ape_machine(buf, len, pj);
|
||||
}
|
||||
if (isok) {
|
||||
isok = shovel_machine(buf, len, pj);
|
||||
}
|
||||
return isok;
|
||||
}
|
||||
|
||||
// parse a document found in buf, need to preallocate ParsedJson.
|
||||
bool json_parse(const u8 *buf, size_t len, ParsedJson &pj) {
|
||||
if (pj.bytecapacity < len) {
|
||||
|
|
|
@ -1,338 +0,0 @@
|
|||
#ifdef _MSC_VER
|
||||
/* Microsoft C/C++-compatible compiler */
|
||||
#include <intrin.h>
|
||||
#else
|
||||
#include <immintrin.h>
|
||||
#include <x86intrin.h>
|
||||
#endif
|
||||
|
||||
#include <cassert>
|
||||
#include <cstring>
|
||||
|
||||
#include "jsonparser/common_defs.h"
|
||||
#include "jsonparser/simdjson_internal.h"
|
||||
|
||||
// the ape machine consists of two parts:
|
||||
//
|
||||
// 1) The "state machine", which is a multiple channel per-level state machine
|
||||
// It is a conventional DFA except in that it 'changes track' on {}[]
|
||||
// characters
|
||||
//
|
||||
// 2) The "tape machine": this records offsets of various structures as they go
|
||||
// by
|
||||
// These structures are either u32 offsets of other tapes or u32 offsets into
|
||||
// our input or structures.
|
||||
//
|
||||
// The state machine doesn't record output.
|
||||
// The tape machine doesn't validate.
|
||||
//
|
||||
// The output of the tape machine is meaningful only if the state machine is in
|
||||
// non-error states.
|
||||
|
||||
// depth adjustment is strictly based on whether we are {[ or }]
|
||||
|
||||
// depth adjustment is a pre-increment which, in effect, means that a {[
|
||||
// contained in an object is in the level one deeper, while the corresponding }]
|
||||
// is at the level
|
||||
|
||||
// TAPE MACHINE DEFINITIONS
|
||||
|
||||
const u32 DEPTH_PLUS_ONE = 0x01000000;
|
||||
const u32 DEPTH_ZERO = 0x00000000;
|
||||
const u32 DEPTH_MINUS_ONE = 0xff000000;
|
||||
const u32 WRITE_ZERO = 0x0;
|
||||
const u32 WRITE_FOUR = 0x1;
|
||||
|
||||
const u32 CDF = DEPTH_ZERO | WRITE_ZERO; // default 'control'
|
||||
const u32 C04 = DEPTH_ZERO | WRITE_FOUR;
|
||||
const u32 CP4 = DEPTH_PLUS_ONE | WRITE_FOUR;
|
||||
const u32 CM4 = DEPTH_MINUS_ONE | WRITE_FOUR;
|
||||
|
||||
inline s8 get_depth_adjust(u32 control) { return (s8)(((s32)control) >> 24); }
|
||||
inline size_t get_write_size(u32 control) { return control & 0xff; }
|
||||
|
||||
// Control word for every possible input byte, one row of 16 entries per high
// nibble. Only the characters that can start a structural element carry a
// non-default control: " - 0-9 [ ] f n t { }.
const u32 char_control[256] = {
    // 0x00-0x0f: nothing interesting
    CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF,
    // 0x10-0x1f: nothing interesting
    CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF,
    // 0x20-0x2f: " is 0x22, - is 0x2d
    CDF, CDF, C04, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, C04, CDF, CDF,
    // 0x30-0x3f: digits are 0x30-0x39
    C04, C04, C04, C04, C04, C04, C04, C04, C04, C04, CDF, CDF, CDF, CDF, CDF, CDF,
    // 0x40-0x4f: nothing interesting
    CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF,
    // 0x50-0x5f: [ is 0x5b, ] is 0x5d
    CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CP4, CDF, CM4, CDF, CDF,
    // 0x60-0x6f: f is 0x66, n is 0x6e
    CDF, CDF, CDF, CDF, CDF, CDF, C04, CDF, CDF, CDF, CDF, CDF, CDF, CDF, C04, CDF,
    // 0x70-0x7f: t is 0x74, { is 0x7b, } is 0x7d
    CDF, CDF, CDF, CDF, C04, CDF, CDF, CDF, CDF, CDF, CDF, CP4, CDF, CM4, CDF, CDF,
    // 0x80-0xff: nothing interesting
    CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF,
    CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF,
    CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF,
    CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF,
    CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF,
    CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF,
    CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF,
    CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF};
|
||||
|
||||
// all of this stuff needs to get moved somewhere reasonable
|
||||
// like our ParsedJson structure
|
||||
/*
|
||||
u64 tape[MAX_TAPE];
|
||||
u32 tape_locs[MAX_DEPTH];
|
||||
u8 string_buf[512*1024];
|
||||
u8 * current_string_buf_loc;
|
||||
u8 number_buf[512*1024]; // holds either doubles or longs, really
|
||||
u8 * current_number_buf_loc;
|
||||
*/
|
||||
|
||||
// STATE MACHINE DECLARATIONS
|
||||
const u32 MAX_STATES = 16;
|
||||
|
||||
/**
|
||||
* It is annoying to have to call init_state_machine each time.
|
||||
* Better to precompute the (small) result into a header file.
|
||||
*/
|
||||
// u32 trans[MAX_STATES][256];
|
||||
#include "jsonparser/transitions.h"
|
||||
|
||||
u32 states[MAX_DEPTH];
|
||||
const int START_STATE = 1;
|
||||
|
||||
// Indexed by state number: non-zero when a depth level is allowed to be left
// in that state once all input has been consumed.
u32 valid_end_states[MAX_STATES] = {
    /*  0 */ 0, // the error state, by definition
    /*  1 */ 1, // still in the start state: fine
    /*  2 */ 1, // saw a '{'; acceptable if we have left this level
    /*  3 */ 0, // abolished state, should never be occupied

    /*  4 */ 0, // saw a string inside an object: cannot end here
    /*  5 */ 0, // saw a string followed by a colon: cannot end here
    /*  6 */ 0, // abolished state
    /*  7 */ 1, // finishing on 7 is fine

    /*  8 */ 0, // saw a comma inside an object: cannot end here
    /*  9 */ 1, // the array counterpart of state 2: fine
    /* 10 */ 0, // abolished state
    /* 11 */ 1, // just saw a unary inside an array: fine

    /* 12 */ 0, // saw a comma inside an array: cannot end here
    /* 13 */ 0, // the special start-depth start state; something must have
                // been seen before the end
    /* 14 */ 1, // fine to rest here; only seeing anything *more* is an error
    /* 15 */ 0, // unused state
};
|
||||
|
||||
// weird sub-machine for starting depth only
|
||||
// we start at 13 and go to 14 on a single UNARY
|
||||
// 14 doesn't have to have any transitions. Anything
|
||||
// else arrives after the single thing it's an error
|
||||
const int START_DEPTH_START_STATE = 13;
|
||||
|
||||
// ANYTHING_IS_ERROR_STATE is useful both as a target
|
||||
// for a transition at the start depth and also as
|
||||
// a good initial value for "red line" depths; that
|
||||
// is, depths that are maintained strictly to avoid
|
||||
// undefined behavior (e.g. depths below the starting
|
||||
// depth).
|
||||
const int ANYTHING_IS_ERROR_STATE = 14;
|
||||
|
||||
void init_state_machine() {
|
||||
// states 10 and 6 eliminated
|
||||
|
||||
trans[1][(int)'{'] = 2;
|
||||
trans[2][(int)'"'] = 4;
|
||||
trans[4][(int)':'] = 5;
|
||||
// 5->7 on all values ftn0123456789-"
|
||||
trans[7][(int)','] = 8;
|
||||
trans[8][(int)'"'] = 4;
|
||||
|
||||
trans[1][(int)'['] = 9;
|
||||
// 9->11 on all values ftn0123456789-"
|
||||
trans[11][(int)','] = 12;
|
||||
// 12->11 on all values ftn0123456789-"
|
||||
|
||||
const char *UNARIES = "}]ftn0123456789-\"";
|
||||
for (u32 i = 0; i < strlen(UNARIES); i++) {
|
||||
trans[5][(u32)UNARIES[i]] = 7;
|
||||
trans[9][(u32)UNARIES[i]] = 11;
|
||||
trans[12][(u32)UNARIES[i]] = 11;
|
||||
#ifdef PERMIT_RANDOM_UNARIES_AT_TOP_LEVEL
|
||||
// NOTE: if we permit JSON documents that
|
||||
// contain a single number or string, then we
|
||||
// allow all the unaries at the top level
|
||||
trans[13][(u32)UNARIES[i]] = 14;
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifndef PERMIT_RANDOM_UNARIES_AT_TOP_LEVEL
|
||||
// NOTE: if we don't permit JSON documents that
|
||||
// that contain a single number or string, we must
|
||||
// make sure we accept the top-level closing braces
|
||||
// that are delivered to the start depth only
|
||||
trans[13][(int)'}'] = 14;
|
||||
trans[13][(int)']'] = 14;
|
||||
#endif
|
||||
|
||||
// back transitions when new things are open
|
||||
trans[2][(int)'{'] = 2;
|
||||
trans[7][(int)'{'] = 2;
|
||||
trans[9][(int)'{'] = 2;
|
||||
trans[11][(int)'{'] = 2;
|
||||
trans[2][(int)'['] = 9;
|
||||
trans[7][(int)'['] = 9;
|
||||
trans[9][(int)'['] = 9;
|
||||
trans[11][(int)'['] = 9;
|
||||
}
|
||||
|
||||
bool ape_machine(const u8 *buf, UNUSED size_t len, ParsedJson &pj) {
|
||||
|
||||
// NOTE - our depth is used by both the tape machine and the state machine
|
||||
// Further, in production we will set it to a largish value in a generous
|
||||
// buffer as a rogue input could consist of many {[ characters or many }]
|
||||
// characters. We aren't busily checking errors (and in fact, a aggressive
|
||||
// sequence of [ characters is actually valid input!) so something that blows
|
||||
// out maximum depth will need to be periodically checked for, as will
|
||||
// something that tries to set depth very low. If we set our starting depth,
|
||||
// say, to 256, we can tolerate 256 bogus close brace characters without
|
||||
// aggressively going wrong and writing to bad memory Note that any specious
|
||||
// depth can have a specious tape associated with and all these specious
|
||||
// depths can share a region of the tape - it's harmless. Since tape is
|
||||
// one-way, any movement in a specious tape is an error (so we can detect
|
||||
// max_depth violations by making sure that specious tape locations haven't
|
||||
// moved from their starting values)
|
||||
|
||||
u32 depth = START_DEPTH;
|
||||
|
||||
for (u32 i = 0; i < MAX_DEPTH; i++) {
|
||||
pj.tape_locs[i] = i * MAX_TAPE_ENTRIES;
|
||||
if (i == START_DEPTH) {
|
||||
states[i] = START_DEPTH_START_STATE;
|
||||
} else if ((i < START_DEPTH) || (i >= REDLINE_DEPTH)) {
|
||||
states[i] = ANYTHING_IS_ERROR_STATE;
|
||||
} else {
|
||||
states[i] = START_STATE;
|
||||
}
|
||||
}
|
||||
|
||||
pj.current_string_buf_loc = pj.string_buf;
|
||||
pj.current_number_buf_loc = pj.number_buf;
|
||||
|
||||
u32 error_sump = 0;
|
||||
u32 old_tape_loc = pj.tape_locs[depth]; // need to initialize for first write
|
||||
|
||||
u32 next_idx = pj.structural_indexes[0];
|
||||
u8 next_c = buf[next_idx];
|
||||
u32 next_control = char_control[next_c];
|
||||
|
||||
for (u32 i = 0; i < pj.n_structural_indexes; i++) {
|
||||
|
||||
// very periodic safety checking. This does NOT guarantee that we
|
||||
// haven't been in our dangerous zones above or below our normal
|
||||
// depths. It ONLY checks to be sure that we don't manage to leave
|
||||
// these zones and write completely off our tape.
|
||||
if (!(i % DEPTH_SAFETY_MARGIN)) {
|
||||
if (depth < START_DEPTH || depth >= REDLINE_DEPTH) {
|
||||
error_sump |= 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
u32 idx = next_idx;
|
||||
u8 c = next_c;
|
||||
u32 control = next_control;
|
||||
|
||||
next_idx = pj.structural_indexes[i + 1];
|
||||
next_c = buf[next_idx];
|
||||
next_control = char_control[next_c];
|
||||
|
||||
// TAPE MACHINE
|
||||
s8 depth_adjust = get_depth_adjust(control);
|
||||
u8 write_size = get_write_size(control);
|
||||
u32 write_val = (depth_adjust != 0) ? old_tape_loc : idx;
|
||||
depth += depth_adjust;
|
||||
#ifdef DEBUG
|
||||
cout << "i: " << i << " idx: " << idx << " c " << c << "\n";
|
||||
cout << "TAPE MACHINE: depth change " << (s32)depth_adjust << " write_size "
|
||||
<< (u32)write_size << " current_depth: " << depth << "\n";
|
||||
#endif
|
||||
|
||||
// STATE MACHINE - hoisted here to fill in during the tape machine's
|
||||
// latencies
|
||||
#ifdef DEBUG
|
||||
cout << "STATE MACHINE: state[depth] pre " << states[depth] << " ";
|
||||
#endif
|
||||
states[depth] = trans[states[depth]][c];
|
||||
#ifdef DEBUG
|
||||
cout << "post " << states[depth] << "\n";
|
||||
#endif
|
||||
// TAPE MACHINE, again
|
||||
pj.tape[pj.tape_locs[depth]] = write_val | (((u64)c) << 56);
|
||||
old_tape_loc = pj.tape_locs[depth] += write_size;
|
||||
}
|
||||
|
||||
if (depth != START_DEPTH) {
|
||||
// We haven't returned to our start depth, so our braces can't possibly
|
||||
// match Note this doesn't exclude the possibility that we have improperly
|
||||
// matched { } or [] pairs
|
||||
return false;
|
||||
}
|
||||
|
||||
for (u32 i = 0; i < MAX_DEPTH; i++) {
|
||||
if (!valid_end_states[states[i]]) {
|
||||
#ifdef DEBUG
|
||||
printf("Invalid ending state: states[%d] == %d\n", states[i]);
|
||||
#endif
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
#define DUMP_TAPES
|
||||
#ifdef DEBUG
|
||||
for (u32 i = 0; i < MAX_DEPTH; i++) {
|
||||
u32 start_loc = i * MAX_TAPE_ENTRIES;
|
||||
cout << " tape section i " << i;
|
||||
if (i == START_DEPTH) {
|
||||
cout << " (START) ";
|
||||
} else if ((i < START_DEPTH) || (i >= REDLINE_DEPTH)) {
|
||||
cout << " (REDLINE) ";
|
||||
} else {
|
||||
cout << " (NORMAL) ";
|
||||
}
|
||||
|
||||
cout << " from: " << start_loc << " to: " << tape_locs[i] << " "
|
||||
<< " size: " << (tape_locs[i] - start_loc) << "\n";
|
||||
cout << " state: " << states[i] << "\n";
|
||||
#ifdef DUMP_TAPES
|
||||
for (u32 j = start_loc; j < tape_locs[i]; j++) {
|
||||
if (tape[j]) {
|
||||
cout << "j: " << j << " tape[j] char " << (char)(tape[j] >> 56)
|
||||
<< " tape[j][0..55]: " << (tape[j] & 0xffffffffffffffULL) << "\n";
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
if (error_sump) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
|
@ -1,654 +0,0 @@
|
|||
#ifdef _MSC_VER
|
||||
/* Microsoft C/C++-compatible compiler */
|
||||
#include <intrin.h>
|
||||
#else
|
||||
#include <immintrin.h>
|
||||
#include <x86intrin.h>
|
||||
#endif
|
||||
|
||||
#include <cassert>
|
||||
#include <cstring>
|
||||
|
||||
#include "jsonparser/common_defs.h"
|
||||
#include "jsonparser/simdjson_internal.h"
|
||||
|
||||
// they are { 0x7b } 0x7d : 0x3a [ 0x5b ] 0x5d , 0x2c
|
||||
// these go into the first 3 buckets of the comparison (1/2/4)
|
||||
|
||||
// we are also interested in the four whitespace characters
|
||||
// space 0x20, linefeed 0x0a, horizontal tab 0x09 and carriage return 0x0d
|
||||
|
||||
// 0 for the six structural characters { } : [ ] , and the four whitespace
// characters (space, LF, TAB, CR); 1 for every other byte value.
// One row of 16 entries per high nibble.
const u32 structural_or_whitespace_negated[256] = {
    1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, // 0x0. : 09 TAB, 0a LF, 0d CR
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x1.
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, // 0x2. : 20 space, 2c ,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, // 0x3. : 3a :
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x4.
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, // 0x5. : 5b [, 5d ]
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x6.
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, // 0x7. : 7b {, 7d }
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x8.
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x9.
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xa.
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xb.
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xc.
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xd.
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xe.
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1  // 0xf.
};
|
||||
|
||||
// return non-zero if not a structural or whitespace char
|
||||
// zero otherwise
|
||||
// Table lookup: yields 0 when c is a structural or whitespace character and
// a non-zero value for any other byte.
really_inline u32 is_not_structural_or_whitespace(u8 c) {
  const u32 flag = structural_or_whitespace_negated[c];
  return flag;
}
|
||||
|
||||
// These chars yield themselves: " \ /
|
||||
// b -> backspace, f -> formfeed, n -> newline, r -> cr, t -> horizontal tab
|
||||
// u not handled in this table as it's complex
|
||||
const u8 escape_map[256] = {
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x0.
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0x22, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x2f,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x4.
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x5c, 0, 0, 0, // 0x5.
|
||||
0, 0, 0x08, 0, 0, 0, 0x12, 0, 0, 0, 0, 0, 0, 0, 0x0a, 0, // 0x6.
|
||||
0, 0, 0x0d, 0, 0x09, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x7.
|
||||
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
};
|
||||
|
||||
// Number of UTF-8 bytes needed for a code point, 0 meaning "not encodable".
// NOTE(review): despite the name, the entries are ordered by the number of
// significant bits in the code point (entries 0-7 -> 1 byte, 8-11 -> 2,
// 12-16 -> 3, 17-21 -> 4, 22-32 -> error). A lookup keyed directly on a
// leading-zero count lands in the wrong bucket - confirm against callers.
const u32 leading_zeros_to_utf_bytes[33] = {
    // up to 7 bits: single byte
    1, 1, 1, 1, 1, 1, 1, 1,
    // up to 11 bits: two bytes
    2, 2, 2, 2,
    // up to 16 bits: three bytes
    3, 3, 3, 3, 3,
    // up to 21 bits: four bytes
    4, 4, 4, 4, 4,
    // anything wider: error
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};

// Indexed by UTF-8 byte count: the payload bit positions of the encoded
// sequence, used as a deposit mask to spread the code point's bits.
const u32 UTF_PDEP_MASK[5] = {
    0x00,       // 0 bytes: error
    0x7f,       // 1 byte
    0x1f3f,     // 2 bytes
    0x0f3f3f,   // 3 bytes
    0x073f3f3f, // 4 bytes
};

// Indexed by UTF-8 byte count: the fixed marker bits (lead-byte prefix and
// 10xxxxxx continuation prefixes) OR-ed onto the deposited payload.
const u32 UTF_OR_MASK[5] = {
    0x00,       // 0 bytes: error
    0x00,       // 1 byte: plain ASCII, no marker bits
    0xc080,     // 2 bytes
    0xe08080,   // 3 bytes
    0xf0808080, // 4 bytes
};
|
||||
|
||||
// True when v is an ASCII hexadecimal digit: 0-9, a-f or A-F.
bool is_hex_digit(u8 v) {
  const bool is_decimal = (v >= '0' && v <= '9');
  // Clearing bit 5 folds lower-case letters onto their upper-case forms.
  const u8 folded = v & 0xdf;
  const bool is_letter = (folded >= 'A' && folded <= 'F');
  return is_decimal || is_letter;
}
|
||||
|
||||
// Converts a hexadecimal digit character to its value.
// No validation is performed: anything that is not 0-9 is treated as a
// letter, so callers are expected to check with is_hex_digit first.
u8 digit_to_val(u8 v) {
  const bool is_decimal = (v >= '0' && v <= '9');
  if (is_decimal) {
    return v - '0';
  }
  // Clear bit 5 so that 'a'..'f' and 'A'..'F' are handled alike.
  const u8 folded = v & 0xdf;
  return folded - 'A' + 10;
}
|
||||
|
||||
bool hex_to_u32(const u8 *src, u32 *res) {
|
||||
u8 v1 = src[0];
|
||||
u8 v2 = src[1];
|
||||
u8 v3 = src[2];
|
||||
u8 v4 = src[3];
|
||||
if (!is_hex_digit(v1) || !is_hex_digit(v2) || !is_hex_digit(v3) ||
|
||||
!is_hex_digit(v4)) {
|
||||
return false;
|
||||
}
|
||||
*res = digit_to_val(v1) << 24 | digit_to_val(v2) << 16 |
|
||||
digit_to_val(v3) << 8 | digit_to_val(v4);
|
||||
return true;
|
||||
}
|
||||
|
||||
// handle a unicode codepoint
|
||||
// write appropriate values into dest
|
||||
// src will always advance 6 bytes
|
||||
// dest will advance a variable amount (return via pointer)
|
||||
// return true if the unicode codepoint was valid
|
||||
// We work in little-endian then swap at write time
|
||||
really_inline bool handle_unicode_codepoint(const u8 **src_ptr, u8 **dst_ptr) {
|
||||
u32 code_point = 0; // read the hex, potentially reading another \u beyond if
|
||||
// it's a // wacky one
|
||||
if (!hex_to_u32(*src_ptr + 2, &code_point)) {
|
||||
return false;
|
||||
}
|
||||
*src_ptr += 6;
|
||||
// check for the weirdo double-UTF-16 nonsense for things outside Basic
|
||||
// Multilingual Plane.
|
||||
if (code_point >= 0xd800 && code_point < 0xdc00) {
|
||||
// TODO: sanity check and clean up; snippeted from RapidJSON and poorly
|
||||
// understood at the moment
|
||||
if (((*src_ptr)[0] != '\\') || (*src_ptr)[1] != 'u') {
|
||||
return false;
|
||||
}
|
||||
u32 code_point_2 = 0;
|
||||
if (!hex_to_u32(*src_ptr + 2, &code_point_2)) {
|
||||
return false;
|
||||
}
|
||||
if (code_point_2 < 0xdc00 || code_point_2 > 0xdfff) {
|
||||
return false;
|
||||
}
|
||||
code_point =
|
||||
(((code_point - 0xd800) << 10) | (code_point_2 - 0xdc00)) + 0x10000;
|
||||
*src_ptr += 6;
|
||||
}
|
||||
// TODO: check to see whether the below code is nonsense (it's really only a
|
||||
// sketch at this point)
|
||||
u32 lz = __builtin_clz(code_point);
|
||||
u32 utf_bytes = leading_zeros_to_utf_bytes[lz];
|
||||
u32 tmp =
|
||||
_pdep_u32(code_point, UTF_PDEP_MASK[utf_bytes]) | UTF_OR_MASK[utf_bytes];
|
||||
// swap and move to the other side of the register
|
||||
tmp = __builtin_bswap32(tmp);
|
||||
tmp >>= ((4 - utf_bytes) * 8) & 31; // if utf_bytes, this could become a shift
|
||||
// by 32, hence the mask with 31
|
||||
// use memcpy to avoid undefined behavior:
|
||||
std::memcpy(*(u32 **)dst_ptr, &tmp, sizeof(u32)); //**(u32 **)dst_ptr = tmp;
|
||||
*dst_ptr += utf_bytes;
|
||||
return true;
|
||||
}
|
||||
|
||||
// Processes the string whose opening quote sits at the input offset stored in
// the low 24 bits of pj.tape[tape_loc], copying it into pj's string buffer.
//
// buf      : the input document.
// len      : unused.
// pj       : supplies the tape and the string buffer cursor.
// tape_loc : index of the tape entry describing this string.
//
// One 32-byte chunk is loaded from the input and stored to the destination
// unconditionally; the first quote and the first backslash in the chunk then
// decide what happens:
//   * quote first     : NUL-terminate, advance pj.current_string_buf_loc past
//                       the string, rewrite the tape entry, return true;
//   * backslash first : handle the escape (\u via handle_unicode_codepoint,
//                       everything else via escape_map); an unknown escape
//                       returns false;
//   * neither         : step over the chunk.
//
// NOTE(review): every path through the loop body returns, so the loop never
// runs a second pass; strings that do not close within the first chunk, or
// that contain an escape, return true without updating the tape entry or the
// string buffer cursor. This looks like an unfinished implementation -
// confirm the intent before relying on it.
// NOTE(review): __builtin_ctz is undefined for a zero argument; the "neither
// found" case relies on both calls yielding the same value - confirm on the
// target compiler.
really_inline bool parse_string(const u8 *buf, UNUSED size_t len,
                                ParsedJson &pj, u32 tape_loc) {
  u32 offset = pj.tape[tape_loc] & 0xffffff;
  const u8 *src = &buf[offset + 1]; // buf[offset] is known to be the opening "
  u8 *dst = pj.current_string_buf_loc;
#ifdef DEBUG
  cout << "Entering parse string with offset " << offset << "\n";
#endif
  // basic non-sexy parsing code
  while (1) {
#ifdef DEBUG
    for (u32 j = 0; j < 32; j++) {
      char c = *(src + j);
      if (isprint(c)) {
        cout << c;
      } else {
        cout << '_';
      }
    }
    cout << "| ... string handling input\n";
#endif
    m256 chunk = _mm256_loadu_si256((const m256 *)(src));

    m256 backslash_hits = _mm256_cmpeq_epi8(chunk, _mm256_set1_epi8('\\'));
    u32 bs_bits = (u32)_mm256_movemask_epi8(backslash_hits);
    dumpbits32(bs_bits, "backslash bits 2");

    m256 quote_hits = _mm256_cmpeq_epi8(chunk, _mm256_set1_epi8('"'));
    u32 quote_bits = (u32)_mm256_movemask_epi8(quote_hits);
    dumpbits32(quote_bits, "quote_bits");

    u32 quote_dist = __builtin_ctz(quote_bits);
    u32 bs_dist = __builtin_ctz(bs_bits);

    // Store the whole chunk unconditionally; bytes we do not want are
    // overwritten later.
    _mm256_storeu_si256((m256 *)(dst), chunk);
#ifdef DEBUG
    cout << "quote dist: " << quote_dist << " bs dist: " << bs_dist << "\n";
#endif

    if (quote_dist == bs_dist) {
      // A quote and a backslash cannot share a position, so equal distances
      // mean that neither was found in this chunk.
      src += 32;
      dst += 32;
      return true;
    }

    if (quote_dist < bs_dist) {
#ifdef DEBUG
      cout << "Found end, leaving!\n";
#endif
      // The closing quote comes first: terminate the copy and finish.
      dst[quote_dist] = 0;
      pj.current_string_buf_loc = dst + quote_dist + 1;
      pj.tape[tape_loc] =
          ((u32)'"') << 24 |
          (pj.current_string_buf_loc -
           pj.string_buf); // assume 2^24 will hold all strings for now
      return true;
    }

    // Otherwise a backslash comes first.
    u8 escape_char = src[bs_dist + 1];
#ifdef DEBUG
    cout << "Found escape char: " << escape_char << "\n";
#endif
    if (escape_char == 'u') {
      // Move src/dst up to the backslash; the code point handler adjusts
      // them further.
      src += bs_dist;
      dst += bs_dist;
      return handle_unicode_codepoint(&src, &dst);
    }

    // Simple 1:1 conversion: consumes bs_dist + 2 input bytes and produces
    // bs_dist + 1 output bytes. This may reach beyond the part of the buffer
    // that has been examined so far.
    u8 escape_result = escape_map[escape_char];
    if (!escape_result) {
      return false; // bogus escape value is an error
    }
    dst[bs_dist] = escape_result;
    src += bs_dist + 2;
    dst += bs_dist + 1;
    return true;
  }
  // Possible later extensions:
  //   on \\ detect whether it is a substantial run of backslashes or just
  //   eat 2 chars and write 1; handle anything short of \u or \\\ (as a
  //   prefix) with PSHUFB tricks without leaving SIMD.
  return true;
}
|
||||
|
||||
#ifdef DOUBLECONV
|
||||
#include "double-conversion/double-conversion.h"
|
||||
#include "double-conversion/ieee.h"
|
||||
using namespace double_conversion;
|
||||
static StringToDoubleConverter
|
||||
converter(StringToDoubleConverter::ALLOW_TRAILING_JUNK, 2000000.0,
|
||||
Double::NaN(), NULL, NULL);
|
||||
#endif
|
||||
|
||||
|
||||
// does not validation whatsoever, assumes that all digit
|
||||
// this is CS 101
|
||||
u64 naivestrtoll(const char *p, const char *end) {
|
||||
if(p == end) return 0; // should be an error?
|
||||
// this code could get a whole lot smarter if we have many long ints:
|
||||
// e.g., see http://0x80.pl/articles/simd-parsing-int-sequences.html
|
||||
u64 x = *p - '0';
|
||||
p++;
|
||||
for(;p < end;p++) {
|
||||
x = (x*10) + (*p - '0');
|
||||
}
|
||||
return x;
|
||||
}
|
||||
// put a parsed version of number (either as a double or a signed long) into the
|
||||
// number buffer, put a 'tag' indicating which type and where it is back onto
|
||||
// the tape at that location return false if we can't parse the number which
|
||||
// means either (a) the number isn't valid, or (b) the number is followed by
|
||||
// something that isn't whitespace, comma or a close }] character which are the
|
||||
// only things that should follow a number at this stage bools to detect what we
|
||||
// found in our initial character already here - we are already switching on 0
|
||||
// vs 1-9 vs - so we may as well keep separate paths where that's useful
|
||||
|
||||
// TODO: see if we really need a separate number_buf or whether we should just
|
||||
// have a generic scratch - would need to align before using for this
|
||||
really_inline bool parse_number(const u8 *buf, UNUSED size_t len,
|
||||
UNUSED ParsedJson &pj, u32 tape_loc,
|
||||
UNUSED bool found_zero, bool found_minus) {
|
||||
u32 offset = pj.tape[tape_loc] & 0xffffff;
|
||||
////////////////
|
||||
// This is temporary... but it illustrates how one could use Google's double
|
||||
// conv.
|
||||
///
|
||||
#ifdef DOUBLECONV
|
||||
// Maybe surprisingly, StringToDouble does not parse according to the JSON
|
||||
// spec (e.g., it will happily parse 012 as 12).
|
||||
int processed_characters_count;
|
||||
double result_double_conv = converter.StringToDouble(
|
||||
(const char *)(buf + offset), 10, &processed_characters_count);
|
||||
*((double *)pj.current_number_buf_loc) = result_double_conv;
|
||||
pj.tape[tape_loc] =
|
||||
((u32)'d') << 24 |
|
||||
(pj.current_number_buf_loc -
|
||||
pj.number_buf); // assume 2^24 will hold all numbers for now
|
||||
pj.current_number_buf_loc += 8;
|
||||
return result_double_conv == result_double_conv;
|
||||
#endif
|
||||
////////////////
|
||||
// end of double conv temporary stuff.
|
||||
////////////////
|
||||
if (found_minus) {
|
||||
offset++;
|
||||
}
|
||||
const u8 *src = &buf[offset];
|
||||
m256 v = _mm256_loadu_si256((const m256 *)(src));
|
||||
u64 error_sump = 0;
|
||||
#ifdef DEBUG
|
||||
for (u32 j = 0; j < 32; j++) {
|
||||
char c = *(src + j);
|
||||
if (isprint(c)) {
|
||||
cout << c;
|
||||
} else {
|
||||
cout << '_';
|
||||
}
|
||||
}
|
||||
cout << "| ... number handling input\n";
|
||||
#endif
|
||||
|
||||
// categories to extract
|
||||
// Digits:
|
||||
// 0 (0x30) - bucket 0
|
||||
// 1-9 (never any distinction except if we didn't get the free kick at 0 due
|
||||
// to the leading minus) (0x31-0x39) - bucket 1
|
||||
// . (0x2e) - bucket 2
|
||||
// E or e - no distinction (0x45/0x65) - bucket 3
|
||||
// + (0x2b) - bucket 4
|
||||
// - (0x2d) - bucket 4
|
||||
// Terminators
|
||||
// Whitespace: 0x20, 0x09, 0x0a, 0x0d - bucket 5+6
|
||||
// Comma and the closes: 0x2c is comma, } is 0x5d, ] is 0x7d - bucket 5+7
|
||||
|
||||
// Another shufti - also a bit hand-hacked. Need to make a better construction
|
||||
const m256 low_nibble_mask = _mm256_setr_epi8(
|
||||
// 0 1 2 3 4 5 6 7 8 9 a b c d e f
|
||||
33, 2, 2, 2, 2, 10, 2, 2, 2, 66, 64, 16, 32, 0xd0, 4, 0, 33, 2, 2, 2, 2,
|
||||
10, 2, 2, 2, 66, 64, 16, 32, 0xd0, 4, 0);
|
||||
const m256 high_nibble_mask = _mm256_setr_epi8(
|
||||
// 0 1 2 3 4 5 6 7 8 9 a b c d e f
|
||||
64, 0, 52, 3, 8, -128, 8, 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 64, 0, 52, 3, 8,
|
||||
-128, 8, 0x80, 0, 0, 0, 0, 0, 0, 0, 0);
|
||||
|
||||
m256 tmp = _mm256_and_si256(
|
||||
_mm256_shuffle_epi8(low_nibble_mask, v),
|
||||
_mm256_shuffle_epi8(
|
||||
high_nibble_mask,
|
||||
_mm256_and_si256(_mm256_srli_epi32(v, 4), _mm256_set1_epi8(0x7f))));
|
||||
#ifdef DEBUG
|
||||
// let us print out the magic:
|
||||
uint8_t buffer[32];
|
||||
_mm256_storeu_si256((__m256i *)buffer,tmp);
|
||||
for(int k = 0; k < 32; k++)
|
||||
printf("%.2x ",buffer[k]);
|
||||
printf("\n");
|
||||
#endif
|
||||
m256 enders_mask = _mm256_set1_epi8(0xe0);
|
||||
m256 tmp_enders = _mm256_cmpeq_epi8(_mm256_and_si256(tmp, enders_mask),
|
||||
_mm256_set1_epi8(0));
|
||||
u32 enders = ~(u32)_mm256_movemask_epi8(tmp_enders);
|
||||
dumpbits32(enders, "ender characters");
|
||||
//dumpbits32_always(enders, "ender characters");
|
||||
|
||||
if (enders == 0) {
|
||||
error_sump = 1;
|
||||
// if enders == 0 we have
|
||||
// a heroically long number string or some garbage
|
||||
}
|
||||
// TODO: make a mask that indicates where our digits are // DANIEL: Isn't that digit_characters?
|
||||
u32 number_mask = ~enders & (enders - 1);
|
||||
dumpbits32(number_mask, "number mask");
|
||||
//dumpbits32_always(number_mask, "number mask");
|
||||
m256 n_mask = _mm256_set1_epi8(0x1f);
|
||||
m256 tmp_n =
|
||||
_mm256_cmpeq_epi8(_mm256_and_si256(tmp, n_mask), _mm256_set1_epi8(0));
|
||||
u32 number_characters = ~(u32)_mm256_movemask_epi8(tmp_n);
|
||||
|
||||
// put something into our error sump if we have something
|
||||
// before our ending characters that isn't a valid character
|
||||
// for the inside of our JSON
|
||||
number_characters &= number_mask;
|
||||
error_sump |= number_characters ^ number_mask;
|
||||
dumpbits32(number_characters, "number characters");
|
||||
|
||||
m256 d_mask = _mm256_set1_epi8(0x03);
|
||||
m256 tmp_d =
|
||||
_mm256_cmpeq_epi8(_mm256_and_si256(tmp, d_mask), _mm256_set1_epi8(0));
|
||||
u32 digit_characters = ~(u32)_mm256_movemask_epi8(tmp_d);
|
||||
digit_characters &= number_mask;
|
||||
dumpbits32(digit_characters, "digit characters");
|
||||
// dumpbits32_always(digit_characters, "digit characters");
|
||||
|
||||
|
||||
m256 p_mask = _mm256_set1_epi8(0x04);
|
||||
m256 tmp_p =
|
||||
_mm256_cmpeq_epi8(_mm256_and_si256(tmp, p_mask), _mm256_set1_epi8(0));
|
||||
u32 decimal_characters = ~(u32)_mm256_movemask_epi8(tmp_p);
|
||||
decimal_characters &= number_mask;
|
||||
dumpbits32(decimal_characters, "decimal characters");
|
||||
|
||||
m256 e_mask = _mm256_set1_epi8(0x08);
|
||||
m256 tmp_e =
|
||||
_mm256_cmpeq_epi8(_mm256_and_si256(tmp, e_mask), _mm256_set1_epi8(0));
|
||||
u32 exponent_characters = ~(u32)_mm256_movemask_epi8(tmp_e);
|
||||
exponent_characters &= number_mask;
|
||||
dumpbits32(exponent_characters, "exponent characters");
|
||||
|
||||
|
||||
m256 zero_mask = _mm256_set1_epi8(0x1);
|
||||
m256 tmp_zero =
|
||||
_mm256_cmpeq_epi8(tmp, zero_mask);
|
||||
u32 zero_characters = (u32)_mm256_movemask_epi8(tmp_zero);
|
||||
dumpbits32(zero_characters, "zero characters");
|
||||
|
||||
// if the zero character is in first position, it
|
||||
// needs to be followed by decimal or exponent or ender (note: we
|
||||
// handle found_minus separately)
|
||||
u32 expo_or_decimal_or_ender = exponent_characters | decimal_characters | enders;
|
||||
error_sump |= zero_characters & 0x01 & (~(expo_or_decimal_or_ender >> 1));
|
||||
|
||||
m256 s_mask = _mm256_set1_epi8(0x10);
|
||||
m256 tmp_s =
|
||||
_mm256_cmpeq_epi8(_mm256_and_si256(tmp, s_mask), _mm256_set1_epi8(0));
|
||||
u32 sign_characters = ~(u32)_mm256_movemask_epi8(tmp_s);
|
||||
sign_characters &= number_mask;
|
||||
dumpbits32(sign_characters, "sign characters");
|
||||
|
||||
u32 digit_edges = ~(digit_characters << 1) & digit_characters;
|
||||
dumpbits32(digit_edges, "digit_edges");
|
||||
|
||||
// check that we have 1-3 'edges' only
|
||||
u32 t = digit_edges;
|
||||
t &= t - 1;
|
||||
t &= t - 1;
|
||||
t &= t - 1;
|
||||
error_sump |= t;
|
||||
|
||||
// check that we start with a digit
|
||||
error_sump |= ~digit_characters & 0x1;
|
||||
|
||||
// having done some checks, get lazy and fall back
|
||||
// to strtoll or strtod
|
||||
// TODO: handle the easy cases ourselves; these are
|
||||
// expensive and we've done a lot of the prepwork.
|
||||
// return errors if strto* fail, otherwise fill in a code on the tape
|
||||
// 'd' for floating point and 'l' for long and put a pointer to the
|
||||
// spot in the buffer.
|
||||
if ( digit_edges == 1) {
|
||||
//if (__builtin_popcount(digit_edges) == 1) { // DANIEL : shouldn't we have digit_edges == 1
|
||||
#define NAIVEINTPARSING // naive means "faster" in this case
|
||||
#ifdef NAIVEINTPARSING
|
||||
// this is faster, maybe, because we use a naive strtoll
|
||||
// should be all digits?
|
||||
error_sump |= number_characters ^ digit_characters;
|
||||
int stringlength = __builtin_ctz(~digit_characters);
|
||||
const char *end = (const char *)src + stringlength;
|
||||
u64 result = naivestrtoll((const char *)src,end);
|
||||
if (found_minus) { // unfortunate that it is a branch?
|
||||
result = -result;
|
||||
}
|
||||
#else
|
||||
// try a strtoll (this is likely slower because it revalidates)
|
||||
char *end;
|
||||
u64 result = strtoll((const char *)src, &end, 10);
|
||||
if ((errno != 0) || (end == (const char *)src)) {
|
||||
error_sump |= 1;
|
||||
}
|
||||
error_sump |= is_not_structural_or_whitespace(*end);
|
||||
if (found_minus) {
|
||||
result = -result;
|
||||
}
|
||||
#endif
|
||||
#ifdef DEBUG
|
||||
cout << "Found number " << result << "\n";
|
||||
#endif
|
||||
*((u64 *)pj.current_number_buf_loc) = result;
|
||||
pj.tape[tape_loc] =
|
||||
((u32)'l') << 24 |
|
||||
(pj.current_number_buf_loc -
|
||||
pj.number_buf); // assume 2^24 will hold all numbers for now
|
||||
pj.current_number_buf_loc += 8;
|
||||
} else {
|
||||
// try a strtod
|
||||
char *end;
|
||||
double result = strtod((const char *)src, &end);
|
||||
if ((errno != 0) || (end == (const char *)src)) {
|
||||
error_sump |= 1;
|
||||
}
|
||||
error_sump |= is_not_structural_or_whitespace(*end);
|
||||
if (found_minus) {
|
||||
result = -result;
|
||||
}
|
||||
#ifdef DEBUG
|
||||
cout << "Found number " << result << "\n";
|
||||
#endif
|
||||
*((double *)pj.current_number_buf_loc) = result;
|
||||
pj.tape[tape_loc] =
|
||||
((u32)'d') << 24 |
|
||||
(pj.current_number_buf_loc -
|
||||
pj.number_buf); // assume 2^24 will hold all numbers for now
|
||||
pj.current_number_buf_loc += 8;
|
||||
}
|
||||
// TODO: check the MSB element is a digit
|
||||
|
||||
// TODO: a whole bunch of checks
|
||||
|
||||
// TODO: <=1 decimal point, eE mark, +- construct
|
||||
|
||||
// TODO: first and last character in mask region must be
|
||||
// digit
|
||||
|
||||
// TODO: if it exists,
|
||||
// Decimal point is after the first cluster of numbers only
|
||||
// and before the second cluster of numbers only. It must
|
||||
// be digit_or_zero . digit_or_zero strictly
|
||||
|
||||
// TODO: eE mark and +- construct are adjacent with eE first
|
||||
// eE mark preceeds final cluster of numbers only
|
||||
// and immediately follows second-last cluster of numbers only (not
|
||||
// necessarily second, as we may have 4e10).
|
||||
// it may suffice to insist that eE is preceeded immediately
|
||||
// by a digit of any kind and that it's followed locally by
|
||||
// a digit immediately or a +- construct then a digit.
|
||||
|
||||
// TODO: if we have both . and the eE mark then the . must
|
||||
// precede the eE mark
|
||||
|
||||
if (error_sump)
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
// Reports whether the tape at depth i has been written to: true when its
// current location (pj.tape_locs[i]) is no longer at the tape's first slot,
// i * MAX_TAPE_ENTRIES.
bool tape_disturbed(u32 i, ParsedJson &pj) {
  const u32 pristine_head = i * MAX_TAPE_ENTRIES;
  const u32 current_head = pj.tape_locs[i];
  return !(pristine_head == current_head);
}
|
||||
|
||||
bool shovel_machine(const u8 *buf, size_t len, ParsedJson &pj) {
|
||||
// fixup the mess made by the ape_machine
|
||||
// as such it does a bunch of miscellaneous things on the tapes
|
||||
u32 error_sump = 0;
|
||||
u64 tv = *(const u64 *)"true ";
|
||||
u64 nv = *(const u64 *)"null ";
|
||||
u64 fv = *(const u64 *)"false ";
|
||||
u64 mask4 = 0x00000000ffffffff;
|
||||
u64 mask5 = 0x000000ffffffffff;
|
||||
|
||||
// if the tape has been touched at all at the depths outside the safe
|
||||
// zone we need to quit. Note that our periodic checks to see that we're
|
||||
// inside our safe zone in stage 3 don't guarantee that the system did
|
||||
// not get into the danger area briefly.
|
||||
if (tape_disturbed(START_DEPTH - 1, pj) ||
|
||||
tape_disturbed(REDLINE_DEPTH, pj)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// walk over each tape
|
||||
for (u32 i = START_DEPTH; i < MAX_DEPTH; i++) {
|
||||
u32 start_loc = i * MAX_TAPE_ENTRIES;
|
||||
u32 end_loc = pj.tape_locs[i];
|
||||
if (start_loc == end_loc) {
|
||||
break;
|
||||
}
|
||||
for (u32 j = start_loc; j < end_loc; j++) {
|
||||
switch (pj.tape[j] >> 56) {
|
||||
case '{':
|
||||
case '[': {
|
||||
// pivot our tapes
|
||||
// point the enclosing structural char (}]) to the head marker ({[) and
|
||||
// put the end of the sequence on the tape at the head marker
|
||||
// we start with head marker pointing at the enclosing structural char
|
||||
// and the enclosing structural char pointing at the end. Just swap
|
||||
// them. also check the balanced-{} or [] property here
|
||||
u8 head_marker_c = pj.tape[j] >> 56;
|
||||
u32 head_marker_loc = pj.tape[j] & 0xffffffffffffffULL;
|
||||
u64 tape_enclosing = pj.tape[head_marker_loc];
|
||||
u8 enclosing_c = tape_enclosing >> 56;
|
||||
pj.tape[head_marker_loc] = pj.tape[j];
|
||||
pj.tape[j] = tape_enclosing;
|
||||
error_sump |= (enclosing_c - head_marker_c -
|
||||
2); // [] and {} only differ by 2 chars
|
||||
break;
|
||||
}
|
||||
case '"': {
|
||||
error_sump |= !parse_string(buf, len, pj, j);
|
||||
break;
|
||||
}
|
||||
case '1':
|
||||
case '2':
|
||||
case '3':
|
||||
case '4':
|
||||
case '5':
|
||||
case '6':
|
||||
case '7':
|
||||
case '8':
|
||||
case '9':
|
||||
error_sump |= !parse_number(buf, len, pj, j, false, false);
|
||||
break;
|
||||
case '0':
|
||||
error_sump |= !parse_number(buf, len, pj, j, true, false);
|
||||
break;
|
||||
case '-':
|
||||
error_sump |= !parse_number(buf, len, pj, j, false, true);
|
||||
break;
|
||||
case 't': {
|
||||
u32 offset = pj.tape[j] & 0xffffffffffffffULL;
|
||||
const u8 *loc = buf + offset;
|
||||
u64 locval; // we want to avoid unaligned 64-bit loads (undefined in
|
||||
// C/C++)
|
||||
std::memcpy(&locval, loc, sizeof(u64));
|
||||
error_sump |= (locval & mask4) ^ tv;
|
||||
error_sump |= is_not_structural_or_whitespace(loc[4]);
|
||||
break;
|
||||
}
|
||||
case 'f': {
|
||||
u32 offset = pj.tape[j] & 0xffffffffffffffULL;
|
||||
const u8 *loc = buf + offset;
|
||||
u64 locval; // we want to avoid unaligned 64-bit loads (undefined in
|
||||
// C/C++)
|
||||
std::memcpy(&locval, loc, sizeof(u64));
|
||||
error_sump |= (locval & mask5) ^ fv;
|
||||
error_sump |= is_not_structural_or_whitespace(loc[5]);
|
||||
break;
|
||||
}
|
||||
case 'n': {
|
||||
u32 offset = pj.tape[j] & 0xffffffffffffffULL;
|
||||
const u8 *loc = buf + offset;
|
||||
u64 locval; // we want to avoid unaligned 64-bit loads (undefined in
|
||||
// C/C++)
|
||||
std::memcpy(&locval, loc, sizeof(u64));
|
||||
error_sump |= (locval & mask4) ^ nv;
|
||||
error_sump |= is_not_structural_or_whitespace(loc[4]);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
/*
|
||||
if (error_sump) {
|
||||
return false;
|
||||
}
|
||||
*/
|
||||
return true;
|
||||
}
|
Loading…
Reference in New Issue