From 9f91650e72f50ff6ff2d118d9746674cbcdc0b8d Mon Sep 17 00:00:00 2001 From: Geoff Langdale Date: Wed, 26 Sep 2018 15:22:55 +1000 Subject: [PATCH] Remove old 4-stage path. --- Makefile | 9 +- benchmark/parse.cpp | 45 +- benchmark/parsingcompetition.cpp | 1 - include/jsonparser/jsonparser.h | 5 - include/jsonparser/stage3_ape_machine.h | 7 - include/jsonparser/stage4_shovel_machine.h | 6 - src/jsonparser.cpp | 21 - src/stage3_ape_machine.cpp | 338 ----------- src/stage4_shovel_machine.cpp | 654 --------------------- 9 files changed, 6 insertions(+), 1080 deletions(-) delete mode 100644 include/jsonparser/stage3_ape_machine.h delete mode 100644 include/jsonparser/stage4_shovel_machine.h delete mode 100644 src/stage3_ape_machine.cpp delete mode 100644 src/stage4_shovel_machine.cpp diff --git a/Makefile b/Makefile index 2ccb4ef8..72b976eb 100644 --- a/Makefile +++ b/Makefile @@ -8,11 +8,11 @@ CXXFLAGS = -std=c++11 -g2 -O2 -march=native -Wall -Wextra -Wshadow -Iinclude -Ibenchmark/linux -Idependencies/double-conversion -Idependencies/rapidjson/include -Ldependencies/double-conversion/release LIBFLAGS = -ldouble-conversion -EXECUTABLES=parse jsoncheck minifiercompetition parsingcompetition parseunified +EXECUTABLES=parse jsoncheck minifiercompetition parsingcompetition DOUBLEEXECUTABLES=parsedouble jsoncheckdouble parsingcompetitiondouble -HEADERS=include/jsonparser/jsonparser.h include/jsonparser/common_defs.h include/jsonparser/jsonioutil.h benchmark/benchmark.h benchmark/linux/linux-perf-events.h include/jsonparser/simdjson_internal.h include/jsonparser/stage1_find_marks.h include/jsonparser/stage2_flatten.h include/jsonparser/stage3_ape_machine.h include/jsonparser/stage4_shovel_machine.h include/jsonparser/stage34_unified.h -LIBFILES=src/jsonioutil.cpp src/jsonparser.cpp src/stage1_find_marks.cpp src/stage2_flatten.cpp src/stage3_ape_machine.cpp src/stage4_shovel_machine.cpp src/stage34_unified.cpp +HEADERS=include/jsonparser/jsonparser.h include/jsonparser/common_defs.h include/jsonparser/jsonioutil.h benchmark/benchmark.h benchmark/linux/linux-perf-events.h include/jsonparser/simdjson_internal.h include/jsonparser/stage1_find_marks.h include/jsonparser/stage2_flatten.h include/jsonparser/stage34_unified.h +LIBFILES=src/jsonioutil.cpp src/jsonparser.cpp src/stage1_find_marks.cpp src/stage2_flatten.cpp src/stage34_unified.cpp MINIFIERHEADERS=include/jsonparser/jsonminifier.h include/jsonparser/simdprune_tables.h MINIFIERLIBFILES=src/jsonminifier.cpp @@ -39,9 +39,6 @@ bench: benchmarks/bench.cpp $(RAPIDJSON_INCLUDE) $(HEADERS) $(CXX) -std=c++11 -O3 -o $@ benchmarks/bench.cpp -I$(RAPIDJSON_INCLUDE) -Iinclude -march=native -lm -Wall -Wextra -Wno-narrowing -parseunified: benchmark/parse.cpp $(HEADERS) $(LIBFILES) - $(CXX) $(CXXFLAGS) -o parseunified $(LIBFILES) benchmark/parse.cpp $(LIBFLAGS) -DTEST_UNIFIED - parse: benchmark/parse.cpp $(HEADERS) $(LIBFILES) $(CXX) $(CXXFLAGS) -o parse $(LIBFILES) benchmark/parse.cpp $(LIBFLAGS) diff --git a/benchmark/parse.cpp b/benchmark/parse.cpp index 2a8aeb94..22dc0968 100644 --- a/benchmark/parse.cpp +++ b/benchmark/parse.cpp @@ -22,8 +22,6 @@ #include #include -//#define TEST_UNIFIED - /// Fixme: enable doube conv // #define DOUBLECONV #ifdef DOUBLECONV @@ -39,8 +37,6 @@ using namespace double_conversion; #include "jsonparser/simdjson_internal.h" #include "jsonparser/stage1_find_marks.h" #include "jsonparser/stage2_flatten.h" -#include "jsonparser/stage3_ape_machine.h" -#include "jsonparser/stage4_shovel_machine.h" #include "jsonparser/stage34_unified.h" using namespace std; @@ -129,7 +125,6 @@ int main(int argc, char *argv[]) { cerr << "Currently only support JSON files < 16MB\n"; exit(1); } - init_state_machine(); pj.n_structural_indexes = 0; // we have potentially 1 structure per byte of input @@ -159,8 +154,8 @@ int main(int argc, char *argv[]) { LinuxEvents unified(evts); vector results; results.resize(evts.size()); - unsigned long cy1 = 0, cy2 = 0, cy3 = 0, cy4 = 0; - unsigned long cl1 = 0, cl2 = 0, cl3 = 0, cl4 = 0; + unsigned long cy1 = 0, cy2 = 0, cy3 = 0; + unsigned long cl1 = 0, cl2 = 0, cl3 = 0; #endif bool isok = true; for (u32 i = 0; i < iterations; i++) { @@ -191,31 +186,6 @@ int main(int argc, char *argv[]) { unified.start(); #endif -#ifndef TEST_UNIFIED - - isok = ape_machine(p.first, p.second, pj); -#ifndef SQUASH_COUNTERS - unified.end(results); - cy3 += results[0]; - cl3 += results[1]; - if (!isok) { - cout << "Failed out during stage 3\n"; - break; - } - unified.start(); -#endif - isok = shovel_machine(p.first, p.second, pj); -#ifndef SQUASH_COUNTERS - unified.end(results); - cy4 += results[0]; - cl4 += results[1]; -#endif - if (!isok) { - cout << "Failed out during stage 4\n"; - break; - } -#else - isok = unified_machine(p.first, p.second, pj); #ifndef SQUASH_COUNTERS unified.end(results); @@ -227,7 +197,6 @@ int main(int argc, char *argv[]) { } #endif -#endif auto end = std::chrono::steady_clock::now(); std::chrono::duration secs = end - start; res[i] = secs.count(); @@ -237,7 +206,7 @@ int main(int argc, char *argv[]) { printf("number of bytes %ld number of structural chars %d ratio %.3f\n", p.second, pj.n_structural_indexes, (double)pj.n_structural_indexes / p.second); - unsigned long total = cy1 + cy2 + cy3 + cy4; + unsigned long total = cy1 + cy2 + cy3; printf( "stage 1 instructions: %10lu cycles: %10lu (%.2f %%) ins/cycles: %.2f \n", @@ -261,14 +230,6 @@ int main(int argc, char *argv[]) { printf("%.2f cycles per structural character.\n", (double)cy3 / (iterations * pj.n_structural_indexes)); - printf( - "stage 4 instructions: %10lu cycles: %10lu (%.2f %%) ins/cycles: %.2f \n", - cl4, cy4, 100. * cy4 / total, (double)cl4 / cy4); - printf(" stage 4 runs at %.2f cycles per input byte and ", - (double)cy4 / (iterations * p.second)); - printf("%.2f cycles per structural character.\n", - (double)cy4 / (iterations * pj.n_structural_indexes)); - printf(" all stages: %.2f cycles per input byte.\n", (double)total / (iterations * p.second)); #endif diff --git a/benchmark/parsingcompetition.cpp b/benchmark/parsingcompetition.cpp index ba4dad71..dca069e3 100644 --- a/benchmark/parsingcompetition.cpp +++ b/benchmark/parsingcompetition.cpp @@ -46,7 +46,6 @@ int main(int argc, char *argv[]) { int repeat = 10; int volume = p.second; BEST_TIME(json_parse(p.first, p.second, pj), true, , repeat, volume, true); - BEST_TIME(json_parse_4stages(p.first, p.second, pj), true, , repeat, volume, true); rapidjson::Document d; diff --git a/include/jsonparser/jsonparser.h b/include/jsonparser/jsonparser.h index 33d5d0db..6b138e24 100644 --- a/include/jsonparser/jsonparser.h +++ b/include/jsonparser/jsonparser.h @@ -5,8 +5,6 @@ #include "simdjson_internal.h" #include "stage1_find_marks.h" #include "stage2_flatten.h" -#include "stage3_ape_machine.h" -#include "stage4_shovel_machine.h" #include "stage34_unified.h" // Allocate a ParsedJson structure that can support document @@ -22,6 +20,3 @@ void deallocate_ParsedJson(ParsedJson *pj_ptr); // Parse a document found in buf, need to preallocate ParsedJson. // Return false in case of a failure. bool json_parse(const u8 *buf, size_t len, ParsedJson &pj); - -// like json_parse but users 4 stages, slower. -bool json_parse_4stages(const u8 *buf, size_t len, ParsedJson &pj); diff --git a/include/jsonparser/stage3_ape_machine.h b/include/jsonparser/stage3_ape_machine.h deleted file mode 100644 index 4afd1a0b..00000000 --- a/include/jsonparser/stage3_ape_machine.h +++ /dev/null @@ -1,7 +0,0 @@ -#pragma once - -#include "common_defs.h" -#include "simdjson_internal.h" - -void init_state_machine(); -bool ape_machine(const u8 *buf, size_t len, ParsedJson &pj); diff --git a/include/jsonparser/stage4_shovel_machine.h b/include/jsonparser/stage4_shovel_machine.h deleted file mode 100644 index 44132f1d..00000000 --- a/include/jsonparser/stage4_shovel_machine.h +++ /dev/null @@ -1,6 +0,0 @@ -#pragma once - -#include "common_defs.h" -#include "simdjson_internal.h" - -bool shovel_machine(const u8 *buf, size_t len, ParsedJson &pj); diff --git a/src/jsonparser.cpp b/src/jsonparser.cpp index def9f436..9ad4daea 100644 --- a/src/jsonparser.cpp +++ b/src/jsonparser.cpp @@ -44,27 +44,6 @@ void deallocate_ParsedJson(ParsedJson *pj_ptr) { delete pj_ptr; } -// parse a document found in buf, need to preallocate ParsedJson. -// this can probably be considered a legacy function at this point. -bool json_parse_4stages(const u8 *buf, size_t len, ParsedJson &pj) { - if (pj.bytecapacity < len) { - std::cerr << "Your ParsedJson cannot support documents that big: " << len - << std::endl; - return false; - } - bool isok = find_structural_bits(buf, len, pj); - if (isok) { - isok = flatten_indexes(len, pj); - } - if (isok) { - isok = ape_machine(buf, len, pj); - } - if (isok) { - isok = shovel_machine(buf, len, pj); - } - return isok; -} - // parse a document found in buf, need to preallocate ParsedJson. bool json_parse(const u8 *buf, size_t len, ParsedJson &pj) { if (pj.bytecapacity < len) { diff --git a/src/stage3_ape_machine.cpp b/src/stage3_ape_machine.cpp deleted file mode 100644 index 592e71ed..00000000 --- a/src/stage3_ape_machine.cpp +++ /dev/null @@ -1,338 +0,0 @@ -#ifdef _MSC_VER -/* Microsoft C/C++-compatible compiler */ -#include -#else -#include -#include -#endif - -#include -#include - -#include "jsonparser/common_defs.h" -#include "jsonparser/simdjson_internal.h" - -// the ape machine consists of two parts: -// -// 1) The "state machine", which is a multiple channel per-level state machine -// It is a conventional DFA except in that it 'changes track' on {}[] -// characters -// -// 2) The "tape machine": this records offsets of various structures as they go -// by -// These structures are either u32 offsets of other tapes or u32 offsets into -// our input or structures. -// -// The state machine doesn't record ouput. -// The tape machine doesn't validate. -// -// The output of the tape machine is meaningful only if the state machine is in -// non-error states. - -// depth adjustment is strictly based on whether we are {[ or }] - -// depth adjustment is a pre-increment which, in effect, means that a {[ -// contained in an object is in the level one deeper, while the corresponding }] -// is at the level - -// TAPE MACHINE DEFINITIONS - -const u32 DEPTH_PLUS_ONE = 0x01000000; -const u32 DEPTH_ZERO = 0x00000000; -const u32 DEPTH_MINUS_ONE = 0xff000000; -const u32 WRITE_ZERO = 0x0; -const u32 WRITE_FOUR = 0x1; - -const u32 CDF = DEPTH_ZERO | WRITE_ZERO; // default 'control' -const u32 C04 = DEPTH_ZERO | WRITE_FOUR; -const u32 CP4 = DEPTH_PLUS_ONE | WRITE_FOUR; -const u32 CM4 = DEPTH_MINUS_ONE | WRITE_FOUR; - -inline s8 get_depth_adjust(u32 control) { return (s8)(((s32)control) >> 24); } -inline size_t get_write_size(u32 control) { return control & 0xff; } - -const u32 char_control[256] = { - // nothing interesting from 0x00-0x20 - CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, - CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, - CDF, CDF, - - // " is 0x22, - is 0x2d - CDF, CDF, C04, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, C04, CDF, - CDF, - - // numbers are 0x30-0x39 - C04, C04, C04, C04, C04, C04, C04, C04, C04, C04, CDF, CDF, CDF, CDF, CDF, - CDF, - - // nothing interesting from 0x40-0x49 - CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, - CDF, - - // 0x5b/5d are [] - CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CP4, CDF, CM4, CDF, - CDF, - - // f is 0x66 n is 0x6e - CDF, CDF, CDF, CDF, CDF, CDF, C04, CDF, CDF, CDF, CDF, CDF, CDF, CDF, C04, - CDF, - - // 0x7b/7d are {}, 74 is t - CDF, CDF, CDF, CDF, C04, CDF, CDF, CDF, CDF, CDF, CDF, CP4, CDF, CM4, CDF, - CDF, - - // nothing interesting from 0x80-0xff - CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, - CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, - CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, - CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, - CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, - CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, - CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, - CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF, - CDF, CDF, CDF, CDF, CDF, CDF, CDF, CDF}; - -// all of this stuff needs to get moved somewhere reasonable -// like our ParsedJson structure -/* -u64 tape[MAX_TAPE]; -u32 tape_locs[MAX_DEPTH]; -u8 string_buf[512*1024]; -u8 * current_string_buf_loc; -u8 number_buf[512*1024]; // holds either doubles or longs, really -u8 * current_number_buf_loc; -*/ - -// STATE MACHINE DECLARATIONS -const u32 MAX_STATES = 16; - -/** - * It is annoying to have to call init_state_machine each time. - * Better to precompute the (small) result into a header file. - */ -// u32 trans[MAX_STATES][256]; -#include "jsonparser/transitions.h" - -u32 states[MAX_DEPTH]; -const int START_STATE = 1; - -u32 valid_end_states[MAX_STATES] = { - 0, // 0 state is by definition an error - 1, // ok to still be in start state - 1, // state 2: we've seen an { - if we left this level it's ok - 0, // state 3 is abolished, we shouldn't be in it - - 0, // state 4 means we saw a string inside an object. We can't end like - // this! - 0, // similarly state 5 means we saw a string followed by a colon. - 0, // state 6 is abolished - 1, // it's ok to finish on 7 - - 0, // state 8 we've seen a comma inside an object - can't finish here - 1, // state 9 is like state 2 only for arrays, so ok - 0, // state 10 abolished - 1, // state 11 is ok to finish on, we just saw a unary inside a array - - 0, // state 12 we've just seen a comma inside an array - can't finish - 0, // state 13 is our weird start state. I think we shouldn't end on it as - // we need to see something - 1, // state 14 is ok. Its an error to see something *more* here but not to - // be in this state - 0, // we don't use state 15 -}; - -// weird sub-machine for starting depth only -// we start at 13 and go to 14 on a single UNARY -// 14 doesn't have to have any transitions. Anything -// else arrives after the single thing it's an error -const int START_DEPTH_START_STATE = 13; - -// ANYTHING_IS_ERROR_STATE is useful both as a target -// for a transition at the start depth and also as -// a good initial value for "red line" depths; that -// is, depths that are maintained strictly to avoid -// undefined behavior (e.g. depths below the starting -// depth). -const int ANYTHING_IS_ERROR_STATE = 14; - -void init_state_machine() { - // states 10 and 6 eliminated - - trans[1][(int)'{'] = 2; - trans[2][(int)'"'] = 4; - trans[4][(int)':'] = 5; - // 5->7 on all values ftn0123456789-" - trans[7][(int)','] = 8; - trans[8][(int)'"'] = 4; - - trans[1][(int)'['] = 9; - // 9->11 on all values ftn0123456789-" - trans[11][(int)','] = 12; - // 12->11 on all values ftn0123456789-" - - const char *UNARIES = "}]ftn0123456789-\""; - for (u32 i = 0; i < strlen(UNARIES); i++) { - trans[5][(u32)UNARIES[i]] = 7; - trans[9][(u32)UNARIES[i]] = 11; - trans[12][(u32)UNARIES[i]] = 11; -#ifdef PERMIT_RANDOM_UNARIES_AT_TOP_LEVEL - // NOTE: if we permit JSON documents that - // contain a single number or string, then we - // allow all the unaries at the top level - trans[13][(u32)UNARIES[i]] = 14; -#endif - } - -#ifndef PERMIT_RANDOM_UNARIES_AT_TOP_LEVEL - // NOTE: if we don't permit JSON documents that - // that contain a single number or string, we must - // make sure we accept the top-level closing braces - // that are delivered to the start depth only - trans[13][(int)'}'] = 14; - trans[13][(int)']'] = 14; -#endif - - // back transitions when new things are open - trans[2][(int)'{'] = 2; - trans[7][(int)'{'] = 2; - trans[9][(int)'{'] = 2; - trans[11][(int)'{'] = 2; - trans[2][(int)'['] = 9; - trans[7][(int)'['] = 9; - trans[9][(int)'['] = 9; - trans[11][(int)'['] = 9; -} - -bool ape_machine(const u8 *buf, UNUSED size_t len, ParsedJson &pj) { - - // NOTE - our depth is used by both the tape machine and the state machine - // Further, in production we will set it to a largish value in a generous - // buffer as a rogue input could consist of many {[ characters or many }] - // characters. We aren't busily checking errors (and in fact, a aggressive - // sequence of [ characters is actually valid input!) so something that blows - // out maximum depth will need to be periodically checked for, as will - // something that tries to set depth very low. If we set our starting depth, - // say, to 256, we can tolerate 256 bogus close brace characters without - // aggressively going wrong and writing to bad memory Note that any specious - // depth can have a specious tape associated with and all these specious - // depths can share a region of the tape - it's harmless. Since tape is - // one-way, any movement in a specious tape is an error (so we can detect - // max_depth violations by making sure that specious tape locations haven't - // moved from their starting values) - - u32 depth = START_DEPTH; - - for (u32 i = 0; i < MAX_DEPTH; i++) { - pj.tape_locs[i] = i * MAX_TAPE_ENTRIES; - if (i == START_DEPTH) { - states[i] = START_DEPTH_START_STATE; - } else if ((i < START_DEPTH) || (i >= REDLINE_DEPTH)) { - states[i] = ANYTHING_IS_ERROR_STATE; - } else { - states[i] = START_STATE; - } - } - - pj.current_string_buf_loc = pj.string_buf; - pj.current_number_buf_loc = pj.number_buf; - - u32 error_sump = 0; - u32 old_tape_loc = pj.tape_locs[depth]; // need to initialize for first write - - u32 next_idx = pj.structural_indexes[0]; - u8 next_c = buf[next_idx]; - u32 next_control = char_control[next_c]; - - for (u32 i = 0; i < pj.n_structural_indexes; i++) { - - // very periodic safety checking. This does NOT guarantee that we - // haven't been in our dangerous zones above or below our normal - // depths. It ONLY checks to be sure that we don't manage to leave - // these zones and write completely off our tape. - if (!(i % DEPTH_SAFETY_MARGIN)) { - if (depth < START_DEPTH || depth >= REDLINE_DEPTH) { - error_sump |= 1; - break; - } - } - - u32 idx = next_idx; - u8 c = next_c; - u32 control = next_control; - - next_idx = pj.structural_indexes[i + 1]; - next_c = buf[next_idx]; - next_control = char_control[next_c]; - - // TAPE MACHINE - s8 depth_adjust = get_depth_adjust(control); - u8 write_size = get_write_size(control); - u32 write_val = (depth_adjust != 0) ? old_tape_loc : idx; - depth += depth_adjust; -#ifdef DEBUG - cout << "i: " << i << " idx: " << idx << " c " << c << "\n"; - cout << "TAPE MACHINE: depth change " << (s32)depth_adjust << " write_size " - << (u32)write_size << " current_depth: " << depth << "\n"; -#endif - - // STATE MACHINE - hoisted here to fill in during the tape machine's - // latencies -#ifdef DEBUG - cout << "STATE MACHINE: state[depth] pre " << states[depth] << " "; -#endif - states[depth] = trans[states[depth]][c]; -#ifdef DEBUG - cout << "post " << states[depth] << "\n"; -#endif - // TAPE MACHINE, again - pj.tape[pj.tape_locs[depth]] = write_val | (((u64)c) << 56); - old_tape_loc = pj.tape_locs[depth] += write_size; - } - - if (depth != START_DEPTH) { - // We haven't returned to our start depth, so our braces can't possibly - // match Note this doesn't exclude the possibility that we have improperly - // matched { } or [] pairs - return false; - } - - for (u32 i = 0; i < MAX_DEPTH; i++) { - if (!valid_end_states[states[i]]) { -#ifdef DEBUG - printf("Invalid ending state: states[%d] == %d\n", states[i]); -#endif - return false; - } - } - -#define DUMP_TAPES -#ifdef DEBUG - for (u32 i = 0; i < MAX_DEPTH; i++) { - u32 start_loc = i * MAX_TAPE_ENTRIES; - cout << " tape section i " << i; - if (i == START_DEPTH) { - cout << " (START) "; - } else if ((i < START_DEPTH) || (i >= REDLINE_DEPTH)) { - cout << " (REDLINE) "; - } else { - cout << " (NORMAL) "; - } - - cout << " from: " << start_loc << " to: " << tape_locs[i] << " " - << " size: " << (tape_locs[i] - start_loc) << "\n"; - cout << " state: " << states[i] << "\n"; -#ifdef DUMP_TAPES - for (u32 j = start_loc; j < tape_locs[i]; j++) { - if (tape[j]) { - cout << "j: " << j << " tape[j] char " << (char)(tape[j] >> 56) - << " tape[j][0..55]: " << (tape[j] & 0xffffffffffffffULL) << "\n"; - } - } -#endif - } -#endif - if (error_sump) { - return false; - } - return true; -} diff --git a/src/stage4_shovel_machine.cpp b/src/stage4_shovel_machine.cpp deleted file mode 100644 index 2048718a..00000000 --- a/src/stage4_shovel_machine.cpp +++ /dev/null @@ -1,654 +0,0 @@ -#ifdef _MSC_VER -/* Microsoft C/C++-compatible compiler */ -#include -#else -#include -#include -#endif - -#include -#include - -#include "jsonparser/common_defs.h" -#include "jsonparser/simdjson_internal.h" - -// they are { 0x7b } 0x7d : 0x3a [ 0x5b ] 0x5d , 0x2c -// these go into the first 3 buckets of the comparison (1/2/4) - -// we are also interested in the four whitespace characters -// space 0x20, linefeed 0x0a, horizontal tab 0x09 and carriage return 0x0d - -const u32 structural_or_whitespace_negated[256] = { - 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, - - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, - - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; - -// return non-zero if not a structural or whitespace char -// zero otherwise -really_inline u32 is_not_structural_or_whitespace(u8 c) { - return structural_or_whitespace_negated[c]; -} - -// These chars yield themselves: " \ / -// b -> backspace, f -> formfeed, n -> newline, r -> cr, t -> horizontal tab -// u not handled in this table as it's complex -const u8 escape_map[256] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x0. - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0x22, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x2f, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x4. - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x5c, 0, 0, 0, // 0x5. - 0, 0, 0x08, 0, 0, 0, 0x12, 0, 0, 0, 0, 0, 0, 0, 0x0a, 0, // 0x6. - 0, 0, 0x0d, 0, 0x09, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x7. - - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -}; - -const u32 leading_zeros_to_utf_bytes[33] = { - 1, 1, 1, 1, 1, 1, 1, 1, // 7 bits for first one - 2, 2, 2, 2, // 11 bits for next - 3, 3, 3, 3, 3, // 16 bits for next - 4, 4, 4, 4, 4, // 21 bits for next - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; // error - -const u32 UTF_PDEP_MASK[5] = {0x00, // error - 0x7f, 0x1f3f, 0x0f3f3f, 0x073f3f3f}; - -const u32 UTF_OR_MASK[5] = {0x00, // error - 0x00, 0xc080, 0xe08080, 0xf0808080}; - -bool is_hex_digit(u8 v) { - if (v >= '0' && v <= '9') - return true; - v &= 0xdf; - if (v >= 'A' && v <= 'F') - return true; - return false; -} - -u8 digit_to_val(u8 v) { - if (v >= '0' && v <= '9') - return v - '0'; - v &= 0xdf; - return v - 'A' + 10; -} - -bool hex_to_u32(const u8 *src, u32 *res) { - u8 v1 = src[0]; - u8 v2 = src[1]; - u8 v3 = src[2]; - u8 v4 = src[3]; - if (!is_hex_digit(v1) || !is_hex_digit(v2) || !is_hex_digit(v3) || - !is_hex_digit(v4)) { - return false; - } - *res = digit_to_val(v1) << 24 | digit_to_val(v2) << 16 | - digit_to_val(v3) << 8 | digit_to_val(v4); - return true; -} - -// handle a unicode codepoint -// write appropriate values into dest -// src will always advance 6 bytes -// dest will advance a variable amount (return via pointer) -// return true if the unicode codepoint was valid -// We work in little-endian then swap at write time -really_inline bool handle_unicode_codepoint(const u8 **src_ptr, u8 **dst_ptr) { - u32 code_point = 0; // read the hex, potentially reading another \u beyond if - // it's a // wacky one - if (!hex_to_u32(*src_ptr + 2, &code_point)) { - return false; - } - *src_ptr += 6; - // check for the weirdo double-UTF-16 nonsense for things outside Basic - // Multilingual Plane. - if (code_point >= 0xd800 && code_point < 0xdc00) { - // TODO: sanity check and clean up; snippeted from RapidJSON and poorly - // understood at the moment - if (((*src_ptr)[0] != '\\') || (*src_ptr)[1] != 'u') { - return false; - } - u32 code_point_2 = 0; - if (!hex_to_u32(*src_ptr + 2, &code_point_2)) { - return false; - } - if (code_point_2 < 0xdc00 || code_point_2 > 0xdfff) { - return false; - } - code_point = - (((code_point - 0xd800) << 10) | (code_point_2 - 0xdc00)) + 0x10000; - *src_ptr += 6; - } - // TODO: check to see whether the below code is nonsense (it's really only a - // sketch at this point) - u32 lz = __builtin_clz(code_point); - u32 utf_bytes = leading_zeros_to_utf_bytes[lz]; - u32 tmp = - _pdep_u32(code_point, UTF_PDEP_MASK[utf_bytes]) | UTF_OR_MASK[utf_bytes]; - // swap and move to the other side of the register - tmp = __builtin_bswap32(tmp); - tmp >>= ((4 - utf_bytes) * 8) & 31; // if utf_bytes, this could become a shift - // by 32, hence the mask with 31 - // use memcpy to avoid undefined behavior: - std::memcpy(*(u32 **)dst_ptr, &tmp, sizeof(u32)); //**(u32 **)dst_ptr = tmp; - *dst_ptr += utf_bytes; - return true; -} - -really_inline bool parse_string(const u8 *buf, UNUSED size_t len, - ParsedJson &pj, u32 tape_loc) { - u32 offset = pj.tape[tape_loc] & 0xffffff; - const u8 *src = &buf[offset + 1]; // we know that buf at offset is a " - u8 *dst = pj.current_string_buf_loc; -#ifdef DEBUG - cout << "Entering parse string with offset " << offset << "\n"; -#endif - // basic non-sexy parsing code - while (1) { -#ifdef DEBUG - for (u32 j = 0; j < 32; j++) { - char c = *(src + j); - if (isprint(c)) { - cout << c; - } else { - cout << '_'; - } - } - cout << "| ... string handling input\n"; -#endif - m256 v = _mm256_loadu_si256((const m256 *)(src)); - u32 bs_bits = - (u32)_mm256_movemask_epi8(_mm256_cmpeq_epi8(v, _mm256_set1_epi8('\\'))); - dumpbits32(bs_bits, "backslash bits 2"); - u32 quote_bits = - (u32)_mm256_movemask_epi8(_mm256_cmpeq_epi8(v, _mm256_set1_epi8('"'))); - dumpbits32(quote_bits, "quote_bits"); - u32 quote_dist = __builtin_ctz(quote_bits); - u32 bs_dist = __builtin_ctz(bs_bits); - // store to dest unconditionally - we can overwrite the bits we don't like - // later - _mm256_storeu_si256((m256 *)(dst), v); -#ifdef DEBUG - cout << "quote dist: " << quote_dist << " bs dist: " << bs_dist << "\n"; -#endif - - if (quote_dist < bs_dist) { -#ifdef DEBUG - cout << "Found end, leaving!\n"; -#endif - // we encountered quotes first. Move dst to point to quotes and exit - dst[quote_dist] = 0; // null terminate and get out - pj.current_string_buf_loc = dst + quote_dist + 1; - pj.tape[tape_loc] = - ((u32)'"') << 24 | - (pj.current_string_buf_loc - - pj.string_buf); // assume 2^24 will hold all strings for now - return true; - } else if (quote_dist > bs_dist) { - u8 escape_char = src[bs_dist + 1]; -#ifdef DEBUG - cout << "Found escape char: " << escape_char << "\n"; -#endif - // we encountered backslash first. Handle backslash - if (escape_char == 'u') { - // move src/dst up to the start; they will be further adjusted - // within the unicode codepoint handling code. - src += bs_dist; - dst += bs_dist; - if (!handle_unicode_codepoint(&src, &dst)) { - return false; - } - return true; - } else { - // simple 1:1 conversion. Will eat bs_dist+2 characters in input and - // write bs_dist+1 characters to output - // note this may reach beyond the part of the buffer we've actually - // seen. I think this is ok - u8 escape_result = escape_map[escape_char]; - if (!escape_result) - return false; // bogus escape value is an error - dst[bs_dist] = escape_result; - src += bs_dist + 2; - dst += bs_dist + 1; - } - } else { - // they are the same. Since they can't co-occur, it means we encountered - // neither. - src += 32; - dst += 32; - } - return true; - } - // later extensions - - // if \\ we could detect whether it's a substantial run of \ or just eat 2 - // chars and write 1 handle anything short of \u or \\\ (as a prefix) with - // clever PSHUFB stuff and don't leave SIMD - return true; -} - -#ifdef DOUBLECONV -#include "double-conversion/double-conversion.h" -#include "double-conversion/ieee.h" -using namespace double_conversion; -static StringToDoubleConverter - converter(StringToDoubleConverter::ALLOW_TRAILING_JUNK, 2000000.0, - Double::NaN(), NULL, NULL); -#endif - - -// does not validation whatsoever, assumes that all digit -// this is CS 101 -u64 naivestrtoll(const char *p, const char *end) { - if(p == end) return 0; // should be an error? - // this code could get a whole lot smarter if we have many long ints: - // e.g., see http://0x80.pl/articles/simd-parsing-int-sequences.html - u64 x = *p - '0'; - p++; - for(;p < end;p++) { - x = (x*10) + (*p - '0'); - } - return x; -} -// put a parsed version of number (either as a double or a signed long) into the -// number buffer, put a 'tag' indicating which type and where it is back onto -// the tape at that location return false if we can't parse the number which -// means either (a) the number isn't valid, or (b) the number is followed by -// something that isn't whitespace, comma or a close }] character which are the -// only things that should follow a number at this stage bools to detect what we -// found in our initial character already here - we are already switching on 0 -// vs 1-9 vs - so we may as well keep separate paths where that's useful - -// TODO: see if we really need a separate number_buf or whether we should just -// have a generic scratch - would need to align before using for this -really_inline bool parse_number(const u8 *buf, UNUSED size_t len, - UNUSED ParsedJson &pj, u32 tape_loc, - UNUSED bool found_zero, bool found_minus) { - u32 offset = pj.tape[tape_loc] & 0xffffff; -//////////////// -// This is temporary... but it illustrates how one could use Google's double -// conv. -/// -#ifdef DOUBLECONV - // Maybe surprisingly, StringToDouble does not parse according to the JSON - // spec (e.g., it will happily parse 012 as 12). - int processed_characters_count; - double result_double_conv = converter.StringToDouble( - (const char *)(buf + offset), 10, &processed_characters_count); - *((double *)pj.current_number_buf_loc) = result_double_conv; - pj.tape[tape_loc] = - ((u32)'d') << 24 | - (pj.current_number_buf_loc - - pj.number_buf); // assume 2^24 will hold all numbers for now - pj.current_number_buf_loc += 8; - return result_double_conv == result_double_conv; -#endif - //////////////// - // end of double conv temporary stuff. - //////////////// - if (found_minus) { - offset++; - } - const u8 *src = &buf[offset]; - m256 v = _mm256_loadu_si256((const m256 *)(src)); - u64 error_sump = 0; -#ifdef DEBUG - for (u32 j = 0; j < 32; j++) { - char c = *(src + j); - if (isprint(c)) { - cout << c; - } else { - cout << '_'; - } - } - cout << "| ... number handling input\n"; -#endif - - // categories to extract - // Digits: - // 0 (0x30) - bucket 0 - // 1-9 (never any distinction except if we didn't get the free kick at 0 due - // to the leading minus) (0x31-0x39) - bucket 1 - // . (0x2e) - bucket 2 - // E or e - no distinction (0x45/0x65) - bucket 3 - // + (0x2b) - bucket 4 - // - (0x2d) - bucket 4 - // Terminators - // Whitespace: 0x20, 0x09, 0x0a, 0x0d - bucket 5+6 - // Comma and the closes: 0x2c is comma, } is 0x5d, ] is 0x7d - bucket 5+7 - - // Another shufti - also a bit hand-hacked. Need to make a better construction - const m256 low_nibble_mask = _mm256_setr_epi8( - // 0 1 2 3 4 5 6 7 8 9 a b c d e f - 33, 2, 2, 2, 2, 10, 2, 2, 2, 66, 64, 16, 32, 0xd0, 4, 0, 33, 2, 2, 2, 2, - 10, 2, 2, 2, 66, 64, 16, 32, 0xd0, 4, 0); - const m256 high_nibble_mask = _mm256_setr_epi8( - // 0 1 2 3 4 5 6 7 8 9 a b c d e f - 64, 0, 52, 3, 8, -128, 8, 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 64, 0, 52, 3, 8, - -128, 8, 0x80, 0, 0, 0, 0, 0, 0, 0, 0); - - m256 tmp = _mm256_and_si256( - _mm256_shuffle_epi8(low_nibble_mask, v), - _mm256_shuffle_epi8( - high_nibble_mask, - _mm256_and_si256(_mm256_srli_epi32(v, 4), _mm256_set1_epi8(0x7f)))); -#ifdef DEBUG - // let us print out the magic: - uint8_t buffer[32]; - _mm256_storeu_si256((__m256i *)buffer,tmp); - for(int k = 0; k < 32; k++) - printf("%.2x ",buffer[k]); - printf("\n"); -#endif - m256 enders_mask = _mm256_set1_epi8(0xe0); - m256 tmp_enders = _mm256_cmpeq_epi8(_mm256_and_si256(tmp, enders_mask), - _mm256_set1_epi8(0)); - u32 enders = ~(u32)_mm256_movemask_epi8(tmp_enders); - dumpbits32(enders, "ender characters"); -//dumpbits32_always(enders, "ender characters"); - - if (enders == 0) { - error_sump = 1; - // if enders == 0 we have - // a heroically long number string or some garbage - } - // TODO: make a mask that indicates where our digits are // DANIEL: Isn't that digit_characters? - u32 number_mask = ~enders & (enders - 1); - dumpbits32(number_mask, "number mask"); -//dumpbits32_always(number_mask, "number mask"); - m256 n_mask = _mm256_set1_epi8(0x1f); - m256 tmp_n = - _mm256_cmpeq_epi8(_mm256_and_si256(tmp, n_mask), _mm256_set1_epi8(0)); - u32 number_characters = ~(u32)_mm256_movemask_epi8(tmp_n); - - // put something into our error sump if we have something - // before our ending characters that isn't a valid character - // for the inside of our JSON - number_characters &= number_mask; - error_sump |= number_characters ^ number_mask; - dumpbits32(number_characters, "number characters"); - - m256 d_mask = _mm256_set1_epi8(0x03); - m256 tmp_d = - _mm256_cmpeq_epi8(_mm256_and_si256(tmp, d_mask), _mm256_set1_epi8(0)); - u32 digit_characters = ~(u32)_mm256_movemask_epi8(tmp_d); - digit_characters &= number_mask; - dumpbits32(digit_characters, "digit characters"); - // dumpbits32_always(digit_characters, "digit characters"); - - - m256 p_mask = _mm256_set1_epi8(0x04); - m256 tmp_p = - _mm256_cmpeq_epi8(_mm256_and_si256(tmp, p_mask), _mm256_set1_epi8(0)); - u32 decimal_characters = ~(u32)_mm256_movemask_epi8(tmp_p); - decimal_characters &= number_mask; - dumpbits32(decimal_characters, "decimal characters"); - - m256 e_mask = _mm256_set1_epi8(0x08); - m256 tmp_e = - _mm256_cmpeq_epi8(_mm256_and_si256(tmp, e_mask), _mm256_set1_epi8(0)); - u32 exponent_characters = ~(u32)_mm256_movemask_epi8(tmp_e); - exponent_characters &= number_mask; - dumpbits32(exponent_characters, "exponent characters"); - - - m256 zero_mask = _mm256_set1_epi8(0x1); - m256 tmp_zero = - _mm256_cmpeq_epi8(tmp, zero_mask); - u32 zero_characters = (u32)_mm256_movemask_epi8(tmp_zero); - dumpbits32(zero_characters, "zero characters"); - - // if the zero character is in first position, it - // needs to be followed by decimal or exponent or ender (note: we - // handle found_minus separately) - u32 expo_or_decimal_or_ender = exponent_characters | decimal_characters | enders; - error_sump |= zero_characters & 0x01 & (~(expo_or_decimal_or_ender >> 1)); - - m256 s_mask = _mm256_set1_epi8(0x10); - m256 tmp_s = - _mm256_cmpeq_epi8(_mm256_and_si256(tmp, s_mask), _mm256_set1_epi8(0)); - u32 sign_characters = ~(u32)_mm256_movemask_epi8(tmp_s); - sign_characters &= number_mask; - dumpbits32(sign_characters, "sign characters"); - - u32 digit_edges = ~(digit_characters << 1) & digit_characters; - dumpbits32(digit_edges, "digit_edges"); - - // check that we have 1-3 'edges' only - u32 t = digit_edges; - t &= t - 1; - t &= t - 1; - t &= t - 1; - error_sump |= t; - - // check that we start with a digit - error_sump |= ~digit_characters & 0x1; - - // having done some checks, get lazy and fall back - // to strtoll or strtod - // TODO: handle the easy cases ourselves; these are - // expensive and we've done a lot of the prepwork. - // return errors if strto* fail, otherwise fill in a code on the tape - // 'd' for floating point and 'l' for long and put a pointer to the - // spot in the buffer. - if ( digit_edges == 1) { - //if (__builtin_popcount(digit_edges) == 1) { // DANIEL : shouldn't we have digit_edges == 1 -#define NAIVEINTPARSING // naive means "faster" in this case -#ifdef NAIVEINTPARSING - // this is faster, maybe, because we use a naive strtoll - // should be all digits? - error_sump |= number_characters ^ digit_characters; - int stringlength = __builtin_ctz(~digit_characters); - const char *end = (const char *)src + stringlength; - u64 result = naivestrtoll((const char *)src,end); - if (found_minus) { // unfortunate that it is a branch? - result = -result; - } -#else - // try a strtoll (this is likely slower because it revalidates) - char *end; - u64 result = strtoll((const char *)src, &end, 10); - if ((errno != 0) || (end == (const char *)src)) { - error_sump |= 1; - } - error_sump |= is_not_structural_or_whitespace(*end); - if (found_minus) { - result = -result; - } -#endif -#ifdef DEBUG - cout << "Found number " << result << "\n"; -#endif - *((u64 *)pj.current_number_buf_loc) = result; - pj.tape[tape_loc] = - ((u32)'l') << 24 | - (pj.current_number_buf_loc - - pj.number_buf); // assume 2^24 will hold all numbers for now - pj.current_number_buf_loc += 8; - } else { - // try a strtod - char *end; - double result = strtod((const char *)src, &end); - if ((errno != 0) || (end == (const char *)src)) { - error_sump |= 1; - } - error_sump |= is_not_structural_or_whitespace(*end); - if (found_minus) { - result = -result; - } -#ifdef DEBUG - cout << "Found number " << result << "\n"; -#endif - *((double *)pj.current_number_buf_loc) = result; - pj.tape[tape_loc] = - ((u32)'d') << 24 | - (pj.current_number_buf_loc - - pj.number_buf); // assume 2^24 will hold all numbers for now - pj.current_number_buf_loc += 8; - } - // TODO: check the MSB element is a digit - - // TODO: a whole bunch of checks - - // TODO: <=1 decimal point, eE mark, +- construct - - // TODO: first and last character in mask region must be - // digit - - // TODO: if it exists, - // Decimal point is after the first cluster of numbers only - // and before the second cluster of numbers only. It must - // be digit_or_zero . digit_or_zero strictly - - // TODO: eE mark and +- construct are adjacent with eE first - // eE mark preceeds final cluster of numbers only - // and immediately follows second-last cluster of numbers only (not - // necessarily second, as we may have 4e10). - // it may suffice to insist that eE is preceeded immediately - // by a digit of any kind and that it's followed locally by - // a digit immediately or a +- construct then a digit. - - // TODO: if we have both . and the eE mark then the . must - // precede the eE mark - - if (error_sump) - return false; - return true; -} - -bool tape_disturbed(u32 i, ParsedJson &pj) { - u32 start_loc = i * MAX_TAPE_ENTRIES; - u32 end_loc = pj.tape_locs[i]; - return start_loc != end_loc; -} - -bool shovel_machine(const u8 *buf, size_t len, ParsedJson &pj) { - // fixup the mess made by the ape_machine - // as such it does a bunch of miscellaneous things on the tapes - u32 error_sump = 0; - u64 tv = *(const u64 *)"true "; - u64 nv = *(const u64 *)"null "; - u64 fv = *(const u64 *)"false "; - u64 mask4 = 0x00000000ffffffff; - u64 mask5 = 0x000000ffffffffff; - - // if the tape has been touched at all at the depths outside the safe - // zone we need to quit. Note that our periodic checks to see that we're - // inside our safe zone in stage 3 don't guarantee that the system did - // not get into the danger area briefly. - if (tape_disturbed(START_DEPTH - 1, pj) || - tape_disturbed(REDLINE_DEPTH, pj)) { - return false; - } - - // walk over each tape - for (u32 i = START_DEPTH; i < MAX_DEPTH; i++) { - u32 start_loc = i * MAX_TAPE_ENTRIES; - u32 end_loc = pj.tape_locs[i]; - if (start_loc == end_loc) { - break; - } - for (u32 j = start_loc; j < end_loc; j++) { - switch (pj.tape[j] >> 56) { - case '{': - case '[': { - // pivot our tapes - // point the enclosing structural char (}]) to the head marker ({[) and - // put the end of the sequence on the tape at the head marker - // we start with head marker pointing at the enclosing structural char - // and the enclosing structural char pointing at the end. Just swap - // them. also check the balanced-{} or [] property here - u8 head_marker_c = pj.tape[j] >> 56; - u32 head_marker_loc = pj.tape[j] & 0xffffffffffffffULL; - u64 tape_enclosing = pj.tape[head_marker_loc]; - u8 enclosing_c = tape_enclosing >> 56; - pj.tape[head_marker_loc] = pj.tape[j]; - pj.tape[j] = tape_enclosing; - error_sump |= (enclosing_c - head_marker_c - - 2); // [] and {} only differ by 2 chars - break; - } - case '"': { - error_sump |= !parse_string(buf, len, pj, j); - break; - } - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - error_sump |= !parse_number(buf, len, pj, j, false, false); - break; - case '0': - error_sump |= !parse_number(buf, len, pj, j, true, false); - break; - case '-': - error_sump |= !parse_number(buf, len, pj, j, false, true); - break; - case 't': { - u32 offset = pj.tape[j] & 0xffffffffffffffULL; - const u8 *loc = buf + offset; - u64 locval; // we want to avoid unaligned 64-bit loads (undefined in - // C/C++) - std::memcpy(&locval, loc, sizeof(u64)); - error_sump |= (locval & mask4) ^ tv; - error_sump |= is_not_structural_or_whitespace(loc[4]); - break; - } - case 'f': { - u32 offset = pj.tape[j] & 0xffffffffffffffULL; - const u8 *loc = buf + offset; - u64 locval; // we want to avoid unaligned 64-bit loads (undefined in - // C/C++) - std::memcpy(&locval, loc, sizeof(u64)); - error_sump |= (locval & mask5) ^ fv; - error_sump |= is_not_structural_or_whitespace(loc[5]); - break; - } - case 'n': { - u32 offset = pj.tape[j] & 0xffffffffffffffULL; - const u8 *loc = buf + offset; - u64 locval; // we want to avoid unaligned 64-bit loads (undefined in - // C/C++) - std::memcpy(&locval, loc, sizeof(u64)); - error_sump |= (locval & mask4) ^ nv; - error_sump |= is_not_structural_or_whitespace(loc[4]); - break; - } - default: - break; - } - } - } - /* - if (error_sump) { - return false; - } - */ - return true; -}