Some refactoring.
This commit is contained in:
parent
12b518578d
commit
e5707331e9
8
Makefile
8
Makefile
|
@ -19,9 +19,9 @@ endif
|
|||
|
||||
EXECUTABLES=parse jsoncheck numberparsingcheck stringparsingcheck minifiercompetition parsingcompetition minify allparserscheckfile
|
||||
|
||||
HEADERS= include/jsonparser/simdutf8check.h include/jsonparser/stringparsing.h include/jsonparser/numberparsing.h include/jsonparser/jsonparser.h include/jsonparser/common_defs.h include/jsonparser/jsonioutil.h benchmark/benchmark.h benchmark/linux/linux-perf-events.h include/jsonparser/simdjson_internal.h include/jsonparser/stage1_find_marks.h include/jsonparser/stage2_flatten.h include/jsonparser/stage34_unified.h include/jsonparser/jsoncharutils.h
|
||||
HEADERS= include/simdjson/simdutf8check.h include/simdjson/stringparsing.h include/simdjson/numberparsing.h include/simdjson/jsonparser.h include/simdjson/common_defs.h include/simdjson/jsonioutil.h benchmark/benchmark.h benchmark/linux/linux-perf-events.h include/simdjson/parsedjson.h include/simdjson/stage1_find_marks.h include/simdjson/stage2_flatten.h include/simdjson/stage34_unified.h include/simdjson/jsoncharutils.h include/simdjson/jsonformatutils.h
|
||||
LIBFILES=src/jsonioutil.cpp src/jsonparser.cpp src/stage1_find_marks.cpp src/stage2_flatten.cpp src/stage34_unified.cpp
|
||||
MINIFIERHEADERS=include/jsonparser/jsonminifier.h include/jsonparser/simdprune_tables.h
|
||||
MINIFIERHEADERS=include/simdjson/jsonminifier.h include/simdjson/simdprune_tables.h
|
||||
MINIFIERLIBFILES=src/jsonminifier.cpp
|
||||
|
||||
|
||||
|
@ -100,6 +100,10 @@ allparserscheckfile: tests/allparserscheckfile.cpp $(HEADERS) $(LIBFILES) $(OBJE
|
|||
parsehisto: benchmark/parse.cpp $(HEADERS) $(LIBFILES)
|
||||
$(CXX) $(CXXFLAGS) -o parsehisto benchmark/parse.cpp $(LIBFILES) $(LIBFLAGS) -DBUILDHISTOGRAM
|
||||
|
||||
# Static analysis over the library, benchmark and test sources.
# The benchmark sources live in benchmark/ (singular), matching the paths
# used by HEADERS and the parsehisto rule.
cppcheck:
	cppcheck --enable=all src/*.cpp benchmark/*.cpp tests/*.cpp -Iinclude -I. -Ibenchmark/linux
|
||||
|
||||
|
||||
clean:
|
||||
rm -f $(OBJECTS) $(EXECUTABLES) $(EXTRA_EXECUTABLES)
|
||||
|
||||
|
|
|
@ -7,7 +7,7 @@ Goal: Speed up the parsing of JSON per se.
|
|||
## Code example
|
||||
|
||||
```C
|
||||
#include "jsonparser/jsonparser.h"
|
||||
#include "simdjson/jsonparser.h"
|
||||
|
||||
// ...
|
||||
|
||||
|
|
|
@ -2,9 +2,9 @@
|
|||
#include <iostream>
|
||||
|
||||
#include "benchmark.h"
|
||||
#include "jsonparser/jsonioutil.h"
|
||||
#include "jsonparser/jsonminifier.h"
|
||||
#include "jsonparser/jsonparser.h"
|
||||
#include "simdjson/jsonioutil.h"
|
||||
#include "simdjson/jsonminifier.h"
|
||||
#include "simdjson/jsonparser.h"
|
||||
|
||||
// #define RAPIDJSON_SSE2 // bad
|
||||
// #define RAPIDJSON_SSE42 // bad
|
||||
|
|
|
@ -1,34 +1,37 @@
|
|||
#include "jsonparser/common_defs.h"
|
||||
#include "linux-perf-events.h"
|
||||
#include <algorithm>
|
||||
#include <stdbool.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
#include <x86intrin.h>
|
||||
#include <ctype.h>
|
||||
#include <assert.h>
|
||||
#include <dirent.h>
|
||||
#include <inttypes.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <chrono>
|
||||
#include <cstring>
|
||||
#include <dirent.h>
|
||||
#include <fstream>
|
||||
#include <inttypes.h>
|
||||
#include <iomanip>
|
||||
#include <iostream>
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include <sstream>
|
||||
#include <stdbool.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <string>
|
||||
#include <unistd.h>
|
||||
#include <vector>
|
||||
#include <x86intrin.h>
|
||||
#include <ctype.h>
|
||||
|
||||
|
||||
#include "linux-perf-events.h"
|
||||
|
||||
//#define DEBUG
|
||||
#include "jsonparser/jsonparser.h"
|
||||
#include "jsonparser/jsonioutil.h"
|
||||
#include "jsonparser/simdjson_internal.h"
|
||||
#include "jsonparser/stage1_find_marks.h"
|
||||
#include "jsonparser/stage2_flatten.h"
|
||||
#include "jsonparser/stage34_unified.h"
|
||||
#include "simdjson/common_defs.h"
|
||||
#include "simdjson/jsonparser.h"
|
||||
#include "simdjson/jsonioutil.h"
|
||||
#include "simdjson/parsedjson.h"
|
||||
#include "simdjson/stage1_find_marks.h"
|
||||
#include "simdjson/stage2_flatten.h"
|
||||
#include "simdjson/stage34_unified.h"
|
||||
using namespace std;
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
|
|
|
@ -1,14 +1,15 @@
|
|||
#include <unistd.h>
|
||||
#include "jsonparser/jsonparser.h"
|
||||
#include "simdjson/jsonparser.h"
|
||||
|
||||
#include "benchmark.h"
|
||||
|
||||
// #define RAPIDJSON_SSE2 // bad
|
||||
// #define RAPIDJSON_SSE42 // bad
|
||||
// #define RAPIDJSON_SSE2 // bad for performance
|
||||
// #define RAPIDJSON_SSE42 // bad for performance
|
||||
#include "rapidjson/document.h"
|
||||
#include "rapidjson/reader.h" // you have to check in the submodule
|
||||
#include "rapidjson/reader.h"
|
||||
#include "rapidjson/stringbuffer.h"
|
||||
#include "rapidjson/writer.h"
|
||||
|
||||
#include "json11.cpp"
|
||||
#include "sajson.h"
|
||||
#include "fastjson.cpp"
|
||||
|
@ -18,7 +19,6 @@ extern "C"
|
|||
{
|
||||
#include "ultrajsondec.c"
|
||||
#include "ujdecode.h"
|
||||
|
||||
}
|
||||
using namespace rapidjson;
|
||||
using namespace std;
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
#pragma once
|
||||
|
||||
#include "jsonparser/common_defs.h"
|
||||
#include "jsonparser/simdjson_internal.h"
|
||||
#include "simdjson/common_defs.h"
|
||||
#include "simdjson/parsedjson.h"
|
||||
|
||||
// structural chars here are
|
||||
// they are { 0x7b } 0x7d : 0x3a [ 0x5b ] 0x5d , 0x2c
|
||||
|
@ -109,4 +109,3 @@ inline size_t codepoint_to_utf8(uint32_t cp, u8 *c) {
|
|||
return 0; // bad r
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,33 @@
|
|||
#pragma once
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
// Writes the NUL-terminated byte string `src` to stdout with JSON-style
// escaping: newline, double quote, tab and backslash are emitted as the
// two-character sequences \n, \", \t and \\; any other byte <= 0x1F is
// emitted as a \uXXXX escape; every remaining byte is written unchanged.
static inline void print_with_escapes(const unsigned char *src) {
  for (; *src != '\0'; ++src) {
    switch (*src) {
    case '\n':
      putchar('\\');
      putchar('n');
      break;
    case '\"':
      putchar('\\');
      putchar('"');
      break;
    case '\t':
      putchar('\\');
      putchar('t');
      break;
    case '\\':
      putchar('\\');
      putchar('\\');
      break;
    default:
      if (*src <= 0x1F) {
        // A JSON \u escape must carry exactly four hex digits, so pad with
        // zeros; %x expects an unsigned int argument.
        printf("\\u%04x", static_cast<unsigned int>(*src));
      } else {
        putchar(*src);
      }
      break;
    }
  }
}
|
||||
|
|
@ -7,7 +7,7 @@
|
|||
#include <sstream>
|
||||
#include <string>
|
||||
|
||||
#include "common_defs.h"
|
||||
#include "simdjson/common_defs.h"
|
||||
|
||||
// if you must provide a pointer to some data, create it with this function:
|
||||
// length is the max. size in bytes of the string
|
|
@ -1,11 +1,11 @@
|
|||
#pragma once
|
||||
|
||||
#include "common_defs.h"
|
||||
#include "jsonioutil.h"
|
||||
#include "simdjson_internal.h"
|
||||
#include "stage1_find_marks.h"
|
||||
#include "stage2_flatten.h"
|
||||
#include "stage34_unified.h"
|
||||
#include "simdjson/common_defs.h"
|
||||
#include "simdjson/jsonioutil.h"
|
||||
#include "simdjson/parsedjson.h"
|
||||
#include "simdjson/stage1_find_marks.h"
|
||||
#include "simdjson/stage2_flatten.h"
|
||||
#include "simdjson/stage34_unified.h"
|
||||
|
||||
// Allocate a ParsedJson structure that can support document
|
||||
// up to len bytes.
|
|
@ -1,8 +1,8 @@
|
|||
#pragma once
|
||||
|
||||
#include "common_defs.h"
|
||||
#include "jsonparser/jsoncharutils.h"
|
||||
#include "jsonparser/simdjson_internal.h"
|
||||
#include "simdjson/common_defs.h"
|
||||
#include "simdjson/jsoncharutils.h"
|
||||
#include "simdjson/parsedjson.h"
|
||||
|
||||
static const double power_of_ten[] = {
|
||||
1e-308, 1e-307, 1e-306, 1e-305, 1e-304, 1e-303, 1e-302, 1e-301, 1e-300,
|
|
@ -6,51 +6,16 @@
|
|||
/* Microsoft C/C++-compatible compiler */
|
||||
#include <intrin.h>
|
||||
#else
|
||||
#include <immintrin.h>
|
||||
#include <x86intrin.h>
|
||||
#endif
|
||||
|
||||
#include <iomanip>
|
||||
#include <iostream>
|
||||
|
||||
#include "simdjson/jsonformatutils.h"
|
||||
|
||||
// Constant with the low 56 bits set (fourteen hex F digits).
// NOTE(review): the macro body ends with ';', so the expansion is only valid
// at the end of a statement (e.g. `x & JSONVALUEMASK` inside parentheses would
// not compile) -- confirm against the call sites whether the ';' is intended.
#define JSONVALUEMASK 0xFFFFFFFFFFFFFF;
|
||||
|
||||
// Prints the NUL-terminated byte string `src` on stdout, escaping it the way
// a JSON string body is escaped: \n, \", \t and \\ for newline, double quote,
// tab and backslash; \uXXXX for any other byte <= 0x1F; all other bytes are
// passed through as-is.
static inline void print_with_escapes(const unsigned char *src) {
  for (; *src != '\0'; ++src) {
    switch (*src) {
    case '\n':
      putchar('\\');
      putchar('n');
      break;
    case '\"':
      putchar('\\');
      putchar('"');
      break;
    case '\t':
      putchar('\\');
      putchar('t');
      break;
    case '\\':
      putchar('\\');
      putchar('\\');
      break;
    default:
      if (*src <= 0x1F) {
        // JSON requires four hex digits after \u, hence the zero padding;
        // the cast matches the unsigned int that %x expects.
        printf("\\u%04x", static_cast<unsigned int>(*src));
      } else {
        putchar(*src);
      }
      break;
    }
  }
}
|
||||
|
||||
// const u32 MAX_DEPTH = 2048;
|
||||
// const u32 DEPTH_SAFETY_MARGIN = 32; // should be power-of-2 as we check this
|
||||
// with a modulo in our hot stage 3 loop
|
||||
// const u32 START_DEPTH = DEPTH_SAFETY_MARGIN;
|
||||
// const u32 REDLINE_DEPTH = MAX_DEPTH - DEPTH_SAFETY_MARGIN;
|
||||
// const size_t MAX_TAPE_ENTRIES = 127 * 1024;
|
||||
// const size_t MAX_TAPE = MAX_DEPTH * MAX_TAPE_ENTRIES;
|
||||
|
||||
/////////////
|
||||
// TODO: move this to be more like a real class
|
||||
// currently, you need to create it like so...
|
||||
|
@ -114,7 +79,7 @@ public:
|
|||
if ((inobjectidx[depth] > 0) && (type != ']'))
|
||||
printf(", ");
|
||||
inobjectidx[depth]++;
|
||||
} else if (inobject) {
|
||||
} else { //if (inobject) {
|
||||
if ((inobjectidx[depth] > 0) && ((inobjectidx[depth] & 1) == 0) &&
|
||||
(type != '}'))
|
||||
printf(", ");
|
||||
|
@ -204,6 +169,8 @@ public:
|
|||
|
||||
// Records a double on the tape: first calls write_tape(0, 'd'), then stores
// the raw 64-bit pattern of `d` in the slot at current_loc and advances
// current_loc by one.
really_inline void write_tape_double(double d) {
  // The value has to fit exactly into a single tape slot.
  static_assert(sizeof(tape[current_loc]) == sizeof(d), "mismatch size");
  write_tape(0, 'd');
  // NOTE(review): reading the double through a u64 pointer is type punning;
  // a memcpy of sizeof(d) bytes would be the strictly conforming form --
  // confirm before changing, as <cstring> is not visibly included here.
  const u64 *raw_bits = (u64 *)&d;
  tape[current_loc] = *raw_bits;
  current_loc++;
}
|
||||
|
|
@ -4,7 +4,12 @@
|
|||
#include <stdbool.h>
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#ifdef _MSC_VER
|
||||
/* Microsoft C/C++-compatible compiler */
|
||||
#include <intrin.h>
|
||||
#else
|
||||
#include <x86intrin.h>
|
||||
#endif
|
||||
#include <string.h>
|
||||
/*
|
||||
* legal utf-8 byte sequence
|
|
@ -1,7 +1,7 @@
|
|||
#pragma once
|
||||
|
||||
#include "common_defs.h"
|
||||
#include "simdjson_internal.h"
|
||||
#include "parsedjson.h"
|
||||
|
||||
WARN_UNUSED
|
||||
bool find_structural_bits(const u8 *buf, size_t len, ParsedJson &pj);
|
|
@ -1,7 +1,7 @@
|
|||
#pragma once
|
||||
|
||||
#include "common_defs.h"
|
||||
#include "simdjson_internal.h"
|
||||
#include "simdjson/common_defs.h"
|
||||
#include "simdjson/parsedjson.h"
|
||||
|
||||
WARN_UNUSED
|
||||
bool flatten_indexes(size_t len, ParsedJson &pj);
|
|
@ -1,7 +1,7 @@
|
|||
#pragma once
|
||||
|
||||
#include "common_defs.h"
|
||||
#include "simdjson_internal.h"
|
||||
#include "simdjson/common_defs.h"
|
||||
#include "simdjson/parsedjson.h"
|
||||
|
||||
void init_state_machine();
|
||||
|
|
@ -1,8 +1,8 @@
|
|||
#pragma once
|
||||
|
||||
#include "common_defs.h"
|
||||
#include "jsonparser/simdjson_internal.h"
|
||||
#include "jsonparser/jsoncharutils.h"
|
||||
#include "simdjson/common_defs.h"
|
||||
#include "simdjson/parsedjson.h"
|
||||
#include "simdjson/jsoncharutils.h"
|
||||
|
||||
|
||||
// begin copypasta
|
|
@ -1,4 +1,4 @@
|
|||
#include "jsonparser/jsonioutil.h"
|
||||
#include "simdjson/jsonioutil.h"
|
||||
#include <cstring>
|
||||
|
||||
|
||||
|
|
|
@ -67,7 +67,7 @@ size_t jsonminify(const unsigned char *bytes, size_t howmany,
|
|||
#include <x86intrin.h>
|
||||
#endif // _MSC_VER
|
||||
|
||||
#include "jsonparser/simdprune_tables.h"
|
||||
#include "simdjson/simdprune_tables.h"
|
||||
#include <cstring>
|
||||
#ifndef __clang__
|
||||
static inline __m256i _mm256_loadu2_m128i(__m128i const *__addr_hi,
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
#include "jsonparser/jsonparser.h"
|
||||
#include "simdjson/jsonparser.h"
|
||||
|
||||
// allocate a ParsedJson structure that can support document
|
||||
// up to len bytes.
|
||||
|
|
|
@ -2,14 +2,13 @@
|
|||
/* Microsoft C/C++-compatible compiler */
|
||||
#include <intrin.h>
|
||||
#else
|
||||
#include <immintrin.h>
|
||||
#include <x86intrin.h>
|
||||
#endif
|
||||
|
||||
#include <cassert>
|
||||
|
||||
#include "jsonparser/common_defs.h"
|
||||
#include "jsonparser/simdjson_internal.h"
|
||||
#include "simdjson/common_defs.h"
|
||||
#include "simdjson/parsedjson.h"
|
||||
|
||||
#define UTF8VALIDATE
|
||||
// It seems that many parsers do UTF-8 validation.
|
||||
|
@ -17,7 +16,7 @@
|
|||
// allows it. It appears that sajson might do utf-8
|
||||
// validation
|
||||
#ifdef UTF8VALIDATE
|
||||
#include "jsonparser/simdutf8check.h"
|
||||
#include "simdjson/simdutf8check.h"
|
||||
#endif
|
||||
using namespace std;
|
||||
|
||||
|
@ -156,7 +155,7 @@ WARN_UNUSED
|
|||
u64 quote_mask = _mm_cvtsi128_si64(_mm_clmulepi64_si128(
|
||||
_mm_set_epi64x(0ULL, quote_bits), _mm_set1_epi8(0xFF), 0));
|
||||
quote_mask ^= prev_iter_inside_quote;
|
||||
prev_iter_inside_quote = (u64)((s64)quote_mask >> 63);
|
||||
prev_iter_inside_quote = (u64)((s64)quote_mask >> 63); // right shift of a signed value expected to be well-defined and standard compliant as of C++20
|
||||
dumpbits(quote_mask, "quote_mask");
|
||||
|
||||
// How do we build up a user traversable data structure
|
||||
|
|
|
@ -2,14 +2,13 @@
|
|||
/* Microsoft C/C++-compatible compiler */
|
||||
#include <intrin.h>
|
||||
#else
|
||||
#include <immintrin.h>
|
||||
#include <x86intrin.h>
|
||||
#endif
|
||||
|
||||
#include <cassert>
|
||||
|
||||
#include "jsonparser/common_defs.h"
|
||||
#include "jsonparser/simdjson_internal.h"
|
||||
#include "simdjson/common_defs.h"
|
||||
#include "simdjson/parsedjson.h"
|
||||
|
||||
#ifndef NO_PDEP_PLEASE
|
||||
#define NO_PDEP_PLEASE // though this is not always a win, it seems to
|
||||
|
|
|
@ -2,18 +2,17 @@
|
|||
/* Microsoft C/C++-compatible compiler */
|
||||
#include <intrin.h>
|
||||
#else
|
||||
#include <immintrin.h>
|
||||
#include <x86intrin.h>
|
||||
#endif
|
||||
|
||||
#include <cassert>
|
||||
#include <cstring>
|
||||
|
||||
#include "jsonparser/common_defs.h"
|
||||
#include "jsonparser/jsoncharutils.h"
|
||||
#include "jsonparser/numberparsing.h"
|
||||
#include "jsonparser/simdjson_internal.h"
|
||||
#include "jsonparser/stringparsing.h"
|
||||
#include "simdjson/common_defs.h"
|
||||
#include "simdjson/jsoncharutils.h"
|
||||
#include "simdjson/numberparsing.h"
|
||||
#include "simdjson/parsedjson.h"
|
||||
#include "simdjson/stringparsing.h"
|
||||
|
||||
#include <iostream>
|
||||
//#define DEBUG
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
#include <unistd.h>
|
||||
|
||||
#include "jsonparser/jsonparser.h"
|
||||
#include "simdjson/jsonparser.h"
|
||||
|
||||
|
||||
// #define RAPIDJSON_SSE2 // bad
|
||||
|
|
|
@ -7,7 +7,7 @@
|
|||
#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include "jsonparser/jsonparser.h"
|
||||
#include "simdjson/jsonparser.h"
|
||||
|
||||
/**
|
||||
* Does the file filename ends with the given extension.
|
||||
|
|
|
@ -11,7 +11,7 @@
|
|||
#define JSON_TEST_NUMBERS
|
||||
#endif
|
||||
|
||||
#include "jsonparser/common_defs.h"
|
||||
#include "simdjson/common_defs.h"
|
||||
|
||||
int parse_error;
|
||||
char *fullpath;
|
||||
|
@ -82,7 +82,7 @@ inline void foundFloat(double result, const u8 *buf) {
|
|||
}
|
||||
}
|
||||
|
||||
#include "jsonparser/jsonparser.h"
|
||||
#include "simdjson/jsonparser.h"
|
||||
#include "src/stage34_unified.cpp"
|
||||
|
||||
/**
|
||||
|
|
|
@ -11,7 +11,7 @@
|
|||
#define JSON_TEST_STRINGS
|
||||
#endif
|
||||
|
||||
#include "jsonparser/common_defs.h"
|
||||
#include "simdjson/common_defs.h"
|
||||
|
||||
char *fullpath;
|
||||
|
||||
|
@ -281,7 +281,7 @@ inline void foundString(const u8 *buf, const u8 *parsed_begin,
|
|||
}
|
||||
}
|
||||
|
||||
#include "jsonparser/jsonparser.h"
|
||||
#include "simdjson/jsonparser.h"
|
||||
#include "src/stage34_unified.cpp"
|
||||
|
||||
/**
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
#include <iostream>
|
||||
|
||||
#include "jsonparser/jsonioutil.h"
|
||||
#include "jsonparser/jsonminifier.h"
|
||||
#include "simdjson/jsonioutil.h"
|
||||
#include "simdjson/jsonminifier.h"
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
if (argc != 2) {
|
||||
|
|
Loading…
Reference in New Issue