Some refactoring.

This commit is contained in:
Daniel Lemire 2018-11-30 09:37:57 -05:00
parent 12b518578d
commit e5707331e9
30 changed files with 122 additions and 114 deletions

View File

@ -19,9 +19,9 @@ endif
EXECUTABLES=parse jsoncheck numberparsingcheck stringparsingcheck minifiercompetition parsingcompetition minify allparserscheckfile EXECUTABLES=parse jsoncheck numberparsingcheck stringparsingcheck minifiercompetition parsingcompetition minify allparserscheckfile
HEADERS= include/jsonparser/simdutf8check.h include/jsonparser/stringparsing.h include/jsonparser/numberparsing.h include/jsonparser/jsonparser.h include/jsonparser/common_defs.h include/jsonparser/jsonioutil.h benchmark/benchmark.h benchmark/linux/linux-perf-events.h include/jsonparser/simdjson_internal.h include/jsonparser/stage1_find_marks.h include/jsonparser/stage2_flatten.h include/jsonparser/stage34_unified.h include/jsonparser/jsoncharutils.h HEADERS= include/simdjson/simdutf8check.h include/simdjson/stringparsing.h include/simdjson/numberparsing.h include/simdjson/jsonparser.h include/simdjson/common_defs.h include/simdjson/jsonioutil.h benchmark/benchmark.h benchmark/linux/linux-perf-events.h include/simdjson/parsedjson.h include/simdjson/stage1_find_marks.h include/simdjson/stage2_flatten.h include/simdjson/stage34_unified.h include/simdjson/jsoncharutils.h include/simdjson/jsonformatutils.h
LIBFILES=src/jsonioutil.cpp src/jsonparser.cpp src/stage1_find_marks.cpp src/stage2_flatten.cpp src/stage34_unified.cpp LIBFILES=src/jsonioutil.cpp src/jsonparser.cpp src/stage1_find_marks.cpp src/stage2_flatten.cpp src/stage34_unified.cpp
MINIFIERHEADERS=include/jsonparser/jsonminifier.h include/jsonparser/simdprune_tables.h MINIFIERHEADERS=include/simdjson/jsonminifier.h include/simdjson/simdprune_tables.h
MINIFIERLIBFILES=src/jsonminifier.cpp MINIFIERLIBFILES=src/jsonminifier.cpp
@ -100,6 +100,10 @@ allparserscheckfile: tests/allparserscheckfile.cpp $(HEADERS) $(LIBFILES) $(OBJE
parsehisto: benchmark/parse.cpp $(HEADERS) $(LIBFILES) parsehisto: benchmark/parse.cpp $(HEADERS) $(LIBFILES)
$(CXX) $(CXXFLAGS) -o parsehisto benchmark/parse.cpp $(LIBFILES) $(LIBFLAGS) -DBUILDHISTOGRAM $(CXX) $(CXXFLAGS) -o parsehisto benchmark/parse.cpp $(LIBFILES) $(LIBFLAGS) -DBUILDHISTOGRAM
# Run cppcheck static analysis over the library, benchmark and test sources.
# The benchmark sources live in benchmark/ (singular), matching the
# benchmark/parse.cpp and -Ibenchmark/linux paths used elsewhere in this file.
cppcheck:
	cppcheck --enable=all src/*.cpp benchmark/*.cpp tests/*.cpp -Iinclude -I. -Ibenchmark/linux
clean: clean:
rm -f $(OBJECTS) $(EXECUTABLES) $(EXTRA_EXECUTABLES) rm -f $(OBJECTS) $(EXECUTABLES) $(EXTRA_EXECUTABLES)

View File

@ -7,7 +7,7 @@ Goal: Speed up the parsing of JSON per se.
## Code example ## Code example
```C ```C
#include "jsonparser/jsonparser.h" #include "simdjson/jsonparser.h"
/... /...

View File

@ -2,9 +2,9 @@
#include <iostream> #include <iostream>
#include "benchmark.h" #include "benchmark.h"
#include "jsonparser/jsonioutil.h" #include "simdjson/jsonioutil.h"
#include "jsonparser/jsonminifier.h" #include "simdjson/jsonminifier.h"
#include "jsonparser/jsonparser.h" #include "simdjson/jsonparser.h"
// #define RAPIDJSON_SSE2 // bad // #define RAPIDJSON_SSE2 // bad
// #define RAPIDJSON_SSE42 // bad // #define RAPIDJSON_SSE42 // bad

View File

@ -1,34 +1,37 @@
#include "jsonparser/common_defs.h" #include <stdbool.h>
#include "linux-perf-events.h" #include <stdio.h>
#include <algorithm> #include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <x86intrin.h>
#include <ctype.h>
#include <assert.h> #include <assert.h>
#include <dirent.h>
#include <inttypes.h>
#include <algorithm>
#include <chrono> #include <chrono>
#include <cstring> #include <cstring>
#include <dirent.h>
#include <fstream> #include <fstream>
#include <inttypes.h>
#include <iomanip> #include <iomanip>
#include <iostream> #include <iostream>
#include <map> #include <map>
#include <set> #include <set>
#include <sstream> #include <sstream>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <string> #include <string>
#include <unistd.h>
#include <vector> #include <vector>
#include <x86intrin.h>
#include <ctype.h>
#include "linux-perf-events.h"
//#define DEBUG //#define DEBUG
#include "jsonparser/jsonparser.h" #include "simdjson/common_defs.h"
#include "jsonparser/jsonioutil.h" #include "simdjson/jsonparser.h"
#include "jsonparser/simdjson_internal.h" #include "simdjson/jsonioutil.h"
#include "jsonparser/stage1_find_marks.h" #include "simdjson/parsedjson.h"
#include "jsonparser/stage2_flatten.h" #include "simdjson/stage1_find_marks.h"
#include "jsonparser/stage34_unified.h" #include "simdjson/stage2_flatten.h"
#include "simdjson/stage34_unified.h"
using namespace std; using namespace std;
int main(int argc, char *argv[]) { int main(int argc, char *argv[]) {

View File

@ -1,14 +1,15 @@
#include <unistd.h> #include <unistd.h>
#include "jsonparser/jsonparser.h" #include "simdjson/jsonparser.h"
#include "benchmark.h" #include "benchmark.h"
// #define RAPIDJSON_SSE2 // bad // #define RAPIDJSON_SSE2 // bad for performance
// #define RAPIDJSON_SSE42 // bad // #define RAPIDJSON_SSE42 // bad for performance
#include "rapidjson/document.h" #include "rapidjson/document.h"
#include "rapidjson/reader.h" // you have to check in the submodule #include "rapidjson/reader.h"
#include "rapidjson/stringbuffer.h" #include "rapidjson/stringbuffer.h"
#include "rapidjson/writer.h" #include "rapidjson/writer.h"
#include "json11.cpp" #include "json11.cpp"
#include "sajson.h" #include "sajson.h"
#include "fastjson.cpp" #include "fastjson.cpp"
@ -18,7 +19,6 @@ extern "C"
{ {
#include "ultrajsondec.c" #include "ultrajsondec.c"
#include "ujdecode.h" #include "ujdecode.h"
} }
using namespace rapidjson; using namespace rapidjson;
using namespace std; using namespace std;

View File

@ -1,7 +1,7 @@
#pragma once #pragma once
#include "jsonparser/common_defs.h" #include "simdjson/common_defs.h"
#include "jsonparser/simdjson_internal.h" #include "simdjson/parsedjson.h"
// structural chars here are // structural chars here are
// they are { 0x7b } 0x7d : 0x3a [ 0x5b ] 0x5d , 0x2c // they are { 0x7b } 0x7d : 0x3a [ 0x5b ] 0x5d , 0x2c
@ -109,4 +109,3 @@ inline size_t codepoint_to_utf8(uint32_t cp, u8 *c) {
return 0; // bad r return 0; // bad r
} }

View File

@ -0,0 +1,33 @@
#pragma once
#include <stdio.h>
// Writes the NUL-terminated byte string `src` to stdout, escaping it so that
// the output can be placed between the quotes of a JSON string literal.
//
// - newline, double quote, tab and backslash use their two-character escapes
//   (\n, \", \t, \\);
// - every other byte <= 0x1F is written as a \uXXXX escape;
// - all remaining bytes are written unchanged.
//
// Nothing is returned; output goes to stdout via putchar/printf.
static inline void print_with_escapes(const unsigned char *src) {
  while (*src) {
    switch (*src) {
    case '\n':
      putchar('\\');
      putchar('n');
      break;
    case '\"':
      putchar('\\');
      putchar('"');
      break;
    case '\t':
      putchar('\\');
      putchar('t');
      break;
    case '\\':
      putchar('\\');
      putchar('\\');
      break;
    default:
      if (*src <= 0x1F) {
        // JSON requires exactly four hex digits after \u; an unpadded "%x"
        // would emit e.g. "\u1", which is not a valid escape sequence.
        printf("\\u%04x", static_cast<unsigned int>(*src));
      } else {
        putchar(*src);
      }
    }
    src++;
  }
}

View File

@ -7,7 +7,7 @@
#include <sstream> #include <sstream>
#include <string> #include <string>
#include "common_defs.h" #include "simdjson/common_defs.h"
// if you must provide a pointer to some data, create it with this function: // if you must provide a pointer to some data, create it with this function:
// length is the max. size in bytes of the string // length is the max. size in bytes of the string

View File

@ -1,11 +1,11 @@
#pragma once #pragma once
#include "common_defs.h" #include "simdjson/common_defs.h"
#include "jsonioutil.h" #include "simdjson/jsonioutil.h"
#include "simdjson_internal.h" #include "simdjson/parsedjson.h"
#include "stage1_find_marks.h" #include "simdjson/stage1_find_marks.h"
#include "stage2_flatten.h" #include "simdjson/stage2_flatten.h"
#include "stage34_unified.h" #include "simdjson/stage34_unified.h"
// Allocate a ParsedJson structure that can support document // Allocate a ParsedJson structure that can support document
// up to len bytes. // up to len bytes.

View File

@ -1,8 +1,8 @@
#pragma once #pragma once
#include "common_defs.h" #include "simdjson/common_defs.h"
#include "jsonparser/jsoncharutils.h" #include "simdjson/jsoncharutils.h"
#include "jsonparser/simdjson_internal.h" #include "simdjson/parsedjson.h"
static const double power_of_ten[] = { static const double power_of_ten[] = {
1e-308, 1e-307, 1e-306, 1e-305, 1e-304, 1e-303, 1e-302, 1e-301, 1e-300, 1e-308, 1e-307, 1e-306, 1e-305, 1e-304, 1e-303, 1e-302, 1e-301, 1e-300,

View File

@ -6,51 +6,16 @@
/* Microsoft C/C++-compatible compiler */ /* Microsoft C/C++-compatible compiler */
#include <intrin.h> #include <intrin.h>
#else #else
#include <immintrin.h>
#include <x86intrin.h> #include <x86intrin.h>
#endif #endif
#include <iomanip> #include <iomanip>
#include <iostream> #include <iostream>
#include "simdjson/jsonformatutils.h"
#define JSONVALUEMASK 0xFFFFFFFFFFFFFF; #define JSONVALUEMASK 0xFFFFFFFFFFFFFF;
// Writes the NUL-terminated byte string `src` to stdout, escaping it so that
// the output can be placed between the quotes of a JSON string literal.
//
// - newline, double quote, tab and backslash use their two-character escapes
//   (\n, \", \t, \\);
// - every other byte <= 0x1F is written as a \uXXXX escape;
// - all remaining bytes are written unchanged.
//
// Nothing is returned; output goes to stdout via putchar/printf.
static inline void print_with_escapes(const unsigned char *src) {
  while (*src) {
    switch (*src) {
    case '\n':
      putchar('\\');
      putchar('n');
      break;
    case '\"':
      putchar('\\');
      putchar('"');
      break;
    case '\t':
      putchar('\\');
      putchar('t');
      break;
    case '\\':
      putchar('\\');
      putchar('\\');
      break;
    default:
      if (*src <= 0x1F) {
        // JSON requires exactly four hex digits after \u; an unpadded "%x"
        // would emit e.g. "\u1", which is not a valid escape sequence.
        printf("\\u%04x", static_cast<unsigned int>(*src));
      } else {
        putchar(*src);
      }
    }
    src++;
  }
}
// const u32 MAX_DEPTH = 2048;
// const u32 DEPTH_SAFETY_MARGIN = 32; // should be power-of-2 as we check this
// with a modulo in our hot stage 3 loop
// const u32 START_DEPTH = DEPTH_SAFETY_MARGIN;
// const u32 REDLINE_DEPTH = MAX_DEPTH - DEPTH_SAFETY_MARGIN;
// const size_t MAX_TAPE_ENTRIES = 127 * 1024;
// const size_t MAX_TAPE = MAX_DEPTH * MAX_TAPE_ENTRIES;
///////////// /////////////
// TODO: move this to be more like a real class // TODO: move this to be more like a real class
// currently, you need to create it like so... // currently, you need to create it like so...
@ -114,7 +79,7 @@ public:
if ((inobjectidx[depth] > 0) && (type != ']')) if ((inobjectidx[depth] > 0) && (type != ']'))
printf(", "); printf(", ");
inobjectidx[depth]++; inobjectidx[depth]++;
} else if (inobject) { } else { //if (inobject) {
if ((inobjectidx[depth] > 0) && ((inobjectidx[depth] & 1) == 0) && if ((inobjectidx[depth] > 0) && ((inobjectidx[depth] & 1) == 0) &&
(type != '}')) (type != '}'))
printf(", "); printf(", ");
@ -204,6 +169,8 @@ public:
really_inline void write_tape_double(double d) { really_inline void write_tape_double(double d) {
write_tape(0, 'd'); write_tape(0, 'd');
static_assert(sizeof(d) == sizeof(tape[current_loc]),
"mismatch size");
tape[current_loc++] =*( (u64*) &d); tape[current_loc++] =*( (u64*) &d);
} }

View File

@ -4,7 +4,12 @@
#include <stdbool.h> #include <stdbool.h>
#include <stddef.h> #include <stddef.h>
#include <stdint.h> #include <stdint.h>
#ifdef _MSC_VER
/* Microsoft C/C++-compatible compiler */
#include <intrin.h>
#else
#include <x86intrin.h> #include <x86intrin.h>
#endif
#include <string.h> #include <string.h>
/* /*
* legal utf-8 byte sequence * legal utf-8 byte sequence

View File

@ -1,7 +1,7 @@
#pragma once #pragma once
#include "common_defs.h" #include "common_defs.h"
#include "simdjson_internal.h" #include "parsedjson.h"
WARN_UNUSED WARN_UNUSED
bool find_structural_bits(const u8 *buf, size_t len, ParsedJson &pj); bool find_structural_bits(const u8 *buf, size_t len, ParsedJson &pj);

View File

@ -1,7 +1,7 @@
#pragma once #pragma once
#include "common_defs.h" #include "simdjson/common_defs.h"
#include "simdjson_internal.h" #include "simdjson/parsedjson.h"
WARN_UNUSED WARN_UNUSED
bool flatten_indexes(size_t len, ParsedJson &pj); bool flatten_indexes(size_t len, ParsedJson &pj);

View File

@ -1,7 +1,7 @@
#pragma once #pragma once
#include "common_defs.h" #include "simdjson/common_defs.h"
#include "simdjson_internal.h" #include "simdjson/parsedjson.h"
void init_state_machine(); void init_state_machine();

View File

@ -1,8 +1,8 @@
#pragma once #pragma once
#include "common_defs.h" #include "simdjson/common_defs.h"
#include "jsonparser/simdjson_internal.h" #include "simdjson/parsedjson.h"
#include "jsonparser/jsoncharutils.h" #include "simdjson/jsoncharutils.h"
// begin copypasta // begin copypasta

View File

@ -1,4 +1,4 @@
#include "jsonparser/jsonioutil.h" #include "simdjson/jsonioutil.h"
#include <cstring> #include <cstring>

View File

@ -67,7 +67,7 @@ size_t jsonminify(const unsigned char *bytes, size_t howmany,
#include <x86intrin.h> #include <x86intrin.h>
#endif // _MSC_VER #endif // _MSC_VER
#include "jsonparser/simdprune_tables.h" #include "simdjson/simdprune_tables.h"
#include <cstring> #include <cstring>
#ifndef __clang__ #ifndef __clang__
static inline __m256i _mm256_loadu2_m128i(__m128i const *__addr_hi, static inline __m256i _mm256_loadu2_m128i(__m128i const *__addr_hi,

View File

@ -1,4 +1,4 @@
#include "jsonparser/jsonparser.h" #include "simdjson/jsonparser.h"
// allocate a ParsedJson structure that can support document // allocate a ParsedJson structure that can support document
// up to len bytes. // up to len bytes.

View File

@ -2,14 +2,13 @@
/* Microsoft C/C++-compatible compiler */ /* Microsoft C/C++-compatible compiler */
#include <intrin.h> #include <intrin.h>
#else #else
#include <immintrin.h>
#include <x86intrin.h> #include <x86intrin.h>
#endif #endif
#include <cassert> #include <cassert>
#include "jsonparser/common_defs.h" #include "simdjson/common_defs.h"
#include "jsonparser/simdjson_internal.h" #include "simdjson/parsedjson.h"
#define UTF8VALIDATE #define UTF8VALIDATE
// It seems that many parsers do UTF-8 validation. // It seems that many parsers do UTF-8 validation.
@ -17,7 +16,7 @@
// allows it. It appears that sajson might do utf-8 // allows it. It appears that sajson might do utf-8
// validation // validation
#ifdef UTF8VALIDATE #ifdef UTF8VALIDATE
#include "jsonparser/simdutf8check.h" #include "simdjson/simdutf8check.h"
#endif #endif
using namespace std; using namespace std;
@ -156,7 +155,7 @@ WARN_UNUSED
u64 quote_mask = _mm_cvtsi128_si64(_mm_clmulepi64_si128( u64 quote_mask = _mm_cvtsi128_si64(_mm_clmulepi64_si128(
_mm_set_epi64x(0ULL, quote_bits), _mm_set1_epi8(0xFF), 0)); _mm_set_epi64x(0ULL, quote_bits), _mm_set1_epi8(0xFF), 0));
quote_mask ^= prev_iter_inside_quote; quote_mask ^= prev_iter_inside_quote;
prev_iter_inside_quote = (u64)((s64)quote_mask >> 63); prev_iter_inside_quote = (u64)((s64)quote_mask >> 63); // right shift of a signed value expected to be well-defined and standard compliant as of C++20
dumpbits(quote_mask, "quote_mask"); dumpbits(quote_mask, "quote_mask");
// How do we build up a user traversable data structure // How do we build up a user traversable data structure

View File

@ -2,14 +2,13 @@
/* Microsoft C/C++-compatible compiler */ /* Microsoft C/C++-compatible compiler */
#include <intrin.h> #include <intrin.h>
#else #else
#include <immintrin.h>
#include <x86intrin.h> #include <x86intrin.h>
#endif #endif
#include <cassert> #include <cassert>
#include "jsonparser/common_defs.h" #include "simdjson/common_defs.h"
#include "jsonparser/simdjson_internal.h" #include "simdjson/parsedjson.h"
#ifndef NO_PDEP_PLEASE #ifndef NO_PDEP_PLEASE
#define NO_PDEP_PLEASE // though this is not always a win, it seems to #define NO_PDEP_PLEASE // though this is not always a win, it seems to

View File

@ -2,18 +2,17 @@
/* Microsoft C/C++-compatible compiler */ /* Microsoft C/C++-compatible compiler */
#include <intrin.h> #include <intrin.h>
#else #else
#include <immintrin.h>
#include <x86intrin.h> #include <x86intrin.h>
#endif #endif
#include <cassert> #include <cassert>
#include <cstring> #include <cstring>
#include "jsonparser/common_defs.h" #include "simdjson/common_defs.h"
#include "jsonparser/jsoncharutils.h" #include "simdjson/jsoncharutils.h"
#include "jsonparser/numberparsing.h" #include "simdjson/numberparsing.h"
#include "jsonparser/simdjson_internal.h" #include "simdjson/parsedjson.h"
#include "jsonparser/stringparsing.h" #include "simdjson/stringparsing.h"
#include <iostream> #include <iostream>
//#define DEBUG //#define DEBUG

View File

@ -1,6 +1,6 @@
#include <unistd.h> #include <unistd.h>
#include "jsonparser/jsonparser.h" #include "simdjson/jsonparser.h"
// #define RAPIDJSON_SSE2 // bad // #define RAPIDJSON_SSE2 // bad

View File

@ -7,7 +7,7 @@
#include <stdlib.h> #include <stdlib.h>
#include <unistd.h> #include <unistd.h>
#include "jsonparser/jsonparser.h" #include "simdjson/jsonparser.h"
/** /**
* Does the filename end with the given extension. * Does the filename end with the given extension.

View File

@ -11,7 +11,7 @@
#define JSON_TEST_NUMBERS #define JSON_TEST_NUMBERS
#endif #endif
#include "jsonparser/common_defs.h" #include "simdjson/common_defs.h"
int parse_error; int parse_error;
char *fullpath; char *fullpath;
@ -82,7 +82,7 @@ inline void foundFloat(double result, const u8 *buf) {
} }
} }
#include "jsonparser/jsonparser.h" #include "simdjson/jsonparser.h"
#include "src/stage34_unified.cpp" #include "src/stage34_unified.cpp"
/** /**

View File

@ -11,7 +11,7 @@
#define JSON_TEST_STRINGS #define JSON_TEST_STRINGS
#endif #endif
#include "jsonparser/common_defs.h" #include "simdjson/common_defs.h"
char *fullpath; char *fullpath;
@ -281,7 +281,7 @@ inline void foundString(const u8 *buf, const u8 *parsed_begin,
} }
} }
#include "jsonparser/jsonparser.h" #include "simdjson/jsonparser.h"
#include "src/stage34_unified.cpp" #include "src/stage34_unified.cpp"
/** /**

View File

@ -1,7 +1,7 @@
#include <iostream> #include <iostream>
#include "jsonparser/jsonioutil.h" #include "simdjson/jsonioutil.h"
#include "jsonparser/jsonminifier.h" #include "simdjson/jsonminifier.h"
int main(int argc, char *argv[]) { int main(int argc, char *argv[]) {
if (argc != 2) { if (argc != 2) {