Merge branch 'stage12unified_attempt2'

This commit is contained in:
Daniel Lemire 2018-12-31 17:33:01 -05:00
commit bad32be5f6
16 changed files with 195 additions and 349 deletions

View File

@ -27,8 +27,8 @@ TESTEXECUTABLES=jsoncheck numberparsingcheck stringparsingcheck
COMPARISONEXECUTABLES=minifiercompetition parsingcompetition parseandstatcompetition distinctuseridcompetition allparserscheckfile
SUPPLEMENTARYEXECUTABLES=parse_noutf8validation parse_nonumberparsing parse_nostringparsing
HEADERS= include/simdjson/simdutf8check.h include/simdjson/stringparsing.h include/simdjson/numberparsing.h include/simdjson/jsonparser.h include/simdjson/common_defs.h include/simdjson/jsonioutil.h benchmark/benchmark.h benchmark/linux/linux-perf-events.h include/simdjson/parsedjson.h include/simdjson/stage1_find_marks.h include/simdjson/stage2_flatten.h include/simdjson/stage34_unified.h include/simdjson/jsoncharutils.h include/simdjson/jsonformatutils.h
LIBFILES=src/jsonioutil.cpp src/jsonparser.cpp src/stage1_find_marks.cpp src/stage2_flatten.cpp src/stage34_unified.cpp
HEADERS= include/simdjson/simdutf8check.h include/simdjson/stringparsing.h include/simdjson/numberparsing.h include/simdjson/jsonparser.h include/simdjson/common_defs.h include/simdjson/jsonioutil.h benchmark/benchmark.h benchmark/linux/linux-perf-events.h include/simdjson/parsedjson.h include/simdjson/stage1_find_marks.h include/simdjson/stage2_build_tape.h include/simdjson/jsoncharutils.h include/simdjson/jsonformatutils.h
LIBFILES=src/jsonioutil.cpp src/jsonparser.cpp src/stage1_find_marks.cpp src/stage2_build_tape.cpp
MINIFIERHEADERS=include/simdjson/jsonminifier.h include/simdjson/simdprune_tables.h
MINIFIERLIBFILES=src/jsonminifier.cpp
@ -106,11 +106,11 @@ jsoncheck:tests/jsoncheck.cpp $(HEADERS) $(LIBFILES)
$(CXX) $(CXXFLAGS) -o jsoncheck $(LIBFILES) tests/jsoncheck.cpp -I. $(LIBFLAGS)
numberparsingcheck:tests/numberparsingcheck.cpp $(HEADERS) $(LIBFILES)
$(CXX) $(CXXFLAGS) -o numberparsingcheck tests/numberparsingcheck.cpp src/jsonioutil.cpp src/jsonparser.cpp src/stage1_find_marks.cpp src/stage2_flatten.cpp -I. $(LIBFLAGS) -DJSON_TEST_NUMBERS
$(CXX) $(CXXFLAGS) -o numberparsingcheck tests/numberparsingcheck.cpp src/jsonioutil.cpp src/jsonparser.cpp src/stage1_find_marks.cpp -I. $(LIBFLAGS) -DJSON_TEST_NUMBERS
stringparsingcheck:tests/stringparsingcheck.cpp $(HEADERS) $(LIBFILES)
$(CXX) $(CXXFLAGS) -o stringparsingcheck tests/stringparsingcheck.cpp src/jsonioutil.cpp src/jsonparser.cpp src/stage1_find_marks.cpp src/stage2_flatten.cpp -I. $(LIBFLAGS) -DJSON_TEST_STRINGS
$(CXX) $(CXXFLAGS) -o stringparsingcheck tests/stringparsingcheck.cpp src/jsonioutil.cpp src/jsonparser.cpp src/stage1_find_marks.cpp -I. $(LIBFLAGS) -DJSON_TEST_STRINGS
minifiercompetition: benchmark/minifiercompetition.cpp $(HEADERS) $(MINIFIERHEADERS) $(LIBFILES) $(MINIFIERLIBFILES)

View File

@ -17,8 +17,7 @@ $SCRIPTPATH/src/jsonioutil.cpp
$SCRIPTPATH/src/jsonminifier.cpp
$SCRIPTPATH/src/jsonparser.cpp
$SCRIPTPATH/src/stage1_find_marks.cpp
$SCRIPTPATH/src/stage2_flatten.cpp
$SCRIPTPATH/src/stage34_unified.cpp
$SCRIPTPATH/src/stage2_build_tape.cpp
"
# order matters
@ -33,10 +32,9 @@ $SCRIPTPATH/include/simdjson/simdutf8check.h
$SCRIPTPATH/include/simdjson/jsonminifier.h
$SCRIPTPATH/include/simdjson/parsedjson.h
$SCRIPTPATH/include/simdjson/stage1_find_marks.h
$SCRIPTPATH/include/simdjson/stage2_flatten.h
$SCRIPTPATH/include/simdjson/stringparsing.h
$SCRIPTPATH/include/simdjson/numberparsing.h
$SCRIPTPATH/include/simdjson/stage34_unified.h
$SCRIPTPATH/include/simdjson/stage2_build_tape.h
$SCRIPTPATH/include/simdjson/jsonparser.h
"

View File

@ -35,8 +35,7 @@
#include "simdjson/jsonparser.h"
#include "simdjson/parsedjson.h"
#include "simdjson/stage1_find_marks.h"
#include "simdjson/stage2_flatten.h"
#include "simdjson/stage34_unified.h"
#include "simdjson/stage2_build_tape.h"
using namespace std;
int main(int argc, char *argv[]) {
@ -166,20 +165,6 @@ int main(int argc, char *argv[]) {
}
unified.start();
#endif
isok = isok && flatten_indexes(p.size(), pj);
#ifndef SQUASH_COUNTERS
unified.end(results);
cy2 += results[0];
cl2 += results[1];
mis2 += results[2];
cref2 += results[3];
cmis2 += results[4];
if (!isok) {
cout << "Failed out during stage 2\n";
break;
}
unified.start();
#endif
isok = isok && unified_machine(p.data(), p.size(), pj);
#ifndef SQUASH_COUNTERS
@ -249,23 +234,12 @@ int main(int argc, char *argv[]) {
(double)cy1 / (iterations * p.size()));
printf("stage 2 instructions: %10lu cycles: %10lu (%.2f %%) ins/cycles: "
"%.2f mis. branches: %10lu (cycles/mis.branch %.2f) cache "
"accesses: %10lu (failure %10lu)\n",
cl2 / iterations, cy2 / iterations, 100. * cy2 / total,
(double)cl2 / cy2, mis2 / iterations, (double)cy2 / mis2,
cref2 / iterations, cmis2 / iterations);
printf(" stage 2 runs at %.2f cycles per input byte and ",
(double)cy2 / (iterations * p.size()));
printf("%.2f cycles per structural character.\n",
(double)cy2 / (iterations * pj.n_structural_indexes));
printf("stage 3 instructions: %10lu cycles: %10lu (%.2f %%) ins/cycles: "
"%.2f mis. branches: %10lu (cycles/mis.branch %.2f) cache "
"accesses: %10lu (failure %10lu)\n",
cl3 / iterations, cy3 / iterations, 100. * cy3 / total,
(double)cl3 / cy3, mis3 / iterations, (double)cy3 / mis3,
cref3 / iterations, cmis3 / iterations);
printf(" stage 3 runs at %.2f cycles per input byte and ",
printf(" stage 2 runs at %.2f cycles per input byte and ",
(double)cy3 / (iterations * p.size()));
printf("%.2f cycles per structural character.\n",
(double)cy3 / (iterations * pj.n_structural_indexes));

View File

@ -5,8 +5,7 @@
#include "simdjson/jsonioutil.h"
#include "simdjson/parsedjson.h"
#include "simdjson/stage1_find_marks.h"
#include "simdjson/stage2_flatten.h"
#include "simdjson/stage34_unified.h"
#include "simdjson/stage2_build_tape.h"

View File

@ -26,7 +26,7 @@ public:
// allocate memory
ParsedJson()
: bytecapacity(0), depthcapacity(0), tapecapacity(0), stringcapacity(0),
current_loc(0), structurals(NULL), n_structural_indexes(0),
current_loc(0), n_structural_indexes(0),
structural_indexes(NULL), tape(NULL), containing_scope_offset(NULL),
ret_address(NULL), string_buf(NULL), current_string_buf_loc(NULL), isvalid(false) {}
@ -45,11 +45,6 @@ public:
}
isvalid = false;
bytecapacity = 0; // will only set it to len after allocations are a success
structurals = (uint8_t *)aligned_malloc(8, ROUNDUP_N(len, 64) / 8);
if (structurals == NULL) {
std::cerr << "Could not allocate memory for structurals" << std::endl;
return false;
};
n_structural_indexes = 0;
uint32_t max_structures = ROUNDUP_N(len, 64) + 2 + 7;
structural_indexes = new uint32_t[max_structures];
@ -71,7 +66,6 @@ public:
if(tape != NULL) delete[] tape;
if(string_buf != NULL) delete[] string_buf;
if(structural_indexes != NULL) delete[] structural_indexes;
aligned_free(structurals);
return false;
}
@ -98,7 +92,6 @@ public:
if(tape != NULL) delete[] tape;
if(string_buf != NULL) delete[] string_buf;
if(structural_indexes != NULL) delete[] structural_indexes;
aligned_free(structurals);
isvalid = false;
}
@ -682,14 +675,12 @@ private:
};
size_t bytecapacity; // indicates how many bits are meant to be supported by
// structurals
size_t bytecapacity; // indicates how many bits are meant to be supported
size_t depthcapacity; // how deep we can go
size_t tapecapacity;
size_t stringcapacity;
uint32_t current_loc;
uint8_t *structurals;
uint32_t n_structural_indexes;
uint32_t *structural_indexes;
@ -712,7 +703,6 @@ private:
tapecapacity(std::move(p.tapecapacity)),
stringcapacity(std::move(p.stringcapacity)),
current_loc(std::move(p.current_loc)),
structurals(std::move(p.structurals)),
n_structural_indexes(std::move(p.n_structural_indexes)),
structural_indexes(std::move(p.structural_indexes)),
tape(std::move(p.tape)),
@ -721,7 +711,6 @@ private:
string_buf(std::move(p.string_buf)),
current_string_buf_loc(std::move(p.current_string_buf_loc)),
isvalid(std::move(p.isvalid)) {
p.structurals=NULL;
p.structural_indexes=NULL;
p.tape=NULL;
p.containing_scope_offset=NULL;

View File

@ -1,10 +0,0 @@
// Declaration of the "flatten" pass: turns the per-64-byte structural
// bitmask held by a ParsedJson into a flat list of byte offsets.
#ifndef SIMDJSON_STAGE2_FLATTEN_H
#define SIMDJSON_STAGE2_FLATTEN_H
#include "simdjson/common_defs.h"
#include "simdjson/parsedjson.h"
// Reads the bitmask in pj.structurals (one bit per input byte, for the first
// `len` bytes), writes the offset of every set bit to pj.structural_indexes
// and stores the number of offsets in pj.n_structural_indexes.
// Returns false when the last recorded offset lies beyond `len`; the result
// must not be ignored (WARN_UNUSED).
WARN_UNUSED
bool flatten_indexes(size_t len, ParsedJson &pj);
#endif

View File

@ -1,4 +1,4 @@
/* auto-generated on Mon Dec 31 11:59:09 EST 2018. Do not edit! */
/* auto-generated on Mon Dec 31 17:13:28 EST 2018. Do not edit! */
#include <iostream>
#include "simdjson.h"

View File

@ -1,4 +1,4 @@
/* auto-generated on Mon Dec 31 11:59:09 EST 2018. Do not edit! */
/* auto-generated on Mon Dec 31 17:13:28 EST 2018. Do not edit! */
#include "simdjson.h"
/* used for http://dmalloc.com/ Dmalloc - Debug Malloc Library */
@ -341,12 +341,12 @@ bool json_parse(const uint8_t *buf, size_t len, ParsedJson &pj, bool reallocifne
}
}
bool isok = find_structural_bits(buf, len, pj);
if (isok) {
/*if (isok) {
isok = flatten_indexes(len, pj);
} else {
if(reallocated) free((void*)buf);
return false;
}
}*/
if (isok) {
isok = unified_machine(buf, len, pj);
} else {
@ -377,6 +377,35 @@ ParsedJson build_parsed_json(const uint8_t *buf, size_t len, bool reallocifneede
#define SIMDJSON_UTF8VALIDATE
#endif
#ifndef NO_PDEP_WIDTH
#define NO_PDEP_WIDTH 8
#endif
#define SET_BIT(i) \
base_ptr[base + i] = (uint32_t)idx - 64 + trailingzeroes(structurals); \
structurals = structurals & (structurals - 1);
#define SET_BIT1 SET_BIT(0)
#define SET_BIT2 SET_BIT1 SET_BIT(1)
#define SET_BIT3 SET_BIT2 SET_BIT(2)
#define SET_BIT4 SET_BIT3 SET_BIT(3)
#define SET_BIT5 SET_BIT4 SET_BIT(4)
#define SET_BIT6 SET_BIT5 SET_BIT(5)
#define SET_BIT7 SET_BIT6 SET_BIT(6)
#define SET_BIT8 SET_BIT7 SET_BIT(7)
#define SET_BIT9 SET_BIT8 SET_BIT(8)
#define SET_BIT10 SET_BIT9 SET_BIT(9)
#define SET_BIT11 SET_BIT10 SET_BIT(10)
#define SET_BIT12 SET_BIT11 SET_BIT(11)
#define SET_BIT13 SET_BIT12 SET_BIT(12)
#define SET_BIT14 SET_BIT13 SET_BIT(13)
#define SET_BIT15 SET_BIT14 SET_BIT(14)
#define SET_BIT16 SET_BIT15 SET_BIT(15)
#define CALL(macro, ...) macro(__VA_ARGS__)
#define SET_BITLOOPN(n) SET_BIT##n
// It seems that many parsers do UTF-8 validation.
// RapidJSON does not do it by default, but a flag
// allows it.
@ -402,11 +431,13 @@ WARN_UNUSED
cerr << "Your ParsedJson object only supports documents up to "<< pj.bytecapacity << " bytes but you are trying to process " << len << " bytes\n";
return false;
}
uint32_t *base_ptr = pj.structural_indexes;
uint32_t base = 0;
#ifdef SIMDJSON_UTF8VALIDATE
__m256i has_error = _mm256_setzero_si256();
struct avx_processed_utf_bytes previous;
previous.rawbytes = _mm256_setzero_si256();
previous.high_nibbles = _mm256_setzero_si256();
previous.rawbytes = _mm256_setzero_si256();
previous.high_nibbles = _mm256_setzero_si256();
previous.carried_continuations = _mm256_setzero_si256();
#endif
@ -427,6 +458,7 @@ WARN_UNUSED
uint64_t prev_iter_ends_pseudo_pred = 1ULL;
size_t lenminus64 = len < 64 ? 0 : len - 64;
size_t idx = 0;
uint64_t structurals = 0;
for (; idx < lenminus64; idx += 64) {
#ifndef _MSC_VER
__builtin_prefetch(buf + idx + 128);
@ -490,6 +522,21 @@ WARN_UNUSED
quote_bits = quote_bits & ~odd_ends;
uint64_t quote_mask = _mm_cvtsi128_si64(_mm_clmulepi64_si128(
_mm_set_epi64x(0ULL, quote_bits), _mm_set1_epi8(0xFF), 0));
uint32_t cnt = hamming(structurals);
uint32_t next_base = base + cnt;
while (structurals) {
CALL(SET_BITLOOPN, NO_PDEP_WIDTH)
/*for(size_t i = 0; i < NO_PDEP_WIDTH; i++) {
base_ptr[base+i] = (uint32_t)idx + trailingzeroes(s);
s = s & (s - 1);
}*/
base += NO_PDEP_WIDTH;
}
base = next_base;
quote_mask ^= prev_iter_inside_quote;
prev_iter_inside_quote = (uint64_t)((int64_t)quote_mask >> 63); // right shift of a signed value expected to be well-defined and standard compliant as of C++20, John Regher from Utah U. says this is fine code
@ -531,7 +578,7 @@ WARN_UNUSED
uint64_t structural_res_0 = (uint32_t)_mm256_movemask_epi8(tmp_lo);
uint64_t structural_res_1 = _mm256_movemask_epi8(tmp_hi);
uint64_t structurals = ~(structural_res_0 | (structural_res_1 << 32));
structurals = ~(structural_res_0 | (structural_res_1 << 32));
// this additional mask and transfer is non-trivially expensive,
// unfortunately
@ -570,7 +617,8 @@ WARN_UNUSED
// now, we've used our close quotes all we need to. So let's switch them off
// they will be off in the quote mask and on in quote bits.
structurals &= ~(quote_bits & ~quote_mask);
*(uint64_t *)(pj.structurals + idx / 8) = structurals;
//*(uint64_t *)(pj.structurals + idx / 8) = structurals;
}
////////////////
@ -644,6 +692,17 @@ WARN_UNUSED
quote_mask ^= prev_iter_inside_quote;
//prev_iter_inside_quote = (uint64_t)((int64_t)quote_mask >> 63); // right shift of a signed value expected to be well-defined and standard compliant as of C++20
uint32_t cnt = hamming(structurals);
uint32_t next_base = base + cnt;
while (structurals) {
CALL(SET_BITLOOPN, NO_PDEP_WIDTH)
/*for(size_t i = 0; i < NO_PDEP_WIDTH; i++) {
base_ptr[base+i] = (uint32_t)idx + trailingzeroes(s);
s = s & (s - 1);
}*/
base += NO_PDEP_WIDTH;
}
base = next_base;
// How do we build up a user traversable data structure
// first, do a 'shufti' to detect structural JSON characters
// they are { 0x7b } 0x7d : 0x3a [ 0x5b ] 0x5d , 0x2c
@ -682,7 +741,7 @@ WARN_UNUSED
uint64_t structural_res_0 = (uint32_t)_mm256_movemask_epi8(tmp_lo);
uint64_t structural_res_1 = _mm256_movemask_epi8(tmp_hi);
uint64_t structurals = ~(structural_res_0 | (structural_res_1 << 32));
structurals = ~(structural_res_0 | (structural_res_1 << 32));
// this additional mask and transfer is non-trivially expensive,
// unfortunately
@ -723,90 +782,12 @@ WARN_UNUSED
// now, we've used our close quotes all we need to. So let's switch them off
// they will be off in the quote mask and on in quote bits.
structurals &= ~(quote_bits & ~quote_mask);
*(uint64_t *)(pj.structurals + idx / 8) = structurals;
//*(uint64_t *)(pj.structurals + idx / 8) = structurals;
idx += 64;
}
#ifdef SIMDJSON_UTF8VALIDATE
return _mm256_testz_si256(has_error, has_error);
#else
return true;
#endif
}
/* end file /home/dlemire/CVS/github/simdjson/src/stage1_find_marks.cpp */
/* begin file /home/dlemire/CVS/github/simdjson/src/stage2_flatten.cpp */
#include <cassert>
#ifndef NO_PDEP_PLEASE
#define NO_PDEP_PLEASE // though this is not always a win, it seems to
// be more often a win than not. And it will be faster on AMD.
#endif
#ifndef NO_PDEP_WIDTH
#define NO_PDEP_WIDTH 8
#endif
#define SET_BIT(i) \
base_ptr[base + i] = (uint32_t)idx + trailingzeroes(s); \
s = s & (s - 1);
#define SET_BIT1 SET_BIT(0)
#define SET_BIT2 SET_BIT1 SET_BIT(1)
#define SET_BIT3 SET_BIT2 SET_BIT(2)
#define SET_BIT4 SET_BIT3 SET_BIT(3)
#define SET_BIT5 SET_BIT4 SET_BIT(4)
#define SET_BIT6 SET_BIT5 SET_BIT(5)
#define SET_BIT7 SET_BIT6 SET_BIT(6)
#define SET_BIT8 SET_BIT7 SET_BIT(7)
#define SET_BIT9 SET_BIT8 SET_BIT(8)
#define SET_BIT10 SET_BIT9 SET_BIT(9)
#define SET_BIT11 SET_BIT10 SET_BIT(10)
#define SET_BIT12 SET_BIT11 SET_BIT(11)
#define SET_BIT13 SET_BIT12 SET_BIT(12)
#define SET_BIT14 SET_BIT13 SET_BIT(13)
#define SET_BIT15 SET_BIT14 SET_BIT(14)
#define SET_BIT16 SET_BIT15 SET_BIT(15)
#define CALL(macro, ...) macro(__VA_ARGS__)
#define SET_BITLOOPN(n) SET_BIT##n
// just transform the bitmask to a big list of 32-bit integers for now
// that's all; the type of character the offset points to will
// tell us exactly what we need to know. Naive but straightforward
// implementation
WARN_UNUSED
bool flatten_indexes(size_t len, ParsedJson &pj) {
uint32_t *base_ptr = pj.structural_indexes;
uint32_t base = 0;
#ifdef BUILDHISTOGRAM
uint32_t counters[66];
uint32_t total = 0;
for (int k = 0; k < 66; k++)
counters[k] = 0;
for (size_t idx = 0; idx < len; idx += 64) {
uint64_t s = *(uint64_t *)(pj.structurals + idx / 8);
uint32_t cnt = hamming(s);
total++;
counters[cnt]++;
}
printf("\n histogram:\n");
for (int k = 0; k < 66; k++) {
if (counters[k] > 0)
printf("%10d %10.u %10.3f \n", k, counters[k], counters[k] * 1.0 / total);
}
printf("\n\n");
#endif
for (size_t idx = 0; idx < len; idx += 64) {
uint64_t s = *(uint64_t *)(pj.structurals + idx / 8);
#ifdef SUPPRESS_CHEESY_FLATTEN
while (s) {
base_ptr[base++] = (uint32_t)idx + trailingzeroes(s);
s &= s - 1ULL;
}
#elif defined(NO_PDEP_PLEASE)
uint32_t cnt = hamming(s);
uint32_t cnt = hamming(structurals);
uint32_t next_base = base + cnt;
while (s) {
while (structurals) {
CALL(SET_BITLOOPN, NO_PDEP_WIDTH)
/*for(size_t i = 0; i < NO_PDEP_WIDTH; i++) {
base_ptr[base+i] = (uint32_t)idx + trailingzeroes(s);
@ -815,37 +796,10 @@ bool flatten_indexes(size_t len, ParsedJson &pj) {
base += NO_PDEP_WIDTH;
}
base = next_base;
#else
uint32_t cnt = hamming(s);
uint32_t next_base = base + cnt;
while (s) {
// spoil the suspense by reducing dependency chains; actually a win even
// with cost of pdep
uint64_t s3 = _pdep_u64(~0x7ULL, s); // s3 will have bottom 3 1-bits unset
uint64_t s5 = _pdep_u64(~0x1fULL, s); // s5 will have bottom 5 1-bits unset
base_ptr[base + 0] = (uint32_t)idx + trailingzeroes(s);
uint64_t s1 = s & (s - 1ULL);
base_ptr[base + 1] = (uint32_t)idx + trailingzeroes(s1);
uint64_t s2 = s1 & (s1 - 1ULL);
base_ptr[base + 2] =
(uint32_t)idx + trailingzeroes(s2); // uint64_t s3 = s2 & (s2 - 1ULL);
base_ptr[base + 3] = (uint32_t)idx + trailingzeroes(s3);
uint64_t s4 = s3 & (s3 - 1ULL);
base_ptr[base + 4] =
(uint32_t)idx + trailingzeroes(s4); // uint64_t s5 = s4 & (s4 - 1ULL);
base_ptr[base + 5] = (uint32_t)idx + trailingzeroes(s5);
uint64_t s6 = s5 & (s5 - 1ULL);
s = s6;
base += 6;
}
base = next_base;
#endif
}
pj.n_structural_indexes = base;
if(base_ptr[pj.n_structural_indexes-1] > len) {
printf("Internal bug\n");
fprintf( stderr,"Internal bug\n");
return false;
}
if(len != base_ptr[pj.n_structural_indexes-1]) {
@ -853,10 +807,15 @@ bool flatten_indexes(size_t len, ParsedJson &pj) {
base_ptr[pj.n_structural_indexes++] = len;
}
base_ptr[pj.n_structural_indexes] = 0; // make it safe to dereference one beyond this array
#ifdef SIMDJSON_UTF8VALIDATE
return _mm256_testz_si256(has_error, has_error);
#else
return true;
#endif
}
/* end file /home/dlemire/CVS/github/simdjson/src/stage2_flatten.cpp */
/* begin file /home/dlemire/CVS/github/simdjson/src/stage34_unified.cpp */
/* end file /Users/lemire/CVS/github/simdjson/src/stage1_find_marks.cpp */
/* begin file /Users/lemire/CVS/github/simdjson/src/stage2_build_tape.cpp */
#ifdef _MSC_VER
/* Microsoft C/C++-compatible compiler */
#include <intrin.h>
@ -1363,4 +1322,4 @@ succeed:
fail:
return false;
}
/* end file /home/dlemire/CVS/github/simdjson/src/stage34_unified.cpp */
/* end file /Users/lemire/CVS/github/simdjson/src/stage2_build_tape.cpp */

View File

@ -1,5 +1,5 @@
/* auto-generated on Mon Dec 31 11:59:09 EST 2018. Do not edit! */
/* begin file /home/dlemire/CVS/github/simdjson/include/simdjson/portability.h */
/* auto-generated on Mon Dec 31 17:13:28 EST 2018. Do not edit! */
/* begin file /Users/lemire/CVS/github/simdjson/include/simdjson/portability.h */
#ifndef SIMDJSON_PORTABILITY_H
#define SIMDJSON_PORTABILITY_H
@ -35779,7 +35779,7 @@ public:
// allocate memory
ParsedJson()
: bytecapacity(0), depthcapacity(0), tapecapacity(0), stringcapacity(0),
current_loc(0), structurals(NULL), n_structural_indexes(0),
current_loc(0), n_structural_indexes(0),
structural_indexes(NULL), tape(NULL), containing_scope_offset(NULL),
ret_address(NULL), string_buf(NULL), current_string_buf_loc(NULL), isvalid(false) {}
@ -35798,11 +35798,6 @@ public:
}
isvalid = false;
bytecapacity = 0; // will only set it to len after allocations are a success
structurals = (uint8_t *)aligned_malloc(8, ROUNDUP_N(len, 64) / 8);
if (structurals == NULL) {
std::cerr << "Could not allocate memory for structurals" << std::endl;
return false;
};
n_structural_indexes = 0;
uint32_t max_structures = ROUNDUP_N(len, 64) + 2 + 7;
structural_indexes = new uint32_t[max_structures];
@ -35824,7 +35819,6 @@ public:
if(tape != NULL) delete[] tape;
if(string_buf != NULL) delete[] string_buf;
if(structural_indexes != NULL) delete[] structural_indexes;
aligned_free(structurals);
return false;
}
@ -35851,7 +35845,6 @@ public:
if(tape != NULL) delete[] tape;
if(string_buf != NULL) delete[] string_buf;
if(structural_indexes != NULL) delete[] structural_indexes;
aligned_free(structurals);
isvalid = false;
}
@ -36435,14 +36428,12 @@ private:
};
size_t bytecapacity; // indicates how many bits are meant to be supported by
// structurals
size_t bytecapacity; // indicates how many bits are meant to be supported
size_t depthcapacity; // how deep we can go
size_t tapecapacity;
size_t stringcapacity;
uint32_t current_loc;
uint8_t *structurals;
uint32_t n_structural_indexes;
uint32_t *structural_indexes;
@ -36465,7 +36456,6 @@ private:
tapecapacity(std::move(p.tapecapacity)),
stringcapacity(std::move(p.stringcapacity)),
current_loc(std::move(p.current_loc)),
structurals(std::move(p.structurals)),
n_structural_indexes(std::move(p.n_structural_indexes)),
structural_indexes(std::move(p.structural_indexes)),
tape(std::move(p.tape)),
@ -36474,7 +36464,6 @@ private:
string_buf(std::move(p.string_buf)),
current_string_buf_loc(std::move(p.current_string_buf_loc)),
isvalid(std::move(p.isvalid)) {
p.structurals=NULL;
p.structural_indexes=NULL;
p.tape=NULL;
p.containing_scope_offset=NULL;
@ -36526,18 +36515,8 @@ static inline bool find_structural_bits(const char *buf, size_t len, ParsedJson
}
#endif
/* end file /home/dlemire/CVS/github/simdjson/include/simdjson/stage1_find_marks.h */
/* begin file /home/dlemire/CVS/github/simdjson/include/simdjson/stage2_flatten.h */
#ifndef SIMDJSON_STAGE2_FLATTEN_H
#define SIMDJSON_STAGE2_FLATTEN_H
WARN_UNUSED
bool flatten_indexes(size_t len, ParsedJson &pj);
#endif
/* end file /home/dlemire/CVS/github/simdjson/include/simdjson/stage2_flatten.h */
/* begin file /home/dlemire/CVS/github/simdjson/include/simdjson/stringparsing.h */
/* end file /Users/lemire/CVS/github/simdjson/include/simdjson/stage1_find_marks.h */
/* begin file /Users/lemire/CVS/github/simdjson/include/simdjson/stringparsing.h */
#ifndef SIMDJSON_STRINGPARSING_H
#define SIMDJSON_STRINGPARSING_H
@ -37213,8 +37192,8 @@ static really_inline bool parse_number(const uint8_t *const buf,
}
#endif
/* end file /home/dlemire/CVS/github/simdjson/include/simdjson/numberparsing.h */
/* begin file /home/dlemire/CVS/github/simdjson/include/simdjson/stage34_unified.h */
/* end file /Users/lemire/CVS/github/simdjson/include/simdjson/numberparsing.h */
/* begin file /Users/lemire/CVS/github/simdjson/include/simdjson/stage2_build_tape.h */
#ifndef SIMDJSON_STAGE34_UNIFIED_H
#define SIMDJSON_STAGE34_UNIFIED_H
@ -37230,8 +37209,8 @@ static inline bool unified_machine(const char *buf, size_t len, ParsedJson &pj)
}
#endif
/* end file /home/dlemire/CVS/github/simdjson/include/simdjson/stage34_unified.h */
/* begin file /home/dlemire/CVS/github/simdjson/include/simdjson/jsonparser.h */
/* end file /Users/lemire/CVS/github/simdjson/include/simdjson/stage2_build_tape.h */
/* begin file /Users/lemire/CVS/github/simdjson/include/simdjson/jsonparser.h */
#ifndef SIMDJSON_JSONPARSER_H
#define SIMDJSON_JSONPARSER_H

View File

@ -40,12 +40,12 @@ bool json_parse(const uint8_t *buf, size_t len, ParsedJson &pj, bool reallocifne
}
}
bool isok = find_structural_bits(buf, len, pj);
if (isok) {
/*if (isok) {
isok = flatten_indexes(len, pj);
} else {
if(reallocated) free((void*)buf);
return false;
}
}*/
if (isok) {
isok = unified_machine(buf, len, pj);
} else {

View File

@ -7,6 +7,35 @@
#define SIMDJSON_UTF8VALIDATE
#endif
#ifndef NO_PDEP_WIDTH
#define NO_PDEP_WIDTH 8
#endif
#define SET_BIT(i) \
base_ptr[base + i] = (uint32_t)idx - 64 + trailingzeroes(structurals); \
structurals = structurals & (structurals - 1);
#define SET_BIT1 SET_BIT(0)
#define SET_BIT2 SET_BIT1 SET_BIT(1)
#define SET_BIT3 SET_BIT2 SET_BIT(2)
#define SET_BIT4 SET_BIT3 SET_BIT(3)
#define SET_BIT5 SET_BIT4 SET_BIT(4)
#define SET_BIT6 SET_BIT5 SET_BIT(5)
#define SET_BIT7 SET_BIT6 SET_BIT(6)
#define SET_BIT8 SET_BIT7 SET_BIT(7)
#define SET_BIT9 SET_BIT8 SET_BIT(8)
#define SET_BIT10 SET_BIT9 SET_BIT(9)
#define SET_BIT11 SET_BIT10 SET_BIT(10)
#define SET_BIT12 SET_BIT11 SET_BIT(11)
#define SET_BIT13 SET_BIT12 SET_BIT(12)
#define SET_BIT14 SET_BIT13 SET_BIT(13)
#define SET_BIT15 SET_BIT14 SET_BIT(14)
#define SET_BIT16 SET_BIT15 SET_BIT(15)
#define CALL(macro, ...) macro(__VA_ARGS__)
#define SET_BITLOOPN(n) SET_BIT##n
// It seems that many parsers do UTF-8 validation.
// RapidJSON does not do it by default, but a flag
// allows it.
@ -33,11 +62,13 @@ WARN_UNUSED
cerr << "Your ParsedJson object only supports documents up to "<< pj.bytecapacity << " bytes but you are trying to process " << len << " bytes\n";
return false;
}
uint32_t *base_ptr = pj.structural_indexes;
uint32_t base = 0;
#ifdef SIMDJSON_UTF8VALIDATE
__m256i has_error = _mm256_setzero_si256();
struct avx_processed_utf_bytes previous;
previous.rawbytes = _mm256_setzero_si256();
previous.high_nibbles = _mm256_setzero_si256();
previous.rawbytes = _mm256_setzero_si256();
previous.high_nibbles = _mm256_setzero_si256();
previous.carried_continuations = _mm256_setzero_si256();
#endif
@ -58,6 +89,7 @@ WARN_UNUSED
uint64_t prev_iter_ends_pseudo_pred = 1ULL;
size_t lenminus64 = len < 64 ? 0 : len - 64;
size_t idx = 0;
uint64_t structurals = 0;
for (; idx < lenminus64; idx += 64) {
#ifndef _MSC_VER
__builtin_prefetch(buf + idx + 128);
@ -121,6 +153,21 @@ WARN_UNUSED
quote_bits = quote_bits & ~odd_ends;
uint64_t quote_mask = _mm_cvtsi128_si64(_mm_clmulepi64_si128(
_mm_set_epi64x(0ULL, quote_bits), _mm_set1_epi8(0xFF), 0));
uint32_t cnt = hamming(structurals);
uint32_t next_base = base + cnt;
while (structurals) {
CALL(SET_BITLOOPN, NO_PDEP_WIDTH)
/*for(size_t i = 0; i < NO_PDEP_WIDTH; i++) {
base_ptr[base+i] = (uint32_t)idx + trailingzeroes(s);
s = s & (s - 1);
}*/
base += NO_PDEP_WIDTH;
}
base = next_base;
quote_mask ^= prev_iter_inside_quote;
prev_iter_inside_quote = (uint64_t)((int64_t)quote_mask >> 63); // right shift of a signed value expected to be well-defined and standard compliant as of C++20, John Regher from Utah U. says this is fine code
@ -162,7 +209,7 @@ WARN_UNUSED
uint64_t structural_res_0 = (uint32_t)_mm256_movemask_epi8(tmp_lo);
uint64_t structural_res_1 = _mm256_movemask_epi8(tmp_hi);
uint64_t structurals = ~(structural_res_0 | (structural_res_1 << 32));
structurals = ~(structural_res_0 | (structural_res_1 << 32));
// this additional mask and transfer is non-trivially expensive,
// unfortunately
@ -201,7 +248,8 @@ WARN_UNUSED
// now, we've used our close quotes all we need to. So let's switch them off
// they will be off in the quote mask and on in quote bits.
structurals &= ~(quote_bits & ~quote_mask);
*(uint64_t *)(pj.structurals + idx / 8) = structurals;
//*(uint64_t *)(pj.structurals + idx / 8) = structurals;
}
////////////////
@ -275,6 +323,17 @@ WARN_UNUSED
quote_mask ^= prev_iter_inside_quote;
//prev_iter_inside_quote = (uint64_t)((int64_t)quote_mask >> 63); // right shift of a signed value expected to be well-defined and standard compliant as of C++20
uint32_t cnt = hamming(structurals);
uint32_t next_base = base + cnt;
while (structurals) {
CALL(SET_BITLOOPN, NO_PDEP_WIDTH)
/*for(size_t i = 0; i < NO_PDEP_WIDTH; i++) {
base_ptr[base+i] = (uint32_t)idx + trailingzeroes(s);
s = s & (s - 1);
}*/
base += NO_PDEP_WIDTH;
}
base = next_base;
// How do we build up a user traversable data structure
// first, do a 'shufti' to detect structural JSON characters
// they are { 0x7b } 0x7d : 0x3a [ 0x5b ] 0x5d , 0x2c
@ -313,7 +372,7 @@ WARN_UNUSED
uint64_t structural_res_0 = (uint32_t)_mm256_movemask_epi8(tmp_lo);
uint64_t structural_res_1 = _mm256_movemask_epi8(tmp_hi);
uint64_t structurals = ~(structural_res_0 | (structural_res_1 << 32));
structurals = ~(structural_res_0 | (structural_res_1 << 32));
// this additional mask and transfer is non-trivially expensive,
// unfortunately
@ -354,8 +413,32 @@ WARN_UNUSED
// now, we've used our close quotes all we need to. So let's switch them off
// they will be off in the quote mask and on in quote bits.
structurals &= ~(quote_bits & ~quote_mask);
*(uint64_t *)(pj.structurals + idx / 8) = structurals;
//*(uint64_t *)(pj.structurals + idx / 8) = structurals;
idx += 64;
}
uint32_t cnt = hamming(structurals);
uint32_t next_base = base + cnt;
while (structurals) {
CALL(SET_BITLOOPN, NO_PDEP_WIDTH)
/*for(size_t i = 0; i < NO_PDEP_WIDTH; i++) {
base_ptr[base+i] = (uint32_t)idx + trailingzeroes(s);
s = s & (s - 1);
}*/
base += NO_PDEP_WIDTH;
}
base = next_base;
pj.n_structural_indexes = base;
if(base_ptr[pj.n_structural_indexes-1] > len) {
fprintf( stderr,"Internal bug\n");
return false;
}
if(len != base_ptr[pj.n_structural_indexes-1]) {
// the string might not be NULL terminated, but we add a virtual NULL ending character.
base_ptr[pj.n_structural_indexes++] = len;
}
base_ptr[pj.n_structural_indexes] = 0; // make it safe to dereference one beyond this array
#ifdef SIMDJSON_UTF8VALIDATE
return _mm256_testz_si256(has_error, has_error);
#else

View File

@ -1,125 +0,0 @@
#include <cassert>
#include "simdjson/portability.h"
#include "simdjson/common_defs.h"
#include "simdjson/parsedjson.h"
// NO_PDEP_PLEASE selects the unrolled, PDEP-free flattening branch
// (the `#elif defined(NO_PDEP_PLEASE)` path of flatten_indexes). It is
// defined here by default unless the build already defines it.
#ifndef NO_PDEP_PLEASE
#define NO_PDEP_PLEASE // though this is not always a win, it seems to
// be more often a win than not. And it will be faster on AMD.
#endif
// NO_PDEP_WIDTH is the number of SET_BIT steps emitted per loop iteration.
// Only SET_BIT1 .. SET_BIT16 exist below, so the value must be in 1..16.
#ifndef NO_PDEP_WIDTH
#define NO_PDEP_WIDTH 8
#endif
// SET_BIT(i): store `idx + (position of the lowest set bit of s)` into
// base_ptr[base + i], then clear that lowest set bit of s.
// The macro captures `base_ptr`, `base`, `idx` and `s` from the expansion
// site. It expands to two bare statements (no do { } while (0) wrapper), so it
// may only be used where a statement sequence is valid.
// NOTE(review): once s reaches 0 the remaining steps still execute and call
// trailingzeroes(0); presumably the helper tolerates that -- confirm.
#define SET_BIT(i) \
base_ptr[base + i] = (uint32_t)idx + trailingzeroes(s); \
s = s & (s - 1);
// SET_BITn expands to n consecutive SET_BIT steps: SET_BIT(0) .. SET_BIT(n-1).
#define SET_BIT1 SET_BIT(0)
#define SET_BIT2 SET_BIT1 SET_BIT(1)
#define SET_BIT3 SET_BIT2 SET_BIT(2)
#define SET_BIT4 SET_BIT3 SET_BIT(3)
#define SET_BIT5 SET_BIT4 SET_BIT(4)
#define SET_BIT6 SET_BIT5 SET_BIT(5)
#define SET_BIT7 SET_BIT6 SET_BIT(6)
#define SET_BIT8 SET_BIT7 SET_BIT(7)
#define SET_BIT9 SET_BIT8 SET_BIT(8)
#define SET_BIT10 SET_BIT9 SET_BIT(9)
#define SET_BIT11 SET_BIT10 SET_BIT(10)
#define SET_BIT12 SET_BIT11 SET_BIT(11)
#define SET_BIT13 SET_BIT12 SET_BIT(12)
#define SET_BIT14 SET_BIT13 SET_BIT(13)
#define SET_BIT15 SET_BIT14 SET_BIT(14)
#define SET_BIT16 SET_BIT15 SET_BIT(15)
// CALL adds one level of macro indirection so that an argument such as
// NO_PDEP_WIDTH is fully expanded (to 8, say) before SET_BITLOOPN pastes it
// with `##`; pasting directly would produce the token SET_BITNO_PDEP_WIDTH.
#define CALL(macro, ...) macro(__VA_ARGS__)
// SET_BITLOOPN(n) names the SET_BITn macro for a literal n.
#define SET_BITLOOPN(n) SET_BIT##n
// Flatten the structural bitmask into a list of 32-bit byte offsets.
//
// pj.structurals holds one bit per input byte, read here as one 64-bit word
// per 64 bytes of input. For every set bit this function appends the byte
// offset of that bit to pj.structural_indexes and finally records the number
// of offsets in pj.n_structural_indexes. The kind of character each offset
// points to is not stored; later stages look at the input byte itself.
//
// Parameters:
//   len - number of input bytes covered by the bitmask.
//   pj  - supplies pj.structurals (input) and receives
//         pj.structural_indexes / pj.n_structural_indexes (output).
//
// Returns false when no structural character was found at all, or when the
// last recorded offset lies beyond `len` (internal inconsistency); true
// otherwise. On success a virtual terminating offset equal to `len` is
// appended if it is not already the last entry, followed by a 0 sentinel that
// is not counted in pj.n_structural_indexes.
//
// NOTE(review): the unrolled branches write a few slots past the real count
// before `base` is reset to `next_base`; this relies on the slack reserved
// when pj.structural_indexes is allocated -- confirm if NO_PDEP_WIDTH grows.
WARN_UNUSED
bool flatten_indexes(size_t len, ParsedJson &pj) {
  uint32_t *base_ptr = pj.structural_indexes;
  uint32_t base = 0;
#ifdef BUILDHISTOGRAM
  // Optional diagnostic: distribution of set bits per 64-byte block.
  uint32_t counters[66];
  uint32_t total = 0;
  for (int k = 0; k < 66; k++)
    counters[k] = 0;
  for (size_t idx = 0; idx < len; idx += 64) {
    uint64_t s = *(uint64_t *)(pj.structurals + idx / 8);
    uint32_t cnt = hamming(s);
    total++;
    counters[cnt]++;
  }
  printf("\n histogram:\n");
  for (int k = 0; k < 66; k++) {
    if (counters[k] > 0)
      printf("%10d %10.u %10.3f \n", k, counters[k], counters[k] * 1.0 / total);
  }
  printf("\n\n");
#endif
  for (size_t idx = 0; idx < len; idx += 64) {
    uint64_t s = *(uint64_t *)(pj.structurals + idx / 8);
#ifdef SUPPRESS_CHEESY_FLATTEN
    // Straightforward version: one offset per iteration, no over-writing.
    while (s) {
      base_ptr[base++] = (uint32_t)idx + trailingzeroes(s);
      s &= s - 1ULL;
    }
#elif defined(NO_PDEP_PLEASE)
    // Unrolled version: each pass emits NO_PDEP_WIDTH offsets unconditionally
    // (equivalent to a loop of NO_PDEP_WIDTH SET_BIT steps). Slots written
    // after s became 0 are garbage; resetting base to next_base afterwards
    // makes the next block overwrite them.
    uint32_t cnt = hamming(s);
    uint32_t next_base = base + cnt;
    while (s) {
      CALL(SET_BITLOOPN, NO_PDEP_WIDTH)
      base += NO_PDEP_WIDTH;
    }
    base = next_base;
#else
    uint32_t cnt = hamming(s);
    uint32_t next_base = base + cnt;
    while (s) {
      // spoil the suspense by reducing dependency chains; actually a win even
      // with cost of pdep
      uint64_t s3 = _pdep_u64(~0x7ULL, s);  // s3 will have bottom 3 1-bits unset
      uint64_t s5 = _pdep_u64(~0x1fULL, s); // s5 will have bottom 5 1-bits unset
      base_ptr[base + 0] = (uint32_t)idx + trailingzeroes(s);
      uint64_t s1 = s & (s - 1ULL);
      base_ptr[base + 1] = (uint32_t)idx + trailingzeroes(s1);
      uint64_t s2 = s1 & (s1 - 1ULL);
      // s3 (from pdep) equals s2 & (s2 - 1ULL)
      base_ptr[base + 2] = (uint32_t)idx + trailingzeroes(s2);
      base_ptr[base + 3] = (uint32_t)idx + trailingzeroes(s3);
      uint64_t s4 = s3 & (s3 - 1ULL);
      // s5 (from pdep) equals s4 & (s4 - 1ULL)
      base_ptr[base + 4] = (uint32_t)idx + trailingzeroes(s4);
      base_ptr[base + 5] = (uint32_t)idx + trailingzeroes(s5);
      uint64_t s6 = s5 & (s5 - 1ULL);
      s = s6;
      base += 6;
    }
    base = next_base;
#endif
  }
  pj.n_structural_indexes = base;
  if (base == 0) {
    // No structural character at all (empty or whitespace-only input).
    // Without this guard the checks below would index base_ptr with
    // (uint32_t)0 - 1, i.e. read far outside the array.
    return false;
  }
  if (base_ptr[base - 1] > len) {
    fprintf(stderr, "Internal bug\n");
    return false;
  }
  if (len != base_ptr[base - 1]) {
    // the string might not be NULL terminated, but we add a virtual NULL
    // ending character.
    base_ptr[pj.n_structural_indexes++] = (uint32_t)len;
  }
  // make it safe to dereference one beyond this array
  base_ptr[pj.n_structural_indexes] = 0;
  return true;
}

View File

@ -90,7 +90,7 @@ inline void foundFloat(double result, const uint8_t *buf) {
}
#include "simdjson/jsonparser.h"
#include "src/stage34_unified.cpp"
#include "src/stage2_build_tape.cpp"
/**
* Does the file filename end with the given extension?

View File

@ -282,7 +282,7 @@ inline void foundString(const uint8_t *buf, const uint8_t *parsed_begin,
}
#include "simdjson/jsonparser.h"
#include "src/stage34_unified.cpp"
#include "src/stage2_build_tape.cpp"
/**
* Does the file filename end with the given extension?