Merge branch 'stage12unified_attempt2'
This commit is contained in:
commit
bad32be5f6
8
Makefile
8
Makefile
|
@ -27,8 +27,8 @@ TESTEXECUTABLES=jsoncheck numberparsingcheck stringparsingcheck
|
|||
COMPARISONEXECUTABLES=minifiercompetition parsingcompetition parseandstatcompetition distinctuseridcompetition allparserscheckfile
|
||||
SUPPLEMENTARYEXECUTABLES=parse_noutf8validation parse_nonumberparsing parse_nostringparsing
|
||||
|
||||
HEADERS= include/simdjson/simdutf8check.h include/simdjson/stringparsing.h include/simdjson/numberparsing.h include/simdjson/jsonparser.h include/simdjson/common_defs.h include/simdjson/jsonioutil.h benchmark/benchmark.h benchmark/linux/linux-perf-events.h include/simdjson/parsedjson.h include/simdjson/stage1_find_marks.h include/simdjson/stage2_flatten.h include/simdjson/stage34_unified.h include/simdjson/jsoncharutils.h include/simdjson/jsonformatutils.h
|
||||
LIBFILES=src/jsonioutil.cpp src/jsonparser.cpp src/stage1_find_marks.cpp src/stage2_flatten.cpp src/stage34_unified.cpp
|
||||
HEADERS= include/simdjson/simdutf8check.h include/simdjson/stringparsing.h include/simdjson/numberparsing.h include/simdjson/jsonparser.h include/simdjson/common_defs.h include/simdjson/jsonioutil.h benchmark/benchmark.h benchmark/linux/linux-perf-events.h include/simdjson/parsedjson.h include/simdjson/stage1_find_marks.h include/simdjson/stage2_build_tape.h include/simdjson/jsoncharutils.h include/simdjson/jsonformatutils.h
|
||||
LIBFILES=src/jsonioutil.cpp src/jsonparser.cpp src/stage1_find_marks.cpp src/stage2_build_tape.cpp
|
||||
MINIFIERHEADERS=include/simdjson/jsonminifier.h include/simdjson/simdprune_tables.h
|
||||
MINIFIERLIBFILES=src/jsonminifier.cpp
|
||||
|
||||
|
@ -106,11 +106,11 @@ jsoncheck:tests/jsoncheck.cpp $(HEADERS) $(LIBFILES)
|
|||
$(CXX) $(CXXFLAGS) -o jsoncheck $(LIBFILES) tests/jsoncheck.cpp -I. $(LIBFLAGS)
|
||||
|
||||
numberparsingcheck:tests/numberparsingcheck.cpp $(HEADERS) $(LIBFILES)
|
||||
$(CXX) $(CXXFLAGS) -o numberparsingcheck tests/numberparsingcheck.cpp src/jsonioutil.cpp src/jsonparser.cpp src/stage1_find_marks.cpp src/stage2_flatten.cpp -I. $(LIBFLAGS) -DJSON_TEST_NUMBERS
|
||||
$(CXX) $(CXXFLAGS) -o numberparsingcheck tests/numberparsingcheck.cpp src/jsonioutil.cpp src/jsonparser.cpp src/stage1_find_marks.cpp -I. $(LIBFLAGS) -DJSON_TEST_NUMBERS
|
||||
|
||||
|
||||
stringparsingcheck:tests/stringparsingcheck.cpp $(HEADERS) $(LIBFILES)
|
||||
$(CXX) $(CXXFLAGS) -o stringparsingcheck tests/stringparsingcheck.cpp src/jsonioutil.cpp src/jsonparser.cpp src/stage1_find_marks.cpp src/stage2_flatten.cpp -I. $(LIBFLAGS) -DJSON_TEST_STRINGS
|
||||
$(CXX) $(CXXFLAGS) -o stringparsingcheck tests/stringparsingcheck.cpp src/jsonioutil.cpp src/jsonparser.cpp src/stage1_find_marks.cpp -I. $(LIBFLAGS) -DJSON_TEST_STRINGS
|
||||
|
||||
|
||||
minifiercompetition: benchmark/minifiercompetition.cpp $(HEADERS) $(MINIFIERHEADERS) $(LIBFILES) $(MINIFIERLIBFILES)
|
||||
|
|
|
@ -17,8 +17,7 @@ $SCRIPTPATH/src/jsonioutil.cpp
|
|||
$SCRIPTPATH/src/jsonminifier.cpp
|
||||
$SCRIPTPATH/src/jsonparser.cpp
|
||||
$SCRIPTPATH/src/stage1_find_marks.cpp
|
||||
$SCRIPTPATH/src/stage2_flatten.cpp
|
||||
$SCRIPTPATH/src/stage34_unified.cpp
|
||||
$SCRIPTPATH/src/stage2_build_tape.cpp
|
||||
"
|
||||
|
||||
# order matters
|
||||
|
@ -33,10 +32,9 @@ $SCRIPTPATH/include/simdjson/simdutf8check.h
|
|||
$SCRIPTPATH/include/simdjson/jsonminifier.h
|
||||
$SCRIPTPATH/include/simdjson/parsedjson.h
|
||||
$SCRIPTPATH/include/simdjson/stage1_find_marks.h
|
||||
$SCRIPTPATH/include/simdjson/stage2_flatten.h
|
||||
$SCRIPTPATH/include/simdjson/stringparsing.h
|
||||
$SCRIPTPATH/include/simdjson/numberparsing.h
|
||||
$SCRIPTPATH/include/simdjson/stage34_unified.h
|
||||
$SCRIPTPATH/include/simdjson/stage2_build_tape.h
|
||||
$SCRIPTPATH/include/simdjson/jsonparser.h
|
||||
"
|
||||
|
||||
|
|
|
@ -35,8 +35,7 @@
|
|||
#include "simdjson/jsonparser.h"
|
||||
#include "simdjson/parsedjson.h"
|
||||
#include "simdjson/stage1_find_marks.h"
|
||||
#include "simdjson/stage2_flatten.h"
|
||||
#include "simdjson/stage34_unified.h"
|
||||
#include "simdjson/stage2_build_tape.h"
|
||||
using namespace std;
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
|
@ -166,20 +165,6 @@ int main(int argc, char *argv[]) {
|
|||
}
|
||||
unified.start();
|
||||
#endif
|
||||
isok = isok && flatten_indexes(p.size(), pj);
|
||||
#ifndef SQUASH_COUNTERS
|
||||
unified.end(results);
|
||||
cy2 += results[0];
|
||||
cl2 += results[1];
|
||||
mis2 += results[2];
|
||||
cref2 += results[3];
|
||||
cmis2 += results[4];
|
||||
if (!isok) {
|
||||
cout << "Failed out during stage 2\n";
|
||||
break;
|
||||
}
|
||||
unified.start();
|
||||
#endif
|
||||
|
||||
isok = isok && unified_machine(p.data(), p.size(), pj);
|
||||
#ifndef SQUASH_COUNTERS
|
||||
|
@ -249,23 +234,12 @@ int main(int argc, char *argv[]) {
|
|||
(double)cy1 / (iterations * p.size()));
|
||||
|
||||
printf("stage 2 instructions: %10lu cycles: %10lu (%.2f %%) ins/cycles: "
|
||||
"%.2f mis. branches: %10lu (cycles/mis.branch %.2f) cache "
|
||||
"accesses: %10lu (failure %10lu)\n",
|
||||
cl2 / iterations, cy2 / iterations, 100. * cy2 / total,
|
||||
(double)cl2 / cy2, mis2 / iterations, (double)cy2 / mis2,
|
||||
cref2 / iterations, cmis2 / iterations);
|
||||
printf(" stage 2 runs at %.2f cycles per input byte and ",
|
||||
(double)cy2 / (iterations * p.size()));
|
||||
printf("%.2f cycles per structural character.\n",
|
||||
(double)cy2 / (iterations * pj.n_structural_indexes));
|
||||
|
||||
printf("stage 3 instructions: %10lu cycles: %10lu (%.2f %%) ins/cycles: "
|
||||
"%.2f mis. branches: %10lu (cycles/mis.branch %.2f) cache "
|
||||
"accesses: %10lu (failure %10lu)\n",
|
||||
cl3 / iterations, cy3 / iterations, 100. * cy3 / total,
|
||||
(double)cl3 / cy3, mis3 / iterations, (double)cy3 / mis3,
|
||||
cref3 / iterations, cmis3 / iterations);
|
||||
printf(" stage 3 runs at %.2f cycles per input byte and ",
|
||||
printf(" stage 2 runs at %.2f cycles per input byte and ",
|
||||
(double)cy3 / (iterations * p.size()));
|
||||
printf("%.2f cycles per structural character.\n",
|
||||
(double)cy3 / (iterations * pj.n_structural_indexes));
|
||||
|
|
|
@ -5,8 +5,7 @@
|
|||
#include "simdjson/jsonioutil.h"
|
||||
#include "simdjson/parsedjson.h"
|
||||
#include "simdjson/stage1_find_marks.h"
|
||||
#include "simdjson/stage2_flatten.h"
|
||||
#include "simdjson/stage34_unified.h"
|
||||
#include "simdjson/stage2_build_tape.h"
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -26,7 +26,7 @@ public:
|
|||
// allocate memory
|
||||
ParsedJson()
|
||||
: bytecapacity(0), depthcapacity(0), tapecapacity(0), stringcapacity(0),
|
||||
current_loc(0), structurals(NULL), n_structural_indexes(0),
|
||||
current_loc(0), n_structural_indexes(0),
|
||||
structural_indexes(NULL), tape(NULL), containing_scope_offset(NULL),
|
||||
ret_address(NULL), string_buf(NULL), current_string_buf_loc(NULL), isvalid(false) {}
|
||||
|
||||
|
@ -45,11 +45,6 @@ public:
|
|||
}
|
||||
isvalid = false;
|
||||
bytecapacity = 0; // will only set it to len after allocations are a success
|
||||
structurals = (uint8_t *)aligned_malloc(8, ROUNDUP_N(len, 64) / 8);
|
||||
if (structurals == NULL) {
|
||||
std::cerr << "Could not allocate memory for structurals" << std::endl;
|
||||
return false;
|
||||
};
|
||||
n_structural_indexes = 0;
|
||||
uint32_t max_structures = ROUNDUP_N(len, 64) + 2 + 7;
|
||||
structural_indexes = new uint32_t[max_structures];
|
||||
|
@ -71,7 +66,6 @@ public:
|
|||
if(tape != NULL) delete[] tape;
|
||||
if(string_buf != NULL) delete[] string_buf;
|
||||
if(structural_indexes != NULL) delete[] structural_indexes;
|
||||
aligned_free(structurals);
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -98,7 +92,6 @@ public:
|
|||
if(tape != NULL) delete[] tape;
|
||||
if(string_buf != NULL) delete[] string_buf;
|
||||
if(structural_indexes != NULL) delete[] structural_indexes;
|
||||
aligned_free(structurals);
|
||||
isvalid = false;
|
||||
}
|
||||
|
||||
|
@ -682,14 +675,12 @@ private:
|
|||
};
|
||||
|
||||
|
||||
size_t bytecapacity; // indicates how many bits are meant to be supported by
|
||||
// structurals
|
||||
size_t bytecapacity; // indicates how many input bytes are meant to be supported
|
||||
|
||||
size_t depthcapacity; // how deep we can go
|
||||
size_t tapecapacity;
|
||||
size_t stringcapacity;
|
||||
uint32_t current_loc;
|
||||
uint8_t *structurals;
|
||||
uint32_t n_structural_indexes;
|
||||
|
||||
uint32_t *structural_indexes;
|
||||
|
@ -712,7 +703,6 @@ private:
|
|||
tapecapacity(std::move(p.tapecapacity)),
|
||||
stringcapacity(std::move(p.stringcapacity)),
|
||||
current_loc(std::move(p.current_loc)),
|
||||
structurals(std::move(p.structurals)),
|
||||
n_structural_indexes(std::move(p.n_structural_indexes)),
|
||||
structural_indexes(std::move(p.structural_indexes)),
|
||||
tape(std::move(p.tape)),
|
||||
|
@ -721,7 +711,6 @@ private:
|
|||
string_buf(std::move(p.string_buf)),
|
||||
current_string_buf_loc(std::move(p.current_string_buf_loc)),
|
||||
isvalid(std::move(p.isvalid)) {
|
||||
p.structurals=NULL;
|
||||
p.structural_indexes=NULL;
|
||||
p.tape=NULL;
|
||||
p.containing_scope_offset=NULL;
|
||||
|
|
|
@ -1,10 +0,0 @@
|
|||
#ifndef SIMDJSON_STAGE2_FLATTEN_H
|
||||
#define SIMDJSON_STAGE2_FLATTEN_H
|
||||
|
||||
#include "simdjson/common_defs.h"
|
||||
#include "simdjson/parsedjson.h"
|
||||
|
||||
WARN_UNUSED
|
||||
bool flatten_indexes(size_t len, ParsedJson &pj);
|
||||
|
||||
#endif
|
|
@ -1,4 +1,4 @@
|
|||
/* auto-generated on Mon Dec 31 11:59:09 EST 2018. Do not edit! */
|
||||
/* auto-generated on Mon Dec 31 17:13:28 EST 2018. Do not edit! */
|
||||
|
||||
#include <iostream>
|
||||
#include "simdjson.h"
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
/* auto-generated on Mon Dec 31 11:59:09 EST 2018. Do not edit! */
|
||||
/* auto-generated on Mon Dec 31 17:13:28 EST 2018. Do not edit! */
|
||||
#include "simdjson.h"
|
||||
|
||||
/* used for http://dmalloc.com/ Dmalloc - Debug Malloc Library */
|
||||
|
@ -341,12 +341,12 @@ bool json_parse(const uint8_t *buf, size_t len, ParsedJson &pj, bool reallocifne
|
|||
}
|
||||
}
|
||||
bool isok = find_structural_bits(buf, len, pj);
|
||||
if (isok) {
|
||||
/*if (isok) {
|
||||
isok = flatten_indexes(len, pj);
|
||||
} else {
|
||||
if(reallocated) free((void*)buf);
|
||||
return false;
|
||||
}
|
||||
}*/
|
||||
if (isok) {
|
||||
isok = unified_machine(buf, len, pj);
|
||||
} else {
|
||||
|
@ -377,6 +377,35 @@ ParsedJson build_parsed_json(const uint8_t *buf, size_t len, bool reallocifneede
|
|||
#define SIMDJSON_UTF8VALIDATE
|
||||
#endif
|
||||
|
||||
#ifndef NO_PDEP_WIDTH
|
||||
#define NO_PDEP_WIDTH 8
|
||||
#endif
|
||||
|
||||
#define SET_BIT(i) \
|
||||
base_ptr[base + i] = (uint32_t)idx - 64 + trailingzeroes(structurals); \
|
||||
structurals = structurals & (structurals - 1);
|
||||
|
||||
#define SET_BIT1 SET_BIT(0)
|
||||
#define SET_BIT2 SET_BIT1 SET_BIT(1)
|
||||
#define SET_BIT3 SET_BIT2 SET_BIT(2)
|
||||
#define SET_BIT4 SET_BIT3 SET_BIT(3)
|
||||
#define SET_BIT5 SET_BIT4 SET_BIT(4)
|
||||
#define SET_BIT6 SET_BIT5 SET_BIT(5)
|
||||
#define SET_BIT7 SET_BIT6 SET_BIT(6)
|
||||
#define SET_BIT8 SET_BIT7 SET_BIT(7)
|
||||
#define SET_BIT9 SET_BIT8 SET_BIT(8)
|
||||
#define SET_BIT10 SET_BIT9 SET_BIT(9)
|
||||
#define SET_BIT11 SET_BIT10 SET_BIT(10)
|
||||
#define SET_BIT12 SET_BIT11 SET_BIT(11)
|
||||
#define SET_BIT13 SET_BIT12 SET_BIT(12)
|
||||
#define SET_BIT14 SET_BIT13 SET_BIT(13)
|
||||
#define SET_BIT15 SET_BIT14 SET_BIT(14)
|
||||
#define SET_BIT16 SET_BIT15 SET_BIT(15)
|
||||
|
||||
#define CALL(macro, ...) macro(__VA_ARGS__)
|
||||
|
||||
#define SET_BITLOOPN(n) SET_BIT##n
|
||||
|
||||
// It seems that many parsers do UTF-8 validation.
|
||||
// RapidJSON does not do it by default, but a flag
|
||||
// allows it.
|
||||
|
@ -402,11 +431,13 @@ WARN_UNUSED
|
|||
cerr << "Your ParsedJson object only supports documents up to "<< pj.bytecapacity << " bytes but you are trying to process " << len << " bytes\n";
|
||||
return false;
|
||||
}
|
||||
uint32_t *base_ptr = pj.structural_indexes;
|
||||
uint32_t base = 0;
|
||||
#ifdef SIMDJSON_UTF8VALIDATE
|
||||
__m256i has_error = _mm256_setzero_si256();
|
||||
struct avx_processed_utf_bytes previous;
|
||||
previous.rawbytes = _mm256_setzero_si256();
|
||||
previous.high_nibbles = _mm256_setzero_si256();
|
||||
previous.rawbytes = _mm256_setzero_si256();
|
||||
previous.high_nibbles = _mm256_setzero_si256();
|
||||
previous.carried_continuations = _mm256_setzero_si256();
|
||||
#endif
|
||||
|
||||
|
@ -427,6 +458,7 @@ WARN_UNUSED
|
|||
uint64_t prev_iter_ends_pseudo_pred = 1ULL;
|
||||
size_t lenminus64 = len < 64 ? 0 : len - 64;
|
||||
size_t idx = 0;
|
||||
uint64_t structurals = 0;
|
||||
for (; idx < lenminus64; idx += 64) {
|
||||
#ifndef _MSC_VER
|
||||
__builtin_prefetch(buf + idx + 128);
|
||||
|
@ -490,6 +522,21 @@ WARN_UNUSED
|
|||
quote_bits = quote_bits & ~odd_ends;
|
||||
uint64_t quote_mask = _mm_cvtsi128_si64(_mm_clmulepi64_si128(
|
||||
_mm_set_epi64x(0ULL, quote_bits), _mm_set1_epi8(0xFF), 0));
|
||||
|
||||
|
||||
|
||||
uint32_t cnt = hamming(structurals);
|
||||
uint32_t next_base = base + cnt;
|
||||
while (structurals) {
|
||||
CALL(SET_BITLOOPN, NO_PDEP_WIDTH)
|
||||
/*for(size_t i = 0; i < NO_PDEP_WIDTH; i++) {
|
||||
base_ptr[base+i] = (uint32_t)idx + trailingzeroes(s);
|
||||
s = s & (s - 1);
|
||||
}*/
|
||||
base += NO_PDEP_WIDTH;
|
||||
}
|
||||
base = next_base;
|
||||
|
||||
quote_mask ^= prev_iter_inside_quote;
|
||||
prev_iter_inside_quote = (uint64_t)((int64_t)quote_mask >> 63); // right shift of a signed value is expected to be well-defined and standard-compliant as of C++20; John Regehr from Utah U. says this is fine code
|
||||
|
||||
|
@ -531,7 +578,7 @@ WARN_UNUSED
|
|||
|
||||
uint64_t structural_res_0 = (uint32_t)_mm256_movemask_epi8(tmp_lo);
|
||||
uint64_t structural_res_1 = _mm256_movemask_epi8(tmp_hi);
|
||||
uint64_t structurals = ~(structural_res_0 | (structural_res_1 << 32));
|
||||
structurals = ~(structural_res_0 | (structural_res_1 << 32));
|
||||
|
||||
// this additional mask and transfer is non-trivially expensive,
|
||||
// unfortunately
|
||||
|
@ -570,7 +617,8 @@ WARN_UNUSED
|
|||
// now, we've used our close quotes all we need to. So let's switch them off
|
||||
// they will be off in the quote mask and on in quote bits.
|
||||
structurals &= ~(quote_bits & ~quote_mask);
|
||||
*(uint64_t *)(pj.structurals + idx / 8) = structurals;
|
||||
|
||||
//*(uint64_t *)(pj.structurals + idx / 8) = structurals;
|
||||
}
|
||||
|
||||
////////////////
|
||||
|
@ -644,6 +692,17 @@ WARN_UNUSED
|
|||
quote_mask ^= prev_iter_inside_quote;
|
||||
//prev_iter_inside_quote = (uint64_t)((int64_t)quote_mask >> 63); // right shift of a signed value expected to be well-defined and standard compliant as of C++20
|
||||
|
||||
uint32_t cnt = hamming(structurals);
|
||||
uint32_t next_base = base + cnt;
|
||||
while (structurals) {
|
||||
CALL(SET_BITLOOPN, NO_PDEP_WIDTH)
|
||||
/*for(size_t i = 0; i < NO_PDEP_WIDTH; i++) {
|
||||
base_ptr[base+i] = (uint32_t)idx + trailingzeroes(s);
|
||||
s = s & (s - 1);
|
||||
}*/
|
||||
base += NO_PDEP_WIDTH;
|
||||
}
|
||||
base = next_base;
|
||||
// How do we build up a user traversable data structure
|
||||
// first, do a 'shufti' to detect structural JSON characters
|
||||
// they are { 0x7b } 0x7d : 0x3a [ 0x5b ] 0x5d , 0x2c
|
||||
|
@ -682,7 +741,7 @@ WARN_UNUSED
|
|||
|
||||
uint64_t structural_res_0 = (uint32_t)_mm256_movemask_epi8(tmp_lo);
|
||||
uint64_t structural_res_1 = _mm256_movemask_epi8(tmp_hi);
|
||||
uint64_t structurals = ~(structural_res_0 | (structural_res_1 << 32));
|
||||
structurals = ~(structural_res_0 | (structural_res_1 << 32));
|
||||
|
||||
// this additional mask and transfer is non-trivially expensive,
|
||||
// unfortunately
|
||||
|
@ -723,90 +782,12 @@ WARN_UNUSED
|
|||
// now, we've used our close quotes all we need to. So let's switch them off
|
||||
// they will be off in the quote mask and on in quote bits.
|
||||
structurals &= ~(quote_bits & ~quote_mask);
|
||||
*(uint64_t *)(pj.structurals + idx / 8) = structurals;
|
||||
//*(uint64_t *)(pj.structurals + idx / 8) = structurals;
|
||||
idx += 64;
|
||||
}
|
||||
#ifdef SIMDJSON_UTF8VALIDATE
|
||||
return _mm256_testz_si256(has_error, has_error);
|
||||
#else
|
||||
return true;
|
||||
#endif
|
||||
}
|
||||
/* end file /home/dlemire/CVS/github/simdjson/src/stage1_find_marks.cpp */
|
||||
/* begin file /home/dlemire/CVS/github/simdjson/src/stage2_flatten.cpp */
|
||||
|
||||
#include <cassert>
|
||||
|
||||
#ifndef NO_PDEP_PLEASE
|
||||
#define NO_PDEP_PLEASE // though this is not always a win, it seems to
|
||||
// be more often a win than not. And it will be faster on AMD.
|
||||
#endif
|
||||
|
||||
#ifndef NO_PDEP_WIDTH
|
||||
#define NO_PDEP_WIDTH 8
|
||||
#endif
|
||||
|
||||
#define SET_BIT(i) \
|
||||
base_ptr[base + i] = (uint32_t)idx + trailingzeroes(s); \
|
||||
s = s & (s - 1);
|
||||
|
||||
#define SET_BIT1 SET_BIT(0)
|
||||
#define SET_BIT2 SET_BIT1 SET_BIT(1)
|
||||
#define SET_BIT3 SET_BIT2 SET_BIT(2)
|
||||
#define SET_BIT4 SET_BIT3 SET_BIT(3)
|
||||
#define SET_BIT5 SET_BIT4 SET_BIT(4)
|
||||
#define SET_BIT6 SET_BIT5 SET_BIT(5)
|
||||
#define SET_BIT7 SET_BIT6 SET_BIT(6)
|
||||
#define SET_BIT8 SET_BIT7 SET_BIT(7)
|
||||
#define SET_BIT9 SET_BIT8 SET_BIT(8)
|
||||
#define SET_BIT10 SET_BIT9 SET_BIT(9)
|
||||
#define SET_BIT11 SET_BIT10 SET_BIT(10)
|
||||
#define SET_BIT12 SET_BIT11 SET_BIT(11)
|
||||
#define SET_BIT13 SET_BIT12 SET_BIT(12)
|
||||
#define SET_BIT14 SET_BIT13 SET_BIT(13)
|
||||
#define SET_BIT15 SET_BIT14 SET_BIT(14)
|
||||
#define SET_BIT16 SET_BIT15 SET_BIT(15)
|
||||
|
||||
#define CALL(macro, ...) macro(__VA_ARGS__)
|
||||
|
||||
#define SET_BITLOOPN(n) SET_BIT##n
|
||||
|
||||
// just transform the bitmask to a big list of 32-bit integers for now
|
||||
// that's all; the type of character the offset points to will
|
||||
// tell us exactly what we need to know. Naive but straightforward
|
||||
// implementation
|
||||
WARN_UNUSED
|
||||
bool flatten_indexes(size_t len, ParsedJson &pj) {
|
||||
uint32_t *base_ptr = pj.structural_indexes;
|
||||
uint32_t base = 0;
|
||||
#ifdef BUILDHISTOGRAM
|
||||
uint32_t counters[66];
|
||||
uint32_t total = 0;
|
||||
for (int k = 0; k < 66; k++)
|
||||
counters[k] = 0;
|
||||
for (size_t idx = 0; idx < len; idx += 64) {
|
||||
uint64_t s = *(uint64_t *)(pj.structurals + idx / 8);
|
||||
uint32_t cnt = hamming(s);
|
||||
total++;
|
||||
counters[cnt]++;
|
||||
}
|
||||
printf("\n histogram:\n");
|
||||
for (int k = 0; k < 66; k++) {
|
||||
if (counters[k] > 0)
|
||||
printf("%10d %10.u %10.3f \n", k, counters[k], counters[k] * 1.0 / total);
|
||||
}
|
||||
printf("\n\n");
|
||||
#endif
|
||||
for (size_t idx = 0; idx < len; idx += 64) {
|
||||
uint64_t s = *(uint64_t *)(pj.structurals + idx / 8);
|
||||
#ifdef SUPPRESS_CHEESY_FLATTEN
|
||||
while (s) {
|
||||
base_ptr[base++] = (uint32_t)idx + trailingzeroes(s);
|
||||
s &= s - 1ULL;
|
||||
}
|
||||
#elif defined(NO_PDEP_PLEASE)
|
||||
uint32_t cnt = hamming(s);
|
||||
uint32_t cnt = hamming(structurals);
|
||||
uint32_t next_base = base + cnt;
|
||||
while (s) {
|
||||
while (structurals) {
|
||||
CALL(SET_BITLOOPN, NO_PDEP_WIDTH)
|
||||
/*for(size_t i = 0; i < NO_PDEP_WIDTH; i++) {
|
||||
base_ptr[base+i] = (uint32_t)idx + trailingzeroes(s);
|
||||
|
@ -815,37 +796,10 @@ bool flatten_indexes(size_t len, ParsedJson &pj) {
|
|||
base += NO_PDEP_WIDTH;
|
||||
}
|
||||
base = next_base;
|
||||
#else
|
||||
uint32_t cnt = hamming(s);
|
||||
uint32_t next_base = base + cnt;
|
||||
while (s) {
|
||||
// spoil the suspense by reducing dependency chains; actually a win even
|
||||
// with cost of pdep
|
||||
uint64_t s3 = _pdep_u64(~0x7ULL, s); // s3 will have bottom 3 1-bits unset
|
||||
uint64_t s5 = _pdep_u64(~0x1fULL, s); // s5 will have bottom 5 1-bits unset
|
||||
|
||||
base_ptr[base + 0] = (uint32_t)idx + trailingzeroes(s);
|
||||
uint64_t s1 = s & (s - 1ULL);
|
||||
base_ptr[base + 1] = (uint32_t)idx + trailingzeroes(s1);
|
||||
uint64_t s2 = s1 & (s1 - 1ULL);
|
||||
base_ptr[base + 2] =
|
||||
(uint32_t)idx + trailingzeroes(s2); // uint64_t s3 = s2 & (s2 - 1ULL);
|
||||
base_ptr[base + 3] = (uint32_t)idx + trailingzeroes(s3);
|
||||
uint64_t s4 = s3 & (s3 - 1ULL);
|
||||
|
||||
base_ptr[base + 4] =
|
||||
(uint32_t)idx + trailingzeroes(s4); // uint64_t s5 = s4 & (s4 - 1ULL);
|
||||
base_ptr[base + 5] = (uint32_t)idx + trailingzeroes(s5);
|
||||
uint64_t s6 = s5 & (s5 - 1ULL);
|
||||
s = s6;
|
||||
base += 6;
|
||||
}
|
||||
base = next_base;
|
||||
#endif
|
||||
}
|
||||
pj.n_structural_indexes = base;
|
||||
if(base_ptr[pj.n_structural_indexes-1] > len) {
|
||||
printf("Internal bug\n");
|
||||
fprintf( stderr,"Internal bug\n");
|
||||
return false;
|
||||
}
|
||||
if(len != base_ptr[pj.n_structural_indexes-1]) {
|
||||
|
@ -853,10 +807,15 @@ bool flatten_indexes(size_t len, ParsedJson &pj) {
|
|||
base_ptr[pj.n_structural_indexes++] = len;
|
||||
}
|
||||
base_ptr[pj.n_structural_indexes] = 0; // make it safe to dereference one beyond this array
|
||||
|
||||
#ifdef SIMDJSON_UTF8VALIDATE
|
||||
return _mm256_testz_si256(has_error, has_error);
|
||||
#else
|
||||
return true;
|
||||
#endif
|
||||
}
|
||||
/* end file /home/dlemire/CVS/github/simdjson/src/stage2_flatten.cpp */
|
||||
/* begin file /home/dlemire/CVS/github/simdjson/src/stage34_unified.cpp */
|
||||
/* end file /Users/lemire/CVS/github/simdjson/src/stage1_find_marks.cpp */
|
||||
/* begin file /Users/lemire/CVS/github/simdjson/src/stage2_build_tape.cpp */
|
||||
#ifdef _MSC_VER
|
||||
/* Microsoft C/C++-compatible compiler */
|
||||
#include <intrin.h>
|
||||
|
@ -1363,4 +1322,4 @@ succeed:
|
|||
fail:
|
||||
return false;
|
||||
}
|
||||
/* end file /home/dlemire/CVS/github/simdjson/src/stage34_unified.cpp */
|
||||
/* end file /Users/lemire/CVS/github/simdjson/src/stage2_build_tape.cpp */
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/* auto-generated on Mon Dec 31 11:59:09 EST 2018. Do not edit! */
|
||||
/* begin file /home/dlemire/CVS/github/simdjson/include/simdjson/portability.h */
|
||||
/* auto-generated on Mon Dec 31 17:13:28 EST 2018. Do not edit! */
|
||||
/* begin file /Users/lemire/CVS/github/simdjson/include/simdjson/portability.h */
|
||||
#ifndef SIMDJSON_PORTABILITY_H
|
||||
#define SIMDJSON_PORTABILITY_H
|
||||
|
||||
|
@ -35779,7 +35779,7 @@ public:
|
|||
// allocate memory
|
||||
ParsedJson()
|
||||
: bytecapacity(0), depthcapacity(0), tapecapacity(0), stringcapacity(0),
|
||||
current_loc(0), structurals(NULL), n_structural_indexes(0),
|
||||
current_loc(0), n_structural_indexes(0),
|
||||
structural_indexes(NULL), tape(NULL), containing_scope_offset(NULL),
|
||||
ret_address(NULL), string_buf(NULL), current_string_buf_loc(NULL), isvalid(false) {}
|
||||
|
||||
|
@ -35798,11 +35798,6 @@ public:
|
|||
}
|
||||
isvalid = false;
|
||||
bytecapacity = 0; // will only set it to len after allocations are a success
|
||||
structurals = (uint8_t *)aligned_malloc(8, ROUNDUP_N(len, 64) / 8);
|
||||
if (structurals == NULL) {
|
||||
std::cerr << "Could not allocate memory for structurals" << std::endl;
|
||||
return false;
|
||||
};
|
||||
n_structural_indexes = 0;
|
||||
uint32_t max_structures = ROUNDUP_N(len, 64) + 2 + 7;
|
||||
structural_indexes = new uint32_t[max_structures];
|
||||
|
@ -35824,7 +35819,6 @@ public:
|
|||
if(tape != NULL) delete[] tape;
|
||||
if(string_buf != NULL) delete[] string_buf;
|
||||
if(structural_indexes != NULL) delete[] structural_indexes;
|
||||
aligned_free(structurals);
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -35851,7 +35845,6 @@ public:
|
|||
if(tape != NULL) delete[] tape;
|
||||
if(string_buf != NULL) delete[] string_buf;
|
||||
if(structural_indexes != NULL) delete[] structural_indexes;
|
||||
aligned_free(structurals);
|
||||
isvalid = false;
|
||||
}
|
||||
|
||||
|
@ -36435,14 +36428,12 @@ private:
|
|||
};
|
||||
|
||||
|
||||
size_t bytecapacity; // indicates how many bits are meant to be supported by
|
||||
// structurals
|
||||
size_t bytecapacity; // indicates how many input bytes are meant to be supported
|
||||
|
||||
size_t depthcapacity; // how deep we can go
|
||||
size_t tapecapacity;
|
||||
size_t stringcapacity;
|
||||
uint32_t current_loc;
|
||||
uint8_t *structurals;
|
||||
uint32_t n_structural_indexes;
|
||||
|
||||
uint32_t *structural_indexes;
|
||||
|
@ -36465,7 +36456,6 @@ private:
|
|||
tapecapacity(std::move(p.tapecapacity)),
|
||||
stringcapacity(std::move(p.stringcapacity)),
|
||||
current_loc(std::move(p.current_loc)),
|
||||
structurals(std::move(p.structurals)),
|
||||
n_structural_indexes(std::move(p.n_structural_indexes)),
|
||||
structural_indexes(std::move(p.structural_indexes)),
|
||||
tape(std::move(p.tape)),
|
||||
|
@ -36474,7 +36464,6 @@ private:
|
|||
string_buf(std::move(p.string_buf)),
|
||||
current_string_buf_loc(std::move(p.current_string_buf_loc)),
|
||||
isvalid(std::move(p.isvalid)) {
|
||||
p.structurals=NULL;
|
||||
p.structural_indexes=NULL;
|
||||
p.tape=NULL;
|
||||
p.containing_scope_offset=NULL;
|
||||
|
@ -36526,18 +36515,8 @@ static inline bool find_structural_bits(const char *buf, size_t len, ParsedJson
|
|||
}
|
||||
|
||||
#endif
|
||||
/* end file /home/dlemire/CVS/github/simdjson/include/simdjson/stage1_find_marks.h */
|
||||
/* begin file /home/dlemire/CVS/github/simdjson/include/simdjson/stage2_flatten.h */
|
||||
#ifndef SIMDJSON_STAGE2_FLATTEN_H
|
||||
#define SIMDJSON_STAGE2_FLATTEN_H
|
||||
|
||||
|
||||
WARN_UNUSED
|
||||
bool flatten_indexes(size_t len, ParsedJson &pj);
|
||||
|
||||
#endif
|
||||
/* end file /home/dlemire/CVS/github/simdjson/include/simdjson/stage2_flatten.h */
|
||||
/* begin file /home/dlemire/CVS/github/simdjson/include/simdjson/stringparsing.h */
|
||||
/* end file /Users/lemire/CVS/github/simdjson/include/simdjson/stage1_find_marks.h */
|
||||
/* begin file /Users/lemire/CVS/github/simdjson/include/simdjson/stringparsing.h */
|
||||
#ifndef SIMDJSON_STRINGPARSING_H
|
||||
#define SIMDJSON_STRINGPARSING_H
|
||||
|
||||
|
@ -37213,8 +37192,8 @@ static really_inline bool parse_number(const uint8_t *const buf,
|
|||
}
|
||||
|
||||
#endif
|
||||
/* end file /home/dlemire/CVS/github/simdjson/include/simdjson/numberparsing.h */
|
||||
/* begin file /home/dlemire/CVS/github/simdjson/include/simdjson/stage34_unified.h */
|
||||
/* end file /Users/lemire/CVS/github/simdjson/include/simdjson/numberparsing.h */
|
||||
/* begin file /Users/lemire/CVS/github/simdjson/include/simdjson/stage2_build_tape.h */
|
||||
#ifndef SIMDJSON_STAGE34_UNIFIED_H
|
||||
#define SIMDJSON_STAGE34_UNIFIED_H
|
||||
|
||||
|
@ -37230,8 +37209,8 @@ static inline bool unified_machine(const char *buf, size_t len, ParsedJson &pj)
|
|||
}
|
||||
|
||||
#endif
|
||||
/* end file /home/dlemire/CVS/github/simdjson/include/simdjson/stage34_unified.h */
|
||||
/* begin file /home/dlemire/CVS/github/simdjson/include/simdjson/jsonparser.h */
|
||||
/* end file /Users/lemire/CVS/github/simdjson/include/simdjson/stage2_build_tape.h */
|
||||
/* begin file /Users/lemire/CVS/github/simdjson/include/simdjson/jsonparser.h */
|
||||
#ifndef SIMDJSON_JSONPARSER_H
|
||||
#define SIMDJSON_JSONPARSER_H
|
||||
|
||||
|
|
|
@ -40,12 +40,12 @@ bool json_parse(const uint8_t *buf, size_t len, ParsedJson &pj, bool reallocifne
|
|||
}
|
||||
}
|
||||
bool isok = find_structural_bits(buf, len, pj);
|
||||
if (isok) {
|
||||
/*if (isok) {
|
||||
isok = flatten_indexes(len, pj);
|
||||
} else {
|
||||
if(reallocated) free((void*)buf);
|
||||
return false;
|
||||
}
|
||||
}*/
|
||||
if (isok) {
|
||||
isok = unified_machine(buf, len, pj);
|
||||
} else {
|
||||
|
|
|
@ -7,6 +7,35 @@
|
|||
#define SIMDJSON_UTF8VALIDATE
|
||||
#endif
|
||||
|
||||
#ifndef NO_PDEP_WIDTH
|
||||
#define NO_PDEP_WIDTH 8
|
||||
#endif
|
||||
|
||||
#define SET_BIT(i) \
|
||||
base_ptr[base + i] = (uint32_t)idx - 64 + trailingzeroes(structurals); \
|
||||
structurals = structurals & (structurals - 1);
|
||||
|
||||
#define SET_BIT1 SET_BIT(0)
|
||||
#define SET_BIT2 SET_BIT1 SET_BIT(1)
|
||||
#define SET_BIT3 SET_BIT2 SET_BIT(2)
|
||||
#define SET_BIT4 SET_BIT3 SET_BIT(3)
|
||||
#define SET_BIT5 SET_BIT4 SET_BIT(4)
|
||||
#define SET_BIT6 SET_BIT5 SET_BIT(5)
|
||||
#define SET_BIT7 SET_BIT6 SET_BIT(6)
|
||||
#define SET_BIT8 SET_BIT7 SET_BIT(7)
|
||||
#define SET_BIT9 SET_BIT8 SET_BIT(8)
|
||||
#define SET_BIT10 SET_BIT9 SET_BIT(9)
|
||||
#define SET_BIT11 SET_BIT10 SET_BIT(10)
|
||||
#define SET_BIT12 SET_BIT11 SET_BIT(11)
|
||||
#define SET_BIT13 SET_BIT12 SET_BIT(12)
|
||||
#define SET_BIT14 SET_BIT13 SET_BIT(13)
|
||||
#define SET_BIT15 SET_BIT14 SET_BIT(14)
|
||||
#define SET_BIT16 SET_BIT15 SET_BIT(15)
|
||||
|
||||
#define CALL(macro, ...) macro(__VA_ARGS__)
|
||||
|
||||
#define SET_BITLOOPN(n) SET_BIT##n
|
||||
|
||||
// It seems that many parsers do UTF-8 validation.
|
||||
// RapidJSON does not do it by default, but a flag
|
||||
// allows it.
|
||||
|
@ -33,11 +62,13 @@ WARN_UNUSED
|
|||
cerr << "Your ParsedJson object only supports documents up to "<< pj.bytecapacity << " bytes but you are trying to process " << len << " bytes\n";
|
||||
return false;
|
||||
}
|
||||
uint32_t *base_ptr = pj.structural_indexes;
|
||||
uint32_t base = 0;
|
||||
#ifdef SIMDJSON_UTF8VALIDATE
|
||||
__m256i has_error = _mm256_setzero_si256();
|
||||
struct avx_processed_utf_bytes previous;
|
||||
previous.rawbytes = _mm256_setzero_si256();
|
||||
previous.high_nibbles = _mm256_setzero_si256();
|
||||
previous.rawbytes = _mm256_setzero_si256();
|
||||
previous.high_nibbles = _mm256_setzero_si256();
|
||||
previous.carried_continuations = _mm256_setzero_si256();
|
||||
#endif
|
||||
|
||||
|
@ -58,6 +89,7 @@ WARN_UNUSED
|
|||
uint64_t prev_iter_ends_pseudo_pred = 1ULL;
|
||||
size_t lenminus64 = len < 64 ? 0 : len - 64;
|
||||
size_t idx = 0;
|
||||
uint64_t structurals = 0;
|
||||
for (; idx < lenminus64; idx += 64) {
|
||||
#ifndef _MSC_VER
|
||||
__builtin_prefetch(buf + idx + 128);
|
||||
|
@ -121,6 +153,21 @@ WARN_UNUSED
|
|||
quote_bits = quote_bits & ~odd_ends;
|
||||
uint64_t quote_mask = _mm_cvtsi128_si64(_mm_clmulepi64_si128(
|
||||
_mm_set_epi64x(0ULL, quote_bits), _mm_set1_epi8(0xFF), 0));
|
||||
|
||||
|
||||
|
||||
uint32_t cnt = hamming(structurals);
|
||||
uint32_t next_base = base + cnt;
|
||||
while (structurals) {
|
||||
CALL(SET_BITLOOPN, NO_PDEP_WIDTH)
|
||||
/*for(size_t i = 0; i < NO_PDEP_WIDTH; i++) {
|
||||
base_ptr[base+i] = (uint32_t)idx + trailingzeroes(s);
|
||||
s = s & (s - 1);
|
||||
}*/
|
||||
base += NO_PDEP_WIDTH;
|
||||
}
|
||||
base = next_base;
|
||||
|
||||
quote_mask ^= prev_iter_inside_quote;
|
||||
prev_iter_inside_quote = (uint64_t)((int64_t)quote_mask >> 63); // right shift of a signed value is expected to be well-defined and standard-compliant as of C++20; John Regehr from Utah U. says this is fine code
|
||||
|
||||
|
@ -162,7 +209,7 @@ WARN_UNUSED
|
|||
|
||||
uint64_t structural_res_0 = (uint32_t)_mm256_movemask_epi8(tmp_lo);
|
||||
uint64_t structural_res_1 = _mm256_movemask_epi8(tmp_hi);
|
||||
uint64_t structurals = ~(structural_res_0 | (structural_res_1 << 32));
|
||||
structurals = ~(structural_res_0 | (structural_res_1 << 32));
|
||||
|
||||
// this additional mask and transfer is non-trivially expensive,
|
||||
// unfortunately
|
||||
|
@ -201,7 +248,8 @@ WARN_UNUSED
|
|||
// now, we've used our close quotes all we need to. So let's switch them off
|
||||
// they will be off in the quote mask and on in quote bits.
|
||||
structurals &= ~(quote_bits & ~quote_mask);
|
||||
*(uint64_t *)(pj.structurals + idx / 8) = structurals;
|
||||
|
||||
//*(uint64_t *)(pj.structurals + idx / 8) = structurals;
|
||||
}
|
||||
|
||||
////////////////
|
||||
|
@ -275,6 +323,17 @@ WARN_UNUSED
|
|||
quote_mask ^= prev_iter_inside_quote;
|
||||
//prev_iter_inside_quote = (uint64_t)((int64_t)quote_mask >> 63); // right shift of a signed value expected to be well-defined and standard compliant as of C++20
|
||||
|
||||
uint32_t cnt = hamming(structurals);
|
||||
uint32_t next_base = base + cnt;
|
||||
while (structurals) {
|
||||
CALL(SET_BITLOOPN, NO_PDEP_WIDTH)
|
||||
/*for(size_t i = 0; i < NO_PDEP_WIDTH; i++) {
|
||||
base_ptr[base+i] = (uint32_t)idx + trailingzeroes(s);
|
||||
s = s & (s - 1);
|
||||
}*/
|
||||
base += NO_PDEP_WIDTH;
|
||||
}
|
||||
base = next_base;
|
||||
// How do we build up a user traversable data structure
|
||||
// first, do a 'shufti' to detect structural JSON characters
|
||||
// they are { 0x7b } 0x7d : 0x3a [ 0x5b ] 0x5d , 0x2c
|
||||
|
@ -313,7 +372,7 @@ WARN_UNUSED
|
|||
|
||||
uint64_t structural_res_0 = (uint32_t)_mm256_movemask_epi8(tmp_lo);
|
||||
uint64_t structural_res_1 = _mm256_movemask_epi8(tmp_hi);
|
||||
uint64_t structurals = ~(structural_res_0 | (structural_res_1 << 32));
|
||||
structurals = ~(structural_res_0 | (structural_res_1 << 32));
|
||||
|
||||
// this additional mask and transfer is non-trivially expensive,
|
||||
// unfortunately
|
||||
|
@ -354,8 +413,32 @@ WARN_UNUSED
|
|||
// now, we've used our close quotes all we need to. So let's switch them off
|
||||
// they will be off in the quote mask and on in quote bits.
|
||||
structurals &= ~(quote_bits & ~quote_mask);
|
||||
*(uint64_t *)(pj.structurals + idx / 8) = structurals;
|
||||
//*(uint64_t *)(pj.structurals + idx / 8) = structurals;
|
||||
idx += 64;
|
||||
}
|
||||
uint32_t cnt = hamming(structurals);
|
||||
uint32_t next_base = base + cnt;
|
||||
while (structurals) {
|
||||
CALL(SET_BITLOOPN, NO_PDEP_WIDTH)
|
||||
/*for(size_t i = 0; i < NO_PDEP_WIDTH; i++) {
|
||||
base_ptr[base+i] = (uint32_t)idx + trailingzeroes(s);
|
||||
s = s & (s - 1);
|
||||
}*/
|
||||
base += NO_PDEP_WIDTH;
|
||||
}
|
||||
base = next_base;
|
||||
|
||||
pj.n_structural_indexes = base;
|
||||
if(base_ptr[pj.n_structural_indexes-1] > len) {
|
||||
fprintf( stderr,"Internal bug\n");
|
||||
return false;
|
||||
}
|
||||
if(len != base_ptr[pj.n_structural_indexes-1]) {
|
||||
// the string might not be NULL terminated, but we add a virtual NULL ending character.
|
||||
base_ptr[pj.n_structural_indexes++] = len;
|
||||
}
|
||||
base_ptr[pj.n_structural_indexes] = 0; // make it safe to dereference one beyond this array
|
||||
|
||||
#ifdef SIMDJSON_UTF8VALIDATE
|
||||
return _mm256_testz_si256(has_error, has_error);
|
||||
#else
|
||||
|
|
|
@ -1,125 +0,0 @@
|
|||
|
||||
#include <cassert>
|
||||
#include "simdjson/portability.h"
|
||||
#include "simdjson/common_defs.h"
|
||||
#include "simdjson/parsedjson.h"
|
||||
|
||||
#ifndef NO_PDEP_PLEASE
#define NO_PDEP_PLEASE // though this is not always a win, it seems to
                       // be more often a win than not. And it will be faster
                       // on AMD.
#endif

// Number of index slots written per unrolled step when NO_PDEP_PLEASE is
// active. Only SET_BIT1 .. SET_BIT16 are defined below, so the value must stay
// within that range.
#ifndef NO_PDEP_WIDTH
#define NO_PDEP_WIDTH 8
#endif

#if NO_PDEP_WIDTH < 1 || NO_PDEP_WIDTH > 16
#error "NO_PDEP_WIDTH must be between 1 and 16 (only SET_BIT1..SET_BIT16 exist)"
#endif

// SET_BIT(i): store the position of the lowest set bit of `s`, offset by the
// block start `idx`, into slot `base + i` of `base_ptr`, then clear that bit.
// The macro intentionally expands to two bare statements and uses the caller's
// locals `base_ptr`, `base`, `idx` and `s`, so it may only appear at statement
// level inside a braced block. The argument is parenthesized so that any
// expression (not only a literal) selects the intended slot.
#define SET_BIT(i)                                                             \
  base_ptr[base + (i)] = (uint32_t)idx + trailingzeroes(s);                    \
  s = s & (s - 1);

// SET_BITn expands to n consecutive SET_BIT steps filling slots 0 .. n-1.
#define SET_BIT1 SET_BIT(0)
#define SET_BIT2 SET_BIT1 SET_BIT(1)
#define SET_BIT3 SET_BIT2 SET_BIT(2)
#define SET_BIT4 SET_BIT3 SET_BIT(3)
#define SET_BIT5 SET_BIT4 SET_BIT(4)
#define SET_BIT6 SET_BIT5 SET_BIT(5)
#define SET_BIT7 SET_BIT6 SET_BIT(6)
#define SET_BIT8 SET_BIT7 SET_BIT(7)
#define SET_BIT9 SET_BIT8 SET_BIT(8)
#define SET_BIT10 SET_BIT9 SET_BIT(9)
#define SET_BIT11 SET_BIT10 SET_BIT(10)
#define SET_BIT12 SET_BIT11 SET_BIT(11)
#define SET_BIT13 SET_BIT12 SET_BIT(12)
#define SET_BIT14 SET_BIT13 SET_BIT(13)
#define SET_BIT15 SET_BIT14 SET_BIT(14)
#define SET_BIT16 SET_BIT15 SET_BIT(15)

// CALL exists only to force its arguments to be macro-expanded before `macro`
// is applied: CALL(SET_BITLOOPN, NO_PDEP_WIDTH) pastes the *value* of
// NO_PDEP_WIDTH (giving e.g. SET_BIT8), whereas SET_BITLOOPN(NO_PDEP_WIDTH)
// would paste the token NO_PDEP_WIDTH itself.
#define CALL(macro, ...) macro(__VA_ARGS__)

#define SET_BITLOOPN(n) SET_BIT##n
|
||||
|
||||
// just transform the bitmask to a big list of 32-bit integers for now
|
||||
// that's all; the type of character the offset points to will
|
||||
// tell us exactly what we need to know. Naive but straightforward
|
||||
// implementation
|
||||
WARN_UNUSED
|
||||
bool flatten_indexes(size_t len, ParsedJson &pj) {
|
||||
uint32_t *base_ptr = pj.structural_indexes;
|
||||
uint32_t base = 0;
|
||||
#ifdef BUILDHISTOGRAM
|
||||
uint32_t counters[66];
|
||||
uint32_t total = 0;
|
||||
for (int k = 0; k < 66; k++)
|
||||
counters[k] = 0;
|
||||
for (size_t idx = 0; idx < len; idx += 64) {
|
||||
uint64_t s = *(uint64_t *)(pj.structurals + idx / 8);
|
||||
uint32_t cnt = hamming(s);
|
||||
total++;
|
||||
counters[cnt]++;
|
||||
}
|
||||
printf("\n histogram:\n");
|
||||
for (int k = 0; k < 66; k++) {
|
||||
if (counters[k] > 0)
|
||||
printf("%10d %10.u %10.3f \n", k, counters[k], counters[k] * 1.0 / total);
|
||||
}
|
||||
printf("\n\n");
|
||||
#endif
|
||||
for (size_t idx = 0; idx < len; idx += 64) {
|
||||
uint64_t s = *(uint64_t *)(pj.structurals + idx / 8);
|
||||
#ifdef SUPPRESS_CHEESY_FLATTEN
|
||||
while (s) {
|
||||
base_ptr[base++] = (uint32_t)idx + trailingzeroes(s);
|
||||
s &= s - 1ULL;
|
||||
}
|
||||
#elif defined(NO_PDEP_PLEASE)
|
||||
uint32_t cnt = hamming(s);
|
||||
uint32_t next_base = base + cnt;
|
||||
while (s) {
|
||||
CALL(SET_BITLOOPN, NO_PDEP_WIDTH)
|
||||
/*for(size_t i = 0; i < NO_PDEP_WIDTH; i++) {
|
||||
base_ptr[base+i] = (uint32_t)idx + trailingzeroes(s);
|
||||
s = s & (s - 1);
|
||||
}*/
|
||||
base += NO_PDEP_WIDTH;
|
||||
}
|
||||
base = next_base;
|
||||
#else
|
||||
uint32_t cnt = hamming(s);
|
||||
uint32_t next_base = base + cnt;
|
||||
while (s) {
|
||||
// spoil the suspense by reducing dependency chains; actually a win even
|
||||
// with cost of pdep
|
||||
uint64_t s3 = _pdep_u64(~0x7ULL, s); // s3 will have bottom 3 1-bits unset
|
||||
uint64_t s5 = _pdep_u64(~0x1fULL, s); // s5 will have bottom 5 1-bits unset
|
||||
|
||||
base_ptr[base + 0] = (uint32_t)idx + trailingzeroes(s);
|
||||
uint64_t s1 = s & (s - 1ULL);
|
||||
base_ptr[base + 1] = (uint32_t)idx + trailingzeroes(s1);
|
||||
uint64_t s2 = s1 & (s1 - 1ULL);
|
||||
base_ptr[base + 2] =
|
||||
(uint32_t)idx + trailingzeroes(s2); // uint64_t s3 = s2 & (s2 - 1ULL);
|
||||
base_ptr[base + 3] = (uint32_t)idx + trailingzeroes(s3);
|
||||
uint64_t s4 = s3 & (s3 - 1ULL);
|
||||
|
||||
base_ptr[base + 4] =
|
||||
(uint32_t)idx + trailingzeroes(s4); // uint64_t s5 = s4 & (s4 - 1ULL);
|
||||
base_ptr[base + 5] = (uint32_t)idx + trailingzeroes(s5);
|
||||
uint64_t s6 = s5 & (s5 - 1ULL);
|
||||
s = s6;
|
||||
base += 6;
|
||||
}
|
||||
base = next_base;
|
||||
#endif
|
||||
}
|
||||
pj.n_structural_indexes = base;
|
||||
if(base_ptr[pj.n_structural_indexes-1] > len) {
|
||||
printf("Internal bug\n");
|
||||
return false;
|
||||
}
|
||||
if(len != base_ptr[pj.n_structural_indexes-1]) {
|
||||
// the string might not be NULL terminated, but we add a virtual NULL ending character.
|
||||
base_ptr[pj.n_structural_indexes++] = len;
|
||||
}
|
||||
base_ptr[pj.n_structural_indexes] = 0; // make it safe to dereference one beyond this array
|
||||
return true;
|
||||
}
|
|
@ -90,7 +90,7 @@ inline void foundFloat(double result, const uint8_t *buf) {
|
|||
}
|
||||
|
||||
#include "simdjson/jsonparser.h"
|
||||
#include "src/stage34_unified.cpp"
|
||||
#include "src/stage2_build_tape.cpp"
|
||||
|
||||
/**
|
||||
* Does the file filename end with the given extension?
|
||||
|
|
|
@ -282,7 +282,7 @@ inline void foundString(const uint8_t *buf, const uint8_t *parsed_begin,
|
|||
}
|
||||
|
||||
#include "simdjson/jsonparser.h"
|
||||
#include "src/stage34_unified.cpp"
|
||||
#include "src/stage2_build_tape.cpp"
|
||||
|
||||
/**
|
||||
* Does the file filename end with the given extension?
|
||||
|
|
Loading…
Reference in New Issue