Move stage1 into a class to pass fewer parameters

This commit is contained in:
John Keiser 2019-10-13 11:36:52 -07:00
parent 9bbd6bd874
commit 81f2249575
4 changed files with 319 additions and 328 deletions

View File

@ -3,221 +3,10 @@
// We assume the file in which it is included already includes
// "simdjson/stage1_find_marks.h" (this simplifies amalgation)
// return a bitvector indicating where we have characters that end an odd-length
// sequence of backslashes (and thus change the behavior of the next character
// to follow). A even-length sequence of backslashes, and, for that matter, the
// largest even-length prefix of our odd-length sequence of backslashes, simply
// modify the behavior of the backslashes themselves.
// We also update the prev_iter_ends_odd_backslash reference parameter to
// indicate whether we end an iteration on an odd-length sequence of
// backslashes, which modifies our subsequent search for odd-length
// sequences of backslashes in an obvious way.
really_inline uint64_t follows_odd_sequence_of(const uint64_t match, uint64_t &overflow) {
const uint64_t even_bits = 0x5555555555555555ULL;
const uint64_t odd_bits = ~even_bits;
uint64_t start_edges = match & ~(match << 1);
/* flip lowest if we have an odd-length run at the end of the prior
* iteration */
uint64_t even_start_mask = even_bits ^ overflow;
uint64_t even_starts = start_edges & even_start_mask;
uint64_t odd_starts = start_edges & ~even_start_mask;
uint64_t even_carries = match + even_starts;
uint64_t odd_carries;
/* must record the carry-out of our odd-carries out of bit 63; this
* indicates whether the sense of any edge going to the next iteration
* should be flipped */
bool new_overflow = add_overflow(match, odd_starts, &odd_carries);
odd_carries |= overflow; /* push in bit zero as a
* potential end if we had an
* odd-numbered run at the
* end of the previous
* iteration */
overflow = new_overflow ? 0x1ULL : 0x0ULL;
uint64_t even_carry_ends = even_carries & ~match;
uint64_t odd_carry_ends = odd_carries & ~match;
uint64_t even_start_odd_end = even_carry_ends & odd_bits;
uint64_t odd_start_even_end = odd_carry_ends & even_bits;
uint64_t odd_ends = even_start_odd_end | odd_start_even_end;
return odd_ends;
}
//
// Check if the current character immediately follows a matching character.
//
// For example, this checks for quotes with backslashes in front of them:
//
// const uint64_t backslashed_quote = in.eq('"') & immediately_follows(in.eq('\'), prev_backslash);
//
really_inline uint64_t follows(const uint64_t match, uint64_t &overflow) {
const uint64_t result = match << 1 | overflow;
overflow = match >> 63;
return result;
}
//
// Check if the current character follows a matching character, with possible "filler" between.
// For example, this checks for empty curly braces, e.g.
//
// in.eq('}') & follows(in.eq('['), in.eq(' '), prev_empty_array) // { <whitespace>* }
//
really_inline uint64_t follows(const uint64_t match, const uint64_t filler, uint64_t &overflow ) {
uint64_t follows_match = follows(match, overflow);
uint64_t result;
overflow |= add_overflow(follows_match, filler, &result);
return result;
}
really_inline ErrorValues detect_errors_on_eof(
uint64_t &unescaped_chars_error,
const uint64_t prev_in_string) {
if (prev_in_string) {
return UNCLOSED_STRING;
}
if (unescaped_chars_error) {
return UNESCAPED_CHARS;
}
return SUCCESS;
}
//
// Return a mask of all string characters plus end quotes.
//
// prev_escaped is overflow saying whether the next character is escaped.
// prev_in_string is overflow saying whether we're still in a string.
//
// Backslash sequences outside of quotes will be detected in stage 2.
//
really_inline uint64_t find_strings(const simd_input in, uint64_t &prev_escaped, uint64_t &prev_in_string) {
const uint64_t backslash = in.eq('\\');
const uint64_t escaped = follows_odd_sequence_of(backslash, prev_escaped);
const uint64_t quote = in.eq('"') & ~escaped;
// compute_quote_mask returns start quote plus string contents.
const uint64_t in_string = compute_quote_mask(quote) ^ prev_in_string;
/* right shift of a signed value expected to be well-defined and standard
* compliant as of C++20,
* John Regher from Utah U. says this is fine code */
prev_in_string = static_cast<uint64_t>(static_cast<int64_t>(in_string) >> 63);
// Use ^ to turn the beginning quote off, and the end quote on.
return in_string ^ quote;
}
really_inline uint64_t invalid_string_bytes(const uint64_t unescaped, const uint64_t quote_mask) {
/* All Unicode characters may be placed within the
* quotation marks, except for the characters that MUST be escaped:
* quotation mark, reverse solidus, and the control characters (U+0000
* through U+001F).
* https://tools.ietf.org/html/rfc8259 */
return quote_mask & unescaped;
}
//
// Determine which characters are *structural*:
// - braces: [] and {}
// - the start of primitives (123, true, false, null)
// - the start of invalid non-whitespace (+, &, ture, UTF-8)
//
// Also detects value sequence errors:
// - two values with no separator between ("hello" "world")
// - separators with no values ([1,] [1,,]and [,2])
//
// This method will find all of the above whether it is in a string or not.
//
// To reduce dependency on the expensive "what is in a string" computation, this method treats the
// contents of a string the same as content outside. Errors and structurals inside the string or on
// the trailing quote will need to be removed later when the correct string information is known.
//
really_inline uint64_t find_potential_structurals(const simd_input in, uint64_t &prev_primitive) {
// These use SIMD so let's kick them off before running the regular 64-bit stuff ...
uint64_t whitespace, op;
find_whitespace_and_operators(in, whitespace, op);
// Detect the start of a run of primitive characters. Includes numbers, booleans, and strings (").
// Everything except whitespace, braces, colon and comma.
const uint64_t primitive = ~(op | whitespace);
const uint64_t follows_primitive = follows(primitive, prev_primitive);
const uint64_t start_primitive = primitive & ~follows_primitive;
// Return final structurals
return op | start_primitive;
}
static const size_t STEP_SIZE = 128;
//
// Find the important bits of JSON in a 128-byte chunk, and add them to :
//
//
//
// PERF NOTES:
// We pipe 2 inputs through these stages:
// 1. Load JSON into registers. This takes a long time and is highly parallelizable, so we load
// 2 inputs' worth at once so that by the time step 2 is looking for them input, it's available.
// 2. Scan the JSON for critical data: strings, primitives and operators. This is the critical path.
// The output of step 1 depends entirely on this information. These functions don't quite use
// up enough CPU: the second half of the functions is highly serial, only using 1 execution core
// at a time. The second input's scans has some dependency on the first ones finishing it, but
// they can make a lot of progress before they need that information.
// 3. Step 1 doesn't use enough capacity, so we run some extra stuff while we're waiting for that
// to finish: utf-8 checks and generating the output from the last iteration.
//
// The reason we run 2 inputs at a time, is steps 2 and 3 are *still* not enough to soak up all
// available capacity with just one input. Running 2 at a time seems to give the CPU a good enough
// workout.
//
really_inline void find_structural_bits_128(
const uint8_t *buf, const size_t idx, uint32_t *&base_ptr,
uint64_t &prev_escaped, uint64_t &prev_in_string,
uint64_t &prev_primitive,
uint64_t &prev_structurals,
uint64_t &unescaped_chars_error,
utf8_checker &utf8_state) {
//
// Load up all 128 bytes into SIMD registers
//
simd_input in_1(buf);
simd_input in_2(buf+64);
//
// Find the strings and potential structurals (operators / primitives).
//
// This will include false structurals that are *inside* strings--we'll filter strings out
// before we return.
//
uint64_t string_1 = find_strings(in_1, prev_escaped, prev_in_string);
uint64_t structurals_1 = find_potential_structurals(in_1, prev_primitive);
uint64_t string_2 = find_strings(in_2, prev_escaped, prev_in_string);
uint64_t structurals_2 = find_potential_structurals(in_2, prev_primitive);
//
// Do miscellaneous work while the processor is busy calculating strings and structurals.
//
// After that, weed out structurals that are inside strings and find invalid string characters.
//
uint64_t unescaped_1 = in_1.lteq(0x1F);
utf8_state.check_next_input(in_1);
flatten_bits(base_ptr, idx, prev_structurals); // Output *last* iteration's structurals to ParsedJson
prev_structurals = structurals_1 & ~string_1;
unescaped_chars_error |= unescaped_1 & string_1;
uint64_t unescaped_2 = in_2.lteq(0x1F);
utf8_state.check_next_input(in_2);
flatten_bits(base_ptr, idx+64, prev_structurals); // Output *last* iteration's structurals to ParsedJson
prev_structurals = structurals_2 & ~string_2;
unescaped_chars_error |= unescaped_2 & string_2;
}
int find_structural_bits(const uint8_t *buf, size_t len, simdjson::ParsedJson &pj) {
if (unlikely(len > pj.byte_capacity)) {
std::cerr << "Your ParsedJson object only supports documents up to "
<< pj.byte_capacity << " bytes but you are trying to process "
<< len << " bytes" << std::endl;
return simdjson::CAPACITY;
}
uint32_t *base_ptr = pj.structural_indexes;
utf8_checker utf8_state;
class json_structural_scanner {
public:
// Whether the first character of the next iteration is escaped.
uint64_t prev_escaped = 0ULL;
// Whether the last iteration was still inside a string (all 1's = true, all 0's = false).
@ -228,41 +17,249 @@ int find_structural_bits(const uint8_t *buf, size_t len, simdjson::ParsedJson &p
// Mask of structural characters from the last iteration.
// Kept around for performance reasons, so we can call flatten_bits to soak up some unused
// CPU capacity while the next iteration is busy with an expensive clmul in compute_quote_mask.
uint64_t structurals = 0;
size_t lenminusstep = len < STEP_SIZE ? 0 : len - STEP_SIZE;
size_t idx = 0;
uint64_t prev_structurals = 0;
// Errors with unescaped characters in strings (ASCII codepoints < 0x20)
uint64_t unescaped_chars_error = 0;
bit_indexer structural_indexes;
for (; idx < lenminusstep; idx += STEP_SIZE) {
find_structural_bits_128(&buf[idx], idx, base_ptr,
prev_escaped, prev_in_string, prev_primitive,
structurals, unescaped_chars_error, utf8_state);
json_structural_scanner(uint32_t *_structural_indexes) : structural_indexes{_structural_indexes} {}
// return a bitvector indicating where we have characters that end an odd-length
// sequence of backslashes (and thus change the behavior of the next character
// to follow). A even-length sequence of backslashes, and, for that matter, the
// largest even-length prefix of our odd-length sequence of backslashes, simply
// modify the behavior of the backslashes themselves.
// We also update the prev_iter_ends_odd_backslash reference parameter to
// indicate whether we end an iteration on an odd-length sequence of
// backslashes, which modifies our subsequent search for odd-length
// sequences of backslashes in an obvious way.
really_inline uint64_t follows_odd_sequence_of(const uint64_t match, uint64_t &overflow) {
const uint64_t even_bits = 0x5555555555555555ULL;
const uint64_t odd_bits = ~even_bits;
uint64_t start_edges = match & ~(match << 1);
/* flip lowest if we have an odd-length run at the end of the prior
* iteration */
uint64_t even_start_mask = even_bits ^ overflow;
uint64_t even_starts = start_edges & even_start_mask;
uint64_t odd_starts = start_edges & ~even_start_mask;
uint64_t even_carries = match + even_starts;
uint64_t odd_carries;
/* must record the carry-out of our odd-carries out of bit 63; this
* indicates whether the sense of any edge going to the next iteration
* should be flipped */
bool new_overflow = add_overflow(match, odd_starts, &odd_carries);
odd_carries |= overflow; /* push in bit zero as a
* potential end if we had an
* odd-numbered run at the
* end of the previous
* iteration */
overflow = new_overflow ? 0x1ULL : 0x0ULL;
uint64_t even_carry_ends = even_carries & ~match;
uint64_t odd_carry_ends = odd_carries & ~match;
uint64_t even_start_odd_end = even_carry_ends & odd_bits;
uint64_t odd_start_even_end = odd_carry_ends & even_bits;
uint64_t odd_ends = even_start_odd_end | odd_start_even_end;
return odd_ends;
}
/* If we have a final chunk of less than 64 bytes, pad it to 64 with
* spaces before processing it (otherwise, we risk invalidating the UTF-8
* checks). */
if (likely(idx < len)) {
uint8_t tmp_buf[STEP_SIZE];
memset(tmp_buf, 0x20, STEP_SIZE);
memcpy(tmp_buf, buf + idx, len - idx);
find_structural_bits_128(&tmp_buf[0], idx, base_ptr,
prev_escaped, prev_in_string, prev_primitive,
structurals, unescaped_chars_error, utf8_state);
idx += STEP_SIZE;
//
// Check if the current character immediately follows a matching character.
//
// For example, this checks for quotes with backslashes in front of them:
//
// const uint64_t backslashed_quote = in.eq('"') & immediately_follows(in.eq('\'), prev_backslash);
//
really_inline uint64_t follows(const uint64_t match, uint64_t &overflow) {
const uint64_t result = match << 1 | overflow;
overflow = match >> 63;
return result;
}
/* finally, flatten out the remaining structurals from the last iteration */
flatten_bits(base_ptr, idx, structurals);
//
// Check if the current character follows a matching character, with possible "filler" between.
// For example, this checks for empty curly braces, e.g.
//
// in.eq('}') & follows(in.eq('['), in.eq(' '), prev_empty_array) // { <whitespace>* }
//
really_inline uint64_t follows(const uint64_t match, const uint64_t filler, uint64_t &overflow) {
uint64_t follows_match = follows(match, overflow);
uint64_t result;
overflow |= add_overflow(follows_match, filler, &result);
return result;
}
simdjson::ErrorValues error = detect_errors_on_eof(unescaped_chars_error, prev_in_string);
really_inline ErrorValues detect_errors_on_eof() {
if (prev_in_string) {
return UNCLOSED_STRING;
}
if (unescaped_chars_error) {
return UNESCAPED_CHARS;
}
return SUCCESS;
}
//
// Return a mask of all string characters plus end quotes.
//
// prev_escaped is overflow saying whether the next character is escaped.
// prev_in_string is overflow saying whether we're still in a string.
//
// Backslash sequences outside of quotes will be detected in stage 2.
//
really_inline uint64_t find_strings(const simd_input in) {
const uint64_t backslash = in.eq('\\');
const uint64_t escaped = follows_odd_sequence_of(backslash, prev_escaped);
const uint64_t quote = in.eq('"') & ~escaped;
// compute_quote_mask returns start quote plus string contents.
const uint64_t in_string = compute_quote_mask(quote) ^ prev_in_string;
/* right shift of a signed value expected to be well-defined and standard
* compliant as of C++20,
* John Regher from Utah U. says this is fine code */
prev_in_string = static_cast<uint64_t>(static_cast<int64_t>(in_string) >> 63);
// Use ^ to turn the beginning quote off, and the end quote on.
return in_string ^ quote;
}
really_inline uint64_t invalid_string_bytes(const uint64_t unescaped, const uint64_t quote_mask) {
/* All Unicode characters may be placed within the
* quotation marks, except for the characters that MUST be escaped:
* quotation mark, reverse solidus, and the control characters (U+0000
* through U+001F).
* https://tools.ietf.org/html/rfc8259 */
return quote_mask & unescaped;
}
//
// Determine which characters are *structural*:
// - braces: [] and {}
// - the start of primitives (123, true, false, null)
// - the start of invalid non-whitespace (+, &, ture, UTF-8)
//
// Also detects value sequence errors:
// - two values with no separator between ("hello" "world")
// - separators with no values ([1,] [1,,]and [,2])
//
// This method will find all of the above whether it is in a string or not.
//
// To reduce dependency on the expensive "what is in a string" computation, this method treats the
// contents of a string the same as content outside. Errors and structurals inside the string or on
// the trailing quote will need to be removed later when the correct string information is known.
//
really_inline uint64_t find_potential_structurals(const simd_input in) {
// These use SIMD so let's kick them off before running the regular 64-bit stuff ...
uint64_t whitespace, op;
find_whitespace_and_operators(in, whitespace, op);
// Detect the start of a run of primitive characters. Includes numbers, booleans, and strings (").
// Everything except whitespace, braces, colon and comma.
const uint64_t primitive = ~(op | whitespace);
const uint64_t follows_primitive = follows(primitive, prev_primitive);
const uint64_t start_primitive = primitive & ~follows_primitive;
// Return final structurals
return op | start_primitive;
}
//
// Find the important bits of JSON in a 128-byte chunk, and add them to structural_indexes.
//
// PERF NOTES:
// We pipe 2 inputs through these stages:
// 1. Load JSON into registers. This takes a long time and is highly parallelizable, so we load
// 2 inputs' worth at once so that by the time step 2 is looking for them input, it's available.
// 2. Scan the JSON for critical data: strings, primitives and operators. This is the critical path.
// The output of step 1 depends entirely on this information. These functions don't quite use
// up enough CPU: the second half of the functions is highly serial, only using 1 execution core
// at a time. The second input's scans has some dependency on the first ones finishing it, but
// they can make a lot of progress before they need that information.
// 3. Step 1 doesn't use enough capacity, so we run some extra stuff while we're waiting for that
// to finish: utf-8 checks and generating the output from the last iteration.
//
// The reason we run 2 inputs at a time, is steps 2 and 3 are *still* not enough to soak up all
// available capacity with just one input. Running 2 at a time seems to give the CPU a good enough
// workout.
//
really_inline void scan_step(const uint8_t *buf, const size_t idx, utf8_checker &utf8_checker) {
//
// Load up all 128 bytes into SIMD registers
//
simd_input in_1(buf);
simd_input in_2(buf+64);
//
// Find the strings and potential structurals (operators / primitives).
//
// This will include false structurals that are *inside* strings--we'll filter strings out
// before we return.
//
uint64_t string_1 = this->find_strings(in_1);
uint64_t structurals_1 = this->find_potential_structurals(in_1);
uint64_t string_2 = this->find_strings(in_2);
uint64_t structurals_2 = this->find_potential_structurals(in_2);
//
// Do miscellaneous work while the processor is busy calculating strings and structurals.
//
// After that, weed out structurals that are inside strings and find invalid string characters.
//
uint64_t unescaped_1 = in_1.lteq(0x1F);
utf8_checker.check_next_input(in_1);
this->structural_indexes.write_indexes(idx-64, prev_structurals); // Output *last* iteration's structurals to ParsedJson
this->prev_structurals = structurals_1 & ~string_1;
this->unescaped_chars_error |= unescaped_1 & string_1;
uint64_t unescaped_2 = in_2.lteq(0x1F);
utf8_checker.check_next_input(in_2);
this->structural_indexes.write_indexes(idx, prev_structurals); // Output *last* iteration's structurals to ParsedJson
this->prev_structurals = structurals_2 & ~string_2;
this->unescaped_chars_error |= unescaped_2 & string_2;
}
really_inline void scan(const uint8_t *buf, const size_t len, utf8_checker &utf8_checker) {
size_t lenminusstep = len < STEP_SIZE ? 0 : len - STEP_SIZE;
size_t idx = 0;
for (; idx < lenminusstep; idx += STEP_SIZE) {
this->scan_step(&buf[idx], idx, utf8_checker);
}
/* If we have a final chunk of less than 64 bytes, pad it to 64 with
* spaces before processing it (otherwise, we risk invalidating the UTF-8
* checks). */
if (likely(idx < len)) {
uint8_t tmp_buf[STEP_SIZE];
memset(tmp_buf, 0x20, STEP_SIZE);
memcpy(tmp_buf, buf + idx, len - idx);
this->scan_step(&tmp_buf[0], idx, utf8_checker);
idx += STEP_SIZE;
}
/* finally, flatten out the remaining structurals from the last iteration */
this->structural_indexes.write_indexes(idx-64, this->prev_structurals);
}
};
int find_structural_bits(const uint8_t *buf, size_t len, simdjson::ParsedJson &pj) {
if (unlikely(len > pj.byte_capacity)) {
std::cerr << "Your ParsedJson object only supports documents up to "
<< pj.byte_capacity << " bytes but you are trying to process "
<< len << " bytes" << std::endl;
return simdjson::CAPACITY;
}
utf8_checker utf8_checker{};
json_structural_scanner scanner{pj.structural_indexes};
scanner.scan(buf, len, utf8_checker);
simdjson::ErrorValues error = scanner.detect_errors_on_eof();
if (unlikely(error != simdjson::SUCCESS)) {
return error;
}
pj.n_structural_indexes = base_ptr - pj.structural_indexes;
pj.n_structural_indexes = scanner.structural_indexes.tail - pj.structural_indexes;
/* a valid JSON file cannot have zero structural indexes - we should have
* found something */
if (unlikely(pj.n_structural_indexes == 0u)) {
@ -278,5 +275,5 @@ int find_structural_bits(const uint8_t *buf, size_t len, simdjson::ParsedJson &p
}
/* make it safe to dereference one beyond this array */
pj.structural_indexes[pj.n_structural_indexes] = 0;
return utf8_state.errors();
return utf8_checker.errors();
}

View File

@ -3,65 +3,52 @@
// We assume the file in which it is include already includes
// "simdjson/stage1_find_marks.h" (this simplifies amalgation)
#ifdef SIMDJSON_NAIVE_FLATTEN // useful for benchmarking
class bit_indexer {
public:
uint32_t *tail;
// This is just a naive implementation. It should be normally
// disable, but can be used for research purposes to compare
// again our optimized version.
really_inline void flatten_bits(uint32_t *base_ptr, uint32_t &base, uint32_t idx, uint64_t bits) {
uint32_t *out_ptr = base_ptr + base;
idx -= 64;
while (bits != 0) {
out_ptr[0] = idx + trailing_zeroes(bits);
bits = bits & (bits - 1);
out_ptr++;
}
base = (out_ptr - base_ptr);
}
bit_indexer(uint32_t *index_buf) : tail(index_buf) {}
#else // SIMDJSON_NAIVE_FLATTEN
// flatten out values in 'bits' assuming that they are are to have values of idx
// plus their position in the bitvector, and store these indexes at
// this->tail[base] incrementing base as we go
// will potentially store extra values beyond end of valid bits, so this->tail
// needs to be large enough to handle this
really_inline void write_indexes(uint32_t idx, uint64_t bits) {
// In some instances, the next branch is expensive because it is mispredicted.
// Unfortunately, in other cases,
// it helps tremendously.
if (bits == 0)
return;
uint32_t cnt = hamming(bits);
// flatten out values in 'bits' assuming that they are are to have values of idx
// plus their position in the bitvector, and store these indexes at
// base_ptr[base] incrementing base as we go
// will potentially store extra values beyond end of valid bits, so base_ptr
// needs to be large enough to handle this
really_inline void flatten_bits(uint32_t *&base_ptr, uint32_t idx, uint64_t bits) {
// In some instances, the next branch is expensive because it is mispredicted.
// Unfortunately, in other cases,
// it helps tremendously.
if (bits == 0)
return;
uint32_t cnt = hamming(bits);
idx -= 64;
// Do the first 8 all together
for (int i=0; i<8; i++) {
base_ptr[i] = idx + trailing_zeroes(bits);
bits = bits & (bits - 1);
}
// Do the next 8 all together (we hope in most cases it won't happen at all
// and the branch is easily predicted).
if (unlikely(cnt > 8)) {
for (int i=8; i<16; i++) {
base_ptr[i] = idx + trailing_zeroes(bits);
// Do the first 8 all together
for (int i=0; i<8; i++) {
this->tail[i] = idx + trailing_zeroes(bits);
bits = bits & (bits - 1);
}
// Most files don't have 16+ structurals per block, so we take several basically guaranteed
// branch mispredictions here. 16+ structurals per block means either punctuation ({} [] , :)
// or the start of a value ("abc" true 123) every 4 characters.
if (unlikely(cnt > 16)) {
uint32_t i = 16;
do {
base_ptr[i] = idx + trailing_zeroes(bits);
// Do the next 8 all together (we hope in most cases it won't happen at all
// and the branch is easily predicted).
if (unlikely(cnt > 8)) {
for (int i=8; i<16; i++) {
this->tail[i] = idx + trailing_zeroes(bits);
bits = bits & (bits - 1);
i++;
} while (i < cnt);
}
}
}
base_ptr += cnt;
}
#endif // SIMDJSON_NAIVE_FLATTEN
// Most files don't have 16+ structurals per block, so we take several basically guaranteed
// branch mispredictions here. 16+ structurals per block means either punctuation ({} [] , :)
// or the start of a value ("abc" true 123) every 4 characters.
if (unlikely(cnt > 16)) {
uint32_t i = 16;
do {
this->tail[i] = idx + trailing_zeroes(bits);
bits = bits & (bits - 1);
i++;
} while (i < cnt);
}
}
this->tail += cnt;
}
};

View File

@ -84,49 +84,55 @@ really_inline void find_whitespace_and_operators(
#endif // else SIMDJSON_NAIVE_STRUCTURAL
}
// flatten out values in 'bits' assuming that they are are to have values of idx
// plus their position in the bitvector, and store these indexes at
// base_ptr[base] incrementing base as we go
// will potentially store extra values beyond end of valid bits, so base_ptr
// needs to be large enough to handle this
really_inline void flatten_bits(uint32_t *&base_ptr, uint32_t idx, uint64_t bits) {
// In some instances, the next branch is expensive because it is mispredicted.
// Unfortunately, in other cases,
// it helps tremendously.
if (bits == 0)
return;
uint32_t cnt = _mm_popcnt_u64(bits);
idx -= 64;
class bit_indexer {
public:
uint32_t *tail;
// Do the first 8 all together
for (int i=0; i<8; i++) {
base_ptr[i] = idx + trailing_zeroes(bits);
bits = _blsr_u64(bits);
}
bit_indexer(uint32_t *index_buf) : tail(index_buf) {}
// Do the next 8 all together (we hope in most cases it won't happen at all
// and the branch is easily predicted).
if (unlikely(cnt > 8)) {
for (int i=8; i<16; i++) {
base_ptr[i] = idx + trailing_zeroes(bits);
// flatten out values in 'bits' assuming that they are are to have values of idx
// plus their position in the bitvector, and store these indexes at
// base_ptr[base] incrementing base as we go
// will potentially store extra values beyond end of valid bits, so base_ptr
// needs to be large enough to handle this
really_inline void write_indexes(uint32_t idx, uint64_t bits) {
// In some instances, the next branch is expensive because it is mispredicted.
// Unfortunately, in other cases,
// it helps tremendously.
if (bits == 0)
return;
uint32_t cnt = _mm_popcnt_u64(bits);
// Do the first 8 all together
for (int i=0; i<8; i++) {
this->tail[i] = idx + trailing_zeroes(bits);
bits = _blsr_u64(bits);
}
// Most files don't have 16+ structurals per block, so we take several basically guaranteed
// branch mispredictions here. 16+ structurals per block means either punctuation ({} [] , :)
// or the start of a value ("abc" true 123) every four characters.
if (unlikely(cnt > 16)) {
uint32_t i = 16;
do {
base_ptr[i] = idx + trailing_zeroes(bits);
// Do the next 8 all together (we hope in most cases it won't happen at all
// and the branch is easily predicted).
if (unlikely(cnt > 8)) {
for (int i=8; i<16; i++) {
this->tail[i] = idx + trailing_zeroes(bits);
bits = _blsr_u64(bits);
i++;
} while (i < cnt);
}
}
}
base_ptr += cnt;
}
// Most files don't have 16+ structurals per block, so we take several basically guaranteed
// branch mispredictions here. 16+ structurals per block means either punctuation ({} [] , :)
// or the start of a value ("abc" true 123) every four characters.
if (unlikely(cnt > 16)) {
uint32_t i = 16;
do {
this->tail[i] = idx + trailing_zeroes(bits);
bits = _blsr_u64(bits);
i++;
} while (i < cnt);
}
}
this->tail += cnt;
}
};
#include "generic/stage1_find_marks.h"

View File

@ -1,8 +1,9 @@
#include "simdjson/portability.h"
#include "simdjson/common_defs.h"
namespace {
// for when clmul is unavailable
[[maybe_unused]] uint64_t portable_compute_quote_mask(uint64_t quote_bits) {
[[maybe_unused]] really_inline uint64_t portable_compute_quote_mask(uint64_t quote_bits) {
uint64_t quote_mask = quote_bits ^ (quote_bits << 1);
quote_mask = quote_mask ^ (quote_mask << 2);
quote_mask = quote_mask ^ (quote_mask << 4);