Separate definition from declaration, arrange top down
This commit is contained in:
parent
89332e1696
commit
9be4a17687
|
@ -2,24 +2,13 @@
|
||||||
template<size_t STEP_SIZE>
|
template<size_t STEP_SIZE>
|
||||||
struct buf_block_reader {
|
struct buf_block_reader {
|
||||||
public:
|
public:
|
||||||
really_inline buf_block_reader(const uint8_t *_buf, size_t _len) : buf{_buf}, len{_len}, lenminusstep{len < STEP_SIZE ? 0 : len - STEP_SIZE}, idx{0} {}
|
really_inline buf_block_reader(const uint8_t *_buf, size_t _len);
|
||||||
really_inline size_t block_index() { return idx; }
|
really_inline size_t block_index();
|
||||||
really_inline bool has_full_block() const {
|
really_inline bool has_full_block() const;
|
||||||
return idx < lenminusstep;
|
really_inline const uint8_t *full_block() const;
|
||||||
}
|
really_inline bool has_remainder() const;
|
||||||
really_inline const uint8_t *full_block() const {
|
really_inline void get_remainder(uint8_t *tmp_buf) const;
|
||||||
return &buf[idx];
|
really_inline void advance();
|
||||||
}
|
|
||||||
really_inline bool has_remainder() const {
|
|
||||||
return idx < len;
|
|
||||||
}
|
|
||||||
really_inline void get_remainder(uint8_t *tmp_buf) const {
|
|
||||||
memset(tmp_buf, 0x20, STEP_SIZE);
|
|
||||||
memcpy(tmp_buf, buf + idx, len - idx);
|
|
||||||
}
|
|
||||||
really_inline void advance() {
|
|
||||||
idx += STEP_SIZE;
|
|
||||||
}
|
|
||||||
private:
|
private:
|
||||||
const uint8_t *buf;
|
const uint8_t *buf;
|
||||||
const size_t len;
|
const size_t len;
|
||||||
|
@ -27,6 +16,38 @@ private:
|
||||||
size_t idx;
|
size_t idx;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
template<size_t STEP_SIZE>
|
||||||
|
really_inline buf_block_reader<STEP_SIZE>::buf_block_reader(const uint8_t *_buf, size_t _len) : buf{_buf}, len{_len}, lenminusstep{len < STEP_SIZE ? 0 : len - STEP_SIZE}, idx{0} {}
|
||||||
|
|
||||||
|
template<size_t STEP_SIZE>
|
||||||
|
really_inline size_t buf_block_reader<STEP_SIZE>::block_index() { return idx; }
|
||||||
|
|
||||||
|
template<size_t STEP_SIZE>
|
||||||
|
really_inline bool buf_block_reader<STEP_SIZE>::has_full_block() const {
|
||||||
|
return idx < lenminusstep;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<size_t STEP_SIZE>
|
||||||
|
really_inline const uint8_t *buf_block_reader<STEP_SIZE>::full_block() const {
|
||||||
|
return &buf[idx];
|
||||||
|
}
|
||||||
|
|
||||||
|
template<size_t STEP_SIZE>
|
||||||
|
really_inline bool buf_block_reader<STEP_SIZE>::has_remainder() const {
|
||||||
|
return idx < len;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<size_t STEP_SIZE>
|
||||||
|
really_inline void buf_block_reader<STEP_SIZE>::get_remainder(uint8_t *tmp_buf) const {
|
||||||
|
memset(tmp_buf, 0x20, STEP_SIZE);
|
||||||
|
memcpy(tmp_buf, buf + idx, len - idx);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<size_t STEP_SIZE>
|
||||||
|
really_inline void buf_block_reader<STEP_SIZE>::advance() {
|
||||||
|
idx += STEP_SIZE;
|
||||||
|
}
|
||||||
|
|
||||||
// Routines to print masks and text for debugging bitmask operations
|
// Routines to print masks and text for debugging bitmask operations
|
||||||
UNUSED static char * format_input_text(const simd8x64<uint8_t> in) {
|
UNUSED static char * format_input_text(const simd8x64<uint8_t> in) {
|
||||||
static char *buf = (char*)malloc(sizeof(simd8x64<uint8_t>) + 1);
|
static char *buf = (char*)malloc(sizeof(simd8x64<uint8_t>) + 1);
|
||||||
|
|
|
@ -57,12 +57,18 @@ public:
|
||||||
|
|
||||||
class json_structural_indexer {
|
class json_structural_indexer {
|
||||||
public:
|
public:
|
||||||
|
/**
|
||||||
|
* Find the important bits of JSON in a 128-byte chunk, and add them to structural_indexes.
|
||||||
|
*
|
||||||
|
* @param streaming Setting the streaming parameter to true allows the find_structural_bits to
|
||||||
|
* tolerate unclosed strings. The caller should still ensure that the input is valid UTF-8. If
|
||||||
|
* you are processing substrings, you may want to call on a function like trimmed_length_safe_utf8.
|
||||||
|
*/
|
||||||
template<size_t STEP_SIZE>
|
template<size_t STEP_SIZE>
|
||||||
static error_code index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, bool streaming) noexcept;
|
static error_code index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, bool streaming) noexcept;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
really_inline json_structural_indexer(uint32_t *structural_indexes)
|
really_inline json_structural_indexer(uint32_t *structural_indexes);
|
||||||
: indexer{structural_indexes} {}
|
|
||||||
template<size_t STEP_SIZE>
|
template<size_t STEP_SIZE>
|
||||||
really_inline void step(const uint8_t *block, buf_block_reader<STEP_SIZE> &reader) noexcept;
|
really_inline void step(const uint8_t *block, buf_block_reader<STEP_SIZE> &reader) noexcept;
|
||||||
really_inline void next(simd::simd8x64<uint8_t> in, json_block block, size_t idx);
|
really_inline void next(simd::simd8x64<uint8_t> in, json_block block, size_t idx);
|
||||||
|
@ -75,6 +81,63 @@ private:
|
||||||
uint64_t unescaped_chars_error = 0;
|
uint64_t unescaped_chars_error = 0;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
really_inline json_structural_indexer::json_structural_indexer(uint32_t *structural_indexes) : indexer{structural_indexes} {}
|
||||||
|
|
||||||
|
//
|
||||||
|
// PERF NOTES:
|
||||||
|
// We pipe 2 inputs through these stages:
|
||||||
|
// 1. Load JSON into registers. This takes a long time and is highly parallelizable, so we load
|
||||||
|
// 2 inputs' worth at once so that by the time step 2 is looking for the input, it's available.
|
||||||
|
// 2. Scan the JSON for critical data: strings, scalars and operators. This is the critical path.
|
||||||
|
// The output of step 1 depends entirely on this information. These functions don't quite use
|
||||||
|
// up enough CPU: the second half of the functions is highly serial, only using 1 execution core
|
||||||
|
// at a time. The second input's scans have some dependency on the first one's finishing, but
|
||||||
|
// they can make a lot of progress before they need that information.
|
||||||
|
// 3. Step 1 doesn't use enough capacity, so we run some extra stuff while we're waiting for that
|
||||||
|
// to finish: utf-8 checks and generating the output from the last iteration.
|
||||||
|
//
|
||||||
|
// The reason we run 2 inputs at a time, is steps 2 and 3 are *still* not enough to soak up all
|
||||||
|
// available capacity with just one input. Running 2 at a time seems to give the CPU a good enough
|
||||||
|
// workout.
|
||||||
|
//
|
||||||
|
template<size_t STEP_SIZE>
|
||||||
|
error_code json_structural_indexer::index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, bool streaming) noexcept {
|
||||||
|
if (unlikely(len > parser.capacity())) { return CAPACITY; }
|
||||||
|
|
||||||
|
buf_block_reader<STEP_SIZE> reader(buf, len);
|
||||||
|
json_structural_indexer indexer(parser.structural_indexes.get());
|
||||||
|
while (reader.has_full_block()) {
|
||||||
|
indexer.step<STEP_SIZE>(reader.full_block(), reader);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (likely(reader.has_remainder())) {
|
||||||
|
uint8_t block[STEP_SIZE];
|
||||||
|
reader.get_remainder(block);
|
||||||
|
indexer.step<STEP_SIZE>(block, reader);
|
||||||
|
}
|
||||||
|
|
||||||
|
return indexer.finish(parser, reader.block_index(), len, streaming);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<>
|
||||||
|
really_inline void json_structural_indexer::step<128>(const uint8_t *block, buf_block_reader<128> &reader) noexcept {
|
||||||
|
simd::simd8x64<uint8_t> in_1(block);
|
||||||
|
simd::simd8x64<uint8_t> in_2(block+64);
|
||||||
|
json_block block_1 = scanner.next(in_1);
|
||||||
|
json_block block_2 = scanner.next(in_2);
|
||||||
|
this->next(in_1, block_1, reader.block_index());
|
||||||
|
this->next(in_2, block_2, reader.block_index()+64);
|
||||||
|
reader.advance();
|
||||||
|
}
|
||||||
|
|
||||||
|
template<>
|
||||||
|
really_inline void json_structural_indexer::step<64>(const uint8_t *block, buf_block_reader<64> &reader) noexcept {
|
||||||
|
simd::simd8x64<uint8_t> in_1(block);
|
||||||
|
json_block block_1 = scanner.next(in_1);
|
||||||
|
this->next(in_1, block_1, reader.block_index());
|
||||||
|
reader.advance();
|
||||||
|
}
|
||||||
|
|
||||||
really_inline void json_structural_indexer::next(simd::simd8x64<uint8_t> in, json_block block, size_t idx) {
|
really_inline void json_structural_indexer::next(simd::simd8x64<uint8_t> in, json_block block, size_t idx) {
|
||||||
uint64_t unescaped = in.lteq(0x1F);
|
uint64_t unescaped = in.lteq(0x1F);
|
||||||
checker.check_next_input(in);
|
checker.check_next_input(in);
|
||||||
|
@ -128,64 +191,4 @@ really_inline error_code json_structural_indexer::finish(dom_parser_implementati
|
||||||
return checker.errors();
|
return checker.errors();
|
||||||
}
|
}
|
||||||
|
|
||||||
template<>
|
|
||||||
really_inline void json_structural_indexer::step<128>(const uint8_t *block, buf_block_reader<128> &reader) noexcept {
|
|
||||||
simd::simd8x64<uint8_t> in_1(block);
|
|
||||||
simd::simd8x64<uint8_t> in_2(block+64);
|
|
||||||
json_block block_1 = scanner.next(in_1);
|
|
||||||
json_block block_2 = scanner.next(in_2);
|
|
||||||
this->next(in_1, block_1, reader.block_index());
|
|
||||||
this->next(in_2, block_2, reader.block_index()+64);
|
|
||||||
reader.advance();
|
|
||||||
}
|
|
||||||
|
|
||||||
template<>
|
|
||||||
really_inline void json_structural_indexer::step<64>(const uint8_t *block, buf_block_reader<64> &reader) noexcept {
|
|
||||||
simd::simd8x64<uint8_t> in_1(block);
|
|
||||||
json_block block_1 = scanner.next(in_1);
|
|
||||||
this->next(in_1, block_1, reader.block_index());
|
|
||||||
reader.advance();
|
|
||||||
}
|
|
||||||
|
|
||||||
//
|
|
||||||
// Find the important bits of JSON in a 128-byte chunk, and add them to structural_indexes.
|
|
||||||
//
|
|
||||||
// PERF NOTES:
|
|
||||||
// We pipe 2 inputs through these stages:
|
|
||||||
// 1. Load JSON into registers. This takes a long time and is highly parallelizable, so we load
|
|
||||||
// 2 inputs' worth at once so that by the time step 2 is looking for the input, it's available.
|
|
||||||
// 2. Scan the JSON for critical data: strings, scalars and operators. This is the critical path.
|
|
||||||
// The output of step 1 depends entirely on this information. These functions don't quite use
|
|
||||||
// up enough CPU: the second half of the functions is highly serial, only using 1 execution core
|
|
||||||
// at a time. The second input's scans have some dependency on the first one's finishing, but
|
|
||||||
// they can make a lot of progress before they need that information.
|
|
||||||
// 3. Step 1 doesn't use enough capacity, so we run some extra stuff while we're waiting for that
|
|
||||||
// to finish: utf-8 checks and generating the output from the last iteration.
|
|
||||||
//
|
|
||||||
// The reason we run 2 inputs at a time, is steps 2 and 3 are *still* not enough to soak up all
|
|
||||||
// available capacity with just one input. Running 2 at a time seems to give the CPU a good enough
|
|
||||||
// workout.
|
|
||||||
//
|
|
||||||
// Setting the streaming parameter to true allows the find_structural_bits to tolerate unclosed strings.
|
|
||||||
// The caller should still ensure that the input is valid UTF-8. If you are processing substrings,
|
|
||||||
// you may want to call on a function like trimmed_length_safe_utf8.
|
|
||||||
template<size_t STEP_SIZE>
|
|
||||||
error_code json_structural_indexer::index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, bool streaming) noexcept {
|
|
||||||
if (unlikely(len > parser.capacity())) { return CAPACITY; }
|
|
||||||
|
|
||||||
buf_block_reader<STEP_SIZE> reader(buf, len);
|
|
||||||
json_structural_indexer indexer(parser.structural_indexes.get());
|
|
||||||
while (reader.has_full_block()) {
|
|
||||||
indexer.step<STEP_SIZE>(reader.full_block(), reader);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (likely(reader.has_remainder())) {
|
|
||||||
uint8_t block[STEP_SIZE];
|
|
||||||
reader.get_remainder(block);
|
|
||||||
indexer.step<STEP_SIZE>(block, reader);
|
|
||||||
}
|
|
||||||
|
|
||||||
return indexer.finish(parser, reader.block_index(), len, streaming);
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace stage1
|
} // namespace stage1
|
||||||
|
|
Loading…
Reference in New Issue