Separate definition from declaration, arrange top down
This commit is contained in:
parent
89332e1696
commit
9be4a17687
|
@ -2,24 +2,13 @@
|
|||
template<size_t STEP_SIZE>
|
||||
struct buf_block_reader {
|
||||
public:
|
||||
really_inline buf_block_reader(const uint8_t *_buf, size_t _len) : buf{_buf}, len{_len}, lenminusstep{len < STEP_SIZE ? 0 : len - STEP_SIZE}, idx{0} {}
|
||||
really_inline size_t block_index() { return idx; }
|
||||
really_inline bool has_full_block() const {
|
||||
return idx < lenminusstep;
|
||||
}
|
||||
really_inline const uint8_t *full_block() const {
|
||||
return &buf[idx];
|
||||
}
|
||||
really_inline bool has_remainder() const {
|
||||
return idx < len;
|
||||
}
|
||||
really_inline void get_remainder(uint8_t *tmp_buf) const {
|
||||
memset(tmp_buf, 0x20, STEP_SIZE);
|
||||
memcpy(tmp_buf, buf + idx, len - idx);
|
||||
}
|
||||
really_inline void advance() {
|
||||
idx += STEP_SIZE;
|
||||
}
|
||||
really_inline buf_block_reader(const uint8_t *_buf, size_t _len);
|
||||
really_inline size_t block_index();
|
||||
really_inline bool has_full_block() const;
|
||||
really_inline const uint8_t *full_block() const;
|
||||
really_inline bool has_remainder() const;
|
||||
really_inline void get_remainder(uint8_t *tmp_buf) const;
|
||||
really_inline void advance();
|
||||
private:
|
||||
const uint8_t *buf;
|
||||
const size_t len;
|
||||
|
@ -27,6 +16,38 @@ private:
|
|||
size_t idx;
|
||||
};
|
||||
|
||||
template<size_t STEP_SIZE>
|
||||
really_inline buf_block_reader<STEP_SIZE>::buf_block_reader(const uint8_t *_buf, size_t _len) : buf{_buf}, len{_len}, lenminusstep{len < STEP_SIZE ? 0 : len - STEP_SIZE}, idx{0} {}
|
||||
|
||||
template<size_t STEP_SIZE>
|
||||
really_inline size_t buf_block_reader<STEP_SIZE>::block_index() { return idx; }
|
||||
|
||||
template<size_t STEP_SIZE>
|
||||
really_inline bool buf_block_reader<STEP_SIZE>::has_full_block() const {
|
||||
return idx < lenminusstep;
|
||||
}
|
||||
|
||||
template<size_t STEP_SIZE>
|
||||
really_inline const uint8_t *buf_block_reader<STEP_SIZE>::full_block() const {
|
||||
return &buf[idx];
|
||||
}
|
||||
|
||||
template<size_t STEP_SIZE>
|
||||
really_inline bool buf_block_reader<STEP_SIZE>::has_remainder() const {
|
||||
return idx < len;
|
||||
}
|
||||
|
||||
template<size_t STEP_SIZE>
|
||||
really_inline void buf_block_reader<STEP_SIZE>::get_remainder(uint8_t *tmp_buf) const {
|
||||
memset(tmp_buf, 0x20, STEP_SIZE);
|
||||
memcpy(tmp_buf, buf + idx, len - idx);
|
||||
}
|
||||
|
||||
template<size_t STEP_SIZE>
|
||||
really_inline void buf_block_reader<STEP_SIZE>::advance() {
|
||||
idx += STEP_SIZE;
|
||||
}
|
||||
|
||||
// Routines to print masks and text for debugging bitmask operations
|
||||
UNUSED static char * format_input_text(const simd8x64<uint8_t> in) {
|
||||
static char *buf = (char*)malloc(sizeof(simd8x64<uint8_t>) + 1);
|
||||
|
|
|
@ -57,12 +57,18 @@ public:
|
|||
|
||||
class json_structural_indexer {
|
||||
public:
|
||||
/**
|
||||
* Find the important bits of JSON in a 128-byte chunk, and add them to structural_indexes.
|
||||
*
|
||||
* @param streaming Setting the streaming parameter to true allows the find_structural_bits to
|
||||
* tolerate unclosed strings. The caller should still ensure that the input is valid UTF-8. If
|
||||
* you are processing substrings, you may want to call on a function like trimmed_length_safe_utf8.
|
||||
*/
|
||||
template<size_t STEP_SIZE>
|
||||
static error_code index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, bool streaming) noexcept;
|
||||
|
||||
private:
|
||||
really_inline json_structural_indexer(uint32_t *structural_indexes)
|
||||
: indexer{structural_indexes} {}
|
||||
really_inline json_structural_indexer(uint32_t *structural_indexes);
|
||||
template<size_t STEP_SIZE>
|
||||
really_inline void step(const uint8_t *block, buf_block_reader<STEP_SIZE> &reader) noexcept;
|
||||
really_inline void next(simd::simd8x64<uint8_t> in, json_block block, size_t idx);
|
||||
|
@ -75,6 +81,63 @@ private:
|
|||
uint64_t unescaped_chars_error = 0;
|
||||
};
|
||||
|
||||
// Constructs the indexer by handing `structural_indexes` to the `indexer` member.
really_inline json_structural_indexer::json_structural_indexer(uint32_t *structural_indexes)
  : indexer{structural_indexes} {}
|
||||
|
||||
//
|
||||
// PERF NOTES:
|
||||
// We pipe 2 inputs through these stages:
|
||||
// 1. Load JSON into registers. This takes a long time and is highly parallelizable, so we load
|
||||
// 2 inputs' worth at once so that by the time step 2 is looking for the input, it's available.
|
||||
// 2. Scan the JSON for critical data: strings, scalars and operators. This is the critical path.
|
||||
// The output of step 1 depends entirely on this information. These functions don't quite use
|
||||
// up enough CPU: the second half of the functions is highly serial, only using 1 execution core
|
||||
// at a time. The second input's scans have some dependency on the first one's finishing, but
|
||||
// they can make a lot of progress before they need that information.
|
||||
// 3. Step 1 doesn't use enough capacity, so we run some extra stuff while we're waiting for that
|
||||
// to finish: utf-8 checks and generating the output from the last iteration.
|
||||
//
|
||||
// The reason we run 2 inputs at a time, is steps 2 and 3 are *still* not enough to soak up all
|
||||
// available capacity with just one input. Running 2 at a time seems to give the CPU a good enough
|
||||
// workout.
|
||||
//
|
||||
template<size_t STEP_SIZE>
|
||||
error_code json_structural_indexer::index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, bool streaming) noexcept {
|
||||
if (unlikely(len > parser.capacity())) { return CAPACITY; }
|
||||
|
||||
buf_block_reader<STEP_SIZE> reader(buf, len);
|
||||
json_structural_indexer indexer(parser.structural_indexes.get());
|
||||
while (reader.has_full_block()) {
|
||||
indexer.step<STEP_SIZE>(reader.full_block(), reader);
|
||||
}
|
||||
|
||||
if (likely(reader.has_remainder())) {
|
||||
uint8_t block[STEP_SIZE];
|
||||
reader.get_remainder(block);
|
||||
indexer.step<STEP_SIZE>(block, reader);
|
||||
}
|
||||
|
||||
return indexer.finish(parser, reader.block_index(), len, streaming);
|
||||
}
|
||||
|
||||
template<>
|
||||
really_inline void json_structural_indexer::step<128>(const uint8_t *block, buf_block_reader<128> &reader) noexcept {
|
||||
simd::simd8x64<uint8_t> in_1(block);
|
||||
simd::simd8x64<uint8_t> in_2(block+64);
|
||||
json_block block_1 = scanner.next(in_1);
|
||||
json_block block_2 = scanner.next(in_2);
|
||||
this->next(in_1, block_1, reader.block_index());
|
||||
this->next(in_2, block_2, reader.block_index()+64);
|
||||
reader.advance();
|
||||
}
|
||||
|
||||
template<>
|
||||
really_inline void json_structural_indexer::step<64>(const uint8_t *block, buf_block_reader<64> &reader) noexcept {
|
||||
simd::simd8x64<uint8_t> in_1(block);
|
||||
json_block block_1 = scanner.next(in_1);
|
||||
this->next(in_1, block_1, reader.block_index());
|
||||
reader.advance();
|
||||
}
|
||||
|
||||
really_inline void json_structural_indexer::next(simd::simd8x64<uint8_t> in, json_block block, size_t idx) {
|
||||
uint64_t unescaped = in.lteq(0x1F);
|
||||
checker.check_next_input(in);
|
||||
|
@ -128,64 +191,4 @@ really_inline error_code json_structural_indexer::finish(dom_parser_implementati
|
|||
return checker.errors();
|
||||
}
|
||||
|
||||
template<>
|
||||
really_inline void json_structural_indexer::step<128>(const uint8_t *block, buf_block_reader<128> &reader) noexcept {
|
||||
simd::simd8x64<uint8_t> in_1(block);
|
||||
simd::simd8x64<uint8_t> in_2(block+64);
|
||||
json_block block_1 = scanner.next(in_1);
|
||||
json_block block_2 = scanner.next(in_2);
|
||||
this->next(in_1, block_1, reader.block_index());
|
||||
this->next(in_2, block_2, reader.block_index()+64);
|
||||
reader.advance();
|
||||
}
|
||||
|
||||
template<>
|
||||
really_inline void json_structural_indexer::step<64>(const uint8_t *block, buf_block_reader<64> &reader) noexcept {
|
||||
simd::simd8x64<uint8_t> in_1(block);
|
||||
json_block block_1 = scanner.next(in_1);
|
||||
this->next(in_1, block_1, reader.block_index());
|
||||
reader.advance();
|
||||
}
|
||||
|
||||
//
|
||||
// Find the important bits of JSON in a 128-byte chunk, and add them to structural_indexes.
|
||||
//
|
||||
// PERF NOTES:
|
||||
// We pipe 2 inputs through these stages:
|
||||
// 1. Load JSON into registers. This takes a long time and is highly parallelizable, so we load
|
||||
// 2 inputs' worth at once so that by the time step 2 is looking for the input, it's available.
|
||||
// 2. Scan the JSON for critical data: strings, scalars and operators. This is the critical path.
|
||||
// The output of step 1 depends entirely on this information. These functions don't quite use
|
||||
// up enough CPU: the second half of the functions is highly serial, only using 1 execution core
|
||||
// at a time. The second input's scans have some dependency on the first one's finishing, but
|
||||
// they can make a lot of progress before they need that information.
|
||||
// 3. Step 1 doesn't use enough capacity, so we run some extra stuff while we're waiting for that
|
||||
// to finish: utf-8 checks and generating the output from the last iteration.
|
||||
//
|
||||
// The reason we run 2 inputs at a time, is steps 2 and 3 are *still* not enough to soak up all
|
||||
// available capacity with just one input. Running 2 at a time seems to give the CPU a good enough
|
||||
// workout.
|
||||
//
|
||||
// Setting the streaming parameter to true allows the find_structural_bits to tolerate unclosed strings.
|
||||
// The caller should still ensure that the input is valid UTF-8. If you are processing substrings,
|
||||
// you may want to call on a function like trimmed_length_safe_utf8.
|
||||
template<size_t STEP_SIZE>
|
||||
error_code json_structural_indexer::index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, bool streaming) noexcept {
|
||||
if (unlikely(len > parser.capacity())) { return CAPACITY; }
|
||||
|
||||
buf_block_reader<STEP_SIZE> reader(buf, len);
|
||||
json_structural_indexer indexer(parser.structural_indexes.get());
|
||||
while (reader.has_full_block()) {
|
||||
indexer.step<STEP_SIZE>(reader.full_block(), reader);
|
||||
}
|
||||
|
||||
if (likely(reader.has_remainder())) {
|
||||
uint8_t block[STEP_SIZE];
|
||||
reader.get_remainder(block);
|
||||
indexer.step<STEP_SIZE>(block, reader);
|
||||
}
|
||||
|
||||
return indexer.finish(parser, reader.block_index(), len, streaming);
|
||||
}
|
||||
|
||||
} // namespace stage1
|
||||
|
|
Loading…
Reference in New Issue