Acknowledge that we always have a remainder

This commit is contained in:
John Keiser 2020-06-02 08:32:41 -07:00
parent 9be4a17687
commit 8c16ba372e
3 changed files with 25 additions and 18 deletions

View File

@ -6,8 +6,16 @@ public:
really_inline size_t block_index(); really_inline size_t block_index();
really_inline bool has_full_block() const; really_inline bool has_full_block() const;
really_inline const uint8_t *full_block() const; really_inline const uint8_t *full_block() const;
really_inline bool has_remainder() const; /**
really_inline void get_remainder(uint8_t *tmp_buf) const; * Get the last block, padded with spaces.
*
* There will always be a last block, with at least 1 byte, unless len == 0 (in which case this
* function fills the buffer with spaces and returns 0). In particular, if len == STEP_SIZE there
* will be 0 full_blocks and 1 remainder block with STEP_SIZE bytes and no spaces for padding.
*
* @return the number of effective characters in the last block.
*/
really_inline size_t get_remainder(uint8_t *dst) const;
really_inline void advance(); really_inline void advance();
private: private:
const uint8_t *buf; const uint8_t *buf;
@ -33,14 +41,10 @@ really_inline const uint8_t *buf_block_reader<STEP_SIZE>::full_block() const {
} }
template<size_t STEP_SIZE> template<size_t STEP_SIZE>
really_inline bool buf_block_reader<STEP_SIZE>::has_remainder() const { really_inline size_t buf_block_reader<STEP_SIZE>::get_remainder(uint8_t *dst) const {
return idx < len; memset(dst, 0x20, STEP_SIZE); // memset STEP_SIZE because it's more efficient to write out 8 or 16 bytes at once.
} memcpy(dst, buf + idx, len - idx);
return len - idx;
template<size_t STEP_SIZE>
really_inline void buf_block_reader<STEP_SIZE>::get_remainder(uint8_t *tmp_buf) const {
memset(tmp_buf, 0x20, STEP_SIZE);
memcpy(tmp_buf, buf + idx, len - idx);
} }
template<size_t STEP_SIZE> template<size_t STEP_SIZE>

View File

@ -59,13 +59,15 @@ template<size_t STEP_SIZE>
error_code json_minifier::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) noexcept { error_code json_minifier::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) noexcept {
buf_block_reader<STEP_SIZE> reader(buf, len); buf_block_reader<STEP_SIZE> reader(buf, len);
json_minifier minifier(dst); json_minifier minifier(dst);
// Index the first n-1 blocks
while (reader.has_full_block()) { while (reader.has_full_block()) {
minifier.step<STEP_SIZE>(reader.full_block(), reader); minifier.step<STEP_SIZE>(reader.full_block(), reader);
} }
if (likely(reader.has_remainder())) { // Index the last (remainder) block, padded with spaces
uint8_t block[STEP_SIZE]; uint8_t block[STEP_SIZE];
reader.get_remainder(block); if (likely(reader.get_remainder(block)) > 0) {
minifier.step<STEP_SIZE>(block, reader); minifier.step<STEP_SIZE>(block, reader);
} }

View File

@ -106,15 +106,16 @@ error_code json_structural_indexer::index(const uint8_t *buf, size_t len, dom_pa
buf_block_reader<STEP_SIZE> reader(buf, len); buf_block_reader<STEP_SIZE> reader(buf, len);
json_structural_indexer indexer(parser.structural_indexes.get()); json_structural_indexer indexer(parser.structural_indexes.get());
// Read all but the last block
while (reader.has_full_block()) { while (reader.has_full_block()) {
indexer.step<STEP_SIZE>(reader.full_block(), reader); indexer.step<STEP_SIZE>(reader.full_block(), reader);
} }
if (likely(reader.has_remainder())) { // Take care of the last block (will always be there unless file is empty)
uint8_t block[STEP_SIZE]; uint8_t block[STEP_SIZE];
reader.get_remainder(block); if (unlikely(reader.get_remainder(block) == 0)) { return EMPTY; }
indexer.step<STEP_SIZE>(block, reader); indexer.step<STEP_SIZE>(block, reader);
}
return indexer.finish(parser, reader.block_index(), len, streaming); return indexer.finish(parser, reader.block_index(), len, streaming);
} }