This commit is contained in:
Daniel Lemire 2020-06-21 17:05:55 -04:00
parent c25928e44f
commit 04139eb82e
5 changed files with 35 additions and 3 deletions

View File

@ -105,7 +105,7 @@ WARN_UNUSED error_code dom_parser_implementation::stage1(const uint8_t *_buf, si
this->len = _len;
return arm64::stage1::json_structural_indexer::index<64>(buf, len, *this, streaming);
}
#include "generic/stage1/utf8_validator.h"
} // namespace arm64
} // namespace simdjson

View File

@ -6,6 +6,9 @@ public:
really_inline size_t block_index();
really_inline bool has_full_block() const;
really_inline const uint8_t *full_block() const;
/**
 * Reports whether idx is still strictly below len.
 *
 * NOTE(review): presumably idx is the reader's current offset and len the
 * total buffer length, so "true" means some bytes are still unread — confirm
 * against the member declarations.
 */
really_inline bool has_remainder() const {
  const bool bytes_left = len > idx;
  return bytes_left;
}
/**
* Get the last block, padded with spaces.
*

View File

@ -0,0 +1,29 @@
namespace stage1 {
/**
 * Validates that a byte buffer is well-formed UTF-8.
 *
 * The buffer is handed to the checker 64 bytes at a time: first every block
 * the reader reports as full, then the remainder (copied into a local 64-byte
 * scratch block by the reader). Once all bytes have been consumed the checker
 * is told that the input has ended, so that a multi-byte sequence left
 * unfinished at the very end of the data is rejected instead of silently
 * accepted.
 *
 * @tparam checker Validator type. It must be value-initializable and provide
 *         check_next_input(simd::simd8x64<uint8_t>), check_eof() and errors().
 * @param input  Pointer to the first byte to validate.
 * @param length Number of bytes to validate.
 * @return true if the checker reports error_code::SUCCESS once the whole
 *         input has been processed, false otherwise.
 */
template<class checker>
bool utf8_validate(const uint8_t * input, size_t length) {
  checker c{};
  buf_block_reader<64> reader(input, length);

  // Full blocks are validated in place, straight from the caller's buffer.
  while (reader.has_full_block()) {
    simd::simd8x64<uint8_t> in(reader.full_block());
    c.check_next_input(in);
    reader.advance();
  }

  // Whatever is left goes through a scratch block so the SIMD load never
  // reads past the end of the caller's buffer.
  if (likely(reader.has_remainder())) {
    uint8_t block[64]{};
    reader.get_remainder(block);
    simd::simd8x64<uint8_t> in(block);
    c.check_next_input(in);
    reader.advance();
  }

  // The checker only sees blocks, so it cannot know that no further bytes will
  // follow. Without this call, data whose final block ends in a truncated
  // multi-byte sequence would be reported as valid.
  // NOTE(review): check_eof() is not declared in the visible part of this
  // change; confirm that every checker used with this template provides it.
  c.check_eof();
  return c.errors() == error_code::SUCCESS;
}

/**
 * Convenience overload: validates a char buffer with the default utf8_checker.
 *
 * @param input  Pointer to the first byte to validate.
 * @param length Number of bytes to validate.
 * @return true if the buffer is valid UTF-8 according to utf8_checker.
 */
bool utf8_validate(const char * input, size_t length) {
  // Reinterpreting char data as unsigned bytes is the intent here; spell it
  // with a named cast rather than a C-style cast.
  return utf8_validate<utf8_checker>(reinterpret_cast<const uint8_t *>(input), length);
}
} // namespace stage1

View File

@ -93,7 +93,7 @@ WARN_UNUSED error_code dom_parser_implementation::stage1(const uint8_t *_buf, si
this->len = _len;
return haswell::stage1::json_structural_indexer::index<128>(_buf, _len, *this, streaming);
}
#include "generic/stage1/utf8_validator.h"
} // namespace haswell
} // namespace simdjson
UNTARGET_REGION

View File

@ -94,7 +94,7 @@ WARN_UNUSED error_code dom_parser_implementation::stage1(const uint8_t *_buf, si
this->len = _len;
return westmere::stage1::json_structural_indexer::index<64>(_buf, _len, *this, streaming);
}
#include "generic/stage1/utf8_validator.h"
} // namespace westmere
} // namespace simdjson
UNTARGET_REGION