Attempt to fix issue 1187. (#1192)

This commit is contained in:
Daniel Lemire 2020-09-27 12:04:47 -04:00 committed by GitHub
parent f44386008c
commit 0e584fa4a5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 19 additions and 6 deletions

View File

@ -89,7 +89,8 @@ public:
* - IO_ERROR if there was an error opening or reading the file.
* - MEMALLOC if the parser does not have enough capacity and memory allocation fails.
* - CAPACITY if the parser does not have enough capacity and len > max_capacity.
* - other json errors if parsing fails.
* - other json errors if parsing fails. You should not rely on these errors to always be the same for the
* same document: they may vary under runtime dispatch (so they may vary depending on your system and hardware).
*/
inline simdjson_result<element> load(const std::string &path) & noexcept;
inline simdjson_result<element> load(const std::string &path) && = delete ;
@ -151,7 +152,8 @@ public:
* - MEMALLOC if realloc_if_needed is true or the parser does not have enough capacity,
* and memory allocation fails.
* - CAPACITY if the parser does not have enough capacity and len > max_capacity.
* - other json errors if parsing fails.
* - other json errors if parsing fails. You should not rely on these errors to always be the same for the
* same document: they may vary under runtime dispatch (so they may vary depending on your system and hardware).
*/
inline simdjson_result<element> parse(const uint8_t *buf, size_t len, bool realloc_if_needed = true) & noexcept;
inline simdjson_result<element> parse(const uint8_t *buf, size_t len, bool realloc_if_needed = true) && =delete;
@ -235,7 +237,8 @@ public:
* - IO_ERROR if there was an error opening or reading the file.
* - MEMALLOC if the parser does not have enough capacity and memory allocation fails.
* - CAPACITY if the parser does not have enough capacity and batch_size > max_capacity.
* - other json errors if parsing fails.
* - other json errors if parsing fails. You should not rely on these errors to always be the same for the
* same document: they may vary under runtime dispatch (so they may vary depending on your system and hardware).
*/
inline simdjson_result<document_stream> load_many(const std::string &path, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept;
@ -328,7 +331,8 @@ public:
* @return The stream, or an error. An empty input will yield 0 documents rather than an EMPTY error. Errors:
* - MEMALLOC if the parser does not have enough capacity and memory allocation fails
* - CAPACITY if the parser does not have enough capacity and batch_size > max_capacity.
* - other json errors if parsing fails.
* - other json errors if parsing fails. You should not rely on these errors to always be the same for the
* same document: they may vary under runtime dispatch (so they may vary depending on your system and hardware).
*/
inline simdjson_result<document_stream> parse_many(const uint8_t *buf, size_t len, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept;
/** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */

View File

@ -297,7 +297,7 @@ SIMDJSON_WARN_UNUSED bool implementation::validate_utf8(const char *buf, size_t
code_point =
(byte & 0b00000111) << 18 | (data[pos + 1] & 0b00111111) << 12 |
(data[pos + 2] & 0b00111111) << 6 | (data[pos + 3] & 0b00111111);
if (code_point < 0xffff || 0x10ffff < code_point) { return false; }
if (code_point <= 0xffff || 0x10ffff < code_point) { return false; }
} else {
// we may have a continuation
return false;

View File

@ -1262,6 +1262,14 @@ namespace type_tests {
namespace validate_tests {
/**
 * Regression test for issue 1187.
 *
 * Feeds simdjson::validate_utf8 the four-byte sequence F0 8F BF BF. Its
 * payload bits decode to code point 0xFFFF, which is below the range a
 * four-byte sequence is allowed to encode, so the validator must reject it.
 *
 * @return true when the input is rejected (test passes), false when the
 *         validator accepts it.
 */
bool issue1187() {
  std::cout << "Running " << __func__ << std::endl;
  const std::string overlong_sequence = "\xf0\x8f\xbf\xbf";
  // The test succeeds exactly when validation fails.
  return !simdjson::validate_utf8(overlong_sequence.data(), overlong_sequence.size());
}
bool test_validate() {
std::cout << "Running " << __func__ << std::endl;
const std::string test = R"({ "foo" : 1, "bar" : [ 1, 2, 3 ], "baz": { "a": 1, "b": 2, "c": 3 } })";
@ -1322,7 +1330,8 @@ namespace validate_tests {
return true;
}
bool run() {
return test_range() &&
return issue1187() &&
test_range() &&
test_issue1169_long() &&
test_issue1169() &&
test_random() &&