This commit is contained in:
Daniel Lemire 2020-06-21 17:52:30 -04:00
parent f03a6ab5a4
commit 5dbcdf1484
6 changed files with 54 additions and 2 deletions

View File

@ -10,6 +10,24 @@
namespace simdjson {
/**
* Validate the UTF-8 string.
*
* @param buf the string to validate.
* @param length the length of the string in bytes.
* @return true if the string is valid UTF-8.
*/
WARN_UNUSED bool validate_utf8(const char * buf, size_t length) noexcept;
/**
* Validate the UTF-8 string.
*
* Convenience overload that forwards the view's data pointer and size to
* validate_utf8(const char *, size_t). It is defined inline here so that
* callers of this overload always have a definition to link against.
*
* @param p the string_view to validate. It is taken by value (a string_view
*          is only a pointer and a length), so temporaries, const views and
*          anything implicitly convertible to std::string_view are accepted;
*          a non-const lvalue reference would reject all of those.
* @return true if the string is valid UTF-8.
*/
inline WARN_UNUSED bool validate_utf8(std::string_view p) noexcept {
  return validate_utf8(p.data(), p.size());
}
// Forward declaration only: it introduces the name dom::document without
// defining the class here.
namespace dom {
class document;
} // namespace dom

View File

@ -7,5 +7,4 @@
#include "simdjson/compiler_check.h"
#include "simdjson/error.h"
#endif // SIMDJSON_H

View File

@ -106,6 +106,9 @@ WARN_UNUSED error_code dom_parser_implementation::stage1(const uint8_t *_buf, si
return arm64::stage1::json_structural_indexer::index<64>(buf, len, *this, streaming);
}
#include "generic/stage1/utf8_validator.h"
/**
 * UTF-8 validation entry point of the arm64 implementation.
 *
 * Forwards the buffer unchanged to simdjson::arm64::stage1::utf8_validate
 * and returns that routine's result.
 *
 * @param buf the bytes to validate.
 * @param len the number of bytes in buf.
 * @return the result of stage1::utf8_validate for the given buffer.
 */
WARN_UNUSED bool implementation::validate_utf8(const char *buf, size_t len) const noexcept {
  const bool is_valid_utf8 = simdjson::arm64::stage1::utf8_validate(buf, len);
  return is_valid_utf8;
}
} // namespace arm64
} // namespace simdjson

View File

@ -141,6 +141,9 @@ SIMDJSON_DLLIMPORTEXPORT internal::atomic_ptr<const implementation> active_imple
/**
 * Minify through the currently active implementation.
 *
 * The char buffers are reinterpreted as uint8_t buffers, which is the type
 * the implementation's minify() is called with, and the call is forwarded
 * as-is.
 *
 * @param buf the input bytes.
 * @param len the number of bytes in buf.
 * @param dst the output buffer handed to the implementation.
 * @param dst_len passed by reference to the implementation's minify().
 * @return the error_code returned by the active implementation.
 */
WARN_UNUSED error_code minify(const char *buf, size_t len, char *dst, size_t &dst_len) noexcept {
  const uint8_t *input = reinterpret_cast<const uint8_t *>(buf);
  uint8_t *output = reinterpret_cast<uint8_t *>(dst);
  return active_implementation->minify(input, len, output, dst_len);
}
/**
 * Validate a buffer as UTF-8 through the currently active implementation.
 *
 * @param buf the bytes to validate.
 * @param len the number of bytes in buf.
 * @return whatever active_implementation->validate_utf8 reports for the buffer.
 */
WARN_UNUSED bool validate_utf8(const char *buf, size_t len) noexcept {
  const bool is_valid_utf8 = active_implementation->validate_utf8(buf, len);
  return is_valid_utf8;
}
} // namespace simdjson

View File

@ -95,6 +95,9 @@ WARN_UNUSED error_code dom_parser_implementation::stage1(const uint8_t *_buf, si
return westmere::stage1::json_structural_indexer::index<64>(_buf, _len, *this, streaming);
}
#include "generic/stage1/utf8_validator.h"
/**
 * UTF-8 validation entry point of the westmere implementation.
 *
 * Forwards the buffer unchanged to simdjson::westmere::stage1::utf8_validate
 * and returns that routine's result.
 *
 * @param buf the bytes to validate.
 * @param len the number of bytes in buf.
 * @return the result of stage1::utf8_validate for the given buffer.
 */
WARN_UNUSED bool implementation::validate_utf8(const char *buf, size_t len) const noexcept {
  const bool is_valid_utf8 = simdjson::westmere::stage1::utf8_validate(buf, len);
  return is_valid_utf8;
}
} // namespace westmere
} // namespace simdjson
UNTARGET_REGION

View File

@ -1649,6 +1649,31 @@ namespace type_tests {
}
namespace validate_tests {
  // A plain-ASCII JSON document must be accepted by the validator.
  bool test_validate() {
    std::cout << "Running " << __func__ << std::endl;
    const std::string json_text = R"({ "foo" : 1, "bar" : [ 1, 2, 3 ], "baz": { "a": 1, "b": 2, "c": 3 } })";
    return simdjson::validate_utf8(json_text.data(), json_text.size());
  }

  // The bytes 0x80 0x81 are continuation bytes with no preceding lead byte,
  // so the validator must reject them; the test passes when it does.
  bool test_bad_validate() {
    std::cout << "Running " << __func__ << std::endl;
    const std::string malformed = "\x80\x81";
    return !simdjson::validate_utf8(malformed.data(), malformed.size());
  }

  // Runs the tests in order and stops at the first failure.
  bool run() {
    if (!test_validate()) {
      return false;
    }
    return test_bad_validate();
  }
}
namespace minify_tests {
@ -1960,7 +1985,8 @@ int main(int argc, char *argv[]) {
printf("unsupported CPU\n");
}
std::cout << "Running basic tests." << std::endl;
if (minify_tests::run() &&
if (validate_tests::run() &&
minify_tests::run() &&
parse_api_tests::run() &&
dom_api_tests::run() &&
type_tests::run() &&