Reamalgamate

This commit is contained in:
John Keiser 2020-06-23 09:53:36 -07:00
parent b4b968ff44
commit 42a8b40de0
3 changed files with 264 additions and 58 deletions

View File

@ -1,4 +1,4 @@
/* auto-generated on Sun Jun 21 11:49:12 PDT 2020. Do not edit! */
/* auto-generated on Tue Jun 23 09:15:19 PDT 2020. Do not edit! */
#include <iostream>
#include "simdjson.h"

View File

@ -1,4 +1,4 @@
/* auto-generated on Sun Jun 21 11:49:12 PDT 2020. Do not edit! */
/* auto-generated on Tue Jun 23 09:15:19 PDT 2020. Do not edit! */
/* begin file src/simdjson.cpp */
#include "simdjson.h"
@ -371,6 +371,7 @@ public:
std::unique_ptr<internal::dom_parser_implementation>& dst
) const noexcept final;
WARN_UNUSED error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final;
WARN_UNUSED bool validate_utf8(const char *buf, size_t len) const noexcept final;
};
} // namespace haswell
@ -402,6 +403,7 @@ public:
std::unique_ptr<internal::dom_parser_implementation>& dst
) const noexcept final;
WARN_UNUSED error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final;
WARN_UNUSED bool validate_utf8(const char *buf, size_t len) const noexcept final;
};
} // namespace westmere
@ -433,6 +435,7 @@ public:
std::unique_ptr<internal::dom_parser_implementation>& dst
) const noexcept final;
WARN_UNUSED error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final;
WARN_UNUSED bool validate_utf8(const char *buf, size_t len) const noexcept final;
};
} // namespace arm64
@ -468,6 +471,7 @@ public:
std::unique_ptr<internal::dom_parser_implementation>& dst
) const noexcept final;
WARN_UNUSED error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final;
WARN_UNUSED bool validate_utf8(const char *buf, size_t len) const noexcept final;
};
} // namespace fallback
@ -500,7 +504,9 @@ public:
/**
 * Minify the input by delegating to the implementation returned by set_best().
 *
 * @param buf the JSON bytes to minify.
 * @param len the number of bytes in buf.
 * @param dst the output buffer, passed through unchanged.
 * @param dst_len passed through by reference to the selected implementation.
 * @return whatever error_code the selected implementation returns.
 */
WARN_UNUSED error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final {
  const implementation *best = set_best();
  return best->minify(buf, len, dst, dst_len);
}
/**
 * Validate a UTF-8 string by delegating to the implementation returned by set_best().
 *
 * @param buf the string to validate.
 * @param len the length of the string in bytes.
 * @return the result reported by the selected implementation.
 */
WARN_UNUSED bool validate_utf8(const char * buf, size_t len) const noexcept final override {
  const implementation *best = set_best();
  return best->validate_utf8(buf, len);
}
/**
 * Construct the detector, passing its name ("best_supported_detector") and description
 * to the base implementation.
 * NOTE(review): the trailing 0 is presumably the required-instruction-set mask -- confirm
 * against the implementation constructor.
 */
really_inline detect_best_supported_implementation_on_first_use() noexcept : implementation("best_supported_detector", "Detects the best supported implementation and sets it", 0) {}
private:
const implementation *set_best() const noexcept;
@ -535,10 +541,19 @@ public:
) const noexcept final {
return UNSUPPORTED_ARCHITECTURE;
}
WARN_UNUSED error_code minify(const uint8_t *, size_t, uint8_t *, size_t &) const noexcept final {
/**
 * Minify stub for the unsupported-CPU implementation: every argument is ignored
 * and UNSUPPORTED_ARCHITECTURE is returned unconditionally.
 */
WARN_UNUSED error_code minify(const uint8_t *, size_t, uint8_t *, size_t &) const noexcept final override {
return UNSUPPORTED_ARCHITECTURE;
}
/**
 * UTF-8 validation stub for the unsupported-CPU implementation.
 *
 * Both arguments are ignored; the function unconditionally reports the input
 * as invalid. The rationale is given in the comment inside the body.
 *
 * @return always false.
 */
WARN_UNUSED bool validate_utf8(const char *, size_t) const noexcept final override {
return false; // Just refuse to validate. Given that we have a fallback implementation
// it seems unlikely that unsupported_implementation will ever be used. If it is used,
// then it will flag all strings as invalid. The alternative is to return an error_code
// from which the user has to figure out whether the string is valid UTF-8... which seems
// like a lot of work just to handle the very unlikely case that we have an unsupported
// implementation. And, when it does happen (that we have an unsupported implementation),
// what are the chances that the programmer has a fallback? Given that *we* provide the
// fallback, it implies that the programmer would need a fallback for our fallback.
}
/**
 * Construct the placeholder implementation, passing its name ("unsupported") and
 * description to the base implementation.
 * NOTE(review): the trailing 0 is presumably the required-instruction-set mask -- confirm
 * against the implementation constructor.
 */
unsupported_implementation() : implementation("unsupported", "Unsupported CPU (no detected SIMD instructions)", 0) {}
};
@ -589,6 +604,9 @@ SIMDJSON_DLLIMPORTEXPORT internal::atomic_ptr<const implementation> active_imple
/**
 * Minify the input using the currently active implementation.
 *
 * The char buffers are reinterpreted as byte buffers and handed to
 * active_implementation->minify().
 *
 * @param buf the JSON text to minify.
 * @param len the number of bytes in buf.
 * @param dst the output buffer, passed through to the active implementation.
 * @param dst_len passed through by reference to the active implementation.
 * @return the error_code returned by the active implementation.
 */
WARN_UNUSED error_code minify(const char *buf, size_t len, char *dst, size_t &dst_len) noexcept {
  const uint8_t *src_bytes = reinterpret_cast<const uint8_t *>(buf);
  uint8_t *dst_bytes = reinterpret_cast<uint8_t *>(dst);
  return active_implementation->minify(src_bytes, len, dst_bytes, dst_len);
}
/**
 * Validate a UTF-8 string using the currently active implementation.
 *
 * @param buf the string to validate.
 * @param len the length of the string in bytes.
 * @return the result reported by active_implementation->validate_utf8().
 */
WARN_UNUSED bool validate_utf8(const char *buf, size_t len) noexcept {
  const bool is_valid = active_implementation->validate_utf8(buf, len);
  return is_valid;
}
} // namespace simdjson
@ -3757,7 +3775,37 @@ WARN_UNUSED error_code dom_parser_implementation::stage1(const uint8_t *_buf, si
this->len = _len;
return arm64::stage1::json_structural_indexer::index<64>(buf, len, *this, streaming);
}
/* begin file src/generic/stage1/utf8_validator.h */
namespace stage1 {
/**
* Validates that the string is actual UTF-8.
*/
template<class checker>
bool generic_validate_utf8(const uint8_t * input, size_t length) {
checker c{};
buf_block_reader<64> reader(input, length);
while (reader.has_full_block()) {
simd::simd8x64<uint8_t> in(reader.full_block());
c.check_next_input(in);
reader.advance();
}
uint8_t block[64]{};
reader.get_remainder(block);
simd::simd8x64<uint8_t> in(block);
c.check_next_input(in);
reader.advance();
return c.errors() == error_code::SUCCESS;
}
bool generic_validate_utf8(const char * input, size_t length) {
return generic_validate_utf8<utf8_checker>((const uint8_t *)input,length);
}
} // namespace stage1
/* end file src/generic/stage1/utf8_validator.h */
/**
 * arm64 UTF-8 validation: forwards to the arm64 stage1 generic validator.
 *
 * @param buf the string to validate.
 * @param len the length of the string in bytes.
 * @return the result of arm64::stage1::generic_validate_utf8().
 */
WARN_UNUSED bool implementation::validate_utf8(const char *buf, size_t len) const noexcept {
  const bool is_valid = simdjson::arm64::stage1::generic_validate_utf8(buf, len);
  return is_valid;
}
} // namespace arm64
} // namespace simdjson
@ -5798,6 +5846,70 @@ WARN_UNUSED error_code implementation::minify(const uint8_t *buf, size_t len, ui
return SUCCESS;
}
// credit: based on code from Google Fuchsia (Apache Licensed)
/**
 * Scalar (non-SIMD) UTF-8 validation.
 *
 * Walks the buffer one code point at a time, with a fast path that skips 16
 * bytes at once when they are all ASCII. For every multi-byte sequence it checks
 * that the sequence is not truncated, that each trailing byte is a continuation
 * byte (10xxxxxx), and that the decoded code point lies in the range permitted
 * for that sequence length (rejecting overlong encodings, UTF-16 surrogates and
 * values above U+10FFFF).
 *
 * @param buf the string to validate.
 * @param len the length of the string in bytes.
 * @return true if and only if the string is valid UTF-8.
 */
WARN_UNUSED bool implementation::validate_utf8(const char *buf, size_t len) const noexcept {
  const uint8_t *data = reinterpret_cast<const uint8_t *>(buf);
  uint64_t pos = 0;
  uint64_t next_pos = 0;
  uint32_t code_point = 0;
  while (pos < len) {
    // Fast path: check whether the next 16 bytes are all ASCII.
    next_pos = pos + 16;
    if (next_pos <= len) { // it is safe to read 16 more bytes
      uint64_t v1;
      memcpy(&v1, data + pos, sizeof(uint64_t));
      uint64_t v2;
      memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t));
      uint64_t v{v1 | v2};
      // No byte has its high bit set => 16 ASCII bytes, skip them all.
      if ((v & 0x8080808080808080) == 0) {
        pos = next_pos;
        continue;
      }
    }
    unsigned char byte = data[pos];
    if (byte < 0b10000000) {
      // Single ASCII byte.
      pos++;
      continue;
    } else if ((byte & 0b11100000) == 0b11000000) {
      // Two-byte sequence: 110xxxxx 10xxxxxx
      next_pos = pos + 2;
      if (next_pos > len) { return false; }
      if ((data[pos + 1] & 0b11000000) != 0b10000000) { return false; }
      // range check (rejects overlong encodings of U+0000..U+007F)
      code_point = (byte & 0b00011111) << 6 | (data[pos + 1] & 0b00111111);
      if (code_point < 0x80 || 0x7ff < code_point) { return false; }
    } else if ((byte & 0b11110000) == 0b11100000) {
      // Three-byte sequence: 1110xxxx 10xxxxxx 10xxxxxx
      next_pos = pos + 3;
      if (next_pos > len) { return false; }
      if ((data[pos + 1] & 0b11000000) != 0b10000000) { return false; }
      if ((data[pos + 2] & 0b11000000) != 0b10000000) { return false; }
      // range check (rejects overlong encodings and the surrogates U+D800..U+DFFF)
      code_point = (byte & 0b00001111) << 12 |
                   (data[pos + 1] & 0b00111111) << 6 |
                   (data[pos + 2] & 0b00111111);
      if (code_point < 0x800 || 0xffff < code_point ||
          (0xd7ff < code_point && code_point < 0xe000)) {
        return false;
      }
    } else if ((byte & 0b11111000) == 0b11110000) {
      // Four-byte sequence: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
      next_pos = pos + 4;
      if (next_pos > len) { return false; }
      if ((data[pos + 1] & 0b11000000) != 0b10000000) { return false; }
      if ((data[pos + 2] & 0b11000000) != 0b10000000) { return false; }
      if ((data[pos + 3] & 0b11000000) != 0b10000000) { return false; }
      // range check: a four-byte sequence must encode U+10000..U+10FFFF.
      // U+FFFF itself fits in three bytes, so it is rejected here as overlong.
      code_point =
          (byte & 0b00000111) << 18 | (data[pos + 1] & 0b00111111) << 12 |
          (data[pos + 2] & 0b00111111) << 6 | (data[pos + 3] & 0b00111111);
      if (code_point <= 0xffff || 0x10ffff < code_point) { return false; }
    } else {
      // Stray continuation byte or an invalid leading byte (0xF8..0xFF).
      return false;
    }
    pos = next_pos;
  }
  return true;
}
} // namespace fallback
} // namespace simdjson
@ -9121,7 +9233,37 @@ WARN_UNUSED error_code dom_parser_implementation::stage1(const uint8_t *_buf, si
this->len = _len;
return haswell::stage1::json_structural_indexer::index<128>(_buf, _len, *this, streaming);
}
/* begin file src/generic/stage1/utf8_validator.h */
namespace stage1 {
/**
* Validates that the string is actual UTF-8.
*/
template<class checker>
bool generic_validate_utf8(const uint8_t * input, size_t length) {
checker c{};
buf_block_reader<64> reader(input, length);
while (reader.has_full_block()) {
simd::simd8x64<uint8_t> in(reader.full_block());
c.check_next_input(in);
reader.advance();
}
uint8_t block[64]{};
reader.get_remainder(block);
simd::simd8x64<uint8_t> in(block);
c.check_next_input(in);
reader.advance();
return c.errors() == error_code::SUCCESS;
}
bool generic_validate_utf8(const char * input, size_t length) {
return generic_validate_utf8<utf8_checker>((const uint8_t *)input,length);
}
} // namespace stage1
/* end file src/generic/stage1/utf8_validator.h */
/**
 * haswell UTF-8 validation: forwards to the haswell stage1 generic validator.
 *
 * @param buf the string to validate.
 * @param len the length of the string in bytes.
 * @return the result of haswell::stage1::generic_validate_utf8().
 */
WARN_UNUSED bool implementation::validate_utf8(const char *buf, size_t len) const noexcept {
  const bool is_valid = simdjson::haswell::stage1::generic_validate_utf8(buf, len);
  return is_valid;
}
} // namespace haswell
} // namespace simdjson
UNTARGET_REGION
@ -12368,7 +12510,37 @@ WARN_UNUSED error_code dom_parser_implementation::stage1(const uint8_t *_buf, si
this->len = _len;
return westmere::stage1::json_structural_indexer::index<64>(_buf, _len, *this, streaming);
}
/* begin file src/generic/stage1/utf8_validator.h */
namespace stage1 {
/**
* Validates that the string is actual UTF-8.
*/
template<class checker>
bool generic_validate_utf8(const uint8_t * input, size_t length) {
checker c{};
buf_block_reader<64> reader(input, length);
while (reader.has_full_block()) {
simd::simd8x64<uint8_t> in(reader.full_block());
c.check_next_input(in);
reader.advance();
}
uint8_t block[64]{};
reader.get_remainder(block);
simd::simd8x64<uint8_t> in(block);
c.check_next_input(in);
reader.advance();
return c.errors() == error_code::SUCCESS;
}
bool generic_validate_utf8(const char * input, size_t length) {
return generic_validate_utf8<utf8_checker>((const uint8_t *)input,length);
}
} // namespace stage1
/* end file src/generic/stage1/utf8_validator.h */
/**
 * westmere UTF-8 validation: forwards to the westmere stage1 generic validator.
 *
 * @param buf the string to validate.
 * @param len the length of the string in bytes.
 * @return the result of westmere::stage1::generic_validate_utf8().
 */
WARN_UNUSED bool implementation::validate_utf8(const char *buf, size_t len) const noexcept {
  const bool is_valid = simdjson::westmere::stage1::generic_validate_utf8(buf, len);
  return is_valid;
}
} // namespace westmere
} // namespace simdjson
UNTARGET_REGION

View File

@ -1,4 +1,4 @@
/* auto-generated on Sun Jun 21 11:49:12 PDT 2020. Do not edit! */
/* auto-generated on Tue Jun 23 09:15:19 PDT 2020. Do not edit! */
/* begin file include/simdjson.h */
#ifndef SIMDJSON_H
#define SIMDJSON_H
@ -2637,6 +2637,36 @@ inline error_code dom_parser_implementation::allocate(size_t capacity, size_t ma
namespace simdjson {
/**
* Validate the UTF-8 string.
*
* @param buf the string to validate.
* @param len the length of the string in bytes.
* @return true if the string is valid UTF-8.
*/
WARN_UNUSED bool validate_utf8(const char * buf, size_t len) noexcept;
/**
 * Validate the UTF-8 string held by a string_view.
 *
 * Convenience overload: forwards the view's data pointer and size to
 * validate_utf8(const char *, size_t).
 *
 * @param sv the string_view to validate.
 * @return true if the string is valid UTF-8.
 */
really_inline WARN_UNUSED bool validate_utf8(const std::string_view sv) noexcept {
  const char *bytes = sv.data();
  const size_t byte_count = sv.size();
  return validate_utf8(bytes, byte_count);
}
/**
* Validate the UTF-8 string.
*
* Convenience overload: forwards s.data() and s.size() to
* validate_utf8(const char *, size_t).
*
* @param s the string to validate.
* @return true if the string is valid UTF-8.
*/
really_inline WARN_UNUSED bool validate_utf8(const std::string& s) noexcept {
return validate_utf8(s.data(), s.size());
}
namespace dom {
class document;
} // namespace dom
@ -2710,6 +2740,18 @@ public:
* @return the error code, or SUCCESS if there was no error.
*/
WARN_UNUSED virtual error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept = 0;
/**
* Validate the UTF-8 string.
*
* Overridden by each implementation.
*
* @param buf the string to validate.
* @param len the length of the string in bytes.
* @return true if and only if the string is valid UTF-8.
*/
WARN_UNUSED virtual bool validate_utf8(const char *buf, size_t len) const noexcept = 0;
protected:
/** @private Construct an implementation with the given name and description. For subclasses. */
@ -3984,7 +4026,7 @@ public:
* Returns INCORRECT_TYPE if the JSON element is not an integer, or NUMBER_OUT_OF_RANGE
* if it is negative.
*/
inline simdjson_result<int64_t> get_int64_t() const noexcept;
inline simdjson_result<int64_t> get_int64() const noexcept;
/**
* Cast this element to an unsigned integer.
*
@ -3994,7 +4036,7 @@ public:
* Returns INCORRECT_TYPE if the JSON element is not an integer, or NUMBER_OUT_OF_RANGE
* if it is too large.
*/
inline simdjson_result<uint64_t> get_uint64_t() const noexcept;
inline simdjson_result<uint64_t> get_uint64() const noexcept;
/**
* Cast this element to a double-precision floating-point number.
*
@ -4037,13 +4079,13 @@ public:
*
* Equivalent to is<int64_t>().
*/
inline bool is_int64_t() const noexcept;
inline bool is_int64() const noexcept;
/**
* Whether this element is a json number that fits in an unsigned 64-bit integer.
*
* Equivalent to is<uint64_t>().
*/
inline bool is_uint64_t() const noexcept;
inline bool is_uint64() const noexcept;
/**
* Whether this element is a json number that fits in a double.
*
@ -4078,6 +4120,7 @@ public:
* - Object: dom::object
*
* @tparam T bool, double, uint64_t, int64_t, std::string_view, const char *, dom::array, dom::object
* @returns true if the value can be cast to the given type, false if not.
*/
template<typename T>
really_inline bool is() const noexcept;
@ -4349,7 +4392,7 @@ public:
really_inline simdjson_result<dom::element_type> type() const noexcept;
template<typename T>
really_inline simdjson_result<bool> is() const noexcept;
really_inline bool is() const noexcept;
template<typename T>
really_inline simdjson_result<T> get() const noexcept;
template<typename T>
@ -4359,19 +4402,19 @@ public:
really_inline simdjson_result<dom::object> get_object() const noexcept;
really_inline simdjson_result<const char *> get_c_str() const noexcept;
really_inline simdjson_result<std::string_view> get_string() const noexcept;
really_inline simdjson_result<int64_t> get_int64_t() const noexcept;
really_inline simdjson_result<uint64_t> get_uint64_t() const noexcept;
really_inline simdjson_result<int64_t> get_int64() const noexcept;
really_inline simdjson_result<uint64_t> get_uint64() const noexcept;
really_inline simdjson_result<double> get_double() const noexcept;
really_inline simdjson_result<bool> get_bool() const noexcept;
really_inline simdjson_result<bool> is_array() const noexcept;
really_inline simdjson_result<bool> is_object() const noexcept;
really_inline simdjson_result<bool> is_string() const noexcept;
really_inline simdjson_result<bool> is_int64_t() const noexcept;
really_inline simdjson_result<bool> is_uint64_t() const noexcept;
really_inline simdjson_result<bool> is_double() const noexcept;
really_inline simdjson_result<bool> is_bool() const noexcept;
really_inline simdjson_result<bool> is_null() const noexcept;
really_inline bool is_array() const noexcept;
really_inline bool is_object() const noexcept;
really_inline bool is_string() const noexcept;
really_inline bool is_int64() const noexcept;
really_inline bool is_uint64() const noexcept;
really_inline bool is_double() const noexcept;
really_inline bool is_bool() const noexcept;
really_inline bool is_null() const noexcept;
really_inline simdjson_result<dom::element> operator[](const std::string_view &key) const noexcept;
really_inline simdjson_result<dom::element> operator[](const char *key) const noexcept;
@ -5739,9 +5782,8 @@ inline simdjson_result<dom::element_type> simdjson_result<dom::element>::type()
}
template<typename T>
really_inline simdjson_result<bool> simdjson_result<dom::element>::is() const noexcept {
if (error()) { return error(); }
return first.is<T>();
really_inline bool simdjson_result<dom::element>::is() const noexcept {
return !error() && first.is<T>();
}
template<typename T>
really_inline simdjson_result<T> simdjson_result<dom::element>::get() const noexcept {
@ -5770,13 +5812,13 @@ really_inline simdjson_result<std::string_view> simdjson_result<dom::element>::g
if (error()) { return error(); }
return first.get_string();
}
really_inline simdjson_result<int64_t> simdjson_result<dom::element>::get_int64_t() const noexcept {
really_inline simdjson_result<int64_t> simdjson_result<dom::element>::get_int64() const noexcept {
if (error()) { return error(); }
return first.get_int64_t();
return first.get_int64();
}
really_inline simdjson_result<uint64_t> simdjson_result<dom::element>::get_uint64_t() const noexcept {
really_inline simdjson_result<uint64_t> simdjson_result<dom::element>::get_uint64() const noexcept {
if (error()) { return error(); }
return first.get_uint64_t();
return first.get_uint64();
}
really_inline simdjson_result<double> simdjson_result<dom::element>::get_double() const noexcept {
if (error()) { return error(); }
@ -5787,38 +5829,30 @@ really_inline simdjson_result<bool> simdjson_result<dom::element>::get_bool() co
return first.get_bool();
}
really_inline simdjson_result<bool> simdjson_result<dom::element>::is_array() const noexcept {
if (error()) { return error(); }
return first.is_array();
really_inline bool simdjson_result<dom::element>::is_array() const noexcept {
return !error() && first.is_array();
}
really_inline simdjson_result<bool> simdjson_result<dom::element>::is_object() const noexcept {
if (error()) { return error(); }
return first.is_object();
really_inline bool simdjson_result<dom::element>::is_object() const noexcept {
return !error() && first.is_object();
}
really_inline simdjson_result<bool> simdjson_result<dom::element>::is_string() const noexcept {
if (error()) { return error(); }
return first.is_string();
really_inline bool simdjson_result<dom::element>::is_string() const noexcept {
return !error() && first.is_string();
}
really_inline simdjson_result<bool> simdjson_result<dom::element>::is_int64_t() const noexcept {
if (error()) { return error(); }
return first.is_int64_t();
really_inline bool simdjson_result<dom::element>::is_int64() const noexcept {
return !error() && first.is_int64();
}
really_inline simdjson_result<bool> simdjson_result<dom::element>::is_uint64_t() const noexcept {
if (error()) { return error(); }
return first.is_uint64_t();
really_inline bool simdjson_result<dom::element>::is_uint64() const noexcept {
return !error() && first.is_uint64();
}
really_inline simdjson_result<bool> simdjson_result<dom::element>::is_double() const noexcept {
if (error()) { return error(); }
return first.is_double();
really_inline bool simdjson_result<dom::element>::is_double() const noexcept {
return !error() && first.is_double();
}
really_inline simdjson_result<bool> simdjson_result<dom::element>::is_bool() const noexcept {
if (error()) { return error(); }
return first.is_bool();
really_inline bool simdjson_result<dom::element>::is_bool() const noexcept {
return !error() && first.is_bool();
}
really_inline simdjson_result<bool> simdjson_result<dom::element>::is_null() const noexcept {
if (error()) { return error(); }
return first.is_null();
really_inline bool simdjson_result<dom::element>::is_null() const noexcept {
return !error() && first.is_null();
}
really_inline simdjson_result<dom::element> simdjson_result<dom::element>::operator[](const std::string_view &key) const noexcept {
@ -5922,7 +5956,7 @@ inline simdjson_result<std::string_view> element::get_string() const noexcept {
return INCORRECT_TYPE;
}
}
inline simdjson_result<uint64_t> element::get_uint64_t() const noexcept {
inline simdjson_result<uint64_t> element::get_uint64() const noexcept {
if(unlikely(!tape.is_uint64())) { // branch rarely taken
if(tape.is_int64()) {
int64_t result = tape.next_tape_value<int64_t>();
@ -5935,7 +5969,7 @@ inline simdjson_result<uint64_t> element::get_uint64_t() const noexcept {
}
return tape.next_tape_value<int64_t>();
}
inline simdjson_result<int64_t> element::get_int64_t() const noexcept {
inline simdjson_result<int64_t> element::get_int64() const noexcept {
if(unlikely(!tape.is_int64())) { // branch rarely taken
if(tape.is_uint64()) {
uint64_t result = tape.next_tape_value<uint64_t>();
@ -6008,16 +6042,16 @@ template<> inline simdjson_result<array> element::get<array>() const noexcept {
template<> inline simdjson_result<object> element::get<object>() const noexcept { return get_object(); }
template<> inline simdjson_result<const char *> element::get<const char *>() const noexcept { return get_c_str(); }
template<> inline simdjson_result<std::string_view> element::get<std::string_view>() const noexcept { return get_string(); }
template<> inline simdjson_result<int64_t> element::get<int64_t>() const noexcept { return get_int64_t(); }
template<> inline simdjson_result<uint64_t> element::get<uint64_t>() const noexcept { return get_uint64_t(); }
template<> inline simdjson_result<int64_t> element::get<int64_t>() const noexcept { return get_int64(); }
template<> inline simdjson_result<uint64_t> element::get<uint64_t>() const noexcept { return get_uint64(); }
template<> inline simdjson_result<double> element::get<double>() const noexcept { return get_double(); }
template<> inline simdjson_result<bool> element::get<bool>() const noexcept { return get_bool(); }
inline bool element::is_array() const noexcept { return is<array>(); }
inline bool element::is_object() const noexcept { return is<object>(); }
inline bool element::is_string() const noexcept { return is<std::string_view>(); }
inline bool element::is_int64_t() const noexcept { return is<int64_t>(); }
inline bool element::is_uint64_t() const noexcept { return is<uint64_t>(); }
inline bool element::is_int64() const noexcept { return is<int64_t>(); }
inline bool element::is_uint64() const noexcept { return is<uint64_t>(); }
inline bool element::is_double() const noexcept { return is<double>(); }
inline bool element::is_bool() const noexcept { return is<bool>(); }