Massive performance boost for get<double>. (#719)

* Massive performance boost for get<double>.
This commit is contained in:
Daniel Lemire 2020-04-15 20:09:45 -04:00 committed by GitHub
parent 6d7c77ddc1
commit 326c175dcb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 131 additions and 33 deletions

View File

@ -65,6 +65,63 @@ static void numbers_size_scan(State& state) {
BENCHMARK(numbers_size_scan);
/**
 * Benchmark: iterate over the top-level array stored in NUMBERS_JSON, query the
 * type of every element, read it as a double and append it to a vector that is
 * not pre-sized. The file is loaded once, before the timed loop starts.
 */
static void numbers_type_scan(State& state) {
  dom::parser parser;
  dom::array numbers;
  simdjson::error_code error;
  parser.load(NUMBERS_JSON).get<dom::array>().tie(numbers, error);
  if (error) {
    cerr << "could not read " << NUMBERS_JSON << " as an array" << endl;
    return;
  }
  for (auto _ : state) {
    std::vector<double> values;
    for (auto node : numbers) {
      // The type is queried before the value; the scan of this iteration stops
      // at the first element that is not a double.
      if (node.type() != dom::element_type::DOUBLE) {
        cerr << "found a node that is not an number?" << endl;
        break;
      }
      double value;
      // The error code is written by tie() but is not inspected afterwards.
      node.get<double>().tie(value, error);
      values.push_back(value);
    }
    // Keep the compiler from discarding the work done on the vector.
    benchmark::DoNotOptimize(values.data());
    benchmark::ClobberMemory();
  }
}
BENCHMARK(numbers_type_scan);
/**
 * Benchmark: same scan as the type-checking benchmark, except that the output
 * vector is sized up front from arr.size() and filled by index. The file is
 * loaded once, before the timed loop starts.
 */
static void numbers_type_size_scan(State& state) {
  dom::parser parser;
  dom::array numbers;
  simdjson::error_code error;
  parser.load(NUMBERS_JSON).get<dom::array>().tie(numbers, error);
  if (error) {
    cerr << "could not read " << NUMBERS_JSON << " as an array" << endl;
    return;
  }
  for (auto _ : state) {
    std::vector<double> values;
    values.resize(numbers.size());
    size_t written = 0;
    for (auto node : numbers) {
      // The type is queried before the value; the scan of this iteration stops
      // at the first element that is not a double.
      if (node.type() != dom::element_type::DOUBLE) {
        cerr << "found a node that is not an number?" << endl;
        break;
      }
      double value;
      // The error code is written by tie() but is not inspected afterwards.
      node.get<double>().tie(value, error);
      values[written++] = value;
    }
    // Every slot must have been filled, otherwise the scan stopped early.
    if (written != values.size()) {
      cerr << "bad count" << endl;
    }
    // Keep the compiler from discarding the work done on the vector.
    benchmark::DoNotOptimize(values.data());
    benchmark::ClobberMemory();
  }
}
BENCHMARK(numbers_type_size_scan);
static void numbers_load_scan(State& state) {
// Prints the number of results in twitter.json
dom::parser parser;

View File

@ -71,6 +71,12 @@ public:
inline size_t after_element() const noexcept;
really_inline tape_type tape_ref_type() const noexcept;
really_inline uint64_t tape_value() const noexcept;
// Type predicates for the tape entry this object refers to. Each one answers
// a single yes/no question and never throws (noexcept).
// True when the entry is a double.
really_inline bool is_double() const noexcept;
// True when the entry is a signed 64-bit integer.
really_inline bool is_int64() const noexcept;
// True when the entry is an unsigned 64-bit integer.
really_inline bool is_uint64() const noexcept;
// True when the entry is the boolean value false.
really_inline bool is_false() const noexcept;
// True when the entry is the boolean value true.
really_inline bool is_true() const noexcept;
// True when the entry is a null value.
really_inline bool is_null_on_tape() const noexcept;// different name to avoid clash with is_null.
really_inline uint32_t matching_brace_index() const noexcept;
really_inline uint32_t scope_count() const noexcept;
template<typename T>

View File

@ -736,6 +736,7 @@ inline key_value_pair::key_value_pair(const std::string_view &_key, element _val
// Default constructor: delegates to the default internal::tape_ref constructor.
really_inline element::element() noexcept : internal::tape_ref() {}
// Builds an element from a document pointer and an index, forwarding both
// unchanged to the internal::tape_ref base.
really_inline element::element(const document *_doc, size_t _json_index) noexcept : internal::tape_ref(_doc, _json_index) { }
inline element_type element::type() const noexcept {
switch (tape_ref_type()) {
case internal::tape_type::START_ARRAY:
@ -763,19 +764,17 @@ inline element_type element::type() const noexcept {
}
}
// Reports whether this element is a JSON null.
// NOTE(review): two return statements appear back to back below. This looks like
// a diff rendering that shows both the removed line (tape_ref_type() comparison)
// and the added line (is_null_on_tape()); as written, only the first one can
// execute. Confirm against the real file which single line is meant to remain.
really_inline bool element::is_null() const noexcept {
return tape_ref_type() == internal::tape_type::NULL_VALUE;
return is_null_on_tape();
}
// Reads this element as a boolean: yields true or false when the element is a
// boolean value, and INCORRECT_TYPE otherwise.
// NOTE(review): the body below contains a switch on tape_ref_type() followed by
// an if/else chain on is_true()/is_false() that computes the same answer, and
// the braces do not balance. This looks like a diff rendering in which the
// removed (switch) and added (if/else) lines are shown together; confirm
// against the real file before relying on this text as compilable code.
template<>
inline simdjson_result<bool> element::get<bool>() const noexcept {
switch (tape_ref_type()) {
case internal::tape_type::TRUE_VALUE:
return true;
case internal::tape_type::FALSE_VALUE:
return false;
default:
return INCORRECT_TYPE;
if(is_true()) {
return true;
} else if(is_false()) {
return false;
}
return INCORRECT_TYPE;
}
template<>
inline simdjson_result<const char *> element::get<const char *>() const noexcept {
@ -799,24 +798,22 @@ inline simdjson_result<std::string_view> element::get<std::string_view>() const
}
/**
 * Reads this element as an unsigned 64-bit integer.
 *
 * - Element stored as UINT64: the stored value is returned.
 * - Element stored as INT64: the value is returned when it is non-negative,
 *   otherwise NUMBER_OUT_OF_RANGE is returned.
 * - Any other element: INCORRECT_TYPE is returned.
 *
 * The code is shaped so that the expected case (the element really is a
 * uint64) is the straight-line path and everything else sits behind a single
 * branch marked as unlikely.
 */
template<>
inline simdjson_result<uint64_t> element::get<uint64_t>() const noexcept {
  if(unlikely(!is_uint64())) { // branch rarely taken
    if(is_int64()) {
      int64_t result = next_tape_value<int64_t>();
      if (result < 0) {
        return NUMBER_OUT_OF_RANGE;
      }
      return static_cast<uint64_t>(result);
    }
    return INCORRECT_TYPE;
  }
  // Common case. Read the payload with the type this function returns: reading
  // it as int64_t and relying on the implicit signed-to-unsigned conversion
  // gives the same value but hides the intent and invites sign-conversion
  // warnings.
  return next_tape_value<uint64_t>();
}
template<>
inline simdjson_result<int64_t> element::get<int64_t>() const noexcept {
switch (tape_ref_type()) {
case internal::tape_type::UINT64: {
if(unlikely(!is_int64())) { // branch rarely taken
if(is_uint64()) {
uint64_t result = next_tape_value<uint64_t>();
// Wrapping max in parens to handle Windows issue: https://stackoverflow.com/questions/11544073/how-do-i-deal-with-the-max-macro-in-windows-h-colliding-with-max-in-std
if (result > (std::numeric_limits<int64_t>::max)()) {
@ -824,30 +821,31 @@ inline simdjson_result<int64_t> element::get<int64_t>() const noexcept {
}
return static_cast<int64_t>(result);
}
case internal::tape_type::INT64:
return next_tape_value<int64_t>();
default:
return INCORRECT_TYPE;
return INCORRECT_TYPE;
}
return next_tape_value<int64_t>();
}
// Reads this element as a double. Per the statements below: a double entry is
// returned directly, uint64 and int64 entries are returned through the
// implicit conversion to double, and any other entry yields INCORRECT_TYPE.
// NOTE(review): the body mixes a switch on tape_ref_type() with an
// if(unlikely(!is_double())) chain, and the braces do not balance. This looks
// like a diff rendering in which removed and added lines are shown together
// (for instance the `result < 0` / NUMBER_OUT_OF_RANGE lines directly after a
// return); confirm against the real file which lines are live.
template<>
inline simdjson_result<double> element::get<double>() const noexcept {
switch (tape_ref_type()) {
case internal::tape_type::UINT64:
// Performance considerations:
// 1. Querying tape_ref_type() implies doing a shift; it is faster to do a straight
// comparison of the tape word.
// 2. Using a switch-case relies on the compiler guessing what kind of code generation
// we want... But the compiler cannot know that we expect the type to be "double"
// most of the time.
// We can expect get<double> to refer to a double type almost all the time.
// It is important to craft the code accordingly so that the compiler can use this
// information. (This could also be solved with profile-guided optimization.)
if(unlikely(!is_double())) { // branch rarely taken
if(is_uint64()) {
return next_tape_value<uint64_t>();
case internal::tape_type::INT64: {
} else if(is_int64()) {
return next_tape_value<int64_t>();
int64_t result = tape_value();
if (result < 0) {
return NUMBER_OUT_OF_RANGE;
}
return double(result);
}
case internal::tape_type::DOUBLE:
return next_tape_value<double>();
default:
return INCORRECT_TYPE;
return INCORRECT_TYPE;
}
// this is common:
return next_tape_value<double>();
}
template<>
inline simdjson_result<array> element::get<array>() const noexcept {
@ -1116,6 +1114,37 @@ namespace simdjson::internal {
// Default constructor: no document (doc is nullptr) and index 0.
really_inline tape_ref::tape_ref() noexcept : doc{nullptr}, json_index{0} {}
// Stores the given document pointer and index as-is; nothing is validated here.
really_inline tape_ref::tape_ref(const document *_doc, size_t _json_index) noexcept : doc{_doc}, json_index{_json_index} {}
// Some value types have a specific on-tape word value. It can be faster
// to check the type by doing a word-to-word comparison instead of extracting the
// most significant 8 bits.
// True when the tape word at json_index equals the DOUBLE tag shifted into the
// most significant byte, i.e. the whole 64-bit word is compared in one go.
really_inline bool tape_ref::is_double() const noexcept {
  constexpr uint64_t expected_word = static_cast<uint64_t>(tape_type::DOUBLE) << 56;
  const auto current_word = doc->tape[json_index];
  return current_word == expected_word;
}
// True when the tape word at json_index equals the INT64 tag shifted into the
// most significant byte, i.e. the whole 64-bit word is compared in one go.
really_inline bool tape_ref::is_int64() const noexcept {
  constexpr uint64_t expected_word = static_cast<uint64_t>(tape_type::INT64) << 56;
  const auto current_word = doc->tape[json_index];
  return current_word == expected_word;
}
// True when the tape word at json_index equals the UINT64 tag shifted into the
// most significant byte, i.e. the whole 64-bit word is compared in one go.
really_inline bool tape_ref::is_uint64() const noexcept {
  constexpr uint64_t expected_word = static_cast<uint64_t>(tape_type::UINT64) << 56;
  const auto current_word = doc->tape[json_index];
  return current_word == expected_word;
}
// True when the tape word at json_index equals the FALSE_VALUE tag shifted into
// the most significant byte, i.e. the whole 64-bit word is compared in one go.
really_inline bool tape_ref::is_false() const noexcept {
  constexpr uint64_t expected_word = static_cast<uint64_t>(tape_type::FALSE_VALUE) << 56;
  const auto current_word = doc->tape[json_index];
  return current_word == expected_word;
}
// True when the tape word at json_index equals the TRUE_VALUE tag shifted into
// the most significant byte, i.e. the whole 64-bit word is compared in one go.
really_inline bool tape_ref::is_true() const noexcept {
  constexpr uint64_t expected_word = static_cast<uint64_t>(tape_type::TRUE_VALUE) << 56;
  const auto current_word = doc->tape[json_index];
  return current_word == expected_word;
}
// True when the tape word at json_index equals the NULL_VALUE tag shifted into
// the most significant byte, i.e. the whole 64-bit word is compared in one go.
really_inline bool tape_ref::is_null_on_tape() const noexcept {
  constexpr uint64_t expected_word = static_cast<uint64_t>(tape_type::NULL_VALUE) << 56;
  const auto current_word = doc->tape[json_index];
  return current_word == expected_word;
}
inline size_t tape_ref::after_element() const noexcept {
switch (tape_ref_type()) {
case tape_type::START_ARRAY:
@ -1145,7 +1174,13 @@ really_inline uint32_t internal::tape_ref::scope_count() const noexcept {
// Returns the tape word that follows the current one (index json_index + 1),
// reinterpreted as a value of type T. T must be exactly as large as a
// uint64_t; this is enforced at compile time by the static_assert.
// NOTE(review): a reinterpret_cast return appears directly before the comment
// that advises against it, which makes the memcpy path below unreachable as
// written. This looks like a diff rendering showing the removed line next to
// the added ones; confirm against the real file that only the memcpy version
// remains.
template<typename T>
really_inline T tape_ref::next_tape_value() const noexcept {
static_assert(sizeof(T) == sizeof(uint64_t));
return *reinterpret_cast<const T*>(&doc->tape[json_index + 1]);
// Though the following is tempting...
// return *reinterpret_cast<const T*>(&doc->tape[json_index + 1]);
// It is not generally safe. It is safer, and often faster to rely
// on memcpy. Yes, it is uglier, but it is also encapsulated.
T x;
// Copies exactly one 64-bit word; the static_assert above guarantees that this
// is also sizeof(T).
memcpy(&x,&doc->tape[json_index + 1],sizeof(uint64_t));
return x;
}
inline std::string_view internal::tape_ref::get_string_view() const noexcept {
size_t string_buf_index = tape_value();