This massively improves the performance of tight loops relying on a type() call. (#721)

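* Replace the switch in element::type() with a direct cast of the tape type (element_type values now mirror the tape type characters). This massively improves the performance of tight loops relying on a type() call.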

* Adding a few more benchmarks
This commit is contained in:
Daniel Lemire 2020-04-15 20:45:40 -04:00 committed by GitHub
parent 326c175dcb
commit befa6423be
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 55 additions and 35 deletions

View File

@ -202,8 +202,8 @@ static void numbers_exceptions_size_scan(State& state) {
std::vector<double> container; std::vector<double> container;
container.resize(arr.size()); container.resize(arr.size());
size_t pos = 0; size_t pos = 0;
for (double x : arr) { for (auto e : arr) {
container[pos++] = x; container[pos++] = double(e);
} }
if(pos != container.size()) { cerr << "bad count" << endl; } if(pos != container.size()) { cerr << "bad count" << endl; }
benchmark::DoNotOptimize(container.data()); benchmark::DoNotOptimize(container.data());
@ -213,6 +213,48 @@ static void numbers_exceptions_size_scan(State& state) {
BENCHMARK(numbers_exceptions_size_scan); BENCHMARK(numbers_exceptions_size_scan);
/**
 * Benchmark: type-checked scan of the numbers array into a growing vector.
 *
 * The document is loaded from NUMBERS_JSON once, before the benchmark loop.
 * Each benchmark iteration walks the array, queries type() on every element,
 * and appends double(element) to a freshly constructed vector. If an element
 * whose type is not DOUBLE is encountered, a message is written to cerr and
 * the scan for that iteration stops early.
 */
static void numbers_type_exceptions_scan(State& state) {
  dom::parser parser;
  dom::array arr = parser.load(NUMBERS_JSON);
  for (auto _ : state) {
    std::vector<double> values;
    for (auto element : arr) {
      const bool is_double = (element.type() == dom::element_type::DOUBLE);
      if (!is_double) {
        cerr << "found a node that is not an number?" << endl;
        break;
      }
      values.push_back(double(element));
    }
    // Keep the compiler from discarding the work done on `values`.
    benchmark::DoNotOptimize(values.data());
    benchmark::ClobberMemory();
  }
}
BENCHMARK(numbers_type_exceptions_scan);
/**
 * Benchmark: type-checked scan of the numbers array into a pre-sized vector.
 *
 * The document is loaded from NUMBERS_JSON once, before the benchmark loop.
 * Each benchmark iteration sizes the output vector to arr.size(), then walks
 * the array, queries type() on every element, and stores double(element) at
 * the next free slot. If an element whose type is not DOUBLE is encountered,
 * a message is written to cerr and the scan stops early. After the scan,
 * "bad count" is reported on cerr if the number of stored values differs
 * from the vector's size.
 */
static void numbers_type_exceptions_size_scan(State& state) {
  dom::parser parser;
  dom::array arr = parser.load(NUMBERS_JSON);
  for (auto _ : state) {
    std::vector<double> values(arr.size());
    size_t filled = 0;
    for (auto element : arr) {
      if (element.type() != dom::element_type::DOUBLE) {
        cerr << "found a node that is not an number?" << endl;
        break;
      }
      values[filled] = double(element);
      ++filled;
    }
    if (filled != values.size()) {
      cerr << "bad count" << endl;
    }
    // Keep the compiler from discarding the work done on `values`.
    benchmark::DoNotOptimize(values.data());
    benchmark::ClobberMemory();
  }
}
BENCHMARK(numbers_type_exceptions_size_scan);
static void numbers_exceptions_load_scan(State& state) { static void numbers_exceptions_load_scan(State& state) {
// Prints the number of results in twitter.json // Prints the number of results in twitter.json
dom::parser parser; dom::parser parser;

View File

@ -44,7 +44,7 @@ constexpr const uint64_t JSON_VALUE_MASK = 0x00FFFFFFFFFFFFFF;
constexpr const uint32_t JSON_COUNT_MASK = 0xFFFFFF; constexpr const uint32_t JSON_COUNT_MASK = 0xFFFFFF;
/** /**
* The possible types in the tape. Internal only. * The possible types in the tape.
*/ */
enum class tape_type { enum class tape_type {
ROOT = 'r', ROOT = 'r',
@ -99,14 +99,14 @@ namespace simdjson::dom {
* This is the type it is most easily cast to with get<>. * This is the type it is most easily cast to with get<>.
*/ */
enum class element_type { enum class element_type {
ARRAY, ///< dom::array ARRAY = '[', ///< dom::array
OBJECT, ///< dom::object OBJECT = '{', ///< dom::object
INT64, ///< int64_t INT64 = 'l', ///< int64_t
UINT64, ///< uint64_t: any integer that fits in uint64_t but *not* int64_t UINT64 = 'u', ///< uint64_t: any integer that fits in uint64_t but *not* int64_t
DOUBLE, ///< double: Any number with a "." or "e" that fits in double. DOUBLE = 'd', ///< double: Any number with a "." or "e" that fits in double.
STRING, ///< std::string_view STRING = '"', ///< std::string_view
BOOL, ///< bool BOOL = 't', ///< bool
NULL_VALUE ///< null NULL_VALUE = 'n' ///< null
}; };
/** /**

View File

@ -738,30 +738,8 @@ really_inline element::element(const document *_doc, size_t _json_index) noexcep
inline element_type element::type() const noexcept { inline element_type element::type() const noexcept {
switch (tape_ref_type()) { auto tape_type = tape_ref_type();
case internal::tape_type::START_ARRAY: return tape_type == internal::tape_type::FALSE_VALUE ? element_type::BOOL : static_cast<element_type>(tape_type);
return element_type::ARRAY;
case internal::tape_type::START_OBJECT:
return element_type::OBJECT;
case internal::tape_type::INT64:
return element_type::INT64;
case internal::tape_type::UINT64:
return element_type::UINT64;
case internal::tape_type::DOUBLE:
return element_type::DOUBLE;
case internal::tape_type::STRING:
return element_type::STRING;
case internal::tape_type::TRUE_VALUE:
case internal::tape_type::FALSE_VALUE:
return element_type::BOOL;
case internal::tape_type::NULL_VALUE:
return element_type::NULL_VALUE;
case internal::tape_type::ROOT:
case internal::tape_type::END_ARRAY:
case internal::tape_type::END_OBJECT:
default:
abort();
}
} }
really_inline bool element::is_null() const noexcept { really_inline bool element::is_null() const noexcept {
return is_null_on_tape(); return is_null_on_tape();