This implements @jkeiser's optimization idea. (#431)

This commit is contained in:
Daniel Lemire 2020-01-03 09:21:36 -05:00 committed by GitHub
parent 3b9e6bff3c
commit 5042dd52ce
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed files with 22 additions and 32 deletions

View File

@ -10,56 +10,46 @@ using namespace simdjson;
// Returns true when the four bytes at loc are exactly "true" and the byte
// that follows (loc[4]) is accepted by is_not_structural_or_whitespace,
// i.e. that helper returns 0 for it.
//
// The function reads loc[0] through loc[4]. When the atom sits at the very
// end of the input this touches bytes past the logical end, so the caller
// must supply a buffer padded with SIMDJSON_PADDING bytes.
WARN_UNUSED
really_inline bool is_valid_true_atom(const uint8_t *loc) {
  // Build the expected 32-bit pattern with memcpy instead of dereferencing a
  // reinterpret_cast of the string literal: the literal is a char array with
  // no uint32_t alignment guarantee, and reading it through a uint32_t lvalue
  // violates the aliasing rules. Because both sides are filled byte-for-byte
  // in memory order, the comparison is independent of endianness.
  uint32_t tv;
  std::memcpy(&tv, "true", sizeof(uint32_t));
  // Reading through loc[4] needs at most sizeof(uint32_t) bytes of padding
  // when loc[0] is the last byte of the input.
  static_assert(sizeof(uint32_t) <= SIMDJSON_PADDING);
  // memcpy avoids an unaligned 32-bit load (undefined in C/C++).
  uint32_t locval;
  std::memcpy(&locval, loc, sizeof(uint32_t));
  // Any differing bit leaves error non-zero.
  uint32_t error = locval ^ tv;
  // The atom must be terminated: fold the helper's verdict on the next byte
  // into the same error word so there is a single final test.
  error |= is_not_structural_or_whitespace(loc[4]);
  return error == 0;
}
// Returns true when loc[1..4] are exactly "alse" and the byte that follows
// (loc[5]) is accepted by is_not_structural_or_whitespace, i.e. that helper
// returns 0 for it.
//
// loc[0] is NOT examined: the caller is assumed to have already established
// that loc starts with 'f'. Skipping it lets the remaining four characters
// be compared with a single 32-bit operation even though "false" is five
// bytes long.
//
// The function reads loc[1] through loc[5]. When the atom sits at the very
// end of the input this touches bytes past the logical end, so the caller
// must supply a buffer padded with SIMDJSON_PADDING bytes.
WARN_UNUSED
really_inline bool is_valid_false_atom(const uint8_t *loc) {
  // Build the expected 32-bit pattern with memcpy instead of dereferencing a
  // reinterpret_cast of the string literal: the literal is a char array with
  // no uint32_t alignment guarantee, and reading it through a uint32_t lvalue
  // violates the aliasing rules. Because both sides are filled byte-for-byte
  // in memory order, the comparison is independent of endianness.
  uint32_t fv;
  std::memcpy(&fv, "alse", sizeof(uint32_t));
  // Reading through loc[5] needs at most sizeof(uint32_t) + 1 bytes of
  // padding when loc[0] is the last byte of the input.
  static_assert(sizeof(uint32_t) + 1 <= SIMDJSON_PADDING);
  // memcpy avoids an unaligned 32-bit load (undefined in C/C++).
  uint32_t locval;
  std::memcpy(&locval, loc + 1, sizeof(uint32_t));
  // Any differing bit leaves error non-zero.
  uint32_t error = locval ^ fv;
  // The atom must be terminated: fold the helper's verdict on the next byte
  // into the same error word so there is a single final test.
  error |= is_not_structural_or_whitespace(loc[5]);
  return error == 0;
}
// Returns true when the four bytes at loc are exactly "null" and the byte
// that follows (loc[4]) is accepted by is_not_structural_or_whitespace,
// i.e. that helper returns 0 for it.
//
// The function reads loc[0] through loc[4]. When the atom sits at the very
// end of the input this touches bytes past the logical end, so the caller
// must supply a buffer padded with SIMDJSON_PADDING bytes.
WARN_UNUSED
really_inline bool is_valid_null_atom(const uint8_t *loc) {
  // Build the expected 32-bit pattern with memcpy instead of dereferencing a
  // reinterpret_cast of the string literal: the literal is a char array with
  // no uint32_t alignment guarantee, and reading it through a uint32_t lvalue
  // violates the aliasing rules. Because both sides are filled byte-for-byte
  // in memory order, the comparison is independent of endianness.
  uint32_t nv;
  std::memcpy(&nv, "null", sizeof(uint32_t));
  // Reading through loc[4] needs at most sizeof(uint32_t) bytes of padding
  // when loc[0] is the last byte of the input.
  static_assert(sizeof(uint32_t) <= SIMDJSON_PADDING);
  // memcpy avoids an unaligned 32-bit load (undefined in C/C++).
  uint32_t locval;
  std::memcpy(&locval, loc, sizeof(uint32_t));
  // Any differing bit leaves error non-zero.
  uint32_t error = locval ^ nv;
  // The atom must be terminated: fold the helper's verdict on the next byte
  // into the same error word so there is a single final test.
  error |= is_not_structural_or_whitespace(loc[4]);
  return error == 0;
}