The new number parsing code is faster.
This commit is contained in:
parent
57b840327f
commit
4ee515fa4b
|
@ -4,20 +4,6 @@
|
|||
#include "jsonparser/jsoncharutils.h"
|
||||
#include "jsonparser/simdjson_internal.h"
|
||||
|
||||
// does not validation whatsoever, assumes that all digit
|
||||
// this is CS 101
|
||||
inline u64 naivestrtoll(const char *p, const char *end) {
|
||||
if (p == end)
|
||||
return 0; // should be an error?
|
||||
// this code could get a whole lot smarter if we have many long ints:
|
||||
u64 x = *p - '0';
|
||||
p++;
|
||||
for (; p < end; p++) {
|
||||
x = (x * 10) + (*p - '0');
|
||||
}
|
||||
return x;
|
||||
}
|
||||
|
||||
static const double power_of_ten[] = {
|
||||
1e-308, 1e-307, 1e-306, 1e-305, 1e-304, 1e-303, 1e-302, 1e-301, 1e-300,
|
||||
1e-299, 1e-298, 1e-297, 1e-296, 1e-295, 1e-294, 1e-293, 1e-292, 1e-291,
|
||||
|
@ -89,10 +75,6 @@ static const double power_of_ten[] = {
|
|||
1e295, 1e296, 1e297, 1e298, 1e299, 1e300, 1e301, 1e302, 1e303,
|
||||
1e304, 1e305, 1e306, 1e307, 1e308};
|
||||
|
||||
//#define SIMPLENUMBERPARSING
|
||||
|
||||
#ifdef SIMPLENUMBERPARSING
|
||||
|
||||
// True iff c is one of the ASCII decimal digits '0'..'9'.
static inline bool is_integer(char c) {
  // A single unsigned comparison: anything below '0' wraps to a huge value.
  const unsigned distance_from_zero = (unsigned)c - (unsigned)'0';
  return distance_from_zero <= 9u;
}
|
||||
|
||||
const bool structural_or_whitespace_or_exponent_or_decimal_negated[256] = {
|
||||
|
@ -115,6 +97,8 @@ is_not_structural_or_whitespace_or_exponent_or_decimal(unsigned char c) {
|
|||
return structural_or_whitespace_or_exponent_or_decimal_negated[c];
|
||||
}
|
||||
|
||||
// parse the number at buf + offset
|
||||
// define JSON_TEST_NUMBERS for unit testing
|
||||
static really_inline bool parse_number(const u8 *const buf, UNUSED size_t len,
|
||||
ParsedJson &pj, const u32 depth,
|
||||
const u32 offset, UNUSED bool found_zero,
|
||||
|
@ -229,244 +213,4 @@ static really_inline bool parse_number(const u8 *const buf, UNUSED size_t len,
|
|||
return true;
|
||||
}
|
||||
|
||||
#else
|
||||
// parse the number at buf + offset
|
||||
// define JSON_TEST_NUMBERS for unit testing
|
||||
static really_inline bool parse_number(const u8 *const buf, UNUSED size_t len,
|
||||
ParsedJson &pj, const u32 depth,
|
||||
const u32 offset, bool found_zero,
|
||||
bool found_minus) {
|
||||
const u8 *src = &buf[offset];
|
||||
if (found_minus) {
|
||||
src++;
|
||||
found_zero = (buf[offset] == '0');
|
||||
}
|
||||
|
||||
// this can read past the string content, so we need to have overallocated
|
||||
m256 v = _mm256_loadu_si256((const m256 *)(src));
|
||||
u64 error_sump = 0;
|
||||
|
||||
// categories to extract
|
||||
// Digits:
|
||||
// 0 (0x30) - bucket 0
|
||||
// 1-9 (never any distinction except if we didn't get the free kick at 0 due
|
||||
// to the leading minus) (0x31-0x39) - bucket 1
|
||||
// . (0x2e) - bucket 2
|
||||
// E or e - no distinction (0x45/0x65) - bucket 3
|
||||
// + (0x2b) - bucket 4
|
||||
// - (0x2d) - bucket 4
|
||||
// Terminators
|
||||
// Whitespace: 0x20, 0x09, 0x0a, 0x0d - bucket 5+6
|
||||
// Comma and the closes: 0x2c is comma, } is 0x5d, ] is 0x7d - bucket 5+7
|
||||
// Geoff suggests that this is not ideal, but it seems to work well enough.
|
||||
const m256 low_nibble_mask = _mm256_setr_epi8(
|
||||
// 0 1 2 3 4 5 6 7 8 9 a b c d e f
|
||||
33, 2, 2, 2, 2, 10, 2, 2, 2, 66, 64, 16, 32, 0xd0, 4, 0, 33, 2, 2, 2, 2,
|
||||
10, 2, 2, 2, 66, 64, 16, 32, 0xd0, 4, 0);
|
||||
const m256 high_nibble_mask = _mm256_setr_epi8(
|
||||
// 0 1 2 3 4 5 6 7 8 9 a b c d e f
|
||||
64, 0, 52, 3, 8, -128, 8, 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 64, 0, 52, 3, 8,
|
||||
-128, 8, 0x80, 0, 0, 0, 0, 0, 0, 0, 0);
|
||||
|
||||
m256 tmp = _mm256_and_si256(
|
||||
_mm256_shuffle_epi8(low_nibble_mask, v),
|
||||
_mm256_shuffle_epi8(
|
||||
high_nibble_mask,
|
||||
_mm256_and_si256(_mm256_srli_epi32(v, 4), _mm256_set1_epi8(0x7f))));
|
||||
|
||||
m256 enders_mask = _mm256_set1_epi8(0xe0);
|
||||
m256 tmp_enders = _mm256_cmpeq_epi8(_mm256_and_si256(tmp, enders_mask),
|
||||
_mm256_set1_epi8(0));
|
||||
u32 enders = ~(u32)_mm256_movemask_epi8(tmp_enders);
|
||||
if (enders == 0) {
|
||||
// if there are no ender characters then we are using more than 31 bytes for
|
||||
// the number, and we can safely assume that it is a floating-point numbers
|
||||
// hopefully, this is uncommon and we can fall back on the standard API
|
||||
char *end;
|
||||
double result = strtod((const char *)src, &end);
|
||||
if ((errno != 0) || (end == (const char *)src) ||
|
||||
is_not_structural_or_whitespace(*end)) {
|
||||
#ifdef JSON_TEST_NUMBERS // for unit testing
|
||||
foundInvalidNumber(buf + offset);
|
||||
#endif
|
||||
return false;
|
||||
}
|
||||
if (found_minus) {
|
||||
result = -result;
|
||||
}
|
||||
#ifdef JSON_TEST_NUMBERS // for unit testing
|
||||
foundFloat(result, buf + offset);
|
||||
#endif
|
||||
pj.write_tape_double(depth, result);
|
||||
return true;
|
||||
}
|
||||
///////////
|
||||
// From this point forward, we know that the
|
||||
// the number fits in 31 bytes.
|
||||
///////////
|
||||
|
||||
// number_mask captures everything before the first ender
|
||||
u32 number_mask = ~enders & (enders - 1);
|
||||
// let us identify just the digits 0-9
|
||||
m256 d_mask = _mm256_set1_epi8(0x03);
|
||||
m256 tmp_d =
|
||||
_mm256_cmpeq_epi8(_mm256_and_si256(tmp, d_mask), _mm256_set1_epi8(0));
|
||||
u32 digit_characters = ~(u32)_mm256_movemask_epi8(tmp_d);
|
||||
digit_characters &= number_mask;
|
||||
// integers are probably common, so let us do them right away
|
||||
if (number_mask == digit_characters) {
|
||||
int stringlength = __builtin_ctz(~digit_characters);
|
||||
const char *end = (const char *)src + stringlength;
|
||||
u64 result = naivestrtoll((const char *)src, end);
|
||||
if (found_minus) {
|
||||
result = -result;
|
||||
}
|
||||
// it is valid as long as it does not start with zero!
|
||||
// or just 0, whether -0 is allowed is debatable?
|
||||
bool isvalid = !((found_zero) && (stringlength > 1));
|
||||
|
||||
#ifdef JSON_TEST_NUMBERS // for unit testing
|
||||
if (isvalid) {
|
||||
foundInteger(result, buf + offset);
|
||||
} else {
|
||||
foundInvalidNumber(buf + offset);
|
||||
}
|
||||
#endif
|
||||
pj.write_tape_s64(depth, result);
|
||||
// it is valid as long as it does not start with zero!
|
||||
// or just 0, whether -0 is allowed is debatable?
|
||||
return isvalid;
|
||||
}
|
||||
|
||||
m256 n_mask = _mm256_set1_epi8(0x1f);
|
||||
m256 tmp_n =
|
||||
_mm256_cmpeq_epi8(_mm256_and_si256(tmp, n_mask), _mm256_set1_epi8(0));
|
||||
u32 number_characters = ~(u32)_mm256_movemask_epi8(tmp_n);
|
||||
|
||||
// put something into our error sump if we have something
|
||||
// before our ending characters that isn't a valid character
|
||||
// for the inside of our JSON
|
||||
number_characters &= number_mask;
|
||||
error_sump |= number_characters ^ number_mask;
|
||||
|
||||
// we can now assume that all of the content made of relevant characters
|
||||
|
||||
m256 p_mask = _mm256_set1_epi8(0x04);
|
||||
m256 tmp_p =
|
||||
_mm256_cmpeq_epi8(_mm256_and_si256(tmp, p_mask), _mm256_set1_epi8(0));
|
||||
u32 decimal_characters = ~(u32)_mm256_movemask_epi8(tmp_p);
|
||||
decimal_characters &= number_mask;
|
||||
|
||||
// the decimal character must be unique or absent
|
||||
// we might have 1e10, 0.1, ...
|
||||
error_sump |= ((decimal_characters) & (decimal_characters - 1));
|
||||
|
||||
// detect the exponential characters
|
||||
m256 e_mask = _mm256_set1_epi8(0x08);
|
||||
m256 tmp_e =
|
||||
_mm256_cmpeq_epi8(_mm256_and_si256(tmp, e_mask), _mm256_set1_epi8(0));
|
||||
u32 exponent_characters = ~(u32)_mm256_movemask_epi8(tmp_e);
|
||||
exponent_characters &= number_mask;
|
||||
|
||||
// the exponent character must be unique or absent
|
||||
error_sump |= ((exponent_characters) & (exponent_characters - 1));
|
||||
|
||||
// if they exist the exponent character must follow the decimal_characters
|
||||
// character
|
||||
error_sump |=
|
||||
((exponent_characters - 1) & decimal_characters) ^ decimal_characters;
|
||||
|
||||
// if the zero character is in first position, it
|
||||
// needs to be followed by the decimal
|
||||
error_sump |= found_zero ^ ((decimal_characters >> 1) & found_zero);
|
||||
|
||||
// let us detect the sign characters
|
||||
m256 s_mask = _mm256_set1_epi8(0x10);
|
||||
m256 tmp_s =
|
||||
_mm256_cmpeq_epi8(_mm256_and_si256(tmp, s_mask), _mm256_set1_epi8(0));
|
||||
|
||||
u32 sign_characters = ~(u32)_mm256_movemask_epi8(tmp_s);
|
||||
sign_characters &= number_mask;
|
||||
|
||||
// any sign character must be followed by a digit
|
||||
error_sump |= (~(digit_characters >> 1)) & sign_characters;
|
||||
|
||||
// there is at most one sign character
|
||||
error_sump |= ((sign_characters) & (sign_characters - 1));
|
||||
|
||||
// the exponent must be followed by either a sign character or a digit
|
||||
error_sump |=
|
||||
(~((digit_characters | sign_characters) >> 1)) & exponent_characters;
|
||||
|
||||
if (error_sump != 0) {
|
||||
#ifdef JSON_TEST_NUMBERS // for unit testing
|
||||
foundInvalidNumber(buf + offset);
|
||||
#endif
|
||||
return false;
|
||||
}
|
||||
// so we have a nice float-point at this time
|
||||
|
||||
const char *p = (const char *)src;
|
||||
// we start with digits followed by "." or "e" or "E".
|
||||
// scan them
|
||||
int integerpartlength =
|
||||
__builtin_ctz(exponent_characters | decimal_characters);
|
||||
const char *endjustinteger = p + integerpartlength;
|
||||
uint64_t integerpart = *p - '0'; // there must be at least one digit
|
||||
p++;
|
||||
for (; p != endjustinteger; p++) {
|
||||
integerpart = (integerpart * 10) + (*p - '0');
|
||||
}
|
||||
double result = integerpart;
|
||||
if (decimal_characters != 0) {
|
||||
p++;
|
||||
int mantissalength = __builtin_ctz(exponent_characters | enders);
|
||||
const char *endmantissa = (const char *)src + mantissalength;
|
||||
int fracdigitcount = endmantissa - p; // could be zero!
|
||||
uint64_t fractionalpart = 0; // there could be nothing
|
||||
for (; p != endmantissa; p++) {
|
||||
fractionalpart = (fractionalpart * 10) + (*p - '0');
|
||||
}
|
||||
result += fractionalpart * power_of_ten[308 - fracdigitcount];
|
||||
}
|
||||
if (exponent_characters != 0) {
|
||||
p++; // skip exponent
|
||||
int sign = +1;
|
||||
if (p[0] == '+')
|
||||
p++;
|
||||
if (p[0] == '-') {
|
||||
p++;
|
||||
sign = -1;
|
||||
}
|
||||
int stringlength = __builtin_ctz(~number_mask);
|
||||
const char *endnumber = (const char *)src + stringlength;
|
||||
|
||||
uint64_t exppart = *p - '0';
|
||||
p++;
|
||||
for (; p != endnumber; p++) {
|
||||
exppart = (exppart * 10) + (*p - '0');
|
||||
}
|
||||
if (exppart > 308) {
|
||||
#ifdef JSON_TEST_NUMBERS // for unit testing
|
||||
foundInvalidNumber(buf + offset);
|
||||
#endif
|
||||
return false;
|
||||
}
|
||||
// betting that these branches are highly predictible
|
||||
// could use arithmetic instead
|
||||
if (sign == 1) {
|
||||
result = result * power_of_ten[308 + exppart];
|
||||
} else {
|
||||
result = result * power_of_ten[308 - exppart];
|
||||
}
|
||||
}
|
||||
if (found_minus) {
|
||||
result = -result;
|
||||
}
|
||||
#ifdef JSON_TEST_NUMBERS // for unit testing
|
||||
foundFloat(result, buf + offset);
|
||||
#endif
|
||||
pj.write_tape_double(depth, result);
|
||||
return true;
|
||||
}
|
||||
#endif
|
Loading…
Reference in New Issue