The new number parsing code is faster.

This commit is contained in:
Daniel Lemire 2018-09-28 14:45:34 -04:00
parent 57b840327f
commit 4ee515fa4b
1 changed file with 2 additions and 258 deletions

View File

@ -4,20 +4,6 @@
#include "jsonparser/jsoncharutils.h"
#include "jsonparser/simdjson_internal.h"
// Converts the run of ASCII digits in [p, end) to an unsigned 64-bit value.
// No validation is performed: every byte is assumed to be a digit, and an
// overly long run silently wraps around. An empty range yields 0 (arguably
// this should be reported as an error instead).
inline u64 naivestrtoll(const char *p, const char *end) {
  if (p == end) {
    return 0;
  }
  // Textbook base-10 accumulation; the first digit is always consumed.
  u64 value = 0;
  do {
    value = (value * 10) + (*p - '0');
    p++;
  } while (p < end);
  return value;
}
static const double power_of_ten[] = {
1e-308, 1e-307, 1e-306, 1e-305, 1e-304, 1e-303, 1e-302, 1e-301, 1e-300,
1e-299, 1e-298, 1e-297, 1e-296, 1e-295, 1e-294, 1e-293, 1e-292, 1e-291,
@ -89,10 +75,6 @@ static const double power_of_ten[] = {
1e295, 1e296, 1e297, 1e298, 1e299, 1e300, 1e301, 1e302, 1e303,
1e304, 1e305, 1e306, 1e307, 1e308};
//#define SIMPLENUMBERPARSING
#ifdef SIMPLENUMBERPARSING
// Returns true when c is one of the ASCII decimal digits '0'..'9'.
static inline bool is_integer(char c) {
  if (c < '0') {
    return false;
  }
  return c <= '9';
}
const bool structural_or_whitespace_or_exponent_or_decimal_negated[256] = {
@ -115,6 +97,8 @@ is_not_structural_or_whitespace_or_exponent_or_decimal(unsigned char c) {
return structural_or_whitespace_or_exponent_or_decimal_negated[c];
}
// parse the number at buf + offset
// define JSON_TEST_NUMBERS for unit testing
static really_inline bool parse_number(const u8 *const buf, UNUSED size_t len,
ParsedJson &pj, const u32 depth,
const u32 offset, UNUSED bool found_zero,
@ -229,244 +213,4 @@ static really_inline bool parse_number(const u8 *const buf, UNUSED size_t len,
return true;
}
#else
// Parses the JSON number at buf + offset and appends it to pj's tape, either
// as a signed 64-bit integer (write_tape_s64) or as a double
// (write_tape_double).
//
//   buf         - input bytes; 32 bytes are loaded starting at the number, so
//                 the buffer must be over-allocated past the end of the text.
//   len         - unused.
//   pj          - receives the parsed value.
//   depth       - forwarded unchanged to the tape writer.
//   offset      - index in buf of the first byte of the number (the '-' itself
//                 when found_minus is set).
//   found_zero  - caller's indication that the number starts with '0';
//                 recomputed here when found_minus is set.
//   found_minus - true when buf[offset] is a leading '-'.
//
// Returns true when the text is accepted as a number, false otherwise.
// Define JSON_TEST_NUMBERS for unit testing: foundInteger / foundFloat /
// foundInvalidNumber are then invoked with the outcome.
static really_inline bool parse_number(const u8 *const buf, UNUSED size_t len,
                                       ParsedJson &pj, const u32 depth,
                                       const u32 offset, bool found_zero,
                                       bool found_minus) {
  const u8 *src = &buf[offset];
  if (found_minus) {
    src++;
    // buf[offset] is the '-' itself, so the leading-zero test must look at
    // the first byte after the sign.
    found_zero = (src[0] == '0');
  }
  // this can read past the string content, so we need to have overallocated
  m256 v = _mm256_loadu_si256((const m256 *)(src));
  u64 error_sump = 0;
  // categories to extract
  // Digits:
  // 0 (0x30) - bucket 0
  // 1-9 (never any distinction except if we didn't get the free kick at 0 due
  // to the leading minus) (0x31-0x39) - bucket 1
  // . (0x2e) - bucket 2
  // E or e - no distinction (0x45/0x65) - bucket 3
  // + (0x2b) - bucket 4
  // - (0x2d) - bucket 4
  // Terminators
  // Whitespace: 0x20, 0x09, 0x0a, 0x0d - bucket 5+6
  // Comma and the closes: 0x2c is comma, ] is 0x5d, } is 0x7d - bucket 5+7
  // Geoff suggests that this is not ideal, but it seems to work well enough.
  const m256 low_nibble_mask = _mm256_setr_epi8(
      // 0 1 2 3 4 5 6 7 8 9 a b c d e f
      33, 2, 2, 2, 2, 10, 2, 2, 2, 66, 64, 16, 32, 0xd0, 4, 0, 33, 2, 2, 2, 2,
      10, 2, 2, 2, 66, 64, 16, 32, 0xd0, 4, 0);
  const m256 high_nibble_mask = _mm256_setr_epi8(
      // 0 1 2 3 4 5 6 7 8 9 a b c d e f
      64, 0, 52, 3, 8, -128, 8, 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 64, 0, 52, 3, 8,
      -128, 8, 0x80, 0, 0, 0, 0, 0, 0, 0, 0);
  // tmp holds, for each input byte, the bucket bits obtained by intersecting
  // the low-nibble and high-nibble table lookups.
  m256 tmp = _mm256_and_si256(
      _mm256_shuffle_epi8(low_nibble_mask, v),
      _mm256_shuffle_epi8(
          high_nibble_mask,
          _mm256_and_si256(_mm256_srli_epi32(v, 4), _mm256_set1_epi8(0x7f))));
  m256 enders_mask = _mm256_set1_epi8(0xe0);
  m256 tmp_enders = _mm256_cmpeq_epi8(_mm256_and_si256(tmp, enders_mask),
                                      _mm256_set1_epi8(0));
  // one bit per loaded byte, set when that byte is a terminator
  u32 enders = ~(u32)_mm256_movemask_epi8(tmp_enders);
  if (enders == 0) {
    // if there are no ender characters then we are using more than 31 bytes for
    // the number, and we can safely assume that it is a floating-point numbers
    // hopefully, this is uncommon and we can fall back on the standard API
    char *end;
    // strtod only sets errno on failure, so clear any stale value first;
    // otherwise an earlier, unrelated error would reject a valid number.
    errno = 0;
    double result = strtod((const char *)src, &end);
    if ((errno != 0) || (end == (const char *)src) ||
        is_not_structural_or_whitespace(*end)) {
#ifdef JSON_TEST_NUMBERS // for unit testing
      foundInvalidNumber(buf + offset);
#endif
      return false;
    }
    if (found_minus) {
      result = -result;
    }
#ifdef JSON_TEST_NUMBERS // for unit testing
    foundFloat(result, buf + offset);
#endif
    pj.write_tape_double(depth, result);
    return true;
  }
  ///////////
  // From this point forward, we know that the
  // the number fits in 31 bytes.
  ///////////
  // number_mask captures everything before the first ender
  u32 number_mask = ~enders & (enders - 1);
  // let us identify just the digits 0-9
  m256 d_mask = _mm256_set1_epi8(0x03);
  m256 tmp_d =
      _mm256_cmpeq_epi8(_mm256_and_si256(tmp, d_mask), _mm256_set1_epi8(0));
  u32 digit_characters = ~(u32)_mm256_movemask_epi8(tmp_d);
  digit_characters &= number_mask;
  // integers are probably common, so let us do them right away
  if (number_mask == digit_characters) {
    int stringlength = __builtin_ctz(~digit_characters);
    const char *end = (const char *)src + stringlength;
    u64 result = naivestrtoll((const char *)src, end);
    if (found_minus) {
      result = -result;
    }
    // it is valid as long as it has at least one digit (a lone '-' followed
    // by a terminator is not a number) and does not start with zero,
    // or is just 0; whether -0 is allowed is debatable?
    bool isvalid = (stringlength > 0) && !((found_zero) && (stringlength > 1));
#ifdef JSON_TEST_NUMBERS // for unit testing
    if (isvalid) {
      foundInteger(result, buf + offset);
    } else {
      foundInvalidNumber(buf + offset);
    }
#endif
    // the tape is written even when the number is rejected; the caller is
    // told about the failure through the return value
    pj.write_tape_s64(depth, result);
    return isvalid;
  }
  m256 n_mask = _mm256_set1_epi8(0x1f);
  m256 tmp_n =
      _mm256_cmpeq_epi8(_mm256_and_si256(tmp, n_mask), _mm256_set1_epi8(0));
  u32 number_characters = ~(u32)_mm256_movemask_epi8(tmp_n);
  // put something into our error sump if we have something
  // before our ending characters that isn't a valid character
  // for the inside of our JSON
  number_characters &= number_mask;
  error_sump |= number_characters ^ number_mask;
  // the first character must be a digit: the integer part is never empty.
  // Without this, inputs such as "-.5" or "-e5" reach the digit loops below
  // with a zero-length integer part and run past its end pointer.
  error_sump |= (~digit_characters) & 1;
  // we can now assume that all of the content made of relevant characters
  m256 p_mask = _mm256_set1_epi8(0x04);
  m256 tmp_p =
      _mm256_cmpeq_epi8(_mm256_and_si256(tmp, p_mask), _mm256_set1_epi8(0));
  u32 decimal_characters = ~(u32)_mm256_movemask_epi8(tmp_p);
  decimal_characters &= number_mask;
  // the decimal character must be unique or absent
  // we might have 1e10, 0.1, ...
  error_sump |= ((decimal_characters) & (decimal_characters - 1));
  // detect the exponential characters
  m256 e_mask = _mm256_set1_epi8(0x08);
  m256 tmp_e =
      _mm256_cmpeq_epi8(_mm256_and_si256(tmp, e_mask), _mm256_set1_epi8(0));
  u32 exponent_characters = ~(u32)_mm256_movemask_epi8(tmp_e);
  exponent_characters &= number_mask;
  // the exponent character must be unique or absent
  error_sump |= ((exponent_characters) & (exponent_characters - 1));
  // if they exist the exponent character must follow the decimal_characters
  // character
  error_sump |=
      ((exponent_characters - 1) & decimal_characters) ^ decimal_characters;
  // if the zero character is in first position, it
  // needs to be followed by the decimal
  error_sump |= found_zero ^ ((decimal_characters >> 1) & found_zero);
  // let us detect the sign characters
  m256 s_mask = _mm256_set1_epi8(0x10);
  m256 tmp_s =
      _mm256_cmpeq_epi8(_mm256_and_si256(tmp, s_mask), _mm256_set1_epi8(0));
  u32 sign_characters = ~(u32)_mm256_movemask_epi8(tmp_s);
  sign_characters &= number_mask;
  // any sign character must be followed by a digit
  error_sump |= (~(digit_characters >> 1)) & sign_characters;
  // there is at most one sign character
  error_sump |= ((sign_characters) & (sign_characters - 1));
  // a sign character is only legal immediately after the exponent character.
  // This rejects inputs such as "1+2", which would otherwise reach the
  // __builtin_ctz below with a zero argument (undefined).
  error_sump |= sign_characters & ~(exponent_characters << 1);
  // the exponent must be followed by either a sign character or a digit
  error_sump |=
      (~((digit_characters | sign_characters) >> 1)) & exponent_characters;
  if (error_sump != 0) {
#ifdef JSON_TEST_NUMBERS // for unit testing
    foundInvalidNumber(buf + offset);
#endif
    return false;
  }
  // so we have a nice float-point at this time; the checks above guarantee
  // that at least one '.' or exponent character is present
  const char *p = (const char *)src;
  // we start with digits followed by "." or "e" or "E".
  // scan them
  int integerpartlength =
      __builtin_ctz(exponent_characters | decimal_characters);
  const char *endjustinteger = p + integerpartlength;
  uint64_t integerpart = *p - '0'; // there must be at least one digit
  p++;
  for (; p != endjustinteger; p++) {
    integerpart = (integerpart * 10) + (*p - '0');
  }
  double result = integerpart;
  if (decimal_characters != 0) {
    p++; // skip the decimal point
    int mantissalength = __builtin_ctz(exponent_characters | enders);
    const char *endmantissa = (const char *)src + mantissalength;
    // NOTE(review): an empty fraction ("1.") is tolerated here although the
    // JSON grammar requires at least one digit after the point.
    int fracdigitcount = endmantissa - p; // could be zero!
    uint64_t fractionalpart = 0;          // there could be nothing
    for (; p != endmantissa; p++) {
      fractionalpart = (fractionalpart * 10) + (*p - '0');
    }
    // power_of_ten[308] is 1e0, so this scales by 10^-fracdigitcount
    result += fractionalpart * power_of_ten[308 - fracdigitcount];
  }
  if (exponent_characters != 0) {
    p++; // skip exponent
    int sign = +1;
    if (p[0] == '+')
      p++;
    if (p[0] == '-') {
      p++;
      sign = -1;
    }
    int stringlength = __builtin_ctz(~number_mask);
    const char *endnumber = (const char *)src + stringlength;
    uint64_t exppart = *p - '0';
    p++;
    for (; p != endnumber; p++) {
      exppart = (exppart * 10) + (*p - '0');
    }
    // exponents beyond the range of the power_of_ten table are rejected
    if (exppart > 308) {
#ifdef JSON_TEST_NUMBERS // for unit testing
      foundInvalidNumber(buf + offset);
#endif
      return false;
    }
    // betting that these branches are highly predictible
    // could use arithmetic instead
    if (sign == 1) {
      result = result * power_of_ten[308 + exppart];
    } else {
      result = result * power_of_ten[308 - exppart];
    }
  }
  if (found_minus) {
    result = -result;
  }
#ifdef JSON_TEST_NUMBERS // for unit testing
  foundFloat(result, buf + offset);
#endif
  pj.write_tape_double(depth, result);
  return true;
}
#endif