Encapsulate significant_digits()

This commit is contained in:
John Keiser 2020-08-12 14:54:41 -07:00
parent eb3e640003
commit 988c62baed
1 changed file with 31 additions and 30 deletions

View File

@ -8,14 +8,14 @@ namespace numberparsing {
#ifdef JSON_TEST_NUMBERS #ifdef JSON_TEST_NUMBERS
#define INVALID_NUMBER(SRC) (found_invalid_number((SRC)), NUMBER_ERROR) #define INVALID_NUMBER(SRC) (found_invalid_number((SRC)), NUMBER_ERROR)
#define WRITE_INTEGER(VALUE, SRC, WRITER) (found_integer((VALUE), (SRC)), writer.append_s64((VALUE))) #define WRITE_INTEGER(VALUE, SRC, WRITER) (found_integer((VALUE), (SRC)), (WRITER).append_s64((VALUE)))
#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (found_unsigned_integer((VALUE), (SRC)), writer.append_u64((VALUE))) #define WRITE_UNSIGNED(VALUE, SRC, WRITER) (found_unsigned_integer((VALUE), (SRC)), (WRITER).append_u64((VALUE)))
#define WRITE_DOUBLE(VALUE, SRC, WRITER) (found_float((VALUE), (SRC)), writer.append_double((VALUE))) #define WRITE_DOUBLE(VALUE, SRC, WRITER) (found_float((VALUE), (SRC)), (WRITER).append_double((VALUE)))
#else #else
#define INVALID_NUMBER(SRC) (NUMBER_ERROR) #define INVALID_NUMBER(SRC) (NUMBER_ERROR)
#define WRITE_INTEGER(VALUE, SRC, WRITER) writer.append_s64((VALUE)) #define WRITE_INTEGER(VALUE, SRC, WRITER) (WRITER).append_s64((VALUE))
#define WRITE_UNSIGNED(VALUE, SRC, WRITER) writer.append_u64((VALUE)) #define WRITE_UNSIGNED(VALUE, SRC, WRITER) (WRITER).append_u64((VALUE))
#define WRITE_DOUBLE(VALUE, SRC, WRITER) writer.append_double((VALUE)) #define WRITE_DOUBLE(VALUE, SRC, WRITER) (WRITER).append_double((VALUE))
#endif #endif
// Attempts to compute i * 10^(power) exactly; and if "negative" is // Attempts to compute i * 10^(power) exactly; and if "negative" is
@ -250,7 +250,7 @@ template<typename W>
error_code slow_float_parsing(SIMDJSON_UNUSED const uint8_t * src, W writer) { error_code slow_float_parsing(SIMDJSON_UNUSED const uint8_t * src, W writer) {
double d; double d;
if (parse_float_strtod(src, &d)) { if (parse_float_strtod(src, &d)) {
WRITE_DOUBLE(d, src, writer); writer.append_double(d);
return SUCCESS; return SUCCESS;
} }
return INVALID_NUMBER(src); return INVALID_NUMBER(src);
@ -345,35 +345,36 @@ simdjson_really_inline error_code parse_exponent(SIMDJSON_UNUSED const uint8_t *
return SUCCESS; return SUCCESS;
} }
// Returns `digit_count` minus the number of leading '0' and '.' bytes found
// at `start_digits`.
//
// start_digits: pointer to the first byte of the number's digits.
// digit_count:  number of bytes counted for the number by the caller.
simdjson_really_inline int significant_digits(const uint8_t * start_digits, int digit_count) {
  // A long run of digits may mean the integer overflowed, but inputs shaped
  // like 0.0000somenumber are long only because of their leading zeros, so
  // those zeros (and the decimal point) are skipped before counting.
  const uint8_t *first_significant = start_digits;
  for (;;) {
    const uint8_t current = *first_significant;
    if (current != '0' && current != '.') { break; }
    ++first_significant;
  }
  // A skipped '.' is subtracted together with the zeros, so the count is
  // decremented by one extra when a decimal point was passed over.
  const int skipped = int(first_significant - start_digits);
  return digit_count - skipped;
}
template<typename W> template<typename W>
simdjson_really_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, int digit_count, int64_t exponent, W &writer) { simdjson_really_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, int digit_count, int64_t exponent, W &writer) {
// If we frequently had to deal with long strings of digits, // If we frequently had to deal with long strings of digits,
// we could extend our code by using a 128-bit integer instead // we could extend our code by using a 128-bit integer instead
// of a 64-bit integer. However, this is uncommon in practice. // of a 64-bit integer. However, this is uncommon in practice.
// digit count is off by 1 because of the decimal (assuming there was one). // digit count is off by 1 because of the decimal (assuming there was one).
if (simdjson_unlikely((digit_count-1 >= 19))) { // this is uncommon if (simdjson_unlikely(digit_count-1 >= 19 && significant_digits(start_digits, digit_count) >= 19)) {
// It is possible that the integer had an overflow. // Ok, chances are good that we had an overflow!
// We have to handle the case where we have 0.0000somenumber. // this is almost never going to get called!!!
const uint8_t *start = start_digits; // we start anew, going slowly!!!
while ((*start == '0') || (*start == '.')) { // This will happen in the following examples:
start++; // 10000000000000000000000000000000000000000000e+308
} // 3.1415926535897932384626433832795028841971693993751
// we over-decrement by one when there is a '.' //
digit_count -= int(start - start_digits); error_code error = slow_float_parsing(src, writer);
if (digit_count >= 19) { // The number was already written, but we made a copy of the writer
// Ok, chances are good that we had an overflow! // when we passed it to the parse_large_integer() function, so
// this is almost never going to get called!!! writer.skip_double();
// we start anew, going slowly!!! return error;
// This will happen in the following examples:
// 10000000000000000000000000000000000000000000e+308
// 3.1415926535897932384626433832795028841971693993751
//
error_code error = slow_float_parsing(src, writer);
// The number was already written, but we made a copy of the writer
// when we passed it to the parse_large_integer() function, so
writer.skip_double();
return error;
}
} }
// NOTE: it's weird that the simdjson_unlikely() only wraps half the if, but it seems to get slower any other // NOTE: it's weird that the simdjson_unlikely() only wraps half the if, but it seems to get slower any other
// way we've tried: https://github.com/simdjson/simdjson/pull/990#discussion_r448497331 // way we've tried: https://github.com/simdjson/simdjson/pull/990#discussion_r448497331