Issue182: fixed (#183)

* Verifying issue 182.

* Fixing the corresponding bug.
This commit is contained in:
Daniel Lemire 2019-06-05 18:51:29 -04:00 committed by GitHub
parent b32c72f1fc
commit 59194dcf4d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 42 additions and 14 deletions

View File

@ -314,6 +314,7 @@ _We do not aim to provide a general-purpose JSON library._ A library like RapidJ
- The input string is unmodified. (Parsers like sajson and RapidJSON use the input string as a buffer.)
- We parse integers and floating-point numbers as separate types which allows us to support large 64-bit integers in [-9223372036854775808,9223372036854775808), like a Java `long` or a C/C++ `long long`. Among the parsers that differentiate between integers and floating-point numbers, not all support 64-bit integers. (For example, sajson rejects JSON files with integers larger than or equal to 2147483648. RapidJSON will parse a file containing an overly long integer like 18446744073709551616 as a floating-point number.) When we cannot represent exactly an integer as a signed 64-bit value, we reject the JSON document.
- We support the full range of 64-bit floating-point numbers (binary64). The values range from `std::numeric_limits<double>::lowest()` to `std::numeric_limits<double>::max()`, so from -1.7976e308 all the way to 1.7976e308. Extreme values (less than or equal to -1e308, greater than or equal to 1e308) are rejected: we refuse to parse the input document.
- We do full UTF-8 validation as part of the parsing. (Parsers like fastjson, gason and dropbox json11 do not do UTF-8 validation.)
- We fully validate the numbers. (Parsers like gason and ultrajson will accept `[0e+]` as valid JSON.)
- We validate string content for unescaped characters. (Parsers like fastjson and ultrajson accept unescaped line breaks and tabs in strings.)

View File

@ -6,6 +6,10 @@
#include "simdjson/parsedjson.h"
#include "simdjson/portability.h"
// Allowable floating-point values range from std::numeric_limits<double>::lowest()
// to std::numeric_limits<double>::max(), so from
// -1.7976e308 all the way to 1.7976e308 in binary64. The smallest positive
// normal value is std::numeric_limits<double>::min(), or about 2.225074e-308.
static const double power_of_ten[] = {
1e-308, 1e-307, 1e-306, 1e-305, 1e-304, 1e-303, 1e-302, 1e-301, 1e-300,
1e-299, 1e-298, 1e-297, 1e-296, 1e-295, 1e-294, 1e-293, 1e-292, 1e-291,
@ -163,6 +167,15 @@ static inline uint32_t parse_eight_digits_unrolled(const char *chars) {
#endif
//
// Slow-path helper: returns base * 10^negative_exponent.
// The scaling is applied in two steps: first by the constant 1e-308, then by
// the remaining power of ten, 10^(negative_exponent + 308), obtained from pow().
// The visible caller passes -expnumber with expnumber > 308, i.e. results
// that are either subnormal or zero.
static double subnormal_power10(double base, int negative_exponent) {
  // pow() is not expected to be fast; this path is meant to be rare.
  const double scaled_base = base * 1e-308;
  const double remaining_scale = pow(10, negative_exponent + 308);
  return scaled_base * remaining_scale;
}
// called by parse_number when we know that the output is a float,
// but where there might be some integer overflow. The trick here is to
// parse using floats from the start.
@ -258,15 +271,27 @@ parse_float(const uint8_t *const buf,
#endif
return false;
}
if (expnumber > 308) {
if (unlikely(expnumber > 308)) {
// this path is unlikely
if(negexp) {
// We either have zero or a subnormal.
// We expect this to be uncommon so we go through a slow path.
i = subnormal_power10(i, - expnumber);
} else {
// We know for sure that we have a number that is too large,
// we refuse to parse this
#ifdef JSON_TEST_NUMBERS // for unit testing
foundInvalidNumber(buf + offset);
foundInvalidNumber(buf + offset);
#endif
return false;
}
int exponent = (negexp ? -expnumber : expnumber);
i *= power_of_ten[308 + exponent];
return false;
}
} else {
int exponent = (negexp ? -expnumber : expnumber);
// we have that expnumber is [0,308] so that
// exponent is [-308,308] so that
// 308 + exponent is in [0, 2 * 308]
i *= power_of_ten[308 + exponent];
}
}
if(is_not_structural_or_whitespace(*p)) {
return false;
@ -474,13 +499,6 @@ static really_inline bool parse_number(const uint8_t *const buf,
#endif
return false;
}
if(expnumber > 308) {
// we refuse to parse this
#ifdef JSON_TEST_NUMBERS // for unit testing
foundInvalidNumber(buf + offset);
#endif
return false;
}
exponent += (negexp ? -expnumber : expnumber);
}
if ((exponent != 0) || (expnumber != 0)) {
@ -501,7 +519,15 @@ static really_inline bool parse_number(const uint8_t *const buf,
} else {
double d = i;
d = negative ? -d : d;
d *= power_of_ten[308 + exponent];
uint64_t powerindex = 308 + exponent;
if(likely(powerindex <= 2 * 308)) {
// common case
d *= power_of_ten[powerindex];
} else {
// this is uncommon so let us move this special case out
// of the main loop
return parse_float(buf, pj, offset,found_minus);
}
pj.write_tape_double(d);
#ifdef JSON_TEST_NUMBERS // for unit testing
foundFloat(d, buf + offset);

1
jsonchecker/pass17.json Normal file
View File

@ -0,0 +1 @@
[1.0e-308,0.1e-308,0.01e-307,1.79769e+308,2.22507e-308,-1.79769e+308,-2.22507e-308]