From d0ce2f0b5a6b93707bf6872cc591f32cf5d9eef2 Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Mon, 6 Jul 2020 18:58:19 -0400 Subject: [PATCH] Fixing clang under visual studio (#1028) * Lots of fixes * Removing some lambdas * Removing some functional programming. Co-authored-by: Daniel Lemire --- .github/workflows/vs16-clang-ci.yml | 25 + benchmark/benchmark.h | 2 - singleheader/amalgamate_demo.cpp | 2 +- singleheader/simdjson.cpp | 2319 +++++++++----------- singleheader/simdjson.h | 2 +- src/arm64/dom_parser_implementation.cpp | 43 +- src/arm64/simd.h | 58 +- src/generic/stage2/structural_iterator.h | 26 +- src/generic/stage2/structural_parser.h | 44 +- src/haswell/dom_parser_implementation.cpp | 19 +- src/haswell/simd.h | 45 +- src/westmere/dom_parser_implementation.cpp | 22 +- src/westmere/simd.h | 58 +- 13 files changed, 1165 insertions(+), 1500 deletions(-) create mode 100644 .github/workflows/vs16-clang-ci.yml diff --git a/.github/workflows/vs16-clang-ci.yml b/.github/workflows/vs16-clang-ci.yml new file mode 100644 index 00000000..164c7eca --- /dev/null +++ b/.github/workflows/vs16-clang-ci.yml @@ -0,0 +1,25 @@ +name: VS16-CLANG-CI + +on: push + +jobs: + ci: + name: windows-vs16 + runs-on: windows-latest + steps: + - uses: actions/checkout@v2 + - name: 'Run CMake with VS16' + uses: lukka/run-cmake@v2 + with: + cmakeListsOrSettingsJson: CMakeListsTxtAdvanced + cmakeListsTxtPath: '${{ github.workspace }}/CMakeLists.txt' + buildDirectory: "${{ github.workspace }}/../../_temp/windows" + cmakeBuildType: Release + buildWithCMake: true + cmakeGenerator: VS16Win64 + cmakeAppendedArgs: -T ClangCL -DSIMDJSON_COMPETITION=OFF -DSIMDJSON_BUILD_STATIC=ON + buildWithCMakeArgs: --config Release + + - name: 'Run CTest' + run: ctest -C Release -E checkperf --output-on-failure + working-directory: "${{ github.workspace }}/../../_temp/windows" \ No newline at end of file diff --git a/benchmark/benchmark.h b/benchmark/benchmark.h index a1213ce6..836f8507 100644 --- 
a/benchmark/benchmark.h +++ b/benchmark/benchmark.h @@ -30,7 +30,6 @@ event_count allocate_count = collector.end(); \ aggregate << allocate_count; \ } \ - uint64_t S = size; \ if (collector.has_events()) { \ printf("%7.3f", aggregate.best.cycles() / static_cast(size)); \ if (verbose) { \ @@ -76,7 +75,6 @@ event_count allocate_count = collector.end(); \ aggregate << allocate_count; \ } \ - uint64_t S = size; \ if (collector.has_events()) { \ printf("%7.3f", aggregate.best.cycles() / static_cast(size)); \ if (verbose) { \ diff --git a/singleheader/amalgamate_demo.cpp b/singleheader/amalgamate_demo.cpp index c4560272..e485458f 100644 --- a/singleheader/amalgamate_demo.cpp +++ b/singleheader/amalgamate_demo.cpp @@ -1,4 +1,4 @@ -/* auto-generated on Wed Jul 1 14:00:57 EDT 2020. Do not edit! */ +/* auto-generated on Mon Jul 6 18:16:52 EDT 2020. Do not edit! */ #include #include "simdjson.h" diff --git a/singleheader/simdjson.cpp b/singleheader/simdjson.cpp index 7f9449b1..dae19e04 100644 --- a/singleheader/simdjson.cpp +++ b/singleheader/simdjson.cpp @@ -1,4 +1,4 @@ -/* auto-generated on Wed Jul 1 14:00:57 EDT 2020. Do not edit! */ +/* auto-generated on Mon Jul 6 18:16:52 EDT 2020. Do not edit! 
*/ /* begin file src/simdjson.cpp */ #include "simdjson.h" @@ -2677,43 +2677,6 @@ really_inline int8x16_t make_int8x16_t(int8_t x1, int8_t x2, int8_t x3, int8_ each(3); } - template - really_inline void each(F const& each_chunk) const - { - each_chunk(this->chunks[0]); - each_chunk(this->chunks[1]); - each_chunk(this->chunks[2]); - each_chunk(this->chunks[3]); - } - - template - really_inline simd8x64 map(F const& map_chunk) const { - return simd8x64( - map_chunk(this->chunks[0]), - map_chunk(this->chunks[1]), - map_chunk(this->chunks[2]), - map_chunk(this->chunks[3]) - ); - } - - template - really_inline simd8x64 map(const simd8x64 b, F const& map_chunk) const { - return simd8x64( - map_chunk(this->chunks[0], b.chunks[0]), - map_chunk(this->chunks[1], b.chunks[1]), - map_chunk(this->chunks[2], b.chunks[2]), - map_chunk(this->chunks[3], b.chunks[3]) - ); - } - - template - really_inline simd8 reduce(F const& reduce_pair) const { - return reduce_pair( - reduce_pair(this->chunks[0], this->chunks[1]), - reduce_pair(this->chunks[2], this->chunks[3]) - ); - } - really_inline uint64_t to_bitmask() const { #ifdef SIMDJSON_REGULAR_VISUAL_STUDIO const uint8x16_t bit_mask = make_uint8x16_t( @@ -2736,17 +2699,32 @@ really_inline int8x16_t make_int8x16_t(int8_t x1, int8_t x2, int8_t x3, int8_ really_inline simd8x64 bit_or(const T m) const { const simd8 mask = simd8::splat(m); - return this->map( [&](simd8 a) { return a | mask; } ); + return simd8x64( + this->chunks[0] | mask, + this->chunks[1] | mask, + this->chunks[2] | mask, + this->chunks[3] | mask + ); } really_inline uint64_t eq(const T m) const { const simd8 mask = simd8::splat(m); - return this->map( [&](simd8 a) { return a == mask; } ).to_bitmask(); + return simd8x64( + this->chunks[0] == mask, + this->chunks[1] == mask, + this->chunks[2] == mask, + this->chunks[3] == mask + ).to_bitmask(); } really_inline uint64_t lteq(const T m) const { const simd8 mask = simd8::splat(m); - return this->map( [&](simd8 a) { return a 
<= mask; } ).to_bitmask(); + return simd8x64( + this->chunks[0] <= mask, + this->chunks[1] <= mask, + this->chunks[2] <= mask, + this->chunks[3] <= mask + ).to_bitmask(); } }; // struct simd8x64 @@ -2775,13 +2753,24 @@ struct json_character_block { }; really_inline json_character_block json_character_block::classify(const simd::simd8x64 in) { - auto v = in.map([&](simd8 chunk) { - auto nib_lo = chunk & 0xf; - auto nib_hi = chunk.shr<4>(); - auto shuf_lo = nib_lo.lookup_16(16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0); - auto shuf_hi = nib_hi.lookup_16(8, 0, 18, 4, 0, 1, 0, 1, 0, 0, 0, 3, 2, 1, 0, 0); - return shuf_lo & shuf_hi; - }); + // Functional programming causes trouble with Visual Studio. + // Keeping this version in comments since it is much nicer: + // auto v = in.map([&](simd8 chunk) { + // auto nib_lo = chunk & 0xf; + // auto nib_hi = chunk.shr<4>(); + // auto shuf_lo = nib_lo.lookup_16(16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0); + // auto shuf_hi = nib_hi.lookup_16(8, 0, 18, 4, 0, 1, 0, 1, 0, 0, 0, 3, 2, 1, 0, 0); + // return shuf_lo & shuf_hi; + // }); + const simd8 table1(16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0); + const simd8 table2(8, 0, 18, 4, 0, 1, 0, 1, 0, 0, 0, 3, 2, 1, 0, 0); + + auto v = simd8x64( + (in.chunks[0] & 0xf).lookup_16(table1) & (in.chunks[0].shr<4>()).lookup_16(table2), + (in.chunks[1] & 0xf).lookup_16(table1) & (in.chunks[1].shr<4>()).lookup_16(table2), + (in.chunks[2] & 0xf).lookup_16(table1) & (in.chunks[2].shr<4>()).lookup_16(table2), + (in.chunks[3] & 0xf).lookup_16(table1) & (in.chunks[3].shr<4>()).lookup_16(table2) + ); // We compute whitespace and op separately. If the code later only use one or the @@ -2800,13 +2789,25 @@ really_inline json_character_block json_character_block::classify(const simd::si // there is a small untaken optimization opportunity here. We deliberately // do not pick it up. 
- uint64_t op = v.map([&](simd8 _v) { return _v.any_bits_set(0x7); }).to_bitmask(); - uint64_t whitespace = v.map([&](simd8 _v) { return _v.any_bits_set(0x18); }).to_bitmask(); + uint64_t op = simd8x64( + v.chunks[0].any_bits_set(0x7), + v.chunks[1].any_bits_set(0x7), + v.chunks[2].any_bits_set(0x7), + v.chunks[3].any_bits_set(0x7) + ).to_bitmask(); + + uint64_t whitespace = simd8x64( + v.chunks[0].any_bits_set(0x18), + v.chunks[1].any_bits_set(0x18), + v.chunks[2].any_bits_set(0x18), + v.chunks[3].any_bits_set(0x18) + ).to_bitmask(); + return { whitespace, op }; } really_inline bool is_ascii(simd8x64 input) { - simd8 bits = input.reduce([&](simd8 a,simd8 b) { return a|b; }); + simd8 bits = (input.chunks[0] | input.chunks[1]) | (input.chunks[2] | input.chunks[3]); return bits.max() < 0b10000000u; } @@ -4031,14 +4032,25 @@ static inline uint32_t parse_eight_digits_unrolled(const char *chars) { namespace stage2 { namespace numberparsing { +#ifdef JSON_TEST_NUMBERS +#define INVALID_NUMBER(SRC) (found_invalid_number((SRC)), false) +#define WRITE_INTEGER(VALUE, SRC, WRITER) (found_integer((VALUE), (SRC)), writer.append_s64((VALUE))) +#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (found_unsigned_integer((VALUE), (SRC)), writer.append_u64((VALUE))) +#define WRITE_DOUBLE(VALUE, SRC, WRITER) (found_float((VALUE), (SRC)), writer.append_double((VALUE))) +#else +#define INVALID_NUMBER(SRC) (false) +#define WRITE_INTEGER(VALUE, SRC, WRITER) writer.append_s64((VALUE)) +#define WRITE_UNSIGNED(VALUE, SRC, WRITER) writer.append_u64((VALUE)) +#define WRITE_DOUBLE(VALUE, SRC, WRITER) writer.append_double((VALUE)) +#endif + // Attempts to compute i * 10^(power) exactly; and if "negative" is // true, negate the result. // This function will only work in some cases, when it does not work, success is // set to false. This should work *most of the time* (like 99% of the time). 
// We assume that power is in the [FASTFLOAT_SMALLEST_POWER, // FASTFLOAT_LARGEST_POWER] interval: the caller is responsible for this check. -really_inline double compute_float_64(int64_t power, uint64_t i, bool negative, - bool *success) { +really_inline double compute_float_64(int64_t power, uint64_t i, bool negative, bool *success) { // we start with a fast path // It was described in // Clinger WD. How to read floating point numbers accurately. @@ -4271,109 +4283,132 @@ really_inline bool is_made_of_eight_digits_fast(const char *chars) { 0x3333333333333333); } -// called by parse_number when we know that the output is an integer, -// but where there might be some integer overflow. -// we want to catch overflows! -// Do not call this function directly as it skips some of the checks from -// parse_number -// -// This function will almost never be called!!! -// -template -never_inline bool parse_large_integer(const uint8_t *const src, - W writer, - bool found_minus) { - const char *p = reinterpret_cast(src); - - bool negative = false; - if (found_minus) { - ++p; - negative = true; - } - uint64_t i; - if (*p == '0') { // 0 cannot be followed by an integer - ++p; - i = 0; - } else { - unsigned char digit = static_cast(*p - '0'); - i = digit; - p++; - // the is_made_of_eight_digits_fast routine is unlikely to help here because - // we rarely see large integer parts like 123456789 - while (is_integer(*p)) { - digit = static_cast(*p - '0'); - if (mul_overflow(i, 10, &i)) { -#ifdef JSON_TEST_NUMBERS // for unit testing - found_invalid_number(src); -#endif - return false; // overflow - } - if (add_overflow(i, digit, &i)) { -#ifdef JSON_TEST_NUMBERS // for unit testing - found_invalid_number(src); -#endif - return false; // overflow - } - ++p; - } - } - if (negative) { - if (i > 0x8000000000000000) { - // overflows! 
-#ifdef JSON_TEST_NUMBERS // for unit testing - found_invalid_number(src); -#endif - return false; // overflow - } else if (i == 0x8000000000000000) { - // In two's complement, we cannot represent 0x8000000000000000 - // as a positive signed integer, but the negative version is - // possible. - constexpr int64_t signed_answer = INT64_MIN; - writer.append_s64(signed_answer); -#ifdef JSON_TEST_NUMBERS // for unit testing - found_integer(signed_answer, src); -#endif - } else { - // we can negate safely - int64_t signed_answer = -static_cast(i); - writer.append_s64(signed_answer); -#ifdef JSON_TEST_NUMBERS // for unit testing - found_integer(signed_answer, src); -#endif - } - } else { - // we have a positive integer, the contract is that - // we try to represent it as a signed integer and only - // fallback on unsigned integers if absolutely necessary. - if (i < 0x8000000000000000) { -#ifdef JSON_TEST_NUMBERS // for unit testing - found_integer(i, src); -#endif - writer.append_s64(i); - } else { -#ifdef JSON_TEST_NUMBERS // for unit testing - found_unsigned_integer(i, src); -#endif - writer.append_u64(i); - } - } - return is_structural_or_whitespace(*p); -} - template bool slow_float_parsing(UNUSED const char * src, W writer) { double d; if (parse_float_strtod(src, &d)) { - writer.append_double(d); -#ifdef JSON_TEST_NUMBERS // for unit testing - found_float(d, (const uint8_t *)src); -#endif + WRITE_DOUBLE(d, (const uint8_t *)src, writer); return true; } -#ifdef JSON_TEST_NUMBERS // for unit testing - found_invalid_number((const uint8_t *)src); + return INVALID_NUMBER((const uint8_t *)src); +} + +really_inline bool parse_decimal(UNUSED const uint8_t *const src, const char *&p, uint64_t &i, int64_t &exponent) { + // we continue with the fiction that we have an integer. If the + // floating point number is representable as x * 10^z for some integer + // z that fits in 53 bits, then we will be able to convert back the + // the integer into a float in a lossless manner. 
+ const char *const first_after_period = p; + if (!is_integer(*p)) { return INVALID_NUMBER(src); } // There must be at least one digit after the . + + unsigned char digit = static_cast(*p - '0'); + ++p; + i = i * 10 + digit; // might overflow + multiplication by 10 is likely + // cheaper than arbitrary mult. + // we will handle the overflow later +#ifdef SWAR_NUMBER_PARSING + // this helps if we have lots of decimals! + // this turns out to be frequent enough. + if (is_made_of_eight_digits_fast(p)) { + i = i * 100000000 + parse_eight_digits_unrolled(p); + p += 8; + } #endif - return false; + while (is_integer(*p)) { + digit = static_cast(*p - '0'); + ++p; + i = i * 10 + digit; // in rare cases, this will overflow, but that's ok + // because we have parse_highprecision_float later. + } + exponent = first_after_period - p; + return true; +} + +really_inline bool parse_exponent(UNUSED const uint8_t *const src, const char *&p, int64_t &exponent) { + bool neg_exp = false; + if ('-' == *p) { + neg_exp = true; + ++p; + } else if ('+' == *p) { + ++p; + } + + // e[+-] must be followed by a number + if (!is_integer(*p)) { return INVALID_NUMBER(src); } + unsigned char digit = static_cast(*p - '0'); + int64_t exp_number = digit; + p++; + if (is_integer(*p)) { + digit = static_cast(*p - '0'); + exp_number = 10 * exp_number + digit; + ++p; + } + if (is_integer(*p)) { + digit = static_cast(*p - '0'); + exp_number = 10 * exp_number + digit; + ++p; + } + while (is_integer(*p)) { + // we need to check for overflows; we refuse to parse this + if (exp_number > 0x100000000) { return INVALID_NUMBER(src); } + digit = static_cast(*p - '0'); + exp_number = 10 * exp_number + digit; + ++p; + } + exponent += (neg_exp ? 
-exp_number : exp_number); + return true; +} + +template +really_inline bool write_float(const uint8_t *const src, bool negative, uint64_t i, const char * start_digits, int digit_count, int64_t exponent, W &writer) { + // If we frequently had to deal with long strings of digits, + // we could extend our code by using a 128-bit integer instead + // of a 64-bit integer. However, this is uncommon in practice. + // digit count is off by 1 because of the decimal (assuming there was one). + if (unlikely((digit_count-1 >= 19))) { // this is uncommon + // It is possible that the integer had an overflow. + // We have to handle the case where we have 0.0000somenumber. + const char *start = start_digits; + while ((*start == '0') || (*start == '.')) { + start++; + } + // we over-decrement by one when there is a '.' + digit_count -= int(start - start_digits); + if (digit_count >= 19) { + // Ok, chances are good that we had an overflow! + // this is almost never going to get called!!! + // we start anew, going slowly!!! + // This will happen in the following examples: + // 10000000000000000000000000000000000000000000e+308 + // 3.1415926535897932384626433832795028841971693993751 + // + bool success = slow_float_parsing((const char *) src, writer); + // The number was already written, but we made a copy of the writer when we passed it to the + // slow_float_parsing() function, so we have to skip those tape spots now that we've returned + writer.skip_double(); + return success; + } + } + // NOTE: it's weird that the unlikely() only wraps half the if, but it seems to get slower any other + // way we've tried: https://github.com/simdjson/simdjson/pull/990#discussion_r448497331 + // To future reader: we'd love if someone found a better way, or at least could explain this result! + if (unlikely(exponent < FASTFLOAT_SMALLEST_POWER) || (exponent > FASTFLOAT_LARGEST_POWER)) { + // this is almost never going to get called!!! + // we start anew, going slowly!!! 
+ bool success = slow_float_parsing((const char *) src, writer); + // The number was already written, but we made a copy of the writer when we passed it to the + // slow_float_parsing() function, so we have to skip those tape spots now that we've returned + writer.skip_double(); + return success; + } + bool success = true; + double d = compute_float_64(exponent, i, negative, &success); + if (!success) { + // we are almost never going to get here. + if (!parse_float_strtod((const char *)src, &d)) { return INVALID_NUMBER(src); } + } + WRITE_DOUBLE(d, src, writer); + return true; } // parse the number at src @@ -4399,32 +4434,22 @@ really_inline bool parse_number(UNUSED const uint8_t *const src, if (found_minus) { ++p; negative = true; - if (!is_integer(*p)) { // a negative sign must be followed by an integer -#ifdef JSON_TEST_NUMBERS // for unit testing - found_invalid_number(src); -#endif - return false; - } + // a negative sign must be followed by an integer + if (!is_integer(*p)) { return INVALID_NUMBER(src); } } const char *const start_digits = p; uint64_t i; // an unsigned int avoids signed overflows (which are bad) - if (*p == '0') { // 0 cannot be followed by an integer + if (*p == '0') { ++p; - if (is_integer(*p)) { -#ifdef JSON_TEST_NUMBERS // for unit testing - found_invalid_number(src); -#endif - return false; - } + if (is_integer(*p)) { return INVALID_NUMBER(src); } // 0 cannot be followed by an integer i = 0; } else { - if (!(is_integer(*p))) { // must start with an integer -#ifdef JSON_TEST_NUMBERS // for unit testing - found_invalid_number(src); -#endif - return false; - } + // NOTE: This is a redundant check--either we're negative, in which case we checked whether this + // is a digit above, or the caller already determined we start with a digit. 
But removing this + // check seems to make things slower: https://github.com/simdjson/simdjson/pull/990#discussion_r448512448 + // Please do try yourself, or think of ways to explain it--we'd love to understand :) + if (!is_integer(*p)) { return INVALID_NUMBER(src); } // must start with an integer unsigned char digit = static_cast(*p - '0'); i = digit; p++; @@ -4438,163 +4463,67 @@ really_inline bool parse_number(UNUSED const uint8_t *const src, ++p; } } + + // + // Handle floats if there is a . or e (or both) + // int64_t exponent = 0; bool is_float = false; if ('.' == *p) { - is_float = true; // At this point we know that we have a float - // we continue with the fiction that we have an integer. If the - // floating point number is representable as x * 10^z for some integer - // z that fits in 53 bits, then we will be able to convert back the - // the integer into a float in a lossless manner. + is_float = true; ++p; - const char *const first_after_period = p; - if (is_integer(*p)) { - unsigned char digit = static_cast(*p - '0'); - ++p; - i = i * 10 + digit; // might overflow + multiplication by 10 is likely - // cheaper than arbitrary mult. - // we will handle the overflow later - } else { -#ifdef JSON_TEST_NUMBERS // for unit testing - found_invalid_number(src); -#endif - return false; - } -#ifdef SWAR_NUMBER_PARSING - // this helps if we have lots of decimals! - // this turns out to be frequent enough. - if (is_made_of_eight_digits_fast(p)) { - i = i * 100000000 + parse_eight_digits_unrolled(p); - p += 8; - } -#endif - while (is_integer(*p)) { - unsigned char digit = static_cast(*p - '0'); - ++p; - i = i * 10 + digit; // in rare cases, this will overflow, but that's ok - // because we have parse_highprecision_float later. 
- } - exponent = first_after_period - p; + if (!parse_decimal(src, p, i, exponent)) { return false; } } - int digit_count = - int(p - start_digits) - 1; // used later to guard against overflows - int64_t exp_number = 0; // exponential part + int digit_count = int(p - start_digits); // used later to guard against overflows if (('e' == *p) || ('E' == *p)) { is_float = true; ++p; - bool neg_exp = false; - if ('-' == *p) { - neg_exp = true; - ++p; - } else if ('+' == *p) { - ++p; - } - if (!is_integer(*p)) { -#ifdef JSON_TEST_NUMBERS // for unit testing - found_invalid_number(src); -#endif - return false; - } - unsigned char digit = static_cast(*p - '0'); - exp_number = digit; - p++; - if (is_integer(*p)) { - digit = static_cast(*p - '0'); - exp_number = 10 * exp_number + digit; - ++p; - } - if (is_integer(*p)) { - digit = static_cast(*p - '0'); - exp_number = 10 * exp_number + digit; - ++p; - } - while (is_integer(*p)) { - if (exp_number > 0x100000000) { // we need to check for overflows - // we refuse to parse this -#ifdef JSON_TEST_NUMBERS // for unit testing - found_invalid_number(src); -#endif - return false; - } - digit = static_cast(*p - '0'); - exp_number = 10 * exp_number + digit; - ++p; - } - exponent += (neg_exp ? -exp_number : exp_number); + if (!parse_exponent(src, p, exponent)) { return false; } } if (is_float) { - // If we frequently had to deal with long strings of digits, - // we could extend our code by using a 128-bit integer instead - // of a 64-bit integer. However, this is uncommon in practice. - if (unlikely((digit_count >= 19))) { // this is uncommon - // It is possible that the integer had an overflow. - // We have to handle the case where we have 0.0000somenumber. - const char *start = start_digits; - while ((*start == '0') || (*start == '.')) { - start++; - } - // we over-decrement by one when there is a '.' - digit_count -= int(start - start_digits); - if (digit_count >= 19) { - // Ok, chances are good that we had an overflow! 
- // this is almost never going to get called!!! - // we start anew, going slowly!!! - // This will happen in the following examples: - // 10000000000000000000000000000000000000000000e+308 - // 3.1415926535897932384626433832795028841971693993751 - // - bool success = slow_float_parsing((const char *) src, writer); - // The number was already written, but we made a copy of the writer - // when we passed it to the parse_large_integer() function, so - writer.skip_double(); - return success; - } - } - if (unlikely(exponent < FASTFLOAT_SMALLEST_POWER) || - (exponent > FASTFLOAT_LARGEST_POWER)) { // this is uncommon!!! - // this is almost never going to get called!!! - // we start anew, going slowly!!! - bool success = slow_float_parsing((const char *) src, writer); - // The number was already written, but we made a copy of the writer when we passed it to the - // slow_float_parsing() function, so we have to skip those tape spots now that we've returned - writer.skip_double(); - return success; - } - bool success = true; - double d = compute_float_64(exponent, i, negative, &success); - if (!success) { - // we are almost never going to get here. - success = parse_float_strtod((const char *)src, &d); - } - if (success) { - writer.append_double(d); -#ifdef JSON_TEST_NUMBERS // for unit testing - found_float(d, src); -#endif - return true; - } else { -#ifdef JSON_TEST_NUMBERS // for unit testing - found_invalid_number(src); -#endif - return false; + return write_float(src, negative, i, start_digits, digit_count, exponent, writer); + } + + // The longest negative 64-bit number is 19 digits. + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + int longest_digit_count = negative ? 19 : 20; + if (digit_count > longest_digit_count) { return INVALID_NUMBER(src); } + if (digit_count == longest_digit_count) { + // Anything negative above INT64_MAX is either invalid or INT64_MIN. 
+ if (negative && i > uint64_t(INT64_MAX)) { + // If the number is negative and can't fit in a signed integer, it's invalid. + if (i > uint64_t(INT64_MAX)+1) { return INVALID_NUMBER(src); } + + // If it's negative, it has to be INT64_MAX+1 now (or INT64_MIN). + // C++ can't reliably negate uint64_t INT64_MIN, it seems. Special case it. + WRITE_INTEGER(INT64_MIN, src, writer); + return is_structural_or_whitespace(*p); } + + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if the accumulated value is less than or equal to INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - INT64_MAX (9,223,372,036,854,775,807) lies between those two bounds, so testing i <= INT64_MAX detects the overflow. + // + if (!negative && (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX))) { return INVALID_NUMBER(src); } + } + + // Write unsigned if it doesn't fit in a signed integer. + if (i > uint64_t(INT64_MAX)) { + WRITE_UNSIGNED(i, src, writer); } else { - if (unlikely(digit_count >= 18)) { // this is uncommon!!! - // there is a good chance that we had an overflow, so we need - // need to recover: we parse the whole thing again. - bool success = parse_large_integer(src, writer, found_minus); - // The number was already written, but we made a copy of the writer - // when we passed it to the parse_large_integer() function, so - writer.skip_large_integer(); - return success; - } - i = negative ? 
0 - i : i; - writer.append_s64(i); -#ifdef JSON_TEST_NUMBERS // for unit testing - found_integer(i, src); -#endif + WRITE_INTEGER(negative ? 0 - i : i, src, writer); } return is_structural_or_whitespace(*p); + #endif // SIMDJSON_SKIPNUMBERPARSING } @@ -4764,31 +4693,7 @@ public: really_inline size_t remaining_len() { return parser.len - *current_structural; } - template - really_inline bool with_space_terminated_copy(const F& f) { - /** - * We need to make a copy to make sure that the string is space terminated. - * This is not about padding the input, which should already padded up - * to len + SIMDJSON_PADDING. However, we have no control at this stage - * on how the padding was done. What if the input string was padded with nulls? - * It is quite common for an input string to have an extra null character (C string). - * We do not want to allow 9\0 (where \0 is the null character) inside a JSON - * document, but the string "9\0" by itself is fine. So we make a copy and - * pad the input with spaces when we know that there is just one input element. - * This copy is relatively expensive, but it will almost never be called in - * practice unless you are in the strange scenario where you have many JSON - * documents made of single atoms. - */ - char *copy = static_cast(malloc(parser.len + SIMDJSON_PADDING)); - if (copy == nullptr) { - return true; - } - memcpy(copy, buf, parser.len); - memset(copy + parser.len, ' ', SIMDJSON_PADDING); - bool result = f(reinterpret_cast(copy), *current_structural); - free(copy); - return result; - } + really_inline bool past_end(uint32_t n_structural_indexes) { return current_structural >= &parser.structural_indexes[n_structural_indexes]; } @@ -5070,6 +4975,31 @@ struct structural_parser : structural_iterator { return parse_number(current(), found_minus); } + really_inline bool parse_number_with_space_terminated_copy(const bool is_negative) { + /** + * We need to make a copy to make sure that the string is space terminated. 
+ * This is not about padding the input, which should already be padded up + * to len + SIMDJSON_PADDING. However, we have no control at this stage + * on how the padding was done. What if the input string was padded with nulls? + * It is quite common for an input string to have an extra null character (C string). + * We do not want to allow 9\0 (where \0 is the null character) inside a JSON + * document, but the string "9\0" by itself is fine. So we make a copy and + * pad the input with spaces when we know that there is just one input element. + * This copy is relatively expensive, but it will almost never be called in + * practice unless you are in the strange scenario where you have many JSON + * documents made of single atoms. + */ + uint8_t *copy = static_cast(malloc(parser.len + SIMDJSON_PADDING)); + if (copy == nullptr) { + return true; + } + memcpy(copy, buf, parser.len); + memset(copy + parser.len, ' ', SIMDJSON_PADDING); + size_t idx = *current_structural; + bool result = parse_number(&copy[idx], is_negative); // parse_number does not throw + free(copy); + return result; + } WARN_UNUSED really_inline ret_address_t parse_value(const unified_machine_addresses &addresses, ret_address_t continue_state) { switch (advance_char()) { case '"': @@ -5207,6 +5137,7 @@ struct structural_parser : structural_iterator { #undef FAIL_IF #define FAIL_IF(EXPR) { if (EXPR) { goto error; } } + template WARN_UNUSED static error_code parse_structurals(dom_parser_implementation &dom_parser, dom::document &doc) noexcept { dom_parser.doc = &doc; @@ -5252,18 +5183,16 @@ WARN_UNUSED static error_code parse_structurals(dom_parser_implementation &dom_p goto finish; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': - FAIL_IF( - parser.with_space_terminated_copy([&](const uint8_t *copy, size_t idx) { - return parser.parse_number(&copy[idx], false); - }) - ); + // Next line used to be an interesting functional programming exercise with + // a 
lambda that gets passed to another function via a closure. This would confuse the + // clangcl compiler under Visual Studio 2019 (recent release). + { if(parser.parse_number_with_space_terminated_copy(false)) { goto error; }} goto finish; case '-': - FAIL_IF( - parser.with_space_terminated_copy([&](const uint8_t *copy, size_t idx) { - return parser.parse_number(&copy[idx], true); - }) - ); + // Next line used to be an interesting functional programming exercise with + // a lambda that gets passed to another function via a closure. This would confuse the + // clangcl compiler under Visual Studio 2019 (recent release). + { if(parser.parse_number_with_space_terminated_copy(true)) { goto error; }} goto finish; default: parser.log_error("Document starts with a non-value character"); @@ -6196,14 +6125,25 @@ static inline uint32_t parse_eight_digits_unrolled(const char *chars) { namespace stage2 { namespace numberparsing { +#ifdef JSON_TEST_NUMBERS +#define INVALID_NUMBER(SRC) (found_invalid_number((SRC)), false) +#define WRITE_INTEGER(VALUE, SRC, WRITER) (found_integer((VALUE), (SRC)), writer.append_s64((VALUE))) +#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (found_unsigned_integer((VALUE), (SRC)), writer.append_u64((VALUE))) +#define WRITE_DOUBLE(VALUE, SRC, WRITER) (found_float((VALUE), (SRC)), writer.append_double((VALUE))) +#else +#define INVALID_NUMBER(SRC) (false) +#define WRITE_INTEGER(VALUE, SRC, WRITER) writer.append_s64((VALUE)) +#define WRITE_UNSIGNED(VALUE, SRC, WRITER) writer.append_u64((VALUE)) +#define WRITE_DOUBLE(VALUE, SRC, WRITER) writer.append_double((VALUE)) +#endif + // Attempts to compute i * 10^(power) exactly; and if "negative" is // true, negate the result. // This function will only work in some cases, when it does not work, success is // set to false. This should work *most of the time* (like 99% of the time). // We assume that power is in the [FASTFLOAT_SMALLEST_POWER, // FASTFLOAT_LARGEST_POWER] interval: the caller is responsible for this check. 
-really_inline double compute_float_64(int64_t power, uint64_t i, bool negative, - bool *success) { +really_inline double compute_float_64(int64_t power, uint64_t i, bool negative, bool *success) { // we start with a fast path // It was described in // Clinger WD. How to read floating point numbers accurately. @@ -6436,109 +6376,132 @@ really_inline bool is_made_of_eight_digits_fast(const char *chars) { 0x3333333333333333); } -// called by parse_number when we know that the output is an integer, -// but where there might be some integer overflow. -// we want to catch overflows! -// Do not call this function directly as it skips some of the checks from -// parse_number -// -// This function will almost never be called!!! -// -template -never_inline bool parse_large_integer(const uint8_t *const src, - W writer, - bool found_minus) { - const char *p = reinterpret_cast(src); - - bool negative = false; - if (found_minus) { - ++p; - negative = true; - } - uint64_t i; - if (*p == '0') { // 0 cannot be followed by an integer - ++p; - i = 0; - } else { - unsigned char digit = static_cast(*p - '0'); - i = digit; - p++; - // the is_made_of_eight_digits_fast routine is unlikely to help here because - // we rarely see large integer parts like 123456789 - while (is_integer(*p)) { - digit = static_cast(*p - '0'); - if (mul_overflow(i, 10, &i)) { -#ifdef JSON_TEST_NUMBERS // for unit testing - found_invalid_number(src); -#endif - return false; // overflow - } - if (add_overflow(i, digit, &i)) { -#ifdef JSON_TEST_NUMBERS // for unit testing - found_invalid_number(src); -#endif - return false; // overflow - } - ++p; - } - } - if (negative) { - if (i > 0x8000000000000000) { - // overflows! -#ifdef JSON_TEST_NUMBERS // for unit testing - found_invalid_number(src); -#endif - return false; // overflow - } else if (i == 0x8000000000000000) { - // In two's complement, we cannot represent 0x8000000000000000 - // as a positive signed integer, but the negative version is - // possible. 
- constexpr int64_t signed_answer = INT64_MIN; - writer.append_s64(signed_answer); -#ifdef JSON_TEST_NUMBERS // for unit testing - found_integer(signed_answer, src); -#endif - } else { - // we can negate safely - int64_t signed_answer = -static_cast(i); - writer.append_s64(signed_answer); -#ifdef JSON_TEST_NUMBERS // for unit testing - found_integer(signed_answer, src); -#endif - } - } else { - // we have a positive integer, the contract is that - // we try to represent it as a signed integer and only - // fallback on unsigned integers if absolutely necessary. - if (i < 0x8000000000000000) { -#ifdef JSON_TEST_NUMBERS // for unit testing - found_integer(i, src); -#endif - writer.append_s64(i); - } else { -#ifdef JSON_TEST_NUMBERS // for unit testing - found_unsigned_integer(i, src); -#endif - writer.append_u64(i); - } - } - return is_structural_or_whitespace(*p); -} - template bool slow_float_parsing(UNUSED const char * src, W writer) { double d; if (parse_float_strtod(src, &d)) { - writer.append_double(d); -#ifdef JSON_TEST_NUMBERS // for unit testing - found_float(d, (const uint8_t *)src); -#endif + WRITE_DOUBLE(d, (const uint8_t *)src, writer); return true; } -#ifdef JSON_TEST_NUMBERS // for unit testing - found_invalid_number((const uint8_t *)src); + return INVALID_NUMBER((const uint8_t *)src); +} + +really_inline bool parse_decimal(UNUSED const uint8_t *const src, const char *&p, uint64_t &i, int64_t &exponent) { + // we continue with the fiction that we have an integer. If the + // floating point number is representable as x * 10^z for some integer + // z that fits in 53 bits, then we will be able to convert back the + // the integer into a float in a lossless manner. + const char *const first_after_period = p; + if (!is_integer(*p)) { return INVALID_NUMBER(src); } // There must be at least one digit after the . 
+ + unsigned char digit = static_cast(*p - '0'); + ++p; + i = i * 10 + digit; // might overflow + multiplication by 10 is likely + // cheaper than arbitrary mult. + // we will handle the overflow later +#ifdef SWAR_NUMBER_PARSING + // this helps if we have lots of decimals! + // this turns out to be frequent enough. + if (is_made_of_eight_digits_fast(p)) { + i = i * 100000000 + parse_eight_digits_unrolled(p); + p += 8; + } #endif - return false; + while (is_integer(*p)) { + digit = static_cast(*p - '0'); + ++p; + i = i * 10 + digit; // in rare cases, this will overflow, but that's ok + // because we have parse_highprecision_float later. + } + exponent = first_after_period - p; + return true; +} + +really_inline bool parse_exponent(UNUSED const uint8_t *const src, const char *&p, int64_t &exponent) { + bool neg_exp = false; + if ('-' == *p) { + neg_exp = true; + ++p; + } else if ('+' == *p) { + ++p; + } + + // e[+-] must be followed by a number + if (!is_integer(*p)) { return INVALID_NUMBER(src); } + unsigned char digit = static_cast(*p - '0'); + int64_t exp_number = digit; + p++; + if (is_integer(*p)) { + digit = static_cast(*p - '0'); + exp_number = 10 * exp_number + digit; + ++p; + } + if (is_integer(*p)) { + digit = static_cast(*p - '0'); + exp_number = 10 * exp_number + digit; + ++p; + } + while (is_integer(*p)) { + // we need to check for overflows; we refuse to parse this + if (exp_number > 0x100000000) { return INVALID_NUMBER(src); } + digit = static_cast(*p - '0'); + exp_number = 10 * exp_number + digit; + ++p; + } + exponent += (neg_exp ? -exp_number : exp_number); + return true; +} + +template +really_inline bool write_float(const uint8_t *const src, bool negative, uint64_t i, const char * start_digits, int digit_count, int64_t exponent, W &writer) { + // If we frequently had to deal with long strings of digits, + // we could extend our code by using a 128-bit integer instead + // of a 64-bit integer. However, this is uncommon in practice. 
+ // digit count is off by 1 because of the decimal (assuming there was one). + if (unlikely((digit_count-1 >= 19))) { // this is uncommon + // It is possible that the integer had an overflow. + // We have to handle the case where we have 0.0000somenumber. + const char *start = start_digits; + while ((*start == '0') || (*start == '.')) { + start++; + } + // we over-decrement by one when there is a '.' + digit_count -= int(start - start_digits); + if (digit_count >= 19) { + // Ok, chances are good that we had an overflow! + // this is almost never going to get called!!! + // we start anew, going slowly!!! + // This will happen in the following examples: + // 10000000000000000000000000000000000000000000e+308 + // 3.1415926535897932384626433832795028841971693993751 + // + bool success = slow_float_parsing((const char *) src, writer); + // The number was already written, but we made a copy of the writer + // when we passed it to the parse_large_integer() function, so + writer.skip_double(); + return success; + } + } + // NOTE: it's weird that the unlikely() only wraps half the if, but it seems to get slower any other + // way we've tried: https://github.com/simdjson/simdjson/pull/990#discussion_r448497331 + // To future reader: we'd love if someone found a better way, or at least could explain this result! + if (unlikely(exponent < FASTFLOAT_SMALLEST_POWER) || (exponent > FASTFLOAT_LARGEST_POWER)) { + // this is almost never going to get called!!! + // we start anew, going slowly!!! + bool success = slow_float_parsing((const char *) src, writer); + // The number was already written, but we made a copy of the writer when we passed it to the + // slow_float_parsing() function, so we have to skip those tape spots now that we've returned + writer.skip_double(); + return success; + } + bool success = true; + double d = compute_float_64(exponent, i, negative, &success); + if (!success) { + // we are almost never going to get here. 
+ if (!parse_float_strtod((const char *)src, &d)) { return INVALID_NUMBER(src); } + } + WRITE_DOUBLE(d, src, writer); + return true; } // parse the number at src @@ -6564,32 +6527,22 @@ really_inline bool parse_number(UNUSED const uint8_t *const src, if (found_minus) { ++p; negative = true; - if (!is_integer(*p)) { // a negative sign must be followed by an integer -#ifdef JSON_TEST_NUMBERS // for unit testing - found_invalid_number(src); -#endif - return false; - } + // a negative sign must be followed by an integer + if (!is_integer(*p)) { return INVALID_NUMBER(src); } } const char *const start_digits = p; uint64_t i; // an unsigned int avoids signed overflows (which are bad) - if (*p == '0') { // 0 cannot be followed by an integer + if (*p == '0') { ++p; - if (is_integer(*p)) { -#ifdef JSON_TEST_NUMBERS // for unit testing - found_invalid_number(src); -#endif - return false; - } + if (is_integer(*p)) { return INVALID_NUMBER(src); } // 0 cannot be followed by an integer i = 0; } else { - if (!(is_integer(*p))) { // must start with an integer -#ifdef JSON_TEST_NUMBERS // for unit testing - found_invalid_number(src); -#endif - return false; - } + // NOTE: This is a redundant check--either we're negative, in which case we checked whether this + // is a digit above, or the caller already determined we start with a digit. But removing this + // check seems to make things slower: https://github.com/simdjson/simdjson/pull/990#discussion_r448512448 + // Please do try yourself, or think of ways to explain it--we'd love to understand :) + if (!is_integer(*p)) { return INVALID_NUMBER(src); } // must start with an integer unsigned char digit = static_cast(*p - '0'); i = digit; p++; @@ -6603,163 +6556,67 @@ really_inline bool parse_number(UNUSED const uint8_t *const src, ++p; } } + + // + // Handle floats if there is a . or e (or both) + // int64_t exponent = 0; bool is_float = false; if ('.' 
== *p) { - is_float = true; // At this point we know that we have a float - // we continue with the fiction that we have an integer. If the - // floating point number is representable as x * 10^z for some integer - // z that fits in 53 bits, then we will be able to convert back the - // the integer into a float in a lossless manner. + is_float = true; ++p; - const char *const first_after_period = p; - if (is_integer(*p)) { - unsigned char digit = static_cast(*p - '0'); - ++p; - i = i * 10 + digit; // might overflow + multiplication by 10 is likely - // cheaper than arbitrary mult. - // we will handle the overflow later - } else { -#ifdef JSON_TEST_NUMBERS // for unit testing - found_invalid_number(src); -#endif - return false; - } -#ifdef SWAR_NUMBER_PARSING - // this helps if we have lots of decimals! - // this turns out to be frequent enough. - if (is_made_of_eight_digits_fast(p)) { - i = i * 100000000 + parse_eight_digits_unrolled(p); - p += 8; - } -#endif - while (is_integer(*p)) { - unsigned char digit = static_cast(*p - '0'); - ++p; - i = i * 10 + digit; // in rare cases, this will overflow, but that's ok - // because we have parse_highprecision_float later. 
- } - exponent = first_after_period - p; + if (!parse_decimal(src, p, i, exponent)) { return false; } } - int digit_count = - int(p - start_digits) - 1; // used later to guard against overflows - int64_t exp_number = 0; // exponential part + int digit_count = int(p - start_digits); // used later to guard against overflows if (('e' == *p) || ('E' == *p)) { is_float = true; ++p; - bool neg_exp = false; - if ('-' == *p) { - neg_exp = true; - ++p; - } else if ('+' == *p) { - ++p; - } - if (!is_integer(*p)) { -#ifdef JSON_TEST_NUMBERS // for unit testing - found_invalid_number(src); -#endif - return false; - } - unsigned char digit = static_cast(*p - '0'); - exp_number = digit; - p++; - if (is_integer(*p)) { - digit = static_cast(*p - '0'); - exp_number = 10 * exp_number + digit; - ++p; - } - if (is_integer(*p)) { - digit = static_cast(*p - '0'); - exp_number = 10 * exp_number + digit; - ++p; - } - while (is_integer(*p)) { - if (exp_number > 0x100000000) { // we need to check for overflows - // we refuse to parse this -#ifdef JSON_TEST_NUMBERS // for unit testing - found_invalid_number(src); -#endif - return false; - } - digit = static_cast(*p - '0'); - exp_number = 10 * exp_number + digit; - ++p; - } - exponent += (neg_exp ? -exp_number : exp_number); + if (!parse_exponent(src, p, exponent)) { return false; } } if (is_float) { - // If we frequently had to deal with long strings of digits, - // we could extend our code by using a 128-bit integer instead - // of a 64-bit integer. However, this is uncommon in practice. - if (unlikely((digit_count >= 19))) { // this is uncommon - // It is possible that the integer had an overflow. - // We have to handle the case where we have 0.0000somenumber. - const char *start = start_digits; - while ((*start == '0') || (*start == '.')) { - start++; - } - // we over-decrement by one when there is a '.' - digit_count -= int(start - start_digits); - if (digit_count >= 19) { - // Ok, chances are good that we had an overflow! 
- // this is almost never going to get called!!! - // we start anew, going slowly!!! - // This will happen in the following examples: - // 10000000000000000000000000000000000000000000e+308 - // 3.1415926535897932384626433832795028841971693993751 - // - bool success = slow_float_parsing((const char *) src, writer); - // The number was already written, but we made a copy of the writer - // when we passed it to the parse_large_integer() function, so - writer.skip_double(); - return success; - } - } - if (unlikely(exponent < FASTFLOAT_SMALLEST_POWER) || - (exponent > FASTFLOAT_LARGEST_POWER)) { // this is uncommon!!! - // this is almost never going to get called!!! - // we start anew, going slowly!!! - bool success = slow_float_parsing((const char *) src, writer); - // The number was already written, but we made a copy of the writer when we passed it to the - // slow_float_parsing() function, so we have to skip those tape spots now that we've returned - writer.skip_double(); - return success; - } - bool success = true; - double d = compute_float_64(exponent, i, negative, &success); - if (!success) { - // we are almost never going to get here. - success = parse_float_strtod((const char *)src, &d); - } - if (success) { - writer.append_double(d); -#ifdef JSON_TEST_NUMBERS // for unit testing - found_float(d, src); -#endif - return true; - } else { -#ifdef JSON_TEST_NUMBERS // for unit testing - found_invalid_number(src); -#endif - return false; + return write_float(src, negative, i, start_digits, digit_count, exponent, writer); + } + + // The longest negative 64-bit number is 19 digits. + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + int longest_digit_count = negative ? 19 : 20; + if (digit_count > longest_digit_count) { return INVALID_NUMBER(src); } + if (digit_count == longest_digit_count) { + // Anything negative above INT64_MAX is either invalid or INT64_MIN. 
+ if (negative && i > uint64_t(INT64_MAX)) { + // If the number is negative and can't fit in a signed integer, it's invalid. + if (i > uint64_t(INT64_MAX)+1) { return INVALID_NUMBER(src); } + + // If it's negative, it has to be INT64_MAX+1 now (or INT64_MIN). + // C++ can't reliably negate uint64_t INT64_MIN, it seems. Special case it. + WRITE_INTEGER(INT64_MIN, src, writer); + return is_structural_or_whitespace(*p); } + + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to 9,223,372,036,854,775,808 (INT64_MAX). + // + if (!negative && (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX))) { return INVALID_NUMBER(src); } + } + + // Write unsigned if it doesn't fit in a signed integer. + if (i > uint64_t(INT64_MAX)) { + WRITE_UNSIGNED(i, src, writer); } else { - if (unlikely(digit_count >= 18)) { // this is uncommon!!! - // there is a good chance that we had an overflow, so we need - // need to recover: we parse the whole thing again. - bool success = parse_large_integer(src, writer, found_minus); - // The number was already written, but we made a copy of the writer - // when we passed it to the parse_large_integer() function, so - writer.skip_large_integer(); - return success; - } - i = negative ? 
0 - i : i; - writer.append_s64(i); -#ifdef JSON_TEST_NUMBERS // for unit testing - found_integer(i, src); -#endif + WRITE_INTEGER(negative ? 0 - i : i, src, writer); } return is_structural_or_whitespace(*p); + #endif // SIMDJSON_SKIPNUMBERPARSING } @@ -6930,31 +6787,7 @@ public: really_inline size_t remaining_len() { return parser.len - *current_structural; } - template - really_inline bool with_space_terminated_copy(const F& f) { - /** - * We need to make a copy to make sure that the string is space terminated. - * This is not about padding the input, which should already padded up - * to len + SIMDJSON_PADDING. However, we have no control at this stage - * on how the padding was done. What if the input string was padded with nulls? - * It is quite common for an input string to have an extra null character (C string). - * We do not want to allow 9\0 (where \0 is the null character) inside a JSON - * document, but the string "9\0" by itself is fine. So we make a copy and - * pad the input with spaces when we know that there is just one input element. - * This copy is relatively expensive, but it will almost never be called in - * practice unless you are in the strange scenario where you have many JSON - * documents made of single atoms. - */ - char *copy = static_cast(malloc(parser.len + SIMDJSON_PADDING)); - if (copy == nullptr) { - return true; - } - memcpy(copy, buf, parser.len); - memset(copy + parser.len, ' ', SIMDJSON_PADDING); - bool result = f(reinterpret_cast(copy), *current_structural); - free(copy); - return result; - } + really_inline bool past_end(uint32_t n_structural_indexes) { return current_structural >= &parser.structural_indexes[n_structural_indexes]; } @@ -7236,6 +7069,31 @@ struct structural_parser : structural_iterator { return parse_number(current(), found_minus); } + really_inline bool parse_number_with_space_terminated_copy(const bool is_negative) { + /** + * We need to make a copy to make sure that the string is space terminated. 
+ * This is not about padding the input, which should already padded up + * to len + SIMDJSON_PADDING. However, we have no control at this stage + * on how the padding was done. What if the input string was padded with nulls? + * It is quite common for an input string to have an extra null character (C string). + * We do not want to allow 9\0 (where \0 is the null character) inside a JSON + * document, but the string "9\0" by itself is fine. So we make a copy and + * pad the input with spaces when we know that there is just one input element. + * This copy is relatively expensive, but it will almost never be called in + * practice unless you are in the strange scenario where you have many JSON + * documents made of single atoms. + */ + uint8_t *copy = static_cast(malloc(parser.len + SIMDJSON_PADDING)); + if (copy == nullptr) { + return true; + } + memcpy(copy, buf, parser.len); + memset(copy + parser.len, ' ', SIMDJSON_PADDING); + size_t idx = *current_structural; + bool result = parse_number(&copy[idx], is_negative); // parse_number does not throw + free(copy); + return result; + } WARN_UNUSED really_inline ret_address_t parse_value(const unified_machine_addresses &addresses, ret_address_t continue_state) { switch (advance_char()) { case '"': @@ -7373,6 +7231,7 @@ struct structural_parser : structural_iterator { #undef FAIL_IF #define FAIL_IF(EXPR) { if (EXPR) { goto error; } } + template WARN_UNUSED static error_code parse_structurals(dom_parser_implementation &dom_parser, dom::document &doc) noexcept { dom_parser.doc = &doc; @@ -7418,18 +7277,16 @@ WARN_UNUSED static error_code parse_structurals(dom_parser_implementation &dom_p goto finish; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': - FAIL_IF( - parser.with_space_terminated_copy([&](const uint8_t *copy, size_t idx) { - return parser.parse_number(&copy[idx], false); - }) - ); + // Next line used to be an interesting functional programming exercise with + // a 
lambda that gets passed to another function via a closure. This would confuse the + // clangcl compiler under Visual Studio 2019 (recent release). + { if(parser.parse_number_with_space_terminated_copy(false)) { goto error; }} goto finish; case '-': - FAIL_IF( - parser.with_space_terminated_copy([&](const uint8_t *copy, size_t idx) { - return parser.parse_number(&copy[idx], true); - }) - ); + // Next line used to be an interesting functional programming exercise with + // a lambda that gets passed to another function via a closure. This would confuse the + // clangcl compiler under Visual Studio 2019 (recent release). + { if(parser.parse_number_with_space_terminated_copy(true)) { goto error; }} goto finish; default: parser.log_error("Document starts with a non-value character"); @@ -8180,36 +8037,6 @@ namespace simd { this->chunks[1].store(ptr+sizeof(simd8)*1); } - template - really_inline void each(F const& each_chunk) const - { - each_chunk(this->chunks[0]); - each_chunk(this->chunks[1]); - } - - template - really_inline simd8x64 map(F const& map_chunk) const { - return simd8x64( - map_chunk(this->chunks[0]), - map_chunk(this->chunks[1]) - ); - } - - - - template - really_inline simd8x64 map(const simd8x64 b, F const& map_chunk) const { - return simd8x64( - map_chunk(this->chunks[0], b.chunks[0]), - map_chunk(this->chunks[1], b.chunks[1]) - ); - } - - template - really_inline simd8 reduce(F const& reduce_pair) const { - return reduce_pair(this->chunks[0], this->chunks[1]); - } - really_inline uint64_t to_bitmask() const { uint64_t r_lo = uint32_t(this->chunks[0].to_bitmask()); uint64_t r_hi = this->chunks[1].to_bitmask(); @@ -8218,17 +8045,26 @@ namespace simd { really_inline simd8x64 bit_or(const T m) const { const simd8 mask = simd8::splat(m); - return this->map( [&](simd8 a) { return a | mask; } ); + return simd8x64( + this->chunks[0] | mask, + this->chunks[1] | mask + ); } really_inline uint64_t eq(const T m) const { const simd8 mask = simd8::splat(m); - return 
this->map( [&](simd8 a) { return a == mask; } ).to_bitmask(); + return simd8x64( + this->chunks[0] == mask, + this->chunks[1] == mask + ).to_bitmask(); } really_inline uint64_t lteq(const T m) const { const simd8 mask = simd8::splat(m); - return this->map( [&](simd8 a) { return a <= mask; } ).to_bitmask(); + return simd8x64( + this->chunks[0] <= mask, + this->chunks[1] <= mask + ).to_bitmask(); } }; // struct simd8x64 @@ -8270,19 +8106,20 @@ really_inline json_character_block json_character_block::classify(const simd::si // hope that useless computations will be omitted. This is namely case when // minifying (we only need whitespace). - uint64_t whitespace = in.map([&](simd8 _in) { - return _in == simd8(_mm256_shuffle_epi8(whitespace_table, _in)); - }).to_bitmask(); - - uint64_t op = in.map([&](simd8 _in) { - // | 32 handles the fact that { } and [ ] are exactly 32 bytes apart - return (_in | 32) == simd8(_mm256_shuffle_epi8(op_table, _in-',')); - }).to_bitmask(); + uint64_t whitespace = simd8x64( + in.chunks[0] == simd8(_mm256_shuffle_epi8(whitespace_table, in.chunks[0])), + in.chunks[1] == simd8(_mm256_shuffle_epi8(whitespace_table, in.chunks[1])) + ).to_bitmask(); + + uint64_t op = simd8x64( + (in.chunks[0] | 32) == simd8(_mm256_shuffle_epi8(op_table, in.chunks[0]-',')), + (in.chunks[1] | 32) == simd8(_mm256_shuffle_epi8(op_table, in.chunks[1]-',')) + ).to_bitmask(); return { whitespace, op }; } really_inline bool is_ascii(simd8x64 input) { - simd8 bits = input.reduce([&](simd8 a,simd8 b) { return a|b; }); + simd8 bits = (input.chunks[0] | input.chunks[1]); return !bits.any_bits_set_anywhere(0b10000000u); } @@ -9508,14 +9345,25 @@ static inline uint32_t parse_eight_digits_unrolled(const char *chars) { namespace stage2 { namespace numberparsing { +#ifdef JSON_TEST_NUMBERS +#define INVALID_NUMBER(SRC) (found_invalid_number((SRC)), false) +#define WRITE_INTEGER(VALUE, SRC, WRITER) (found_integer((VALUE), (SRC)), writer.append_s64((VALUE))) +#define 
WRITE_UNSIGNED(VALUE, SRC, WRITER) (found_unsigned_integer((VALUE), (SRC)), writer.append_u64((VALUE))) +#define WRITE_DOUBLE(VALUE, SRC, WRITER) (found_float((VALUE), (SRC)), writer.append_double((VALUE))) +#else +#define INVALID_NUMBER(SRC) (false) +#define WRITE_INTEGER(VALUE, SRC, WRITER) writer.append_s64((VALUE)) +#define WRITE_UNSIGNED(VALUE, SRC, WRITER) writer.append_u64((VALUE)) +#define WRITE_DOUBLE(VALUE, SRC, WRITER) writer.append_double((VALUE)) +#endif + // Attempts to compute i * 10^(power) exactly; and if "negative" is // true, negate the result. // This function will only work in some cases, when it does not work, success is // set to false. This should work *most of the time* (like 99% of the time). // We assume that power is in the [FASTFLOAT_SMALLEST_POWER, // FASTFLOAT_LARGEST_POWER] interval: the caller is responsible for this check. -really_inline double compute_float_64(int64_t power, uint64_t i, bool negative, - bool *success) { +really_inline double compute_float_64(int64_t power, uint64_t i, bool negative, bool *success) { // we start with a fast path // It was described in // Clinger WD. How to read floating point numbers accurately. @@ -9748,109 +9596,132 @@ really_inline bool is_made_of_eight_digits_fast(const char *chars) { 0x3333333333333333); } -// called by parse_number when we know that the output is an integer, -// but where there might be some integer overflow. -// we want to catch overflows! -// Do not call this function directly as it skips some of the checks from -// parse_number -// -// This function will almost never be called!!! 
-// -template -never_inline bool parse_large_integer(const uint8_t *const src, - W writer, - bool found_minus) { - const char *p = reinterpret_cast(src); - - bool negative = false; - if (found_minus) { - ++p; - negative = true; - } - uint64_t i; - if (*p == '0') { // 0 cannot be followed by an integer - ++p; - i = 0; - } else { - unsigned char digit = static_cast(*p - '0'); - i = digit; - p++; - // the is_made_of_eight_digits_fast routine is unlikely to help here because - // we rarely see large integer parts like 123456789 - while (is_integer(*p)) { - digit = static_cast(*p - '0'); - if (mul_overflow(i, 10, &i)) { -#ifdef JSON_TEST_NUMBERS // for unit testing - found_invalid_number(src); -#endif - return false; // overflow - } - if (add_overflow(i, digit, &i)) { -#ifdef JSON_TEST_NUMBERS // for unit testing - found_invalid_number(src); -#endif - return false; // overflow - } - ++p; - } - } - if (negative) { - if (i > 0x8000000000000000) { - // overflows! -#ifdef JSON_TEST_NUMBERS // for unit testing - found_invalid_number(src); -#endif - return false; // overflow - } else if (i == 0x8000000000000000) { - // In two's complement, we cannot represent 0x8000000000000000 - // as a positive signed integer, but the negative version is - // possible. - constexpr int64_t signed_answer = INT64_MIN; - writer.append_s64(signed_answer); -#ifdef JSON_TEST_NUMBERS // for unit testing - found_integer(signed_answer, src); -#endif - } else { - // we can negate safely - int64_t signed_answer = -static_cast(i); - writer.append_s64(signed_answer); -#ifdef JSON_TEST_NUMBERS // for unit testing - found_integer(signed_answer, src); -#endif - } - } else { - // we have a positive integer, the contract is that - // we try to represent it as a signed integer and only - // fallback on unsigned integers if absolutely necessary. 
- if (i < 0x8000000000000000) { -#ifdef JSON_TEST_NUMBERS // for unit testing - found_integer(i, src); -#endif - writer.append_s64(i); - } else { -#ifdef JSON_TEST_NUMBERS // for unit testing - found_unsigned_integer(i, src); -#endif - writer.append_u64(i); - } - } - return is_structural_or_whitespace(*p); -} - template bool slow_float_parsing(UNUSED const char * src, W writer) { double d; if (parse_float_strtod(src, &d)) { - writer.append_double(d); -#ifdef JSON_TEST_NUMBERS // for unit testing - found_float(d, (const uint8_t *)src); -#endif + WRITE_DOUBLE(d, (const uint8_t *)src, writer); return true; } -#ifdef JSON_TEST_NUMBERS // for unit testing - found_invalid_number((const uint8_t *)src); + return INVALID_NUMBER((const uint8_t *)src); +} + +really_inline bool parse_decimal(UNUSED const uint8_t *const src, const char *&p, uint64_t &i, int64_t &exponent) { + // we continue with the fiction that we have an integer. If the + // floating point number is representable as x * 10^z for some integer + // z that fits in 53 bits, then we will be able to convert back the + // the integer into a float in a lossless manner. + const char *const first_after_period = p; + if (!is_integer(*p)) { return INVALID_NUMBER(src); } // There must be at least one digit after the . + + unsigned char digit = static_cast(*p - '0'); + ++p; + i = i * 10 + digit; // might overflow + multiplication by 10 is likely + // cheaper than arbitrary mult. + // we will handle the overflow later +#ifdef SWAR_NUMBER_PARSING + // this helps if we have lots of decimals! + // this turns out to be frequent enough. + if (is_made_of_eight_digits_fast(p)) { + i = i * 100000000 + parse_eight_digits_unrolled(p); + p += 8; + } #endif - return false; + while (is_integer(*p)) { + digit = static_cast(*p - '0'); + ++p; + i = i * 10 + digit; // in rare cases, this will overflow, but that's ok + // because we have parse_highprecision_float later. 
+ } + exponent = first_after_period - p; + return true; +} + +really_inline bool parse_exponent(UNUSED const uint8_t *const src, const char *&p, int64_t &exponent) { + bool neg_exp = false; + if ('-' == *p) { + neg_exp = true; + ++p; + } else if ('+' == *p) { + ++p; + } + + // e[+-] must be followed by a number + if (!is_integer(*p)) { return INVALID_NUMBER(src); } + unsigned char digit = static_cast(*p - '0'); + int64_t exp_number = digit; + p++; + if (is_integer(*p)) { + digit = static_cast(*p - '0'); + exp_number = 10 * exp_number + digit; + ++p; + } + if (is_integer(*p)) { + digit = static_cast(*p - '0'); + exp_number = 10 * exp_number + digit; + ++p; + } + while (is_integer(*p)) { + // we need to check for overflows; we refuse to parse this + if (exp_number > 0x100000000) { return INVALID_NUMBER(src); } + digit = static_cast(*p - '0'); + exp_number = 10 * exp_number + digit; + ++p; + } + exponent += (neg_exp ? -exp_number : exp_number); + return true; +} + +template +really_inline bool write_float(const uint8_t *const src, bool negative, uint64_t i, const char * start_digits, int digit_count, int64_t exponent, W &writer) { + // If we frequently had to deal with long strings of digits, + // we could extend our code by using a 128-bit integer instead + // of a 64-bit integer. However, this is uncommon in practice. + // digit count is off by 1 because of the decimal (assuming there was one). + if (unlikely((digit_count-1 >= 19))) { // this is uncommon + // It is possible that the integer had an overflow. + // We have to handle the case where we have 0.0000somenumber. + const char *start = start_digits; + while ((*start == '0') || (*start == '.')) { + start++; + } + // we over-decrement by one when there is a '.' + digit_count -= int(start - start_digits); + if (digit_count >= 19) { + // Ok, chances are good that we had an overflow! + // this is almost never going to get called!!! + // we start anew, going slowly!!! 
+ // This will happen in the following examples: + // 10000000000000000000000000000000000000000000e+308 + // 3.1415926535897932384626433832795028841971693993751 + // + bool success = slow_float_parsing((const char *) src, writer); + // The number was already written, but we made a copy of the writer + // when we passed it to the parse_large_integer() function, so + writer.skip_double(); + return success; + } + } + // NOTE: it's weird that the unlikely() only wraps half the if, but it seems to get slower any other + // way we've tried: https://github.com/simdjson/simdjson/pull/990#discussion_r448497331 + // To future reader: we'd love if someone found a better way, or at least could explain this result! + if (unlikely(exponent < FASTFLOAT_SMALLEST_POWER) || (exponent > FASTFLOAT_LARGEST_POWER)) { + // this is almost never going to get called!!! + // we start anew, going slowly!!! + bool success = slow_float_parsing((const char *) src, writer); + // The number was already written, but we made a copy of the writer when we passed it to the + // slow_float_parsing() function, so we have to skip those tape spots now that we've returned + writer.skip_double(); + return success; + } + bool success = true; + double d = compute_float_64(exponent, i, negative, &success); + if (!success) { + // we are almost never going to get here. 
+ if (!parse_float_strtod((const char *)src, &d)) { return INVALID_NUMBER(src); } + } + WRITE_DOUBLE(d, src, writer); + return true; } // parse the number at src @@ -9876,32 +9747,22 @@ really_inline bool parse_number(UNUSED const uint8_t *const src, if (found_minus) { ++p; negative = true; - if (!is_integer(*p)) { // a negative sign must be followed by an integer -#ifdef JSON_TEST_NUMBERS // for unit testing - found_invalid_number(src); -#endif - return false; - } + // a negative sign must be followed by an integer + if (!is_integer(*p)) { return INVALID_NUMBER(src); } } const char *const start_digits = p; uint64_t i; // an unsigned int avoids signed overflows (which are bad) - if (*p == '0') { // 0 cannot be followed by an integer + if (*p == '0') { ++p; - if (is_integer(*p)) { -#ifdef JSON_TEST_NUMBERS // for unit testing - found_invalid_number(src); -#endif - return false; - } + if (is_integer(*p)) { return INVALID_NUMBER(src); } // 0 cannot be followed by an integer i = 0; } else { - if (!(is_integer(*p))) { // must start with an integer -#ifdef JSON_TEST_NUMBERS // for unit testing - found_invalid_number(src); -#endif - return false; - } + // NOTE: This is a redundant check--either we're negative, in which case we checked whether this + // is a digit above, or the caller already determined we start with a digit. But removing this + // check seems to make things slower: https://github.com/simdjson/simdjson/pull/990#discussion_r448512448 + // Please do try yourself, or think of ways to explain it--we'd love to understand :) + if (!is_integer(*p)) { return INVALID_NUMBER(src); } // must start with an integer unsigned char digit = static_cast(*p - '0'); i = digit; p++; @@ -9915,163 +9776,67 @@ really_inline bool parse_number(UNUSED const uint8_t *const src, ++p; } } + + // + // Handle floats if there is a . or e (or both) + // int64_t exponent = 0; bool is_float = false; if ('.' 
== *p) { - is_float = true; // At this point we know that we have a float - // we continue with the fiction that we have an integer. If the - // floating point number is representable as x * 10^z for some integer - // z that fits in 53 bits, then we will be able to convert back the - // the integer into a float in a lossless manner. + is_float = true; ++p; - const char *const first_after_period = p; - if (is_integer(*p)) { - unsigned char digit = static_cast(*p - '0'); - ++p; - i = i * 10 + digit; // might overflow + multiplication by 10 is likely - // cheaper than arbitrary mult. - // we will handle the overflow later - } else { -#ifdef JSON_TEST_NUMBERS // for unit testing - found_invalid_number(src); -#endif - return false; - } -#ifdef SWAR_NUMBER_PARSING - // this helps if we have lots of decimals! - // this turns out to be frequent enough. - if (is_made_of_eight_digits_fast(p)) { - i = i * 100000000 + parse_eight_digits_unrolled(p); - p += 8; - } -#endif - while (is_integer(*p)) { - unsigned char digit = static_cast(*p - '0'); - ++p; - i = i * 10 + digit; // in rare cases, this will overflow, but that's ok - // because we have parse_highprecision_float later. 
- } - exponent = first_after_period - p; + if (!parse_decimal(src, p, i, exponent)) { return false; } } - int digit_count = - int(p - start_digits) - 1; // used later to guard against overflows - int64_t exp_number = 0; // exponential part + int digit_count = int(p - start_digits); // used later to guard against overflows if (('e' == *p) || ('E' == *p)) { is_float = true; ++p; - bool neg_exp = false; - if ('-' == *p) { - neg_exp = true; - ++p; - } else if ('+' == *p) { - ++p; - } - if (!is_integer(*p)) { -#ifdef JSON_TEST_NUMBERS // for unit testing - found_invalid_number(src); -#endif - return false; - } - unsigned char digit = static_cast(*p - '0'); - exp_number = digit; - p++; - if (is_integer(*p)) { - digit = static_cast(*p - '0'); - exp_number = 10 * exp_number + digit; - ++p; - } - if (is_integer(*p)) { - digit = static_cast(*p - '0'); - exp_number = 10 * exp_number + digit; - ++p; - } - while (is_integer(*p)) { - if (exp_number > 0x100000000) { // we need to check for overflows - // we refuse to parse this -#ifdef JSON_TEST_NUMBERS // for unit testing - found_invalid_number(src); -#endif - return false; - } - digit = static_cast(*p - '0'); - exp_number = 10 * exp_number + digit; - ++p; - } - exponent += (neg_exp ? -exp_number : exp_number); + if (!parse_exponent(src, p, exponent)) { return false; } } if (is_float) { - // If we frequently had to deal with long strings of digits, - // we could extend our code by using a 128-bit integer instead - // of a 64-bit integer. However, this is uncommon in practice. - if (unlikely((digit_count >= 19))) { // this is uncommon - // It is possible that the integer had an overflow. - // We have to handle the case where we have 0.0000somenumber. - const char *start = start_digits; - while ((*start == '0') || (*start == '.')) { - start++; - } - // we over-decrement by one when there is a '.' - digit_count -= int(start - start_digits); - if (digit_count >= 19) { - // Ok, chances are good that we had an overflow! 
- // this is almost never going to get called!!! - // we start anew, going slowly!!! - // This will happen in the following examples: - // 10000000000000000000000000000000000000000000e+308 - // 3.1415926535897932384626433832795028841971693993751 - // - bool success = slow_float_parsing((const char *) src, writer); - // The number was already written, but we made a copy of the writer - // when we passed it to the parse_large_integer() function, so - writer.skip_double(); - return success; - } - } - if (unlikely(exponent < FASTFLOAT_SMALLEST_POWER) || - (exponent > FASTFLOAT_LARGEST_POWER)) { // this is uncommon!!! - // this is almost never going to get called!!! - // we start anew, going slowly!!! - bool success = slow_float_parsing((const char *) src, writer); - // The number was already written, but we made a copy of the writer when we passed it to the - // slow_float_parsing() function, so we have to skip those tape spots now that we've returned - writer.skip_double(); - return success; - } - bool success = true; - double d = compute_float_64(exponent, i, negative, &success); - if (!success) { - // we are almost never going to get here. - success = parse_float_strtod((const char *)src, &d); - } - if (success) { - writer.append_double(d); -#ifdef JSON_TEST_NUMBERS // for unit testing - found_float(d, src); -#endif - return true; - } else { -#ifdef JSON_TEST_NUMBERS // for unit testing - found_invalid_number(src); -#endif - return false; + return write_float(src, negative, i, start_digits, digit_count, exponent, writer); + } + + // The longest negative 64-bit number is 19 digits. + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + int longest_digit_count = negative ? 19 : 20; + if (digit_count > longest_digit_count) { return INVALID_NUMBER(src); } + if (digit_count == longest_digit_count) { + // Anything negative above INT64_MAX is either invalid or INT64_MIN. 
+ if (negative && i > uint64_t(INT64_MAX)) { + // If the number is negative and can't fit in a signed integer, it's invalid. + if (i > uint64_t(INT64_MAX)+1) { return INVALID_NUMBER(src); } + + // If it's negative, it has to be INT64_MAX+1 now (or INT64_MIN). + // C++ can't reliably negate uint64_t INT64_MIN, it seems. Special case it. + WRITE_INTEGER(INT64_MIN, src, writer); + return is_structural_or_whitespace(*p); } + + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to 9,223,372,036,854,775,808 (INT64_MAX). + // + if (!negative && (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX))) { return INVALID_NUMBER(src); } + } + + // Write unsigned if it doesn't fit in a signed integer. + if (i > uint64_t(INT64_MAX)) { + WRITE_UNSIGNED(i, src, writer); } else { - if (unlikely(digit_count >= 18)) { // this is uncommon!!! - // there is a good chance that we had an overflow, so we need - // need to recover: we parse the whole thing again. - bool success = parse_large_integer(src, writer, found_minus); - // The number was already written, but we made a copy of the writer - // when we passed it to the parse_large_integer() function, so - writer.skip_large_integer(); - return success; - } - i = negative ? 
0 - i : i; - writer.append_s64(i); -#ifdef JSON_TEST_NUMBERS // for unit testing - found_integer(i, src); -#endif + WRITE_INTEGER(negative ? 0 - i : i, src, writer); } return is_structural_or_whitespace(*p); + #endif // SIMDJSON_SKIPNUMBERPARSING } @@ -10244,31 +10009,7 @@ public: really_inline size_t remaining_len() { return parser.len - *current_structural; } - template - really_inline bool with_space_terminated_copy(const F& f) { - /** - * We need to make a copy to make sure that the string is space terminated. - * This is not about padding the input, which should already padded up - * to len + SIMDJSON_PADDING. However, we have no control at this stage - * on how the padding was done. What if the input string was padded with nulls? - * It is quite common for an input string to have an extra null character (C string). - * We do not want to allow 9\0 (where \0 is the null character) inside a JSON - * document, but the string "9\0" by itself is fine. So we make a copy and - * pad the input with spaces when we know that there is just one input element. - * This copy is relatively expensive, but it will almost never be called in - * practice unless you are in the strange scenario where you have many JSON - * documents made of single atoms. - */ - char *copy = static_cast(malloc(parser.len + SIMDJSON_PADDING)); - if (copy == nullptr) { - return true; - } - memcpy(copy, buf, parser.len); - memset(copy + parser.len, ' ', SIMDJSON_PADDING); - bool result = f(reinterpret_cast(copy), *current_structural); - free(copy); - return result; - } + really_inline bool past_end(uint32_t n_structural_indexes) { return current_structural >= &parser.structural_indexes[n_structural_indexes]; } @@ -10550,6 +10291,31 @@ struct structural_parser : structural_iterator { return parse_number(current(), found_minus); } + really_inline bool parse_number_with_space_terminated_copy(const bool is_negative) { + /** + * We need to make a copy to make sure that the string is space terminated. 
+ * This is not about padding the input, which should already padded up + * to len + SIMDJSON_PADDING. However, we have no control at this stage + * on how the padding was done. What if the input string was padded with nulls? + * It is quite common for an input string to have an extra null character (C string). + * We do not want to allow 9\0 (where \0 is the null character) inside a JSON + * document, but the string "9\0" by itself is fine. So we make a copy and + * pad the input with spaces when we know that there is just one input element. + * This copy is relatively expensive, but it will almost never be called in + * practice unless you are in the strange scenario where you have many JSON + * documents made of single atoms. + */ + uint8_t *copy = static_cast(malloc(parser.len + SIMDJSON_PADDING)); + if (copy == nullptr) { + return true; + } + memcpy(copy, buf, parser.len); + memset(copy + parser.len, ' ', SIMDJSON_PADDING); + size_t idx = *current_structural; + bool result = parse_number(©[idx], is_negative); // parse_number does not throw + free(copy); + return result; + } WARN_UNUSED really_inline ret_address_t parse_value(const unified_machine_addresses &addresses, ret_address_t continue_state) { switch (advance_char()) { case '"': @@ -10687,6 +10453,7 @@ struct structural_parser : structural_iterator { #undef FAIL_IF #define FAIL_IF(EXPR) { if (EXPR) { goto error; } } + template WARN_UNUSED static error_code parse_structurals(dom_parser_implementation &dom_parser, dom::document &doc) noexcept { dom_parser.doc = &doc; @@ -10732,18 +10499,16 @@ WARN_UNUSED static error_code parse_structurals(dom_parser_implementation &dom_p goto finish; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': - FAIL_IF( - parser.with_space_terminated_copy([&](const uint8_t *copy, size_t idx) { - return parser.parse_number(©[idx], false); - }) - ); + // Next line used to be an interesting functional programming exercise with + // a 
lambda that gets passed to another function via a closure. This would confuse the + // clangcl compiler under Visual Studio 2019 (recent release). + { if(parser.parse_number_with_space_terminated_copy(false)) { goto error; }} goto finish; case '-': - FAIL_IF( - parser.with_space_terminated_copy([&](const uint8_t *copy, size_t idx) { - return parser.parse_number(©[idx], true); - }) - ); + // Next line used to be an interesting functional programming exercise with + // a lambda that gets passed to another function via a closure. This would confuse the + // clangcl compiler under Visual Studio 2019 (recent release). + { if(parser.parse_number_with_space_terminated_copy(true)) { goto error; }} goto finish; default: parser.log_error("Document starts with a non-value character"); @@ -11453,43 +11218,6 @@ namespace simd { each(3); } - template - really_inline void each(F const& each_chunk) const - { - each_chunk(this->chunks[0]); - each_chunk(this->chunks[1]); - each_chunk(this->chunks[2]); - each_chunk(this->chunks[3]); - } - - template - really_inline simd8x64 map(F const& map_chunk) const { - return simd8x64( - map_chunk(this->chunks[0]), - map_chunk(this->chunks[1]), - map_chunk(this->chunks[2]), - map_chunk(this->chunks[3]) - ); - } - - template - really_inline simd8x64 map(const simd8x64 b, F const& map_chunk) const { - return simd8x64( - map_chunk(this->chunks[0], b.chunks[0]), - map_chunk(this->chunks[1], b.chunks[1]), - map_chunk(this->chunks[2], b.chunks[2]), - map_chunk(this->chunks[3], b.chunks[3]) - ); - } - - template - really_inline simd8 reduce(F const& reduce_pair) const { - return reduce_pair( - reduce_pair(this->chunks[0], this->chunks[1]), - reduce_pair(this->chunks[2], this->chunks[3]) - ); - } - really_inline uint64_t to_bitmask() const { uint64_t r0 = uint32_t(this->chunks[0].to_bitmask()); uint64_t r1 = this->chunks[1].to_bitmask(); @@ -11500,17 +11228,32 @@ namespace simd { really_inline simd8x64 bit_or(const T m) const { const simd8 mask = 
simd8::splat(m); - return this->map( [&](simd8 a) { return a | mask; } ); + return simd8x64( + this->chunks[0] | mask, + this->chunks[1] | mask, + this->chunks[2] | mask, + this->chunks[3] | mask + ); } really_inline uint64_t eq(const T m) const { const simd8 mask = simd8::splat(m); - return this->map( [&](simd8 a) { return a == mask; } ).to_bitmask(); + return simd8x64( + this->chunks[0] == mask, + this->chunks[1] == mask, + this->chunks[2] == mask, + this->chunks[3] == mask + ).to_bitmask(); } really_inline uint64_t lteq(const T m) const { const simd8 mask = simd8::splat(m); - return this->map( [&](simd8 a) { return a <= mask; } ).to_bitmask(); + return simd8x64( + this->chunks[0] <= mask, + this->chunks[1] <= mask, + this->chunks[2] <= mask, + this->chunks[3] <= mask + ).to_bitmask(); } }; // struct simd8x64 @@ -11553,19 +11296,25 @@ really_inline json_character_block json_character_block::classify(const simd::si // hope that useless computations will be omitted. This is namely case when // minifying (we only need whitespace). 
- uint64_t whitespace = in.map([&](simd8 _in) { - return _in == simd8(_mm_shuffle_epi8(whitespace_table, _in)); - }).to_bitmask(); + uint64_t whitespace = simd8x64( + in.chunks[0] == simd8(_mm_shuffle_epi8(whitespace_table, in.chunks[0])), + in.chunks[1] == simd8(_mm_shuffle_epi8(whitespace_table, in.chunks[1])), + in.chunks[2] == simd8(_mm_shuffle_epi8(whitespace_table, in.chunks[2])), + in.chunks[3] == simd8(_mm_shuffle_epi8(whitespace_table, in.chunks[3])) + ).to_bitmask(); - uint64_t op = in.map([&](simd8 _in) { - // | 32 handles the fact that { } and [ ] are exactly 32 bytes apart - return (_in | 32) == simd8(_mm_shuffle_epi8(op_table, _in-',')); - }).to_bitmask(); + // | 32 handles the fact that { } and [ ] are exactly 32 bytes apart + uint64_t op = simd8x64( + (in.chunks[0] | 32) == simd8(_mm_shuffle_epi8(op_table, in.chunks[0]-',')), + (in.chunks[1] | 32) == simd8(_mm_shuffle_epi8(op_table, in.chunks[1]-',')), + (in.chunks[2] | 32) == simd8(_mm_shuffle_epi8(op_table, in.chunks[2]-',')), + (in.chunks[3] | 32) == simd8(_mm_shuffle_epi8(op_table, in.chunks[3]-',')) + ).to_bitmask(); return { whitespace, op }; } really_inline bool is_ascii(simd8x64 input) { - simd8 bits = input.reduce([&](simd8 a,simd8 b) { return a|b; }); + simd8 bits = (input.chunks[0] | input.chunks[1]) | (input.chunks[2] | input.chunks[3]); return !bits.any_bits_set_anywhere(0b10000000u); } @@ -12794,14 +12543,25 @@ static inline uint32_t parse_eight_digits_unrolled(const char *chars) { namespace stage2 { namespace numberparsing { +#ifdef JSON_TEST_NUMBERS +#define INVALID_NUMBER(SRC) (found_invalid_number((SRC)), false) +#define WRITE_INTEGER(VALUE, SRC, WRITER) (found_integer((VALUE), (SRC)), writer.append_s64((VALUE))) +#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (found_unsigned_integer((VALUE), (SRC)), writer.append_u64((VALUE))) +#define WRITE_DOUBLE(VALUE, SRC, WRITER) (found_float((VALUE), (SRC)), writer.append_double((VALUE))) +#else +#define INVALID_NUMBER(SRC) (false) +#define 
WRITE_INTEGER(VALUE, SRC, WRITER) writer.append_s64((VALUE)) +#define WRITE_UNSIGNED(VALUE, SRC, WRITER) writer.append_u64((VALUE)) +#define WRITE_DOUBLE(VALUE, SRC, WRITER) writer.append_double((VALUE)) +#endif + // Attempts to compute i * 10^(power) exactly; and if "negative" is // true, negate the result. // This function will only work in some cases, when it does not work, success is // set to false. This should work *most of the time* (like 99% of the time). // We assume that power is in the [FASTFLOAT_SMALLEST_POWER, // FASTFLOAT_LARGEST_POWER] interval: the caller is responsible for this check. -really_inline double compute_float_64(int64_t power, uint64_t i, bool negative, - bool *success) { +really_inline double compute_float_64(int64_t power, uint64_t i, bool negative, bool *success) { // we start with a fast path // It was described in // Clinger WD. How to read floating point numbers accurately. @@ -13034,109 +12794,132 @@ really_inline bool is_made_of_eight_digits_fast(const char *chars) { 0x3333333333333333); } -// called by parse_number when we know that the output is an integer, -// but where there might be some integer overflow. -// we want to catch overflows! -// Do not call this function directly as it skips some of the checks from -// parse_number -// -// This function will almost never be called!!! 
-// -template -never_inline bool parse_large_integer(const uint8_t *const src, - W writer, - bool found_minus) { - const char *p = reinterpret_cast(src); - - bool negative = false; - if (found_minus) { - ++p; - negative = true; - } - uint64_t i; - if (*p == '0') { // 0 cannot be followed by an integer - ++p; - i = 0; - } else { - unsigned char digit = static_cast(*p - '0'); - i = digit; - p++; - // the is_made_of_eight_digits_fast routine is unlikely to help here because - // we rarely see large integer parts like 123456789 - while (is_integer(*p)) { - digit = static_cast(*p - '0'); - if (mul_overflow(i, 10, &i)) { -#ifdef JSON_TEST_NUMBERS // for unit testing - found_invalid_number(src); -#endif - return false; // overflow - } - if (add_overflow(i, digit, &i)) { -#ifdef JSON_TEST_NUMBERS // for unit testing - found_invalid_number(src); -#endif - return false; // overflow - } - ++p; - } - } - if (negative) { - if (i > 0x8000000000000000) { - // overflows! -#ifdef JSON_TEST_NUMBERS // for unit testing - found_invalid_number(src); -#endif - return false; // overflow - } else if (i == 0x8000000000000000) { - // In two's complement, we cannot represent 0x8000000000000000 - // as a positive signed integer, but the negative version is - // possible. - constexpr int64_t signed_answer = INT64_MIN; - writer.append_s64(signed_answer); -#ifdef JSON_TEST_NUMBERS // for unit testing - found_integer(signed_answer, src); -#endif - } else { - // we can negate safely - int64_t signed_answer = -static_cast(i); - writer.append_s64(signed_answer); -#ifdef JSON_TEST_NUMBERS // for unit testing - found_integer(signed_answer, src); -#endif - } - } else { - // we have a positive integer, the contract is that - // we try to represent it as a signed integer and only - // fallback on unsigned integers if absolutely necessary. 
- if (i < 0x8000000000000000) { -#ifdef JSON_TEST_NUMBERS // for unit testing - found_integer(i, src); -#endif - writer.append_s64(i); - } else { -#ifdef JSON_TEST_NUMBERS // for unit testing - found_unsigned_integer(i, src); -#endif - writer.append_u64(i); - } - } - return is_structural_or_whitespace(*p); -} - template bool slow_float_parsing(UNUSED const char * src, W writer) { double d; if (parse_float_strtod(src, &d)) { - writer.append_double(d); -#ifdef JSON_TEST_NUMBERS // for unit testing - found_float(d, (const uint8_t *)src); -#endif + WRITE_DOUBLE(d, (const uint8_t *)src, writer); return true; } -#ifdef JSON_TEST_NUMBERS // for unit testing - found_invalid_number((const uint8_t *)src); + return INVALID_NUMBER((const uint8_t *)src); +} + +really_inline bool parse_decimal(UNUSED const uint8_t *const src, const char *&p, uint64_t &i, int64_t &exponent) { + // we continue with the fiction that we have an integer. If the + // floating point number is representable as x * 10^z for some integer + // z that fits in 53 bits, then we will be able to convert back the + // the integer into a float in a lossless manner. + const char *const first_after_period = p; + if (!is_integer(*p)) { return INVALID_NUMBER(src); } // There must be at least one digit after the . + + unsigned char digit = static_cast(*p - '0'); + ++p; + i = i * 10 + digit; // might overflow + multiplication by 10 is likely + // cheaper than arbitrary mult. + // we will handle the overflow later +#ifdef SWAR_NUMBER_PARSING + // this helps if we have lots of decimals! + // this turns out to be frequent enough. + if (is_made_of_eight_digits_fast(p)) { + i = i * 100000000 + parse_eight_digits_unrolled(p); + p += 8; + } #endif - return false; + while (is_integer(*p)) { + digit = static_cast(*p - '0'); + ++p; + i = i * 10 + digit; // in rare cases, this will overflow, but that's ok + // because we have parse_highprecision_float later. 
+ } + exponent = first_after_period - p; + return true; +} + +really_inline bool parse_exponent(UNUSED const uint8_t *const src, const char *&p, int64_t &exponent) { + bool neg_exp = false; + if ('-' == *p) { + neg_exp = true; + ++p; + } else if ('+' == *p) { + ++p; + } + + // e[+-] must be followed by a number + if (!is_integer(*p)) { return INVALID_NUMBER(src); } + unsigned char digit = static_cast(*p - '0'); + int64_t exp_number = digit; + p++; + if (is_integer(*p)) { + digit = static_cast(*p - '0'); + exp_number = 10 * exp_number + digit; + ++p; + } + if (is_integer(*p)) { + digit = static_cast(*p - '0'); + exp_number = 10 * exp_number + digit; + ++p; + } + while (is_integer(*p)) { + // we need to check for overflows; we refuse to parse this + if (exp_number > 0x100000000) { return INVALID_NUMBER(src); } + digit = static_cast(*p - '0'); + exp_number = 10 * exp_number + digit; + ++p; + } + exponent += (neg_exp ? -exp_number : exp_number); + return true; +} + +template +really_inline bool write_float(const uint8_t *const src, bool negative, uint64_t i, const char * start_digits, int digit_count, int64_t exponent, W &writer) { + // If we frequently had to deal with long strings of digits, + // we could extend our code by using a 128-bit integer instead + // of a 64-bit integer. However, this is uncommon in practice. + // digit count is off by 1 because of the decimal (assuming there was one). + if (unlikely((digit_count-1 >= 19))) { // this is uncommon + // It is possible that the integer had an overflow. + // We have to handle the case where we have 0.0000somenumber. + const char *start = start_digits; + while ((*start == '0') || (*start == '.')) { + start++; + } + // we over-decrement by one when there is a '.' + digit_count -= int(start - start_digits); + if (digit_count >= 19) { + // Ok, chances are good that we had an overflow! + // this is almost never going to get called!!! + // we start anew, going slowly!!! 
+ // This will happen in the following examples: + // 10000000000000000000000000000000000000000000e+308 + // 3.1415926535897932384626433832795028841971693993751 + // + bool success = slow_float_parsing((const char *) src, writer); + // The number was already written, but we made a copy of the writer + // when we passed it to the parse_large_integer() function, so + writer.skip_double(); + return success; + } + } + // NOTE: it's weird that the unlikely() only wraps half the if, but it seems to get slower any other + // way we've tried: https://github.com/simdjson/simdjson/pull/990#discussion_r448497331 + // To future reader: we'd love if someone found a better way, or at least could explain this result! + if (unlikely(exponent < FASTFLOAT_SMALLEST_POWER) || (exponent > FASTFLOAT_LARGEST_POWER)) { + // this is almost never going to get called!!! + // we start anew, going slowly!!! + bool success = slow_float_parsing((const char *) src, writer); + // The number was already written, but we made a copy of the writer when we passed it to the + // slow_float_parsing() function, so we have to skip those tape spots now that we've returned + writer.skip_double(); + return success; + } + bool success = true; + double d = compute_float_64(exponent, i, negative, &success); + if (!success) { + // we are almost never going to get here. 
+ if (!parse_float_strtod((const char *)src, &d)) { return INVALID_NUMBER(src); } + } + WRITE_DOUBLE(d, src, writer); + return true; } // parse the number at src @@ -13162,32 +12945,22 @@ really_inline bool parse_number(UNUSED const uint8_t *const src, if (found_minus) { ++p; negative = true; - if (!is_integer(*p)) { // a negative sign must be followed by an integer -#ifdef JSON_TEST_NUMBERS // for unit testing - found_invalid_number(src); -#endif - return false; - } + // a negative sign must be followed by an integer + if (!is_integer(*p)) { return INVALID_NUMBER(src); } } const char *const start_digits = p; uint64_t i; // an unsigned int avoids signed overflows (which are bad) - if (*p == '0') { // 0 cannot be followed by an integer + if (*p == '0') { ++p; - if (is_integer(*p)) { -#ifdef JSON_TEST_NUMBERS // for unit testing - found_invalid_number(src); -#endif - return false; - } + if (is_integer(*p)) { return INVALID_NUMBER(src); } // 0 cannot be followed by an integer i = 0; } else { - if (!(is_integer(*p))) { // must start with an integer -#ifdef JSON_TEST_NUMBERS // for unit testing - found_invalid_number(src); -#endif - return false; - } + // NOTE: This is a redundant check--either we're negative, in which case we checked whether this + // is a digit above, or the caller already determined we start with a digit. But removing this + // check seems to make things slower: https://github.com/simdjson/simdjson/pull/990#discussion_r448512448 + // Please do try yourself, or think of ways to explain it--we'd love to understand :) + if (!is_integer(*p)) { return INVALID_NUMBER(src); } // must start with an integer unsigned char digit = static_cast(*p - '0'); i = digit; p++; @@ -13201,163 +12974,67 @@ really_inline bool parse_number(UNUSED const uint8_t *const src, ++p; } } + + // + // Handle floats if there is a . or e (or both) + // int64_t exponent = 0; bool is_float = false; if ('.' 
== *p) { - is_float = true; // At this point we know that we have a float - // we continue with the fiction that we have an integer. If the - // floating point number is representable as x * 10^z for some integer - // z that fits in 53 bits, then we will be able to convert back the - // the integer into a float in a lossless manner. + is_float = true; ++p; - const char *const first_after_period = p; - if (is_integer(*p)) { - unsigned char digit = static_cast(*p - '0'); - ++p; - i = i * 10 + digit; // might overflow + multiplication by 10 is likely - // cheaper than arbitrary mult. - // we will handle the overflow later - } else { -#ifdef JSON_TEST_NUMBERS // for unit testing - found_invalid_number(src); -#endif - return false; - } -#ifdef SWAR_NUMBER_PARSING - // this helps if we have lots of decimals! - // this turns out to be frequent enough. - if (is_made_of_eight_digits_fast(p)) { - i = i * 100000000 + parse_eight_digits_unrolled(p); - p += 8; - } -#endif - while (is_integer(*p)) { - unsigned char digit = static_cast(*p - '0'); - ++p; - i = i * 10 + digit; // in rare cases, this will overflow, but that's ok - // because we have parse_highprecision_float later. 
- } - exponent = first_after_period - p; + if (!parse_decimal(src, p, i, exponent)) { return false; } } - int digit_count = - int(p - start_digits) - 1; // used later to guard against overflows - int64_t exp_number = 0; // exponential part + int digit_count = int(p - start_digits); // used later to guard against overflows if (('e' == *p) || ('E' == *p)) { is_float = true; ++p; - bool neg_exp = false; - if ('-' == *p) { - neg_exp = true; - ++p; - } else if ('+' == *p) { - ++p; - } - if (!is_integer(*p)) { -#ifdef JSON_TEST_NUMBERS // for unit testing - found_invalid_number(src); -#endif - return false; - } - unsigned char digit = static_cast(*p - '0'); - exp_number = digit; - p++; - if (is_integer(*p)) { - digit = static_cast(*p - '0'); - exp_number = 10 * exp_number + digit; - ++p; - } - if (is_integer(*p)) { - digit = static_cast(*p - '0'); - exp_number = 10 * exp_number + digit; - ++p; - } - while (is_integer(*p)) { - if (exp_number > 0x100000000) { // we need to check for overflows - // we refuse to parse this -#ifdef JSON_TEST_NUMBERS // for unit testing - found_invalid_number(src); -#endif - return false; - } - digit = static_cast(*p - '0'); - exp_number = 10 * exp_number + digit; - ++p; - } - exponent += (neg_exp ? -exp_number : exp_number); + if (!parse_exponent(src, p, exponent)) { return false; } } if (is_float) { - // If we frequently had to deal with long strings of digits, - // we could extend our code by using a 128-bit integer instead - // of a 64-bit integer. However, this is uncommon in practice. - if (unlikely((digit_count >= 19))) { // this is uncommon - // It is possible that the integer had an overflow. - // We have to handle the case where we have 0.0000somenumber. - const char *start = start_digits; - while ((*start == '0') || (*start == '.')) { - start++; - } - // we over-decrement by one when there is a '.' - digit_count -= int(start - start_digits); - if (digit_count >= 19) { - // Ok, chances are good that we had an overflow! 
- // this is almost never going to get called!!! - // we start anew, going slowly!!! - // This will happen in the following examples: - // 10000000000000000000000000000000000000000000e+308 - // 3.1415926535897932384626433832795028841971693993751 - // - bool success = slow_float_parsing((const char *) src, writer); - // The number was already written, but we made a copy of the writer - // when we passed it to the parse_large_integer() function, so - writer.skip_double(); - return success; - } - } - if (unlikely(exponent < FASTFLOAT_SMALLEST_POWER) || - (exponent > FASTFLOAT_LARGEST_POWER)) { // this is uncommon!!! - // this is almost never going to get called!!! - // we start anew, going slowly!!! - bool success = slow_float_parsing((const char *) src, writer); - // The number was already written, but we made a copy of the writer when we passed it to the - // slow_float_parsing() function, so we have to skip those tape spots now that we've returned - writer.skip_double(); - return success; - } - bool success = true; - double d = compute_float_64(exponent, i, negative, &success); - if (!success) { - // we are almost never going to get here. - success = parse_float_strtod((const char *)src, &d); - } - if (success) { - writer.append_double(d); -#ifdef JSON_TEST_NUMBERS // for unit testing - found_float(d, src); -#endif - return true; - } else { -#ifdef JSON_TEST_NUMBERS // for unit testing - found_invalid_number(src); -#endif - return false; + return write_float(src, negative, i, start_digits, digit_count, exponent, writer); + } + + // The longest negative 64-bit number is 19 digits. + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + int longest_digit_count = negative ? 19 : 20; + if (digit_count > longest_digit_count) { return INVALID_NUMBER(src); } + if (digit_count == longest_digit_count) { + // Anything negative above INT64_MAX is either invalid or INT64_MIN. 
+ if (negative && i > uint64_t(INT64_MAX)) { + // If the number is negative and can't fit in a signed integer, it's invalid. + if (i > uint64_t(INT64_MAX)+1) { return INVALID_NUMBER(src); } + + // If it's negative, it has to be INT64_MAX+1 now (or INT64_MIN). + // C++ can't reliably negate uint64_t INT64_MIN, it seems. Special case it. + WRITE_INTEGER(INT64_MIN, src, writer); + return is_structural_or_whitespace(*p); } + + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to 9,223,372,036,854,775,808 (INT64_MAX). + // + if (!negative && (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX))) { return INVALID_NUMBER(src); } + } + + // Write unsigned if it doesn't fit in a signed integer. + if (i > uint64_t(INT64_MAX)) { + WRITE_UNSIGNED(i, src, writer); } else { - if (unlikely(digit_count >= 18)) { // this is uncommon!!! - // there is a good chance that we had an overflow, so we need - // need to recover: we parse the whole thing again. - bool success = parse_large_integer(src, writer, found_minus); - // The number was already written, but we made a copy of the writer - // when we passed it to the parse_large_integer() function, so - writer.skip_large_integer(); - return success; - } - i = negative ? 
0 - i : i; - writer.append_s64(i); -#ifdef JSON_TEST_NUMBERS // for unit testing - found_integer(i, src); -#endif + WRITE_INTEGER(negative ? 0 - i : i, src, writer); } return is_structural_or_whitespace(*p); + #endif // SIMDJSON_SKIPNUMBERPARSING } @@ -13530,31 +13207,7 @@ public: really_inline size_t remaining_len() { return parser.len - *current_structural; } - template - really_inline bool with_space_terminated_copy(const F& f) { - /** - * We need to make a copy to make sure that the string is space terminated. - * This is not about padding the input, which should already padded up - * to len + SIMDJSON_PADDING. However, we have no control at this stage - * on how the padding was done. What if the input string was padded with nulls? - * It is quite common for an input string to have an extra null character (C string). - * We do not want to allow 9\0 (where \0 is the null character) inside a JSON - * document, but the string "9\0" by itself is fine. So we make a copy and - * pad the input with spaces when we know that there is just one input element. - * This copy is relatively expensive, but it will almost never be called in - * practice unless you are in the strange scenario where you have many JSON - * documents made of single atoms. - */ - char *copy = static_cast(malloc(parser.len + SIMDJSON_PADDING)); - if (copy == nullptr) { - return true; - } - memcpy(copy, buf, parser.len); - memset(copy + parser.len, ' ', SIMDJSON_PADDING); - bool result = f(reinterpret_cast(copy), *current_structural); - free(copy); - return result; - } + really_inline bool past_end(uint32_t n_structural_indexes) { return current_structural >= &parser.structural_indexes[n_structural_indexes]; } @@ -13836,6 +13489,31 @@ struct structural_parser : structural_iterator { return parse_number(current(), found_minus); } + really_inline bool parse_number_with_space_terminated_copy(const bool is_negative) { + /** + * We need to make a copy to make sure that the string is space terminated. 
+ * This is not about padding the input, which should already padded up + * to len + SIMDJSON_PADDING. However, we have no control at this stage + * on how the padding was done. What if the input string was padded with nulls? + * It is quite common for an input string to have an extra null character (C string). + * We do not want to allow 9\0 (where \0 is the null character) inside a JSON + * document, but the string "9\0" by itself is fine. So we make a copy and + * pad the input with spaces when we know that there is just one input element. + * This copy is relatively expensive, but it will almost never be called in + * practice unless you are in the strange scenario where you have many JSON + * documents made of single atoms. + */ + uint8_t *copy = static_cast<uint8_t *>(malloc(parser.len + SIMDJSON_PADDING)); + if (copy == nullptr) { + return true; + } + memcpy(copy, buf, parser.len); + memset(copy + parser.len, ' ', SIMDJSON_PADDING); + size_t idx = *current_structural; + bool result = parse_number(&copy[idx], is_negative); // parse_number does not throw + free(copy); + return result; + } WARN_UNUSED really_inline ret_address_t parse_value(const unified_machine_addresses &addresses, ret_address_t continue_state) { switch (advance_char()) { case '"': @@ -13973,6 +13651,7 @@ struct structural_parser : structural_iterator { #undef FAIL_IF #define FAIL_IF(EXPR) { if (EXPR) { goto error; } } + template WARN_UNUSED static error_code parse_structurals(dom_parser_implementation &dom_parser, dom::document &doc) noexcept { dom_parser.doc = &doc; @@ -14018,18 +13697,16 @@ WARN_UNUSED static error_code parse_structurals(dom_parser_implementation &dom_p goto finish; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': - FAIL_IF( - parser.with_space_terminated_copy([&](const uint8_t *copy, size_t idx) { - return parser.parse_number(&copy[idx], false); - }) - ); + // Next line used to be an interesting functional programming exercise with + // a
lambda that gets passed to another function via a closure. This would confuse the + // clangcl compiler under Visual Studio 2019 (recent release). + { if(parser.parse_number_with_space_terminated_copy(false)) { goto error; }} goto finish; case '-': - FAIL_IF( - parser.with_space_terminated_copy([&](const uint8_t *copy, size_t idx) { - return parser.parse_number(&copy[idx], true); - }) - ); + // Next line used to be an interesting functional programming exercise with + // a lambda that gets passed to another function via a closure. This would confuse the + // clangcl compiler under Visual Studio 2019 (recent release). + { if(parser.parse_number_with_space_terminated_copy(true)) { goto error; }} goto finish; default: parser.log_error("Document starts with a non-value character"); diff --git a/singleheader/simdjson.h b/singleheader/simdjson.h index 7a7e64a4..0fe96e93 100644 --- a/singleheader/simdjson.h +++ b/singleheader/simdjson.h @@ -1,4 +1,4 @@ -/* auto-generated on Wed Jul 1 14:00:57 EDT 2020. Do not edit! */ +/* auto-generated on Mon Jul 6 18:16:52 EDT 2020. Do not edit! */ /* begin file include/simdjson.h */ #ifndef SIMDJSON_H #define SIMDJSON_H diff --git a/src/arm64/dom_parser_implementation.cpp b/src/arm64/dom_parser_implementation.cpp index 2acaec12..e30b4fcc 100644 --- a/src/arm64/dom_parser_implementation.cpp +++ b/src/arm64/dom_parser_implementation.cpp @@ -26,13 +26,24 @@ struct json_character_block { }; really_inline json_character_block json_character_block::classify(const simd::simd8x64 in) { - auto v = in.map([&](simd8 chunk) { - auto nib_lo = chunk & 0xf; - auto nib_hi = chunk.shr<4>(); - auto shuf_lo = nib_lo.lookup_16(16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0); - auto shuf_hi = nib_hi.lookup_16(8, 0, 18, 4, 0, 1, 0, 1, 0, 0, 0, 3, 2, 1, 0, 0); - return shuf_lo & shuf_hi; - }); + // Functional programming causes trouble with Visual Studio.
+ // Keeping this version in comments since it is much nicer: + // auto v = in.map([&](simd8 chunk) { + // auto nib_lo = chunk & 0xf; + // auto nib_hi = chunk.shr<4>(); + // auto shuf_lo = nib_lo.lookup_16(16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0); + // auto shuf_hi = nib_hi.lookup_16(8, 0, 18, 4, 0, 1, 0, 1, 0, 0, 0, 3, 2, 1, 0, 0); + // return shuf_lo & shuf_hi; + // }); + const simd8 table1(16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0); + const simd8 table2(8, 0, 18, 4, 0, 1, 0, 1, 0, 0, 0, 3, 2, 1, 0, 0); + + auto v = simd8x64( + (in.chunks[0] & 0xf).lookup_16(table1) & (in.chunks[0].shr<4>()).lookup_16(table2), + (in.chunks[1] & 0xf).lookup_16(table1) & (in.chunks[1].shr<4>()).lookup_16(table2), + (in.chunks[2] & 0xf).lookup_16(table1) & (in.chunks[2].shr<4>()).lookup_16(table2), + (in.chunks[3] & 0xf).lookup_16(table1) & (in.chunks[3].shr<4>()).lookup_16(table2) + ); // We compute whitespace and op separately. If the code later only use one or the @@ -51,13 +62,25 @@ really_inline json_character_block json_character_block::classify(const simd::si // there is a small untaken optimization opportunity here. We deliberately // do not pick it up. 
- uint64_t op = v.map([&](simd8 _v) { return _v.any_bits_set(0x7); }).to_bitmask(); - uint64_t whitespace = v.map([&](simd8 _v) { return _v.any_bits_set(0x18); }).to_bitmask(); + uint64_t op = simd8x64( + v.chunks[0].any_bits_set(0x7), + v.chunks[1].any_bits_set(0x7), + v.chunks[2].any_bits_set(0x7), + v.chunks[3].any_bits_set(0x7) + ).to_bitmask(); + + uint64_t whitespace = simd8x64( + v.chunks[0].any_bits_set(0x18), + v.chunks[1].any_bits_set(0x18), + v.chunks[2].any_bits_set(0x18), + v.chunks[3].any_bits_set(0x18) + ).to_bitmask(); + return { whitespace, op }; } really_inline bool is_ascii(simd8x64 input) { - simd8 bits = input.reduce([&](simd8 a,simd8 b) { return a|b; }); + simd8 bits = (input.chunks[0] | input.chunks[1]) | (input.chunks[2] | input.chunks[3]); return bits.max() < 0b10000000u; } diff --git a/src/arm64/simd.h b/src/arm64/simd.h index ebce9d62..a89098cd 100644 --- a/src/arm64/simd.h +++ b/src/arm64/simd.h @@ -442,43 +442,6 @@ really_inline int8x16_t make_int8x16_t(int8_t x1, int8_t x2, int8_t x3, int8_ each(3); } - template - really_inline void each(F const& each_chunk) const - { - each_chunk(this->chunks[0]); - each_chunk(this->chunks[1]); - each_chunk(this->chunks[2]); - each_chunk(this->chunks[3]); - } - - template - really_inline simd8x64 map(F const& map_chunk) const { - return simd8x64( - map_chunk(this->chunks[0]), - map_chunk(this->chunks[1]), - map_chunk(this->chunks[2]), - map_chunk(this->chunks[3]) - ); - } - - template - really_inline simd8x64 map(const simd8x64 b, F const& map_chunk) const { - return simd8x64( - map_chunk(this->chunks[0], b.chunks[0]), - map_chunk(this->chunks[1], b.chunks[1]), - map_chunk(this->chunks[2], b.chunks[2]), - map_chunk(this->chunks[3], b.chunks[3]) - ); - } - - template - really_inline simd8 reduce(F const& reduce_pair) const { - return reduce_pair( - reduce_pair(this->chunks[0], this->chunks[1]), - reduce_pair(this->chunks[2], this->chunks[3]) - ); - } - really_inline uint64_t to_bitmask() const { #ifdef 
SIMDJSON_REGULAR_VISUAL_STUDIO const uint8x16_t bit_mask = make_uint8x16_t( @@ -501,17 +464,32 @@ really_inline int8x16_t make_int8x16_t(int8_t x1, int8_t x2, int8_t x3, int8_ really_inline simd8x64 bit_or(const T m) const { const simd8 mask = simd8::splat(m); - return this->map( [&](simd8 a) { return a | mask; } ); + return simd8x64( + this->chunks[0] | mask, + this->chunks[1] | mask, + this->chunks[2] | mask, + this->chunks[3] | mask + ); } really_inline uint64_t eq(const T m) const { const simd8 mask = simd8::splat(m); - return this->map( [&](simd8 a) { return a == mask; } ).to_bitmask(); + return simd8x64( + this->chunks[0] == mask, + this->chunks[1] == mask, + this->chunks[2] == mask, + this->chunks[3] == mask + ).to_bitmask(); } really_inline uint64_t lteq(const T m) const { const simd8 mask = simd8::splat(m); - return this->map( [&](simd8 a) { return a <= mask; } ).to_bitmask(); + return simd8x64( + this->chunks[0] <= mask, + this->chunks[1] <= mask, + this->chunks[2] <= mask, + this->chunks[3] <= mask + ).to_bitmask(); } }; // struct simd8x64 diff --git a/src/generic/stage2/structural_iterator.h b/src/generic/stage2/structural_iterator.h index ae47ec91..2682b6d0 100644 --- a/src/generic/stage2/structural_iterator.h +++ b/src/generic/stage2/structural_iterator.h @@ -31,31 +31,7 @@ public: really_inline size_t remaining_len() { return parser.len - *current_structural; } - template - really_inline bool with_space_terminated_copy(const F& f) { - /** - * We need to make a copy to make sure that the string is space terminated. - * This is not about padding the input, which should already padded up - * to len + SIMDJSON_PADDING. However, we have no control at this stage - * on how the padding was done. What if the input string was padded with nulls? - * It is quite common for an input string to have an extra null character (C string). 
- * We do not want to allow 9\0 (where \0 is the null character) inside a JSON - * document, but the string "9\0" by itself is fine. So we make a copy and - * pad the input with spaces when we know that there is just one input element. - * This copy is relatively expensive, but it will almost never be called in - * practice unless you are in the strange scenario where you have many JSON - * documents made of single atoms. - */ - char *copy = static_cast(malloc(parser.len + SIMDJSON_PADDING)); - if (copy == nullptr) { - return true; - } - memcpy(copy, buf, parser.len); - memset(copy + parser.len, ' ', SIMDJSON_PADDING); - bool result = f(reinterpret_cast(copy), *current_structural); - free(copy); - return result; - } + really_inline bool past_end(uint32_t n_structural_indexes) { return current_structural >= &parser.structural_indexes[n_structural_indexes]; } diff --git a/src/generic/stage2/structural_parser.h b/src/generic/stage2/structural_parser.h index 53bcc3ac..359026b7 100644 --- a/src/generic/stage2/structural_parser.h +++ b/src/generic/stage2/structural_parser.h @@ -169,6 +169,31 @@ struct structural_parser : structural_iterator { return parse_number(current(), found_minus); } + really_inline bool parse_number_with_space_terminated_copy(const bool is_negative) { + /** + * We need to make a copy to make sure that the string is space terminated. + * This is not about padding the input, which should already padded up + * to len + SIMDJSON_PADDING. However, we have no control at this stage + * on how the padding was done. What if the input string was padded with nulls? + * It is quite common for an input string to have an extra null character (C string). + * We do not want to allow 9\0 (where \0 is the null character) inside a JSON + * document, but the string "9\0" by itself is fine. So we make a copy and + * pad the input with spaces when we know that there is just one input element. 
+ * This copy is relatively expensive, but it will almost never be called in + * practice unless you are in the strange scenario where you have many JSON + * documents made of single atoms. + */ + uint8_t *copy = static_cast<uint8_t *>(malloc(parser.len + SIMDJSON_PADDING)); + if (copy == nullptr) { + return true; + } + memcpy(copy, buf, parser.len); + memset(copy + parser.len, ' ', SIMDJSON_PADDING); + size_t idx = *current_structural; + bool result = parse_number(&copy[idx], is_negative); // parse_number does not throw + free(copy); + return result; + } WARN_UNUSED really_inline ret_address_t parse_value(const unified_machine_addresses &addresses, ret_address_t continue_state) { switch (advance_char()) { case '"': @@ -306,6 +331,7 @@ struct structural_parser : structural_iterator { #undef FAIL_IF #define FAIL_IF(EXPR) { if (EXPR) { goto error; } } + template WARN_UNUSED static error_code parse_structurals(dom_parser_implementation &dom_parser, dom::document &doc) noexcept { dom_parser.doc = &doc; @@ -351,18 +377,16 @@ WARN_UNUSED static error_code parse_structurals(dom_parser_implementation &dom_p goto finish; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': - FAIL_IF( - parser.with_space_terminated_copy([&](const uint8_t *copy, size_t idx) { - return parser.parse_number(&copy[idx], false); - }) - ); + // Next line used to be an interesting functional programming exercise with + // a lambda that gets passed to another function via a closure. This would confuse the + // clangcl compiler under Visual Studio 2019 (recent release). + { if(parser.parse_number_with_space_terminated_copy(false)) { goto error; }} goto finish; case '-': - FAIL_IF( - parser.with_space_terminated_copy([&](const uint8_t *copy, size_t idx) { - return parser.parse_number(&copy[idx], true); - }) - ); + // Next line used to be an interesting functional programming exercise with + // a lambda that gets passed to another function via a closure.
This would confuse the + // clangcl compiler under Visual Studio 2019 (recent release). + { if(parser.parse_number_with_space_terminated_copy(true)) { goto error; }} goto finish; default: parser.log_error("Document starts with a non-value character"); diff --git a/src/haswell/dom_parser_implementation.cpp b/src/haswell/dom_parser_implementation.cpp index b7b096d7..863c8cae 100644 --- a/src/haswell/dom_parser_implementation.cpp +++ b/src/haswell/dom_parser_implementation.cpp @@ -37,19 +37,20 @@ really_inline json_character_block json_character_block::classify(const simd::si // hope that useless computations will be omitted. This is namely case when // minifying (we only need whitespace). - uint64_t whitespace = in.map([&](simd8 _in) { - return _in == simd8(_mm256_shuffle_epi8(whitespace_table, _in)); - }).to_bitmask(); - - uint64_t op = in.map([&](simd8 _in) { - // | 32 handles the fact that { } and [ ] are exactly 32 bytes apart - return (_in | 32) == simd8(_mm256_shuffle_epi8(op_table, _in-',')); - }).to_bitmask(); + uint64_t whitespace = simd8x64( + in.chunks[0] == simd8(_mm256_shuffle_epi8(whitespace_table, in.chunks[0])), + in.chunks[1] == simd8(_mm256_shuffle_epi8(whitespace_table, in.chunks[1])) + ).to_bitmask(); + + uint64_t op = simd8x64( + (in.chunks[0] | 32) == simd8(_mm256_shuffle_epi8(op_table, in.chunks[0]-',')), + (in.chunks[1] | 32) == simd8(_mm256_shuffle_epi8(op_table, in.chunks[1]-',')) + ).to_bitmask(); return { whitespace, op }; } really_inline bool is_ascii(simd8x64 input) { - simd8 bits = input.reduce([&](simd8 a,simd8 b) { return a|b; }); + simd8 bits = (input.chunks[0] | input.chunks[1]); return !bits.any_bits_set_anywhere(0b10000000u); } diff --git a/src/haswell/simd.h b/src/haswell/simd.h index 9033ff57..140e01d1 100644 --- a/src/haswell/simd.h +++ b/src/haswell/simd.h @@ -316,36 +316,6 @@ namespace simd { this->chunks[1].store(ptr+sizeof(simd8)*1); } - template - really_inline void each(F const& each_chunk) const - { - 
each_chunk(this->chunks[0]); - each_chunk(this->chunks[1]); - } - - template - really_inline simd8x64 map(F const& map_chunk) const { - return simd8x64( - map_chunk(this->chunks[0]), - map_chunk(this->chunks[1]) - ); - } - - - - template - really_inline simd8x64 map(const simd8x64 b, F const& map_chunk) const { - return simd8x64( - map_chunk(this->chunks[0], b.chunks[0]), - map_chunk(this->chunks[1], b.chunks[1]) - ); - } - - template - really_inline simd8 reduce(F const& reduce_pair) const { - return reduce_pair(this->chunks[0], this->chunks[1]); - } - really_inline uint64_t to_bitmask() const { uint64_t r_lo = uint32_t(this->chunks[0].to_bitmask()); uint64_t r_hi = this->chunks[1].to_bitmask(); @@ -354,17 +324,26 @@ namespace simd { really_inline simd8x64 bit_or(const T m) const { const simd8 mask = simd8::splat(m); - return this->map( [&](simd8 a) { return a | mask; } ); + return simd8x64( + this->chunks[0] | mask, + this->chunks[1] | mask + ); } really_inline uint64_t eq(const T m) const { const simd8 mask = simd8::splat(m); - return this->map( [&](simd8 a) { return a == mask; } ).to_bitmask(); + return simd8x64( + this->chunks[0] == mask, + this->chunks[1] == mask + ).to_bitmask(); } really_inline uint64_t lteq(const T m) const { const simd8 mask = simd8::splat(m); - return this->map( [&](simd8 a) { return a <= mask; } ).to_bitmask(); + return simd8x64( + this->chunks[0] <= mask, + this->chunks[1] <= mask + ).to_bitmask(); } }; // struct simd8x64 diff --git a/src/westmere/dom_parser_implementation.cpp b/src/westmere/dom_parser_implementation.cpp index 8b173634..5016ebaa 100644 --- a/src/westmere/dom_parser_implementation.cpp +++ b/src/westmere/dom_parser_implementation.cpp @@ -38,19 +38,25 @@ really_inline json_character_block json_character_block::classify(const simd::si // hope that useless computations will be omitted. This is namely case when // minifying (we only need whitespace). 
- uint64_t whitespace = in.map([&](simd8 _in) { - return _in == simd8(_mm_shuffle_epi8(whitespace_table, _in)); - }).to_bitmask(); + uint64_t whitespace = simd8x64( + in.chunks[0] == simd8(_mm_shuffle_epi8(whitespace_table, in.chunks[0])), + in.chunks[1] == simd8(_mm_shuffle_epi8(whitespace_table, in.chunks[1])), + in.chunks[2] == simd8(_mm_shuffle_epi8(whitespace_table, in.chunks[2])), + in.chunks[3] == simd8(_mm_shuffle_epi8(whitespace_table, in.chunks[3])) + ).to_bitmask(); - uint64_t op = in.map([&](simd8 _in) { - // | 32 handles the fact that { } and [ ] are exactly 32 bytes apart - return (_in | 32) == simd8(_mm_shuffle_epi8(op_table, _in-',')); - }).to_bitmask(); + // | 32 handles the fact that { } and [ ] are exactly 32 bytes apart + uint64_t op = simd8x64( + (in.chunks[0] | 32) == simd8(_mm_shuffle_epi8(op_table, in.chunks[0]-',')), + (in.chunks[1] | 32) == simd8(_mm_shuffle_epi8(op_table, in.chunks[1]-',')), + (in.chunks[2] | 32) == simd8(_mm_shuffle_epi8(op_table, in.chunks[2]-',')), + (in.chunks[3] | 32) == simd8(_mm_shuffle_epi8(op_table, in.chunks[3]-',')) + ).to_bitmask(); return { whitespace, op }; } really_inline bool is_ascii(simd8x64 input) { - simd8 bits = input.reduce([&](simd8 a,simd8 b) { return a|b; }); + simd8 bits = (input.chunks[0] | input.chunks[1]) | (input.chunks[2] | input.chunks[3]); return !bits.any_bits_set_anywhere(0b10000000u); } diff --git a/src/westmere/simd.h b/src/westmere/simd.h index d8648335..705d6b2c 100644 --- a/src/westmere/simd.h +++ b/src/westmere/simd.h @@ -292,43 +292,6 @@ namespace simd { each(3); } - template - really_inline void each(F const& each_chunk) const - { - each_chunk(this->chunks[0]); - each_chunk(this->chunks[1]); - each_chunk(this->chunks[2]); - each_chunk(this->chunks[3]); - } - - template - really_inline simd8x64 map(F const& map_chunk) const { - return simd8x64( - map_chunk(this->chunks[0]), - map_chunk(this->chunks[1]), - map_chunk(this->chunks[2]), - map_chunk(this->chunks[3]) - ); - } - - 
template - really_inline simd8x64 map(const simd8x64 b, F const& map_chunk) const { - return simd8x64( - map_chunk(this->chunks[0], b.chunks[0]), - map_chunk(this->chunks[1], b.chunks[1]), - map_chunk(this->chunks[2], b.chunks[2]), - map_chunk(this->chunks[3], b.chunks[3]) - ); - } - - template - really_inline simd8 reduce(F const& reduce_pair) const { - return reduce_pair( - reduce_pair(this->chunks[0], this->chunks[1]), - reduce_pair(this->chunks[2], this->chunks[3]) - ); - } - really_inline uint64_t to_bitmask() const { uint64_t r0 = uint32_t(this->chunks[0].to_bitmask()); uint64_t r1 = this->chunks[1].to_bitmask(); @@ -339,17 +302,32 @@ namespace simd { really_inline simd8x64 bit_or(const T m) const { const simd8 mask = simd8::splat(m); - return this->map( [&](simd8 a) { return a | mask; } ); + return simd8x64( + this->chunks[0] | mask, + this->chunks[1] | mask, + this->chunks[2] | mask, + this->chunks[3] | mask + ); } really_inline uint64_t eq(const T m) const { const simd8 mask = simd8::splat(m); - return this->map( [&](simd8 a) { return a == mask; } ).to_bitmask(); + return simd8x64( + this->chunks[0] == mask, + this->chunks[1] == mask, + this->chunks[2] == mask, + this->chunks[3] == mask + ).to_bitmask(); } really_inline uint64_t lteq(const T m) const { const simd8 mask = simd8::splat(m); - return this->map( [&](simd8 a) { return a <= mask; } ).to_bitmask(); + return simd8x64( + this->chunks[0] <= mask, + this->chunks[1] <= mask, + this->chunks[2] <= mask, + this->chunks[3] <= mask + ).to_bitmask(); } }; // struct simd8x64