From 9e93509a5677882ba02c201b6fc9cd6d38858831 Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Mon, 10 Aug 2020 18:10:11 -0400 Subject: [PATCH] Fix number parsing (too lenient). (#1107) * Fix number parsing (too lenient). * Minor tweak. * These are Booleans. * Tweaking test config --- .cirrus.yml | 4 ++-- .github/workflows/mingw-ci.yml | 2 +- .github/workflows/mingw64-ci.yml | 2 +- .github/workflows/ubuntu18.yml | 2 +- .github/workflows/ubuntu20.yml | 2 +- jsonchecker/fail82.json | 1 + src/generic/stage2/numberparsing.h | 3 ++- src/jsoncharutils_tables.h | 4 ++-- 8 files changed, 11 insertions(+), 9 deletions(-) create mode 100644 jsonchecker/fail82.json diff --git a/.cirrus.yml b/.cirrus.yml index abef7f33..7d16de28 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -15,7 +15,7 @@ task: - mkdir build - cd build - cmake .. - - make -j4 + - make test_script: - cd build - - ctest -j4 --output-on-failure -E checkperf \ No newline at end of file + - ctest --output-on-failure -E checkperf \ No newline at end of file diff --git a/.github/workflows/mingw-ci.yml b/.github/workflows/mingw-ci.yml index 05822599..8aff9ea1 100644 --- a/.github/workflows/mingw-ci.yml +++ b/.github/workflows/mingw-ci.yml @@ -50,4 +50,4 @@ jobs: cd build32 cmake -DSIMDJSON_BUILD_STATIC=ON -DSIMDJSON_COMPETITION=OFF -DSIMDJSON_GOOGLE_BENCHMARKS=OFF -DSIMDJSON_ENABLE_THREADS=OFF .. cmake --build . --target parse_many_test jsoncheck basictests numberparsingcheck stringparsingcheck errortests integer_tests pointercheck --verbose - ctest . 
-R "(parse_many_test|jsoncheck|basictests|stringparsingcheck|numberparsingcheck|errortests|integer_tests|pointercheck)" --output-on-failure + ctest -R "(parse_many_test|jsoncheck|basictests|stringparsingcheck|numberparsingcheck|errortests|integer_tests|pointercheck)" --output-on-failure diff --git a/.github/workflows/mingw64-ci.yml b/.github/workflows/mingw64-ci.yml index 8b9b2785..397236e8 100644 --- a/.github/workflows/mingw64-ci.yml +++ b/.github/workflows/mingw64-ci.yml @@ -50,4 +50,4 @@ jobs: cd build64 cmake -DSIMDJSON_BUILD_STATIC=ON -DSIMDJSON_COMPETITION=OFF -DSIMDJSON_GOOGLE_BENCHMARKS=OFF -DSIMDJSON_ENABLE_THREADS=OFF .. cmake --build . --target parse_many_test jsoncheck basictests numberparsingcheck stringparsingcheck errortests integer_tests pointercheck --verbose - ctest . -R "(parse_many_test|jsoncheck|basictests|stringparsingcheck|numberparsingcheck|errortests|integer_tests|pointercheck)" --output-on-failure + ctest -R "(parse_many_test|jsoncheck|basictests|stringparsingcheck|numberparsingcheck|errortests|integer_tests|pointercheck)" --output-on-failure diff --git a/.github/workflows/ubuntu18.yml b/.github/workflows/ubuntu18.yml index c76b17f4..ed032913 100644 --- a/.github/workflows/ubuntu18.yml +++ b/.github/workflows/ubuntu18.yml @@ -17,6 +17,6 @@ jobs: cd build && cmake -DSIMDJSON_GOOGLE_BENCHMARKS=ON -DSIMDJSON_BUILD_STATIC=ON -DCMAKE_INSTALL_PREFIX:PATH=destination .. && cmake --build . && - ctest . 
-j --output-on-failure && + ctest -j --output-on-failure && make install && echo -e '#include <simdjson.h>\nint main(int argc,char**argv) {simdjson::dom::parser parser;simdjson::dom::element tweets = parser.load(argv[1]); }' > tmp.cpp && c++ -Idestination/include -Ldestination/lib -std=c++17 -Wl,-rpath,destination/lib -o linkandrun tmp.cpp -lsimdjson && ./linkandrun jsonexamples/twitter.json diff --git a/.github/workflows/ubuntu20.yml b/.github/workflows/ubuntu20.yml index 12830ddc..271eb935 100644 --- a/.github/workflows/ubuntu20.yml +++ b/.github/workflows/ubuntu20.yml @@ -17,6 +17,6 @@ jobs: cd build && cmake -DSIMDJSON_GOOGLE_BENCHMARKS=ON -DSIMDJSON_BUILD_STATIC=ON -DCMAKE_INSTALL_PREFIX:PATH=destination .. && cmake --build . && - ctest . -j --output-on-failure && + ctest -j --output-on-failure && make install && echo -e '#include <simdjson.h>\nint main(int argc,char**argv) {simdjson::dom::parser parser;simdjson::dom::element tweets = parser.load(argv[1]); }' > tmp.cpp && c++ -Idestination/include -Ldestination/lib -std=c++17 -Wl,-rpath,destination/lib -o linkandrun tmp.cpp -lsimdjson && ./linkandrun jsonexamples/twitter.json diff --git a/jsonchecker/fail82.json b/jsonchecker/fail82.json new file mode 100644 index 00000000..68e8fced --- /dev/null +++ b/jsonchecker/fail82.json @@ -0,0 +1 @@ +10.2.2 diff --git a/src/generic/stage2/numberparsing.h b/src/generic/stage2/numberparsing.h index 4524aac1..fbb30c12 100644 --- a/src/generic/stage2/numberparsing.h +++ b/src/generic/stage2/numberparsing.h @@ -460,7 +460,8 @@ really_inline bool parse_number(const uint8_t *const src, W &writer) { if (!parse_exponent(src, p, exponent)) { return false; } } if (is_float) { - return write_float(src, negative, i, start_digits, digit_count, exponent, writer); + const bool clean_end = is_structural_or_whitespace(*p); + return write_float(src, negative, i, start_digits, digit_count, exponent, writer) && clean_end; } // The longest negative 64-bit number is 19 digits. 
diff --git a/src/jsoncharutils_tables.h b/src/jsoncharutils_tables.h index afa1ebe5..46fa0b56 100644 --- a/src/jsoncharutils_tables.h +++ b/src/jsoncharutils_tables.h @@ -15,7 +15,7 @@ namespace simdjson { // we are also interested in the four whitespace characters // space 0x20, linefeed 0x0a, horizontal tab 0x09 and carriage return 0x0d -const uint32_t structural_or_whitespace_negated[256] = { +const bool structural_or_whitespace_negated[256] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, @@ -32,7 +32,7 @@ const uint32_t structural_or_whitespace_negated[256] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; -const uint32_t structural_or_whitespace[256] = { +const bool structural_or_whitespace[256] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,