From 0c8f2b9d85850e5a5f7e048a5550024275ed7f3b Mon Sep 17 00:00:00 2001 From: John Keiser Date: Mon, 3 Feb 2020 09:51:24 -0800 Subject: [PATCH] Make "make amalgamate" more automatic (#480) - automatically include local includes in the right places --- amalgamation.sh | 143 +- singleheader/amalgamation_demo.cpp | 2 +- singleheader/simdjson.cpp | 9410 ++++++++++++++-------------- singleheader/simdjson.h | 26 +- 4 files changed, 4826 insertions(+), 4755 deletions(-) diff --git a/amalgamation.sh b/amalgamation.sh index 5d58a650..f3a35db6 100755 --- a/amalgamation.sh +++ b/amalgamation.sh @@ -11,69 +11,79 @@ echo "See https://www.sqlite.org/amalgamation.html and https://en.wikipedia.org/ AMAL_H="simdjson.h" AMAL_C="simdjson.cpp" +SRCPATH="$SCRIPTPATH/src" +INCLUDEPATH="$SCRIPTPATH/include" + # this list excludes the "src/generic headers" ALLCFILES=" -$SCRIPTPATH/src/arm64/intrinsics.h -$SCRIPTPATH/src/haswell/intrinsics.h -$SCRIPTPATH/src/westmere/intrinsics.h -$SCRIPTPATH/src/simdprune_tables.h -$SCRIPTPATH/src/simdjson.cpp -$SCRIPTPATH/src/jsonioutil.cpp -$SCRIPTPATH/src/jsonminifier.cpp -$SCRIPTPATH/src/jsonparser.cpp -$SCRIPTPATH/src/arm64/bitmanipulation.h -$SCRIPTPATH/src/haswell/bitmanipulation.h -$SCRIPTPATH/src/westmere/bitmanipulation.h -$SCRIPTPATH/src/arm64/numberparsing.h -$SCRIPTPATH/src/haswell/numberparsing.h -$SCRIPTPATH/src/westmere/numberparsing.h -$SCRIPTPATH/src/arm64/bitmask.h -$SCRIPTPATH/src/haswell/bitmask.h -$SCRIPTPATH/src/westmere/bitmask.h -$SCRIPTPATH/src/arm64/simd.h -$SCRIPTPATH/src/haswell/simd.h -$SCRIPTPATH/src/westmere/simd.h -$SCRIPTPATH/src/arm64/stage1_find_marks.h -$SCRIPTPATH/src/haswell/stage1_find_marks.h -$SCRIPTPATH/src/westmere/stage1_find_marks.h -$SCRIPTPATH/src/stage1_find_marks.cpp -$SCRIPTPATH/src/arm64/stringparsing.h -$SCRIPTPATH/src/haswell/stringparsing.h -$SCRIPTPATH/src/westmere/stringparsing.h -$SCRIPTPATH/src/stage2_build_tape.cpp -$SCRIPTPATH/src/arm64/stage2_build_tape.h 
-$SCRIPTPATH/src/haswell/stage2_build_tape.h -$SCRIPTPATH/src/westmere/stage2_build_tape.h -$SCRIPTPATH/src/parsedjson.cpp -$SCRIPTPATH/src/parsedjsoniterator.cpp +simdjson.cpp +jsonioutil.cpp +jsonminifier.cpp +jsonparser.cpp +stage1_find_marks.cpp +stage2_build_tape.cpp +parsedjson.cpp +parsedjsoniterator.cpp " # order matters ALLCHEADERS=" -$SCRIPTPATH/include/simdjson/simdjson_version.h -$SCRIPTPATH/include/simdjson/portability.h -$SCRIPTPATH/include/simdjson/isadetection.h -$SCRIPTPATH/include/simdjson/jsonformatutils.h -$SCRIPTPATH/include/simdjson/simdjson.h -$SCRIPTPATH/include/simdjson/common_defs.h -$SCRIPTPATH/include/simdjson/padded_string.h -$SCRIPTPATH/include/simdjson/jsonioutil.h -$SCRIPTPATH/include/simdjson/jsonminifier.h -$SCRIPTPATH/include/simdjson/parsedjson.h -$SCRIPTPATH/include/simdjson/parsedjsoniterator.h -$SCRIPTPATH/include/simdjson/stage1_find_marks.h -$SCRIPTPATH/include/simdjson/stage2_build_tape.h -$SCRIPTPATH/include/simdjson/jsonparser.h -$SCRIPTPATH/src/jsoncharutils.h -$SCRIPTPATH/include/simdjson/jsonstream.h +simdjson/simdjson_version.h +simdjson/portability.h +simdjson/isadetection.h +simdjson/jsonformatutils.h +simdjson/simdjson.h +simdjson/common_defs.h +simdjson/padded_string.h +simdjson/jsonioutil.h +simdjson/jsonminifier.h +simdjson/parsedjson.h +simdjson/parsedjsoniterator.h +simdjson/stage1_find_marks.h +simdjson/stage2_build_tape.h +simdjson/jsonparser.h +simdjson/jsonstream.h " -for i in ${ALLCHEADERS} ${ALLCFILES}; do - test -e $i && continue - echo "FATAL: source file [$i] not found." +found_includes=() + +for file in ${ALLCFILES}; do + test -e "$SRCPATH/$file" && continue + echo "FATAL: source file [$SRCPATH/$file] not found." exit 127 done +for file in ${ALLCHEADERS}; do + test -e "$INCLUDEPATH/$file" && continue + echo "FATAL: source file [$INCLUDEPATH/$file] not found." + exit 127 +done + +function doinclude() +{ + file=$1 + line="${@:2}" + if [ -f $INCLUDEPATH/$file ]; then + if [[ ! 
" ${found_includes[@]} " =~ " ${file} " ]]; then + found_includes+=("$file") + dofile $INCLUDEPATH/$file + fi; + elif [ -f $SRCPATH/$file ]; then + # generic includes are included multiple times + if [[ "${file}" == *'generic/'*'.h' ]]; then + dofile $SRCPATH/$file + elif [[ ! " ${found_includes[@]} " =~ " ${file} " ]]; then + found_includes+=("$file") + dofile $SRCPATH/$file + else + echo "/* $file already included: $line */" + fi + else + # If we don't recognize it, just emit the #include + echo "$line" + fi +} + function dofile() { # Last lines are always ignored. Files should end by an empty lines. @@ -86,23 +96,15 @@ function dofile() file=$(echo $line| cut -d'"' -f 2) if [[ "${file}" == '../'* ]]; then - file=$(echo $file| cut -d'/' -f 2-) + file=$(echo $file| cut -d'/' -f 2-) fi; - # we ignore simdjson headers (except src/generic/*.h); they are handled in the above list - if [ -f include/$file ]; then - continue; - elif [ -f src/$file ]; then - # we paste the contents of src/generic/*.h - if [[ "${file}" == *'generic/'*'.h' ]]; then - echo "$( "${AMAL_H}" { for h in ${ALLCHEADERS}; do - dofile $h + doinclude $h "ERROR $h not found" done } >> "${AMAL_H}" @@ -128,13 +130,12 @@ echo "/* auto-generated on ${timestamp}. Do not edit! */" > "${AMAL_C}" echo "#endif" echo "" - for h in ${ALLCFILES}; do - dofile $h + for file in ${ALLCFILES}; do + dofile "$SRCPATH/$file" done } >> "${AMAL_C}" - DEMOCPP="amalgamation_demo.cpp" echo "Creating ${DEMOCPP}..." echo "/* auto-generated on ${timestamp}. Do not edit! */" > "${DEMOCPP}" diff --git a/singleheader/amalgamation_demo.cpp b/singleheader/amalgamation_demo.cpp index eded2e5f..0715f6b6 100755 --- a/singleheader/amalgamation_demo.cpp +++ b/singleheader/amalgamation_demo.cpp @@ -1,4 +1,4 @@ -/* auto-generated on Thu Jan 30 10:52:58 EST 2020. Do not edit! */ +/* auto-generated on Sun Feb 2 15:10:09 PST 2020. Do not edit! 
*/ #include #include "simdjson.h" diff --git a/singleheader/simdjson.cpp b/singleheader/simdjson.cpp index 21259465..eeefe6f5 100755 --- a/singleheader/simdjson.cpp +++ b/singleheader/simdjson.cpp @@ -1,4 +1,4 @@ -/* auto-generated on Thu Jan 30 10:52:58 EST 2020. Do not edit! */ +/* auto-generated on Sun Feb 2 15:10:09 PST 2020. Do not edit! */ #include "simdjson.h" /* used for http://dmalloc.com/ Dmalloc - Debug Malloc Library */ @@ -6,176 +6,6 @@ #include "dmalloc.h" #endif -/* begin file src/arm64/intrinsics.h */ -#ifndef SIMDJSON_ARM64_INTRINSICS_H -#define SIMDJSON_ARM64_INTRINSICS_H -#ifdef IS_ARM64 - -// This should be the correct header whether -// you use visual studio or other compilers. -#include -#endif // IS_ARM64 -#endif // SIMDJSON_ARM64_INTRINSICS_H -/* end file src/arm64/intrinsics.h */ -/* begin file src/haswell/intrinsics.h */ -#ifndef SIMDJSON_HASWELL_INTRINSICS_H -#define SIMDJSON_HASWELL_INTRINSICS_H - -#ifdef IS_X86_64 - -#ifdef _MSC_VER -#include // visual studio -#else -#include // elsewhere -#endif // _MSC_VER -#endif // IS_X86_64 -#endif // SIMDJSON_HASWELL_INTRINSICS_H -/* end file src/haswell/intrinsics.h */ -/* begin file src/westmere/intrinsics.h */ -#ifndef SIMDJSON_WESTMERE_INTRINSICS_H -#define SIMDJSON_WESTMERE_INTRINSICS_H - -#ifdef IS_X86_64 -#ifdef _MSC_VER -#include // visual studio -#else -#include // elsewhere -#endif // _MSC_VER -#endif // IS_X86_64 -#endif // SIMDJSON_WESTMERE_INTRINSICS_H -/* end file src/westmere/intrinsics.h */ -/* begin file src/simdprune_tables.h */ -#ifndef SIMDJSON_SIMDPRUNE_TABLES_H -#define SIMDJSON_SIMDPRUNE_TABLES_H -#include - -namespace simdjson { // table modified and copied from - // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetTable -static const unsigned char BitsSetTable256mul2[256] = { - 0, 2, 2, 4, 2, 4, 4, 6, 2, 4, 4, 6, 4, 6, 6, 8, 2, 4, 4, - 6, 4, 6, 6, 8, 4, 6, 6, 8, 6, 8, 8, 10, 2, 4, 4, 6, 4, 6, - 6, 8, 4, 6, 6, 8, 6, 8, 8, 10, 4, 6, 6, 8, 6, 8, 8, 10, 6, - 8, 
8, 10, 8, 10, 10, 12, 2, 4, 4, 6, 4, 6, 6, 8, 4, 6, 6, 8, - 6, 8, 8, 10, 4, 6, 6, 8, 6, 8, 8, 10, 6, 8, 8, 10, 8, 10, 10, - 12, 4, 6, 6, 8, 6, 8, 8, 10, 6, 8, 8, 10, 8, 10, 10, 12, 6, 8, - 8, 10, 8, 10, 10, 12, 8, 10, 10, 12, 10, 12, 12, 14, 2, 4, 4, 6, 4, - 6, 6, 8, 4, 6, 6, 8, 6, 8, 8, 10, 4, 6, 6, 8, 6, 8, 8, 10, - 6, 8, 8, 10, 8, 10, 10, 12, 4, 6, 6, 8, 6, 8, 8, 10, 6, 8, 8, - 10, 8, 10, 10, 12, 6, 8, 8, 10, 8, 10, 10, 12, 8, 10, 10, 12, 10, 12, - 12, 14, 4, 6, 6, 8, 6, 8, 8, 10, 6, 8, 8, 10, 8, 10, 10, 12, 6, - 8, 8, 10, 8, 10, 10, 12, 8, 10, 10, 12, 10, 12, 12, 14, 6, 8, 8, 10, - 8, 10, 10, 12, 8, 10, 10, 12, 10, 12, 12, 14, 8, 10, 10, 12, 10, 12, 12, - 14, 10, 12, 12, 14, 12, 14, 14, 16}; - -static const uint8_t pshufb_combine_table[272] = { - 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, - 0x0c, 0x0d, 0x0e, 0x0f, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x08, - 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x80, 0x00, 0x01, 0x02, 0x03, - 0x04, 0x05, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x80, 0x80, - 0x00, 0x01, 0x02, 0x03, 0x04, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, - 0x0f, 0x80, 0x80, 0x80, 0x00, 0x01, 0x02, 0x03, 0x08, 0x09, 0x0a, 0x0b, - 0x0c, 0x0d, 0x0e, 0x0f, 0x80, 0x80, 0x80, 0x80, 0x00, 0x01, 0x02, 0x08, - 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x00, 0x01, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x80, 0x80, - 0x80, 0x80, 0x80, 0x80, 0x00, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, - 0x0f, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x08, 0x09, 0x0a, 0x0b, - 0x0c, 0x0d, 0x0e, 0x0f, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, -}; - -// 256 * 8 bytes = 2kB, easily fits in cache. 
-static const uint64_t thintable_epi8[256] = { - 0x0706050403020100, 0x0007060504030201, 0x0007060504030200, - 0x0000070605040302, 0x0007060504030100, 0x0000070605040301, - 0x0000070605040300, 0x0000000706050403, 0x0007060504020100, - 0x0000070605040201, 0x0000070605040200, 0x0000000706050402, - 0x0000070605040100, 0x0000000706050401, 0x0000000706050400, - 0x0000000007060504, 0x0007060503020100, 0x0000070605030201, - 0x0000070605030200, 0x0000000706050302, 0x0000070605030100, - 0x0000000706050301, 0x0000000706050300, 0x0000000007060503, - 0x0000070605020100, 0x0000000706050201, 0x0000000706050200, - 0x0000000007060502, 0x0000000706050100, 0x0000000007060501, - 0x0000000007060500, 0x0000000000070605, 0x0007060403020100, - 0x0000070604030201, 0x0000070604030200, 0x0000000706040302, - 0x0000070604030100, 0x0000000706040301, 0x0000000706040300, - 0x0000000007060403, 0x0000070604020100, 0x0000000706040201, - 0x0000000706040200, 0x0000000007060402, 0x0000000706040100, - 0x0000000007060401, 0x0000000007060400, 0x0000000000070604, - 0x0000070603020100, 0x0000000706030201, 0x0000000706030200, - 0x0000000007060302, 0x0000000706030100, 0x0000000007060301, - 0x0000000007060300, 0x0000000000070603, 0x0000000706020100, - 0x0000000007060201, 0x0000000007060200, 0x0000000000070602, - 0x0000000007060100, 0x0000000000070601, 0x0000000000070600, - 0x0000000000000706, 0x0007050403020100, 0x0000070504030201, - 0x0000070504030200, 0x0000000705040302, 0x0000070504030100, - 0x0000000705040301, 0x0000000705040300, 0x0000000007050403, - 0x0000070504020100, 0x0000000705040201, 0x0000000705040200, - 0x0000000007050402, 0x0000000705040100, 0x0000000007050401, - 0x0000000007050400, 0x0000000000070504, 0x0000070503020100, - 0x0000000705030201, 0x0000000705030200, 0x0000000007050302, - 0x0000000705030100, 0x0000000007050301, 0x0000000007050300, - 0x0000000000070503, 0x0000000705020100, 0x0000000007050201, - 0x0000000007050200, 0x0000000000070502, 0x0000000007050100, - 0x0000000000070501, 
0x0000000000070500, 0x0000000000000705, - 0x0000070403020100, 0x0000000704030201, 0x0000000704030200, - 0x0000000007040302, 0x0000000704030100, 0x0000000007040301, - 0x0000000007040300, 0x0000000000070403, 0x0000000704020100, - 0x0000000007040201, 0x0000000007040200, 0x0000000000070402, - 0x0000000007040100, 0x0000000000070401, 0x0000000000070400, - 0x0000000000000704, 0x0000000703020100, 0x0000000007030201, - 0x0000000007030200, 0x0000000000070302, 0x0000000007030100, - 0x0000000000070301, 0x0000000000070300, 0x0000000000000703, - 0x0000000007020100, 0x0000000000070201, 0x0000000000070200, - 0x0000000000000702, 0x0000000000070100, 0x0000000000000701, - 0x0000000000000700, 0x0000000000000007, 0x0006050403020100, - 0x0000060504030201, 0x0000060504030200, 0x0000000605040302, - 0x0000060504030100, 0x0000000605040301, 0x0000000605040300, - 0x0000000006050403, 0x0000060504020100, 0x0000000605040201, - 0x0000000605040200, 0x0000000006050402, 0x0000000605040100, - 0x0000000006050401, 0x0000000006050400, 0x0000000000060504, - 0x0000060503020100, 0x0000000605030201, 0x0000000605030200, - 0x0000000006050302, 0x0000000605030100, 0x0000000006050301, - 0x0000000006050300, 0x0000000000060503, 0x0000000605020100, - 0x0000000006050201, 0x0000000006050200, 0x0000000000060502, - 0x0000000006050100, 0x0000000000060501, 0x0000000000060500, - 0x0000000000000605, 0x0000060403020100, 0x0000000604030201, - 0x0000000604030200, 0x0000000006040302, 0x0000000604030100, - 0x0000000006040301, 0x0000000006040300, 0x0000000000060403, - 0x0000000604020100, 0x0000000006040201, 0x0000000006040200, - 0x0000000000060402, 0x0000000006040100, 0x0000000000060401, - 0x0000000000060400, 0x0000000000000604, 0x0000000603020100, - 0x0000000006030201, 0x0000000006030200, 0x0000000000060302, - 0x0000000006030100, 0x0000000000060301, 0x0000000000060300, - 0x0000000000000603, 0x0000000006020100, 0x0000000000060201, - 0x0000000000060200, 0x0000000000000602, 0x0000000000060100, - 0x0000000000000601, 
0x0000000000000600, 0x0000000000000006, - 0x0000050403020100, 0x0000000504030201, 0x0000000504030200, - 0x0000000005040302, 0x0000000504030100, 0x0000000005040301, - 0x0000000005040300, 0x0000000000050403, 0x0000000504020100, - 0x0000000005040201, 0x0000000005040200, 0x0000000000050402, - 0x0000000005040100, 0x0000000000050401, 0x0000000000050400, - 0x0000000000000504, 0x0000000503020100, 0x0000000005030201, - 0x0000000005030200, 0x0000000000050302, 0x0000000005030100, - 0x0000000000050301, 0x0000000000050300, 0x0000000000000503, - 0x0000000005020100, 0x0000000000050201, 0x0000000000050200, - 0x0000000000000502, 0x0000000000050100, 0x0000000000000501, - 0x0000000000000500, 0x0000000000000005, 0x0000000403020100, - 0x0000000004030201, 0x0000000004030200, 0x0000000000040302, - 0x0000000004030100, 0x0000000000040301, 0x0000000000040300, - 0x0000000000000403, 0x0000000004020100, 0x0000000000040201, - 0x0000000000040200, 0x0000000000000402, 0x0000000000040100, - 0x0000000000000401, 0x0000000000000400, 0x0000000000000004, - 0x0000000003020100, 0x0000000000030201, 0x0000000000030200, - 0x0000000000000302, 0x0000000000030100, 0x0000000000000301, - 0x0000000000000300, 0x0000000000000003, 0x0000000000020100, - 0x0000000000000201, 0x0000000000000200, 0x0000000000000002, - 0x0000000000000100, 0x0000000000000001, 0x0000000000000000, - 0x0000000000000000, -}; //static uint64_t thintable_epi8[256] - -} // namespace simdjson - -#endif // SIMDJSON_SIMDPRUNE_TABLES_H -/* end file src/simdprune_tables.h */ /* begin file src/simdjson.cpp */ #include @@ -320,6 +150,138 @@ size_t json_minify(const unsigned char *bytes, size_t how_many, // This fast code is disabled. 
// See issue https://github.com/lemire/simdjson/issues/384 // +/* begin file src/simdprune_tables.h */ +#ifndef SIMDJSON_SIMDPRUNE_TABLES_H +#define SIMDJSON_SIMDPRUNE_TABLES_H +#include + +namespace simdjson { // table modified and copied from + // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetTable +static const unsigned char BitsSetTable256mul2[256] = { + 0, 2, 2, 4, 2, 4, 4, 6, 2, 4, 4, 6, 4, 6, 6, 8, 2, 4, 4, + 6, 4, 6, 6, 8, 4, 6, 6, 8, 6, 8, 8, 10, 2, 4, 4, 6, 4, 6, + 6, 8, 4, 6, 6, 8, 6, 8, 8, 10, 4, 6, 6, 8, 6, 8, 8, 10, 6, + 8, 8, 10, 8, 10, 10, 12, 2, 4, 4, 6, 4, 6, 6, 8, 4, 6, 6, 8, + 6, 8, 8, 10, 4, 6, 6, 8, 6, 8, 8, 10, 6, 8, 8, 10, 8, 10, 10, + 12, 4, 6, 6, 8, 6, 8, 8, 10, 6, 8, 8, 10, 8, 10, 10, 12, 6, 8, + 8, 10, 8, 10, 10, 12, 8, 10, 10, 12, 10, 12, 12, 14, 2, 4, 4, 6, 4, + 6, 6, 8, 4, 6, 6, 8, 6, 8, 8, 10, 4, 6, 6, 8, 6, 8, 8, 10, + 6, 8, 8, 10, 8, 10, 10, 12, 4, 6, 6, 8, 6, 8, 8, 10, 6, 8, 8, + 10, 8, 10, 10, 12, 6, 8, 8, 10, 8, 10, 10, 12, 8, 10, 10, 12, 10, 12, + 12, 14, 4, 6, 6, 8, 6, 8, 8, 10, 6, 8, 8, 10, 8, 10, 10, 12, 6, + 8, 8, 10, 8, 10, 10, 12, 8, 10, 10, 12, 10, 12, 12, 14, 6, 8, 8, 10, + 8, 10, 10, 12, 8, 10, 10, 12, 10, 12, 12, 14, 8, 10, 10, 12, 10, 12, 12, + 14, 10, 12, 12, 14, 12, 14, 14, 16}; + +static const uint8_t pshufb_combine_table[272] = { + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, + 0x0c, 0x0d, 0x0e, 0x0f, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x08, + 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x80, 0x00, 0x01, 0x02, 0x03, + 0x04, 0x05, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x80, 0x80, + 0x00, 0x01, 0x02, 0x03, 0x04, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, + 0x0f, 0x80, 0x80, 0x80, 0x00, 0x01, 0x02, 0x03, 0x08, 0x09, 0x0a, 0x0b, + 0x0c, 0x0d, 0x0e, 0x0f, 0x80, 0x80, 0x80, 0x80, 0x00, 0x01, 0x02, 0x08, + 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x00, 0x01, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x80, 0x80, + 0x80, 
0x80, 0x80, 0x80, 0x00, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, + 0x0f, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x08, 0x09, 0x0a, 0x0b, + 0x0c, 0x0d, 0x0e, 0x0f, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, +}; + +// 256 * 8 bytes = 2kB, easily fits in cache. +static const uint64_t thintable_epi8[256] = { + 0x0706050403020100, 0x0007060504030201, 0x0007060504030200, + 0x0000070605040302, 0x0007060504030100, 0x0000070605040301, + 0x0000070605040300, 0x0000000706050403, 0x0007060504020100, + 0x0000070605040201, 0x0000070605040200, 0x0000000706050402, + 0x0000070605040100, 0x0000000706050401, 0x0000000706050400, + 0x0000000007060504, 0x0007060503020100, 0x0000070605030201, + 0x0000070605030200, 0x0000000706050302, 0x0000070605030100, + 0x0000000706050301, 0x0000000706050300, 0x0000000007060503, + 0x0000070605020100, 0x0000000706050201, 0x0000000706050200, + 0x0000000007060502, 0x0000000706050100, 0x0000000007060501, + 0x0000000007060500, 0x0000000000070605, 0x0007060403020100, + 0x0000070604030201, 0x0000070604030200, 0x0000000706040302, + 0x0000070604030100, 0x0000000706040301, 0x0000000706040300, + 0x0000000007060403, 0x0000070604020100, 0x0000000706040201, + 0x0000000706040200, 0x0000000007060402, 0x0000000706040100, + 0x0000000007060401, 0x0000000007060400, 0x0000000000070604, + 0x0000070603020100, 0x0000000706030201, 0x0000000706030200, + 0x0000000007060302, 0x0000000706030100, 0x0000000007060301, + 0x0000000007060300, 0x0000000000070603, 0x0000000706020100, + 0x0000000007060201, 0x0000000007060200, 0x0000000000070602, + 0x0000000007060100, 0x0000000000070601, 0x0000000000070600, + 0x0000000000000706, 0x0007050403020100, 0x0000070504030201, + 0x0000070504030200, 0x0000000705040302, 0x0000070504030100, + 0x0000000705040301, 0x0000000705040300, 0x0000000007050403, + 0x0000070504020100, 0x0000000705040201, 0x0000000705040200, + 0x0000000007050402, 0x0000000705040100, 0x0000000007050401, + 0x0000000007050400, 0x0000000000070504, 0x0000070503020100, + 
0x0000000705030201, 0x0000000705030200, 0x0000000007050302, + 0x0000000705030100, 0x0000000007050301, 0x0000000007050300, + 0x0000000000070503, 0x0000000705020100, 0x0000000007050201, + 0x0000000007050200, 0x0000000000070502, 0x0000000007050100, + 0x0000000000070501, 0x0000000000070500, 0x0000000000000705, + 0x0000070403020100, 0x0000000704030201, 0x0000000704030200, + 0x0000000007040302, 0x0000000704030100, 0x0000000007040301, + 0x0000000007040300, 0x0000000000070403, 0x0000000704020100, + 0x0000000007040201, 0x0000000007040200, 0x0000000000070402, + 0x0000000007040100, 0x0000000000070401, 0x0000000000070400, + 0x0000000000000704, 0x0000000703020100, 0x0000000007030201, + 0x0000000007030200, 0x0000000000070302, 0x0000000007030100, + 0x0000000000070301, 0x0000000000070300, 0x0000000000000703, + 0x0000000007020100, 0x0000000000070201, 0x0000000000070200, + 0x0000000000000702, 0x0000000000070100, 0x0000000000000701, + 0x0000000000000700, 0x0000000000000007, 0x0006050403020100, + 0x0000060504030201, 0x0000060504030200, 0x0000000605040302, + 0x0000060504030100, 0x0000000605040301, 0x0000000605040300, + 0x0000000006050403, 0x0000060504020100, 0x0000000605040201, + 0x0000000605040200, 0x0000000006050402, 0x0000000605040100, + 0x0000000006050401, 0x0000000006050400, 0x0000000000060504, + 0x0000060503020100, 0x0000000605030201, 0x0000000605030200, + 0x0000000006050302, 0x0000000605030100, 0x0000000006050301, + 0x0000000006050300, 0x0000000000060503, 0x0000000605020100, + 0x0000000006050201, 0x0000000006050200, 0x0000000000060502, + 0x0000000006050100, 0x0000000000060501, 0x0000000000060500, + 0x0000000000000605, 0x0000060403020100, 0x0000000604030201, + 0x0000000604030200, 0x0000000006040302, 0x0000000604030100, + 0x0000000006040301, 0x0000000006040300, 0x0000000000060403, + 0x0000000604020100, 0x0000000006040201, 0x0000000006040200, + 0x0000000000060402, 0x0000000006040100, 0x0000000000060401, + 0x0000000000060400, 0x0000000000000604, 0x0000000603020100, + 
0x0000000006030201, 0x0000000006030200, 0x0000000000060302, + 0x0000000006030100, 0x0000000000060301, 0x0000000000060300, + 0x0000000000000603, 0x0000000006020100, 0x0000000000060201, + 0x0000000000060200, 0x0000000000000602, 0x0000000000060100, + 0x0000000000000601, 0x0000000000000600, 0x0000000000000006, + 0x0000050403020100, 0x0000000504030201, 0x0000000504030200, + 0x0000000005040302, 0x0000000504030100, 0x0000000005040301, + 0x0000000005040300, 0x0000000000050403, 0x0000000504020100, + 0x0000000005040201, 0x0000000005040200, 0x0000000000050402, + 0x0000000005040100, 0x0000000000050401, 0x0000000000050400, + 0x0000000000000504, 0x0000000503020100, 0x0000000005030201, + 0x0000000005030200, 0x0000000000050302, 0x0000000005030100, + 0x0000000000050301, 0x0000000000050300, 0x0000000000000503, + 0x0000000005020100, 0x0000000000050201, 0x0000000000050200, + 0x0000000000000502, 0x0000000000050100, 0x0000000000000501, + 0x0000000000000500, 0x0000000000000005, 0x0000000403020100, + 0x0000000004030201, 0x0000000004030200, 0x0000000000040302, + 0x0000000004030100, 0x0000000000040301, 0x0000000000040300, + 0x0000000000000403, 0x0000000004020100, 0x0000000000040201, + 0x0000000000040200, 0x0000000000000402, 0x0000000000040100, + 0x0000000000000401, 0x0000000000000400, 0x0000000000000004, + 0x0000000003020100, 0x0000000000030201, 0x0000000000030200, + 0x0000000000000302, 0x0000000000030100, 0x0000000000000301, + 0x0000000000000300, 0x0000000000000003, 0x0000000000020100, + 0x0000000000000201, 0x0000000000000200, 0x0000000000000002, + 0x0000000000000100, 0x0000000000000001, 0x0000000000000000, + 0x0000000000000000, +}; //static uint64_t thintable_epi8[256] + +} // namespace simdjson + +#endif // SIMDJSON_SIMDPRUNE_TABLES_H +/* end file src/simdprune_tables.h */ #include #include // currently, there is no runtime dispatch for the minifier @@ -729,7 +691,7 @@ size_t oldjson_minify(const uint8_t *buf, size_t len, uint8_t *out) { } // namespace simdjson #endif -/* end file 
src/jsonminifier.cpp */ +/* end file src/simdprune_tables.h */ /* begin file src/jsonparser.cpp */ #include @@ -825,2068 +787,14 @@ ParsedJson build_parsed_json(const uint8_t *buf, size_t len, } } // namespace simdjson /* end file src/jsonparser.cpp */ -/* begin file src/arm64/bitmanipulation.h */ -#ifndef SIMDJSON_ARM64_BITMANIPULATION_H -#define SIMDJSON_ARM64_BITMANIPULATION_H +/* begin file src/stage1_find_marks.cpp */ +/* begin file src/arm64/stage1_find_marks.h */ +#ifndef SIMDJSON_ARM64_STAGE1_FIND_MARKS_H +#define SIMDJSON_ARM64_STAGE1_FIND_MARKS_H #ifdef IS_ARM64 - -namespace simdjson::arm64 { - -#ifndef _MSC_VER -// We sometimes call trailing_zero on inputs that are zero, -// but the algorithms do not end up using the returned value. -// Sadly, sanitizers are not smart enough to figure it out. -__attribute__((no_sanitize("undefined"))) // this is deliberate -#endif -/* result might be undefined when input_num is zero */ -really_inline int trailing_zeroes(uint64_t input_num) { -#ifdef _MSC_VER - unsigned long ret; - // Search the mask data from least significant bit (LSB) - // to the most significant bit (MSB) for a set bit (1). - _BitScanForward64(&ret, input_num); - return (int)ret; -#else - return __builtin_ctzll(input_num); -#endif// _MSC_VER -} - -/* result might be undefined when input_num is zero */ -really_inline uint64_t clear_lowest_bit(uint64_t input_num) { - return input_num & (input_num-1); -} - -/* result might be undefined when input_num is zero */ -really_inline int leading_zeroes(uint64_t input_num) { -#ifdef _MSC_VER - unsigned long leading_zero = 0; - // Search the mask data from most significant bit (MSB) - // to least significant bit (LSB) for a set bit (1). 
- if (_BitScanReverse64(&leading_zero, input_num)) - return (int)(63 - leading_zero); - else - return 64; -#else - return __builtin_clzll(input_num); -#endif// _MSC_VER -} - -/* result might be undefined when input_num is zero */ -really_inline int hamming(uint64_t input_num) { - return vaddv_u8(vcnt_u8((uint8x8_t)input_num)); -} - -really_inline bool add_overflow(uint64_t value1, uint64_t value2, - uint64_t *result) { -#ifdef _MSC_VER - // todo: this might fail under visual studio for ARM - return _addcarry_u64(0, value1, value2, - reinterpret_cast(result)); -#else - return __builtin_uaddll_overflow(value1, value2, - (unsigned long long *)result); -#endif -} - -#ifdef _MSC_VER -#pragma intrinsic(_umul128) // todo: this might fail under visual studio for ARM -#endif - -really_inline bool mul_overflow(uint64_t value1, uint64_t value2, - uint64_t *result) { -#ifdef _MSC_VER - // todo: this might fail under visual studio for ARM - uint64_t high; - *result = _umul128(value1, value2, &high); - return high; -#else - return __builtin_umulll_overflow(value1, value2, - (unsigned long long *)result); -#endif -} - -}// namespace simdjson::arm64 - -#endif //IS_ARM64 -#endif // SIMDJSON_ARM64_BITMANIPULATION_H -/* end file src/arm64/bitmanipulation.h */ -/* begin file src/haswell/bitmanipulation.h */ -#ifndef SIMDJSON_HASWELL_BITMANIPULATION_H -#define SIMDJSON_HASWELL_BITMANIPULATION_H - - -#ifdef IS_X86_64 - -TARGET_HASWELL -namespace simdjson::haswell { - -#ifndef _MSC_VER -// We sometimes call trailing_zero on inputs that are zero, -// but the algorithms do not end up using the returned value. -// Sadly, sanitizers are not smart enough to figure it out. 
-__attribute__((no_sanitize("undefined"))) // this is deliberate -#endif -really_inline int trailing_zeroes(uint64_t input_num) { -#ifdef _MSC_VER - return (int)_tzcnt_u64(input_num); -#else - //////// - // You might expect the next line to be equivalent to - // return (int)_tzcnt_u64(input_num); - // but the generated code differs and might be less efficient? - //////// - return __builtin_ctzll(input_num); -#endif// _MSC_VER -} - -/* result might be undefined when input_num is zero */ -really_inline uint64_t clear_lowest_bit(uint64_t input_num) { - return _blsr_u64(input_num); -} - -/* result might be undefined when input_num is zero */ -really_inline int leading_zeroes(uint64_t input_num) { - return static_cast(_lzcnt_u64(input_num)); -} - -really_inline int hamming(uint64_t input_num) { -#ifdef _MSC_VER - // note: we do not support legacy 32-bit Windows - return __popcnt64(input_num);// Visual Studio wants two underscores -#else - return _popcnt64(input_num); -#endif -} - -really_inline bool add_overflow(uint64_t value1, uint64_t value2, - uint64_t *result) { -#ifdef _MSC_VER - return _addcarry_u64(0, value1, value2, - reinterpret_cast(result)); -#else - return __builtin_uaddll_overflow(value1, value2, - (unsigned long long *)result); -#endif -} - -#ifdef _MSC_VER -#pragma intrinsic(_umul128) -#endif -really_inline bool mul_overflow(uint64_t value1, uint64_t value2, - uint64_t *result) { -#ifdef _MSC_VER - uint64_t high; - *result = _umul128(value1, value2, &high); - return high; -#else - return __builtin_umulll_overflow(value1, value2, - (unsigned long long *)result); -#endif -} -}// namespace simdjson::haswell -UNTARGET_REGION -#endif -#endif // SIMDJSON_HASWELL_BITMANIPULATION_H -/* end file src/haswell/bitmanipulation.h */ -/* begin file src/westmere/bitmanipulation.h */ -#ifndef SIMDJSON_WESTMERE_BITMANIPULATION_H -#define SIMDJSON_WESTMERE_BITMANIPULATION_H - -#ifdef IS_X86_64 - -TARGET_WESTMERE -namespace simdjson::westmere { - -#ifndef _MSC_VER -// We 
sometimes call trailing_zero on inputs that are zero, -// but the algorithms do not end up using the returned value. -// Sadly, sanitizers are not smart enough to figure it out. -__attribute__((no_sanitize("undefined"))) // this is deliberate -#endif -/* result might be undefined when input_num is zero */ -really_inline int trailing_zeroes(uint64_t input_num) { -#ifdef _MSC_VER - unsigned long ret; - // Search the mask data from least significant bit (LSB) - // to the most significant bit (MSB) for a set bit (1). - _BitScanForward64(&ret, input_num); - return (int)ret; -#else - return __builtin_ctzll(input_num); -#endif// _MSC_VER -} - -/* result might be undefined when input_num is zero */ -really_inline uint64_t clear_lowest_bit(uint64_t input_num) { - return input_num & (input_num-1); -} - -/* result might be undefined when input_num is zero */ -really_inline int leading_zeroes(uint64_t input_num) { -#ifdef _MSC_VER - unsigned long leading_zero = 0; - // Search the mask data from most significant bit (MSB) - // to least significant bit (LSB) for a set bit (1). 
- if (_BitScanReverse64(&leading_zero, input_num)) - return (int)(63 - leading_zero); - else - return 64; -#else - return __builtin_clzll(input_num); -#endif// _MSC_VER -} - -really_inline int hamming(uint64_t input_num) { -#ifdef _MSC_VER - // note: we do not support legacy 32-bit Windows - return __popcnt64(input_num);// Visual Studio wants two underscores -#else - return _popcnt64(input_num); -#endif -} - -really_inline bool add_overflow(uint64_t value1, uint64_t value2, - uint64_t *result) { -#ifdef _MSC_VER - return _addcarry_u64(0, value1, value2, - reinterpret_cast(result)); -#else - return __builtin_uaddll_overflow(value1, value2, - (unsigned long long *)result); -#endif -} - -#ifdef _MSC_VER -#pragma intrinsic(_umul128) -#endif -really_inline bool mul_overflow(uint64_t value1, uint64_t value2, - uint64_t *result) { -#ifdef _MSC_VER - uint64_t high; - *result = _umul128(value1, value2, &high); - return high; -#else - return __builtin_umulll_overflow(value1, value2, - (unsigned long long *)result); -#endif -} - -}// namespace simdjson::westmere -UNTARGET_REGION - -#endif -#endif // SIMDJSON_WESTMERE_BITMANIPULATION_H -/* end file src/westmere/bitmanipulation.h */ -/* begin file src/arm64/numberparsing.h */ -#ifndef SIMDJSON_ARM64_NUMBERPARSING_H -#define SIMDJSON_ARM64_NUMBERPARSING_H - -#ifdef IS_ARM64 - -#include -#include - - -#ifdef JSON_TEST_NUMBERS // for unit testing -void found_invalid_number(const uint8_t *buf); -void found_integer(int64_t result, const uint8_t *buf); -void found_unsigned_integer(uint64_t result, const uint8_t *buf); -void found_float(double result, const uint8_t *buf); -#endif - -namespace simdjson::arm64 { - -// we don't have SSE, so let us use a scalar function -// credit: https://johnnylee-sde.github.io/Fast-numeric-string-to-int/ -static inline uint32_t parse_eight_digits_unrolled(const char *chars) { - uint64_t val; - memcpy(&val, chars, sizeof(uint64_t)); - val = (val & 0x0F0F0F0F0F0F0F0F) * 2561 >> 8; - val = (val & 
0x00FF00FF00FF00FF) * 6553601 >> 16; - return (val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32; -} - -#define SWAR_NUMBER_PARSING - - -// Allowable floating-point values range -// std::numeric_limits::lowest() to std::numeric_limits::max(), -// so from -1.7976e308 all the way to 1.7975e308 in binary64. The lowest -// non-zero normal values is std::numeric_limits::min() or -// about 2.225074e-308. -static const double power_of_ten[] = { - 1e-308, 1e-307, 1e-306, 1e-305, 1e-304, 1e-303, 1e-302, 1e-301, 1e-300, - 1e-299, 1e-298, 1e-297, 1e-296, 1e-295, 1e-294, 1e-293, 1e-292, 1e-291, - 1e-290, 1e-289, 1e-288, 1e-287, 1e-286, 1e-285, 1e-284, 1e-283, 1e-282, - 1e-281, 1e-280, 1e-279, 1e-278, 1e-277, 1e-276, 1e-275, 1e-274, 1e-273, - 1e-272, 1e-271, 1e-270, 1e-269, 1e-268, 1e-267, 1e-266, 1e-265, 1e-264, - 1e-263, 1e-262, 1e-261, 1e-260, 1e-259, 1e-258, 1e-257, 1e-256, 1e-255, - 1e-254, 1e-253, 1e-252, 1e-251, 1e-250, 1e-249, 1e-248, 1e-247, 1e-246, - 1e-245, 1e-244, 1e-243, 1e-242, 1e-241, 1e-240, 1e-239, 1e-238, 1e-237, - 1e-236, 1e-235, 1e-234, 1e-233, 1e-232, 1e-231, 1e-230, 1e-229, 1e-228, - 1e-227, 1e-226, 1e-225, 1e-224, 1e-223, 1e-222, 1e-221, 1e-220, 1e-219, - 1e-218, 1e-217, 1e-216, 1e-215, 1e-214, 1e-213, 1e-212, 1e-211, 1e-210, - 1e-209, 1e-208, 1e-207, 1e-206, 1e-205, 1e-204, 1e-203, 1e-202, 1e-201, - 1e-200, 1e-199, 1e-198, 1e-197, 1e-196, 1e-195, 1e-194, 1e-193, 1e-192, - 1e-191, 1e-190, 1e-189, 1e-188, 1e-187, 1e-186, 1e-185, 1e-184, 1e-183, - 1e-182, 1e-181, 1e-180, 1e-179, 1e-178, 1e-177, 1e-176, 1e-175, 1e-174, - 1e-173, 1e-172, 1e-171, 1e-170, 1e-169, 1e-168, 1e-167, 1e-166, 1e-165, - 1e-164, 1e-163, 1e-162, 1e-161, 1e-160, 1e-159, 1e-158, 1e-157, 1e-156, - 1e-155, 1e-154, 1e-153, 1e-152, 1e-151, 1e-150, 1e-149, 1e-148, 1e-147, - 1e-146, 1e-145, 1e-144, 1e-143, 1e-142, 1e-141, 1e-140, 1e-139, 1e-138, - 1e-137, 1e-136, 1e-135, 1e-134, 1e-133, 1e-132, 1e-131, 1e-130, 1e-129, - 1e-128, 1e-127, 1e-126, 1e-125, 1e-124, 1e-123, 1e-122, 1e-121, 1e-120, - 
1e-119, 1e-118, 1e-117, 1e-116, 1e-115, 1e-114, 1e-113, 1e-112, 1e-111, - 1e-110, 1e-109, 1e-108, 1e-107, 1e-106, 1e-105, 1e-104, 1e-103, 1e-102, - 1e-101, 1e-100, 1e-99, 1e-98, 1e-97, 1e-96, 1e-95, 1e-94, 1e-93, - 1e-92, 1e-91, 1e-90, 1e-89, 1e-88, 1e-87, 1e-86, 1e-85, 1e-84, - 1e-83, 1e-82, 1e-81, 1e-80, 1e-79, 1e-78, 1e-77, 1e-76, 1e-75, - 1e-74, 1e-73, 1e-72, 1e-71, 1e-70, 1e-69, 1e-68, 1e-67, 1e-66, - 1e-65, 1e-64, 1e-63, 1e-62, 1e-61, 1e-60, 1e-59, 1e-58, 1e-57, - 1e-56, 1e-55, 1e-54, 1e-53, 1e-52, 1e-51, 1e-50, 1e-49, 1e-48, - 1e-47, 1e-46, 1e-45, 1e-44, 1e-43, 1e-42, 1e-41, 1e-40, 1e-39, - 1e-38, 1e-37, 1e-36, 1e-35, 1e-34, 1e-33, 1e-32, 1e-31, 1e-30, - 1e-29, 1e-28, 1e-27, 1e-26, 1e-25, 1e-24, 1e-23, 1e-22, 1e-21, - 1e-20, 1e-19, 1e-18, 1e-17, 1e-16, 1e-15, 1e-14, 1e-13, 1e-12, - 1e-11, 1e-10, 1e-9, 1e-8, 1e-7, 1e-6, 1e-5, 1e-4, 1e-3, - 1e-2, 1e-1, 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, - 1e7, 1e8, 1e9, 1e10, 1e11, 1e12, 1e13, 1e14, 1e15, - 1e16, 1e17, 1e18, 1e19, 1e20, 1e21, 1e22, 1e23, 1e24, - 1e25, 1e26, 1e27, 1e28, 1e29, 1e30, 1e31, 1e32, 1e33, - 1e34, 1e35, 1e36, 1e37, 1e38, 1e39, 1e40, 1e41, 1e42, - 1e43, 1e44, 1e45, 1e46, 1e47, 1e48, 1e49, 1e50, 1e51, - 1e52, 1e53, 1e54, 1e55, 1e56, 1e57, 1e58, 1e59, 1e60, - 1e61, 1e62, 1e63, 1e64, 1e65, 1e66, 1e67, 1e68, 1e69, - 1e70, 1e71, 1e72, 1e73, 1e74, 1e75, 1e76, 1e77, 1e78, - 1e79, 1e80, 1e81, 1e82, 1e83, 1e84, 1e85, 1e86, 1e87, - 1e88, 1e89, 1e90, 1e91, 1e92, 1e93, 1e94, 1e95, 1e96, - 1e97, 1e98, 1e99, 1e100, 1e101, 1e102, 1e103, 1e104, 1e105, - 1e106, 1e107, 1e108, 1e109, 1e110, 1e111, 1e112, 1e113, 1e114, - 1e115, 1e116, 1e117, 1e118, 1e119, 1e120, 1e121, 1e122, 1e123, - 1e124, 1e125, 1e126, 1e127, 1e128, 1e129, 1e130, 1e131, 1e132, - 1e133, 1e134, 1e135, 1e136, 1e137, 1e138, 1e139, 1e140, 1e141, - 1e142, 1e143, 1e144, 1e145, 1e146, 1e147, 1e148, 1e149, 1e150, - 1e151, 1e152, 1e153, 1e154, 1e155, 1e156, 1e157, 1e158, 1e159, - 1e160, 1e161, 1e162, 1e163, 1e164, 1e165, 1e166, 1e167, 1e168, - 1e169, 1e170, 
1e171, 1e172, 1e173, 1e174, 1e175, 1e176, 1e177, - 1e178, 1e179, 1e180, 1e181, 1e182, 1e183, 1e184, 1e185, 1e186, - 1e187, 1e188, 1e189, 1e190, 1e191, 1e192, 1e193, 1e194, 1e195, - 1e196, 1e197, 1e198, 1e199, 1e200, 1e201, 1e202, 1e203, 1e204, - 1e205, 1e206, 1e207, 1e208, 1e209, 1e210, 1e211, 1e212, 1e213, - 1e214, 1e215, 1e216, 1e217, 1e218, 1e219, 1e220, 1e221, 1e222, - 1e223, 1e224, 1e225, 1e226, 1e227, 1e228, 1e229, 1e230, 1e231, - 1e232, 1e233, 1e234, 1e235, 1e236, 1e237, 1e238, 1e239, 1e240, - 1e241, 1e242, 1e243, 1e244, 1e245, 1e246, 1e247, 1e248, 1e249, - 1e250, 1e251, 1e252, 1e253, 1e254, 1e255, 1e256, 1e257, 1e258, - 1e259, 1e260, 1e261, 1e262, 1e263, 1e264, 1e265, 1e266, 1e267, - 1e268, 1e269, 1e270, 1e271, 1e272, 1e273, 1e274, 1e275, 1e276, - 1e277, 1e278, 1e279, 1e280, 1e281, 1e282, 1e283, 1e284, 1e285, - 1e286, 1e287, 1e288, 1e289, 1e290, 1e291, 1e292, 1e293, 1e294, - 1e295, 1e296, 1e297, 1e298, 1e299, 1e300, 1e301, 1e302, 1e303, - 1e304, 1e305, 1e306, 1e307, 1e308}; - -static inline bool is_integer(char c) { - return (c >= '0' && c <= '9'); - // this gets compiled to (uint8_t)(c - '0') <= 9 on all decent compilers -} - -// We need to check that the character following a zero is valid. This is -// probably frequent and it is hard than it looks. We are building all of this -// just to differentiate between 0x1 (invalid), 0,1 (valid) 0e1 (valid)... 
-const bool structural_or_whitespace_or_exponent_or_decimal_negated[256] = { - 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, - 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; - -really_inline bool -is_not_structural_or_whitespace_or_exponent_or_decimal(unsigned char c) { - return structural_or_whitespace_or_exponent_or_decimal_negated[c]; -} - -// check quickly whether the next 8 chars are made of digits -// at a glance, it looks better than Mula's -// http://0x80.pl/articles/swar-digits-validate.html -static inline bool is_made_of_eight_digits_fast(const char *chars) { - uint64_t val; - // this can read up to 7 bytes beyond the buffer size, but we require - // SIMDJSON_PADDING of padding - static_assert(7 <= SIMDJSON_PADDING); - memcpy(&val, chars, 8); - // a branchy method might be faster: - // return (( val & 0xF0F0F0F0F0F0F0F0 ) == 0x3030303030303030) - // && (( (val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0 ) == - // 0x3030303030303030); - return (((val & 0xF0F0F0F0F0F0F0F0) | - (((val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4)) == - 0x3333333333333333); -} - - -// -// This function computes base * 10 ^ (- negative_exponent ). -// It is only even going to be used when negative_exponent is tiny. 
-static double subnormal_power10(double base, int64_t negative_exponent) { - // avoid integer overflows in the pow expression, those values would - // become zero anyway. - if(negative_exponent < -1000) { - return 0; - } - - // this is probably not going to be fast - return base * 1e-308 * pow(10, negative_exponent + 308); -} - -// called by parse_number when we know that the output is a float, -// but where there might be some integer overflow. The trick here is to -// parse using floats from the start. -// Do not call this function directly as it skips some of the checks from -// parse_number -// -// This function will almost never be called!!! -// -// Note: a redesign could avoid this function entirely. -// -static never_inline bool parse_float(const uint8_t *const buf, ParsedJson &pj, - const uint32_t offset, bool found_minus) { - const char *p = reinterpret_cast(buf + offset); - bool negative = false; - if (found_minus) { - ++p; - negative = true; - } - long double i; - if (*p == '0') { // 0 cannot be followed by an integer - ++p; - i = 0; - } else { - unsigned char digit = *p - '0'; - i = digit; - p++; - while (is_integer(*p)) { - digit = *p - '0'; - i = 10 * i + digit; - ++p; - } - } - if ('.' == *p) { - ++p; - int fractional_weight = 308; - if (is_integer(*p)) { - unsigned char digit = *p - '0'; - ++p; - - fractional_weight--; - i = i + digit * (fractional_weight >= 0 ? power_of_ten[fractional_weight] - : 0); - } else { -#ifdef JSON_TEST_NUMBERS // for unit testing - found_invalid_number(buf + offset); -#endif - return false; - } - while (is_integer(*p)) { - unsigned char digit = *p - '0'; - ++p; - fractional_weight--; - i = i + digit * (fractional_weight >= 0 ? 
power_of_ten[fractional_weight] - : 0); - } - } - if (('e' == *p) || ('E' == *p)) { - ++p; - bool neg_exp = false; - if ('-' == *p) { - neg_exp = true; - ++p; - } else if ('+' == *p) { - ++p; - } - if (!is_integer(*p)) { -#ifdef JSON_TEST_NUMBERS // for unit testing - found_invalid_number(buf + offset); -#endif - return false; - } - unsigned char digit = *p - '0'; - int64_t exp_number = digit; // exponential part - p++; - if (is_integer(*p)) { - digit = *p - '0'; - exp_number = 10 * exp_number + digit; - ++p; - } - if (is_integer(*p)) { - digit = *p - '0'; - exp_number = 10 * exp_number + digit; - ++p; - } - if (is_integer(*p)) { - digit = *p - '0'; - exp_number = 10 * exp_number + digit; - ++p; - } - while (is_integer(*p)) { - if (exp_number > 0x100000000) { // we need to check for overflows -// we refuse to parse this -#ifdef JSON_TEST_NUMBERS // for unit testing - found_invalid_number(buf + offset); -#endif - return false; - } - digit = *p - '0'; - exp_number = 10 * exp_number + digit; - ++p; - } - if (unlikely(exp_number > 308)) { - // this path is unlikely - if (neg_exp) { - // We either have zero or a subnormal. - // We expect this to be uncommon so we go through a slow path. - i = subnormal_power10(i, -exp_number); - } else { -// We know for sure that we have a number that is too large, -// we refuse to parse this -#ifdef JSON_TEST_NUMBERS // for unit testing - found_invalid_number(buf + offset); -#endif - return false; - } - } else { - int exponent = (neg_exp ? -exp_number : exp_number); - // we have that exp_number is [0,308] so that - // exponent is [-308,308] so that - // 308 + exponent is in [0, 2 * 308] - i *= power_of_ten[308 + exponent]; - } - } - if (is_not_structural_or_whitespace(*p)) { - return false; - } - // check that we can go from long double to double safely. - if(i > std::numeric_limits::max()) { -#ifdef JSON_TEST_NUMBERS // for unit testing - found_invalid_number(buf + offset); -#endif - return false; - } - double d = negative ? 
-i : i; - pj.write_tape_double(d); -#ifdef JSON_TEST_NUMBERS // for unit testing - found_float(d, buf + offset); -#endif - return is_structural_or_whitespace(*p); -} - -// called by parse_number when we know that the output is an integer, -// but where there might be some integer overflow. -// we want to catch overflows! -// Do not call this function directly as it skips some of the checks from -// parse_number -// -// This function will almost never be called!!! -// -static never_inline bool parse_large_integer(const uint8_t *const buf, - ParsedJson &pj, - const uint32_t offset, - bool found_minus) { - const char *p = reinterpret_cast(buf + offset); - - bool negative = false; - if (found_minus) { - ++p; - negative = true; - } - uint64_t i; - if (*p == '0') { // 0 cannot be followed by an integer - ++p; - i = 0; - } else { - unsigned char digit = *p - '0'; - i = digit; - p++; - // the is_made_of_eight_digits_fast routine is unlikely to help here because - // we rarely see large integer parts like 123456789 - while (is_integer(*p)) { - digit = *p - '0'; - if (mul_overflow(i, 10, &i)) { -#ifdef JSON_TEST_NUMBERS // for unit testing - found_invalid_number(buf + offset); -#endif - return false; // overflow - } - if (add_overflow(i, digit, &i)) { -#ifdef JSON_TEST_NUMBERS // for unit testing - found_invalid_number(buf + offset); -#endif - return false; // overflow - } - ++p; - } - } - if (negative) { - if (i > 0x8000000000000000) { - // overflows! -#ifdef JSON_TEST_NUMBERS // for unit testing - found_invalid_number(buf + offset); -#endif - return false; // overflow - } else if (i == 0x8000000000000000) { - // In two's complement, we cannot represent 0x8000000000000000 - // as a positive signed integer, but the negative version is - // possible. 
- constexpr int64_t signed_answer = INT64_MIN; - pj.write_tape_s64(signed_answer); -#ifdef JSON_TEST_NUMBERS // for unit testing - found_integer(signed_answer, buf + offset); -#endif - } else { - // we can negate safely - int64_t signed_answer = -static_cast(i); - pj.write_tape_s64(signed_answer); -#ifdef JSON_TEST_NUMBERS // for unit testing - found_integer(signed_answer, buf + offset); -#endif - } - } else { - // we have a positive integer, the contract is that - // we try to represent it as a signed integer and only - // fallback on unsigned integers if absolutely necessary. - if(i < 0x8000000000000000) { -#ifdef JSON_TEST_NUMBERS // for unit testing - found_integer(i, buf + offset); -#endif - pj.write_tape_s64(i); - } else { -#ifdef JSON_TEST_NUMBERS // for unit testing - found_unsigned_integer(i, buf + offset); -#endif - pj.write_tape_u64(i); - } - } - return is_structural_or_whitespace(*p); -} - -// parse the number at buf + offset -// define JSON_TEST_NUMBERS for unit testing -// -// It is assumed that the number is followed by a structural ({,},],[) character -// or a white space character. If that is not the case (e.g., when the JSON -// document is made of a single number), then it is necessary to copy the -// content and append a space before calling this function. -// -// Our objective is accurate parsing (ULP of 0 or 1) at high speed. 
-static really_inline bool parse_number(const uint8_t *const buf, ParsedJson &pj, - const uint32_t offset, - bool found_minus) { -#ifdef SIMDJSON_SKIPNUMBERPARSING // for performance analysis, it is sometimes - // useful to skip parsing - pj.write_tape_s64(0); // always write zero - return true; // always succeeds -#else - const char *p = reinterpret_cast(buf + offset); - bool negative = false; - if (found_minus) { - ++p; - negative = true; - if (!is_integer(*p)) { // a negative sign must be followed by an integer -#ifdef JSON_TEST_NUMBERS // for unit testing - found_invalid_number(buf + offset); -#endif - return false; - } - } - const char *const start_digits = p; - - uint64_t i; // an unsigned int avoids signed overflows (which are bad) - if (*p == '0') { // 0 cannot be followed by an integer - ++p; - if (is_not_structural_or_whitespace_or_exponent_or_decimal(*p)) { -#ifdef JSON_TEST_NUMBERS // for unit testing - found_invalid_number(buf + offset); -#endif - return false; - } - i = 0; - } else { - if (!(is_integer(*p))) { // must start with an integer -#ifdef JSON_TEST_NUMBERS // for unit testing - found_invalid_number(buf + offset); -#endif - return false; - } - unsigned char digit = *p - '0'; - i = digit; - p++; - // the is_made_of_eight_digits_fast routine is unlikely to help here because - // we rarely see large integer parts like 123456789 - while (is_integer(*p)) { - digit = *p - '0'; - // a multiplication by 10 is cheaper than an arbitrary integer - // multiplication - i = 10 * i + digit; // might overflow, we will handle the overflow later - ++p; - } - } - int64_t exponent = 0; - bool is_float = false; - if ('.' == *p) { - is_float = true; // At this point we know that we have a float - // we continue with the fiction that we have an integer. If the - // floating point number is representable as x * 10^z for some integer - // z that fits in 53 bits, then we will be able to convert back the - // the integer into a float in a lossless manner. 
- ++p; - const char *const first_after_period = p; - if (is_integer(*p)) { - unsigned char digit = *p - '0'; - ++p; - i = i * 10 + digit; // might overflow + multiplication by 10 is likely - // cheaper than arbitrary mult. - // we will handle the overflow later - } else { -#ifdef JSON_TEST_NUMBERS // for unit testing - found_invalid_number(buf + offset); -#endif - return false; - } -#ifdef SWAR_NUMBER_PARSING - // this helps if we have lots of decimals! - // this turns out to be frequent enough. - if (is_made_of_eight_digits_fast(p)) { - i = i * 100000000 + parse_eight_digits_unrolled(p); - p += 8; - } -#endif - while (is_integer(*p)) { - unsigned char digit = *p - '0'; - ++p; - i = i * 10 + digit; // in rare cases, this will overflow, but that's ok - // because we have parse_highprecision_float later. - } - exponent = first_after_period - p; - } - int digit_count = - p - start_digits - 1; // used later to guard against overflows - int64_t exp_number = 0; // exponential part - if (('e' == *p) || ('E' == *p)) { - is_float = true; - ++p; - bool neg_exp = false; - if ('-' == *p) { - neg_exp = true; - ++p; - } else if ('+' == *p) { - ++p; - } - if (!is_integer(*p)) { -#ifdef JSON_TEST_NUMBERS // for unit testing - found_invalid_number(buf + offset); -#endif - return false; - } - unsigned char digit = *p - '0'; - exp_number = digit; - p++; - if (is_integer(*p)) { - digit = *p - '0'; - exp_number = 10 * exp_number + digit; - ++p; - } - if (is_integer(*p)) { - digit = *p - '0'; - exp_number = 10 * exp_number + digit; - ++p; - } - while (is_integer(*p)) { - if (exp_number > 0x100000000) { // we need to check for overflows - // we refuse to parse this -#ifdef JSON_TEST_NUMBERS // for unit testing - found_invalid_number(buf + offset); -#endif - return false; - } - digit = *p - '0'; - exp_number = 10 * exp_number + digit; - ++p; - } - exponent += (neg_exp ? 
-exp_number : exp_number); - } - if (is_float) { - uint64_t power_index = 308 + exponent; - if (unlikely((digit_count >= 19))) { // this is uncommon - // It is possible that the integer had an overflow. - // We have to handle the case where we have 0.0000somenumber. - const char *start = start_digits; - while ((*start == '0') || (*start == '.')) { - start++; - } - // we over-decrement by one when there is a '.' - digit_count -= (start - start_digits); - if (digit_count >= 19) { - // Ok, chances are good that we had an overflow! - // this is almost never going to get called!!! - // we start anew, going slowly!!! - return parse_float(buf, pj, offset, found_minus); - } - } - if (unlikely((power_index > 2 * 308))) { // this is uncommon!!! - // this is almost never going to get called!!! - // we start anew, going slowly!!! - return parse_float(buf, pj, offset, found_minus); - } - double factor = power_of_ten[power_index]; - factor = negative ? -factor : factor; - double d = i * factor; - pj.write_tape_double(d); -#ifdef JSON_TEST_NUMBERS // for unit testing - found_float(d, buf + offset); -#endif - } else { - if (unlikely(digit_count >= 18)) { // this is uncommon!!! - // there is a good chance that we had an overflow, so we need - // need to recover: we parse the whole thing again. - return parse_large_integer(buf, pj, offset, found_minus); - } - i = negative ? 
0 - i : i; - pj.write_tape_s64(i); -#ifdef JSON_TEST_NUMBERS // for unit testing - found_integer(i, buf + offset); -#endif - } - return is_structural_or_whitespace(*p); -#endif // SIMDJSON_SKIPNUMBERPARSING -} - - -}// namespace simdjson::arm64 - - -#endif // IS_ARM64 -#endif // SIMDJSON_ARM64_NUMBERPARSING_H -/* end file src/arm64/numberparsing.h */ -/* begin file src/haswell/numberparsing.h */ -#ifndef SIMDJSON_HASWELL_NUMBERPARSING_H -#define SIMDJSON_HASWELL_NUMBERPARSING_H - -#ifdef IS_X86_64 - -#include -#include - - -#ifdef JSON_TEST_NUMBERS // for unit testing -void found_invalid_number(const uint8_t *buf); -void found_integer(int64_t result, const uint8_t *buf); -void found_unsigned_integer(uint64_t result, const uint8_t *buf); -void found_float(double result, const uint8_t *buf); -#endif - -TARGET_HASWELL -namespace simdjson::haswell { -static inline uint32_t parse_eight_digits_unrolled(const char *chars) { - // this actually computes *16* values so we are being wasteful. - const __m128i ascii0 = _mm_set1_epi8('0'); - const __m128i mul_1_10 = - _mm_setr_epi8(10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1); - const __m128i mul_1_100 = _mm_setr_epi16(100, 1, 100, 1, 100, 1, 100, 1); - const __m128i mul_1_10000 = - _mm_setr_epi16(10000, 1, 10000, 1, 10000, 1, 10000, 1); - const __m128i input = _mm_sub_epi8( - _mm_loadu_si128(reinterpret_cast(chars)), ascii0); - const __m128i t1 = _mm_maddubs_epi16(input, mul_1_10); - const __m128i t2 = _mm_madd_epi16(t1, mul_1_100); - const __m128i t3 = _mm_packus_epi32(t2, t2); - const __m128i t4 = _mm_madd_epi16(t3, mul_1_10000); - return _mm_cvtsi128_si32( - t4); // only captures the sum of the first 8 digits, drop the rest -} - -#define SWAR_NUMBER_PARSING - - -// Allowable floating-point values range -// std::numeric_limits::lowest() to std::numeric_limits::max(), -// so from -1.7976e308 all the way to 1.7975e308 in binary64. 
The lowest -// non-zero normal values is std::numeric_limits::min() or -// about 2.225074e-308. -static const double power_of_ten[] = { - 1e-308, 1e-307, 1e-306, 1e-305, 1e-304, 1e-303, 1e-302, 1e-301, 1e-300, - 1e-299, 1e-298, 1e-297, 1e-296, 1e-295, 1e-294, 1e-293, 1e-292, 1e-291, - 1e-290, 1e-289, 1e-288, 1e-287, 1e-286, 1e-285, 1e-284, 1e-283, 1e-282, - 1e-281, 1e-280, 1e-279, 1e-278, 1e-277, 1e-276, 1e-275, 1e-274, 1e-273, - 1e-272, 1e-271, 1e-270, 1e-269, 1e-268, 1e-267, 1e-266, 1e-265, 1e-264, - 1e-263, 1e-262, 1e-261, 1e-260, 1e-259, 1e-258, 1e-257, 1e-256, 1e-255, - 1e-254, 1e-253, 1e-252, 1e-251, 1e-250, 1e-249, 1e-248, 1e-247, 1e-246, - 1e-245, 1e-244, 1e-243, 1e-242, 1e-241, 1e-240, 1e-239, 1e-238, 1e-237, - 1e-236, 1e-235, 1e-234, 1e-233, 1e-232, 1e-231, 1e-230, 1e-229, 1e-228, - 1e-227, 1e-226, 1e-225, 1e-224, 1e-223, 1e-222, 1e-221, 1e-220, 1e-219, - 1e-218, 1e-217, 1e-216, 1e-215, 1e-214, 1e-213, 1e-212, 1e-211, 1e-210, - 1e-209, 1e-208, 1e-207, 1e-206, 1e-205, 1e-204, 1e-203, 1e-202, 1e-201, - 1e-200, 1e-199, 1e-198, 1e-197, 1e-196, 1e-195, 1e-194, 1e-193, 1e-192, - 1e-191, 1e-190, 1e-189, 1e-188, 1e-187, 1e-186, 1e-185, 1e-184, 1e-183, - 1e-182, 1e-181, 1e-180, 1e-179, 1e-178, 1e-177, 1e-176, 1e-175, 1e-174, - 1e-173, 1e-172, 1e-171, 1e-170, 1e-169, 1e-168, 1e-167, 1e-166, 1e-165, - 1e-164, 1e-163, 1e-162, 1e-161, 1e-160, 1e-159, 1e-158, 1e-157, 1e-156, - 1e-155, 1e-154, 1e-153, 1e-152, 1e-151, 1e-150, 1e-149, 1e-148, 1e-147, - 1e-146, 1e-145, 1e-144, 1e-143, 1e-142, 1e-141, 1e-140, 1e-139, 1e-138, - 1e-137, 1e-136, 1e-135, 1e-134, 1e-133, 1e-132, 1e-131, 1e-130, 1e-129, - 1e-128, 1e-127, 1e-126, 1e-125, 1e-124, 1e-123, 1e-122, 1e-121, 1e-120, - 1e-119, 1e-118, 1e-117, 1e-116, 1e-115, 1e-114, 1e-113, 1e-112, 1e-111, - 1e-110, 1e-109, 1e-108, 1e-107, 1e-106, 1e-105, 1e-104, 1e-103, 1e-102, - 1e-101, 1e-100, 1e-99, 1e-98, 1e-97, 1e-96, 1e-95, 1e-94, 1e-93, - 1e-92, 1e-91, 1e-90, 1e-89, 1e-88, 1e-87, 1e-86, 1e-85, 1e-84, - 1e-83, 1e-82, 1e-81, 1e-80, 
1e-79, 1e-78, 1e-77, 1e-76, 1e-75, - 1e-74, 1e-73, 1e-72, 1e-71, 1e-70, 1e-69, 1e-68, 1e-67, 1e-66, - 1e-65, 1e-64, 1e-63, 1e-62, 1e-61, 1e-60, 1e-59, 1e-58, 1e-57, - 1e-56, 1e-55, 1e-54, 1e-53, 1e-52, 1e-51, 1e-50, 1e-49, 1e-48, - 1e-47, 1e-46, 1e-45, 1e-44, 1e-43, 1e-42, 1e-41, 1e-40, 1e-39, - 1e-38, 1e-37, 1e-36, 1e-35, 1e-34, 1e-33, 1e-32, 1e-31, 1e-30, - 1e-29, 1e-28, 1e-27, 1e-26, 1e-25, 1e-24, 1e-23, 1e-22, 1e-21, - 1e-20, 1e-19, 1e-18, 1e-17, 1e-16, 1e-15, 1e-14, 1e-13, 1e-12, - 1e-11, 1e-10, 1e-9, 1e-8, 1e-7, 1e-6, 1e-5, 1e-4, 1e-3, - 1e-2, 1e-1, 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, - 1e7, 1e8, 1e9, 1e10, 1e11, 1e12, 1e13, 1e14, 1e15, - 1e16, 1e17, 1e18, 1e19, 1e20, 1e21, 1e22, 1e23, 1e24, - 1e25, 1e26, 1e27, 1e28, 1e29, 1e30, 1e31, 1e32, 1e33, - 1e34, 1e35, 1e36, 1e37, 1e38, 1e39, 1e40, 1e41, 1e42, - 1e43, 1e44, 1e45, 1e46, 1e47, 1e48, 1e49, 1e50, 1e51, - 1e52, 1e53, 1e54, 1e55, 1e56, 1e57, 1e58, 1e59, 1e60, - 1e61, 1e62, 1e63, 1e64, 1e65, 1e66, 1e67, 1e68, 1e69, - 1e70, 1e71, 1e72, 1e73, 1e74, 1e75, 1e76, 1e77, 1e78, - 1e79, 1e80, 1e81, 1e82, 1e83, 1e84, 1e85, 1e86, 1e87, - 1e88, 1e89, 1e90, 1e91, 1e92, 1e93, 1e94, 1e95, 1e96, - 1e97, 1e98, 1e99, 1e100, 1e101, 1e102, 1e103, 1e104, 1e105, - 1e106, 1e107, 1e108, 1e109, 1e110, 1e111, 1e112, 1e113, 1e114, - 1e115, 1e116, 1e117, 1e118, 1e119, 1e120, 1e121, 1e122, 1e123, - 1e124, 1e125, 1e126, 1e127, 1e128, 1e129, 1e130, 1e131, 1e132, - 1e133, 1e134, 1e135, 1e136, 1e137, 1e138, 1e139, 1e140, 1e141, - 1e142, 1e143, 1e144, 1e145, 1e146, 1e147, 1e148, 1e149, 1e150, - 1e151, 1e152, 1e153, 1e154, 1e155, 1e156, 1e157, 1e158, 1e159, - 1e160, 1e161, 1e162, 1e163, 1e164, 1e165, 1e166, 1e167, 1e168, - 1e169, 1e170, 1e171, 1e172, 1e173, 1e174, 1e175, 1e176, 1e177, - 1e178, 1e179, 1e180, 1e181, 1e182, 1e183, 1e184, 1e185, 1e186, - 1e187, 1e188, 1e189, 1e190, 1e191, 1e192, 1e193, 1e194, 1e195, - 1e196, 1e197, 1e198, 1e199, 1e200, 1e201, 1e202, 1e203, 1e204, - 1e205, 1e206, 1e207, 1e208, 1e209, 1e210, 1e211, 1e212, 1e213, - 
1e214, 1e215, 1e216, 1e217, 1e218, 1e219, 1e220, 1e221, 1e222, - 1e223, 1e224, 1e225, 1e226, 1e227, 1e228, 1e229, 1e230, 1e231, - 1e232, 1e233, 1e234, 1e235, 1e236, 1e237, 1e238, 1e239, 1e240, - 1e241, 1e242, 1e243, 1e244, 1e245, 1e246, 1e247, 1e248, 1e249, - 1e250, 1e251, 1e252, 1e253, 1e254, 1e255, 1e256, 1e257, 1e258, - 1e259, 1e260, 1e261, 1e262, 1e263, 1e264, 1e265, 1e266, 1e267, - 1e268, 1e269, 1e270, 1e271, 1e272, 1e273, 1e274, 1e275, 1e276, - 1e277, 1e278, 1e279, 1e280, 1e281, 1e282, 1e283, 1e284, 1e285, - 1e286, 1e287, 1e288, 1e289, 1e290, 1e291, 1e292, 1e293, 1e294, - 1e295, 1e296, 1e297, 1e298, 1e299, 1e300, 1e301, 1e302, 1e303, - 1e304, 1e305, 1e306, 1e307, 1e308}; - -static inline bool is_integer(char c) { - return (c >= '0' && c <= '9'); - // this gets compiled to (uint8_t)(c - '0') <= 9 on all decent compilers -} - -// We need to check that the character following a zero is valid. This is -// probably frequent and it is hard than it looks. We are building all of this -// just to differentiate between 0x1 (invalid), 0,1 (valid) 0e1 (valid)... 
-const bool structural_or_whitespace_or_exponent_or_decimal_negated[256] = { - 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, - 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; - -really_inline bool -is_not_structural_or_whitespace_or_exponent_or_decimal(unsigned char c) { - return structural_or_whitespace_or_exponent_or_decimal_negated[c]; -} - -// check quickly whether the next 8 chars are made of digits -// at a glance, it looks better than Mula's -// http://0x80.pl/articles/swar-digits-validate.html -static inline bool is_made_of_eight_digits_fast(const char *chars) { - uint64_t val; - // this can read up to 7 bytes beyond the buffer size, but we require - // SIMDJSON_PADDING of padding - static_assert(7 <= SIMDJSON_PADDING); - memcpy(&val, chars, 8); - // a branchy method might be faster: - // return (( val & 0xF0F0F0F0F0F0F0F0 ) == 0x3030303030303030) - // && (( (val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0 ) == - // 0x3030303030303030); - return (((val & 0xF0F0F0F0F0F0F0F0) | - (((val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4)) == - 0x3333333333333333); -} - - -// -// This function computes base * 10 ^ (- negative_exponent ). -// It is only even going to be used when negative_exponent is tiny. 
-static double subnormal_power10(double base, int64_t negative_exponent) { - // avoid integer overflows in the pow expression, those values would - // become zero anyway. - if(negative_exponent < -1000) { - return 0; - } - - // this is probably not going to be fast - return base * 1e-308 * pow(10, negative_exponent + 308); -} - -// called by parse_number when we know that the output is a float, -// but where there might be some integer overflow. The trick here is to -// parse using floats from the start. -// Do not call this function directly as it skips some of the checks from -// parse_number -// -// This function will almost never be called!!! -// -// Note: a redesign could avoid this function entirely. -// -static never_inline bool parse_float(const uint8_t *const buf, ParsedJson &pj, - const uint32_t offset, bool found_minus) { - const char *p = reinterpret_cast(buf + offset); - bool negative = false; - if (found_minus) { - ++p; - negative = true; - } - long double i; - if (*p == '0') { // 0 cannot be followed by an integer - ++p; - i = 0; - } else { - unsigned char digit = *p - '0'; - i = digit; - p++; - while (is_integer(*p)) { - digit = *p - '0'; - i = 10 * i + digit; - ++p; - } - } - if ('.' == *p) { - ++p; - int fractional_weight = 308; - if (is_integer(*p)) { - unsigned char digit = *p - '0'; - ++p; - - fractional_weight--; - i = i + digit * (fractional_weight >= 0 ? power_of_ten[fractional_weight] - : 0); - } else { -#ifdef JSON_TEST_NUMBERS // for unit testing - found_invalid_number(buf + offset); -#endif - return false; - } - while (is_integer(*p)) { - unsigned char digit = *p - '0'; - ++p; - fractional_weight--; - i = i + digit * (fractional_weight >= 0 ? 
power_of_ten[fractional_weight] - : 0); - } - } - if (('e' == *p) || ('E' == *p)) { - ++p; - bool neg_exp = false; - if ('-' == *p) { - neg_exp = true; - ++p; - } else if ('+' == *p) { - ++p; - } - if (!is_integer(*p)) { -#ifdef JSON_TEST_NUMBERS // for unit testing - found_invalid_number(buf + offset); -#endif - return false; - } - unsigned char digit = *p - '0'; - int64_t exp_number = digit; // exponential part - p++; - if (is_integer(*p)) { - digit = *p - '0'; - exp_number = 10 * exp_number + digit; - ++p; - } - if (is_integer(*p)) { - digit = *p - '0'; - exp_number = 10 * exp_number + digit; - ++p; - } - if (is_integer(*p)) { - digit = *p - '0'; - exp_number = 10 * exp_number + digit; - ++p; - } - while (is_integer(*p)) { - if (exp_number > 0x100000000) { // we need to check for overflows -// we refuse to parse this -#ifdef JSON_TEST_NUMBERS // for unit testing - found_invalid_number(buf + offset); -#endif - return false; - } - digit = *p - '0'; - exp_number = 10 * exp_number + digit; - ++p; - } - if (unlikely(exp_number > 308)) { - // this path is unlikely - if (neg_exp) { - // We either have zero or a subnormal. - // We expect this to be uncommon so we go through a slow path. - i = subnormal_power10(i, -exp_number); - } else { -// We know for sure that we have a number that is too large, -// we refuse to parse this -#ifdef JSON_TEST_NUMBERS // for unit testing - found_invalid_number(buf + offset); -#endif - return false; - } - } else { - int exponent = (neg_exp ? -exp_number : exp_number); - // we have that exp_number is [0,308] so that - // exponent is [-308,308] so that - // 308 + exponent is in [0, 2 * 308] - i *= power_of_ten[308 + exponent]; - } - } - if (is_not_structural_or_whitespace(*p)) { - return false; - } - // check that we can go from long double to double safely. - if(i > std::numeric_limits::max()) { -#ifdef JSON_TEST_NUMBERS // for unit testing - found_invalid_number(buf + offset); -#endif - return false; - } - double d = negative ? 
-i : i; - pj.write_tape_double(d); -#ifdef JSON_TEST_NUMBERS // for unit testing - found_float(d, buf + offset); -#endif - return is_structural_or_whitespace(*p); -} - -// called by parse_number when we know that the output is an integer, -// but where there might be some integer overflow. -// we want to catch overflows! -// Do not call this function directly as it skips some of the checks from -// parse_number -// -// This function will almost never be called!!! -// -static never_inline bool parse_large_integer(const uint8_t *const buf, - ParsedJson &pj, - const uint32_t offset, - bool found_minus) { - const char *p = reinterpret_cast(buf + offset); - - bool negative = false; - if (found_minus) { - ++p; - negative = true; - } - uint64_t i; - if (*p == '0') { // 0 cannot be followed by an integer - ++p; - i = 0; - } else { - unsigned char digit = *p - '0'; - i = digit; - p++; - // the is_made_of_eight_digits_fast routine is unlikely to help here because - // we rarely see large integer parts like 123456789 - while (is_integer(*p)) { - digit = *p - '0'; - if (mul_overflow(i, 10, &i)) { -#ifdef JSON_TEST_NUMBERS // for unit testing - found_invalid_number(buf + offset); -#endif - return false; // overflow - } - if (add_overflow(i, digit, &i)) { -#ifdef JSON_TEST_NUMBERS // for unit testing - found_invalid_number(buf + offset); -#endif - return false; // overflow - } - ++p; - } - } - if (negative) { - if (i > 0x8000000000000000) { - // overflows! -#ifdef JSON_TEST_NUMBERS // for unit testing - found_invalid_number(buf + offset); -#endif - return false; // overflow - } else if (i == 0x8000000000000000) { - // In two's complement, we cannot represent 0x8000000000000000 - // as a positive signed integer, but the negative version is - // possible. 
- constexpr int64_t signed_answer = INT64_MIN; - pj.write_tape_s64(signed_answer); -#ifdef JSON_TEST_NUMBERS // for unit testing - found_integer(signed_answer, buf + offset); -#endif - } else { - // we can negate safely - int64_t signed_answer = -static_cast(i); - pj.write_tape_s64(signed_answer); -#ifdef JSON_TEST_NUMBERS // for unit testing - found_integer(signed_answer, buf + offset); -#endif - } - } else { - // we have a positive integer, the contract is that - // we try to represent it as a signed integer and only - // fallback on unsigned integers if absolutely necessary. - if(i < 0x8000000000000000) { -#ifdef JSON_TEST_NUMBERS // for unit testing - found_integer(i, buf + offset); -#endif - pj.write_tape_s64(i); - } else { -#ifdef JSON_TEST_NUMBERS // for unit testing - found_unsigned_integer(i, buf + offset); -#endif - pj.write_tape_u64(i); - } - } - return is_structural_or_whitespace(*p); -} - -// parse the number at buf + offset -// define JSON_TEST_NUMBERS for unit testing -// -// It is assumed that the number is followed by a structural ({,},],[) character -// or a white space character. If that is not the case (e.g., when the JSON -// document is made of a single number), then it is necessary to copy the -// content and append a space before calling this function. -// -// Our objective is accurate parsing (ULP of 0 or 1) at high speed. 
-static really_inline bool parse_number(const uint8_t *const buf, ParsedJson &pj, - const uint32_t offset, - bool found_minus) { -#ifdef SIMDJSON_SKIPNUMBERPARSING // for performance analysis, it is sometimes - // useful to skip parsing - pj.write_tape_s64(0); // always write zero - return true; // always succeeds -#else - const char *p = reinterpret_cast(buf + offset); - bool negative = false; - if (found_minus) { - ++p; - negative = true; - if (!is_integer(*p)) { // a negative sign must be followed by an integer -#ifdef JSON_TEST_NUMBERS // for unit testing - found_invalid_number(buf + offset); -#endif - return false; - } - } - const char *const start_digits = p; - - uint64_t i; // an unsigned int avoids signed overflows (which are bad) - if (*p == '0') { // 0 cannot be followed by an integer - ++p; - if (is_not_structural_or_whitespace_or_exponent_or_decimal(*p)) { -#ifdef JSON_TEST_NUMBERS // for unit testing - found_invalid_number(buf + offset); -#endif - return false; - } - i = 0; - } else { - if (!(is_integer(*p))) { // must start with an integer -#ifdef JSON_TEST_NUMBERS // for unit testing - found_invalid_number(buf + offset); -#endif - return false; - } - unsigned char digit = *p - '0'; - i = digit; - p++; - // the is_made_of_eight_digits_fast routine is unlikely to help here because - // we rarely see large integer parts like 123456789 - while (is_integer(*p)) { - digit = *p - '0'; - // a multiplication by 10 is cheaper than an arbitrary integer - // multiplication - i = 10 * i + digit; // might overflow, we will handle the overflow later - ++p; - } - } - int64_t exponent = 0; - bool is_float = false; - if ('.' == *p) { - is_float = true; // At this point we know that we have a float - // we continue with the fiction that we have an integer. If the - // floating point number is representable as x * 10^z for some integer - // z that fits in 53 bits, then we will be able to convert back the - // the integer into a float in a lossless manner. 
- ++p; - const char *const first_after_period = p; - if (is_integer(*p)) { - unsigned char digit = *p - '0'; - ++p; - i = i * 10 + digit; // might overflow + multiplication by 10 is likely - // cheaper than arbitrary mult. - // we will handle the overflow later - } else { -#ifdef JSON_TEST_NUMBERS // for unit testing - found_invalid_number(buf + offset); -#endif - return false; - } -#ifdef SWAR_NUMBER_PARSING - // this helps if we have lots of decimals! - // this turns out to be frequent enough. - if (is_made_of_eight_digits_fast(p)) { - i = i * 100000000 + parse_eight_digits_unrolled(p); - p += 8; - } -#endif - while (is_integer(*p)) { - unsigned char digit = *p - '0'; - ++p; - i = i * 10 + digit; // in rare cases, this will overflow, but that's ok - // because we have parse_highprecision_float later. - } - exponent = first_after_period - p; - } - int digit_count = - p - start_digits - 1; // used later to guard against overflows - int64_t exp_number = 0; // exponential part - if (('e' == *p) || ('E' == *p)) { - is_float = true; - ++p; - bool neg_exp = false; - if ('-' == *p) { - neg_exp = true; - ++p; - } else if ('+' == *p) { - ++p; - } - if (!is_integer(*p)) { -#ifdef JSON_TEST_NUMBERS // for unit testing - found_invalid_number(buf + offset); -#endif - return false; - } - unsigned char digit = *p - '0'; - exp_number = digit; - p++; - if (is_integer(*p)) { - digit = *p - '0'; - exp_number = 10 * exp_number + digit; - ++p; - } - if (is_integer(*p)) { - digit = *p - '0'; - exp_number = 10 * exp_number + digit; - ++p; - } - while (is_integer(*p)) { - if (exp_number > 0x100000000) { // we need to check for overflows - // we refuse to parse this -#ifdef JSON_TEST_NUMBERS // for unit testing - found_invalid_number(buf + offset); -#endif - return false; - } - digit = *p - '0'; - exp_number = 10 * exp_number + digit; - ++p; - } - exponent += (neg_exp ? 
-exp_number : exp_number); - } - if (is_float) { - uint64_t power_index = 308 + exponent; - if (unlikely((digit_count >= 19))) { // this is uncommon - // It is possible that the integer had an overflow. - // We have to handle the case where we have 0.0000somenumber. - const char *start = start_digits; - while ((*start == '0') || (*start == '.')) { - start++; - } - // we over-decrement by one when there is a '.' - digit_count -= (start - start_digits); - if (digit_count >= 19) { - // Ok, chances are good that we had an overflow! - // this is almost never going to get called!!! - // we start anew, going slowly!!! - return parse_float(buf, pj, offset, found_minus); - } - } - if (unlikely((power_index > 2 * 308))) { // this is uncommon!!! - // this is almost never going to get called!!! - // we start anew, going slowly!!! - return parse_float(buf, pj, offset, found_minus); - } - double factor = power_of_ten[power_index]; - factor = negative ? -factor : factor; - double d = i * factor; - pj.write_tape_double(d); -#ifdef JSON_TEST_NUMBERS // for unit testing - found_float(d, buf + offset); -#endif - } else { - if (unlikely(digit_count >= 18)) { // this is uncommon!!! - // there is a good chance that we had an overflow, so we need - // need to recover: we parse the whole thing again. - return parse_large_integer(buf, pj, offset, found_minus); - } - i = negative ? 
0 - i : i; - pj.write_tape_s64(i); -#ifdef JSON_TEST_NUMBERS // for unit testing - found_integer(i, buf + offset); -#endif - } - return is_structural_or_whitespace(*p); -#endif // SIMDJSON_SKIPNUMBERPARSING -} - -} // namespace simdjson::haswell -UNTARGET_REGION - - - - -#endif // IS_X86_64 - - -#endif // SIMDJSON_HASWELL_NUMBERPARSING_H -/* end file src/haswell/numberparsing.h */ -/* begin file src/westmere/numberparsing.h */ -#ifndef SIMDJSON_WESTMERE_NUMBERPARSING_H -#define SIMDJSON_WESTMERE_NUMBERPARSING_H - -#ifdef IS_X86_64 - -#include -#include - - -#ifdef JSON_TEST_NUMBERS // for unit testing -void found_invalid_number(const uint8_t *buf); -void found_integer(int64_t result, const uint8_t *buf); -void found_unsigned_integer(uint64_t result, const uint8_t *buf); -void found_float(double result, const uint8_t *buf); -#endif - - -TARGET_WESTMERE -namespace simdjson::westmere { -static inline uint32_t parse_eight_digits_unrolled(const char *chars) { - // this actually computes *16* values so we are being wasteful. - const __m128i ascii0 = _mm_set1_epi8('0'); - const __m128i mul_1_10 = - _mm_setr_epi8(10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1); - const __m128i mul_1_100 = _mm_setr_epi16(100, 1, 100, 1, 100, 1, 100, 1); - const __m128i mul_1_10000 = - _mm_setr_epi16(10000, 1, 10000, 1, 10000, 1, 10000, 1); - const __m128i input = _mm_sub_epi8( - _mm_loadu_si128(reinterpret_cast(chars)), ascii0); - const __m128i t1 = _mm_maddubs_epi16(input, mul_1_10); - const __m128i t2 = _mm_madd_epi16(t1, mul_1_100); - const __m128i t3 = _mm_packus_epi32(t2, t2); - const __m128i t4 = _mm_madd_epi16(t3, mul_1_10000); - return _mm_cvtsi128_si32( - t4); // only captures the sum of the first 8 digits, drop the rest -} - -#define SWAR_NUMBER_PARSING - - -// Allowable floating-point values range -// std::numeric_limits::lowest() to std::numeric_limits::max(), -// so from -1.7976e308 all the way to 1.7975e308 in binary64. 
The lowest -// non-zero normal values is std::numeric_limits::min() or -// about 2.225074e-308. -static const double power_of_ten[] = { - 1e-308, 1e-307, 1e-306, 1e-305, 1e-304, 1e-303, 1e-302, 1e-301, 1e-300, - 1e-299, 1e-298, 1e-297, 1e-296, 1e-295, 1e-294, 1e-293, 1e-292, 1e-291, - 1e-290, 1e-289, 1e-288, 1e-287, 1e-286, 1e-285, 1e-284, 1e-283, 1e-282, - 1e-281, 1e-280, 1e-279, 1e-278, 1e-277, 1e-276, 1e-275, 1e-274, 1e-273, - 1e-272, 1e-271, 1e-270, 1e-269, 1e-268, 1e-267, 1e-266, 1e-265, 1e-264, - 1e-263, 1e-262, 1e-261, 1e-260, 1e-259, 1e-258, 1e-257, 1e-256, 1e-255, - 1e-254, 1e-253, 1e-252, 1e-251, 1e-250, 1e-249, 1e-248, 1e-247, 1e-246, - 1e-245, 1e-244, 1e-243, 1e-242, 1e-241, 1e-240, 1e-239, 1e-238, 1e-237, - 1e-236, 1e-235, 1e-234, 1e-233, 1e-232, 1e-231, 1e-230, 1e-229, 1e-228, - 1e-227, 1e-226, 1e-225, 1e-224, 1e-223, 1e-222, 1e-221, 1e-220, 1e-219, - 1e-218, 1e-217, 1e-216, 1e-215, 1e-214, 1e-213, 1e-212, 1e-211, 1e-210, - 1e-209, 1e-208, 1e-207, 1e-206, 1e-205, 1e-204, 1e-203, 1e-202, 1e-201, - 1e-200, 1e-199, 1e-198, 1e-197, 1e-196, 1e-195, 1e-194, 1e-193, 1e-192, - 1e-191, 1e-190, 1e-189, 1e-188, 1e-187, 1e-186, 1e-185, 1e-184, 1e-183, - 1e-182, 1e-181, 1e-180, 1e-179, 1e-178, 1e-177, 1e-176, 1e-175, 1e-174, - 1e-173, 1e-172, 1e-171, 1e-170, 1e-169, 1e-168, 1e-167, 1e-166, 1e-165, - 1e-164, 1e-163, 1e-162, 1e-161, 1e-160, 1e-159, 1e-158, 1e-157, 1e-156, - 1e-155, 1e-154, 1e-153, 1e-152, 1e-151, 1e-150, 1e-149, 1e-148, 1e-147, - 1e-146, 1e-145, 1e-144, 1e-143, 1e-142, 1e-141, 1e-140, 1e-139, 1e-138, - 1e-137, 1e-136, 1e-135, 1e-134, 1e-133, 1e-132, 1e-131, 1e-130, 1e-129, - 1e-128, 1e-127, 1e-126, 1e-125, 1e-124, 1e-123, 1e-122, 1e-121, 1e-120, - 1e-119, 1e-118, 1e-117, 1e-116, 1e-115, 1e-114, 1e-113, 1e-112, 1e-111, - 1e-110, 1e-109, 1e-108, 1e-107, 1e-106, 1e-105, 1e-104, 1e-103, 1e-102, - 1e-101, 1e-100, 1e-99, 1e-98, 1e-97, 1e-96, 1e-95, 1e-94, 1e-93, - 1e-92, 1e-91, 1e-90, 1e-89, 1e-88, 1e-87, 1e-86, 1e-85, 1e-84, - 1e-83, 1e-82, 1e-81, 1e-80, 
1e-79, 1e-78, 1e-77, 1e-76, 1e-75, - 1e-74, 1e-73, 1e-72, 1e-71, 1e-70, 1e-69, 1e-68, 1e-67, 1e-66, - 1e-65, 1e-64, 1e-63, 1e-62, 1e-61, 1e-60, 1e-59, 1e-58, 1e-57, - 1e-56, 1e-55, 1e-54, 1e-53, 1e-52, 1e-51, 1e-50, 1e-49, 1e-48, - 1e-47, 1e-46, 1e-45, 1e-44, 1e-43, 1e-42, 1e-41, 1e-40, 1e-39, - 1e-38, 1e-37, 1e-36, 1e-35, 1e-34, 1e-33, 1e-32, 1e-31, 1e-30, - 1e-29, 1e-28, 1e-27, 1e-26, 1e-25, 1e-24, 1e-23, 1e-22, 1e-21, - 1e-20, 1e-19, 1e-18, 1e-17, 1e-16, 1e-15, 1e-14, 1e-13, 1e-12, - 1e-11, 1e-10, 1e-9, 1e-8, 1e-7, 1e-6, 1e-5, 1e-4, 1e-3, - 1e-2, 1e-1, 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, - 1e7, 1e8, 1e9, 1e10, 1e11, 1e12, 1e13, 1e14, 1e15, - 1e16, 1e17, 1e18, 1e19, 1e20, 1e21, 1e22, 1e23, 1e24, - 1e25, 1e26, 1e27, 1e28, 1e29, 1e30, 1e31, 1e32, 1e33, - 1e34, 1e35, 1e36, 1e37, 1e38, 1e39, 1e40, 1e41, 1e42, - 1e43, 1e44, 1e45, 1e46, 1e47, 1e48, 1e49, 1e50, 1e51, - 1e52, 1e53, 1e54, 1e55, 1e56, 1e57, 1e58, 1e59, 1e60, - 1e61, 1e62, 1e63, 1e64, 1e65, 1e66, 1e67, 1e68, 1e69, - 1e70, 1e71, 1e72, 1e73, 1e74, 1e75, 1e76, 1e77, 1e78, - 1e79, 1e80, 1e81, 1e82, 1e83, 1e84, 1e85, 1e86, 1e87, - 1e88, 1e89, 1e90, 1e91, 1e92, 1e93, 1e94, 1e95, 1e96, - 1e97, 1e98, 1e99, 1e100, 1e101, 1e102, 1e103, 1e104, 1e105, - 1e106, 1e107, 1e108, 1e109, 1e110, 1e111, 1e112, 1e113, 1e114, - 1e115, 1e116, 1e117, 1e118, 1e119, 1e120, 1e121, 1e122, 1e123, - 1e124, 1e125, 1e126, 1e127, 1e128, 1e129, 1e130, 1e131, 1e132, - 1e133, 1e134, 1e135, 1e136, 1e137, 1e138, 1e139, 1e140, 1e141, - 1e142, 1e143, 1e144, 1e145, 1e146, 1e147, 1e148, 1e149, 1e150, - 1e151, 1e152, 1e153, 1e154, 1e155, 1e156, 1e157, 1e158, 1e159, - 1e160, 1e161, 1e162, 1e163, 1e164, 1e165, 1e166, 1e167, 1e168, - 1e169, 1e170, 1e171, 1e172, 1e173, 1e174, 1e175, 1e176, 1e177, - 1e178, 1e179, 1e180, 1e181, 1e182, 1e183, 1e184, 1e185, 1e186, - 1e187, 1e188, 1e189, 1e190, 1e191, 1e192, 1e193, 1e194, 1e195, - 1e196, 1e197, 1e198, 1e199, 1e200, 1e201, 1e202, 1e203, 1e204, - 1e205, 1e206, 1e207, 1e208, 1e209, 1e210, 1e211, 1e212, 1e213, - 
1e214, 1e215, 1e216, 1e217, 1e218, 1e219, 1e220, 1e221, 1e222, - 1e223, 1e224, 1e225, 1e226, 1e227, 1e228, 1e229, 1e230, 1e231, - 1e232, 1e233, 1e234, 1e235, 1e236, 1e237, 1e238, 1e239, 1e240, - 1e241, 1e242, 1e243, 1e244, 1e245, 1e246, 1e247, 1e248, 1e249, - 1e250, 1e251, 1e252, 1e253, 1e254, 1e255, 1e256, 1e257, 1e258, - 1e259, 1e260, 1e261, 1e262, 1e263, 1e264, 1e265, 1e266, 1e267, - 1e268, 1e269, 1e270, 1e271, 1e272, 1e273, 1e274, 1e275, 1e276, - 1e277, 1e278, 1e279, 1e280, 1e281, 1e282, 1e283, 1e284, 1e285, - 1e286, 1e287, 1e288, 1e289, 1e290, 1e291, 1e292, 1e293, 1e294, - 1e295, 1e296, 1e297, 1e298, 1e299, 1e300, 1e301, 1e302, 1e303, - 1e304, 1e305, 1e306, 1e307, 1e308}; - -static inline bool is_integer(char c) { - return (c >= '0' && c <= '9'); - // this gets compiled to (uint8_t)(c - '0') <= 9 on all decent compilers -} - -// We need to check that the character following a zero is valid. This is -// probably frequent and it is hard than it looks. We are building all of this -// just to differentiate between 0x1 (invalid), 0,1 (valid) 0e1 (valid)... 
-const bool structural_or_whitespace_or_exponent_or_decimal_negated[256] = { - 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, - 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; - -really_inline bool -is_not_structural_or_whitespace_or_exponent_or_decimal(unsigned char c) { - return structural_or_whitespace_or_exponent_or_decimal_negated[c]; -} - -// check quickly whether the next 8 chars are made of digits -// at a glance, it looks better than Mula's -// http://0x80.pl/articles/swar-digits-validate.html -static inline bool is_made_of_eight_digits_fast(const char *chars) { - uint64_t val; - // this can read up to 7 bytes beyond the buffer size, but we require - // SIMDJSON_PADDING of padding - static_assert(7 <= SIMDJSON_PADDING); - memcpy(&val, chars, 8); - // a branchy method might be faster: - // return (( val & 0xF0F0F0F0F0F0F0F0 ) == 0x3030303030303030) - // && (( (val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0 ) == - // 0x3030303030303030); - return (((val & 0xF0F0F0F0F0F0F0F0) | - (((val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4)) == - 0x3333333333333333); -} - - -// -// This function computes base * 10 ^ (- negative_exponent ). -// It is only even going to be used when negative_exponent is tiny. 
-static double subnormal_power10(double base, int64_t negative_exponent) { - // avoid integer overflows in the pow expression, those values would - // become zero anyway. - if(negative_exponent < -1000) { - return 0; - } - - // this is probably not going to be fast - return base * 1e-308 * pow(10, negative_exponent + 308); -} - -// called by parse_number when we know that the output is a float, -// but where there might be some integer overflow. The trick here is to -// parse using floats from the start. -// Do not call this function directly as it skips some of the checks from -// parse_number -// -// This function will almost never be called!!! -// -// Note: a redesign could avoid this function entirely. -// -static never_inline bool parse_float(const uint8_t *const buf, ParsedJson &pj, - const uint32_t offset, bool found_minus) { - const char *p = reinterpret_cast(buf + offset); - bool negative = false; - if (found_minus) { - ++p; - negative = true; - } - long double i; - if (*p == '0') { // 0 cannot be followed by an integer - ++p; - i = 0; - } else { - unsigned char digit = *p - '0'; - i = digit; - p++; - while (is_integer(*p)) { - digit = *p - '0'; - i = 10 * i + digit; - ++p; - } - } - if ('.' == *p) { - ++p; - int fractional_weight = 308; - if (is_integer(*p)) { - unsigned char digit = *p - '0'; - ++p; - - fractional_weight--; - i = i + digit * (fractional_weight >= 0 ? power_of_ten[fractional_weight] - : 0); - } else { -#ifdef JSON_TEST_NUMBERS // for unit testing - found_invalid_number(buf + offset); -#endif - return false; - } - while (is_integer(*p)) { - unsigned char digit = *p - '0'; - ++p; - fractional_weight--; - i = i + digit * (fractional_weight >= 0 ? 
power_of_ten[fractional_weight] - : 0); - } - } - if (('e' == *p) || ('E' == *p)) { - ++p; - bool neg_exp = false; - if ('-' == *p) { - neg_exp = true; - ++p; - } else if ('+' == *p) { - ++p; - } - if (!is_integer(*p)) { -#ifdef JSON_TEST_NUMBERS // for unit testing - found_invalid_number(buf + offset); -#endif - return false; - } - unsigned char digit = *p - '0'; - int64_t exp_number = digit; // exponential part - p++; - if (is_integer(*p)) { - digit = *p - '0'; - exp_number = 10 * exp_number + digit; - ++p; - } - if (is_integer(*p)) { - digit = *p - '0'; - exp_number = 10 * exp_number + digit; - ++p; - } - if (is_integer(*p)) { - digit = *p - '0'; - exp_number = 10 * exp_number + digit; - ++p; - } - while (is_integer(*p)) { - if (exp_number > 0x100000000) { // we need to check for overflows -// we refuse to parse this -#ifdef JSON_TEST_NUMBERS // for unit testing - found_invalid_number(buf + offset); -#endif - return false; - } - digit = *p - '0'; - exp_number = 10 * exp_number + digit; - ++p; - } - if (unlikely(exp_number > 308)) { - // this path is unlikely - if (neg_exp) { - // We either have zero or a subnormal. - // We expect this to be uncommon so we go through a slow path. - i = subnormal_power10(i, -exp_number); - } else { -// We know for sure that we have a number that is too large, -// we refuse to parse this -#ifdef JSON_TEST_NUMBERS // for unit testing - found_invalid_number(buf + offset); -#endif - return false; - } - } else { - int exponent = (neg_exp ? -exp_number : exp_number); - // we have that exp_number is [0,308] so that - // exponent is [-308,308] so that - // 308 + exponent is in [0, 2 * 308] - i *= power_of_ten[308 + exponent]; - } - } - if (is_not_structural_or_whitespace(*p)) { - return false; - } - // check that we can go from long double to double safely. - if(i > std::numeric_limits::max()) { -#ifdef JSON_TEST_NUMBERS // for unit testing - found_invalid_number(buf + offset); -#endif - return false; - } - double d = negative ? 
-i : i; - pj.write_tape_double(d); -#ifdef JSON_TEST_NUMBERS // for unit testing - found_float(d, buf + offset); -#endif - return is_structural_or_whitespace(*p); -} - -// called by parse_number when we know that the output is an integer, -// but where there might be some integer overflow. -// we want to catch overflows! -// Do not call this function directly as it skips some of the checks from -// parse_number -// -// This function will almost never be called!!! -// -static never_inline bool parse_large_integer(const uint8_t *const buf, - ParsedJson &pj, - const uint32_t offset, - bool found_minus) { - const char *p = reinterpret_cast(buf + offset); - - bool negative = false; - if (found_minus) { - ++p; - negative = true; - } - uint64_t i; - if (*p == '0') { // 0 cannot be followed by an integer - ++p; - i = 0; - } else { - unsigned char digit = *p - '0'; - i = digit; - p++; - // the is_made_of_eight_digits_fast routine is unlikely to help here because - // we rarely see large integer parts like 123456789 - while (is_integer(*p)) { - digit = *p - '0'; - if (mul_overflow(i, 10, &i)) { -#ifdef JSON_TEST_NUMBERS // for unit testing - found_invalid_number(buf + offset); -#endif - return false; // overflow - } - if (add_overflow(i, digit, &i)) { -#ifdef JSON_TEST_NUMBERS // for unit testing - found_invalid_number(buf + offset); -#endif - return false; // overflow - } - ++p; - } - } - if (negative) { - if (i > 0x8000000000000000) { - // overflows! -#ifdef JSON_TEST_NUMBERS // for unit testing - found_invalid_number(buf + offset); -#endif - return false; // overflow - } else if (i == 0x8000000000000000) { - // In two's complement, we cannot represent 0x8000000000000000 - // as a positive signed integer, but the negative version is - // possible. 
- constexpr int64_t signed_answer = INT64_MIN; - pj.write_tape_s64(signed_answer); -#ifdef JSON_TEST_NUMBERS // for unit testing - found_integer(signed_answer, buf + offset); -#endif - } else { - // we can negate safely - int64_t signed_answer = -static_cast(i); - pj.write_tape_s64(signed_answer); -#ifdef JSON_TEST_NUMBERS // for unit testing - found_integer(signed_answer, buf + offset); -#endif - } - } else { - // we have a positive integer, the contract is that - // we try to represent it as a signed integer and only - // fallback on unsigned integers if absolutely necessary. - if(i < 0x8000000000000000) { -#ifdef JSON_TEST_NUMBERS // for unit testing - found_integer(i, buf + offset); -#endif - pj.write_tape_s64(i); - } else { -#ifdef JSON_TEST_NUMBERS // for unit testing - found_unsigned_integer(i, buf + offset); -#endif - pj.write_tape_u64(i); - } - } - return is_structural_or_whitespace(*p); -} - -// parse the number at buf + offset -// define JSON_TEST_NUMBERS for unit testing -// -// It is assumed that the number is followed by a structural ({,},],[) character -// or a white space character. If that is not the case (e.g., when the JSON -// document is made of a single number), then it is necessary to copy the -// content and append a space before calling this function. -// -// Our objective is accurate parsing (ULP of 0 or 1) at high speed. 
-static really_inline bool parse_number(const uint8_t *const buf, ParsedJson &pj, - const uint32_t offset, - bool found_minus) { -#ifdef SIMDJSON_SKIPNUMBERPARSING // for performance analysis, it is sometimes - // useful to skip parsing - pj.write_tape_s64(0); // always write zero - return true; // always succeeds -#else - const char *p = reinterpret_cast(buf + offset); - bool negative = false; - if (found_minus) { - ++p; - negative = true; - if (!is_integer(*p)) { // a negative sign must be followed by an integer -#ifdef JSON_TEST_NUMBERS // for unit testing - found_invalid_number(buf + offset); -#endif - return false; - } - } - const char *const start_digits = p; - - uint64_t i; // an unsigned int avoids signed overflows (which are bad) - if (*p == '0') { // 0 cannot be followed by an integer - ++p; - if (is_not_structural_or_whitespace_or_exponent_or_decimal(*p)) { -#ifdef JSON_TEST_NUMBERS // for unit testing - found_invalid_number(buf + offset); -#endif - return false; - } - i = 0; - } else { - if (!(is_integer(*p))) { // must start with an integer -#ifdef JSON_TEST_NUMBERS // for unit testing - found_invalid_number(buf + offset); -#endif - return false; - } - unsigned char digit = *p - '0'; - i = digit; - p++; - // the is_made_of_eight_digits_fast routine is unlikely to help here because - // we rarely see large integer parts like 123456789 - while (is_integer(*p)) { - digit = *p - '0'; - // a multiplication by 10 is cheaper than an arbitrary integer - // multiplication - i = 10 * i + digit; // might overflow, we will handle the overflow later - ++p; - } - } - int64_t exponent = 0; - bool is_float = false; - if ('.' == *p) { - is_float = true; // At this point we know that we have a float - // we continue with the fiction that we have an integer. If the - // floating point number is representable as x * 10^z for some integer - // z that fits in 53 bits, then we will be able to convert back the - // the integer into a float in a lossless manner. 
- ++p; - const char *const first_after_period = p; - if (is_integer(*p)) { - unsigned char digit = *p - '0'; - ++p; - i = i * 10 + digit; // might overflow + multiplication by 10 is likely - // cheaper than arbitrary mult. - // we will handle the overflow later - } else { -#ifdef JSON_TEST_NUMBERS // for unit testing - found_invalid_number(buf + offset); -#endif - return false; - } -#ifdef SWAR_NUMBER_PARSING - // this helps if we have lots of decimals! - // this turns out to be frequent enough. - if (is_made_of_eight_digits_fast(p)) { - i = i * 100000000 + parse_eight_digits_unrolled(p); - p += 8; - } -#endif - while (is_integer(*p)) { - unsigned char digit = *p - '0'; - ++p; - i = i * 10 + digit; // in rare cases, this will overflow, but that's ok - // because we have parse_highprecision_float later. - } - exponent = first_after_period - p; - } - int digit_count = - p - start_digits - 1; // used later to guard against overflows - int64_t exp_number = 0; // exponential part - if (('e' == *p) || ('E' == *p)) { - is_float = true; - ++p; - bool neg_exp = false; - if ('-' == *p) { - neg_exp = true; - ++p; - } else if ('+' == *p) { - ++p; - } - if (!is_integer(*p)) { -#ifdef JSON_TEST_NUMBERS // for unit testing - found_invalid_number(buf + offset); -#endif - return false; - } - unsigned char digit = *p - '0'; - exp_number = digit; - p++; - if (is_integer(*p)) { - digit = *p - '0'; - exp_number = 10 * exp_number + digit; - ++p; - } - if (is_integer(*p)) { - digit = *p - '0'; - exp_number = 10 * exp_number + digit; - ++p; - } - while (is_integer(*p)) { - if (exp_number > 0x100000000) { // we need to check for overflows - // we refuse to parse this -#ifdef JSON_TEST_NUMBERS // for unit testing - found_invalid_number(buf + offset); -#endif - return false; - } - digit = *p - '0'; - exp_number = 10 * exp_number + digit; - ++p; - } - exponent += (neg_exp ? 
-exp_number : exp_number); - } - if (is_float) { - uint64_t power_index = 308 + exponent; - if (unlikely((digit_count >= 19))) { // this is uncommon - // It is possible that the integer had an overflow. - // We have to handle the case where we have 0.0000somenumber. - const char *start = start_digits; - while ((*start == '0') || (*start == '.')) { - start++; - } - // we over-decrement by one when there is a '.' - digit_count -= (start - start_digits); - if (digit_count >= 19) { - // Ok, chances are good that we had an overflow! - // this is almost never going to get called!!! - // we start anew, going slowly!!! - return parse_float(buf, pj, offset, found_minus); - } - } - if (unlikely((power_index > 2 * 308))) { // this is uncommon!!! - // this is almost never going to get called!!! - // we start anew, going slowly!!! - return parse_float(buf, pj, offset, found_minus); - } - double factor = power_of_ten[power_index]; - factor = negative ? -factor : factor; - double d = i * factor; - pj.write_tape_double(d); -#ifdef JSON_TEST_NUMBERS // for unit testing - found_float(d, buf + offset); -#endif - } else { - if (unlikely(digit_count >= 18)) { // this is uncommon!!! - // there is a good chance that we had an overflow, so we need - // need to recover: we parse the whole thing again. - return parse_large_integer(buf, pj, offset, found_minus); - } - i = negative ? 
0 - i : i; - pj.write_tape_s64(i); -#ifdef JSON_TEST_NUMBERS // for unit testing - found_integer(i, buf + offset); -#endif - } - return is_structural_or_whitespace(*p); -#endif // SIMDJSON_SKIPNUMBERPARSING -} - -} // namespace simdjson::westmere -UNTARGET_REGION - - - -#endif // IS_X86_64 -#endif // SIMDJSON_WESTMERE_NUMBERPARSING_H -/* end file src/westmere/numberparsing.h */ /* begin file src/arm64/bitmask.h */ #ifndef SIMDJSON_ARM64_BITMASK_H #define SIMDJSON_ARM64_BITMASK_H @@ -2894,6 +802,17 @@ UNTARGET_REGION #ifdef IS_ARM64 +/* begin file src/arm64/intrinsics.h */ +#ifndef SIMDJSON_ARM64_INTRINSICS_H +#define SIMDJSON_ARM64_INTRINSICS_H +#ifdef IS_ARM64 + +// This should be the correct header whether +// you use visual studio or other compilers. +#include +#endif // IS_ARM64 +#endif // SIMDJSON_ARM64_INTRINSICS_H +/* end file src/arm64/intrinsics.h */ namespace simdjson::arm64 { @@ -2930,67 +849,7 @@ UNTARGET_REGION #endif // IS_ARM64 #endif -/* end file src/arm64/bitmask.h */ -/* begin file src/haswell/bitmask.h */ -#ifndef SIMDJSON_HASWELL_BITMASK_H -#define SIMDJSON_HASWELL_BITMASK_H - - -#ifdef IS_X86_64 - - -TARGET_HASWELL -namespace simdjson::haswell { - -// -// Perform a "cumulative bitwise xor," flipping bits each time a 1 is encountered. -// -// For example, prefix_xor(00100100) == 00011100 -// -really_inline uint64_t prefix_xor(const uint64_t bitmask) { - // There should be no such thing with a processor supporting avx2 - // but not clmul. 
- __m128i all_ones = _mm_set1_epi8('\xFF'); - __m128i result = _mm_clmulepi64_si128(_mm_set_epi64x(0ULL, bitmask), all_ones, 0); - return _mm_cvtsi128_si64(result); -} - -} // namespace simdjson::haswell -UNTARGET_REGION - -#endif // IS_X86_64 -#endif -/* end file src/haswell/bitmask.h */ -/* begin file src/westmere/bitmask.h */ -#ifndef SIMDJSON_WESTMERE_BITMASK_H -#define SIMDJSON_WESTMERE_BITMASK_H - - -#ifdef IS_X86_64 - - -TARGET_WESTMERE -namespace simdjson::westmere { - -// -// Perform a "cumulative bitwise xor," flipping bits each time a 1 is encountered. -// -// For example, prefix_xor(00100100) == 00011100 -// -really_inline uint64_t prefix_xor(const uint64_t bitmask) { - // There should be no such thing with a processing supporting avx2 - // but not clmul. - __m128i all_ones = _mm_set1_epi8('\xFF'); - __m128i result = _mm_clmulepi64_si128(_mm_set_epi64x(0ULL, bitmask), all_ones, 0); - return _mm_cvtsi128_si64(result); -} - -} // namespace simdjson::westmere -UNTARGET_REGION - -#endif // IS_X86_64 -#endif -/* end file src/westmere/bitmask.h */ +/* end file src/arm64/intrinsics.h */ /* begin file src/arm64/simd.h */ #ifndef SIMDJSON_ARM64_SIMD_H #define SIMDJSON_ARM64_SIMD_H @@ -2998,6 +857,7 @@ UNTARGET_REGION #ifdef IS_ARM64 +/* arm64/intrinsics.h already included: #include "arm64/intrinsics.h" */ namespace simdjson::arm64::simd { @@ -3342,6 +1202,1053 @@ namespace simdjson::arm64::simd { #endif // IS_ARM64 #endif // SIMDJSON_ARM64_SIMD_H /* end file src/arm64/simd.h */ +/* begin file src/arm64/bitmanipulation.h */ +#ifndef SIMDJSON_ARM64_BITMANIPULATION_H +#define SIMDJSON_ARM64_BITMANIPULATION_H + + +#ifdef IS_ARM64 + +/* arm64/intrinsics.h already included: #include "arm64/intrinsics.h" */ + +namespace simdjson::arm64 { + +#ifndef _MSC_VER +// We sometimes call trailing_zero on inputs that are zero, +// but the algorithms do not end up using the returned value. +// Sadly, sanitizers are not smart enough to figure it out. 
+__attribute__((no_sanitize("undefined"))) // this is deliberate +#endif +/* result might be undefined when input_num is zero */ +really_inline int trailing_zeroes(uint64_t input_num) { +#ifdef _MSC_VER + unsigned long ret; + // Search the mask data from least significant bit (LSB) + // to the most significant bit (MSB) for a set bit (1). + _BitScanForward64(&ret, input_num); + return (int)ret; +#else + return __builtin_ctzll(input_num); +#endif// _MSC_VER +} + +/* result might be undefined when input_num is zero */ +really_inline uint64_t clear_lowest_bit(uint64_t input_num) { + return input_num & (input_num-1); +} + +/* result might be undefined when input_num is zero */ +really_inline int leading_zeroes(uint64_t input_num) { +#ifdef _MSC_VER + unsigned long leading_zero = 0; + // Search the mask data from most significant bit (MSB) + // to least significant bit (LSB) for a set bit (1). + if (_BitScanReverse64(&leading_zero, input_num)) + return (int)(63 - leading_zero); + else + return 64; +#else + return __builtin_clzll(input_num); +#endif// _MSC_VER +} + +/* result might be undefined when input_num is zero */ +really_inline int hamming(uint64_t input_num) { + return vaddv_u8(vcnt_u8((uint8x8_t)input_num)); +} + +really_inline bool add_overflow(uint64_t value1, uint64_t value2, + uint64_t *result) { +#ifdef _MSC_VER + // todo: this might fail under visual studio for ARM + return _addcarry_u64(0, value1, value2, + reinterpret_cast(result)); +#else + return __builtin_uaddll_overflow(value1, value2, + (unsigned long long *)result); +#endif +} + +#ifdef _MSC_VER +#pragma intrinsic(_umul128) // todo: this might fail under visual studio for ARM +#endif + +really_inline bool mul_overflow(uint64_t value1, uint64_t value2, + uint64_t *result) { +#ifdef _MSC_VER + // todo: this might fail under visual studio for ARM + uint64_t high; + *result = _umul128(value1, value2, &high); + return high; +#else + return __builtin_umulll_overflow(value1, value2, + (unsigned long long 
*)result); +#endif +} + +}// namespace simdjson::arm64 + +#endif //IS_ARM64 +#endif // SIMDJSON_ARM64_BITMANIPULATION_H +/* end file src/arm64/bitmanipulation.h */ + +namespace simdjson::arm64 { + +using namespace simd; + +really_inline void find_whitespace_and_operators( + const simd::simd8x64 in, + uint64_t &whitespace, uint64_t &op) { + + auto v = in.map([&](simd8 chunk) { + auto nib_lo = chunk & 0xf; + auto nib_hi = chunk.shr<4>(); + auto shuf_lo = nib_lo.lookup_16(16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0); + auto shuf_hi = nib_hi.lookup_16(8, 0, 18, 4, 0, 1, 0, 1, 0, 0, 0, 3, 2, 1, 0, 0); + return shuf_lo & shuf_hi; + }); + + op = v.map([&](simd8 _v) { return _v.any_bits_set(0x7); }).to_bitmask(); + whitespace = v.map([&](simd8 _v) { return _v.any_bits_set(0x18); }).to_bitmask(); +} + +really_inline bool is_ascii(simd8x64 input) { + simd8 bits = input.reduce([&](auto a,auto b) { return a|b; }); + return bits.max() < 0b10000000u; +} + +really_inline simd8 must_be_continuation(simd8 prev1, simd8 prev2, simd8 prev3) { + simd8 is_second_byte = prev1 >= uint8_t(0b11000000u); + simd8 is_third_byte = prev2 >= uint8_t(0b11100000u); + simd8 is_fourth_byte = prev3 >= uint8_t(0b11110000u); + // Use ^ instead of | for is_*_byte, because ^ is commutative, and the caller is using ^ as well. + // This will work fine because we only have to report errors for cases with 0-1 lead bytes. + // Multiple lead bytes implies 2 overlapping multibyte characters, and if that happens, there is + // guaranteed to be at least *one* lead byte that is part of only 1 other multibyte character. + // The error will be detected there. + return is_second_byte ^ is_third_byte ^ is_fourth_byte; +} + +/* begin file src/generic/utf8_lookup2_algorithm.h */ +// +// Detect Unicode errors. +// +// UTF-8 is designed to allow multiple bytes and be compatible with ASCII. 
It's a fairly basic +// encoding that uses the first few bits on each byte to denote a "byte type", and all other bits +// are straight up concatenated into the final value. The first byte of a multibyte character is a +// "leading byte" and starts with N 1's, where N is the total number of bytes (110_____ = 2 byte +// lead). The remaining bytes of a multibyte character all start with 10. 1-byte characters just +// start with 0, because that's what ASCII looks like. Here's what each size looks like: +// +// - ASCII (7 bits): 0_______ +// - 2 byte character (11 bits): 110_____ 10______ +// - 3 byte character (16 bits): 1110____ 10______ 10______ +// - 4 byte character (21 bits): 11110___ 10______ 10______ 10______ +// - 5+ byte character (illegal): 11111___ +// +// There are 6 classes of error that can happen in Unicode: +// +// - TOO_SHORT: when you have a multibyte character with too few bytes (i.e. missing continuation). +// We detect this by looking for new characters (lead bytes) inside the range of a multibyte +// character. +// +// e.g. 11000000 01100001 (2-byte character where second byte is ASCII) +// +// - TOO_LONG: when there are more bytes in your character than you need (i.e. extra continuation). +// We detect this by requiring that the next byte after your multibyte character be a new +// character--so a continuation after your character is wrong. +// +// e.g. 11011111 10111111 10111111 (2-byte character followed by *another* continuation byte) +// +// - TOO_LARGE: Unicode only goes up to U+10FFFF. These characters are too large. +// +// e.g. 11110111 10111111 10111111 10111111 (bigger than 10FFFF). +// +// - OVERLONG: multibyte characters with a bunch of leading zeroes, where you could have +// used fewer bytes to make the same character. Like encoding an ASCII character in 4 bytes is +// technically possible, but UTF-8 disallows it so that there is only one way to write an "a". +// +// e.g.
11000001 10100001 (2-byte encoding of "a", which only requires 1 byte: 01100001) +// +// - SURROGATE: Unicode U+D800-U+DFFF is a *surrogate* character, reserved for use in UCS-2 and +// WTF-8 encodings for characters with > 2 bytes. These are illegal in pure UTF-8. +// +// e.g. 11101101 10100000 10000000 (U+D800) +// +// - INVALID_5_BYTE: 5-byte, 6-byte, 7-byte and 8-byte characters are unsupported; Unicode does not +// support values with more than 21 bits (which a 4-byte character supports). +// +// e.g. 11111000 10100000 10000000 10000000 10000000 (U+800000) +// +// Legal utf-8 byte sequences per http://www.unicode.org/versions/Unicode6.0.0/ch03.pdf - page 94: +// +// Code Points 1st 2nd 3rd 4th +// U+0000..U+007F 00..7F +// U+0080..U+07FF C2..DF 80..BF +// U+0800..U+0FFF E0 A0..BF 80..BF +// U+1000..U+CFFF E1..EC 80..BF 80..BF +// U+D000..U+D7FF ED 80..9F 80..BF +// U+E000..U+FFFF EE..EF 80..BF 80..BF +// U+10000..U+3FFFF F0 90..BF 80..BF 80..BF +// U+40000..U+FFFFF F1..F3 80..BF 80..BF 80..BF +// U+100000..U+10FFFF F4 80..8F 80..BF 80..BF +// +using namespace simd; + +namespace utf8_validation { + + // + // Find special case UTF-8 errors where the character is technically readable (has the right length) + // but the *value* is disallowed. + // + // This includes overlong encodings, surrogates and values too large for Unicode. + // + // It turns out the bad character ranges can all be detected by looking at the first 12 bits of the + // UTF-8 encoded character (i.e. all of byte 1, and the high 4 bits of byte 2). This algorithm does + // three 4-bit table lookups, identifying which errors those 4 bits could match, and then &'s them together. + // If all 3 lookups detect the same error, it's an error.
+ // + really_inline simd8 check_special_cases(const simd8 input, const simd8 prev1) { + // + // These are the errors we're going to match for bytes 1-2, by looking at the first three + // nibbles of the character: (high nibble of byte 1) & (low nibble of byte 1) & (high nibble of byte 2) + // + static const int OVERLONG_2 = 0x01; // 1100000_ 10______ (technically we match 10______ but we could match ________, they both yield errors either way) + static const int OVERLONG_3 = 0x02; // 11100000 100_____ ________ + static const int OVERLONG_4 = 0x04; // 11110000 1000____ ________ ________ + static const int SURROGATE = 0x08; // 11101101 [101_]____ + static const int TOO_LARGE = 0x10; // 11110100 (1001|101_)____ + static const int TOO_LARGE_2 = 0x20; // 1111(1___|011_|0101) 10______ + + // After processing the rest of byte 1 (the low bits), we're still not done--we have to check + // byte 2 to be sure which things are errors and which aren't. + // Here byte 2 is the current `input` byte, and byte 1 is `prev1` (one byte back). + static const int CARRY = OVERLONG_2 | TOO_LARGE_2; + const simd8 byte_2_high = input.shr<4>().lookup_16( + // ASCII: ________ [0___]____ + CARRY, CARRY, CARRY, CARRY, + // ASCII: ________ [0___]____ + CARRY, CARRY, CARRY, CARRY, + // Continuations: ________ [10__]____ + CARRY | OVERLONG_3 | OVERLONG_4, // ________ [1000]____ + CARRY | OVERLONG_3 | TOO_LARGE, // ________ [1001]____ + CARRY | TOO_LARGE | SURROGATE, // ________ [1010]____ + CARRY | TOO_LARGE | SURROGATE, // ________ [1011]____ + // Multibyte Leads: ________ [11__]____ + CARRY, CARRY, CARRY, CARRY + ); + + const simd8 byte_1_high = prev1.shr<4>().lookup_16( + // [0___]____ (ASCII) + 0, 0, 0, 0, + 0, 0, 0, 0, + // [10__]____ (continuation) + 0, 0, 0, 0, + // [11__]____ (2+-byte leads) + OVERLONG_2, 0, // [110_]____ (2-byte lead) + OVERLONG_3 | SURROGATE, // [1110]____ (3-byte lead) + OVERLONG_4 | TOO_LARGE | TOO_LARGE_2 // [1111]____ (4+-byte lead) + ); + + const simd8 byte_1_low = (prev1 & 0x0F).lookup_16( + // ____[00__] ________ + OVERLONG_2 | OVERLONG_3 | OVERLONG_4, //
____[0000] ________ + OVERLONG_2, // ____[0001] ________ + 0, 0, + // ____[01__] ________ + TOO_LARGE, // ____[0100] ________ + TOO_LARGE_2, + TOO_LARGE_2, + TOO_LARGE_2, + // ____[10__] ________ + TOO_LARGE_2, TOO_LARGE_2, TOO_LARGE_2, TOO_LARGE_2, + // ____[11__] ________ + TOO_LARGE_2, + TOO_LARGE_2 | SURROGATE, // ____[1101] ________ + TOO_LARGE_2, TOO_LARGE_2 + ); + + return byte_1_high & byte_1_low & byte_2_high; + } + + // + // Validate the length of multibyte characters (that each multibyte character has the right number + // of continuation characters, and that all continuation characters are part of a multibyte + // character). + // + // Algorithm + // ========= + // + // This algorithm compares *expected* continuation characters with *actual* continuation bytes, + // and emits an error anytime there is a mismatch. + // + // For example, in the string "𝄞₿֏ab", which has 4-, 3-, 2- and 1-byte + // characters, the file will look like this: + // + // | Character | 𝄞 | | | | ₿ | | | ֏ | | a | b | + // |-----------------------|----|----|----|----|----|----|----|----|----|----|----| + // | Character Length | 4 | | | | 3 | | | 2 | | 1 | 1 | + // | Byte | F0 | 9D | 84 | 9E | E2 | 82 | BF | D6 | 8F | 61 | 62 | + // | is_second_byte | | X | | | | X | | | X | | | + // | is_third_byte | | | X | | | | X | | | | | + // | is_fourth_byte | | | | X | | | | | | | | + // | expected_continuation | | X | X | X | | X | X | | X | | | + // | is_continuation | | X | X | X | | X | X | | X | | | + // + // An error is basically any byte where (Second Byte OR Third Byte OR Fourth Byte) != Continuation: + // + // - **Extra Continuations:** Any continuation that is not a second, third or fourth byte is not + // part of a valid 2-, 3- or 4-byte character and is thus an error.
It could be that it's just + // floating around extra outside of any character, or that there is an illegal 5-byte character, + // or maybe it's at the beginning of the file before any characters have started; but it's an + // error in all these cases. + // - **Missing Continuations:** Any second, third or fourth byte that *isn't* a continuation is an error, because that means + // we started a new character before we were finished with the current one. + // + // Getting the Previous Bytes + // -------------------------- + // + // Because we want to know if a byte is the *second* (or third, or fourth) byte of a multibyte + // character, we need to "shift the bytes" to find that out. This is what they mean: + // + // - `is_continuation`: if the current byte is a continuation. + // - `is_second_byte`: if 1 byte back is the start of a 2-, 3- or 4-byte character. + // - `is_third_byte`: if 2 bytes back is the start of a 3- or 4-byte character. + // - `is_fourth_byte`: if 3 bytes back is the start of a 4-byte character. + // + // We use shuffles to go n bytes back, selecting part of the current `input` and part of the + // `prev_input` (search for `.prev<1>`, `.prev<2>`, etc.). These are passed in by the caller + // function, because the 1-byte-back data is used by other checks as well. + // + // Getting the Continuation Mask + // ----------------------------- + // + // Once we have the right bytes, we have to get the masks. To do this, we treat UTF-8 bytes as + // numbers, using signed `<` and `>` operations to check if they are continuations or leads. + // In fact, we treat the numbers as *signed*, partly because it helps us, and partly because + // Intel's SIMD presently only offers signed `<` and `>` operations (not unsigned ones). + // + // In UTF-8, bytes that start with the bits 110, 1110 and 11110 are 2-, 3- and 4-byte "leads," + // respectively, meaning they expect to have 1, 2 and 3 "continuation bytes" after them. 
+ // Continuation bytes start with 10, and ASCII (1-byte characters) starts with 0. + // + // When treated as signed numbers, they look like this: + // + // | Type | High Bits | Binary Range | Signed | + // |--------------|------------|--------------|--------| + // | ASCII | `0` | `01111111` | 127 | + // | | | `00000000` | 0 | + // | 4+-Byte Lead | `1111` | `11111111` | -1 | + // | | | `11110000` | -16 | + // | 3-Byte Lead | `1110` | `11101111` | -17 | + // | | | `11100000` | -32 | + // | 2-Byte Lead | `110` | `11011111` | -33 | + // | | | `11000000` | -64 | + // | Continuation | `10` | `10111111` | -65 | + // | | | `10000000` | -128 | + // + // This makes it pretty easy to get the continuation mask! It's just a single comparison: + // + // ``` + // is_continuation = input < -64 + // ``` + // + // We can do something similar for the others, but it takes two comparisons instead of one: "is + // the start of a 2-byte character" is `< -32` and `> -65`, for example. And 2+ bytes is `< 0` and + // `> -65`. Surely we can do better, they're right next to each other! + // + // Getting the is_xxx Masks: Shifting the Range + // -------------------------------------------- + // + // Notice *why* continuations were a single comparison. The actual *range* would require two + // comparisons--`< -64` and `> -129`--but all bytes are always at least -128, so we get + // that for free. In fact, if we had *unsigned* comparisons, 2+, 3+ and 4+ comparisons would be + // just as easy: 4+ would be `> 239`, 3+ would be `> 223`, and 2+ would be `> 191`. + // + // Instead, we add 128 to each byte, shifting the range up to make comparison easy.
This wraps + // ASCII down into the negative, and puts 4+-Byte Lead at the top: + // + // | Type | High Bits | Binary Range | Signed | + // |----------------------|------------|--------------|-------| + // | 4+-Byte Lead (+ 128) | `0111` | `01111111` | 127 | + // | | | `01110000` | 112 | + // |----------------------|------------|--------------|-------| + // | 3-Byte Lead (+ 128) | `0110` | `01101111` | 111 | + // | | | `01100000` | 96 | + // |----------------------|------------|--------------|-------| + // | 2-Byte Lead (+ 128) | `010` | `01011111` | 95 | + // | | | `01000000` | 64 | + // |----------------------|------------|--------------|-------| + // | Continuation (+ 128) | `00` | `00111111` | 63 | + // | | | `00000000` | 0 | + // |----------------------|------------|--------------|-------| + // | ASCII (+ 128) | `1` | `11111111` | -1 | + // | | | `10000000` | -128 | + // |----------------------|------------|--------------|-------| + // + // *Now* we can use signed `>` on all of them: + // + // ``` + // prev1 = input.prev<1> + // prev2 = input.prev<2> + // prev3 = input.prev<3> + // prev1_flipped = input.prev<1>(prev_input) ^ 0x80; // Same as `+ 128` + // prev2_flipped = input.prev<2>(prev_input) ^ 0x80; // Same as `+ 128` + // prev3_flipped = input.prev<3>(prev_input) ^ 0x80; // Same as `+ 128` + // is_second_byte = prev1_flipped > 63; // 2+-byte lead + // is_third_byte = prev2_flipped > 95; // 3+-byte lead + // is_fourth_byte = prev3_flipped > 111; // 4+-byte lead + // ``` + // + // NOTE: we use `^ 0x80` instead of `+ 128` in the code, which accomplishes the same thing, and even takes the same number + // of cycles as `+`, but on many Intel architectures can be parallelized better (you can do 3 + // `^`'s at a time on Haswell, but only 2 `+`'s). + // + // That doesn't look like it saved us any instructions, did it?
Well, because we're adding the + // same number to all of them, we can save one of those `+ 128` operations by assembling + // `prev2_flipped` out of prev 1 and prev 3 instead of assembling it from input and adding 128 + // to it. One more instruction saved! + // + // ``` + // prev1 = input.prev<1> + // prev3 = input.prev<3> + // prev1_flipped = prev1 ^ 0x80; // Same as `+ 128` + // prev3_flipped = prev3 ^ 0x80; // Same as `+ 128` + // prev2_flipped = prev1_flipped.concat<2>(prev3_flipped): // | C -> ^ D, or + // you do B | C -> | A -> ^ D. But addition and subtraction *are* associative: (A + B + C) - D can + // be written as `(A + B) + (C - D)`. This means you can do A + B and C - D at the same time, and + // then adds the result together. Same number of operations, but if the processor can run + // independent things in parallel (which most can), it runs faster. + // + // This doesn't help us on Intel, but might help us elsewhere: on Haswell, at least, | and ^ have + // a super nice advantage in that more of them can be run at the same time (they can run on 3 + // ports, while + and - can run on 2)! This means that we can do A | B while we're still doing C, + // saving us the cycle we would have earned by using +. Even more, using an instruction with a + // wider array of ports can help *other* code run ahead, too, since these instructions can "get + // out of the way," running on a port other instructions can't. 
+ // + // Epilogue II: One More Trick + // --------------------------- + // + // There's one more relevant trick up our sleeve, it turns out: on Intel we can "pay + // for" the (prev<1> + 128) instruction, because it can be used to save an instruction in + // check_special_cases()--but we'll talk about that there :) + // + really_inline simd8 check_multibyte_lengths(simd8 input, simd8 prev_input, simd8 prev1) { + simd8 prev2 = input.prev<2>(prev_input); + simd8 prev3 = input.prev<3>(prev_input); + + // Cont is 10000000-10111111 (-128...-65) + simd8 is_continuation = simd8(input) < int8_t(-64); + // must_be_continuation is architecture-specific because Intel doesn't have unsigned comparisons + return simd8(must_be_continuation(prev1, prev2, prev3) ^ is_continuation); + } + + // + // Return nonzero if there are incomplete multibyte characters at the end of the block: + // e.g. if there is a 4-byte character, but it's 3 bytes from the end. + // + really_inline simd8 is_incomplete(simd8 input) { + // If the previous input's last 3 bytes match this, they're too short (they ended at EOF): + // ... 1111____ 111_____ 11______ + static const uint8_t max_array[32] = { + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 0b11110000u-1, 0b11100000u-1, 0b11000000u-1 + }; + const simd8 max_value(&max_array[sizeof(max_array)-sizeof(simd8)]); + return input.gt_bits(max_value); + } + + struct utf8_checker { + // If this is nonzero, there has been a UTF-8 error. + simd8 error; + // The last input we received + simd8 prev_input_block; + // Whether the last input we received was incomplete (used for ASCII fast path) + simd8 prev_incomplete; + + // + // Check whether the current bytes are valid UTF-8.
+ // + really_inline void check_utf8_bytes(const simd8 input, const simd8 prev_input) { + // prev1 is the input one byte back (pulling in the end of prev_input where needed); + // both checks below need it, so it is computed once here. + simd8 prev1 = input.prev<1>(prev_input); + this->error |= check_special_cases(input, prev1); + this->error |= check_multibyte_lengths(input, prev_input, prev1); + } + + // The only problem that can happen at EOF is that a multibyte character is too short. + really_inline void check_eof() { + // If the last block ended with an incomplete multibyte character, no more input + // is coming to finish it. + this->error |= this->prev_incomplete; + } + + really_inline void check_next_input(simd8x64 input) { + if (likely(is_ascii(input))) { + // If the previous block had incomplete UTF-8 characters at the end, an ASCII block can't + // possibly finish them. + this->error |= this->prev_incomplete; + } else { + this->check_utf8_bytes(input.chunks[0], this->prev_input_block); + for (int i=1; i::NUM_CHUNKS; i++) { + this->check_utf8_bytes(input.chunks[i], input.chunks[i-1]); + } + this->prev_incomplete = is_incomplete(input.chunks[simd8x64::NUM_CHUNKS-1]); + this->prev_input_block = input.chunks[simd8x64::NUM_CHUNKS-1]; + } + } + + really_inline ErrorValues errors() { + return this->error.any_bits_set_anywhere() ?
simdjson::UTF8_ERROR : simdjson::SUCCESS; + } + + }; // struct utf8_checker +} + +using utf8_validation::utf8_checker; +/* end file src/generic/utf8_lookup2_algorithm.h */ +/* begin file src/generic/stage1_find_marks.h */ +// This file contains the common code every implementation uses in stage1 +// It is intended to be included multiple times and compiled multiple times +// We assume the file in which it is included already includes +// "simdjson/stage1_find_marks.h" (this simplifies amalgamation) + +namespace stage1 { + +class bit_indexer { +public: + uint32_t *tail; + + bit_indexer(uint32_t *index_buf) : tail(index_buf) {} + + // flatten out values in 'bits' assuming that they are to have values of idx + // plus their position in the bitvector, and store these indexes at + // tail[0..], advancing tail past the valid ones as we go + // will potentially store extra values beyond end of valid bits, so the buffer behind tail + // needs to be large enough to handle this + really_inline void write_indexes(uint32_t idx, uint64_t bits) { + // In some instances, the next branch is expensive because it is mispredicted. + // Unfortunately, in other cases, + // it helps tremendously. + if (bits == 0) + return; + uint32_t cnt = hamming(bits); + + // Do the first 8 all together + for (int i=0; i<8; i++) { + this->tail[i] = idx + trailing_zeroes(bits); + bits = clear_lowest_bit(bits); + } + + // Do the next 8 all together (we hope in most cases it won't happen at all + // and the branch is easily predicted). + if (unlikely(cnt > 8)) { + for (int i=8; i<16; i++) { + this->tail[i] = idx + trailing_zeroes(bits); + bits = clear_lowest_bit(bits); + } + + // Most files don't have 16+ structurals per block, so we take several basically guaranteed + // branch mispredictions here. 16+ structurals per block means either punctuation ({} [] , :) + // or the start of a value ("abc" true 123) every four characters.
+ if (unlikely(cnt > 16)) { + uint32_t i = 16; + do { + this->tail[i] = idx + trailing_zeroes(bits); + bits = clear_lowest_bit(bits); + i++; + } while (i < cnt); + } + } + + this->tail += cnt; + } +}; + +class json_structural_scanner { +public: + // Whether the first character of the next iteration is escaped. + uint64_t prev_escaped = 0ULL; + // Whether the last iteration was still inside a string (all 1's = true, all 0's = false). + uint64_t prev_in_string = 0ULL; + // Whether the last character of the previous iteration is a primitive value character + // (anything except whitespace, braces, comma or colon). + uint64_t prev_primitive = 0ULL; + // Mask of structural characters from the last iteration. + // Kept around for performance reasons, so we can call write_indexes to soak up some unused + // CPU capacity while the next iteration is busy with the expensive string-mask computation in find_strings. + uint64_t prev_structurals = 0; + // Errors with unescaped characters in strings (ASCII codepoints < 0x20) + uint64_t unescaped_chars_error = 0; + bit_indexer structural_indexes; + + json_structural_scanner(uint32_t *_structural_indexes) : structural_indexes{_structural_indexes} {} + + // + // Finish the scan and return any errors. + // + // This may detect errors as well, such as an unclosed string or unescaped control characters in a string. + // if streaming is set to true, an unclosed string is allowed. + // + really_inline ErrorValues detect_errors_on_eof(bool streaming = false); + + // + // Return a mask of all string characters plus end quotes. + // + // prev_escaped is overflow saying whether the next character is escaped. + // prev_in_string is overflow saying whether we're still in a string. + // + // Backslash sequences outside of quotes will be detected in stage 2.
+ // + really_inline uint64_t find_strings(const simd::simd8x64 in); + + // + // Determine which characters are *structural*: + // - operators: [] {} , : + // - the start of primitives (123, true, false, null) + // - the start of invalid non-whitespace (+, &, ture, UTF-8) + // + // NOTE(review): the visible implementation does not report value sequence errors such as: + // - two values with no separator between ("hello" "world") + // - separators with no values ([1,] [1,,] and [,2]) + // + // This method will find all of the above whether it is in a string or not. + // + // To reduce dependency on the expensive "what is in a string" computation, this method treats the + // contents of a string the same as content outside. Errors and structurals inside the string or on + // the trailing quote will need to be removed later when the correct string information is known. + // + really_inline uint64_t find_potential_structurals(const simd::simd8x64 in); + + // + // Find the important bits of JSON in a STEP_SIZE-byte chunk, and add them to structural_indexes. + // + template + really_inline void scan_step(const uint8_t *buf, const size_t idx, utf8_checker &utf8_checker); + + // + // Parse the entire input in STEP_SIZE-byte chunks. + // + template + really_inline void scan(const uint8_t *buf, const size_t len, utf8_checker &utf8_checker); +}; + +// Routines to print masks and text for debugging bitmask operations (each reuses one static, never-freed buffer) +UNUSED static char * format_input_text(const simd8x64 in) { + static char *buf = (char*)malloc(sizeof(simd8x64) + 1); + in.store((uint8_t*)buf); + for (size_t i=0; i); i++) { + if (buf[i] < ' ') { buf[i] = '_'; } + } + buf[sizeof(simd8x64)] = '\0'; + return buf; +} + +UNUSED static char * format_mask(uint64_t mask) { + static char *buf = (char*)malloc(64 + 1); + for (size_t i=0; i<64; i++) { + buf[i] = (mask & (size_t(1) << i)) ? 'X' : ' '; + } + buf[64] = '\0'; + return buf; +} + +// +// Finds escaped characters (characters following \).
+// +// Handles runs of backslashes like \\\" and \\\\" correctly (yielding 0101 and 01010, respectively). +// +// Does this by: +// - Shift the escape mask to get potentially escaped characters (characters after backslashes). +// - Mask escaped sequences that start on *even* bits with 1010101010 (odd bits are escaped, even bits are not) +// - Mask escaped sequences that start on *odd* bits with 0101010101 (even bits are escaped, odd bits are not) +// +// To distinguish between escaped sequences starting on even/odd bits, it finds the start of all +// escape sequences, filters out the ones that start on even bits, and adds that to the mask of +// escape sequences. This causes the addition to clear out the sequences starting on odd bits (since +// the start bit causes a carry), and leaves even-bit sequences alone. +// +// Example: +// +// text | \\\ | \\\"\\\" \\\" \\"\\" | +// escape | xxx | xx xxx xxx xx xx | Removed overflow backslash; will | it into follows_escape +// odd_starts | x | x x x | escape & ~even_bits & ~follows_escape +// even_seq | c| cxxx c xx c | c = carry bit -- will be masked out later +// invert_mask | | cxxx c xx c| even_seq << 1 +// follows_escape | xx | x xx xxx xxx xx xx | Includes overflow bit +// escaped | x | x x x x x x x x | +// desired | x | x x x x x x x x | +// text | \\\ | \\\"\\\" \\\" \\"\\" | +// +really_inline uint64_t find_escaped(uint64_t escape, uint64_t &escaped_overflow) { + // If there was overflow, pretend the first character isn't a backslash + escape &= ~escaped_overflow; + uint64_t follows_escape = escape << 1 | escaped_overflow; + + // Get sequences starting on even bits by clearing out the odd series using + + const uint64_t even_bits = 0x5555555555555555ULL; + uint64_t odd_sequence_starts = escape & ~even_bits & ~follows_escape; + uint64_t sequences_starting_on_even_bits; + escaped_overflow = add_overflow(odd_sequence_starts, escape, &sequences_starting_on_even_bits); + uint64_t invert_mask =
sequences_starting_on_even_bits << 1; // The mask we want to return is the *escaped* bits, not escapes. + + // Mask every other backslashed character as an escaped character + // Flip the mask for sequences that start on even bits, to correct them + return (even_bits ^ invert_mask) & follows_escape; +} + +// +// Check if the current character immediately follows a matching character. +// +// For example, this checks for quotes with backslashes in front of them: +// +// const uint64_t backslashed_quote = in.eq('"') & follows(in.eq('\\'), prev_backslash); +// +really_inline uint64_t follows(const uint64_t match, uint64_t &overflow) { + const uint64_t result = match << 1 | overflow; + overflow = match >> 63; + return result; +} + +// +// Check if the current character follows a matching character, with possible "filler" between. +// For example, this checks for empty curly braces, e.g. +// +// in.eq('}') & follows(in.eq('{'), in.eq(' '), prev_empty_object) // { * } +// +really_inline uint64_t follows(const uint64_t match, const uint64_t filler, uint64_t &overflow) { + uint64_t follows_match = follows(match, overflow); + uint64_t result; + overflow |= add_overflow(follows_match, filler, &result); + return result; +} + +really_inline ErrorValues json_structural_scanner::detect_errors_on_eof(bool streaming) { + if ((prev_in_string) and (not streaming)) { + return UNCLOSED_STRING; + } + if (unescaped_chars_error) { + return UNESCAPED_CHARS; + } + return SUCCESS; +} + +// +// Return a mask of all string characters plus end quotes. +// +// prev_escaped is overflow saying whether the next character is escaped. +// prev_in_string is overflow saying whether we're still in a string. +// +// Backslash sequences outside of quotes will be detected in stage 2.
+// +really_inline uint64_t json_structural_scanner::find_strings(const simd::simd8x64 in) { + const uint64_t backslash = in.eq('\\'); + const uint64_t escaped = find_escaped(backslash, prev_escaped); + const uint64_t quote = in.eq('"') & ~escaped; + // prefix_xor flips on bits inside the string (and flips off the end quote). + const uint64_t in_string = prefix_xor(quote) ^ prev_in_string; + /* right shift of a signed value expected to be well-defined and standard + * compliant as of C++20, + * John Regehr from Utah U. says this is fine code */ + prev_in_string = static_cast(static_cast(in_string) >> 63); + // Use ^ to turn the beginning quote off, and the end quote on. + return in_string ^ quote; +} + +// +// Determine which characters are *structural*: +// - operators: [] {} , : +// - the start of primitives (123, true, false, null) +// - the start of invalid non-whitespace (+, &, ture, UTF-8) +// +// NOTE(review): the visible implementation does not report value sequence errors such as: +// - two values with no separator between ("hello" "world") +// - separators with no values ([1,] [1,,] and [,2]) +// +// This method will find all of the above whether it is in a string or not. +// +// To reduce dependency on the expensive "what is in a string" computation, this method treats the +// contents of a string the same as content outside. Errors and structurals inside the string or on +// the trailing quote will need to be removed later when the correct string information is known. +// +really_inline uint64_t json_structural_scanner::find_potential_structurals(const simd::simd8x64 in) { + // These use SIMD so let's kick them off before running the regular 64-bit stuff ... + uint64_t whitespace, op; + find_whitespace_and_operators(in, whitespace, op); + + // Detect the start of a run of primitive characters. Includes numbers, booleans, and strings ("). + // Everything except whitespace, braces, colon and comma.
+ const uint64_t primitive = ~(op | whitespace); + const uint64_t follows_primitive = follows(primitive, prev_primitive); + const uint64_t start_primitive = primitive & ~follows_primitive; + + // Return final structurals + return op | start_primitive; +} + +// +// Find the important bits of JSON in a 128-byte chunk, and add them to structural_indexes. +// +// PERF NOTES: +// We pipe 2 inputs through these stages: +// 1. Load JSON into registers. This takes a long time and is highly parallelizable, so we load +// 2 inputs' worth at once so that by the time step 2 is looking for the input, it's available. +// 2. Scan the JSON for critical data: strings, primitives and operators. This is the critical path. +// The output of step 1 depends entirely on this information. These functions don't quite use +// up enough CPU: the second half of the functions is highly serial, only using 1 execution core +// at a time. The second input's scans have some dependency on the first ones finishing it, but +// they can make a lot of progress before they need that information. +// 3. Step 1 doesn't use enough capacity, so we run some extra stuff while we're waiting for that +// to finish: utf-8 checks and generating the output from the last iteration. +// +// The reason we run 2 inputs at a time, is steps 2 and 3 are *still* not enough to soak up all +// available capacity with just one input. Running 2 at a time seems to give the CPU a good enough +// workout. +// +template<> +really_inline void json_structural_scanner::scan_step<128>(const uint8_t *buf, const size_t idx, utf8_checker &utf8_checker) { + // + // Load up all 128 bytes into SIMD registers + // + simd::simd8x64 in_1(buf); + simd::simd8x64 in_2(buf+64); + + // + // Find the strings and potential structurals (operators / primitives). + // + // This will include false structurals that are *inside* strings--we'll filter strings out + // before we return.
+ // + uint64_t string_1 = this->find_strings(in_1); + uint64_t structurals_1 = this->find_potential_structurals(in_1); + uint64_t string_2 = this->find_strings(in_2); + uint64_t structurals_2 = this->find_potential_structurals(in_2); + + // + // Do miscellaneous work while the processor is busy calculating strings and structurals. + // + // After that, weed out structurals that are inside strings and find invalid string characters. + // + uint64_t unescaped_1 = in_1.lteq(0x1F); + utf8_checker.check_next_input(in_1); + this->structural_indexes.write_indexes(idx-64, this->prev_structurals); // Output *last* iteration's structurals to ParsedJson + this->prev_structurals = structurals_1 & ~string_1; + this->unescaped_chars_error |= unescaped_1 & string_1; + + uint64_t unescaped_2 = in_2.lteq(0x1F); + utf8_checker.check_next_input(in_2); + this->structural_indexes.write_indexes(idx, this->prev_structurals); // Output the first half's (in_1) structurals to ParsedJson + this->prev_structurals = structurals_2 & ~string_2; + this->unescaped_chars_error |= unescaped_2 & string_2; +} + +// +// Find the important bits of JSON in a 64-byte chunk, and add them to structural_indexes. +// +template<> +really_inline void json_structural_scanner::scan_step<64>(const uint8_t *buf, const size_t idx, utf8_checker &utf8_checker) { + // + // Load up bytes into SIMD registers + // + simd::simd8x64 in_1(buf); + + // + // Find the strings and potential structurals (operators / primitives). + // + // This will include false structurals that are *inside* strings--we'll filter strings out + // before we return.
+ // + uint64_t string_1 = this->find_strings(in_1); + uint64_t structurals_1 = this->find_potential_structurals(in_1); + + // + // Do miscellaneous work while the processor is busy calculating strings and structurals. + // + // After that, weed out structurals that are inside strings and find invalid string characters. 
+ // + uint64_t unescaped_1 = in_1.lteq(0x1F); + utf8_checker.check_next_input(in_1); + this->structural_indexes.write_indexes(idx-64, this->prev_structurals); // Output *last* iteration's structurals to ParsedJson + this->prev_structurals = structurals_1 & ~string_1; + this->unescaped_chars_error |= unescaped_1 & string_1; +} + +template +really_inline void json_structural_scanner::scan(const uint8_t *buf, const size_t len, utf8_checker &utf8_checker) { + size_t lenminusstep = len < STEP_SIZE ? 0 : len - STEP_SIZE; + size_t idx = 0; + + for (; idx < lenminusstep; idx += STEP_SIZE) { + this->scan_step(&buf[idx], idx, utf8_checker); + } + + /* If we have a final chunk of at most STEP_SIZE bytes, pad it to STEP_SIZE with + * spaces before processing it (otherwise, we risk invalidating the UTF-8 + * checks). */ + if (likely(idx < len)) { + uint8_t tmp_buf[STEP_SIZE]; + memset(tmp_buf, 0x20, STEP_SIZE); + memcpy(tmp_buf, buf + idx, len - idx); + this->scan_step(&tmp_buf[0], idx, utf8_checker); + idx += STEP_SIZE; + } + + /* finally, flatten out the remaining structurals from the last iteration */ + this->structural_indexes.write_indexes(idx-64, this->prev_structurals); +} + +// Setting the streaming parameter to true allows the find_structural_bits to tolerate unclosed strings. +// The caller should still ensure that the input is valid UTF-8. If you are processing substrings, +// you may want to call on a function like trimmed_length_safe_utf8.
+template +int find_structural_bits(const uint8_t *buf, size_t len, simdjson::ParsedJson &pj, bool streaming) { + if (unlikely(len > pj.byte_capacity)) { + return simdjson::CAPACITY; + } + utf8_checker utf8_checker{}; + json_structural_scanner scanner{pj.structural_indexes.get()}; + scanner.scan(buf, len, utf8_checker); + // we might tolerate an unclosed string if streaming is true + simdjson::ErrorValues error = scanner.detect_errors_on_eof(streaming); + if (unlikely(error != simdjson::SUCCESS)) { + return error; + } + pj.n_structural_indexes = scanner.structural_indexes.tail - pj.structural_indexes.get(); + /* a valid JSON file cannot have zero structural indexes - we should have + * found something */ + if (unlikely(pj.n_structural_indexes == 0u)) { + return simdjson::EMPTY; + } + if (unlikely(pj.structural_indexes[pj.n_structural_indexes - 1] > len)) { + return simdjson::UNEXPECTED_ERROR; + } + if (len != pj.structural_indexes[pj.n_structural_indexes - 1]) { + /* the string might not be NULL terminated, but we add a virtual NULL + * ending character. 
*/ + pj.structural_indexes[pj.n_structural_indexes++] = len; + } + /* make it safe to dereference one beyond this array */ + pj.structural_indexes[pj.n_structural_indexes] = 0; + return utf8_checker.errors(); +} + +} // namespace stage1 +/* end file src/generic/stage1_find_marks.h */ + +} // namespace simdjson::arm64 + +namespace simdjson { + +template <> +int find_structural_bits(const uint8_t *buf, size_t len, simdjson::ParsedJson &pj, bool streaming) { + return arm64::stage1::find_structural_bits<64>(buf, len, pj, streaming); +} + +} // namespace simdjson + +#endif // IS_ARM64 +#endif // SIMDJSON_ARM64_STAGE1_FIND_MARKS_H +/* end file src/generic/stage1_find_marks.h */ +/* begin file src/haswell/stage1_find_marks.h */ +#ifndef SIMDJSON_HASWELL_STAGE1_FIND_MARKS_H +#define SIMDJSON_HASWELL_STAGE1_FIND_MARKS_H + + +#ifdef IS_X86_64 + +/* begin file src/haswell/bitmask.h */ +#ifndef SIMDJSON_HASWELL_BITMASK_H +#define SIMDJSON_HASWELL_BITMASK_H + + +#ifdef IS_X86_64 + +/* begin file src/haswell/intrinsics.h */ +#ifndef SIMDJSON_HASWELL_INTRINSICS_H +#define SIMDJSON_HASWELL_INTRINSICS_H + +#ifdef IS_X86_64 + +#ifdef _MSC_VER +#include // visual studio +#else +#include // elsewhere +#endif // _MSC_VER +#endif // IS_X86_64 +#endif // SIMDJSON_HASWELL_INTRINSICS_H +/* end file src/haswell/intrinsics.h */ + +TARGET_HASWELL +namespace simdjson::haswell { + +// +// Perform a "cumulative bitwise xor," flipping bits each time a 1 is encountered. +// +// For example, prefix_xor(00100100) == 00011100 +// +really_inline uint64_t prefix_xor(const uint64_t bitmask) { + // There should be no such thing with a processor supporting avx2 + // but not clmul. 
+ __m128i all_ones = _mm_set1_epi8('\xFF'); + __m128i result = _mm_clmulepi64_si128(_mm_set_epi64x(0ULL, bitmask), all_ones, 0); + return _mm_cvtsi128_si64(result); +} + +} // namespace simdjson::haswell +UNTARGET_REGION + +#endif // IS_X86_64 +#endif +/* end file src/haswell/intrinsics.h */ /* begin file src/haswell/simd.h */ #ifndef SIMDJSON_HASWELL_SIMD_H #define SIMDJSON_HASWELL_SIMD_H @@ -3349,6 +2256,7 @@ namespace simdjson::arm64::simd { #ifdef IS_X86_64 +/* haswell/intrinsics.h already included: #include "haswell/intrinsics.h" */ TARGET_HASWELL namespace simdjson::haswell::simd { @@ -3654,6 +2562,1043 @@ UNTARGET_REGION #endif // IS_X86_64 #endif // SIMDJSON_HASWELL_SIMD_H /* end file src/haswell/simd.h */ +/* begin file src/haswell/bitmanipulation.h */ +#ifndef SIMDJSON_HASWELL_BITMANIPULATION_H +#define SIMDJSON_HASWELL_BITMANIPULATION_H + + +#ifdef IS_X86_64 +/* haswell/intrinsics.h already included: #include "haswell/intrinsics.h" */ + +TARGET_HASWELL +namespace simdjson::haswell { + +#ifndef _MSC_VER +// We sometimes call trailing_zero on inputs that are zero, +// but the algorithms do not end up using the returned value. +// Sadly, sanitizers are not smart enough to figure it out. +__attribute__((no_sanitize("undefined"))) // this is deliberate +#endif +really_inline int trailing_zeroes(uint64_t input_num) { +#ifdef _MSC_VER + return (int)_tzcnt_u64(input_num); +#else + //////// + // You might expect the next line to be equivalent to + // return (int)_tzcnt_u64(input_num); + // but the generated code differs and might be less efficient? 
+ //////// + return __builtin_ctzll(input_num); +#endif// _MSC_VER +} + +/* result might be undefined when input_num is zero */ +really_inline uint64_t clear_lowest_bit(uint64_t input_num) { + return _blsr_u64(input_num); +} + +/* result might be undefined when input_num is zero */ +really_inline int leading_zeroes(uint64_t input_num) { + return static_cast(_lzcnt_u64(input_num)); +} + +really_inline int hamming(uint64_t input_num) { +#ifdef _MSC_VER + // note: we do not support legacy 32-bit Windows + return __popcnt64(input_num);// Visual Studio wants two underscores +#else + return _popcnt64(input_num); +#endif +} + +really_inline bool add_overflow(uint64_t value1, uint64_t value2, + uint64_t *result) { +#ifdef _MSC_VER + return _addcarry_u64(0, value1, value2, + reinterpret_cast(result)); +#else + return __builtin_uaddll_overflow(value1, value2, + (unsigned long long *)result); +#endif +} + +#ifdef _MSC_VER +#pragma intrinsic(_umul128) +#endif +really_inline bool mul_overflow(uint64_t value1, uint64_t value2, + uint64_t *result) { +#ifdef _MSC_VER + uint64_t high; + *result = _umul128(value1, value2, &high); + return high; +#else + return __builtin_umulll_overflow(value1, value2, + (unsigned long long *)result); +#endif +} +}// namespace simdjson::haswell +UNTARGET_REGION +#endif +#endif // SIMDJSON_HASWELL_BITMANIPULATION_H +/* end file src/haswell/bitmanipulation.h */ + +TARGET_HASWELL +namespace simdjson::haswell { + +using namespace simd; + +really_inline void find_whitespace_and_operators(simd8x64 in, uint64_t &whitespace, uint64_t &op) { + + // These lookups rely on the fact that anything < 127 will match the lower 4 bits, which is why + // we can't use the generic lookup_16. 
+ auto whitespace_table = simd8::repeat_16(' ', 100, 100, 100, 17, 100, 113, 2, 100, '\t', '\n', 112, 100, '\r', 100, 100); + auto op_table = simd8::repeat_16(',', '}', 0, 0, 0xc0u, 0, 0, 0, 0, 0, 0, 0, 0, 0, ':', '{'); + + whitespace = in.map([&](simd8 _in) { + return _in == simd8(_mm256_shuffle_epi8(whitespace_table, _in)); + }).to_bitmask(); + + op = in.map([&](simd8 _in) { + // | 32 handles the fact that { } and [ ] are exactly 32 bytes apart + return (_in | 32) == simd8(_mm256_shuffle_epi8(op_table, _in-',')); + }).to_bitmask(); +} + +really_inline bool is_ascii(simd8x64 input) { + simd8 bits = input.reduce([&](auto a,auto b) { return a|b; }); + return !bits.any_bits_set_anywhere(0b10000000u); +} + +really_inline simd8 must_be_continuation(simd8 prev1, simd8 prev2, simd8 prev3) { + simd8 is_second_byte = prev1.saturating_sub(0b11000000u-1); // Only 11______ will be > 0 + simd8 is_third_byte = prev2.saturating_sub(0b11100000u-1); // Only 111_____ will be > 0 + simd8 is_fourth_byte = prev3.saturating_sub(0b11110000u-1); // Only 1111____ will be > 0 + // Caller requires a bool (all 1's). All values resulting from the subtraction will be <= 64, so signed comparison is fine. + return simd8(is_second_byte | is_third_byte | is_fourth_byte) > int8_t(0); +} + +/* begin file src/generic/utf8_lookup2_algorithm.h */ +// +// Detect Unicode errors. +// +// UTF-8 is designed to allow multiple bytes and be compatible with ASCII. It's a fairly basic +// encoding that uses the first few bits on each byte to denote a "byte type", and all other bits +// are straight up concatenated into the final value. The first byte of a multibyte character is a +// "leading byte" and starts with N 1's, where N is the total number of bytes (110_____ = 2 byte +// lead). The remaining bytes of a multibyte character all start with 10. 1-byte characters just +// start with 0, because that's what ASCII looks like. 
Here's what each size +// +// - ASCII (7 bits): 0_______ +// - 2 byte character (11 bits): 110_____ 10______ +// - 3 byte character (17 bits): 1110____ 10______ 10______ +// - 4 byte character (23 bits): 11110___ 10______ 10______ 10______ +// - 5+ byte character (illegal): 11111___ +// +// There are 5 classes of error that can happen in Unicode: +// +// - TOO_SHORT: when you have a multibyte character with too few bytes (i.e. missing continuation). +// We detect this by looking for new characters (lead bytes) inside the range of a multibyte +// character. +// +// e.g. 11000000 01100001 (2-byte character where second byte is ASCII) +// +// - TOO_LONG: when there are more bytes in your character than you need (i.e. extra continuation). +// We detect this by requiring that the next byte after your multibyte character be a new +// character--so a continuation after your character is wrong. +// +// e.g. 11011111 10111111 10111111 (2-byte character followed by *another* continuation byte) +// +// - TOO_LARGE: Unicode only goes up to U+10FFFF. These characters are too large. +// +// e.g. 11110111 10111111 10111111 10111111 (bigger than 10FFFF). +// +// - OVERLONG: multibyte characters with a bunch of leading zeroes, where you could have +// used fewer bytes to make the same character. Like encoding an ASCII character in 4 bytes is +// technically possible, but UTF-8 disallows it so that there is only one way to write an "a". +// +// e.g. 11000001 10100001 (2-byte encoding of "a", which only requires 1 byte: 01100001) +// +// - SURROGATE: Unicode U+D800-U+DFFF is a *surrogate* character, reserved for use in UCS-2 and +// WTF-8 encodings for characters with > 2 bytes. These are illegal in pure UTF-8. +// +// e.g. 11101101 10100000 10000000 (U+D800) +// +// - INVALID_5_BYTE: 5-byte, 6-byte, 7-byte and 8-byte characters are unsupported; Unicode does not +// support values with more than 23 bits (which a 4-byte character supports). +// +// e.g. 
11111000 10100000 10000000 10000000 10000000 (U+800000) +// +// Legal utf-8 byte sequences per http://www.unicode.org/versions/Unicode6.0.0/ch03.pdf - page 94: +// +// Code Points 1st 2s 3s 4s +// U+0000..U+007F 00..7F +// U+0080..U+07FF C2..DF 80..BF +// U+0800..U+0FFF E0 A0..BF 80..BF +// U+1000..U+CFFF E1..EC 80..BF 80..BF +// U+D000..U+D7FF ED 80..9F 80..BF +// U+E000..U+FFFF EE..EF 80..BF 80..BF +// U+10000..U+3FFFF F0 90..BF 80..BF 80..BF +// U+40000..U+FFFFF F1..F3 80..BF 80..BF 80..BF +// U+100000..U+10FFFF F4 80..8F 80..BF 80..BF +// +using namespace simd; + +namespace utf8_validation { + + // + // Find special case UTF-8 errors where the character is technically readable (has the right length) + // but the *value* is disallowed. + // + // This includes overlong encodings, surrogates and values too large for Unicode. + // + // It turns out the bad character ranges can all be detected by looking at the first 12 bits of the + // UTF-8 encoded character (i.e. all of byte 1, and the high 4 bits of byte 2). This algorithm does a + // 3 4-bit table lookups, identifying which errors that 4 bits could match, and then &'s them together. + // If all 3 lookups detect the same error, it's an error. 
+ // + really_inline simd8 check_special_cases(const simd8 input, const simd8 prev1) { + // + // These are the errors we're going to match for bytes 1-2, by looking at the first three + // nibbles of the character: > & & + // + static const int OVERLONG_2 = 0x01; // 1100000_ 10______ (technically we match 10______ but we could match ________, they both yield errors either way) + static const int OVERLONG_3 = 0x02; // 11100000 100_____ ________ + static const int OVERLONG_4 = 0x04; // 11110000 1000____ ________ ________ + static const int SURROGATE = 0x08; // 11101101 [101_]____ + static const int TOO_LARGE = 0x10; // 11110100 (1001|101_)____ + static const int TOO_LARGE_2 = 0x20; // 1111(1___|011_|0101) 10______ + + // After processing the rest of byte 1 (the low bits), we're still not done--we have to check + // byte 2 to be sure which things are errors and which aren't. + // Since high_bits is byte 5, byte 2 is high_bits.prev<3> + static const int CARRY = OVERLONG_2 | TOO_LARGE_2; + const simd8 byte_2_high = input.shr<4>().lookup_16( + // ASCII: ________ [0___]____ + CARRY, CARRY, CARRY, CARRY, + // ASCII: ________ [0___]____ + CARRY, CARRY, CARRY, CARRY, + // Continuations: ________ [10__]____ + CARRY | OVERLONG_3 | OVERLONG_4, // ________ [1000]____ + CARRY | OVERLONG_3 | TOO_LARGE, // ________ [1001]____ + CARRY | TOO_LARGE | SURROGATE, // ________ [1010]____ + CARRY | TOO_LARGE | SURROGATE, // ________ [1011]____ + // Multibyte Leads: ________ [11__]____ + CARRY, CARRY, CARRY, CARRY + ); + + const simd8 byte_1_high = prev1.shr<4>().lookup_16( + // [0___]____ (ASCII) + 0, 0, 0, 0, + 0, 0, 0, 0, + // [10__]____ (continuation) + 0, 0, 0, 0, + // [11__]____ (2+-byte leads) + OVERLONG_2, 0, // [110_]____ (2-byte lead) + OVERLONG_3 | SURROGATE, // [1110]____ (3-byte lead) + OVERLONG_4 | TOO_LARGE | TOO_LARGE_2 // [1111]____ (4+-byte lead) + ); + + const simd8 byte_1_low = (prev1 & 0x0F).lookup_16( + // ____[00__] ________ + OVERLONG_2 | OVERLONG_3 | OVERLONG_4, // 
____[0000] ________ + OVERLONG_2, // ____[0001] ________ + 0, 0, + // ____[01__] ________ + TOO_LARGE, // ____[0100] ________ + TOO_LARGE_2, + TOO_LARGE_2, + TOO_LARGE_2, + // ____[10__] ________ + TOO_LARGE_2, TOO_LARGE_2, TOO_LARGE_2, TOO_LARGE_2, + // ____[11__] ________ + TOO_LARGE_2, + TOO_LARGE_2 | SURROGATE, // ____[1101] ________ + TOO_LARGE_2, TOO_LARGE_2 + ); + + return byte_1_high & byte_1_low & byte_2_high; + } + + // + // Validate the length of multibyte characters (that each multibyte character has the right number + // of continuation characters, and that all continuation characters are part of a multibyte + // character). + // + // Algorithm + // ========= + // + // This algorithm compares *expected* continuation characters with *actual* continuation bytes, + // and emits an error anytime there is a mismatch. + // + // For example, in the string "𝄞₿֏ab", which has a 4-, 3-, 2- and 1-byte + // characters, the file will look like this: + // + // | Character | 𝄞 | | | | ₿ | | | ֏ | | a | b | + // |-----------------------|----|----|----|----|----|----|----|----|----|----|----| + // | Character Length | 4 | | | | 3 | | | 2 | | 1 | 1 | + // | Byte | F0 | 9D | 84 | 9E | E2 | 82 | BF | D6 | 8F | 61 | 62 | + // | is_second_byte | | X | | | | X | | | X | | | + // | is_third_byte | | | X | | | | X | | | | | + // | is_fourth_byte | | | | X | | | | | | | | + // | expected_continuation | | X | X | X | | X | X | | X | | | + // | is_continuation | | X | X | X | | X | X | | X | | | + // + // The errors here are basically (Second Byte OR Third Byte OR Fourth Byte == Continuation): + // + // - **Extra Continuations:** Any continuation that is not a second, third or fourth byte is not + // part of a valid 2-, 3- or 4-byte character and is thus an error. 
It could be that it's just + // floating around extra outside of any character, or that there is an illegal 5-byte character, + // or maybe it's at the beginning of the file before any characters have started; but it's an + // error in all these cases. + // - **Missing Continuations:** Any second, third or fourth byte that *isn't* a continuation is an error, because that means + // we started a new character before we were finished with the current one. + // + // Getting the Previous Bytes + // -------------------------- + // + // Because we want to know if a byte is the *second* (or third, or fourth) byte of a multibyte + // character, we need to "shift the bytes" to find that out. This is what they mean: + // + // - `is_continuation`: if the current byte is a continuation. + // - `is_second_byte`: if 1 byte back is the start of a 2-, 3- or 4-byte character. + // - `is_third_byte`: if 2 bytes back is the start of a 3- or 4-byte character. + // - `is_fourth_byte`: if 3 bytes back is the start of a 4-byte character. + // + // We use shuffles to go n bytes back, selecting part of the current `input` and part of the + // `prev_input` (search for `.prev<1>`, `.prev<2>`, etc.). These are passed in by the caller + // function, because the 1-byte-back data is used by other checks as well. + // + // Getting the Continuation Mask + // ----------------------------- + // + // Once we have the right bytes, we have to get the masks. To do this, we treat UTF-8 bytes as + // numbers, using signed `<` and `>` operations to check if they are continuations or leads. + // In fact, we treat the numbers as *signed*, partly because it helps us, and partly because + // Intel's SIMD presently only offers signed `<` and `>` operations (not unsigned ones). + // + // In UTF-8, bytes that start with the bits 110, 1110 and 11110 are 2-, 3- and 4-byte "leads," + // respectively, meaning they expect to have 1, 2 and 3 "continuation bytes" after them. 
+ // Continuation bytes start with 10, and ASCII (1-byte characters) starts with 0. + // + // When treated as signed numbers, they look like this: + // + // | Type | High Bits | Binary Range | Signed | + // |--------------|------------|--------------|--------| + // | ASCII | `0` | `01111111` | 127 | + // | | | `00000000` | 0 | + // | 4+-Byte Lead | `1111` | `11111111` | -1 | + // | | | `11110000 | -16 | + // | 3-Byte Lead | `1110` | `11101111` | -17 | + // | | | `11100000 | -32 | + // | 2-Byte Lead | `110` | `11011111` | -33 | + // | | | `11000000 | -64 | + // | Continuation | `10` | `10111111` | -65 | + // | | | `10000000 | -128 | + // + // This makes it pretty easy to get the continuation mask! It's just a single comparison: + // + // ``` + // is_continuation = input < -64` + // ``` + // + // We can do something similar for the others, but it takes two comparisons instead of one: "is + // the start of a 4-byte character" is `< -32` and `> -65`, for example. And 2+ bytes is `< 0` and + // `> -64`. Surely we can do better, they're right next to each other! + // + // Getting the is_xxx Masks: Shifting the Range + // -------------------------------------------- + // + // Notice *why* continuations were a single comparison. The actual *range* would require two + // comparisons--`< -64` and `> -129`--but all characters are always greater than -128, so we get + // that for free. In fact, if we had *unsigned* comparisons, 2+, 3+ and 4+ comparisons would be + // just as easy: 4+ would be `> 239`, 3+ would be `> 223`, and 2+ would be `> 191`. + // + // Instead, we add 128 to each byte, shifting the range up to make comparison easy. 
This wraps + // ASCII down into the negative, and puts 4+-Byte Lead at the top: + // + // | Type | High Bits | Binary Range | Signed | + // |----------------------|------------|--------------|-------| + // | 4+-Byte Lead (+ 127) | `0111` | `01111111` | 127 | + // | | | `01110000 | 112 | + // |----------------------|------------|--------------|-------| + // | 3-Byte Lead (+ 127) | `0110` | `01101111` | 111 | + // | | | `01100000 | 96 | + // |----------------------|------------|--------------|-------| + // | 2-Byte Lead (+ 127) | `010` | `01011111` | 95 | + // | | | `01000000 | 64 | + // |----------------------|------------|--------------|-------| + // | Continuation (+ 127) | `00` | `00111111` | 63 | + // | | | `00000000 | 0 | + // |----------------------|------------|--------------|-------| + // | ASCII (+ 127) | `1` | `11111111` | -1 | + // | | | `10000000` | -128 | + // |----------------------|------------|--------------|-------| + // + // *Now* we can use signed `>` on all of them: + // + // ``` + // prev1 = input.prev<1> + // prev2 = input.prev<2> + // prev3 = input.prev<3> + // prev1_flipped = input.prev<1>(prev_input) ^ 0x80; // Same as `+ 128` + // prev2_flipped = input.prev<2>(prev_input) ^ 0x80; // Same as `+ 128` + // prev3_flipped = input.prev<3>(prev_input) ^ 0x80; // Same as `+ 128` + // is_second_byte = prev1_flipped > 63; // 2+-byte lead + // is_third_byte = prev2_flipped > 95; // 3+-byte lead + // is_fourth_byte = prev3_flipped > 111; // 4+-byte lead + // ``` + // + // NOTE: we use `^ 0x80` instead of `+ 128` in the code, which accomplishes the same thing, and even takes the same number + // of cycles as `+`, but on many Intel architectures can be parallelized better (you can do 3 + // `^`'s at a time on Haswell, but only 2 `+`'s). + // + // That doesn't look like it saved us any instructions, did it? 
Well, because we're adding the + // same number to all of them, we can save one of those `+ 128` operations by assembling + // `prev2_flipped` out of prev 1 and prev 3 instead of assembling it from input and adding 128 + // to it. One more instruction saved! + // + // ``` + // prev1 = input.prev<1> + // prev3 = input.prev<3> + // prev1_flipped = prev1 ^ 0x80; // Same as `+ 128` + // prev3_flipped = prev3 ^ 0x80; // Same as `+ 128` + // prev2_flipped = prev1_flipped.concat<2>(prev3_flipped): // | C -> ^ D, or + // you do B | C -> | A -> ^ D. But addition and subtraction *are* associative: (A + B + C) - D can + // be written as `(A + B) + (C - D)`. This means you can do A + B and C - D at the same time, and + // then adds the result together. Same number of operations, but if the processor can run + // independent things in parallel (which most can), it runs faster. + // + // This doesn't help us on Intel, but might help us elsewhere: on Haswell, at least, | and ^ have + // a super nice advantage in that more of them can be run at the same time (they can run on 3 + // ports, while + and - can run on 2)! This means that we can do A | B while we're still doing C, + // saving us the cycle we would have earned by using +. Even more, using an instruction with a + // wider array of ports can help *other* code run ahead, too, since these instructions can "get + // out of the way," running on a port other instructions can't. 
+ // + // Epilogue II: One More Trick + // --------------------------- + // + // There's one more relevant trick up our sleeve, it turns out: it turns out on Intel we can "pay + // for" the (prev<1> + 128) instruction, because it can be used to save an instruction in + // check_special_cases()--but we'll talk about that there :) + // + really_inline simd8 check_multibyte_lengths(simd8 input, simd8 prev_input, simd8 prev1) { + simd8 prev2 = input.prev<2>(prev_input); + simd8 prev3 = input.prev<3>(prev_input); + + // Cont is 10000000-101111111 (-65...-128) + simd8 is_continuation = simd8(input) < int8_t(-64); + // must_be_continuation is architecture-specific because Intel doesn't have unsigned comparisons + return simd8(must_be_continuation(prev1, prev2, prev3) ^ is_continuation); + } + + // + // Return nonzero if there are incomplete multibyte characters at the end of the block: + // e.g. if there is a 4-byte character, but it's 3 bytes from the end. + // + really_inline simd8 is_incomplete(simd8 input) { + // If the previous input's last 3 bytes match this, they're too short (they ended at EOF): + // ... 1111____ 111_____ 11______ + static const uint8_t max_array[32] = { + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 0b11110000u-1, 0b11100000u-1, 0b11000000u-1 + }; + const simd8 max_value(&max_array[sizeof(max_array)-sizeof(simd8)]); + return input.gt_bits(max_value); + } + + struct utf8_checker { + // If this is nonzero, there has been a UTF-8 error. + simd8 error; + // The last input we received + simd8 prev_input_block; + // Whether the last input we received was incomplete (used for ASCII fast path) + simd8 prev_incomplete; + + // + // Check whether the current bytes are valid UTF-8. 
+ // + really_inline void check_utf8_bytes(const simd8 input, const simd8 prev_input) { + // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ lead bytes + // (2, 3, 4-byte leads become large positive numbers instead of small negative numbers) + simd8 prev1 = input.prev<1>(prev_input); + this->error |= check_special_cases(input, prev1); + this->error |= check_multibyte_lengths(input, prev_input, prev1); + } + + // The only problem that can happen at EOF is that a multibyte character is too short. + really_inline void check_eof() { + // If the previous block had incomplete UTF-8 characters at the end, an ASCII block can't + // possibly finish them. + this->error |= this->prev_incomplete; + } + + really_inline void check_next_input(simd8x64 input) { + if (likely(is_ascii(input))) { + // If the previous block had incomplete UTF-8 characters at the end, an ASCII block can't + // possibly finish them. + this->error |= this->prev_incomplete; + } else { + this->check_utf8_bytes(input.chunks[0], this->prev_input_block); + for (int i=1; i::NUM_CHUNKS; i++) { + this->check_utf8_bytes(input.chunks[i], input.chunks[i-1]); + } + this->prev_incomplete = is_incomplete(input.chunks[simd8x64::NUM_CHUNKS-1]); + this->prev_input_block = input.chunks[simd8x64::NUM_CHUNKS-1]; + } + } + + really_inline ErrorValues errors() { + return this->error.any_bits_set_anywhere() ? 
simdjson::UTF8_ERROR : simdjson::SUCCESS; + } + + }; // struct utf8_checker +} + +using utf8_validation::utf8_checker; +/* end file src/generic/utf8_lookup2_algorithm.h */ +/* begin file src/generic/stage1_find_marks.h */ +// This file contains the common code every implementation uses in stage1 +// It is intended to be included multiple times and compiled multiple times +// We assume the file in which it is included already includes +// "simdjson/stage1_find_marks.h" (this simplifies amalgation) + +namespace stage1 { + +class bit_indexer { +public: + uint32_t *tail; + + bit_indexer(uint32_t *index_buf) : tail(index_buf) {} + + // flatten out values in 'bits' assuming that they are are to have values of idx + // plus their position in the bitvector, and store these indexes at + // base_ptr[base] incrementing base as we go + // will potentially store extra values beyond end of valid bits, so base_ptr + // needs to be large enough to handle this + really_inline void write_indexes(uint32_t idx, uint64_t bits) { + // In some instances, the next branch is expensive because it is mispredicted. + // Unfortunately, in other cases, + // it helps tremendously. + if (bits == 0) + return; + uint32_t cnt = hamming(bits); + + // Do the first 8 all together + for (int i=0; i<8; i++) { + this->tail[i] = idx + trailing_zeroes(bits); + bits = clear_lowest_bit(bits); + } + + // Do the next 8 all together (we hope in most cases it won't happen at all + // and the branch is easily predicted). + if (unlikely(cnt > 8)) { + for (int i=8; i<16; i++) { + this->tail[i] = idx + trailing_zeroes(bits); + bits = clear_lowest_bit(bits); + } + + // Most files don't have 16+ structurals per block, so we take several basically guaranteed + // branch mispredictions here. 16+ structurals per block means either punctuation ({} [] , :) + // or the start of a value ("abc" true 123) every four characters. 
+ if (unlikely(cnt > 16)) { + uint32_t i = 16; + do { + this->tail[i] = idx + trailing_zeroes(bits); + bits = clear_lowest_bit(bits); + i++; + } while (i < cnt); + } + } + + this->tail += cnt; + } +}; + +class json_structural_scanner { +public: + // Whether the first character of the next iteration is escaped. + uint64_t prev_escaped = 0ULL; + // Whether the last iteration was still inside a string (all 1's = true, all 0's = false). + uint64_t prev_in_string = 0ULL; + // Whether the last character of the previous iteration is a primitive value character + // (anything except whitespace, braces, comma or colon). + uint64_t prev_primitive = 0ULL; + // Mask of structural characters from the last iteration. + // Kept around for performance reasons, so we can call flatten_bits to soak up some unused + // CPU capacity while the next iteration is busy with an expensive clmul in compute_quote_mask. + uint64_t prev_structurals = 0; + // Errors with unescaped characters in strings (ASCII codepoints < 0x20) + uint64_t unescaped_chars_error = 0; + bit_indexer structural_indexes; + + json_structural_scanner(uint32_t *_structural_indexes) : structural_indexes{_structural_indexes} {} + + // + // Finish the scan and return any errors. + // + // This may detect errors as well, such as unclosed string and certain UTF-8 errors. + // if streaming is set to true, an unclosed string is allowed. + // + really_inline ErrorValues detect_errors_on_eof(bool streaming = false); + + // + // Return a mask of all string characters plus end quotes. + // + // prev_escaped is overflow saying whether the next character is escaped. + // prev_in_string is overflow saying whether we're still in a string. + // + // Backslash sequences outside of quotes will be detected in stage 2. 
+ // + really_inline uint64_t find_strings(const simd::simd8x64 in); + + // + // Determine which characters are *structural*: + // - braces: [] and {} + // - the start of primitives (123, true, false, null) + // - the start of invalid non-whitespace (+, &, ture, UTF-8) + // + // Also detects value sequence errors: + // - two values with no separator between ("hello" "world") + // - separators with no values ([1,] [1,,]and [,2]) + // + // This method will find all of the above whether it is in a string or not. + // + // To reduce dependency on the expensive "what is in a string" computation, this method treats the + // contents of a string the same as content outside. Errors and structurals inside the string or on + // the trailing quote will need to be removed later when the correct string information is known. + // + really_inline uint64_t find_potential_structurals(const simd::simd8x64 in); + + // + // Find the important bits of JSON in a STEP_SIZE-byte chunk, and add them to structural_indexes. + // + template + really_inline void scan_step(const uint8_t *buf, const size_t idx, utf8_checker &utf8_checker); + + // + // Parse the entire input in STEP_SIZE-byte chunks. + // + template + really_inline void scan(const uint8_t *buf, const size_t len, utf8_checker &utf8_checker); +}; + +// Routines to print masks and text for debugging bitmask operations +UNUSED static char * format_input_text(const simd8x64 in) { + static char *buf = (char*)malloc(sizeof(simd8x64) + 1); + in.store((uint8_t*)buf); + for (size_t i=0; i); i++) { + if (buf[i] < ' ') { buf[i] = '_'; } + } + buf[sizeof(simd8x64)] = '\0'; + return buf; +} + +UNUSED static char * format_mask(uint64_t mask) { + static char *buf = (char*)malloc(64 + 1); + for (size_t i=0; i<64; i++) { + buf[i] = (mask & (size_t(1) << i)) ? 'X' : ' '; + } + buf[64] = '\0'; + return buf; +} + +// +// Finds escaped characters (characters following \). 
+// +// Handles runs of backslashes like \\\" and \\\\" correctly (yielding 0101 and 01010, respectively). +// +// Does this by: +// - Shift the escape mask to get potentially escaped characters (characters after backslashes). +// - Mask escaped sequences that start on *even* bits with 1010101010 (odd bits are escaped, even bits are not) +// - Mask escaped sequences that start on *odd* bits with 0101010101 (even bits are escaped, odd bits are not) +// +// To distinguish between escaped sequences starting on even/odd bits, it finds the start of all +// escape sequences, filters out the ones that start on even bits, and adds that to the mask of +// escape sequences. This causes the addition to clear out the sequences starting on odd bits (since +// the start bit causes a carry), and leaves even-bit sequences alone. +// +// Example: +// +// text | \\\ | \\\"\\\" \\\" \\"\\" | +// escape | xxx | xx xxx xxx xx xx | Removed overflow backslash; will | it into follows_escape +// odd_starts | x | x x x | escape & ~even_bits & ~follows_escape +// even_seq | c| cxxx c xx c | c = carry bit -- will be masked out later +// invert_mask | | cxxx c xx c| even_seq << 1 +// follows_escape | xx | x xx xxx xxx xx xx | Includes overflow bit +// escaped | x | x x x x x x x x | +// desired | x | x x x x x x x x | +// text | \\\ | \\\"\\\" \\\" \\"\\" | +// +really_inline uint64_t find_escaped(uint64_t escape, uint64_t &escaped_overflow) { + // If there was overflow, pretend the first character isn't a backslash + escape &= ~escaped_overflow; + uint64_t follows_escape = escape << 1 | escaped_overflow; + + // Get sequences starting on even bits by clearing out the odd series using + + const uint64_t even_bits = 0x5555555555555555ULL; + uint64_t odd_sequence_starts = escape & ~even_bits & ~follows_escape; + uint64_t sequences_starting_on_even_bits; + escaped_overflow = add_overflow(odd_sequence_starts, escape, &sequences_starting_on_even_bits); + uint64_t invert_mask = 
sequences_starting_on_even_bits << 1; // The mask we want to return is the *escaped* bits, not escapes. + + // Mask every other backslashed character as an escaped character + // Flip the mask for sequences that start on even bits, to correct them + return (even_bits ^ invert_mask) & follows_escape; +} + +// +// Check if the current character immediately follows a matching character. +// +// For example, this checks for quotes with backslashes in front of them: +// +// const uint64_t backslashed_quote = in.eq('"') & immediately_follows(in.eq('\'), prev_backslash); +// +really_inline uint64_t follows(const uint64_t match, uint64_t &overflow) { + const uint64_t result = match << 1 | overflow; + overflow = match >> 63; + return result; +} + +// +// Check if the current character follows a matching character, with possible "filler" between. +// For example, this checks for empty curly braces, e.g. +// +// in.eq('}') & follows(in.eq('['), in.eq(' '), prev_empty_array) // { * } +// +really_inline uint64_t follows(const uint64_t match, const uint64_t filler, uint64_t &overflow) { + uint64_t follows_match = follows(match, overflow); + uint64_t result; + overflow |= add_overflow(follows_match, filler, &result); + return result; +} + +really_inline ErrorValues json_structural_scanner::detect_errors_on_eof(bool streaming) { + if ((prev_in_string) and (not streaming)) { + return UNCLOSED_STRING; + } + if (unescaped_chars_error) { + return UNESCAPED_CHARS; + } + return SUCCESS; +} + +// +// Return a mask of all string characters plus end quotes. +// +// prev_escaped is overflow saying whether the next character is escaped. +// prev_in_string is overflow saying whether we're still in a string. +// +// Backslash sequences outside of quotes will be detected in stage 2. 
+// +really_inline uint64_t json_structural_scanner::find_strings(const simd::simd8x64 in) { + const uint64_t backslash = in.eq('\\'); + const uint64_t escaped = find_escaped(backslash, prev_escaped); + const uint64_t quote = in.eq('"') & ~escaped; + // prefix_xor flips on bits inside the string (and flips off the end quote). + const uint64_t in_string = prefix_xor(quote) ^ prev_in_string; + /* right shift of a signed value expected to be well-defined and standard + * compliant as of C++20, + * John Regher from Utah U. says this is fine code */ + prev_in_string = static_cast(static_cast(in_string) >> 63); + // Use ^ to turn the beginning quote off, and the end quote on. + return in_string ^ quote; +} + +// +// Determine which characters are *structural*: +// - braces: [] and {} +// - the start of primitives (123, true, false, null) +// - the start of invalid non-whitespace (+, &, ture, UTF-8) +// +// Also detects value sequence errors: +// - two values with no separator between ("hello" "world") +// - separators with no values ([1,] [1,,]and [,2]) +// +// This method will find all of the above whether it is in a string or not. +// +// To reduce dependency on the expensive "what is in a string" computation, this method treats the +// contents of a string the same as content outside. Errors and structurals inside the string or on +// the trailing quote will need to be removed later when the correct string information is known. +// +really_inline uint64_t json_structural_scanner::find_potential_structurals(const simd::simd8x64 in) { + // These use SIMD so let's kick them off before running the regular 64-bit stuff ... + uint64_t whitespace, op; + find_whitespace_and_operators(in, whitespace, op); + + // Detect the start of a run of primitive characters. Includes numbers, booleans, and strings ("). + // Everything except whitespace, braces, colon and comma. 
+ const uint64_t primitive = ~(op | whitespace); + const uint64_t follows_primitive = follows(primitive, prev_primitive); + const uint64_t start_primitive = primitive & ~follows_primitive; + + // Return final structurals + return op | start_primitive; +} + +// +// Find the important bits of JSON in a 128-byte chunk, and add them to structural_indexes. +// +// PERF NOTES: +// We pipe 2 inputs through these stages: +// 1. Load JSON into registers. This takes a long time and is highly parallelizable, so we load +// 2 inputs' worth at once so that by the time step 2 is looking for them input, it's available. +// 2. Scan the JSON for critical data: strings, primitives and operators. This is the critical path. +// The output of step 1 depends entirely on this information. These functions don't quite use +// up enough CPU: the second half of the functions is highly serial, only using 1 execution core +// at a time. The second input's scans has some dependency on the first ones finishing it, but +// they can make a lot of progress before they need that information. +// 3. Step 1 doesn't use enough capacity, so we run some extra stuff while we're waiting for that +// to finish: utf-8 checks and generating the output from the last iteration. +// +// The reason we run 2 inputs at a time, is steps 2 and 3 are *still* not enough to soak up all +// available capacity with just one input. Running 2 at a time seems to give the CPU a good enough +// workout. +// +template<> +really_inline void json_structural_scanner::scan_step<128>(const uint8_t *buf, const size_t idx, utf8_checker &utf8_checker) { + // + // Load up all 128 bytes into SIMD registers + // + simd::simd8x64 in_1(buf); + simd::simd8x64 in_2(buf+64); + + // + // Find the strings and potential structurals (operators / primitives). + // + // This will include false structurals that are *inside* strings--we'll filter strings out + // before we return. 
+ // + uint64_t string_1 = this->find_strings(in_1); + uint64_t structurals_1 = this->find_potential_structurals(in_1); + uint64_t string_2 = this->find_strings(in_2); + uint64_t structurals_2 = this->find_potential_structurals(in_2); + + // + // Do miscellaneous work while the processor is busy calculating strings and structurals. + // + // After that, weed out structurals that are inside strings and find invalid string characters. + // + uint64_t unescaped_1 = in_1.lteq(0x1F); + utf8_checker.check_next_input(in_1); + this->structural_indexes.write_indexes(idx-64, this->prev_structurals); // Output *last* iteration's structurals to ParsedJson + this->prev_structurals = structurals_1 & ~string_1; + this->unescaped_chars_error |= unescaped_1 & string_1; + + uint64_t unescaped_2 = in_2.lteq(0x1F); + utf8_checker.check_next_input(in_2); + this->structural_indexes.write_indexes(idx, this->prev_structurals); // Output *last* iteration's structurals to ParsedJson + this->prev_structurals = structurals_2 & ~string_2; + this->unescaped_chars_error |= unescaped_2 & string_2; +} + +// +// Find the important bits of JSON in a 64-byte chunk, and add them to structural_indexes. +// +template<> +really_inline void json_structural_scanner::scan_step<64>(const uint8_t *buf, const size_t idx, utf8_checker &utf8_checker) { + // + // Load up bytes into SIMD registers + // + simd::simd8x64 in_1(buf); + + // + // Find the strings and potential structurals (operators / primitives). + // + // This will include false structurals that are *inside* strings--we'll filter strings out + // before we return. + // + uint64_t string_1 = this->find_strings(in_1); + uint64_t structurals_1 = this->find_potential_structurals(in_1); + + // + // Do miscellaneous work while the processor is busy calculating strings and structurals. + // + // After that, weed out structurals that are inside strings and find invalid string characters. 
+ // + uint64_t unescaped_1 = in_1.lteq(0x1F); + utf8_checker.check_next_input(in_1); + this->structural_indexes.write_indexes(idx-64, this->prev_structurals); // Output *last* iteration's structurals to ParsedJson + this->prev_structurals = structurals_1 & ~string_1; + this->unescaped_chars_error |= unescaped_1 & string_1; +} + +template +really_inline void json_structural_scanner::scan(const uint8_t *buf, const size_t len, utf8_checker &utf8_checker) { + size_t lenminusstep = len < STEP_SIZE ? 0 : len - STEP_SIZE; + size_t idx = 0; + + for (; idx < lenminusstep; idx += STEP_SIZE) { + this->scan_step(&buf[idx], idx, utf8_checker); + } + + /* If we have a final chunk of less than STEP_SIZE bytes, pad it to STEP_SIZE with + * spaces before processing it (otherwise, we risk invalidating the UTF-8 + * checks). */ + if (likely(idx < len)) { + uint8_t tmp_buf[STEP_SIZE]; + memset(tmp_buf, 0x20, STEP_SIZE); + memcpy(tmp_buf, buf + idx, len - idx); + this->scan_step(&tmp_buf[0], idx, utf8_checker); + idx += STEP_SIZE; + } + + /* finally, flatten out the remaining structurals from the last iteration */ + this->structural_indexes.write_indexes(idx-64, this->prev_structurals); +} + +// Setting the streaming parameter to true allows the find_structural_bits to tolerate unclosed strings. +// The caller should still ensure that the input is valid UTF-8. If you are processing substrings, +// you may want to call on a function like trimmed_length_safe_utf8. 
+template +int find_structural_bits(const uint8_t *buf, size_t len, simdjson::ParsedJson &pj, bool streaming) { + if (unlikely(len > pj.byte_capacity)) { + return simdjson::CAPACITY; + } + utf8_checker utf8_checker{}; + json_structural_scanner scanner{pj.structural_indexes.get()}; + scanner.scan(buf, len, utf8_checker); + // we might tolerate an unclosed string if streaming is true + simdjson::ErrorValues error = scanner.detect_errors_on_eof(streaming); + if (unlikely(error != simdjson::SUCCESS)) { + return error; + } + pj.n_structural_indexes = scanner.structural_indexes.tail - pj.structural_indexes.get(); + /* a valid JSON file cannot have zero structural indexes - we should have + * found something */ + if (unlikely(pj.n_structural_indexes == 0u)) { + return simdjson::EMPTY; + } + if (unlikely(pj.structural_indexes[pj.n_structural_indexes - 1] > len)) { + return simdjson::UNEXPECTED_ERROR; + } + if (len != pj.structural_indexes[pj.n_structural_indexes - 1]) { + /* the string might not be NULL terminated, but we add a virtual NULL + * ending character. 
*/ + pj.structural_indexes[pj.n_structural_indexes++] = len; + } + /* make it safe to dereference one beyond this array */ + pj.structural_indexes[pj.n_structural_indexes] = 0; + return utf8_checker.errors(); +} + +} // namespace stage1 +/* end file src/generic/stage1_find_marks.h */ + +} // namespace haswell +UNTARGET_REGION + +TARGET_HASWELL +namespace simdjson { + +template <> +int find_structural_bits(const uint8_t *buf, size_t len, simdjson::ParsedJson &pj, bool streaming) { + return haswell::stage1::find_structural_bits<128>(buf, len, pj, streaming); +} + +} // namespace simdjson +UNTARGET_REGION + +#endif // IS_X86_64 +#endif // SIMDJSON_HASWELL_STAGE1_FIND_MARKS_H +/* end file src/generic/stage1_find_marks.h */ +/* begin file src/westmere/stage1_find_marks.h */ +#ifndef SIMDJSON_WESTMERE_STAGE1_FIND_MARKS_H +#define SIMDJSON_WESTMERE_STAGE1_FIND_MARKS_H + + +#ifdef IS_X86_64 + +/* begin file src/westmere/bitmask.h */ +#ifndef SIMDJSON_WESTMERE_BITMASK_H +#define SIMDJSON_WESTMERE_BITMASK_H + + +#ifdef IS_X86_64 + +/* begin file src/westmere/intrinsics.h */ +#ifndef SIMDJSON_WESTMERE_INTRINSICS_H +#define SIMDJSON_WESTMERE_INTRINSICS_H + +#ifdef IS_X86_64 +#ifdef _MSC_VER +#include // visual studio +#else +#include // elsewhere +#endif // _MSC_VER +#endif // IS_X86_64 +#endif // SIMDJSON_WESTMERE_INTRINSICS_H +/* end file src/westmere/intrinsics.h */ + +TARGET_WESTMERE +namespace simdjson::westmere { + +// +// Perform a "cumulative bitwise xor," flipping bits each time a 1 is encountered. +// +// For example, prefix_xor(00100100) == 00011100 +// +really_inline uint64_t prefix_xor(const uint64_t bitmask) { + // There should be no such thing with a processing supporting avx2 + // but not clmul. 
+ __m128i all_ones = _mm_set1_epi8('\xFF'); + __m128i result = _mm_clmulepi64_si128(_mm_set_epi64x(0ULL, bitmask), all_ones, 0); + return _mm_cvtsi128_si64(result); +} + +} // namespace simdjson::westmere +UNTARGET_REGION + +#endif // IS_X86_64 +#endif +/* end file src/westmere/intrinsics.h */ /* begin file src/westmere/simd.h */ #ifndef SIMDJSON_WESTMERE_SIMD_H #define SIMDJSON_WESTMERE_SIMD_H @@ -3661,6 +3606,7 @@ UNTARGET_REGION #ifdef IS_X86_64 +/* westmere/intrinsics.h already included: #include "westmere/intrinsics.h" */ TARGET_WESTMERE namespace simdjson::westmere::simd { @@ -3966,1834 +3912,96 @@ UNTARGET_REGION #endif // IS_X86_64 #endif // SIMDJSON_WESTMERE_SIMD_INPUT_H /* end file src/westmere/simd.h */ -/* begin file src/arm64/stage1_find_marks.h */ -#ifndef SIMDJSON_ARM64_STAGE1_FIND_MARKS_H -#define SIMDJSON_ARM64_STAGE1_FIND_MARKS_H - - -#ifdef IS_ARM64 - - -namespace simdjson::arm64 { - -using namespace simd; - -really_inline void find_whitespace_and_operators( - const simd::simd8x64 in, - uint64_t &whitespace, uint64_t &op) { - - auto v = in.map([&](simd8 chunk) { - auto nib_lo = chunk & 0xf; - auto nib_hi = chunk.shr<4>(); - auto shuf_lo = nib_lo.lookup_16(16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0); - auto shuf_hi = nib_hi.lookup_16(8, 0, 18, 4, 0, 1, 0, 1, 0, 0, 0, 3, 2, 1, 0, 0); - return shuf_lo & shuf_hi; - }); - - op = v.map([&](simd8 _v) { return _v.any_bits_set(0x7); }).to_bitmask(); - whitespace = v.map([&](simd8 _v) { return _v.any_bits_set(0x18); }).to_bitmask(); -} - -really_inline bool is_ascii(simd8x64 input) { - simd8 bits = input.reduce([&](auto a,auto b) { return a|b; }); - return bits.max() < 0b10000000u; -} - -really_inline simd8 must_be_continuation(simd8 prev1, simd8 prev2, simd8 prev3) { - simd8 is_second_byte = prev1 >= uint8_t(0b11000000u); - simd8 is_third_byte = prev2 >= uint8_t(0b11100000u); - simd8 is_fourth_byte = prev3 >= uint8_t(0b11110000u); - // Use ^ instead of | for is_*_byte, because ^ is commutative, and 
the caller is using ^ as well. - // This will work fine because we only have to report errors for cases with 0-1 lead bytes. - // Multiple lead bytes implies 2 overlapping multibyte characters, and if that happens, there is - // guaranteed to be at least *one* lead byte that is part of only 1 other multibyte character. - // The error will be detected there. - return is_second_byte ^ is_third_byte ^ is_fourth_byte; -} - -// -// Detect Unicode errors. -// -// UTF-8 is designed to allow multiple bytes and be compatible with ASCII. It's a fairly basic -// encoding that uses the first few bits on each byte to denote a "byte type", and all other bits -// are straight up concatenated into the final value. The first byte of a multibyte character is a -// "leading byte" and starts with N 1's, where N is the total number of bytes (110_____ = 2 byte -// lead). The remaining bytes of a multibyte character all start with 10. 1-byte characters just -// start with 0, because that's what ASCII looks like. Here's what each size -// -// - ASCII (7 bits): 0_______ -// - 2 byte character (11 bits): 110_____ 10______ -// - 3 byte character (17 bits): 1110____ 10______ 10______ -// - 4 byte character (23 bits): 11110___ 10______ 10______ 10______ -// - 5+ byte character (illegal): 11111___ -// -// There are 5 classes of error that can happen in Unicode: -// -// - TOO_SHORT: when you have a multibyte character with too few bytes (i.e. missing continuation). -// We detect this by looking for new characters (lead bytes) inside the range of a multibyte -// character. -// -// e.g. 11000000 01100001 (2-byte character where second byte is ASCII) -// -// - TOO_LONG: when there are more bytes in your character than you need (i.e. extra continuation). -// We detect this by requiring that the next byte after your multibyte character be a new -// character--so a continuation after your character is wrong. -// -// e.g. 
11011111 10111111 10111111 (2-byte character followed by *another* continuation byte) -// -// - TOO_LARGE: Unicode only goes up to U+10FFFF. These characters are too large. -// -// e.g. 11110111 10111111 10111111 10111111 (bigger than 10FFFF). -// -// - OVERLONG: multibyte characters with a bunch of leading zeroes, where you could have -// used fewer bytes to make the same character. Like encoding an ASCII character in 4 bytes is -// technically possible, but UTF-8 disallows it so that there is only one way to write an "a". -// -// e.g. 11000001 10100001 (2-byte encoding of "a", which only requires 1 byte: 01100001) -// -// - SURROGATE: Unicode U+D800-U+DFFF is a *surrogate* character, reserved for use in UCS-2 and -// WTF-8 encodings for characters with > 2 bytes. These are illegal in pure UTF-8. -// -// e.g. 11101101 10100000 10000000 (U+D800) -// -// - INVALID_5_BYTE: 5-byte, 6-byte, 7-byte and 8-byte characters are unsupported; Unicode does not -// support values with more than 23 bits (which a 4-byte character supports). -// -// e.g. 11111000 10100000 10000000 10000000 10000000 (U+800000) -// -// Legal utf-8 byte sequences per http://www.unicode.org/versions/Unicode6.0.0/ch03.pdf - page 94: -// -// Code Points 1st 2s 3s 4s -// U+0000..U+007F 00..7F -// U+0080..U+07FF C2..DF 80..BF -// U+0800..U+0FFF E0 A0..BF 80..BF -// U+1000..U+CFFF E1..EC 80..BF 80..BF -// U+D000..U+D7FF ED 80..9F 80..BF -// U+E000..U+FFFF EE..EF 80..BF 80..BF -// U+10000..U+3FFFF F0 90..BF 80..BF 80..BF -// U+40000..U+FFFFF F1..F3 80..BF 80..BF 80..BF -// U+100000..U+10FFFF F4 80..8F 80..BF 80..BF -// -using namespace simd; - -namespace utf8_validation { - - // - // Find special case UTF-8 errors where the character is technically readable (has the right length) - // but the *value* is disallowed. - // - // This includes overlong encodings, surrogates and values too large for Unicode. 
- // - // It turns out the bad character ranges can all be detected by looking at the first 12 bits of the - // UTF-8 encoded character (i.e. all of byte 1, and the high 4 bits of byte 2). This algorithm does a - // 3 4-bit table lookups, identifying which errors that 4 bits could match, and then &'s them together. - // If all 3 lookups detect the same error, it's an error. - // - really_inline simd8 check_special_cases(const simd8 input, const simd8 prev1) { - // - // These are the errors we're going to match for bytes 1-2, by looking at the first three - // nibbles of the character: > & & - // - static const int OVERLONG_2 = 0x01; // 1100000_ 10______ (technically we match 10______ but we could match ________, they both yield errors either way) - static const int OVERLONG_3 = 0x02; // 11100000 100_____ ________ - static const int OVERLONG_4 = 0x04; // 11110000 1000____ ________ ________ - static const int SURROGATE = 0x08; // 11101101 [101_]____ - static const int TOO_LARGE = 0x10; // 11110100 (1001|101_)____ - static const int TOO_LARGE_2 = 0x20; // 1111(1___|011_|0101) 10______ - - // After processing the rest of byte 1 (the low bits), we're still not done--we have to check - // byte 2 to be sure which things are errors and which aren't. 
- // Since high_bits is byte 5, byte 2 is high_bits.prev<3> - static const int CARRY = OVERLONG_2 | TOO_LARGE_2; - const simd8 byte_2_high = input.shr<4>().lookup_16( - // ASCII: ________ [0___]____ - CARRY, CARRY, CARRY, CARRY, - // ASCII: ________ [0___]____ - CARRY, CARRY, CARRY, CARRY, - // Continuations: ________ [10__]____ - CARRY | OVERLONG_3 | OVERLONG_4, // ________ [1000]____ - CARRY | OVERLONG_3 | TOO_LARGE, // ________ [1001]____ - CARRY | TOO_LARGE | SURROGATE, // ________ [1010]____ - CARRY | TOO_LARGE | SURROGATE, // ________ [1011]____ - // Multibyte Leads: ________ [11__]____ - CARRY, CARRY, CARRY, CARRY - ); - - const simd8 byte_1_high = prev1.shr<4>().lookup_16( - // [0___]____ (ASCII) - 0, 0, 0, 0, - 0, 0, 0, 0, - // [10__]____ (continuation) - 0, 0, 0, 0, - // [11__]____ (2+-byte leads) - OVERLONG_2, 0, // [110_]____ (2-byte lead) - OVERLONG_3 | SURROGATE, // [1110]____ (3-byte lead) - OVERLONG_4 | TOO_LARGE | TOO_LARGE_2 // [1111]____ (4+-byte lead) - ); - - const simd8 byte_1_low = (prev1 & 0x0F).lookup_16( - // ____[00__] ________ - OVERLONG_2 | OVERLONG_3 | OVERLONG_4, // ____[0000] ________ - OVERLONG_2, // ____[0001] ________ - 0, 0, - // ____[01__] ________ - TOO_LARGE, // ____[0100] ________ - TOO_LARGE_2, - TOO_LARGE_2, - TOO_LARGE_2, - // ____[10__] ________ - TOO_LARGE_2, TOO_LARGE_2, TOO_LARGE_2, TOO_LARGE_2, - // ____[11__] ________ - TOO_LARGE_2, - TOO_LARGE_2 | SURROGATE, // ____[1101] ________ - TOO_LARGE_2, TOO_LARGE_2 - ); - - return byte_1_high & byte_1_low & byte_2_high; - } - - // - // Validate the length of multibyte characters (that each multibyte character has the right number - // of continuation characters, and that all continuation characters are part of a multibyte - // character). - // - // Algorithm - // ========= - // - // This algorithm compares *expected* continuation characters with *actual* continuation bytes, - // and emits an error anytime there is a mismatch. 
- // - // For example, in the string "𝄞₿֏ab", which has a 4-, 3-, 2- and 1-byte - // characters, the file will look like this: - // - // | Character | 𝄞 | | | | ₿ | | | ֏ | | a | b | - // |-----------------------|----|----|----|----|----|----|----|----|----|----|----| - // | Character Length | 4 | | | | 3 | | | 2 | | 1 | 1 | - // | Byte | F0 | 9D | 84 | 9E | E2 | 82 | BF | D6 | 8F | 61 | 62 | - // | is_second_byte | | X | | | | X | | | X | | | - // | is_third_byte | | | X | | | | X | | | | | - // | is_fourth_byte | | | | X | | | | | | | | - // | expected_continuation | | X | X | X | | X | X | | X | | | - // | is_continuation | | X | X | X | | X | X | | X | | | - // - // The errors here are basically (Second Byte OR Third Byte OR Fourth Byte == Continuation): - // - // - **Extra Continuations:** Any continuation that is not a second, third or fourth byte is not - // part of a valid 2-, 3- or 4-byte character and is thus an error. It could be that it's just - // floating around extra outside of any character, or that there is an illegal 5-byte character, - // or maybe it's at the beginning of the file before any characters have started; but it's an - // error in all these cases. - // - **Missing Continuations:** Any second, third or fourth byte that *isn't* a continuation is an error, because that means - // we started a new character before we were finished with the current one. - // - // Getting the Previous Bytes - // -------------------------- - // - // Because we want to know if a byte is the *second* (or third, or fourth) byte of a multibyte - // character, we need to "shift the bytes" to find that out. This is what they mean: - // - // - `is_continuation`: if the current byte is a continuation. - // - `is_second_byte`: if 1 byte back is the start of a 2-, 3- or 4-byte character. - // - `is_third_byte`: if 2 bytes back is the start of a 3- or 4-byte character. - // - `is_fourth_byte`: if 3 bytes back is the start of a 4-byte character. 
- // - // We use shuffles to go n bytes back, selecting part of the current `input` and part of the - // `prev_input` (search for `.prev<1>`, `.prev<2>`, etc.). These are passed in by the caller - // function, because the 1-byte-back data is used by other checks as well. - // - // Getting the Continuation Mask - // ----------------------------- - // - // Once we have the right bytes, we have to get the masks. To do this, we treat UTF-8 bytes as - // numbers, using signed `<` and `>` operations to check if they are continuations or leads. - // In fact, we treat the numbers as *signed*, partly because it helps us, and partly because - // Intel's SIMD presently only offers signed `<` and `>` operations (not unsigned ones). - // - // In UTF-8, bytes that start with the bits 110, 1110 and 11110 are 2-, 3- and 4-byte "leads," - // respectively, meaning they expect to have 1, 2 and 3 "continuation bytes" after them. - // Continuation bytes start with 10, and ASCII (1-byte characters) starts with 0. - // - // When treated as signed numbers, they look like this: - // - // | Type | High Bits | Binary Range | Signed | - // |--------------|------------|--------------|--------| - // | ASCII | `0` | `01111111` | 127 | - // | | | `00000000` | 0 | - // | 4+-Byte Lead | `1111` | `11111111` | -1 | - // | | | `11110000 | -16 | - // | 3-Byte Lead | `1110` | `11101111` | -17 | - // | | | `11100000 | -32 | - // | 2-Byte Lead | `110` | `11011111` | -33 | - // | | | `11000000 | -64 | - // | Continuation | `10` | `10111111` | -65 | - // | | | `10000000 | -128 | - // - // This makes it pretty easy to get the continuation mask! It's just a single comparison: - // - // ``` - // is_continuation = input < -64` - // ``` - // - // We can do something similar for the others, but it takes two comparisons instead of one: "is - // the start of a 4-byte character" is `< -32` and `> -65`, for example. And 2+ bytes is `< 0` and - // `> -64`. Surely we can do better, they're right next to each other! 
- // - // Getting the is_xxx Masks: Shifting the Range - // -------------------------------------------- - // - // Notice *why* continuations were a single comparison. The actual *range* would require two - // comparisons--`< -64` and `> -129`--but all characters are always greater than -128, so we get - // that for free. In fact, if we had *unsigned* comparisons, 2+, 3+ and 4+ comparisons would be - // just as easy: 4+ would be `> 239`, 3+ would be `> 223`, and 2+ would be `> 191`. - // - // Instead, we add 128 to each byte, shifting the range up to make comparison easy. This wraps - // ASCII down into the negative, and puts 4+-Byte Lead at the top: - // - // | Type | High Bits | Binary Range | Signed | - // |----------------------|------------|--------------|-------| - // | 4+-Byte Lead (+ 127) | `0111` | `01111111` | 127 | - // | | | `01110000 | 112 | - // |----------------------|------------|--------------|-------| - // | 3-Byte Lead (+ 127) | `0110` | `01101111` | 111 | - // | | | `01100000 | 96 | - // |----------------------|------------|--------------|-------| - // | 2-Byte Lead (+ 127) | `010` | `01011111` | 95 | - // | | | `01000000 | 64 | - // |----------------------|------------|--------------|-------| - // | Continuation (+ 127) | `00` | `00111111` | 63 | - // | | | `00000000 | 0 | - // |----------------------|------------|--------------|-------| - // | ASCII (+ 127) | `1` | `11111111` | -1 | - // | | | `10000000` | -128 | - // |----------------------|------------|--------------|-------| - // - // *Now* we can use signed `>` on all of them: - // - // ``` - // prev1 = input.prev<1> - // prev2 = input.prev<2> - // prev3 = input.prev<3> - // prev1_flipped = input.prev<1>(prev_input) ^ 0x80; // Same as `+ 128` - // prev2_flipped = input.prev<2>(prev_input) ^ 0x80; // Same as `+ 128` - // prev3_flipped = input.prev<3>(prev_input) ^ 0x80; // Same as `+ 128` - // is_second_byte = prev1_flipped > 63; // 2+-byte lead - // is_third_byte = prev2_flipped > 95; // 
3+-byte lead - // is_fourth_byte = prev3_flipped > 111; // 4+-byte lead - // ``` - // - // NOTE: we use `^ 0x80` instead of `+ 128` in the code, which accomplishes the same thing, and even takes the same number - // of cycles as `+`, but on many Intel architectures can be parallelized better (you can do 3 - // `^`'s at a time on Haswell, but only 2 `+`'s). - // - // That doesn't look like it saved us any instructions, did it? Well, because we're adding the - // same number to all of them, we can save one of those `+ 128` operations by assembling - // `prev2_flipped` out of prev 1 and prev 3 instead of assembling it from input and adding 128 - // to it. One more instruction saved! - // - // ``` - // prev1 = input.prev<1> - // prev3 = input.prev<3> - // prev1_flipped = prev1 ^ 0x80; // Same as `+ 128` - // prev3_flipped = prev3 ^ 0x80; // Same as `+ 128` - // prev2_flipped = prev1_flipped.concat<2>(prev3_flipped): // | C -> ^ D, or - // you do B | C -> | A -> ^ D. But addition and subtraction *are* associative: (A + B + C) - D can - // be written as `(A + B) + (C - D)`. This means you can do A + B and C - D at the same time, and - // then adds the result together. Same number of operations, but if the processor can run - // independent things in parallel (which most can), it runs faster. - // - // This doesn't help us on Intel, but might help us elsewhere: on Haswell, at least, | and ^ have - // a super nice advantage in that more of them can be run at the same time (they can run on 3 - // ports, while + and - can run on 2)! This means that we can do A | B while we're still doing C, - // saving us the cycle we would have earned by using +. Even more, using an instruction with a - // wider array of ports can help *other* code run ahead, too, since these instructions can "get - // out of the way," running on a port other instructions can't. 
- // - // Epilogue II: One More Trick - // --------------------------- - // - // There's one more relevant trick up our sleeve, it turns out: it turns out on Intel we can "pay - // for" the (prev<1> + 128) instruction, because it can be used to save an instruction in - // check_special_cases()--but we'll talk about that there :) - // - really_inline simd8 check_multibyte_lengths(simd8 input, simd8 prev_input, simd8 prev1) { - simd8 prev2 = input.prev<2>(prev_input); - simd8 prev3 = input.prev<3>(prev_input); - - // Cont is 10000000-101111111 (-65...-128) - simd8 is_continuation = simd8(input) < int8_t(-64); - // must_be_continuation is architecture-specific because Intel doesn't have unsigned comparisons - return simd8(must_be_continuation(prev1, prev2, prev3) ^ is_continuation); - } - - // - // Return nonzero if there are incomplete multibyte characters at the end of the block: - // e.g. if there is a 4-byte character, but it's 3 bytes from the end. - // - really_inline simd8 is_incomplete(simd8 input) { - // If the previous input's last 3 bytes match this, they're too short (they ended at EOF): - // ... 1111____ 111_____ 11______ - static const uint8_t max_array[32] = { - 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 0b11110000u-1, 0b11100000u-1, 0b11000000u-1 - }; - const simd8 max_value(&max_array[sizeof(max_array)-sizeof(simd8)]); - return input.gt_bits(max_value); - } - - struct utf8_checker { - // If this is nonzero, there has been a UTF-8 error. - simd8 error; - // The last input we received - simd8 prev_input_block; - // Whether the last input we received was incomplete (used for ASCII fast path) - simd8 prev_incomplete; - - // - // Check whether the current bytes are valid UTF-8. 
- // - really_inline void check_utf8_bytes(const simd8 input, const simd8 prev_input) { - // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ lead bytes - // (2, 3, 4-byte leads become large positive numbers instead of small negative numbers) - simd8 prev1 = input.prev<1>(prev_input); - this->error |= check_special_cases(input, prev1); - this->error |= check_multibyte_lengths(input, prev_input, prev1); - } - - // The only problem that can happen at EOF is that a multibyte character is too short. - really_inline void check_eof() { - // If the previous block had incomplete UTF-8 characters at the end, an ASCII block can't - // possibly finish them. - this->error |= this->prev_incomplete; - } - - really_inline void check_next_input(simd8x64 input) { - if (likely(is_ascii(input))) { - // If the previous block had incomplete UTF-8 characters at the end, an ASCII block can't - // possibly finish them. - this->error |= this->prev_incomplete; - } else { - this->check_utf8_bytes(input.chunks[0], this->prev_input_block); - for (int i=1; i::NUM_CHUNKS; i++) { - this->check_utf8_bytes(input.chunks[i], input.chunks[i-1]); - } - this->prev_incomplete = is_incomplete(input.chunks[simd8x64::NUM_CHUNKS-1]); - this->prev_input_block = input.chunks[simd8x64::NUM_CHUNKS-1]; - } - } - - really_inline ErrorValues errors() { - return this->error.any_bits_set_anywhere() ? 
simdjson::UTF8_ERROR : simdjson::SUCCESS; - } - - }; // struct utf8_checker -} - -using utf8_validation::utf8_checker; -// This file contains the common code every implementation uses in stage1 -// It is intended to be included multiple times and compiled multiple times -// We assume the file in which it is included already includes -// "simdjson/stage1_find_marks.h" (this simplifies amalgation) - -namespace stage1 { - -class bit_indexer { -public: - uint32_t *tail; - - bit_indexer(uint32_t *index_buf) : tail(index_buf) {} - - // flatten out values in 'bits' assuming that they are are to have values of idx - // plus their position in the bitvector, and store these indexes at - // base_ptr[base] incrementing base as we go - // will potentially store extra values beyond end of valid bits, so base_ptr - // needs to be large enough to handle this - really_inline void write_indexes(uint32_t idx, uint64_t bits) { - // In some instances, the next branch is expensive because it is mispredicted. - // Unfortunately, in other cases, - // it helps tremendously. - if (bits == 0) - return; - uint32_t cnt = hamming(bits); - - // Do the first 8 all together - for (int i=0; i<8; i++) { - this->tail[i] = idx + trailing_zeroes(bits); - bits = clear_lowest_bit(bits); - } - - // Do the next 8 all together (we hope in most cases it won't happen at all - // and the branch is easily predicted). - if (unlikely(cnt > 8)) { - for (int i=8; i<16; i++) { - this->tail[i] = idx + trailing_zeroes(bits); - bits = clear_lowest_bit(bits); - } - - // Most files don't have 16+ structurals per block, so we take several basically guaranteed - // branch mispredictions here. 16+ structurals per block means either punctuation ({} [] , :) - // or the start of a value ("abc" true 123) every four characters. 
- if (unlikely(cnt > 16)) { - uint32_t i = 16; - do { - this->tail[i] = idx + trailing_zeroes(bits); - bits = clear_lowest_bit(bits); - i++; - } while (i < cnt); - } - } - - this->tail += cnt; - } -}; - -class json_structural_scanner { -public: - // Whether the first character of the next iteration is escaped. - uint64_t prev_escaped = 0ULL; - // Whether the last iteration was still inside a string (all 1's = true, all 0's = false). - uint64_t prev_in_string = 0ULL; - // Whether the last character of the previous iteration is a primitive value character - // (anything except whitespace, braces, comma or colon). - uint64_t prev_primitive = 0ULL; - // Mask of structural characters from the last iteration. - // Kept around for performance reasons, so we can call flatten_bits to soak up some unused - // CPU capacity while the next iteration is busy with an expensive clmul in compute_quote_mask. - uint64_t prev_structurals = 0; - // Errors with unescaped characters in strings (ASCII codepoints < 0x20) - uint64_t unescaped_chars_error = 0; - bit_indexer structural_indexes; - - json_structural_scanner(uint32_t *_structural_indexes) : structural_indexes{_structural_indexes} {} - - // - // Finish the scan and return any errors. - // - // This may detect errors as well, such as unclosed string and certain UTF-8 errors. - // if streaming is set to true, an unclosed string is allowed. - // - really_inline ErrorValues detect_errors_on_eof(bool streaming = false); - - // - // Return a mask of all string characters plus end quotes. - // - // prev_escaped is overflow saying whether the next character is escaped. - // prev_in_string is overflow saying whether we're still in a string. - // - // Backslash sequences outside of quotes will be detected in stage 2. 
- // - really_inline uint64_t find_strings(const simd::simd8x64 in); - - // - // Determine which characters are *structural*: - // - braces: [] and {} - // - the start of primitives (123, true, false, null) - // - the start of invalid non-whitespace (+, &, ture, UTF-8) - // - // Also detects value sequence errors: - // - two values with no separator between ("hello" "world") - // - separators with no values ([1,] [1,,]and [,2]) - // - // This method will find all of the above whether it is in a string or not. - // - // To reduce dependency on the expensive "what is in a string" computation, this method treats the - // contents of a string the same as content outside. Errors and structurals inside the string or on - // the trailing quote will need to be removed later when the correct string information is known. - // - really_inline uint64_t find_potential_structurals(const simd::simd8x64 in); - - // - // Find the important bits of JSON in a STEP_SIZE-byte chunk, and add them to structural_indexes. - // - template - really_inline void scan_step(const uint8_t *buf, const size_t idx, utf8_checker &utf8_checker); - - // - // Parse the entire input in STEP_SIZE-byte chunks. - // - template - really_inline void scan(const uint8_t *buf, const size_t len, utf8_checker &utf8_checker); -}; - -// Routines to print masks and text for debugging bitmask operations -UNUSED static char * format_input_text(const simd8x64 in) { - static char *buf = (char*)malloc(sizeof(simd8x64) + 1); - in.store((uint8_t*)buf); - for (size_t i=0; i); i++) { - if (buf[i] < ' ') { buf[i] = '_'; } - } - buf[sizeof(simd8x64)] = '\0'; - return buf; -} - -UNUSED static char * format_mask(uint64_t mask) { - static char *buf = (char*)malloc(64 + 1); - for (size_t i=0; i<64; i++) { - buf[i] = (mask & (size_t(1) << i)) ? 'X' : ' '; - } - buf[64] = '\0'; - return buf; -} - -// -// Finds escaped characters (characters following \). 
-// -// Handles runs of backslashes like \\\" and \\\\" correctly (yielding 0101 and 01010, respectively). -// -// Does this by: -// - Shift the escape mask to get potentially escaped characters (characters after backslashes). -// - Mask escaped sequences that start on *even* bits with 1010101010 (odd bits are escaped, even bits are not) -// - Mask escaped sequences that start on *odd* bits with 0101010101 (even bits are escaped, odd bits are not) -// -// To distinguish between escaped sequences starting on even/odd bits, it finds the start of all -// escape sequences, filters out the ones that start on even bits, and adds that to the mask of -// escape sequences. This causes the addition to clear out the sequences starting on odd bits (since -// the start bit causes a carry), and leaves even-bit sequences alone. -// -// Example: -// -// text | \\\ | \\\"\\\" \\\" \\"\\" | -// escape | xxx | xx xxx xxx xx xx | Removed overflow backslash; will | it into follows_escape -// odd_starts | x | x x x | escape & ~even_bits & ~follows_escape -// even_seq | c| cxxx c xx c | c = carry bit -- will be masked out later -// invert_mask | | cxxx c xx c| even_seq << 1 -// follows_escape | xx | x xx xxx xxx xx xx | Includes overflow bit -// escaped | x | x x x x x x x x | -// desired | x | x x x x x x x x | -// text | \\\ | \\\"\\\" \\\" \\"\\" | -// -really_inline uint64_t find_escaped(uint64_t escape, uint64_t &escaped_overflow) { - // If there was overflow, pretend the first character isn't a backslash - escape &= ~escaped_overflow; - uint64_t follows_escape = escape << 1 | escaped_overflow; - - // Get sequences starting on even bits by clearing out the odd series using + - const uint64_t even_bits = 0x5555555555555555ULL; - uint64_t odd_sequence_starts = escape & ~even_bits & ~follows_escape; - uint64_t sequences_starting_on_even_bits; - escaped_overflow = add_overflow(odd_sequence_starts, escape, &sequences_starting_on_even_bits); - uint64_t invert_mask = 
sequences_starting_on_even_bits << 1; // The mask we want to return is the *escaped* bits, not escapes. - - // Mask every other backslashed character as an escaped character - // Flip the mask for sequences that start on even bits, to correct them - return (even_bits ^ invert_mask) & follows_escape; -} - -// -// Check if the current character immediately follows a matching character. -// -// For example, this checks for quotes with backslashes in front of them: -// -// const uint64_t backslashed_quote = in.eq('"') & immediately_follows(in.eq('\'), prev_backslash); -// -really_inline uint64_t follows(const uint64_t match, uint64_t &overflow) { - const uint64_t result = match << 1 | overflow; - overflow = match >> 63; - return result; -} - -// -// Check if the current character follows a matching character, with possible "filler" between. -// For example, this checks for empty curly braces, e.g. -// -// in.eq('}') & follows(in.eq('['), in.eq(' '), prev_empty_array) // { * } -// -really_inline uint64_t follows(const uint64_t match, const uint64_t filler, uint64_t &overflow) { - uint64_t follows_match = follows(match, overflow); - uint64_t result; - overflow |= add_overflow(follows_match, filler, &result); - return result; -} - -really_inline ErrorValues json_structural_scanner::detect_errors_on_eof(bool streaming) { - if ((prev_in_string) and (not streaming)) { - return UNCLOSED_STRING; - } - if (unescaped_chars_error) { - return UNESCAPED_CHARS; - } - return SUCCESS; -} - -// -// Return a mask of all string characters plus end quotes. -// -// prev_escaped is overflow saying whether the next character is escaped. -// prev_in_string is overflow saying whether we're still in a string. -// -// Backslash sequences outside of quotes will be detected in stage 2. 
-// -really_inline uint64_t json_structural_scanner::find_strings(const simd::simd8x64 in) { - const uint64_t backslash = in.eq('\\'); - const uint64_t escaped = find_escaped(backslash, prev_escaped); - const uint64_t quote = in.eq('"') & ~escaped; - // prefix_xor flips on bits inside the string (and flips off the end quote). - const uint64_t in_string = prefix_xor(quote) ^ prev_in_string; - /* right shift of a signed value expected to be well-defined and standard - * compliant as of C++20, - * John Regher from Utah U. says this is fine code */ - prev_in_string = static_cast(static_cast(in_string) >> 63); - // Use ^ to turn the beginning quote off, and the end quote on. - return in_string ^ quote; -} - -// -// Determine which characters are *structural*: -// - braces: [] and {} -// - the start of primitives (123, true, false, null) -// - the start of invalid non-whitespace (+, &, ture, UTF-8) -// -// Also detects value sequence errors: -// - two values with no separator between ("hello" "world") -// - separators with no values ([1,] [1,,]and [,2]) -// -// This method will find all of the above whether it is in a string or not. -// -// To reduce dependency on the expensive "what is in a string" computation, this method treats the -// contents of a string the same as content outside. Errors and structurals inside the string or on -// the trailing quote will need to be removed later when the correct string information is known. -// -really_inline uint64_t json_structural_scanner::find_potential_structurals(const simd::simd8x64 in) { - // These use SIMD so let's kick them off before running the regular 64-bit stuff ... - uint64_t whitespace, op; - find_whitespace_and_operators(in, whitespace, op); - - // Detect the start of a run of primitive characters. Includes numbers, booleans, and strings ("). - // Everything except whitespace, braces, colon and comma. 
- const uint64_t primitive = ~(op | whitespace); - const uint64_t follows_primitive = follows(primitive, prev_primitive); - const uint64_t start_primitive = primitive & ~follows_primitive; - - // Return final structurals - return op | start_primitive; -} - -// -// Find the important bits of JSON in a 128-byte chunk, and add them to structural_indexes. -// -// PERF NOTES: -// We pipe 2 inputs through these stages: -// 1. Load JSON into registers. This takes a long time and is highly parallelizable, so we load -// 2 inputs' worth at once so that by the time step 2 is looking for them input, it's available. -// 2. Scan the JSON for critical data: strings, primitives and operators. This is the critical path. -// The output of step 1 depends entirely on this information. These functions don't quite use -// up enough CPU: the second half of the functions is highly serial, only using 1 execution core -// at a time. The second input's scans has some dependency on the first ones finishing it, but -// they can make a lot of progress before they need that information. -// 3. Step 1 doesn't use enough capacity, so we run some extra stuff while we're waiting for that -// to finish: utf-8 checks and generating the output from the last iteration. -// -// The reason we run 2 inputs at a time, is steps 2 and 3 are *still* not enough to soak up all -// available capacity with just one input. Running 2 at a time seems to give the CPU a good enough -// workout. -// -template<> -really_inline void json_structural_scanner::scan_step<128>(const uint8_t *buf, const size_t idx, utf8_checker &utf8_checker) { - // - // Load up all 128 bytes into SIMD registers - // - simd::simd8x64 in_1(buf); - simd::simd8x64 in_2(buf+64); - - // - // Find the strings and potential structurals (operators / primitives). - // - // This will include false structurals that are *inside* strings--we'll filter strings out - // before we return. 
- // - uint64_t string_1 = this->find_strings(in_1); - uint64_t structurals_1 = this->find_potential_structurals(in_1); - uint64_t string_2 = this->find_strings(in_2); - uint64_t structurals_2 = this->find_potential_structurals(in_2); - - // - // Do miscellaneous work while the processor is busy calculating strings and structurals. - // - // After that, weed out structurals that are inside strings and find invalid string characters. - // - uint64_t unescaped_1 = in_1.lteq(0x1F); - utf8_checker.check_next_input(in_1); - this->structural_indexes.write_indexes(idx-64, this->prev_structurals); // Output *last* iteration's structurals to ParsedJson - this->prev_structurals = structurals_1 & ~string_1; - this->unescaped_chars_error |= unescaped_1 & string_1; - - uint64_t unescaped_2 = in_2.lteq(0x1F); - utf8_checker.check_next_input(in_2); - this->structural_indexes.write_indexes(idx, this->prev_structurals); // Output *last* iteration's structurals to ParsedJson - this->prev_structurals = structurals_2 & ~string_2; - this->unescaped_chars_error |= unescaped_2 & string_2; -} - -// -// Find the important bits of JSON in a 64-byte chunk, and add them to structural_indexes. -// -template<> -really_inline void json_structural_scanner::scan_step<64>(const uint8_t *buf, const size_t idx, utf8_checker &utf8_checker) { - // - // Load up bytes into SIMD registers - // - simd::simd8x64 in_1(buf); - - // - // Find the strings and potential structurals (operators / primitives). - // - // This will include false structurals that are *inside* strings--we'll filter strings out - // before we return. - // - uint64_t string_1 = this->find_strings(in_1); - uint64_t structurals_1 = this->find_potential_structurals(in_1); - - // - // Do miscellaneous work while the processor is busy calculating strings and structurals. - // - // After that, weed out structurals that are inside strings and find invalid string characters. 
- // - uint64_t unescaped_1 = in_1.lteq(0x1F); - utf8_checker.check_next_input(in_1); - this->structural_indexes.write_indexes(idx-64, this->prev_structurals); // Output *last* iteration's structurals to ParsedJson - this->prev_structurals = structurals_1 & ~string_1; - this->unescaped_chars_error |= unescaped_1 & string_1; -} - -template -really_inline void json_structural_scanner::scan(const uint8_t *buf, const size_t len, utf8_checker &utf8_checker) { - size_t lenminusstep = len < STEP_SIZE ? 0 : len - STEP_SIZE; - size_t idx = 0; - - for (; idx < lenminusstep; idx += STEP_SIZE) { - this->scan_step(&buf[idx], idx, utf8_checker); - } - - /* If we have a final chunk of less than STEP_SIZE bytes, pad it to STEP_SIZE with - * spaces before processing it (otherwise, we risk invalidating the UTF-8 - * checks). */ - if (likely(idx < len)) { - uint8_t tmp_buf[STEP_SIZE]; - memset(tmp_buf, 0x20, STEP_SIZE); - memcpy(tmp_buf, buf + idx, len - idx); - this->scan_step(&tmp_buf[0], idx, utf8_checker); - idx += STEP_SIZE; - } - - /* finally, flatten out the remaining structurals from the last iteration */ - this->structural_indexes.write_indexes(idx-64, this->prev_structurals); -} - -// Setting the streaming parameter to true allows the find_structural_bits to tolerate unclosed strings. -// The caller should still ensure that the input is valid UTF-8. If you are processing substrings, -// you may want to call on a function like trimmed_length_safe_utf8. 
-template -int find_structural_bits(const uint8_t *buf, size_t len, simdjson::ParsedJson &pj, bool streaming) { - if (unlikely(len > pj.byte_capacity)) { - return simdjson::CAPACITY; - } - utf8_checker utf8_checker{}; - json_structural_scanner scanner{pj.structural_indexes.get()}; - scanner.scan(buf, len, utf8_checker); - // we might tolerate an unclosed string if streaming is true - simdjson::ErrorValues error = scanner.detect_errors_on_eof(streaming); - if (unlikely(error != simdjson::SUCCESS)) { - return error; - } - pj.n_structural_indexes = scanner.structural_indexes.tail - pj.structural_indexes.get(); - /* a valid JSON file cannot have zero structural indexes - we should have - * found something */ - if (unlikely(pj.n_structural_indexes == 0u)) { - return simdjson::EMPTY; - } - if (unlikely(pj.structural_indexes[pj.n_structural_indexes - 1] > len)) { - return simdjson::UNEXPECTED_ERROR; - } - if (len != pj.structural_indexes[pj.n_structural_indexes - 1]) { - /* the string might not be NULL terminated, but we add a virtual NULL - * ending character. 
*/ - pj.structural_indexes[pj.n_structural_indexes++] = len; - } - /* make it safe to dereference one beyond this array */ - pj.structural_indexes[pj.n_structural_indexes] = 0; - return utf8_checker.errors(); -} - -} // namespace stage1 - -} // namespace simdjson::arm64 - -namespace simdjson { - -template <> -int find_structural_bits(const uint8_t *buf, size_t len, simdjson::ParsedJson &pj, bool streaming) { - return arm64::stage1::find_structural_bits<64>(buf, len, pj, streaming); -} - -} // namespace simdjson - -#endif // IS_ARM64 -#endif // SIMDJSON_ARM64_STAGE1_FIND_MARKS_H -/* end file src/arm64/stage1_find_marks.h */ -/* begin file src/haswell/stage1_find_marks.h */ -#ifndef SIMDJSON_HASWELL_STAGE1_FIND_MARKS_H -#define SIMDJSON_HASWELL_STAGE1_FIND_MARKS_H - +/* begin file src/westmere/bitmanipulation.h */ +#ifndef SIMDJSON_WESTMERE_BITMANIPULATION_H +#define SIMDJSON_WESTMERE_BITMANIPULATION_H #ifdef IS_X86_64 +/* westmere/intrinsics.h already included: #include "westmere/intrinsics.h" */ +TARGET_WESTMERE +namespace simdjson::westmere { -TARGET_HASWELL -namespace simdjson::haswell { - -using namespace simd; - -really_inline void find_whitespace_and_operators(simd8x64 in, uint64_t &whitespace, uint64_t &op) { - - // These lookups rely on the fact that anything < 127 will match the lower 4 bits, which is why - // we can't use the generic lookup_16. 
- auto whitespace_table = simd8::repeat_16(' ', 100, 100, 100, 17, 100, 113, 2, 100, '\t', '\n', 112, 100, '\r', 100, 100); - auto op_table = simd8::repeat_16(',', '}', 0, 0, 0xc0u, 0, 0, 0, 0, 0, 0, 0, 0, 0, ':', '{'); - - whitespace = in.map([&](simd8 _in) { - return _in == simd8(_mm256_shuffle_epi8(whitespace_table, _in)); - }).to_bitmask(); - - op = in.map([&](simd8 _in) { - // | 32 handles the fact that { } and [ ] are exactly 32 bytes apart - return (_in | 32) == simd8(_mm256_shuffle_epi8(op_table, _in-',')); - }).to_bitmask(); +#ifndef _MSC_VER +// We sometimes call trailing_zero on inputs that are zero, +// but the algorithms do not end up using the returned value. +// Sadly, sanitizers are not smart enough to figure it out. +__attribute__((no_sanitize("undefined"))) // this is deliberate +#endif +/* result might be undefined when input_num is zero */ +really_inline int trailing_zeroes(uint64_t input_num) { +#ifdef _MSC_VER + unsigned long ret; + // Search the mask data from least significant bit (LSB) + // to the most significant bit (MSB) for a set bit (1). + _BitScanForward64(&ret, input_num); + return (int)ret; +#else + return __builtin_ctzll(input_num); +#endif// _MSC_VER } -really_inline bool is_ascii(simd8x64 input) { - simd8 bits = input.reduce([&](auto a,auto b) { return a|b; }); - return !bits.any_bits_set_anywhere(0b10000000u); +/* result might be undefined when input_num is zero */ +really_inline uint64_t clear_lowest_bit(uint64_t input_num) { + return input_num & (input_num-1); } -really_inline simd8 must_be_continuation(simd8 prev1, simd8 prev2, simd8 prev3) { - simd8 is_second_byte = prev1.saturating_sub(0b11000000u-1); // Only 11______ will be > 0 - simd8 is_third_byte = prev2.saturating_sub(0b11100000u-1); // Only 111_____ will be > 0 - simd8 is_fourth_byte = prev3.saturating_sub(0b11110000u-1); // Only 1111____ will be > 0 - // Caller requires a bool (all 1's). 
All values resulting from the subtraction will be <= 64, so signed comparison is fine. - return simd8(is_second_byte | is_third_byte | is_fourth_byte) > int8_t(0); +/* result might be undefined when input_num is zero */ +really_inline int leading_zeroes(uint64_t input_num) { +#ifdef _MSC_VER + unsigned long leading_zero = 0; + // Search the mask data from most significant bit (MSB) + // to least significant bit (LSB) for a set bit (1). + if (_BitScanReverse64(&leading_zero, input_num)) + return (int)(63 - leading_zero); + else + return 64; +#else + return __builtin_clzll(input_num); +#endif// _MSC_VER } -// -// Detect Unicode errors. -// -// UTF-8 is designed to allow multiple bytes and be compatible with ASCII. It's a fairly basic -// encoding that uses the first few bits on each byte to denote a "byte type", and all other bits -// are straight up concatenated into the final value. The first byte of a multibyte character is a -// "leading byte" and starts with N 1's, where N is the total number of bytes (110_____ = 2 byte -// lead). The remaining bytes of a multibyte character all start with 10. 1-byte characters just -// start with 0, because that's what ASCII looks like. Here's what each size -// -// - ASCII (7 bits): 0_______ -// - 2 byte character (11 bits): 110_____ 10______ -// - 3 byte character (17 bits): 1110____ 10______ 10______ -// - 4 byte character (23 bits): 11110___ 10______ 10______ 10______ -// - 5+ byte character (illegal): 11111___ -// -// There are 5 classes of error that can happen in Unicode: -// -// - TOO_SHORT: when you have a multibyte character with too few bytes (i.e. missing continuation). -// We detect this by looking for new characters (lead bytes) inside the range of a multibyte -// character. -// -// e.g. 11000000 01100001 (2-byte character where second byte is ASCII) -// -// - TOO_LONG: when there are more bytes in your character than you need (i.e. extra continuation). 
-// We detect this by requiring that the next byte after your multibyte character be a new -// character--so a continuation after your character is wrong. -// -// e.g. 11011111 10111111 10111111 (2-byte character followed by *another* continuation byte) -// -// - TOO_LARGE: Unicode only goes up to U+10FFFF. These characters are too large. -// -// e.g. 11110111 10111111 10111111 10111111 (bigger than 10FFFF). -// -// - OVERLONG: multibyte characters with a bunch of leading zeroes, where you could have -// used fewer bytes to make the same character. Like encoding an ASCII character in 4 bytes is -// technically possible, but UTF-8 disallows it so that there is only one way to write an "a". -// -// e.g. 11000001 10100001 (2-byte encoding of "a", which only requires 1 byte: 01100001) -// -// - SURROGATE: Unicode U+D800-U+DFFF is a *surrogate* character, reserved for use in UCS-2 and -// WTF-8 encodings for characters with > 2 bytes. These are illegal in pure UTF-8. -// -// e.g. 11101101 10100000 10000000 (U+D800) -// -// - INVALID_5_BYTE: 5-byte, 6-byte, 7-byte and 8-byte characters are unsupported; Unicode does not -// support values with more than 23 bits (which a 4-byte character supports). -// -// e.g. 11111000 10100000 10000000 10000000 10000000 (U+800000) -// -// Legal utf-8 byte sequences per http://www.unicode.org/versions/Unicode6.0.0/ch03.pdf - page 94: -// -// Code Points 1st 2s 3s 4s -// U+0000..U+007F 00..7F -// U+0080..U+07FF C2..DF 80..BF -// U+0800..U+0FFF E0 A0..BF 80..BF -// U+1000..U+CFFF E1..EC 80..BF 80..BF -// U+D000..U+D7FF ED 80..9F 80..BF -// U+E000..U+FFFF EE..EF 80..BF 80..BF -// U+10000..U+3FFFF F0 90..BF 80..BF 80..BF -// U+40000..U+FFFFF F1..F3 80..BF 80..BF 80..BF -// U+100000..U+10FFFF F4 80..8F 80..BF 80..BF -// -using namespace simd; - -namespace utf8_validation { - - // - // Find special case UTF-8 errors where the character is technically readable (has the right length) - // but the *value* is disallowed. 
- // - // This includes overlong encodings, surrogates and values too large for Unicode. - // - // It turns out the bad character ranges can all be detected by looking at the first 12 bits of the - // UTF-8 encoded character (i.e. all of byte 1, and the high 4 bits of byte 2). This algorithm does a - // 3 4-bit table lookups, identifying which errors that 4 bits could match, and then &'s them together. - // If all 3 lookups detect the same error, it's an error. - // - really_inline simd8 check_special_cases(const simd8 input, const simd8 prev1) { - // - // These are the errors we're going to match for bytes 1-2, by looking at the first three - // nibbles of the character: > & & - // - static const int OVERLONG_2 = 0x01; // 1100000_ 10______ (technically we match 10______ but we could match ________, they both yield errors either way) - static const int OVERLONG_3 = 0x02; // 11100000 100_____ ________ - static const int OVERLONG_4 = 0x04; // 11110000 1000____ ________ ________ - static const int SURROGATE = 0x08; // 11101101 [101_]____ - static const int TOO_LARGE = 0x10; // 11110100 (1001|101_)____ - static const int TOO_LARGE_2 = 0x20; // 1111(1___|011_|0101) 10______ - - // After processing the rest of byte 1 (the low bits), we're still not done--we have to check - // byte 2 to be sure which things are errors and which aren't. 
- // Since high_bits is byte 5, byte 2 is high_bits.prev<3> - static const int CARRY = OVERLONG_2 | TOO_LARGE_2; - const simd8 byte_2_high = input.shr<4>().lookup_16( - // ASCII: ________ [0___]____ - CARRY, CARRY, CARRY, CARRY, - // ASCII: ________ [0___]____ - CARRY, CARRY, CARRY, CARRY, - // Continuations: ________ [10__]____ - CARRY | OVERLONG_3 | OVERLONG_4, // ________ [1000]____ - CARRY | OVERLONG_3 | TOO_LARGE, // ________ [1001]____ - CARRY | TOO_LARGE | SURROGATE, // ________ [1010]____ - CARRY | TOO_LARGE | SURROGATE, // ________ [1011]____ - // Multibyte Leads: ________ [11__]____ - CARRY, CARRY, CARRY, CARRY - ); - - const simd8 byte_1_high = prev1.shr<4>().lookup_16( - // [0___]____ (ASCII) - 0, 0, 0, 0, - 0, 0, 0, 0, - // [10__]____ (continuation) - 0, 0, 0, 0, - // [11__]____ (2+-byte leads) - OVERLONG_2, 0, // [110_]____ (2-byte lead) - OVERLONG_3 | SURROGATE, // [1110]____ (3-byte lead) - OVERLONG_4 | TOO_LARGE | TOO_LARGE_2 // [1111]____ (4+-byte lead) - ); - - const simd8 byte_1_low = (prev1 & 0x0F).lookup_16( - // ____[00__] ________ - OVERLONG_2 | OVERLONG_3 | OVERLONG_4, // ____[0000] ________ - OVERLONG_2, // ____[0001] ________ - 0, 0, - // ____[01__] ________ - TOO_LARGE, // ____[0100] ________ - TOO_LARGE_2, - TOO_LARGE_2, - TOO_LARGE_2, - // ____[10__] ________ - TOO_LARGE_2, TOO_LARGE_2, TOO_LARGE_2, TOO_LARGE_2, - // ____[11__] ________ - TOO_LARGE_2, - TOO_LARGE_2 | SURROGATE, // ____[1101] ________ - TOO_LARGE_2, TOO_LARGE_2 - ); - - return byte_1_high & byte_1_low & byte_2_high; - } - - // - // Validate the length of multibyte characters (that each multibyte character has the right number - // of continuation characters, and that all continuation characters are part of a multibyte - // character). - // - // Algorithm - // ========= - // - // This algorithm compares *expected* continuation characters with *actual* continuation bytes, - // and emits an error anytime there is a mismatch. 
- // - // For example, in the string "𝄞₿֏ab", which has a 4-, 3-, 2- and 1-byte - // characters, the file will look like this: - // - // | Character | 𝄞 | | | | ₿ | | | ֏ | | a | b | - // |-----------------------|----|----|----|----|----|----|----|----|----|----|----| - // | Character Length | 4 | | | | 3 | | | 2 | | 1 | 1 | - // | Byte | F0 | 9D | 84 | 9E | E2 | 82 | BF | D6 | 8F | 61 | 62 | - // | is_second_byte | | X | | | | X | | | X | | | - // | is_third_byte | | | X | | | | X | | | | | - // | is_fourth_byte | | | | X | | | | | | | | - // | expected_continuation | | X | X | X | | X | X | | X | | | - // | is_continuation | | X | X | X | | X | X | | X | | | - // - // The errors here are basically (Second Byte OR Third Byte OR Fourth Byte == Continuation): - // - // - **Extra Continuations:** Any continuation that is not a second, third or fourth byte is not - // part of a valid 2-, 3- or 4-byte character and is thus an error. It could be that it's just - // floating around extra outside of any character, or that there is an illegal 5-byte character, - // or maybe it's at the beginning of the file before any characters have started; but it's an - // error in all these cases. - // - **Missing Continuations:** Any second, third or fourth byte that *isn't* a continuation is an error, because that means - // we started a new character before we were finished with the current one. - // - // Getting the Previous Bytes - // -------------------------- - // - // Because we want to know if a byte is the *second* (or third, or fourth) byte of a multibyte - // character, we need to "shift the bytes" to find that out. This is what they mean: - // - // - `is_continuation`: if the current byte is a continuation. - // - `is_second_byte`: if 1 byte back is the start of a 2-, 3- or 4-byte character. - // - `is_third_byte`: if 2 bytes back is the start of a 3- or 4-byte character. - // - `is_fourth_byte`: if 3 bytes back is the start of a 4-byte character. 
- // - // We use shuffles to go n bytes back, selecting part of the current `input` and part of the - // `prev_input` (search for `.prev<1>`, `.prev<2>`, etc.). These are passed in by the caller - // function, because the 1-byte-back data is used by other checks as well. - // - // Getting the Continuation Mask - // ----------------------------- - // - // Once we have the right bytes, we have to get the masks. To do this, we treat UTF-8 bytes as - // numbers, using signed `<` and `>` operations to check if they are continuations or leads. - // In fact, we treat the numbers as *signed*, partly because it helps us, and partly because - // Intel's SIMD presently only offers signed `<` and `>` operations (not unsigned ones). - // - // In UTF-8, bytes that start with the bits 110, 1110 and 11110 are 2-, 3- and 4-byte "leads," - // respectively, meaning they expect to have 1, 2 and 3 "continuation bytes" after them. - // Continuation bytes start with 10, and ASCII (1-byte characters) starts with 0. - // - // When treated as signed numbers, they look like this: - // - // | Type | High Bits | Binary Range | Signed | - // |--------------|------------|--------------|--------| - // | ASCII | `0` | `01111111` | 127 | - // | | | `00000000` | 0 | - // | 4+-Byte Lead | `1111` | `11111111` | -1 | - // | | | `11110000 | -16 | - // | 3-Byte Lead | `1110` | `11101111` | -17 | - // | | | `11100000 | -32 | - // | 2-Byte Lead | `110` | `11011111` | -33 | - // | | | `11000000 | -64 | - // | Continuation | `10` | `10111111` | -65 | - // | | | `10000000 | -128 | - // - // This makes it pretty easy to get the continuation mask! It's just a single comparison: - // - // ``` - // is_continuation = input < -64` - // ``` - // - // We can do something similar for the others, but it takes two comparisons instead of one: "is - // the start of a 4-byte character" is `< -32` and `> -65`, for example. And 2+ bytes is `< 0` and - // `> -64`. Surely we can do better, they're right next to each other! 
- // - // Getting the is_xxx Masks: Shifting the Range - // -------------------------------------------- - // - // Notice *why* continuations were a single comparison. The actual *range* would require two - // comparisons--`< -64` and `> -129`--but all characters are always greater than -128, so we get - // that for free. In fact, if we had *unsigned* comparisons, 2+, 3+ and 4+ comparisons would be - // just as easy: 4+ would be `> 239`, 3+ would be `> 223`, and 2+ would be `> 191`. - // - // Instead, we add 128 to each byte, shifting the range up to make comparison easy. This wraps - // ASCII down into the negative, and puts 4+-Byte Lead at the top: - // - // | Type | High Bits | Binary Range | Signed | - // |----------------------|------------|--------------|-------| - // | 4+-Byte Lead (+ 127) | `0111` | `01111111` | 127 | - // | | | `01110000 | 112 | - // |----------------------|------------|--------------|-------| - // | 3-Byte Lead (+ 127) | `0110` | `01101111` | 111 | - // | | | `01100000 | 96 | - // |----------------------|------------|--------------|-------| - // | 2-Byte Lead (+ 127) | `010` | `01011111` | 95 | - // | | | `01000000 | 64 | - // |----------------------|------------|--------------|-------| - // | Continuation (+ 127) | `00` | `00111111` | 63 | - // | | | `00000000 | 0 | - // |----------------------|------------|--------------|-------| - // | ASCII (+ 127) | `1` | `11111111` | -1 | - // | | | `10000000` | -128 | - // |----------------------|------------|--------------|-------| - // - // *Now* we can use signed `>` on all of them: - // - // ``` - // prev1 = input.prev<1> - // prev2 = input.prev<2> - // prev3 = input.prev<3> - // prev1_flipped = input.prev<1>(prev_input) ^ 0x80; // Same as `+ 128` - // prev2_flipped = input.prev<2>(prev_input) ^ 0x80; // Same as `+ 128` - // prev3_flipped = input.prev<3>(prev_input) ^ 0x80; // Same as `+ 128` - // is_second_byte = prev1_flipped > 63; // 2+-byte lead - // is_third_byte = prev2_flipped > 95; // 
3+-byte lead - // is_fourth_byte = prev3_flipped > 111; // 4+-byte lead - // ``` - // - // NOTE: we use `^ 0x80` instead of `+ 128` in the code, which accomplishes the same thing, and even takes the same number - // of cycles as `+`, but on many Intel architectures can be parallelized better (you can do 3 - // `^`'s at a time on Haswell, but only 2 `+`'s). - // - // That doesn't look like it saved us any instructions, did it? Well, because we're adding the - // same number to all of them, we can save one of those `+ 128` operations by assembling - // `prev2_flipped` out of prev 1 and prev 3 instead of assembling it from input and adding 128 - // to it. One more instruction saved! - // - // ``` - // prev1 = input.prev<1> - // prev3 = input.prev<3> - // prev1_flipped = prev1 ^ 0x80; // Same as `+ 128` - // prev3_flipped = prev3 ^ 0x80; // Same as `+ 128` - // prev2_flipped = prev1_flipped.concat<2>(prev3_flipped): // | C -> ^ D, or - // you do B | C -> | A -> ^ D. But addition and subtraction *are* associative: (A + B + C) - D can - // be written as `(A + B) + (C - D)`. This means you can do A + B and C - D at the same time, and - // then adds the result together. Same number of operations, but if the processor can run - // independent things in parallel (which most can), it runs faster. - // - // This doesn't help us on Intel, but might help us elsewhere: on Haswell, at least, | and ^ have - // a super nice advantage in that more of them can be run at the same time (they can run on 3 - // ports, while + and - can run on 2)! This means that we can do A | B while we're still doing C, - // saving us the cycle we would have earned by using +. Even more, using an instruction with a - // wider array of ports can help *other* code run ahead, too, since these instructions can "get - // out of the way," running on a port other instructions can't. 
- // - // Epilogue II: One More Trick - // --------------------------- - // - // There's one more relevant trick up our sleeve, it turns out: it turns out on Intel we can "pay - // for" the (prev<1> + 128) instruction, because it can be used to save an instruction in - // check_special_cases()--but we'll talk about that there :) - // - really_inline simd8 check_multibyte_lengths(simd8 input, simd8 prev_input, simd8 prev1) { - simd8 prev2 = input.prev<2>(prev_input); - simd8 prev3 = input.prev<3>(prev_input); - - // Cont is 10000000-101111111 (-65...-128) - simd8 is_continuation = simd8(input) < int8_t(-64); - // must_be_continuation is architecture-specific because Intel doesn't have unsigned comparisons - return simd8(must_be_continuation(prev1, prev2, prev3) ^ is_continuation); - } - - // - // Return nonzero if there are incomplete multibyte characters at the end of the block: - // e.g. if there is a 4-byte character, but it's 3 bytes from the end. - // - really_inline simd8 is_incomplete(simd8 input) { - // If the previous input's last 3 bytes match this, they're too short (they ended at EOF): - // ... 1111____ 111_____ 11______ - static const uint8_t max_array[32] = { - 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 0b11110000u-1, 0b11100000u-1, 0b11000000u-1 - }; - const simd8 max_value(&max_array[sizeof(max_array)-sizeof(simd8)]); - return input.gt_bits(max_value); - } - - struct utf8_checker { - // If this is nonzero, there has been a UTF-8 error. - simd8 error; - // The last input we received - simd8 prev_input_block; - // Whether the last input we received was incomplete (used for ASCII fast path) - simd8 prev_incomplete; - - // - // Check whether the current bytes are valid UTF-8. 
- // - really_inline void check_utf8_bytes(const simd8 input, const simd8 prev_input) { - // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ lead bytes - // (2, 3, 4-byte leads become large positive numbers instead of small negative numbers) - simd8 prev1 = input.prev<1>(prev_input); - this->error |= check_special_cases(input, prev1); - this->error |= check_multibyte_lengths(input, prev_input, prev1); - } - - // The only problem that can happen at EOF is that a multibyte character is too short. - really_inline void check_eof() { - // If the previous block had incomplete UTF-8 characters at the end, an ASCII block can't - // possibly finish them. - this->error |= this->prev_incomplete; - } - - really_inline void check_next_input(simd8x64 input) { - if (likely(is_ascii(input))) { - // If the previous block had incomplete UTF-8 characters at the end, an ASCII block can't - // possibly finish them. - this->error |= this->prev_incomplete; - } else { - this->check_utf8_bytes(input.chunks[0], this->prev_input_block); - for (int i=1; i::NUM_CHUNKS; i++) { - this->check_utf8_bytes(input.chunks[i], input.chunks[i-1]); - } - this->prev_incomplete = is_incomplete(input.chunks[simd8x64::NUM_CHUNKS-1]); - this->prev_input_block = input.chunks[simd8x64::NUM_CHUNKS-1]; - } - } - - really_inline ErrorValues errors() { - return this->error.any_bits_set_anywhere() ? 
simdjson::UTF8_ERROR : simdjson::SUCCESS; - } - - }; // struct utf8_checker +really_inline int hamming(uint64_t input_num) { +#ifdef _MSC_VER + // note: we do not support legacy 32-bit Windows + return __popcnt64(input_num);// Visual Studio wants two underscores +#else + return _popcnt64(input_num); +#endif } -using utf8_validation::utf8_checker; -// This file contains the common code every implementation uses in stage1 -// It is intended to be included multiple times and compiled multiple times -// We assume the file in which it is included already includes -// "simdjson/stage1_find_marks.h" (this simplifies amalgation) - -namespace stage1 { - -class bit_indexer { -public: - uint32_t *tail; - - bit_indexer(uint32_t *index_buf) : tail(index_buf) {} - - // flatten out values in 'bits' assuming that they are are to have values of idx - // plus their position in the bitvector, and store these indexes at - // base_ptr[base] incrementing base as we go - // will potentially store extra values beyond end of valid bits, so base_ptr - // needs to be large enough to handle this - really_inline void write_indexes(uint32_t idx, uint64_t bits) { - // In some instances, the next branch is expensive because it is mispredicted. - // Unfortunately, in other cases, - // it helps tremendously. - if (bits == 0) - return; - uint32_t cnt = hamming(bits); - - // Do the first 8 all together - for (int i=0; i<8; i++) { - this->tail[i] = idx + trailing_zeroes(bits); - bits = clear_lowest_bit(bits); - } - - // Do the next 8 all together (we hope in most cases it won't happen at all - // and the branch is easily predicted). - if (unlikely(cnt > 8)) { - for (int i=8; i<16; i++) { - this->tail[i] = idx + trailing_zeroes(bits); - bits = clear_lowest_bit(bits); - } - - // Most files don't have 16+ structurals per block, so we take several basically guaranteed - // branch mispredictions here. 
16+ structurals per block means either punctuation ({} [] , :) - // or the start of a value ("abc" true 123) every four characters. - if (unlikely(cnt > 16)) { - uint32_t i = 16; - do { - this->tail[i] = idx + trailing_zeroes(bits); - bits = clear_lowest_bit(bits); - i++; - } while (i < cnt); - } - } - - this->tail += cnt; - } -}; - -class json_structural_scanner { -public: - // Whether the first character of the next iteration is escaped. - uint64_t prev_escaped = 0ULL; - // Whether the last iteration was still inside a string (all 1's = true, all 0's = false). - uint64_t prev_in_string = 0ULL; - // Whether the last character of the previous iteration is a primitive value character - // (anything except whitespace, braces, comma or colon). - uint64_t prev_primitive = 0ULL; - // Mask of structural characters from the last iteration. - // Kept around for performance reasons, so we can call flatten_bits to soak up some unused - // CPU capacity while the next iteration is busy with an expensive clmul in compute_quote_mask. - uint64_t prev_structurals = 0; - // Errors with unescaped characters in strings (ASCII codepoints < 0x20) - uint64_t unescaped_chars_error = 0; - bit_indexer structural_indexes; - - json_structural_scanner(uint32_t *_structural_indexes) : structural_indexes{_structural_indexes} {} - - // - // Finish the scan and return any errors. - // - // This may detect errors as well, such as unclosed string and certain UTF-8 errors. - // if streaming is set to true, an unclosed string is allowed. - // - really_inline ErrorValues detect_errors_on_eof(bool streaming = false); - - // - // Return a mask of all string characters plus end quotes. - // - // prev_escaped is overflow saying whether the next character is escaped. - // prev_in_string is overflow saying whether we're still in a string. - // - // Backslash sequences outside of quotes will be detected in stage 2. 
- // - really_inline uint64_t find_strings(const simd::simd8x64 in); - - // - // Determine which characters are *structural*: - // - braces: [] and {} - // - the start of primitives (123, true, false, null) - // - the start of invalid non-whitespace (+, &, ture, UTF-8) - // - // Also detects value sequence errors: - // - two values with no separator between ("hello" "world") - // - separators with no values ([1,] [1,,]and [,2]) - // - // This method will find all of the above whether it is in a string or not. - // - // To reduce dependency on the expensive "what is in a string" computation, this method treats the - // contents of a string the same as content outside. Errors and structurals inside the string or on - // the trailing quote will need to be removed later when the correct string information is known. - // - really_inline uint64_t find_potential_structurals(const simd::simd8x64 in); - - // - // Find the important bits of JSON in a STEP_SIZE-byte chunk, and add them to structural_indexes. - // - template - really_inline void scan_step(const uint8_t *buf, const size_t idx, utf8_checker &utf8_checker); - - // - // Parse the entire input in STEP_SIZE-byte chunks. 
- // - template - really_inline void scan(const uint8_t *buf, const size_t len, utf8_checker &utf8_checker); -}; - -// Routines to print masks and text for debugging bitmask operations -UNUSED static char * format_input_text(const simd8x64 in) { - static char *buf = (char*)malloc(sizeof(simd8x64) + 1); - in.store((uint8_t*)buf); - for (size_t i=0; i); i++) { - if (buf[i] < ' ') { buf[i] = '_'; } - } - buf[sizeof(simd8x64)] = '\0'; - return buf; +really_inline bool add_overflow(uint64_t value1, uint64_t value2, + uint64_t *result) { +#ifdef _MSC_VER + return _addcarry_u64(0, value1, value2, + reinterpret_cast(result)); +#else + return __builtin_uaddll_overflow(value1, value2, + (unsigned long long *)result); +#endif } -UNUSED static char * format_mask(uint64_t mask) { - static char *buf = (char*)malloc(64 + 1); - for (size_t i=0; i<64; i++) { - buf[i] = (mask & (size_t(1) << i)) ? 'X' : ' '; - } - buf[64] = '\0'; - return buf; +#ifdef _MSC_VER +#pragma intrinsic(_umul128) +#endif +really_inline bool mul_overflow(uint64_t value1, uint64_t value2, + uint64_t *result) { +#ifdef _MSC_VER + uint64_t high; + *result = _umul128(value1, value2, &high); + return high; +#else + return __builtin_umulll_overflow(value1, value2, + (unsigned long long *)result); +#endif } -// -// Finds escaped characters (characters following \). -// -// Handles runs of backslashes like \\\" and \\\\" correctly (yielding 0101 and 01010, respectively). -// -// Does this by: -// - Shift the escape mask to get potentially escaped characters (characters after backslashes). 
-// - Mask escaped sequences that start on *even* bits with 1010101010 (odd bits are escaped, even bits are not) -// - Mask escaped sequences that start on *odd* bits with 0101010101 (even bits are escaped, odd bits are not) -// -// To distinguish between escaped sequences starting on even/odd bits, it finds the start of all -// escape sequences, filters out the ones that start on even bits, and adds that to the mask of -// escape sequences. This causes the addition to clear out the sequences starting on odd bits (since -// the start bit causes a carry), and leaves even-bit sequences alone. -// -// Example: -// -// text | \\\ | \\\"\\\" \\\" \\"\\" | -// escape | xxx | xx xxx xxx xx xx | Removed overflow backslash; will | it into follows_escape -// odd_starts | x | x x x | escape & ~even_bits & ~follows_escape -// even_seq | c| cxxx c xx c | c = carry bit -- will be masked out later -// invert_mask | | cxxx c xx c| even_seq << 1 -// follows_escape | xx | x xx xxx xxx xx xx | Includes overflow bit -// escaped | x | x x x x x x x x | -// desired | x | x x x x x x x x | -// text | \\\ | \\\"\\\" \\\" \\"\\" | -// -really_inline uint64_t find_escaped(uint64_t escape, uint64_t &escaped_overflow) { - // If there was overflow, pretend the first character isn't a backslash - escape &= ~escaped_overflow; - uint64_t follows_escape = escape << 1 | escaped_overflow; - - // Get sequences starting on even bits by clearing out the odd series using + - const uint64_t even_bits = 0x5555555555555555ULL; - uint64_t odd_sequence_starts = escape & ~even_bits & ~follows_escape; - uint64_t sequences_starting_on_even_bits; - escaped_overflow = add_overflow(odd_sequence_starts, escape, &sequences_starting_on_even_bits); - uint64_t invert_mask = sequences_starting_on_even_bits << 1; // The mask we want to return is the *escaped* bits, not escapes. 
- - // Mask every other backslashed character as an escaped character - // Flip the mask for sequences that start on even bits, to correct them - return (even_bits ^ invert_mask) & follows_escape; -} - -// -// Check if the current character immediately follows a matching character. -// -// For example, this checks for quotes with backslashes in front of them: -// -// const uint64_t backslashed_quote = in.eq('"') & immediately_follows(in.eq('\'), prev_backslash); -// -really_inline uint64_t follows(const uint64_t match, uint64_t &overflow) { - const uint64_t result = match << 1 | overflow; - overflow = match >> 63; - return result; -} - -// -// Check if the current character follows a matching character, with possible "filler" between. -// For example, this checks for empty curly braces, e.g. -// -// in.eq('}') & follows(in.eq('['), in.eq(' '), prev_empty_array) // { * } -// -really_inline uint64_t follows(const uint64_t match, const uint64_t filler, uint64_t &overflow) { - uint64_t follows_match = follows(match, overflow); - uint64_t result; - overflow |= add_overflow(follows_match, filler, &result); - return result; -} - -really_inline ErrorValues json_structural_scanner::detect_errors_on_eof(bool streaming) { - if ((prev_in_string) and (not streaming)) { - return UNCLOSED_STRING; - } - if (unescaped_chars_error) { - return UNESCAPED_CHARS; - } - return SUCCESS; -} - -// -// Return a mask of all string characters plus end quotes. -// -// prev_escaped is overflow saying whether the next character is escaped. -// prev_in_string is overflow saying whether we're still in a string. -// -// Backslash sequences outside of quotes will be detected in stage 2. 
-// -really_inline uint64_t json_structural_scanner::find_strings(const simd::simd8x64 in) { - const uint64_t backslash = in.eq('\\'); - const uint64_t escaped = find_escaped(backslash, prev_escaped); - const uint64_t quote = in.eq('"') & ~escaped; - // prefix_xor flips on bits inside the string (and flips off the end quote). - const uint64_t in_string = prefix_xor(quote) ^ prev_in_string; - /* right shift of a signed value expected to be well-defined and standard - * compliant as of C++20, - * John Regher from Utah U. says this is fine code */ - prev_in_string = static_cast(static_cast(in_string) >> 63); - // Use ^ to turn the beginning quote off, and the end quote on. - return in_string ^ quote; -} - -// -// Determine which characters are *structural*: -// - braces: [] and {} -// - the start of primitives (123, true, false, null) -// - the start of invalid non-whitespace (+, &, ture, UTF-8) -// -// Also detects value sequence errors: -// - two values with no separator between ("hello" "world") -// - separators with no values ([1,] [1,,]and [,2]) -// -// This method will find all of the above whether it is in a string or not. -// -// To reduce dependency on the expensive "what is in a string" computation, this method treats the -// contents of a string the same as content outside. Errors and structurals inside the string or on -// the trailing quote will need to be removed later when the correct string information is known. -// -really_inline uint64_t json_structural_scanner::find_potential_structurals(const simd::simd8x64 in) { - // These use SIMD so let's kick them off before running the regular 64-bit stuff ... - uint64_t whitespace, op; - find_whitespace_and_operators(in, whitespace, op); - - // Detect the start of a run of primitive characters. Includes numbers, booleans, and strings ("). - // Everything except whitespace, braces, colon and comma. 
- const uint64_t primitive = ~(op | whitespace); - const uint64_t follows_primitive = follows(primitive, prev_primitive); - const uint64_t start_primitive = primitive & ~follows_primitive; - - // Return final structurals - return op | start_primitive; -} - -// -// Find the important bits of JSON in a 128-byte chunk, and add them to structural_indexes. -// -// PERF NOTES: -// We pipe 2 inputs through these stages: -// 1. Load JSON into registers. This takes a long time and is highly parallelizable, so we load -// 2 inputs' worth at once so that by the time step 2 is looking for them input, it's available. -// 2. Scan the JSON for critical data: strings, primitives and operators. This is the critical path. -// The output of step 1 depends entirely on this information. These functions don't quite use -// up enough CPU: the second half of the functions is highly serial, only using 1 execution core -// at a time. The second input's scans has some dependency on the first ones finishing it, but -// they can make a lot of progress before they need that information. -// 3. Step 1 doesn't use enough capacity, so we run some extra stuff while we're waiting for that -// to finish: utf-8 checks and generating the output from the last iteration. -// -// The reason we run 2 inputs at a time, is steps 2 and 3 are *still* not enough to soak up all -// available capacity with just one input. Running 2 at a time seems to give the CPU a good enough -// workout. -// -template<> -really_inline void json_structural_scanner::scan_step<128>(const uint8_t *buf, const size_t idx, utf8_checker &utf8_checker) { - // - // Load up all 128 bytes into SIMD registers - // - simd::simd8x64 in_1(buf); - simd::simd8x64 in_2(buf+64); - - // - // Find the strings and potential structurals (operators / primitives). - // - // This will include false structurals that are *inside* strings--we'll filter strings out - // before we return. 
- // - uint64_t string_1 = this->find_strings(in_1); - uint64_t structurals_1 = this->find_potential_structurals(in_1); - uint64_t string_2 = this->find_strings(in_2); - uint64_t structurals_2 = this->find_potential_structurals(in_2); - - // - // Do miscellaneous work while the processor is busy calculating strings and structurals. - // - // After that, weed out structurals that are inside strings and find invalid string characters. - // - uint64_t unescaped_1 = in_1.lteq(0x1F); - utf8_checker.check_next_input(in_1); - this->structural_indexes.write_indexes(idx-64, this->prev_structurals); // Output *last* iteration's structurals to ParsedJson - this->prev_structurals = structurals_1 & ~string_1; - this->unescaped_chars_error |= unescaped_1 & string_1; - - uint64_t unescaped_2 = in_2.lteq(0x1F); - utf8_checker.check_next_input(in_2); - this->structural_indexes.write_indexes(idx, this->prev_structurals); // Output *last* iteration's structurals to ParsedJson - this->prev_structurals = structurals_2 & ~string_2; - this->unescaped_chars_error |= unescaped_2 & string_2; -} - -// -// Find the important bits of JSON in a 64-byte chunk, and add them to structural_indexes. -// -template<> -really_inline void json_structural_scanner::scan_step<64>(const uint8_t *buf, const size_t idx, utf8_checker &utf8_checker) { - // - // Load up bytes into SIMD registers - // - simd::simd8x64 in_1(buf); - - // - // Find the strings and potential structurals (operators / primitives). - // - // This will include false structurals that are *inside* strings--we'll filter strings out - // before we return. - // - uint64_t string_1 = this->find_strings(in_1); - uint64_t structurals_1 = this->find_potential_structurals(in_1); - - // - // Do miscellaneous work while the processor is busy calculating strings and structurals. - // - // After that, weed out structurals that are inside strings and find invalid string characters. 
- // - uint64_t unescaped_1 = in_1.lteq(0x1F); - utf8_checker.check_next_input(in_1); - this->structural_indexes.write_indexes(idx-64, this->prev_structurals); // Output *last* iteration's structurals to ParsedJson - this->prev_structurals = structurals_1 & ~string_1; - this->unescaped_chars_error |= unescaped_1 & string_1; -} - -template -really_inline void json_structural_scanner::scan(const uint8_t *buf, const size_t len, utf8_checker &utf8_checker) { - size_t lenminusstep = len < STEP_SIZE ? 0 : len - STEP_SIZE; - size_t idx = 0; - - for (; idx < lenminusstep; idx += STEP_SIZE) { - this->scan_step(&buf[idx], idx, utf8_checker); - } - - /* If we have a final chunk of less than STEP_SIZE bytes, pad it to STEP_SIZE with - * spaces before processing it (otherwise, we risk invalidating the UTF-8 - * checks). */ - if (likely(idx < len)) { - uint8_t tmp_buf[STEP_SIZE]; - memset(tmp_buf, 0x20, STEP_SIZE); - memcpy(tmp_buf, buf + idx, len - idx); - this->scan_step(&tmp_buf[0], idx, utf8_checker); - idx += STEP_SIZE; - } - - /* finally, flatten out the remaining structurals from the last iteration */ - this->structural_indexes.write_indexes(idx-64, this->prev_structurals); -} - -// Setting the streaming parameter to true allows the find_structural_bits to tolerate unclosed strings. -// The caller should still ensure that the input is valid UTF-8. If you are processing substrings, -// you may want to call on a function like trimmed_length_safe_utf8. 
-template -int find_structural_bits(const uint8_t *buf, size_t len, simdjson::ParsedJson &pj, bool streaming) { - if (unlikely(len > pj.byte_capacity)) { - return simdjson::CAPACITY; - } - utf8_checker utf8_checker{}; - json_structural_scanner scanner{pj.structural_indexes.get()}; - scanner.scan(buf, len, utf8_checker); - // we might tolerate an unclosed string if streaming is true - simdjson::ErrorValues error = scanner.detect_errors_on_eof(streaming); - if (unlikely(error != simdjson::SUCCESS)) { - return error; - } - pj.n_structural_indexes = scanner.structural_indexes.tail - pj.structural_indexes.get(); - /* a valid JSON file cannot have zero structural indexes - we should have - * found something */ - if (unlikely(pj.n_structural_indexes == 0u)) { - return simdjson::EMPTY; - } - if (unlikely(pj.structural_indexes[pj.n_structural_indexes - 1] > len)) { - return simdjson::UNEXPECTED_ERROR; - } - if (len != pj.structural_indexes[pj.n_structural_indexes - 1]) { - /* the string might not be NULL terminated, but we add a virtual NULL - * ending character. 
*/ - pj.structural_indexes[pj.n_structural_indexes++] = len; - } - /* make it safe to dereference one beyond this array */ - pj.structural_indexes[pj.n_structural_indexes] = 0; - return utf8_checker.errors(); -} - -} // namespace stage1 - -} // namespace haswell +}// namespace simdjson::westmere UNTARGET_REGION -TARGET_HASWELL -namespace simdjson { - -template <> -int find_structural_bits(const uint8_t *buf, size_t len, simdjson::ParsedJson &pj, bool streaming) { - return haswell::stage1::find_structural_bits<128>(buf, len, pj, streaming); -} - -} // namespace simdjson -UNTARGET_REGION - -#endif // IS_X86_64 -#endif // SIMDJSON_HASWELL_STAGE1_FIND_MARKS_H -/* end file src/haswell/stage1_find_marks.h */ -/* begin file src/westmere/stage1_find_marks.h */ -#ifndef SIMDJSON_WESTMERE_STAGE1_FIND_MARKS_H -#define SIMDJSON_WESTMERE_STAGE1_FIND_MARKS_H - - -#ifdef IS_X86_64 - +#endif +#endif // SIMDJSON_WESTMERE_BITMANIPULATION_H +/* end file src/westmere/bitmanipulation.h */ TARGET_WESTMERE namespace simdjson::westmere { @@ -5832,6 +4040,7 @@ really_inline simd8 must_be_continuation(simd8 prev1, simd8(is_second_byte | is_third_byte | is_fourth_byte) > int8_t(0); } +/* begin file src/generic/utf8_lookup2_algorithm.h */ // // Detect Unicode errors. 
// @@ -6257,6 +4466,8 @@ namespace utf8_validation { } using utf8_validation::utf8_checker; +/* end file src/generic/utf8_lookup2_algorithm.h */ +/* begin file src/generic/stage1_find_marks.h */ // This file contains the common code every implementation uses in stage1 // It is intended to be included multiple times and compiled multiple times // We assume the file in which it is included already includes @@ -6682,6 +4893,7 @@ int find_structural_bits(const uint8_t *buf, size_t len, simdjson::ParsedJson &p } } // namespace stage1 +/* end file src/generic/stage1_find_marks.h */ } // namespace westmere UNTARGET_REGION @@ -6699,576 +4911,13 @@ UNTARGET_REGION #endif // IS_X86_64 #endif // SIMDJSON_WESTMERE_STAGE1_FIND_MARKS_H -/* end file src/westmere/stage1_find_marks.h */ -/* begin file src/stage1_find_marks.cpp */ -/* end file src/stage1_find_marks.cpp */ -/* begin file src/arm64/stringparsing.h */ -#ifndef SIMDJSON_ARM64_STRINGPARSING_H -#define SIMDJSON_ARM64_STRINGPARSING_H - - -#ifdef IS_ARM64 - - -namespace simdjson::arm64 { - -using namespace simd; - -// Holds backslashes and quotes locations. -struct parse_string_helper { - uint32_t bs_bits; - uint32_t quote_bits; - static const uint32_t BYTES_PROCESSED = 32; -}; - -really_inline parse_string_helper find_bs_bits_and_quote_bits(const uint8_t *src, uint8_t *dst) { - // this can read up to 31 bytes beyond the buffer size, but we require - // SIMDJSON_PADDING of padding - static_assert(SIMDJSON_PADDING >= (parse_string_helper::BYTES_PROCESSED - 1)); - simd8 v0(src); - simd8 v1(src + sizeof(v0)); - v0.store(dst); - v1.store(dst + sizeof(v0)); - - // Getting a 64-bit bitmask is much cheaper than multiple 16-bit bitmasks on ARM; therefore, we - // smash them together into a 64-byte mask and get the bitmask from there. 
- uint64_t bs_and_quote = simd8x64(v0 == '\\', v1 == '\\', v0 == '"', v1 == '"').to_bitmask(); - return { - static_cast(bs_and_quote), // bs_bits - static_cast(bs_and_quote >> 32) // quote_bits - }; -} - -// This file contains the common code every implementation uses -// It is intended to be included multiple times and compiled multiple times -// We assume the file in which it is include already includes -// "stringparsing.h" (this simplifies amalgation) - -// begin copypasta -// These chars yield themselves: " \ / -// b -> backspace, f -> formfeed, n -> newline, r -> cr, t -> horizontal tab -// u not handled in this table as it's complex -static const uint8_t escape_map[256] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x0. - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0x22, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x2f, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x4. - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x5c, 0, 0, 0, // 0x5. - 0, 0, 0x08, 0, 0, 0, 0x0c, 0, 0, 0, 0, 0, 0, 0, 0x0a, 0, // 0x6. - 0, 0, 0x0d, 0, 0x09, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x7. 
- - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -}; - -// handle a unicode codepoint -// write appropriate values into dest -// src will advance 6 bytes or 12 bytes -// dest will advance a variable amount (return via pointer) -// return true if the unicode codepoint was valid -// We work in little-endian then swap at write time -WARN_UNUSED -really_inline bool handle_unicode_codepoint(const uint8_t **src_ptr, - uint8_t **dst_ptr) { - // hex_to_u32_nocheck fills high 16 bits of the return value with 1s if the - // conversion isn't valid; we defer the check for this to inside the - // multilingual plane check - uint32_t code_point = hex_to_u32_nocheck(*src_ptr + 2); - *src_ptr += 6; - // check for low surrogate for characters outside the Basic - // Multilingual Plane. - if (code_point >= 0xd800 && code_point < 0xdc00) { - if (((*src_ptr)[0] != '\\') || (*src_ptr)[1] != 'u') { - return false; - } - uint32_t code_point_2 = hex_to_u32_nocheck(*src_ptr + 2); - - // if the first code point is invalid we will get here, as we will go past - // the check for being outside the Basic Multilingual plane. If we don't - // find a \u immediately afterwards we fail out anyhow, but if we do, - // this check catches both the case of the first code point being invalid - // or the second code point being invalid. 
- if ((code_point | code_point_2) >> 16) { - return false; - } - - code_point = - (((code_point - 0xd800) << 10) | (code_point_2 - 0xdc00)) + 0x10000; - *src_ptr += 6; - } - size_t offset = codepoint_to_utf8(code_point, *dst_ptr); - *dst_ptr += offset; - return offset > 0; -} - -WARN_UNUSED really_inline bool parse_string(UNUSED const uint8_t *buf, - UNUSED size_t len, ParsedJson &pj, - UNUSED const uint32_t depth, - UNUSED uint32_t offset) { - pj.write_tape(pj.current_string_buf_loc - pj.string_buf.get(), '"'); - const uint8_t *src = &buf[offset + 1]; /* we know that buf at offset is a " */ - uint8_t *dst = pj.current_string_buf_loc + sizeof(uint32_t); - const uint8_t *const start_of_string = dst; - while (1) { - parse_string_helper helper = find_bs_bits_and_quote_bits(src, dst); - if (((helper.bs_bits - 1) & helper.quote_bits) != 0) { - /* we encountered quotes first. Move dst to point to quotes and exit - */ - - /* find out where the quote is... */ - auto quote_dist = trailing_zeroes(helper.quote_bits); - - /* NULL termination is still handy if you expect all your strings to - * be NULL terminated? */ - /* It comes at a small cost */ - dst[quote_dist] = 0; - - uint32_t str_length = (dst - start_of_string) + quote_dist; - memcpy(pj.current_string_buf_loc, &str_length, sizeof(str_length)); - /***************************** - * Above, check for overflow in case someone has a crazy string - * (>=4GB?) _ - * But only add the overflow check when the document itself exceeds - * 4GB - * Currently unneeded because we refuse to parse docs larger or equal - * to 4GB. 
- ****************************/ - - /* we advance the point, accounting for the fact that we have a NULL - * termination */ - pj.current_string_buf_loc = dst + quote_dist + 1; - return true; - } - if (((helper.quote_bits - 1) & helper.bs_bits) != 0) { - /* find out where the backspace is */ - auto bs_dist = trailing_zeroes(helper.bs_bits); - uint8_t escape_char = src[bs_dist + 1]; - /* we encountered backslash first. Handle backslash */ - if (escape_char == 'u') { - /* move src/dst up to the start; they will be further adjusted - within the unicode codepoint handling code. */ - src += bs_dist; - dst += bs_dist; - if (!handle_unicode_codepoint(&src, &dst)) { - return false; - } - } else { - /* simple 1:1 conversion. Will eat bs_dist+2 characters in input and - * write bs_dist+1 characters to output - * note this may reach beyond the part of the buffer we've actually - * seen. I think this is ok */ - uint8_t escape_result = escape_map[escape_char]; - if (escape_result == 0u) { - return false; /* bogus escape value is an error */ - } - dst[bs_dist] = escape_result; - src += bs_dist + 2; - dst += bs_dist + 1; - } - } else { - /* they are the same. Since they can't co-occur, it means we - * encountered neither. */ - src += parse_string_helper::BYTES_PROCESSED; - dst += parse_string_helper::BYTES_PROCESSED; - } - } - /* can't be reached */ - return true; -} - -} -// namespace simdjson::amd64 - -#endif // IS_ARM64 -#endif -/* end file src/arm64/stringparsing.h */ -/* begin file src/haswell/stringparsing.h */ -#ifndef SIMDJSON_HASWELL_STRINGPARSING_H -#define SIMDJSON_HASWELL_STRINGPARSING_H - - -#ifdef IS_X86_64 - - -TARGET_HASWELL -namespace simdjson::haswell { - -using namespace simd; - -// Holds backslashes and quotes locations. 
-struct parse_string_helper { - uint32_t bs_bits; - uint32_t quote_bits; - static const uint32_t BYTES_PROCESSED = 32; -}; - -really_inline parse_string_helper find_bs_bits_and_quote_bits(const uint8_t *src, uint8_t *dst) { - // this can read up to 15 bytes beyond the buffer size, but we require - // SIMDJSON_PADDING of padding - static_assert(SIMDJSON_PADDING >= (parse_string_helper::BYTES_PROCESSED - 1)); - simd8 v(src); - // store to dest unconditionally - we can overwrite the bits we don't like later - v.store(dst); - return { - (uint32_t)(v == '\\').to_bitmask(), // bs_bits - (uint32_t)(v == '"').to_bitmask(), // quote_bits - }; -} - -// This file contains the common code every implementation uses -// It is intended to be included multiple times and compiled multiple times -// We assume the file in which it is include already includes -// "stringparsing.h" (this simplifies amalgation) - -// begin copypasta -// These chars yield themselves: " \ / -// b -> backspace, f -> formfeed, n -> newline, r -> cr, t -> horizontal tab -// u not handled in this table as it's complex -static const uint8_t escape_map[256] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x0. - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0x22, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x2f, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x4. - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x5c, 0, 0, 0, // 0x5. - 0, 0, 0x08, 0, 0, 0, 0x0c, 0, 0, 0, 0, 0, 0, 0, 0x0a, 0, // 0x6. - 0, 0, 0x0d, 0, 0x09, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x7. 
- - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -}; - -// handle a unicode codepoint -// write appropriate values into dest -// src will advance 6 bytes or 12 bytes -// dest will advance a variable amount (return via pointer) -// return true if the unicode codepoint was valid -// We work in little-endian then swap at write time -WARN_UNUSED -really_inline bool handle_unicode_codepoint(const uint8_t **src_ptr, - uint8_t **dst_ptr) { - // hex_to_u32_nocheck fills high 16 bits of the return value with 1s if the - // conversion isn't valid; we defer the check for this to inside the - // multilingual plane check - uint32_t code_point = hex_to_u32_nocheck(*src_ptr + 2); - *src_ptr += 6; - // check for low surrogate for characters outside the Basic - // Multilingual Plane. - if (code_point >= 0xd800 && code_point < 0xdc00) { - if (((*src_ptr)[0] != '\\') || (*src_ptr)[1] != 'u') { - return false; - } - uint32_t code_point_2 = hex_to_u32_nocheck(*src_ptr + 2); - - // if the first code point is invalid we will get here, as we will go past - // the check for being outside the Basic Multilingual plane. If we don't - // find a \u immediately afterwards we fail out anyhow, but if we do, - // this check catches both the case of the first code point being invalid - // or the second code point being invalid. 
- if ((code_point | code_point_2) >> 16) { - return false; - } - - code_point = - (((code_point - 0xd800) << 10) | (code_point_2 - 0xdc00)) + 0x10000; - *src_ptr += 6; - } - size_t offset = codepoint_to_utf8(code_point, *dst_ptr); - *dst_ptr += offset; - return offset > 0; -} - -WARN_UNUSED really_inline bool parse_string(UNUSED const uint8_t *buf, - UNUSED size_t len, ParsedJson &pj, - UNUSED const uint32_t depth, - UNUSED uint32_t offset) { - pj.write_tape(pj.current_string_buf_loc - pj.string_buf.get(), '"'); - const uint8_t *src = &buf[offset + 1]; /* we know that buf at offset is a " */ - uint8_t *dst = pj.current_string_buf_loc + sizeof(uint32_t); - const uint8_t *const start_of_string = dst; - while (1) { - parse_string_helper helper = find_bs_bits_and_quote_bits(src, dst); - if (((helper.bs_bits - 1) & helper.quote_bits) != 0) { - /* we encountered quotes first. Move dst to point to quotes and exit - */ - - /* find out where the quote is... */ - auto quote_dist = trailing_zeroes(helper.quote_bits); - - /* NULL termination is still handy if you expect all your strings to - * be NULL terminated? */ - /* It comes at a small cost */ - dst[quote_dist] = 0; - - uint32_t str_length = (dst - start_of_string) + quote_dist; - memcpy(pj.current_string_buf_loc, &str_length, sizeof(str_length)); - /***************************** - * Above, check for overflow in case someone has a crazy string - * (>=4GB?) _ - * But only add the overflow check when the document itself exceeds - * 4GB - * Currently unneeded because we refuse to parse docs larger or equal - * to 4GB. 
- ****************************/ - - /* we advance the point, accounting for the fact that we have a NULL - * termination */ - pj.current_string_buf_loc = dst + quote_dist + 1; - return true; - } - if (((helper.quote_bits - 1) & helper.bs_bits) != 0) { - /* find out where the backspace is */ - auto bs_dist = trailing_zeroes(helper.bs_bits); - uint8_t escape_char = src[bs_dist + 1]; - /* we encountered backslash first. Handle backslash */ - if (escape_char == 'u') { - /* move src/dst up to the start; they will be further adjusted - within the unicode codepoint handling code. */ - src += bs_dist; - dst += bs_dist; - if (!handle_unicode_codepoint(&src, &dst)) { - return false; - } - } else { - /* simple 1:1 conversion. Will eat bs_dist+2 characters in input and - * write bs_dist+1 characters to output - * note this may reach beyond the part of the buffer we've actually - * seen. I think this is ok */ - uint8_t escape_result = escape_map[escape_char]; - if (escape_result == 0u) { - return false; /* bogus escape value is an error */ - } - dst[bs_dist] = escape_result; - src += bs_dist + 2; - dst += bs_dist + 1; - } - } else { - /* they are the same. Since they can't co-occur, it means we - * encountered neither. */ - src += parse_string_helper::BYTES_PROCESSED; - dst += parse_string_helper::BYTES_PROCESSED; - } - } - /* can't be reached */ - return true; -} - -} // namespace simdjson::haswell -UNTARGET_REGION - -#endif // IS_X86_64 - -#endif -/* end file src/haswell/stringparsing.h */ -/* begin file src/westmere/stringparsing.h */ -#ifndef SIMDJSON_WESTMERE_STRINGPARSING_H -#define SIMDJSON_WESTMERE_STRINGPARSING_H - - -#ifdef IS_X86_64 - - -TARGET_WESTMERE -namespace simdjson::westmere { - -using namespace simd; - -// Holds backslashes and quotes locations. 
-struct parse_string_helper { - uint32_t bs_bits; - uint32_t quote_bits; - static const uint32_t BYTES_PROCESSED = 32; -}; - -really_inline parse_string_helper find_bs_bits_and_quote_bits(const uint8_t *src, uint8_t *dst) { - // this can read up to 31 bytes beyond the buffer size, but we require - // SIMDJSON_PADDING of padding - static_assert(SIMDJSON_PADDING >= (parse_string_helper::BYTES_PROCESSED - 1)); - simd8 v0(src); - simd8 v1(src + 16); - v0.store(dst); - v1.store(dst + 16); - uint64_t bs_and_quote = simd8x64(v0 == '\\', v1 == '\\', v0 == '"', v1 == '"').to_bitmask(); - return { - static_cast(bs_and_quote), // bs_bits - static_cast(bs_and_quote >> 32) // quote_bits - }; -} - -// This file contains the common code every implementation uses -// It is intended to be included multiple times and compiled multiple times -// We assume the file in which it is include already includes -// "stringparsing.h" (this simplifies amalgation) - -// begin copypasta -// These chars yield themselves: " \ / -// b -> backspace, f -> formfeed, n -> newline, r -> cr, t -> horizontal tab -// u not handled in this table as it's complex -static const uint8_t escape_map[256] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x0. - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0x22, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x2f, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x4. - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x5c, 0, 0, 0, // 0x5. - 0, 0, 0x08, 0, 0, 0, 0x0c, 0, 0, 0, 0, 0, 0, 0, 0x0a, 0, // 0x6. - 0, 0, 0x0d, 0, 0x09, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x7. 
- - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -}; - -// handle a unicode codepoint -// write appropriate values into dest -// src will advance 6 bytes or 12 bytes -// dest will advance a variable amount (return via pointer) -// return true if the unicode codepoint was valid -// We work in little-endian then swap at write time -WARN_UNUSED -really_inline bool handle_unicode_codepoint(const uint8_t **src_ptr, - uint8_t **dst_ptr) { - // hex_to_u32_nocheck fills high 16 bits of the return value with 1s if the - // conversion isn't valid; we defer the check for this to inside the - // multilingual plane check - uint32_t code_point = hex_to_u32_nocheck(*src_ptr + 2); - *src_ptr += 6; - // check for low surrogate for characters outside the Basic - // Multilingual Plane. - if (code_point >= 0xd800 && code_point < 0xdc00) { - if (((*src_ptr)[0] != '\\') || (*src_ptr)[1] != 'u') { - return false; - } - uint32_t code_point_2 = hex_to_u32_nocheck(*src_ptr + 2); - - // if the first code point is invalid we will get here, as we will go past - // the check for being outside the Basic Multilingual plane. If we don't - // find a \u immediately afterwards we fail out anyhow, but if we do, - // this check catches both the case of the first code point being invalid - // or the second code point being invalid. 
- if ((code_point | code_point_2) >> 16) { - return false; - } - - code_point = - (((code_point - 0xd800) << 10) | (code_point_2 - 0xdc00)) + 0x10000; - *src_ptr += 6; - } - size_t offset = codepoint_to_utf8(code_point, *dst_ptr); - *dst_ptr += offset; - return offset > 0; -} - -WARN_UNUSED really_inline bool parse_string(UNUSED const uint8_t *buf, - UNUSED size_t len, ParsedJson &pj, - UNUSED const uint32_t depth, - UNUSED uint32_t offset) { - pj.write_tape(pj.current_string_buf_loc - pj.string_buf.get(), '"'); - const uint8_t *src = &buf[offset + 1]; /* we know that buf at offset is a " */ - uint8_t *dst = pj.current_string_buf_loc + sizeof(uint32_t); - const uint8_t *const start_of_string = dst; - while (1) { - parse_string_helper helper = find_bs_bits_and_quote_bits(src, dst); - if (((helper.bs_bits - 1) & helper.quote_bits) != 0) { - /* we encountered quotes first. Move dst to point to quotes and exit - */ - - /* find out where the quote is... */ - auto quote_dist = trailing_zeroes(helper.quote_bits); - - /* NULL termination is still handy if you expect all your strings to - * be NULL terminated? */ - /* It comes at a small cost */ - dst[quote_dist] = 0; - - uint32_t str_length = (dst - start_of_string) + quote_dist; - memcpy(pj.current_string_buf_loc, &str_length, sizeof(str_length)); - /***************************** - * Above, check for overflow in case someone has a crazy string - * (>=4GB?) _ - * But only add the overflow check when the document itself exceeds - * 4GB - * Currently unneeded because we refuse to parse docs larger or equal - * to 4GB. 
- ****************************/ - - /* we advance the point, accounting for the fact that we have a NULL - * termination */ - pj.current_string_buf_loc = dst + quote_dist + 1; - return true; - } - if (((helper.quote_bits - 1) & helper.bs_bits) != 0) { - /* find out where the backspace is */ - auto bs_dist = trailing_zeroes(helper.bs_bits); - uint8_t escape_char = src[bs_dist + 1]; - /* we encountered backslash first. Handle backslash */ - if (escape_char == 'u') { - /* move src/dst up to the start; they will be further adjusted - within the unicode codepoint handling code. */ - src += bs_dist; - dst += bs_dist; - if (!handle_unicode_codepoint(&src, &dst)) { - return false; - } - } else { - /* simple 1:1 conversion. Will eat bs_dist+2 characters in input and - * write bs_dist+1 characters to output - * note this may reach beyond the part of the buffer we've actually - * seen. I think this is ok */ - uint8_t escape_result = escape_map[escape_char]; - if (escape_result == 0u) { - return false; /* bogus escape value is an error */ - } - dst[bs_dist] = escape_result; - src += bs_dist + 2; - dst += bs_dist + 1; - } - } else { - /* they are the same. Since they can't co-occur, it means we - * encountered neither. 
*/ - src += parse_string_helper::BYTES_PROCESSED; - dst += parse_string_helper::BYTES_PROCESSED; - } - } - /* can't be reached */ - return true; -} - -} // namespace simdjson::westmere -UNTARGET_REGION - -#endif // IS_X86_64 - -#endif -/* end file src/westmere/stringparsing.h */ +/* end file src/generic/stage1_find_marks.h */ +/* end file src/generic/stage1_find_marks.h */ /* begin file src/stage2_build_tape.cpp */ #include #include +/* jsoncharutils.h already included: #include "jsoncharutils.h" */ using namespace simdjson; @@ -7324,7 +4973,6 @@ void found_string(const uint8_t *buf, const uint8_t *parsed_begin, void found_bad_string(const uint8_t *buf); #endif -/* end file src/stage2_build_tape.cpp */ /* begin file src/arm64/stage2_build_tape.h */ #ifndef SIMDJSON_ARM64_STAGE2_BUILD_TAPE_H #define SIMDJSON_ARM64_STAGE2_BUILD_TAPE_H @@ -7332,9 +4980,806 @@ void found_bad_string(const uint8_t *buf); #ifdef IS_ARM64 +/* begin file src/arm64/stringparsing.h */ +#ifndef SIMDJSON_ARM64_STRINGPARSING_H +#define SIMDJSON_ARM64_STRINGPARSING_H + + +#ifdef IS_ARM64 + +/* arm64/simd.h already included: #include "arm64/simd.h" */ +/* jsoncharutils.h already included: #include "jsoncharutils.h" */ +/* arm64/intrinsics.h already included: #include "arm64/intrinsics.h" */ +/* arm64/bitmanipulation.h already included: #include "arm64/bitmanipulation.h" */ namespace simdjson::arm64 { +using namespace simd; + +// Holds backslashes and quotes locations. 
+struct parse_string_helper { + uint32_t bs_bits; + uint32_t quote_bits; + static const uint32_t BYTES_PROCESSED = 32; +}; + +really_inline parse_string_helper find_bs_bits_and_quote_bits(const uint8_t *src, uint8_t *dst) { + // this can read up to 31 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(SIMDJSON_PADDING >= (parse_string_helper::BYTES_PROCESSED - 1)); + simd8 v0(src); + simd8 v1(src + sizeof(v0)); + v0.store(dst); + v1.store(dst + sizeof(v0)); + + // Getting a 64-bit bitmask is much cheaper than multiple 16-bit bitmasks on ARM; therefore, we + // smash them together into a 64-byte mask and get the bitmask from there. + uint64_t bs_and_quote = simd8x64(v0 == '\\', v1 == '\\', v0 == '"', v1 == '"').to_bitmask(); + return { + static_cast(bs_and_quote), // bs_bits + static_cast(bs_and_quote >> 32) // quote_bits + }; +} + +/* begin file src/generic/stringparsing.h */ +// This file contains the common code every implementation uses +// It is intended to be included multiple times and compiled multiple times +// We assume the file in which it is include already includes +// "stringparsing.h" (this simplifies amalgation) + +// begin copypasta +// These chars yield themselves: " \ / +// b -> backspace, f -> formfeed, n -> newline, r -> cr, t -> horizontal tab +// u not handled in this table as it's complex +static const uint8_t escape_map[256] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x0. + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0x22, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x2f, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x4. + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x5c, 0, 0, 0, // 0x5. + 0, 0, 0x08, 0, 0, 0, 0x0c, 0, 0, 0, 0, 0, 0, 0, 0x0a, 0, // 0x6. + 0, 0, 0x0d, 0, 0x09, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x7. 
+ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +// handle a unicode codepoint +// write appropriate values into dest +// src will advance 6 bytes or 12 bytes +// dest will advance a variable amount (return via pointer) +// return true if the unicode codepoint was valid +// We work in little-endian then swap at write time +WARN_UNUSED +really_inline bool handle_unicode_codepoint(const uint8_t **src_ptr, + uint8_t **dst_ptr) { + // hex_to_u32_nocheck fills high 16 bits of the return value with 1s if the + // conversion isn't valid; we defer the check for this to inside the + // multilingual plane check + uint32_t code_point = hex_to_u32_nocheck(*src_ptr + 2); + *src_ptr += 6; + // check for low surrogate for characters outside the Basic + // Multilingual Plane. + if (code_point >= 0xd800 && code_point < 0xdc00) { + if (((*src_ptr)[0] != '\\') || (*src_ptr)[1] != 'u') { + return false; + } + uint32_t code_point_2 = hex_to_u32_nocheck(*src_ptr + 2); + + // if the first code point is invalid we will get here, as we will go past + // the check for being outside the Basic Multilingual plane. If we don't + // find a \u immediately afterwards we fail out anyhow, but if we do, + // this check catches both the case of the first code point being invalid + // or the second code point being invalid. 
+ if ((code_point | code_point_2) >> 16) { + return false; + } + + code_point = + (((code_point - 0xd800) << 10) | (code_point_2 - 0xdc00)) + 0x10000; + *src_ptr += 6; + } + size_t offset = codepoint_to_utf8(code_point, *dst_ptr); + *dst_ptr += offset; + return offset > 0; +} + +WARN_UNUSED really_inline bool parse_string(UNUSED const uint8_t *buf, + UNUSED size_t len, ParsedJson &pj, + UNUSED const uint32_t depth, + UNUSED uint32_t offset) { + pj.write_tape(pj.current_string_buf_loc - pj.string_buf.get(), '"'); + const uint8_t *src = &buf[offset + 1]; /* we know that buf at offset is a " */ + uint8_t *dst = pj.current_string_buf_loc + sizeof(uint32_t); + const uint8_t *const start_of_string = dst; + while (1) { + parse_string_helper helper = find_bs_bits_and_quote_bits(src, dst); + if (((helper.bs_bits - 1) & helper.quote_bits) != 0) { + /* we encountered quotes first. Move dst to point to quotes and exit + */ + + /* find out where the quote is... */ + auto quote_dist = trailing_zeroes(helper.quote_bits); + + /* NULL termination is still handy if you expect all your strings to + * be NULL terminated? */ + /* It comes at a small cost */ + dst[quote_dist] = 0; + + uint32_t str_length = (dst - start_of_string) + quote_dist; + memcpy(pj.current_string_buf_loc, &str_length, sizeof(str_length)); + /***************************** + * Above, check for overflow in case someone has a crazy string + * (>=4GB?) _ + * But only add the overflow check when the document itself exceeds + * 4GB + * Currently unneeded because we refuse to parse docs larger or equal + * to 4GB. 
+ ****************************/ + + /* we advance the point, accounting for the fact that we have a NULL + * termination */ + pj.current_string_buf_loc = dst + quote_dist + 1; + return true; + } + if (((helper.quote_bits - 1) & helper.bs_bits) != 0) { + /* find out where the backspace is */ + auto bs_dist = trailing_zeroes(helper.bs_bits); + uint8_t escape_char = src[bs_dist + 1]; + /* we encountered backslash first. Handle backslash */ + if (escape_char == 'u') { + /* move src/dst up to the start; they will be further adjusted + within the unicode codepoint handling code. */ + src += bs_dist; + dst += bs_dist; + if (!handle_unicode_codepoint(&src, &dst)) { + return false; + } + } else { + /* simple 1:1 conversion. Will eat bs_dist+2 characters in input and + * write bs_dist+1 characters to output + * note this may reach beyond the part of the buffer we've actually + * seen. I think this is ok */ + uint8_t escape_result = escape_map[escape_char]; + if (escape_result == 0u) { + return false; /* bogus escape value is an error */ + } + dst[bs_dist] = escape_result; + src += bs_dist + 2; + dst += bs_dist + 1; + } + } else { + /* they are the same. Since they can't co-occur, it means we + * encountered neither. 
*/ + src += parse_string_helper::BYTES_PROCESSED; + dst += parse_string_helper::BYTES_PROCESSED; + } + } + /* can't be reached */ + return true; +} +/* end file src/generic/stringparsing.h */ + +} +// namespace simdjson::amd64 + +#endif // IS_ARM64 +#endif +/* end file src/generic/stringparsing.h */ +/* begin file src/arm64/numberparsing.h */ +#ifndef SIMDJSON_ARM64_NUMBERPARSING_H +#define SIMDJSON_ARM64_NUMBERPARSING_H + +#ifdef IS_ARM64 + +/* arm64/intrinsics.h already included: #include "arm64/intrinsics.h" */ +/* arm64/bitmanipulation.h already included: #include "arm64/bitmanipulation.h" */ +/* jsoncharutils.h already included: #include "jsoncharutils.h" */ +#include +#include + + +#ifdef JSON_TEST_NUMBERS // for unit testing +void found_invalid_number(const uint8_t *buf); +void found_integer(int64_t result, const uint8_t *buf); +void found_unsigned_integer(uint64_t result, const uint8_t *buf); +void found_float(double result, const uint8_t *buf); +#endif + +namespace simdjson::arm64 { + +// we don't have SSE, so let us use a scalar function +// credit: https://johnnylee-sde.github.io/Fast-numeric-string-to-int/ +static inline uint32_t parse_eight_digits_unrolled(const char *chars) { + uint64_t val; + memcpy(&val, chars, sizeof(uint64_t)); + val = (val & 0x0F0F0F0F0F0F0F0F) * 2561 >> 8; + val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16; + return (val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32; +} + +#define SWAR_NUMBER_PARSING + +/* begin file src/generic/numberparsing.h */ + +// Allowable floating-point values range +// std::numeric_limits::lowest() to std::numeric_limits::max(), +// so from -1.7976e308 all the way to 1.7975e308 in binary64. The lowest +// non-zero normal values is std::numeric_limits::min() or +// about 2.225074e-308. 
+static const double power_of_ten[] = { + 1e-308, 1e-307, 1e-306, 1e-305, 1e-304, 1e-303, 1e-302, 1e-301, 1e-300, + 1e-299, 1e-298, 1e-297, 1e-296, 1e-295, 1e-294, 1e-293, 1e-292, 1e-291, + 1e-290, 1e-289, 1e-288, 1e-287, 1e-286, 1e-285, 1e-284, 1e-283, 1e-282, + 1e-281, 1e-280, 1e-279, 1e-278, 1e-277, 1e-276, 1e-275, 1e-274, 1e-273, + 1e-272, 1e-271, 1e-270, 1e-269, 1e-268, 1e-267, 1e-266, 1e-265, 1e-264, + 1e-263, 1e-262, 1e-261, 1e-260, 1e-259, 1e-258, 1e-257, 1e-256, 1e-255, + 1e-254, 1e-253, 1e-252, 1e-251, 1e-250, 1e-249, 1e-248, 1e-247, 1e-246, + 1e-245, 1e-244, 1e-243, 1e-242, 1e-241, 1e-240, 1e-239, 1e-238, 1e-237, + 1e-236, 1e-235, 1e-234, 1e-233, 1e-232, 1e-231, 1e-230, 1e-229, 1e-228, + 1e-227, 1e-226, 1e-225, 1e-224, 1e-223, 1e-222, 1e-221, 1e-220, 1e-219, + 1e-218, 1e-217, 1e-216, 1e-215, 1e-214, 1e-213, 1e-212, 1e-211, 1e-210, + 1e-209, 1e-208, 1e-207, 1e-206, 1e-205, 1e-204, 1e-203, 1e-202, 1e-201, + 1e-200, 1e-199, 1e-198, 1e-197, 1e-196, 1e-195, 1e-194, 1e-193, 1e-192, + 1e-191, 1e-190, 1e-189, 1e-188, 1e-187, 1e-186, 1e-185, 1e-184, 1e-183, + 1e-182, 1e-181, 1e-180, 1e-179, 1e-178, 1e-177, 1e-176, 1e-175, 1e-174, + 1e-173, 1e-172, 1e-171, 1e-170, 1e-169, 1e-168, 1e-167, 1e-166, 1e-165, + 1e-164, 1e-163, 1e-162, 1e-161, 1e-160, 1e-159, 1e-158, 1e-157, 1e-156, + 1e-155, 1e-154, 1e-153, 1e-152, 1e-151, 1e-150, 1e-149, 1e-148, 1e-147, + 1e-146, 1e-145, 1e-144, 1e-143, 1e-142, 1e-141, 1e-140, 1e-139, 1e-138, + 1e-137, 1e-136, 1e-135, 1e-134, 1e-133, 1e-132, 1e-131, 1e-130, 1e-129, + 1e-128, 1e-127, 1e-126, 1e-125, 1e-124, 1e-123, 1e-122, 1e-121, 1e-120, + 1e-119, 1e-118, 1e-117, 1e-116, 1e-115, 1e-114, 1e-113, 1e-112, 1e-111, + 1e-110, 1e-109, 1e-108, 1e-107, 1e-106, 1e-105, 1e-104, 1e-103, 1e-102, + 1e-101, 1e-100, 1e-99, 1e-98, 1e-97, 1e-96, 1e-95, 1e-94, 1e-93, + 1e-92, 1e-91, 1e-90, 1e-89, 1e-88, 1e-87, 1e-86, 1e-85, 1e-84, + 1e-83, 1e-82, 1e-81, 1e-80, 1e-79, 1e-78, 1e-77, 1e-76, 1e-75, + 1e-74, 1e-73, 1e-72, 1e-71, 1e-70, 1e-69, 1e-68, 1e-67, 
1e-66, + 1e-65, 1e-64, 1e-63, 1e-62, 1e-61, 1e-60, 1e-59, 1e-58, 1e-57, + 1e-56, 1e-55, 1e-54, 1e-53, 1e-52, 1e-51, 1e-50, 1e-49, 1e-48, + 1e-47, 1e-46, 1e-45, 1e-44, 1e-43, 1e-42, 1e-41, 1e-40, 1e-39, + 1e-38, 1e-37, 1e-36, 1e-35, 1e-34, 1e-33, 1e-32, 1e-31, 1e-30, + 1e-29, 1e-28, 1e-27, 1e-26, 1e-25, 1e-24, 1e-23, 1e-22, 1e-21, + 1e-20, 1e-19, 1e-18, 1e-17, 1e-16, 1e-15, 1e-14, 1e-13, 1e-12, + 1e-11, 1e-10, 1e-9, 1e-8, 1e-7, 1e-6, 1e-5, 1e-4, 1e-3, + 1e-2, 1e-1, 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, + 1e7, 1e8, 1e9, 1e10, 1e11, 1e12, 1e13, 1e14, 1e15, + 1e16, 1e17, 1e18, 1e19, 1e20, 1e21, 1e22, 1e23, 1e24, + 1e25, 1e26, 1e27, 1e28, 1e29, 1e30, 1e31, 1e32, 1e33, + 1e34, 1e35, 1e36, 1e37, 1e38, 1e39, 1e40, 1e41, 1e42, + 1e43, 1e44, 1e45, 1e46, 1e47, 1e48, 1e49, 1e50, 1e51, + 1e52, 1e53, 1e54, 1e55, 1e56, 1e57, 1e58, 1e59, 1e60, + 1e61, 1e62, 1e63, 1e64, 1e65, 1e66, 1e67, 1e68, 1e69, + 1e70, 1e71, 1e72, 1e73, 1e74, 1e75, 1e76, 1e77, 1e78, + 1e79, 1e80, 1e81, 1e82, 1e83, 1e84, 1e85, 1e86, 1e87, + 1e88, 1e89, 1e90, 1e91, 1e92, 1e93, 1e94, 1e95, 1e96, + 1e97, 1e98, 1e99, 1e100, 1e101, 1e102, 1e103, 1e104, 1e105, + 1e106, 1e107, 1e108, 1e109, 1e110, 1e111, 1e112, 1e113, 1e114, + 1e115, 1e116, 1e117, 1e118, 1e119, 1e120, 1e121, 1e122, 1e123, + 1e124, 1e125, 1e126, 1e127, 1e128, 1e129, 1e130, 1e131, 1e132, + 1e133, 1e134, 1e135, 1e136, 1e137, 1e138, 1e139, 1e140, 1e141, + 1e142, 1e143, 1e144, 1e145, 1e146, 1e147, 1e148, 1e149, 1e150, + 1e151, 1e152, 1e153, 1e154, 1e155, 1e156, 1e157, 1e158, 1e159, + 1e160, 1e161, 1e162, 1e163, 1e164, 1e165, 1e166, 1e167, 1e168, + 1e169, 1e170, 1e171, 1e172, 1e173, 1e174, 1e175, 1e176, 1e177, + 1e178, 1e179, 1e180, 1e181, 1e182, 1e183, 1e184, 1e185, 1e186, + 1e187, 1e188, 1e189, 1e190, 1e191, 1e192, 1e193, 1e194, 1e195, + 1e196, 1e197, 1e198, 1e199, 1e200, 1e201, 1e202, 1e203, 1e204, + 1e205, 1e206, 1e207, 1e208, 1e209, 1e210, 1e211, 1e212, 1e213, + 1e214, 1e215, 1e216, 1e217, 1e218, 1e219, 1e220, 1e221, 1e222, + 1e223, 1e224, 1e225, 1e226, 
1e227, 1e228, 1e229, 1e230, 1e231, + 1e232, 1e233, 1e234, 1e235, 1e236, 1e237, 1e238, 1e239, 1e240, + 1e241, 1e242, 1e243, 1e244, 1e245, 1e246, 1e247, 1e248, 1e249, + 1e250, 1e251, 1e252, 1e253, 1e254, 1e255, 1e256, 1e257, 1e258, + 1e259, 1e260, 1e261, 1e262, 1e263, 1e264, 1e265, 1e266, 1e267, + 1e268, 1e269, 1e270, 1e271, 1e272, 1e273, 1e274, 1e275, 1e276, + 1e277, 1e278, 1e279, 1e280, 1e281, 1e282, 1e283, 1e284, 1e285, + 1e286, 1e287, 1e288, 1e289, 1e290, 1e291, 1e292, 1e293, 1e294, + 1e295, 1e296, 1e297, 1e298, 1e299, 1e300, 1e301, 1e302, 1e303, + 1e304, 1e305, 1e306, 1e307, 1e308}; + +static inline bool is_integer(char c) { + return (c >= '0' && c <= '9'); + // this gets compiled to (uint8_t)(c - '0') <= 9 on all decent compilers +} + +// We need to check that the character following a zero is valid. This is +// probably frequent and it is hard than it looks. We are building all of this +// just to differentiate between 0x1 (invalid), 0,1 (valid) 0e1 (valid)... +const bool structural_or_whitespace_or_exponent_or_decimal_negated[256] = { + 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, + 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; + +really_inline bool +is_not_structural_or_whitespace_or_exponent_or_decimal(unsigned char c) { + return 
structural_or_whitespace_or_exponent_or_decimal_negated[c]; +} + +// check quickly whether the next 8 chars are made of digits +// at a glance, it looks better than Mula's +// http://0x80.pl/articles/swar-digits-validate.html +static inline bool is_made_of_eight_digits_fast(const char *chars) { + uint64_t val; + // this can read up to 7 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(7 <= SIMDJSON_PADDING); + memcpy(&val, chars, 8); + // a branchy method might be faster: + // return (( val & 0xF0F0F0F0F0F0F0F0 ) == 0x3030303030303030) + // && (( (val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0 ) == + // 0x3030303030303030); + return (((val & 0xF0F0F0F0F0F0F0F0) | + (((val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4)) == + 0x3333333333333333); +} + + +// +// This function computes base * 10 ^ (- negative_exponent ). +// It is only even going to be used when negative_exponent is tiny. +static double subnormal_power10(double base, int64_t negative_exponent) { + // avoid integer overflows in the pow expression, those values would + // become zero anyway. + if(negative_exponent < -1000) { + return 0; + } + + // this is probably not going to be fast + return base * 1e-308 * pow(10, negative_exponent + 308); +} + +// called by parse_number when we know that the output is a float, +// but where there might be some integer overflow. The trick here is to +// parse using floats from the start. +// Do not call this function directly as it skips some of the checks from +// parse_number +// +// This function will almost never be called!!! +// +// Note: a redesign could avoid this function entirely. 
+// +static never_inline bool parse_float(const uint8_t *const buf, ParsedJson &pj, + const uint32_t offset, bool found_minus) { + const char *p = reinterpret_cast(buf + offset); + bool negative = false; + if (found_minus) { + ++p; + negative = true; + } + long double i; + if (*p == '0') { // 0 cannot be followed by an integer + ++p; + i = 0; + } else { + unsigned char digit = *p - '0'; + i = digit; + p++; + while (is_integer(*p)) { + digit = *p - '0'; + i = 10 * i + digit; + ++p; + } + } + if ('.' == *p) { + ++p; + int fractional_weight = 308; + if (is_integer(*p)) { + unsigned char digit = *p - '0'; + ++p; + + fractional_weight--; + i = i + digit * (fractional_weight >= 0 ? power_of_ten[fractional_weight] + : 0); + } else { +#ifdef JSON_TEST_NUMBERS // for unit testing + found_invalid_number(buf + offset); +#endif + return false; + } + while (is_integer(*p)) { + unsigned char digit = *p - '0'; + ++p; + fractional_weight--; + i = i + digit * (fractional_weight >= 0 ? power_of_ten[fractional_weight] + : 0); + } + } + if (('e' == *p) || ('E' == *p)) { + ++p; + bool neg_exp = false; + if ('-' == *p) { + neg_exp = true; + ++p; + } else if ('+' == *p) { + ++p; + } + if (!is_integer(*p)) { +#ifdef JSON_TEST_NUMBERS // for unit testing + found_invalid_number(buf + offset); +#endif + return false; + } + unsigned char digit = *p - '0'; + int64_t exp_number = digit; // exponential part + p++; + if (is_integer(*p)) { + digit = *p - '0'; + exp_number = 10 * exp_number + digit; + ++p; + } + if (is_integer(*p)) { + digit = *p - '0'; + exp_number = 10 * exp_number + digit; + ++p; + } + if (is_integer(*p)) { + digit = *p - '0'; + exp_number = 10 * exp_number + digit; + ++p; + } + while (is_integer(*p)) { + if (exp_number > 0x100000000) { // we need to check for overflows +// we refuse to parse this +#ifdef JSON_TEST_NUMBERS // for unit testing + found_invalid_number(buf + offset); +#endif + return false; + } + digit = *p - '0'; + exp_number = 10 * exp_number + digit; + ++p; + } + 
if (unlikely(exp_number > 308)) { + // this path is unlikely + if (neg_exp) { + // We either have zero or a subnormal. + // We expect this to be uncommon so we go through a slow path. + i = subnormal_power10(i, -exp_number); + } else { +// We know for sure that we have a number that is too large, +// we refuse to parse this +#ifdef JSON_TEST_NUMBERS // for unit testing + found_invalid_number(buf + offset); +#endif + return false; + } + } else { + int exponent = (neg_exp ? -exp_number : exp_number); + // we have that exp_number is [0,308] so that + // exponent is [-308,308] so that + // 308 + exponent is in [0, 2 * 308] + i *= power_of_ten[308 + exponent]; + } + } + if (is_not_structural_or_whitespace(*p)) { + return false; + } + // check that we can go from long double to double safely. + if(i > std::numeric_limits::max()) { +#ifdef JSON_TEST_NUMBERS // for unit testing + found_invalid_number(buf + offset); +#endif + return false; + } + double d = negative ? -i : i; + pj.write_tape_double(d); +#ifdef JSON_TEST_NUMBERS // for unit testing + found_float(d, buf + offset); +#endif + return is_structural_or_whitespace(*p); +} + +// called by parse_number when we know that the output is an integer, +// but where there might be some integer overflow. +// we want to catch overflows! +// Do not call this function directly as it skips some of the checks from +// parse_number +// +// This function will almost never be called!!! 
+// +static never_inline bool parse_large_integer(const uint8_t *const buf, + ParsedJson &pj, + const uint32_t offset, + bool found_minus) { + const char *p = reinterpret_cast(buf + offset); + + bool negative = false; + if (found_minus) { + ++p; + negative = true; + } + uint64_t i; + if (*p == '0') { // 0 cannot be followed by an integer + ++p; + i = 0; + } else { + unsigned char digit = *p - '0'; + i = digit; + p++; + // the is_made_of_eight_digits_fast routine is unlikely to help here because + // we rarely see large integer parts like 123456789 + while (is_integer(*p)) { + digit = *p - '0'; + if (mul_overflow(i, 10, &i)) { +#ifdef JSON_TEST_NUMBERS // for unit testing + found_invalid_number(buf + offset); +#endif + return false; // overflow + } + if (add_overflow(i, digit, &i)) { +#ifdef JSON_TEST_NUMBERS // for unit testing + found_invalid_number(buf + offset); +#endif + return false; // overflow + } + ++p; + } + } + if (negative) { + if (i > 0x8000000000000000) { + // overflows! +#ifdef JSON_TEST_NUMBERS // for unit testing + found_invalid_number(buf + offset); +#endif + return false; // overflow + } else if (i == 0x8000000000000000) { + // In two's complement, we cannot represent 0x8000000000000000 + // as a positive signed integer, but the negative version is + // possible. + constexpr int64_t signed_answer = INT64_MIN; + pj.write_tape_s64(signed_answer); +#ifdef JSON_TEST_NUMBERS // for unit testing + found_integer(signed_answer, buf + offset); +#endif + } else { + // we can negate safely + int64_t signed_answer = -static_cast(i); + pj.write_tape_s64(signed_answer); +#ifdef JSON_TEST_NUMBERS // for unit testing + found_integer(signed_answer, buf + offset); +#endif + } + } else { + // we have a positive integer, the contract is that + // we try to represent it as a signed integer and only + // fallback on unsigned integers if absolutely necessary. 
+ if(i < 0x8000000000000000) { +#ifdef JSON_TEST_NUMBERS // for unit testing + found_integer(i, buf + offset); +#endif + pj.write_tape_s64(i); + } else { +#ifdef JSON_TEST_NUMBERS // for unit testing + found_unsigned_integer(i, buf + offset); +#endif + pj.write_tape_u64(i); + } + } + return is_structural_or_whitespace(*p); +} + +// parse the number at buf + offset +// define JSON_TEST_NUMBERS for unit testing +// +// It is assumed that the number is followed by a structural ({,},],[) character +// or a white space character. If that is not the case (e.g., when the JSON +// document is made of a single number), then it is necessary to copy the +// content and append a space before calling this function. +// +// Our objective is accurate parsing (ULP of 0 or 1) at high speed. +static really_inline bool parse_number(const uint8_t *const buf, ParsedJson &pj, + const uint32_t offset, + bool found_minus) { +#ifdef SIMDJSON_SKIPNUMBERPARSING // for performance analysis, it is sometimes + // useful to skip parsing + pj.write_tape_s64(0); // always write zero + return true; // always succeeds +#else + const char *p = reinterpret_cast(buf + offset); + bool negative = false; + if (found_minus) { + ++p; + negative = true; + if (!is_integer(*p)) { // a negative sign must be followed by an integer +#ifdef JSON_TEST_NUMBERS // for unit testing + found_invalid_number(buf + offset); +#endif + return false; + } + } + const char *const start_digits = p; + + uint64_t i; // an unsigned int avoids signed overflows (which are bad) + if (*p == '0') { // 0 cannot be followed by an integer + ++p; + if (is_not_structural_or_whitespace_or_exponent_or_decimal(*p)) { +#ifdef JSON_TEST_NUMBERS // for unit testing + found_invalid_number(buf + offset); +#endif + return false; + } + i = 0; + } else { + if (!(is_integer(*p))) { // must start with an integer +#ifdef JSON_TEST_NUMBERS // for unit testing + found_invalid_number(buf + offset); +#endif + return false; + } + unsigned char digit = *p - 
'0'; + i = digit; + p++; + // the is_made_of_eight_digits_fast routine is unlikely to help here because + // we rarely see large integer parts like 123456789 + while (is_integer(*p)) { + digit = *p - '0'; + // a multiplication by 10 is cheaper than an arbitrary integer + // multiplication + i = 10 * i + digit; // might overflow, we will handle the overflow later + ++p; + } + } + int64_t exponent = 0; + bool is_float = false; + if ('.' == *p) { + is_float = true; // At this point we know that we have a float + // we continue with the fiction that we have an integer. If the + // floating point number is representable as x * 10^z for some integer + // z that fits in 53 bits, then we will be able to convert back the + // the integer into a float in a lossless manner. + ++p; + const char *const first_after_period = p; + if (is_integer(*p)) { + unsigned char digit = *p - '0'; + ++p; + i = i * 10 + digit; // might overflow + multiplication by 10 is likely + // cheaper than arbitrary mult. + // we will handle the overflow later + } else { +#ifdef JSON_TEST_NUMBERS // for unit testing + found_invalid_number(buf + offset); +#endif + return false; + } +#ifdef SWAR_NUMBER_PARSING + // this helps if we have lots of decimals! + // this turns out to be frequent enough. + if (is_made_of_eight_digits_fast(p)) { + i = i * 100000000 + parse_eight_digits_unrolled(p); + p += 8; + } +#endif + while (is_integer(*p)) { + unsigned char digit = *p - '0'; + ++p; + i = i * 10 + digit; // in rare cases, this will overflow, but that's ok + // because we have parse_highprecision_float later. 
+ } + exponent = first_after_period - p; + } + int digit_count = + p - start_digits - 1; // used later to guard against overflows + int64_t exp_number = 0; // exponential part + if (('e' == *p) || ('E' == *p)) { + is_float = true; + ++p; + bool neg_exp = false; + if ('-' == *p) { + neg_exp = true; + ++p; + } else if ('+' == *p) { + ++p; + } + if (!is_integer(*p)) { +#ifdef JSON_TEST_NUMBERS // for unit testing + found_invalid_number(buf + offset); +#endif + return false; + } + unsigned char digit = *p - '0'; + exp_number = digit; + p++; + if (is_integer(*p)) { + digit = *p - '0'; + exp_number = 10 * exp_number + digit; + ++p; + } + if (is_integer(*p)) { + digit = *p - '0'; + exp_number = 10 * exp_number + digit; + ++p; + } + while (is_integer(*p)) { + if (exp_number > 0x100000000) { // we need to check for overflows + // we refuse to parse this +#ifdef JSON_TEST_NUMBERS // for unit testing + found_invalid_number(buf + offset); +#endif + return false; + } + digit = *p - '0'; + exp_number = 10 * exp_number + digit; + ++p; + } + exponent += (neg_exp ? -exp_number : exp_number); + } + if (is_float) { + uint64_t power_index = 308 + exponent; + if (unlikely((digit_count >= 19))) { // this is uncommon + // It is possible that the integer had an overflow. + // We have to handle the case where we have 0.0000somenumber. + const char *start = start_digits; + while ((*start == '0') || (*start == '.')) { + start++; + } + // we over-decrement by one when there is a '.' + digit_count -= (start - start_digits); + if (digit_count >= 19) { + // Ok, chances are good that we had an overflow! + // this is almost never going to get called!!! + // we start anew, going slowly!!! + return parse_float(buf, pj, offset, found_minus); + } + } + if (unlikely((power_index > 2 * 308))) { // this is uncommon!!! + // this is almost never going to get called!!! + // we start anew, going slowly!!! 
+ return parse_float(buf, pj, offset, found_minus); + } + double factor = power_of_ten[power_index]; + factor = negative ? -factor : factor; + double d = i * factor; + pj.write_tape_double(d); +#ifdef JSON_TEST_NUMBERS // for unit testing + found_float(d, buf + offset); +#endif + } else { + if (unlikely(digit_count >= 18)) { // this is uncommon!!! + // there is a good chance that we had an overflow, so we need + // need to recover: we parse the whole thing again. + return parse_large_integer(buf, pj, offset, found_minus); + } + i = negative ? 0 - i : i; + pj.write_tape_s64(i); +#ifdef JSON_TEST_NUMBERS // for unit testing + found_integer(i, buf + offset); +#endif + } + return is_structural_or_whitespace(*p); +#endif // SIMDJSON_SKIPNUMBERPARSING +} + +/* end file src/generic/numberparsing.h */ + + +}// namespace simdjson::arm64 + + +#endif // IS_ARM64 +#endif // SIMDJSON_ARM64_NUMBERPARSING_H +/* end file src/generic/numberparsing.h */ + +namespace simdjson::arm64 { + +/* begin file src/generic/stage2_build_tape.h */ // This file contains the common code every implementation uses for stage2 // It is intended to be included multiple times and compiled multiple times // We assume the file in which it is include already includes @@ -7730,6 +6175,8 @@ error: } } // namespace stage2 +/* end file src/generic/stage2_build_tape.h */ +/* begin file src/generic/stage2_streaming_build_tape.h */ namespace stage2 { struct streaming_structural_parser: structural_parser { @@ -7884,6 +6331,7 @@ error: } } // namespace stage2 +/* end file src/generic/stage2_streaming_build_tape.h */ } // namespace simdjson::arm64 @@ -7906,7 +6354,7 @@ unified_machine(const uint8_t *buf, size_t len, ParsedJson #endif // IS_ARM64 #endif // SIMDJSON_ARM64_STAGE2_BUILD_TAPE_H -/* end file src/arm64/stage2_build_tape.h */ +/* end file src/generic/stage2_streaming_build_tape.h */ /* begin file src/haswell/stage2_build_tape.h */ #ifndef SIMDJSON_HASWELL_STAGE2_BUILD_TAPE_H #define 
SIMDJSON_HASWELL_STAGE2_BUILD_TAPE_H @@ -7914,10 +6362,816 @@ unified_machine(const uint8_t *buf, size_t len, ParsedJson #ifdef IS_X86_64 +/* begin file src/haswell/stringparsing.h */ +#ifndef SIMDJSON_HASWELL_STRINGPARSING_H +#define SIMDJSON_HASWELL_STRINGPARSING_H + + +#ifdef IS_X86_64 + +/* haswell/simd.h already included: #include "haswell/simd.h" */ +/* jsoncharutils.h already included: #include "jsoncharutils.h" */ +/* haswell/intrinsics.h already included: #include "haswell/intrinsics.h" */ +/* haswell/bitmanipulation.h already included: #include "haswell/bitmanipulation.h" */ TARGET_HASWELL namespace simdjson::haswell { +using namespace simd; + +// Holds backslashes and quotes locations. +struct parse_string_helper { + uint32_t bs_bits; + uint32_t quote_bits; + static const uint32_t BYTES_PROCESSED = 32; +}; + +really_inline parse_string_helper find_bs_bits_and_quote_bits(const uint8_t *src, uint8_t *dst) { + // this can read up to 15 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(SIMDJSON_PADDING >= (parse_string_helper::BYTES_PROCESSED - 1)); + simd8 v(src); + // store to dest unconditionally - we can overwrite the bits we don't like later + v.store(dst); + return { + (uint32_t)(v == '\\').to_bitmask(), // bs_bits + (uint32_t)(v == '"').to_bitmask(), // quote_bits + }; +} + +/* begin file src/generic/stringparsing.h */ +// This file contains the common code every implementation uses +// It is intended to be included multiple times and compiled multiple times +// We assume the file in which it is include already includes +// "stringparsing.h" (this simplifies amalgation) + +// begin copypasta +// These chars yield themselves: " \ / +// b -> backspace, f -> formfeed, n -> newline, r -> cr, t -> horizontal tab +// u not handled in this table as it's complex +static const uint8_t escape_map[256] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x0. 
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0x22, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x2f, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x4. + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x5c, 0, 0, 0, // 0x5. + 0, 0, 0x08, 0, 0, 0, 0x0c, 0, 0, 0, 0, 0, 0, 0, 0x0a, 0, // 0x6. + 0, 0, 0x0d, 0, 0x09, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x7. + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +// handle a unicode codepoint +// write appropriate values into dest +// src will advance 6 bytes or 12 bytes +// dest will advance a variable amount (return via pointer) +// return true if the unicode codepoint was valid +// We work in little-endian then swap at write time +WARN_UNUSED +really_inline bool handle_unicode_codepoint(const uint8_t **src_ptr, + uint8_t **dst_ptr) { + // hex_to_u32_nocheck fills high 16 bits of the return value with 1s if the + // conversion isn't valid; we defer the check for this to inside the + // multilingual plane check + uint32_t code_point = hex_to_u32_nocheck(*src_ptr + 2); + *src_ptr += 6; + // check for low surrogate for characters outside the Basic + // Multilingual Plane. + if (code_point >= 0xd800 && code_point < 0xdc00) { + if (((*src_ptr)[0] != '\\') || (*src_ptr)[1] != 'u') { + return false; + } + uint32_t code_point_2 = hex_to_u32_nocheck(*src_ptr + 2); + + // if the first code point is invalid we will get here, as we will go past + // the check for being outside the Basic Multilingual plane. 
If we don't + // find a \u immediately afterwards we fail out anyhow, but if we do, + // this check catches both the case of the first code point being invalid + // or the second code point being invalid. + if ((code_point | code_point_2) >> 16) { + return false; + } + + code_point = + (((code_point - 0xd800) << 10) | (code_point_2 - 0xdc00)) + 0x10000; + *src_ptr += 6; + } + size_t offset = codepoint_to_utf8(code_point, *dst_ptr); + *dst_ptr += offset; + return offset > 0; +} + +WARN_UNUSED really_inline bool parse_string(UNUSED const uint8_t *buf, + UNUSED size_t len, ParsedJson &pj, + UNUSED const uint32_t depth, + UNUSED uint32_t offset) { + pj.write_tape(pj.current_string_buf_loc - pj.string_buf.get(), '"'); + const uint8_t *src = &buf[offset + 1]; /* we know that buf at offset is a " */ + uint8_t *dst = pj.current_string_buf_loc + sizeof(uint32_t); + const uint8_t *const start_of_string = dst; + while (1) { + parse_string_helper helper = find_bs_bits_and_quote_bits(src, dst); + if (((helper.bs_bits - 1) & helper.quote_bits) != 0) { + /* we encountered quotes first. Move dst to point to quotes and exit + */ + + /* find out where the quote is... */ + auto quote_dist = trailing_zeroes(helper.quote_bits); + + /* NULL termination is still handy if you expect all your strings to + * be NULL terminated? */ + /* It comes at a small cost */ + dst[quote_dist] = 0; + + uint32_t str_length = (dst - start_of_string) + quote_dist; + memcpy(pj.current_string_buf_loc, &str_length, sizeof(str_length)); + /***************************** + * Above, check for overflow in case someone has a crazy string + * (>=4GB?) _ + * But only add the overflow check when the document itself exceeds + * 4GB + * Currently unneeded because we refuse to parse docs larger or equal + * to 4GB. 
+ ****************************/ + + /* we advance the point, accounting for the fact that we have a NULL + * termination */ + pj.current_string_buf_loc = dst + quote_dist + 1; + return true; + } + if (((helper.quote_bits - 1) & helper.bs_bits) != 0) { + /* find out where the backspace is */ + auto bs_dist = trailing_zeroes(helper.bs_bits); + uint8_t escape_char = src[bs_dist + 1]; + /* we encountered backslash first. Handle backslash */ + if (escape_char == 'u') { + /* move src/dst up to the start; they will be further adjusted + within the unicode codepoint handling code. */ + src += bs_dist; + dst += bs_dist; + if (!handle_unicode_codepoint(&src, &dst)) { + return false; + } + } else { + /* simple 1:1 conversion. Will eat bs_dist+2 characters in input and + * write bs_dist+1 characters to output + * note this may reach beyond the part of the buffer we've actually + * seen. I think this is ok */ + uint8_t escape_result = escape_map[escape_char]; + if (escape_result == 0u) { + return false; /* bogus escape value is an error */ + } + dst[bs_dist] = escape_result; + src += bs_dist + 2; + dst += bs_dist + 1; + } + } else { + /* they are the same. Since they can't co-occur, it means we + * encountered neither. 
*/ + src += parse_string_helper::BYTES_PROCESSED; + dst += parse_string_helper::BYTES_PROCESSED; + } + } + /* can't be reached */ + return true; +} +/* end file src/generic/stringparsing.h */ + +} // namespace simdjson::haswell +UNTARGET_REGION + +#endif // IS_X86_64 + +#endif +/* end file src/generic/stringparsing.h */ +/* begin file src/haswell/numberparsing.h */ +#ifndef SIMDJSON_HASWELL_NUMBERPARSING_H +#define SIMDJSON_HASWELL_NUMBERPARSING_H + +#ifdef IS_X86_64 + +/* haswell/intrinsics.h already included: #include "haswell/intrinsics.h" */ +/* haswell/bitmanipulation.h already included: #include "haswell/bitmanipulation.h" */ +/* jsoncharutils.h already included: #include "jsoncharutils.h" */ +#include +#include + + +#ifdef JSON_TEST_NUMBERS // for unit testing +void found_invalid_number(const uint8_t *buf); +void found_integer(int64_t result, const uint8_t *buf); +void found_unsigned_integer(uint64_t result, const uint8_t *buf); +void found_float(double result, const uint8_t *buf); +#endif + +TARGET_HASWELL +namespace simdjson::haswell { +static inline uint32_t parse_eight_digits_unrolled(const char *chars) { + // this actually computes *16* values so we are being wasteful. 
+ const __m128i ascii0 = _mm_set1_epi8('0'); + const __m128i mul_1_10 = + _mm_setr_epi8(10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1); + const __m128i mul_1_100 = _mm_setr_epi16(100, 1, 100, 1, 100, 1, 100, 1); + const __m128i mul_1_10000 = + _mm_setr_epi16(10000, 1, 10000, 1, 10000, 1, 10000, 1); + const __m128i input = _mm_sub_epi8( + _mm_loadu_si128(reinterpret_cast(chars)), ascii0); + const __m128i t1 = _mm_maddubs_epi16(input, mul_1_10); + const __m128i t2 = _mm_madd_epi16(t1, mul_1_100); + const __m128i t3 = _mm_packus_epi32(t2, t2); + const __m128i t4 = _mm_madd_epi16(t3, mul_1_10000); + return _mm_cvtsi128_si32( + t4); // only captures the sum of the first 8 digits, drop the rest +} + +#define SWAR_NUMBER_PARSING + +/* begin file src/generic/numberparsing.h */ + +// Allowable floating-point values range +// std::numeric_limits::lowest() to std::numeric_limits::max(), +// so from -1.7976e308 all the way to 1.7975e308 in binary64. The lowest +// non-zero normal values is std::numeric_limits::min() or +// about 2.225074e-308. 
+static const double power_of_ten[] = { + 1e-308, 1e-307, 1e-306, 1e-305, 1e-304, 1e-303, 1e-302, 1e-301, 1e-300, + 1e-299, 1e-298, 1e-297, 1e-296, 1e-295, 1e-294, 1e-293, 1e-292, 1e-291, + 1e-290, 1e-289, 1e-288, 1e-287, 1e-286, 1e-285, 1e-284, 1e-283, 1e-282, + 1e-281, 1e-280, 1e-279, 1e-278, 1e-277, 1e-276, 1e-275, 1e-274, 1e-273, + 1e-272, 1e-271, 1e-270, 1e-269, 1e-268, 1e-267, 1e-266, 1e-265, 1e-264, + 1e-263, 1e-262, 1e-261, 1e-260, 1e-259, 1e-258, 1e-257, 1e-256, 1e-255, + 1e-254, 1e-253, 1e-252, 1e-251, 1e-250, 1e-249, 1e-248, 1e-247, 1e-246, + 1e-245, 1e-244, 1e-243, 1e-242, 1e-241, 1e-240, 1e-239, 1e-238, 1e-237, + 1e-236, 1e-235, 1e-234, 1e-233, 1e-232, 1e-231, 1e-230, 1e-229, 1e-228, + 1e-227, 1e-226, 1e-225, 1e-224, 1e-223, 1e-222, 1e-221, 1e-220, 1e-219, + 1e-218, 1e-217, 1e-216, 1e-215, 1e-214, 1e-213, 1e-212, 1e-211, 1e-210, + 1e-209, 1e-208, 1e-207, 1e-206, 1e-205, 1e-204, 1e-203, 1e-202, 1e-201, + 1e-200, 1e-199, 1e-198, 1e-197, 1e-196, 1e-195, 1e-194, 1e-193, 1e-192, + 1e-191, 1e-190, 1e-189, 1e-188, 1e-187, 1e-186, 1e-185, 1e-184, 1e-183, + 1e-182, 1e-181, 1e-180, 1e-179, 1e-178, 1e-177, 1e-176, 1e-175, 1e-174, + 1e-173, 1e-172, 1e-171, 1e-170, 1e-169, 1e-168, 1e-167, 1e-166, 1e-165, + 1e-164, 1e-163, 1e-162, 1e-161, 1e-160, 1e-159, 1e-158, 1e-157, 1e-156, + 1e-155, 1e-154, 1e-153, 1e-152, 1e-151, 1e-150, 1e-149, 1e-148, 1e-147, + 1e-146, 1e-145, 1e-144, 1e-143, 1e-142, 1e-141, 1e-140, 1e-139, 1e-138, + 1e-137, 1e-136, 1e-135, 1e-134, 1e-133, 1e-132, 1e-131, 1e-130, 1e-129, + 1e-128, 1e-127, 1e-126, 1e-125, 1e-124, 1e-123, 1e-122, 1e-121, 1e-120, + 1e-119, 1e-118, 1e-117, 1e-116, 1e-115, 1e-114, 1e-113, 1e-112, 1e-111, + 1e-110, 1e-109, 1e-108, 1e-107, 1e-106, 1e-105, 1e-104, 1e-103, 1e-102, + 1e-101, 1e-100, 1e-99, 1e-98, 1e-97, 1e-96, 1e-95, 1e-94, 1e-93, + 1e-92, 1e-91, 1e-90, 1e-89, 1e-88, 1e-87, 1e-86, 1e-85, 1e-84, + 1e-83, 1e-82, 1e-81, 1e-80, 1e-79, 1e-78, 1e-77, 1e-76, 1e-75, + 1e-74, 1e-73, 1e-72, 1e-71, 1e-70, 1e-69, 1e-68, 1e-67, 
1e-66, + 1e-65, 1e-64, 1e-63, 1e-62, 1e-61, 1e-60, 1e-59, 1e-58, 1e-57, + 1e-56, 1e-55, 1e-54, 1e-53, 1e-52, 1e-51, 1e-50, 1e-49, 1e-48, + 1e-47, 1e-46, 1e-45, 1e-44, 1e-43, 1e-42, 1e-41, 1e-40, 1e-39, + 1e-38, 1e-37, 1e-36, 1e-35, 1e-34, 1e-33, 1e-32, 1e-31, 1e-30, + 1e-29, 1e-28, 1e-27, 1e-26, 1e-25, 1e-24, 1e-23, 1e-22, 1e-21, + 1e-20, 1e-19, 1e-18, 1e-17, 1e-16, 1e-15, 1e-14, 1e-13, 1e-12, + 1e-11, 1e-10, 1e-9, 1e-8, 1e-7, 1e-6, 1e-5, 1e-4, 1e-3, + 1e-2, 1e-1, 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, + 1e7, 1e8, 1e9, 1e10, 1e11, 1e12, 1e13, 1e14, 1e15, + 1e16, 1e17, 1e18, 1e19, 1e20, 1e21, 1e22, 1e23, 1e24, + 1e25, 1e26, 1e27, 1e28, 1e29, 1e30, 1e31, 1e32, 1e33, + 1e34, 1e35, 1e36, 1e37, 1e38, 1e39, 1e40, 1e41, 1e42, + 1e43, 1e44, 1e45, 1e46, 1e47, 1e48, 1e49, 1e50, 1e51, + 1e52, 1e53, 1e54, 1e55, 1e56, 1e57, 1e58, 1e59, 1e60, + 1e61, 1e62, 1e63, 1e64, 1e65, 1e66, 1e67, 1e68, 1e69, + 1e70, 1e71, 1e72, 1e73, 1e74, 1e75, 1e76, 1e77, 1e78, + 1e79, 1e80, 1e81, 1e82, 1e83, 1e84, 1e85, 1e86, 1e87, + 1e88, 1e89, 1e90, 1e91, 1e92, 1e93, 1e94, 1e95, 1e96, + 1e97, 1e98, 1e99, 1e100, 1e101, 1e102, 1e103, 1e104, 1e105, + 1e106, 1e107, 1e108, 1e109, 1e110, 1e111, 1e112, 1e113, 1e114, + 1e115, 1e116, 1e117, 1e118, 1e119, 1e120, 1e121, 1e122, 1e123, + 1e124, 1e125, 1e126, 1e127, 1e128, 1e129, 1e130, 1e131, 1e132, + 1e133, 1e134, 1e135, 1e136, 1e137, 1e138, 1e139, 1e140, 1e141, + 1e142, 1e143, 1e144, 1e145, 1e146, 1e147, 1e148, 1e149, 1e150, + 1e151, 1e152, 1e153, 1e154, 1e155, 1e156, 1e157, 1e158, 1e159, + 1e160, 1e161, 1e162, 1e163, 1e164, 1e165, 1e166, 1e167, 1e168, + 1e169, 1e170, 1e171, 1e172, 1e173, 1e174, 1e175, 1e176, 1e177, + 1e178, 1e179, 1e180, 1e181, 1e182, 1e183, 1e184, 1e185, 1e186, + 1e187, 1e188, 1e189, 1e190, 1e191, 1e192, 1e193, 1e194, 1e195, + 1e196, 1e197, 1e198, 1e199, 1e200, 1e201, 1e202, 1e203, 1e204, + 1e205, 1e206, 1e207, 1e208, 1e209, 1e210, 1e211, 1e212, 1e213, + 1e214, 1e215, 1e216, 1e217, 1e218, 1e219, 1e220, 1e221, 1e222, + 1e223, 1e224, 1e225, 1e226, 
1e227, 1e228, 1e229, 1e230, 1e231, + 1e232, 1e233, 1e234, 1e235, 1e236, 1e237, 1e238, 1e239, 1e240, + 1e241, 1e242, 1e243, 1e244, 1e245, 1e246, 1e247, 1e248, 1e249, + 1e250, 1e251, 1e252, 1e253, 1e254, 1e255, 1e256, 1e257, 1e258, + 1e259, 1e260, 1e261, 1e262, 1e263, 1e264, 1e265, 1e266, 1e267, + 1e268, 1e269, 1e270, 1e271, 1e272, 1e273, 1e274, 1e275, 1e276, + 1e277, 1e278, 1e279, 1e280, 1e281, 1e282, 1e283, 1e284, 1e285, + 1e286, 1e287, 1e288, 1e289, 1e290, 1e291, 1e292, 1e293, 1e294, + 1e295, 1e296, 1e297, 1e298, 1e299, 1e300, 1e301, 1e302, 1e303, + 1e304, 1e305, 1e306, 1e307, 1e308}; + +static inline bool is_integer(char c) { + return (c >= '0' && c <= '9'); + // this gets compiled to (uint8_t)(c - '0') <= 9 on all decent compilers +} + +// We need to check that the character following a zero is valid. This is +// probably frequent and it is hard than it looks. We are building all of this +// just to differentiate between 0x1 (invalid), 0,1 (valid) 0e1 (valid)... +const bool structural_or_whitespace_or_exponent_or_decimal_negated[256] = { + 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, + 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; + +really_inline bool +is_not_structural_or_whitespace_or_exponent_or_decimal(unsigned char c) { + return 
structural_or_whitespace_or_exponent_or_decimal_negated[c]; +} + +// check quickly whether the next 8 chars are made of digits +// at a glance, it looks better than Mula's +// http://0x80.pl/articles/swar-digits-validate.html +static inline bool is_made_of_eight_digits_fast(const char *chars) { + uint64_t val; + // this can read up to 7 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(7 <= SIMDJSON_PADDING); + memcpy(&val, chars, 8); + // a branchy method might be faster: + // return (( val & 0xF0F0F0F0F0F0F0F0 ) == 0x3030303030303030) + // && (( (val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0 ) == + // 0x3030303030303030); + return (((val & 0xF0F0F0F0F0F0F0F0) | + (((val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4)) == + 0x3333333333333333); +} + + +// +// This function computes base * 10 ^ (- negative_exponent ). +// It is only even going to be used when negative_exponent is tiny. +static double subnormal_power10(double base, int64_t negative_exponent) { + // avoid integer overflows in the pow expression, those values would + // become zero anyway. + if(negative_exponent < -1000) { + return 0; + } + + // this is probably not going to be fast + return base * 1e-308 * pow(10, negative_exponent + 308); +} + +// called by parse_number when we know that the output is a float, +// but where there might be some integer overflow. The trick here is to +// parse using floats from the start. +// Do not call this function directly as it skips some of the checks from +// parse_number +// +// This function will almost never be called!!! +// +// Note: a redesign could avoid this function entirely. 
+// +static never_inline bool parse_float(const uint8_t *const buf, ParsedJson &pj, + const uint32_t offset, bool found_minus) { + const char *p = reinterpret_cast(buf + offset); + bool negative = false; + if (found_minus) { + ++p; + negative = true; + } + long double i; + if (*p == '0') { // 0 cannot be followed by an integer + ++p; + i = 0; + } else { + unsigned char digit = *p - '0'; + i = digit; + p++; + while (is_integer(*p)) { + digit = *p - '0'; + i = 10 * i + digit; + ++p; + } + } + if ('.' == *p) { + ++p; + int fractional_weight = 308; + if (is_integer(*p)) { + unsigned char digit = *p - '0'; + ++p; + + fractional_weight--; + i = i + digit * (fractional_weight >= 0 ? power_of_ten[fractional_weight] + : 0); + } else { +#ifdef JSON_TEST_NUMBERS // for unit testing + found_invalid_number(buf + offset); +#endif + return false; + } + while (is_integer(*p)) { + unsigned char digit = *p - '0'; + ++p; + fractional_weight--; + i = i + digit * (fractional_weight >= 0 ? power_of_ten[fractional_weight] + : 0); + } + } + if (('e' == *p) || ('E' == *p)) { + ++p; + bool neg_exp = false; + if ('-' == *p) { + neg_exp = true; + ++p; + } else if ('+' == *p) { + ++p; + } + if (!is_integer(*p)) { +#ifdef JSON_TEST_NUMBERS // for unit testing + found_invalid_number(buf + offset); +#endif + return false; + } + unsigned char digit = *p - '0'; + int64_t exp_number = digit; // exponential part + p++; + if (is_integer(*p)) { + digit = *p - '0'; + exp_number = 10 * exp_number + digit; + ++p; + } + if (is_integer(*p)) { + digit = *p - '0'; + exp_number = 10 * exp_number + digit; + ++p; + } + if (is_integer(*p)) { + digit = *p - '0'; + exp_number = 10 * exp_number + digit; + ++p; + } + while (is_integer(*p)) { + if (exp_number > 0x100000000) { // we need to check for overflows +// we refuse to parse this +#ifdef JSON_TEST_NUMBERS // for unit testing + found_invalid_number(buf + offset); +#endif + return false; + } + digit = *p - '0'; + exp_number = 10 * exp_number + digit; + ++p; + } + 
if (unlikely(exp_number > 308)) { + // this path is unlikely + if (neg_exp) { + // We either have zero or a subnormal. + // We expect this to be uncommon so we go through a slow path. + i = subnormal_power10(i, -exp_number); + } else { +// We know for sure that we have a number that is too large, +// we refuse to parse this +#ifdef JSON_TEST_NUMBERS // for unit testing + found_invalid_number(buf + offset); +#endif + return false; + } + } else { + int exponent = (neg_exp ? -exp_number : exp_number); + // we have that exp_number is [0,308] so that + // exponent is [-308,308] so that + // 308 + exponent is in [0, 2 * 308] + i *= power_of_ten[308 + exponent]; + } + } + if (is_not_structural_or_whitespace(*p)) { + return false; + } + // check that we can go from long double to double safely. + if(i > std::numeric_limits::max()) { +#ifdef JSON_TEST_NUMBERS // for unit testing + found_invalid_number(buf + offset); +#endif + return false; + } + double d = negative ? -i : i; + pj.write_tape_double(d); +#ifdef JSON_TEST_NUMBERS // for unit testing + found_float(d, buf + offset); +#endif + return is_structural_or_whitespace(*p); +} + +// called by parse_number when we know that the output is an integer, +// but where there might be some integer overflow. +// we want to catch overflows! +// Do not call this function directly as it skips some of the checks from +// parse_number +// +// This function will almost never be called!!! 
+// +static never_inline bool parse_large_integer(const uint8_t *const buf, + ParsedJson &pj, + const uint32_t offset, + bool found_minus) { + const char *p = reinterpret_cast(buf + offset); + + bool negative = false; + if (found_minus) { + ++p; + negative = true; + } + uint64_t i; + if (*p == '0') { // 0 cannot be followed by an integer + ++p; + i = 0; + } else { + unsigned char digit = *p - '0'; + i = digit; + p++; + // the is_made_of_eight_digits_fast routine is unlikely to help here because + // we rarely see large integer parts like 123456789 + while (is_integer(*p)) { + digit = *p - '0'; + if (mul_overflow(i, 10, &i)) { +#ifdef JSON_TEST_NUMBERS // for unit testing + found_invalid_number(buf + offset); +#endif + return false; // overflow + } + if (add_overflow(i, digit, &i)) { +#ifdef JSON_TEST_NUMBERS // for unit testing + found_invalid_number(buf + offset); +#endif + return false; // overflow + } + ++p; + } + } + if (negative) { + if (i > 0x8000000000000000) { + // overflows! +#ifdef JSON_TEST_NUMBERS // for unit testing + found_invalid_number(buf + offset); +#endif + return false; // overflow + } else if (i == 0x8000000000000000) { + // In two's complement, we cannot represent 0x8000000000000000 + // as a positive signed integer, but the negative version is + // possible. + constexpr int64_t signed_answer = INT64_MIN; + pj.write_tape_s64(signed_answer); +#ifdef JSON_TEST_NUMBERS // for unit testing + found_integer(signed_answer, buf + offset); +#endif + } else { + // we can negate safely + int64_t signed_answer = -static_cast(i); + pj.write_tape_s64(signed_answer); +#ifdef JSON_TEST_NUMBERS // for unit testing + found_integer(signed_answer, buf + offset); +#endif + } + } else { + // we have a positive integer, the contract is that + // we try to represent it as a signed integer and only + // fallback on unsigned integers if absolutely necessary. 
+ if(i < 0x8000000000000000) { +#ifdef JSON_TEST_NUMBERS // for unit testing + found_integer(i, buf + offset); +#endif + pj.write_tape_s64(i); + } else { +#ifdef JSON_TEST_NUMBERS // for unit testing + found_unsigned_integer(i, buf + offset); +#endif + pj.write_tape_u64(i); + } + } + return is_structural_or_whitespace(*p); +} + +// parse the number at buf + offset +// define JSON_TEST_NUMBERS for unit testing +// +// It is assumed that the number is followed by a structural ({,},],[) character +// or a white space character. If that is not the case (e.g., when the JSON +// document is made of a single number), then it is necessary to copy the +// content and append a space before calling this function. +// +// Our objective is accurate parsing (ULP of 0 or 1) at high speed. +static really_inline bool parse_number(const uint8_t *const buf, ParsedJson &pj, + const uint32_t offset, + bool found_minus) { +#ifdef SIMDJSON_SKIPNUMBERPARSING // for performance analysis, it is sometimes + // useful to skip parsing + pj.write_tape_s64(0); // always write zero + return true; // always succeeds +#else + const char *p = reinterpret_cast(buf + offset); + bool negative = false; + if (found_minus) { + ++p; + negative = true; + if (!is_integer(*p)) { // a negative sign must be followed by an integer +#ifdef JSON_TEST_NUMBERS // for unit testing + found_invalid_number(buf + offset); +#endif + return false; + } + } + const char *const start_digits = p; + + uint64_t i; // an unsigned int avoids signed overflows (which are bad) + if (*p == '0') { // 0 cannot be followed by an integer + ++p; + if (is_not_structural_or_whitespace_or_exponent_or_decimal(*p)) { +#ifdef JSON_TEST_NUMBERS // for unit testing + found_invalid_number(buf + offset); +#endif + return false; + } + i = 0; + } else { + if (!(is_integer(*p))) { // must start with an integer +#ifdef JSON_TEST_NUMBERS // for unit testing + found_invalid_number(buf + offset); +#endif + return false; + } + unsigned char digit = *p - 
'0'; + i = digit; + p++; + // the is_made_of_eight_digits_fast routine is unlikely to help here because + // we rarely see large integer parts like 123456789 + while (is_integer(*p)) { + digit = *p - '0'; + // a multiplication by 10 is cheaper than an arbitrary integer + // multiplication + i = 10 * i + digit; // might overflow, we will handle the overflow later + ++p; + } + } + int64_t exponent = 0; + bool is_float = false; + if ('.' == *p) { + is_float = true; // At this point we know that we have a float + // we continue with the fiction that we have an integer. If the + // floating point number is representable as x * 10^z for some integer + // z that fits in 53 bits, then we will be able to convert back the + // the integer into a float in a lossless manner. + ++p; + const char *const first_after_period = p; + if (is_integer(*p)) { + unsigned char digit = *p - '0'; + ++p; + i = i * 10 + digit; // might overflow + multiplication by 10 is likely + // cheaper than arbitrary mult. + // we will handle the overflow later + } else { +#ifdef JSON_TEST_NUMBERS // for unit testing + found_invalid_number(buf + offset); +#endif + return false; + } +#ifdef SWAR_NUMBER_PARSING + // this helps if we have lots of decimals! + // this turns out to be frequent enough. + if (is_made_of_eight_digits_fast(p)) { + i = i * 100000000 + parse_eight_digits_unrolled(p); + p += 8; + } +#endif + while (is_integer(*p)) { + unsigned char digit = *p - '0'; + ++p; + i = i * 10 + digit; // in rare cases, this will overflow, but that's ok + // because we have parse_highprecision_float later. 
+ } + exponent = first_after_period - p; + } + int digit_count = + p - start_digits - 1; // used later to guard against overflows + int64_t exp_number = 0; // exponential part + if (('e' == *p) || ('E' == *p)) { + is_float = true; + ++p; + bool neg_exp = false; + if ('-' == *p) { + neg_exp = true; + ++p; + } else if ('+' == *p) { + ++p; + } + if (!is_integer(*p)) { +#ifdef JSON_TEST_NUMBERS // for unit testing + found_invalid_number(buf + offset); +#endif + return false; + } + unsigned char digit = *p - '0'; + exp_number = digit; + p++; + if (is_integer(*p)) { + digit = *p - '0'; + exp_number = 10 * exp_number + digit; + ++p; + } + if (is_integer(*p)) { + digit = *p - '0'; + exp_number = 10 * exp_number + digit; + ++p; + } + while (is_integer(*p)) { + if (exp_number > 0x100000000) { // we need to check for overflows + // we refuse to parse this +#ifdef JSON_TEST_NUMBERS // for unit testing + found_invalid_number(buf + offset); +#endif + return false; + } + digit = *p - '0'; + exp_number = 10 * exp_number + digit; + ++p; + } + exponent += (neg_exp ? -exp_number : exp_number); + } + if (is_float) { + uint64_t power_index = 308 + exponent; + if (unlikely((digit_count >= 19))) { // this is uncommon + // It is possible that the integer had an overflow. + // We have to handle the case where we have 0.0000somenumber. + const char *start = start_digits; + while ((*start == '0') || (*start == '.')) { + start++; + } + // we over-decrement by one when there is a '.' + digit_count -= (start - start_digits); + if (digit_count >= 19) { + // Ok, chances are good that we had an overflow! + // this is almost never going to get called!!! + // we start anew, going slowly!!! + return parse_float(buf, pj, offset, found_minus); + } + } + if (unlikely((power_index > 2 * 308))) { // this is uncommon!!! + // this is almost never going to get called!!! + // we start anew, going slowly!!! 
+ return parse_float(buf, pj, offset, found_minus); + } + double factor = power_of_ten[power_index]; + factor = negative ? -factor : factor; + double d = i * factor; + pj.write_tape_double(d); +#ifdef JSON_TEST_NUMBERS // for unit testing + found_float(d, buf + offset); +#endif + } else { + if (unlikely(digit_count >= 18)) { // this is uncommon!!! + // there is a good chance that we had an overflow, so we need + // need to recover: we parse the whole thing again. + return parse_large_integer(buf, pj, offset, found_minus); + } + i = negative ? 0 - i : i; + pj.write_tape_s64(i); +#ifdef JSON_TEST_NUMBERS // for unit testing + found_integer(i, buf + offset); +#endif + } + return is_structural_or_whitespace(*p); +#endif // SIMDJSON_SKIPNUMBERPARSING +} + +/* end file src/generic/numberparsing.h */ + +} // namespace simdjson::haswell +UNTARGET_REGION + + + + +#endif // IS_X86_64 + + +#endif // SIMDJSON_HASWELL_NUMBERPARSING_H +/* end file src/generic/numberparsing.h */ + +TARGET_HASWELL +namespace simdjson::haswell { + +/* begin file src/generic/stage2_build_tape.h */ // This file contains the common code every implementation uses for stage2 // It is intended to be included multiple times and compiled multiple times // We assume the file in which it is include already includes @@ -8313,6 +7567,8 @@ error: } } // namespace stage2 +/* end file src/generic/stage2_build_tape.h */ +/* begin file src/generic/stage2_streaming_build_tape.h */ namespace stage2 { struct streaming_structural_parser: structural_parser { @@ -8467,6 +7723,7 @@ error: } } // namespace stage2 +/* end file src/generic/stage2_streaming_build_tape.h */ } // namespace simdjson::haswell UNTARGET_REGION @@ -8492,7 +7749,7 @@ UNTARGET_REGION #endif // IS_X86_64 #endif // SIMDJSON_HASWELL_STAGE2_BUILD_TAPE_H -/* end file src/haswell/stage2_build_tape.h */ +/* end file src/generic/stage2_streaming_build_tape.h */ /* begin file src/westmere/stage2_build_tape.h */ #ifndef SIMDJSON_WESTMERE_STAGE2_BUILD_TAPE_H 
#define SIMDJSON_WESTMERE_STAGE2_BUILD_TAPE_H @@ -8500,10 +7757,817 @@ UNTARGET_REGION #ifdef IS_X86_64 +/* begin file src/westmere/stringparsing.h */ +#ifndef SIMDJSON_WESTMERE_STRINGPARSING_H +#define SIMDJSON_WESTMERE_STRINGPARSING_H + + +#ifdef IS_X86_64 + +/* westmere/simd.h already included: #include "westmere/simd.h" */ +/* jsoncharutils.h already included: #include "jsoncharutils.h" */ +/* westmere/intrinsics.h already included: #include "westmere/intrinsics.h" */ +/* westmere/bitmanipulation.h already included: #include "westmere/bitmanipulation.h" */ TARGET_WESTMERE namespace simdjson::westmere { +using namespace simd; + +// Holds backslashes and quotes locations. +struct parse_string_helper { + uint32_t bs_bits; + uint32_t quote_bits; + static const uint32_t BYTES_PROCESSED = 32; +}; + +really_inline parse_string_helper find_bs_bits_and_quote_bits(const uint8_t *src, uint8_t *dst) { + // this can read up to 31 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(SIMDJSON_PADDING >= (parse_string_helper::BYTES_PROCESSED - 1)); + simd8 v0(src); + simd8 v1(src + 16); + v0.store(dst); + v1.store(dst + 16); + uint64_t bs_and_quote = simd8x64(v0 == '\\', v1 == '\\', v0 == '"', v1 == '"').to_bitmask(); + return { + static_cast(bs_and_quote), // bs_bits + static_cast(bs_and_quote >> 32) // quote_bits + }; +} + +/* begin file src/generic/stringparsing.h */ +// This file contains the common code every implementation uses +// It is intended to be included multiple times and compiled multiple times +// We assume the file in which it is include already includes +// "stringparsing.h" (this simplifies amalgation) + +// begin copypasta +// These chars yield themselves: " \ / +// b -> backspace, f -> formfeed, n -> newline, r -> cr, t -> horizontal tab +// u not handled in this table as it's complex +static const uint8_t escape_map[256] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x0. 
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0x22, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x2f, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x4. + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x5c, 0, 0, 0, // 0x5. + 0, 0, 0x08, 0, 0, 0, 0x0c, 0, 0, 0, 0, 0, 0, 0, 0x0a, 0, // 0x6. + 0, 0, 0x0d, 0, 0x09, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x7. + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +// handle a unicode codepoint +// write appropriate values into dest +// src will advance 6 bytes or 12 bytes +// dest will advance a variable amount (return via pointer) +// return true if the unicode codepoint was valid +// We work in little-endian then swap at write time +WARN_UNUSED +really_inline bool handle_unicode_codepoint(const uint8_t **src_ptr, + uint8_t **dst_ptr) { + // hex_to_u32_nocheck fills high 16 bits of the return value with 1s if the + // conversion isn't valid; we defer the check for this to inside the + // multilingual plane check + uint32_t code_point = hex_to_u32_nocheck(*src_ptr + 2); + *src_ptr += 6; + // check for low surrogate for characters outside the Basic + // Multilingual Plane. + if (code_point >= 0xd800 && code_point < 0xdc00) { + if (((*src_ptr)[0] != '\\') || (*src_ptr)[1] != 'u') { + return false; + } + uint32_t code_point_2 = hex_to_u32_nocheck(*src_ptr + 2); + + // if the first code point is invalid we will get here, as we will go past + // the check for being outside the Basic Multilingual plane. 
If we don't + // find a \u immediately afterwards we fail out anyhow, but if we do, + // this check catches both the case of the first code point being invalid + // or the second code point being invalid. + if ((code_point | code_point_2) >> 16) { + return false; + } + + code_point = + (((code_point - 0xd800) << 10) | (code_point_2 - 0xdc00)) + 0x10000; + *src_ptr += 6; + } + size_t offset = codepoint_to_utf8(code_point, *dst_ptr); + *dst_ptr += offset; + return offset > 0; +} + +WARN_UNUSED really_inline bool parse_string(UNUSED const uint8_t *buf, + UNUSED size_t len, ParsedJson &pj, + UNUSED const uint32_t depth, + UNUSED uint32_t offset) { + pj.write_tape(pj.current_string_buf_loc - pj.string_buf.get(), '"'); + const uint8_t *src = &buf[offset + 1]; /* we know that buf at offset is a " */ + uint8_t *dst = pj.current_string_buf_loc + sizeof(uint32_t); + const uint8_t *const start_of_string = dst; + while (1) { + parse_string_helper helper = find_bs_bits_and_quote_bits(src, dst); + if (((helper.bs_bits - 1) & helper.quote_bits) != 0) { + /* we encountered quotes first. Move dst to point to quotes and exit + */ + + /* find out where the quote is... */ + auto quote_dist = trailing_zeroes(helper.quote_bits); + + /* NULL termination is still handy if you expect all your strings to + * be NULL terminated? */ + /* It comes at a small cost */ + dst[quote_dist] = 0; + + uint32_t str_length = (dst - start_of_string) + quote_dist; + memcpy(pj.current_string_buf_loc, &str_length, sizeof(str_length)); + /***************************** + * Above, check for overflow in case someone has a crazy string + * (>=4GB?) _ + * But only add the overflow check when the document itself exceeds + * 4GB + * Currently unneeded because we refuse to parse docs larger or equal + * to 4GB. 
+ ****************************/ + + /* we advance the point, accounting for the fact that we have a NULL + * termination */ + pj.current_string_buf_loc = dst + quote_dist + 1; + return true; + } + if (((helper.quote_bits - 1) & helper.bs_bits) != 0) { + /* find out where the backspace is */ + auto bs_dist = trailing_zeroes(helper.bs_bits); + uint8_t escape_char = src[bs_dist + 1]; + /* we encountered backslash first. Handle backslash */ + if (escape_char == 'u') { + /* move src/dst up to the start; they will be further adjusted + within the unicode codepoint handling code. */ + src += bs_dist; + dst += bs_dist; + if (!handle_unicode_codepoint(&src, &dst)) { + return false; + } + } else { + /* simple 1:1 conversion. Will eat bs_dist+2 characters in input and + * write bs_dist+1 characters to output + * note this may reach beyond the part of the buffer we've actually + * seen. I think this is ok */ + uint8_t escape_result = escape_map[escape_char]; + if (escape_result == 0u) { + return false; /* bogus escape value is an error */ + } + dst[bs_dist] = escape_result; + src += bs_dist + 2; + dst += bs_dist + 1; + } + } else { + /* they are the same. Since they can't co-occur, it means we + * encountered neither. 
*/ + src += parse_string_helper::BYTES_PROCESSED; + dst += parse_string_helper::BYTES_PROCESSED; + } + } + /* can't be reached */ + return true; +} +/* end file src/generic/stringparsing.h */ + +} // namespace simdjson::westmere +UNTARGET_REGION + +#endif // IS_X86_64 + +#endif +/* end file src/generic/stringparsing.h */ +/* begin file src/westmere/numberparsing.h */ +#ifndef SIMDJSON_WESTMERE_NUMBERPARSING_H +#define SIMDJSON_WESTMERE_NUMBERPARSING_H + +#ifdef IS_X86_64 + +/* westmere/intrinsics.h already included: #include "westmere/intrinsics.h" */ +/* westmere/intrinsics.h already included: #include "westmere/intrinsics.h" */ +/* westmere/bitmanipulation.h already included: #include "westmere/bitmanipulation.h" */ +/* jsoncharutils.h already included: #include "jsoncharutils.h" */ +#include +#include + + +#ifdef JSON_TEST_NUMBERS // for unit testing +void found_invalid_number(const uint8_t *buf); +void found_integer(int64_t result, const uint8_t *buf); +void found_unsigned_integer(uint64_t result, const uint8_t *buf); +void found_float(double result, const uint8_t *buf); +#endif + + +TARGET_WESTMERE +namespace simdjson::westmere { +static inline uint32_t parse_eight_digits_unrolled(const char *chars) { + // this actually computes *16* values so we are being wasteful. 
+ const __m128i ascii0 = _mm_set1_epi8('0'); + const __m128i mul_1_10 = + _mm_setr_epi8(10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1); + const __m128i mul_1_100 = _mm_setr_epi16(100, 1, 100, 1, 100, 1, 100, 1); + const __m128i mul_1_10000 = + _mm_setr_epi16(10000, 1, 10000, 1, 10000, 1, 10000, 1); + const __m128i input = _mm_sub_epi8( + _mm_loadu_si128(reinterpret_cast(chars)), ascii0); + const __m128i t1 = _mm_maddubs_epi16(input, mul_1_10); + const __m128i t2 = _mm_madd_epi16(t1, mul_1_100); + const __m128i t3 = _mm_packus_epi32(t2, t2); + const __m128i t4 = _mm_madd_epi16(t3, mul_1_10000); + return _mm_cvtsi128_si32( + t4); // only captures the sum of the first 8 digits, drop the rest +} + +#define SWAR_NUMBER_PARSING + +/* begin file src/generic/numberparsing.h */ + +// Allowable floating-point values range +// std::numeric_limits::lowest() to std::numeric_limits::max(), +// so from -1.7976e308 all the way to 1.7975e308 in binary64. The lowest +// non-zero normal values is std::numeric_limits::min() or +// about 2.225074e-308. 
+static const double power_of_ten[] = { + 1e-308, 1e-307, 1e-306, 1e-305, 1e-304, 1e-303, 1e-302, 1e-301, 1e-300, + 1e-299, 1e-298, 1e-297, 1e-296, 1e-295, 1e-294, 1e-293, 1e-292, 1e-291, + 1e-290, 1e-289, 1e-288, 1e-287, 1e-286, 1e-285, 1e-284, 1e-283, 1e-282, + 1e-281, 1e-280, 1e-279, 1e-278, 1e-277, 1e-276, 1e-275, 1e-274, 1e-273, + 1e-272, 1e-271, 1e-270, 1e-269, 1e-268, 1e-267, 1e-266, 1e-265, 1e-264, + 1e-263, 1e-262, 1e-261, 1e-260, 1e-259, 1e-258, 1e-257, 1e-256, 1e-255, + 1e-254, 1e-253, 1e-252, 1e-251, 1e-250, 1e-249, 1e-248, 1e-247, 1e-246, + 1e-245, 1e-244, 1e-243, 1e-242, 1e-241, 1e-240, 1e-239, 1e-238, 1e-237, + 1e-236, 1e-235, 1e-234, 1e-233, 1e-232, 1e-231, 1e-230, 1e-229, 1e-228, + 1e-227, 1e-226, 1e-225, 1e-224, 1e-223, 1e-222, 1e-221, 1e-220, 1e-219, + 1e-218, 1e-217, 1e-216, 1e-215, 1e-214, 1e-213, 1e-212, 1e-211, 1e-210, + 1e-209, 1e-208, 1e-207, 1e-206, 1e-205, 1e-204, 1e-203, 1e-202, 1e-201, + 1e-200, 1e-199, 1e-198, 1e-197, 1e-196, 1e-195, 1e-194, 1e-193, 1e-192, + 1e-191, 1e-190, 1e-189, 1e-188, 1e-187, 1e-186, 1e-185, 1e-184, 1e-183, + 1e-182, 1e-181, 1e-180, 1e-179, 1e-178, 1e-177, 1e-176, 1e-175, 1e-174, + 1e-173, 1e-172, 1e-171, 1e-170, 1e-169, 1e-168, 1e-167, 1e-166, 1e-165, + 1e-164, 1e-163, 1e-162, 1e-161, 1e-160, 1e-159, 1e-158, 1e-157, 1e-156, + 1e-155, 1e-154, 1e-153, 1e-152, 1e-151, 1e-150, 1e-149, 1e-148, 1e-147, + 1e-146, 1e-145, 1e-144, 1e-143, 1e-142, 1e-141, 1e-140, 1e-139, 1e-138, + 1e-137, 1e-136, 1e-135, 1e-134, 1e-133, 1e-132, 1e-131, 1e-130, 1e-129, + 1e-128, 1e-127, 1e-126, 1e-125, 1e-124, 1e-123, 1e-122, 1e-121, 1e-120, + 1e-119, 1e-118, 1e-117, 1e-116, 1e-115, 1e-114, 1e-113, 1e-112, 1e-111, + 1e-110, 1e-109, 1e-108, 1e-107, 1e-106, 1e-105, 1e-104, 1e-103, 1e-102, + 1e-101, 1e-100, 1e-99, 1e-98, 1e-97, 1e-96, 1e-95, 1e-94, 1e-93, + 1e-92, 1e-91, 1e-90, 1e-89, 1e-88, 1e-87, 1e-86, 1e-85, 1e-84, + 1e-83, 1e-82, 1e-81, 1e-80, 1e-79, 1e-78, 1e-77, 1e-76, 1e-75, + 1e-74, 1e-73, 1e-72, 1e-71, 1e-70, 1e-69, 1e-68, 1e-67, 
1e-66, + 1e-65, 1e-64, 1e-63, 1e-62, 1e-61, 1e-60, 1e-59, 1e-58, 1e-57, + 1e-56, 1e-55, 1e-54, 1e-53, 1e-52, 1e-51, 1e-50, 1e-49, 1e-48, + 1e-47, 1e-46, 1e-45, 1e-44, 1e-43, 1e-42, 1e-41, 1e-40, 1e-39, + 1e-38, 1e-37, 1e-36, 1e-35, 1e-34, 1e-33, 1e-32, 1e-31, 1e-30, + 1e-29, 1e-28, 1e-27, 1e-26, 1e-25, 1e-24, 1e-23, 1e-22, 1e-21, + 1e-20, 1e-19, 1e-18, 1e-17, 1e-16, 1e-15, 1e-14, 1e-13, 1e-12, + 1e-11, 1e-10, 1e-9, 1e-8, 1e-7, 1e-6, 1e-5, 1e-4, 1e-3, + 1e-2, 1e-1, 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, + 1e7, 1e8, 1e9, 1e10, 1e11, 1e12, 1e13, 1e14, 1e15, + 1e16, 1e17, 1e18, 1e19, 1e20, 1e21, 1e22, 1e23, 1e24, + 1e25, 1e26, 1e27, 1e28, 1e29, 1e30, 1e31, 1e32, 1e33, + 1e34, 1e35, 1e36, 1e37, 1e38, 1e39, 1e40, 1e41, 1e42, + 1e43, 1e44, 1e45, 1e46, 1e47, 1e48, 1e49, 1e50, 1e51, + 1e52, 1e53, 1e54, 1e55, 1e56, 1e57, 1e58, 1e59, 1e60, + 1e61, 1e62, 1e63, 1e64, 1e65, 1e66, 1e67, 1e68, 1e69, + 1e70, 1e71, 1e72, 1e73, 1e74, 1e75, 1e76, 1e77, 1e78, + 1e79, 1e80, 1e81, 1e82, 1e83, 1e84, 1e85, 1e86, 1e87, + 1e88, 1e89, 1e90, 1e91, 1e92, 1e93, 1e94, 1e95, 1e96, + 1e97, 1e98, 1e99, 1e100, 1e101, 1e102, 1e103, 1e104, 1e105, + 1e106, 1e107, 1e108, 1e109, 1e110, 1e111, 1e112, 1e113, 1e114, + 1e115, 1e116, 1e117, 1e118, 1e119, 1e120, 1e121, 1e122, 1e123, + 1e124, 1e125, 1e126, 1e127, 1e128, 1e129, 1e130, 1e131, 1e132, + 1e133, 1e134, 1e135, 1e136, 1e137, 1e138, 1e139, 1e140, 1e141, + 1e142, 1e143, 1e144, 1e145, 1e146, 1e147, 1e148, 1e149, 1e150, + 1e151, 1e152, 1e153, 1e154, 1e155, 1e156, 1e157, 1e158, 1e159, + 1e160, 1e161, 1e162, 1e163, 1e164, 1e165, 1e166, 1e167, 1e168, + 1e169, 1e170, 1e171, 1e172, 1e173, 1e174, 1e175, 1e176, 1e177, + 1e178, 1e179, 1e180, 1e181, 1e182, 1e183, 1e184, 1e185, 1e186, + 1e187, 1e188, 1e189, 1e190, 1e191, 1e192, 1e193, 1e194, 1e195, + 1e196, 1e197, 1e198, 1e199, 1e200, 1e201, 1e202, 1e203, 1e204, + 1e205, 1e206, 1e207, 1e208, 1e209, 1e210, 1e211, 1e212, 1e213, + 1e214, 1e215, 1e216, 1e217, 1e218, 1e219, 1e220, 1e221, 1e222, + 1e223, 1e224, 1e225, 1e226, 
1e227, 1e228, 1e229, 1e230, 1e231, + 1e232, 1e233, 1e234, 1e235, 1e236, 1e237, 1e238, 1e239, 1e240, + 1e241, 1e242, 1e243, 1e244, 1e245, 1e246, 1e247, 1e248, 1e249, + 1e250, 1e251, 1e252, 1e253, 1e254, 1e255, 1e256, 1e257, 1e258, + 1e259, 1e260, 1e261, 1e262, 1e263, 1e264, 1e265, 1e266, 1e267, + 1e268, 1e269, 1e270, 1e271, 1e272, 1e273, 1e274, 1e275, 1e276, + 1e277, 1e278, 1e279, 1e280, 1e281, 1e282, 1e283, 1e284, 1e285, + 1e286, 1e287, 1e288, 1e289, 1e290, 1e291, 1e292, 1e293, 1e294, + 1e295, 1e296, 1e297, 1e298, 1e299, 1e300, 1e301, 1e302, 1e303, + 1e304, 1e305, 1e306, 1e307, 1e308}; + +static inline bool is_integer(char c) { + return (c >= '0' && c <= '9'); + // this gets compiled to (uint8_t)(c - '0') <= 9 on all decent compilers +} + +// We need to check that the character following a zero is valid. This is +// probably frequent and it is hard than it looks. We are building all of this +// just to differentiate between 0x1 (invalid), 0,1 (valid) 0e1 (valid)... +const bool structural_or_whitespace_or_exponent_or_decimal_negated[256] = { + 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, + 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; + +really_inline bool +is_not_structural_or_whitespace_or_exponent_or_decimal(unsigned char c) { + return 
structural_or_whitespace_or_exponent_or_decimal_negated[c]; +} + +// check quickly whether the next 8 chars are made of digits +// at a glance, it looks better than Mula's +// http://0x80.pl/articles/swar-digits-validate.html +static inline bool is_made_of_eight_digits_fast(const char *chars) { + uint64_t val; + // this can read up to 7 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(7 <= SIMDJSON_PADDING); + memcpy(&val, chars, 8); + // a branchy method might be faster: + // return (( val & 0xF0F0F0F0F0F0F0F0 ) == 0x3030303030303030) + // && (( (val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0 ) == + // 0x3030303030303030); + return (((val & 0xF0F0F0F0F0F0F0F0) | + (((val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4)) == + 0x3333333333333333); +} + + +// +// This function computes base * 10 ^ (- negative_exponent ). +// It is only even going to be used when negative_exponent is tiny. +static double subnormal_power10(double base, int64_t negative_exponent) { + // avoid integer overflows in the pow expression, those values would + // become zero anyway. + if(negative_exponent < -1000) { + return 0; + } + + // this is probably not going to be fast + return base * 1e-308 * pow(10, negative_exponent + 308); +} + +// called by parse_number when we know that the output is a float, +// but where there might be some integer overflow. The trick here is to +// parse using floats from the start. +// Do not call this function directly as it skips some of the checks from +// parse_number +// +// This function will almost never be called!!! +// +// Note: a redesign could avoid this function entirely. 
+// +static never_inline bool parse_float(const uint8_t *const buf, ParsedJson &pj, + const uint32_t offset, bool found_minus) { + const char *p = reinterpret_cast(buf + offset); + bool negative = false; + if (found_minus) { + ++p; + negative = true; + } + long double i; + if (*p == '0') { // 0 cannot be followed by an integer + ++p; + i = 0; + } else { + unsigned char digit = *p - '0'; + i = digit; + p++; + while (is_integer(*p)) { + digit = *p - '0'; + i = 10 * i + digit; + ++p; + } + } + if ('.' == *p) { + ++p; + int fractional_weight = 308; + if (is_integer(*p)) { + unsigned char digit = *p - '0'; + ++p; + + fractional_weight--; + i = i + digit * (fractional_weight >= 0 ? power_of_ten[fractional_weight] + : 0); + } else { +#ifdef JSON_TEST_NUMBERS // for unit testing + found_invalid_number(buf + offset); +#endif + return false; + } + while (is_integer(*p)) { + unsigned char digit = *p - '0'; + ++p; + fractional_weight--; + i = i + digit * (fractional_weight >= 0 ? power_of_ten[fractional_weight] + : 0); + } + } + if (('e' == *p) || ('E' == *p)) { + ++p; + bool neg_exp = false; + if ('-' == *p) { + neg_exp = true; + ++p; + } else if ('+' == *p) { + ++p; + } + if (!is_integer(*p)) { +#ifdef JSON_TEST_NUMBERS // for unit testing + found_invalid_number(buf + offset); +#endif + return false; + } + unsigned char digit = *p - '0'; + int64_t exp_number = digit; // exponential part + p++; + if (is_integer(*p)) { + digit = *p - '0'; + exp_number = 10 * exp_number + digit; + ++p; + } + if (is_integer(*p)) { + digit = *p - '0'; + exp_number = 10 * exp_number + digit; + ++p; + } + if (is_integer(*p)) { + digit = *p - '0'; + exp_number = 10 * exp_number + digit; + ++p; + } + while (is_integer(*p)) { + if (exp_number > 0x100000000) { // we need to check for overflows +// we refuse to parse this +#ifdef JSON_TEST_NUMBERS // for unit testing + found_invalid_number(buf + offset); +#endif + return false; + } + digit = *p - '0'; + exp_number = 10 * exp_number + digit; + ++p; + } + 
if (unlikely(exp_number > 308)) { + // this path is unlikely + if (neg_exp) { + // We either have zero or a subnormal. + // We expect this to be uncommon so we go through a slow path. + i = subnormal_power10(i, -exp_number); + } else { +// We know for sure that we have a number that is too large, +// we refuse to parse this +#ifdef JSON_TEST_NUMBERS // for unit testing + found_invalid_number(buf + offset); +#endif + return false; + } + } else { + int exponent = (neg_exp ? -exp_number : exp_number); + // we have that exp_number is [0,308] so that + // exponent is [-308,308] so that + // 308 + exponent is in [0, 2 * 308] + i *= power_of_ten[308 + exponent]; + } + } + if (is_not_structural_or_whitespace(*p)) { + return false; + } + // check that we can go from long double to double safely. + if(i > std::numeric_limits::max()) { +#ifdef JSON_TEST_NUMBERS // for unit testing + found_invalid_number(buf + offset); +#endif + return false; + } + double d = negative ? -i : i; + pj.write_tape_double(d); +#ifdef JSON_TEST_NUMBERS // for unit testing + found_float(d, buf + offset); +#endif + return is_structural_or_whitespace(*p); +} + +// called by parse_number when we know that the output is an integer, +// but where there might be some integer overflow. +// we want to catch overflows! +// Do not call this function directly as it skips some of the checks from +// parse_number +// +// This function will almost never be called!!! 
+// +static never_inline bool parse_large_integer(const uint8_t *const buf, + ParsedJson &pj, + const uint32_t offset, + bool found_minus) { + const char *p = reinterpret_cast(buf + offset); + + bool negative = false; + if (found_minus) { + ++p; + negative = true; + } + uint64_t i; + if (*p == '0') { // 0 cannot be followed by an integer + ++p; + i = 0; + } else { + unsigned char digit = *p - '0'; + i = digit; + p++; + // the is_made_of_eight_digits_fast routine is unlikely to help here because + // we rarely see large integer parts like 123456789 + while (is_integer(*p)) { + digit = *p - '0'; + if (mul_overflow(i, 10, &i)) { +#ifdef JSON_TEST_NUMBERS // for unit testing + found_invalid_number(buf + offset); +#endif + return false; // overflow + } + if (add_overflow(i, digit, &i)) { +#ifdef JSON_TEST_NUMBERS // for unit testing + found_invalid_number(buf + offset); +#endif + return false; // overflow + } + ++p; + } + } + if (negative) { + if (i > 0x8000000000000000) { + // overflows! +#ifdef JSON_TEST_NUMBERS // for unit testing + found_invalid_number(buf + offset); +#endif + return false; // overflow + } else if (i == 0x8000000000000000) { + // In two's complement, we cannot represent 0x8000000000000000 + // as a positive signed integer, but the negative version is + // possible. + constexpr int64_t signed_answer = INT64_MIN; + pj.write_tape_s64(signed_answer); +#ifdef JSON_TEST_NUMBERS // for unit testing + found_integer(signed_answer, buf + offset); +#endif + } else { + // we can negate safely + int64_t signed_answer = -static_cast(i); + pj.write_tape_s64(signed_answer); +#ifdef JSON_TEST_NUMBERS // for unit testing + found_integer(signed_answer, buf + offset); +#endif + } + } else { + // we have a positive integer, the contract is that + // we try to represent it as a signed integer and only + // fallback on unsigned integers if absolutely necessary. 
+ if(i < 0x8000000000000000) { +#ifdef JSON_TEST_NUMBERS // for unit testing + found_integer(i, buf + offset); +#endif + pj.write_tape_s64(i); + } else { +#ifdef JSON_TEST_NUMBERS // for unit testing + found_unsigned_integer(i, buf + offset); +#endif + pj.write_tape_u64(i); + } + } + return is_structural_or_whitespace(*p); +} + +// parse the number at buf + offset +// define JSON_TEST_NUMBERS for unit testing +// +// It is assumed that the number is followed by a structural ({,},],[) character +// or a white space character. If that is not the case (e.g., when the JSON +// document is made of a single number), then it is necessary to copy the +// content and append a space before calling this function. +// +// Our objective is accurate parsing (ULP of 0 or 1) at high speed. +static really_inline bool parse_number(const uint8_t *const buf, ParsedJson &pj, + const uint32_t offset, + bool found_minus) { +#ifdef SIMDJSON_SKIPNUMBERPARSING // for performance analysis, it is sometimes + // useful to skip parsing + pj.write_tape_s64(0); // always write zero + return true; // always succeeds +#else + const char *p = reinterpret_cast(buf + offset); + bool negative = false; + if (found_minus) { + ++p; + negative = true; + if (!is_integer(*p)) { // a negative sign must be followed by an integer +#ifdef JSON_TEST_NUMBERS // for unit testing + found_invalid_number(buf + offset); +#endif + return false; + } + } + const char *const start_digits = p; + + uint64_t i; // an unsigned int avoids signed overflows (which are bad) + if (*p == '0') { // 0 cannot be followed by an integer + ++p; + if (is_not_structural_or_whitespace_or_exponent_or_decimal(*p)) { +#ifdef JSON_TEST_NUMBERS // for unit testing + found_invalid_number(buf + offset); +#endif + return false; + } + i = 0; + } else { + if (!(is_integer(*p))) { // must start with an integer +#ifdef JSON_TEST_NUMBERS // for unit testing + found_invalid_number(buf + offset); +#endif + return false; + } + unsigned char digit = *p - 
'0'; + i = digit; + p++; + // the is_made_of_eight_digits_fast routine is unlikely to help here because + // we rarely see large integer parts like 123456789 + while (is_integer(*p)) { + digit = *p - '0'; + // a multiplication by 10 is cheaper than an arbitrary integer + // multiplication + i = 10 * i + digit; // might overflow, we will handle the overflow later + ++p; + } + } + int64_t exponent = 0; + bool is_float = false; + if ('.' == *p) { + is_float = true; // At this point we know that we have a float + // we continue with the fiction that we have an integer. If the + // floating point number is representable as x * 10^z for some integer + // z that fits in 53 bits, then we will be able to convert back the + // the integer into a float in a lossless manner. + ++p; + const char *const first_after_period = p; + if (is_integer(*p)) { + unsigned char digit = *p - '0'; + ++p; + i = i * 10 + digit; // might overflow + multiplication by 10 is likely + // cheaper than arbitrary mult. + // we will handle the overflow later + } else { +#ifdef JSON_TEST_NUMBERS // for unit testing + found_invalid_number(buf + offset); +#endif + return false; + } +#ifdef SWAR_NUMBER_PARSING + // this helps if we have lots of decimals! + // this turns out to be frequent enough. + if (is_made_of_eight_digits_fast(p)) { + i = i * 100000000 + parse_eight_digits_unrolled(p); + p += 8; + } +#endif + while (is_integer(*p)) { + unsigned char digit = *p - '0'; + ++p; + i = i * 10 + digit; // in rare cases, this will overflow, but that's ok + // because we have parse_highprecision_float later. 
+ } + exponent = first_after_period - p; + } + int digit_count = + p - start_digits - 1; // used later to guard against overflows + int64_t exp_number = 0; // exponential part + if (('e' == *p) || ('E' == *p)) { + is_float = true; + ++p; + bool neg_exp = false; + if ('-' == *p) { + neg_exp = true; + ++p; + } else if ('+' == *p) { + ++p; + } + if (!is_integer(*p)) { +#ifdef JSON_TEST_NUMBERS // for unit testing + found_invalid_number(buf + offset); +#endif + return false; + } + unsigned char digit = *p - '0'; + exp_number = digit; + p++; + if (is_integer(*p)) { + digit = *p - '0'; + exp_number = 10 * exp_number + digit; + ++p; + } + if (is_integer(*p)) { + digit = *p - '0'; + exp_number = 10 * exp_number + digit; + ++p; + } + while (is_integer(*p)) { + if (exp_number > 0x100000000) { // we need to check for overflows + // we refuse to parse this +#ifdef JSON_TEST_NUMBERS // for unit testing + found_invalid_number(buf + offset); +#endif + return false; + } + digit = *p - '0'; + exp_number = 10 * exp_number + digit; + ++p; + } + exponent += (neg_exp ? -exp_number : exp_number); + } + if (is_float) { + uint64_t power_index = 308 + exponent; + if (unlikely((digit_count >= 19))) { // this is uncommon + // It is possible that the integer had an overflow. + // We have to handle the case where we have 0.0000somenumber. + const char *start = start_digits; + while ((*start == '0') || (*start == '.')) { + start++; + } + // we over-decrement by one when there is a '.' + digit_count -= (start - start_digits); + if (digit_count >= 19) { + // Ok, chances are good that we had an overflow! + // this is almost never going to get called!!! + // we start anew, going slowly!!! + return parse_float(buf, pj, offset, found_minus); + } + } + if (unlikely((power_index > 2 * 308))) { // this is uncommon!!! + // this is almost never going to get called!!! + // we start anew, going slowly!!! 
+ return parse_float(buf, pj, offset, found_minus); + } + double factor = power_of_ten[power_index]; + factor = negative ? -factor : factor; + double d = i * factor; + pj.write_tape_double(d); +#ifdef JSON_TEST_NUMBERS // for unit testing + found_float(d, buf + offset); +#endif + } else { + if (unlikely(digit_count >= 18)) { // this is uncommon!!! + // there is a good chance that we had an overflow, so we + // need to recover: we parse the whole thing again. + return parse_large_integer(buf, pj, offset, found_minus); + } + i = negative ? 0 - i : i; + pj.write_tape_s64(i); +#ifdef JSON_TEST_NUMBERS // for unit testing + found_integer(i, buf + offset); +#endif + } + return is_structural_or_whitespace(*p); +#endif // SIMDJSON_SKIPNUMBERPARSING +} + +/* end file src/generic/numberparsing.h */ + +} // namespace simdjson::westmere +UNTARGET_REGION + + + +#endif // IS_X86_64 +#endif // SIMDJSON_WESTMERE_NUMBERPARSING_H +/* end file src/generic/numberparsing.h */ + +TARGET_WESTMERE +namespace simdjson::westmere { + +/* begin file src/generic/stage2_build_tape.h */ // This file contains the common code every implementation uses for stage2 // It is intended to be included multiple times and compiled multiple times // We assume the file in which it is include already includes @@ -8899,6 +8963,8 @@ error: } } // namespace stage2 +/* end file src/generic/stage2_build_tape.h */ +/* begin file src/generic/stage2_streaming_build_tape.h */ namespace stage2 { struct streaming_structural_parser: structural_parser { @@ -9053,6 +9119,7 @@ error: } } // namespace stage2 +/* end file src/generic/stage2_streaming_build_tape.h */ } // namespace simdjson::westmere UNTARGET_REGION @@ -9079,7 +9146,8 @@ UNTARGET_REGION #endif // IS_X86_64 #endif // SIMDJSON_WESTMERE_STAGE2_BUILD_TAPE_H -/* end file src/westmere/stage2_build_tape.h */ +/* end file src/generic/stage2_streaming_build_tape.h */ +/* end file src/generic/stage2_streaming_build_tape.h */ /* begin file src/parsedjson.cpp */ 
namespace simdjson { diff --git a/singleheader/simdjson.h b/singleheader/simdjson.h index 5018b213..45c83507 100755 --- a/singleheader/simdjson.h +++ b/singleheader/simdjson.h @@ -1,4 +1,4 @@ -/* auto-generated on Thu Jan 30 10:52:58 EST 2020. Do not edit! */ +/* auto-generated on Sun Feb 2 15:10:09 PST 2020. Do not edit! */ /* begin file include/simdjson/simdjson_version.h */ // /include/simdjson/simdjson_version.h automatically generated by release.py, // do not change by hand @@ -1904,6 +1904,14 @@ inline ParsedJson build_parsed_json(const padded_string &s) { } // namespace simdjson #endif /* end file include/simdjson/jsonparser.h */ +/* begin file include/simdjson/jsonstream.h */ +#ifndef SIMDJSON_JSONSTREAM_H +#define SIMDJSON_JSONSTREAM_H + +#include +#include +#include +#include /* begin file src/jsoncharutils.h */ #ifndef SIMDJSON_JSONCHARUTILS_H #define SIMDJSON_JSONCHARUTILS_H @@ -2229,12 +2237,6 @@ inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) { #endif /* end file src/jsoncharutils.h */ -/* begin file include/simdjson/jsonstream.h */ -#ifndef SIMDJSON_JSONSTREAM_H -#define SIMDJSON_JSONSTREAM_H - -#include -#include namespace simdjson { @@ -2358,7 +2360,7 @@ private: inline size_t remaining() const { return str.size() - str_start; } const string_container &str; - size_t _batch_size; + size_t _batch_size; // this is actually variable! 
size_t str_start{0}; size_t next_json{0}; bool load_next_batch{true}; @@ -2534,7 +2536,7 @@ int JsonStream::json_parse(ParsedJson &pj) { if (unlikely(load_next_batch)) { // First time loading if (!stage_1_thread.joinable()) { - _batch_size = std::min(_batch_size, remaining()); + _batch_size = (std::min)(_batch_size, remaining()); _batch_size = trimmed_length_safe_utf8((const char *)buf(), _batch_size); if (_batch_size == 0) { pj.error_code = simdjson::UTF8_ERROR; @@ -2571,7 +2573,7 @@ int JsonStream::json_parse(ParsedJson &pj) { if (remaining() - _batch_size > 0) { last_json_buffer_loc = pj.structural_indexes[find_last_json_buf_idx(buf(), _batch_size, pj)]; - _batch_size = std::min(_batch_size, remaining() - last_json_buffer_loc); + _batch_size = (std::min)(_batch_size, remaining() - last_json_buffer_loc); if (_batch_size > 0) { _batch_size = trimmed_length_safe_utf8( (const char *)(buf() + last_json_buffer_loc), _batch_size); @@ -2627,7 +2629,7 @@ int JsonStream::json_parse(ParsedJson &pj) { if (unlikely(load_next_batch)) { advance(current_buffer_loc); n_bytes_parsed += current_buffer_loc; - _batch_size = std::min(_batch_size, remaining()); + _batch_size = (std::min)(_batch_size, remaining()); _batch_size = trimmed_length_safe_utf8((const char *)buf(), _batch_size); int stage1_is_ok = best_stage1(buf(), _batch_size, pj, true); if (stage1_is_ok != simdjson::SUCCESS) { @@ -2664,4 +2666,4 @@ int JsonStream::json_parse(ParsedJson &pj) { } // end of namespace simdjson #endif // SIMDJSON_JSONSTREAM_H -/* end file include/simdjson/jsonstream.h */ +/* end file src/jsoncharutils.h */