From bf67c967d65623a76bd06c1873a2aed42ec314b2 Mon Sep 17 00:00:00 2001 From: John Keiser Date: Thu, 16 Jul 2020 13:42:51 -0700 Subject: [PATCH] Inline jsoncharutils per-implementation --- src/arm64/dom_parser_implementation.cpp | 1 + src/fallback/dom_parser_implementation.cpp | 1 + src/generic/stage2/jsoncharutils.h | 134 ++++++++++++++++++ src/generic/stage2/numberparsing.h | 1 - src/generic/stage2/stringparsing.h | 2 - src/haswell/dom_parser_implementation.cpp | 1 + ...jsoncharutils.h => jsoncharutils_tables.h} | 118 +-------------- src/simdjson.cpp | 2 +- src/westmere/dom_parser_implementation.cpp | 2 + 9 files changed, 143 insertions(+), 119 deletions(-) create mode 100644 src/generic/stage2/jsoncharutils.h rename src/{jsoncharutils.h => jsoncharutils_tables.h} (92%) diff --git a/src/arm64/dom_parser_implementation.cpp b/src/arm64/dom_parser_implementation.cpp index 0dee412b..50063149 100644 --- a/src/arm64/dom_parser_implementation.cpp +++ b/src/arm64/dom_parser_implementation.cpp @@ -1,5 +1,6 @@ #include "arm64/begin_implementation.h" #include "arm64/dom_parser_implementation.h" +#include "generic/stage2/jsoncharutils.h" // // Stage 1 diff --git a/src/fallback/dom_parser_implementation.cpp b/src/fallback/dom_parser_implementation.cpp index 1df7d868..98034332 100644 --- a/src/fallback/dom_parser_implementation.cpp +++ b/src/fallback/dom_parser_implementation.cpp @@ -1,5 +1,6 @@ #include "fallback/begin_implementation.h" #include "fallback/dom_parser_implementation.h" +#include "generic/stage2/jsoncharutils.h" // // Stage 1 diff --git a/src/generic/stage2/jsoncharutils.h b/src/generic/stage2/jsoncharutils.h new file mode 100644 index 00000000..e5f3755a --- /dev/null +++ b/src/generic/stage2/jsoncharutils.h @@ -0,0 +1,134 @@ +namespace simdjson { +namespace SIMDJSON_IMPLEMENTATION { +namespace stage2 { + +// return non-zero if not a structural or whitespace char +// zero otherwise +really_inline uint32_t is_not_structural_or_whitespace_or_null(uint8_t c) { + 
return structural_or_whitespace_or_null_negated[c]; +} + +// return non-zero if not a structural or whitespace char +// zero otherwise +really_inline uint32_t is_not_structural_or_whitespace(uint8_t c) { + return structural_or_whitespace_negated[c]; +} + +really_inline uint32_t is_structural_or_whitespace_or_null(uint8_t c) { + return structural_or_whitespace_or_null[c]; +} + +really_inline uint32_t is_structural_or_whitespace(uint8_t c) { + return structural_or_whitespace[c]; +} + +// returns a value with the high 16 bits set if not valid +// otherwise returns the conversion of the 4 hex digits at src into the bottom +// 16 bits of the 32-bit return register +// +// see +// https://lemire.me/blog/2019/04/17/parsing-short-hexadecimal-strings-efficiently/ +static inline uint32_t hex_to_u32_nocheck( + const uint8_t *src) { // strictly speaking, static inline is a C-ism + uint32_t v1 = digit_to_val32[630 + src[0]]; + uint32_t v2 = digit_to_val32[420 + src[1]]; + uint32_t v3 = digit_to_val32[210 + src[2]]; + uint32_t v4 = digit_to_val32[0 + src[3]]; + return v1 | v2 | v3 | v4; +} + +// given a code point cp, writes to c +// the utf-8 code, outputting the length in +// bytes, if the length is zero, the code point +// is invalid +// +// This can possibly be made faster using pdep +// and clz and table lookups, but JSON documents +// have few escaped code points, and the following +// function looks cheap. +// +// Note: we assume that surrogates are treated separately +// +inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) { + if (cp <= 0x7F) { + c[0] = uint8_t(cp); + return 1; // ascii + } + if (cp <= 0x7FF) { + c[0] = uint8_t((cp >> 6) + 192); + c[1] = uint8_t((cp & 63) + 128); + return 2; // universal plane + // Surrogates are treated elsewhere... 
+ //} //else if (0xd800 <= cp && cp <= 0xdfff) { + // return 0; // surrogates // could put assert here + } else if (cp <= 0xFFFF) { + c[0] = uint8_t((cp >> 12) + 224); + c[1] = uint8_t(((cp >> 6) & 63) + 128); + c[2] = uint8_t((cp & 63) + 128); + return 3; + } else if (cp <= 0x10FFFF) { // if you know you have a valid code point, this + // is not needed + c[0] = uint8_t((cp >> 18) + 240); + c[1] = uint8_t(((cp >> 12) & 63) + 128); + c[2] = uint8_t(((cp >> 6) & 63) + 128); + c[3] = uint8_t((cp & 63) + 128); + return 4; + } + // will return 0 when the code point was too large. + return 0; // bad r +} + +//// +// The following code is used in number parsing. It is not +// properly "char utils" stuff, but it lives here so that each +// implementation (instruction set) compiles and inlines its +// own copy. +/// + +constexpr int FASTFLOAT_SMALLEST_POWER = -325; +constexpr int FASTFLOAT_LARGEST_POWER = 308; + +struct value128 { + uint64_t low; + uint64_t high; +}; + +#ifdef SIMDJSON_IS_32BITS // _umul128 for x86, arm +// this is a slow emulation routine for 32-bit +// +static inline uint64_t __emulu(uint32_t x, uint32_t y) { + return x * (uint64_t)y; +} +static inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { + uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd); + uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd); + uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32)); + uint64_t adbc_carry = !!(adbc < ad); + uint64_t lo = bd + (adbc << 32); + *hi = __emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) + + (adbc_carry << 32) + !!(lo < bd); + return lo; +} +#endif + +really_inline value128 full_multiplication(uint64_t value1, uint64_t value2) { + value128 answer; +#if defined(SIMDJSON_REGULAR_VISUAL_STUDIO) || defined(SIMDJSON_IS_32BITS) +#ifdef _M_ARM64 + // ARM64 has native support for 64-bit multiplications, no need to emulate + answer.high = __umulh(value1, value2); + answer.low = value1 * value2; +#else +
answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 +#endif // _M_ARM64 +#else // defined(SIMDJSON_REGULAR_VISUAL_STUDIO) || defined(SIMDJSON_IS_32BITS) + __uint128_t r = ((__uint128_t)value1) * value2; + answer.low = uint64_t(r); + answer.high = uint64_t(r >> 64); +#endif + return answer; +} + +} // namespace stage2 +} // namespace SIMDJSON_IMPLEMENTATION +} // namespace simdjson diff --git a/src/generic/stage2/numberparsing.h b/src/generic/stage2/numberparsing.h index 74904c9a..6f1eb4ad 100644 --- a/src/generic/stage2/numberparsing.h +++ b/src/generic/stage2/numberparsing.h @@ -1,4 +1,3 @@ -#include "jsoncharutils.h" #include #include diff --git a/src/generic/stage2/stringparsing.h b/src/generic/stage2/stringparsing.h index 179074b2..ad144d5d 100644 --- a/src/generic/stage2/stringparsing.h +++ b/src/generic/stage2/stringparsing.h @@ -1,8 +1,6 @@ // This file contains the common code every implementation uses // It is intended to be included multiple times and compiled multiple times -#include "jsoncharutils.h" - namespace simdjson { namespace SIMDJSON_IMPLEMENTATION { namespace stage2 { diff --git a/src/haswell/dom_parser_implementation.cpp b/src/haswell/dom_parser_implementation.cpp index 05bced90..345b0150 100644 --- a/src/haswell/dom_parser_implementation.cpp +++ b/src/haswell/dom_parser_implementation.cpp @@ -1,5 +1,6 @@ #include "haswell/begin_implementation.h" #include "haswell/dom_parser_implementation.h" +#include "generic/stage2/jsoncharutils.h" // // Stage 1 diff --git a/src/jsoncharutils.h b/src/jsoncharutils_tables.h similarity index 92% rename from src/jsoncharutils.h rename to src/jsoncharutils_tables.h index 4380db8a..365ef415 100644 --- a/src/jsoncharutils.h +++ b/src/jsoncharutils_tables.h @@ -1,5 +1,5 @@ -#ifndef SIMDJSON_JSONCHARUTILS_H -#define SIMDJSON_JSONCHARUTILS_H +#ifndef SIMDJSON_JSONCHARUTILS_TABLES_H +#define SIMDJSON_JSONCHARUTILS_TABLES_H #include "simdjson.h" @@ -34,12 +34,6 @@ const uint32_t 
structural_or_whitespace_or_null_negated[256] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; -// return non-zero if not a structural or whitespace char -// zero otherwise -really_inline uint32_t is_not_structural_or_whitespace_or_null(uint8_t c) { - return structural_or_whitespace_or_null_negated[c]; -} - const uint32_t structural_or_whitespace_negated[256] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, @@ -57,12 +51,6 @@ const uint32_t structural_or_whitespace_negated[256] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; -// return non-zero if not a structural or whitespace char -// zero otherwise -really_inline uint32_t is_not_structural_or_whitespace(uint8_t c) { - return structural_or_whitespace_negated[c]; -} - const uint32_t structural_or_whitespace_or_null[256] = { 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, @@ -76,10 +64,6 @@ const uint32_t structural_or_whitespace_or_null[256] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; -really_inline uint32_t is_structural_or_whitespace_or_null(uint8_t c) { - return structural_or_whitespace_or_null[c]; -} - const uint32_t structural_or_whitespace[256] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, @@ -93,10 +77,6 @@ const uint32_t structural_or_whitespace[256] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; -really_inline uint32_t is_structural_or_whitespace(uint8_t c) { - return structural_or_whitespace[c]; -} - const 
uint32_t digit_to_val32[886] = { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, @@ -246,62 +226,6 @@ const uint32_t digit_to_val32[886] = { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF}; -// returns a value with the high 16 bits set if not valid -// otherwise returns the conversion of the 4 hex digits at src into the bottom -// 16 bits of the 32-bit return register -// -// see -// https://lemire.me/blog/2019/04/17/parsing-short-hexadecimal-strings-efficiently/ -static inline uint32_t hex_to_u32_nocheck( - const uint8_t *src) { // strictly speaking, static inline is a C-ism - uint32_t v1 = digit_to_val32[630 + src[0]]; - uint32_t v2 = digit_to_val32[420 + src[1]]; - uint32_t v3 = digit_to_val32[210 + src[2]]; - uint32_t v4 = digit_to_val32[0 + src[3]]; - return v1 | v2 | v3 | v4; -} - -// given a code point cp, writes to c -// the utf-8 code, outputting the length in -// bytes, if the length is zero, the code point -// is invalid -// -// This can possibly be made faster using pdep -// and clz and table lookups, but JSON documents -// have few escaped code points, and the following -// function looks cheap. -// -// Note: we assume that surrogates are treated separately -// -inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) { - if (cp <= 0x7F) { - c[0] = uint8_t(cp); - return 1; // ascii - } - if (cp <= 0x7FF) { - c[0] = uint8_t((cp >> 6) + 192); - c[1] = uint8_t((cp & 63) + 128); - return 2; // universal plane - // Surrogates are treated elsewhere... 
- //} //else if (0xd800 <= cp && cp <= 0xdfff) { - // return 0; // surrogates // could put assert here - } else if (cp <= 0xFFFF) { - c[0] = uint8_t((cp >> 12) + 224); - c[1] = uint8_t(((cp >> 6) & 63) + 128); - c[2] = uint8_t((cp & 63) + 128); - return 3; - } else if (cp <= 0x10FFFF) { // if you know you have a valid code point, this - // is not needed - c[0] = uint8_t((cp >> 18) + 240); - c[1] = uint8_t(((cp >> 12) & 63) + 128); - c[2] = uint8_t(((cp >> 6) & 63) + 128); - c[3] = uint8_t((cp & 63) + 128); - return 4; - } - // will return 0 when the code point was too large. - return 0; // bad r -} - //// // The following code is used in number parsing. It is not // properly "char utils" stuff, but we move it here so that @@ -317,42 +241,6 @@ struct value128 { uint64_t high; }; -#ifdef SIMDJSON_IS_32BITS // _umul128 for x86, arm -// this is a slow emulation routine for 32-bit -// -static inline uint64_t __emulu(uint32_t x, uint32_t y) { - return x * (uint64_t)y; -} -static inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { - uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd); - uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd); - uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32)); - uint64_t adbc_carry = !!(adbc < ad); - uint64_t lo = bd + (adbc << 32); - *hi = __emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) + - (adbc_carry << 32) + !!(lo < bd); - return lo; -} -#endif - -really_inline value128 full_multiplication(uint64_t value1, uint64_t value2) { - value128 answer; -#if defined(SIMDJSON_REGULAR_VISUAL_STUDIO) || defined(SIMDJSON_IS_32BITS) -#ifdef _M_ARM64 - // ARM64 has native support for 64-bit multiplications, no need to emultate - answer.high = __umulh(value1, value2); - answer.low = value1 * value2; -#else - answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 -#endif // _M_ARM64 -#else // defined(SIMDJSON_REGULAR_VISUAL_STUDIO) || defined(SIMDJSON_IS_32BITS) - 
__uint128_t r = ((__uint128_t)value1) * value2; - answer.low = uint64_t(r); - answer.high = uint64_t(r >> 64); -#endif - return answer; -} - // Precomputed powers of ten from 10^0 to 10^22. These // can be represented exactly using the double type. static const double power_of_ten[] = { @@ -1333,4 +1221,4 @@ const uint64_t mantissa_128[] = { } // namespace simdjson -#endif // SIMDJSON_JSONCHARUTILS_H +#endif // SIMDJSON_JSONCHARUTILS_TABLES_H diff --git a/src/simdjson.cpp b/src/simdjson.cpp index 84dc93a1..48af5147 100644 --- a/src/simdjson.cpp +++ b/src/simdjson.cpp @@ -9,7 +9,7 @@ SIMDJSON_DISABLE_UNDESIRED_WARNINGS // Anything in the top level directory MUST be included outside of the #if statements // below, or amalgamation will screw them up! #include "isadetection.h" -#include "jsoncharutils.h" +#include "jsoncharutils_tables.h" #include "simdprune_tables.h" #if SIMDJSON_IMPLEMENTATION_ARM64 diff --git a/src/westmere/dom_parser_implementation.cpp b/src/westmere/dom_parser_implementation.cpp index c4322546..32c0fdd0 100644 --- a/src/westmere/dom_parser_implementation.cpp +++ b/src/westmere/dom_parser_implementation.cpp @@ -1,4 +1,6 @@ #include "westmere/begin_implementation.h" +#include "westmere/dom_parser_implementation.h" +#include "generic/stage2/jsoncharutils.h" // // Stage 1