Inline jsoncharutils per-implementation

This commit is contained in:
John Keiser 2020-07-16 13:42:51 -07:00
parent 44b7a7145c
commit bf67c967d6
9 changed files with 143 additions and 119 deletions

View File

@ -1,5 +1,6 @@
#include "arm64/begin_implementation.h" #include "arm64/begin_implementation.h"
#include "arm64/dom_parser_implementation.h" #include "arm64/dom_parser_implementation.h"
#include "generic/stage2/jsoncharutils.h"
// //
// Stage 1 // Stage 1

View File

@ -1,5 +1,6 @@
#include "fallback/begin_implementation.h" #include "fallback/begin_implementation.h"
#include "fallback/dom_parser_implementation.h" #include "fallback/dom_parser_implementation.h"
#include "generic/stage2/jsoncharutils.h"
// //
// Stage 1 // Stage 1

View File

@ -0,0 +1,134 @@
namespace simdjson {
namespace SIMDJSON_IMPLEMENTATION {
namespace stage2 {
// return non-zero if not a structural or whitespace char
// zero otherwise
really_inline uint32_t is_not_structural_or_whitespace_or_null(uint8_t c) {
return structural_or_whitespace_or_null_negated[c];
}
// return non-zero if not a structural or whitespace char
// zero otherwise
really_inline uint32_t is_not_structural_or_whitespace(uint8_t c) {
return structural_or_whitespace_negated[c];
}
really_inline uint32_t is_structural_or_whitespace_or_null(uint8_t c) {
return structural_or_whitespace_or_null[c];
}
really_inline uint32_t is_structural_or_whitespace(uint8_t c) {
return structural_or_whitespace[c];
}
// returns a value with the high 16 bits set if not valid
// otherwise returns the conversion of the 4 hex digits at src into the bottom
// 16 bits of the 32-bit return register
//
// see
// https://lemire.me/blog/2019/04/17/parsing-short-hexadecimal-strings-efficiently/
static inline uint32_t hex_to_u32_nocheck(
const uint8_t *src) { // strictly speaking, static inline is a C-ism
uint32_t v1 = digit_to_val32[630 + src[0]];
uint32_t v2 = digit_to_val32[420 + src[1]];
uint32_t v3 = digit_to_val32[210 + src[2]];
uint32_t v4 = digit_to_val32[0 + src[3]];
return v1 | v2 | v3 | v4;
}
// given a code point cp, writes to c
// the utf-8 code, outputting the length in
// bytes, if the length is zero, the code point
// is invalid
//
// This can possibly be made faster using pdep
// and clz and table lookups, but JSON documents
// have few escaped code points, and the following
// function looks cheap.
//
// Note: we assume that surrogates are treated separately
//
inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) {
if (cp <= 0x7F) {
c[0] = uint8_t(cp);
return 1; // ascii
}
if (cp <= 0x7FF) {
c[0] = uint8_t((cp >> 6) + 192);
c[1] = uint8_t((cp & 63) + 128);
return 2; // universal plane
// Surrogates are treated elsewhere...
//} //else if (0xd800 <= cp && cp <= 0xdfff) {
// return 0; // surrogates // could put assert here
} else if (cp <= 0xFFFF) {
c[0] = uint8_t((cp >> 12) + 224);
c[1] = uint8_t(((cp >> 6) & 63) + 128);
c[2] = uint8_t((cp & 63) + 128);
return 3;
} else if (cp <= 0x10FFFF) { // if you know you have a valid code point, this
// is not needed
c[0] = uint8_t((cp >> 18) + 240);
c[1] = uint8_t(((cp >> 12) & 63) + 128);
c[2] = uint8_t(((cp >> 6) & 63) + 128);
c[3] = uint8_t((cp & 63) + 128);
return 4;
}
// will return 0 when the code point was too large.
return 0; // bad r
}
////
// The following code is used in number parsing. It is not
// properly "char utils" stuff, but we move it here so that
// it does not get copied multiple times in the binaries (once
// per instruction set).
///
constexpr int FASTFLOAT_SMALLEST_POWER = -325;
constexpr int FASTFLOAT_LARGEST_POWER = 308;
struct value128 {
uint64_t low;
uint64_t high;
};
#ifdef SIMDJSON_IS_32BITS // _umul128 for x86, arm
// this is a slow emulation routine for 32-bit
//
static inline uint64_t __emulu(uint32_t x, uint32_t y) {
return x * (uint64_t)y;
}
static inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) {
uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd);
uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd);
uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32));
uint64_t adbc_carry = !!(adbc < ad);
uint64_t lo = bd + (adbc << 32);
*hi = __emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) +
(adbc_carry << 32) + !!(lo < bd);
return lo;
}
#endif
really_inline value128 full_multiplication(uint64_t value1, uint64_t value2) {
value128 answer;
#if defined(SIMDJSON_REGULAR_VISUAL_STUDIO) || defined(SIMDJSON_IS_32BITS)
#ifdef _M_ARM64
// ARM64 has native support for 64-bit multiplications, no need to emultate
answer.high = __umulh(value1, value2);
answer.low = value1 * value2;
#else
answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64
#endif // _M_ARM64
#else // defined(SIMDJSON_REGULAR_VISUAL_STUDIO) || defined(SIMDJSON_IS_32BITS)
__uint128_t r = ((__uint128_t)value1) * value2;
answer.low = uint64_t(r);
answer.high = uint64_t(r >> 64);
#endif
return answer;
}
} // namespace stage2
} // namespace SIMDJSON_IMPLEMENTATION
} // namespace simdjson

View File

@ -1,4 +1,3 @@
#include "jsoncharutils.h"
#include <cmath> #include <cmath>
#include <limits> #include <limits>

View File

@ -1,8 +1,6 @@
// This file contains the common code every implementation uses // This file contains the common code every implementation uses
// It is intended to be included multiple times and compiled multiple times // It is intended to be included multiple times and compiled multiple times
#include "jsoncharutils.h"
namespace simdjson { namespace simdjson {
namespace SIMDJSON_IMPLEMENTATION { namespace SIMDJSON_IMPLEMENTATION {
namespace stage2 { namespace stage2 {

View File

@ -1,5 +1,6 @@
#include "haswell/begin_implementation.h" #include "haswell/begin_implementation.h"
#include "haswell/dom_parser_implementation.h" #include "haswell/dom_parser_implementation.h"
#include "generic/stage2/jsoncharutils.h"
// //
// Stage 1 // Stage 1

View File

@ -1,5 +1,5 @@
#ifndef SIMDJSON_JSONCHARUTILS_H #ifndef SIMDJSON_JSONCHARUTILS_TABLES_H
#define SIMDJSON_JSONCHARUTILS_H #define SIMDJSON_JSONCHARUTILS_TABLES_H
#include "simdjson.h" #include "simdjson.h"
@ -34,12 +34,6 @@ const uint32_t structural_or_whitespace_or_null_negated[256] = {
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
// return non-zero if not a structural or whitespace char
// zero otherwise
really_inline uint32_t is_not_structural_or_whitespace_or_null(uint8_t c) {
return structural_or_whitespace_or_null_negated[c];
}
const uint32_t structural_or_whitespace_negated[256] = { const uint32_t structural_or_whitespace_negated[256] = {
1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
@ -57,12 +51,6 @@ const uint32_t structural_or_whitespace_negated[256] = {
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
// return non-zero if not a structural or whitespace char
// zero otherwise
really_inline uint32_t is_not_structural_or_whitespace(uint8_t c) {
return structural_or_whitespace_negated[c];
}
const uint32_t structural_or_whitespace_or_null[256] = { const uint32_t structural_or_whitespace_or_null[256] = {
1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
@ -76,10 +64,6 @@ const uint32_t structural_or_whitespace_or_null[256] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
really_inline uint32_t is_structural_or_whitespace_or_null(uint8_t c) {
return structural_or_whitespace_or_null[c];
}
const uint32_t structural_or_whitespace[256] = { const uint32_t structural_or_whitespace[256] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
@ -93,10 +77,6 @@ const uint32_t structural_or_whitespace[256] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
really_inline uint32_t is_structural_or_whitespace(uint8_t c) {
return structural_or_whitespace[c];
}
const uint32_t digit_to_val32[886] = { const uint32_t digit_to_val32[886] = {
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
@ -246,62 +226,6 @@ const uint32_t digit_to_val32[886] = {
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF}; 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF};
// returns a value with the high 16 bits set if not valid
// otherwise returns the conversion of the 4 hex digits at src into the bottom
// 16 bits of the 32-bit return register
//
// see
// https://lemire.me/blog/2019/04/17/parsing-short-hexadecimal-strings-efficiently/
static inline uint32_t hex_to_u32_nocheck(
const uint8_t *src) { // strictly speaking, static inline is a C-ism
uint32_t v1 = digit_to_val32[630 + src[0]];
uint32_t v2 = digit_to_val32[420 + src[1]];
uint32_t v3 = digit_to_val32[210 + src[2]];
uint32_t v4 = digit_to_val32[0 + src[3]];
return v1 | v2 | v3 | v4;
}
// given a code point cp, writes to c
// the utf-8 code, outputting the length in
// bytes, if the length is zero, the code point
// is invalid
//
// This can possibly be made faster using pdep
// and clz and table lookups, but JSON documents
// have few escaped code points, and the following
// function looks cheap.
//
// Note: we assume that surrogates are treated separately
//
inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) {
if (cp <= 0x7F) {
c[0] = uint8_t(cp);
return 1; // ascii
}
if (cp <= 0x7FF) {
c[0] = uint8_t((cp >> 6) + 192);
c[1] = uint8_t((cp & 63) + 128);
return 2; // universal plane
// Surrogates are treated elsewhere...
//} //else if (0xd800 <= cp && cp <= 0xdfff) {
// return 0; // surrogates // could put assert here
} else if (cp <= 0xFFFF) {
c[0] = uint8_t((cp >> 12) + 224);
c[1] = uint8_t(((cp >> 6) & 63) + 128);
c[2] = uint8_t((cp & 63) + 128);
return 3;
} else if (cp <= 0x10FFFF) { // if you know you have a valid code point, this
// is not needed
c[0] = uint8_t((cp >> 18) + 240);
c[1] = uint8_t(((cp >> 12) & 63) + 128);
c[2] = uint8_t(((cp >> 6) & 63) + 128);
c[3] = uint8_t((cp & 63) + 128);
return 4;
}
// will return 0 when the code point was too large.
return 0; // bad r
}
//// ////
// The following code is used in number parsing. It is not // The following code is used in number parsing. It is not
// properly "char utils" stuff, but we move it here so that // properly "char utils" stuff, but we move it here so that
@ -317,42 +241,6 @@ struct value128 {
uint64_t high; uint64_t high;
}; };
#ifdef SIMDJSON_IS_32BITS // _umul128 for x86, arm
// this is a slow emulation routine for 32-bit
//
static inline uint64_t __emulu(uint32_t x, uint32_t y) {
return x * (uint64_t)y;
}
static inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) {
uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd);
uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd);
uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32));
uint64_t adbc_carry = !!(adbc < ad);
uint64_t lo = bd + (adbc << 32);
*hi = __emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) +
(adbc_carry << 32) + !!(lo < bd);
return lo;
}
#endif
really_inline value128 full_multiplication(uint64_t value1, uint64_t value2) {
value128 answer;
#if defined(SIMDJSON_REGULAR_VISUAL_STUDIO) || defined(SIMDJSON_IS_32BITS)
#ifdef _M_ARM64
// ARM64 has native support for 64-bit multiplications, no need to emultate
answer.high = __umulh(value1, value2);
answer.low = value1 * value2;
#else
answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64
#endif // _M_ARM64
#else // defined(SIMDJSON_REGULAR_VISUAL_STUDIO) || defined(SIMDJSON_IS_32BITS)
__uint128_t r = ((__uint128_t)value1) * value2;
answer.low = uint64_t(r);
answer.high = uint64_t(r >> 64);
#endif
return answer;
}
// Precomputed powers of ten from 10^0 to 10^22. These // Precomputed powers of ten from 10^0 to 10^22. These
// can be represented exactly using the double type. // can be represented exactly using the double type.
static const double power_of_ten[] = { static const double power_of_ten[] = {
@ -1333,4 +1221,4 @@ const uint64_t mantissa_128[] = {
} // namespace simdjson } // namespace simdjson
#endif // SIMDJSON_JSONCHARUTILS_H #endif // SIMDJSON_JSONCHARUTILS_TABLES_H

View File

@ -9,7 +9,7 @@ SIMDJSON_DISABLE_UNDESIRED_WARNINGS
// Anything in the top level directory MUST be included outside of the #if statements // Anything in the top level directory MUST be included outside of the #if statements
// below, or amalgamation will screw them up! // below, or amalgamation will screw them up!
#include "isadetection.h" #include "isadetection.h"
#include "jsoncharutils.h" #include "jsoncharutils_tables.h"
#include "simdprune_tables.h" #include "simdprune_tables.h"
#if SIMDJSON_IMPLEMENTATION_ARM64 #if SIMDJSON_IMPLEMENTATION_ARM64

View File

@ -1,4 +1,6 @@
#include "westmere/begin_implementation.h" #include "westmere/begin_implementation.h"
#include "westmere/dom_parser_implementation.h"
#include "generic/stage2/jsoncharutils.h"
// //
// Stage 1 // Stage 1