Add fallback parser for pre-SSE4.2 machines

This commit is contained in:
John Keiser 2020-03-15 12:50:09 -07:00
parent 8e2c06cb0e
commit af203aaf86
19 changed files with 434 additions and 38 deletions

View File

@ -61,9 +61,10 @@ endif # ifeq ($(MEMSANITIZE),1)
SRCHEADERS_GENERIC=src/generic/atomparsing.h src/generic/numberparsing.h src/generic/stage1_find_marks.h src/generic/stage2_build_tape.h src/generic/stringparsing.h src/generic/stage2_streaming_build_tape.h src/generic/utf8_fastvalidate_algorithm.h src/generic/utf8_lookup_algorithm.h src/generic/utf8_lookup2_algorithm.h src/generic/utf8_range_algorithm.h src/generic/utf8_zwegner_algorithm.h
SRCHEADERS_ARM64= src/arm64/bitmanipulation.h src/arm64/bitmask.h src/arm64/intrinsics.h src/arm64/numberparsing.h src/arm64/simd.h src/arm64/stage1_find_marks.h src/arm64/stage2_build_tape.h src/arm64/stringparsing.h
SRCHEADERS_HASWELL= src/haswell/bitmanipulation.h src/haswell/bitmask.h src/haswell/intrinsics.h src/haswell/numberparsing.h src/haswell/simd.h src/haswell/stage1_find_marks.h src/haswell/stage2_build_tape.h src/haswell/stringparsing.h
# Headers of the fallback implementation. Keep this list in sync with the files
# under src/fallback, the same way the arm64/haswell/westmere lists mirror theirs.
SRCHEADERS_FALLBACK= src/fallback/bitmanipulation.h src/fallback/implementation.h src/fallback/numberparsing.h src/fallback/stage1_find_marks.h src/fallback/stage2_build_tape.h src/fallback/stringparsing.h
SRCHEADERS_WESTMERE=src/westmere/bitmanipulation.h src/westmere/bitmask.h src/westmere/intrinsics.h src/westmere/numberparsing.h src/westmere/simd.h src/westmere/stage1_find_marks.h src/westmere/stage2_build_tape.h src/westmere/stringparsing.h
SRCHEADERS_SRC=src/isadetection.h src/jsoncharutils.h src/simdprune_tables.h src/implementation.cpp src/stage1_find_marks.cpp src/stage2_build_tape.cpp src/document_parser_callbacks.h
SRCHEADERS=$(SRCHEADERS_SRC) $(SRCHEADERS_GENERIC) $(SRCHEADERS_ARM64) $(SRCHEADERS_HASWELL) $(SRCHEADERS_WESTMERE)
SRCHEADERS=$(SRCHEADERS_SRC) $(SRCHEADERS_GENERIC) $(SRCHEADERS_ARM64) $(SRCHEADERS_HASWELL) $(SRCHEADERS_WESTMERE) $(SRCHEADERS_FALLBACK)
INCLUDEHEADERS=include/simdjson.h include/simdjson/common_defs.h include/simdjson/internal/jsonformatutils.h include/simdjson/jsonioutil.h include/simdjson/jsonminifier.h include/simdjson/jsonparser.h include/simdjson/padded_string.h include/simdjson/inline/padded_string.h include/simdjson/document.h include/simdjson/inline/document.h include/simdjson/document_iterator.h include/simdjson/inline/document_iterator.h include/simdjson/document_stream.h include/simdjson/inline/document_stream.h include/simdjson/implementation.h include/simdjson/parsedjson.h include/simdjson/jsonstream.h include/simdjson/inline/jsonstream.h include/simdjson/portability.h include/simdjson/error.h include/simdjson/inline/error.h include/simdjson/simdjson.h include/simdjson/simdjson_version.h

View File

@ -128,7 +128,7 @@ struct option_struct {
break;
default:
// reaching here means an argument was given to getopt() which did not have a case label
exit_error("Unexpected argument - missing case for option "+
exit_usage("Unexpected argument - missing case for option "+
std::string(1,static_cast<char>(c))+
" (programming error)");
}

View File

@ -48,6 +48,7 @@
#endif
// under GCC and CLANG, we use these two macros
#define TARGET_FALLBACK TARGET_REGION("")
#define TARGET_HASWELL TARGET_REGION("avx2,bmi,pclmul,lzcnt")
#define TARGET_WESTMERE TARGET_REGION("sse4.2,pclmul")
#define TARGET_ARM64

View File

@ -42,6 +42,9 @@ set(SIMDJSON_SRC_HEADERS
arm64/stage1_find_marks.h
arm64/stage2_build_tape.h
arm64/stringparsing.h
fallback/implementation.h
fallback/stage1_find_marks.h
fallback/stage2_build_tape.h
generic/atomparsing.h
generic/numberparsing.h
generic/stage1_find_marks.h

View File

@ -15,16 +15,24 @@ namespace simdjson::arm64 {
using namespace simd;
// Holds backslashes and quotes locations.
struct parse_string_helper {
struct backslash_and_quote {
public:
static constexpr uint32_t BYTES_PROCESSED = 32;
really_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst);
really_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; }
really_inline bool has_backslash() { return bs_bits != 0; }
really_inline int quote_index() { return trailing_zeroes(quote_bits); }
really_inline int backslash_index() { return trailing_zeroes(bs_bits); }
uint32_t bs_bits;
uint32_t quote_bits;
static const uint32_t BYTES_PROCESSED = 32;
};
}; // struct backslash_and_quote
really_inline parse_string_helper find_bs_bits_and_quote_bits(const uint8_t *src, uint8_t *dst) {
really_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) {
// this can read up to 31 bytes beyond the buffer size, but we require
// SIMDJSON_PADDING of padding
static_assert(SIMDJSON_PADDING >= (parse_string_helper::BYTES_PROCESSED - 1));
static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1));
simd8<uint8_t> v0(src);
simd8<uint8_t> v1(src + sizeof(v0));
v0.store(dst);

View File

@ -0,0 +1,65 @@
#ifndef SIMDJSON_FALLBACK_BITMANIPULATION_H
#define SIMDJSON_FALLBACK_BITMANIPULATION_H
#include "simdjson.h"
#include <limits>
TARGET_FALLBACK
namespace simdjson::fallback {
// Some callers hand this function a zero and then never look at the answer.
// Sanitizers cannot tell that the value is discarded, so the undefined-behaviour
// check is deliberately switched off for this function on non-MSVC compilers.
#ifndef _MSC_VER
__attribute__((no_sanitize("undefined")))
#endif // _MSC_VER
/**
 * Counts the zero bits below the least significant set bit of input_num.
 *
 * The result might be undefined when input_num is zero.
 */
really_inline int trailing_zeroes(uint64_t input_num) {
#ifdef _MSC_VER
  // _BitScanForward64 searches from the least significant bit upwards and
  // stores the position of the first set bit it meets.
  unsigned long lowest_set_index;
  _BitScanForward64(&lowest_set_index, input_num);
  return static_cast<int>(lowest_set_index);
#else
  const int zero_count = __builtin_ctzll(input_num);
  return zero_count;
#endif // _MSC_VER
}
/* Returns input_num with its least significant set bit cleared; zero stays zero. */
really_inline uint64_t clear_lowest_bit(uint64_t input_num) {
  // Subtracting one flips the lowest set bit and every bit below it, so the
  // AND keeps only the bits that sit above it.
  const uint64_t low_bits_flipped = input_num - 1;
  return low_bits_flipped & input_num;
}
/**
 * Counts the zero bits above the most significant set bit of input_num.
 *
 * The MSVC branch returns 64 for a zero input; on other compilers the result
 * might be undefined when input_num is zero.
 */
really_inline int leading_zeroes(uint64_t input_num) {
#ifdef _MSC_VER
  unsigned long highest_set_index = 0;
  // _BitScanReverse64 searches from the most significant bit downwards and
  // reports failure (zero) when no bit is set at all.
  if (!_BitScanReverse64(&highest_set_index, input_num)) {
    return 64;
  }
  return static_cast<int>(63 - highest_set_index);
#else
  const int zero_count = __builtin_clzll(input_num);
  return zero_count;
#endif // _MSC_VER
}
/**
 * Stores value1 + value2 (with unsigned wrap-around) in *result.
 *
 * @return true when the addition wrapped, i.e. the true sum does not fit in 64 bits.
 */
really_inline bool add_overflow(uint64_t value1, uint64_t value2, uint64_t *result) {
  const uint64_t wrapped_sum = value1 + value2;
  *result = wrapped_sum;
  // An unsigned sum that wrapped is always smaller than either operand.
  return wrapped_sum < value1;
}
/**
 * Stores value1 * value2 (with unsigned wrap-around) in *result.
 *
 * @return true when the true product does not fit in 64 bits.
 */
really_inline bool mul_overflow(uint64_t value1, uint64_t value2, uint64_t *result) {
  *result = value1 * value2;
  // TODO there must be a faster way
  if (value2 == 0) {
    // Multiplying by zero can never overflow, and it must not reach the division below.
    return false;
  }
  const uint64_t largest_safe_value1 = std::numeric_limits<uint64_t>::max() / value2;
  return value1 > largest_safe_value1;
}
} // namespace simdjson::fallback
UNTARGET_REGION
#endif // SIMDJSON_FALLBACK_BITMANIPULATION_H

View File

@ -0,0 +1,26 @@
#ifndef SIMDJSON_FALLBACK_IMPLEMENTATION_H
#define SIMDJSON_FALLBACK_IMPLEMENTATION_H
#include "simdjson.h"
#include "isadetection.h"
TARGET_FALLBACK
namespace simdjson::fallback {
// simdjson::implementation subclass that identifies itself as "fallback".
// The four parsing entry points are only declared in this class body; their
// definitions are provided outside of it.
class implementation final : public simdjson::implementation {
public:
// Passes the name, the human-readable description and a third argument of 0 to the base class.
// NOTE(review): the 0 presumably means "no special instruction sets required" - confirm against simdjson::implementation.
really_inline implementation() : simdjson::implementation(
"fallback",
"Generic fallback implementation",
0
) {}
// Full parse of buf[0..len) into parser.
WARN_UNUSED error_code parse(const uint8_t *buf, size_t len, document::parser &parser) const noexcept final;
// Stage 1 only; `streaming` is forwarded to the scanner.
WARN_UNUSED error_code stage1(const uint8_t *buf, size_t len, document::parser &parser, bool streaming) const noexcept final;
// Stage 2 only.
WARN_UNUSED error_code stage2(const uint8_t *buf, size_t len, document::parser &parser) const noexcept final;
// Stage 2 overload that additionally takes next_json by reference.
// NOTE(review): presumably the streaming variant that updates next_json - confirm against the generic stage 2 code.
WARN_UNUSED error_code stage2(const uint8_t *buf, size_t len, document::parser &parser, size_t &next_json) const noexcept final;
};
} // namespace simdjson::fallback
UNTARGET_REGION
#endif // SIMDJSON_FALLBACK_IMPLEMENTATION_H

View File

@ -0,0 +1,34 @@
#ifndef SIMDJSON_FALLBACK_NUMBERPARSING_H
#define SIMDJSON_FALLBACK_NUMBERPARSING_H
#include "simdjson.h"
#include "jsoncharutils.h"
#include "fallback/bitmanipulation.h"
#include <cmath>
#include <limits>
#ifdef JSON_TEST_NUMBERS // for unit testing
void found_invalid_number(const uint8_t *buf);
void found_integer(int64_t result, const uint8_t *buf);
void found_unsigned_integer(uint64_t result, const uint8_t *buf);
void found_float(double result, const uint8_t *buf);
#endif
TARGET_FALLBACK
namespace simdjson::fallback {
// Converts the eight characters starting at `chars` into their decimal value,
// most significant digit first. No validation is performed: every character
// simply contributes (character - '0') to the running total.
static inline uint32_t parse_eight_digits_unrolled(const char *chars) {
  uint32_t accumulated = 0;
  const char *const stop = chars + 8;
  for (const char *digit = chars; digit != stop; ++digit) {
    accumulated = accumulated * 10 + (*digit - '0');
  }
  return accumulated;
}
#define SWAR_NUMBER_PARSING
#include "generic/numberparsing.h"
} // namespace simdjson::fallback
UNTARGET_REGION
#endif // SIMDJSON_FALLBACK_NUMBERPARSING_H

View File

@ -0,0 +1,160 @@
#ifndef SIMDJSON_FALLBACK_STAGE1_FIND_MARKS_H
#define SIMDJSON_FALLBACK_STAGE1_FIND_MARKS_H
#include "simdjson.h"
#include "fallback/implementation.h"
TARGET_FALLBACK
namespace simdjson::fallback::stage1 {
class structural_scanner {
public:
really_inline structural_scanner(const uint8_t *_buf, uint32_t _len, document::parser &_doc_parser, bool _streaming)
: buf{_buf}, next_structural_index{_doc_parser.structural_indexes.get()}, doc_parser{_doc_parser}, idx{0}, len{_len}, error{SUCCESS}, streaming{_streaming} {}
really_inline void add_structural() {
*next_structural_index = idx;
next_structural_index++;
}
really_inline bool is_continuation(uint8_t c) {
return (c & 0b11000000) == 0b10000000;
}
really_inline void validate_utf8_character() {
// Continuation
if (unlikely((buf[idx] & 0b01000000) == 0)) {
// extra continuation
error = UTF8_ERROR;
idx++;
return;
}
// 2-byte
if ((buf[idx] & 0b00100000) == 0) {
// missing continuation
if (unlikely(idx+1 > len || !is_continuation(buf[idx+1]))) { error = UTF8_ERROR; idx++; return; }
// overlong: 1100000_ 10______
if (buf[idx] <= 0b11000001) { error = UTF8_ERROR; }
idx += 2;
return;
}
// 3-byte
if ((buf[idx] & 0b00010000) == 0) {
// missing continuation
if (unlikely(idx+2 > len || !is_continuation(buf[idx+1]) || !is_continuation(buf[idx+2]))) { error = UTF8_ERROR; idx++; return; }
// overlong: 11100000 100_____ ________
if (buf[idx] == 0b11100000 && buf[idx+1] <= 0b10011111) { error = UTF8_ERROR; }
// surrogates: U+D800-U+DFFF 11101101 101_____
if (buf[idx] == 0b11101101 && buf[idx+1] >= 0b10100000) { error = UTF8_ERROR; }
idx += 3;
return;
}
// 4-byte
// missing continuation
if (unlikely(idx+3 > len || !is_continuation(buf[idx+1]) || !is_continuation(buf[idx+2]) || !is_continuation(buf[idx+3]))) { error = UTF8_ERROR; idx++; return; }
// overlong: 11110000 1000____ ________ ________
if (buf[idx] == 0b11110000 && buf[idx+1] <= 0b10001111) { error = UTF8_ERROR; }
// too large: > U+10FFFF:
// 11110100 (1001|101_)____
// 1111(1___|011_|0101) 10______
// also includes 5, 6, 7 and 8 byte characters:
// 11111___
if (buf[idx] == 0b11110100 && buf[idx+1] >= 0b10010000) { error = UTF8_ERROR; }
if (buf[idx] >= 0b11110101) { error = UTF8_ERROR; }
idx += 4;
}
really_inline void validate_string() {
idx++; // skip first quote
while (idx < len && buf[idx] != '"') {
if (buf[idx] == '\\') {
idx += 2;
} else if (unlikely(buf[idx] & 0b10000000)) {
validate_utf8_character();
} else {
if (buf[idx] < 0x20) { error = UNESCAPED_CHARS; }
idx++;
}
}
if (idx >= len && !streaming) { error = UNCLOSED_STRING; }
}
really_inline bool is_whitespace_or_operator(uint8_t c) {
switch (c) {
case '{': case '}': case '[': case ']': case ',': case ':':
case ' ': case '\r': case '\n': case '\t':
return true;
default:
return false;
}
}
//
// Parse the entire input in STEP_SIZE-byte chunks.
//
really_inline error_code scan() {
for (;idx<len;idx++) {
switch (buf[idx]) {
// String
case '"':
add_structural();
validate_string();
break;
// Operator
case '{': case '}': case '[': case ']': case ',': case ':':
add_structural();
break;
// Whitespace
case ' ': case '\r': case '\n': case '\t':
break;
// Primitive or invalid character (invalid characters will be checked in stage 2)
default:
// Anything else, add the structural and go until we find the next one
add_structural();
while (idx+1<len && !is_whitespace_or_operator(buf[idx+1])) {
idx++;
};
break;
}
}
if (unlikely(next_structural_index == doc_parser.structural_indexes.get())) {
return EMPTY;
}
*next_structural_index = len;
next_structural_index++;
doc_parser.n_structural_indexes = next_structural_index - doc_parser.structural_indexes.get();
return error;
}
private:
const uint8_t *buf;
uint32_t *next_structural_index;
document::parser &doc_parser;
uint32_t idx;
uint32_t len;
error_code error;
bool streaming;
}; // structural_scanner
} // simdjson::fallback::stage1
UNTARGET_REGION
TARGET_FALLBACK
namespace simdjson::fallback {
// Stage 1 for the fallback implementation: rejects inputs larger than the
// parser's capacity with CAPACITY, otherwise runs the scalar structural
// scanner over the input and returns its result.
WARN_UNUSED error_code implementation::stage1(const uint8_t *buf, size_t len, document::parser &parser, bool streaming) const noexcept {
  const bool fits_in_parser = len <= parser.capacity();
  if (unlikely(!fits_in_parser)) {
    return CAPACITY;
  }
  stage1::structural_scanner scanner(buf, len, parser, streaming);
  const error_code scan_result = scanner.scan();
  return scan_result;
}
} // namespace simdjson::fallback
UNTARGET_REGION
#endif // SIMDJSON_FALLBACK_STAGE1_FIND_MARKS_H

View File

@ -0,0 +1,20 @@
#ifndef SIMDJSON_FALLBACK_STAGE2_BUILD_TAPE_H
#define SIMDJSON_FALLBACK_STAGE2_BUILD_TAPE_H
#include "simdjson.h"
#include "fallback/implementation.h"
#include "fallback/stringparsing.h"
#include "fallback/numberparsing.h"
TARGET_FALLBACK
namespace simdjson::fallback {
// The generic sources are textually included inside this namespace, so
// everything they define is declared in simdjson::fallback.
#include "generic/atomparsing.h"
#include "generic/stage2_build_tape.h"
#include "generic/stage2_streaming_build_tape.h"
} // namespace simdjson::fallback
UNTARGET_REGION
#endif // SIMDJSON_FALLBACK_STAGE2_BUILD_TAPE_H

View File

@ -0,0 +1,35 @@
#ifndef SIMDJSON_FALLBACK_STRINGPARSING_H
#define SIMDJSON_FALLBACK_STRINGPARSING_H
#include "simdjson.h"
#include "jsoncharutils.h"
TARGET_FALLBACK
namespace simdjson::fallback {
// Scalar counterpart of the SIMD backslash/quote finders: it looks at exactly
// one byte per step and remembers that byte so the queries below can classify it.
struct backslash_and_quote {
public:
  // Number of input bytes consumed by each copy_and_find() call.
  static constexpr uint32_t BYTES_PROCESSED = 1;
  // Copies one byte from src to dst and returns a helper describing it.
  really_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst);

  // True when the byte is a double quote.
  really_inline bool has_quote_first() { return '"' == c; }
  // True when the byte is a backslash.
  really_inline bool has_backslash() { return '\\' == c; }
  // Offset of the quote: 0 when the byte is a quote, 1 (one past the byte) otherwise.
  really_inline int quote_index() {
    if (c == '"') { return 0; }
    return 1;
  }
  // Offset of the backslash: 0 when the byte is a backslash, 1 otherwise.
  really_inline int backslash_index() {
    if (c == '\\') { return 0; }
    return 1;
  }

  // The byte that was examined.
  uint8_t c;
}; // struct backslash_and_quote
// Copies a single byte from src to dst and returns a helper holding that byte.
really_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) {
  const uint8_t current = *src;
  // store to dest unconditionally - we can overwrite the bits we don't like later
  *dst = current;
  return { current };
}
#include "generic/stringparsing.h"
} // namespace simdjson::fallback
UNTARGET_REGION
#endif // SIMDJSON_FALLBACK_STRINGPARSING_H

View File

@ -71,23 +71,19 @@ really_inline bool handle_unicode_codepoint(const uint8_t **src_ptr,
return offset > 0;
}
WARN_UNUSED really_inline uint8_t *parse_string(const uint8_t *src,
uint8_t *dst) {
WARN_UNUSED really_inline uint8_t *parse_string(const uint8_t *src, uint8_t *dst) {
src++;
while (1) {
parse_string_helper helper = find_bs_bits_and_quote_bits(src, dst);
if (((helper.bs_bits - 1) & helper.quote_bits) != 0) {
/* we encountered quotes first. Move dst to point to quotes and exit
*/
/* find out where the quote is... */
auto quote_dist = trailing_zeroes(helper.quote_bits);
return dst + quote_dist;
// Copy the next n bytes, and find the backslash and quote in them.
auto bs_quote = backslash_and_quote::copy_and_find(src, dst);
// If the next thing is the end quote, copy and return
if (bs_quote.has_quote_first()) {
// we encountered quotes first. Move dst to point to quotes and exit
return dst + bs_quote.quote_index();
}
if (((helper.quote_bits - 1) & helper.bs_bits) != 0) {
if (bs_quote.has_backslash()) {
/* find out where the backslash is */
auto bs_dist = trailing_zeroes(helper.bs_bits);
auto bs_dist = bs_quote.backslash_index();
uint8_t escape_char = src[bs_dist + 1];
/* we encountered backslash first. Handle backslash */
if (escape_char == 'u') {
@ -114,8 +110,8 @@ WARN_UNUSED really_inline uint8_t *parse_string(const uint8_t *src,
} else {
/* they are the same. Since they can't co-occur, it means we
* encountered neither. */
src += parse_string_helper::BYTES_PROCESSED;
dst += parse_string_helper::BYTES_PROCESSED;
src += backslash_and_quote::BYTES_PROCESSED;
dst += backslash_and_quote::BYTES_PROCESSED;
}
}
/* can't be reached */

View File

@ -6,7 +6,7 @@
// are straight up concatenated into the final value. The first byte of a multibyte character is a
// "leading byte" and starts with N 1's, where N is the total number of bytes (110_____ = 2 byte
// lead). The remaining bytes of a multibyte character all start with 10. 1-byte characters just
// start with 0, because that's what ASCII looks like. Here's what each size
// start with 0, because that's what ASCII looks like. Here's what each size looks like:
//
// - ASCII (7 bits): 0_______
// - 2 byte character (11 bits): 110_____ 10______

View File

@ -16,16 +16,24 @@ namespace simdjson::haswell {
using namespace simd;
// Holds backslashes and quotes locations.
struct parse_string_helper {
struct backslash_and_quote {
public:
static constexpr uint32_t BYTES_PROCESSED = 32;
really_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst);
really_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; }
really_inline bool has_backslash() { return ((quote_bits - 1) & bs_bits) != 0; }
really_inline int quote_index() { return trailing_zeroes(quote_bits); }
really_inline int backslash_index() { return trailing_zeroes(bs_bits); }
uint32_t bs_bits;
uint32_t quote_bits;
static const uint32_t BYTES_PROCESSED = 32;
};
}; // struct backslash_and_quote
really_inline parse_string_helper find_bs_bits_and_quote_bits(const uint8_t *src, uint8_t *dst) {
really_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) {
// this can read up to 31 bytes beyond the buffer size, but we require
// SIMDJSON_PADDING of padding
static_assert(SIMDJSON_PADDING >= (parse_string_helper::BYTES_PROCESSED - 1));
static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1));
simd8<uint8_t> v(src);
// store to dest unconditionally - we can overwrite the bits we don't like later
v.store(dst);

View File

@ -8,11 +8,13 @@
#include "haswell/implementation.h"
#include "westmere/implementation.h"
#include "fallback/implementation.h"
namespace simdjson::internal {
const fallback::implementation fallback_singleton{};
const haswell::implementation haswell_singleton{};
const westmere::implementation westmere_singleton{};
constexpr const std::initializer_list<const implementation *> available_implementation_pointers { &haswell_singleton, &westmere_singleton };
constexpr const std::initializer_list<const implementation *> available_implementation_pointers { &haswell_singleton, &westmere_singleton, &fallback_singleton };
}
#endif
@ -20,10 +22,12 @@ constexpr const std::initializer_list<const implementation *> available_implemen
#ifdef IS_ARM64
#include "arm64/implementation.h"
#include "fallback/implementation.h"
namespace simdjson::internal {
const fallback::implementation fallback_singleton{};
const arm64::implementation arm64_singleton{};
constexpr const std::initializer_list<const implementation *> available_implementation_pointers { &arm64_singleton };
constexpr const std::initializer_list<const implementation *> available_implementation_pointers { &arm64_singleton, &fallback_singleton };
}
#endif

View File

@ -1,3 +1,4 @@
#include "arm64/stage1_find_marks.h"
#include "fallback/stage1_find_marks.h"
#include "haswell/stage1_find_marks.h"
#include "westmere/stage1_find_marks.h"

View File

@ -13,5 +13,6 @@ void found_bad_string(const uint8_t *buf);
#endif
#include "arm64/stage2_build_tape.h"
#include "fallback/stage2_build_tape.h"
#include "haswell/stage2_build_tape.h"
#include "westmere/stage2_build_tape.h"

View File

@ -16,16 +16,24 @@ namespace simdjson::westmere {
using namespace simd;
// Holds backslashes and quotes locations.
struct parse_string_helper {
struct backslash_and_quote {
public:
static constexpr uint32_t BYTES_PROCESSED = 32;
really_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst);
really_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; }
really_inline bool has_backslash() { return bs_bits != 0; }
really_inline int quote_index() { return trailing_zeroes(quote_bits); }
really_inline int backslash_index() { return trailing_zeroes(bs_bits); }
uint32_t bs_bits;
uint32_t quote_bits;
static const uint32_t BYTES_PROCESSED = 32;
};
}; // struct backslash_and_quote
really_inline parse_string_helper find_bs_bits_and_quote_bits(const uint8_t *src, uint8_t *dst) {
really_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) {
// this can read up to 31 bytes beyond the buffer size, but we require
// SIMDJSON_PADDING of padding
static_assert(SIMDJSON_PADDING >= (parse_string_helper::BYTES_PROCESSED - 1));
static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1));
simd8<uint8_t> v0(src);
simd8<uint8_t> v1(src + 16);
v0.store(dst);

View File

@ -10,6 +10,7 @@
#include <string_view>
#include <sstream>
#include <utility>
#include <unistd.h>
#include "simdjson.h"
@ -32,6 +33,7 @@ inline uint64_t f64_ulp_dist(double a, double b) {
bool number_test_small_integers() {
std::cout << __func__ << std::endl;
char buf[1024];
simdjson::document::parser parser;
for (int m = 10; m < 20; m++) {
@ -66,6 +68,7 @@ bool number_test_small_integers() {
bool number_test_powers_of_two() {
std::cout << __func__ << std::endl;
char buf[1024];
simdjson::document::parser parser;
int maxulp = 0;
@ -202,6 +205,7 @@ static const double testing_power_of_ten[] = {
bool number_test_powers_of_ten() {
std::cout << __func__ << std::endl;
char buf[1024];
simdjson::document::parser parser;
for (int i = -1000000; i <= 308; ++i) {// large negative values should be zero.
@ -267,6 +271,7 @@ bool number_test_powers_of_ten() {
// adversarial example that once triggered overruns, see https://github.com/lemire/simdjson/issues/345
bool bad_example() {
std::cout << __func__ << std::endl;
std::string badjson = "[7,7,7,7,6,7,7,7,6,7,7,6,[7,7,7,7,6,7,7,7,6,7,7,6,7,7,7,7,7,7,6";
simdjson::document::parser parser = simdjson::build_parsed_json(badjson);
if(parser.is_valid()) {
@ -277,6 +282,7 @@ bool bad_example() {
}
// returns true if successful
bool stable_test() {
std::cout << __func__ << std::endl;
std::string json = "{"
"\"Image\":{"
"\"Width\":800,"
@ -1438,10 +1444,10 @@ bool error_messages_in_correct_order() {
bool lots_of_brackets() {
std::string input;
for(size_t i = 0; i < 1000; i++) {
for(size_t i = 0; i < 16; i++) {
input += "[";
}
for(size_t i = 0; i < 1000; i++) {
for(size_t i = 0; i < 16; i++) {
input += "]";
}
auto [doc, error] = simdjson::document::parse(input);
@ -1451,7 +1457,26 @@ bool lots_of_brackets() {
return true;
}
int main() {
int main(int argc, char *argv[]) {
std::cout << std::unitbuf;
// Command-line handling: "-a <name>" selects the implementation to test.
//
// getopt() returns an int and signals the end of the options with -1. The
// result has to stay an int: on platforms where plain char is unsigned, a char
// can never compare equal to -1, so the loop would not stop at the end of the
// options and would fall into the default branch instead.
int c;
while ((c = getopt(argc, argv, "a:")) != -1) {
  switch (c) {
  case 'a': {
    // Look the requested implementation up by name; a null result means it is not available.
    const simdjson::implementation *impl = simdjson::available_implementations[optarg];
    if (!impl) {
      fprintf(stderr, "Unsupported architecture value -a %s\n", optarg);
      return EXIT_FAILURE;
    }
    simdjson::active_implementation = impl;
    break;
  }
  default:
    fprintf(stderr, "Unexpected argument %c\n", c);
    return EXIT_FAILURE;
  }
}
// this is put here deliberately to check that the documentation is correct (README),
// should this fail to compile, you should update the documentation:
if (simdjson::active_implementation->name() == "unsupported") {