Trying to fix issue 465 (#466)

* Trying to fix issue 465

* Actually testing

* Refreshing amal.

* Removing spurious ;
This commit is contained in:
Daniel Lemire 2020-01-27 11:25:23 -05:00 committed by GitHub
parent 6978a0b8d4
commit e695a19d11
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 222 additions and 251 deletions

View File

@ -29,6 +29,9 @@ commands:
- run: make checkperf
- run: make clean
- run: ARCHFLAGS=-march=haswell make test # this breaks runtime dispatch, but see https://github.com/lemire/simdjson/issues/444... this is a code robustness test
- run: make clean
- run: EXTRAFLAGS=-DSIMDJSON_NO_COMPUTED_GOTO=true make test # this should run tests with computed gotos disabled
cmake_test:
steps:
- run: apt-get update -qq

View File

@ -90,6 +90,15 @@ On x64 hardware, you should typically build your code by specifying the oldest/l
We also support 64-bit ARM. We assume NEON support. There is no runtime dispatch on ARM.
## Computed GOTOs
For best performance, we use a technique called "computed goto", sometimes also described as "Labels as Values".
Though it is not part of the C++ standard, it is supported by many major compilers and it brings measurable performance benefits that
are difficult to achieve otherwise.
The computed gotos are automatically disabled under Visual Studio.
If you wish to forcefully disable computed gotos, you can do so by compiling the code with the macro `SIMDJSON_NO_COMPUTED_GOTO`
defined. It is not recommended to disable computed gotos if your compiler supports them. In fact, you should almost never need to
be concerned with computed gotos.
## Thread safety

View File

@ -25,7 +25,7 @@
#define DEBUG_BLOCK(name, block)
#endif
#ifndef _MSC_VER
#if !defined(_MSC_VER) && !defined(SIMDJSON_NO_COMPUTED_GOTO)
// Implemented using Labels as Values which works in GCC and CLANG (and maybe
// also in Intel's compiler), but won't work in MSVC.
#define SIMDJSON_USE_COMPUTED_GOTO

View File

@ -34,6 +34,6 @@ int find_structural_bits(const char *buf, size_t len, simdjson::ParsedJson &pj)
return find_structural_bits<T>((const uint8_t *)buf, len, pj);
}
}; // namespace simdjson
} // namespace simdjson
#endif

View File

@ -1,4 +1,4 @@
/* auto-generated on Wed Jan 15 15:50:50 EST 2020. Do not edit! */
/* auto-generated on Mon Jan 27 10:35:34 EST 2020. Do not edit! */
#include <iostream>
#include "simdjson.h"

View File

@ -1,4 +1,4 @@
/* auto-generated on Wed Jan 15 15:50:50 EST 2020. Do not edit! */
/* auto-generated on Mon Jan 27 10:35:34 EST 2020. Do not edit! */
#include "simdjson.h"
/* used for http://dmalloc.com/ Dmalloc - Debug Malloc Library */
@ -1129,7 +1129,7 @@ int json_parse_dispatch(const uint8_t *buf, size_t len, ParsedJson &pj,
break;
#endif
default:
std::cerr << "The processor is not supported by simdjson." << std::endl;
// The processor is not supported by simdjson.
return simdjson::UNEXPECTED_ERROR;
}
@ -1145,9 +1145,7 @@ ParsedJson build_parsed_json(const uint8_t *buf, size_t len,
bool ok = pj.allocate_capacity(len);
if (ok) {
json_parse(buf, len, pj, realloc);
} else {
std::cerr << "failure during memory allocation " << std::endl;
}
}
return pj;
}
} // namespace simdjson
@ -1384,7 +1382,6 @@ void find_the_best_supported_implementation() {
return;
}
#endif
std::cerr << "The processor is not supported by simdjson." << std::endl;
// we throw an exception since this should not be recoverable
throw new std::runtime_error("unsupported architecture");
}
@ -3661,7 +3658,7 @@ namespace simdjson::arm64::simd {
}
// Store to array
really_inline void store(uint8_t dst[16]) { return vst1q_u8(dst, *this); }
really_inline void store(uint8_t dst[16]) const { return vst1q_u8(dst, *this); }
// Saturated math
really_inline simd8<uint8_t> saturating_add(const simd8<uint8_t> other) const { return vqaddq_u8(*this, other); }
@ -3761,7 +3758,7 @@ namespace simdjson::arm64::simd {
}
// Store to array
really_inline void store(int8_t dst[16]) { return vst1q_s8(dst, *this); }
really_inline void store(int8_t dst[16]) const { return vst1q_s8(dst, *this); }
// Explicit conversion to/from unsigned
really_inline explicit simd8(const uint8x16_t other): simd8(vreinterpretq_s8_u8(other)) {}
@ -3819,7 +3816,7 @@ namespace simdjson::arm64::simd {
really_inline simd8x64(const simd8<T> chunk0, const simd8<T> chunk1, const simd8<T> chunk2, const simd8<T> chunk3) : chunks{chunk0, chunk1, chunk2, chunk3} {}
really_inline simd8x64(const T ptr[64]) : chunks{simd8<T>::load(ptr), simd8<T>::load(ptr+16), simd8<T>::load(ptr+32), simd8<T>::load(ptr+48)} {}
really_inline void store(T ptr[64]) {
really_inline void store(T ptr[64]) const {
this->chunks[0].store(ptr+sizeof(simd8<T>)*0);
this->chunks[1].store(ptr+sizeof(simd8<T>)*1);
this->chunks[2].store(ptr+sizeof(simd8<T>)*2);
@ -5125,44 +5122,68 @@ public:
really_inline void scan(const uint8_t *buf, const size_t len, utf8_checker &utf8_checker);
};
// return a bitvector indicating where we have characters that end an odd-length
// sequence of backslashes (and thus change the behavior of the next character
// to follow). A even-length sequence of backslashes, and, for that matter, the
// largest even-length prefix of our odd-length sequence of backslashes, simply
// modify the behavior of the backslashes themselves.
// We also update the prev_iter_ends_odd_backslash reference parameter to
// indicate whether we end an iteration on an odd-length sequence of
// backslashes, which modifies our subsequent search for odd-length
// sequences of backslashes in an obvious way.
really_inline uint64_t follows_odd_sequence_of(const uint64_t match, uint64_t &overflow) {
// Routines to print masks and text for debugging bitmask operations
// Debugging helper: copies the bytes of `in` into a NUL-terminated string,
// replacing every byte that compares less than ' ' with '_'.
//
// The returned pointer refers to one function-local static buffer: each call
// overwrites the previous result, and callers must not free it.
//
// NOTE(review): the comparison is done on plain `char`, so whether bytes >= 0x80
// are also replaced depends on the signedness of `char` on the target -- confirm
// which behavior is intended.
UNUSED static char * format_input_text(const simd8x64<uint8_t> in) {
  // Static storage instead of a one-time malloc(): there is nothing to leak and
  // no null pointer to write through if the allocation were to fail. The
  // alignment keeps the buffer at least as aligned as malloc'd memory.
  alignas(64) static char buf[sizeof(simd8x64<uint8_t>) + 1];
  in.store(reinterpret_cast<uint8_t*>(buf));
  for (size_t i=0; i<sizeof(simd8x64<uint8_t>); i++) {
    if (buf[i] < ' ') { buf[i] = '_'; }
  }
  buf[sizeof(simd8x64<uint8_t>)] = '\0';
  return buf;
}
// Debugging helper: renders a 64-bit mask as a 64-character, NUL-terminated
// string. Character i is 'X' when bit i of `mask` is set (bit 0 comes first)
// and ' ' otherwise.
//
// The returned pointer refers to one function-local static buffer: each call
// overwrites the previous result, and callers must not free it.
UNUSED static char * format_mask(uint64_t mask) {
  // Static storage instead of a one-time malloc(): nothing to leak, and no null
  // pointer to write through if the allocation were to fail.
  static char buf[64 + 1];
  for (size_t i=0; i<64; i++) {
    // Shift a 64-bit one. size_t may be only 32 bits wide, and shifting a
    // 32-bit value by 32 or more is undefined behavior.
    buf[i] = (mask & (uint64_t(1) << i)) ? 'X' : ' ';
  }
  buf[64] = '\0';
  return buf;
}
//
// Finds escaped characters (characters following \).
//
// Handles runs of backslashes like \\\" and \\\\" correctly (yielding 0101 and 01010, respectively).
//
// Does this by:
// - Shift the escape mask to get potentially escaped characters (characters after backslashes).
// - Mask escaped sequences that start on *even* bits with 1010101010 (odd bits are escaped, even bits are not)
// - Mask escaped sequences that start on *odd* bits with 0101010101 (even bits are escaped, odd bits are not)
//
// To distinguish between escaped sequences starting on even/odd bits, it finds the start of all
// escape sequences, filters out the ones that start on even bits, and adds that to the mask of
// escape sequences. This causes the addition to clear out the sequences starting on odd bits (since
// the start bit causes a carry), and leaves even-bit sequences alone.
//
// Example:
//
// text | \\\ | \\\"\\\" \\\" \\"\\" |
// escape | xxx | xx xxx xxx xx xx | Removed overflow backslash; will OR it into follows_escape
// odd_starts | x | x x x | escape & ~even_bits & ~follows_escape
// even_seq | c| cxxx c xx c | c = carry bit -- will be masked out later
// invert_mask | | cxxx c xx c| even_seq << 1
// follows_escape | xx | x xx xxx xxx xx xx | Includes overflow bit
// escaped | x | x x x x x x x x |
// desired | x | x x x x x x x x |
// text | \\\ | \\\"\\\" \\\" \\"\\" |
//
really_inline uint64_t find_escaped(uint64_t escape, uint64_t &escaped_overflow) {
// If there was overflow, pretend the first character isn't a backslash
escape &= ~escaped_overflow;
uint64_t follows_escape = escape << 1 | escaped_overflow;
// Get sequences starting on even bits by clearing out the odd series using +
const uint64_t even_bits = 0x5555555555555555ULL;
const uint64_t odd_bits = ~even_bits;
uint64_t start_edges = match & ~(match << 1);
/* flip lowest if we have an odd-length run at the end of the prior
* iteration */
uint64_t even_start_mask = even_bits ^ overflow;
uint64_t even_starts = start_edges & even_start_mask;
uint64_t odd_starts = start_edges & ~even_start_mask;
uint64_t even_carries = match + even_starts;
uint64_t odd_sequence_starts = escape & ~even_bits & ~follows_escape;
uint64_t sequences_starting_on_even_bits;
escaped_overflow = add_overflow(odd_sequence_starts, escape, &sequences_starting_on_even_bits);
uint64_t invert_mask = sequences_starting_on_even_bits << 1; // The mask we want to return is the *escaped* bits, not escapes.
uint64_t odd_carries;
/* must record the carry-out of our odd-carries out of bit 63; this
* indicates whether the sense of any edge going to the next iteration
* should be flipped */
bool new_overflow = add_overflow(match, odd_starts, &odd_carries);
odd_carries |= overflow; /* push in bit zero as a
* potential end if we had an
* odd-numbered run at the
* end of the previous
* iteration */
overflow = new_overflow ? 0x1ULL : 0x0ULL;
uint64_t even_carry_ends = even_carries & ~match;
uint64_t odd_carry_ends = odd_carries & ~match;
uint64_t even_start_odd_end = even_carry_ends & odd_bits;
uint64_t odd_start_even_end = odd_carry_ends & even_bits;
uint64_t odd_ends = even_start_odd_end | odd_start_even_end;
return odd_ends;
// Mask every other backslashed character as an escaped character
// Flip the mask for sequences that start on even bits, to correct them
return (even_bits ^ invert_mask) & follows_escape;
}
//
@ -5211,7 +5232,7 @@ really_inline ErrorValues json_structural_scanner::detect_errors_on_eof(bool str
//
really_inline uint64_t json_structural_scanner::find_strings(const simd::simd8x64<uint8_t> in) {
const uint64_t backslash = in.eq('\\');
const uint64_t escaped = follows_odd_sequence_of(backslash, prev_escaped);
const uint64_t escaped = find_escaped(backslash, prev_escaped);
const uint64_t quote = in.eq('"') & ~escaped;
// prefix_xor flips on bits inside the string (and flips off the end quote).
const uint64_t in_string = prefix_xor(quote) ^ prev_in_string;
@ -5371,9 +5392,6 @@ really_inline void json_structural_scanner::scan(const uint8_t *buf, const size_
template<size_t STEP_SIZE>
int find_structural_bits(const uint8_t *buf, size_t len, simdjson::ParsedJson &pj, bool streaming) {
if (unlikely(len > pj.byte_capacity)) {
std::cerr << "Your ParsedJson object only supports documents up to "
<< pj.byte_capacity << " bytes but you are trying to process "
<< len << " bytes" << std::endl;
return simdjson::CAPACITY;
}
utf8_checker utf8_checker{};
@ -6012,44 +6030,68 @@ public:
really_inline void scan(const uint8_t *buf, const size_t len, utf8_checker &utf8_checker);
};
// return a bitvector indicating where we have characters that end an odd-length
// sequence of backslashes (and thus change the behavior of the next character
// to follow). A even-length sequence of backslashes, and, for that matter, the
// largest even-length prefix of our odd-length sequence of backslashes, simply
// modify the behavior of the backslashes themselves.
// We also update the prev_iter_ends_odd_backslash reference parameter to
// indicate whether we end an iteration on an odd-length sequence of
// backslashes, which modifies our subsequent search for odd-length
// sequences of backslashes in an obvious way.
really_inline uint64_t follows_odd_sequence_of(const uint64_t match, uint64_t &overflow) {
// Routines to print masks and text for debugging bitmask operations
// Debugging helper: copies the bytes of `in` into a NUL-terminated string,
// replacing every byte that compares less than ' ' with '_'.
//
// The returned pointer refers to one function-local static buffer: each call
// overwrites the previous result, and callers must not free it.
//
// NOTE(review): the comparison is done on plain `char`, so whether bytes >= 0x80
// are also replaced depends on the signedness of `char` on the target -- confirm
// which behavior is intended.
UNUSED static char * format_input_text(const simd8x64<uint8_t> in) {
  // Static storage instead of a one-time malloc(): there is nothing to leak and
  // no null pointer to write through if the allocation were to fail. The
  // alignment keeps the buffer at least as aligned as malloc'd memory.
  alignas(64) static char buf[sizeof(simd8x64<uint8_t>) + 1];
  in.store(reinterpret_cast<uint8_t*>(buf));
  for (size_t i=0; i<sizeof(simd8x64<uint8_t>); i++) {
    if (buf[i] < ' ') { buf[i] = '_'; }
  }
  buf[sizeof(simd8x64<uint8_t>)] = '\0';
  return buf;
}
// Debugging helper: renders a 64-bit mask as a 64-character, NUL-terminated
// string. Character i is 'X' when bit i of `mask` is set (bit 0 comes first)
// and ' ' otherwise.
//
// The returned pointer refers to one function-local static buffer: each call
// overwrites the previous result, and callers must not free it.
UNUSED static char * format_mask(uint64_t mask) {
  // Static storage instead of a one-time malloc(): nothing to leak, and no null
  // pointer to write through if the allocation were to fail.
  static char buf[64 + 1];
  for (size_t i=0; i<64; i++) {
    // Shift a 64-bit one. size_t may be only 32 bits wide, and shifting a
    // 32-bit value by 32 or more is undefined behavior.
    buf[i] = (mask & (uint64_t(1) << i)) ? 'X' : ' ';
  }
  buf[64] = '\0';
  return buf;
}
//
// Finds escaped characters (characters following \).
//
// Handles runs of backslashes like \\\" and \\\\" correctly (yielding 0101 and 01010, respectively).
//
// Does this by:
// - Shift the escape mask to get potentially escaped characters (characters after backslashes).
// - Mask escaped sequences that start on *even* bits with 1010101010 (odd bits are escaped, even bits are not)
// - Mask escaped sequences that start on *odd* bits with 0101010101 (even bits are escaped, odd bits are not)
//
// To distinguish between escaped sequences starting on even/odd bits, it finds the start of all
// escape sequences, filters out the ones that start on even bits, and adds that to the mask of
// escape sequences. This causes the addition to clear out the sequences starting on odd bits (since
// the start bit causes a carry), and leaves even-bit sequences alone.
//
// Example:
//
// text | \\\ | \\\"\\\" \\\" \\"\\" |
// escape | xxx | xx xxx xxx xx xx | Removed overflow backslash; will OR it into follows_escape
// odd_starts | x | x x x | escape & ~even_bits & ~follows_escape
// even_seq | c| cxxx c xx c | c = carry bit -- will be masked out later
// invert_mask | | cxxx c xx c| even_seq << 1
// follows_escape | xx | x xx xxx xxx xx xx | Includes overflow bit
// escaped | x | x x x x x x x x |
// desired | x | x x x x x x x x |
// text | \\\ | \\\"\\\" \\\" \\"\\" |
//
really_inline uint64_t find_escaped(uint64_t escape, uint64_t &escaped_overflow) {
// If there was overflow, pretend the first character isn't a backslash
escape &= ~escaped_overflow;
uint64_t follows_escape = escape << 1 | escaped_overflow;
// Get sequences starting on even bits by clearing out the odd series using +
const uint64_t even_bits = 0x5555555555555555ULL;
const uint64_t odd_bits = ~even_bits;
uint64_t start_edges = match & ~(match << 1);
/* flip lowest if we have an odd-length run at the end of the prior
* iteration */
uint64_t even_start_mask = even_bits ^ overflow;
uint64_t even_starts = start_edges & even_start_mask;
uint64_t odd_starts = start_edges & ~even_start_mask;
uint64_t even_carries = match + even_starts;
uint64_t odd_sequence_starts = escape & ~even_bits & ~follows_escape;
uint64_t sequences_starting_on_even_bits;
escaped_overflow = add_overflow(odd_sequence_starts, escape, &sequences_starting_on_even_bits);
uint64_t invert_mask = sequences_starting_on_even_bits << 1; // The mask we want to return is the *escaped* bits, not escapes.
uint64_t odd_carries;
/* must record the carry-out of our odd-carries out of bit 63; this
* indicates whether the sense of any edge going to the next iteration
* should be flipped */
bool new_overflow = add_overflow(match, odd_starts, &odd_carries);
odd_carries |= overflow; /* push in bit zero as a
* potential end if we had an
* odd-numbered run at the
* end of the previous
* iteration */
overflow = new_overflow ? 0x1ULL : 0x0ULL;
uint64_t even_carry_ends = even_carries & ~match;
uint64_t odd_carry_ends = odd_carries & ~match;
uint64_t even_start_odd_end = even_carry_ends & odd_bits;
uint64_t odd_start_even_end = odd_carry_ends & even_bits;
uint64_t odd_ends = even_start_odd_end | odd_start_even_end;
return odd_ends;
// Mask every other backslashed character as an escaped character
// Flip the mask for sequences that start on even bits, to correct them
return (even_bits ^ invert_mask) & follows_escape;
}
//
@ -6098,7 +6140,7 @@ really_inline ErrorValues json_structural_scanner::detect_errors_on_eof(bool str
//
really_inline uint64_t json_structural_scanner::find_strings(const simd::simd8x64<uint8_t> in) {
const uint64_t backslash = in.eq('\\');
const uint64_t escaped = follows_odd_sequence_of(backslash, prev_escaped);
const uint64_t escaped = find_escaped(backslash, prev_escaped);
const uint64_t quote = in.eq('"') & ~escaped;
// prefix_xor flips on bits inside the string (and flips off the end quote).
const uint64_t in_string = prefix_xor(quote) ^ prev_in_string;
@ -6258,9 +6300,6 @@ really_inline void json_structural_scanner::scan(const uint8_t *buf, const size_
template<size_t STEP_SIZE>
int find_structural_bits(const uint8_t *buf, size_t len, simdjson::ParsedJson &pj, bool streaming) {
if (unlikely(len > pj.byte_capacity)) {
std::cerr << "Your ParsedJson object only supports documents up to "
<< pj.byte_capacity << " bytes but you are trying to process "
<< len << " bytes" << std::endl;
return simdjson::CAPACITY;
}
utf8_checker utf8_checker{};
@ -6904,44 +6943,68 @@ public:
really_inline void scan(const uint8_t *buf, const size_t len, utf8_checker &utf8_checker);
};
// return a bitvector indicating where we have characters that end an odd-length
// sequence of backslashes (and thus change the behavior of the next character
// to follow). A even-length sequence of backslashes, and, for that matter, the
// largest even-length prefix of our odd-length sequence of backslashes, simply
// modify the behavior of the backslashes themselves.
// We also update the prev_iter_ends_odd_backslash reference parameter to
// indicate whether we end an iteration on an odd-length sequence of
// backslashes, which modifies our subsequent search for odd-length
// sequences of backslashes in an obvious way.
really_inline uint64_t follows_odd_sequence_of(const uint64_t match, uint64_t &overflow) {
// Routines to print masks and text for debugging bitmask operations
// Debugging helper: copies the bytes of `in` into a NUL-terminated string,
// replacing every byte that compares less than ' ' with '_'.
//
// The returned pointer refers to one function-local static buffer: each call
// overwrites the previous result, and callers must not free it.
//
// NOTE(review): the comparison is done on plain `char`, so whether bytes >= 0x80
// are also replaced depends on the signedness of `char` on the target -- confirm
// which behavior is intended.
UNUSED static char * format_input_text(const simd8x64<uint8_t> in) {
  // Static storage instead of a one-time malloc(): there is nothing to leak and
  // no null pointer to write through if the allocation were to fail. The
  // alignment keeps the buffer at least as aligned as malloc'd memory.
  alignas(64) static char buf[sizeof(simd8x64<uint8_t>) + 1];
  in.store(reinterpret_cast<uint8_t*>(buf));
  for (size_t i=0; i<sizeof(simd8x64<uint8_t>); i++) {
    if (buf[i] < ' ') { buf[i] = '_'; }
  }
  buf[sizeof(simd8x64<uint8_t>)] = '\0';
  return buf;
}
// Debugging helper: renders a 64-bit mask as a 64-character, NUL-terminated
// string. Character i is 'X' when bit i of `mask` is set (bit 0 comes first)
// and ' ' otherwise.
//
// The returned pointer refers to one function-local static buffer: each call
// overwrites the previous result, and callers must not free it.
UNUSED static char * format_mask(uint64_t mask) {
  // Static storage instead of a one-time malloc(): nothing to leak, and no null
  // pointer to write through if the allocation were to fail.
  static char buf[64 + 1];
  for (size_t i=0; i<64; i++) {
    // Shift a 64-bit one. size_t may be only 32 bits wide, and shifting a
    // 32-bit value by 32 or more is undefined behavior.
    buf[i] = (mask & (uint64_t(1) << i)) ? 'X' : ' ';
  }
  buf[64] = '\0';
  return buf;
}
//
// Finds escaped characters (characters following \).
//
// Handles runs of backslashes like \\\" and \\\\" correctly (yielding 0101 and 01010, respectively).
//
// Does this by:
// - Shift the escape mask to get potentially escaped characters (characters after backslashes).
// - Mask escaped sequences that start on *even* bits with 1010101010 (odd bits are escaped, even bits are not)
// - Mask escaped sequences that start on *odd* bits with 0101010101 (even bits are escaped, odd bits are not)
//
// To distinguish between escaped sequences starting on even/odd bits, it finds the start of all
// escape sequences, filters out the ones that start on even bits, and adds that to the mask of
// escape sequences. This causes the addition to clear out the sequences starting on odd bits (since
// the start bit causes a carry), and leaves even-bit sequences alone.
//
// Example:
//
// text | \\\ | \\\"\\\" \\\" \\"\\" |
// escape | xxx | xx xxx xxx xx xx | Removed overflow backslash; will OR it into follows_escape
// odd_starts | x | x x x | escape & ~even_bits & ~follows_escape
// even_seq | c| cxxx c xx c | c = carry bit -- will be masked out later
// invert_mask | | cxxx c xx c| even_seq << 1
// follows_escape | xx | x xx xxx xxx xx xx | Includes overflow bit
// escaped | x | x x x x x x x x |
// desired | x | x x x x x x x x |
// text | \\\ | \\\"\\\" \\\" \\"\\" |
//
really_inline uint64_t find_escaped(uint64_t escape, uint64_t &escaped_overflow) {
// If there was overflow, pretend the first character isn't a backslash
escape &= ~escaped_overflow;
uint64_t follows_escape = escape << 1 | escaped_overflow;
// Get sequences starting on even bits by clearing out the odd series using +
const uint64_t even_bits = 0x5555555555555555ULL;
const uint64_t odd_bits = ~even_bits;
uint64_t start_edges = match & ~(match << 1);
/* flip lowest if we have an odd-length run at the end of the prior
* iteration */
uint64_t even_start_mask = even_bits ^ overflow;
uint64_t even_starts = start_edges & even_start_mask;
uint64_t odd_starts = start_edges & ~even_start_mask;
uint64_t even_carries = match + even_starts;
uint64_t odd_sequence_starts = escape & ~even_bits & ~follows_escape;
uint64_t sequences_starting_on_even_bits;
escaped_overflow = add_overflow(odd_sequence_starts, escape, &sequences_starting_on_even_bits);
uint64_t invert_mask = sequences_starting_on_even_bits << 1; // The mask we want to return is the *escaped* bits, not escapes.
uint64_t odd_carries;
/* must record the carry-out of our odd-carries out of bit 63; this
* indicates whether the sense of any edge going to the next iteration
* should be flipped */
bool new_overflow = add_overflow(match, odd_starts, &odd_carries);
odd_carries |= overflow; /* push in bit zero as a
* potential end if we had an
* odd-numbered run at the
* end of the previous
* iteration */
overflow = new_overflow ? 0x1ULL : 0x0ULL;
uint64_t even_carry_ends = even_carries & ~match;
uint64_t odd_carry_ends = odd_carries & ~match;
uint64_t even_start_odd_end = even_carry_ends & odd_bits;
uint64_t odd_start_even_end = odd_carry_ends & even_bits;
uint64_t odd_ends = even_start_odd_end | odd_start_even_end;
return odd_ends;
// Mask every other backslashed character as an escaped character
// Flip the mask for sequences that start on even bits, to correct them
return (even_bits ^ invert_mask) & follows_escape;
}
//
@ -6990,7 +7053,7 @@ really_inline ErrorValues json_structural_scanner::detect_errors_on_eof(bool str
//
really_inline uint64_t json_structural_scanner::find_strings(const simd::simd8x64<uint8_t> in) {
const uint64_t backslash = in.eq('\\');
const uint64_t escaped = follows_odd_sequence_of(backslash, prev_escaped);
const uint64_t escaped = find_escaped(backslash, prev_escaped);
const uint64_t quote = in.eq('"') & ~escaped;
// prefix_xor flips on bits inside the string (and flips off the end quote).
const uint64_t in_string = prefix_xor(quote) ^ prev_in_string;
@ -7150,9 +7213,6 @@ really_inline void json_structural_scanner::scan(const uint8_t *buf, const size_
template<size_t STEP_SIZE>
int find_structural_bits(const uint8_t *buf, size_t len, simdjson::ParsedJson &pj, bool streaming) {
if (unlikely(len > pj.byte_capacity)) {
std::cerr << "Your ParsedJson object only supports documents up to "
<< pj.byte_capacity << " bytes but you are trying to process "
<< len << " bytes" << std::endl;
return simdjson::CAPACITY;
}
utf8_checker utf8_checker{};
@ -7769,7 +7829,6 @@ UNTARGET_REGION
/* begin file src/stage2_build_tape.cpp */
#include <cassert>
#include <cstring>
#include <iostream>
using namespace simdjson;
@ -9628,7 +9687,7 @@ bool ParsedJson::allocate_capacity(size_t len, size_t max_depth) {
if (!string_buf || !tape ||
!containing_scope_offset || !ret_address ||
!structural_indexes) {
std::cerr << "Could not allocate memory" << std::endl;
// Could not allocate memory
return false;
}
/*
@ -9685,13 +9744,11 @@ bool ParsedJson::print_json(std::ostream &os) const {
if (type == 'r') {
how_many = tape_val & JSON_VALUE_MASK;
} else {
fprintf(stderr, "Error: no starting root node?");
// Error: no starting root node?
return false;
}
if (how_many > tape_capacity) {
fprintf(
stderr,
"We may be exceeding the tape capacity. Is this a valid document?\n");
// We may be exceeding the tape capacity. Is this a valid document?
return false;
}
tape_idx++;
@ -9778,10 +9835,10 @@ bool ParsedJson::print_json(std::ostream &os) const {
os << ']';
break;
case 'r': // we start and end with the root node
fprintf(stderr, "should we be hitting the root node?\n");
// should we be hitting the root node?
return false;
default:
fprintf(stderr, "bug %c\n", type);
// bug?
return false;
}
}
@ -9803,7 +9860,7 @@ bool ParsedJson::dump_raw_tape(std::ostream &os) const {
if (type == 'r') {
how_many = tape_val & JSON_VALUE_MASK;
} else {
fprintf(stderr, "Error: no starting root node?");
// Error: no starting root node?
return false;
}
os << "\t// pointing to " << how_many << " (right after last node)\n";
@ -9871,7 +9928,7 @@ bool ParsedJson::dump_raw_tape(std::ostream &os) const {
<< " (start of the scope) \n";
break;
case 'r': // we start and end with the root node
fprintf(stderr, "should we be hitting the root node?\n");
// should we be hitting the root node?
return false;
default:
return false;

View File

@ -1,4 +1,4 @@
/* auto-generated on Wed Jan 15 15:50:50 EST 2020. Do not edit! */
/* auto-generated on Mon Jan 27 10:35:34 EST 2020. Do not edit! */
/* begin file include/simdjson/simdjson_version.h */
// /include/simdjson/simdjson_version.h automatically generated by release.py,
// do not change by hand
@ -20,7 +20,6 @@ enum {
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#ifdef _MSC_VER
#include <iso646.h>
@ -66,7 +65,7 @@ enum {
#endif
// under GCC and CLANG, we use these two macros
#define TARGET_HASWELL TARGET_REGION("avx2,bmi,pclmul")
#define TARGET_HASWELL TARGET_REGION("avx2,bmi,pclmul,lzcnt")
#define TARGET_WESTMERE TARGET_REGION("sse4.2,pclmul")
#define TARGET_ARM64
@ -294,53 +293,11 @@ static inline uint32_t detect_supported_architectures() {
#ifndef SIMDJSON_JSONFORMATUTILS_H
#define SIMDJSON_JSONFORMATUTILS_H
#include <cstdio>
#include <iomanip>
#include <iostream>
namespace simdjson {
// ends with zero char
// Writes the NUL-terminated byte string `src` to stdout with JSON-style
// escaping: the characters \b \f \n \r " \t and \ are emitted as a backslash
// followed by their short escape letter, any other byte <= 0x1F is emitted as
// \u00XX (four lowercase hex digits), and everything else is written verbatim.
static inline void print_with_escapes(const unsigned char *src) {
  for (; *src != '\0'; ++src) {
    const unsigned char current = *src;
    // Short escape letter for this byte, or 0 when it has none.
    char short_escape = '\0';
    switch (current) {
      case '\b': short_escape = 'b';  break;
      case '\f': short_escape = 'f';  break;
      case '\n': short_escape = 'n';  break;
      case '\r': short_escape = 'r';  break;
      case '\"': short_escape = '"';  break;
      case '\t': short_escape = 't';  break;
      case '\\': short_escape = '\\'; break;
      default: break;
    }
    if (short_escape != '\0') {
      putchar('\\');
      putchar(short_escape);
      continue;
    }
    if (current <= 0x1F) {
      // Remaining control characters use the generic \uXXXX form.
      printf("\\u%04x", current);
    } else {
      putchar(current);
    }
  }
}
// ends with zero char
static inline void print_with_escapes(const unsigned char *src,
@ -389,49 +346,6 @@ static inline void print_with_escapes(const unsigned char *src,
}
}
// print len chars
// Writes exactly `len` bytes starting at `src` to stdout with JSON-style
// escaping: the characters \b \f \n \r " \t and \ are emitted as a backslash
// followed by their short escape letter, any other byte <= 0x1F (including NUL)
// is emitted as \u00XX (four lowercase hex digits), and everything else is
// written verbatim.
static inline void print_with_escapes(const unsigned char *src, size_t len) {
  for (const unsigned char *const stop = src + len; src < stop; ++src) {
    const unsigned char current = *src;
    // Short escape letter for this byte, or 0 when it has none.
    char short_escape = '\0';
    switch (current) {
      case '\b': short_escape = 'b';  break;
      case '\f': short_escape = 'f';  break;
      case '\n': short_escape = 'n';  break;
      case '\r': short_escape = 'r';  break;
      case '\"': short_escape = '"';  break;
      case '\t': short_escape = 't';  break;
      case '\\': short_escape = '\\'; break;
      default: break;
    }
    if (short_escape != '\0') {
      putchar('\\');
      putchar(short_escape);
      continue;
    }
    if (current <= 0x1F) {
      // Remaining control characters use the generic \uXXXX form.
      printf("\\u%04x", current);
    } else {
      putchar(current);
    }
  }
}
// print len chars
static inline void print_with_escapes(const unsigned char *src,
@ -586,7 +500,7 @@ const std::string &error_message(const int);
#define DEBUG_BLOCK(name, block)
#endif
#ifndef _MSC_VER
#if !defined(_MSC_VER) && !defined(SIMDJSON_NO_COMPUTED_GOTO)
// Implemented using Labels as Values which works in GCC and CLANG (and maybe
// also in Intel's compiler), but won't work in MSVC.
#define SIMDJSON_USE_COMPUTED_GOTO
@ -642,7 +556,7 @@ const std::string &error_message(const int);
namespace simdjson {
// low-level function to allocate memory with padding so we can read passed the
// low-level function to allocate memory with padding so we can read past the
// "length" bytes safely. if you must provide a pointer to some data, create it
// with this function: length is the max. size in bytes of the string caller is
// responsible to free the memory (free(...))
@ -815,7 +729,6 @@ static inline size_t json_minify(const padded_string &p, char *out) {
#define SIMDJSON_PARSEDJSON_H
#include <cstring>
#include <iostream>
#include <memory>
#define JSON_VALUE_MASK 0xFFFFFFFFFFFFFF
@ -864,7 +777,7 @@ public:
// this should be called when parsing (right before writing the tapes)
void init();
// print the json to stdout (should be valid)
// print the json to std::ostream (should be valid)
// return false if the tape is likely wrong (e.g., you did not parse a valid
// JSON).
WARN_UNUSED
@ -915,8 +828,8 @@ public:
tape[saved_loc] |= val;
}
class InvalidJSON : public std::exception {
const char *what() const throw() { return "JSON document is invalid"; }
struct InvalidJSON : public std::exception {
const char *what() const noexcept { return "JSON document is invalid"; }
};
template <size_t max_depth> class BasicIterator;
@ -948,20 +861,7 @@ public:
};
// dump bits low to high
// Prints the 64 bits of `v` to std::cout, least-significant bit first, using
// "1" for a set bit and "_" for a clear bit, followed by a space, `msg` (as a
// C string) and a newline.
inline void dumpbits_always(uint64_t v, const std::string &msg) {
  uint64_t remaining = v;
  for (uint32_t printed = 0; printed < 64; ++printed) {
    const bool bit_is_set = (remaining & 0x1ULL) != 0;
    std::cout << (bit_is_set ? "1" : "_");
    remaining >>= 1;  // next call to the loop looks at the following bit
  }
  std::cout << " " << msg.c_str() << "\n";
}
// Prints the 32 bits of `v` to std::cout, least-significant bit first, using
// "1" for a set bit and "_" for a clear bit, followed by a space, `msg` (as a
// C string) and a newline.
inline void dumpbits32_always(uint32_t v, const std::string &msg) {
  uint32_t remaining = v;
  for (uint32_t printed = 0; printed < 32; ++printed) {
    const bool bit_is_set = (remaining & 0x1U) != 0;
    std::cout << (bit_is_set ? "1" : "_");
    remaining >>= 1;  // next pass looks at the following bit
  }
  std::cout << " " << msg.c_str() << "\n";
}
} // namespace simdjson
#endif
/* end file include/simdjson/parsedjson.h */
@ -1744,7 +1644,7 @@ int find_structural_bits(const char *buf, size_t len, simdjson::ParsedJson &pj)
return find_structural_bits<T>((const uint8_t *)buf, len, pj);
}
}; // namespace simdjson
} // namespace simdjson
#endif
/* end file include/simdjson/stage1_find_marks.h */
@ -2127,7 +2027,9 @@ namespace simdjson {
size_t next_json{0};
bool load_next_batch{true};
size_t current_buffer_loc{0};
#ifdef SIMDJSON_THREADS_ENABLED
size_t last_json_buffer_loc{0};
#endif
size_t n_parsed_docs{0};
size_t n_bytes_parsed{0};
#ifdef SIMDJSON_THREADS_ENABLED