Let tape writing be put in a register
This commit is contained in:
parent
e15e1e253d
commit
bbd61eb13f
|
@ -107,9 +107,6 @@ public:
|
|||
*/
|
||||
virtual ~dom_parser_implementation() = default;
|
||||
|
||||
/** Next location to write to in the tape */
|
||||
uint32_t current_loc{0};
|
||||
|
||||
/** Number of structural indices passed from stage 1 to stage 2 */
|
||||
uint32_t n_structural_indexes{0};
|
||||
/** Structural indices passed from stage 1 to stage 2 */
|
||||
|
|
|
@ -25,8 +25,8 @@ namespace logger {
|
|||
if (LOG_ENABLED) {
|
||||
log_depth = 0;
|
||||
printf("\n");
|
||||
printf("| %-*s | %-*s | %*s | %*s | %*s | %-*s | %-*s |\n", LOG_EVENT_LEN, "Event", LOG_BUFFER_LEN, "Buffer", 4, "Curr", 4, "Next", 5, "Next#", LOG_DETAIL_LEN, "Detail", LOG_INDEX_LEN, "index");
|
||||
printf("|%.*s|%.*s|%.*s|%.*s|%.*s|%.*s|%.*s|\n", LOG_EVENT_LEN+2, DASHES, LOG_BUFFER_LEN+2, DASHES, 4+2, DASHES, 4+2, DASHES, 5+2, DASHES, LOG_DETAIL_LEN+2, DASHES, LOG_INDEX_LEN+2, DASHES);
|
||||
printf("| %-*s | %-*s | %*s | %*s | %*s | %-*s | %-*s | %-*s |\n", LOG_EVENT_LEN, "Event", LOG_BUFFER_LEN, "Buffer", 4, "Curr", 4, "Next", 5, "Next#", 5, "Tape#", LOG_DETAIL_LEN, "Detail", LOG_INDEX_LEN, "index");
|
||||
printf("|%.*s|%.*s|%.*s|%.*s|%.*s|%.*s|%.*s|%.*s|\n", LOG_EVENT_LEN+2, DASHES, LOG_BUFFER_LEN+2, DASHES, 4+2, DASHES, 4+2, DASHES, 5+2, DASHES, 5+2, DASHES, LOG_DETAIL_LEN+2, DASHES, LOG_INDEX_LEN+2, DASHES);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -44,25 +44,17 @@ namespace logger {
|
|||
{
|
||||
// Print the next N characters in the buffer.
|
||||
printf("| ");
|
||||
if (structurals.at_beginning()) {
|
||||
// If the pointer is at the beginning, print a space followed by the beginning characters
|
||||
// Print spaces for unprintable or newline characters.
|
||||
printf(" ");
|
||||
for (int i=0;i<LOG_BUFFER_LEN-1;i++) {
|
||||
printf("%c", printable_char(structurals.buf[i]));
|
||||
}
|
||||
} else {
|
||||
// Otherwise, print the characters starting from the buffer position.
|
||||
// Print spaces for unprintable or newline characters.
|
||||
for (int i=0;i<LOG_BUFFER_LEN;i++) {
|
||||
printf("%c", printable_char(structurals.current()[i]));
|
||||
}
|
||||
// Otherwise, print the characters starting from the buffer position.
|
||||
// Print spaces for unprintable or newline characters.
|
||||
for (int i=0;i<LOG_BUFFER_LEN;i++) {
|
||||
printf("%c", printable_char(structurals.current()[i]));
|
||||
}
|
||||
printf(" ");
|
||||
}
|
||||
printf("| %c ", printable_char(structurals.at_beginning() ? ' ' : structurals.current_char()));
|
||||
printf("| %c ", printable_char(structurals.current_char()));
|
||||
printf("| %c ", printable_char(structurals.peek_next_char()));
|
||||
printf("| %5u ", structurals.parser.structural_indexes[*(structurals.current_structural+1)]);
|
||||
printf("| %5u ", structurals.next_tape_index());
|
||||
printf("| %-*s ", LOG_DETAIL_LEN, detail);
|
||||
printf("| %*u ", LOG_INDEX_LEN, *structurals.current_structural);
|
||||
printf("|\n");
|
||||
|
|
|
@ -291,14 +291,14 @@ never_inline bool parse_large_integer(const uint8_t *const src,
|
|||
// as a positive signed integer, but the negative version is
|
||||
// possible.
|
||||
constexpr int64_t signed_answer = INT64_MIN;
|
||||
writer.write_s64(signed_answer);
|
||||
writer.append_s64(signed_answer);
|
||||
#ifdef JSON_TEST_NUMBERS // for unit testing
|
||||
found_integer(signed_answer, src);
|
||||
#endif
|
||||
} else {
|
||||
// we can negate safely
|
||||
int64_t signed_answer = -static_cast<int64_t>(i);
|
||||
writer.write_s64(signed_answer);
|
||||
writer.append_s64(signed_answer);
|
||||
#ifdef JSON_TEST_NUMBERS // for unit testing
|
||||
found_integer(signed_answer, src);
|
||||
#endif
|
||||
|
@ -311,12 +311,12 @@ never_inline bool parse_large_integer(const uint8_t *const src,
|
|||
#ifdef JSON_TEST_NUMBERS // for unit testing
|
||||
found_integer(i, src);
|
||||
#endif
|
||||
writer.write_s64(i);
|
||||
writer.append_s64(i);
|
||||
} else {
|
||||
#ifdef JSON_TEST_NUMBERS // for unit testing
|
||||
found_unsigned_integer(i, src);
|
||||
#endif
|
||||
writer.write_u64(i);
|
||||
writer.append_u64(i);
|
||||
}
|
||||
}
|
||||
return is_structural_or_whitespace(*p);
|
||||
|
@ -326,7 +326,7 @@ template<typename W>
|
|||
bool slow_float_parsing(UNUSED const char * src, W writer) {
|
||||
double d;
|
||||
if (parse_float_strtod(src, &d)) {
|
||||
writer.write_double(d);
|
||||
writer.append_double(d);
|
||||
#ifdef JSON_TEST_NUMBERS // for unit testing
|
||||
found_float(d, (const uint8_t *)src);
|
||||
#endif
|
||||
|
@ -350,10 +350,10 @@ bool slow_float_parsing(UNUSED const char * src, W writer) {
|
|||
template<typename W>
|
||||
really_inline bool parse_number(UNUSED const uint8_t *const src,
|
||||
UNUSED bool found_minus,
|
||||
W writer) {
|
||||
W &writer) {
|
||||
#ifdef SIMDJSON_SKIPNUMBERPARSING // for performance analysis, it is sometimes
|
||||
// useful to skip parsing
|
||||
writer.write_s64(0); // always write zero
|
||||
writer.append_s64(0); // always write zero
|
||||
return true; // always succeeds
|
||||
#else
|
||||
const char *p = reinterpret_cast<const char *>(src);
|
||||
|
@ -497,7 +497,7 @@ really_inline bool parse_number(UNUSED const uint8_t *const src,
|
|||
}
|
||||
// we over-decrement by one when there is a '.'
|
||||
digit_count -= int(start - start_digits);
|
||||
if (digit_count >= 19) {
|
||||
if (unlikely(digit_count >= 19)) {
|
||||
// Ok, chances are good that we had an overflow!
|
||||
// this is almost never going to get called!!!
|
||||
// we start anew, going slowly!!!
|
||||
|
@ -505,14 +505,22 @@ really_inline bool parse_number(UNUSED const uint8_t *const src,
|
|||
// 10000000000000000000000000000000000000000000e+308
|
||||
// 3.1415926535897932384626433832795028841971693993751
|
||||
//
|
||||
return slow_float_parsing((const char *) src, writer);
|
||||
bool success = slow_float_parsing((const char *) src, writer);
|
||||
// The number was already written, but we made a copy of the writer when we passed it to the
|
||||
// slow_float_parsing() function, so we have to skip those tape spots now that we've returned
|
||||
writer.skip_double();
|
||||
return success;
|
||||
}
|
||||
}
|
||||
if (unlikely(exponent < FASTFLOAT_SMALLEST_POWER) ||
|
||||
(exponent > FASTFLOAT_LARGEST_POWER)) { // this is uncommon!!!
|
||||
// this is almost never going to get called!!!
|
||||
// we start anew, going slowly!!!
|
||||
return slow_float_parsing((const char *) src, writer);
|
||||
bool success = slow_float_parsing((const char *) src, writer);
|
||||
// The number was already written, but we made a copy of the writer when we passed it to the
|
||||
// slow_float_parsing() function, so we have to skip those tape spots now that we've returned
|
||||
writer.skip_double();
|
||||
return success;
|
||||
}
|
||||
bool success = true;
|
||||
double d = compute_float_64(exponent, i, negative, &success);
|
||||
|
@ -521,7 +529,7 @@ really_inline bool parse_number(UNUSED const uint8_t *const src,
|
|||
success = parse_float_strtod((const char *)src, &d);
|
||||
}
|
||||
if (success) {
|
||||
writer.write_double(d);
|
||||
writer.append_double(d);
|
||||
#ifdef JSON_TEST_NUMBERS // for unit testing
|
||||
found_float(d, src);
|
||||
#endif
|
||||
|
@ -536,10 +544,14 @@ really_inline bool parse_number(UNUSED const uint8_t *const src,
|
|||
if (unlikely(digit_count >= 18)) { // this is uncommon!!!
|
||||
// there is a good chance that we had an overflow, so we need
|
||||
// to recover: we parse the whole thing again.
|
||||
return parse_large_integer(src, writer, found_minus);
|
||||
bool success = parse_large_integer(src, writer, found_minus);
|
||||
// The number was already written, but we made a copy of the writer when we passed it to the
|
||||
// parse_large_integer() function, so we have to skip those tape spots now that we've returned
|
||||
writer.skip_large_integer();
|
||||
return success;
|
||||
}
|
||||
i = negative ? 0 - i : i;
|
||||
writer.write_s64(i);
|
||||
writer.append_s64(i);
|
||||
#ifdef JSON_TEST_NUMBERS // for unit testing
|
||||
found_integer(i, src);
|
||||
#endif
|
||||
|
|
|
@ -6,6 +6,8 @@
|
|||
namespace stage2 {
|
||||
namespace { // Make everything here private
|
||||
|
||||
#include "generic/stage2/tape_writer.h"
|
||||
|
||||
#ifdef SIMDJSON_USE_COMPUTED_GOTO
|
||||
#define INIT_ADDRESSES() { &&array_begin, &&array_continue, &&error, &&finish, &&object_begin, &&object_continue }
|
||||
#define GOTO(address) { goto *(address); }
|
||||
|
@ -46,45 +48,25 @@ struct unified_machine_addresses {
|
|||
#undef FAIL_IF
|
||||
#define FAIL_IF(EXPR) { if (EXPR) { return addresses.error; } }
|
||||
|
||||
struct number_writer {
|
||||
dom_parser_implementation &parser;
|
||||
|
||||
really_inline void write_s64(int64_t value) noexcept {
|
||||
append_tape(0, internal::tape_type::INT64);
|
||||
std::memcpy(&parser.doc->tape[parser.current_loc], &value, sizeof(value));
|
||||
++parser.current_loc;
|
||||
}
|
||||
really_inline void write_u64(uint64_t value) noexcept {
|
||||
append_tape(0, internal::tape_type::UINT64);
|
||||
parser.doc->tape[parser.current_loc++] = value;
|
||||
}
|
||||
really_inline void write_double(double value) noexcept {
|
||||
append_tape(0, internal::tape_type::DOUBLE);
|
||||
static_assert(sizeof(value) == sizeof(parser.doc->tape[parser.current_loc]), "mismatch size");
|
||||
memcpy(&parser.doc->tape[parser.current_loc++], &value, sizeof(double));
|
||||
// doc->tape[doc->current_loc++] = *((uint64_t *)&d);
|
||||
}
|
||||
really_inline void append_tape(uint64_t val, internal::tape_type t) noexcept {
|
||||
parser.doc->tape[parser.current_loc++] = val | ((uint64_t(char(t))) << 56);
|
||||
}
|
||||
}; // struct number_writer
|
||||
|
||||
struct structural_parser : structural_iterator {
|
||||
/** Lets you append to the tape */
|
||||
tape_writer tape;
|
||||
/** Next write location in the string buf for stage 2 parsing */
|
||||
uint8_t *current_string_buf_loc{};
|
||||
uint8_t *current_string_buf_loc;
|
||||
/** Current depth (nested objects and arrays) */
|
||||
uint32_t depth;
|
||||
uint32_t depth{0};
|
||||
|
||||
// For non-streaming, to pass an explicit 0 as next_structural, which enables optimizations
|
||||
really_inline structural_parser(dom_parser_implementation &_parser, uint32_t start_structural_index)
|
||||
: structural_iterator(_parser, start_structural_index),
|
||||
depth{0} {
|
||||
tape{parser.doc->tape.get()},
|
||||
current_string_buf_loc{parser.doc->string_buf.get()} {
|
||||
}
|
||||
|
||||
WARN_UNUSED really_inline bool start_scope(ret_address_t continue_state) {
|
||||
parser.containing_scope[depth].tape_index = parser.current_loc;
|
||||
parser.containing_scope[depth].tape_index = next_tape_index();
|
||||
parser.containing_scope[depth].count = 0;
|
||||
parser.current_loc++; // We don't actually *write* the start element until the end.
|
||||
tape.skip(); // We don't actually *write* the start element until the end.
|
||||
parser.ret_address[depth] = continue_state;
|
||||
depth++;
|
||||
bool exceeded_max_depth = depth >= parser.max_depth();
|
||||
|
@ -112,14 +94,18 @@ struct structural_parser : structural_iterator {
|
|||
depth--;
|
||||
// write our doc->tape location to the header scope
|
||||
// The root scope gets written *at* the previous location.
|
||||
append_tape(parser.containing_scope[depth].tape_index, end);
|
||||
tape.append(parser.containing_scope[depth].tape_index, end);
|
||||
// count can overflow if it exceeds 24 bits... so we saturate
|
||||
// the convention being that a cnt of 0xffffff or more is undetermined in value (>= 0xffffff).
|
||||
const uint32_t start_tape_index = parser.containing_scope[depth].tape_index;
|
||||
const uint32_t count = parser.containing_scope[depth].count;
|
||||
const uint32_t cntsat = count > 0xFFFFFF ? 0xFFFFFF : count;
|
||||
// This is a load and an OR. It would be possible to just write once at doc->tape[d.tape_index]
|
||||
write_tape(start_tape_index, parser.current_loc | (uint64_t(cntsat) << 32), start);
|
||||
tape_writer::write(parser.doc->tape[start_tape_index], next_tape_index() | (uint64_t(cntsat) << 32), start);
|
||||
}
|
||||
|
||||
really_inline uint32_t next_tape_index() {
|
||||
return uint32_t(tape.next_tape_loc - parser.doc->tape.get());
|
||||
}
|
||||
|
||||
really_inline void end_object() {
|
||||
|
@ -135,14 +121,6 @@ struct structural_parser : structural_iterator {
|
|||
end_scope(internal::tape_type::ROOT, internal::tape_type::ROOT);
|
||||
}
|
||||
|
||||
really_inline void append_tape(uint64_t val, internal::tape_type t) noexcept {
|
||||
parser.doc->tape[parser.current_loc++] = val | ((uint64_t(char(t))) << 56);
|
||||
}
|
||||
|
||||
really_inline void write_tape(uint32_t loc, uint64_t val, internal::tape_type t) noexcept {
|
||||
parser.doc->tape[loc] = val | ((uint64_t(char(t))) << 56);
|
||||
}
|
||||
|
||||
// increment_count increments the count of keys in an object or values in an array.
|
||||
// Note that if you are at the level of the values or elements, the count
|
||||
// must be incremented in the preceding depth (depth-1) where the array or
|
||||
|
@ -153,7 +131,7 @@ struct structural_parser : structural_iterator {
|
|||
|
||||
really_inline uint8_t *on_start_string() noexcept {
|
||||
// we advance the point, accounting for the fact that we have a NULL termination
|
||||
append_tape(current_string_buf_loc - parser.doc->string_buf.get(), internal::tape_type::STRING);
|
||||
tape.append(current_string_buf_loc - parser.doc->string_buf.get(), internal::tape_type::STRING);
|
||||
return current_string_buf_loc + sizeof(uint32_t);
|
||||
}
|
||||
|
||||
|
@ -183,8 +161,7 @@ struct structural_parser : structural_iterator {
|
|||
|
||||
WARN_UNUSED really_inline bool parse_number(const uint8_t *src, bool found_minus) {
|
||||
log_value("number");
|
||||
number_writer writer{parser};
|
||||
bool succeeded = numberparsing::parse_number(src, found_minus, writer);
|
||||
bool succeeded = numberparsing::parse_number(src, found_minus, tape);
|
||||
if (!succeeded) { log_error("Invalid number"); }
|
||||
return !succeeded;
|
||||
}
|
||||
|
@ -200,17 +177,17 @@ struct structural_parser : structural_iterator {
|
|||
case 't':
|
||||
log_value("true");
|
||||
FAIL_IF( !atomparsing::is_valid_true_atom(current()) );
|
||||
append_tape(0, internal::tape_type::TRUE_VALUE);
|
||||
tape.append(0, internal::tape_type::TRUE_VALUE);
|
||||
return continue_state;
|
||||
case 'f':
|
||||
log_value("false");
|
||||
FAIL_IF( !atomparsing::is_valid_false_atom(current()) );
|
||||
append_tape(0, internal::tape_type::FALSE_VALUE);
|
||||
tape.append(0, internal::tape_type::FALSE_VALUE);
|
||||
return continue_state;
|
||||
case 'n':
|
||||
log_value("null");
|
||||
FAIL_IF( !atomparsing::is_valid_null_atom(current()) );
|
||||
append_tape(0, internal::tape_type::NULL_VALUE);
|
||||
tape.append(0, internal::tape_type::NULL_VALUE);
|
||||
return continue_state;
|
||||
case '0': case '1': case '2': case '3': case '4':
|
||||
case '5': case '6': case '7': case '8': case '9':
|
||||
|
@ -285,8 +262,6 @@ struct structural_parser : structural_iterator {
|
|||
|
||||
really_inline void init() {
|
||||
log_start();
|
||||
current_string_buf_loc = parser.doc->string_buf.get();
|
||||
parser.current_loc = 0;
|
||||
parser.error = UNINITIALIZED;
|
||||
}
|
||||
|
||||
|
@ -362,17 +337,17 @@ WARN_UNUSED static error_code parse_structurals(dom_parser_implementation &dom_p
|
|||
case 't':
|
||||
parser.log_value("true");
|
||||
FAIL_IF( !atomparsing::is_valid_true_atom(parser.current(), parser.remaining_len()) );
|
||||
parser.append_tape(0, internal::tape_type::TRUE_VALUE);
|
||||
parser.tape.append(0, internal::tape_type::TRUE_VALUE);
|
||||
goto finish;
|
||||
case 'f':
|
||||
parser.log_value("false");
|
||||
FAIL_IF( !atomparsing::is_valid_false_atom(parser.current(), parser.remaining_len()) );
|
||||
parser.append_tape(0, internal::tape_type::FALSE_VALUE);
|
||||
parser.tape.append(0, internal::tape_type::FALSE_VALUE);
|
||||
goto finish;
|
||||
case 'n':
|
||||
parser.log_value("null");
|
||||
FAIL_IF( !atomparsing::is_valid_null_atom(parser.current(), parser.remaining_len()) );
|
||||
parser.append_tape(0, internal::tape_type::NULL_VALUE);
|
||||
parser.tape.append(0, internal::tape_type::NULL_VALUE);
|
||||
goto finish;
|
||||
case '0': case '1': case '2': case '3': case '4':
|
||||
case '5': case '6': case '7': case '8': case '9':
|
||||
|
|
|
@ -0,0 +1,95 @@
|
|||
/** Appends entries to a tape of 64-bit words through a moving pointer (next_tape_loc); every append or skip advances that pointer. */
struct tape_writer {
|
||||
/** The next place to write to tape */
|
||||
uint64_t *next_tape_loc;
|
||||
|
||||
/** Write a signed 64-bit value to tape. */
|
||||
really_inline void append_s64(int64_t value) noexcept;
|
||||
|
||||
/** Write an unsigned 64-bit value to tape. */
|
||||
really_inline void append_u64(uint64_t value) noexcept;
|
||||
|
||||
/** Write a double value to tape. */
|
||||
really_inline void append_double(double value) noexcept;
|
||||
|
||||
/**
|
||||
* Append a tape entry (an 8-bit type, and 56 bits worth of value).
|
||||
*/
|
||||
really_inline void append(uint64_t val, internal::tape_type t) noexcept;
|
||||
|
||||
/**
|
||||
* Skip the current tape entry without writing.
|
||||
*
|
||||
* Used to skip the start of the container, since we'll come back later to fill it in when the
|
||||
* container ends.
|
||||
*/
|
||||
really_inline void skip() noexcept;
|
||||
|
||||
/**
|
||||
* Skip the number of tape entries necessary to write a large u64 or i64.
|
||||
*/
|
||||
really_inline void skip_large_integer() noexcept;
|
||||
|
||||
/**
|
||||
* Skip the number of tape entries necessary to write a double.
|
||||
*/
|
||||
really_inline void skip_double() noexcept;
|
||||
|
||||
/**
|
||||
* Write a value to a known location on tape.
|
||||
*
|
||||
* Used to go back and write out the start of a container after the container ends.
|
||||
*/
|
||||
really_inline static void write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept;
|
||||
|
||||
private:
|
||||
/**
|
||||
* Append both the tape entry, and a supplementary value following it. Used for types that need
|
||||
* all 64 bits, such as double and uint64_t.
|
||||
*/
|
||||
template<typename T>
|
||||
really_inline void append2(uint64_t val, T val2, internal::tape_type t) noexcept;
|
||||
}; // struct tape_writer
|
||||
|
||||
/** Appends an INT64-tagged entry (payload 0), then copies `value` into the following 64-bit slot via append2(). */
really_inline void tape_writer::append_s64(int64_t value) noexcept {
|
||||
append2(0, value, internal::tape_type::INT64);
|
||||
}
|
||||
|
||||
/** Appends a UINT64-tagged entry (payload 0), then stores `value` in the following slot and advances past it. */
really_inline void tape_writer::append_u64(uint64_t value) noexcept {
|
||||
append(0, internal::tape_type::UINT64);
|
||||
*next_tape_loc = value;
|
||||
next_tape_loc++;
|
||||
}
|
||||
|
||||
/** Write a double value to tape: a DOUBLE-tagged entry (payload 0) followed by a slot holding the bytes of `value`. */
|
||||
really_inline void tape_writer::append_double(double value) noexcept {
|
||||
append2(0, value, internal::tape_type::DOUBLE);
|
||||
}
|
||||
|
||||
/** Advances the write position by one entry without storing anything there. */
really_inline void tape_writer::skip() noexcept {
|
||||
next_tape_loc++;
|
||||
}
|
||||
|
||||
/** Advances the write position by two entries, the same distance append_s64()/append_u64() move it. */
really_inline void tape_writer::skip_large_integer() noexcept {
|
||||
next_tape_loc += 2;
|
||||
}
|
||||
|
||||
/** Advances the write position by two entries, the same distance append_double() moves it. */
really_inline void tape_writer::skip_double() noexcept {
|
||||
next_tape_loc += 2;
|
||||
}
|
||||
|
||||
/** Stores `val` OR'ed with the type tag `t` shifted into bits 56-63 at the current position, then advances by one entry. */
really_inline void tape_writer::append(uint64_t val, internal::tape_type t) noexcept {
|
||||
*next_tape_loc = val | ((uint64_t(char(t))) << 56);
|
||||
next_tape_loc++;
|
||||
}
|
||||
|
||||
/** Appends the tagged entry via append(), then memcpy's `val2` (statically checked to be 64 bits wide) into the next slot and advances past it. */
template<typename T>
|
||||
really_inline void tape_writer::append2(uint64_t val, T val2, internal::tape_type t) noexcept {
|
||||
append(val, t);
|
||||
static_assert(sizeof(val2) == sizeof(*next_tape_loc), "Type is not 64 bits!");
|
||||
memcpy(next_tape_loc, &val2, sizeof(val2));
|
||||
next_tape_loc++;
|
||||
}
|
||||
|
||||
/** Overwrites `tape_loc` with `val` OR'ed with the type tag `t` shifted into bits 56-63; being static, it does not move next_tape_loc. */
really_inline void tape_writer::write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept {
|
||||
tape_loc = val | ((uint64_t(char(t))) << 56);
|
||||
}
|
Loading…
Reference in New Issue