From bbd61eb13fcc3c11090d6edf32f643cd6c4d6d55 Mon Sep 17 00:00:00 2001 From: John Keiser Date: Sun, 7 Jun 2020 12:40:19 -0700 Subject: [PATCH] Let tape writing be put in a register --- .../internal/dom_parser_implementation.h | 3 - src/generic/stage2/logger.h | 24 ++--- src/generic/stage2/numberparsing.h | 38 +++++--- src/generic/stage2/structural_parser.h | 73 +++++--------- src/generic/stage2/tape_writer.h | 95 +++++++++++++++++++ 5 files changed, 152 insertions(+), 81 deletions(-) create mode 100644 src/generic/stage2/tape_writer.h diff --git a/include/simdjson/internal/dom_parser_implementation.h b/include/simdjson/internal/dom_parser_implementation.h index 4bf16d48..40f22e7b 100644 --- a/include/simdjson/internal/dom_parser_implementation.h +++ b/include/simdjson/internal/dom_parser_implementation.h @@ -107,9 +107,6 @@ public: */ virtual ~dom_parser_implementation() = default; - /** Next location to write to in the tape */ - uint32_t current_loc{0}; - /** Number of structural indices passed from stage 1 to stage 2 */ uint32_t n_structural_indexes{0}; /** Structural indices passed from stage 1 to stage 2 */ diff --git a/src/generic/stage2/logger.h b/src/generic/stage2/logger.h index c682fb0d..2f543840 100644 --- a/src/generic/stage2/logger.h +++ b/src/generic/stage2/logger.h @@ -25,8 +25,8 @@ namespace logger { if (LOG_ENABLED) { log_depth = 0; printf("\n"); - printf("| %-*s | %-*s | %*s | %*s | %*s | %-*s | %-*s |\n", LOG_EVENT_LEN, "Event", LOG_BUFFER_LEN, "Buffer", 4, "Curr", 4, "Next", 5, "Next#", LOG_DETAIL_LEN, "Detail", LOG_INDEX_LEN, "index"); - printf("|%.*s|%.*s|%.*s|%.*s|%.*s|%.*s|%.*s|\n", LOG_EVENT_LEN+2, DASHES, LOG_BUFFER_LEN+2, DASHES, 4+2, DASHES, 4+2, DASHES, 5+2, DASHES, LOG_DETAIL_LEN+2, DASHES, LOG_INDEX_LEN+2, DASHES); + printf("| %-*s | %-*s | %*s | %*s | %*s | %-*s | %-*s | %-*s |\n", LOG_EVENT_LEN, "Event", LOG_BUFFER_LEN, "Buffer", 4, "Curr", 4, "Next", 5, "Next#", 5, "Tape#", LOG_DETAIL_LEN, "Detail", LOG_INDEX_LEN, "index"); + 
printf("|%.*s|%.*s|%.*s|%.*s|%.*s|%.*s|%.*s|%.*s|\n", LOG_EVENT_LEN+2, DASHES, LOG_BUFFER_LEN+2, DASHES, 4+2, DASHES, 4+2, DASHES, 5+2, DASHES, 5+2, DASHES, LOG_DETAIL_LEN+2, DASHES, LOG_INDEX_LEN+2, DASHES); } } @@ -44,25 +44,17 @@ namespace logger { { // Print the next N characters in the buffer. printf("| "); - if (structurals.at_beginning()) { - // If the pointer is at the beginning, print a space followed by the beginning characters - // Print spaces for unprintable or newline characters. - printf(" "); - for (int i=0;i(i); - writer.write_s64(signed_answer); + writer.append_s64(signed_answer); #ifdef JSON_TEST_NUMBERS // for unit testing found_integer(signed_answer, src); #endif @@ -311,12 +311,12 @@ never_inline bool parse_large_integer(const uint8_t *const src, #ifdef JSON_TEST_NUMBERS // for unit testing found_integer(i, src); #endif - writer.write_s64(i); + writer.append_s64(i); } else { #ifdef JSON_TEST_NUMBERS // for unit testing found_unsigned_integer(i, src); #endif - writer.write_u64(i); + writer.append_u64(i); } } return is_structural_or_whitespace(*p); @@ -326,7 +326,7 @@ template bool slow_float_parsing(UNUSED const char * src, W writer) { double d; if (parse_float_strtod(src, &d)) { - writer.write_double(d); + writer.append_double(d); #ifdef JSON_TEST_NUMBERS // for unit testing found_float(d, (const uint8_t *)src); #endif @@ -350,10 +350,10 @@ bool slow_float_parsing(UNUSED const char * src, W writer) { template really_inline bool parse_number(UNUSED const uint8_t *const src, UNUSED bool found_minus, - W writer) { + W &writer) { #ifdef SIMDJSON_SKIPNUMBERPARSING // for performance analysis, it is sometimes // useful to skip parsing - writer.write_s64(0); // always write zero + writer.append_s64(0); // always write zero return true; // always succeeds #else const char *p = reinterpret_cast(src); @@ -497,7 +497,7 @@ really_inline bool parse_number(UNUSED const uint8_t *const src, } // we over-decrement by one when there is a '.' 
digit_count -= int(start - start_digits); - if (digit_count >= 19) { + if (unlikely(digit_count >= 19)) { // Ok, chances are good that we had an overflow! // this is almost never going to get called!!! // we start anew, going slowly!!! @@ -505,14 +505,22 @@ really_inline bool parse_number(UNUSED const uint8_t *const src, // 10000000000000000000000000000000000000000000e+308 // 3.1415926535897932384626433832795028841971693993751 // - return slow_float_parsing((const char *) src, writer); + bool success = slow_float_parsing((const char *) src, writer); + // The number was already written, but we made a copy of the writer when we passed it to the + // slow_float_parsing() function, so we have to skip those tape spots now that we've returned + writer.skip_double(); + return success; } } if (unlikely(exponent < FASTFLOAT_SMALLEST_POWER) || (exponent > FASTFLOAT_LARGEST_POWER)) { // this is uncommon!!! // this is almost never going to get called!!! // we start anew, going slowly!!! - return slow_float_parsing((const char *) src, writer); + bool success = slow_float_parsing((const char *) src, writer); + // The number was already written, but we made a copy of the writer when we passed it to the + // slow_float_parsing() function, so we have to skip those tape spots now that we've returned + writer.skip_double(); + return success; } bool success = true; double d = compute_float_64(exponent, i, negative, &success); @@ -521,7 +529,7 @@ really_inline bool parse_number(UNUSED const uint8_t *const src, success = parse_float_strtod((const char *)src, &d); } if (success) { - writer.write_double(d); + writer.append_double(d); #ifdef JSON_TEST_NUMBERS // for unit testing found_float(d, src); #endif @@ -536,10 +544,14 @@ really_inline bool parse_number(UNUSED const uint8_t *const src, if (unlikely(digit_count >= 18)) { // this is uncommon!!! // there is a good chance that we had an overflow, so we need // need to recover: we parse the whole thing again. 
- return parse_large_integer(src, writer, found_minus); + bool success = parse_large_integer(src, writer, found_minus); + // The number was already written, but we made a copy of the writer when we passed it to the + // parse_large_integer() function, so we have to skip those tape spots now that we've returned + writer.skip_large_integer(); + return success; } i = negative ? 0 - i : i; - writer.write_s64(i); + writer.append_s64(i); #ifdef JSON_TEST_NUMBERS // for unit testing found_integer(i, src); #endif diff --git a/src/generic/stage2/structural_parser.h b/src/generic/stage2/structural_parser.h index 6fabcd88..53bcc3ac 100644 --- a/src/generic/stage2/structural_parser.h +++ b/src/generic/stage2/structural_parser.h @@ -6,6 +6,8 @@ namespace stage2 { namespace { // Make everything here private +#include "generic/stage2/tape_writer.h" + #ifdef SIMDJSON_USE_COMPUTED_GOTO #define INIT_ADDRESSES() { &&array_begin, &&array_continue, &&error, &&finish, &&object_begin, &&object_continue } #define GOTO(address) { goto *(address); } @@ -46,45 +48,25 @@ struct unified_machine_addresses { #undef FAIL_IF #define FAIL_IF(EXPR) { if (EXPR) { return addresses.error; } } -struct number_writer { - dom_parser_implementation &parser; - - really_inline void write_s64(int64_t value) noexcept { - append_tape(0, internal::tape_type::INT64); - std::memcpy(&parser.doc->tape[parser.current_loc], &value, sizeof(value)); - ++parser.current_loc; - } - really_inline void write_u64(uint64_t value) noexcept { - append_tape(0, internal::tape_type::UINT64); - parser.doc->tape[parser.current_loc++] = value; - } - really_inline void write_double(double value) noexcept { - append_tape(0, internal::tape_type::DOUBLE); - static_assert(sizeof(value) == sizeof(parser.doc->tape[parser.current_loc]), "mismatch size"); - memcpy(&parser.doc->tape[parser.current_loc++], &value, sizeof(double)); - // doc->tape[doc->current_loc++] = *((uint64_t *)&d); - } - really_inline void append_tape(uint64_t val, internal::tape_type t) noexcept { - 
parser.doc->tape[parser.current_loc++] = val | ((uint64_t(char(t))) << 56); - } -}; // struct number_writer - struct structural_parser : structural_iterator { + /** Lets you append to the tape */ + tape_writer tape; /** Next write location in the string buf for stage 2 parsing */ - uint8_t *current_string_buf_loc{}; + uint8_t *current_string_buf_loc; /** Current depth (nested objects and arrays) */ - uint32_t depth; + uint32_t depth{0}; // For non-streaming, to pass an explicit 0 as next_structural, which enables optimizations really_inline structural_parser(dom_parser_implementation &_parser, uint32_t start_structural_index) : structural_iterator(_parser, start_structural_index), - depth{0} { + tape{parser.doc->tape.get()}, + current_string_buf_loc{parser.doc->string_buf.get()} { } WARN_UNUSED really_inline bool start_scope(ret_address_t continue_state) { - parser.containing_scope[depth].tape_index = parser.current_loc; + parser.containing_scope[depth].tape_index = next_tape_index(); parser.containing_scope[depth].count = 0; - parser.current_loc++; // We don't actually *write* the start element until the end. + tape.skip(); // We don't actually *write* the start element until the end. parser.ret_address[depth] = continue_state; depth++; bool exceeded_max_depth = depth >= parser.max_depth(); @@ -112,14 +94,18 @@ struct structural_parser : structural_iterator { depth--; // write our doc->tape location to the header scope // The root scope gets written *at* the previous location. - append_tape(parser.containing_scope[depth].tape_index, end); + tape.append(parser.containing_scope[depth].tape_index, end); // count can overflow if it exceeds 24 bits... so we saturate // the convention being that a cnt of 0xffffff or more is undetermined in value (>= 0xffffff). const uint32_t start_tape_index = parser.containing_scope[depth].tape_index; const uint32_t count = parser.containing_scope[depth].count; const uint32_t cntsat = count > 0xFFFFFF ? 
0xFFFFFF : count; // This is a load and an OR. It would be possible to just write once at doc->tape[d.tape_index] - write_tape(start_tape_index, parser.current_loc | (uint64_t(cntsat) << 32), start); + tape_writer::write(parser.doc->tape[start_tape_index], next_tape_index() | (uint64_t(cntsat) << 32), start); + } + + really_inline uint32_t next_tape_index() { + return uint32_t(tape.next_tape_loc - parser.doc->tape.get()); } really_inline void end_object() { @@ -135,14 +121,6 @@ struct structural_parser : structural_iterator { end_scope(internal::tape_type::ROOT, internal::tape_type::ROOT); } - really_inline void append_tape(uint64_t val, internal::tape_type t) noexcept { - parser.doc->tape[parser.current_loc++] = val | ((uint64_t(char(t))) << 56); - } - - really_inline void write_tape(uint32_t loc, uint64_t val, internal::tape_type t) noexcept { - parser.doc->tape[loc] = val | ((uint64_t(char(t))) << 56); - } - // increment_count increments the count of keys in an object or values in an array. 
// Note that if you are at the level of the values or elements, the count // must be increment in the preceding depth (depth-1) where the array or @@ -153,7 +131,7 @@ struct structural_parser : structural_iterator { really_inline uint8_t *on_start_string() noexcept { // we advance the point, accounting for the fact that we have a NULL termination - append_tape(current_string_buf_loc - parser.doc->string_buf.get(), internal::tape_type::STRING); + tape.append(current_string_buf_loc - parser.doc->string_buf.get(), internal::tape_type::STRING); return current_string_buf_loc + sizeof(uint32_t); } @@ -183,8 +161,7 @@ struct structural_parser : structural_iterator { WARN_UNUSED really_inline bool parse_number(const uint8_t *src, bool found_minus) { log_value("number"); - number_writer writer{parser}; - bool succeeded = numberparsing::parse_number(src, found_minus, writer); + bool succeeded = numberparsing::parse_number(src, found_minus, tape); if (!succeeded) { log_error("Invalid number"); } return !succeeded; } @@ -200,17 +177,17 @@ struct structural_parser : structural_iterator { case 't': log_value("true"); FAIL_IF( !atomparsing::is_valid_true_atom(current()) ); - append_tape(0, internal::tape_type::TRUE_VALUE); + tape.append(0, internal::tape_type::TRUE_VALUE); return continue_state; case 'f': log_value("false"); FAIL_IF( !atomparsing::is_valid_false_atom(current()) ); - append_tape(0, internal::tape_type::FALSE_VALUE); + tape.append(0, internal::tape_type::FALSE_VALUE); return continue_state; case 'n': log_value("null"); FAIL_IF( !atomparsing::is_valid_null_atom(current()) ); - append_tape(0, internal::tape_type::NULL_VALUE); + tape.append(0, internal::tape_type::NULL_VALUE); return continue_state; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': @@ -285,8 +262,6 @@ struct structural_parser : structural_iterator { really_inline void init() { log_start(); - current_string_buf_loc = parser.doc->string_buf.get(); - 
parser.current_loc = 0; parser.error = UNINITIALIZED; } @@ -362,17 +337,17 @@ WARN_UNUSED static error_code parse_structurals(dom_parser_implementation &dom_p case 't': parser.log_value("true"); FAIL_IF( !atomparsing::is_valid_true_atom(parser.current(), parser.remaining_len()) ); - parser.append_tape(0, internal::tape_type::TRUE_VALUE); + parser.tape.append(0, internal::tape_type::TRUE_VALUE); goto finish; case 'f': parser.log_value("false"); FAIL_IF( !atomparsing::is_valid_false_atom(parser.current(), parser.remaining_len()) ); - parser.append_tape(0, internal::tape_type::FALSE_VALUE); + parser.tape.append(0, internal::tape_type::FALSE_VALUE); goto finish; case 'n': parser.log_value("null"); FAIL_IF( !atomparsing::is_valid_null_atom(parser.current(), parser.remaining_len()) ); - parser.append_tape(0, internal::tape_type::NULL_VALUE); + parser.tape.append(0, internal::tape_type::NULL_VALUE); goto finish; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': diff --git a/src/generic/stage2/tape_writer.h b/src/generic/stage2/tape_writer.h new file mode 100644 index 00000000..47282d6a --- /dev/null +++ b/src/generic/stage2/tape_writer.h @@ -0,0 +1,95 @@ +struct tape_writer { + /** The next place to write to tape */ + uint64_t *next_tape_loc; + + /** Write a signed 64-bit value to tape. */ + really_inline void append_s64(int64_t value) noexcept; + + /** Write an unsigned 64-bit value to tape. */ + really_inline void append_u64(uint64_t value) noexcept; + + /** Write a double value to tape. */ + really_inline void append_double(double value) noexcept; + + /** + * Append a tape entry (an 8-bit type, and 56 bits worth of value). + */ + really_inline void append(uint64_t val, internal::tape_type t) noexcept; + + /** + * Skip the current tape entry without writing. + * + * Used to skip the start of the container, since we'll come back later to fill it in when the + * container ends. 
+ */ + really_inline void skip() noexcept; + + /** + * Skip the number of tape entries necessary to write a large u64 or i64. + */ + really_inline void skip_large_integer() noexcept; + + /** + * Skip the number of tape entries necessary to write a double. + */ + really_inline void skip_double() noexcept; + + /** + * Write a value to a known location on tape. + * + * Used to go back and write out the start of a container after the container ends. + */ + really_inline static void write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept; + +private: + /** + * Append both the tape entry, and a supplementary value following it. Used for types that need + * all 64 bits, such as double and uint64_t. + */ + template + really_inline void append2(uint64_t val, T val2, internal::tape_type t) noexcept; +}; // struct tape_writer + +really_inline void tape_writer::append_s64(int64_t value) noexcept { + append2(0, value, internal::tape_type::INT64); +} + +really_inline void tape_writer::append_u64(uint64_t value) noexcept { + append(0, internal::tape_type::UINT64); + *next_tape_loc = value; + next_tape_loc++; +} + +/** Write a double value to tape. 
*/ +really_inline void tape_writer::append_double(double value) noexcept { + append2(0, value, internal::tape_type::DOUBLE); +} + +really_inline void tape_writer::skip() noexcept { + next_tape_loc++; +} + +really_inline void tape_writer::skip_large_integer() noexcept { + next_tape_loc += 2; +} + +really_inline void tape_writer::skip_double() noexcept { + next_tape_loc += 2; +} + +really_inline void tape_writer::append(uint64_t val, internal::tape_type t) noexcept { + *next_tape_loc = val | ((uint64_t(char(t))) << 56); + next_tape_loc++; +} + +template +really_inline void tape_writer::append2(uint64_t val, T val2, internal::tape_type t) noexcept { + append(val, t); + static_assert(sizeof(val2) == sizeof(*next_tape_loc), "Type is not 64 bits!"); + memcpy(next_tape_loc, &val2, sizeof(val2)); + next_tape_loc++; +} + +really_inline void tape_writer::write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept { + tape_loc = val | ((uint64_t(char(t))) << 56); +}