From b75fa26dc1436c0414ca15e6682bee9cda09ac21 Mon Sep 17 00:00:00 2001 From: John Keiser Date: Mon, 1 Jun 2020 12:14:09 -0700 Subject: [PATCH] Move containing_scope and ret_address to .cpp --- .../internal/dom_parser_implementation.h | 21 ------- src/arm64/dom_parser_implementation.h | 38 +----------- src/fallback/dom_parser_implementation.h | 38 +----------- src/generic/dom_parser_implementation.h | 58 +++++++++++++++++++ src/generic/stage2/allocate.h | 4 +- .../stage2/streaming_structural_parser.h | 2 +- src/generic/stage2/structural_parser.h | 26 ++++----- src/haswell/dom_parser_implementation.h | 39 +------------ src/westmere/dom_parser_implementation.h | 38 +----------- 9 files changed, 78 insertions(+), 186 deletions(-) create mode 100644 src/generic/dom_parser_implementation.h diff --git a/include/simdjson/internal/dom_parser_implementation.h b/include/simdjson/internal/dom_parser_implementation.h index c651f7fc..6ff98168 100644 --- a/include/simdjson/internal/dom_parser_implementation.h +++ b/include/simdjson/internal/dom_parser_implementation.h @@ -13,18 +13,6 @@ class document; namespace internal { -// expectation: sizeof(scope_descriptor) = 64/8. -struct scope_descriptor { - uint32_t tape_index; // where, on the tape, does the scope ([,{) begins - uint32_t count; // how many elements in the scope -}; // struct scope_descriptor - -#ifdef SIMDJSON_USE_COMPUTED_GOTO -typedef void* ret_address; -#else -typedef char ret_address; -#endif - /** * An implementation of simdjson's DOM parser for a particular CPU architecture. * @@ -130,15 +118,6 @@ public: /** Structural indices passed from stage 1 to stage 2 */ std::unique_ptr<uint32_t[]> structural_indexes{}; - /** Tape location of each open { or [ */ - std::unique_ptr<scope_descriptor[]> containing_scope{}; - - /** Return address of each open { or [ */ - std::unique_ptr<ret_address[]> ret_address{}; - - /** Error code, used ENTIRELY to make gcc not be slower than before. Not actually consumed.
*/ - error_code error{UNINITIALIZED}; - /** * The largest document this parser can support without reallocating. * diff --git a/src/arm64/dom_parser_implementation.h b/src/arm64/dom_parser_implementation.h index fd33308c..29174f80 100644 --- a/src/arm64/dom_parser_implementation.h +++ b/src/arm64/dom_parser_implementation.h @@ -7,43 +7,7 @@ namespace simdjson { namespace arm64 { -class dom_parser_implementation final : public internal::dom_parser_implementation { -public: - const uint8_t *buf{}; // Buffer passed to stage 1 - size_t len{0}; // Length passed to stage 1 - dom::document *doc{}; // Document passed to stage 2 - - really_inline dom_parser_implementation(); - dom_parser_implementation(const dom_parser_implementation &) = delete; - dom_parser_implementation & operator=(const dom_parser_implementation &) = delete; - - WARN_UNUSED error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept final; - WARN_UNUSED error_code stage1(const uint8_t *buf, size_t len, bool streaming) noexcept final; - WARN_UNUSED error_code stage2(dom::document &doc) noexcept final; - WARN_UNUSED error_code stage2(const uint8_t *buf, size_t len, dom::document &doc, size_t &next_json) noexcept final; - WARN_UNUSED error_code set_capacity(size_t capacity) noexcept final; - WARN_UNUSED error_code set_max_depth(size_t max_depth) noexcept final; -}; - -#include "generic/stage1/allocate.h" -#include "generic/stage2/allocate.h" - -really_inline dom_parser_implementation::dom_parser_implementation() {} - -// Leaving these here so they can be inlined if so desired -WARN_UNUSED error_code dom_parser_implementation::set_capacity(size_t capacity) noexcept { - error_code err = stage1::allocate::set_capacity(*this, capacity); - if (err) { _capacity = 0; return err; } - _capacity = capacity; - return SUCCESS; -} - -WARN_UNUSED error_code dom_parser_implementation::set_max_depth(size_t max_depth) noexcept { - error_code err = stage2::allocate::set_max_depth(*this, max_depth); - if 
(err) { _max_depth = 0; return err; } - _max_depth = max_depth; - return SUCCESS; -} +#include "generic/dom_parser_implementation.h" } // namespace arm64 } // namespace simdjson diff --git a/src/fallback/dom_parser_implementation.h b/src/fallback/dom_parser_implementation.h index e3350df6..b648c36c 100644 --- a/src/fallback/dom_parser_implementation.h +++ b/src/fallback/dom_parser_implementation.h @@ -7,43 +7,7 @@ namespace simdjson { namespace fallback { -class dom_parser_implementation final : public internal::dom_parser_implementation { -public: - const uint8_t *buf{}; // Buffer passed to stage 1 - size_t len{0}; // Length passed to stage 1 - dom::document *doc{}; // Document passed to stage 2 - - really_inline dom_parser_implementation(); - dom_parser_implementation(const dom_parser_implementation &) = delete; - dom_parser_implementation & operator=(const dom_parser_implementation &) = delete; - - WARN_UNUSED error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept final; - WARN_UNUSED error_code stage1(const uint8_t *buf, size_t len, bool streaming) noexcept final; - WARN_UNUSED error_code stage2(dom::document &doc) noexcept final; - WARN_UNUSED error_code stage2(const uint8_t *buf, size_t len, dom::document &doc, size_t &next_json) noexcept final; - WARN_UNUSED error_code set_capacity(size_t capacity) noexcept final; - WARN_UNUSED error_code set_max_depth(size_t max_depth) noexcept final; -}; - -#include "generic/stage1/allocate.h" -#include "generic/stage2/allocate.h" - -really_inline dom_parser_implementation::dom_parser_implementation() {} - -// Leaving these here so they can be inlined if so desired -WARN_UNUSED error_code dom_parser_implementation::set_capacity(size_t capacity) noexcept { - error_code err = stage1::allocate::set_capacity(*this, capacity); - if (err) { _capacity = 0; return err; } - _capacity = capacity; - return SUCCESS; -} - -WARN_UNUSED error_code dom_parser_implementation::set_max_depth(size_t max_depth) noexcept 
{ - error_code err = stage2::allocate::set_max_depth(*this, max_depth); - if (err) { _max_depth = 0; return err; } - _max_depth = max_depth; - return SUCCESS; -} +#include "generic/dom_parser_implementation.h" } // namespace fallback } // namespace simdjson diff --git a/src/generic/dom_parser_implementation.h b/src/generic/dom_parser_implementation.h new file mode 100644 index 00000000..4c8ec598 --- /dev/null +++ b/src/generic/dom_parser_implementation.h @@ -0,0 +1,58 @@ +// expectation: sizeof(scope_descriptor) = 64/8. +struct scope_descriptor { + uint32_t tape_index; // where, on the tape, does the scope ([,{) begins + uint32_t count; // how many elements in the scope +}; // struct scope_descriptor + +#ifdef SIMDJSON_USE_COMPUTED_GOTO +typedef void* ret_address_t; +#else +typedef char ret_address_t; +#endif + +class dom_parser_implementation final : public internal::dom_parser_implementation { +public: + /** Tape location of each open { or [ */ + std::unique_ptr<scope_descriptor[]> containing_scope{}; + /** Return address of each open { or [ */ + std::unique_ptr<ret_address_t[]> ret_address{}; + /** Buffer passed to stage 1 */ + const uint8_t *buf{}; + /** Length passed to stage 1 */ + size_t len{0}; + /** Document passed to stage 2 */ + dom::document *doc{}; + /** Error code (TODO remove, this is not even used, we just set it so the g++ optimizer doesn't get confused) */ + error_code error{UNINITIALIZED}; + + really_inline dom_parser_implementation(); + dom_parser_implementation(const dom_parser_implementation &) = delete; + dom_parser_implementation & operator=(const dom_parser_implementation &) = delete; + + WARN_UNUSED error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept final; + WARN_UNUSED error_code stage1(const uint8_t *buf, size_t len, bool streaming) noexcept final; + WARN_UNUSED error_code stage2(dom::document &doc) noexcept final; + WARN_UNUSED error_code stage2(const uint8_t *buf, size_t len, dom::document &doc, size_t &next_json) noexcept final; + WARN_UNUSED
error_code set_capacity(size_t capacity) noexcept final; + WARN_UNUSED error_code set_max_depth(size_t max_depth) noexcept final; +}; + +#include "generic/stage1/allocate.h" +#include "generic/stage2/allocate.h" + +really_inline dom_parser_implementation::dom_parser_implementation() {} + +// Leaving these here so they can be inlined if so desired +WARN_UNUSED error_code dom_parser_implementation::set_capacity(size_t capacity) noexcept { + error_code err = stage1::allocate::set_capacity(*this, capacity); + if (err) { _capacity = 0; return err; } + _capacity = capacity; + return SUCCESS; +} + +WARN_UNUSED error_code dom_parser_implementation::set_max_depth(size_t max_depth) noexcept { + error_code err = stage2::allocate::set_max_depth(*this, max_depth); + if (err) { _max_depth = 0; return err; } + _max_depth = max_depth; + return SUCCESS; +} diff --git a/src/generic/stage2/allocate.h b/src/generic/stage2/allocate.h index ae89f6e1..ad35df11 100644 --- a/src/generic/stage2/allocate.h +++ b/src/generic/stage2/allocate.h @@ -5,8 +5,8 @@ namespace allocate { // Allocates stage 2 internal state and outputs in the parser // really_inline error_code set_max_depth(dom_parser_implementation &parser, size_t max_depth) { - parser.containing_scope.reset(new (std::nothrow) internal::scope_descriptor[max_depth]); - parser.ret_address.reset(new (std::nothrow) internal::ret_address[max_depth]); + parser.containing_scope.reset(new (std::nothrow) scope_descriptor[max_depth]); + parser.ret_address.reset(new (std::nothrow) ret_address_t[max_depth]); if (!parser.ret_address || !parser.containing_scope) { return MEMALLOC; diff --git a/src/generic/stage2/streaming_structural_parser.h b/src/generic/stage2/streaming_structural_parser.h index 5e27ef90..b83e116d 100755 --- a/src/generic/stage2/streaming_structural_parser.h +++ b/src/generic/stage2/streaming_structural_parser.h @@ -4,7 +4,7 @@ struct streaming_structural_parser: structural_parser { really_inline 
streaming_structural_parser(dom_parser_implementation &_parser, uint32_t next_structural) : structural_parser(_parser, next_structural) {} // override to add streaming - WARN_UNUSED really_inline error_code start(ret_address finish_parser) { + WARN_UNUSED really_inline error_code start(ret_address_t finish_parser) { log_start(); init(); // sets is_valid to false // Capacity ain't no thang for streaming, so we don't check it. diff --git a/src/generic/stage2/structural_parser.h b/src/generic/stage2/structural_parser.h index 60031b7d..85ba0836 100644 --- a/src/generic/stage2/structural_parser.h +++ b/src/generic/stage2/structural_parser.h @@ -5,8 +5,6 @@ namespace stage2 { -using internal::ret_address; - #ifdef SIMDJSON_USE_COMPUTED_GOTO #define INIT_ADDRESSES() { &&array_begin, &&array_continue, &&error, &&finish, &&object_begin, &&object_continue } #define GOTO(address) { goto *(address); } @@ -36,12 +34,12 @@ using internal::ret_address; #endif // SIMDJSON_USE_COMPUTED_GOTO struct unified_machine_addresses { - ret_address array_begin; - ret_address array_continue; - ret_address error; - ret_address finish; - ret_address object_begin; - ret_address object_continue; + ret_address_t array_begin; + ret_address_t array_continue; + ret_address_t error; + ret_address_t finish; + ret_address_t object_begin; + ret_address_t object_continue; }; #undef FAIL_IF @@ -82,7 +80,7 @@ struct structural_parser { uint32_t next_structural = 0 ) : structurals(_parser.buf, _parser.len, _parser.structural_indexes.get(), next_structural), parser{_parser}, depth{0} {} - WARN_UNUSED really_inline bool start_scope(ret_address continue_state) { + WARN_UNUSED really_inline bool start_scope(ret_address_t continue_state) { parser.containing_scope[depth].tape_index = parser.current_loc; parser.containing_scope[depth].count = 0; parser.current_loc++; // We don't actually *write* the start element until the end. 
@@ -93,17 +91,17 @@ struct structural_parser { return exceeded_max_depth; } - WARN_UNUSED really_inline bool start_document(ret_address continue_state) { + WARN_UNUSED really_inline bool start_document(ret_address_t continue_state) { log_start_value("document"); return start_scope(continue_state); } - WARN_UNUSED really_inline bool start_object(ret_address continue_state) { + WARN_UNUSED really_inline bool start_object(ret_address_t continue_state) { log_start_value("object"); return start_scope(continue_state); } - WARN_UNUSED really_inline bool start_array(ret_address continue_state) { + WARN_UNUSED really_inline bool start_array(ret_address_t continue_state) { log_start_value("array"); return start_scope(continue_state); } @@ -241,7 +239,7 @@ struct structural_parser { return false; } - WARN_UNUSED really_inline ret_address parse_value(const unified_machine_addresses &addresses, ret_address continue_state) { + WARN_UNUSED really_inline ret_address_t parse_value(const unified_machine_addresses &addresses, ret_address_t continue_state) { switch (structurals.current_char()) { case '"': FAIL_IF( parse_string() ); @@ -333,7 +331,7 @@ struct structural_parser { parser.error = UNINITIALIZED; } - WARN_UNUSED really_inline error_code start(size_t len, ret_address finish_state) { + WARN_UNUSED really_inline error_code start(size_t len, ret_address_t finish_state) { log_start(); init(); // sets is_valid to false if (len > parser.capacity()) { diff --git a/src/haswell/dom_parser_implementation.h b/src/haswell/dom_parser_implementation.h index c328b7d3..fdf7d2b0 100644 --- a/src/haswell/dom_parser_implementation.h +++ b/src/haswell/dom_parser_implementation.h @@ -3,46 +3,11 @@ #include "simdjson.h" #include "isadetection.h" + namespace simdjson { namespace haswell { -class dom_parser_implementation final : public internal::dom_parser_implementation { -public: - const uint8_t *buf{}; // Buffer passed to stage 1 - size_t len{0}; // Length passed to stage 1 - dom::document 
*doc{}; // Document passed to stage 2 - - really_inline dom_parser_implementation(); - dom_parser_implementation(const dom_parser_implementation &) = delete; - dom_parser_implementation & operator=(const dom_parser_implementation &) = delete; - - WARN_UNUSED error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept final; - WARN_UNUSED error_code stage1(const uint8_t *buf, size_t len, bool streaming) noexcept final; - WARN_UNUSED error_code stage2(dom::document &doc) noexcept final; - WARN_UNUSED error_code stage2(const uint8_t *buf, size_t len, dom::document &doc, size_t &next_json) noexcept final; - WARN_UNUSED error_code set_capacity(size_t capacity) noexcept final; - WARN_UNUSED error_code set_max_depth(size_t max_depth) noexcept final; -}; - -#include "generic/stage1/allocate.h" -#include "generic/stage2/allocate.h" - -really_inline dom_parser_implementation::dom_parser_implementation() {} - -// Leaving these here so they can be inlined if so desired -WARN_UNUSED error_code dom_parser_implementation::set_capacity(size_t capacity) noexcept { - error_code err = stage1::allocate::set_capacity(*this, capacity); - if (err) { _capacity = 0; return err; } - _capacity = capacity; - return SUCCESS; -} - -WARN_UNUSED error_code dom_parser_implementation::set_max_depth(size_t max_depth) noexcept { - error_code err = stage2::allocate::set_max_depth(*this, max_depth); - if (err) { _max_depth = 0; return err; } - _max_depth = max_depth; - return SUCCESS; -} +#include "generic/dom_parser_implementation.h" } // namespace haswell } // namespace simdjson diff --git a/src/westmere/dom_parser_implementation.h b/src/westmere/dom_parser_implementation.h index 7e7c14dd..c36d9c8a 100644 --- a/src/westmere/dom_parser_implementation.h +++ b/src/westmere/dom_parser_implementation.h @@ -7,43 +7,7 @@ namespace simdjson { namespace westmere { -class dom_parser_implementation final : public internal::dom_parser_implementation { -public: - const uint8_t *buf{}; // Buffer 
passed to stage 1 - size_t len{0}; // Length passed to stage 1 - dom::document *doc{}; // Document passed to stage 2 - - really_inline dom_parser_implementation(); - dom_parser_implementation(const dom_parser_implementation &) = delete; - dom_parser_implementation & operator=(const dom_parser_implementation &) = delete; - - WARN_UNUSED error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept final; - WARN_UNUSED error_code stage1(const uint8_t *buf, size_t len, bool streaming) noexcept final; - WARN_UNUSED error_code stage2(dom::document &doc) noexcept final; - WARN_UNUSED error_code stage2(const uint8_t *buf, size_t len, dom::document &doc, size_t &next_json) noexcept final; - WARN_UNUSED error_code set_capacity(size_t capacity) noexcept final; - WARN_UNUSED error_code set_max_depth(size_t max_depth) noexcept final; -}; - -#include "generic/stage1/allocate.h" -#include "generic/stage2/allocate.h" - -really_inline dom_parser_implementation::dom_parser_implementation() {} - -// Leaving these here so they can be inlined if so desired -WARN_UNUSED error_code dom_parser_implementation::set_capacity(size_t capacity) noexcept { - error_code err = stage1::allocate::set_capacity(*this, capacity); - if (err) { _capacity = 0; return err; } - _capacity = capacity; - return SUCCESS; -} - -WARN_UNUSED error_code dom_parser_implementation::set_max_depth(size_t max_depth) noexcept { - error_code err = stage2::allocate::set_max_depth(*this, max_depth); - if (err) { _max_depth = 0; return err; } - _max_depth = max_depth; - return SUCCESS; -} +#include "generic/dom_parser_implementation.h" } // namespace westmere } // namespace simdjson