From 4ea866f050f940daef0a2c1f26d74e95e09366b6 Mon Sep 17 00:00:00 2001 From: John Keiser Date: Mon, 11 May 2020 23:06:38 -0700 Subject: [PATCH] Move stage2 classes into their own files --- HACKING.md | 2 +- src/arm64/stage1_find_marks.h | 6 +- src/arm64/stage2_build_tape.h | 11 +-- .../{stage1_find_marks.h => stage1.h} | 6 +- .../{stage2_build_tape.h => stage2.h} | 11 +-- src/generic/json_minifier.h | 2 +- src/generic/json_structural_indexer.h | 2 +- ...d_tape.h => streaming_structural_parser.h} | 0 src/generic/structural_iterator.h | 69 +++++++++++++++++++ ...tage2_build_tape.h => structural_parser.h} | 68 +----------------- src/haswell/{stage1_find_marks.h => stage1.h} | 6 +- src/haswell/{stage2_build_tape.h => stage2.h} | 11 +-- src/stage1_find_marks.cpp | 8 +-- src/stage2_build_tape.cpp | 8 +-- .../{stage1_find_marks.h => stage1.h} | 6 +- .../{stage2_build_tape.h => stage2.h} | 11 +-- 16 files changed, 117 insertions(+), 110 deletions(-) rename src/fallback/{stage1_find_marks.h => stage1.h} (98%) rename src/fallback/{stage2_build_tape.h => stage2.h} (52%) rename src/generic/{stage2_streaming_build_tape.h => streaming_structural_parser.h} (100%) create mode 100644 src/generic/structural_iterator.h rename src/generic/{stage2_build_tape.h => structural_parser.h} (85%) rename src/haswell/{stage1_find_marks.h => stage1.h} (96%) rename src/haswell/{stage2_build_tape.h => stage2.h} (55%) rename src/westmere/{stage1_find_marks.h => stage1.h} (95%) rename src/westmere/{stage2_build_tape.h => stage2.h} (55%) diff --git a/HACKING.md b/HACKING.md index d1094dc0..d90c4c3d 100644 --- a/HACKING.md +++ b/HACKING.md @@ -29,7 +29,7 @@ simdjson's source structure, from the top level, looks like this: ```c++ namespace simdjson { namespace haswell { - #include "generic/stage1_find_marks.h" + #include "generic/stage1.h" } } ``` diff --git a/src/arm64/stage1_find_marks.h b/src/arm64/stage1_find_marks.h index 18bb3951..48bfd0f2 100644 --- a/src/arm64/stage1_find_marks.h +++ b/src/arm64/stage1_find_marks.h @@ -1,5 +1,5 @@ -#ifndef SIMDJSON_ARM64_STAGE1_FIND_MARKS_H -#define SIMDJSON_ARM64_STAGE1_FIND_MARKS_H +#ifndef SIMDJSON_ARM64_STAGE1_H +#define SIMDJSON_ARM64_STAGE1_H #include "simdjson.h" #include "arm64/bitmask.h" @@ -89,4 +89,4 @@ WARN_UNUSED error_code implementation::stage1(const uint8_t *buf, size_t len, pa } // namespace arm64 } // namespace simdjson -#endif // SIMDJSON_ARM64_STAGE1_FIND_MARKS_H +#endif // SIMDJSON_ARM64_STAGE1_H diff --git a/src/arm64/stage2_build_tape.h b/src/arm64/stage2_build_tape.h index 58804c56..874de504 100644 --- a/src/arm64/stage2_build_tape.h +++ b/src/arm64/stage2_build_tape.h @@ -1,5 +1,5 @@ -#ifndef SIMDJSON_ARM64_STAGE2_BUILD_TAPE_H -#define SIMDJSON_ARM64_STAGE2_BUILD_TAPE_H +#ifndef SIMDJSON_ARM64_STAGE2_H +#define SIMDJSON_ARM64_STAGE2_H #include "simdjson.h" #include "arm64/implementation.h" @@ -10,10 +10,11 @@ namespace simdjson { namespace arm64 { #include "generic/atomparsing.h" -#include "generic/stage2_build_tape.h" -#include "generic/stage2_streaming_build_tape.h" +#include "generic/structural_iterator.h" +#include "generic/structural_parser.h" +#include "generic/streaming_structural_parser.h" } // namespace arm64 } // namespace simdjson -#endif // SIMDJSON_ARM64_STAGE2_BUILD_TAPE_H +#endif // SIMDJSON_ARM64_STAGE2_H diff --git a/src/fallback/stage1_find_marks.h b/src/fallback/stage1.h similarity index 98% rename from src/fallback/stage1_find_marks.h rename to src/fallback/stage1.h index a3a8298a..0d8f4813 100644 --- a/src/fallback/stage1_find_marks.h +++ b/src/fallback/stage1.h @@ -1,5 +1,5 @@ -#ifndef SIMDJSON_FALLBACK_STAGE1_FIND_MARKS_H -#define SIMDJSON_FALLBACK_STAGE1_FIND_MARKS_H +#ifndef SIMDJSON_FALLBACK_STAGE1_H +#define SIMDJSON_FALLBACK_STAGE1_H #include "simdjson.h" #include "fallback/implementation.h" @@ -211,4 +211,4 @@ WARN_UNUSED error_code implementation::minify(const uint8_t *buf, size_t len, ui } // namespace fallback } // namespace simdjson -#endif // SIMDJSON_FALLBACK_STAGE1_FIND_MARKS_H +#endif // SIMDJSON_FALLBACK_STAGE1_H diff --git a/src/fallback/stage2_build_tape.h b/src/fallback/stage2.h similarity index 52% rename from src/fallback/stage2_build_tape.h rename to src/fallback/stage2.h index 0baed8e1..2ecb89df 100644 --- a/src/fallback/stage2_build_tape.h +++ b/src/fallback/stage2.h @@ -1,5 +1,5 @@ -#ifndef SIMDJSON_FALLBACK_STAGE2_BUILD_TAPE_H -#define SIMDJSON_FALLBACK_STAGE2_BUILD_TAPE_H +#ifndef SIMDJSON_FALLBACK_STAGE2_H +#define SIMDJSON_FALLBACK_STAGE2_H #include "simdjson.h" @@ -11,10 +11,11 @@ namespace simdjson { namespace fallback { #include "generic/atomparsing.h" -#include "generic/stage2_build_tape.h" -#include "generic/stage2_streaming_build_tape.h" +#include "generic/structural_iterator.h" +#include "generic/structural_parser.h" +#include "generic/streaming_structural_parser.h" } // namespace fallback } // namespace simdjson -#endif // SIMDJSON_FALLBACK_STAGE2_BUILD_TAPE_H +#endif // SIMDJSON_FALLBACK_STAGE2_H diff --git a/src/generic/json_minifier.h b/src/generic/json_minifier.h index f5a1e8f7..aa42202e 100644 --- a/src/generic/json_minifier.h +++ b/src/generic/json_minifier.h @@ -1,7 +1,7 @@ // This file contains the common code every implementation uses in stage1 // It is intended to be included multiple times and compiled multiple times // We assume the file in which it is included already includes -// "simdjson/stage1_find_marks.h" (this simplifies amalgation) +// "simdjson/stage1.h" (this simplifies amalgation) namespace stage1 { diff --git a/src/generic/json_structural_indexer.h b/src/generic/json_structural_indexer.h index 99f6ec15..f1ed4bf0 100644 --- a/src/generic/json_structural_indexer.h +++ b/src/generic/json_structural_indexer.h @@ -1,7 +1,7 @@ // This file contains the common code every implementation uses in stage1 // It is intended to be included multiple times and compiled multiple times // We assume the file in which it is included already includes -// "simdjson/stage1_find_marks.h" (this simplifies amalgation) +// "simdjson/stage1.h" (this simplifies amalgation) namespace stage1 { diff --git a/src/generic/stage2_streaming_build_tape.h b/src/generic/streaming_structural_parser.h similarity index 100% rename from src/generic/stage2_streaming_build_tape.h rename to src/generic/streaming_structural_parser.h diff --git a/src/generic/structural_iterator.h b/src/generic/structural_iterator.h new file mode 100644 index 00000000..8cb6c666 --- /dev/null +++ b/src/generic/structural_iterator.h @@ -0,0 +1,69 @@ +namespace stage2 { + +class structural_iterator { +public: + really_inline structural_iterator(const uint8_t* _buf, size_t _len, const uint32_t *_structural_indexes, size_t next_structural_index) + : buf{_buf}, + len{_len}, + structural_indexes{_structural_indexes}, + next_structural{next_structural_index} + {} + really_inline char advance_char() { + idx = structural_indexes[next_structural]; + next_structural++; + c = *current(); + return c; + } + really_inline char current_char() { + return c; + } + really_inline const uint8_t* current() { + return &buf[idx]; + } + really_inline size_t remaining_len() { + return len - idx; + } + template + really_inline bool with_space_terminated_copy(const F& f) { + /** + * We need to make a copy to make sure that the string is space terminated. + * This is not about padding the input, which should already padded up + * to len + SIMDJSON_PADDING. However, we have no control at this stage + * on how the padding was done. What if the input string was padded with nulls? + * It is quite common for an input string to have an extra null character (C string). + * We do not want to allow 9\0 (where \0 is the null character) inside a JSON + * document, but the string "9\0" by itself is fine. So we make a copy and + * pad the input with spaces when we know that there is just one input element. + * This copy is relatively expensive, but it will almost never be called in + * practice unless you are in the strange scenario where you have many JSON + * documents made of single atoms. + */ + char *copy = static_cast(malloc(len + SIMDJSON_PADDING)); + if (copy == nullptr) { + return true; + } + memcpy(copy, buf, len); + memset(copy + len, ' ', SIMDJSON_PADDING); + bool result = f(reinterpret_cast(copy), idx); + free(copy); + return result; + } + really_inline bool past_end(uint32_t n_structural_indexes) { + return next_structural+1 > n_structural_indexes; + } + really_inline bool at_end(uint32_t n_structural_indexes) { + return next_structural+1 == n_structural_indexes; + } + really_inline size_t next_structural_index() { + return next_structural; + } + + const uint8_t* const buf; + const size_t len; + const uint32_t* const structural_indexes; + size_t next_structural; // next structural index + size_t idx{0}; // location of the structural character in the input (buf) + uint8_t c{0}; // used to track the (structural) character we are looking at +}; + +} // namespace stage2 diff --git a/src/generic/stage2_build_tape.h b/src/generic/structural_parser.h similarity index 85% rename from src/generic/stage2_build_tape.h rename to src/generic/structural_parser.h index 9b1bc109..9024683d 100644 --- a/src/generic/stage2_build_tape.h +++ b/src/generic/structural_parser.h @@ -1,7 +1,7 @@ // This file contains the common code every implementation uses for stage2 // It is intended to be included multiple times and compiled multiple times // We assume the file in which it is include already includes -// "simdjson/stage2_build_tape.h" (this simplifies amalgation) +// "simdjson/stage2.h" (this simplifies amalgation) namespace stage2 { @@ -47,72 +47,6 @@ struct unified_machine_addresses { #undef FAIL_IF #define FAIL_IF(EXPR) { if (EXPR) { return addresses.error; } } -class structural_iterator { -public: - really_inline structural_iterator(const uint8_t* _buf, size_t _len, const uint32_t *_structural_indexes, size_t next_structural_index) - : buf{_buf}, - len{_len}, - structural_indexes{_structural_indexes}, - next_structural{next_structural_index} - {} - really_inline char advance_char() { - idx = structural_indexes[next_structural]; - next_structural++; - c = *current(); - return c; - } - really_inline char current_char() { - return c; - } - really_inline const uint8_t* current() { - return &buf[idx]; - } - really_inline size_t remaining_len() { - return len - idx; - } - template - really_inline bool with_space_terminated_copy(const F& f) { - /** - * We need to make a copy to make sure that the string is space terminated. - * This is not about padding the input, which should already padded up - * to len + SIMDJSON_PADDING. However, we have no control at this stage - * on how the padding was done. What if the input string was padded with nulls? - * It is quite common for an input string to have an extra null character (C string). - * We do not want to allow 9\0 (where \0 is the null character) inside a JSON - * document, but the string "9\0" by itself is fine. So we make a copy and - * pad the input with spaces when we know that there is just one input element. - * This copy is relatively expensive, but it will almost never be called in - * practice unless you are in the strange scenario where you have many JSON - * documents made of single atoms. - */ - char *copy = static_cast(malloc(len + SIMDJSON_PADDING)); - if (copy == nullptr) { - return true; - } - memcpy(copy, buf, len); - memset(copy + len, ' ', SIMDJSON_PADDING); - bool result = f(reinterpret_cast(copy), idx); - free(copy); - return result; - } - really_inline bool past_end(uint32_t n_structural_indexes) { - return next_structural+1 > n_structural_indexes; - } - really_inline bool at_end(uint32_t n_structural_indexes) { - return next_structural+1 == n_structural_indexes; - } - really_inline size_t next_structural_index() { - return next_structural; - } - - const uint8_t* const buf; - const size_t len; - const uint32_t* const structural_indexes; - size_t next_structural; // next structural index - size_t idx{0}; // location of the structural character in the input (buf) - uint8_t c{0}; // used to track the (structural) character we are looking at -}; - struct number_writer { parser &doc_parser; diff --git a/src/haswell/stage1_find_marks.h b/src/haswell/stage1.h similarity index 96% rename from src/haswell/stage1_find_marks.h rename to src/haswell/stage1.h index 16f9f9f1..02753971 100644 --- a/src/haswell/stage1_find_marks.h +++ b/src/haswell/stage1.h @@ -1,5 +1,5 @@ -#ifndef SIMDJSON_HASWELL_STAGE1_FIND_MARKS_H -#define SIMDJSON_HASWELL_STAGE1_FIND_MARKS_H +#ifndef SIMDJSON_HASWELL_STAGE1_H +#define SIMDJSON_HASWELL_STAGE1_H #include "simdjson.h" @@ -80,4 +80,4 @@ WARN_UNUSED error_code implementation::stage1(const uint8_t *buf, size_t len, pa } // namespace simdjson UNTARGET_REGION -#endif // SIMDJSON_HASWELL_STAGE1_FIND_MARKS_H +#endif // SIMDJSON_HASWELL_STAGE1_H diff --git a/src/haswell/stage2_build_tape.h b/src/haswell/stage2.h similarity index 55% rename from src/haswell/stage2_build_tape.h rename to src/haswell/stage2.h index 9eeedbf8..4bd624b2 100644 --- a/src/haswell/stage2_build_tape.h +++ b/src/haswell/stage2.h @@ -1,5 +1,5 @@ -#ifndef SIMDJSON_HASWELL_STAGE2_BUILD_TAPE_H -#define SIMDJSON_HASWELL_STAGE2_BUILD_TAPE_H +#ifndef SIMDJSON_HASWELL_STAGE2_H +#define SIMDJSON_HASWELL_STAGE2_H #include "simdjson.h" #include "haswell/implementation.h" @@ -11,11 +11,12 @@ namespace simdjson { namespace haswell { #include "generic/atomparsing.h" -#include "generic/stage2_build_tape.h" -#include "generic/stage2_streaming_build_tape.h" +#include "generic/structural_iterator.h" +#include "generic/structural_parser.h" +#include "generic/streaming_structural_parser.h" } // namespace haswell } // namespace simdjson UNTARGET_REGION -#endif // SIMDJSON_HASWELL_STAGE2_BUILD_TAPE_H +#endif // SIMDJSON_HASWELL_STAGE2_H diff --git a/src/stage1_find_marks.cpp b/src/stage1_find_marks.cpp index 338b954a..7571d9ee 100644 --- a/src/stage1_find_marks.cpp +++ b/src/stage1_find_marks.cpp @@ -1,12 +1,12 @@ #if SIMDJSON_IMPLEMENTATION_ARM64 -#include "arm64/stage1_find_marks.h" +#include "arm64/stage1.h" #endif #if SIMDJSON_IMPLEMENTATION_FALLBACK -#include "fallback/stage1_find_marks.h" +#include "fallback/stage1.h" #endif #if SIMDJSON_IMPLEMENTATION_HASWELL -#include "haswell/stage1_find_marks.h" +#include "haswell/stage1.h" #endif #if SIMDJSON_IMPLEMENTATION_WESTMERE -#include "westmere/stage1_find_marks.h" +#include "westmere/stage1.h" #endif diff --git a/src/stage2_build_tape.cpp b/src/stage2_build_tape.cpp index 78336276..36f462ca 100644 --- a/src/stage2_build_tape.cpp +++ b/src/stage2_build_tape.cpp @@ -12,14 +12,14 @@ void found_bad_string(const uint8_t *buf); #endif #if SIMDJSON_IMPLEMENTATION_ARM64 -#include "arm64/stage2_build_tape.h" +#include "arm64/stage2.h" #endif #if SIMDJSON_IMPLEMENTATION_FALLBACK -#include "fallback/stage2_build_tape.h" +#include "fallback/stage2.h" #endif #if SIMDJSON_IMPLEMENTATION_HASWELL -#include "haswell/stage2_build_tape.h" +#include "haswell/stage2.h" #endif #if SIMDJSON_IMPLEMENTATION_WESTMERE -#include "westmere/stage2_build_tape.h" +#include "westmere/stage2.h" #endif diff --git a/src/westmere/stage1_find_marks.h b/src/westmere/stage1.h similarity index 95% rename from src/westmere/stage1_find_marks.h rename to src/westmere/stage1.h index 4a1b5e61..8f5a2a4b 100644 --- a/src/westmere/stage1_find_marks.h +++ b/src/westmere/stage1.h @@ -1,5 +1,5 @@ -#ifndef SIMDJSON_WESTMERE_STAGE1_FIND_MARKS_H -#define SIMDJSON_WESTMERE_STAGE1_FIND_MARKS_H +#ifndef SIMDJSON_WESTMERE_STAGE1_H +#define SIMDJSON_WESTMERE_STAGE1_H #include "simdjson.h" #include "westmere/bitmask.h" @@ -79,4 +79,4 @@ WARN_UNUSED error_code implementation::stage1(const uint8_t *buf, size_t len, pa } // namespace simdjson UNTARGET_REGION -#endif // SIMDJSON_WESTMERE_STAGE1_FIND_MARKS_H +#endif // SIMDJSON_WESTMERE_STAGE1_H diff --git a/src/westmere/stage2_build_tape.h b/src/westmere/stage2.h similarity index 55% rename from src/westmere/stage2_build_tape.h rename to src/westmere/stage2.h index 7105c123..fc6efdfb 100644 --- a/src/westmere/stage2_build_tape.h +++ b/src/westmere/stage2.h @@ -1,5 +1,5 @@ -#ifndef SIMDJSON_WESTMERE_STAGE2_BUILD_TAPE_H -#define SIMDJSON_WESTMERE_STAGE2_BUILD_TAPE_H +#ifndef SIMDJSON_WESTMERE_STAGE2_H +#define SIMDJSON_WESTMERE_STAGE2_H #include "simdjson.h" #include "westmere/implementation.h" @@ -11,10 +11,11 @@ namespace simdjson { namespace westmere { #include "generic/atomparsing.h" -#include "generic/stage2_build_tape.h" -#include "generic/stage2_streaming_build_tape.h" +#include "generic/structural_iterator.h" +#include "generic/structural_parser.h" +#include "generic/streaming_structural_parser.h" } // namespace westmere } // namespace simdjson UNTARGET_REGION -#endif // SIMDJSON_WESTMERE_STAGE2_BUILD_TAPE_H +#endif // SIMDJSON_WESTMERE_STAGE2_H