Merge pull request #882 from simdjson/jkeiser/move-cpp-files

Split stage2 into files-per-class
This commit is contained in:
John Keiser 2020-05-19 14:34:01 -07:00 committed by GitHub
commit 603b6596af
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
49 changed files with 6265 additions and 6069 deletions

View File

@ -29,7 +29,7 @@ simdjson's source structure, from the top level, looks like this:
```c++
namespace simdjson {
namespace haswell {
#include "generic/stage1_find_marks.h"
#include "generic/stage1/json_structural_indexer.h"
}
}
```

View File

@ -89,6 +89,12 @@ public:
size_t json_index;
};
// Element type of the parser's per-scope ret_address array: a one-character
// tag by default, or a label address (void*) when computed goto is enabled.
#ifndef SIMDJSON_USE_COMPUTED_GOTO
using ret_address = char;
#else
using ret_address = void*;
#endif
} // namespace internal
namespace dom {
@ -977,13 +983,8 @@ public:
/** @private Tape location of each open { or [ */
std::unique_ptr<scope_descriptor[]> containing_scope{};
#ifdef SIMDJSON_USE_COMPUTED_GOTO
/** @private Return address of each open { or [ */
std::unique_ptr<void*[]> ret_address{};
#else
/** @private Return address of each open { or [ */
std::unique_ptr<char[]> ret_address{};
#endif
std::unique_ptr<internal::ret_address[]> ret_address{};
/** @private Use `if (parser.parse(...).error())` instead */
bool valid{false};

View File

@ -509,11 +509,7 @@ inline error_code parser::allocate(size_t capacity, size_t max_depth) noexcept {
// Initialize stage 2 state
//
containing_scope.reset(new (std::nothrow) scope_descriptor[max_depth]); // TODO realloc
#ifdef SIMDJSON_USE_COMPUTED_GOTO
ret_address.reset(new (std::nothrow) void *[max_depth]);
#else
ret_address.reset(new (std::nothrow) char[max_depth]);
#endif
ret_address.reset(new (std::nothrow) internal::ret_address[max_depth]);
if (!ret_address || !containing_scope) {
// Could not allocate memory

View File

@ -1,8 +1,9 @@
#ifndef SIMDJSON_INLINE_ERROR_H
#define SIMDJSON_INLINE_ERROR_H
#include "simdjson/error.h"
#include <cstring>
#include <string>
#include "simdjson/error.h"
namespace simdjson {
namespace internal {

View File

@ -7,14 +7,14 @@ set -e
SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P )"
PROJECTPATH=$SCRIPTPATH"/.."
PROJECTPATH="$(dirname $SCRIPTPATH)"
echo "Project at "$PROJECTPATH
echo "We are about to amalgamate all simdjson files into one source file. "
echo "See https://www.sqlite.org/amalgamation.html and https://en.wikipedia.org/wiki/Single_Compilation_Unit for rationale. "
if [ -z "$AMALGAMATE_SOURCE_PATH" ]; then AMALGAMATE_SOURCE_PATH="$SCRIPTPATH/../src"; fi
if [ -z "$AMALGAMATE_INCLUDE_PATH" ]; then AMALGAMATE_INCLUDE_PATH="$SCRIPTPATH/../include"; fi
if [ -z "$AMALGAMATE_SOURCE_PATH" ]; then AMALGAMATE_SOURCE_PATH="$PROJECTPATH/src"; fi
if [ -z "$AMALGAMATE_INCLUDE_PATH" ]; then AMALGAMATE_INCLUDE_PATH="$PROJECTPATH/include"; fi
if [ -z "$AMALGAMATE_OUTPUT_PATH" ]; then AMALGAMATE_OUTPUT_PATH="$SCRIPTPATH"; fi
# this list excludes the "src/generic headers"

View File

@ -1,4 +1,4 @@
/* auto-generated on Tue May 5 20:03:59 EDT 2020. Do not edit! */
/* auto-generated on Tue May 19 13:32:53 PDT 2020. Do not edit! */
#include <iostream>
#include "simdjson.h"

File diff suppressed because it is too large Load Diff

View File

@ -1,4 +1,4 @@
/* auto-generated on Tue May 5 20:03:59 EDT 2020. Do not edit! */
/* auto-generated on Tue May 19 13:32:53 PDT 2020. Do not edit! */
/* begin file include/simdjson.h */
#ifndef SIMDJSON_H
#define SIMDJSON_H
@ -328,12 +328,19 @@ constexpr size_t DEFAULT_MAX_DEPTH = 1024;
#define unlikely(x) x
#endif
#include <CppCoreCheck\Warnings.h>
#define SIMDJSON_PUSH_DISABLE_WARNINGS __pragma(warning( push ))
#define SIMDJSON_PUSH_DISABLE_ALL_WARNINGS __pragma(warning( push, 0 ))
#define SIMDJSON_DISABLE_VS_WARNING(WARNING_NUMBER) __pragma(warning( disable : WARNING_NUMBER ))
// Get rid of Intellisense-only warnings (Code Analysis)
// Though __has_include is C++17, it looks like it is supported in Visual Studio 2017 or better.
// We are probably not supporting earlier version of Visual Studio in any case.
#if __has_include(<CppCoreCheck\Warnings.h>)
#include <CppCoreCheck\Warnings.h>
#define SIMDJSON_DISABLE_UNDESIRED_WARNINGS SIMDJSON_DISABLE_VS_WARNING(ALL_CPPCORECHECK_WARNINGS)
#else
#define SIMDJSON_DISABLE_UNDESIRED_WARNINGS
#endif
#define SIMDJSON_DISABLE_DEPRECATED_WARNING SIMDJSON_DISABLE_VS_WARNING(4996)
#define SIMDJSON_POP_DISABLE_WARNINGS __pragma(warning( pop ))
@ -2482,6 +2489,8 @@ public:
really_inline uint32_t scope_count() const noexcept;
template<typename T>
really_inline T next_tape_value() const noexcept;
really_inline uint32_t get_string_length() const noexcept;
really_inline const char * get_c_str() const noexcept;
inline std::string_view get_string_view() const noexcept;
/** The document this element references. */
@ -2491,6 +2500,12 @@ public:
size_t json_index;
};
// Element type of the parser's per-scope ret_address array: a one-character
// tag by default, or a label address (void*) when computed goto is enabled.
#ifndef SIMDJSON_USE_COMPUTED_GOTO
using ret_address = char;
#else
using ret_address = void*;
#endif
} // namespace internal
namespace dom {
@ -2623,7 +2638,22 @@ public:
* Get the key of this key/value pair.
*/
inline std::string_view key() const noexcept;
/**
* Get the length (in bytes) of the key in this key/value pair.
* You should expect this function to be faster than key().size().
*/
inline uint32_t key_length() const noexcept;
/**
* Returns true if the key in this key/value pair is equal
* to the provided string_view.
*/
inline bool key_equals(const std::string_view & o) const noexcept;
/**
* Returns true if the key in this key/value pair is equal
* to the provided string_view in a case-insensitive manner.
* Case comparisons may only be handled correctly for ASCII strings.
*/
inline bool key_equals_case_insensitive(const std::string_view & o) const noexcept;
/**
* Get the key of this key/value pair.
*/
@ -3364,16 +3394,8 @@ public:
/** @private Tape location of each open { or [ */
std::unique_ptr<scope_descriptor[]> containing_scope{};
#ifdef SIMDJSON_USE_COMPUTED_GOTO
/** @private Return address of each open { or [ */
std::unique_ptr<void*[]> ret_address{};
#else
/** @private Return address of each open { or [ */
std::unique_ptr<char[]> ret_address{};
#endif
/** @private Next write location in the string buf for stage 2 parsing */
uint8_t *current_string_buf_loc{};
std::unique_ptr<internal::ret_address[]> ret_address{};
/** @private Use `if (parser.parse(...).error())` instead */
bool valid{false};
@ -3405,32 +3427,6 @@ public:
/** @private Private and deprecated: use `parser.parse(...).doc.dump_raw_tape()` instead */
inline bool dump_raw_tape(std::ostream &os) const noexcept;
//
// Parser callbacks: these are internal!
//
/** @private this should be called when parsing (right before writing the tapes) */
inline void init_stage2() noexcept;
really_inline error_code on_error(error_code new_error_code) noexcept; ///< @private
really_inline error_code on_success(error_code success_code) noexcept; ///< @private
really_inline bool on_start_document(uint32_t depth) noexcept; ///< @private
really_inline bool on_start_object(uint32_t depth) noexcept; ///< @private
really_inline bool on_start_array(uint32_t depth) noexcept; ///< @private
// TODO we're not checking this bool
really_inline bool on_end_document(uint32_t depth) noexcept; ///< @private
really_inline bool on_end_object(uint32_t depth) noexcept; ///< @private
really_inline bool on_end_array(uint32_t depth) noexcept; ///< @private
really_inline bool on_true_atom() noexcept; ///< @private
really_inline bool on_false_atom() noexcept; ///< @private
really_inline bool on_null_atom() noexcept; ///< @private
really_inline uint8_t *on_start_string() noexcept; ///< @private
really_inline bool on_end_string(uint8_t *dst) noexcept; ///< @private
really_inline bool on_number_s64(int64_t value) noexcept; ///< @private
really_inline bool on_number_u64(uint64_t value) noexcept; ///< @private
really_inline bool on_number_double(double value) noexcept; ///< @private
really_inline void increment_count(uint32_t depth) noexcept; ///< @private
really_inline void end_scope(uint32_t depth) noexcept; ///< @private
private:
/**
* The maximum document length this parser will automatically support.
@ -3475,8 +3471,6 @@ private:
//
//
inline void write_tape(uint64_t val, internal::tape_type t) noexcept;
/**
* Ensure we have enough capacity to handle at least desired_capacity bytes,
* and auto-allocate if not.
@ -5130,11 +5124,7 @@ inline error_code parser::allocate(size_t capacity, size_t max_depth) noexcept {
// Initialize stage 2 state
//
containing_scope.reset(new (std::nothrow) scope_descriptor[max_depth]); // TODO realloc
#ifdef SIMDJSON_USE_COMPUTED_GOTO
ret_address.reset(new (std::nothrow) void *[max_depth]);
#else
ret_address.reset(new (std::nothrow) char[max_depth]);
#endif
ret_address.reset(new (std::nothrow) internal::ret_address[max_depth]);
if (!ret_address || !containing_scope) {
// Could not allocate memory
@ -5297,7 +5287,7 @@ inline simdjson_result<element> object::at(const std::string_view &json_pointer)
inline simdjson_result<element> object::at_key(const std::string_view &key) const noexcept {
iterator end_field = end();
for (iterator field = begin(); field != end_field; ++field) {
if (key == field.key()) {
if (field.key_equals(key)) {
return field.value();
}
}
@ -5309,13 +5299,8 @@ inline simdjson_result<element> object::at_key(const std::string_view &key) cons
inline simdjson_result<element> object::at_key_case_insensitive(const std::string_view &key) const noexcept {
iterator end_field = end();
for (iterator field = begin(); field != end_field; ++field) {
auto field_key = field.key();
if (key.length() == field_key.length()) {
// See For case-insensitive string comparisons, avoid char-by-char functions
// https://lemire.me/blog/2020/04/30/for-case-insensitive-string-comparisons-avoid-char-by-char-functions/
// Note that it might be worth rolling our own strncasecmp function, with vectorization.
const bool equal = (simdjson_strncasecmp(key.data(), field_key.data(), key.length()) == 0);
if (equal) { return field.value(); }
if (field.key_equals_case_insensitive(key)) {
return field.value();
}
}
return NO_SUCH_FIELD;
@ -5337,13 +5322,10 @@ inline object::iterator& object::iterator::operator++() noexcept {
return *this;
}
inline std::string_view object::iterator::key() const noexcept {
size_t string_buf_index = size_t(tape_value());
uint32_t len;
memcpy(&len, &doc->string_buf[string_buf_index], sizeof(len));
return std::string_view(
reinterpret_cast<const char *>(&doc->string_buf[string_buf_index + sizeof(uint32_t)]),
len
);
return get_string_view();
}
// Length (in bytes) of the key of the current key/value pair.
// Forwards to get_string_length(), so no std::string_view is built.
inline uint32_t object::iterator::key_length() const noexcept {
return get_string_length();
}
inline const char* object::iterator::key_c_str() const noexcept {
return reinterpret_cast<const char *>(&doc->string_buf[size_t(tape_value()) + sizeof(uint32_t)]);
@ -5352,6 +5334,42 @@ inline element object::iterator::value() const noexcept {
return element(doc, json_index + 1);
}
/**
* Design notes:
* Instead of constructing a string_view and then comparing it with a
* user-provided string, it is probably more performant to have dedicated
* functions that take the string we want to compare against as a parameter
* and return true when the two are equal. That avoids the creation of a temporary
* std::string_view. Though it is possible for the compiler to avoid entirely
* any overhead due to string_view, relying too much on compiler magic is
* problematic: compiler magic sometimes fails, and then what do you do?
* Also, enticing users to rely on high-performance functions is probably better
* in the long run.
*/
/**
 * Returns true when the key of the current key/value pair is byte-for-byte
 * equal to `o`.
 *
 * @param o the string to compare the key against; may be empty, including a
 *          default-constructed std::string_view whose data() is null.
 * @return true if both have the same length and the same bytes.
 */
inline bool object::iterator::key_equals(const std::string_view & o) const noexcept {
  // Compare the lengths first so that keys of a different size never reach
  // the byte comparison.
  const uint32_t len = key_length();
  if (o.size() != len) { return false; }
  // Two empty strings are equal. Answer here rather than calling memcmp:
  // an empty std::string_view may have a null data() pointer, and handing a
  // null pointer to memcmp is undefined behavior even when the count is zero.
  if (len == 0) { return true; }
  // We avoid construction of a temporary string_view instance.
  return (memcmp(o.data(), key_c_str(), len) == 0);
}
/**
 * Returns true when the key of the current key/value pair equals `o`, ignoring
 * case. The comparison is delegated to simdjson_strncasecmp.
 *
 * @param o the string to compare the key against; may be empty, including a
 *          default-constructed std::string_view whose data() is null.
 * @return true if both have the same length and compare equal.
 */
inline bool object::iterator::key_equals_case_insensitive(const std::string_view & o) const noexcept {
  // Compare the lengths first so that keys of a different size never reach
  // the character comparison.
  const uint32_t len = key_length();
  if (o.size() != len) { return false; }
  // Two empty strings are equal. Answer here so that a null data() pointer
  // from an empty std::string_view is never passed to the C comparison routine.
  if (len == 0) { return true; }
  // See "For case-insensitive string comparisons, avoid char-by-char functions":
  // https://lemire.me/blog/2020/04/30/for-case-insensitive-string-comparisons-avoid-char-by-char-functions/
  // Note that it might be worth rolling our own strncasecmp function, with vectorization.
  return (simdjson_strncasecmp(o.data(), key_c_str(), len) == 0);
}
//
// key_value_pair inline implementation
//
@ -5386,8 +5404,7 @@ template<>
inline simdjson_result<const char *> element::get<const char *>() const noexcept {
switch (tape_ref_type()) {
case internal::tape_type::STRING: {
size_t string_buf_index = size_t(tape_value());
return reinterpret_cast<const char *>(&doc->string_buf[string_buf_index + sizeof(uint32_t)]);
return get_c_str();
}
default:
return INCORRECT_TYPE;
@ -5786,13 +5803,23 @@ really_inline T tape_ref::next_tape_value() const noexcept {
memcpy(&x,&doc->tape[json_index + 1],sizeof(uint64_t));
return x;
}
inline std::string_view internal::tape_ref::get_string_view() const noexcept {
size_t string_buf_index = size_t(tape_value());
// Returns the 32-bit length stored in doc->string_buf at the offset given by
// tape_value().
really_inline uint32_t internal::tape_ref::get_string_length() const noexcept {
  const uint64_t offset = size_t(tape_value());
  uint32_t length;
  // Copy the bytes out rather than dereferencing a cast pointer.
  memcpy(&length, &doc->string_buf[offset], sizeof(length));
  return length;
}
// Returns a pointer to the string's characters: they start sizeof(uint32_t)
// bytes after the offset given by tape_value(), i.e. just past the length
// that get_string_length() reads.
really_inline const char * internal::tape_ref::get_c_str() const noexcept {
  const uint64_t offset = size_t(tape_value());
  const auto *chars = &doc->string_buf[offset + sizeof(uint32_t)];
  return reinterpret_cast<const char *>(chars);
}
inline std::string_view internal::tape_ref::get_string_view() const noexcept {
return std::string_view(
reinterpret_cast<const char *>(&doc->string_buf[string_buf_index + sizeof(uint32_t)]),
len
get_c_str(),
get_string_length()
);
}
@ -6092,6 +6119,7 @@ inline error_code document_stream::json_parse() noexcept {
#ifndef SIMDJSON_INLINE_ERROR_H
#define SIMDJSON_INLINE_ERROR_H
#include <cstring>
#include <string>
namespace simdjson {

View File

@ -31,7 +31,7 @@ static inline uint32_t parse_eight_digits_unrolled(const char *chars) {
#define SWAR_NUMBER_PARSING
#include "generic/numberparsing.h"
#include "generic/stage2/numberparsing.h"
} // namespace arm64
} // namespace simdjson

View File

@ -1,6 +1,3 @@
#ifndef SIMDJSON_ARM64_STAGE1_FIND_MARKS_H
#define SIMDJSON_ARM64_STAGE1_FIND_MARKS_H
#include "simdjson.h"
#include "arm64/bitmask.h"
#include "arm64/simd.h"
@ -71,22 +68,20 @@ really_inline simd8<bool> must_be_continuation(simd8<uint8_t> prev1, simd8<uint8
return is_second_byte ^ is_third_byte ^ is_fourth_byte;
}
#include "generic/buf_block_reader.h"
#include "generic/json_string_scanner.h"
#include "generic/json_scanner.h"
#include "generic/stage1/buf_block_reader.h"
#include "generic/stage1/json_string_scanner.h"
#include "generic/stage1/json_scanner.h"
#include "generic/json_minifier.h"
#include "generic/stage1/json_minifier.h"
// arm64 entry point for minification: forwards every argument unchanged to
// arm64::stage1::json_minifier::minify (template argument 64) and returns its
// error_code.
WARN_UNUSED error_code implementation::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept {
return arm64::stage1::json_minifier::minify<64>(buf, len, dst, dst_len);
}
#include "generic/utf8_lookup2_algorithm.h"
#include "generic/json_structural_indexer.h"
#include "generic/stage1/utf8_lookup2_algorithm.h"
#include "generic/stage1/json_structural_indexer.h"
// arm64 entry point for stage 1: forwards every argument unchanged to
// arm64::stage1::json_structural_indexer::index (template argument 64) and
// returns its error_code.
WARN_UNUSED error_code implementation::stage1(const uint8_t *buf, size_t len, parser &parser, bool streaming) const noexcept {
return arm64::stage1::json_structural_indexer::index<64>(buf, len, parser, streaming);
}
} // namespace arm64
} // namespace simdjson
#endif // SIMDJSON_ARM64_STAGE1_FIND_MARKS_H

20
src/arm64/stage2.cpp Normal file
View File

@ -0,0 +1,20 @@
// arm64 stage 2: pulls the generic stage-2 sources into the arm64 namespace.
// This is a .cpp fragment, not a header, so it carries no include guard,
// matching the fallback, haswell and westmere stage2.cpp files.
#include "simdjson.h"
#include "arm64/implementation.h"
#include "arm64/stringparsing.h"
#include "arm64/numberparsing.h"
namespace simdjson {
namespace arm64 {
#include "generic/stage2/atomparsing.h"
#include "generic/stage2/structural_iterator.h"
#include "generic/stage2/structural_parser.h"
#include "generic/stage2/streaming_structural_parser.h"
} // namespace arm64
} // namespace simdjson

View File

@ -1,19 +0,0 @@
#ifndef SIMDJSON_ARM64_STAGE2_BUILD_TAPE_H
#define SIMDJSON_ARM64_STAGE2_BUILD_TAPE_H
#include "simdjson.h"
#include "arm64/implementation.h"
#include "arm64/stringparsing.h"
#include "arm64/numberparsing.h"
namespace simdjson {
namespace arm64 {
#include "generic/atomparsing.h"
#include "generic/stage2_build_tape.h"
#include "generic/stage2_streaming_build_tape.h"
} // namespace arm64
} // namespace simdjson
#endif // SIMDJSON_ARM64_STAGE2_BUILD_TAPE_H

View File

@ -45,7 +45,7 @@ really_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8
};
}
#include "generic/stringparsing.h"
#include "generic/stage2/stringparsing.h"
} // namespace arm64
} // namespace simdjson

View File

@ -1,4 +1,4 @@
#include "simdjson/error.h"
#include "simdjson.h"
namespace simdjson {
namespace internal {

View File

@ -26,7 +26,7 @@ static inline uint32_t parse_eight_digits_unrolled(const char *chars) {
#define SWAR_NUMBER_PARSING
#include "generic/numberparsing.h"
#include "generic/stage2/numberparsing.h"
} // namespace fallback

View File

@ -1,6 +1,3 @@
#ifndef SIMDJSON_FALLBACK_STAGE1_FIND_MARKS_H
#define SIMDJSON_FALLBACK_STAGE1_FIND_MARKS_H
#include "simdjson.h"
#include "fallback/implementation.h"
@ -210,5 +207,3 @@ WARN_UNUSED error_code implementation::minify(const uint8_t *buf, size_t len, ui
} // namespace fallback
} // namespace simdjson
#endif // SIMDJSON_FALLBACK_STAGE1_FIND_MARKS_H

16
src/fallback/stage2.cpp Normal file
View File

@ -0,0 +1,16 @@
#include "simdjson.h"
#include "fallback/implementation.h"
#include "fallback/stringparsing.h"
#include "fallback/numberparsing.h"
namespace simdjson {
namespace fallback {
#include "generic/stage2/atomparsing.h"
#include "generic/stage2/structural_iterator.h"
#include "generic/stage2/structural_parser.h"
#include "generic/stage2/streaming_structural_parser.h"
} // namespace fallback
} // namespace simdjson

View File

@ -1,20 +0,0 @@
#ifndef SIMDJSON_FALLBACK_STAGE2_BUILD_TAPE_H
#define SIMDJSON_FALLBACK_STAGE2_BUILD_TAPE_H
#include "simdjson.h"
#include "fallback/implementation.h"
#include "fallback/stringparsing.h"
#include "fallback/numberparsing.h"
namespace simdjson {
namespace fallback {
#include "generic/atomparsing.h"
#include "generic/stage2_build_tape.h"
#include "generic/stage2_streaming_build_tape.h"
} // namespace fallback
} // namespace simdjson
#endif // SIMDJSON_FALLBACK_STAGE2_BUILD_TAPE_H

View File

@ -27,7 +27,7 @@ really_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8
return { src[0] };
}
#include "generic/stringparsing.h"
#include "generic/stage2/stringparsing.h"
} // namespace fallback
} // namespace simdjson

View File

@ -1,7 +1,7 @@
// This file contains the common code every implementation uses in stage1
// It is intended to be included multiple times and compiled multiple times
// We assume the file in which it is included already includes
// "simdjson/stage1_find_marks.h" (this simplifies amalgamation)
// "simdjson/stage1.h" (this simplifies amalgamation)
namespace stage1 {

View File

@ -1,7 +1,7 @@
// This file contains the common code every implementation uses in stage1
// It is intended to be included multiple times and compiled multiple times
// We assume the file in which it is included already includes
// "simdjson/stage1_find_marks.h" (this simplifies amalgamation)
// "simdjson/stage1.h" (this simplifies amalgamation)
namespace stage1 {

View File

@ -1,3 +1,4 @@
namespace stage2 {
namespace atomparsing {
// Loads the first four bytes of `str` as a native-endian uint32_t.
// memcpy is used instead of dereferencing a reinterpret_cast'ed pointer:
// `str` points into a byte buffer with no 4-byte alignment guarantee, and
// reading char storage through a uint32_t lvalue violates strict aliasing.
really_inline uint32_t string_to_uint32(const char* str) {
  uint32_t val;
  memcpy(&val, str, sizeof(val));
  return val;
}
@ -46,4 +47,5 @@ really_inline bool is_valid_null_atom(const uint8_t *src, size_t len) {
else { return false; }
}
} // namespace atomparsing
} // namespace atomparsing
} // namespace stage2

View File

@ -1,3 +1,4 @@
namespace stage2 {
namespace numberparsing {
// Attempts to compute i * 10^(power) exactly; and if "negative" is
@ -568,3 +569,4 @@ really_inline bool parse_number(UNUSED const uint8_t *const src,
}
} // namespace numberparsing
} // namespace stage2

View File

@ -3,6 +3,7 @@
// We assume the file in which it is included already includes
// "stringparsing.h" (this simplifies amalgamation)
namespace stage2 {
namespace stringparsing {
// begin copypasta
@ -118,4 +119,5 @@ WARN_UNUSED really_inline uint8_t *parse_string(const uint8_t *src, uint8_t *dst
return nullptr;
}
} // namespace stringparsing
} // namespace stringparsing
} // namespace stage2

View File

@ -0,0 +1,69 @@
namespace stage2 {
/**
 * Cursor over the structural indexes recorded for a JSON buffer. Each call to
 * advance_char() steps to the next recorded offset and caches the byte there.
 */
class structural_iterator {
public:
  /**
   * Starts at position `next_structural_index` of `_structural_indexes`.
   * Nothing is read from the buffer until advance_char() is called.
   */
  really_inline structural_iterator(const uint8_t* _buf, size_t _len, const uint32_t *_structural_indexes, size_t next_structural_index)
    : buf{_buf},
      len{_len},
      structural_indexes{_structural_indexes},
      next_structural{next_structural_index}
  {}

  /** Steps to the next structural index, caches the byte at that offset and returns it. */
  really_inline char advance_char() {
    idx = structural_indexes[next_structural++];
    c = buf[idx];
    return c;
  }

  /** Byte cached by the most recent advance_char() (0 before the first call). */
  really_inline char current_char() {
    return c;
  }

  /** Pointer into the buffer at the current structural offset. */
  really_inline const uint8_t* current() {
    return buf + idx;
  }

  /** Number of bytes from the current structural offset to the end of the buffer. */
  really_inline size_t remaining_len() {
    return len - idx;
  }

  /**
   * Calls `f(copy, idx)` on a heap copy of the buffer whose SIMDJSON_PADDING
   * trailing bytes are spaces, then frees the copy and returns what `f` returned.
   * Returns true without calling `f` when the copy cannot be allocated.
   */
  template<typename F>
  really_inline bool with_space_terminated_copy(const F& f) {
    /**
     * We need to make a copy to make sure that the string is space terminated.
     * This is not about padding the input, which should already be padded up
     * to len + SIMDJSON_PADDING. However, we have no control at this stage
     * over how the padding was done. What if the input string was padded with
     * nulls? It is quite common for an input string to have an extra null
     * character (C string). We do not want to allow 9\0 (where \0 is the null
     * character) inside a JSON document, but the string "9\0" by itself is
     * fine. So we make a copy and pad the input with spaces when we know that
     * there is just one input element. This copy is relatively expensive, but
     * it will almost never be made in practice unless you are in the strange
     * scenario where you have many JSON documents made of single atoms.
     */
    char *const padded = static_cast<char *>(malloc(len + SIMDJSON_PADDING));
    if (!padded) { return true; }
    memcpy(padded, buf, len);
    memset(padded + len, ' ', SIMDJSON_PADDING);
    bool outcome = f(reinterpret_cast<const uint8_t*>(padded), idx);
    free(padded);
    return outcome;
  }

  /** True when the next structural position is at or beyond `n_structural_indexes`. */
  really_inline bool past_end(uint32_t n_structural_indexes) {
    return n_structural_indexes < next_structural + 1;
  }

  /** True when the next structural position is the last of `n_structural_indexes`. */
  really_inline bool at_end(uint32_t n_structural_indexes) {
    return n_structural_indexes == next_structural + 1;
  }

  /** Position in structural_indexes that the next advance_char() will read. */
  really_inline size_t next_structural_index() {
    return next_structural;
  }

  const uint8_t* const buf;                 // input bytes
  const size_t len;                         // length of the input, as passed to the constructor
  const uint32_t* const structural_indexes; // offsets into buf, one per structural character
  size_t next_structural;                   // position in structural_indexes to read next
  size_t idx{0};                            // offset in buf of the current structural character
  uint8_t c{0};                             // the structural character found at buf[idx]
};
} // namespace stage2

View File

@ -1,17 +1,17 @@
// This file contains the common code every implementation uses for stage2
// It is intended to be included multiple times and compiled multiple times
// We assume the file in which it is included already includes
// "simdjson/stage2_build_tape.h" (this simplifies amalgamation)
// "simdjson/stage2.h" (this simplifies amalgamation)
namespace stage2 {
using internal::ret_address;
#ifdef SIMDJSON_USE_COMPUTED_GOTO
typedef void* ret_address;
#define INIT_ADDRESSES() { &&array_begin, &&array_continue, &&error, &&finish, &&object_begin, &&object_continue }
#define GOTO(address) { goto *(address); }
#define CONTINUE(address) { goto *(address); }
#else
typedef char ret_address;
#else // SIMDJSON_USE_COMPUTED_GOTO
#define INIT_ADDRESSES() { '[', 'a', 'e', 'f', '{', 'o' };
#define GOTO(address) \
{ \
@ -33,7 +33,7 @@ typedef char ret_address;
case 'f': goto finish; \
} \
}
#endif
#endif // SIMDJSON_USE_COMPUTED_GOTO
struct unified_machine_addresses {
ret_address array_begin;
@ -47,72 +47,6 @@ struct unified_machine_addresses {
#undef FAIL_IF
#define FAIL_IF(EXPR) { if (EXPR) { return addresses.error; } }
class structural_iterator {
public:
really_inline structural_iterator(const uint8_t* _buf, size_t _len, const uint32_t *_structural_indexes, size_t next_structural_index)
: buf{_buf},
len{_len},
structural_indexes{_structural_indexes},
next_structural{next_structural_index}
{}
really_inline char advance_char() {
idx = structural_indexes[next_structural];
next_structural++;
c = *current();
return c;
}
really_inline char current_char() {
return c;
}
really_inline const uint8_t* current() {
return &buf[idx];
}
really_inline size_t remaining_len() {
return len - idx;
}
template<typename F>
really_inline bool with_space_terminated_copy(const F& f) {
/**
* We need to make a copy to make sure that the string is space terminated.
* This is not about padding the input, which should already padded up
* to len + SIMDJSON_PADDING. However, we have no control at this stage
* on how the padding was done. What if the input string was padded with nulls?
* It is quite common for an input string to have an extra null character (C string).
* We do not want to allow 9\0 (where \0 is the null character) inside a JSON
* document, but the string "9\0" by itself is fine. So we make a copy and
* pad the input with spaces when we know that there is just one input element.
* This copy is relatively expensive, but it will almost never be called in
* practice unless you are in the strange scenario where you have many JSON
* documents made of single atoms.
*/
char *copy = static_cast<char *>(malloc(len + SIMDJSON_PADDING));
if (copy == nullptr) {
return true;
}
memcpy(copy, buf, len);
memset(copy + len, ' ', SIMDJSON_PADDING);
bool result = f(reinterpret_cast<const uint8_t*>(copy), idx);
free(copy);
return result;
}
really_inline bool past_end(uint32_t n_structural_indexes) {
return next_structural+1 > n_structural_indexes;
}
really_inline bool at_end(uint32_t n_structural_indexes) {
return next_structural+1 == n_structural_indexes;
}
really_inline size_t next_structural_index() {
return next_structural;
}
const uint8_t* const buf;
const size_t len;
const uint32_t* const structural_indexes;
size_t next_structural; // next structural index
size_t idx{0}; // location of the structural character in the input (buf)
uint8_t c{0}; // used to track the (structural) character we are looking at
};
struct number_writer {
parser &doc_parser;

View File

@ -39,7 +39,7 @@ static inline uint32_t parse_eight_digits_unrolled(const char *chars) {
#define SWAR_NUMBER_PARSING
#include "generic/numberparsing.h"
#include "generic/stage2/numberparsing.h"
} // namespace haswell

View File

@ -1,6 +1,3 @@
#ifndef SIMDJSON_HASWELL_STAGE1_FIND_MARKS_H
#define SIMDJSON_HASWELL_STAGE1_FIND_MARKS_H
#include "simdjson.h"
#include "haswell/bitmask.h"
@ -60,17 +57,17 @@ really_inline simd8<bool> must_be_continuation(simd8<uint8_t> prev1, simd8<uint8
return simd8<int8_t>(is_second_byte | is_third_byte | is_fourth_byte) > int8_t(0);
}
#include "generic/buf_block_reader.h"
#include "generic/json_string_scanner.h"
#include "generic/json_scanner.h"
#include "generic/stage1/buf_block_reader.h"
#include "generic/stage1/json_string_scanner.h"
#include "generic/stage1/json_scanner.h"
#include "generic/json_minifier.h"
#include "generic/stage1/json_minifier.h"
// haswell entry point for minification: forwards every argument unchanged to
// haswell::stage1::json_minifier::minify (template argument 128) and returns
// its error_code.
WARN_UNUSED error_code implementation::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept {
return haswell::stage1::json_minifier::minify<128>(buf, len, dst, dst_len);
}
#include "generic/utf8_lookup2_algorithm.h"
#include "generic/json_structural_indexer.h"
#include "generic/stage1/utf8_lookup2_algorithm.h"
#include "generic/stage1/json_structural_indexer.h"
// haswell entry point for stage 1: forwards every argument unchanged to
// haswell::stage1::json_structural_indexer::index (template argument 128) and
// returns its error_code.
WARN_UNUSED error_code implementation::stage1(const uint8_t *buf, size_t len, parser &parser, bool streaming) const noexcept {
return haswell::stage1::json_structural_indexer::index<128>(buf, len, parser, streaming);
}
@ -79,5 +76,3 @@ WARN_UNUSED error_code implementation::stage1(const uint8_t *buf, size_t len, pa
} // namespace simdjson
UNTARGET_REGION
#endif // SIMDJSON_HASWELL_STAGE1_FIND_MARKS_H

17
src/haswell/stage2.cpp Normal file
View File

@ -0,0 +1,17 @@
#include "simdjson.h"
#include "haswell/implementation.h"
#include "haswell/stringparsing.h"
#include "haswell/numberparsing.h"
TARGET_HASWELL
namespace simdjson {
namespace haswell {
#include "generic/stage2/atomparsing.h"
#include "generic/stage2/structural_iterator.h"
#include "generic/stage2/structural_parser.h"
#include "generic/stage2/streaming_structural_parser.h"
} // namespace haswell
} // namespace simdjson
UNTARGET_REGION

View File

@ -1,21 +0,0 @@
#ifndef SIMDJSON_HASWELL_STAGE2_BUILD_TAPE_H
#define SIMDJSON_HASWELL_STAGE2_BUILD_TAPE_H
#include "simdjson.h"
#include "haswell/implementation.h"
#include "haswell/stringparsing.h"
#include "haswell/numberparsing.h"
TARGET_HASWELL
namespace simdjson {
namespace haswell {
#include "generic/atomparsing.h"
#include "generic/stage2_build_tape.h"
#include "generic/stage2_streaming_build_tape.h"
} // namespace haswell
} // namespace simdjson
UNTARGET_REGION
#endif // SIMDJSON_HASWELL_STAGE2_BUILD_TAPE_H

View File

@ -41,7 +41,7 @@ really_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8
};
}
#include "generic/stringparsing.h"
#include "generic/stage2/stringparsing.h"
} // namespace haswell
} // namespace simdjson

View File

@ -3,6 +3,12 @@
#include "simdjson.h"
#ifdef JSON_TEST_STRINGS
void found_string(const uint8_t *buf, const uint8_t *parsed_begin,
const uint8_t *parsed_end);
void found_bad_string(const uint8_t *buf);
#endif
namespace simdjson {
// structural chars here are
// they are { 0x7b } 0x7d : 0x3a [ 0x5b ] 0x5d , 0x2c (and NULL)
@ -1328,4 +1334,4 @@ const uint64_t mantissa_128[] = {
} // namespace simdjson
#endif
#endif // SIMDJSON_JSONCHARUTILS_H

View File

@ -5,7 +5,28 @@ SIMDJSON_DISABLE_UNDESIRED_WARNINGS
#include "error.cpp"
#include "implementation.cpp"
#include "stage1_find_marks.cpp"
#include "stage2_build_tape.cpp"
// Anything in the top level directory MUST be included outside of the #if statements
// below, or amalgamation will screw them up!
#include "isadetection.h"
#include "jsoncharutils.h"
#include "simdprune_tables.h"
#if SIMDJSON_IMPLEMENTATION_ARM64
#include "arm64/stage1.cpp"
#include "arm64/stage2.cpp"
#endif
#if SIMDJSON_IMPLEMENTATION_FALLBACK
#include "fallback/stage1.cpp"
#include "fallback/stage2.cpp"
#endif
#if SIMDJSON_IMPLEMENTATION_HASWELL
#include "haswell/stage1.cpp"
#include "haswell/stage2.cpp"
#endif
#if SIMDJSON_IMPLEMENTATION_WESTMERE
#include "westmere/stage1.cpp"
#include "westmere/stage2.cpp"
#endif
SIMDJSON_POP_DISABLE_WARNINGS

View File

@ -1,12 +0,0 @@
#if SIMDJSON_IMPLEMENTATION_ARM64
#include "arm64/stage1_find_marks.h"
#endif
#if SIMDJSON_IMPLEMENTATION_FALLBACK
#include "fallback/stage1_find_marks.h"
#endif
#if SIMDJSON_IMPLEMENTATION_HASWELL
#include "haswell/stage1_find_marks.h"
#endif
#if SIMDJSON_IMPLEMENTATION_WESTMERE
#include "westmere/stage1_find_marks.h"
#endif

View File

@ -1,25 +0,0 @@
#include "simdjson.h"
#include <cassert>
#include <cstring>
#include "jsoncharutils.h"
using namespace simdjson;
#ifdef JSON_TEST_STRINGS
void found_string(const uint8_t *buf, const uint8_t *parsed_begin,
const uint8_t *parsed_end);
void found_bad_string(const uint8_t *buf);
#endif
#if SIMDJSON_IMPLEMENTATION_ARM64
#include "arm64/stage2_build_tape.h"
#endif
#if SIMDJSON_IMPLEMENTATION_FALLBACK
#include "fallback/stage2_build_tape.h"
#endif
#if SIMDJSON_IMPLEMENTATION_HASWELL
#include "haswell/stage2_build_tape.h"
#endif
#if SIMDJSON_IMPLEMENTATION_WESTMERE
#include "westmere/stage2_build_tape.h"
#endif

View File

@ -40,7 +40,7 @@ static inline uint32_t parse_eight_digits_unrolled(const char *chars) {
#define SWAR_NUMBER_PARSING
#include "generic/numberparsing.h"
#include "generic/stage2/numberparsing.h"
} // namespace westmere

View File

@ -1,6 +1,3 @@
#ifndef SIMDJSON_WESTMERE_STAGE1_FIND_MARKS_H
#define SIMDJSON_WESTMERE_STAGE1_FIND_MARKS_H
#include "simdjson.h"
#include "westmere/bitmask.h"
#include "westmere/simd.h"
@ -59,17 +56,17 @@ really_inline simd8<bool> must_be_continuation(simd8<uint8_t> prev1, simd8<uint8
return simd8<int8_t>(is_second_byte | is_third_byte | is_fourth_byte) > int8_t(0);
}
#include "generic/buf_block_reader.h"
#include "generic/json_string_scanner.h"
#include "generic/json_scanner.h"
#include "generic/stage1/buf_block_reader.h"
#include "generic/stage1/json_string_scanner.h"
#include "generic/stage1/json_scanner.h"
#include "generic/json_minifier.h"
#include "generic/stage1/json_minifier.h"
// westmere entry point for minification: forwards every argument unchanged to
// westmere::stage1::json_minifier::minify (template argument 64) and returns
// its error_code.
WARN_UNUSED error_code implementation::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept {
return westmere::stage1::json_minifier::minify<64>(buf, len, dst, dst_len);
}
#include "generic/utf8_lookup2_algorithm.h"
#include "generic/json_structural_indexer.h"
#include "generic/stage1/utf8_lookup2_algorithm.h"
#include "generic/stage1/json_structural_indexer.h"
// westmere entry point for stage 1: forwards every argument unchanged to
// westmere::stage1::json_structural_indexer::index (template argument 64) and
// returns its error_code.
WARN_UNUSED error_code implementation::stage1(const uint8_t *buf, size_t len, parser &parser, bool streaming) const noexcept {
return westmere::stage1::json_structural_indexer::index<64>(buf, len, parser, streaming);
}
@ -78,5 +75,3 @@ WARN_UNUSED error_code implementation::stage1(const uint8_t *buf, size_t len, pa
} // namespace simdjson
UNTARGET_REGION
#endif // SIMDJSON_WESTMERE_STAGE1_FIND_MARKS_H

17
src/westmere/stage2.cpp Normal file
View File

@ -0,0 +1,17 @@
#include "simdjson.h"
#include "westmere/implementation.h"
#include "westmere/stringparsing.h"
#include "westmere/numberparsing.h"
TARGET_WESTMERE
namespace simdjson {
namespace westmere {
#include "generic/stage2/atomparsing.h"
#include "generic/stage2/structural_iterator.h"
#include "generic/stage2/structural_parser.h"
#include "generic/stage2/streaming_structural_parser.h"
} // namespace westmere
} // namespace simdjson
UNTARGET_REGION

View File

@ -1,20 +0,0 @@
#ifndef SIMDJSON_WESTMERE_STAGE2_BUILD_TAPE_H
#define SIMDJSON_WESTMERE_STAGE2_BUILD_TAPE_H
#include "simdjson.h"
#include "westmere/implementation.h"
#include "westmere/stringparsing.h"
#include "westmere/numberparsing.h"
TARGET_WESTMERE
namespace simdjson {
namespace westmere {
#include "generic/atomparsing.h"
#include "generic/stage2_build_tape.h"
#include "generic/stage2_streaming_build_tape.h"
} // namespace westmere
} // namespace simdjson
UNTARGET_REGION
#endif // SIMDJSON_WESTMERE_STAGE2_BUILD_TAPE_H

View File

@ -43,7 +43,7 @@ really_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8
};
}
#include "generic/stringparsing.h"
#include "generic/stage2/stringparsing.h"
} // namespace westmere
} // namespace simdjson