Merge pull request #947 from simdjson/jkeiser/stream-parse
On-Demand Parsing
This commit is contained in:
commit
a9480a768b
|
@ -17,7 +17,7 @@ environment:
|
|||
- job_name: VS2019 (Win32)
|
||||
platform: Win32
|
||||
CMAKE_ARGS: -A %Platform% -DSIMDJSON_BUILD_STATIC=OFF -DSIMDJSON_ENABLE_THREADS=ON # This should be the default. Testing anyway.
|
||||
CTEST_ARGS: -E checkperf
|
||||
CTEST_ARGS: -E "checkperf|ondemand_basictests"
|
||||
- job_name: VS2015
|
||||
image: Visual Studio 2015
|
||||
CMAKE_ARGS: -A %Platform% -DSIMDJSON_BUILD_STATIC=ON -DSIMDJSON_ENABLE_THREADS=OFF
|
||||
|
|
|
@ -96,7 +96,7 @@ commands:
|
|||
- run: |
|
||||
cd build &&
|
||||
tools/json2json -h &&
|
||||
ctest $CTEST_FLAGS -L acceptance -LE per_implementation &&
|
||||
ctest $CTEST_FLAGS -DSIMDJSON_IMPLEMENTATION="haswell;westmere;fallback" -L acceptance -LE per_implementation &&
|
||||
SIMDJSON_FORCE_IMPLEMENTATION=haswell ctest $CTEST_FLAGS -L per_implementation -E checkperf &&
|
||||
SIMDJSON_FORCE_IMPLEMENTATION=westmere ctest $CTEST_FLAGS -L per_implementation -E checkperf &&
|
||||
SIMDJSON_FORCE_IMPLEMENTATION=fallback ctest $CTEST_FLAGS -L per_implementation -E checkperf &&
|
||||
|
|
14
.drone.yml
14
.drone.yml
|
@ -50,7 +50,7 @@ steps:
|
|||
CC: gcc
|
||||
CXX: g++
|
||||
BUILD_FLAGS: -- -j
|
||||
CMAKE_FLAGS: -DSIMDJSON_BUILD_STATIC=ON
|
||||
CMAKE_FLAGS: -DSIMDJSON_BUILD_STATIC=ON -DSIMDJSON_IMPLEMENTATION=haswell;westmere;fallback
|
||||
CTEST_FLAGS: -j4 --output-on-failure -E checkperf
|
||||
commands:
|
||||
- apt-get update -qq
|
||||
|
@ -76,7 +76,7 @@ steps:
|
|||
CC: clang-6.0
|
||||
CXX: clang++-6.0
|
||||
BUILD_FLAGS: -- -j
|
||||
CMAKE_FLAGS: -DSIMDJSON_BUILD_STATIC=ON
|
||||
CMAKE_FLAGS: -DSIMDJSON_BUILD_STATIC=ON -DSIMDJSON_IMPLEMENTATION=haswell;westmere;fallback
|
||||
CTEST_FLAGS: -j4 --output-on-failure -E checkperf
|
||||
commands:
|
||||
- mkdir build
|
||||
|
@ -140,7 +140,7 @@ steps:
|
|||
CC: gcc
|
||||
CXX: g++
|
||||
BUILD_FLAGS: -- -j
|
||||
CMAKE_FLAGS: -DSIMDJSON_BUILD_STATIC=ON
|
||||
CMAKE_FLAGS: -DSIMDJSON_BUILD_STATIC=ON -DSIMDJSON_IMPLEMENTATION=haswell;westmere;fallback
|
||||
CTEST_FLAGS: -j4 --output-on-failure -E checkperf
|
||||
commands:
|
||||
- apt-get update -qq
|
||||
|
@ -165,7 +165,7 @@ steps:
|
|||
environment:
|
||||
CC: clang-9
|
||||
CXX: clang++-9
|
||||
CMAKE_FLAGS: -DSIMDJSON_SANITIZE=ON
|
||||
CMAKE_FLAGS: -DSIMDJSON_SANITIZE=ON -DSIMDJSON_IMPLEMENTATION=haswell;westmere;fallback
|
||||
BUILD_FLAGS: -- -j
|
||||
CTEST_FLAGS: -j4 --output-on-failure -E checkperf
|
||||
commands:
|
||||
|
@ -189,7 +189,7 @@ steps:
|
|||
CC: gcc
|
||||
CXX: g++
|
||||
BUILD_FLAGS: -- -j
|
||||
CMAKE_FLAGS: -DSIMDJSON_BUILD_STATIC=ON
|
||||
CMAKE_FLAGS: -DSIMDJSON_BUILD_STATIC=ON -DSIMDJSON_IMPLEMENTATION=arm64;fallback
|
||||
CTEST_FLAGS: -j4 --output-on-failure -E checkperf
|
||||
commands:
|
||||
- apt-get update -qq
|
||||
|
@ -274,7 +274,7 @@ steps:
|
|||
image: gcc:8
|
||||
environment:
|
||||
BUILD_FLAGS: -- -j
|
||||
CMAKE_FLAGS: -DSIMDJSON_BUILD_STATIC=ON
|
||||
CMAKE_FLAGS: -DSIMDJSON_BUILD_STATIC=ON -DSIMDJSON_IMPLEMENTATION=arm64;fallback
|
||||
CTEST_FLAGS: -j4 --output-on-failure -E checkperf
|
||||
CC: gcc
|
||||
CXX: g++
|
||||
|
@ -299,7 +299,7 @@ steps:
|
|||
environment:
|
||||
CC: clang-6.0
|
||||
CXX: clang++-6.0
|
||||
CMAKE_FLAGS: -DSIMDJSON_SANITIZE=ON
|
||||
CMAKE_FLAGS: -DSIMDJSON_SANITIZE=ON -DSIMDJSON_IMPLEMENTATION=arm64;fallback
|
||||
BUILD_FLAGS: -- -j
|
||||
CTEST_FLAGS: -j4 --output-on-failure -E checkperf
|
||||
commands:
|
||||
|
|
|
@ -49,5 +49,5 @@ jobs:
|
|||
mkdir build32
|
||||
cd build32
|
||||
cmake -DSIMDJSON_BUILD_STATIC=ON -DSIMDJSON_COMPETITION=OFF -DSIMDJSON_GOOGLE_BENCHMARKS=OFF -DSIMDJSON_ENABLE_THREADS=OFF ..
|
||||
cmake --build . --target parse_many_test jsoncheck basictests numberparsingcheck stringparsingcheck errortests integer_tests pointercheck --verbose
|
||||
cmake --build . --target parse_many_test jsoncheck basictests ondemand_basictests numberparsingcheck stringparsingcheck errortests integer_tests pointercheck --verbose
|
||||
ctest -R "(parse_many_test|jsoncheck|basictests|stringparsingcheck|numberparsingcheck|errortests|integer_tests|pointercheck)" --output-on-failure
|
||||
|
|
|
@ -49,11 +49,11 @@ jobs:
|
|||
mkdir build64
|
||||
cd build64
|
||||
cmake -DSIMDJSON_BUILD_STATIC=ON -DSIMDJSON_COMPETITION=OFF -DSIMDJSON_GOOGLE_BENCHMARKS=OFF -DSIMDJSON_ENABLE_THREADS=OFF ..
|
||||
cmake --build . --target parse_many_test jsoncheck basictests numberparsingcheck stringparsingcheck errortests integer_tests pointercheck --verbose
|
||||
cmake --build . --target parse_many_test jsoncheck basictests ondemand_basictests numberparsingcheck stringparsingcheck errortests integer_tests pointercheck --verbose
|
||||
ctest -R "(parse_many_test|jsoncheck|basictests|stringparsingcheck|numberparsingcheck|errortests|integer_tests|pointercheck)" --output-on-failure
|
||||
cd ..
|
||||
mkdir build64debug
|
||||
cd build64debug
|
||||
cmake -DCMAKE_BUILD_TYPE=Debug -DSIMDJSON_BUILD_STATIC=ON -DSIMDJSON_COMPETITION=OFF -DSIMDJSON_GOOGLE_BENCHMARKS=OFF -DSIMDJSON_ENABLE_THREADS=OFF ..
|
||||
cmake --build . --target parse_many_test jsoncheck basictests numberparsingcheck stringparsingcheck errortests integer_tests pointercheck --verbose
|
||||
cmake --build . --target parse_many_test jsoncheck basictests ondemand_basictests numberparsingcheck stringparsingcheck errortests integer_tests pointercheck --verbose
|
||||
ctest -R "(parse_many_test|jsoncheck|basictests|stringparsingcheck|numberparsingcheck|errortests|integer_tests|pointercheck)" --output-on-failure
|
||||
|
|
|
@ -44,6 +44,7 @@ if (TARGET benchmark::benchmark)
|
|||
link_libraries(benchmark::benchmark)
|
||||
add_executable(bench_parse_call bench_parse_call.cpp)
|
||||
add_executable(bench_dom_api bench_dom_api.cpp)
|
||||
add_executable(bench_ondemand bench_ondemand.cpp)
|
||||
endif()
|
||||
|
||||
include(checkperf.cmake)
|
||||
|
|
|
@ -0,0 +1,22 @@
|
|||
#include "simdjson.h"
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <random>
|
||||
#include <vector>
|
||||
SIMDJSON_PUSH_DISABLE_ALL_WARNINGS
|
||||
#include <benchmark/benchmark.h>
|
||||
SIMDJSON_POP_DISABLE_WARNINGS
|
||||
|
||||
#include "partial_tweets/ondemand.h"
|
||||
#include "partial_tweets/iter.h"
|
||||
#include "partial_tweets/dom.h"
|
||||
|
||||
#include "largerandom/ondemand.h"
|
||||
#include "largerandom/iter.h"
|
||||
#include "largerandom/dom.h"
|
||||
|
||||
#include "kostya/ondemand.h"
|
||||
#include "kostya/iter.h"
|
||||
#include "kostya/dom.h"
|
||||
|
||||
BENCHMARK_MAIN();
|
|
@ -1,359 +1,14 @@
|
|||
#define SIMDJSON_IMPLEMENTATION_FALLBACK 0
|
||||
#define SIMDJSON_IMPLEMENTATION_WESTMERE 0
|
||||
#define SIMDJSON_IMPLEMENTATION_ARM64 0
|
||||
|
||||
#include "simdjson.h"
|
||||
#include "simdjson.cpp"
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <random>
|
||||
|
||||
#include "simdjson.h"
|
||||
|
||||
#include <vector>
|
||||
SIMDJSON_PUSH_DISABLE_ALL_WARNINGS
|
||||
#include <benchmark/benchmark.h>
|
||||
SIMDJSON_POP_DISABLE_WARNINGS
|
||||
|
||||
#include "simdjson.cpp"
|
||||
|
||||
#if SIMDJSON_EXCEPTIONS
|
||||
|
||||
using namespace benchmark;
|
||||
using namespace simdjson;
|
||||
using std::cerr;
|
||||
using std::endl;
|
||||
|
||||
const char *TWITTER_JSON = SIMDJSON_BENCHMARK_DATA_DIR "twitter.json";
|
||||
const int REPETITIONS = 10;
|
||||
|
||||
#if SIMDJSON_IMPLEMENTATION_HASWELL
|
||||
|
||||
#include "twitter/sax_tweet_reader.h"
|
||||
|
||||
/**
 * Benchmark: read the tweets of twitter.json with twitter::sax_tweet_reader.
 *
 * Setup failures (loading the file, sizing the reader) are printed to stderr
 * and reported through State::SkipWithError, so the framework records the run
 * as skipped rather than seeing the function return before its measurement
 * loop has executed. A failed read during warm-up or measurement is thrown as
 * the error code.
 *
 * Counters: "Gigabytes" (bytes read, as a rate, 1000-based), "docs"
 * (iterations as a rate) and "tweets" (tweets read, as a rate).
 */
static void sax_tweets(State &state) {
  // Load twitter.json to a buffer
  padded_string json;
  if (auto error = padded_string::load(TWITTER_JSON).get(json)) {
    cerr << error << endl;
    state.SkipWithError("loading twitter.json failed");
    return;
  }

  // Allocate
  twitter::sax_tweet_reader reader;
  if (auto error = reader.set_capacity(json.size())) {
    cerr << error << endl;
    state.SkipWithError("allocating the tweet reader failed");
    return;
  }

  // Warm the vector
  if (auto error = reader.read_tweets(json)) { throw error; }

  // Read tweets
  size_t bytes = 0;
  size_t tweets = 0;
  for (SIMDJSON_UNUSED auto _ : state) {
    if (auto error = reader.read_tweets(json)) { throw error; }
    bytes += json.size();
    tweets += reader.tweets.size();
  }

  // Gigabyte: https://en.wikipedia.org/wiki/Gigabyte
  state.counters["Gigabytes"] = benchmark::Counter(
          double(bytes), benchmark::Counter::kIsRate,
          benchmark::Counter::OneK::kIs1000); // For GiB : kIs1024
  state.counters["docs"] = Counter(double(state.iterations()), benchmark::Counter::kIsRate);
  state.counters["tweets"] = Counter(double(tweets), benchmark::Counter::kIsRate);
}
|
||||
BENCHMARK(sax_tweets)->Repetitions(REPETITIONS)->ComputeStatistics("max", [](const std::vector<double>& v) -> double {
|
||||
return *(std::max_element(std::begin(v), std::end(v)));
|
||||
})->DisplayAggregatesOnly(true);
|
||||
|
||||
#endif // SIMDJSON_IMPLEMENTATION_HASWELL
|
||||
|
||||
#include "twitter/tweet.h"
|
||||
|
||||
/**
 * Reads an element that may be JSON null as an unsigned integer.
 *
 * @param element the DOM element to read.
 * @return 0 when the element is null, otherwise the element converted to uint64_t.
 */
simdjson_really_inline uint64_t nullable_int(dom::element element) {
  if (!element.is_null()) {
    return element;
  }
  return 0;
}
|
||||
/**
 * Parses `json` with the DOM parser and appends one twitter::tweet per entry
 * of the top-level "statuses" array to `tweets`.
 *
 * The vector is appended to, not cleared; callers clear it between runs.
 */
simdjson_really_inline void read_dom_tweets(dom::parser &parser, padded_string &json, std::vector<twitter::tweet> &tweets) {
  for (dom::element status : parser.parse(json)["statuses"]) {
    auto author = status["user"];
    tweets.push_back(
      {
        status["id"],
        status["text"],
        status["created_at"],
        nullable_int(status["in_reply_to_status_id"]), // may be null in the input
        status["retweet_count"],
        status["favorite_count"],
        { author["id"], author["screen_name"] }
      }
    );
  }
}
|
||||
|
||||
/**
 * Benchmark: read the tweets of twitter.json through the DOM API
 * (read_dom_tweets).
 *
 * Setup failures (loading the file, allocating the parser) are printed to
 * stderr and reported through State::SkipWithError, so the framework records
 * the run as skipped rather than seeing the function return before its
 * measurement loop has executed.
 *
 * Counters: "Gigabytes" (bytes read, as a rate, 1000-based), "docs"
 * (iterations as a rate) and "tweets" (tweets read, as a rate).
 */
static void dom_tweets(State &state) {
  // Load twitter.json to a buffer
  padded_string json;
  if (auto error = padded_string::load(TWITTER_JSON).get(json)) {
    cerr << error << endl;
    state.SkipWithError("loading twitter.json failed");
    return;
  }

  // Allocate
  dom::parser parser;
  if (auto error = parser.allocate(json.size())) {
    cerr << error << endl;
    state.SkipWithError("allocating the parser failed");
    return;
  }

  // Warm the vector
  std::vector<twitter::tweet> tweets;
  read_dom_tweets(parser, json, tweets);

  // Read tweets
  size_t bytes = 0;
  size_t num_tweets = 0;
  for (SIMDJSON_UNUSED auto _ : state) {
    tweets.clear(); // read_dom_tweets appends
    read_dom_tweets(parser, json, tweets);
    bytes += json.size();
    num_tweets += tweets.size();
  }

  // Gigabyte: https://en.wikipedia.org/wiki/Gigabyte
  state.counters["Gigabytes"] = benchmark::Counter(
          double(bytes), benchmark::Counter::kIsRate,
          benchmark::Counter::OneK::kIs1000); // For GiB : kIs1024
  state.counters["docs"] = Counter(double(state.iterations()), benchmark::Counter::kIsRate);
  state.counters["tweets"] = Counter(double(num_tweets), benchmark::Counter::kIsRate);
}
|
||||
BENCHMARK(dom_tweets)->Repetitions(REPETITIONS)->ComputeStatistics("max", [](const std::vector<double>& v) -> double {
|
||||
return *(std::max_element(std::begin(v), std::end(v)));
|
||||
})->DisplayAggregatesOnly(true);
|
||||
|
||||
/**
 * Benchmark: parse twitter.json with the DOM parser without reading any
 * value out of the resulting document.
 *
 * Setup failures (loading the file, allocating the parser) are printed to
 * stderr and reported through State::SkipWithError, so the framework records
 * the run as skipped rather than seeing the function return before its
 * measurement loop has executed. A failed parse inside the loop throws.
 *
 * Counters: "Gigabytes" (bytes parsed, as a rate, 1000-based) and "docs"
 * (iterations as a rate).
 */
static void dom_parse(State &state) {
  // Load twitter.json to a buffer
  padded_string json;
  if (auto error = padded_string::load(TWITTER_JSON).get(json)) {
    cerr << error << endl;
    state.SkipWithError("loading twitter.json failed");
    return;
  }

  // Allocate
  dom::parser parser;
  if (auto error = parser.allocate(json.size())) {
    cerr << error << endl;
    state.SkipWithError("allocating the parser failed");
    return;
  }

  // Parse the document repeatedly
  size_t bytes = 0;
  for (SIMDJSON_UNUSED auto _ : state) {
    if (parser.parse(json).error()) { throw "Parsing failed"; }
    bytes += json.size();
  }

  // Gigabyte: https://en.wikipedia.org/wiki/Gigabyte
  state.counters["Gigabytes"] = benchmark::Counter(
          double(bytes), benchmark::Counter::kIsRate,
          benchmark::Counter::OneK::kIs1000); // For GiB : kIs1024
  state.counters["docs"] = Counter(double(state.iterations()), benchmark::Counter::kIsRate);
}
|
||||
BENCHMARK(dom_parse)->Repetitions(REPETITIONS)->ComputeStatistics("max", [](const std::vector<double>& v) -> double {
|
||||
return *(std::max_element(std::begin(v), std::end(v)));
|
||||
})->DisplayAggregatesOnly(true);
|
||||
|
||||
|
||||
/********************
|
||||
* Large file parsing benchmarks:
|
||||
********************/
|
||||
|
||||
/**
 * Builds the text of a JSON array holding N objects of the form
 * { "x":<double>, "y":<double>, "z":<double>}.
 *
 * The values come from a default-constructed std::default_random_engine, so
 * every call produces the same text for the same N. The size of the result
 * (rounded to KB) is reported on stdout.
 *
 * @param N number of objects to generate; 0 yields an empty array.
 * @return the generated JSON text.
 */
static std::string build_json_array(size_t N) {
  std::default_random_engine engine;
  std::uniform_real_distribution<> unit(0, 1);
  std::stringstream out;

  // Writes one object, drawing x, then y, then z from the generator.
  auto write_point = [&]() {
    const double x = unit(engine);
    const double y = unit(engine);
    const double z = unit(engine);
    out << "{ \"x\":" << x << ", \"y\":" << y << ", \"z\":" << z << "}";
  };

  out << "[" << "\n";
  for (size_t i = 0; i < N; i++) {
    if (i == 0) {
      // The first object is followed by a newline ...
      write_point();
      out << "\n";
    } else {
      // ... every later one is preceded by a comma on its own line.
      out << "," << "\n";
      write_point();
    }
  }
  out << "\n";
  out << "]" << "\n";

  std::string answer = out.str();
  std::cout << "Creating a source file spanning " << (answer.size() + 512) / 1024 << " KB " << std::endl;
  return answer;
}
|
||||
|
||||
/**
 * Returns the generated benchmark document: build_json_array(1000000) stored
 * in a function-local static, so it is built on the first call only.
 */
static const simdjson::padded_string& get_my_json_str() {
  static simdjson::padded_string generated = build_json_array(1000000);
  return generated;
}
|
||||
|
||||
/** A point read from one generated { "x":…, "y":…, "z":… } object. */
struct my_point {
  double x; // value of the "x" field
  double y; // value of the "y" field
  double z; // value of the "z" field
};
|
||||
|
||||
// ./benchmark/bench_sax --benchmark_filter=largerandom
|
||||
|
||||
|
||||
/***
|
||||
* We start with the naive DOM-based approach.
|
||||
**/
|
||||
/**
 * Benchmark: parse the generated array of points (get_my_json_str) with the
 * DOM parser and copy every element into a std::vector<my_point>.
 *
 * An allocation failure is printed to stderr and reported through
 * State::SkipWithError, so the framework records the run as skipped rather
 * than seeing the function return before its measurement loop has executed.
 * A failed parse inside the loop is printed and then thrown.
 *
 * Counters: "Gigabytes" (bytes parsed, as a rate, 1000-based) and "docs"
 * (iterations as a rate).
 */
static void dom_parse_largerandom(State &state) {
  // Fetch the generated document
  const padded_string& json = get_my_json_str();

  // Allocate
  dom::parser parser;
  if (auto error = parser.allocate(json.size())) {
    cerr << error << endl;
    state.SkipWithError("allocating the parser failed");
    return;
  }

  // Read
  size_t bytes = 0;
  simdjson::error_code error;
  for (SIMDJSON_UNUSED auto _ : state) {
    std::vector<my_point> container;
    dom::element doc;
    if ((error = parser.parse(json).get(doc))) {
      std::cerr << "failure: " << error << std::endl;
      throw "Parsing failed";
    }
    for (auto p : doc) {
      container.emplace_back(my_point{p["x"], p["y"], p["z"]});
    }
    bytes += json.size();
    // Keep the compiler from discarding the vector that was just filled.
    benchmark::DoNotOptimize(container.data());
  }

  // Gigabyte: https://en.wikipedia.org/wiki/Gigabyte
  state.counters["Gigabytes"] = benchmark::Counter(
          double(bytes), benchmark::Counter::kIsRate,
          benchmark::Counter::OneK::kIs1000); // For GiB : kIs1024
  state.counters["docs"] = Counter(double(state.iterations()), benchmark::Counter::kIsRate);
}
|
||||
|
||||
BENCHMARK(dom_parse_largerandom)->Repetitions(REPETITIONS)->ComputeStatistics("max", [](const std::vector<double>& v) -> double {
|
||||
return *(std::max_element(std::begin(v), std::end(v)));
|
||||
})->DisplayAggregatesOnly(true);
|
||||
|
||||
#if SIMDJSON_IMPLEMENTATION_HASWELL
|
||||
|
||||
/***
|
||||
* Next we are going to code the SAX approach.
|
||||
**/
|
||||
|
||||
SIMDJSON_TARGET_HASWELL
|
||||
|
||||
namespace largerandom {
|
||||
namespace {
|
||||
|
||||
using namespace simdjson;
|
||||
using namespace haswell;
|
||||
using namespace haswell::stage2;
|
||||
struct sax_point_reader_visitor {
|
||||
public:
|
||||
sax_point_reader_visitor(std::vector<my_point> &_points) : points(_points) {
|
||||
}
|
||||
|
||||
simdjson_really_inline error_code visit_document_start(json_iterator &) { return SUCCESS; }
|
||||
simdjson_really_inline error_code visit_object_start(json_iterator &) { return SUCCESS; }
|
||||
simdjson_really_inline error_code visit_key(json_iterator &, const uint8_t *key) {
|
||||
switch(key[0]) {
|
||||
case 'x':
|
||||
idx = 0;
|
||||
break;
|
||||
case 'y':
|
||||
idx = 2;
|
||||
break;
|
||||
case 'z':
|
||||
idx = 3;
|
||||
break;
|
||||
}
|
||||
return SUCCESS;
|
||||
}
|
||||
simdjson_really_inline error_code visit_primitive(json_iterator &, const uint8_t *value) {
|
||||
return numberparsing::parse_double(value).get(buffer[idx]);
|
||||
}
|
||||
simdjson_really_inline error_code visit_array_start(json_iterator &) { return SUCCESS; }
|
||||
simdjson_really_inline error_code visit_array_end(json_iterator &) { return SUCCESS; }
|
||||
simdjson_really_inline error_code visit_object_end(json_iterator &) { return SUCCESS; }
|
||||
simdjson_really_inline error_code visit_document_end(json_iterator &) { return SUCCESS; }
|
||||
simdjson_really_inline error_code visit_empty_array(json_iterator &) { return SUCCESS; }
|
||||
simdjson_really_inline error_code visit_empty_object(json_iterator &) { return SUCCESS; }
|
||||
simdjson_really_inline error_code visit_root_primitive(json_iterator &, const uint8_t *) { return SUCCESS; }
|
||||
simdjson_really_inline error_code increment_count(json_iterator &) { return SUCCESS; }
|
||||
std::vector<my_point> &points;
|
||||
size_t idx{0};
|
||||
double buffer[3];
|
||||
};
|
||||
|
||||
struct sax_point_reader {
|
||||
std::vector<my_point> points;
|
||||
std::unique_ptr<uint8_t[]> string_buf;
|
||||
size_t capacity;
|
||||
dom_parser_implementation dom_parser;
|
||||
|
||||
sax_point_reader();
|
||||
error_code set_capacity(size_t new_capacity);
|
||||
error_code read_points(const padded_string &json);
|
||||
}; // struct sax_point_reader
|
||||
|
||||
sax_point_reader::sax_point_reader() : points{}, string_buf{}, capacity{0}, dom_parser() {
|
||||
}
|
||||
|
||||
/**
 * Sizes the reader for documents of up to `new_capacity` bytes.
 *
 * Allocates the string buffer, sets the capacity of the underlying parser and,
 * the first time only (while `capacity` is still 0), its maximum depth.
 *
 * @return MEMALLOC if the string buffer cannot be allocated, the error
 *         reported by the parser if it cannot be sized, SUCCESS otherwise.
 */
error_code sax_point_reader::set_capacity(size_t new_capacity) {
  // string_capacity copied from document::allocate
  size_t string_capacity = SIMDJSON_ROUNDUP_N(5 * new_capacity / 3 + 32, 64);
  string_buf.reset(new (std::nothrow) uint8_t[string_capacity]);
  // nothrow new signals failure with a null pointer; report it instead of
  // carrying on with no buffer.
  if (!string_buf) { return MEMALLOC; }
  if (auto error = dom_parser.set_capacity(new_capacity)) { return error; }
  if (capacity == 0) { // set max depth the first time only
    if (auto error = dom_parser.set_max_depth(DEFAULT_MAX_DEPTH)) { return error; }
  }
  capacity = new_capacity;
  return SUCCESS;
}
|
||||
|
||||
/**
 * Reads `json`: clears `points`, grows the reader if the document is larger
 * than the current capacity, runs stage 1 and walks the document with
 * sax_point_reader_visitor.
 *
 * @return the first error reported by growing, stage 1 or the walk; SUCCESS
 *         otherwise.
 */
error_code sax_point_reader::read_points(const padded_string &json) {
  points.clear();

  // Allocate capacity if needed: grow to the size of this document (asking for
  // the current capacity again would leave the reader too small).
  if (capacity < json.size()) {
    if (auto error = set_capacity(json.size())) { return error; }
  }

  // Run stage 1 first.
  if (auto error = dom_parser.stage1((uint8_t *)json.data(), json.size(), false)) { return error; }

  // Then walk the document, handing every event to the point visitor
  json_iterator iter(dom_parser, 0);
  sax_point_reader_visitor visitor(points);
  if (auto error = iter.walk_document<false>(visitor)) { return error; }
  return SUCCESS;
}
|
||||
|
||||
} // unnamed namespace
|
||||
} // namespace largerandom
|
||||
|
||||
SIMDJSON_UNTARGET_REGION
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
// ./benchmark/bench_sax --benchmark_filter=largerandom
|
||||
/**
 * Benchmark: read the generated array of points (get_my_json_str) with
 * largerandom::sax_point_reader.
 *
 * The reader is sized once, exercised ten times as warm-up and then measured.
 * Any error reported by the reader is thrown as the error code.
 *
 * Counters: "Gigabytes" (bytes read, as a rate, 1000-based) and "docs"
 * (iterations as a rate).
 */
static void sax_parse_largerandom(State &state) {
  // Fetch the generated document
  const padded_string& json = get_my_json_str();

  // Allocate
  largerandom::sax_point_reader reader;
  if (auto error = reader.set_capacity(json.size())) { throw error; }

  // Warm up: ten reads before measuring
  for (size_t warmup = 10; warmup > 0; warmup--) {
    if (auto error = reader.read_points(json)) { throw error; }
  }

  // Measure
  size_t bytes = 0;
  for (SIMDJSON_UNUSED auto _ : state) {
    if (auto error = reader.read_points(json)) { throw error; }
    bytes += json.size();
    benchmark::DoNotOptimize(reader.points.data());
  }

  // Gigabyte: https://en.wikipedia.org/wiki/Gigabyte
  const auto rate = benchmark::Counter::kIsRate;
  state.counters["Gigabytes"] = benchmark::Counter(
          double(bytes), rate,
          benchmark::Counter::OneK::kIs1000); // For GiB : kIs1024
  state.counters["docs"] = Counter(double(state.iterations()), rate);
}
|
||||
BENCHMARK(sax_parse_largerandom)->Repetitions(REPETITIONS)->ComputeStatistics("max", [](const std::vector<double>& v) -> double {
|
||||
return *(std::max_element(std::begin(v), std::end(v)));
|
||||
})->DisplayAggregatesOnly(true);
|
||||
|
||||
#endif // SIMDJSON_IMPLEMENTATION_HASWELL
|
||||
|
||||
#endif // SIMDJSON_EXCEPTIONS
|
||||
#include "partial_tweets/sax.h"
|
||||
#include "largerandom/sax.h"
|
||||
|
||||
BENCHMARK_MAIN();
|
||||
|
|
|
@ -116,18 +116,18 @@ struct event_collector {
|
|||
|
||||
#if defined(__linux__)
|
||||
LinuxEvents<PERF_TYPE_HARDWARE> linux_events;
|
||||
event_collector() : linux_events(vector<int>{
|
||||
event_collector(bool quiet = false) : linux_events(vector<int>{
|
||||
PERF_COUNT_HW_CPU_CYCLES,
|
||||
PERF_COUNT_HW_INSTRUCTIONS,
|
||||
PERF_COUNT_HW_BRANCH_MISSES,
|
||||
PERF_COUNT_HW_CACHE_REFERENCES,
|
||||
PERF_COUNT_HW_CACHE_MISSES
|
||||
}) {}
|
||||
}, quiet) {}
|
||||
bool has_events() {
|
||||
return linux_events.is_working();
|
||||
}
|
||||
#else
|
||||
event_collector() {}
|
||||
event_collector(SIMDJSON_UNUSED bool _quiet = false) {}
|
||||
bool has_events() {
|
||||
return false;
|
||||
}
|
||||
|
|
|
@ -0,0 +1,80 @@
|
|||
#pragma once
|
||||
|
||||
/**
 * Generic benchmark driver.
 *
 * @tparam B implementation being measured; must provide Run(json) returning
 *           something testable for success, Result() and ItemCount().
 * @tparam R reference implementation with the same shape; its Result() is
 *           compared against B's once, before measuring.
 * @param state the Google Benchmark state.
 * @param json  the document every run reads.
 *
 * The run is skipped (State::SkipWithError) if the warm-up run, the reference
 * run or any measured run fails, or if the two results differ. Each measured
 * run is bracketed by event_collector start()/end() and fed to an
 * event_aggregate; throughput and, when the collector has events, hardware
 * counters are published as benchmark counters and summarised in the label.
 */
template<typename B, typename R> static void JsonBenchmark(benchmark::State &state, const simdjson::padded_string &json) {
  event_collector collector(true);
  event_aggregate events;

  // Warmup and equality check (make sure the data is right!)
  B bench;
  if (!bench.Run(json)) {
    state.SkipWithError("warmup tweet reading failed");
    return;
  }
  {
    R reference;
    if (!reference.Run(json)) {
      state.SkipWithError("reference tweet reading failed");
      return;
    }
    if (bench.Result() != reference.Result()) {
      state.SkipWithError("results are not the same");
      return;
    }
  }

  // Run the benchmark, collecting events around every run
  for (SIMDJSON_UNUSED auto _ : state) {
    collector.start();

    if (!bench.Run(json)) {
      state.SkipWithError("tweet reading failed");
      return;
    }

    events << collector.end();
  }

  // Values shared by the counters and the label below
  const double byte_count = double(json.size());
  const auto best_seconds = events.best.elapsed_sec();

  // Throughput
  state.SetBytesProcessed(json.size() * state.iterations());
  state.SetItemsProcessed(bench.ItemCount() * state.iterations());
  state.counters["best_bytes_per_sec"] = benchmark::Counter(byte_count / best_seconds);
  state.counters["best_items_per_sec"] = benchmark::Counter(double(bench.ItemCount()) / best_seconds);

  state.counters["docs_per_sec"] = benchmark::Counter(1.0, benchmark::Counter::kIsIterationInvariantRate);
  state.counters["best_docs_per_sec"] = benchmark::Counter(1.0 / best_seconds);

  // Hardware counters, when the collector has them
  if (collector.has_events()) {
    // Aggregate over all runs
    state.counters["instructions"] = events.instructions();
    state.counters["cycles"] = events.cycles();
    state.counters["branch_miss"] = events.branch_misses();
    state.counters["cache_miss"] = events.cache_misses();
    state.counters["cache_ref"] = events.cache_references();

    state.counters["instructions_per_byte"] = events.instructions() / byte_count;
    state.counters["instructions_per_cycle"] = events.instructions() / events.cycles();
    state.counters["cycles_per_byte"] = events.cycles() / byte_count;
    state.counters["frequency"] = benchmark::Counter(events.cycles(), benchmark::Counter::kIsIterationInvariantRate);

    // Best run
    state.counters["best_instructions"] = events.best.instructions();
    state.counters["best_cycles"] = events.best.cycles();
    state.counters["best_branch_miss"] = events.best.branch_misses();
    state.counters["best_cache_miss"] = events.best.cache_misses();
    state.counters["best_cache_ref"] = events.best.cache_references();

    state.counters["best_instructions_per_byte"] = events.best.instructions() / byte_count;
    state.counters["best_instructions_per_cycle"] = events.best.instructions() / events.best.cycles();
    state.counters["best_cycles_per_byte"] = events.best.cycles() / byte_count;
    state.counters["best_frequency"] = events.best.cycles() / best_seconds;
  }
  state.counters["bytes"] = benchmark::Counter(byte_count);
  state.counters["items"] = benchmark::Counter(double(bench.ItemCount()));

  // Build the label
  using namespace std;
  stringstream label;
  label << fixed << setprecision(2);
  label << "[best:";
  label << " throughput=" << setw(6) << (byte_count / 1000000000.0 / best_seconds) << " GB/s";
  label << " doc_throughput=" << setw(6) << uint64_t(1.0 / best_seconds) << " docs/s";

  if (collector.has_events()) {
    label << " instructions=" << setw(12) << uint64_t(events.best.instructions()) << setw(0);
    label << " cycles=" << setw(12) << uint64_t(events.best.cycles()) << setw(0);
    label << " branch_miss=" << setw(8) << uint64_t(events.best.branch_misses()) << setw(0);
    label << " cache_miss=" << setw(8) << uint64_t(events.best.cache_misses()) << setw(0);
    label << " cache_ref=" << setw(10) << uint64_t(events.best.cache_references()) << setw(0);
  }

  label << " items=" << setw(10) << bench.ItemCount() << setw(0);
  label << " avg_time=" << setw(10) << uint64_t(events.elapsed_ns()) << setw(0) << " ns";
  label << "]";

  state.SetLabel(label.str());
}
|
|
@ -0,0 +1,69 @@
|
|||
#pragma once
|
||||
|
||||
#if SIMDJSON_EXCEPTIONS
|
||||
|
||||
#include "kostya.h"
|
||||
|
||||
namespace kostya {
|
||||
|
||||
using namespace simdjson;
|
||||
|
||||
class Dom {
|
||||
public:
|
||||
simdjson_really_inline bool Run(const padded_string &json);
|
||||
|
||||
simdjson_really_inline const std::vector<my_point> &Result() { return container; }
|
||||
simdjson_really_inline size_t ItemCount() { return container.size(); }
|
||||
|
||||
private:
|
||||
dom::parser parser{};
|
||||
std::vector<my_point> container{};
|
||||
};
|
||||
|
||||
/**
 * Parses `json` with the DOM parser and stores the x/y/z of every entry of
 * the top-level "coordinates" array.
 *
 * @return always true; this function has no failure return path.
 */
simdjson_really_inline bool Dom::Run(const padded_string &json) {
  container.clear();

  for (auto coord : parser.parse(json)["coordinates"]) {
    container.emplace_back(my_point{coord["x"], coord["y"], coord["z"]});
  }

  return true;
}
|
||||
|
||||
BENCHMARK_TEMPLATE(Kostya, Dom);
|
||||
|
||||
namespace sum {
|
||||
|
||||
class Dom {
|
||||
public:
|
||||
simdjson_really_inline bool Run(const padded_string &json);
|
||||
|
||||
simdjson_really_inline my_point &Result() { return sum; }
|
||||
simdjson_really_inline size_t ItemCount() { return count; }
|
||||
|
||||
private:
|
||||
dom::parser parser{};
|
||||
my_point sum{};
|
||||
size_t count{};
|
||||
};
|
||||
|
||||
/**
 * Parses `json` with the DOM parser and accumulates the x, y and z of every
 * entry of the top-level "coordinates" array, counting the entries.
 *
 * @return always true; this function has no failure return path.
 */
simdjson_really_inline bool Dom::Run(const padded_string &json) {
  // Start from a clean slate on every run
  count = 0;
  sum = my_point{ 0, 0, 0 };

  for (auto entry : parser.parse(json)["coordinates"]) {
    sum.x += static_cast<double>(entry["x"]);
    sum.y += static_cast<double>(entry["y"]);
    sum.z += static_cast<double>(entry["z"]);
    ++count;
  }

  return true;
}
|
||||
|
||||
BENCHMARK_TEMPLATE(KostyaSum, Dom);
|
||||
|
||||
} // namespace sum
|
||||
} // namespace kostya
|
||||
|
||||
#endif // SIMDJSON_EXCEPTIONS
|
|
@ -0,0 +1,96 @@
|
|||
#pragma once
|
||||
|
||||
#if SIMDJSON_EXCEPTIONS
|
||||
|
||||
#include "kostya.h"
|
||||
|
||||
namespace kostya {
|
||||
|
||||
using namespace simdjson;
|
||||
using namespace simdjson::builtin;
|
||||
|
||||
class Iter {
|
||||
public:
|
||||
simdjson_really_inline bool Run(const padded_string &json);
|
||||
|
||||
simdjson_really_inline const std::vector<my_point> &Result() { return container; }
|
||||
simdjson_really_inline size_t ItemCount() { return container.size(); }
|
||||
|
||||
private:
|
||||
ondemand::parser parser{};
|
||||
std::vector<my_point> container{};
|
||||
|
||||
simdjson_really_inline simdjson_result<double> first_double(ondemand::json_iterator &iter, const char *key) {
|
||||
if (!iter.start_object() || ondemand::raw_json_string(iter.field_key()) != key || iter.field_value()) { throw "Invalid field"; }
|
||||
return iter.consume_double();
|
||||
}
|
||||
|
||||
simdjson_really_inline simdjson_result<double> next_double(ondemand::json_iterator &iter, const char *key) {
|
||||
if (!iter.has_next_field() || ondemand::raw_json_string(iter.field_key()) != key || iter.field_value()) { throw "Invalid field"; }
|
||||
return iter.consume_double();
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
/**
 * Walks `json` with a raw on-demand iterator: finds "coordinates", then for
 * each array element reads x, y and z (in that order, as the first three
 * fields) and skips the rest of the object.
 *
 * @return false if "coordinates" cannot be found (also reported on stderr) or
 *         if skipping the remainder of an element fails; true otherwise,
 *         including when start_array() reports nothing to iterate.
 */
simdjson_really_inline bool Iter::Run(const padded_string &json) {
  container.clear();

  using std::cerr;
  using std::endl;
  auto iter = parser.iterate_raw(json).value();
  if (!iter.start_object() || !iter.find_field_raw("coordinates")) {
    cerr << "find coordinates field failed" << endl;
    return false;
  }

  // Nothing to iterate: leave the container empty.
  if (!iter.start_array()) { return true; }

  do {
    // Braced initialisation evaluates the three reads left to right.
    container.emplace_back(my_point{
      first_double(iter, "x"),
      next_double(iter, "y"),
      next_double(iter, "z")
    });
    if (iter.skip_container()) { return false; } // Skip the rest of the coordinates object
  } while (iter.has_next_element());

  return true;
}
|
||||
|
||||
BENCHMARK_TEMPLATE(Kostya, Iter);
|
||||
|
||||
|
||||
namespace sum {
|
||||
|
||||
class Iter {
|
||||
public:
|
||||
simdjson_really_inline bool Run(const padded_string &json);
|
||||
|
||||
simdjson_really_inline my_point &Result() { return sum; }
|
||||
simdjson_really_inline size_t ItemCount() { return count; }
|
||||
|
||||
private:
|
||||
ondemand::parser parser{};
|
||||
my_point sum{};
|
||||
size_t count{};
|
||||
};
|
||||
|
||||
/**
 * Walks `json` with a raw on-demand iterator: finds "coordinates", then for
 * each array element finds x, y and z in turn, adds each to the running sums
 * and skips the rest of the object.
 *
 * @return false as soon as any navigation step fails; true once every element
 *         has been added up.
 */
simdjson_really_inline bool Iter::Run(const padded_string &json) {
  // Start from a clean slate on every run
  count = 0;
  sum = my_point{0, 0, 0};

  auto iter = parser.iterate_raw(json).value();

  // Locate the array
  if (!iter.start_object()) { return false; }
  if (!iter.find_field_raw("coordinates")) { return false; }
  if (!iter.start_array()) { return false; }

  do {
    // x
    if (!iter.start_object()) { return false; }
    if (!iter.find_field_raw("x")) { return false; }
    sum.x += iter.consume_double();

    // y
    if (!iter.has_next_field()) { return false; }
    if (!iter.find_field_raw("y")) { return false; }
    sum.y += iter.consume_double();

    // z
    if (!iter.has_next_field()) { return false; }
    if (!iter.find_field_raw("z")) { return false; }
    sum.z += iter.consume_double();

    if (iter.skip_container()) { return false; } // Skip the rest of the coordinates object
    count++;
  } while (iter.has_next_element());

  return true;
}
|
||||
|
||||
BENCHMARK_TEMPLATE(KostyaSum, Iter);
|
||||
|
||||
} // namespace sum
|
||||
} // namespace kostya
|
||||
|
||||
#endif // SIMDJSON_EXCEPTIONS
|
|
@ -0,0 +1,95 @@
|
|||
#pragma once
|
||||
|
||||
#if SIMDJSON_EXCEPTIONS
|
||||
|
||||
//
|
||||
// Interface
|
||||
//
|
||||
|
||||
namespace kostya {
|
||||
template<typename T> static void Kostya(benchmark::State &state);
|
||||
namespace sum {
|
||||
template<typename T> static void KostyaSum(benchmark::State &state);
|
||||
}
|
||||
|
||||
using namespace simdjson;
|
||||
|
||||
/**
 * Appends one coordinate object to `myss`: random "x", "y" and "z" values, a
 * "name" made of six random letters, a space and a random integer, and a
 * fixed "opts" object. No trailing separator or newline is written.
 *
 * Samples are drawn from `dis(e)` in the order x, y, z, the six letters, the
 * integer.
 */
static void append_coordinate(std::default_random_engine &e, std::uniform_real_distribution<> &dis, std::stringstream &myss) {
  // Maps the next sample onto a letter starting at 'a'.
  auto random_letter = [&]() { return char('a' + dis(e) * 25); };

  myss << R"( {)" << "\n";

  // The three numeric fields, one per line
  for (const char *field : { R"( "x": )", R"( "y": )", R"( "z": )" }) {
    myss << field << dis(e) << "," << "\n";
  }

  // "name": six letters, a space, then an integer
  myss << R"( "name": ")";
  for (int letter = 0; letter < 6; letter++) {
    myss << random_letter();
  }
  myss << " " << int(dis(e) * 10000) << "\"," << "\n";

  // Fixed trailer
  myss << R"( "opts": {)" << "\n"
       << R"( "1": [)" << "\n"
       << R"( 1,)" << "\n"
       << R"( true)" << "\n"
       << R"( ])" << "\n"
       << R"( })" << "\n"
       << R"( })";
}
|
||||
|
||||
// Builds the benchmark document: an object with a "coordinates" array followed
// by an "info" string. The array holds N coordinates; because the last
// coordinate is always appended after the loop, N == 0 still yields one.
// Prints the size of the generated text to std::cout before returning it.
static std::string build_json_array(size_t N) {
  std::default_random_engine engine;
  std::uniform_real_distribution<> unit(0, 1);
  std::stringstream out;

  out << R"({)" << std::endl;
  out << R"( "coordinates": [)" << std::endl;
  // Every coordinate except the last one is followed by a comma.
  for (size_t remaining = N; remaining > 1; remaining--) {
    append_coordinate(engine, unit, out);
    out << "," << std::endl;
  }
  append_coordinate(engine, unit, out);
  out << std::endl;
  out << R"( ],)" << std::endl;
  out << R"( "info": "some info")" << std::endl;
  out << R"(})" << std::endl;

  std::string answer = out.str();
  std::cout << "Creating a source file spanning " << (answer.size() + 512) / 1024 << " KB " << std::endl;
  return answer;
}
|
||||
|
||||
// Returns the generated benchmark document. It is built on the first call
// (function-local static) and the same object is returned afterwards.
static const padded_string &get_built_json_array() {
  // Number of coordinates requested from build_json_array().
  constexpr size_t coordinate_count = 524288;
  static padded_string json = build_json_array(coordinate_count);
  return json;
}
|
||||
|
||||
struct my_point {
|
||||
double x;
|
||||
double y;
|
||||
double z;
|
||||
simdjson_really_inline bool operator==(const my_point &other) const {
|
||||
return x == other.x && y == other.y && z == other.z;
|
||||
}
|
||||
simdjson_really_inline bool operator!=(const my_point &other) const { return !(*this == other); }
|
||||
};
|
||||
|
||||
// Writes a point as "x,y,z" followed by std::endl (newline plus flush).
SIMDJSON_UNUSED static std::ostream &operator<<(std::ostream &o, const my_point &p) {
  o << p.x << "," << p.y << "," << p.z;
  o << std::endl;
  return o;
}
|
||||
|
||||
} // namespace kostya
|
||||
|
||||
//
|
||||
// Implementation
|
||||
//
|
||||
#include <vector>
|
||||
#include "event_counter.h"
|
||||
#include "dom.h"
|
||||
#include "json_benchmark.h"
|
||||
|
||||
namespace kostya {
|
||||
|
||||
// Benchmark entry point: hands implementation T, with Dom as the second
// template argument of JsonBenchmark, the generated coordinates document.
template<typename T> static void Kostya(benchmark::State &state) {
  const auto &json = get_built_json_array();
  JsonBenchmark<T, Dom>(state, json);
}
|
||||
|
||||
namespace sum {
|
||||
// Benchmark entry point for the summing variant: hands implementation T, with
// Dom as the second template argument of JsonBenchmark, the generated document.
template<typename T> static void KostyaSum(benchmark::State &state) {
  const auto &json = get_built_json_array();
  JsonBenchmark<T, Dom>(state, json);
}
|
||||
}
|
||||
|
||||
} // namespace kostya
|
||||
|
||||
#endif // SIMDJSON_EXCEPTIONS
|
|
@ -0,0 +1,76 @@
|
|||
#pragma once
|
||||
|
||||
#if SIMDJSON_EXCEPTIONS
|
||||
|
||||
#include "kostya.h"
|
||||
|
||||
namespace kostya {
|
||||
|
||||
using namespace simdjson;
|
||||
using namespace simdjson::builtin;
|
||||
|
||||
class OnDemand {
|
||||
public:
|
||||
simdjson_really_inline bool Run(const padded_string &json);
|
||||
|
||||
simdjson_really_inline const std::vector<my_point> &Result() { return container; }
|
||||
simdjson_really_inline size_t ItemCount() { return container.size(); }
|
||||
|
||||
private:
|
||||
ondemand::parser parser{};
|
||||
std::vector<my_point> container{};
|
||||
};
|
||||
|
||||
// Clears the previous result, then iterates the "coordinates" array of `json`
// and appends one my_point per element, reading "x", "y" and "z" in that order.
// Always returns true; this header is only compiled when SIMDJSON_EXCEPTIONS is
// set, so parse failures presumably surface as exceptions (confirm against the
// on-demand API).
simdjson_really_inline bool OnDemand::Run(const padded_string &json) {
  container.clear();

  auto doc = parser.iterate(json);
  for (ondemand::object coord : doc["coordinates"]) {
    container.emplace_back(my_point{coord["x"], coord["y"], coord["z"]});
  }

  return true;
}
|
||||
|
||||
BENCHMARK_TEMPLATE(Kostya, OnDemand);
|
||||
|
||||
|
||||
namespace sum {
|
||||
|
||||
class OnDemand {
|
||||
public:
|
||||
simdjson_really_inline bool Run(const padded_string &json);
|
||||
|
||||
simdjson_really_inline my_point &Result() { return sum; }
|
||||
simdjson_really_inline size_t ItemCount() { return count; }
|
||||
|
||||
private:
|
||||
ondemand::parser parser{};
|
||||
my_point sum{};
|
||||
size_t count{};
|
||||
};
|
||||
|
||||
// Resets the accumulators, then walks the "coordinates" array of `json`,
// adding each element's "x", "y" and "z" (read in that order) to the running
// sums and counting the elements. Always returns true.
simdjson_really_inline bool OnDemand::Run(const padded_string &json) {
  sum = my_point{0, 0, 0};
  count = 0;

  auto doc = parser.iterate(json);
  for (ondemand::object coord : doc["coordinates"]) {
    const double x = double(coord["x"]);
    sum.x += x;
    const double y = double(coord["y"]);
    sum.y += y;
    const double z = double(coord["z"]);
    sum.z += z;
    ++count;
  }

  return true;
}
|
||||
|
||||
BENCHMARK_TEMPLATE(KostyaSum, OnDemand);
|
||||
|
||||
} // namespace sum
|
||||
} // namespace kostya
|
||||
|
||||
#endif // SIMDJSON_EXCEPTIONS
|
|
@ -0,0 +1,69 @@
|
|||
#pragma once
|
||||
|
||||
#if SIMDJSON_EXCEPTIONS
|
||||
|
||||
#include "largerandom.h"
|
||||
|
||||
namespace largerandom {
|
||||
|
||||
using namespace simdjson;
|
||||
|
||||
class Dom {
|
||||
public:
|
||||
simdjson_really_inline bool Run(const padded_string &json);
|
||||
|
||||
simdjson_really_inline const std::vector<my_point> &Result() { return container; }
|
||||
simdjson_really_inline size_t ItemCount() { return container.size(); }
|
||||
|
||||
private:
|
||||
dom::parser parser{};
|
||||
std::vector<my_point> container{};
|
||||
};
|
||||
|
||||
// Clears the previous result, parses `json` and appends one my_point per
// element of the top-level value, built from its "x", "y" and "z" members.
// Always returns true.
simdjson_really_inline bool Dom::Run(const padded_string &json) {
  container.clear();

  for (auto element : parser.parse(json)) {
    const double x = element["x"];
    const double y = element["y"];
    const double z = element["z"];
    container.emplace_back(my_point{x, y, z});
  }

  return true;
}
|
||||
|
||||
BENCHMARK_TEMPLATE(LargeRandom, Dom);
|
||||
|
||||
namespace sum {
|
||||
|
||||
class Dom {
|
||||
public:
|
||||
simdjson_really_inline bool Run(const padded_string &json);
|
||||
|
||||
simdjson_really_inline my_point &Result() { return sum; }
|
||||
simdjson_really_inline size_t ItemCount() { return count; }
|
||||
|
||||
private:
|
||||
dom::parser parser{};
|
||||
my_point sum{};
|
||||
size_t count{};
|
||||
};
|
||||
|
||||
// Resets the accumulators, parses `json` and adds the "x", "y" and "z" members
// of every element of the top-level value to the running sums, counting the
// elements. Always returns true.
simdjson_really_inline bool Dom::Run(const padded_string &json) {
  sum = my_point{0, 0, 0};
  count = 0;

  for (auto element : parser.parse(json)) {
    const double x = double(element["x"]);
    sum.x += x;
    const double y = double(element["y"]);
    sum.y += y;
    const double z = double(element["z"]);
    sum.z += z;
    ++count;
  }

  return true;
}
|
||||
|
||||
BENCHMARK_TEMPLATE(LargeRandomSum, Dom);
|
||||
|
||||
} // namespace sum
|
||||
} // namespace largerandom
|
||||
|
||||
#endif // SIMDJSON_EXCEPTIONS
|
|
@ -0,0 +1,92 @@
|
|||
#pragma once
|
||||
|
||||
#if SIMDJSON_EXCEPTIONS
|
||||
|
||||
#include "largerandom.h"
|
||||
|
||||
namespace largerandom {
|
||||
|
||||
using namespace simdjson;
|
||||
using namespace simdjson::builtin;
|
||||
|
||||
class Iter {
|
||||
public:
|
||||
simdjson_really_inline bool Run(const padded_string &json);
|
||||
|
||||
simdjson_really_inline const std::vector<my_point> &Result() { return container; }
|
||||
simdjson_really_inline size_t ItemCount() { return container.size(); }
|
||||
|
||||
private:
|
||||
ondemand::parser parser{};
|
||||
std::vector<my_point> container{};
|
||||
|
||||
simdjson_really_inline double first_double(ondemand::json_iterator &iter) {
|
||||
if (iter.start_object().error() || iter.field_key().error() || iter.field_value()) { throw "Invalid field"; }
|
||||
return iter.consume_double();
|
||||
}
|
||||
|
||||
simdjson_really_inline double next_double(ondemand::json_iterator &iter) {
|
||||
if (!iter.has_next_field() || iter.field_key().error() || iter.field_value()) { throw "Invalid field"; }
|
||||
return iter.consume_double();
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
// Clears the previous result and walks the top-level array of `json`. Each
// element must be an object with exactly three fields, whose values are read in
// order as x, y and z; a fourth field throws "Too many fields". When
// start_array() yields false the loop is skipped. Always returns true.
simdjson_really_inline bool Iter::Run(const padded_string &json) {
  container.clear();

  auto iter = parser.iterate_raw(json).value();
  if (!iter.start_array()) { return true; }

  do {
    // The three reads advance the iterator, so their order matters.
    const double x = first_double(iter);
    const double y = next_double(iter);
    const double z = next_double(iter);
    container.emplace_back(my_point{x, y, z});
    if (iter.has_next_field()) { throw "Too many fields"; }
  } while (iter.has_next_element());

  return true;
}
|
||||
|
||||
BENCHMARK_TEMPLATE(LargeRandom, Iter);
|
||||
|
||||
|
||||
namespace sum {
|
||||
|
||||
class Iter {
|
||||
public:
|
||||
simdjson_really_inline bool Run(const padded_string &json);
|
||||
|
||||
simdjson_really_inline my_point &Result() { return sum; }
|
||||
simdjson_really_inline size_t ItemCount() { return count; }
|
||||
|
||||
private:
|
||||
ondemand::parser parser{};
|
||||
my_point sum{};
|
||||
size_t count{};
|
||||
};
|
||||
|
||||
// Resets the accumulators and walks the top-level array of `json` with the raw
// iterator. Each element must be an object whose fields are, in order, "x",
// "y" and "z", followed directly by '}'. Returns false as soon as one of these
// expectations fails (including when start_array() yields false), true
// otherwise.
simdjson_really_inline bool Iter::Run(const padded_string &json) {
  sum = my_point{0, 0, 0};
  count = 0;

  auto iter = parser.iterate_raw(json).value();
  if (!iter.start_array()) { return false; }

  do {
    // "x": first field of the object.
    if (!iter.start_object()) { return false; }
    if (iter.field_key().value() != "x") { return false; }
    if (iter.field_value()) { return false; }
    sum.x += iter.consume_double();

    // "y"
    if (!iter.has_next_field()) { return false; }
    if (iter.field_key().value() != "y") { return false; }
    if (iter.field_value()) { return false; }
    sum.y += iter.consume_double();

    // "z"
    if (!iter.has_next_field()) { return false; }
    if (iter.field_key().value() != "z") { return false; }
    if (iter.field_value()) { return false; }
    sum.z += iter.consume_double();

    // The next token has to close the object.
    if (*iter.advance() != '}') { return false; }
    ++count;
  } while (iter.has_next_element());

  return true;
}
|
||||
|
||||
BENCHMARK_TEMPLATE(LargeRandomSum, Iter);
|
||||
|
||||
} // namespace sum
|
||||
} // namespace largerandom
|
||||
|
||||
#endif // SIMDJSON_EXCEPTIONS
|
|
@ -0,0 +1,80 @@
|
|||
#pragma once
|
||||
|
||||
#if SIMDJSON_EXCEPTIONS
|
||||
|
||||
//
|
||||
// Interface
|
||||
//
|
||||
|
||||
namespace largerandom {
|
||||
template<typename T> static void LargeRandom(benchmark::State &state);
|
||||
namespace sum {
|
||||
template<typename T> static void LargeRandomSum(benchmark::State &state);
|
||||
}
|
||||
|
||||
using namespace simdjson;
|
||||
|
||||
// Builds a JSON array of N objects of the form { "x":…, "y":…, "z":…} with
// pseudo-random doubles in [0, 1), one object per line, and returns it as a
// string. Prints the size of the generated text to std::cout.
//
// The three draws for a point are separate statements: before C++17 the
// operands of a chained `<<` may be evaluated in any order, which would make
// the generated document compiler-dependent. '\n' replaces std::endl on the
// in-memory stream, where a flush per element serves no purpose.
static std::string build_json_array(size_t N) {
  std::default_random_engine e;
  std::uniform_real_distribution<> dis(0, 1);
  std::stringstream myss;

  // Writes one point object, without any separator or newline.
  auto append_point = [&]() {
    const double x = dis(e);
    const double y = dis(e);
    const double z = dis(e);
    myss << "{ \"x\":" << x << ", \"y\":" << y << ", \"z\":" << z << "}";
  };

  myss << "[" << '\n';
  if (N > 0) {
    append_point();
    myss << '\n';
  }
  for (size_t i = 1; i < N; i++) {
    myss << "," << '\n';
    append_point();
  }
  myss << '\n';
  myss << "]" << '\n';

  std::string answer = myss.str();
  std::cout << "Creating a source file spanning " << (answer.size() + 512) / 1024 << " KB " << std::endl;
  return answer;
}
|
||||
|
||||
// Returns the generated benchmark document. It is built on the first call
// (function-local static) and the same object is returned afterwards.
static const padded_string &get_built_json_array() {
  // Number of points requested from build_json_array().
  constexpr size_t point_count = 1000000;
  static padded_string json = build_json_array(point_count);
  return json;
}
|
||||
|
||||
struct my_point {
|
||||
double x;
|
||||
double y;
|
||||
double z;
|
||||
simdjson_really_inline bool operator==(const my_point &other) const {
|
||||
return x == other.x && y == other.y && z == other.z;
|
||||
}
|
||||
simdjson_really_inline bool operator!=(const my_point &other) const { return !(*this == other); }
|
||||
};
|
||||
|
||||
// Writes a point as "x,y,z" followed by std::endl (newline plus flush).
SIMDJSON_UNUSED static std::ostream &operator<<(std::ostream &o, const my_point &p) {
  o << p.x << "," << p.y << "," << p.z;
  o << std::endl;
  return o;
}
|
||||
|
||||
} // namespace largerandom
|
||||
|
||||
//
|
||||
// Implementation
|
||||
//
|
||||
#include <vector>
|
||||
#include "event_counter.h"
|
||||
#include "dom.h"
|
||||
#include "json_benchmark.h"
|
||||
|
||||
namespace largerandom {
|
||||
|
||||
// Benchmark entry point: hands implementation T, with Dom as the second
// template argument of JsonBenchmark, the generated array of points.
template<typename T> static void LargeRandom(benchmark::State &state) {
  const auto &json = get_built_json_array();
  JsonBenchmark<T, Dom>(state, json);
}
|
||||
|
||||
namespace sum {
|
||||
|
||||
// Benchmark entry point for the summing variant: hands implementation T, with
// Dom as the second template argument of JsonBenchmark, the generated array.
template<typename T> static void LargeRandomSum(benchmark::State &state) {
  const auto &json = get_built_json_array();
  JsonBenchmark<T, Dom>(state, json);
}
|
||||
|
||||
}
|
||||
} // namespace largerandom
|
||||
|
||||
#endif // SIMDJSON_EXCEPTIONS
|
|
@ -0,0 +1,73 @@
|
|||
#pragma once
|
||||
|
||||
#if SIMDJSON_EXCEPTIONS
|
||||
|
||||
#include "largerandom.h"
|
||||
|
||||
namespace largerandom {
|
||||
|
||||
using namespace simdjson;
|
||||
using namespace simdjson::builtin;
|
||||
|
||||
class OnDemand {
|
||||
public:
|
||||
simdjson_really_inline bool Run(const padded_string &json);
|
||||
|
||||
simdjson_really_inline const std::vector<my_point> &Result() { return container; }
|
||||
simdjson_really_inline size_t ItemCount() { return container.size(); }
|
||||
|
||||
private:
|
||||
ondemand::parser parser{};
|
||||
std::vector<my_point> container{};
|
||||
};
|
||||
|
||||
// Clears the previous result, then iterates the document as a sequence of
// objects and appends one my_point per object, reading "x", "y" and "z" in
// that order. Always returns true.
simdjson_really_inline bool OnDemand::Run(const padded_string &json) {
  container.clear();

  auto doc = parser.iterate(json);
  for (ondemand::object point : doc) {
    // Sequential reads keep the x, y, z lookup order of the braced initializer.
    const double x = point["x"];
    const double y = point["y"];
    const double z = point["z"];
    container.emplace_back(my_point{x, y, z});
  }

  return true;
}
|
||||
|
||||
BENCHMARK_TEMPLATE(LargeRandom, OnDemand);
|
||||
|
||||
|
||||
namespace sum {
|
||||
|
||||
class OnDemand {
|
||||
public:
|
||||
simdjson_really_inline bool Run(const padded_string &json);
|
||||
|
||||
simdjson_really_inline my_point &Result() { return sum; }
|
||||
simdjson_really_inline size_t ItemCount() { return count; }
|
||||
|
||||
private:
|
||||
ondemand::parser parser{};
|
||||
my_point sum{};
|
||||
size_t count{};
|
||||
};
|
||||
|
||||
// Resets the accumulators, then walks doc.get_array() and adds each object's
// "x", "y" and "z" (read in that order) to the running sums, counting the
// elements. Always returns true.
simdjson_really_inline bool OnDemand::Run(const padded_string &json) {
  sum = my_point{0, 0, 0};
  count = 0;

  auto doc = parser.iterate(json);
  for (ondemand::object point : doc.get_array()) {
    const double x = double(point["x"]);
    sum.x += x;
    const double y = double(point["y"]);
    sum.y += y;
    const double z = double(point["z"]);
    sum.z += z;
    ++count;
  }

  return true;
}
|
||||
|
||||
BENCHMARK_TEMPLATE(LargeRandomSum, OnDemand);
|
||||
|
||||
} // namespace sum
|
||||
} // namespace largerandom
|
||||
|
||||
#endif // SIMDJSON_EXCEPTIONS
|
|
@ -0,0 +1,124 @@
|
|||
#pragma once
|
||||
|
||||
#if SIMDJSON_EXCEPTIONS
|
||||
|
||||
#include "largerandom.h"
|
||||
|
||||
namespace largerandom {
|
||||
|
||||
using namespace simdjson;
|
||||
using namespace simdjson::builtin;
|
||||
using namespace simdjson::builtin::stage2;
|
||||
|
||||
class Sax {
|
||||
public:
|
||||
simdjson_really_inline bool Run(const padded_string &json) noexcept;
|
||||
|
||||
simdjson_really_inline const std::vector<my_point> &Result() { return container; }
|
||||
simdjson_really_inline size_t ItemCount() { return container.size(); }
|
||||
|
||||
private:
|
||||
simdjson_really_inline error_code RunNoExcept(const padded_string &json) noexcept;
|
||||
error_code Allocate(size_t new_capacity);
|
||||
std::unique_ptr<uint8_t[]> string_buf{};
|
||||
size_t capacity{};
|
||||
dom_parser_implementation dom_parser{};
|
||||
std::vector<my_point> container{};
|
||||
};
|
||||
|
||||
using namespace simdjson;
|
||||
using namespace simdjson::builtin;
|
||||
using namespace simdjson::builtin::stage2;
|
||||
struct sax_point_reader_visitor {
|
||||
public:
|
||||
std::vector<my_point> &points;
|
||||
enum {GOT_X=0, GOT_Y=1, GOT_Z=2, GOT_SOMETHING_ELSE=4};
|
||||
size_t idx{GOT_SOMETHING_ELSE};
|
||||
double buffer[3];
|
||||
|
||||
sax_point_reader_visitor(std::vector<my_point> &_points) : points(_points) {}
|
||||
|
||||
simdjson_really_inline error_code visit_object_start(json_iterator &) {
|
||||
idx = 0;
|
||||
return SUCCESS;
|
||||
}
|
||||
simdjson_really_inline error_code visit_primitive(json_iterator &, const uint8_t *value) {
|
||||
if(idx == GOT_SOMETHING_ELSE) { return simdjson::SUCCESS; }
|
||||
return numberparsing::parse_double(value).get(buffer[idx]);
|
||||
}
|
||||
simdjson_really_inline error_code visit_object_end(json_iterator &) {
|
||||
points.emplace_back(my_point{buffer[0], buffer[1], buffer[2]});
|
||||
return SUCCESS;
|
||||
}
|
||||
|
||||
simdjson_really_inline error_code visit_document_start(json_iterator &) { return SUCCESS; }
|
||||
simdjson_really_inline error_code visit_key(json_iterator &, const uint8_t * key) {
|
||||
switch(key[1]) {
|
||||
// Technically, we should check the other characters
|
||||
// in the key, but we are cheating to go as fast
|
||||
// as possible.
|
||||
case 'x':
|
||||
idx = GOT_X;
|
||||
break;
|
||||
case 'y':
|
||||
idx = GOT_Y;
|
||||
break;
|
||||
case 'z':
|
||||
idx = GOT_Z;
|
||||
break;
|
||||
default:
|
||||
idx = GOT_SOMETHING_ELSE;
|
||||
}
|
||||
return SUCCESS;
|
||||
}
|
||||
simdjson_really_inline error_code visit_array_start(json_iterator &) { return SUCCESS; }
|
||||
simdjson_really_inline error_code visit_array_end(json_iterator &) { return SUCCESS; }
|
||||
simdjson_really_inline error_code visit_document_end(json_iterator &) { return SUCCESS; }
|
||||
simdjson_really_inline error_code visit_empty_array(json_iterator &) { return SUCCESS; }
|
||||
simdjson_really_inline error_code visit_empty_object(json_iterator &) { return SUCCESS; }
|
||||
simdjson_really_inline error_code visit_root_primitive(json_iterator &, const uint8_t *) { return SUCCESS; }
|
||||
simdjson_really_inline error_code increment_count(json_iterator &) { return SUCCESS; }
|
||||
};
|
||||
|
||||
// NOTE: the dom_parser does not need to be allocated beforehand; RunNoExcept() grows its capacity when needed.
|
||||
bool Sax::Run(const padded_string &json) noexcept {
|
||||
auto error = RunNoExcept(json);
|
||||
if (error) { std::cerr << error << std::endl; return false; }
|
||||
return true;
|
||||
}
|
||||
|
||||
// Clears the previous result, makes sure the parser can hold `json`, runs
// stage 1 and then walks the document with a sax_point_reader_visitor that
// appends the points to `container`. Returns the first error reported by any
// step, or SUCCESS.
error_code Sax::RunNoExcept(const padded_string &json) noexcept {
  container.clear();

  // Allocate capacity if needed
  if (capacity < json.size()) {
    SIMDJSON_TRY( Allocate(json.size()) );
  }

  // Run stage 1 first. The buffer is only read, so keep it const instead of
  // casting the qualifier away.
  SIMDJSON_TRY( dom_parser.stage1(reinterpret_cast<const uint8_t *>(json.data()), json.size(), false) );

  // Then walk the document, collecting the points as we go
  json_iterator iter(dom_parser, 0);
  sax_point_reader_visitor visitor(container);
  SIMDJSON_TRY( iter.walk_document<false>(visitor) );
  return SUCCESS;
}
|
||||
|
||||
// (Re)allocates the string buffer and the parser's internal buffers for
// documents of up to `new_capacity` bytes. The maximum depth is configured only
// on the first successful allocation. Returns MEMALLOC when the string buffer
// cannot be allocated, or the error reported by the parser.
//
// Declared inline because it is defined in a header: without it, including the
// header from more than one translation unit would define the function twice.
inline error_code Sax::Allocate(size_t new_capacity) {
  // string_capacity copied from document::allocate
  size_t string_capacity = SIMDJSON_ROUNDUP_N(5 * new_capacity / 3 + 32, 64);
  string_buf.reset(new (std::nothrow) uint8_t[string_capacity]);
  // nothrow new reports failure by returning a null pointer.
  if (!string_buf) { return MEMALLOC; }
  if (auto error = dom_parser.set_capacity(new_capacity)) { return error; }
  if (capacity == 0) { // set max depth the first time only
    if (auto error = dom_parser.set_max_depth(DEFAULT_MAX_DEPTH)) { return error; }
  }
  capacity = new_capacity;
  return SUCCESS;
}
|
||||
|
||||
BENCHMARK_TEMPLATE(LargeRandom, Sax);
|
||||
|
||||
} // namespace largerandom
|
||||
|
||||
#endif // SIMDJSON_EXCEPTIONS
|
|
@ -29,9 +29,10 @@ template <int TYPE = PERF_TYPE_HARDWARE> class LinuxEvents {
|
|||
size_t num_events{};
|
||||
std::vector<uint64_t> temp_result_vec{};
|
||||
std::vector<uint64_t> ids{};
|
||||
bool quiet;
|
||||
|
||||
public:
|
||||
explicit LinuxEvents(std::vector<int> config_vec) : fd(0), working(true) {
|
||||
explicit LinuxEvents(std::vector<int> config_vec, bool _quiet=false) : fd(0), working(true), quiet{_quiet} {
|
||||
std::memset(&attribs, 0, sizeof(attribs));
|
||||
attribs.type = TYPE;
|
||||
attribs.size = sizeof(attribs);
|
||||
|
@ -101,8 +102,11 @@ public:
|
|||
|
||||
private:
|
||||
void report_error(const std::string &context) {
|
||||
if (working)
|
||||
std::cerr << (context + ": " + std::string(strerror(errno))) << std::endl;
|
||||
if (!quiet) {
|
||||
if (working) {
|
||||
std::cerr << (context + ": " + std::string(strerror(errno))) << std::endl;
|
||||
}
|
||||
}
|
||||
working = false;
|
||||
}
|
||||
};
|
||||
|
|
|
@ -48,7 +48,9 @@ std::string rapid_stringme(char *json) {
|
|||
std::string simdjson_stringme(simdjson::padded_string & json) {
|
||||
std::stringstream ss;
|
||||
dom::parser parser;
|
||||
dom::element doc = parser.parse(json);
|
||||
dom::element doc;
|
||||
auto error = parser.parse(json).get(doc);
|
||||
if (error) { std::cerr << error << std::endl; abort(); }
|
||||
ss << simdjson::minify(doc);
|
||||
return ss.str();
|
||||
}
|
||||
|
|
|
@ -131,24 +131,34 @@ struct Stat {
|
|||
size_t stringLength; // Number of code units in all strings
|
||||
};
|
||||
|
||||
static void GenStatPlus(Stat &stat, const dom::element v) {
|
||||
static error_code GenStatPlus(Stat &stat, const dom::element &v);
|
||||
static error_code GenStatPlus(Stat &stat, const simdjson_result<dom::element> &r) {
|
||||
dom::element v;
|
||||
SIMDJSON_TRY( r.get(v) );
|
||||
return GenStatPlus(stat, v);
|
||||
}
|
||||
static error_code GenStatPlus(Stat &stat, const dom::element &v) {
|
||||
switch (v.type()) {
|
||||
case dom::element_type::ARRAY:
|
||||
for (dom::element child : dom::array(v)) {
|
||||
case dom::element_type::ARRAY: {
|
||||
dom::array a;
|
||||
SIMDJSON_TRY( v.get(a) )
|
||||
for (auto child : a) {
|
||||
GenStatPlus(stat, child);
|
||||
stat.elementCount++;
|
||||
}
|
||||
stat.arrayCount++;
|
||||
break;
|
||||
case dom::element_type::OBJECT:
|
||||
for (dom::key_value_pair kv : dom::object(v)) {
|
||||
} break;
|
||||
case dom::element_type::OBJECT: {
|
||||
dom::object o;
|
||||
SIMDJSON_TRY( v.get(o) );
|
||||
for (dom::key_value_pair kv : o) {
|
||||
GenStatPlus(stat, kv.value);
|
||||
stat.stringLength += kv.key.size();
|
||||
stat.memberCount++;
|
||||
stat.stringCount++;
|
||||
}
|
||||
stat.objectCount++;
|
||||
break;
|
||||
} break;
|
||||
case dom::element_type::INT64:
|
||||
case dom::element_type::UINT64:
|
||||
case dom::element_type::DOUBLE:
|
||||
|
@ -156,20 +166,24 @@ static void GenStatPlus(Stat &stat, const dom::element v) {
|
|||
break;
|
||||
case dom::element_type::STRING: {
|
||||
stat.stringCount++;
|
||||
auto sv = std::string_view(v);
|
||||
std::string_view sv;
|
||||
SIMDJSON_TRY( v.get(sv) );
|
||||
stat.stringLength += sv.size();
|
||||
} break;
|
||||
case dom::element_type::BOOL:
|
||||
if (bool(v)) {
|
||||
case dom::element_type::BOOL: {
|
||||
bool b;
|
||||
SIMDJSON_TRY( v.get(b) );
|
||||
if (b) {
|
||||
stat.trueCount++;
|
||||
} else {
|
||||
stat.falseCount++;
|
||||
}
|
||||
break;
|
||||
} break;
|
||||
case dom::element_type::NULL_VALUE:
|
||||
++stat.nullCount;
|
||||
break;
|
||||
}
|
||||
return SUCCESS;
|
||||
}
|
||||
|
||||
static void RapidGenStat(Stat &stat, const rapidjson::Value &v) {
|
||||
|
@ -221,7 +235,8 @@ simdjson_never_inline Stat rapidjson_compute_stats_ref(const rapidjson::Value &d
|
|||
simdjson_never_inline Stat
|
||||
simdjson_compute_stats_refplus(const simdjson::dom::element &doc) {
|
||||
Stat s{};
|
||||
GenStatPlus(s, doc);
|
||||
auto error = GenStatPlus(s, doc);
|
||||
if (error) { std::cerr << error << std::endl; abort(); }
|
||||
return s;
|
||||
}
|
||||
|
||||
|
@ -469,9 +484,7 @@ int main(int argc, char *argv[]) {
|
|||
simdjson::dom::parser parser;
|
||||
simdjson::dom::element doc;
|
||||
auto error = parser.parse(p).get(doc);
|
||||
if (error) {
|
||||
std::cerr << error << std::endl;
|
||||
}
|
||||
if (error) { std::cerr << error << std::endl; abort(); }
|
||||
size_t refval = simdjson_compute_stats_refplus(doc).objectCount;
|
||||
|
||||
BEST_TIME("simdjson ",
|
||||
|
|
|
@ -0,0 +1,51 @@
|
|||
#pragma once
|
||||
|
||||
#if SIMDJSON_EXCEPTIONS
|
||||
|
||||
#include "partial_tweets.h"
|
||||
|
||||
namespace partial_tweets {
|
||||
|
||||
using namespace simdjson;
|
||||
|
||||
class Dom {
|
||||
public:
|
||||
simdjson_really_inline bool Run(const padded_string &json);
|
||||
|
||||
simdjson_really_inline const std::vector<tweet> &Result() { return tweets; }
|
||||
simdjson_really_inline size_t ItemCount() { return tweets.size(); }
|
||||
|
||||
private:
|
||||
dom::parser parser{};
|
||||
std::vector<tweet> tweets{};
|
||||
|
||||
simdjson_really_inline uint64_t nullable_int(dom::element element) {
|
||||
if (element.is_null()) { return 0; }
|
||||
return element;
|
||||
}
|
||||
};
|
||||
|
||||
// Clears the previous result, parses `json` and appends one tweet for every
// element of the "statuses" array, copying the selected fields of the status
// and of its "user" object. Always returns true.
simdjson_really_inline bool Dom::Run(const padded_string &json) {
  tweets.clear();

  for (dom::element status : parser.parse(json)["statuses"]) {
    auto author = status["user"];
    tweets.emplace_back(partial_tweets::tweet{
      status["created_at"],
      status["id"],
      status["text"],
      nullable_int(status["in_reply_to_status_id"]),
      { author["id"], author["screen_name"] },
      status["retweet_count"],
      status["favorite_count"]
    });
  }

  return true;
}
|
||||
|
||||
BENCHMARK_TEMPLATE(PartialTweets, Dom);
|
||||
|
||||
} // namespace partial_tweets
|
||||
|
||||
#endif // SIMDJSON_EXCEPTIONS
|
|
@ -0,0 +1,64 @@
|
|||
#pragma once
|
||||
|
||||
#include "partial_tweets.h"
|
||||
|
||||
namespace partial_tweets {
|
||||
|
||||
using namespace simdjson;
|
||||
|
||||
class DomNoExcept {
|
||||
public:
|
||||
simdjson_really_inline bool Run(const simdjson::padded_string &json) noexcept;
|
||||
|
||||
simdjson_really_inline const std::vector<tweet> &Result() { return tweets; }
|
||||
simdjson_really_inline size_t ItemCount() { return tweets.size(); }
|
||||
|
||||
private:
|
||||
dom::parser parser{};
|
||||
std::vector<tweet> tweets{};
|
||||
|
||||
simdjson_really_inline simdjson_result<uint64_t> nullable_int(simdjson_result<dom::element> result) noexcept {
|
||||
dom::element element;
|
||||
SIMDJSON_TRY( result.get(element) );
|
||||
if (element.is_null()) { return 0; }
|
||||
return element.get_uint64();
|
||||
}
|
||||
|
||||
simdjson_really_inline error_code RunNoExcept(const simdjson::padded_string &json) noexcept;
|
||||
};
|
||||
|
||||
// Runs the benchmark body. On failure the error is written to std::cerr and
// false is returned; otherwise true.
simdjson_really_inline bool DomNoExcept::Run(const simdjson::padded_string &json) noexcept {
  const auto error = RunNoExcept(json);
  if (!error) { return true; }
  std::cerr << error << std::endl;
  return false;
}
|
||||
|
||||
// Clears the previous result, parses `json` and appends one tweet for every
// element of the "statuses" array. Each field is fetched through SIMDJSON_TRY,
// so the first failing lookup or conversion ends the run and its error code is
// returned; SUCCESS otherwise.
simdjson_really_inline error_code DomNoExcept::RunNoExcept(const simdjson::padded_string &json) noexcept {
  tweets.clear();

  dom::array statuses;
  SIMDJSON_TRY( parser.parse(json)["statuses"].get_array().get(statuses) );

  for (auto entry : statuses) {
    dom::object status;
    SIMDJSON_TRY( entry.get_object().get(status) );

    dom::object author;
    SIMDJSON_TRY( status["user"].get_object().get(author) );

    // Fields are read in the same order as the members are listed here, so the
    // first error reported stays the same.
    partial_tweets::tweet parsed;
    SIMDJSON_TRY( status["created_at"].get_string().get(parsed.created_at) );
    SIMDJSON_TRY( status["id"].get_uint64().get(parsed.id) );
    SIMDJSON_TRY( status["text"].get_string().get(parsed.text) );
    SIMDJSON_TRY( nullable_int(status["in_reply_to_status_id"]).get(parsed.in_reply_to_status_id) );
    SIMDJSON_TRY( author["id"].get_uint64().get(parsed.user.id) );
    SIMDJSON_TRY( author["screen_name"].get_string().get(parsed.user.screen_name) );
    SIMDJSON_TRY( status["retweet_count"].get_uint64().get(parsed.retweet_count) );
    SIMDJSON_TRY( status["favorite_count"].get_uint64().get(parsed.favorite_count) );

    tweets.push_back(parsed);
  }
  return SUCCESS;
}
|
||||
|
||||
} // namespace partial_tweets
|
|
@ -0,0 +1,93 @@
|
|||
#pragma once
|
||||
|
||||
#if SIMDJSON_EXCEPTIONS
|
||||
|
||||
#include "partial_tweets.h"
|
||||
|
||||
namespace partial_tweets {
|
||||
|
||||
using namespace simdjson;
|
||||
using namespace simdjson::builtin;
|
||||
|
||||
class Iter {
|
||||
public:
|
||||
simdjson_really_inline bool Run(const padded_string &json);
|
||||
|
||||
simdjson_really_inline const std::vector<tweet> &Result() { return tweets; }
|
||||
simdjson_really_inline size_t ItemCount() { return tweets.size(); }
|
||||
|
||||
private:
|
||||
ondemand::parser parser{};
|
||||
std::vector<tweet> tweets{};
|
||||
|
||||
simdjson_really_inline uint64_t nullable_int(ondemand::value && value) {
|
||||
if (value.is_null()) { return 0; }
|
||||
return std::move(value);
|
||||
}
|
||||
|
||||
simdjson_really_inline twitter_user read_user(ondemand::object && user) {
|
||||
// Move user into a local object so it gets destroyed (and moves the iterator)
|
||||
ondemand::object u = std::move(user);
|
||||
return { u["id"], u["screen_name"] };
|
||||
}
|
||||
};
|
||||
|
||||
// Clears the previous result and walks `json` with the raw on-demand iterator,
// appending one tweet per element of the "statuses" array.
//
// Fields are looked up strictly in the order created_at, id, text,
// in_reply_to_status_id, user (id, screen_name), retweet_count,
// favorite_count. Returns false as soon as an expected container or field is
// not found or a skip_container() call reports failure, true otherwise.
simdjson_really_inline bool Iter::Run(const padded_string &json) {
  tweets.clear();

  // Walk the document, parsing the tweets as we go

  // { "statuses":
  auto iter = parser.iterate_raw(json).value();
  if (!iter.start_object() || !iter.find_field_raw("statuses")) { return false; }
  // { "statuses": [
  if (!iter.start_array()) { return false; }

  do {
    // Value-initialized so in_reply_to_status_id has a defined value when the
    // JSON value is null and the assignment below is skipped. The local is not
    // called `tweet` to avoid hiding the type of the same name.
    tweet parsed{};

    if (!iter.start_object() || !iter.find_field_raw("created_at")) { return false; }
    parsed.created_at = iter.consume_string();

    if (!iter.has_next_field() || !iter.find_field_raw("id")) { return false; }
    parsed.id = iter.consume_uint64();

    if (!iter.has_next_field() || !iter.find_field_raw("text")) { return false; }
    parsed.text = iter.consume_string();

    if (!iter.has_next_field() || !iter.find_field_raw("in_reply_to_status_id")) { return false; }
    if (!iter.is_null()) {
      parsed.in_reply_to_status_id = iter.consume_uint64();
    }

    if (!iter.has_next_field() || !iter.find_field_raw("user")) { return false; }
    {
      if (!iter.start_object() || !iter.find_field_raw("id")) { return false; }
      parsed.user.id = iter.consume_uint64();

      if (!iter.has_next_field() || !iter.find_field_raw("screen_name")) { return false; }
      parsed.user.screen_name = iter.consume_string();

      if (iter.skip_container()) { return false; } // Skip the rest of the user object
    }

    if (!iter.has_next_field() || !iter.find_field_raw("retweet_count")) { return false; }
    parsed.retweet_count = iter.consume_uint64();

    if (!iter.has_next_field() || !iter.find_field_raw("favorite_count")) { return false; }
    parsed.favorite_count = iter.consume_uint64();

    tweets.push_back(parsed);

    if (iter.skip_container()) { return false; } // Skip the rest of the tweet object

  } while (iter.has_next_element());

  return true;
}
|
||||
|
||||
BENCHMARK_TEMPLATE(PartialTweets, Iter);
|
||||
|
||||
} // namespace partial_tweets
|
||||
|
||||
#endif // SIMDJSON_EXCEPTIONS
|
|
@ -0,0 +1,58 @@
|
|||
#pragma once
|
||||
|
||||
#if SIMDJSON_EXCEPTIONS
|
||||
|
||||
#include "partial_tweets.h"
|
||||
|
||||
namespace partial_tweets {
|
||||
|
||||
using namespace simdjson;
|
||||
using namespace simdjson::builtin;
|
||||
|
||||
class OnDemand {
|
||||
public:
|
||||
simdjson_really_inline bool Run(const padded_string &json);
|
||||
|
||||
simdjson_really_inline const std::vector<tweet> &Result() { return tweets; }
|
||||
simdjson_really_inline size_t ItemCount() { return tweets.size(); }
|
||||
|
||||
private:
|
||||
ondemand::parser parser{};
|
||||
std::vector<tweet> tweets{};
|
||||
|
||||
simdjson_really_inline uint64_t nullable_int(ondemand::value && value) {
|
||||
if (value.is_null()) { return 0; }
|
||||
return std::move(value);
|
||||
}
|
||||
|
||||
simdjson_really_inline twitter_user read_user(ondemand::object && user) {
|
||||
// Move user into a local object so it gets destroyed (and moves the iterator)
|
||||
ondemand::object u = std::move(user);
|
||||
return { u["id"], u["screen_name"] };
|
||||
}
|
||||
};
|
||||
|
||||
// Clears the previous result, then iterates the "statuses" array of `json` and
// appends one tweet per object. The fields are read in the order they appear
// in the initializer list below. Always returns true.
simdjson_really_inline bool OnDemand::Run(const padded_string &json) {
  tweets.clear();

  // Walk the document, parsing the tweets as we go
  auto doc = parser.iterate(json);
  for (ondemand::object status : doc["statuses"]) {
    tweets.emplace_back(partial_tweets::tweet{
      status["created_at"],
      status["id"],
      status["text"],
      nullable_int(status["in_reply_to_status_id"]),
      read_user(status["user"]),
      status["retweet_count"],
      status["favorite_count"]
    });
  }
  return true;
}
|
||||
|
||||
BENCHMARK_TEMPLATE(PartialTweets, OnDemand);
|
||||
|
||||
} // namespace partial_tweets
|
||||
|
||||
#endif // SIMDJSON_EXCEPTIONS
|
|
@ -0,0 +1,41 @@
|
|||
#pragma once
|
||||
|
||||
//
|
||||
// Interface
|
||||
//
|
||||
|
||||
namespace partial_tweets {
|
||||
template<typename T> static void PartialTweets(benchmark::State &state);
|
||||
} // namespace partial_tweets
|
||||
|
||||
//
|
||||
// Implementation
|
||||
//
|
||||
|
||||
#include "tweet.h"
|
||||
#include <vector>
|
||||
#include "event_counter.h"
|
||||
#include "domnoexcept.h"
|
||||
#include "json_benchmark.h"
|
||||
|
||||
namespace partial_tweets {
|
||||
|
||||
using namespace simdjson;
|
||||
|
||||
/**
 * Benchmark body shared by the partial_tweets implementations.
 *
 * Loads twitter.json from SIMDJSON_BENCHMARK_DATA_DIR. If loading fails, the
 * error is printed to std::cerr and the benchmark is skipped with the message
 * "error loading". Otherwise the loaded document is handed to
 * JsonBenchmark<T, DomNoExcept>.
 *
 * @tparam T implementation class to benchmark.
 * @param state benchmark state supplied by the benchmark framework.
 */
template<typename T> static void PartialTweets(benchmark::State &state) {
  constexpr const char *TWITTER_JSON = SIMDJSON_BENCHMARK_DATA_DIR "twitter.json";

  // Read the whole file into a padded buffer before any timing starts.
  padded_string json;
  const error_code load_error = padded_string::load(TWITTER_JSON).get(json);
  if (load_error) {
    std::cerr << load_error << std::endl;
    state.SkipWithError("error loading");
    return;
  }

  JsonBenchmark<T, DomNoExcept>(state, json);
}
|
||||
|
||||
} // namespace partial_tweets
|
|
@ -0,0 +1,69 @@
|
|||
#pragma once
|
||||
|
||||
|
||||
#include "partial_tweets.h"
|
||||
#include "sax_tweet_reader_visitor.h"
|
||||
|
||||
namespace partial_tweets {
|
||||
|
||||
using namespace simdjson;
|
||||
using namespace simdjson::builtin;
|
||||
using namespace simdjson::builtin::stage2;
|
||||
|
||||
// Benchmark implementation that extracts tweets by running stage 1 of the
// builtin dom_parser_implementation and then walking the document with
// sax_tweet_reader_visitor. Exposes the Run / Result / ItemCount members that
// the PartialTweets benchmark template is instantiated with.
class Sax {
|
||||
public:
|
||||
// Parses `json` and refills the tweet list; returns false (after printing the error) on failure.
simdjson_really_inline bool Run(const padded_string &json) noexcept;
|
||||
|
||||
// The tweets collected by the most recent Run().
simdjson_really_inline const std::vector<tweet> &Result() { return tweets; }
|
||||
// Number of tweets collected by the most recent Run().
simdjson_really_inline size_t ItemCount() { return tweets.size(); }
|
||||
|
||||
private:
|
||||
// Error-code-returning worker behind Run().
simdjson_really_inline error_code RunNoExcept(const padded_string &json) noexcept;
|
||||
// (Re)allocates string_buf and the dom_parser for documents of up to new_capacity bytes.
error_code Allocate(size_t new_capacity);
|
||||
// Buffer handed to the visitor as the destination for parsed strings.
std::unique_ptr<uint8_t[]> string_buf{};
|
||||
// Document size (in bytes) the buffers are currently sized for; 0 until the first Allocate().
size_t capacity{};
|
||||
dom_parser_implementation dom_parser{};
|
||||
// Output of the last run; cleared at the start of each run.
std::vector<tweet> tweets{};
|
||||
};
|
||||
|
||||
// NOTE: this assumes the dom_parser is already allocated
|
||||
bool Sax::Run(const padded_string &json) noexcept {
|
||||
auto error = RunNoExcept(json);
|
||||
if (error) { std::cerr << error << std::endl; return false; }
|
||||
return true;
|
||||
}
|
||||
|
||||
// Extracts the tweets from `json` without throwing.
// Steps: reset the output, grow the buffers if the document does not fit,
// run stage 1 over the input, then walk the document with the tweet visitor.
// Returns the first error encountered, or SUCCESS.
error_code Sax::RunNoExcept(const padded_string &json) noexcept {
  tweets.clear();

  // Grow the buffers only when this document is larger than anything seen so far.
  if (json.size() > capacity) {
    if (auto alloc_error = Allocate(json.size())) { return alloc_error; }
  }

  // Stage 1 has to complete before the iterator can walk the document.
  if (auto stage1_error = dom_parser.stage1((uint8_t *)json.data(), json.size(), false)) {
    return stage1_error;
  }

  // Walk the document; the visitor appends tweets and writes strings into string_buf.
  json_iterator iter(dom_parser, 0);
  sax_tweet_reader_visitor visitor(tweets, string_buf.get());
  if (auto walk_error = iter.walk_document<false>(visitor)) { return walk_error; }
  return SUCCESS;
}
|
||||
|
||||
// (Re)allocates the string buffer and the dom_parser for documents of up to
// `new_capacity` bytes.
//
// Returns MEMALLOC if the string buffer cannot be allocated, the error from
// dom_parser.set_capacity / set_max_depth if either fails, and SUCCESS
// otherwise. `capacity` is only updated on success, so a failed call is
// retried by the next run instead of leaving a null string buffer in use.
error_code Sax::Allocate(size_t new_capacity) {
  // string_capacity copied from document::allocate
  size_t string_capacity = SIMDJSON_ROUNDUP_N(5 * new_capacity / 3 + 32, 64);
  string_buf.reset(new (std::nothrow) uint8_t[string_capacity]);
  // nothrow new reports failure by returning null; surface it instead of
  // letting the visitor write parsed strings through a null pointer later.
  if (!string_buf) { return MEMALLOC; }
  if (auto error = dom_parser.set_capacity(new_capacity)) { return error; }
  if (capacity == 0) { // set max depth the first time only
    if (auto error = dom_parser.set_max_depth(DEFAULT_MAX_DEPTH)) { return error; }
  }
  capacity = new_capacity;
  return SUCCESS;
}
|
||||
|
||||
BENCHMARK_TEMPLATE(PartialTweets, Sax);
|
||||
|
||||
} // namespace partial_tweets
|
||||
|
|
@ -1,21 +1,18 @@
|
|||
#ifndef TWITTER_SAX_TWEET_READER_VISITOR_H
|
||||
#define TWITTER_SAX_TWEET_READER_VISITOR_H
|
||||
#pragma once
|
||||
|
||||
#include "simdjson.h"
|
||||
#include "tweet.h"
|
||||
#include <vector>
|
||||
|
||||
SIMDJSON_TARGET_HASWELL
|
||||
|
||||
namespace twitter {
|
||||
namespace partial_tweets {
|
||||
|
||||
using namespace simdjson;
|
||||
using namespace haswell;
|
||||
using namespace haswell::stage2;
|
||||
using namespace simdjson::builtin;
|
||||
using namespace simdjson::builtin::stage2;
|
||||
|
||||
struct sax_tweet_reader_visitor {
|
||||
public:
|
||||
sax_tweet_reader_visitor(std::vector<tweet> &_tweets, uint8_t *string_buf);
|
||||
simdjson_really_inline sax_tweet_reader_visitor(std::vector<tweet> &tweets, uint8_t *string_buf);
|
||||
|
||||
simdjson_really_inline error_code visit_document_start(json_iterator &iter);
|
||||
simdjson_really_inline error_code visit_object_start(json_iterator &iter);
|
||||
|
@ -68,8 +65,8 @@ private:
|
|||
field_type type{field_type::any};
|
||||
};
|
||||
|
||||
containers container{containers::document};
|
||||
std::vector<tweet> &tweets;
|
||||
containers container{containers::document};
|
||||
uint8_t *current_string_buf_loc;
|
||||
const uint8_t *current_key{};
|
||||
|
||||
|
@ -94,9 +91,9 @@ private:
|
|||
static field_lookup fields;
|
||||
}; // sax_tweet_reader_visitor
|
||||
|
||||
sax_tweet_reader_visitor::sax_tweet_reader_visitor(std::vector<tweet> &_tweets, uint8_t *string_buf)
|
||||
simdjson_really_inline sax_tweet_reader_visitor::sax_tweet_reader_visitor(std::vector<tweet> &_tweets, uint8_t *_string_buf)
|
||||
: tweets{_tweets},
|
||||
current_string_buf_loc{string_buf} {
|
||||
current_string_buf_loc{_string_buf} {
|
||||
}
|
||||
|
||||
simdjson_really_inline error_code sax_tweet_reader_visitor::visit_document_start(json_iterator &iter) {
|
||||
|
@ -112,6 +109,7 @@ simdjson_really_inline error_code sax_tweet_reader_visitor::visit_array_start(js
|
|||
switch (fields.get(current_key, container).type) {
|
||||
case field_type::array: // { "statuses": [
|
||||
start_container(iter);
|
||||
current_key = nullptr;
|
||||
return SUCCESS;
|
||||
case field_type::any:
|
||||
return SUCCESS;
|
||||
|
@ -190,6 +188,7 @@ simdjson_really_inline error_code sax_tweet_reader_visitor::visit_primitive(json
|
|||
iter.log_error("unexpected primitive");
|
||||
return INCORRECT_TYPE;
|
||||
}
|
||||
current_key = nullptr;
|
||||
}
|
||||
|
||||
// If it's not a field, it's a child of an array.
|
||||
|
@ -202,16 +201,17 @@ simdjson_really_inline error_code sax_tweet_reader_visitor::visit_array_end(json
|
|||
return SUCCESS;
|
||||
}
|
||||
simdjson_really_inline error_code sax_tweet_reader_visitor::visit_object_end(json_iterator &iter) {
|
||||
current_key = nullptr;
|
||||
if (in_container(iter)) { end_container(iter); }
|
||||
return SUCCESS;
|
||||
}
|
||||
|
||||
simdjson_really_inline error_code sax_tweet_reader_visitor::visit_document_end(json_iterator &iter) {
|
||||
iter.log_end_value("document");
|
||||
simdjson_really_inline error_code sax_tweet_reader_visitor::visit_document_end(json_iterator &) {
|
||||
return SUCCESS;
|
||||
}
|
||||
|
||||
simdjson_really_inline error_code sax_tweet_reader_visitor::visit_empty_array(json_iterator &) {
|
||||
current_key = nullptr;
|
||||
return SUCCESS;
|
||||
}
|
||||
simdjson_really_inline error_code sax_tweet_reader_visitor::visit_empty_object(json_iterator &) {
|
||||
|
@ -233,15 +233,15 @@ simdjson_really_inline bool sax_tweet_reader_visitor::in_container_child(json_it
|
|||
simdjson_really_inline void sax_tweet_reader_visitor::start_container(json_iterator &iter) {
|
||||
SIMDJSON_ASSUME(iter.depth <= MAX_SUPPORTED_DEPTH); // Asserts in debug mode
|
||||
container = containers(iter.depth);
|
||||
if (logger::LOG_ENABLED) { iter.log_start_value(STATE_NAMES[iter.depth]); }
|
||||
if (logger::LOG_ENABLED) { iter.log_value(STATE_NAMES[iter.depth]); }
|
||||
if (container == containers::tweet) { tweets.push_back({}); }
|
||||
}
|
||||
simdjson_really_inline void sax_tweet_reader_visitor::end_container(json_iterator &iter) {
|
||||
if (logger::LOG_ENABLED) { iter.log_end_value(STATE_NAMES[int(container)]); }
|
||||
simdjson_really_inline void sax_tweet_reader_visitor::end_container(json_iterator &) {
|
||||
container = containers(int(container) - 1);
|
||||
}
|
||||
simdjson_really_inline error_code sax_tweet_reader_visitor::parse_nullable_unsigned(json_iterator &iter, const uint8_t *value, const field &f) {
|
||||
iter.log_value(f.key);
|
||||
auto i = reinterpret_cast<uint64_t *>(reinterpret_cast<char *>(&tweets.back() + f.offset));
|
||||
auto i = reinterpret_cast<uint64_t *>(reinterpret_cast<char *>(&tweets.back()) + f.offset);
|
||||
if (auto error = numberparsing::parse_unsigned(value).get(*i)) {
|
||||
// If number parsing failed, check if it's null before returning the error
|
||||
if (!atomparsing::is_valid_null_atom(value)) { iter.log_error("expected number or null"); return error; }
|
||||
|
@ -251,12 +251,12 @@ simdjson_really_inline error_code sax_tweet_reader_visitor::parse_nullable_unsig
|
|||
}
|
||||
simdjson_really_inline error_code sax_tweet_reader_visitor::parse_unsigned(json_iterator &iter, const uint8_t *value, const field &f) {
|
||||
iter.log_value(f.key);
|
||||
auto i = reinterpret_cast<uint64_t *>(reinterpret_cast<char *>(&tweets.back() + f.offset));
|
||||
auto i = reinterpret_cast<uint64_t *>(reinterpret_cast<char *>(&tweets.back()) + f.offset);
|
||||
return numberparsing::parse_unsigned(value).get(*i);
|
||||
}
|
||||
simdjson_really_inline error_code sax_tweet_reader_visitor::parse_string(json_iterator &iter, const uint8_t *value, const field &f) {
|
||||
iter.log_value(f.key);
|
||||
auto s = reinterpret_cast<std::string_view *>(reinterpret_cast<char *>(&tweets.back() + f.offset));
|
||||
auto s = reinterpret_cast<std::string_view *>(reinterpret_cast<char *>(&tweets.back()) + f.offset);
|
||||
return stringparsing::parse_string_to_buffer(value, current_string_buf_loc, *s);
|
||||
}
|
||||
|
||||
|
@ -291,7 +291,6 @@ simdjson_really_inline void sax_tweet_reader_visitor::field_lookup::neg(const ch
|
|||
auto index = hash(key, depth);
|
||||
if (entries[index].key) {
|
||||
fprintf(stderr, "%s (depth %d) conflicts with %s (depth %d) !\n", key, depth, entries[index].key, int(entries[index].container));
|
||||
assert(false);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -503,7 +502,7 @@ sax_tweet_reader_visitor::field_lookup::field_lookup() {
|
|||
// for (int a=0;a<4;a++) {
|
||||
// for (int b=0;b<4;b++) {
|
||||
// for (int c=0;c<4;c++) {
|
||||
// twitter::sax_tweet_reader_visitor::field_lookup fields(a,b,c);
|
||||
// sax_tweet_reader_visitor::field_lookup fields(a,b,c);
|
||||
// if (fields.collision_count) { continue; }
|
||||
// if (fields.zero_emission) { continue; }
|
||||
// if (fields.conflict_count < min_count) { printf("min=%d,%d,%d (%d)", a, b, c, fields.conflict_count); }
|
||||
|
@ -512,8 +511,4 @@ sax_tweet_reader_visitor::field_lookup::field_lookup() {
|
|||
// }
|
||||
// }
|
||||
|
||||
} // namespace twitter
|
||||
|
||||
SIMDJSON_UNTARGET_REGION
|
||||
|
||||
#endif // TWITTER_SAX_TWEET_READER_VISITOR_H
|
||||
} // namespace partial_tweets
|
|
@ -0,0 +1,57 @@
|
|||
#pragma once
|
||||
|
||||
#include "simdjson.h"
|
||||
#include "twitter_user.h"
|
||||
|
||||
namespace partial_tweets {
|
||||
|
||||
// {
|
||||
// "statuses": [
|
||||
// {
|
||||
// "created_at": "Sun Aug 31 00:29:15 +0000 2014",
|
||||
// "id": 505874924095815700,
|
||||
// "text": "@aym0566x \n\n名前:前田あゆみ\n第一印象:なんか怖っ!\n今の印象:とりあえずキモい。噛み合わない\n好きなところ:ぶすでキモいとこ😋✨✨\n思い出:んーーー、ありすぎ😊❤️\nLINE交換できる?:あぁ……ごめん✋\nトプ画をみて:照れますがな😘✨\n一言:お前は一生もんのダチ💖",
|
||||
// "in_reply_to_status_id": null,
|
||||
// "user": {
|
||||
// "id": 1186275104,
|
||||
// "screen_name": "ayuu0123"
|
||||
// },
|
||||
// "retweet_count": 0,
|
||||
// "favorite_count": 0
|
||||
// }
|
||||
// ]
|
||||
// }
|
||||
|
||||
struct tweet {
|
||||
std::string_view created_at{};
|
||||
uint64_t id{};
|
||||
std::string_view text{};
|
||||
uint64_t in_reply_to_status_id{};
|
||||
twitter_user user{};
|
||||
uint64_t retweet_count{};
|
||||
uint64_t favorite_count{};
|
||||
simdjson_really_inline bool operator==(const tweet &other) const {
|
||||
return created_at == other.created_at &&
|
||||
id == other.id &&
|
||||
text == other.text &&
|
||||
in_reply_to_status_id == other.in_reply_to_status_id &&
|
||||
user == other.user &&
|
||||
retweet_count == other.retweet_count &&
|
||||
favorite_count == other.favorite_count;
|
||||
}
|
||||
simdjson_really_inline bool operator!=(const tweet &other) const { return !(*this == other); }
|
||||
};
|
||||
|
||||
// Writes every extracted field of `t` to `o`, one "name: value" pair per line
// (each line ends with std::endl), and returns the stream.
SIMDJSON_UNUSED static std::ostream &operator<<(std::ostream &o, const tweet &t) {
  return o << "created_at: " << t.created_at << std::endl
           << "id: " << t.id << std::endl
           << "text: " << t.text << std::endl
           << "in_reply_to_status_id: " << t.in_reply_to_status_id << std::endl
           << "user.id: " << t.user.id << std::endl
           << "user.screen_name: " << t.user.screen_name << std::endl
           << "retweet_count: " << t.retweet_count << std::endl
           << "favorite_count: " << t.favorite_count << std::endl;
}
|
||||
|
||||
} // namespace partial_tweets
|
|
@ -0,0 +1,16 @@
|
|||
#pragma once
|
||||
#include "simdjson.h"
|
||||
|
||||
namespace partial_tweets {
|
||||
|
||||
// The subset of a tweet's "user" object that the benchmarks extract.
// screen_name is a non-owning view of the user's screen name.
struct twitter_user {
  uint64_t id{};
  std::string_view screen_name{};

  // Two users are equal when both the id and the screen name match.
  bool operator==(const twitter_user &other) const {
    return id == other.id &&
           screen_name == other.screen_name;
  }
  // Provided for parity with tweet, which defines both == and !=.
  bool operator!=(const twitter_user &other) const { return !(*this == other); }
};
|
||||
|
||||
} // namespace partial_tweets
|
|
@ -1,67 +0,0 @@
|
|||
#ifndef TWITTER_SAX_TWEET_READER_H
|
||||
#define TWITTER_SAX_TWEET_READER_H
|
||||
|
||||
#include "simdjson.h"
|
||||
#include "sax_tweet_reader_visitor.h"
|
||||
#include "tweet.h"
|
||||
#include <vector>
|
||||
|
||||
SIMDJSON_TARGET_HASWELL
|
||||
|
||||
namespace twitter {
|
||||
namespace {
|
||||
|
||||
using namespace simdjson;
|
||||
using namespace haswell;
|
||||
using namespace haswell::stage2;
|
||||
|
||||
struct sax_tweet_reader {
|
||||
std::vector<tweet> tweets;
|
||||
std::unique_ptr<uint8_t[]> string_buf;
|
||||
size_t capacity;
|
||||
dom_parser_implementation dom_parser;
|
||||
|
||||
sax_tweet_reader();
|
||||
error_code set_capacity(size_t new_capacity);
|
||||
error_code read_tweets(padded_string &json);
|
||||
}; // struct tweet_reader
|
||||
|
||||
sax_tweet_reader::sax_tweet_reader() : tweets{}, string_buf{}, capacity{0}, dom_parser() {
|
||||
}
|
||||
|
||||
error_code sax_tweet_reader::set_capacity(size_t new_capacity) {
|
||||
// string_capacity copied from document::allocate
|
||||
size_t string_capacity = SIMDJSON_ROUNDUP_N(5 * new_capacity / 3 + 32, 64);
|
||||
string_buf.reset(new (std::nothrow) uint8_t[string_capacity]);
|
||||
if (auto error = dom_parser.set_capacity(new_capacity)) { return error; }
|
||||
if (capacity == 0) { // set max depth the first time only
|
||||
if (auto error = dom_parser.set_max_depth(DEFAULT_MAX_DEPTH)) { return error; }
|
||||
}
|
||||
capacity = new_capacity;
|
||||
return SUCCESS;
|
||||
}
|
||||
|
||||
// NOTE: this assumes the dom_parser is already allocated
|
||||
error_code sax_tweet_reader::read_tweets(padded_string &json) {
|
||||
// Allocate capacity if needed
|
||||
tweets.clear();
|
||||
if (capacity < json.size()) {
|
||||
if (auto error = set_capacity(capacity)) { return error; }
|
||||
}
|
||||
|
||||
// Run stage 1 first.
|
||||
if (auto error = dom_parser.stage1((uint8_t *)json.data(), json.size(), false)) { return error; }
|
||||
|
||||
// Then walk the document, parsing the tweets as we go
|
||||
json_iterator iter(dom_parser, 0);
|
||||
sax_tweet_reader_visitor visitor(tweets, string_buf.get());
|
||||
if (auto error = iter.walk_document<false>(visitor)) { return error; }
|
||||
return SUCCESS;
|
||||
}
|
||||
|
||||
} // unnamed namespace
|
||||
} // namespace twitter
|
||||
|
||||
SIMDJSON_UNTARGET_REGION
|
||||
|
||||
#endif // TWITTER_SAX_TWEET_READER_H
|
|
@ -1,21 +0,0 @@
|
|||
#ifndef TWEET_H
|
||||
#define TWEET_H
|
||||
|
||||
#include "simdjson.h"
|
||||
#include "twitter_user.h"
|
||||
|
||||
namespace twitter {
|
||||
|
||||
struct tweet {
|
||||
uint64_t id{};
|
||||
std::string_view text{};
|
||||
std::string_view created_at{};
|
||||
uint64_t in_reply_to_status_id{};
|
||||
uint64_t retweet_count{};
|
||||
uint64_t favorite_count{};
|
||||
twitter_user user{};
|
||||
};
|
||||
|
||||
} // namespace twitter
|
||||
|
||||
#endif // TWEET_H
|
|
@ -1,15 +0,0 @@
|
|||
#ifndef TWITTER_USER_H
|
||||
#define TWITTER_USER_H
|
||||
|
||||
#include "simdjson.h"
|
||||
|
||||
namespace twitter {
|
||||
|
||||
struct twitter_user {
|
||||
uint64_t id{};
|
||||
std::string_view screen_name{};
|
||||
};
|
||||
|
||||
} // namespace twitter
|
||||
|
||||
#endif // TWITTER_USER_H
|
|
@ -94,24 +94,88 @@ else()
|
|||
target_compile_options(simdjson-internal-flags INTERFACE -Wsign-compare -Wshadow -Wwrite-strings -Wpointer-arith -Winit-self -Wconversion -Wno-sign-conversion)
|
||||
endif()
|
||||
|
||||
#
|
||||
# Optional flags
|
||||
#
|
||||
|
||||
#
|
||||
# Implementation selection
|
||||
#
|
||||
set(SIMDJSON_ALL_IMPLEMENTATIONS "fallback;westmere;haswell;arm64")
|
||||
|
||||
set(SIMDJSON_IMPLEMENTATION "" CACHE STRING "Semicolon-separated list of implementations to include (${SIMDJSON_ALL_IMPLEMENTATIONS}). If this is not set, any implementations that are supported at compile time and may be selected at runtime will be included.")
|
||||
foreach(implementation ${SIMDJSON_IMPLEMENTATION})
|
||||
if(NOT (implementation IN_LIST SIMDJSON_ALL_IMPLEMENTATIONS))
|
||||
# ERROR is not a message() mode (it would just be printed as text); FATAL_ERROR stops configuration.
message(FATAL_ERROR "Implementation ${implementation} not supported by simdjson. Possible implementations: ${SIMDJSON_ALL_IMPLEMENTATIONS}")
|
||||
endif()
|
||||
endforeach(implementation)
|
||||
|
||||
set(SIMDJSON_EXCLUDE_IMPLEMENTATION "" CACHE STRING "Semicolon-separated list of implementations to exclude (haswell/westmere/arm64/fallback). By default, excludes any implementations that are unsupported at compile time or cannot be selected at runtime.")
|
||||
foreach(implementation ${SIMDJSON_EXCLUDE_IMPLEMENTATION})
|
||||
if(NOT (implementation IN_LIST SIMDJSON_ALL_IMPLEMENTATIONS))
|
||||
# ERROR is not a message() mode (it would just be printed as text); FATAL_ERROR stops configuration.
message(FATAL_ERROR "Implementation ${implementation} not supported by simdjson. Possible implementations: ${SIMDJSON_ALL_IMPLEMENTATIONS}")
|
||||
endif()
|
||||
endforeach(implementation)
|
||||
|
||||
foreach(implementation ${SIMDJSON_ALL_IMPLEMENTATIONS})
|
||||
string(TOUPPER ${implementation} implementation_upper)
|
||||
if(implementation IN_LIST SIMDJSON_EXCLUDE_IMPLEMENTATION)
|
||||
message(STATUS "Excluding implementation ${implementation} due to SIMDJSON_EXCLUDE_IMPLEMENTATION=${SIMDJSON_EXCLUDE_IMPLEMENTATION}")
|
||||
target_compile_definitions(simdjson-flags INTERFACE "SIMDJSON_IMPLEMENTATION_${implementation_upper}=0")
|
||||
elseif(implementation IN_LIST SIMDJSON_IMPLEMENTATION)
|
||||
message(STATUS "Including implementation ${implementation} due to SIMDJSON_IMPLEMENTATION=${SIMDJSON_IMPLEMENTATION}")
|
||||
target_compile_definitions(simdjson-flags INTERFACE "SIMDJSON_IMPLEMENTATION_${implementation_upper}=1")
|
||||
elseif(SIMDJSON_IMPLEMENTATION)
|
||||
message(STATUS "Excluding implementation ${implementation} due to SIMDJSON_IMPLEMENTATION=${SIMDJSON_IMPLEMENTATION}")
|
||||
target_compile_definitions(simdjson-flags INTERFACE "SIMDJSON_IMPLEMENTATION_${implementation_upper}=0")
|
||||
endif()
|
||||
endforeach(implementation)
|
||||
|
||||
# TODO make it so this generates the necessary compiler flags to select the given implementation as the builtin automatically!
|
||||
# This holds an implementation name, not ON/OFF, so it is a STRING cache entry rather than a boolean option().
set(SIMDJSON_BUILTIN_IMPLEMENTATION "" CACHE STRING "Select the implementation that will be used for user code. Defaults to the most universal implementation in SIMDJSON_IMPLEMENTATION (in the order ${SIMDJSON_ALL_IMPLEMENTATIONS}) if specified; otherwise, by default the compiler will pick the best implementation that can always be selected given the compiler flags.")
|
||||
if(SIMDJSON_BUILTIN_IMPLEMENTATION)
|
||||
target_compile_definitions(simdjson-flags INTERFACE "SIMDJSON_BUILTIN_IMPLEMENTATION=${SIMDJSON_BUILTIN_IMPLEMENTATION}")
|
||||
else()
|
||||
# Pick the most universal implementation out of the selected implementations (if any)
|
||||
foreach(implementation ${SIMDJSON_ALL_IMPLEMENTATIONS})
|
||||
if(implementation IN_LIST SIMDJSON_IMPLEMENTATION AND NOT (implementation IN_LIST SIMDJSON_EXCLUDE_IMPLEMENTATION))
|
||||
message(STATUS "Selected implementation ${implementation} as builtin implementation based on ${SIMDJSON_IMPLEMENTATION}.")
|
||||
target_compile_definitions(simdjson-flags INTERFACE "SIMDJSON_BUILTIN_IMPLEMENTATION=${implementation}")
|
||||
break()
|
||||
endif()
|
||||
endforeach(implementation)
|
||||
endif(SIMDJSON_BUILTIN_IMPLEMENTATION)
|
||||
|
||||
option(SIMDJSON_IMPLEMENTATION_HASWELL "Include the haswell implementation" ON)
|
||||
if(NOT SIMDJSON_IMPLEMENTATION_HASWELL)
|
||||
target_compile_definitions(simdjson-internal-flags INTERFACE SIMDJSON_IMPLEMENTATION_HASWELL=0)
|
||||
message(DEPRECATION "SIMDJSON_IMPLEMENTATION_HASWELL is deprecated. Use SIMDJSON_IMPLEMENTATION=-haswell instead.")
|
||||
target_compile_definitions(simdjson-flags INTERFACE SIMDJSON_IMPLEMENTATION_HASWELL=0)
|
||||
endif()
|
||||
option(SIMDJSON_IMPLEMENTATION_WESTMERE "Include the westmere implementation" ON)
|
||||
if(NOT SIMDJSON_IMPLEMENTATION_WESTMERE)
|
||||
target_compile_definitions(simdjson-internal-flags INTERFACE SIMDJSON_IMPLEMENTATION_WESTMERE=0)
|
||||
message(DEPRECATION "SIMDJSON_IMPLEMENTATION_WESTMERE is deprecated. Use SIMDJSON_IMPLEMENTATION=-westmere instead.")
|
||||
target_compile_definitions(simdjson-flags INTERFACE SIMDJSON_IMPLEMENTATION_WESTMERE=0)
|
||||
endif()
|
||||
option(SIMDJSON_IMPLEMENTATION_ARM64 "Include the arm64 implementation" ON)
|
||||
if(NOT SIMDJSON_IMPLEMENTATION_ARM64)
|
||||
target_compile_definitions(simdjson-internal-flags INTERFACE SIMDJSON_IMPLEMENTATION_ARM64=0)
|
||||
message(DEPRECATION "SIMDJSON_IMPLEMENTATION_ARM64 is deprecated. Use SIMDJSON_IMPLEMENTATION=-arm64 instead.")
|
||||
target_compile_definitions(simdjson-flags INTERFACE SIMDJSON_IMPLEMENTATION_ARM64=0)
|
||||
endif()
|
||||
option(SIMDJSON_IMPLEMENTATION_FALLBACK "Include the fallback implementation" ON)
|
||||
if(NOT SIMDJSON_IMPLEMENTATION_FALLBACK)
|
||||
target_compile_definitions(simdjson-internal-flags INTERFACE SIMDJSON_IMPLEMENTATION_FALLBACK=0)
|
||||
message(DEPRECATION "SIMDJSON_IMPLEMENTATION_FALLBACK is deprecated. Use SIMDJSON_IMPLEMENTATION=-fallback instead.")
|
||||
target_compile_definitions(simdjson-flags INTERFACE SIMDJSON_IMPLEMENTATION_FALLBACK=0)
|
||||
endif()
|
||||
|
||||
#
|
||||
# Other optional flags
|
||||
#
|
||||
option(SIMDJSON_ONDEMAND_SAFETY_RAILS "Validate ondemand user code at runtime to ensure it is being used correctly. Defaults to ON for debug builds, OFF for release builds." $<IF:$<CONFIG:DEBUG>,ON,OFF>)
|
||||
if(SIMDJSON_ONDEMAND_SAFETY_RAILS)
|
||||
message(STATUS "Ondemand safety rails enabled. Ondemand user code will be checked at runtime. This will be slower than normal!")
|
||||
target_compile_definitions(simdjson-flags INTERFACE SIMDJSON_ONDEMAND_SAFETY_RAILS)
|
||||
endif(SIMDJSON_ONDEMAND_SAFETY_RAILS)
|
||||
|
||||
option(SIMDJSON_BASH "Allow usage of bash within CMake" ON)
|
||||
|
||||
option(SIMDJSON_GIT "Allow usage of git within CMake" ON)
|
||||
|
@ -119,7 +183,7 @@ option(SIMDJSON_GIT "Allow usage of git within CMake" ON)
|
|||
option(SIMDJSON_EXCEPTIONS "Enable simdjson's exception-throwing interface" ON)
|
||||
if(NOT SIMDJSON_EXCEPTIONS)
|
||||
message(STATUS "simdjson exception interface turned off. Code that does not check error codes will not compile.")
|
||||
target_compile_definitions(simdjson-internal-flags INTERFACE SIMDJSON_EXCEPTIONS=0)
|
||||
target_compile_definitions(simdjson-flags INTERFACE SIMDJSON_EXCEPTIONS=0)
|
||||
endif()
|
||||
|
||||
option(SIMDJSON_ENABLE_THREADS "Link with thread support" ON)
|
||||
|
@ -133,6 +197,11 @@ if(SIMDJSON_ENABLE_THREADS)
|
|||
target_compile_definitions(simdjson-flags INTERFACE SIMDJSON_THREADS_ENABLED=1) # This will be set in the code automatically.
|
||||
endif()
|
||||
|
||||
# No comma after the option name: CMake arguments are whitespace-separated, so a comma would become part of the name.
option(SIMDJSON_VERBOSE_LOGGING "Enable verbose logging for internal simdjson library development." OFF)
|
||||
if (SIMDJSON_VERBOSE_LOGGING)
|
||||
target_compile_definitions(simdjson-flags INTERFACE SIMDJSON_VERBOSE_LOGGING=1)
|
||||
endif()
|
||||
|
||||
if(SIMDJSON_USE_LIBCPP)
|
||||
target_link_libraries(simdjson-flags INTERFACE -stdlib=libc++ -lc++abi)
|
||||
# instead of the above line, we could have used
|
||||
|
|
|
@ -1 +1 @@
|
|||
Subproject commit 4b63c333a842295b1bfb79d05863633037328300
|
||||
Subproject commit 794c975287355de48158d9a80ed502d26b20a472
|
|
@ -0,0 +1,450 @@
|
|||
How simdjson's On Demand Parsing works
|
||||
======================================
|
||||
|
||||
The simdjson On Demand API is a DOM-like API backed by a forward-only iterator over JSON
|
||||
source text with a natural C++ interface on top, including object and array iterators, object lookup, and conversion to native C++
|
||||
types (string_view, double, bool, etc.).
|
||||
|
||||
```c++
|
||||
ondemand::parser parser;
|
||||
auto doc = parser.iterate(json);
|
||||
for (auto tweet : doc["statuses"]) {
|
||||
std::string_view text = tweet["text"];
|
||||
std::string_view screen_name = tweet["user"]["screen_name"];
|
||||
uint64_t retweets = tweet["retweet_count"];
|
||||
uint64_t favorites = tweet["favorite_count"];
|
||||
cout << screen_name << " (" << retweets << " retweets / " << favorites << " favorites): " << text << endl;
|
||||
}
|
||||
```
|
||||
|
||||
It is designed around several principles:
|
||||
|
||||
* **Streaming (\*):** Does not preparse values, keeping memory usage and latency down. NOTE: Right now, simdjson has a preprocessing step that identifies the location of each value in the whole input. This means you have to pass the whole input, and the parser does have an internal buffer that consumes 4 bytes per JSON value (plus each operator like , : ] and }). This limitation will be reduced/eliminated in later versions.
|
||||
* **Forward-Only:** To prevent reiteration of the same values and to keep the number of variables down (literally), only a single index is maintained and everything uses it (even if you have nested for loops). This means when you're going through an array of arrays, for example, that the inner array loop will advance the index to the next comma, and the array can just pick it up and look at it.
|
||||
* **Natural Iteration:** A JSON array or object can be iterated with a normal C++ for loop. Nested arrays and objects are supported by nested for loops.
|
||||
* **Use-Specific Parsing:** Parsing is always specific to the type you ask for. For example, if you ask for an unsigned integer, we just start parsing digits. If there were no digits, we toss an error. There are even different parsers for double, uint64_t and int64_t. This avoids the branchiness of a generic "type switch," and makes the code more inlineable and compact.
|
||||
* **Validate What You Use:** On Demand deliberately validates the values you use and the structure leading to it, but nothing else. The goal is a guarantee that the value you asked for is the correct one and is not malformed: there must be no confusion over whether you got the right value. But it leaves the possibility that the JSON *as a whole* is invalid. A full-validation mode is possible and planned, but I think this mode should be the default, personally, or at least pretty heavily advertised. Full-validation mode should really only be for debug.
|
||||
|
||||
Rationale
|
||||
---------
|
||||
|
||||
Current JSON parsers generally have either ease of use or performance. Very few have both at
|
||||
once. simdjson's On Demand API bridges that gap with a familiar, friendly DOM API and the
|
||||
performance of just-in-time parsing on top of the simdjson core's legendary performance.
|
||||
|
||||
To achieve ease of use, we mimicked the *form* of a traditional DOM API: you can iterate over
|
||||
arrays, look up fields in objects, and extract native values like double, uint64_t, string and bool.
|
||||
To achieve performance, we introduced some key limitations that make the DOM API *streaming*:
|
||||
array/object iteration cannot be restarted, and fields must be looked up in order, and string/number
|
||||
values can only be parsed once.
|
||||
|
||||
```c++
|
||||
ondemand::parser parser;
|
||||
auto doc = parser.iterate(json);
|
||||
for (auto tweet : doc["statuses"]) {
|
||||
std::string_view text = tweet["text"];
|
||||
std::string_view screen_name = tweet["user"]["screen_name"];
|
||||
uint64_t retweets = tweet["retweet_count"];
|
||||
uint64_t favorites = tweet["favorite_count"];
|
||||
cout << screen_name << " (" << retweets << " retweets / " << favorites << " favorites): " << text << endl;
|
||||
}
|
||||
```
|
||||
|
||||
This streaming approach means that fields or values you don't use don't have to get parsed or
|
||||
converted, saving space and time.
|
||||
|
||||
Further, the On Demand API doesn't parse a value *at all* until you try to convert it to double,
|
||||
int, string, or bool. Because you have told it the type at that point, it can avoid the key
|
||||
"what type is this" branch present in almost all other parsers, avoiding the branch mispredictions that
|
||||
cause massive (sometimes 2-4x) slowdowns.
|
||||
|
||||
To understand exactly what's happening here and why it's different, it's helpful to review the major
|
||||
approaches to parsing and parser APIs in use today.
|
||||
|
||||
### Generic DOM Parsers
|
||||
|
||||
Many of the most usable, popular JSON APIs deserialize into a **DOM**: an intermediate tree of
|
||||
objects, arrays and values. The resulting API lets you refer to each array or object separately,
|
||||
using familiar techniques like iteration (`for (auto value : array)`) or indexing (`object["key"]`).
|
||||
In some cases the values are even deserialized straight into familiar C++ constructs like vector and
|
||||
map.
|
||||
|
||||
This model is dead simple to use, since it talks in terms of *data types* instead of JSON. It's
|
||||
often easy enough that many users use the deserialized JSON as-is instead of deserializing into
|
||||
their own custom structs, saving a ton of development work.
|
||||
|
||||
simdjson's DOM parser is one such example. It looks very similar to the ondemand example, except
|
||||
it calls `parse` instead of `iterate`:
|
||||
|
||||
```c++
|
||||
dom::parser parser;
|
||||
auto doc = parser.parse(json);
|
||||
for (auto tweet : doc["statuses"]) {
|
||||
std::string_view text = tweet["text"];
|
||||
std::string_view screen_name = tweet["user"]["screen_name"];
|
||||
uint64_t retweets = tweet["retweet_count"];
|
||||
uint64_t favorites = tweet["favorite_count"];
|
||||
cout << screen_name << " (" << retweets << " retweets / " << favorites << " favorites): " << text << endl;
|
||||
}
|
||||
```
|
||||
|
||||
Pros and cons of generic DOM:
|
||||
* Straightforward, user-friendly interface (arrays and objects)
|
||||
* No lifetime concerns (arrays and objects are often independent of JSON text and parser internal state)
|
||||
* Parses and stores everything, using memory/CPU even on values you don't end up using (cost can be brought down some with lazy numbers/strings and top-level iterators)
|
||||
* Values stay in memory even if you only use them once
|
||||
* Heavy performance drain from [type blindness](#type-blindness).
|
||||
|
||||
### SAX (SAJ?) Parsers
|
||||
|
||||
The SAX model ("Streaming API for XML") uses streaming to eliminate the high cost of
|
||||
parsing and storing the entire JSON. In the SAX model, a core JSON parser parses the JSON document
|
||||
piece by piece, but instead of stuffing values in a DOM tree, it passes each value to a callback,
|
||||
letting the user use the value and decide for themselves whether to discard it and where to store
|
||||
it.
|
||||
|
||||
This allows users to work with much larger files without running out of memory. Some SAX APIs even
|
||||
allow the user to skip values entirely, lightening the parsing burden as well.
|
||||
|
||||
The big drawback is complexity: SAX APIs generally have you define a single callback for each type
|
||||
(e.g. `string_field(std::string_view key, std::string_view value)`). Because of this, you suffer
|
||||
from context blindness: when you find a string you have to check where it is before you know what to
|
||||
do with it. Is this string the text of the tweet, the screen name, or something else I don't even
|
||||
care about? Are we even in a tweet right now, or is this from some other place in the document
|
||||
entirely?
|
||||
|
||||
The following is a SAX example of the Twitter problem we've seen in the Generic DOM and On Demand
|
||||
examples. To make it short enough to use as an example at all, it's heavily redacted: it only solves
|
||||
a part of the problem (doesn't get user.screen_name), it has bugs (it doesn't handle sub-objects
|
||||
in a tweet at all), and it uses a theoretical, simple SAX API that minimizes ceremony and skips over
|
||||
the issue of lazy parsing and number types entirely.
|
||||
|
||||
```c++
|
||||
struct twitter_callbacks {
|
||||
bool in_statuses;
|
||||
bool in_tweet;
|
||||
std::string_view text;
|
||||
uint64_t retweets;
|
||||
uint64_t favorites;
|
||||
void start_object_field(std::string_view key) {
|
||||
if (key == "statuses") { in_statuses = true; }
|
||||
}
|
||||
void start_object() {
|
||||
if (in_statuses) { in_tweet = true; }
|
||||
}
|
||||
void string_field(std::string_view key, std::string_view value) {
|
||||
if (in_tweet && key == "text") { text = value; }
|
||||
}
|
||||
void number_field(std::string_view key, uint64_t value) {
|
||||
if (in_tweet) {
|
||||
if (key == "retweet_count") { retweets = value; }
|
||||
if (key == "favorite_count") { favorites = value; }
|
||||
}
|
||||
}
|
||||
void end_object() {
|
||||
if (in_tweet) {
|
||||
cout << "[redacted] (" << retweets << " retweets / " << favorites << " favorites): " << text << endl;
|
||||
in_tweet = false;
|
||||
} else if (in_statuses) {
|
||||
in_statuses = false;
|
||||
}
|
||||
}
|
||||
};
|
||||
sax::parser parser;
|
||||
parser.parse(twitter_callbacks());
|
||||
```
|
||||
|
||||
This is a startling amount of code, requiring mental gymnastics even to read, and in order to get it
|
||||
this small and illustrate basic usage, *it has bugs* and skips over parsing user.screen_name
|
||||
entirely. The real implementation is much, much harder to write (and to read).
|
||||
|
||||
Pros and cons of SAX (SAJ):
|
||||
* Speed and space benefits from low, predictable memory usage
|
||||
* Some SAX APIs can lazily parse numbers/strings, another performance win (pay for what you use)
|
||||
* Performance drain from context blindness (switch statement for "where am I in the document")
|
||||
* Startlingly difficult to use
|
||||
|
||||
### Schema-Based Parser Generators
|
||||
|
||||
There is another breed of parser, commonly used to generate REST API clients, which is in principle
|
||||
capable of fixing most of the issues with DOM and SAX. These parsers take a schema--a description of
|
||||
your JSON, with field names, types, everything--and generate classes/structs in your language of
|
||||
choice, as well as a parser to deserialize the JSON into those structs. (In another variant, you
|
||||
define your own struct and a preprocessor inspects it and generates a JSON parser for it.)
|
||||
|
||||
Not all of these schema-based parser generators actually generate a parser or even optimize for
|
||||
streaming, but they are *able* to.
|
||||
|
||||
Some of the features help entirely eliminate the DOM and SAX issues:
|
||||
|
||||
Pros and cons:
|
||||
* Ease of Use is on par with DOM
|
||||
* Parsers that generate iterators and lazy values in structs can keep memory pressure down to SAX levels.
|
||||
* Type Blindness can be entirely solved with specific parsers for each type, saving many branches.
|
||||
* Context Blindness can be solved, especially if object fields are required and in order, saving
|
||||
even more branches.
|
||||
* Scenarios are limited by declarative language (often limited to deserialization-to-objects)
|
||||
|
||||
Rust's serde does a lot of the necessary legwork here, for example. (Editor's Note: I don't know
|
||||
*how much* it does, but I know it does a decent amount, and is very fast.)
|
||||
|
||||
### Type Blindness and Branch Misprediction
|
||||
|
||||
The DOM parsing model, and even the SAX model to a great extent, suffers from **type
|
||||
blindness:** you, the user, almost always know exactly what fields and what types are in your JSON,
|
||||
but the parser doesn't. When you say `json_parser.parse(json)`, the parser doesn't get told *any*
|
||||
of this. It has no way to know. This means it has to look at each value blind with a big "switch"
|
||||
statement, asking "is this a number? A string? A boolean? An array? An object?"
|
||||
|
||||
In modern processors, this kind of switch statement can make your program run *3-4 times slower*
|
||||
than it needs to. This is because of the high cost of branch misprediction.
|
||||
|
||||
Modern processors have more than one core, even on a single thread. To go fast, each of these cores
|
||||
"reads ahead" in your program, each picking different instructions to run (as soon as data is
|
||||
available). If all the cores are working almost all the time, your single-threaded program will run
|
||||
around 4 instructions per cycle--*4 times faster* than it would at one instruction per cycle.
|
||||
|
||||
Most modern programs don't manage to get much past 1 instruction per cycle, however. This is
|
||||
because of branch misprediction. Programs have a lot of if statements, so to read ahead, processors
|
||||
guess which branch will be taken and read ahead from that branch. If it guesses wrong, all that
|
||||
wonderful work it did is discarded, and it starts over from the if statement. It *was* running at
|
||||
4x speed, but it was all wasted work!
|
||||
|
||||
And this brings us back to that switch statement. Type blindness means the processor essentially has
|
||||
to guess, for every JSON value, whether it will be an array, an object, number, string or boolean.
|
||||
Unless your file is something ridiculously predictable, like a giant array of numbers, it's going to
|
||||
trip up a lot. (Processors get better about this all the time, but for something complex like this
|
||||
there is only so much it can do in the tiny amount of time it has to guess.)
|
||||
|
||||
On Demand parsing is tailor-made to solve this problem at the source, parsing values only after the
|
||||
user declares their type by asking for a double, an int, a string, etc.
|
||||
|
||||
Algorithm
|
||||
---------
|
||||
|
||||
To help visualize the algorithm, we'll walk through the example C++ given at the top, for this JSON:
|
||||
|
||||
```json
|
||||
{
|
||||
"statuses": [
|
||||
{ "id": 1, "text": "first!", "user": { "screen_name": "lemire", "name": "Daniel" }, "favorite_count": 100, "retweet_count": 40 },
|
||||
{ "id": 2, "text": "second!", "user": { "screen_name": "jkeiser2", "name": "John" }, "favorite_count": 2, "retweet_count": 3 }
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
### Starting the iteration
|
||||
|
||||
1. First, we declare a parser object that keeps internal buffers necessary for parsing. This can be
|
||||
reused to parse multiple JSON files, so you don't pay the high cost of allocating memory every
|
||||
time (and so it can stay in cache!).
|
||||
|
||||
This declaration doesn't actually allocate any memory; that will happen in the next step.
|
||||
|
||||
```c++
|
||||
ondemand::parser parser;
|
||||
```
|
||||
|
||||
2. We then start iterating the JSON document by allocating internal parser buffers, preprocessing
|
||||
the JSON, and initializing the iterator.
|
||||
|
||||
```c++
|
||||
auto doc = parser.iterate(json);
|
||||
```
|
||||
|
||||
Since this is the first time this parser has been used, iterate() first allocates internal
|
||||
parser buffers. When reusing an existing parser, allocation
|
||||
only happens if the new document is bigger than internal buffers can handle. This is the only
|
||||
place On Demand does allocation.
|
||||
|
||||
simdjson then preprocesses the JSON text at high speed, finding all tokens (i.e. the starting
|
||||
position of any JSON value, as well as any important operators like `,`, `:`, `]` or `}`).
|
||||
|
||||
Finally, a `document` iterator is created, initialized at the position of the first value in the
|
||||
`json` text input. The document iterator is bumped forward by array / object iterators and
|
||||
object[] lookup, and must be kept around until iteration is complete.
|
||||
|
||||
This operation can fail! The result type here is actually `simdjson_result<document>`.
|
||||
simdjson uses simdjson_result whenever a value needs to be returned, but the function could fail.
|
||||
It has an error_code and a document in it, and was designed to allow you to use either error code
|
||||
checking or C++ exceptions via a direct cast `document(parser.iterate(json))`; you can use get()
|
||||
to check the error and cast to a value, or cast directly to a value.
|
||||
|
||||
But as you can see, we don't check for the failure just yet.
|
||||
|
||||
3. We iterate over the "statuses" field using a typical C++ iterator, reading past the initial
|
||||
`{ "statuses": [ {`.
|
||||
|
||||
```c++
|
||||
for (ondemand::object tweet : doc["statuses"]) {
|
||||
```
|
||||
|
||||
This shorthand does a lot of stuff, and it's helpful to see what the initial bits expand to.
|
||||
Comments in front of each one explain what's going on:
|
||||
|
||||
```c++
|
||||
// Validate that the top-level value is an object: check for {
|
||||
ondemand::object top = doc.get_object();
|
||||
|
||||
// Find the field statuses by:
|
||||
// 1. Check whether the object is empty (check for }). (TODO we don't really need to do this unless the key lookup fails!)
|
||||
// 2. Check if we're at the field by looking for the string "statuses".
|
||||
// 3. Validate that there is a `:` after it.
|
||||
auto tweets_field = top["statuses"];
|
||||
|
||||
// Validate that the field value is an array: check for [
|
||||
// Also mark the array as finished if there is a ] next, which would cause the while () statement to exit immediately.
|
||||
ondemand::array tweets = tweets_field.get_array();
|
||||
// These three method calls do nothing substantial (the real checking happens in get_array() and ++)
|
||||
// != checks whether the array is marked as finished (if we have found a ]).
|
||||
ondemand::array_iterator tweets_iter = tweets.begin();
|
||||
while (tweets_iter != tweets.end()) {
|
||||
auto tweet_value = *tweets_iter;
|
||||
|
||||
// Validate that the array element is an object: check for {
|
||||
ondemand::object tweet = tweet_value.get_object();
|
||||
...
|
||||
}
|
||||
```
|
||||
|
||||
The one bit of shorthand that's not explained there is *error chaining*.
|
||||
Generally, you can use `document` methods on a `simdjson_result<document>` (this applies to all
|
||||
simdjson types); any errors will just be passed down the chain. Many method calls
|
||||
can be chained like this. So `for (object tweet : doc["statuses"])`, which is the equivalent of
|
||||
`object tweet = *(doc.get_object()["statuses"].get_array().begin()).get_object()` (what
|
||||
a mouthful!), could fail in any of 6 method calls, and the error will only be checked at the end,
|
||||
when you attempt to cast the final `simdjson_result<object>` to object (an exception will be
|
||||
thrown if there was an error).
|
||||
|
||||
4. We get the `"text"` field as a string.
|
||||
|
||||
```c++
|
||||
std::string_view text = tweet["text"];
|
||||
```
|
||||
|
||||
First, `["text"]` skips the `"id"` field because it doesn't match: skips the key, `:` and
|
||||
value (`1`). We then check whether there are more fields by looking for either `,`
|
||||
or `}`.
|
||||
|
||||
The second field is matched (`"text"`), so we validate the `:` and move to the actual value.
|
||||
|
||||
NOTE: `["text"]` does a *raw match*, comparing the key directly against the raw JSON. This means
|
||||
that keys with escapes in them may not be matched.
|
||||
|
||||
To convert to a string, we check for `"` and use simdjson's fast unescaping algorithm to copy
|
||||
`first!` (plus a terminating `\0`) into a buffer managed by the `document`. This buffer stores
|
||||
all strings from a single iteration. The next string will be written after the `\0`.
|
||||
|
||||
A `string_view` is returned which points to that buffer, and contains the length.
|
||||
|
||||
4. We get the `"screen_name"` from the `"user"` object.
|
||||
|
||||
```c++
|
||||
std::string_view screen_name = tweet["user"]["screen_name"];
|
||||
```
|
||||
|
||||
First, `["user"]` checks whether there are any more object fields by looking for either `,` or
|
||||
`}`. Then it matches `"user"` and validates the `:`.
|
||||
|
||||
`["screen_name"]` then converts to object, checking for `{`, and finds `"screen_name"`.
|
||||
|
||||
To convert to string, `lemire` is written to the document's string buffer, which now has *two*
|
||||
string_views pointing into it, and looks like `first!\0lemire\0`.
|
||||
|
||||
Finally, the temporary user object is destroyed, causing it to skip the remainder of the object
|
||||
(`}`).
|
||||
|
||||
5. We get `"retweet_count"` and `"favorite_count"` as unsigned integers.
|
||||
|
||||
```c++
|
||||
uint64_t retweets = tweet["retweet_count"];
|
||||
uint64_t favorites = tweet["favorite_count"];
|
||||
```
|
||||
|
||||
When it comes time to parse a number, we parse it immediately, directly from the JSON text, as the type the user asked for (`uint64_t` here).
|
||||
|
||||
6. We loop to the next tweet.
|
||||
|
||||
```c++
|
||||
for (ondemand::object tweet : doc["statuses"]) {
|
||||
...
|
||||
}
|
||||
```
|
||||
|
||||
The relevant parts of the loop here are:
|
||||
|
||||
```c++
|
||||
while (iter != statuses.end()) {
|
||||
ondemand::object tweet = *iter;
|
||||
...
|
||||
iter++;
|
||||
}
|
||||
```
|
||||
|
||||
First, the `tweet` destructor runs, skipping the remainder of the object (which in this case is
|
||||
just `}`).
|
||||
|
||||
Next, `iter++` checks whether there are more values and finds `,`. The loop continues.
|
||||
|
||||
Finally, `ondemand::object tweet = *iter` checks for `{` and returns the object.
|
||||
|
||||
This tweet is processed just like the previous one.
|
||||
|
||||
7. We finish the last tweet.
|
||||
|
||||
At the end of the loop, the `tweet` is first destroyed, skipping the remainder of the tweet
|
||||
object (`}`).
|
||||
|
||||
`iter++` (from `for (ondemand::object tweet : doc["statuses"])`) then checks whether there are
|
||||
more values and finds there are not (`]`). It marks the array iteration as finished and the for
|
||||
loop terminates.
|
||||
|
||||
Then the outer object is destroyed, skipping everything up to the `}`. TODO: I'm less than certain
|
||||
this actually happens: when does the temporary object actually go away, again?
|
||||
Design Features
|
||||
---------------
|
||||
|
||||
### String Parsing
|
||||
|
||||
When the user requests strings, we unescape them to a single string_buf (much like the DOM parser)
|
||||
so that users enjoy the same string performance as the core simdjson. The current_string_buf_loc is
|
||||
presently stored in the `document` (TODO: confirm where it actually lives).
|
||||
|
||||
We do not write the length to the string buffer, however; that is stored in the string_view we
|
||||
return to the user, and immediately forgotten.
|
||||
|
||||
### Object/Array Iteration
|
||||
|
||||
Because the C++ iterator contract requires iterators to be const-assignable and const-constructable,
|
||||
object and array iterators are separate classes from the object/array itself, and have an interior
|
||||
mutable reference to it.
|
||||
|
||||
### Iteration Safety
|
||||
|
||||
- If the value fails to be parsed as one type, you can try to parse it as something else until you
|
||||
succeed.
|
||||
- Safety: If the value succeeds in being parsed or converted to a type, you cannot try again. (It
|
||||
sets `json` to null, so you will segfault.) This prevents double iteration of an array (which
|
||||
will cause inconsistent iterator state) or double-unescaping a string (which could cause memory
|
||||
overruns if done too much).
|
||||
- Guaranteed Iteration: If you discard a value without using it--perhaps you just wanted to know
|
||||
if it was null but didn't care what the actual value was--it will iterate. The destructor will
|
||||
take care of this.
|
||||
|
||||
### Raw Key Lookup
|
||||
|
||||
TODO
|
||||
|
||||
### Skip Algorithm
|
||||
|
||||
TODO
|
||||
|
||||
### Root Scalar Parsing Without Malloc
|
||||
|
||||
The malloc when we parse the number / atoms at the root of the document has always bothered me a little, so I wrote alternate routines that use a stack-based buffer instead, based on the type in question. Atoms require no more than 6 bytes; integers no more than 21; and floats ... well, I [wanted your opinion on that, actually.](https://github.com/simdjson/simdjson/pull/947/files#diff-979f6706620f56f5d6a45ca3bf511669R166). I wanted to set a limit on the biggest possible float, and came up with:
|
||||
|
||||
> Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, 1074 is the maximum number of fractional digits needed to distinguish any two doubles (including zeroes and significant digits). Add 8 more digits for the other stuff (`-0.<fraction>e-308`) -- and you get 1082.
|
||||
|
||||
Technically, people could add an arbitrary number of digits after that ... but we could actually scan for that and ignore them, if we wanted. I know it's a lot of convolutions to avoid malloc / free, but I think there are really good reasons to have a 100% malloc-free library (well, we have integration points that malloc, but they are predictable and could easily be swapped out).
|
||||
|
||||
I considered just using separate algorithms, and I think for numbers in particular there is probably a way to do that without having two separate functions, but I haven't figured out the *clean* way yet.
|
|
@ -45,7 +45,12 @@ variant=sanitizers
|
|||
-DCMAKE_C_FLAGS="-fsanitize=fuzzer-no-link,address,undefined -fno-sanitize-recover=undefined" \
|
||||
-DCMAKE_BUILD_TYPE=Debug \
|
||||
-DSIMDJSON_FUZZ_LINKMAIN=Off \
|
||||
<<<<<<< HEAD
|
||||
-DSIMDJSON_FUZZ_LDFLAGS="-fsanitize=fuzzer"
|
||||
=======
|
||||
-DSIMDJSON_FUZZ_LDFLAGS=$LIB_FUZZING_ENGINE \
|
||||
-DSIMDJSON_EXCLUDE_IMPLEMENTATION=haswell
|
||||
>>>>>>> Kinder, gentler implementation selection
|
||||
|
||||
ninja all_fuzzers
|
||||
cd ..
|
||||
|
|
|
@ -72,6 +72,22 @@ SIMDJSON_DISABLE_UNDESIRED_WARNINGS
|
|||
#include "simdjson/internal/tape_ref-inl.h"
|
||||
#include "simdjson/dom/serialization-inl.h"
|
||||
|
||||
// Implementation-internal files (must be included before the implementations themselves, to keep
|
||||
// amalgamation working--otherwise, the first time a file is included, it might be put inside the
|
||||
// #ifdef SIMDJSON_IMPLEMENTATION_ARM64/FALLBACK/etc., which means the other implementations can't
|
||||
// compile unless that implementation is turned on).
|
||||
#include "simdjson/internal/isadetection.h"
|
||||
#include "simdjson/internal/jsoncharutils_tables.h"
|
||||
#include "simdjson/internal/numberparsing_tables.h"
|
||||
#include "simdjson/internal/simdprune_tables.h"
|
||||
|
||||
// Implementations
|
||||
#include "simdjson/arm64.h"
|
||||
#include "simdjson/haswell.h"
|
||||
#include "simdjson/westmere.h"
|
||||
#include "simdjson/fallback.h"
|
||||
#include "simdjson/builtin.h"
|
||||
|
||||
SIMDJSON_POP_DISABLE_WARNINGS
|
||||
|
||||
#endif // SIMDJSON_H
|
||||
|
|
|
@ -0,0 +1,49 @@
|
|||
#ifndef SIMDJSON_ARM64_H
|
||||
#define SIMDJSON_ARM64_H
|
||||
|
||||
#ifdef SIMDJSON_FALLBACK_H
|
||||
#error "arm64.h must be included before fallback.h"
|
||||
#endif
|
||||
|
||||
#include "simdjson/portability.h"
|
||||
|
||||
#include "simdjson/internal/isadetection.h"
|
||||
#include "simdjson/internal/jsoncharutils_tables.h"
|
||||
#include "simdjson/internal/numberparsing_tables.h"
|
||||
#include "simdjson/internal/simdprune_tables.h"
|
||||
|
||||
#if SIMDJSON_IMPLEMENTATION_ARM64
|
||||
|
||||
namespace simdjson {
|
||||
/**
|
||||
* Implementation for NEON (ARMv8).
|
||||
*/
|
||||
namespace arm64 {
|
||||
} // namespace arm64
|
||||
} // namespace simdjson
|
||||
|
||||
#include "simdjson/arm64/implementation.h"
|
||||
|
||||
#include "simdjson/arm64/begin.h"
|
||||
|
||||
// Declarations
|
||||
#include "simdjson/generic/dom_parser_implementation.h"
|
||||
#include "simdjson/arm64/intrinsics.h"
|
||||
#include "simdjson/arm64/bitmanipulation.h"
|
||||
#include "simdjson/arm64/bitmask.h"
|
||||
#include "simdjson/arm64/simd.h"
|
||||
#include "simdjson/generic/jsoncharutils.h"
|
||||
#include "simdjson/generic/atomparsing.h"
|
||||
#include "simdjson/arm64/stringparsing.h"
|
||||
#include "simdjson/arm64/numberparsing.h"
|
||||
#include "simdjson/generic/implementation_simdjson_result_base.h"
|
||||
#include "simdjson/generic/ondemand.h"
|
||||
|
||||
// Inline definitions
|
||||
#include "simdjson/generic/implementation_simdjson_result_base-inl.h"
|
||||
#include "simdjson/generic/ondemand-inl.h"
|
||||
#include "simdjson/arm64/end.h"
|
||||
|
||||
#endif // SIMDJSON_IMPLEMENTATION_ARM64
|
||||
|
||||
#endif // SIMDJSON_ARM64_H
|
|
@ -0,0 +1 @@
|
|||
#define SIMDJSON_IMPLEMENTATION arm64
|
|
@ -1,8 +1,9 @@
|
|||
#ifndef SIMDJSON_ARM64_BITMANIPULATION_H
|
||||
#define SIMDJSON_ARM64_BITMANIPULATION_H
|
||||
|
||||
namespace simdjson {
|
||||
namespace SIMDJSON_IMPLEMENTATION {
|
||||
namespace {
|
||||
namespace arm64 {
|
||||
|
||||
// We sometimes call trailing_zero on inputs that are zero,
|
||||
// but the algorithms do not end up using the returned value.
|
||||
|
@ -55,7 +56,8 @@ simdjson_really_inline bool add_overflow(uint64_t value1, uint64_t value2, uint6
|
|||
#endif
|
||||
}
|
||||
|
||||
} // namespace arm64
|
||||
} // unnamed namespace
|
||||
} // namespace SIMDJSON_IMPLEMENTATION
|
||||
} // namespace simdjson
|
||||
|
||||
#endif // SIMDJSON_ARM64_BITMANIPULATION_H
|
|
@ -1,8 +1,9 @@
|
|||
#ifndef SIMDJSON_ARM64_BITMASK_H
|
||||
#define SIMDJSON_ARM64_BITMASK_H
|
||||
|
||||
namespace simdjson {
|
||||
namespace SIMDJSON_IMPLEMENTATION {
|
||||
namespace {
|
||||
namespace arm64 {
|
||||
|
||||
//
|
||||
// Perform a "cumulative bitwise xor," flipping bits each time a 1 is encountered.
|
||||
|
@ -32,8 +33,8 @@ simdjson_really_inline uint64_t prefix_xor(uint64_t bitmask) {
|
|||
return bitmask;
|
||||
}
|
||||
|
||||
} // unnamed namespace
|
||||
} // namespace arm64
|
||||
} // namespace simdjson
|
||||
SIMDJSON_UNTARGET_REGION
|
||||
|
||||
#endif
|
|
@ -2,17 +2,19 @@
|
|||
#define SIMDJSON_ARM64_IMPLEMENTATION_H
|
||||
|
||||
#include "simdjson.h"
|
||||
#include "isadetection.h"
|
||||
#include "simdjson/internal/isadetection.h"
|
||||
|
||||
namespace {
|
||||
namespace simdjson {
|
||||
namespace arm64 {
|
||||
|
||||
namespace {
|
||||
using namespace simdjson;
|
||||
using namespace simdjson::dom;
|
||||
}
|
||||
|
||||
class implementation final : public simdjson::implementation {
|
||||
public:
|
||||
simdjson_really_inline implementation() : simdjson::implementation("arm64", "ARM NEON", instruction_set::NEON) {}
|
||||
simdjson_really_inline implementation() : simdjson::implementation("arm64", "ARM NEON", internal::instruction_set::NEON) {}
|
||||
SIMDJSON_WARN_UNUSED error_code create_dom_parser_implementation(
|
||||
size_t capacity,
|
||||
size_t max_length,
|
||||
|
@ -23,6 +25,6 @@ public:
|
|||
};
|
||||
|
||||
} // namespace arm64
|
||||
} // unnamed namespace
|
||||
} // namespace simdjson
|
||||
|
||||
#endif // SIMDJSON_ARM64_IMPLEMENTATION_H
|
|
@ -1,8 +1,9 @@
|
|||
#ifndef SIMDJSON_ARM64_NUMBERPARSING_H
|
||||
#define SIMDJSON_ARM64_NUMBERPARSING_H
|
||||
|
||||
namespace simdjson {
|
||||
namespace SIMDJSON_IMPLEMENTATION {
|
||||
namespace {
|
||||
namespace arm64 {
|
||||
|
||||
// we don't have SSE, so let us use a scalar function
|
||||
// credit: https://johnnylee-sde.github.io/Fast-numeric-string-to-int/
|
||||
|
@ -14,11 +15,12 @@ static simdjson_really_inline uint32_t parse_eight_digits_unrolled(const uint8_t
|
|||
return uint32_t((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32);
|
||||
}
|
||||
|
||||
} // namespace arm64
|
||||
} // unnamed namespace
|
||||
} // namespace SIMDJSON_IMPLEMENTATION
|
||||
} // namespace simdjson
|
||||
|
||||
#define SWAR_NUMBER_PARSING
|
||||
|
||||
#include "generic/stage2/numberparsing.h"
|
||||
#include "simdjson/generic/numberparsing.h"
|
||||
|
||||
#endif // SIMDJSON_ARM64_NUMBERPARSING_H
|
|
@ -2,13 +2,14 @@
|
|||
#define SIMDJSON_ARM64_SIMD_H
|
||||
|
||||
#include "simdjson.h"
|
||||
#include "simdprune_tables.h"
|
||||
#include "arm64/bitmanipulation.h"
|
||||
#include "simdjson/internal/simdprune_tables.h"
|
||||
#include "simdjson/arm64/bitmanipulation.h"
|
||||
#include <type_traits>
|
||||
|
||||
|
||||
namespace simdjson {
|
||||
namespace SIMDJSON_IMPLEMENTATION {
|
||||
namespace {
|
||||
namespace arm64 {
|
||||
namespace simd {
|
||||
|
||||
#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO
|
||||
|
@ -217,10 +218,10 @@ simdjson_really_inline int8x16_t make_int8x16_t(int8_t x1, int8_t x2, int8_t x
|
|||
simdjson_really_inline simd8<uint8_t>& operator-=(const simd8<uint8_t> other) { *this = *this - other; return *this; }
|
||||
|
||||
// Order-specific operations
|
||||
simdjson_really_inline uint8_t max() const { return vmaxvq_u8(*this); }
|
||||
simdjson_really_inline uint8_t min() const { return vminvq_u8(*this); }
|
||||
simdjson_really_inline simd8<uint8_t> max(const simd8<uint8_t> other) const { return vmaxq_u8(*this, other); }
|
||||
simdjson_really_inline simd8<uint8_t> min(const simd8<uint8_t> other) const { return vminq_u8(*this, other); }
|
||||
simdjson_really_inline uint8_t max_val() const { return vmaxvq_u8(*this); }
|
||||
simdjson_really_inline uint8_t min_val() const { return vminvq_u8(*this); }
|
||||
simdjson_really_inline simd8<uint8_t> max_val(const simd8<uint8_t> other) const { return vmaxq_u8(*this, other); }
|
||||
simdjson_really_inline simd8<uint8_t> min_val(const simd8<uint8_t> other) const { return vminq_u8(*this, other); }
|
||||
simdjson_really_inline simd8<bool> operator<=(const simd8<uint8_t> other) const { return vcleq_u8(*this, other); }
|
||||
simdjson_really_inline simd8<bool> operator>=(const simd8<uint8_t> other) const { return vcgeq_u8(*this, other); }
|
||||
simdjson_really_inline simd8<bool> operator<(const simd8<uint8_t> other) const { return vcltq_u8(*this, other); }
|
||||
|
@ -232,7 +233,7 @@ simdjson_really_inline int8x16_t make_int8x16_t(int8_t x1, int8_t x2, int8_t x
|
|||
|
||||
// Bit-specific operations
|
||||
simdjson_really_inline simd8<bool> any_bits_set(simd8<uint8_t> bits) const { return vtstq_u8(*this, bits); }
|
||||
simdjson_really_inline bool any_bits_set_anywhere() const { return this->max() != 0; }
|
||||
simdjson_really_inline bool any_bits_set_anywhere() const { return this->max_val() != 0; }
|
||||
simdjson_really_inline bool any_bits_set_anywhere(simd8<uint8_t> bits) const { return (*this & bits).any_bits_set_anywhere(); }
|
||||
template<int N>
|
||||
simdjson_really_inline simd8<uint8_t> shr() const { return vshrq_n_u8(*this, N); }
|
||||
|
@ -255,6 +256,9 @@ simdjson_really_inline int8x16_t make_int8x16_t(int8_t x1, int8_t x2, int8_t x
|
|||
// sensible, but the AVX ISA makes this kind of approach difficult.
|
||||
template<typename L>
|
||||
simdjson_really_inline void compress(uint16_t mask, L * output) const {
|
||||
using internal::thintable_epi8;
|
||||
using internal::BitsSetTable256mul2;
|
||||
using internal::pshufb_combine_table;
|
||||
// this particular implementation was inspired by work done by @animetosho
|
||||
// we do it in two steps, first 8 bytes and then second 8 bytes
|
||||
uint8_t mask1 = uint8_t(mask); // least significant 8 bits
|
||||
|
@ -374,8 +378,8 @@ simdjson_really_inline int8x16_t make_int8x16_t(int8_t x1, int8_t x2, int8_t x
|
|||
simdjson_really_inline simd8<int8_t>& operator-=(const simd8<int8_t> other) { *this = *this - other; return *this; }
|
||||
|
||||
// Order-sensitive comparisons
|
||||
simdjson_really_inline simd8<int8_t> max(const simd8<int8_t> other) const { return vmaxq_s8(*this, other); }
|
||||
simdjson_really_inline simd8<int8_t> min(const simd8<int8_t> other) const { return vminq_s8(*this, other); }
|
||||
simdjson_really_inline simd8<int8_t> max_val(const simd8<int8_t> other) const { return vmaxq_s8(*this, other); }
|
||||
simdjson_really_inline simd8<int8_t> min_val(const simd8<int8_t> other) const { return vminq_s8(*this, other); }
|
||||
simdjson_really_inline simd8<bool> operator>(const simd8<int8_t> other) const { return vcgtq_s8(*this, other); }
|
||||
simdjson_really_inline simd8<bool> operator<(const simd8<int8_t> other) const { return vcltq_s8(*this, other); }
|
||||
simdjson_really_inline simd8<bool> operator==(const simd8<int8_t> other) const { return vceqq_s8(*this, other); }
|
||||
|
@ -484,7 +488,8 @@ simdjson_really_inline int8x16_t make_int8x16_t(int8_t x1, int8_t x2, int8_t x
|
|||
}; // struct simd8x64<T>
|
||||
|
||||
} // namespace simd
|
||||
} // namespace arm64
|
||||
} // unnamed namespace
|
||||
} // namespace SIMDJSON_IMPLEMENTATION
|
||||
} // namespace simdjson
|
||||
|
||||
#endif // SIMDJSON_ARM64_SIMD_H
|
|
@ -2,11 +2,12 @@
|
|||
#define SIMDJSON_ARM64_STRINGPARSING_H
|
||||
|
||||
#include "simdjson.h"
|
||||
#include "arm64/simd.h"
|
||||
#include "arm64/bitmanipulation.h"
|
||||
#include "simdjson/arm64/simd.h"
|
||||
#include "simdjson/arm64/bitmanipulation.h"
|
||||
|
||||
namespace simdjson {
|
||||
namespace SIMDJSON_IMPLEMENTATION {
|
||||
namespace {
|
||||
namespace arm64 {
|
||||
|
||||
using namespace simd;
|
||||
|
||||
|
@ -43,9 +44,10 @@ simdjson_really_inline backslash_and_quote backslash_and_quote::copy_and_find(co
|
|||
};
|
||||
}
|
||||
|
||||
} // namespace arm64
|
||||
} // unnamed namespace
|
||||
} // namespace SIMDJSON_IMPLEMENTATION
|
||||
} // namespace simdjson
|
||||
|
||||
#include "generic/stage2/stringparsing.h"
|
||||
#include "simdjson/generic/stringparsing.h"
|
||||
|
||||
#endif // SIMDJSON_ARM64_STRINGPARSING_H
|
|
@ -0,0 +1,34 @@
|
|||
#ifndef SIMDJSON_BUILTIN_H
|
||||
#define SIMDJSON_BUILTIN_H
|
||||
|
||||
#include "simdjson/portability.h"
|
||||
|
||||
#ifndef SIMDJSON_BUILTIN_IMPLEMENTATION
|
||||
#if SIMDJSON_CAN_ALWAYS_RUN_HASWELL
|
||||
#define SIMDJSON_BUILTIN_IMPLEMENTATION haswell
|
||||
#elif SIMDJSON_CAN_ALWAYS_RUN_WESTMERE
|
||||
#define SIMDJSON_BUILTIN_IMPLEMENTATION westmere
|
||||
#elif SIMDJSON_CAN_ALWAYS_RUN_ARM64
|
||||
#define SIMDJSON_BUILTIN_IMPLEMENTATION arm64
|
||||
#elif SIMDJSON_CAN_ALWAYS_RUN_FALLBACK
|
||||
#define SIMDJSON_BUILTIN_IMPLEMENTATION fallback
|
||||
#else
|
||||
#error "All possible implementations (including fallback) have been disabled! simdjson will not run."
|
||||
#endif
|
||||
#endif // SIMDJSON_BUILTIN_IMPLEMENTATION
|
||||
|
||||
namespace simdjson {
|
||||
/**
|
||||
* Represents the best statically linked simdjson implementation that can be used by the compiling
|
||||
* program.
|
||||
*
|
||||
* Detects what options the program is compiled against, and picks the minimum implementation that
|
||||
* will work on any computer that can run the program. For example, if you compile with g++
|
||||
* -march=westmere, it will pick the westmere implementation. The haswell implementation will
|
||||
* still be available, and can be selected at runtime, but the builtin implementation (and any
|
||||
* code that uses it) will use westmere.
|
||||
*/
|
||||
namespace builtin = SIMDJSON_BUILTIN_IMPLEMENTATION;
|
||||
} // namespace simdjson
|
||||
|
||||
#endif // SIMDJSON_BUILTIN_H
|
|
@ -205,8 +205,8 @@ namespace std {
|
|||
#endif // SIMDJSON_HAS_STRING_VIEW
|
||||
#undef SIMDJSON_HAS_STRING_VIEW // We are not going to need this macro anymore.
|
||||
|
||||
|
||||
|
||||
/// If EXPR is an error, returns it.
|
||||
#define SIMDJSON_TRY(EXPR) { auto _err = (EXPR); if (_err) { return _err; } }
|
||||
|
||||
|
||||
/**
|
||||
|
@ -253,5 +253,4 @@ namespace std {
|
|||
* End of the crazy locale headers.
|
||||
*/
|
||||
|
||||
|
||||
#endif // SIMDJSON_COMMON_DEFS_H
|
||||
|
|
|
@ -37,7 +37,6 @@ public:
|
|||
* Get the next value.
|
||||
*
|
||||
* Part of the std::iterator interface.
|
||||
*
|
||||
*/
|
||||
inline iterator& operator++() noexcept;
|
||||
/**
|
||||
|
|
|
@ -23,28 +23,28 @@ class element;
|
|||
static constexpr size_t DEFAULT_BATCH_SIZE = 1000000;
|
||||
|
||||
/**
|
||||
* A persistent document parser.
|
||||
*
|
||||
* The parser is designed to be reused, holding the internal buffers necessary to do parsing,
|
||||
* as well as memory for a single document. The parsed document is overwritten on each parse.
|
||||
*
|
||||
* This class cannot be copied, only moved, to avoid unintended allocations.
|
||||
*
|
||||
* @note This is not thread safe: one parser cannot produce two documents at the same time!
|
||||
*/
|
||||
* A persistent document parser.
|
||||
*
|
||||
* The parser is designed to be reused, holding the internal buffers necessary to do parsing,
|
||||
* as well as memory for a single document. The parsed document is overwritten on each parse.
|
||||
*
|
||||
* This class cannot be copied, only moved, to avoid unintended allocations.
|
||||
*
|
||||
* @note This is not thread safe: one parser cannot produce two documents at the same time!
|
||||
*/
|
||||
class parser {
|
||||
public:
|
||||
/**
|
||||
* Create a JSON parser.
|
||||
*
|
||||
* The new parser will have zero capacity.
|
||||
*
|
||||
* @param max_capacity The maximum document length the parser can automatically handle. The parser
|
||||
* will allocate more capacity on an as needed basis (when it sees documents too big to handle)
|
||||
* up to this amount. The parser still starts with zero capacity no matter what this number is:
|
||||
* to allocate an initial capacity, call allocate() after constructing the parser.
|
||||
* Defaults to SIMDJSON_MAXSIZE_BYTES (the largest single document simdjson can process).
|
||||
*/
|
||||
* Create a JSON parser.
|
||||
*
|
||||
* The new parser will have zero capacity.
|
||||
*
|
||||
* @param max_capacity The maximum document length the parser can automatically handle. The parser
|
||||
* will allocate more capacity on an as needed basis (when it sees documents too big to handle)
|
||||
* up to this amount. The parser still starts with zero capacity no matter what this number is:
|
||||
* to allocate an initial capacity, call allocate() after constructing the parser.
|
||||
* Defaults to SIMDJSON_MAXSIZE_BYTES (the largest single document simdjson can process).
|
||||
*/
|
||||
simdjson_really_inline explicit parser(size_t max_capacity = SIMDJSON_MAXSIZE_BYTES) noexcept;
|
||||
/**
|
||||
* Take another parser's buffers and state.
|
||||
|
|
|
@ -67,11 +67,16 @@ simdjson_really_inline error_code simdjson_result_base<T>::error() const noexcep
|
|||
#if SIMDJSON_EXCEPTIONS
|
||||
|
||||
template<typename T>
|
||||
simdjson_really_inline T& simdjson_result_base<T>::value() noexcept(false) {
|
||||
simdjson_really_inline T& simdjson_result_base<T>::value() & noexcept(false) {
|
||||
if (error()) { throw simdjson_error(error()); }
|
||||
return this->first;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
simdjson_really_inline T&& simdjson_result_base<T>::value() && noexcept(false) {
|
||||
return std::forward<simdjson_result_base<T>>(*this).take_value();
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
simdjson_really_inline T&& simdjson_result_base<T>::take_value() && noexcept(false) {
|
||||
if (error()) { throw simdjson_error(error()); }
|
||||
|
@ -122,10 +127,15 @@ simdjson_really_inline error_code simdjson_result<T>::error() const noexcept {
|
|||
#if SIMDJSON_EXCEPTIONS
|
||||
|
||||
template<typename T>
|
||||
simdjson_really_inline T& simdjson_result<T>::value() noexcept(false) {
|
||||
simdjson_really_inline T& simdjson_result<T>::value() & noexcept(false) {
|
||||
return internal::simdjson_result_base<T>::value();
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
simdjson_really_inline T&& simdjson_result<T>::value() && noexcept(false) {
|
||||
return std::forward<internal::simdjson_result_base<T>>(*this).value();
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
simdjson_really_inline T&& simdjson_result<T>::take_value() && noexcept(false) {
|
||||
return std::forward<internal::simdjson_result_base<T>>(*this).take_value();
|
||||
|
|
|
@ -34,6 +34,7 @@ enum error_code {
|
|||
INVALID_JSON_POINTER, ///< Invalid JSON pointer reference
|
||||
INVALID_URI_FRAGMENT, ///< Invalid URI fragment
|
||||
UNEXPECTED_ERROR, ///< indicative of a bug in simdjson
|
||||
PARSER_IN_USE, ///< parser is already in use.
|
||||
/** @private Number of error codes */
|
||||
NUM_ERROR_CODES
|
||||
};
|
||||
|
@ -145,7 +146,14 @@ struct simdjson_result_base : public std::pair<T, error_code> {
|
|||
*
|
||||
* @throw simdjson_error if there was an error.
|
||||
*/
|
||||
simdjson_really_inline T& value() noexcept(false);
|
||||
simdjson_really_inline T& value() & noexcept(false);
|
||||
|
||||
/**
|
||||
* Take the result value (move it).
|
||||
*
|
||||
* @throw simdjson_error if there was an error.
|
||||
*/
|
||||
simdjson_really_inline T&& value() && noexcept(false);
|
||||
|
||||
/**
|
||||
* Take the result value (move it).
|
||||
|
@ -217,7 +225,14 @@ struct simdjson_result : public internal::simdjson_result_base<T> {
|
|||
*
|
||||
* @throw simdjson_error if there was an error.
|
||||
*/
|
||||
simdjson_really_inline T& value() noexcept(false);
|
||||
simdjson_really_inline T& value() & noexcept(false);
|
||||
|
||||
/**
|
||||
* Take the result value (move it).
|
||||
*
|
||||
* @throw simdjson_error if there was an error.
|
||||
*/
|
||||
simdjson_really_inline T&& value() && noexcept(false);
|
||||
|
||||
/**
|
||||
* Take the result value (move it).
|
||||
|
|
|
@ -0,0 +1,43 @@
|
|||
#ifndef SIMDJSON_FALLBACK_H
|
||||
#define SIMDJSON_FALLBACK_H
|
||||
|
||||
#include "simdjson/portability.h"
|
||||
|
||||
// Default Fallback to on, even if another implementation can always run (the stricter condition is commented out below).
|
||||
#ifndef SIMDJSON_IMPLEMENTATION_FALLBACK
|
||||
#define SIMDJSON_IMPLEMENTATION_FALLBACK 1 // (!SIMDJSON_CAN_ALWAYS_RUN_ARM64 && !SIMDJSON_CAN_ALWAYS_RUN_HASWELL && !SIMDJSON_CAN_ALWAYS_RUN_WESTMERE)
|
||||
#endif
|
||||
#define SIMDJSON_CAN_ALWAYS_RUN_FALLBACK SIMDJSON_IMPLEMENTATION_FALLBACK
|
||||
|
||||
#if SIMDJSON_IMPLEMENTATION_FALLBACK
|
||||
|
||||
namespace simdjson {
|
||||
/**
|
||||
* Fallback implementation (runs on any machine).
|
||||
*/
|
||||
namespace fallback {
|
||||
} // namespace fallback
|
||||
} // namespace simdjson
|
||||
|
||||
#include "simdjson/fallback/implementation.h"
|
||||
|
||||
#include "simdjson/fallback/begin.h"
|
||||
|
||||
// Declarations
|
||||
#include "simdjson/generic/dom_parser_implementation.h"
|
||||
#include "simdjson/fallback/bitmanipulation.h"
|
||||
#include "simdjson/generic/jsoncharutils.h"
|
||||
#include "simdjson/generic/atomparsing.h"
|
||||
#include "simdjson/fallback/stringparsing.h"
|
||||
#include "simdjson/fallback/numberparsing.h"
|
||||
#include "simdjson/generic/implementation_simdjson_result_base.h"
|
||||
#include "simdjson/generic/ondemand.h"
|
||||
|
||||
// Inline definitions
|
||||
#include "simdjson/generic/implementation_simdjson_result_base-inl.h"
|
||||
#include "simdjson/generic/ondemand-inl.h"
|
||||
|
||||
#include "simdjson/fallback/end.h"
|
||||
|
||||
#endif // SIMDJSON_IMPLEMENTATION_FALLBACK
|
||||
#endif // SIMDJSON_FALLBACK_H
|
|
@ -0,0 +1 @@
|
|||
#define SIMDJSON_IMPLEMENTATION fallback
|
|
@ -4,8 +4,9 @@
|
|||
#include "simdjson.h"
|
||||
#include <limits>
|
||||
|
||||
namespace simdjson {
|
||||
namespace SIMDJSON_IMPLEMENTATION {
|
||||
namespace {
|
||||
namespace fallback {
|
||||
|
||||
#if defined(_MSC_VER) && !defined(_M_ARM64) && !defined(_M_X64)
|
||||
static inline unsigned char _BitScanForward64(unsigned long* ret, uint64_t x) {
|
||||
|
@ -39,7 +40,8 @@ simdjson_really_inline int leading_zeroes(uint64_t input_num) {
|
|||
#endif// _MSC_VER
|
||||
}
|
||||
|
||||
} // namespace fallback
|
||||
} // unnamed namespace
|
||||
} // namespace SIMDJSON_IMPLEMENTATION
|
||||
} // namespace simdjson
|
||||
|
||||
#endif // SIMDJSON_FALLBACK_BITMANIPULATION_H
|
|
@ -1,14 +1,15 @@
|
|||
#ifndef SIMDJSON_FALLBACK_IMPLEMENTATION_H
|
||||
#define SIMDJSON_FALLBACK_IMPLEMENTATION_H
|
||||
|
||||
#include "simdjson.h"
|
||||
#include "isadetection.h"
|
||||
#include "simdjson/implementation.h"
|
||||
|
||||
namespace {
|
||||
namespace simdjson {
|
||||
namespace fallback {
|
||||
|
||||
namespace {
|
||||
using namespace simdjson;
|
||||
using namespace simdjson::dom;
|
||||
}
|
||||
|
||||
class implementation final : public simdjson::implementation {
|
||||
public:
|
||||
|
@ -27,6 +28,6 @@ public:
|
|||
};
|
||||
|
||||
} // namespace fallback
|
||||
} // unnamed namespace
|
||||
} // namespace simdjson
|
||||
|
||||
#endif // SIMDJSON_FALLBACK_IMPLEMENTATION_H
|
|
@ -8,8 +8,9 @@ void found_unsigned_integer(uint64_t result, const uint8_t *buf);
|
|||
void found_float(double result, const uint8_t *buf);
|
||||
#endif
|
||||
|
||||
namespace {
|
||||
namespace simdjson {
|
||||
namespace SIMDJSON_IMPLEMENTATION {
|
||||
namespace {
|
||||
static simdjson_really_inline uint32_t parse_eight_digits_unrolled(const char *chars) {
|
||||
uint32_t result = 0;
|
||||
for (int i=0;i<8;i++) {
|
||||
|
@ -21,10 +22,11 @@ static simdjson_really_inline uint32_t parse_eight_digits_unrolled(const uint8_t
|
|||
return parse_eight_digits_unrolled((const char *)chars);
|
||||
}
|
||||
|
||||
} // namespace SIMDJSON_IMPLEMENTATION
|
||||
} // unnamed namespace
|
||||
} // namespace SIMDJSON_IMPLEMENTATION
|
||||
} // namespace simdjson
|
||||
|
||||
#define SWAR_NUMBER_PARSING
|
||||
#include "generic/stage2/numberparsing.h"
|
||||
#include "simdjson/generic/numberparsing.h"
|
||||
|
||||
#endif // SIMDJSON_FALLBACK_NUMBERPARSING_H
|
|
@ -3,8 +3,9 @@
|
|||
|
||||
#include "simdjson.h"
|
||||
|
||||
namespace simdjson {
|
||||
namespace SIMDJSON_IMPLEMENTATION {
|
||||
namespace {
|
||||
namespace fallback {
|
||||
|
||||
// Holds backslashes and quotes locations.
|
||||
struct backslash_and_quote {
|
||||
|
@ -26,9 +27,10 @@ simdjson_really_inline backslash_and_quote backslash_and_quote::copy_and_find(co
|
|||
return { src[0] };
|
||||
}
|
||||
|
||||
} // namespace fallback
|
||||
} // unnamed namespace
|
||||
} // namespace SIMDJSON_IMPLEMENTATION
|
||||
} // namespace simdjson
|
||||
|
||||
#include "generic/stage2/stringparsing.h"
|
||||
#include "simdjson/generic/stringparsing.h"
|
||||
|
||||
#endif // SIMDJSON_FALLBACK_STRINGPARSING_H
|
|
@ -1,6 +1,7 @@
|
|||
namespace {
|
||||
namespace simdjson {
|
||||
namespace SIMDJSON_IMPLEMENTATION {
|
||||
namespace stage2 {
|
||||
namespace {
|
||||
/// @private
|
||||
namespace atomparsing {
|
||||
|
||||
// The string_to_uint32 is exclusively used to map literal strings to 32-bit values.
|
||||
|
@ -24,7 +25,7 @@ simdjson_really_inline uint32_t str4ncmp(const uint8_t *src, const char* atom) {
|
|||
|
||||
SIMDJSON_WARN_UNUSED
|
||||
simdjson_really_inline bool is_valid_true_atom(const uint8_t *src) {
|
||||
return (str4ncmp(src, "true") | is_not_structural_or_whitespace(src[4])) == 0;
|
||||
return (str4ncmp(src, "true") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0;
|
||||
}
|
||||
|
||||
SIMDJSON_WARN_UNUSED
|
||||
|
@ -36,7 +37,7 @@ simdjson_really_inline bool is_valid_true_atom(const uint8_t *src, size_t len) {
|
|||
|
||||
SIMDJSON_WARN_UNUSED
|
||||
simdjson_really_inline bool is_valid_false_atom(const uint8_t *src) {
|
||||
return (str4ncmp(src+1, "alse") | is_not_structural_or_whitespace(src[5])) == 0;
|
||||
return (str4ncmp(src+1, "alse") | jsoncharutils::is_not_structural_or_whitespace(src[5])) == 0;
|
||||
}
|
||||
|
||||
SIMDJSON_WARN_UNUSED
|
||||
|
@ -48,7 +49,7 @@ simdjson_really_inline bool is_valid_false_atom(const uint8_t *src, size_t len)
|
|||
|
||||
SIMDJSON_WARN_UNUSED
|
||||
simdjson_really_inline bool is_valid_null_atom(const uint8_t *src) {
|
||||
return (str4ncmp(src, "null") | is_not_structural_or_whitespace(src[4])) == 0;
|
||||
return (str4ncmp(src, "null") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0;
|
||||
}
|
||||
|
||||
SIMDJSON_WARN_UNUSED
|
||||
|
@ -59,6 +60,6 @@ simdjson_really_inline bool is_valid_null_atom(const uint8_t *src, size_t len) {
|
|||
}
|
||||
|
||||
} // namespace atomparsing
|
||||
} // namespace stage2
|
||||
} // namespace SIMDJSON_IMPLEMENTATION
|
||||
} // unnamed namespace
|
||||
} // namespace SIMDJSON_IMPLEMENTATION
|
||||
} // namespace simdjson
|
|
@ -0,0 +1,80 @@
|
|||
#include "simdjson.h"
|
||||
#include "simdjson/internal/isadetection.h"
|
||||
|
||||
namespace simdjson {
|
||||
namespace SIMDJSON_IMPLEMENTATION {
|
||||
|
||||
// expectation: sizeof(open_container) = 64/8.
|
||||
struct open_container {
|
||||
uint32_t tape_index; // where, on the tape, does the scope ([,{) begins
|
||||
uint32_t count; // how many elements in the scope
|
||||
}; // struct open_container
|
||||
|
||||
static_assert(sizeof(open_container) == 64/8, "Open container must be 64 bits");
|
||||
|
||||
class dom_parser_implementation final : public internal::dom_parser_implementation {
|
||||
public:
|
||||
/** Tape location of each open { or [ */
|
||||
std::unique_ptr<open_container[]> open_containers{};
|
||||
/** Whether each open container is a [ or { */
|
||||
std::unique_ptr<bool[]> is_array{};
|
||||
/** Buffer passed to stage 1 */
|
||||
const uint8_t *buf{};
|
||||
/** Length passed to stage 1 */
|
||||
size_t len{0};
|
||||
/** Document passed to stage 2 */
|
||||
dom::document *doc{};
|
||||
|
||||
inline dom_parser_implementation() noexcept;
|
||||
inline dom_parser_implementation(dom_parser_implementation &&other) noexcept;
|
||||
inline dom_parser_implementation &operator=(dom_parser_implementation &&other) noexcept;
|
||||
dom_parser_implementation(const dom_parser_implementation &) = delete;
|
||||
dom_parser_implementation &operator=(const dom_parser_implementation &) = delete;
|
||||
|
||||
SIMDJSON_WARN_UNUSED error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept final;
|
||||
SIMDJSON_WARN_UNUSED error_code stage1(const uint8_t *buf, size_t len, bool partial) noexcept final;
|
||||
SIMDJSON_WARN_UNUSED error_code check_for_unclosed_array() noexcept;
|
||||
SIMDJSON_WARN_UNUSED error_code stage2(dom::document &doc) noexcept final;
|
||||
SIMDJSON_WARN_UNUSED error_code stage2_next(dom::document &doc) noexcept final;
|
||||
inline SIMDJSON_WARN_UNUSED error_code set_capacity(size_t capacity) noexcept final;
|
||||
inline SIMDJSON_WARN_UNUSED error_code set_max_depth(size_t max_depth) noexcept final;
|
||||
private:
|
||||
simdjson_really_inline SIMDJSON_WARN_UNUSED error_code set_capacity_stage1(size_t capacity);
|
||||
|
||||
};
|
||||
|
||||
} // namespace SIMDJSON_IMPLEMENTATION
|
||||
} // namespace simdjson
|
||||
|
||||
namespace simdjson {
|
||||
namespace SIMDJSON_IMPLEMENTATION {
|
||||
|
||||
inline dom_parser_implementation::dom_parser_implementation() noexcept = default;
|
||||
inline dom_parser_implementation::dom_parser_implementation(dom_parser_implementation &&other) noexcept = default;
|
||||
inline dom_parser_implementation &dom_parser_implementation::operator=(dom_parser_implementation &&other) noexcept = default;
|
||||
|
||||
// Leaving these here so they can be inlined if so desired
|
||||
inline SIMDJSON_WARN_UNUSED error_code dom_parser_implementation::set_capacity(size_t capacity) noexcept {
|
||||
// Stage 1 index output
|
||||
size_t max_structures = SIMDJSON_ROUNDUP_N(capacity, 64) + 2 + 7;
|
||||
structural_indexes.reset( new (std::nothrow) uint32_t[max_structures] );
|
||||
if (!structural_indexes) { _capacity = 0; return MEMALLOC; }
|
||||
structural_indexes[0] = 0;
|
||||
n_structural_indexes = 0;
|
||||
|
||||
_capacity = capacity;
|
||||
return SUCCESS;
|
||||
}
|
||||
|
||||
inline SIMDJSON_WARN_UNUSED error_code dom_parser_implementation::set_max_depth(size_t max_depth) noexcept {
|
||||
// Stage 2 stacks
|
||||
open_containers.reset(new (std::nothrow) open_container[max_depth]);
|
||||
is_array.reset(new (std::nothrow) bool[max_depth]);
|
||||
if (!is_array || !open_containers) { _max_depth = 0; return MEMALLOC; }
|
||||
|
||||
_max_depth = max_depth;
|
||||
return SUCCESS;
|
||||
}
|
||||
|
||||
} // namespace SIMDJSON_IMPLEMENTATION
|
||||
} // namespace simdjson
|
|
@ -0,0 +1,78 @@
|
|||
namespace simdjson {
|
||||
namespace SIMDJSON_IMPLEMENTATION {
|
||||
|
||||
//
|
||||
// internal::implementation_simdjson_result_base<T> inline implementation
|
||||
//
|
||||
|
||||
/**
|
||||
* Destroy the result.
|
||||
*/
|
||||
template<typename T>
|
||||
simdjson_really_inline implementation_simdjson_result_base<T>::~implementation_simdjson_result_base() noexcept {
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
simdjson_really_inline void implementation_simdjson_result_base<T>::tie(T &value, error_code &error) && noexcept {
|
||||
// on the clang compiler that comes with current macOS (Apple clang version 11.0.0),
|
||||
// tie(width, error) = size["w"].get<uint64_t>();
|
||||
// fails with "error: no viable overloaded '='"
|
||||
error = this->second;
|
||||
if (!error) {
|
||||
value = std::forward<implementation_simdjson_result_base<T>>(*this).first;
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
SIMDJSON_WARN_UNUSED simdjson_really_inline error_code implementation_simdjson_result_base<T>::get(T &value) && noexcept {
|
||||
error_code error;
|
||||
std::forward<implementation_simdjson_result_base<T>>(*this).tie(value, error);
|
||||
return error;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
simdjson_really_inline error_code implementation_simdjson_result_base<T>::error() const noexcept {
|
||||
return this->second;
|
||||
}
|
||||
|
||||
#if SIMDJSON_EXCEPTIONS
|
||||
|
||||
template<typename T>
|
||||
simdjson_really_inline T& implementation_simdjson_result_base<T>::value() & noexcept(false) {
|
||||
if (error()) { throw simdjson_error(error()); }
|
||||
return this->first;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
simdjson_really_inline T&& implementation_simdjson_result_base<T>::value() && noexcept(false) {
|
||||
return std::forward<implementation_simdjson_result_base<T>>(*this).take_value();
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
simdjson_really_inline T&& implementation_simdjson_result_base<T>::take_value() && noexcept(false) {
|
||||
if (error()) { throw simdjson_error(error()); }
|
||||
return std::forward<T>(this->first);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
simdjson_really_inline implementation_simdjson_result_base<T>::operator T&&() && noexcept(false) {
|
||||
return std::forward<implementation_simdjson_result_base<T>>(*this).take_value();
|
||||
}
|
||||
|
||||
#endif // SIMDJSON_EXCEPTIONS
|
||||
|
||||
template<typename T>
|
||||
simdjson_really_inline implementation_simdjson_result_base<T>::implementation_simdjson_result_base(T &&value, error_code error) noexcept
|
||||
: first{std::forward<T>(value)}, second{error} {}
|
||||
template<typename T>
|
||||
simdjson_really_inline implementation_simdjson_result_base<T>::implementation_simdjson_result_base(error_code error) noexcept
|
||||
: implementation_simdjson_result_base(T{}, error) {}
|
||||
template<typename T>
|
||||
simdjson_really_inline implementation_simdjson_result_base<T>::implementation_simdjson_result_base(T &&value) noexcept
|
||||
: implementation_simdjson_result_base(std::forward<T>(value), SUCCESS) {}
|
||||
template<typename T>
|
||||
simdjson_really_inline implementation_simdjson_result_base<T>::implementation_simdjson_result_base() noexcept
|
||||
: implementation_simdjson_result_base(T{}, UNINITIALIZED) {}
|
||||
|
||||
} // namespace SIMDJSON_IMPLEMENTATION
|
||||
} // namespace simdjson
|
|
@ -0,0 +1,122 @@
|
|||
namespace simdjson {
|
||||
namespace SIMDJSON_IMPLEMENTATION {
|
||||
|
||||
// This is a near copy of include/error.h's simdjson_result_base, except it doesn't use std::pair
|
||||
// so we can avoid inlining errors
|
||||
// TODO reconcile these!
|
||||
/**
|
||||
* The result of a simdjson operation that could fail.
|
||||
*
|
||||
* Gives the option of reading error codes, or throwing an exception by casting to the desired result.
|
||||
*
|
||||
* This is a base class for implementations that want to add functions to the result type for
|
||||
* chaining.
|
||||
*
|
||||
* Override like:
|
||||
*
|
||||
* struct simdjson_result<T> : public internal::implementation_simdjson_result_base<T> {
|
||||
* simdjson_result() noexcept : internal::implementation_simdjson_result_base<T>() {}
|
||||
* simdjson_result(error_code error) noexcept : internal::implementation_simdjson_result_base<T>(error) {}
|
||||
* simdjson_result(T &&value) noexcept : internal::implementation_simdjson_result_base<T>(std::forward<T>(value)) {}
|
||||
* simdjson_result(T &&value, error_code error) noexcept : internal::implementation_simdjson_result_base<T>(std::forward<T>(value), error) {}
|
||||
* // Your extra methods here
|
||||
* }
|
||||
*
|
||||
* Then any method returning simdjson_result<T> will be chainable with your methods.
|
||||
*/
|
||||
template<typename T>
|
||||
struct implementation_simdjson_result_base {
|
||||
|
||||
/**
|
||||
* Create a new empty result with error = UNINITIALIZED.
|
||||
*/
|
||||
simdjson_really_inline implementation_simdjson_result_base() noexcept;
|
||||
|
||||
/**
|
||||
* Create a new error result.
|
||||
*/
|
||||
simdjson_really_inline implementation_simdjson_result_base(error_code error) noexcept;
|
||||
|
||||
/**
|
||||
* Create a new successful result.
|
||||
*/
|
||||
simdjson_really_inline implementation_simdjson_result_base(T &&value) noexcept;
|
||||
|
||||
/**
|
||||
* Create a new result with both things (use if you don't want to branch when creating the result).
|
||||
*/
|
||||
simdjson_really_inline implementation_simdjson_result_base(T &&value, error_code error) noexcept;
|
||||
|
||||
/**
|
||||
* Move a result.
|
||||
*/
|
||||
simdjson_really_inline implementation_simdjson_result_base(implementation_simdjson_result_base<T> &&value) noexcept = default;
|
||||
|
||||
/**
|
||||
* Copy a result.
|
||||
*/
|
||||
simdjson_really_inline implementation_simdjson_result_base(const implementation_simdjson_result_base<T> &value) = default;
|
||||
|
||||
/**
|
||||
* Destroy the result.
|
||||
*/
|
||||
simdjson_really_inline ~implementation_simdjson_result_base() noexcept;
|
||||
|
||||
/**
|
||||
* Move the value and the error to the provided variables.
|
||||
*
|
||||
* @param value The variable to assign the value to. May not be set if there is an error.
|
||||
* @param error The variable to assign the error to. Set to SUCCESS if there is no error.
|
||||
*/
|
||||
simdjson_really_inline void tie(T &value, error_code &error) && noexcept;
|
||||
|
||||
/**
|
||||
* Move the value to the provided variable.
|
||||
*
|
||||
* @param value The variable to assign the value to. May not be set if there is an error.
|
||||
*/
|
||||
simdjson_really_inline error_code get(T &value) && noexcept;
|
||||
|
||||
/**
|
||||
* The error.
|
||||
*/
|
||||
simdjson_really_inline error_code error() const noexcept;
|
||||
|
||||
#if SIMDJSON_EXCEPTIONS
|
||||
|
||||
/**
|
||||
* Get the result value.
|
||||
*
|
||||
* @throw simdjson_error if there was an error.
|
||||
*/
|
||||
simdjson_really_inline T& value() & noexcept(false);
|
||||
|
||||
/**
|
||||
* Take the result value (move it).
|
||||
*
|
||||
* @throw simdjson_error if there was an error.
|
||||
*/
|
||||
simdjson_really_inline T&& value() && noexcept(false);
|
||||
|
||||
/**
|
||||
* Take the result value (move it).
|
||||
*
|
||||
* @throw simdjson_error if there was an error.
|
||||
*/
|
||||
simdjson_really_inline T&& take_value() && noexcept(false);
|
||||
|
||||
/**
|
||||
* Cast to the value (will throw on error).
|
||||
*
|
||||
* @throw simdjson_error if there was an error.
|
||||
*/
|
||||
simdjson_really_inline operator T&&() && noexcept(false);
|
||||
|
||||
#endif // SIMDJSON_EXCEPTIONS
|
||||
|
||||
T first;
|
||||
error_code second;
|
||||
}; // struct implementation_simdjson_result_base
|
||||
|
||||
} // namespace SIMDJSON_IMPLEMENTATION
|
||||
} // namespace simdjson
|
|
@ -1,15 +1,19 @@
|
|||
namespace {
|
||||
#include "simdjson/internal/jsoncharutils_tables.h"
|
||||
#include "simdjson/internal/numberparsing_tables.h"
|
||||
|
||||
namespace simdjson {
|
||||
namespace SIMDJSON_IMPLEMENTATION {
|
||||
namespace stage2 {
|
||||
namespace {
|
||||
namespace jsoncharutils {
|
||||
|
||||
// return non-zero if not a structural or whitespace char
|
||||
// zero otherwise
|
||||
simdjson_really_inline uint32_t is_not_structural_or_whitespace(uint8_t c) {
|
||||
return structural_or_whitespace_negated[c];
|
||||
return internal::structural_or_whitespace_negated[c];
|
||||
}
|
||||
|
||||
simdjson_really_inline uint32_t is_structural_or_whitespace(uint8_t c) {
|
||||
return structural_or_whitespace[c];
|
||||
return internal::structural_or_whitespace[c];
|
||||
}
|
||||
|
||||
// returns a value with the high 16 bits set if not valid
|
||||
|
@ -20,10 +24,10 @@ simdjson_really_inline uint32_t is_structural_or_whitespace(uint8_t c) {
|
|||
// https://lemire.me/blog/2019/04/17/parsing-short-hexadecimal-strings-efficiently/
|
||||
static inline uint32_t hex_to_u32_nocheck(
|
||||
const uint8_t *src) { // strictly speaking, static inline is a C-ism
|
||||
uint32_t v1 = digit_to_val32[630 + src[0]];
|
||||
uint32_t v2 = digit_to_val32[420 + src[1]];
|
||||
uint32_t v3 = digit_to_val32[210 + src[2]];
|
||||
uint32_t v4 = digit_to_val32[0 + src[3]];
|
||||
uint32_t v1 = internal::digit_to_val32[630 + src[0]];
|
||||
uint32_t v2 = internal::digit_to_val32[420 + src[1]];
|
||||
uint32_t v3 = internal::digit_to_val32[210 + src[2]];
|
||||
uint32_t v4 = internal::digit_to_val32[0 + src[3]];
|
||||
return v1 | v2 | v3 | v4;
|
||||
}
|
||||
|
||||
|
@ -86,6 +90,8 @@ static simdjson_really_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64
|
|||
}
|
||||
#endif
|
||||
|
||||
using internal::value128;
|
||||
|
||||
simdjson_really_inline value128 full_multiplication(uint64_t value1, uint64_t value2) {
|
||||
value128 answer;
|
||||
#if defined(SIMDJSON_REGULAR_VISUAL_STUDIO) || defined(SIMDJSON_IS_32BITS)
|
||||
|
@ -104,6 +110,7 @@ simdjson_really_inline value128 full_multiplication(uint64_t value1, uint64_t va
|
|||
return answer;
|
||||
}
|
||||
|
||||
} // namespace stage2
|
||||
} // namespace SIMDJSON_IMPLEMENTATION
|
||||
} // namespace jsoncharutils
|
||||
} // unnamed namespace
|
||||
} // namespace SIMDJSON_IMPLEMENTATION
|
||||
} // namespace simdjson
|
|
@ -1,11 +1,20 @@
|
|||
#include "simdjson/internal/numberparsing_tables.h"
|
||||
#include <cmath>
|
||||
#include <limits>
|
||||
|
||||
namespace {
|
||||
namespace simdjson {
|
||||
namespace SIMDJSON_IMPLEMENTATION {
|
||||
namespace stage2 {
|
||||
namespace {
|
||||
/// @private
|
||||
namespace numberparsing {
|
||||
|
||||
using internal::FASTFLOAT_LARGEST_POWER;
|
||||
using internal::FASTFLOAT_SMALLEST_POWER;
|
||||
using internal::value128;
|
||||
using internal::power_of_ten;
|
||||
using internal::mantissa_64;
|
||||
using internal::mantissa_128;
|
||||
|
||||
#ifdef JSON_TEST_NUMBERS
|
||||
#define INVALID_NUMBER(SRC) (found_invalid_number((SRC)), NUMBER_ERROR)
|
||||
#define WRITE_INTEGER(VALUE, SRC, WRITER) (found_integer((VALUE), (SRC)), (WRITER).append_s64((VALUE)))
|
||||
|
@ -130,7 +139,7 @@ simdjson_really_inline bool compute_float_64(int64_t power, uint64_t i, bool neg
|
|||
// We want the most significant 64 bits of the product. We know
|
||||
// this will be non-zero because the most significant bit of i is
|
||||
// 1.
|
||||
value128 product = full_multiplication(i, factor_mantissa);
|
||||
value128 product = jsoncharutils::full_multiplication(i, factor_mantissa);
|
||||
uint64_t lower = product.low;
|
||||
uint64_t upper = product.high;
|
||||
|
||||
|
@ -147,7 +156,7 @@ simdjson_really_inline bool compute_float_64(int64_t power, uint64_t i, bool neg
|
|||
mantissa_128[power - FASTFLOAT_SMALLEST_POWER];
|
||||
// next, we compute the 64-bit x 128-bit multiplication, getting a 192-bit
|
||||
// result (three 64-bit values)
|
||||
product = full_multiplication(i, factor_mantissa_low);
|
||||
product = jsoncharutils::full_multiplication(i, factor_mantissa_low);
|
||||
uint64_t product_low = product.low;
|
||||
uint64_t product_middle2 = product.high;
|
||||
uint64_t product_middle1 = lower;
|
||||
|
@ -448,6 +457,10 @@ simdjson_really_inline error_code parse_number(const uint8_t *const, W &writer)
|
|||
return SUCCESS; // always succeeds
|
||||
}
|
||||
|
||||
SIMDJSON_UNUSED simdjson_really_inline simdjson_result<uint64_t> parse_unsigned(const uint8_t * const src) noexcept { return 0; }
|
||||
SIMDJSON_UNUSED simdjson_really_inline simdjson_result<int64_t> parse_integer(const uint8_t * const src) noexcept { return 0; }
|
||||
SIMDJSON_UNUSED simdjson_really_inline simdjson_result<double> parse_double(const uint8_t * const src) noexcept { return 0; }
|
||||
|
||||
#else
|
||||
|
||||
// parse the number at src
|
||||
|
@ -497,7 +510,7 @@ simdjson_really_inline error_code parse_number(const uint8_t *const src, W &writ
|
|||
SIMDJSON_TRY( parse_exponent(src, p, exponent) );
|
||||
}
|
||||
if (is_float) {
|
||||
const bool clean_end = is_structural_or_whitespace(*p);
|
||||
const bool clean_end = jsoncharutils::is_structural_or_whitespace(*p);
|
||||
SIMDJSON_TRY( write_float(src, negative, i, start_digits, digit_count, exponent, writer) );
|
||||
if (!clean_end) { return INVALID_NUMBER(src); }
|
||||
return SUCCESS;
|
||||
|
@ -513,7 +526,7 @@ simdjson_really_inline error_code parse_number(const uint8_t *const src, W &writ
|
|||
// Anything negative above INT64_MAX+1 is invalid
|
||||
if (i > uint64_t(INT64_MAX)+1) { return INVALID_NUMBER(src); }
|
||||
WRITE_INTEGER(~i+1, src, writer);
|
||||
if (!is_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); }
|
||||
if (!jsoncharutils::is_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); }
|
||||
return SUCCESS;
|
||||
// Positive overflow check:
|
||||
// - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the
|
||||
|
@ -536,7 +549,7 @@ simdjson_really_inline error_code parse_number(const uint8_t *const src, W &writ
|
|||
} else {
|
||||
WRITE_INTEGER(negative ? (~i+1) : i, src, writer);
|
||||
}
|
||||
if (!is_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); }
|
||||
if (!jsoncharutils::is_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); }
|
||||
return SUCCESS;
|
||||
}
|
||||
|
||||
|
@ -557,7 +570,7 @@ SIMDJSON_UNUSED simdjson_really_inline simdjson_result<uint64_t> parse_unsigned(
|
|||
// If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error.
|
||||
int digit_count = int(p - start_digits);
|
||||
if (digit_count == 0 || ('0' == *start_digits && digit_count > 1)) { return NUMBER_ERROR; }
|
||||
if (!is_structural_or_whitespace(*p)) { return NUMBER_ERROR; }
|
||||
if (!jsoncharutils::is_structural_or_whitespace(*p)) { return NUMBER_ERROR; }
|
||||
|
||||
// The longest positive 64-bit number is 20 digits.
|
||||
// We do it this way so we don't trigger this branch unless we must.
|
||||
|
@ -629,7 +642,7 @@ SIMDJSON_UNUSED simdjson_really_inline simdjson_result<uint64_t> parse_large_uns
|
|||
while (parse_digit(*p, i)) { p++; }
|
||||
}
|
||||
|
||||
if (!is_structural_or_whitespace(*p)) { return NUMBER_ERROR; }
|
||||
if (!jsoncharutils::is_structural_or_whitespace(*p)) { return NUMBER_ERROR; }
|
||||
// If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error.
|
||||
int digit_count = int(p - src);
|
||||
if (digit_count == 0 || ('0' == *src && digit_count > 1)) { return NUMBER_ERROR; }
|
||||
|
@ -655,7 +668,7 @@ SIMDJSON_UNUSED simdjson_really_inline simdjson_result<int64_t> parse_integer(co
|
|||
// If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error.
|
||||
int digit_count = int(p - start_digits);
|
||||
if (digit_count == 0 || ('0' == *start_digits && digit_count > 1)) { return NUMBER_ERROR; }
|
||||
if (!is_structural_or_whitespace(*p)) { return NUMBER_ERROR; }
|
||||
if (!jsoncharutils::is_structural_or_whitespace(*p)) { return NUMBER_ERROR; }
|
||||
|
||||
// The longest negative 64-bit number is 19 digits.
|
||||
// The longest positive 64-bit number is 20 digits.
|
||||
|
@ -747,6 +760,8 @@ SIMDJSON_UNUSED simdjson_really_inline simdjson_result<double> parse_double(cons
|
|||
overflow = overflow || exponent < FASTFLOAT_SMALLEST_POWER || exponent > FASTFLOAT_LARGEST_POWER;
|
||||
}
|
||||
|
||||
if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; }
|
||||
|
||||
//
|
||||
// Assemble (or slow-parse) the float
|
||||
//
|
||||
|
@ -763,6 +778,6 @@ SIMDJSON_UNUSED simdjson_really_inline simdjson_result<double> parse_double(cons
|
|||
#endif // SIMDJSON_SKIPNUMBERPARSING
|
||||
|
||||
} // namespace numberparsing
|
||||
} // namespace stage2
|
||||
} // namespace SIMDJSON_IMPLEMENTATION
|
||||
} // unnamed namespace
|
||||
} // namespace SIMDJSON_IMPLEMENTATION
|
||||
} // namespace simdjson
|
|
@ -0,0 +1,12 @@
|
|||
#include "simdjson/generic/ondemand/logger-inl.h"
|
||||
#include "simdjson/generic/ondemand/raw_json_string-inl.h"
|
||||
#include "simdjson/generic/ondemand/token_iterator-inl.h"
|
||||
#include "simdjson/generic/ondemand/json_iterator-inl.h"
|
||||
#include "simdjson/generic/ondemand/array_iterator-inl.h"
|
||||
#include "simdjson/generic/ondemand/object_iterator-inl.h"
|
||||
#include "simdjson/generic/ondemand/array-inl.h"
|
||||
#include "simdjson/generic/ondemand/document-inl.h"
|
||||
#include "simdjson/generic/ondemand/value-inl.h"
|
||||
#include "simdjson/generic/ondemand/field-inl.h"
|
||||
#include "simdjson/generic/ondemand/object-inl.h"
|
||||
#include "simdjson/generic/ondemand/parser-inl.h"
|
|
@ -0,0 +1,24 @@
|
|||
namespace simdjson {
|
||||
namespace SIMDJSON_IMPLEMENTATION {
|
||||
/**
|
||||
* A fast, simple, DOM-like interface that parses JSON as you use it.
|
||||
*
|
||||
* Designed for maximum speed and a lower memory profile.
|
||||
*/
|
||||
namespace ondemand {
|
||||
} // namespace ondemand
|
||||
} // namespace SIMDJSON_IMPLEMENTATION
|
||||
} // namespace simdjson
|
||||
|
||||
#include "simdjson/generic/ondemand/logger.h"
|
||||
#include "simdjson/generic/ondemand/raw_json_string.h"
|
||||
#include "simdjson/generic/ondemand/token_iterator.h"
|
||||
#include "simdjson/generic/ondemand/json_iterator.h"
|
||||
#include "simdjson/generic/ondemand/array_iterator.h"
|
||||
#include "simdjson/generic/ondemand/object_iterator.h"
|
||||
#include "simdjson/generic/ondemand/array.h"
|
||||
#include "simdjson/generic/ondemand/document.h"
|
||||
#include "simdjson/generic/ondemand/value.h"
|
||||
#include "simdjson/generic/ondemand/field.h"
|
||||
#include "simdjson/generic/ondemand/object.h"
|
||||
#include "simdjson/generic/ondemand/parser.h"
|
|
@ -0,0 +1,120 @@
|
|||
namespace simdjson {
|
||||
namespace SIMDJSON_IMPLEMENTATION {
|
||||
namespace ondemand {
|
||||
|
||||
//
|
||||
// ### Live States
|
||||
//
|
||||
// While iterating or looking up values, depth >= iter->depth. at_start may vary. Error is
|
||||
// always SUCCESS:
|
||||
//
|
||||
// - Start: This is the state when the array is first found and the iterator is just past the `[`.
|
||||
// In this state, at_start == true.
|
||||
// - Next: After we hand a scalar value to the user, or an array/object which they then fully
|
||||
// iterate over, the iterator is at the `,` before the next value (or `]`). In this state,
|
||||
// depth == iter->depth, at_start == false, and error == SUCCESS.
|
||||
// - Unfinished Business: When we hand an array/object to the user which they do not fully
|
||||
// iterate over, we need to finish that iteration by skipping child values until we reach the
|
||||
// Next state. In this state, depth > iter->depth, at_start == false, and error == SUCCESS.
|
||||
//
|
||||
// ## Error States
|
||||
//
|
||||
// In error states, we will yield exactly one more value before stopping. iter->depth == depth
|
||||
// and at_start is always false. We decrement after yielding the error, moving to the Finished
|
||||
// state.
|
||||
//
|
||||
// - Chained Error: When the array iterator is part of an error chain--for example, in
|
||||
// `for (auto tweet : doc["tweets"])`, where the tweet element may be missing or not be an
|
||||
// array--we yield that error in the loop, exactly once. In this state, error != SUCCESS and
|
||||
// iter->depth == depth, and at_start == false. We decrement depth when we yield the error.
|
||||
// - Missing Comma Error: When the iterator ++ method discovers there is no comma between elements,
|
||||
// we flag that as an error and treat it exactly the same as a Chained Error. In this state,
|
||||
// error == TAPE_ERROR, iter->depth == depth, and at_start == false.
|
||||
//
|
||||
// ## Terminal State
|
||||
//
|
||||
// The terminal state has iter->depth < depth. at_start is always false.
|
||||
//
|
||||
// - Finished: When we have reached a `]` or have reported an error, we are finished. We signal this
|
||||
// by decrementing depth. In this state, iter->depth < depth, at_start == false, and
|
||||
// error == SUCCESS.
|
||||
//
|
||||
|
||||
simdjson_really_inline array::array(json_iterator_ref &&_iter) noexcept
|
||||
: iter{std::forward<json_iterator_ref>(_iter)}
|
||||
{
|
||||
}
|
||||
|
||||
/**
 * If the array was not fully iterated, skip the remainder of the container and give up the
 * iterator reference. Does nothing when the iterator reference is no longer alive.
 */
simdjson_really_inline array::~array() noexcept {
  if (!iter.is_alive()) { return; }

  logger::log_event(*iter, "unfinished", "array");
  // A destructor has no way to report failure, so the skip result is deliberately ignored.
  SIMDJSON_UNUSED auto _err = iter->skip_container();
  iter.release();
}
|
||||
|
||||
/**
 * Begin an array at the iterator's current position.
 *
 * Any error reported by start_array() is propagated to the caller. When start_array() reports
 * that there is no first value, the iterator reference is released before the array is built,
 * so the resulting array is already finished.
 */
simdjson_really_inline simdjson_result<array> array::start(json_iterator_ref &&iter) noexcept {
  bool nonempty;
  SIMDJSON_TRY( iter->start_array().get(nonempty) );
  if (nonempty) { return array(std::move(iter)); }

  iter.release();
  return array(std::move(iter));
}
|
||||
/**
 * Build an array from an iterator via started_array(), i.e. without the error-returning
 * start_array() step. If started_array() reports no first value, the iterator reference is
 * released first, so the resulting array is already finished.
 */
simdjson_really_inline array array::started(json_iterator_ref &&iter) noexcept {
  if (!iter->started_array()) {
    iter.release();
  }
  return array(std::move(iter));
}
|
||||
|
||||
//
|
||||
// For array_iterator
|
||||
//
|
||||
// Returns the underlying json_iterator that this array's iterator reference points at.
simdjson_really_inline json_iterator &array::get_iterator() noexcept {
|
||||
return *iter;
|
||||
}
|
||||
// Hands out a borrowed iterator reference obtained from this array's own reference.
simdjson_really_inline json_iterator_ref array::borrow_iterator() noexcept {
|
||||
return iter.borrow();
|
||||
}
|
||||
// True while this array still holds a live iterator reference (iteration not yet finished).
simdjson_really_inline bool array::is_iterator_alive() const noexcept {
|
||||
return iter.is_alive();
|
||||
}
|
||||
// Marks iteration as complete by releasing the iterator reference.
simdjson_really_inline void array::iteration_finished() noexcept {
|
||||
iter.release();
|
||||
}
|
||||
|
||||
/** Returns an iterator bound to this array; iteration state lives in the array itself. */
simdjson_really_inline array_iterator<array> array::begin() & noexcept {
  return array_iterator<array>(*this);
}

/** Returns the end sentinel: a default-constructed iterator not bound to any array. */
simdjson_really_inline array_iterator<array> array::end() & noexcept {
  return array_iterator<array>();
}
|
||||
|
||||
} // namespace ondemand
|
||||
} // namespace SIMDJSON_IMPLEMENTATION
|
||||
} // namespace simdjson
|
||||
|
||||
namespace simdjson {
|
||||
|
||||
/** Success result: takes ownership of the given array. */
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::array>::simdjson_result(
  SIMDJSON_IMPLEMENTATION::ondemand::array &&value
) noexcept
  : implementation_simdjson_result_base<SIMDJSON_IMPLEMENTATION::ondemand::array>(std::move(value)) {}

/** Error result: carries only the error code. */
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::array>::simdjson_result(
  error_code error
) noexcept
  : implementation_simdjson_result_base<SIMDJSON_IMPLEMENTATION::ondemand::array>(error) {}
|
||||
|
||||
/** Forwards to array::begin(), or yields this result's error if it holds one. */
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::array_iterator<SIMDJSON_IMPLEMENTATION::ondemand::array>> simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::array>::begin() & noexcept {
  if (!error()) { return first.begin(); }
  return error();
}

/** Forwards to array::end(), or yields this result's error if it holds one. */
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::array_iterator<SIMDJSON_IMPLEMENTATION::ondemand::array>> simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::array>::end() & noexcept {
  if (!error()) { return first.end(); }
  return error();
}
|
||||
|
||||
} // namespace simdjson
|
|
@ -0,0 +1,113 @@
|
|||
#include "simdjson/error.h"
|
||||
|
||||
namespace simdjson {
|
||||
namespace SIMDJSON_IMPLEMENTATION {
|
||||
namespace ondemand {
|
||||
|
||||
class value;
|
||||
class document;
|
||||
|
||||
/**
|
||||
* A forward-only JSON array.
|
||||
*/
|
||||
class array {
|
||||
public:
|
||||
/**
|
||||
* Create a new invalid array.
|
||||
*
|
||||
* Exists so you can declare a variable and later assign to it before use.
|
||||
*/
|
||||
simdjson_really_inline array() noexcept = default;
|
||||
simdjson_really_inline array(array &&other) noexcept = default;
|
||||
simdjson_really_inline array &operator=(array &&other) noexcept = default;
|
||||
array(const array &) = delete;
|
||||
array &operator=(const array &) = delete;
|
||||
|
||||
/**
|
||||
* Finishes iterating the array if it is not already fully iterated.
|
||||
*/
|
||||
simdjson_really_inline ~array() noexcept;
|
||||
|
||||
/**
|
||||
* Begin array iteration.
|
||||
*
|
||||
* Part of the std::iterable interface.
|
||||
*/
|
||||
simdjson_really_inline array_iterator<array> begin() & noexcept;
|
||||
/**
|
||||
* Sentinel representing the end of the array.
|
||||
*
|
||||
* Part of the std::iterable interface.
|
||||
*/
|
||||
simdjson_really_inline array_iterator<array> end() & noexcept;
|
||||
|
||||
protected:
|
||||
/**
|
||||
* Begin array iteration.
|
||||
*
|
||||
* @param iter The iterator. Must be where the initial [ is expected. Will be *moved* into the
|
||||
* resulting array.
|
||||
* @error INCORRECT_TYPE if the iterator is not at [.
|
||||
*/
|
||||
static simdjson_really_inline simdjson_result<array> start(json_iterator_ref &&iter) noexcept;
|
||||
/**
|
||||
* Begin array iteration.
|
||||
*
|
||||
* This version of the method should be called after the initial [ has been verified, and is
|
||||
* intended for use by switch statements that check the type of a value.
|
||||
*
|
||||
* @param iter The iterator. Must be after the initial [. Will be *moved* into the resulting array.
|
||||
*/
|
||||
static simdjson_really_inline array started(json_iterator_ref &&iter) noexcept;
|
||||
|
||||
/**
|
||||
* Internal array creation. Call array::start() or array::started() instead of this.
|
||||
*
|
||||
* @param iter The iterator. Must either be at the start of the first element with iter.is_alive()
|
||||
* == true, or past the [] with is_alive() == false if the array is empty. Will be *moved*
|
||||
* into the resulting array.
|
||||
*/
|
||||
simdjson_really_inline array(json_iterator_ref &&iter) noexcept;
|
||||
|
||||
//
|
||||
// For array_iterator
|
||||
//
|
||||
simdjson_really_inline json_iterator &get_iterator() noexcept;
|
||||
simdjson_really_inline json_iterator_ref borrow_iterator() noexcept;
|
||||
simdjson_really_inline bool is_iterator_alive() const noexcept;
|
||||
simdjson_really_inline void iteration_finished() noexcept;
|
||||
|
||||
/**
|
||||
* Iterator marking current position.
|
||||
*
|
||||
* iter.is_alive() == false indicates iteration is complete.
|
||||
*/
|
||||
json_iterator_ref iter{};
|
||||
|
||||
friend class value;
|
||||
friend struct simdjson_result<value>;
|
||||
friend struct simdjson_result<array>;
|
||||
friend class array_iterator<array>;
|
||||
};
|
||||
|
||||
} // namespace ondemand
|
||||
} // namespace SIMDJSON_IMPLEMENTATION
|
||||
} // namespace simdjson
|
||||
|
||||
namespace simdjson {
|
||||
|
||||
template<>
|
||||
struct simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::array> : public SIMDJSON_IMPLEMENTATION::implementation_simdjson_result_base<SIMDJSON_IMPLEMENTATION::ondemand::array> {
|
||||
public:
|
||||
simdjson_really_inline simdjson_result(SIMDJSON_IMPLEMENTATION::ondemand::array &&value) noexcept; ///< @private
|
||||
simdjson_really_inline simdjson_result(error_code error) noexcept; ///< @private
|
||||
|
||||
simdjson_really_inline simdjson_result() noexcept = default;
|
||||
simdjson_really_inline simdjson_result(simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::array> &&a) noexcept = default;
|
||||
simdjson_really_inline ~simdjson_result() noexcept = default; ///< @private
|
||||
|
||||
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::array_iterator<SIMDJSON_IMPLEMENTATION::ondemand::array>> begin() & noexcept;
|
||||
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::array_iterator<SIMDJSON_IMPLEMENTATION::ondemand::array>> end() & noexcept;
|
||||
};
|
||||
|
||||
} // namespace simdjson
|
|
@ -0,0 +1,81 @@
|
|||
namespace simdjson {
|
||||
namespace SIMDJSON_IMPLEMENTATION {
|
||||
namespace ondemand {
|
||||
|
||||
// Binds the iterator to a container by storing its address (no copy is made); every other
// operation on the iterator goes through that pointer.
template<typename T>
|
||||
simdjson_really_inline array_iterator<T>::array_iterator(T &_iter) noexcept : iter{&_iter} {}
|
||||
|
||||
template<typename T>
|
||||
simdjson_really_inline simdjson_result<array_iterator<T>> array_iterator<T>::start(T &iter, const uint8_t *json) noexcept {
|
||||
bool has_value;
|
||||
SIMDJSON_TRY( iter.get_iterator().start_array(json).get(has_value) );
|
||||
if (!has_value) { iter.iteration_finished(); }
|
||||
return array_iterator<T>(iter);
|
||||
}
|
||||
template<typename T>
|
||||
simdjson_really_inline simdjson_result<value> array_iterator<T>::operator*() noexcept {
|
||||
error_code error = iter->get_iterator().error();
|
||||
if (error) { iter->iteration_finished(); return error; }
|
||||
return value::start(iter->borrow_iterator());
|
||||
}
|
||||
template<typename T>
|
||||
simdjson_really_inline bool array_iterator<T>::operator==(const array_iterator<T> &other) noexcept {
|
||||
return !(*this != other);
|
||||
}
|
||||
// Reports whether iteration should continue. The argument is ignored: the result depends only
// on whether the container this iterator is bound to is still alive.
// NOTE(review): this dereferences `iter`, which is null for a default-constructed iterator, so
// it appears to assume the left-hand side is a bound iterator -- confirm against callers.
template<typename T>
|
||||
simdjson_really_inline bool array_iterator<T>::operator!=(const array_iterator<T> &) noexcept {
|
||||
return iter->is_iterator_alive();
|
||||
}
|
||||
template<typename T>
|
||||
simdjson_really_inline array_iterator<T> &array_iterator<T>::operator++() noexcept {
|
||||
// TODO this is a safety rail ... users should exit loops as soon as they receive an error.
|
||||
// Nonetheless, let's see if performance is OK with this if statement--the compiler may give it to us for free.
|
||||
if (!iter->is_iterator_alive()) { return *this; } // Iterator will be released if there is an error
|
||||
bool has_value;
|
||||
error_code error = iter->get_iterator().has_next_element().get(has_value); // If there's an error, has_next stays true.
|
||||
if (!(error || has_value)) { iter->iteration_finished(); }
|
||||
return *this;
|
||||
}
|
||||
|
||||
} // namespace ondemand
|
||||
} // namespace SIMDJSON_IMPLEMENTATION
|
||||
} // namespace simdjson
|
||||
|
||||
namespace simdjson {
|
||||
|
||||
template<typename T>
|
||||
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::array_iterator<T>>::simdjson_result(
|
||||
SIMDJSON_IMPLEMENTATION::ondemand::array_iterator<T> &&value
|
||||
) noexcept
|
||||
: SIMDJSON_IMPLEMENTATION::implementation_simdjson_result_base<SIMDJSON_IMPLEMENTATION::ondemand::array_iterator<T>>(std::forward<SIMDJSON_IMPLEMENTATION::ondemand::array_iterator<T>>(value))
|
||||
{
|
||||
}
|
||||
template<typename T>
|
||||
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::array_iterator<T>>::simdjson_result(error_code error) noexcept
|
||||
: SIMDJSON_IMPLEMENTATION::implementation_simdjson_result_base<SIMDJSON_IMPLEMENTATION::ondemand::array_iterator<T>>({}, error)
|
||||
{
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::value> simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::array_iterator<T>>::operator*() noexcept {
|
||||
if (this->error()) { this->second = SUCCESS; return this->error(); }
|
||||
return *this->first;
|
||||
}
|
||||
template<typename T>
|
||||
simdjson_really_inline bool simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::array_iterator<T>>::operator==(const simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::array_iterator<T>> &other) noexcept {
|
||||
if (this->error()) { return true; }
|
||||
return this->first == other.first;
|
||||
}
|
||||
template<typename T>
|
||||
simdjson_really_inline bool simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::array_iterator<T>>::operator!=(const simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::array_iterator<T>> &other) noexcept {
|
||||
if (this->error()) { return false; }
|
||||
return this->first != other.first;
|
||||
}
|
||||
template<typename T>
|
||||
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::array_iterator<T>> &simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::array_iterator<T>>::operator++() noexcept {
|
||||
if (this->error()) { return *this; }
|
||||
++(this->first);
|
||||
return *this;
|
||||
}
|
||||
|
||||
} // namespace simdjson
|
|
@ -0,0 +1,99 @@
|
|||
#include "simdjson/error.h"
|
||||
|
||||
namespace simdjson {
|
||||
namespace SIMDJSON_IMPLEMENTATION {
|
||||
namespace ondemand {
|
||||
|
||||
class array;
|
||||
class value;
|
||||
class document;
|
||||
|
||||
/**
|
||||
* A forward-only iterator over the elements of a JSON array.
|
||||
*
|
||||
* This is an input_iterator, meaning:
|
||||
* - It is forward-only
|
||||
* - * must be called exactly once per element.
|
||||
* - ++ must be called exactly once in between each * (*, ++, *, ++, * ...)
|
||||
*/
|
||||
template<typename T>
|
||||
class array_iterator {
|
||||
public:
|
||||
/** Create a new, invalid array iterator. */
|
||||
simdjson_really_inline array_iterator() noexcept = default;
|
||||
simdjson_really_inline array_iterator(const array_iterator<T> &a) noexcept = default;
|
||||
simdjson_really_inline array_iterator<T> &operator=(const array_iterator<T> &a) noexcept = default;
|
||||
|
||||
//
|
||||
// Iterator interface
|
||||
//
|
||||
|
||||
/**
|
||||
* Get the current element.
|
||||
*
|
||||
* Part of the std::iterator interface.
|
||||
*/
|
||||
simdjson_really_inline simdjson_result<value> operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION.
|
||||
/**
|
||||
* Check if we are at the end of the JSON.
|
||||
*
|
||||
* Part of the std::iterator interface.
|
||||
*
|
||||
* @return true if there are no more elements in the JSON array.
|
||||
*/
|
||||
simdjson_really_inline bool operator==(const array_iterator<T> &) noexcept;
|
||||
/**
|
||||
* Check if there are more elements in the JSON array.
|
||||
*
|
||||
* Part of the std::iterator interface.
|
||||
*
|
||||
* @return true if there are more elements in the JSON array.
|
||||
*/
|
||||
simdjson_really_inline bool operator!=(const array_iterator<T> &) noexcept;
|
||||
/**
|
||||
* Move to the next element.
|
||||
*
|
||||
* Part of the std::iterator interface.
|
||||
*/
|
||||
simdjson_really_inline array_iterator<T> &operator++() noexcept;
|
||||
|
||||
private:
|
||||
T *iter{};
|
||||
|
||||
simdjson_really_inline array_iterator(T &iter) noexcept;
|
||||
|
||||
static simdjson_really_inline simdjson_result<array_iterator<T>> start(T &iter, const uint8_t *json) noexcept;
|
||||
|
||||
friend T;
|
||||
friend class array;
|
||||
friend class value;
|
||||
friend struct simdjson_result<array_iterator<T>>;
|
||||
};
|
||||
|
||||
} // namespace ondemand
|
||||
} // namespace SIMDJSON_IMPLEMENTATION
|
||||
} // namespace simdjson
|
||||
|
||||
namespace simdjson {
|
||||
|
||||
template<typename T>
|
||||
struct simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::array_iterator<T>> : public SIMDJSON_IMPLEMENTATION::implementation_simdjson_result_base<SIMDJSON_IMPLEMENTATION::ondemand::array_iterator<T>> {
|
||||
public:
|
||||
simdjson_really_inline simdjson_result(SIMDJSON_IMPLEMENTATION::ondemand::array_iterator<T> &&value) noexcept; ///< @private
|
||||
simdjson_really_inline simdjson_result(error_code error) noexcept; ///< @private
|
||||
|
||||
simdjson_really_inline simdjson_result() noexcept = default;
|
||||
simdjson_really_inline simdjson_result(simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::array_iterator<T>> &&a) noexcept = default;
|
||||
simdjson_really_inline ~simdjson_result() noexcept = default; ///< @private
|
||||
|
||||
//
|
||||
// Iterator interface
|
||||
//
|
||||
|
||||
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::value> operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION.
|
||||
simdjson_really_inline bool operator==(const simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::array_iterator<T>> &) noexcept;
|
||||
simdjson_really_inline bool operator!=(const simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::array_iterator<T>> &) noexcept;
|
||||
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::array_iterator<T>> &operator++() noexcept;
|
||||
};
|
||||
|
||||
} // namespace simdjson
|
|
@ -0,0 +1,276 @@
|
|||
namespace simdjson {
|
||||
namespace SIMDJSON_IMPLEMENTATION {
|
||||
namespace ondemand {
|
||||
|
||||
simdjson_really_inline document::document(ondemand::json_iterator &&_iter, const uint8_t *_json) noexcept
|
||||
: iter{std::forward<json_iterator>(_iter)},
|
||||
json{_json}
|
||||
{
|
||||
logger::log_start_value(iter, "document");
|
||||
}
|
||||
/** Logs the end of the document value, but only while the iterator is still alive. */
simdjson_really_inline document::~document() noexcept {
  if (!iter.is_alive()) { return; }
  logger::log_end_value(iter, "document");
}
|
||||
|
||||
// Precondition check shared by the root accessors: the stored root pointer `json` must be
// non-null. Other members of this class reset `json` to nullptr (for example
// iteration_finished()), after which this check no longer holds.
simdjson_really_inline void document::assert_at_start() const noexcept {
|
||||
SIMDJSON_ASSUME(json != nullptr);
|
||||
}
|
||||
/**
 * Create a document from an iterator: advance it once and keep the value advance() returns
 * as the document's root pointer. The advance must happen before the iterator is moved.
 */
simdjson_really_inline document document::start(json_iterator &&iter) noexcept {
  const auto root = iter.advance();
  return document(std::move(iter), root);
}
|
||||
|
||||
// Views the document root as a value: checks the at-start precondition, then builds a value
// from a borrowed iterator reference and the stored root pointer.
simdjson_really_inline value document::as_value() noexcept {
|
||||
assert_at_start();
|
||||
return { iter.borrow(), json };
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
simdjson_result<T> document::consume_if_success(simdjson_result<T> &&result) noexcept {
|
||||
if (result.error()) { json = nullptr; }
|
||||
return std::forward<simdjson_result<T>>(result);
|
||||
}
|
||||
|
||||
// Reads the document root as an array by viewing it as a value; the result is routed through
// consume_if_success().
simdjson_really_inline simdjson_result<array> document::get_array() & noexcept {
|
||||
assert_at_start();
|
||||
return consume_if_success( as_value().get_array() );
|
||||
}
|
||||
// Reads the document root as an object by viewing it as a value; the result is routed through
// consume_if_success().
simdjson_really_inline simdjson_result<object> document::get_object() & noexcept {
|
||||
assert_at_start();
|
||||
return consume_if_success( as_value().get_object() );
|
||||
}
|
||||
/** Parse the root as an unsigned 64-bit integer using the root-specific parser. */
simdjson_really_inline simdjson_result<uint64_t> document::get_uint64() noexcept {
  assert_at_start();
  auto parsed = iter.parse_root_uint64(json);
  return consume_if_success( std::move(parsed) );
}

/** Parse the root as a signed 64-bit integer using the root-specific parser. */
simdjson_really_inline simdjson_result<int64_t> document::get_int64() noexcept {
  assert_at_start();
  auto parsed = iter.parse_root_int64(json);
  return consume_if_success( std::move(parsed) );
}

/** Parse the root as a double using the root-specific parser. */
simdjson_really_inline simdjson_result<double> document::get_double() noexcept {
  assert_at_start();
  auto parsed = iter.parse_root_double(json);
  return consume_if_success( std::move(parsed) );
}
|
||||
// Reads the document root as a string by viewing it as a value (as_value() performs the
// at-start check); the result is routed through consume_if_success().
simdjson_really_inline simdjson_result<std::string_view> document::get_string() & noexcept {
|
||||
return consume_if_success( as_value().get_string() );
|
||||
}
|
||||
// Same as get_string(), but yields the raw_json_string form of the root.
simdjson_really_inline simdjson_result<raw_json_string> document::get_raw_json_string() & noexcept {
|
||||
return consume_if_success( as_value().get_raw_json_string() );
|
||||
}
|
||||
/** Parse the root as a boolean using the root-specific parser. */
simdjson_really_inline simdjson_result<bool> document::get_bool() noexcept {
  assert_at_start();
  auto parsed = iter.parse_root_bool(json);
  return consume_if_success( std::move(parsed) );
}
|
||||
/**
 * Check whether the root is null. When it is, the stored root pointer is cleared; otherwise
 * the document is left untouched.
 */
simdjson_really_inline bool document::is_null() noexcept {
  assert_at_start();
  bool found_null = false;
  if (iter.root_is_null(json)) {
    json = nullptr;
    found_null = true;
  }
  return found_null;
}
|
||||
|
||||
// Typed get<T>() on an lvalue document: each specialization forwards to the matching get_xxx().
template<>
simdjson_really_inline simdjson_result<array> document::get() & noexcept {
  return get_array();
}
template<>
simdjson_really_inline simdjson_result<object> document::get() & noexcept {
  return get_object();
}
template<>
simdjson_really_inline simdjson_result<raw_json_string> document::get() & noexcept {
  return get_raw_json_string();
}
template<>
simdjson_really_inline simdjson_result<std::string_view> document::get() & noexcept {
  return get_string();
}
template<>
simdjson_really_inline simdjson_result<double> document::get() & noexcept {
  return get_double();
}
template<>
simdjson_really_inline simdjson_result<uint64_t> document::get() & noexcept {
  return get_uint64();
}
template<>
simdjson_really_inline simdjson_result<int64_t> document::get() & noexcept {
  return get_int64();
}
template<>
simdjson_really_inline simdjson_result<bool> document::get() & noexcept {
  return get_bool();
}

// Typed get<T>() on an rvalue document: specialized only for double, uint64_t, int64_t and bool.
template<>
simdjson_really_inline simdjson_result<double> document::get() && noexcept {
  return std::move(*this).get_double();
}
template<>
simdjson_really_inline simdjson_result<uint64_t> document::get() && noexcept {
  return std::move(*this).get_uint64();
}
template<>
simdjson_really_inline simdjson_result<int64_t> document::get() && noexcept {
  return std::move(*this).get_int64();
}
template<>
simdjson_really_inline simdjson_result<bool> document::get() && noexcept {
  return std::move(*this).get_bool();
}
|
||||
|
||||
// Out-parameter form of get<T>() for an lvalue document: runs get<T>() and has the result
// store into `out`, returning the result's error code.
template<typename T> simdjson_really_inline error_code document::get(T &out) & noexcept {
|
||||
return get<T>().get(out);
|
||||
}
|
||||
// Out-parameter form of get<T>() for an rvalue document; identical except that the rvalue
// overload of get<T>() is the one invoked.
template<typename T> simdjson_really_inline error_code document::get(T &out) && noexcept {
|
||||
return std::forward<document>(*this).get<T>().get(out);
|
||||
}
|
||||
|
||||
// Conversion operators, compiled only when SIMDJSON_EXCEPTIONS is enabled. Each one forwards to
// the matching get_xxx() and is declared noexcept(false), since converting the returned
// simdjson_result to the target type may throw.
#if SIMDJSON_EXCEPTIONS
|
||||
simdjson_really_inline document::operator array() & noexcept(false) { return get_array(); }
|
||||
simdjson_really_inline document::operator object() & noexcept(false) { return get_object(); }
|
||||
simdjson_really_inline document::operator uint64_t() noexcept(false) { return get_uint64(); }
|
||||
simdjson_really_inline document::operator int64_t() noexcept(false) { return get_int64(); }
|
||||
simdjson_really_inline document::operator double() noexcept(false) { return get_double(); }
|
||||
simdjson_really_inline document::operator std::string_view() & noexcept(false) { return get_string(); }
|
||||
simdjson_really_inline document::operator raw_json_string() & noexcept(false) { return get_raw_json_string(); }
|
||||
simdjson_really_inline document::operator bool() noexcept(false) { return get_bool(); }
|
||||
#endif
|
||||
|
||||
// Starts iterating the document root as an array; the document itself serves as the
// iteration container and its stored root pointer is handed to array_iterator::start().
simdjson_really_inline simdjson_result<array_iterator<document>> document::begin() & noexcept {
|
||||
return array_iterator<document>::start(*this, json);
|
||||
}
|
||||
// End sentinel: a default-constructed result.
simdjson_really_inline simdjson_result<array_iterator<document>> document::end() & noexcept {
|
||||
return {};
|
||||
}
|
||||
// Field lookup on the root: reads the root as an object and indexes it by `key`.
simdjson_really_inline simdjson_result<value> document::operator[](std::string_view key) & noexcept {
|
||||
return get_object()[key];
|
||||
}
|
||||
// Same lookup for a C-string key.
simdjson_really_inline simdjson_result<value> document::operator[](const char *key) & noexcept {
|
||||
return get_object()[key];
|
||||
}
|
||||
|
||||
//
|
||||
// For array_iterator
|
||||
//
|
||||
/** Returns the json_iterator owned by this document. */
simdjson_really_inline json_iterator &document::get_iterator() noexcept {
  return iter;
}

/** Hands out a borrowed reference to the document's iterator. */
simdjson_really_inline json_iterator_ref document::borrow_iterator() noexcept {
  return iter.borrow();
}

/** Iteration is considered alive for as long as the stored root pointer is non-null. */
simdjson_really_inline bool document::is_iterator_alive() const noexcept {
  return json != nullptr;
}

/** Marks iteration as complete by clearing the stored root pointer. */
simdjson_really_inline void document::iteration_finished() noexcept {
  json = nullptr;
}
|
||||
|
||||
} // namespace ondemand
|
||||
} // namespace SIMDJSON_IMPLEMENTATION
|
||||
} // namespace simdjson
|
||||
|
||||
namespace simdjson {
|
||||
|
||||
/** Success result: takes ownership of the given document. */
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::document>::simdjson_result(
  SIMDJSON_IMPLEMENTATION::ondemand::document &&value
) noexcept
  : implementation_simdjson_result_base<SIMDJSON_IMPLEMENTATION::ondemand::document>(std::move(value)) {}

/** Error result: carries only the error code. */
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::document>::simdjson_result(
  error_code error
) noexcept
  : implementation_simdjson_result_base<SIMDJSON_IMPLEMENTATION::ondemand::document>(error) {}
|
||||
|
||||
/** Forwards to document::begin(), or yields this result's error if it holds one. */
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::array_iterator<SIMDJSON_IMPLEMENTATION::ondemand::document>> simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::document>::begin() & noexcept {
  if (!error()) { return first.begin(); }
  return error();
}

/** End sentinel: always a default-constructed result, regardless of any stored error. */
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::array_iterator<SIMDJSON_IMPLEMENTATION::ondemand::document>> simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::document>::end() & noexcept {
  return simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::array_iterator<SIMDJSON_IMPLEMENTATION::ondemand::document>>();
}

/** Forwards the field lookup to the document, or yields this result's error if it holds one. */
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::value> simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::document>::operator[](std::string_view key) & noexcept {
  if (!error()) { return first[key]; }
  return error();
}

/** C-string overload of the field lookup. */
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::value> simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::document>::operator[](const char *key) & noexcept {
  if (!error()) { return first[key]; }
  return error();
}
|
||||
// Typed accessors on a document result. Each one forwards to the wrapped document when this
// result holds no error and otherwise returns the stored error.

simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::array> simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::document>::get_array() & noexcept {
  if (!error()) { return first.get_array(); }
  return error();
}

simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::object> simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::document>::get_object() & noexcept {
  if (!error()) { return first.get_object(); }
  return error();
}

simdjson_really_inline simdjson_result<uint64_t> simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::document>::get_uint64() noexcept {
  if (!error()) { return first.get_uint64(); }
  return error();
}

simdjson_really_inline simdjson_result<int64_t> simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::document>::get_int64() noexcept {
  if (!error()) { return first.get_int64(); }
  return error();
}

simdjson_really_inline simdjson_result<double> simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::document>::get_double() noexcept {
  if (!error()) { return first.get_double(); }
  return error();
}

simdjson_really_inline simdjson_result<std::string_view> simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::document>::get_string() & noexcept {
  if (!error()) { return first.get_string(); }
  return error();
}

simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::raw_json_string> simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::document>::get_raw_json_string() & noexcept {
  if (!error()) { return first.get_raw_json_string(); }
  return error();
}

simdjson_really_inline simdjson_result<bool> simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::document>::get_bool() noexcept {
  if (!error()) { return first.get_bool(); }
  return error();
}
|
||||
/**
 * Check whether the document root is null.
 *
 * An errored result never reports null: this function returns a plain bool, so returning the
 * error_code itself would convert any non-zero error to `true`. It returns false instead,
 * which leaves the error to surface from the typed getters.
 *
 * @return true only if this result holds no error and the wrapped document's root is null.
 */
simdjson_really_inline bool simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::document>::is_null() noexcept {
  if (error()) { return false; }
  return first.is_null();
}
|
||||
|
||||
template<typename T>
|
||||
simdjson_really_inline simdjson_result<T> simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::document>::get() & noexcept {
|
||||
if (error()) { return error(); }
|
||||
return first.get<T>();
|
||||
}
|
||||
template<typename T>
|
||||
simdjson_really_inline simdjson_result<T> simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::document>::get() && noexcept {
|
||||
if (error()) { return error(); }
|
||||
return std::forward<SIMDJSON_IMPLEMENTATION::ondemand::document>(first).get<T>();
|
||||
}
|
||||
template<typename T>
|
||||
simdjson_really_inline error_code simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::document>::get(T &out) & noexcept {
|
||||
if (error()) { return error(); }
|
||||
return first.get<T>(out);
|
||||
}
|
||||
template<typename T>
|
||||
simdjson_really_inline error_code simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::document>::get(T &out) && noexcept {
|
||||
if (error()) { return error(); }
|
||||
return std::forward<SIMDJSON_IMPLEMENTATION::ondemand::document>(first).get<T>(out);
|
||||
}
|
||||
|
||||
// Deleted: get<document>() is not available on an lvalue result; only the && overload is defined.
template<> simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::document> simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::document>::get<SIMDJSON_IMPLEMENTATION::ondemand::document>() & noexcept = delete;
|
||||
/** Rvalue-only: propagates the stored error, otherwise moves the wrapped document out into a new result. */
template<> simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::document> simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::document>::get<SIMDJSON_IMPLEMENTATION::ondemand::document>() && noexcept {
  const error_code status = error();
  if (status != SUCCESS) { return status; }
  return std::move(first);
}
|
||||
// Deleted: get<document>(out) is not available on an lvalue result; only the && overload is defined.
template<> simdjson_really_inline error_code simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::document>::get<SIMDJSON_IMPLEMENTATION::ondemand::document>(SIMDJSON_IMPLEMENTATION::ondemand::document &out) & noexcept = delete;
|
||||
/**
 * Rvalue-only: move the wrapped document into `out`.
 *
 * @param out Receives the document; left untouched if this result holds an error.
 * @returns The stored error, or SUCCESS after the move-assignment.
 */
template<> simdjson_really_inline error_code simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::document>::get<SIMDJSON_IMPLEMENTATION::ondemand::document>(SIMDJSON_IMPLEMENTATION::ondemand::document &out) && noexcept {
  const error_code status = error();
  if (status != SUCCESS) { return status; }
  out = std::move(first);
  return SUCCESS;
}
|
||||
|
||||
#if SIMDJSON_EXCEPTIONS
|
||||
/** Throws simdjson_error for a stored error, otherwise converts the wrapped document to an array. */
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::document>::operator SIMDJSON_IMPLEMENTATION::ondemand::array() & noexcept(false) {
  const error_code status = error();
  if (status != SUCCESS) { throw simdjson_error(status); }
  return first;
}
|
||||
/** Throws simdjson_error for a stored error, otherwise converts the wrapped document to an object. */
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::document>::operator SIMDJSON_IMPLEMENTATION::ondemand::object() & noexcept(false) {
  const error_code status = error();
  if (status != SUCCESS) { throw simdjson_error(status); }
  return first;
}
|
||||
/** Throws simdjson_error for a stored error, otherwise converts the wrapped document to uint64_t. */
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::document>::operator uint64_t() noexcept(false) {
  const error_code status = error();
  if (status != SUCCESS) { throw simdjson_error(status); }
  return first;
}
|
||||
/** Throws simdjson_error for a stored error, otherwise converts the wrapped document to int64_t. */
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::document>::operator int64_t() noexcept(false) {
  const error_code status = error();
  if (status != SUCCESS) { throw simdjson_error(status); }
  return first;
}
|
||||
/** Throws simdjson_error for a stored error, otherwise converts the wrapped document to double. */
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::document>::operator double() noexcept(false) {
  const error_code status = error();
  if (status != SUCCESS) { throw simdjson_error(status); }
  return first;
}
|
||||
/** Throws simdjson_error for a stored error, otherwise converts the wrapped document to std::string_view. */
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::document>::operator std::string_view() & noexcept(false) {
  const error_code status = error();
  if (status != SUCCESS) { throw simdjson_error(status); }
  return first;
}
|
||||
/** Throws simdjson_error for a stored error, otherwise converts the wrapped document to raw_json_string. */
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::document>::operator SIMDJSON_IMPLEMENTATION::ondemand::raw_json_string() & noexcept(false) {
  const error_code status = error();
  if (status != SUCCESS) { throw simdjson_error(status); }
  return first;
}
|
||||
/** Throws simdjson_error for a stored error, otherwise converts the wrapped document to bool. */
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::document>::operator bool() noexcept(false) {
  const error_code status = error();
  if (status != SUCCESS) { throw simdjson_error(status); }
  return first;
}
|
||||
#endif
|
||||
|
||||
} // namespace simdjson
|
|
@ -0,0 +1,323 @@
|
|||
#include "simdjson/error.h"
|
||||
|
||||
namespace simdjson {
|
||||
namespace SIMDJSON_IMPLEMENTATION {
|
||||
namespace ondemand {
|
||||
|
||||
class parser;
|
||||
class array;
|
||||
class object;
|
||||
class value;
|
||||
class raw_json_string;
|
||||
template<typename T> class array_iterator;
|
||||
|
||||
/**
|
||||
* A JSON document iteration.
|
||||
*
|
||||
* Used by tokens to get text, and string buffer location.
|
||||
*
|
||||
* You must keep the document around during iteration.
|
||||
*/
|
||||
class document {
|
||||
public:
|
||||
simdjson_really_inline document(document &&other) noexcept = default;
|
||||
simdjson_really_inline document &operator=(document &&other) noexcept = default;
|
||||
|
||||
/**
|
||||
* Create a new invalid document.
|
||||
*
|
||||
* Exists so you can declare a variable and later assign to it before use.
|
||||
*/
|
||||
simdjson_really_inline document() noexcept = default;
|
||||
simdjson_really_inline document(const document &other) = delete;
|
||||
simdjson_really_inline document &operator=(const document &other) = delete;
|
||||
/**
|
||||
* Finishes logging (if logging is enabled).
|
||||
*/
|
||||
simdjson_really_inline ~document() noexcept;
|
||||
|
||||
/**
|
||||
* Cast this JSON value to an array.
|
||||
*
|
||||
* @returns An object that can be used to iterate the array.
|
||||
* @returns INCORRECT_TYPE If the JSON value is not an array.
|
||||
*/
|
||||
simdjson_really_inline simdjson_result<array> get_array() & noexcept;
|
||||
/**
|
||||
* Cast this JSON value to an object.
|
||||
*
|
||||
* @returns An object that can be used to look up or iterate fields.
|
||||
* @returns INCORRECT_TYPE If the JSON value is not an object.
|
||||
*/
|
||||
simdjson_really_inline simdjson_result<object> get_object() & noexcept;
|
||||
/**
|
||||
* Cast this JSON value to an unsigned integer.
|
||||
*
|
||||
* @returns An unsigned 64-bit integer.
|
||||
* @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer.
|
||||
*/
|
||||
simdjson_really_inline simdjson_result<uint64_t> get_uint64() noexcept;
|
||||
/**
|
||||
* Cast this JSON value to a signed integer.
|
||||
*
|
||||
* @returns A signed 64-bit integer.
|
||||
* @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer.
|
||||
*/
|
||||
simdjson_really_inline simdjson_result<int64_t> get_int64() noexcept;
|
||||
/**
|
||||
* Cast this JSON value to a double.
|
||||
*
|
||||
* @returns A double.
|
||||
* @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number.
|
||||
*/
|
||||
simdjson_really_inline simdjson_result<double> get_double() noexcept;
|
||||
/**
|
||||
* Cast this JSON value to a string.
|
||||
*
|
||||
* The string is guaranteed to be valid UTF-8.
|
||||
*
|
||||
* Equivalent to get<std::string_view>().
|
||||
*
|
||||
* @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next
|
||||
* time it parses a document or when it is destroyed.
|
||||
* @returns INCORRECT_TYPE if the JSON value is not a string.
|
||||
*/
|
||||
simdjson_really_inline simdjson_result<std::string_view> get_string() & noexcept;
|
||||
/**
|
||||
* Cast this JSON value to a raw_json_string.
|
||||
*
|
||||
* The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n).
|
||||
*
|
||||
* @returns A pointer to the raw JSON for the given string.
|
||||
* @returns INCORRECT_TYPE if the JSON value is not a string.
|
||||
*/
|
||||
simdjson_really_inline simdjson_result<raw_json_string> get_raw_json_string() & noexcept;
|
||||
/**
|
||||
* Cast this JSON value to a bool.
|
||||
*
|
||||
* @returns A bool value.
|
||||
* @returns INCORRECT_TYPE if the JSON value is not true or false.
|
||||
*/
|
||||
simdjson_really_inline simdjson_result<bool> get_bool() noexcept;
|
||||
/**
|
||||
* Checks if this JSON value is null.
|
||||
*
|
||||
* @returns Whether the value is null.
|
||||
*/
|
||||
simdjson_really_inline bool is_null() noexcept;
|
||||
|
||||
/**
|
||||
* Get this value as the given type.
|
||||
*
|
||||
* Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool
|
||||
*
|
||||
* @returns A value of the given type, parsed from the JSON.
|
||||
* @returns INCORRECT_TYPE If the JSON value is not the given type.
|
||||
*/
|
||||
template<typename T> simdjson_really_inline simdjson_result<T> get() & noexcept;
|
||||
/** @overload template<typename T> simdjson_result<T> get() & noexcept */
|
||||
template<typename T> simdjson_really_inline simdjson_result<T> get() && noexcept;
|
||||
|
||||
/**
|
||||
* Get this value as the given type.
|
||||
*
|
||||
* Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool
|
||||
*
|
||||
* @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized.
|
||||
* @returns INCORRECT_TYPE If the JSON value is not the given type.
|
||||
* @returns SUCCESS If the parse succeeded and the out parameter was set to the value.
|
||||
*/
|
||||
template<typename T> simdjson_really_inline error_code get(T &out) & noexcept;
|
||||
/** @overload template<typename T> error_code get(T &out) & noexcept */
|
||||
template<typename T> simdjson_really_inline error_code get(T &out) && noexcept;
|
||||
|
||||
#if SIMDJSON_EXCEPTIONS
|
||||
/**
|
||||
* Cast this JSON value to an array.
|
||||
*
|
||||
* @returns An object that can be used to iterate the array.
|
||||
* @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an array.
|
||||
*/
|
||||
simdjson_really_inline operator array() & noexcept(false);
|
||||
/**
|
||||
* Cast this JSON value to an object.
|
||||
*
|
||||
* @returns An object that can be used to look up or iterate fields.
|
||||
* @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an object.
|
||||
*/
|
||||
simdjson_really_inline operator object() & noexcept(false);
|
||||
/**
|
||||
* Cast this JSON value to an unsigned integer.
|
||||
*
|
||||
* @returns An unsigned 64-bit integer.
|
||||
* @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit unsigned integer.
|
||||
*/
|
||||
simdjson_really_inline operator uint64_t() noexcept(false);
|
||||
/**
|
||||
* Cast this JSON value to a signed integer.
|
||||
*
|
||||
* @returns A signed 64-bit integer.
|
||||
* @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit integer.
|
||||
*/
|
||||
simdjson_really_inline operator int64_t() noexcept(false);
|
||||
/**
|
||||
* Cast this JSON value to a double.
|
||||
*
|
||||
* @returns A double.
|
||||
* @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a valid floating-point number.
|
||||
*/
|
||||
simdjson_really_inline operator double() noexcept(false);
|
||||
/**
|
||||
* Cast this JSON value to a string.
|
||||
*
|
||||
* The string is guaranteed to be valid UTF-8.
|
||||
*
|
||||
* Equivalent to get<std::string_view>().
|
||||
*
|
||||
* @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next
|
||||
* time it parses a document or when it is destroyed.
|
||||
* @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string.
|
||||
*/
|
||||
simdjson_really_inline operator std::string_view() & noexcept(false);
|
||||
/**
|
||||
* Cast this JSON value to a raw_json_string.
|
||||
*
|
||||
* The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n).
|
||||
*
|
||||
* @returns A pointer to the raw JSON for the given string.
|
||||
* @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string.
|
||||
*/
|
||||
simdjson_really_inline operator raw_json_string() & noexcept(false);
|
||||
/**
|
||||
* Cast this JSON value to a bool.
|
||||
*
|
||||
* @returns A bool value.
|
||||
* @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not true or false.
|
||||
*/
|
||||
simdjson_really_inline operator bool() noexcept(false);
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Begin array iteration.
|
||||
*
|
||||
* Part of the begin()/end() iteration interface (used by range-based for loops).
|
||||
*/
|
||||
simdjson_really_inline simdjson_result<array_iterator<document>> begin() & noexcept;
|
||||
/**
|
||||
* Sentinel representing the end of the array.
|
||||
*
|
||||
* Part of the begin()/end() iteration interface (used by range-based for loops).
|
||||
*/
|
||||
simdjson_really_inline simdjson_result<array_iterator<document>> end() & noexcept;
|
||||
|
||||
/**
|
||||
* Look up a field by name on an object.
|
||||
*
|
||||
* This method may only be called once on a given value. If you want to look up multiple fields,
|
||||
* you must first get the object using value.get_object() or object(value).
|
||||
*
|
||||
* @param key The key to look up.
|
||||
* @returns INCORRECT_TYPE If the JSON value is not an object.
|
||||
*/
|
||||
simdjson_really_inline simdjson_result<value> operator[](std::string_view key) & noexcept;
|
||||
/**
|
||||
* Look up a field by name on an object.
|
||||
*
|
||||
* This method may only be called once on a given value. If you want to look up multiple fields,
|
||||
* you must first get the object using value.get_object() or object(value).
|
||||
*
|
||||
* @param key The key to look up.
|
||||
* @returns INCORRECT_TYPE If the JSON value is not an object.
|
||||
*/
|
||||
simdjson_really_inline simdjson_result<value> operator[](const char *key) & noexcept;
|
||||
|
||||
protected:
|
||||
simdjson_really_inline document(ondemand::json_iterator &&iter, const uint8_t *json) noexcept;
|
||||
simdjson_really_inline const uint8_t *text(uint32_t idx) const noexcept;
|
||||
|
||||
simdjson_really_inline value as_value() noexcept;
|
||||
static simdjson_really_inline document start(ondemand::json_iterator &&iter) noexcept;
|
||||
/**
|
||||
* Set json to null if the result is successful.
|
||||
*
|
||||
* Convenience function for value-getters.
|
||||
*/
|
||||
template<typename T>
|
||||
simdjson_result<T> consume_if_success(simdjson_result<T> &&result) noexcept;
|
||||
|
||||
simdjson_really_inline void assert_at_start() const noexcept;
|
||||
|
||||
//
|
||||
// For array_iterator
|
||||
//
|
||||
simdjson_really_inline json_iterator &get_iterator() noexcept;
|
||||
simdjson_really_inline json_iterator_ref borrow_iterator() noexcept;
|
||||
simdjson_really_inline bool is_iterator_alive() const noexcept;
|
||||
simdjson_really_inline void iteration_finished() noexcept;
|
||||
|
||||
//
|
||||
// Fields
|
||||
//
|
||||
json_iterator iter{}; ///< Current position in the document
|
||||
const uint8_t *json{}; ///< JSON for the value in the document (nullptr if value has been consumed)
|
||||
|
||||
friend struct simdjson_result<document>;
|
||||
friend class array_iterator<document>;
|
||||
friend class value;
|
||||
friend class ondemand::parser;
|
||||
friend class object;
|
||||
friend class array;
|
||||
friend class field;
|
||||
friend class token;
|
||||
};
|
||||
|
||||
} // namespace ondemand
|
||||
} // namespace SIMDJSON_IMPLEMENTATION
|
||||
} // namespace simdjson
|
||||
|
||||
namespace simdjson {
|
||||
|
||||
/**
 * Result wrapper for an on-demand document: holds either a document or an error code.
 *
 * The members below mirror the document interface so calls can be chained on the result
 * without checking the error first.
 */
template<>
|
||||
struct simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::document> : public SIMDJSON_IMPLEMENTATION::implementation_simdjson_result_base<SIMDJSON_IMPLEMENTATION::ondemand::document> {
|
||||
public:
|
||||
simdjson_really_inline simdjson_result(SIMDJSON_IMPLEMENTATION::ondemand::document &&value) noexcept; ///< @private
|
||||
simdjson_really_inline simdjson_result(error_code error) noexcept; ///< @private
|
||||
|
||||
simdjson_really_inline simdjson_result() noexcept = default;
|
||||
simdjson_really_inline simdjson_result(simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::document> &&a) noexcept = default;
|
||||
simdjson_really_inline ~simdjson_result() noexcept = default; ///< @private
|
||||
|
||||
// Typed getters, same names as on document.
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::array> get_array() & noexcept;
|
||||
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::object> get_object() & noexcept;
|
||||
simdjson_really_inline simdjson_result<uint64_t> get_uint64() noexcept;
|
||||
simdjson_really_inline simdjson_result<int64_t> get_int64() noexcept;
|
||||
simdjson_really_inline simdjson_result<double> get_double() noexcept;
|
||||
simdjson_really_inline simdjson_result<std::string_view> get_string() & noexcept;
|
||||
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::raw_json_string> get_raw_json_string() & noexcept;
|
||||
simdjson_really_inline simdjson_result<bool> get_bool() noexcept;
|
||||
// Returns a plain bool, so an error cannot be reported through the return value.
simdjson_really_inline bool is_null() noexcept;
|
||||
|
||||
// Generic getters returning the value wrapped in a result.
template<typename T> simdjson_really_inline simdjson_result<T> get() & noexcept;
|
||||
template<typename T> simdjson_really_inline simdjson_result<T> get() && noexcept;
|
||||
|
||||
// Generic getters writing into an out parameter and returning the error code.
template<typename T> simdjson_really_inline error_code get(T &out) & noexcept;
|
||||
template<typename T> simdjson_really_inline error_code get(T &out) && noexcept;
|
||||
|
||||
#if SIMDJSON_EXCEPTIONS
|
||||
// Throwing conversions (declared noexcept(false)); only present when exceptions are enabled.
simdjson_really_inline operator SIMDJSON_IMPLEMENTATION::ondemand::array() & noexcept(false);
|
||||
simdjson_really_inline operator SIMDJSON_IMPLEMENTATION::ondemand::object() & noexcept(false);
|
||||
simdjson_really_inline operator uint64_t() noexcept(false);
|
||||
simdjson_really_inline operator int64_t() noexcept(false);
|
||||
simdjson_really_inline operator double() noexcept(false);
|
||||
simdjson_really_inline operator std::string_view() & noexcept(false);
|
||||
simdjson_really_inline operator SIMDJSON_IMPLEMENTATION::ondemand::raw_json_string() & noexcept(false);
|
||||
simdjson_really_inline operator bool() noexcept(false);
|
||||
#endif
|
||||
|
||||
// Array iteration and object field lookup, same signatures as on document.
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::array_iterator<SIMDJSON_IMPLEMENTATION::ondemand::document>> begin() & noexcept;
|
||||
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::array_iterator<SIMDJSON_IMPLEMENTATION::ondemand::document>> end() & noexcept;
|
||||
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::value> operator[](std::string_view key) & noexcept;
|
||||
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::value> operator[](const char *key) & noexcept;
|
||||
};
|
||||
|
||||
} // namespace simdjson
|
|
@ -0,0 +1,66 @@
|
|||
namespace simdjson {
|
||||
namespace SIMDJSON_IMPLEMENTATION {
|
||||
namespace ondemand {
|
||||
|
||||
// clang 6 doesn't think the default constructor can be noexcept, so we make it explicit
|
||||
simdjson_really_inline field::field() noexcept
  : std::pair<raw_json_string, ondemand::value>()
{
}
|
||||
|
||||
/** Build a field from its raw key and its value; the value is moved into the pair. */
simdjson_really_inline field::field(raw_json_string key, ondemand::value &&value) noexcept
  : std::pair<raw_json_string, ondemand::value>(key, std::move(value)) {}
|
||||
|
||||
/**
 * Read a field from the parent iterator: the key, then the ':' separator, then hand a
 * borrowed iterator to the value.
 *
 * @returns The field, or the first error reported by field_key() / field_value().
 */
simdjson_really_inline simdjson_result<field> field::start(json_iterator_ref &parent_iter) noexcept {
  raw_json_string field_name;
  SIMDJSON_TRY( parent_iter->field_key().get(field_name) );
  SIMDJSON_TRY( parent_iter->field_value() );
  return field::start(parent_iter.borrow(), field_name);
}
|
||||
|
||||
/** Build a field for an already-read key; the value starts at the given iterator, which is moved in. */
simdjson_really_inline simdjson_result<field> field::start(json_iterator_ref &&iter, raw_json_string key) noexcept {
  return field(key, value::start(std::move(iter)));
}
|
||||
|
||||
/** The raw key, i.e. the pair's first member. */
simdjson_really_inline raw_json_string field::key() const noexcept {
  return this->first;
}
|
||||
|
||||
/** Reference to the value, i.e. the pair's second member. */
simdjson_really_inline value &field::value() & noexcept {
  return this->second;
}
|
||||
|
||||
/** Rvalue overload: moves the value out of the field. */
simdjson_really_inline value field::value() && noexcept {
  return std::move(second);
}
|
||||
|
||||
} // namespace ondemand
|
||||
} // namespace SIMDJSON_IMPLEMENTATION
|
||||
} // namespace simdjson
|
||||
|
||||
namespace simdjson {
|
||||
|
||||
/** Successful result: takes ownership of the given field. */
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::field>::simdjson_result(
  SIMDJSON_IMPLEMENTATION::ondemand::field &&value
) noexcept
  : implementation_simdjson_result_base<SIMDJSON_IMPLEMENTATION::ondemand::field>(std::move(value)) {}
|
||||
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::field>::simdjson_result(
|
||||
error_code error
|
||||
) noexcept :
|
||||
implementation_simdjson_result_base<SIMDJSON_IMPLEMENTATION::ondemand::field>(error)
|
||||
{
|
||||
}
|
||||
|
||||
/** Propagates the stored error, otherwise returns the wrapped field's key. */
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::raw_json_string> simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::field>::key() noexcept {
  const error_code status = error();
  if (status != SUCCESS) { return status; }
  return first.key();
}
|
||||
/**
 * Propagates the stored error, otherwise returns the wrapped field's value.
 *
 * The value is moved out of the wrapped field.
 */
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::value> simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::field>::value() noexcept {
  const error_code status = error();
  if (status != SUCCESS) { return status; }
  return std::move(first.value());
}
|
||||
|
||||
} // namespace simdjson
|
|
@ -0,0 +1,69 @@
|
|||
#include "simdjson/error.h"
|
||||
|
||||
namespace simdjson {
|
||||
namespace SIMDJSON_IMPLEMENTATION {
|
||||
namespace ondemand {
|
||||
|
||||
/**
 * A JSON field (key/value pair) in an object.
 *
 * Returned from object iteration.
 *
 * Extends from std::pair<raw_json_string, value> so you can use C++ algorithms that rely on pairs:
 * `first` is the raw key and `second` is the value.
 */
|
||||
class field : public std::pair<raw_json_string, value> {
|
||||
public:
|
||||
/**
 * Create a new invalid field.
 *
 * Exists so you can declare a variable and later assign to it before use.
 */
|
||||
simdjson_really_inline field() noexcept;
|
||||
|
||||
// Fields are move-only: moving is defaulted, copying is deleted.
simdjson_really_inline field(field &&other) noexcept = default;
|
||||
simdjson_really_inline field &operator=(field &&other) noexcept = default;
|
||||
simdjson_really_inline field(const field &other) noexcept = delete;
|
||||
simdjson_really_inline field &operator=(const field &other) noexcept = delete;
|
||||
|
||||
/**
 * Get the key.
 */
|
||||
simdjson_really_inline raw_json_string key() const noexcept;
|
||||
/**
 * Get the field value.
 */
|
||||
simdjson_really_inline ondemand::value &value() & noexcept;
|
||||
/**
 * @overload ondemand::value &value() & noexcept
 *
 * Rvalue overload: returns the value by value.
 */
|
||||
simdjson_really_inline ondemand::value value() && noexcept;
|
||||
|
||||
protected:
|
||||
// Build a field from an already-read key and its value.
simdjson_really_inline field(raw_json_string key, ondemand::value &&value) noexcept;
|
||||
// Read the key and ':' from the given iterator, then start the value.
static simdjson_really_inline simdjson_result<field> start(json_iterator_ref &iter) noexcept;
|
||||
// Start the value for a key that has already been read.
static simdjson_really_inline simdjson_result<field> start(json_iterator_ref &&iter, raw_json_string key) noexcept;
|
||||
friend struct simdjson_result<field>;
|
||||
friend class object_iterator;
|
||||
};
|
||||
|
||||
} // namespace ondemand
|
||||
} // namespace SIMDJSON_IMPLEMENTATION
|
||||
} // namespace simdjson
|
||||
|
||||
namespace simdjson {
|
||||
|
||||
/**
 * Result wrapper for an on-demand field: holds either a field or an error code.
 */
template<>
|
||||
struct simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::field> : public SIMDJSON_IMPLEMENTATION::implementation_simdjson_result_base<SIMDJSON_IMPLEMENTATION::ondemand::field> {
|
||||
public:
|
||||
simdjson_really_inline simdjson_result(SIMDJSON_IMPLEMENTATION::ondemand::field &&value) noexcept; ///< @private
|
||||
simdjson_really_inline simdjson_result(error_code error) noexcept; ///< @private
|
||||
|
||||
simdjson_really_inline simdjson_result() noexcept = default;
|
||||
simdjson_really_inline simdjson_result(simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::field> &&a) noexcept = default;
|
||||
simdjson_really_inline ~simdjson_result() noexcept = default; ///< @private
|
||||
|
||||
// The field's key, or the stored error.
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::raw_json_string> key() noexcept;
|
||||
// The field's value, or the stored error.
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::value> value() noexcept;
|
||||
};
|
||||
|
||||
} // namespace simdjson
|
|
@ -0,0 +1,456 @@
|
|||
namespace simdjson {
|
||||
namespace SIMDJSON_IMPLEMENTATION {
|
||||
namespace ondemand {
|
||||
|
||||
/**
 * Move constructor.
 *
 * Copies the token position, parser pointer and string-buffer cursor from `other`, then
 * clears other.parser so the moved-from iterator no longer refers to a parser.
 */
simdjson_really_inline json_iterator::json_iterator(json_iterator &&other) noexcept
  : token_iterator(static_cast<token_iterator &&>(other)),
    parser(other.parser),
    current_string_buf_loc(other.current_string_buf_loc)
#ifdef SIMDJSON_ONDEMAND_SAFETY_RAILS
    , active_lease_depth(other.active_lease_depth)
#endif
{
  other.parser = nullptr;
}
|
||||
/**
 * Move assignment.
 *
 * Copies every field from `other` and then clears other.parser. There is no
 * self-assignment check.
 */
simdjson_really_inline json_iterator &json_iterator::operator=(json_iterator &&other) noexcept {
  // Token stream position
  this->buf = other.buf;
  this->index = other.index;
  // Parser and string-buffer cursor
  this->parser = other.parser;
  this->current_string_buf_loc = other.current_string_buf_loc;
#ifdef SIMDJSON_ONDEMAND_SAFETY_RAILS
  this->active_lease_depth = other.active_lease_depth;
#endif
  // Mark the source as moved-from
  other.parser = nullptr;
  return *this;
}
|
||||
|
||||
simdjson_really_inline json_iterator::json_iterator(ondemand::parser *_parser) noexcept
|
||||
: token_iterator(_parser->dom_parser.buf, _parser->dom_parser.structural_indexes.get()),
|
||||
parser{_parser},
|
||||
current_string_buf_loc{parser->string_buf.get()}
|
||||
#ifdef SIMDJSON_ONDEMAND_SAFETY_RAILS
|
||||
, active_lease_depth{0}
|
||||
#endif
|
||||
{
|
||||
// Release the string buf so it can be reused by the next document
|
||||
logger::log_headers();
|
||||
}
|
||||
#ifdef SIMDJSON_ONDEMAND_SAFETY_RAILS
|
||||
/**
 * Destructor, defined only when SIMDJSON_ONDEMAND_SAFETY_RAILS is defined.
 *
 * Asserts (via SIMDJSON_ASSUME) that no leases are outstanding when the iterator dies.
 */
simdjson_really_inline json_iterator::~json_iterator() noexcept {
|
||||
// If we have any leases out when we die, it's an error
|
||||
SIMDJSON_ASSUME(active_lease_depth == 0);
|
||||
}
|
||||
#endif
|
||||
|
||||
/**
 * Begin an object whose opening token has already been fetched.
 *
 * @param json Pointer to the token expected to be '{'.
 * @returns INCORRECT_TYPE if the token is not '{'; otherwise the result of started_object().
 */
SIMDJSON_WARN_UNUSED simdjson_really_inline simdjson_result<bool> json_iterator::start_object(const uint8_t *json) noexcept {
  if (*json == '{') { return started_object(); }
  logger::log_error(*this, "Not an object");
  return INCORRECT_TYPE;
}
|
||||
/** Advance to the next token and begin an object there. */
SIMDJSON_WARN_UNUSED simdjson_really_inline simdjson_result<bool> json_iterator::start_object() noexcept {
  const uint8_t *token = advance();
  return start_object(token);
}
|
||||
|
||||
/**
 * Called after '{' has been consumed.
 *
 * @returns false if the next token is '}' (the empty object; the '}' is consumed),
 *          true otherwise (nothing further is consumed).
 */
SIMDJSON_WARN_UNUSED simdjson_really_inline bool json_iterator::started_object() noexcept {
  const bool is_empty = (*peek() == '}');
  if (!is_empty) {
    logger::log_start_value(*this, "object");
    return true;
  }
  logger::log_value(*this, "empty object");
  advance();
  return false;
}
|
||||
|
||||
/**
 * Consume the token that follows a field value.
 *
 * @returns true for ',', false for '}' (end of object), TAPE_ERROR for anything else.
 */
SIMDJSON_WARN_UNUSED simdjson_really_inline simdjson_result<bool> json_iterator::has_next_field() noexcept {
  const uint8_t token = *advance();
  if (token == ',') { return true; }
  if (token == '}') {
    logger::log_end_value(*this, "object");
    return false;
  }
  return report_error(TAPE_ERROR, "Missing comma between object fields");
}
|
||||
|
||||
/**
 * Scan forward through the fields of the current object looking for `key`.
 *
 * On a match the iterator is left just past the ':' of the matching field.
 *
 * @param key The key to look for; compared against each raw key with operator==.
 * @returns true if a matching field was found, false if the object ended without a match,
 *          or the first error produced while reading a key, a ':', skipping a value, or
 *          reading the field separator.
 */
SIMDJSON_WARN_UNUSED simdjson_really_inline simdjson_result<bool> json_iterator::find_field_raw(const char *key) noexcept {
|
||||
bool has_next;
|
||||
do {
|
||||
raw_json_string actual_key;
|
||||
// Read the key; fails with the error from consume_raw_json_string() if it is not a string.
SIMDJSON_TRY( consume_raw_json_string().get(actual_key) );
|
||||
if (*advance() != ':') { return report_error(TAPE_ERROR, "Missing colon in object field"); }
|
||||
if (actual_key == key) {
|
||||
logger::log_event(*this, "match", key);
|
||||
return true;
|
||||
}
|
||||
logger::log_event(*this, "non-match", key);
|
||||
SIMDJSON_TRY( skip() ); // Skip the value so we can look at the next key
|
||||
|
||||
// ',' means another field follows; '}' ends the loop.
SIMDJSON_TRY( has_next_field().get(has_next) );
|
||||
} while (has_next);
|
||||
logger::log_event(*this, "no matches", key);
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
 * Consume the next token as an object key.
 *
 * @returns A raw_json_string starting just after the opening quote, or TAPE_ERROR if the
 *          token does not start with '"'.
 */
SIMDJSON_WARN_UNUSED simdjson_really_inline simdjson_result<raw_json_string> json_iterator::field_key() noexcept {
  const uint8_t *quote = advance();
  if (*quote != '"') { return report_error(TAPE_ERROR, "Object key is not a string"); }
  return raw_json_string(quote + 1);
}
|
||||
|
||||
/**
 * Consume the ':' between a key and its value.
 *
 * @returns SUCCESS if the next token is ':', TAPE_ERROR otherwise.
 */
SIMDJSON_WARN_UNUSED simdjson_really_inline error_code json_iterator::field_value() noexcept {
  if (*advance() == ':') { return SUCCESS; }
  return report_error(TAPE_ERROR, "Missing colon in object field");
}
|
||||
|
||||
/**
 * Begin an array whose opening token has already been fetched.
 *
 * @param json Pointer to the token expected to be '['.
 * @returns INCORRECT_TYPE if the token is not '['; otherwise the result of started_array().
 */
SIMDJSON_WARN_UNUSED simdjson_really_inline simdjson_result<bool> json_iterator::start_array(const uint8_t *json) noexcept {
  if (*json == '[') { return started_array(); }
  logger::log_error(*this, "Not an array");
  return INCORRECT_TYPE;
}
|
||||
|
||||
/** Advance to the next token and begin an array there. */
SIMDJSON_WARN_UNUSED simdjson_really_inline simdjson_result<bool> json_iterator::start_array() noexcept {
  const uint8_t *token = advance();
  return start_array(token);
}
|
||||
|
||||
/**
 * Called after '[' has been consumed.
 *
 * @returns false if the next token is ']' (the empty array; the ']' is consumed),
 *          true otherwise (nothing further is consumed).
 */
SIMDJSON_WARN_UNUSED simdjson_really_inline bool json_iterator::started_array() noexcept {
  const bool is_empty = (*peek() == ']');
  if (!is_empty) {
    logger::log_start_value(*this, "array");
    return true;
  }
  logger::log_value(*this, "empty array");
  advance();
  return false;
}
|
||||
|
||||
/**
 * Consume the token that follows an array element.
 *
 * @returns true for ',', false for ']' (end of array), TAPE_ERROR for anything else.
 */
SIMDJSON_WARN_UNUSED simdjson_really_inline simdjson_result<bool> json_iterator::has_next_element() noexcept {
  const uint8_t token = *advance();
  if (token == ',') { return true; }
  if (token == ']') {
    logger::log_end_value(*this, "array");
    return false;
  }
  return report_error(TAPE_ERROR, "Missing comma between array elements");
}
|
||||
|
||||
/**
 * Parse the token at `json` as a string and unescape it.
 *
 * @param json Pointer to the token; parse_raw_json_string() rejects it unless it starts with '"'.
 * @returns The result of unescape(), which is passed the iterator's current string-buffer cursor.
 */
SIMDJSON_WARN_UNUSED simdjson_result<std::string_view> json_iterator::parse_string(const uint8_t *json) noexcept {
|
||||
return parse_raw_json_string(json).unescape(current_string_buf_loc);
|
||||
}
|
||||
/** Advance to the next token and parse it as an unescaped string. */
SIMDJSON_WARN_UNUSED simdjson_result<std::string_view> json_iterator::consume_string() noexcept {
  const uint8_t *token = advance();
  return parse_string(token);
}
|
||||
/**
 * Interpret the token at `json` as a raw (still escaped) JSON string.
 *
 * @returns A raw_json_string starting just after the opening quote, or INCORRECT_TYPE if
 *          the token does not start with '"'.
 */
SIMDJSON_WARN_UNUSED simdjson_result<raw_json_string> json_iterator::parse_raw_json_string(const uint8_t *json) noexcept {
  // The value is logged before the type check, so it is logged on the error path too.
  logger::log_value(*this, "string", "");
  if (*json == '"') { return raw_json_string(json + 1); }
  logger::log_error(*this, "Not a string");
  return INCORRECT_TYPE;
}
|
||||
/** Advance to the next token and interpret it as a raw JSON string. */
SIMDJSON_WARN_UNUSED simdjson_result<raw_json_string> json_iterator::consume_raw_json_string() noexcept {
  const uint8_t *token = advance();
  return parse_raw_json_string(token);
}
|
||||
/** Log and parse the token at `json` with numberparsing::parse_unsigned(). */
SIMDJSON_WARN_UNUSED simdjson_result<uint64_t> json_iterator::parse_uint64(const uint8_t *json) noexcept {
  logger::log_value(*this, "uint64", "");
  auto parsed = numberparsing::parse_unsigned(json);
  return parsed;
}
|
||||
/** Advance to the next token and parse it as an unsigned 64-bit integer. */
SIMDJSON_WARN_UNUSED simdjson_result<uint64_t> json_iterator::consume_uint64() noexcept {
  const uint8_t *token = advance();
  return parse_uint64(token);
}
|
||||
/** Log and parse the token at `json` with numberparsing::parse_integer(). */
SIMDJSON_WARN_UNUSED simdjson_result<int64_t> json_iterator::parse_int64(const uint8_t *json) noexcept {
  logger::log_value(*this, "int64", "");
  auto parsed = numberparsing::parse_integer(json);
  return parsed;
}
|
||||
/** Advance to the next token and parse it as a signed 64-bit integer. */
SIMDJSON_WARN_UNUSED simdjson_result<int64_t> json_iterator::consume_int64() noexcept {
  const uint8_t *token = advance();
  return parse_int64(token);
}
|
||||
/** Log and parse the token at `json` with numberparsing::parse_double(). */
SIMDJSON_WARN_UNUSED simdjson_result<double> json_iterator::parse_double(const uint8_t *json) noexcept {
  logger::log_value(*this, "double", "");
  auto parsed = numberparsing::parse_double(json);
  return parsed;
}
|
||||
/** Advance to the next token and parse it as a double. */
SIMDJSON_WARN_UNUSED simdjson_result<double> json_iterator::consume_double() noexcept {
  const uint8_t *token = advance();
  return parse_double(token);
}
|
||||
/**
 * Parse the token at `json` as `true` or `false`.
 *
 * Reads up to json[5], so the caller must guarantee those bytes are readable.
 *
 * @returns The boolean, or INCORRECT_TYPE if the token is neither literal or is followed
 *          by a character that is not structural or whitespace.
 */
SIMDJSON_WARN_UNUSED simdjson_result<bool> json_iterator::parse_bool(const uint8_t *json) noexcept {
|
||||
logger::log_value(*this, "bool", "");
|
||||
// Non-zero unless the first four bytes are "true" (assumes str4ncmp returns 0 on a match — TODO confirm).
auto not_true = atomparsing::str4ncmp(json, "true");
|
||||
// Non-zero unless the first four bytes are "fals" and the fifth is 'e'.
auto not_false = atomparsing::str4ncmp(json, "fals") | (json[4] ^ 'e');
|
||||
// The byte right after the literal (index 4 after "true", index 5 after "false") must end the token.
bool error = (not_true && not_false) || jsoncharutils::is_not_structural_or_whitespace(json[not_true ? 5 : 4]);
|
||||
if (error) { logger::log_error(*this, "Not a boolean"); return INCORRECT_TYPE; }
|
||||
return simdjson_result<bool>(!not_true);
|
||||
}
|
||||
// Takes the token returned by advance() and hands it to parse_bool().
SIMDJSON_WARN_UNUSED simdjson_result<bool> json_iterator::consume_bool() noexcept {
  const uint8_t *token = advance();
  return parse_bool(token);
}
|
||||
// Reports whether the four bytes at `json` spell "null", logging a "null" value when they do.
// Only those four bytes are compared; the byte that follows is not inspected here.
simdjson_really_inline bool json_iterator::is_null(const uint8_t *json) noexcept {
  const bool matches = !atomparsing::str4ncmp(json, "null");
  if (matches) {
    logger::log_value(*this, "null", "");
  }
  return matches;
}
|
||||
// Checks the token returned by peek() for "null"; calls advance() only when it matches.
simdjson_really_inline bool json_iterator::is_null() noexcept {
  if (!is_null(peek())) {
    return false;
  }
  advance();
  return true;
}
|
||||
|
||||
template<int N>
|
||||
SIMDJSON_WARN_UNUSED simdjson_really_inline bool json_iterator::copy_to_buffer(const uint8_t *json, uint8_t (&tmpbuf)[N]) noexcept {
|
||||
// Truncate whitespace to fit the buffer.
|
||||
auto len = peek_length(-1);
|
||||
if (len > N-1) {
|
||||
if (jsoncharutils::is_not_structural_or_whitespace(json[N])) { return false; }
|
||||
len = N-1;
|
||||
}
|
||||
|
||||
// Copy to the buffer.
|
||||
memcpy(tmpbuf, json, len);
|
||||
tmpbuf[len] = ' ';
|
||||
return true;
|
||||
}
|
||||
|
||||
// NOTE(review): not referenced anywhere in the visible portion of this file; the root
// number parsers below size their own buffers. Confirm it is still needed.
constexpr const uint32_t MAX_INT_LENGTH = 1024;
|
||||
|
||||
// Parses a document-root unsigned integer via a space-terminated scratch copy.
//
// Returns NUMBER_ERROR when the token does not fit the scratch buffer; otherwise returns the
// result (or error) of numberparsing::parse_unsigned().
// NOTE(review): failures here are only logged, whereas parse_root_int64/parse_root_double
// call report_error() — confirm the difference is intentional.
SIMDJSON_WARN_UNUSED simdjson_result<uint64_t> json_iterator::parse_root_uint64(const uint8_t *json) noexcept {
  uint8_t scratch[20+1]; // <20 digits> is the longest possible unsigned integer
  const bool fits = copy_to_buffer(json, scratch);
  if (!fits) {
    logger::log_error(*this, "Root number more than 20 characters");
    return NUMBER_ERROR;
  }

  logger::log_value(*this, "uint64", "");
  auto parsed = numberparsing::parse_unsigned(scratch);
  if (!parsed.error()) {
    return parsed;
  }
  logger::log_error(*this, "Error parsing unsigned integer");
  return parsed.error();
}
|
||||
// Takes the token returned by advance() and hands it to parse_root_uint64().
SIMDJSON_WARN_UNUSED simdjson_result<uint64_t> json_iterator::consume_root_uint64() noexcept {
  const uint8_t *token = advance();
  return parse_root_uint64(token);
}
|
||||
// Parses a document-root signed integer via a space-terminated scratch copy.
//
// Returns NUMBER_ERROR when the token does not fit the scratch buffer. A parse failure is
// recorded on the iterator through report_error() and the failing result is returned.
SIMDJSON_WARN_UNUSED simdjson_result<int64_t> json_iterator::parse_root_int64(const uint8_t *json) noexcept {
  uint8_t scratch[20+1]; // -<19 digits> is the longest possible integer
  const bool fits = copy_to_buffer(json, scratch);
  if (!fits) {
    logger::log_error(*this, "Root number more than 20 characters");
    return NUMBER_ERROR;
  }

  logger::log_value(*this, "int64", "");
  auto parsed = numberparsing::parse_integer(scratch);
  if (parsed.error()) {
    report_error(parsed.error(), "Error parsing integer");
  }
  return parsed;
}
|
||||
// Takes the token returned by advance() and hands it to parse_root_int64().
SIMDJSON_WARN_UNUSED simdjson_result<int64_t> json_iterator::consume_root_int64() noexcept {
  const uint8_t *token = advance();
  return parse_root_int64(token);
}
|
||||
// Parses a document-root floating-point number via a space-terminated scratch copy.
//
// Returns NUMBER_ERROR when the token does not fit the scratch buffer. A parse failure is
// recorded on the iterator through report_error() and the failing result is returned.
SIMDJSON_WARN_UNUSED simdjson_result<double> json_iterator::parse_root_double(const uint8_t *json) noexcept {
  // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/,
  // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the
  // biggest number: -0.<fraction>e-308.
  uint8_t scratch[1074+8+1];
  const bool fits = copy_to_buffer(json, scratch);
  if (!fits) {
    logger::log_error(*this, "Root number more than 1082 characters");
    return NUMBER_ERROR;
  }

  logger::log_value(*this, "double", "");
  auto parsed = numberparsing::parse_double(scratch);
  if (parsed.error()) {
    report_error(parsed.error(), "Error parsing double");
  }
  return parsed;
}
|
||||
// Takes the token returned by advance() and hands it to parse_root_double().
SIMDJSON_WARN_UNUSED simdjson_result<double> json_iterator::consume_root_double() noexcept {
  const uint8_t *token = advance();
  return parse_root_double(token);
}
|
||||
// Parses a document-root boolean via a space-terminated scratch copy ("false" + terminator
// is the longest case). A token that does not fit is logged and reported as INCORRECT_TYPE.
SIMDJSON_WARN_UNUSED simdjson_result<bool> json_iterator::parse_root_bool(const uint8_t *json) noexcept {
  uint8_t scratch[5+1];
  if (copy_to_buffer(json, scratch)) {
    return parse_bool(scratch);
  }
  logger::log_error(*this, "Not a boolean");
  return INCORRECT_TYPE;
}
|
||||
// Takes the token returned by advance() and hands it to parse_root_bool().
SIMDJSON_WARN_UNUSED simdjson_result<bool> json_iterator::consume_root_bool() noexcept {
  const uint8_t *token = advance();
  return parse_root_bool(token);
}
|
||||
// Reports whether a document-root token is `null`, testing a space-terminated scratch copy.
// A token that does not fit the scratch buffer is not null.
simdjson_really_inline bool json_iterator::root_is_null(const uint8_t *json) noexcept {
  uint8_t scratch[4+1];
  return copy_to_buffer(json, scratch) && is_null(scratch);
}
|
||||
|
||||
// Skips one JSON value. A token starting with '[' or '{' is skipped through its matching
// close via skip_container(); any other token counts as a complete value on its own.
SIMDJSON_WARN_UNUSED simdjson_really_inline error_code json_iterator::skip() noexcept {
  const uint8_t first = *advance();
  // PERF TODO does it skip the depth check when we don't decrement depth?
  if (first == '[' || first == '{') {
    logger::log_start_value(*this, "skip");
    return skip_container();
  }
  logger::log_value(*this, "skip", "");
  return SUCCESS;
}
|
||||
|
||||
// Skips tokens until the close bracket/brace that balances an already-consumed opener.
//
// Returns SUCCESS once the nesting count drops back to zero, or reports TAPE_ERROR when the
// structural indexes run out first. '[' and '{' (and ']' and '}') are counted alike; the
// kind of bracket is not matched.
SIMDJSON_WARN_UNUSED simdjson_really_inline error_code json_iterator::skip_container() noexcept {
  uint32_t open_containers = 1;
  // The loop exits successfully only when a close token brings the count to zero.
  const auto last = &parser->dom_parser.structural_indexes[parser->dom_parser.n_structural_indexes];
  while (index <= last) {
    const uint8_t ch = *advance();
    if (ch == ']' || ch == '}') {
      // TODO consider whether matching braces is a requirement: if non-matching braces indicates
      // *missing* braces, then future lookups are not in the object/arrays they think they are,
      // violating the rule "validate enough structure that the user can be confident they are
      // looking at the right values."
      logger::log_end_value(*this, "skip");
      open_containers--;
      if (open_containers == 0) {
        logger::log_event(*this, "end skip", "");
        return SUCCESS;
      }
    } else if (ch == '[' || ch == '{') {
      // PERF TODO does it skip the depth check when we don't decrement depth?
      logger::log_start_value(*this, "skip");
      open_containers++;
    } else {
      logger::log_value(*this, "skip", "");
    }
  }

  return report_error(TAPE_ERROR, "not enough close braces");
}
|
||||
|
||||
// True while `index` still points at the first entry of the parser's structural indexes.
simdjson_really_inline bool json_iterator::at_start() const noexcept {
  const auto first_entry = parser->dom_parser.structural_indexes.get();
  return index == first_entry;
}
|
||||
|
||||
// True when `index` points at entry n_structural_indexes, i.e. one past the last structural.
simdjson_really_inline bool json_iterator::at_eof() const noexcept {
  const auto end_entry = &parser->dom_parser.structural_indexes[parser->dom_parser.n_structural_indexes];
  return index == end_entry;
}
|
||||
|
||||
// An iterator is alive while it still has a parser attached.
simdjson_really_inline bool json_iterator::is_alive() const noexcept {
  return parser != nullptr;
}
|
||||
|
||||
|
||||
// Hands out the first json_iterator_ref onto this iterator.
//
// With SIMDJSON_ONDEMAND_SAFETY_RAILS no lease may be outstanding (active_lease_depth == 0);
// the new ref is issued at lease depth 1 and becomes the active lease.
simdjson_really_inline json_iterator_ref json_iterator::borrow() noexcept {
#ifdef SIMDJSON_ONDEMAND_SAFETY_RAILS
  SIMDJSON_ASSUME(active_lease_depth == 0);
  constexpr uint32_t root_lease_depth = 1;
  active_lease_depth = root_lease_depth;
  return json_iterator_ref(this, root_lease_depth);
#else
  return json_iterator_ref(this);
#endif
}
|
||||
|
||||
// Logs `message`, stores `error` in the iterator and returns it.
//
// `error` must not be SUCCESS, UNINITIALIZED, INCORRECT_TYPE or NO_SUCH_FIELD.
simdjson_really_inline error_code json_iterator::report_error(error_code error, const char *message) noexcept {
  SIMDJSON_ASSUME(!(error == SUCCESS || error == UNINITIALIZED || error == INCORRECT_TYPE || error == NO_SUCH_FIELD));
  logger::log_error(*this, message);
  _error = error;
  return _error;
}
|
||||
// Returns the stored `_error` field (the value last written by report_error()).
simdjson_really_inline error_code json_iterator::error() const noexcept {
|
||||
return _error;
|
||||
}
|
||||
|
||||
//
|
||||
// json_iterator_ref
|
||||
//
|
||||
// Move constructor: takes over `other`'s iterator pointer (and, with safety rails, its lease
// depth) and nulls `other.iter` so the moved-from ref is no longer alive.
simdjson_really_inline json_iterator_ref::json_iterator_ref(json_iterator_ref &&other) noexcept
|
||||
: iter{other.iter}
|
||||
#ifdef SIMDJSON_ONDEMAND_SAFETY_RAILS
|
||||
, lease_depth{other.lease_depth}
|
||||
#endif // SIMDJSON_ONDEMAND_SAFETY_RAILS
|
||||
{
|
||||
other.iter = nullptr;
|
||||
}
|
||||
// Move assignment: the destination must not be holding the active lease. Takes over
// `other`'s lease depth (safety rails only) and iterator pointer, then nulls `other.iter`.
simdjson_really_inline json_iterator_ref &json_iterator_ref::operator=(json_iterator_ref &&other) noexcept {
  assert_is_not_active();
#ifdef SIMDJSON_ONDEMAND_SAFETY_RAILS
  lease_depth = other.lease_depth;
#endif // SIMDJSON_ONDEMAND_SAFETY_RAILS
  iter = other.iter;
  other.iter = nullptr;
  return *this;
}
|
||||
|
||||
#ifdef SIMDJSON_ONDEMAND_SAFETY_RAILS
|
||||
simdjson_really_inline json_iterator_ref::~json_iterator_ref() noexcept {
|
||||
// The caller MUST consume their value and release the iterator before they die
|
||||
assert_is_not_active();
|
||||
}
|
||||
simdjson_really_inline json_iterator_ref::json_iterator_ref(
|
||||
json_iterator *_iter,
|
||||
uint32_t _lease_depth
|
||||
) noexcept : iter{_iter}, lease_depth{_lease_depth}
|
||||
{
|
||||
assert_is_active();
|
||||
}
|
||||
#else
|
||||
simdjson_really_inline json_iterator_ref::json_iterator_ref(
|
||||
json_iterator *_iter
|
||||
) noexcept : iter{_iter}
|
||||
{
|
||||
assert_is_active();
|
||||
}
|
||||
#endif // SIMDJSON_ONDEMAND_SAFETY_RAILS
|
||||
|
||||
// Issues a child ref onto the same iterator. With safety rails the child is one lease level
// deeper than this ref and becomes the iterator's active lease.
simdjson_really_inline json_iterator_ref json_iterator_ref::borrow() noexcept {
  assert_is_active();
#ifdef SIMDJSON_ONDEMAND_SAFETY_RAILS
  iter->active_lease_depth = lease_depth + 1;
  return json_iterator_ref(iter, iter->active_lease_depth);
#else
  return json_iterator_ref(iter);
#endif
}
|
||||
// Gives up this ref's lease: with safety rails the iterator's active lease depth drops to
// the level above this ref, and in either build the ref stops being alive (iter = nullptr).
// The ref must be active when this is called.
simdjson_really_inline void json_iterator_ref::release() noexcept {
|
||||
assert_is_active();
|
||||
#ifdef SIMDJSON_ONDEMAND_SAFETY_RAILS
|
||||
iter->active_lease_depth = lease_depth - 1;
|
||||
#endif
|
||||
iter = nullptr;
|
||||
}
|
||||
|
||||
// Member access to the underlying iterator; the ref must be active.
simdjson_really_inline json_iterator *json_iterator_ref::operator->() noexcept {
|
||||
assert_is_active();
|
||||
return iter;
|
||||
}
|
||||
// Dereferences to the underlying iterator; the ref must be active.
simdjson_really_inline json_iterator &json_iterator_ref::operator*() noexcept {
|
||||
assert_is_active();
|
||||
return *iter;
|
||||
}
|
||||
// Const dereference to the underlying iterator; the ref must be active.
simdjson_really_inline const json_iterator &json_iterator_ref::operator*() const noexcept {
|
||||
assert_is_active();
|
||||
return *iter;
|
||||
}
|
||||
|
||||
// A ref is alive while it still points at an iterator (it has not been released or moved from).
simdjson_really_inline bool json_iterator_ref::is_alive() const noexcept {
|
||||
return iter != nullptr;
|
||||
}
|
||||
// A ref is active when it is alive and, with safety rails, its lease depth is the one the
// iterator currently regards as active.
simdjson_really_inline bool json_iterator_ref::is_active() const noexcept {
  if (!is_alive()) { return false; }
#ifdef SIMDJSON_ONDEMAND_SAFETY_RAILS
  return lease_depth == iter->active_lease_depth;
#else
  return true;
#endif
}
|
||||
// Asserts (via SIMDJSON_ASSUME) the same condition is_active() tests: a non-null iterator
// and, with safety rails, a lease depth equal to the iterator's active lease depth.
simdjson_really_inline void json_iterator_ref::assert_is_active() const noexcept {
|
||||
// We don't call const functions because VC++ is worried they might have side effects in __assume
|
||||
#ifdef SIMDJSON_ONDEMAND_SAFETY_RAILS
|
||||
SIMDJSON_ASSUME(iter != nullptr && lease_depth == iter->active_lease_depth);
|
||||
#else
|
||||
SIMDJSON_ASSUME(iter != nullptr);
|
||||
#endif
|
||||
}
|
||||
// Asserts (via SIMDJSON_ASSUME) that this ref is NOT the active lease: either it has no
// iterator or, with safety rails, its lease depth differs from the iterator's active one.
simdjson_really_inline void json_iterator_ref::assert_is_not_active() const noexcept {
  // The condition is spelled out inline rather than calling is_active(): VC++ is worried
  // that function calls inside __assume might have side effects.
#ifdef SIMDJSON_ONDEMAND_SAFETY_RAILS
  SIMDJSON_ASSUME(iter == nullptr || lease_depth != iter->active_lease_depth);
#else
  SIMDJSON_ASSUME(iter == nullptr);
#endif
}
|
||||
|
||||
|
||||
|
||||
} // namespace ondemand
|
||||
} // namespace SIMDJSON_IMPLEMENTATION
|
||||
} // namespace simdjson
|
||||
|
||||
namespace simdjson {
|
||||
|
||||
// Success constructor: forwards the moved-in json_iterator to the result base class.
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::json_iterator>::simdjson_result(SIMDJSON_IMPLEMENTATION::ondemand::json_iterator &&value) noexcept
|
||||
: implementation_simdjson_result_base<SIMDJSON_IMPLEMENTATION::ondemand::json_iterator>(std::forward<SIMDJSON_IMPLEMENTATION::ondemand::json_iterator>(value)) {}
|
||||
// Error constructor: passes `error` to the result base class.
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::json_iterator>::simdjson_result(error_code error) noexcept
|
||||
: implementation_simdjson_result_base<SIMDJSON_IMPLEMENTATION::ondemand::json_iterator>(error) {}
|
||||
|
||||
// Success constructor: forwards the moved-in json_iterator_ref to the result base class.
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::json_iterator_ref>::simdjson_result(SIMDJSON_IMPLEMENTATION::ondemand::json_iterator_ref &&value) noexcept
|
||||
: implementation_simdjson_result_base<SIMDJSON_IMPLEMENTATION::ondemand::json_iterator_ref>(std::forward<SIMDJSON_IMPLEMENTATION::ondemand::json_iterator_ref>(value)) {}
|
||||
// Error constructor: passes `error` to the result base class.
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::json_iterator_ref>::simdjson_result(error_code error) noexcept
|
||||
: implementation_simdjson_result_base<SIMDJSON_IMPLEMENTATION::ondemand::json_iterator_ref>(error) {}
|
||||
|
||||
} // namespace simdjson
|
|
@ -0,0 +1,297 @@
|
|||
namespace simdjson {
|
||||
namespace SIMDJSON_IMPLEMENTATION {
|
||||
namespace ondemand {
|
||||
|
||||
class document;
|
||||
class object;
|
||||
class array;
|
||||
class value;
|
||||
class raw_json_string;
|
||||
class parser;
|
||||
class json_iterator_ref;
|
||||
|
||||
/**
|
||||
* Iterates through JSON, with structure-sensitive algorithms.
|
||||
*
|
||||
* @private This is not intended for external use.
|
||||
*/
|
||||
class json_iterator : public token_iterator {
|
||||
public:
|
||||
simdjson_really_inline json_iterator() noexcept = default;
|
||||
simdjson_really_inline json_iterator(json_iterator &&other) noexcept;
|
||||
simdjson_really_inline json_iterator &operator=(json_iterator &&other) noexcept;
|
||||
#ifdef SIMDJSON_ONDEMAND_SAFETY_RAILS
|
||||
simdjson_really_inline ~json_iterator() noexcept;
|
||||
#else
|
||||
simdjson_really_inline ~json_iterator() noexcept = default;
|
||||
#endif
|
||||
simdjson_really_inline json_iterator(const json_iterator &other) noexcept = delete;
|
||||
simdjson_really_inline json_iterator &operator=(const json_iterator &other) noexcept = delete;
|
||||
|
||||
/**
|
||||
* Check for an opening { and start an object iteration.
|
||||
*
|
||||
* @param json A pointer to the potential {
|
||||
* @returns Whether the object had any fields (returns false for empty).
|
||||
* @error INCORRECT_TYPE if there is no opening {
|
||||
*/
|
||||
SIMDJSON_WARN_UNUSED simdjson_really_inline simdjson_result<bool> start_object(const uint8_t *json) noexcept;
|
||||
/**
|
||||
* Check for an opening { and start an object iteration.
|
||||
*
|
||||
* @returns Whether the object had any fields (returns false for empty).
|
||||
* @error INCORRECT_TYPE if there is no opening {
|
||||
*/
|
||||
SIMDJSON_WARN_UNUSED simdjson_really_inline simdjson_result<bool> start_object() noexcept;
|
||||
|
||||
/**
|
||||
* Start an object iteration after the user has already checked and moved past the {.
|
||||
*
|
||||
* Does not move the iterator.
|
||||
*
|
||||
* @returns Whether the object had any fields (returns false for empty).
|
||||
*/
|
||||
SIMDJSON_WARN_UNUSED simdjson_really_inline bool started_object() noexcept;
|
||||
|
||||
/**
|
||||
* Moves to the next field in an object.
|
||||
*
|
||||
* Looks for , and }. If } is found, the object is finished and the iterator advances past it.
|
||||
* Otherwise, it advances to the next value.
|
||||
*
|
||||
* @return whether there is another field in the object.
|
||||
* @error TAPE_ERROR If there is a comma missing between fields.
|
||||
*/
|
||||
SIMDJSON_WARN_UNUSED simdjson_really_inline simdjson_result<bool> has_next_field() noexcept;
|
||||
|
||||
/**
|
||||
* Get the current field's key.
|
||||
*/
|
||||
SIMDJSON_WARN_UNUSED simdjson_really_inline simdjson_result<raw_json_string> field_key() noexcept;
|
||||
|
||||
/**
|
||||
* Pass the : in the field and move to its value.
|
||||
*/
|
||||
SIMDJSON_WARN_UNUSED simdjson_really_inline error_code field_value() noexcept;
|
||||
|
||||
/**
|
||||
* Find the next field with the given key.
|
||||
*
|
||||
* Assumes you have called has_next_field() or otherwise matched the previous value.
|
||||
*
|
||||
* Key is *raw JSON,* meaning it will be matched against the verbatim JSON without attempting to
|
||||
* unescape it. This works well for typical ASCII and UTF-8 keys (almost all of them), but may
|
||||
* fail to match some keys with escapes (\u, \n, etc.).
|
||||
*/
|
||||
SIMDJSON_WARN_UNUSED simdjson_really_inline simdjson_result<bool> find_field_raw(const char *key) noexcept;
|
||||
|
||||
/**
|
||||
* Check for an opening [ and start an array iteration.
|
||||
*
|
||||
* @param json A pointer to the potential [.
|
||||
* @returns Whether the array had any elements (returns false for empty).
|
||||
* @error INCORRECT_TYPE If there is no [.
|
||||
*/
|
||||
SIMDJSON_WARN_UNUSED simdjson_really_inline simdjson_result<bool> start_array(const uint8_t *json) noexcept;
|
||||
/**
|
||||
* Check for an opening [ and start an array iteration.
|
||||
*
|
||||
* @returns Whether the array had any elements (returns false for empty).
|
||||
* @error INCORRECT_TYPE If there is no [.
|
||||
*/
|
||||
SIMDJSON_WARN_UNUSED simdjson_really_inline simdjson_result<bool> start_array() noexcept;
|
||||
|
||||
/**
|
||||
* Start an array iteration after the user has already checked and moved past the [.
|
||||
*
|
||||
* Does not move the iterator.
|
||||
*
|
||||
* @returns Whether the array had any elements (returns false for empty).
|
||||
*/
|
||||
SIMDJSON_WARN_UNUSED simdjson_really_inline bool started_array() noexcept;
|
||||
|
||||
/**
|
||||
* Moves to the next element in an array.
|
||||
*
|
||||
* Looks for , and ]. If ] is found, the array is finished and the iterator advances past it.
|
||||
* Otherwise, it advances to the next value.
|
||||
*
|
||||
* @return Whether there is another element in the array.
|
||||
* @error TAPE_ERROR If there is a comma missing between elements.
|
||||
*/
|
||||
SIMDJSON_WARN_UNUSED simdjson_really_inline simdjson_result<bool> has_next_element() noexcept;
|
||||
|
||||
SIMDJSON_WARN_UNUSED simdjson_really_inline simdjson_result<std::string_view> parse_string(const uint8_t *json) noexcept;
|
||||
SIMDJSON_WARN_UNUSED simdjson_really_inline simdjson_result<std::string_view> consume_string() noexcept;
|
||||
SIMDJSON_WARN_UNUSED simdjson_really_inline simdjson_result<raw_json_string> parse_raw_json_string(const uint8_t *json) noexcept;
|
||||
SIMDJSON_WARN_UNUSED simdjson_really_inline simdjson_result<raw_json_string> consume_raw_json_string() noexcept;
|
||||
SIMDJSON_WARN_UNUSED simdjson_really_inline simdjson_result<uint64_t> parse_uint64(const uint8_t *json) noexcept;
|
||||
SIMDJSON_WARN_UNUSED simdjson_really_inline simdjson_result<uint64_t> consume_uint64() noexcept;
|
||||
SIMDJSON_WARN_UNUSED simdjson_really_inline simdjson_result<int64_t> parse_int64(const uint8_t *json) noexcept;
|
||||
SIMDJSON_WARN_UNUSED simdjson_really_inline simdjson_result<int64_t> consume_int64() noexcept;
|
||||
SIMDJSON_WARN_UNUSED simdjson_really_inline simdjson_result<double> parse_double(const uint8_t *json) noexcept;
|
||||
SIMDJSON_WARN_UNUSED simdjson_really_inline simdjson_result<double> consume_double() noexcept;
|
||||
SIMDJSON_WARN_UNUSED simdjson_really_inline simdjson_result<bool> parse_bool(const uint8_t *json) noexcept;
|
||||
SIMDJSON_WARN_UNUSED simdjson_really_inline simdjson_result<bool> consume_bool() noexcept;
|
||||
simdjson_really_inline bool is_null(const uint8_t *json) noexcept;
|
||||
simdjson_really_inline bool is_null() noexcept;
|
||||
|
||||
SIMDJSON_WARN_UNUSED simdjson_really_inline simdjson_result<uint64_t> parse_root_uint64(const uint8_t *json) noexcept;
|
||||
SIMDJSON_WARN_UNUSED simdjson_really_inline simdjson_result<uint64_t> consume_root_uint64() noexcept;
|
||||
SIMDJSON_WARN_UNUSED simdjson_really_inline simdjson_result<int64_t> parse_root_int64(const uint8_t *json) noexcept;
|
||||
SIMDJSON_WARN_UNUSED simdjson_really_inline simdjson_result<int64_t> consume_root_int64() noexcept;
|
||||
SIMDJSON_WARN_UNUSED simdjson_really_inline simdjson_result<double> parse_root_double(const uint8_t *json) noexcept;
|
||||
SIMDJSON_WARN_UNUSED simdjson_really_inline simdjson_result<double> consume_root_double() noexcept;
|
||||
SIMDJSON_WARN_UNUSED simdjson_really_inline simdjson_result<bool> parse_root_bool(const uint8_t *json) noexcept;
|
||||
SIMDJSON_WARN_UNUSED simdjson_really_inline simdjson_result<bool> consume_root_bool() noexcept;
|
||||
simdjson_really_inline bool root_is_null(const uint8_t *json) noexcept;
|
||||
simdjson_really_inline bool root_is_null() noexcept;
|
||||
|
||||
/**
|
||||
* Skips a JSON value, whether it is a scalar, array or object.
|
||||
*/
|
||||
SIMDJSON_WARN_UNUSED simdjson_really_inline error_code skip() noexcept;
|
||||
|
||||
/**
|
||||
* Skips to the end of a JSON object or array.
|
||||
*
|
||||
* @return SUCCESS once the close bracket or brace balancing the current container has been passed, or TAPE_ERROR if the tokens run out first.
|
||||
*/
|
||||
SIMDJSON_WARN_UNUSED simdjson_really_inline error_code skip_container() noexcept;
|
||||
|
||||
/**
|
||||
* Tell whether the iterator is still at the start
|
||||
*/
|
||||
simdjson_really_inline bool at_start() const noexcept;
|
||||
|
||||
/**
|
||||
* Tell whether the iterator is at the EOF mark
|
||||
*/
|
||||
simdjson_really_inline bool at_eof() const noexcept;
|
||||
|
||||
/**
|
||||
* Tell whether the iterator is live (has not been moved).
|
||||
*/
|
||||
simdjson_really_inline bool is_alive() const noexcept;
|
||||
|
||||
/**
|
||||
* Report an error, preventing further iteration.
|
||||
*
|
||||
* @param error The error to report. Must not be SUCCESS, UNINITIALIZED, INCORRECT_TYPE, or NO_SUCH_FIELD.
|
||||
* @param message An error message to report with the error.
|
||||
*/
|
||||
simdjson_really_inline error_code report_error(error_code error, const char *message) noexcept;
|
||||
|
||||
/**
|
||||
* Get the error (if any).
|
||||
*/
|
||||
simdjson_really_inline error_code error() const noexcept;
|
||||
|
||||
protected:
|
||||
ondemand::parser *parser{};
|
||||
/**
|
||||
* Next free location in the string buffer.
|
||||
*
|
||||
* Used by raw_json_string::unescape() to have a place to unescape strings to.
|
||||
*/
|
||||
uint8_t *current_string_buf_loc{};
|
||||
/**
|
||||
* JSON error, if there is one.
|
||||
*
|
||||
* INCORRECT_TYPE and NO_SUCH_FIELD are *not* stored here, ever.
|
||||
*
|
||||
* PERF NOTE: we *hope* this will be elided into control flow, as it is only used (a) in the first
|
||||
* iteration of the loop, or (b) for the final iteration after a missing comma is found in ++. If
|
||||
* this is not elided, we should make sure it's at least not using up a register. Failing that,
|
||||
* we should store it in document so there's only one of them.
|
||||
*/
|
||||
error_code _error{};
|
||||
#ifdef SIMDJSON_ONDEMAND_SAFETY_RAILS
|
||||
uint32_t active_lease_depth{};
|
||||
#endif
|
||||
|
||||
simdjson_really_inline json_iterator(ondemand::parser *parser) noexcept;
|
||||
template<int N>
|
||||
SIMDJSON_WARN_UNUSED simdjson_really_inline bool copy_to_buffer(const uint8_t *json, uint8_t (&buf)[N]) noexcept;
|
||||
|
||||
simdjson_really_inline json_iterator_ref borrow() noexcept;
|
||||
|
||||
friend class document;
|
||||
friend class object;
|
||||
friend class array;
|
||||
friend class value;
|
||||
friend class raw_json_string;
|
||||
friend class parser;
|
||||
friend class json_iterator_ref;
|
||||
friend simdjson_really_inline void logger::log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta) noexcept;
|
||||
}; // json_iterator
|
||||
|
||||
/**
 * Move-only handle to a json_iterator.
 *
 * A ref is "alive" while it points at an iterator. When SIMDJSON_ONDEMAND_SAFETY_RAILS is
 * defined it additionally records a lease depth, and is "active" only while that depth
 * equals the iterator's active_lease_depth; borrow() hands out a ref one level deeper and
 * release() gives the lease back to the level above.
 */
class json_iterator_ref {
|
||||
public:
|
||||
simdjson_really_inline json_iterator_ref() noexcept = default;
|
||||
simdjson_really_inline json_iterator_ref(json_iterator_ref &&other) noexcept;
|
||||
simdjson_really_inline json_iterator_ref &operator=(json_iterator_ref &&other) noexcept;
|
||||
|
||||
#ifdef SIMDJSON_ONDEMAND_SAFETY_RAILS
|
||||
simdjson_really_inline ~json_iterator_ref() noexcept;
|
||||
#else
|
||||
simdjson_really_inline ~json_iterator_ref() noexcept = default;
|
||||
#endif // SIMDJSON_ONDEMAND_SAFETY_RAILS
|
||||
|
||||
simdjson_really_inline json_iterator_ref(const json_iterator_ref &other) noexcept = delete;
|
||||
simdjson_really_inline json_iterator_ref &operator=(const json_iterator_ref &other) noexcept = delete;
|
||||
|
||||
/** Issue a child ref onto the same iterator (one lease level deeper with safety rails). */
simdjson_really_inline json_iterator_ref borrow() noexcept;
|
||||
/** Give up this ref's lease; afterwards the ref is no longer alive. */
simdjson_really_inline void release() noexcept;
|
||||
|
||||
simdjson_really_inline json_iterator *operator->() noexcept;
|
||||
simdjson_really_inline json_iterator &operator*() noexcept;
|
||||
simdjson_really_inline const json_iterator &operator*() const noexcept;
|
||||
|
||||
/** Whether this ref still points at an iterator. */
simdjson_really_inline bool is_alive() const noexcept;
|
||||
/** Whether this ref is alive and (with safety rails) holds the iterator's active lease. */
simdjson_really_inline bool is_active() const noexcept;
|
||||
|
||||
simdjson_really_inline void assert_is_active() const noexcept;
|
||||
simdjson_really_inline void assert_is_not_active() const noexcept;
|
||||
|
||||
private:
|
||||
json_iterator *iter{};
|
||||
#ifdef SIMDJSON_ONDEMAND_SAFETY_RAILS
|
||||
uint32_t lease_depth{};
|
||||
simdjson_really_inline json_iterator_ref(json_iterator *iter, uint32_t lease_depth) noexcept;
|
||||
#else
|
||||
simdjson_really_inline json_iterator_ref(json_iterator *iter) noexcept;
|
||||
#endif
|
||||
|
||||
friend class json_iterator;
|
||||
}; // class json_iterator_ref
|
||||
|
||||
} // namespace ondemand
|
||||
} // namespace SIMDJSON_IMPLEMENTATION
|
||||
} // namespace simdjson
|
||||
|
||||
namespace simdjson {
|
||||
|
||||
// simdjson_result specialization carrying either a json_iterator or an error_code.
// Only construction (from a value, from an error, default, move) is declared here; all
// other behaviour comes from implementation_simdjson_result_base.
template<>
|
||||
struct simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::json_iterator> : public SIMDJSON_IMPLEMENTATION::implementation_simdjson_result_base<SIMDJSON_IMPLEMENTATION::ondemand::json_iterator> {
|
||||
public:
|
||||
simdjson_really_inline simdjson_result(SIMDJSON_IMPLEMENTATION::ondemand::json_iterator &&value) noexcept; ///< @private
|
||||
simdjson_really_inline simdjson_result(error_code error) noexcept; ///< @private
|
||||
|
||||
simdjson_really_inline simdjson_result() noexcept = default;
|
||||
simdjson_really_inline simdjson_result(simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::json_iterator> &&a) noexcept = default;
|
||||
simdjson_really_inline ~simdjson_result() noexcept = default; ///< @private
|
||||
};
|
||||
|
||||
// simdjson_result specialization carrying either a json_iterator_ref or an error_code.
// Only construction (from a value, from an error, default, move) is declared here; all
// other behaviour comes from implementation_simdjson_result_base.
template<>
|
||||
struct simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::json_iterator_ref> : public SIMDJSON_IMPLEMENTATION::implementation_simdjson_result_base<SIMDJSON_IMPLEMENTATION::ondemand::json_iterator_ref> {
|
||||
public:
|
||||
simdjson_really_inline simdjson_result(SIMDJSON_IMPLEMENTATION::ondemand::json_iterator_ref &&value) noexcept; ///< @private
|
||||
simdjson_really_inline simdjson_result(error_code error) noexcept; ///< @private
|
||||
|
||||
simdjson_really_inline simdjson_result() noexcept = default;
|
||||
simdjson_really_inline simdjson_result(simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::json_iterator_ref> &&a) noexcept = default;
|
||||
simdjson_really_inline ~simdjson_result() noexcept = default; ///< @private
|
||||
};
|
||||
|
||||
} // namespace simdjson
|
|
@ -0,0 +1,83 @@
|
|||
namespace simdjson {
|
||||
namespace SIMDJSON_IMPLEMENTATION {
|
||||
namespace ondemand {
|
||||
namespace logger {
|
||||
|
||||
static constexpr const char * DASHES = "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------";
|
||||
// Column widths (in characters) of the log table: the "Event" column, the "Buffer" column
// showing the current token, and the "Next" column showing the following token.
static constexpr const int LOG_EVENT_LEN = 20;
|
||||
static constexpr const int LOG_BUFFER_LEN = 30;
|
||||
static constexpr const int LOG_SMALL_BUFFER_LEN = 10;
|
||||
// Current nesting level used to indent event titles; bumped by log_start_value(),
// dropped by log_end_value() and reset by log_headers().
static int log_depth = 0; // Not threadsafe. Log only.
|
||||
|
||||
// Helper to turn unprintable or newline characters into spaces
|
||||
static simdjson_really_inline char printable_char(char c) {
|
||||
if (c >= 0x20) {
|
||||
return c;
|
||||
} else {
|
||||
return ' ';
|
||||
}
|
||||
}
|
||||
|
||||
// Logs an event row with no title prefix.
simdjson_really_inline void log_event(const json_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept {
|
||||
log_line(iter, "", type, detail, delta, depth_delta);
|
||||
}
|
||||
// Logs a value row with no title prefix (currently formatted exactly like log_event()).
simdjson_really_inline void log_value(const json_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept {
|
||||
log_line(iter, "", type, detail, delta, depth_delta);
|
||||
}
|
||||
// Logs a "+"-prefixed row at the current depth, then increases log_depth so that rows
// logged afterwards are indented one level deeper.
simdjson_really_inline void log_start_value(const json_iterator &iter, const char *type, int delta, int depth_delta) noexcept {
|
||||
log_line(iter, "+", type, "", delta, depth_delta);
|
||||
log_depth++;
|
||||
}
|
||||
// Decreases log_depth first, so the "-"-prefixed row lines up with the matching "+" row.
simdjson_really_inline void log_end_value(const json_iterator &iter, const char *type, int delta, int depth_delta) noexcept {
|
||||
log_depth--;
|
||||
log_line(iter, "-", type, "", delta, depth_delta);
|
||||
}
|
||||
// Logs a row whose title is prefixed with "ERROR: ".
simdjson_really_inline void log_error(const json_iterator &iter, const char *error, const char *detail, int delta, int depth_delta) noexcept {
|
||||
log_line(iter, "ERROR: ", error, detail, delta, depth_delta);
|
||||
}
|
||||
|
||||
// Resets the indentation depth and, when logging is enabled, prints the table header
// (column titles plus a dashed separator row) and flushes stdout.
simdjson_really_inline void log_headers() noexcept {
  log_depth = 0;
  if (!LOG_ENABLED) { return; }

  printf("\n");
  printf("| %-*s | %-*s | %-*s | %-*s | Detail |\n", LOG_EVENT_LEN, "Event", LOG_BUFFER_LEN, "Buffer", LOG_SMALL_BUFFER_LEN, "Next", 5, "Next#");
  printf("|%.*s|%.*s|%.*s|%.*s|--------|\n", LOG_EVENT_LEN+2, DASHES, LOG_BUFFER_LEN+2, DASHES, LOG_SMALL_BUFFER_LEN+2, DASHES, 5+2, DASHES);
  fflush(stdout);
}
|
||||
|
||||
// Prints one row of the log table (no-op unless LOG_ENABLED).
//
// @param iter         Iterator whose tokens are shown in the "Buffer"/"Next"/"Next#" columns.
// @param title_prefix Text printed immediately before the title ("", "+", "-", "ERROR: ").
// @param title        Event title, indented two spaces per nesting level.
// @param detail       Free-form text for the "Detail" column.
// @param delta        Token offset, relative to the iterator position, of the event's own
//                     token; the "Next" columns use delta+1.
// @param depth_delta  Adjustment added to log_depth for this row's indentation only.
simdjson_really_inline void log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta) noexcept {
  if (!LOG_ENABLED) { return; }

  // printf() reads a negative '*' field width as the '-' flag followed by a positive width,
  // so a negative indent (log_depth below zero) or a negative title width (deep nesting or
  // a long prefix) would pad the row wider instead of narrower. Clamp both at zero.
  const int raw_indent = (log_depth+depth_delta)*2;
  const int indent = raw_indent > 0 ? raw_indent : 0;
  const int title_room = LOG_EVENT_LEN - indent - int(strlen(title_prefix));
  const int title_width = title_room > 0 ? title_room : 0;
  printf("| %*s%s%-*s ", indent, "", title_prefix, title_width, title);

  // Print the current structural.
  printf("| ");
  for (int i=0; i<LOG_BUFFER_LEN; i++) {
    printf("%c", printable_char(iter.peek(delta)[i]));
  }
  printf(" ");

  // Print the next structural.
  printf("| ");
  for (int i=0; i<LOG_SMALL_BUFFER_LEN; i++) {
    printf("%c", printable_char(iter.peek(delta+1)[i]));
  }
  printf(" ");

  printf("| %5u ", iter.peek_index(delta+1));
  printf("| %.*s ", int(detail.size()), detail.data());
  printf("|\n");
  fflush(stdout);
}
|
||||
|
||||
} // namespace logger
|
||||
} // namespace ondemand
|
||||
} // namespace SIMDJSON_IMPLEMENTATION
|
||||
} // namespace simdjson
|
|
@ -0,0 +1,26 @@
|
|||
namespace simdjson {
|
||||
namespace SIMDJSON_IMPLEMENTATION {
|
||||
namespace ondemand {
|
||||
|
||||
class json_iterator;
|
||||
|
||||
namespace logger {
|
||||
|
||||
// Compile-time switch for the logger: the printing code in log_headers() and log_line() is
// guarded by `if (LOG_ENABLED)`, so nothing is printed unless SIMDJSON_VERBOSE_LOGGING is set.
#if SIMDJSON_VERBOSE_LOGGING
|
||||
static constexpr const bool LOG_ENABLED = true;
|
||||
#else
|
||||
static constexpr const bool LOG_ENABLED = false;
|
||||
#endif
|
||||
|
||||
// Logger entry points. `delta` defaults to -1 and `depth_delta` to 0 in every helper that
// takes them; log_line() is the single function that actually prints.
static simdjson_really_inline void log_headers() noexcept;
|
||||
static simdjson_really_inline void log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta) noexcept;
|
||||
static simdjson_really_inline void log_event(const json_iterator &iter, const char *type, std::string_view detail="", int delta=-1, int depth_delta=0) noexcept;
|
||||
static simdjson_really_inline void log_value(const json_iterator &iter, const char *type, std::string_view detail="", int delta=-1, int depth_delta=0) noexcept;
|
||||
static simdjson_really_inline void log_start_value(const json_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept;
|
||||
static simdjson_really_inline void log_end_value(const json_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept;
|
||||
static simdjson_really_inline void log_error(const json_iterator &iter, const char *error, const char *detail="", int delta=-1, int depth_delta=0) noexcept;
|
||||
|
||||
} // namespace logger
|
||||
} // namespace ondemand
|
||||
} // namespace SIMDJSON_IMPLEMENTATION
|
||||
} // namespace simdjson
|
|
@ -0,0 +1,155 @@
|
|||
namespace simdjson {
|
||||
namespace SIMDJSON_IMPLEMENTATION {
|
||||
namespace ondemand {
|
||||
|
||||
//
|
||||
// ### Live States
|
||||
//
|
||||
// While iterating or looking up values, depth >= iter->depth. at_start may vary. Error is
|
||||
// always SUCCESS:
|
||||
//
|
||||
// - Start: This is the state when the object is first found and the iterator is just past the {.
|
||||
// In this state, at_start == true.
|
||||
// - Next: After we hand a scalar value to the user, or an array/object which they then fully
|
||||
// iterate over, the iterator is at the , or } before the next value. In this state,
|
||||
// depth == iter->depth, at_start == false, and error == SUCCESS.
|
||||
// - Unfinished Business: When we hand an array/object to the user which they do not fully
|
||||
// iterate over, we need to finish that iteration by skipping child values until we reach the
|
||||
// Next state. In this state, depth > iter->depth, at_start == false, and error == SUCCESS.
|
||||
//
|
||||
// ## Error States
|
||||
//
|
||||
// In error states, we will yield exactly one more value before stopping. iter->depth == depth
|
||||
// and at_start is always false. We decrement after yielding the error, moving to the Finished
|
||||
// state.
|
||||
//
|
||||
// - Chained Error: When the object iterator is part of an error chain--for example, in
|
||||
// `for (auto tweet : doc["tweets"])`, where the tweet field may be missing or not be an
|
||||
// object--we yield that error in the loop, exactly once. In this state, error != SUCCESS and
|
||||
// iter->depth == depth, and at_start == false. We decrement depth when we yield the error.
|
||||
// - Missing Comma Error: When the iterator ++ method discovers there is no comma between fields,
|
||||
// we flag that as an error and treat it exactly the same as a Chained Error. In this state,
|
||||
// error == TAPE_ERROR, iter->depth == depth, and at_start == false.
|
||||
//
|
||||
// Errors that occur while reading a field to give to the user (such as when the key is not a
|
||||
// string or the field is missing a colon) are yielded immediately. Depth is then decremented,
|
||||
// moving to the Finished state without transitioning through an Error state at all.
|
||||
//
|
||||
// ## Terminal State
|
||||
//
|
||||
// The terminal state has iter->depth < depth. at_start is always false.
|
||||
//
|
||||
// - Finished: When we have reached a }, we are finished. We signal this by decrementing depth.
|
||||
// In this state, iter->depth < depth, at_start == false, and error == SUCCESS.
|
||||
//
|
||||
|
||||
simdjson_really_inline object::object(json_iterator_ref &&_iter) noexcept
|
||||
: iter{std::forward<json_iterator_ref>(_iter)},
|
||||
at_start{iter.is_alive()}
|
||||
{
|
||||
}
|
||||
|
||||
|
||||
// If the iterator reference is still alive when the object goes away, the remainder of the
// container is skipped (any error from that is deliberately discarded) and the reference is
// released.
simdjson_really_inline object::~object() noexcept {
  if (!iter.is_alive()) { return; }

  logger::log_event(*iter, "unfinished", "object");
  SIMDJSON_UNUSED auto _skip_error = iter->skip_container();
  iter.release();
}
|
||||
|
||||
// Advance through the remaining fields of this object until one whose raw key equals `key` is
// found.
//
// The search is forward-only: it starts at the current position and never rewinds.
//
// @param key The key to look for. It is compared against the raw (still escaped) JSON key.
// @return SUCCESS if a matching field was found; the iterator is then left after field_value()
//         so the caller can read the value. NO_SUCH_FIELD if the iterator is not alive or the
//         object runs out of fields. Any error reported by the underlying iterator otherwise.
//
// On every non-SUCCESS return from a live iterator, the iterator reference is released so the
// destructor does not try to skip over a container that is finished or broken.
simdjson_really_inline error_code object::find_field(const std::string_view key) noexcept {
  if (!iter.is_alive()) { return NO_SUCH_FIELD; }

  // Unless this is the first field, we need to advance past the , and check for }
  error_code error;
  bool has_value;
  if (at_start) {
    at_start = false;
    has_value = true;
  } else {
    if ((error = iter->has_next_field().get(has_value) )) { iter.release(); return error; }
  }
  while (has_value) {
    // Get the key
    raw_json_string actual_key;
    if ((error = iter->field_key().get(actual_key) )) { iter.release(); return error; }
    if ((error = iter->field_value() )) { iter.release(); return error; }

    // Check if it matches
    if (actual_key == key) {
      logger::log_event(*iter, "match", key, -2);
      return SUCCESS;
    }
    logger::log_event(*iter, "no match", key, -2);

    // Skip the value entirely. A failure here is handled like every other failure in this
    // function: release the iterator before reporting the error.
    {
      const auto skip_error = iter->skip();
      if (skip_error) { iter.release(); return skip_error; }
    }
    if ((error = iter->has_next_field().get(has_value) )) { iter.release(); return error; }
  }

  // If the loop ended, we're out of fields to look at.
  iter.release();
  return NO_SUCH_FIELD;
}
|
||||
|
||||
// Lvalue lookup: find the field named `key` and start a value on a borrowed iterator reference,
// leaving this object in control of the iterator. Returns the find_field() error on failure.
simdjson_really_inline simdjson_result<value> object::operator[](const std::string_view key) & noexcept {
  const error_code lookup_error = find_field(key);
  if (lookup_error) { return lookup_error; }
  return value::start(iter.borrow());
}
|
||||
|
||||
// Rvalue lookup: find the field named `key` and hand the iterator reference itself over to the
// resulting value. Returns the find_field() error on failure.
simdjson_really_inline simdjson_result<value> object::operator[](const std::string_view key) && noexcept {
  const error_code lookup_error = find_field(key);
  if (lookup_error) { return lookup_error; }
  return value::start(std::move(iter));
}
|
||||
|
||||
// Begin an object via iter->start_object(). If that fails, the error is returned. If it reports
// that the object has no fields, the iterator reference is released before the object is built.
simdjson_really_inline simdjson_result<object> object::start(json_iterator_ref &&iter) noexcept {
  bool has_fields;
  SIMDJSON_TRY( iter->start_object().get(has_fields) );
  if (!has_fields) {
    iter.release();
  }
  return object(std::move(iter));
}
|
||||
// Same as start(), but uses iter->started_object(), which only reports whether there are
// fields; there is no error path.
simdjson_really_inline object object::started(json_iterator_ref &&iter) noexcept {
  const bool has_fields = iter->started_object();
  if (!has_fields) {
    iter.release();
  }
  return object(std::move(iter));
}
|
||||
// Produce a field iterator bound to this object's iterator reference and clear at_start.
// If we were at the start the reference is asserted to be active, otherwise it is asserted to
// be not active.
simdjson_really_inline object_iterator object::begin() noexcept {
  const bool was_at_start = at_start;
  at_start = false;
  if (was_at_start) {
    iter.assert_is_active();
  } else {
    iter.assert_is_not_active();
  }
  return iter;
}
|
||||
// The end sentinel is simply a default-constructed object_iterator.
simdjson_really_inline object_iterator object::end() noexcept {
  return object_iterator();
}
|
||||
|
||||
} // namespace ondemand
|
||||
} // namespace SIMDJSON_IMPLEMENTATION
|
||||
} // namespace simdjson
|
||||
|
||||
namespace simdjson {
|
||||
|
||||
// Success constructor: moves the object into the result base.
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::object>::simdjson_result(SIMDJSON_IMPLEMENTATION::ondemand::object &&value) noexcept
  : implementation_simdjson_result_base<SIMDJSON_IMPLEMENTATION::ondemand::object>(
      std::move(value)
    )
{
}
|
||||
// Error constructor: forwards the error code to the result base.
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::object>::simdjson_result(error_code error) noexcept
  : implementation_simdjson_result_base<SIMDJSON_IMPLEMENTATION::ondemand::object>(error)
{
}
|
||||
|
||||
// Forwards to object::begin(), or propagates the stored error.
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::object_iterator> simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::object>::begin() noexcept {
  const auto stored_error = error();
  if (stored_error) { return stored_error; }
  return first.begin();
}
|
||||
// Forwards to object::end(), or propagates the stored error.
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::object_iterator> simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::object>::end() noexcept {
  const auto stored_error = error();
  if (stored_error) { return stored_error; }
  return first.end();
}
|
||||
// Lvalue field lookup: forwards to object::operator[] on the contained object, or propagates
// the stored error.
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::value> simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::object>::operator[](std::string_view key) & noexcept {
  const auto stored_error = error();
  if (stored_error) { return stored_error; }
  return first[key];
}
|
||||
// Rvalue field lookup: forwards to the rvalue-qualified object::operator[] on the contained
// object, or propagates the stored error.
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::value> simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::object>::operator[](std::string_view key) && noexcept {
  const auto stored_error = error();
  if (stored_error) { return stored_error; }
  return std::move(first)[key];
}
|
||||
|
||||
} // namespace simdjson
|
|
@ -0,0 +1,93 @@
|
|||
#include "simdjson/error.h"
|
||||
|
||||
namespace simdjson {
|
||||
namespace SIMDJSON_IMPLEMENTATION {
|
||||
namespace ondemand {
|
||||
|
||||
/**
|
||||
* A forward-only JSON object field iterator.
|
||||
*
* Move-only: copying is deleted. Fields can be visited with begin()/end() or looked up by key
* with operator[].
*/
|
||||
class object {
|
||||
public:
|
||||
/**
|
||||
* Create a new invalid object.
|
||||
*
|
||||
* Exists so you can declare a variable and later assign to it before use.
|
||||
*/
|
||||
simdjson_really_inline object() noexcept = default;
|
||||
|
||||
simdjson_really_inline object(object &&other) noexcept = default;
|
||||
simdjson_really_inline object &operator=(object &&other) noexcept = default;
|
||||
object(const object &) = delete;
|
||||
object &operator=(const object &) = delete;
|
||||
|
||||
/** If the iterator reference is still alive, skips the rest of the container and releases it. */
simdjson_really_inline ~object() noexcept;
|
||||
|
||||
/** Field iterator bound to this object's iterator reference. Clears at_start. */
simdjson_really_inline object_iterator begin() noexcept;
|
||||
/** End sentinel: a default-constructed object_iterator. */
simdjson_really_inline object_iterator end() noexcept;
|
||||
/**
* Look up a field by key, searching forward from the current position (see find_field()).
* The lvalue overload lends the iterator to the value; the rvalue overload hands it over.
*/
simdjson_really_inline simdjson_result<value> operator[](const std::string_view key) & noexcept;
|
||||
simdjson_really_inline simdjson_result<value> operator[](const std::string_view key) && noexcept;
|
||||
|
||||
protected:
|
||||
/**
|
||||
* Begin object iteration.
|
||||
*
|
||||
* @param iter The iterator reference for the object. start() calls iter->start_object() and
|
||||
* started() calls iter->started_object(); if there are no fields the reference is released.
|
||||
* NOTE(review): presumably start() expects the `{` not yet consumed and started() expects to be just past it -- confirm.
*/
|
||||
static simdjson_really_inline simdjson_result<object> start(json_iterator_ref &&iter) noexcept;
|
||||
static simdjson_really_inline object started(json_iterator_ref &&iter) noexcept;
|
||||
|
||||
/**
|
||||
* Internal object creation. Call object::start(iter) or object::started(iter) instead of this.
|
||||
*
|
||||
* @param _iter The iterator reference to take over. at_start is set to _iter's is_alive() state.
|
||||
* NOTE(review): presumably the iterator must be just after the opening `{` -- confirm.
|
||||
*/
|
||||
simdjson_really_inline object(json_iterator_ref &&_iter) noexcept;
|
||||
|
||||
/**
* Advance to the value of the first remaining field whose raw key equals `key`.
* Returns SUCCESS, NO_SUCH_FIELD, or an error from the underlying iterator.
*/
simdjson_really_inline error_code find_field(const std::string_view key) noexcept;
|
||||
|
||||
/**
|
||||
* Reference to the iterator used to read this object.
|
||||
*
|
||||
* PERF NOTE: expected to be elided in favor of the parent document: this is set when the object
|
||||
* is first used, and never changes afterwards.
|
||||
*/
|
||||
json_iterator_ref iter{};
|
||||
/**
|
||||
* Whether we are at the start.
|
||||
*
|
||||
* PERF NOTE: this should be elided into inline control flow: it is only used for the first []
|
||||
* or * call, and SSA optimizers commonly do first-iteration loop optimization.
|
||||
*/
|
||||
bool at_start{};
|
||||
|
||||
friend class value;
|
||||
friend class document;
|
||||
friend struct simdjson_result<object>;
|
||||
};
|
||||
|
||||
} // namespace ondemand
|
||||
} // namespace SIMDJSON_IMPLEMENTATION
|
||||
} // namespace simdjson
|
||||
|
||||
namespace simdjson {
|
||||
|
||||
// simdjson_result specialization for ondemand::object. begin(), end() and operator[] return the
// stored error if there is one, and otherwise forward to the contained object.
template<>
|
||||
struct simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::object> : public SIMDJSON_IMPLEMENTATION::implementation_simdjson_result_base<SIMDJSON_IMPLEMENTATION::ondemand::object> {
|
||||
public:
|
||||
simdjson_really_inline simdjson_result(SIMDJSON_IMPLEMENTATION::ondemand::object &&value) noexcept; ///< @private
|
||||
simdjson_really_inline simdjson_result(error_code error) noexcept; ///< @private
|
||||
|
||||
simdjson_really_inline simdjson_result() noexcept = default;
|
||||
simdjson_really_inline simdjson_result(simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::object> &&a) noexcept = default;
|
||||
simdjson_really_inline ~simdjson_result() noexcept = default; ///< @private
|
||||
|
||||
// Error-propagating forwards to object::begin() / object::end().
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::object_iterator> begin() noexcept;
|
||||
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::object_iterator> end() noexcept;
|
||||
// Error-propagating forwards to the lvalue / rvalue object::operator[].
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::value> operator[](std::string_view key) & noexcept;
|
||||
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::value> operator[](std::string_view key) && noexcept;
|
||||
};
|
||||
|
||||
} // namespace simdjson
|
|
@ -0,0 +1,74 @@
|
|||
namespace simdjson {
|
||||
namespace SIMDJSON_IMPLEMENTATION {
|
||||
namespace ondemand {
|
||||
|
||||
//
|
||||
// object_iterator
|
||||
//
|
||||
|
||||
// Binds this iterator to an existing iterator reference; only its address is stored.
simdjson_really_inline object_iterator::object_iterator(json_iterator_ref &_iter) noexcept
  : iter{&_iter}
{
}
|
||||
|
||||
// Yield the current field. If the underlying iterator already carries an error, the reference
// is released and that error is yielded instead.
simdjson_really_inline simdjson_result<field> object_iterator::operator*() noexcept {
  const error_code pending_error = (*iter)->error();
  if (pending_error) {
    iter->release();
    return pending_error;
  }

  auto current_field = field::start(*iter);
  // TODO this is a safety rail ... users should exit loops as soon as they receive an error.
  // Nonetheless, let's see if performance is OK with this if statement--the compiler may give it to us for free.
  if (current_field.error()) {
    iter->release();
  }
  return current_field;
}
|
||||
// Defined as the negation of operator!=.
simdjson_really_inline bool object_iterator::operator==(const object_iterator &other) noexcept {
  const bool differs = (*this != other);
  return !differs;
}
|
||||
// The argument is ignored (it is assumed to be the end sentinel): iteration continues for as
// long as the iterator reference is alive.
simdjson_really_inline bool object_iterator::operator!=(const object_iterator &) noexcept {
  const bool still_iterating = iter->is_alive();
  return still_iterating;
}
|
||||
// Move to the next field. The reference is released only when has_next_field() succeeds and
// reports that no field follows; if it reports an error the reference is left alive.
simdjson_really_inline object_iterator &object_iterator::operator++() noexcept {
  // TODO this is a safety rail ... users should exit loops as soon as they receive an error.
  // Nonetheless, let's see if performance is OK with this if statement--the compiler may give it to us for free.
  if (!iter->is_alive()) { return *this; } // Iterator will be released if there is an error

  bool more_fields;
  const error_code error = (*iter)->has_next_field().get(more_fields);
  if (!error && !more_fields) {
    iter->release();
  }
  return *this;
}
|
||||
|
||||
} // namespace ondemand
|
||||
} // namespace SIMDJSON_IMPLEMENTATION
|
||||
} // namespace simdjson
|
||||
|
||||
namespace simdjson {
|
||||
|
||||
// Success constructor: moves the iterator into the result base.
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::object_iterator>::simdjson_result(SIMDJSON_IMPLEMENTATION::ondemand::object_iterator &&value) noexcept
  : implementation_simdjson_result_base<SIMDJSON_IMPLEMENTATION::ondemand::object_iterator>(std::move(value)) {}
|
||||
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::object_iterator>::simdjson_result(error_code error) noexcept
|
||||
: implementation_simdjson_result_base<SIMDJSON_IMPLEMENTATION::ondemand::object_iterator>({}, error)
|
||||
{
|
||||
}
|
||||
|
||||
// Yield the current field, or the stored error if this result holds one.
//
// The stored error is cleared once it has been yielded. It must be captured *before* it is
// cleared: reading error() after resetting `second` would hand SUCCESS to the caller and lose
// the error.
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::field> simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::object_iterator>::operator*() noexcept {
  if (error()) {
    const error_code pending_error = error();
    second = SUCCESS;
    return pending_error;
  }
  return *first;
}
|
||||
// Assumes it's being compared with the end. true if depth < iter->depth.
|
||||
simdjson_really_inline bool simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::object_iterator>::operator==(const simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::object_iterator> &other) noexcept {
|
||||
if (error()) { return true; }
|
||||
return first == other.first;
|
||||
}
|
||||
// Assumes it's being compared with the end. true if depth >= iter->depth.
|
||||
simdjson_really_inline bool simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::object_iterator>::operator!=(const simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::object_iterator> &other) noexcept {
|
||||
if (error()) { return false; }
|
||||
return first != other.first;
|
||||
}
|
||||
// Checks for ']' and ','
|
||||
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::object_iterator> &simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::object_iterator>::operator++() noexcept {
|
||||
if (error()) { return *this; }
|
||||
++first;
|
||||
return *this;
|
||||
}
|
||||
|
||||
} // namespace simdjson
|
|
@ -0,0 +1,71 @@
|
|||
#include "simdjson/error.h"
|
||||
|
||||
namespace simdjson {
|
||||
namespace SIMDJSON_IMPLEMENTATION {
|
||||
namespace ondemand {
|
||||
|
||||
class field;
|
||||
|
||||
// Iterator over the fields of an ondemand::object. It stores only a pointer to the
// json_iterator_ref it was created from.
class object_iterator {
|
||||
public:
|
||||
/**
|
||||
* Create a new invalid object_iterator.
|
||||
*
|
||||
* Exists so you can declare a variable and later assign to it before use.
|
||||
*/
|
||||
simdjson_really_inline object_iterator() noexcept = default;
|
||||
|
||||
simdjson_really_inline object_iterator(const object_iterator &o) noexcept = default;
|
||||
simdjson_really_inline object_iterator &operator=(const object_iterator &o) noexcept = default;
|
||||
|
||||
//
|
||||
// Iterator interface
|
||||
//
|
||||
|
||||
// Reads key and value, yielding them to the user.
|
||||
// MUST ONLY BE CALLED ONCE PER ITERATION.
|
||||
simdjson_really_inline simdjson_result<field> operator*() noexcept;
|
||||
// Assumes it's being compared with the end. true if the iterator reference is no longer alive.
|
||||
simdjson_really_inline bool operator==(const object_iterator &) noexcept;
|
||||
// Assumes it's being compared with the end. true if the iterator reference is still alive.
|
||||
simdjson_really_inline bool operator!=(const object_iterator &) noexcept;
|
||||
// Checks for '}' and ','
|
||||
simdjson_really_inline object_iterator &operator++() noexcept;
|
||||
private:
|
||||
// Iterator reference being driven; null for a default-constructed iterator.
json_iterator_ref *iter{};
|
||||
simdjson_really_inline object_iterator(json_iterator_ref &iter) noexcept;
|
||||
friend struct simdjson_result<object_iterator>;
|
||||
friend class object;
|
||||
};
|
||||
|
||||
} // namespace ondemand
|
||||
} // namespace SIMDJSON_IMPLEMENTATION
|
||||
} // namespace simdjson
|
||||
|
||||
namespace simdjson {
|
||||
|
||||
// simdjson_result specialization for ondemand::object_iterator, so that a failed object lookup
// can still be used with iterator syntax.
template<>
|
||||
struct simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::object_iterator> : public SIMDJSON_IMPLEMENTATION::implementation_simdjson_result_base<SIMDJSON_IMPLEMENTATION::ondemand::object_iterator> {
|
||||
public:
|
||||
simdjson_really_inline simdjson_result(SIMDJSON_IMPLEMENTATION::ondemand::object_iterator &&value) noexcept; ///< @private
|
||||
simdjson_really_inline simdjson_result(error_code error) noexcept; ///< @private
|
||||
|
||||
simdjson_really_inline simdjson_result() noexcept = default;
|
||||
simdjson_really_inline simdjson_result(simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::object_iterator> &&a) noexcept = default;
|
||||
simdjson_really_inline ~simdjson_result() noexcept = default; ///< @private
|
||||
|
||||
//
|
||||
// Iterator interface
|
||||
//
|
||||
|
||||
// Reads key and value, yielding them to the user.
|
||||
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::field> operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION.
|
||||
// Assumes it's being compared with the end. true if this result holds an error; otherwise compares the wrapped iterators.
|
||||
simdjson_really_inline bool operator==(const simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::object_iterator> &) noexcept;
|
||||
// Assumes it's being compared with the end. false if this result holds an error; otherwise compares the wrapped iterators.
|
||||
simdjson_really_inline bool operator!=(const simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::object_iterator> &) noexcept;
|
||||
// Checks for '}' and ','. Does nothing if this result holds an error.
|
||||
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::object_iterator> &operator++() noexcept;
|
||||
};
|
||||
|
||||
} // namespace simdjson
|
|
@ -0,0 +1,54 @@
|
|||
namespace simdjson {
|
||||
namespace SIMDJSON_IMPLEMENTATION {
|
||||
namespace ondemand {
|
||||
|
||||
// Ensure the parser owns a string buffer and stage-1 structures for documents of up to
// `new_capacity` bytes.
//
// @param new_capacity  Size in bytes of the largest document to support.
// @param new_max_depth Depth recorded in _max_depth. NOTE: the underlying dom parser is always
//                      sized for DEFAULT_MAX_DEPTH, not for this value. Callers pass the current
//                      _max_depth, which starts at 0, so it must not be forwarded as is.
// @return SUCCESS, MEMALLOC if the string buffer cannot be allocated, or the error reported by
//         the dom parser. On failure _capacity and _max_depth are left at 0, so the next
//         iterate() call attempts the allocation again.
SIMDJSON_WARN_UNUSED simdjson_really_inline error_code parser::allocate(size_t new_capacity, size_t new_max_depth) noexcept {
  if (string_buf && new_capacity == _capacity && new_max_depth == _max_depth) { return SUCCESS; }

  // string_capacity copied from document::allocate
  _capacity = 0;
  _max_depth = 0;
  // The most string buffer we could possibly need is capacity-2 (a string the whole document long).
  // Allocate up to capacity so we don't have to check for capacity == 0 or 1.
  string_buf.reset(new (std::nothrow) uint8_t[new_capacity]);
  // nothrow new reports failure by returning null; surface it instead of continuing with no buffer.
  if (!string_buf) { return MEMALLOC; }
  SIMDJSON_TRY( dom_parser.set_capacity(new_capacity) );
  SIMDJSON_TRY( dom_parser.set_max_depth(DEFAULT_MAX_DEPTH) );
  _capacity = new_capacity;
  _max_depth = new_max_depth;
  return SUCCESS;
}
|
||||
|
||||
// Start an On Demand iteration over `buf`: (re)allocate when there is no string buffer yet or
// the current capacity is too small, run stage 1 over the bytes, then start a document on this
// parser.
SIMDJSON_WARN_UNUSED simdjson_really_inline simdjson_result<document> parser::iterate(const padded_string &buf) & noexcept {
  const auto json_length = buf.size();

  // Allocate if needed
  const bool needs_allocation = !string_buf || _capacity < json_length;
  if (needs_allocation) {
    SIMDJSON_TRY( allocate(json_length, _max_depth) );
  }

  // Run stage 1.
  const uint8_t *json_bytes = reinterpret_cast<const uint8_t *>(buf.data());
  SIMDJSON_TRY( dom_parser.stage1(json_bytes, json_length, false) );
  return document::start(this);
}
|
||||
|
||||
// Start a raw json_iterator over `buf`: allocate if needed, run stage 1, and return an iterator
// bound to this parser.
//
// The allocation check matches iterate(): besides growing when the capacity is too small, it
// also allocates when no string buffer exists yet. Without that, a fresh parser given an empty
// buffer (size 0, capacity 0) would run stage 1 without ever allocating.
SIMDJSON_WARN_UNUSED simdjson_really_inline simdjson_result<json_iterator> parser::iterate_raw(const padded_string &buf) & noexcept {
  // Allocate if needed
  if (_capacity < buf.size() || !string_buf) {
    SIMDJSON_TRY( allocate(buf.size(), _max_depth) );
  }

  // Run stage 1.
  SIMDJSON_TRY( dom_parser.stage1(reinterpret_cast<const uint8_t *>(buf.data()), buf.size(), false) );
  return json_iterator(this);
}
|
||||
|
||||
} // namespace ondemand
|
||||
} // namespace SIMDJSON_IMPLEMENTATION
|
||||
} // namespace simdjson
|
||||
|
||||
namespace simdjson {
|
||||
|
||||
// Success constructor: moves the parser into the result base.
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::parser>::simdjson_result(SIMDJSON_IMPLEMENTATION::ondemand::parser &&value) noexcept
  : implementation_simdjson_result_base<SIMDJSON_IMPLEMENTATION::ondemand::parser>(
      std::move(value)
    )
{
}
|
||||
// Error constructor: forwards the error code to the result base.
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::parser>::simdjson_result(error_code error) noexcept
  : implementation_simdjson_result_base<SIMDJSON_IMPLEMENTATION::ondemand::parser>(error)
{
}
|
||||
|
||||
} // namespace simdjson
|
|
@ -0,0 +1,139 @@
|
|||
#include "simdjson/error.h"
|
||||
|
||||
namespace simdjson {
|
||||
namespace SIMDJSON_IMPLEMENTATION {
|
||||
namespace ondemand {
|
||||
|
||||
class array;
|
||||
class object;
|
||||
class value;
|
||||
class raw_json_string;
|
||||
|
||||
/**
|
||||
* An On Demand JSON parser.
|
||||
*
|
||||
* This holds the stage 1 parser implementation as well as the buffer for writing strings.
|
||||
*/
|
||||
class parser {
|
||||
public:
|
||||
/**
|
||||
* Create a JSON parser.
|
||||
*
|
||||
* The new parser will have zero capacity.
|
||||
*/
|
||||
inline parser() noexcept = default;
|
||||
|
||||
inline parser(parser &&other) noexcept = default;
|
||||
simdjson_really_inline parser(const parser &other) = delete;
|
||||
simdjson_really_inline parser &operator=(const parser &other) = delete;
|
||||
|
||||
/** Deallocate the JSON parser. */
|
||||
inline ~parser() noexcept = default;
|
||||
|
||||
/**
|
||||
* Start iterating an on-demand JSON document.
|
||||
*
|
||||
* ondemand::parser parser;
|
||||
* document doc = parser.iterate(json);
|
||||
*
|
||||
* ### IMPORTANT: Buffer Lifetime
|
||||
*
|
||||
* Because parsing is done while you iterate, you *must* keep the JSON buffer around at least as
|
||||
* long as the document iteration.
|
||||
*
|
||||
* ### IMPORTANT: Document Lifetime
|
||||
*
|
||||
* Only one iteration at a time can happen per parser, and the parser *must* be kept alive during
|
||||
* iteration to ensure intermediate buffers can be accessed. Any document must be destroyed before
|
||||
* you call iterate() again or destroy the parser.
|
||||
*
|
||||
* ### REQUIRED: Buffer Padding
|
||||
*
|
||||
* The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what
|
||||
* those bytes are initialized to, as long as they are allocated.
|
||||
*
|
||||
* @param json The JSON to parse.
|
||||
*
|
||||
* @return The document, or an error:
|
||||
* - MEMALLOC if the parser does not have enough capacity, and memory
|
||||
* allocation fails.
|
||||
* - EMPTY if the document is all whitespace.
|
||||
* - UTF8_ERROR if the document is not valid UTF-8.
|
||||
* - UNESCAPED_CHARS if a string contains control characters that must be escaped
|
||||
* - UNCLOSED_STRING if there is an unclosed string in the document.
|
||||
*/
|
||||
SIMDJSON_WARN_UNUSED simdjson_result<document> iterate(const padded_string &json) & noexcept;
|
||||
SIMDJSON_WARN_UNUSED simdjson_result<document> iterate(const std::string &json) & noexcept = delete;
|
||||
/**
|
||||
* @private
|
||||
*
|
||||
* Start iterating an on-demand JSON document, returning the raw json_iterator.
|
||||
*
|
||||
* ondemand::parser parser;
|
||||
* json_iterator doc = parser.iterate_raw(json);
|
||||
*
|
||||
* ### IMPORTANT: Buffer Lifetime
|
||||
*
|
||||
* Because parsing is done while you iterate, you *must* keep the JSON buffer around at least as
|
||||
* long as the document iteration.
|
||||
*
|
||||
* ### IMPORTANT: Document Lifetime
|
||||
*
|
||||
* Only one iteration at a time can happen per parser, and the parser *must* be kept alive during
|
||||
* iteration to ensure intermediate buffers can be accessed. Any document must be destroyed before
|
||||
* you call iterate_raw() again or destroy the parser.
|
||||
*
|
||||
* ### REQUIRED: Buffer Padding
|
||||
*
|
||||
* The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what
|
||||
* those bytes are initialized to, as long as they are allocated.
|
||||
*
|
||||
* @param json The JSON to parse.
|
||||
*
|
||||
* @return The iterator, or an error:
|
||||
* - MEMALLOC if the parser does not have enough capacity, and memory
|
||||
* allocation fails.
|
||||
* - EMPTY if the document is all whitespace.
|
||||
* - UTF8_ERROR if the document is not valid UTF-8.
|
||||
* - UNESCAPED_CHARS if a string contains control characters that must be escaped
|
||||
* - UNCLOSED_STRING if there is an unclosed string in the document.
|
||||
*/
|
||||
SIMDJSON_WARN_UNUSED simdjson_result<json_iterator> iterate_raw(const padded_string &json) & noexcept;
|
||||
|
||||
private:
|
||||
// Runs stage 1 over the input buffer.
dom_parser_implementation dom_parser{};
|
||||
// Capacity (document bytes) and max depth as last recorded by a successful allocate().
size_t _capacity{0};
|
||||
size_t _max_depth{0};
|
||||
// Buffer of _capacity bytes for string output; null until the first successful allocate().
std::unique_ptr<uint8_t[]> string_buf{};
|
||||
|
||||
/**
|
||||
* Ensure this parser has enough memory to process JSON documents up to `capacity` bytes in length
|
||||
* and `max_depth` depth.
|
||||
*
|
||||
* @param capacity The new capacity.
|
||||
* @param max_depth The new max_depth. Defaults to DEFAULT_MAX_DEPTH.
|
||||
* @return The error, if there is one.
|
||||
*/
|
||||
SIMDJSON_WARN_UNUSED error_code allocate(size_t capacity, size_t max_depth=DEFAULT_MAX_DEPTH) noexcept;
|
||||
|
||||
friend class json_iterator;
|
||||
};
|
||||
|
||||
} // namespace ondemand
|
||||
} // namespace SIMDJSON_IMPLEMENTATION
|
||||
} // namespace simdjson
|
||||
|
||||
namespace simdjson {
|
||||
|
||||
// simdjson_result specialization for ondemand::parser. It adds no members beyond construction
// from a parser or from an error code.
template<>
|
||||
struct simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::parser> : public SIMDJSON_IMPLEMENTATION::implementation_simdjson_result_base<SIMDJSON_IMPLEMENTATION::ondemand::parser> {
|
||||
public:
|
||||
simdjson_really_inline simdjson_result(SIMDJSON_IMPLEMENTATION::ondemand::parser &&value) noexcept; ///< @private
|
||||
simdjson_really_inline simdjson_result(error_code error) noexcept; ///< @private
|
||||
|
||||
simdjson_really_inline simdjson_result() noexcept = default;
|
||||
simdjson_really_inline simdjson_result(simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::parser> &&a) noexcept = default;
|
||||
simdjson_really_inline ~simdjson_result() noexcept = default; ///< @private
|
||||
};
|
||||
|
||||
} // namespace simdjson
|
|
@ -0,0 +1,74 @@
|
|||
namespace simdjson {
|
||||
namespace SIMDJSON_IMPLEMENTATION {
|
||||
namespace ondemand {
|
||||
|
||||
// Wraps a pointer into the JSON text; nothing is copied.
simdjson_really_inline raw_json_string::raw_json_string(const uint8_t * _buf) noexcept
  : buf{_buf}
{
}
|
||||
|
||||
// The stored pointer, viewed as const char *.
simdjson_really_inline const char * raw_json_string::raw() const noexcept {
  return reinterpret_cast<const char *>(buf);
}
|
||||
// Unescape this string into `dst` using stringparsing::parse_string. On success the returned
// view covers the bytes from the old `dst` up to the position parse_string returned, and `dst`
// is advanced to that position. Returns STRING_ERROR if parse_string returns null.
simdjson_really_inline SIMDJSON_WARN_UNUSED simdjson_result<std::string_view> raw_json_string::unescape(uint8_t *&dst) const noexcept {
  uint8_t *const written_end = stringparsing::parse_string(buf, dst);
  if (written_end == nullptr) { return STRING_ERROR; }

  std::string_view unescaped(reinterpret_cast<const char *>(dst), written_end - dst);
  dst = written_end;
  return unescaped;
}
|
||||
|
||||
// Unescape into the iterator's string buffer, advancing iter.current_string_buf_loc.
simdjson_really_inline SIMDJSON_WARN_UNUSED simdjson_result<std::string_view> raw_json_string::unescape(json_iterator &iter) const noexcept {
  uint8_t *&write_cursor = iter.current_string_buf_loc;
  return unescape(write_cursor);
}
|
||||
|
||||
// Compare the raw (still escaped) bytes of a JSON string with `b`.
//
// The two are equal only if every byte of `b` matches and the raw string ends (unescaped
// closing quote) immediately after. A plain prefix comparison is not enough: it would make
// "ab" equal to the JSON key "abc", and it would read past the closing quote whenever `b` is
// longer than the key.
//
// No unescaping is performed: `b` is matched against the bytes exactly as written in the JSON.
SIMDJSON_UNUSED simdjson_really_inline bool operator==(const raw_json_string &a, std::string_view b) noexcept {
  const char *cursor = a.raw();
  bool escaped = false; // true when the previous byte was an unescaped backslash
  for (const char expected : b) {
    // An unescaped quote terminates the raw string: `b` is longer than the key.
    if (*cursor == '"' && !escaped) { return false; }
    if (*cursor != expected) { return false; }
    escaped = (*cursor == '\\') && !escaped;
    ++cursor;
  }
  // All of `b` matched; the raw string must end right here.
  return *cursor == '"' && !escaped;
}
|
||||
|
||||
// Symmetric overload: delegates to operator==(raw_json_string, string_view).
SIMDJSON_UNUSED simdjson_really_inline bool operator==(std::string_view a, const raw_json_string &b) noexcept {
  const bool same = (b == a);
  return same;
}
|
||||
|
||||
// Negation of the matching operator==.
SIMDJSON_UNUSED simdjson_really_inline bool operator!=(const raw_json_string &a, std::string_view b) noexcept {
  const bool same = (a == b);
  return !same;
}
|
||||
|
||||
// Negation of the matching operator==.
SIMDJSON_UNUSED simdjson_really_inline bool operator!=(std::string_view a, const raw_json_string &b) noexcept {
  const bool same = (a == b);
  return !same;
}
|
||||
|
||||
// Write the raw (still escaped) contents of the string to `out`, stopping at the first quote
// that is not preceded by an unescaped backslash. The closing quote itself is not written.
SIMDJSON_UNUSED simdjson_really_inline std::ostream &operator<<(std::ostream &out, const raw_json_string &str) noexcept {
  bool escaped = false; // true when the previous character was an unescaped backslash
  for (const char *cursor = str.raw(); ; ++cursor) {
    const char c = *cursor;
    if (c == '"' && !escaped) { return out; }
    // A backslash toggles the escape state; any other character ends it.
    escaped = (c == '\\') && !escaped;
    out << c;
  }
}
|
||||
|
||||
} // namespace ondemand
|
||||
} // namespace SIMDJSON_IMPLEMENTATION
|
||||
} // namespace simdjson
|
||||
|
||||
namespace simdjson {
|
||||
|
||||
// Success constructor: moves the raw_json_string into the result base.
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::raw_json_string>::simdjson_result(SIMDJSON_IMPLEMENTATION::ondemand::raw_json_string &&value) noexcept
  : implementation_simdjson_result_base<SIMDJSON_IMPLEMENTATION::ondemand::raw_json_string>(
      std::move(value)
    )
{
}
|
||||
// Error constructor: forwards the error code to the result base.
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::raw_json_string>::simdjson_result(error_code error) noexcept
  : implementation_simdjson_result_base<SIMDJSON_IMPLEMENTATION::ondemand::raw_json_string>(error)
{
}
|
||||
|
||||
// Forwards to raw_json_string::raw(), or propagates the stored error.
simdjson_really_inline simdjson_result<const char *> simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::raw_json_string>::raw() const noexcept {
  const auto stored_error = error();
  if (stored_error) { return stored_error; }
  return first.raw();
}
|
||||
// Forwards to raw_json_string::unescape(dst), or propagates the stored error.
simdjson_really_inline SIMDJSON_WARN_UNUSED simdjson_result<std::string_view> simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::raw_json_string>::unescape(uint8_t *&dst) const noexcept {
  const auto stored_error = error();
  if (stored_error) { return stored_error; }
  return first.unescape(dst);
}
|
||||
// Forwards to raw_json_string::unescape(iter), or propagates the stored error.
simdjson_really_inline SIMDJSON_WARN_UNUSED simdjson_result<std::string_view> simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::raw_json_string>::unescape(SIMDJSON_IMPLEMENTATION::ondemand::json_iterator &iter) const noexcept {
  const auto stored_error = error();
  if (stored_error) { return stored_error; }
  return first.unescape(iter);
}
|
||||
|
||||
} // namespace simdjson
|
|
@ -0,0 +1,98 @@
|
|||
#include "simdjson/error.h"
|
||||
|
||||
namespace simdjson {
|
||||
namespace SIMDJSON_IMPLEMENTATION {
|
||||
namespace ondemand {
|
||||
|
||||
class object;
|
||||
class parser;
|
||||
|
||||
/**
|
||||
* A string escaped per JSON rules, terminated with quote (")
|
||||
*
|
||||
* (In other words, a pointer to the beginning of a string, just after the start quote, inside a
|
||||
* JSON file.)
|
||||
*/
|
||||
class raw_json_string {
|
||||
public:
|
||||
/**
|
||||
* Create a new invalid raw_json_string.
|
||||
*
|
||||
* Exists so you can declare a variable and later assign to it before use.
|
||||
*/
|
||||
simdjson_really_inline raw_json_string() noexcept = default;
|
||||
|
||||
simdjson_really_inline raw_json_string(const raw_json_string &other) noexcept = default;
|
||||
simdjson_really_inline raw_json_string &operator=(const raw_json_string &other) noexcept = default;
|
||||
|
||||
/**
|
||||
* Create a new invalid raw_json_string pointed at the given location in the JSON.
|
||||
*
|
||||
* The given location must be just *after* the beginning quote (") in the JSON file.
|
||||
*
|
||||
* It *must* be terminated by a ", and be a valid JSON string.
|
||||
*/
|
||||
simdjson_really_inline raw_json_string(const uint8_t * _buf) noexcept;
|
||||
/**
|
||||
* Get the raw pointer to the beginning of the string in the JSON (just after the ").
|
||||
*/
|
||||
simdjson_really_inline const char * raw() const noexcept;
|
||||
/**
|
||||
* Unescape this JSON string, replacing \\ with \, \n with newline, etc.
|
||||
*
|
||||
* ## IMPORTANT: string_view lifetime
|
||||
*
|
||||
* The string_view is only valid as long as the bytes in dst.
|
||||
*
|
||||
* @param dst A pointer to a buffer at least large enough to write this string as well as a \0.
|
||||
* dst will be updated to the next unused location (just after the \0 written out at
|
||||
* the end of this string).
|
||||
* @return A string_view pointing at the unescaped string in dst
|
||||
* @error STRING_ERROR if escapes are incorrect.
|
||||
*/
|
||||
simdjson_really_inline SIMDJSON_WARN_UNUSED simdjson_result<std::string_view> unescape(uint8_t *&dst) const noexcept;
|
||||
/**
|
||||
* Unescape this JSON string, replacing \\ with \, \n with newline, etc.
|
||||
*
|
||||
* ## IMPORTANT: string_view lifetime
|
||||
*
|
||||
* The string_view is only valid until the next parse() call on the parser.
|
||||
*
|
||||
* @param iter A json_iterator, which contains a buffer where the string will be written.
|
||||
*/
|
||||
simdjson_really_inline SIMDJSON_WARN_UNUSED simdjson_result<std::string_view> unescape(json_iterator &iter) const noexcept;
|
||||
|
||||
private:
|
||||
const uint8_t * buf{};
|
||||
friend class object;
|
||||
};
|
||||
|
||||
// Equality / inequality between a raw_json_string and a std::string_view, declared for both
// operand orders. These are declarations only; the definitions are not part of this header.
SIMDJSON_UNUSED simdjson_really_inline bool operator==(const raw_json_string &a, std::string_view b) noexcept;
|
||||
SIMDJSON_UNUSED simdjson_really_inline bool operator==(std::string_view a, const raw_json_string &b) noexcept;
|
||||
SIMDJSON_UNUSED simdjson_really_inline bool operator!=(const raw_json_string &a, std::string_view b) noexcept;
|
||||
SIMDJSON_UNUSED simdjson_really_inline bool operator!=(std::string_view a, const raw_json_string &b) noexcept;
|
||||
|
||||
// Stream insertion for raw_json_string (declaration only).
// NOTE(review): declared noexcept although it takes a std::ostream; confirm the definition
// cannot throw for streams that have exceptions enabled.
SIMDJSON_UNUSED simdjson_really_inline std::ostream &operator<<(std::ostream &, const raw_json_string &) noexcept;
|
||||
|
||||
} // namespace ondemand
|
||||
} // namespace SIMDJSON_IMPLEMENTATION
|
||||
} // namespace simdjson
|
||||
|
||||
namespace simdjson {
|
||||
|
||||
template<>
|
||||
struct simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::raw_json_string> : public SIMDJSON_IMPLEMENTATION::implementation_simdjson_result_base<SIMDJSON_IMPLEMENTATION::ondemand::raw_json_string> {
|
||||
public:
|
||||
simdjson_really_inline simdjson_result(SIMDJSON_IMPLEMENTATION::ondemand::raw_json_string &&value) noexcept; ///< @private
|
||||
simdjson_really_inline simdjson_result(error_code error) noexcept; ///< @private
|
||||
|
||||
simdjson_really_inline simdjson_result() noexcept = default;
|
||||
simdjson_really_inline simdjson_result(const simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::raw_json_string> &a) noexcept = default;
|
||||
simdjson_really_inline ~simdjson_result() noexcept = default; ///< @private
|
||||
|
||||
simdjson_really_inline simdjson_result<const char *> raw() const noexcept;
|
||||
simdjson_really_inline SIMDJSON_WARN_UNUSED simdjson_result<std::string_view> unescape(uint8_t *&dst) const noexcept;
|
||||
simdjson_really_inline SIMDJSON_WARN_UNUSED simdjson_result<std::string_view> unescape(SIMDJSON_IMPLEMENTATION::ondemand::json_iterator &iter) const noexcept;
|
||||
};
|
||||
|
||||
} // namespace simdjson
|
|
@ -0,0 +1,53 @@
|
|||
namespace simdjson {
|
||||
namespace SIMDJSON_IMPLEMENTATION {
|
||||
namespace ondemand {
|
||||
|
||||
simdjson_really_inline token_iterator::token_iterator(const uint8_t *_buf, uint32_t *_index) noexcept
|
||||
: buf{_buf}, index{_index}
|
||||
{
|
||||
}
|
||||
|
||||
// Returns a pointer into the JSON text for the token `delta` entries away from the current one,
// without moving the iterator.
simdjson_really_inline const uint8_t *token_iterator::peek(int32_t delta) const noexcept {
  return buf + index[delta];
}
|
||||
// Returns a pointer to the current token's text and then steps the iterator to the next entry.
simdjson_really_inline const uint8_t *token_iterator::advance() noexcept {
  const uint32_t offset = *index;
  ++index;
  return buf + offset;
}
|
||||
// Returns the stored offset (into the JSON text) of the token `delta` entries away from the
// current one, without moving the iterator.
simdjson_really_inline uint32_t token_iterator::peek_index(int32_t delta) const noexcept {
  return index[delta];
}
|
||||
// Returns the distance between the offset of the token at `delta` and the offset of the token
// that follows it.
simdjson_really_inline uint32_t token_iterator::peek_length(int32_t delta) const noexcept {
  const uint32_t *token = index + delta;
  return token[1] - token[0];
}
|
||||
|
||||
// Comparison operators: two token_iterators are ordered purely by their position pointer.
// `==` and `<` are the primitives; the remaining operators are expressed through them.
simdjson_really_inline bool token_iterator::operator==(const token_iterator &other) const noexcept {
  return index == other.index;
}
simdjson_really_inline bool token_iterator::operator!=(const token_iterator &other) const noexcept {
  return !(*this == other);
}
simdjson_really_inline bool token_iterator::operator>(const token_iterator &other) const noexcept {
  return other < *this;
}
simdjson_really_inline bool token_iterator::operator>=(const token_iterator &other) const noexcept {
  return !(*this < other);
}
simdjson_really_inline bool token_iterator::operator<(const token_iterator &other) const noexcept {
  return index < other.index;
}
simdjson_really_inline bool token_iterator::operator<=(const token_iterator &other) const noexcept {
  return !(other < *this);
}
|
||||
|
||||
} // namespace ondemand
|
||||
} // namespace SIMDJSON_IMPLEMENTATION
|
||||
} // namespace simdjson
|
||||
|
||||
namespace simdjson {
|
||||
|
||||
// Success constructor: moves the token_iterator into the base result.
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::token_iterator>::simdjson_result(
  SIMDJSON_IMPLEMENTATION::ondemand::token_iterator &&value
) noexcept
  : implementation_simdjson_result_base<SIMDJSON_IMPLEMENTATION::ondemand::token_iterator>(std::move(value))
{
}

// Failure constructor: stores only the error code in the base result.
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::token_iterator>::simdjson_result(
  error_code error
) noexcept
  : implementation_simdjson_result_base<SIMDJSON_IMPLEMENTATION::ondemand::token_iterator>(error)
{
}
|
||||
|
||||
} // namespace simdjson
|
|
@ -0,0 +1,98 @@
|
|||
namespace simdjson {
|
||||
namespace SIMDJSON_IMPLEMENTATION {
|
||||
namespace ondemand {
|
||||
|
||||
/**
|
||||
* Iterates through JSON tokens (`{` `}` `[` `]` `,` `:` `"<string>"` `123` `true` `false` `null`)
|
||||
* detected by stage 1.
|
||||
*
|
||||
* @private This is not intended for external use.
|
||||
*/
|
||||
class token_iterator {
|
||||
public:
|
||||
/**
|
||||
* Create a new invalid token_iterator.
|
||||
*
|
||||
* Exists so you can declare a variable and later assign to it before use.
|
||||
*/
|
||||
simdjson_really_inline token_iterator() noexcept = default;
|
||||
|
||||
simdjson_really_inline token_iterator(token_iterator &&other) noexcept = default;
|
||||
simdjson_really_inline token_iterator &operator=(token_iterator &&other) noexcept = default;
|
||||
simdjson_really_inline token_iterator(const token_iterator &other) noexcept = delete;
|
||||
simdjson_really_inline token_iterator &operator=(const token_iterator &other) noexcept = delete;
|
||||
|
||||
/**
|
||||
* Get the JSON text for a given token (relative).
|
||||
*
|
||||
* This is not null-terminated; it is a view into the JSON.
|
||||
*
|
||||
* @param delta The relative position of the token to retrieve. e.g. 0 = current token,
|
||||
* 1 = next token, -1 = prev token.
|
||||
*
|
||||
* TODO consider a string_view, assuming the length will get stripped out by the optimizer when
|
||||
* it isn't used ...
|
||||
*/
|
||||
simdjson_really_inline const uint8_t *peek(int32_t delta=0) const noexcept;
|
||||
/**
|
||||
* Get the maximum length of the JSON text for a given token.
|
||||
*
|
||||
* The length will include any whitespace at the end of the token.
|
||||
*
|
||||
* @param delta The relative position of the token to retrieve. e.g. 0 = current token,
|
||||
* 1 = next token, -1 = prev token.
|
||||
*/
|
||||
simdjson_really_inline uint32_t peek_length(int32_t delta=0) const noexcept;
|
||||
/**
|
||||
* Advance to the next token (returning the current one).
|
||||
*
|
||||
* Does not check or update depth/expect_value. Caller is responsible for that.
|
||||
*/
|
||||
simdjson_really_inline const uint8_t *advance() noexcept;
|
||||
|
||||
// NOTE: we don't support a full C++ iterator interface, because we expect people to make
|
||||
// different calls to advance the iterator based on *their own* state.
|
||||
|
||||
simdjson_really_inline bool operator==(const token_iterator &other) const noexcept;
|
||||
simdjson_really_inline bool operator!=(const token_iterator &other) const noexcept;
|
||||
simdjson_really_inline bool operator>(const token_iterator &other) const noexcept;
|
||||
simdjson_really_inline bool operator>=(const token_iterator &other) const noexcept;
|
||||
simdjson_really_inline bool operator<(const token_iterator &other) const noexcept;
|
||||
simdjson_really_inline bool operator<=(const token_iterator &other) const noexcept;
|
||||
|
||||
protected:
|
||||
simdjson_really_inline token_iterator(const uint8_t *buf, uint32_t *index) noexcept;
|
||||
|
||||
/**
|
||||
* Get the index of the JSON text for a given token (relative).
|
||||
*
|
||||
* This is not null-terminated; it is a view into the JSON.
|
||||
*
|
||||
* @param delta The relative position of the token to retrieve. e.g. 0 = current token,
|
||||
* 1 = next token, -1 = prev token.
|
||||
*
|
||||
*/
|
||||
simdjson_really_inline uint32_t peek_index(int32_t delta=0) const noexcept;
|
||||
|
||||
const uint8_t *buf{};
|
||||
const uint32_t *index{};
|
||||
};
|
||||
|
||||
} // namespace ondemand
|
||||
} // namespace SIMDJSON_IMPLEMENTATION
|
||||
} // namespace simdjson
|
||||
|
||||
namespace simdjson {
|
||||
|
||||
template<>
|
||||
struct simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::token_iterator> : public SIMDJSON_IMPLEMENTATION::implementation_simdjson_result_base<SIMDJSON_IMPLEMENTATION::ondemand::token_iterator> {
|
||||
public:
|
||||
simdjson_really_inline simdjson_result(SIMDJSON_IMPLEMENTATION::ondemand::token_iterator &&value) noexcept; ///< @private
|
||||
simdjson_really_inline simdjson_result(error_code error) noexcept; ///< @private
|
||||
|
||||
simdjson_really_inline simdjson_result() noexcept = default;
|
||||
simdjson_really_inline simdjson_result(simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::token_iterator> &&a) noexcept = default;
|
||||
simdjson_really_inline ~simdjson_result() noexcept = default; ///< @private
|
||||
};
|
||||
|
||||
} // namespace simdjson
|
|
@ -0,0 +1,388 @@
|
|||
namespace simdjson {
|
||||
namespace SIMDJSON_IMPLEMENTATION {
|
||||
namespace ondemand {
|
||||
|
||||
simdjson_really_inline value::value(json_iterator_ref && _iter, const uint8_t *_json) noexcept
|
||||
: iter{std::forward<json_iterator_ref>(_iter)},
|
||||
json{_json}
|
||||
{
|
||||
iter.assert_is_active();
|
||||
SIMDJSON_ASSUME(json != nullptr);
|
||||
}
|
||||
|
||||
// Destructor: if this value still holds a live iterator reference (i.e. nobody consumed it),
// step over it so the enclosing array/object iteration can continue. A container ('[' or '{')
// is skipped as a whole; a scalar only gets logged. The iterator reference is released afterwards.
// PERF TODO this better be elided entirely when people actually use the value. Don't care if it
// gets bumped on the error path unless that's costing us something important.
simdjson_really_inline value::~value() noexcept {
  if (!iter.is_alive()) { return; }

  const bool is_container = (*json == '[') || (*json == '{');
  if (is_container) {
    logger::log_start_value(*iter, "unused");
    // The skip result is intentionally ignored: a destructor has nowhere to report it.
    SIMDJSON_UNUSED auto _err = iter->skip_container();
  } else {
    logger::log_value(*iter, "unused");
  }
  iter.release();
}
|
||||
|
||||
// Factory: advances the iterator by one token and wraps that token, together with the iterator
// reference, in a new value.
simdjson_really_inline value value::start(json_iterator_ref &&iter) noexcept {
  auto first_byte = iter->advance();
  return { std::move(iter), first_byte };
}
|
||||
|
||||
// Marks this value as used by releasing its iterator reference, and returns the pointer to the
// value's JSON text.
simdjson_really_inline const uint8_t *value::consume() noexcept {
  iter.release();
  const uint8_t *const text = json;
  return text;
}
|
||||
template<typename T>
|
||||
simdjson_really_inline simdjson_result<T> value::consume_if_success(simdjson_result<T> &&result) noexcept {
|
||||
if (!result.error()) { consume(); }
|
||||
return std::forward<simdjson_result<T>>(result);
|
||||
}
|
||||
|
||||
// Interprets this value as an array. iter->start_array(json) reports through `has_content`;
// when it is false the iterator reference is released before being handed to the array.
// Any error from start_array is returned as-is.
simdjson_really_inline simdjson_result<array> value::get_array() noexcept {
  bool has_content;
  SIMDJSON_TRY( iter->start_array(json).get(has_content) );
  if (!has_content) {
    iter.release();
  }
  return array(std::move(iter));
}

// Interprets this value as an object. iter->start_object(json) reports through `has_content`;
// when it is false the iterator reference is released before being handed to the object.
// Any error from start_object is returned as-is.
simdjson_really_inline simdjson_result<object> value::get_object() noexcept {
  bool has_content;
  SIMDJSON_TRY( iter->start_object(json).get(has_content) );
  if (!has_content) {
    iter.release();
  }
  return object(std::move(iter));
}
|
||||
// Rvalue overload: parses this value as a raw JSON string and always consumes the value,
// whether or not parsing succeeded.
//
// The string is parsed from the stored `json` pointer, like the `&` overload below and like
// get_string() &&, rather than from the iterator's current position: this value was built from a
// token the iterator already handed out. consume() runs after parsing so the iterator
// reference is still held while it is used.
simdjson_really_inline simdjson_result<raw_json_string> value::get_raw_json_string() && noexcept {
  auto result = iter->parse_raw_json_string(json);
  consume();
  return result;
}

// Lvalue overload: parses this value as a raw JSON string and consumes the value only when
// parsing succeeded, so a failed attempt leaves the value available.
simdjson_really_inline simdjson_result<raw_json_string> value::get_raw_json_string() & noexcept {
  return consume_if_success( iter->parse_raw_json_string(json) );
}
|
||||
// Rvalue overload: parses this value as a string, then consumes the value regardless of the
// outcome and returns the parse result.
simdjson_really_inline simdjson_result<std::string_view> value::get_string() && noexcept {
  auto parsed = iter->parse_string(json);
  consume();
  return parsed;
}

// Lvalue overload: parses this value as a string and consumes the value only if parsing
// produced no error.
simdjson_really_inline simdjson_result<std::string_view> value::get_string() & noexcept {
  return consume_if_success(
    iter->parse_string(json)
  );
}
|
||||
// Scalar getters. Each comes as an rvalue (&&) and an lvalue (&) overload:
//  - the && overload passes consume() to the parser, so the value is consumed (its iterator
//    reference released) whether or not parsing succeeds;
//  - the & overload parses from the stored `json` pointer and goes through consume_if_success(),
//    so the value is consumed only when parsing reported no error.
// NOTE(review): in the && overloads `iter->` and `consume()` (which releases `iter`) appear in
// the same call expression; before C++17 their relative evaluation order is unspecified.
// Confirm this is safe for json_iterator_ref on every supported language level.

// Parses this value as a double; always consumes.
simdjson_really_inline simdjson_result<double> value::get_double() && noexcept {
|
||||
return iter->parse_double(consume());
|
||||
}
|
||||
// Parses this value as a double; consumes only on success.
simdjson_really_inline simdjson_result<double> value::get_double() & noexcept {
|
||||
return consume_if_success( iter->parse_double(json) );
|
||||
}
|
||||
// Parses this value as a uint64_t; always consumes.
simdjson_really_inline simdjson_result<uint64_t> value::get_uint64() && noexcept {
|
||||
return iter->parse_uint64(consume());
|
||||
}
|
||||
// Parses this value as a uint64_t; consumes only on success.
simdjson_really_inline simdjson_result<uint64_t> value::get_uint64() & noexcept {
|
||||
return consume_if_success( iter->parse_uint64(json) );
|
||||
}
|
||||
// Parses this value as an int64_t; always consumes.
simdjson_really_inline simdjson_result<int64_t> value::get_int64() && noexcept {
|
||||
return iter->parse_int64(consume());
|
||||
}
|
||||
// Parses this value as an int64_t; consumes only on success.
simdjson_really_inline simdjson_result<int64_t> value::get_int64() & noexcept {
|
||||
return consume_if_success( iter->parse_int64(json) );
|
||||
}
|
||||
// Parses this value as a bool; always consumes.
simdjson_really_inline simdjson_result<bool> value::get_bool() && noexcept {
|
||||
return iter->parse_bool(consume());
|
||||
}
|
||||
// Parses this value as a bool; consumes only on success.
simdjson_really_inline simdjson_result<bool> value::get_bool() & noexcept {
|
||||
return consume_if_success( iter->parse_bool(json) );
|
||||
}
|
||||
// Returns what iter->is_null() reports for this value; always consumes.
simdjson_really_inline bool value::is_null() && noexcept {
|
||||
return iter->is_null(consume());
|
||||
}
|
||||
// Returns what iter->is_null() reports for this value; consumes only when that is true, so a
// non-null value stays available for another getter.
simdjson_really_inline bool value::is_null() & noexcept {
|
||||
if (!iter->is_null(json)) { return false; }
|
||||
consume();
|
||||
return true;
|
||||
}
|
||||
|
||||
// Explicit specializations of value::get<T>() for lvalue values. Each one simply dispatches to
// the lvalue flavour of the matching named getter.
template<> simdjson_really_inline simdjson_result<array> value::get() & noexcept {
  return get_array();
}
template<> simdjson_really_inline simdjson_result<object> value::get() & noexcept {
  return get_object();
}
template<> simdjson_really_inline simdjson_result<raw_json_string> value::get() & noexcept {
  return get_raw_json_string();
}
template<> simdjson_really_inline simdjson_result<std::string_view> value::get() & noexcept {
  return get_string();
}
template<> simdjson_really_inline simdjson_result<double> value::get() & noexcept {
  return get_double();
}
template<> simdjson_really_inline simdjson_result<uint64_t> value::get() & noexcept {
  return get_uint64();
}
template<> simdjson_really_inline simdjson_result<int64_t> value::get() & noexcept {
  return get_int64();
}
template<> simdjson_really_inline simdjson_result<bool> value::get() & noexcept {
  return get_bool();
}
|
||||
|
||||
// Explicit specializations of value::get<T>() for rvalue values. get<value>() hands the value
// itself over; every other specialization dispatches to the matching named getter on the
// rvalue.
template<> simdjson_really_inline simdjson_result<value> value::get() && noexcept {
  return std::move(*this);
}
template<> simdjson_really_inline simdjson_result<array> value::get() && noexcept {
  return std::move(*this).get_array();
}
template<> simdjson_really_inline simdjson_result<object> value::get() && noexcept {
  return std::move(*this).get_object();
}
template<> simdjson_really_inline simdjson_result<raw_json_string> value::get() && noexcept {
  return std::move(*this).get_raw_json_string();
}
template<> simdjson_really_inline simdjson_result<std::string_view> value::get() && noexcept {
  return std::move(*this).get_string();
}
template<> simdjson_really_inline simdjson_result<double> value::get() && noexcept {
  return std::move(*this).get_double();
}
template<> simdjson_really_inline simdjson_result<uint64_t> value::get() && noexcept {
  return std::move(*this).get_uint64();
}
template<> simdjson_really_inline simdjson_result<int64_t> value::get() && noexcept {
  return std::move(*this).get_int64();
}
template<> simdjson_really_inline simdjson_result<bool> value::get() && noexcept {
  return std::move(*this).get_bool();
}
|
||||
|
||||
// Out-parameter form of get<T>() for lvalue values: runs get<T>() and stores its payload in
// `out` through the result's own get(); returns the resulting error code.
template<typename T>
simdjson_really_inline error_code value::get(T &out) & noexcept {
  return get<T>().get(out);
}

// Out-parameter form of get<T>() for rvalue values: same as above, but dispatches to the rvalue
// overload of get<T>().
template<typename T>
simdjson_really_inline error_code value::get(T &out) && noexcept {
  return std::move(*this).get<T>().get(out);
}
|
||||
|
||||
#if SIMDJSON_EXCEPTIONS
//
// Implicit conversions (exception-enabled builds only).
//
// Each operator converts the simdjson_result returned by the matching getter to the target
// type; these operators are noexcept(false) because that conversion may throw.
//
// The scalar conversions come in && and & flavours. The && flavour calls the rvalue getter,
// which always consumes the value. The & flavour must call the *lvalue* getter, which consumes
// the value only on success; forwarding *this as an rvalue there would silently select the
// always-consuming overload and leave the value unusable after a failed conversion.
//
simdjson_really_inline value::operator array() noexcept(false) {
  return std::forward<value>(*this).get_array();
}
simdjson_really_inline value::operator object() noexcept(false) {
  return std::forward<value>(*this).get_object();
}
simdjson_really_inline value::operator uint64_t() && noexcept(false) {
  return std::forward<value>(*this).get_uint64();
}
simdjson_really_inline value::operator uint64_t() & noexcept(false) {
  return get_uint64();
}
simdjson_really_inline value::operator int64_t() && noexcept(false) {
  return std::forward<value>(*this).get_int64();
}
simdjson_really_inline value::operator int64_t() & noexcept(false) {
  return get_int64();
}
simdjson_really_inline value::operator double() && noexcept(false) {
  return std::forward<value>(*this).get_double();
}
simdjson_really_inline value::operator double() & noexcept(false) {
  return get_double();
}
simdjson_really_inline value::operator std::string_view() && noexcept(false) {
  return std::forward<value>(*this).get_string();
}
simdjson_really_inline value::operator std::string_view() & noexcept(false) {
  return get_string();
}
simdjson_really_inline value::operator raw_json_string() && noexcept(false) {
  return std::forward<value>(*this).get_raw_json_string();
}
simdjson_really_inline value::operator raw_json_string() & noexcept(false) {
  return get_raw_json_string();
}
simdjson_really_inline value::operator bool() && noexcept(false) {
  return std::forward<value>(*this).get_bool();
}
simdjson_really_inline value::operator bool() & noexcept(false) {
  return get_bool();
}
#endif
|
||||
|
||||
// Range-for support on an lvalue value: begin() asks array_iterator to start iterating over
// this value, passing the pointer to the value's JSON text.
simdjson_really_inline simdjson_result<array_iterator<value>> value::begin() & noexcept {
  return array_iterator<value>::start(
    *this,
    json
  );
}

// Range-for support on an lvalue value: end() is a default-constructed result.
simdjson_really_inline simdjson_result<array_iterator<value>> value::end() & noexcept {
  return {};
}
|
||||
|
||||
// Logs this value under the label `type`, attaching the first character of its JSON text as a
// one-character C string.
simdjson_really_inline void value::log_value(const char *type) const noexcept {
  char first_char[2];
  first_char[0] = char(json[0]);
  first_char[1] = '\0';
  logger::log_value(*iter, type, first_char);
}

// Logs an error `message` for this value, attaching the first character of its JSON text as a
// one-character C string.
simdjson_really_inline void value::log_error(const char *message) const noexcept {
  char first_char[2];
  first_char[0] = char(json[0]);
  first_char[1] = '\0';
  logger::log_error(*iter, message, first_char);
}
|
||||
|
||||
//
|
||||
// For array_iterator
|
||||
//
|
||||
// Hooks used by array_iterator.

// Gives access to the json_iterator this value refers to.
simdjson_really_inline json_iterator &value::get_iterator() noexcept {
  json_iterator &underlying = *iter;
  return underlying;
}

// Returns a borrowed reference to the iterator, as produced by the held reference's borrow().
simdjson_really_inline json_iterator_ref value::borrow_iterator() noexcept {
  return iter.borrow();
}

// Reports whether this value still holds a live iterator reference.
simdjson_really_inline bool value::is_iterator_alive() const noexcept {
  const bool alive = iter.is_alive();
  return alive;
}

// Called when iteration over this value is done: releases the iterator reference.
simdjson_really_inline void value::iteration_finished() noexcept {
  iter.release();
}
|
||||
|
||||
} // namespace ondemand
|
||||
} // namespace SIMDJSON_IMPLEMENTATION
|
||||
} // namespace simdjson
|
||||
|
||||
namespace simdjson {
|
||||
|
||||
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::value>::simdjson_result(
|
||||
SIMDJSON_IMPLEMENTATION::ondemand::value &&value
|
||||
) noexcept :
|
||||
implementation_simdjson_result_base<SIMDJSON_IMPLEMENTATION::ondemand::value>(
|
||||
std::forward<SIMDJSON_IMPLEMENTATION::ondemand::value>(value)
|
||||
)
|
||||
{
|
||||
}
|
||||
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::value>::simdjson_result(
|
||||
error_code error
|
||||
) noexcept :
|
||||
implementation_simdjson_result_base<SIMDJSON_IMPLEMENTATION::ondemand::value>(error)
|
||||
{
|
||||
}
|
||||
|
||||
// Range-for support on a result: begin() propagates a stored error, otherwise it forwards to
// the wrapped value's begin().
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::array_iterator<SIMDJSON_IMPLEMENTATION::ondemand::value>> simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::value>::begin() & noexcept {
  const error_code failure = error();
  if (failure) { return failure; }
  return first.begin();
}

// Range-for support on a result: end() propagates a stored error, otherwise it is a
// default-constructed result.
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::array_iterator<SIMDJSON_IMPLEMENTATION::ondemand::value>> simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::value>::end() & noexcept {
  const error_code failure = error();
  if (failure) { return failure; }
  return {};
}
|
||||
|
||||
// Propagates a stored error; otherwise asks the wrapped value for an array.
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::array> simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::value>::get_array() noexcept {
  const error_code failure = error();
  if (failure) { return failure; }
  return std::move(first).get_array();
}

// Propagates a stored error; otherwise asks the wrapped value for an object.
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::object> simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::value>::get_object() noexcept {
  const error_code failure = error();
  if (failure) { return failure; }
  return std::move(first).get_object();
}
|
||||
//
// Scalar getters on a result.
//
// Every getter first propagates a stored error (is_null() reports false in that case) and
// otherwise forwards to the wrapped value.
//
// The && overloads forward the wrapped value as an rvalue, selecting value's && getters, which
// always consume the value. The & overloads call the getter on the wrapped value as an
// *lvalue*, selecting value's & getters, which consume the value only on success. Forwarding
// `first` as an rvalue from a & overload would pick the wrong one and consume the value even
// when parsing failed.
//
simdjson_really_inline simdjson_result<uint64_t> simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::value>::get_uint64() && noexcept {
  if (error()) { return error(); }
  return std::forward<SIMDJSON_IMPLEMENTATION::ondemand::value>(first).get_uint64();
}
simdjson_really_inline simdjson_result<uint64_t> simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::value>::get_uint64() & noexcept {
  if (error()) { return error(); }
  return first.get_uint64();
}
simdjson_really_inline simdjson_result<int64_t> simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::value>::get_int64() && noexcept {
  if (error()) { return error(); }
  return std::forward<SIMDJSON_IMPLEMENTATION::ondemand::value>(first).get_int64();
}
simdjson_really_inline simdjson_result<int64_t> simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::value>::get_int64() & noexcept {
  if (error()) { return error(); }
  return first.get_int64();
}
simdjson_really_inline simdjson_result<double> simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::value>::get_double() && noexcept {
  if (error()) { return error(); }
  return std::forward<SIMDJSON_IMPLEMENTATION::ondemand::value>(first).get_double();
}
simdjson_really_inline simdjson_result<double> simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::value>::get_double() & noexcept {
  if (error()) { return error(); }
  return first.get_double();
}
simdjson_really_inline simdjson_result<std::string_view> simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::value>::get_string() && noexcept {
  if (error()) { return error(); }
  return std::forward<SIMDJSON_IMPLEMENTATION::ondemand::value>(first).get_string();
}
simdjson_really_inline simdjson_result<std::string_view> simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::value>::get_string() & noexcept {
  if (error()) { return error(); }
  return first.get_string();
}
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::raw_json_string> simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::value>::get_raw_json_string() && noexcept {
  if (error()) { return error(); }
  return std::forward<SIMDJSON_IMPLEMENTATION::ondemand::value>(first).get_raw_json_string();
}
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::raw_json_string> simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::value>::get_raw_json_string() & noexcept {
  if (error()) { return error(); }
  return first.get_raw_json_string();
}
simdjson_really_inline simdjson_result<bool> simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::value>::get_bool() && noexcept {
  if (error()) { return error(); }
  return std::forward<SIMDJSON_IMPLEMENTATION::ondemand::value>(first).get_bool();
}
simdjson_really_inline simdjson_result<bool> simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::value>::get_bool() & noexcept {
  if (error()) { return error(); }
  return first.get_bool();
}
simdjson_really_inline bool simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::value>::is_null() && noexcept {
  if (error()) { return false; }
  return std::forward<SIMDJSON_IMPLEMENTATION::ondemand::value>(first).is_null();
}
simdjson_really_inline bool simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::value>::is_null() & noexcept {
  if (error()) { return false; }
  return first.is_null();
}
|
||||
|
||||
// Generic get<T>() on an lvalue result: propagates a stored error, otherwise forwards to the
// wrapped value's lvalue get<T>().
template<typename T>
simdjson_really_inline simdjson_result<T> simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::value>::get() & noexcept {
  const error_code failure = error();
  if (failure) { return failure; }
  return first.get<T>();
}

// Generic get<T>() on an rvalue result: propagates a stored error, otherwise forwards to the
// wrapped value's rvalue get<T>().
template<typename T>
simdjson_really_inline simdjson_result<T> simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::value>::get() && noexcept {
  const error_code failure = error();
  if (failure) { return failure; }
  return std::move(first).get<T>();
}

// Out-parameter get on an lvalue result: returns a stored error, otherwise the error code of
// the wrapped value's lvalue get<T>(out).
template<typename T>
simdjson_really_inline error_code simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::value>::get(T &out) & noexcept {
  const error_code failure = error();
  if (failure) { return failure; }
  return first.get<T>(out);
}

// Out-parameter get on an rvalue result: returns a stored error, otherwise the error code of
// the wrapped value's rvalue get<T>(out).
template<typename T>
simdjson_really_inline error_code simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::value>::get(T &out) && noexcept {
  const error_code failure = error();
  if (failure) { return failure; }
  return std::move(first).get<T>(out);
}
|
||||
|
||||
// get<value>() specializations. Extracting the value itself is only possible from an rvalue
// result (the value is moved out); both lvalue forms are deleted.

template<> simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::value> simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::value>::get<SIMDJSON_IMPLEMENTATION::ondemand::value>() & noexcept = delete;

// Rvalue form: propagates a stored error, otherwise moves the wrapped value into a new result.
template<> simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::value> simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::value>::get<SIMDJSON_IMPLEMENTATION::ondemand::value>() && noexcept {
  const error_code failure = error();
  if (failure) { return failure; }
  return std::move(first);
}

template<> simdjson_really_inline error_code simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::value>::get<SIMDJSON_IMPLEMENTATION::ondemand::value>(SIMDJSON_IMPLEMENTATION::ondemand::value &out) & noexcept = delete;

// Rvalue out-parameter form: returns a stored error, otherwise move-assigns the wrapped value
// into `out` and reports SUCCESS.
template<> simdjson_really_inline error_code simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::value>::get<SIMDJSON_IMPLEMENTATION::ondemand::value>(SIMDJSON_IMPLEMENTATION::ondemand::value &out) && noexcept {
  const error_code failure = error();
  if (failure) { return failure; }
  out = std::move(first);
  return SUCCESS;
}
|
||||
|
||||
#if SIMDJSON_EXCEPTIONS
//
// Implicit conversions on a result (exception-enabled builds only).
//
// Each operator throws simdjson_error when the result holds an error; otherwise it converts the
// wrapped value to the target type through value's own conversion operators.
//
// The && overloads hand the wrapped value over as an rvalue, selecting value's && conversion.
// The & overloads convert the wrapped value as an *lvalue*, selecting value's & conversion, so
// that a conversion requested on an lvalue result does not go through the rvalue path of the
// wrapped value.
//
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::value>::operator SIMDJSON_IMPLEMENTATION::ondemand::array() noexcept(false) {
  if (error()) { throw simdjson_error(error()); }
  return std::forward<SIMDJSON_IMPLEMENTATION::ondemand::value>(first);
}
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::value>::operator SIMDJSON_IMPLEMENTATION::ondemand::object() noexcept(false) {
  if (error()) { throw simdjson_error(error()); }
  return std::forward<SIMDJSON_IMPLEMENTATION::ondemand::value>(first);
}
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::value>::operator uint64_t() && noexcept(false) {
  if (error()) { throw simdjson_error(error()); }
  return std::forward<SIMDJSON_IMPLEMENTATION::ondemand::value>(first);
}
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::value>::operator uint64_t() & noexcept(false) {
  if (error()) { throw simdjson_error(error()); }
  return first;
}
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::value>::operator int64_t() && noexcept(false) {
  if (error()) { throw simdjson_error(error()); }
  return std::forward<SIMDJSON_IMPLEMENTATION::ondemand::value>(first);
}
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::value>::operator int64_t() & noexcept(false) {
  if (error()) { throw simdjson_error(error()); }
  return first;
}
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::value>::operator double() && noexcept(false) {
  if (error()) { throw simdjson_error(error()); }
  return std::forward<SIMDJSON_IMPLEMENTATION::ondemand::value>(first);
}
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::value>::operator double() & noexcept(false) {
  if (error()) { throw simdjson_error(error()); }
  return first;
}
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::value>::operator std::string_view() && noexcept(false) {
  if (error()) { throw simdjson_error(error()); }
  return std::forward<SIMDJSON_IMPLEMENTATION::ondemand::value>(first);
}
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::value>::operator std::string_view() & noexcept(false) {
  if (error()) { throw simdjson_error(error()); }
  return first;
}
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::value>::operator SIMDJSON_IMPLEMENTATION::ondemand::raw_json_string() && noexcept(false) {
  if (error()) { throw simdjson_error(error()); }
  return std::forward<SIMDJSON_IMPLEMENTATION::ondemand::value>(first);
}
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::value>::operator SIMDJSON_IMPLEMENTATION::ondemand::raw_json_string() & noexcept(false) {
  if (error()) { throw simdjson_error(error()); }
  return first;
}
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::value>::operator bool() && noexcept(false) {
  if (error()) { throw simdjson_error(error()); }
  return std::forward<SIMDJSON_IMPLEMENTATION::ondemand::value>(first);
}
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::value>::operator bool() & noexcept(false) {
  if (error()) { throw simdjson_error(error()); }
  return first;
}
#endif
|
||||
|
||||
} // namespace simdjson
|
|
@ -0,0 +1,365 @@
|
|||
#include "simdjson/error.h"
|
||||
|
||||
namespace simdjson {
|
||||
namespace SIMDJSON_IMPLEMENTATION {
|
||||
namespace ondemand {
|
||||
|
||||
class array;
|
||||
class document;
|
||||
class field;
|
||||
class object;
|
||||
class raw_json_string;
|
||||
|
||||
/**
|
||||
* An ephemeral JSON value returned during iteration.
|
||||
*/
|
||||
class value {
|
||||
public:
|
||||
/**
|
||||
* Create a new invalid value.
|
||||
*
|
||||
* Exists so you can declare a variable and later assign to it before use.
|
||||
*/
|
||||
simdjson_really_inline value() noexcept = default;
|
||||
|
||||
simdjson_really_inline value(value &&other) noexcept = default;
|
||||
simdjson_really_inline value &operator=(value && other) noexcept = default;
|
||||
simdjson_really_inline value(const value &) noexcept = delete;
|
||||
simdjson_really_inline value &operator=(const value &) noexcept = delete;
|
||||
|
||||
/**
|
||||
* Skips the value if the value was not successfully parsed or used.
|
||||
*/
|
||||
simdjson_really_inline ~value() noexcept;
|
||||
|
||||
/**
|
||||
* Get this value as the given type.
|
||||
*
|
||||
* Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool
|
||||
*
|
||||
* @returns A value of the given type, parsed from the JSON.
|
||||
* @returns INCORRECT_TYPE If the JSON value is not the given type.
|
||||
*/
|
||||
template<typename T> simdjson_really_inline simdjson_result<T> get() & noexcept;
|
||||
/** @overload template<typename T> simdjson_result<T> get() & noexcept */
|
||||
template<typename T> simdjson_really_inline simdjson_result<T> get() && noexcept;
|
||||
|
||||
/**
|
||||
* Get this value as the given type.
|
||||
*
|
||||
* Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool
|
||||
*
|
||||
* @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized.
|
||||
* @returns INCORRECT_TYPE If the JSON value is not the given type.
|
||||
* @returns SUCCESS If the parse succeeded and the out parameter was set to the value.
|
||||
*/
|
||||
template<typename T> simdjson_really_inline error_code get(T &out) & noexcept;
|
||||
/** @overload template<typename T> error_code get(T &out) & noexcept */
|
||||
template<typename T> simdjson_really_inline error_code get(T &out) && noexcept;
|
||||
|
||||
/**
|
||||
* Cast this JSON value to an array.
|
||||
*
|
||||
* @returns An object that can be used to iterate the array.
|
||||
* @returns INCORRECT_TYPE If the JSON value is not an array.
|
||||
*/
|
||||
simdjson_really_inline simdjson_result<array> get_array() noexcept;
|
||||
|
||||
/**
|
||||
* Cast this JSON value to an object.
|
||||
*
|
||||
* @returns An object that can be used to look up or iterate fields.
|
||||
* @returns INCORRECT_TYPE If the JSON value is not an object.
|
||||
*/
|
||||
simdjson_really_inline simdjson_result<object> get_object() noexcept;
|
||||
|
||||
// PERF NOTE: get_XXX() methods generally have both && and & variants because performance is demonstrably better on clang.
|
||||
// Specifically, in typical cases where you use a temporary value (like doc["x"].get_double()) the && version is faster
|
||||
// because the & version has to branch to check whether the parse failed or not before deciding whether the value was consumed.
|
||||
|
||||
/**
|
||||
* Cast this JSON value to an unsigned integer.
|
||||
*
|
||||
* @returns An unsigned 64-bit integer.
|
||||
* @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer.
|
||||
*/
|
||||
simdjson_really_inline simdjson_result<uint64_t> get_uint64() && noexcept;
|
||||
/** @overload simdjson_really_inline simdjson_result<uint64_t> get_uint64() && noexcept */
|
||||
simdjson_really_inline simdjson_result<uint64_t> get_uint64() & noexcept;
|
||||
|
||||
/**
|
||||
* Cast this JSON value to a signed integer.
|
||||
*
|
||||
* @returns A signed 64-bit integer.
|
||||
* @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer.
|
||||
*/
|
||||
simdjson_really_inline simdjson_result<int64_t> get_int64() && noexcept;
|
||||
/** @overload simdjson_really_inline simdjson_result<int64_t> get_int64() && noexcept */
|
||||
simdjson_really_inline simdjson_result<int64_t> get_int64() & noexcept;
|
||||
|
||||
/**
|
||||
* Cast this JSON value to a double.
|
||||
*
|
||||
* @returns A double.
|
||||
* @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number.
|
||||
*/
|
||||
simdjson_really_inline simdjson_result<double> get_double() && noexcept;
|
||||
/** @overload simdjson_really_inline simdjson_result<double> get_double() && noexcept */
|
||||
simdjson_really_inline simdjson_result<double> get_double() & noexcept;
|
||||
|
||||
/**
|
||||
* Cast this JSON value to a string.
|
||||
*
|
||||
* The string is guaranteed to be valid UTF-8.
|
||||
*
|
||||
* Equivalent to get<std::string_view>().
|
||||
*
|
||||
* @returns A UTF-8 string. The string is stored in the parser and will be invalidated the next
|
||||
* time it parses a document or when it is destroyed.
|
||||
* @returns INCORRECT_TYPE if the JSON value is not a string.
|
||||
*/
|
||||
simdjson_really_inline simdjson_result<std::string_view> get_string() && noexcept;
|
||||
/** @overload simdjson_really_inline simdjson_result<std::string_view> get_string() && noexcept */
|
||||
simdjson_really_inline simdjson_result<std::string_view> get_string() & noexcept;
|
||||
|
||||
/**
|
||||
* Cast this JSON value to a raw_json_string.
|
||||
*
|
||||
* The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n).
|
||||
*
|
||||
* @returns A pointer to the raw JSON for the given string.
|
||||
* @returns INCORRECT_TYPE if the JSON value is not a string.
|
||||
*/
|
||||
simdjson_really_inline simdjson_result<raw_json_string> get_raw_json_string() && noexcept;
|
||||
/** @overload simdjson_really_inline simdjson_result<raw_json_string> get_raw_json_string() && noexcept */
|
||||
simdjson_really_inline simdjson_result<raw_json_string> get_raw_json_string() & noexcept;
|
||||
|
||||
/**
|
||||
* Cast this JSON value to a bool.
|
||||
*
|
||||
* @returns A bool value.
|
||||
* @returns INCORRECT_TYPE if the JSON value is not true or false.
|
||||
*/
|
||||
simdjson_really_inline simdjson_result<bool> get_bool() && noexcept;
|
||||
/** @overload simdjson_really_inline simdjson_result<bool> get_bool() && noexcept */
|
||||
simdjson_really_inline simdjson_result<bool> get_bool() & noexcept;
|
||||
|
||||
/**
|
||||
* Checks if this JSON value is null.
|
||||
*
|
||||
* @returns Whether the value is null.
|
||||
*/
|
||||
simdjson_really_inline bool is_null() && noexcept;
|
||||
/** @overload simdjson_really_inline bool is_null() && noexcept */
|
||||
simdjson_really_inline bool is_null() & noexcept;
|
||||
|
||||
#if SIMDJSON_EXCEPTIONS
|
||||
/**
|
||||
* Cast this JSON value to an array.
|
||||
*
|
||||
* @returns An object that can be used to iterate the array.
|
||||
* @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an array.
|
||||
*/
|
||||
simdjson_really_inline operator array() noexcept(false);
|
||||
/**
|
||||
* Cast this JSON value to an object.
|
||||
*
|
||||
* @returns An object that can be used to look up or iterate fields.
|
||||
* @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an object.
|
||||
*/
|
||||
simdjson_really_inline operator object() noexcept(false);
|
||||
/**
|
||||
* Cast this JSON value to an unsigned integer.
|
||||
*
|
||||
* @returns An unsigned 64-bit integer.
|
||||
* @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit unsigned integer.
|
||||
*/
|
||||
simdjson_really_inline operator uint64_t() && noexcept(false);
|
||||
/** @overload simdjson_really_inline operator uint64_t() && noexcept(false); */
|
||||
simdjson_really_inline operator uint64_t() & noexcept(false);
|
||||
/**
|
||||
* Cast this JSON value to a signed integer.
|
||||
*
|
||||
* @returns A signed 64-bit integer.
|
||||
* @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit integer.
|
||||
*/
|
||||
simdjson_really_inline operator int64_t() && noexcept(false);
|
||||
/** @overload simdjson_really_inline operator int64_t() && noexcept(false); */
|
||||
simdjson_really_inline operator int64_t() & noexcept(false);
|
||||
/**
|
||||
* Cast this JSON value to a double.
|
||||
*
|
||||
* @returns A double.
|
||||
* @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a valid floating-point number.
|
||||
*/
|
||||
simdjson_really_inline operator double() && noexcept(false);
|
||||
/** @overload simdjson_really_inline operator double() && noexcept(false); */
|
||||
simdjson_really_inline operator double() & noexcept(false);
|
||||
/**
|
||||
* Cast this JSON value to a string.
|
||||
*
|
||||
* The string is guaranteed to be valid UTF-8.
|
||||
*
|
||||
* Equivalent to get<std::string_view>().
|
||||
*
|
||||
* @returns A UTF-8 string. The string is stored in the parser and will be invalidated the next
|
||||
* time it parses a document or when it is destroyed.
|
||||
* @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string.
|
||||
*/
|
||||
simdjson_really_inline operator std::string_view() && noexcept(false);
|
||||
/** @overload simdjson_really_inline operator std::string_view() && noexcept(false); */
|
||||
simdjson_really_inline operator std::string_view() & noexcept(false);
|
||||
/**
|
||||
* Cast this JSON value to a raw_json_string.
|
||||
*
|
||||
* The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n).
|
||||
*
|
||||
* @returns A pointer to the raw JSON for the given string.
|
||||
* @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string.
|
||||
*/
|
||||
simdjson_really_inline operator raw_json_string() && noexcept(false);
|
||||
/** @overload simdjson_really_inline operator raw_json_string() && noexcept(false); */
|
||||
simdjson_really_inline operator raw_json_string() & noexcept(false);
|
||||
/**
|
||||
* Cast this JSON value to a bool.
|
||||
*
|
||||
* @returns A bool value.
|
||||
* @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not true or false.
|
||||
*/
|
||||
simdjson_really_inline operator bool() && noexcept(false);
|
||||
/** @overload simdjson_really_inline operator bool() && noexcept(false); */
|
||||
simdjson_really_inline operator bool() & noexcept(false);
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Begin array iteration.
|
||||
*
|
||||
* Part of the std::iterable interface.
|
||||
*
|
||||
* @returns INCORRECT_TYPE If the JSON value is not an array.
|
||||
*/
|
||||
simdjson_really_inline simdjson_result<array_iterator<value>> begin() & noexcept;
|
||||
/**
|
||||
* Sentinel representing the end of the array.
|
||||
*
|
||||
* Part of the std::iterable interface.
|
||||
*/
|
||||
simdjson_really_inline simdjson_result<array_iterator<value>> end() & noexcept;
|
||||
|
||||
protected:
|
||||
/**
|
||||
* Create a value.
|
||||
*
|
||||
* Use value::start() instead of this.
|
||||
*/
|
||||
simdjson_really_inline value(json_iterator_ref &&iter, const uint8_t *json) noexcept;
|
||||
|
||||
/**
|
||||
* Read a value.
|
||||
*
|
||||
* If the value is an array or object, only the opening brace will be consumed.
|
||||
*
|
||||
* @param iter Iterator over the document containing the value. Must be at the value start position.
|
||||
*/
|
||||
static simdjson_really_inline value start(json_iterator_ref &&iter) noexcept;
|
||||
|
||||
/**
|
||||
* Skip this value, allowing iteration to continue.
|
||||
*/
|
||||
simdjson_really_inline void skip() noexcept;
|
||||
|
||||
simdjson_really_inline void log_value(const char *type) const noexcept;
|
||||
simdjson_really_inline void log_error(const char *message) const noexcept;
|
||||
|
||||
//
|
||||
// For array_iterator
|
||||
//
|
||||
simdjson_really_inline json_iterator &get_iterator() noexcept;
|
||||
simdjson_really_inline json_iterator_ref borrow_iterator() noexcept;
|
||||
simdjson_really_inline bool is_iterator_alive() const noexcept;
|
||||
simdjson_really_inline void iteration_finished() noexcept;
|
||||
simdjson_really_inline const uint8_t *consume() noexcept;
|
||||
template<typename T>
|
||||
simdjson_really_inline simdjson_result<T> consume_if_success(simdjson_result<T> &&result) noexcept;
|
||||
|
||||
json_iterator_ref iter{};
|
||||
const uint8_t *json{}; // The JSON text of the value
|
||||
|
||||
friend class document;
|
||||
template<typename T> friend class array_iterator;
|
||||
friend class field;
|
||||
friend class object;
|
||||
friend struct simdjson_result<value>;
|
||||
friend struct simdjson_result<document>;
|
||||
friend struct simdjson_result<field>;
|
||||
};
|
||||
|
||||
} // namespace ondemand
|
||||
} // namespace SIMDJSON_IMPLEMENTATION
|
||||
} // namespace simdjson
|
||||
|
||||
namespace simdjson {
|
||||
|
||||
template<>
|
||||
struct simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::value> : public SIMDJSON_IMPLEMENTATION::implementation_simdjson_result_base<SIMDJSON_IMPLEMENTATION::ondemand::value> {
|
||||
public:
|
||||
simdjson_really_inline simdjson_result(SIMDJSON_IMPLEMENTATION::ondemand::value &&value) noexcept; ///< @private
|
||||
simdjson_really_inline simdjson_result(error_code error) noexcept; ///< @private
|
||||
|
||||
simdjson_really_inline simdjson_result() noexcept = default;
|
||||
simdjson_really_inline simdjson_result(simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::value> &&a) noexcept = default;
|
||||
simdjson_really_inline ~simdjson_result() noexcept = default; ///< @private
|
||||
|
||||
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::array> get_array() noexcept;
|
||||
|
||||
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::object> get_object() noexcept;
|
||||
|
||||
simdjson_really_inline simdjson_result<uint64_t> get_uint64() && noexcept;
|
||||
simdjson_really_inline simdjson_result<uint64_t> get_uint64() & noexcept;
|
||||
|
||||
simdjson_really_inline simdjson_result<int64_t> get_int64() && noexcept;
|
||||
simdjson_really_inline simdjson_result<int64_t> get_int64() & noexcept;
|
||||
|
||||
simdjson_really_inline simdjson_result<double> get_double() && noexcept;
|
||||
simdjson_really_inline simdjson_result<double> get_double() & noexcept;
|
||||
|
||||
simdjson_really_inline simdjson_result<std::string_view> get_string() && noexcept;
|
||||
simdjson_really_inline simdjson_result<std::string_view> get_string() & noexcept;
|
||||
|
||||
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::raw_json_string> get_raw_json_string() && noexcept;
|
||||
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::raw_json_string> get_raw_json_string() & noexcept;
|
||||
|
||||
simdjson_really_inline simdjson_result<bool> get_bool() && noexcept;
|
||||
simdjson_really_inline simdjson_result<bool> get_bool() & noexcept;
|
||||
|
||||
simdjson_really_inline bool is_null() && noexcept;
|
||||
simdjson_really_inline bool is_null() & noexcept;
|
||||
|
||||
template<typename T> simdjson_really_inline simdjson_result<T> get() & noexcept;
|
||||
template<typename T> simdjson_really_inline simdjson_result<T> get() && noexcept;
|
||||
|
||||
template<typename T> simdjson_really_inline error_code get(T &out) & noexcept;
|
||||
template<typename T> simdjson_really_inline error_code get(T &out) && noexcept;
|
||||
|
||||
#if SIMDJSON_EXCEPTIONS
|
||||
simdjson_really_inline operator SIMDJSON_IMPLEMENTATION::ondemand::array() noexcept(false);
|
||||
simdjson_really_inline operator SIMDJSON_IMPLEMENTATION::ondemand::object() noexcept(false);
|
||||
simdjson_really_inline operator uint64_t() && noexcept(false);
|
||||
simdjson_really_inline operator uint64_t() & noexcept(false);
|
||||
simdjson_really_inline operator int64_t() && noexcept(false);
|
||||
simdjson_really_inline operator int64_t() & noexcept(false);
|
||||
simdjson_really_inline operator double() && noexcept(false);
|
||||
simdjson_really_inline operator double() & noexcept(false);
|
||||
simdjson_really_inline operator std::string_view() && noexcept(false);
|
||||
simdjson_really_inline operator std::string_view() & noexcept(false);
|
||||
simdjson_really_inline operator SIMDJSON_IMPLEMENTATION::ondemand::raw_json_string() && noexcept(false);
|
||||
simdjson_really_inline operator SIMDJSON_IMPLEMENTATION::ondemand::raw_json_string() & noexcept(false);
|
||||
simdjson_really_inline operator bool() && noexcept(false);
|
||||
simdjson_really_inline operator bool() & noexcept(false);
|
||||
#endif
|
||||
|
||||
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::array_iterator<SIMDJSON_IMPLEMENTATION::ondemand::value>> begin() & noexcept;
|
||||
simdjson_really_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::array_iterator<SIMDJSON_IMPLEMENTATION::ondemand::value>> end() & noexcept;
|
||||
};
|
||||
|
||||
} // namespace simdjson
|
|
@ -1,9 +1,10 @@
|
|||
// This file contains the common code every implementation uses
|
||||
// It is intended to be included multiple times and compiled multiple times
|
||||
|
||||
namespace {
|
||||
namespace simdjson {
|
||||
namespace SIMDJSON_IMPLEMENTATION {
|
||||
namespace stage2 {
|
||||
namespace {
|
||||
/// @private
|
||||
namespace stringparsing {
|
||||
|
||||
// begin copypasta
|
||||
|
@ -41,10 +42,10 @@ static const uint8_t escape_map[256] = {
|
|||
SIMDJSON_WARN_UNUSED
|
||||
simdjson_really_inline bool handle_unicode_codepoint(const uint8_t **src_ptr,
|
||||
uint8_t **dst_ptr) {
|
||||
// hex_to_u32_nocheck fills high 16 bits of the return value with 1s if the
|
||||
// jsoncharutils::hex_to_u32_nocheck fills high 16 bits of the return value with 1s if the
|
||||
// conversion isn't valid; we defer the check for this to inside the
|
||||
// multilingual plane check
|
||||
uint32_t code_point = hex_to_u32_nocheck(*src_ptr + 2);
|
||||
uint32_t code_point = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2);
|
||||
*src_ptr += 6;
|
||||
// check for low surrogate for characters outside the Basic
|
||||
// Multilingual Plane.
|
||||
|
@ -52,7 +53,7 @@ simdjson_really_inline bool handle_unicode_codepoint(const uint8_t **src_ptr,
|
|||
if (((*src_ptr)[0] != '\\') || (*src_ptr)[1] != 'u') {
|
||||
return false;
|
||||
}
|
||||
uint32_t code_point_2 = hex_to_u32_nocheck(*src_ptr + 2);
|
||||
uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2);
|
||||
|
||||
// if the first code point is invalid we will get here, as we will go past
|
||||
// the check for being outside the Basic Multilingual plane. If we don't
|
||||
|
@ -67,13 +68,12 @@ simdjson_really_inline bool handle_unicode_codepoint(const uint8_t **src_ptr,
|
|||
(((code_point - 0xd800) << 10) | (code_point_2 - 0xdc00)) + 0x10000;
|
||||
*src_ptr += 6;
|
||||
}
|
||||
size_t offset = codepoint_to_utf8(code_point, *dst_ptr);
|
||||
size_t offset = jsoncharutils::codepoint_to_utf8(code_point, *dst_ptr);
|
||||
*dst_ptr += offset;
|
||||
return offset > 0;
|
||||
}
|
||||
|
||||
SIMDJSON_WARN_UNUSED simdjson_really_inline uint8_t *parse_string(const uint8_t *src, uint8_t *dst) {
|
||||
src++;
|
||||
while (1) {
|
||||
// Copy the next n bytes, and find the backslash and quote in them.
|
||||
auto bs_quote = backslash_and_quote::copy_and_find(src, dst);
|
||||
|
@ -120,7 +120,7 @@ SIMDJSON_WARN_UNUSED simdjson_really_inline uint8_t *parse_string(const uint8_t
|
|||
}
|
||||
|
||||
SIMDJSON_UNUSED SIMDJSON_WARN_UNUSED simdjson_really_inline error_code parse_string_to_buffer(const uint8_t *src, uint8_t *&current_string_buf_loc, std::string_view &s) {
|
||||
if (src[0] != '"') { return STRING_ERROR; }
|
||||
if (*(src++) != '"') { return STRING_ERROR; }
|
||||
auto end = stringparsing::parse_string(src, current_string_buf_loc);
|
||||
if (!end) { return STRING_ERROR; }
|
||||
s = std::string_view((const char *)current_string_buf_loc, end-current_string_buf_loc);
|
||||
|
@ -129,6 +129,6 @@ SIMDJSON_UNUSED SIMDJSON_WARN_UNUSED simdjson_really_inline error_code parse_str
|
|||
}
|
||||
|
||||
} // namespace stringparsing
|
||||
} // namespace stage2
|
||||
} // namespace SIMDJSON_IMPLEMENTATION
|
||||
} // unnamed namespace
|
||||
} // namespace SIMDJSON_IMPLEMENTATION
|
||||
} // namespace simdjson
|
|
@ -0,0 +1,62 @@
|
|||
#ifndef SIMDJSON_HASWELL_H
|
||||
#define SIMDJSON_HASWELL_H
|
||||
|
||||
#ifdef SIMDJSON_WESTMERE_H
|
||||
#error "haswell.h must be included before westmere.h"
|
||||
#endif
|
||||
#ifdef SIMDJSON_FALLBACK_H
|
||||
#error "haswell.h must be included before fallback.h"
|
||||
#endif
|
||||
|
||||
#include "simdjson/portability.h"
|
||||
|
||||
// Default Haswell to on if this is x86-64. Even if we're not compiled for it, it could be selected
|
||||
// at runtime.
|
||||
#ifndef SIMDJSON_IMPLEMENTATION_HASWELL
|
||||
#define SIMDJSON_IMPLEMENTATION_HASWELL (SIMDJSON_IS_X86_64)
|
||||
#endif
|
||||
#define SIMDJSON_CAN_ALWAYS_RUN_HASWELL ((SIMDJSON_IMPLEMENTATION_HASWELL) && (SIMDJSON_IS_X86_64) && (__AVX2__) && (__BMI__) && (__PCLMUL__) && (__LZCNT__))
|
||||
|
||||
#if SIMDJSON_IMPLEMENTATION_HASWELL
|
||||
|
||||
#define SIMDJSON_TARGET_HASWELL SIMDJSON_TARGET_REGION("avx2,bmi,pclmul,lzcnt")
|
||||
|
||||
namespace simdjson {
|
||||
/**
|
||||
* Implementation for Haswell (Intel AVX2).
|
||||
*/
|
||||
namespace haswell {
|
||||
} // namespace haswell
|
||||
} // namespace simdjson
|
||||
|
||||
//
|
||||
// These two need to be included outside SIMDJSON_TARGET_REGION
|
||||
//
|
||||
#include "simdjson/haswell/implementation.h"
|
||||
#include "simdjson/haswell/intrinsics.h"
|
||||
|
||||
//
|
||||
// The rest need to be inside the region
|
||||
//
|
||||
#include "simdjson/haswell/begin.h"
|
||||
|
||||
// Declarations
|
||||
#include "simdjson/generic/dom_parser_implementation.h"
|
||||
#include "simdjson/haswell/bitmanipulation.h"
|
||||
#include "simdjson/haswell/bitmask.h"
|
||||
#include "simdjson/haswell/simd.h"
|
||||
#include "simdjson/generic/jsoncharutils.h"
|
||||
#include "simdjson/generic/atomparsing.h"
|
||||
#include "simdjson/haswell/stringparsing.h"
|
||||
#include "simdjson/haswell/numberparsing.h"
|
||||
#include "simdjson/generic/implementation_simdjson_result_base.h"
|
||||
#include "simdjson/generic/ondemand.h"
|
||||
|
||||
// Inline definitions
|
||||
#include "simdjson/generic/implementation_simdjson_result_base-inl.h"
|
||||
#include "simdjson/generic/ondemand-inl.h"
|
||||
|
||||
#include "simdjson/haswell/end.h"
|
||||
|
||||
#endif // SIMDJSON_IMPLEMENTATION_HASWELL
|
||||
#endif // SIMDJSON_HASWELL_H
|
|
@ -0,0 +1,2 @@
|
|||
#define SIMDJSON_IMPLEMENTATION haswell
|
||||
SIMDJSON_TARGET_HASWELL
|
|
@ -1,8 +1,9 @@
|
|||
#ifndef SIMDJSON_HASWELL_BITMANIPULATION_H
|
||||
#define SIMDJSON_HASWELL_BITMANIPULATION_H
|
||||
|
||||
namespace {
|
||||
namespace simdjson {
|
||||
namespace SIMDJSON_IMPLEMENTATION {
|
||||
namespace {
|
||||
|
||||
// We sometimes call trailing_zero on inputs that are zero,
|
||||
// but the algorithms do not end up using the returned value.
|
||||
|
@ -53,7 +54,8 @@ simdjson_really_inline bool add_overflow(uint64_t value1, uint64_t value2,
|
|||
#endif
|
||||
}
|
||||
|
||||
} // namespace SIMDJSON_IMPLEMENTATION
|
||||
} // unnamed namespace
|
||||
} // namespace SIMDJSON_IMPLEMENTATION
|
||||
} // namespace simdjson
|
||||
|
||||
#endif // SIMDJSON_HASWELL_BITMANIPULATION_H
|
|
@ -1,8 +1,9 @@
|
|||
#ifndef SIMDJSON_HASWELL_BITMASK_H
|
||||
#define SIMDJSON_HASWELL_BITMASK_H
|
||||
|
||||
namespace {
|
||||
namespace simdjson {
|
||||
namespace SIMDJSON_IMPLEMENTATION {
|
||||
namespace {
|
||||
|
||||
//
|
||||
// Perform a "cumulative bitwise xor," flipping bits each time a 1 is encountered.
|
||||
|
@ -17,7 +18,8 @@ simdjson_really_inline uint64_t prefix_xor(const uint64_t bitmask) {
|
|||
return _mm_cvtsi128_si64(result);
|
||||
}
|
||||
|
||||
} // namespace SIMDJSON_IMPLEMENTATION
|
||||
} // unnamed namespace
|
||||
} // namespace SIMDJSON_IMPLEMENTATION
|
||||
} // namespace simdjson
|
||||
|
||||
#endif // SIMDJSON_HASWELL_BITMASK_H
|
|
@ -1,2 +1,2 @@
|
|||
#undef SIMDJSON_IMPLEMENTATION
|
||||
SIMDJSON_UNTARGET_REGION
|
||||
#undef SIMDJSON_IMPLEMENTATION
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue