remove trailing whitespace (#1284)
This commit is contained in:
parent
9f78559cc8
commit
af4db55e66
|
@ -18,7 +18,7 @@ Before submitting an issue, please ensure that you have read the documentation:
|
|||
**Describe the bug**
|
||||
A clear and concise description of what the bug is.
|
||||
|
||||
Note that a compiler warning is not a bug.
|
||||
Note that a compiler warning is not a bug.
|
||||
|
||||
**To Reproduce**
|
||||
Steps to reproduce the behaviour: provide a code sample if possible.
|
||||
|
@ -32,10 +32,10 @@ Note that a stack trace from your own program is not enough.
|
|||
- Compiler [e.g. Apple clang version 11.0.3 (clang-1103.0.32.59) x86_64-apple-darwin19.4.0]
|
||||
- Version [e.g. 22]
|
||||
|
||||
We support up-to-date 64-bit ARM and x64 FreeBSD, macOS, Windows and Linux systems. Please ensure that your configuration is supported before labelling the issue as a bug. In particular, we do not support legacy 32-bit systems.
|
||||
We support up-to-date 64-bit ARM and x64 FreeBSD, macOS, Windows and Linux systems. Please ensure that your configuration is supported before labelling the issue as a bug. In particular, we do not support legacy 32-bit systems.
|
||||
|
||||
**Indicate whether you are willing or able to provide a bug fix as a pull request**
|
||||
|
||||
If you plan to contribute to simdjson, please read our
|
||||
If you plan to contribute to simdjson, please read our
|
||||
* CONTRIBUTING guide: https://github.com/simdjson/simdjson/blob/master/CONTRIBUTING.md and our
|
||||
* HACKING guide: https://github.com/simdjson/simdjson/blob/master/HACKING.md
|
||||
|
|
|
@ -32,6 +32,6 @@ A clear and concise description of any alternative solutions or features you've
|
|||
Add any other context or screenshots about the feature request here.
|
||||
|
||||
**Are you willing to contribute code or documentation toward this new feature?**
|
||||
If you plan to contribute to simdjson, please read our
|
||||
If you plan to contribute to simdjson, please read our
|
||||
* CONTRIBUTING guide: https://github.com/simdjson/simdjson/blob/master/CONTRIBUTING.md and our
|
||||
* HACKING guide: https://github.com/simdjson/simdjson/blob/master/HACKING.md
|
||||
|
|
|
@ -27,6 +27,6 @@ Is your issue:
|
|||
4. A documentation issue? Can you suggest an improvement?
|
||||
|
||||
|
||||
If you plan to contribute to simdjson, please read our
|
||||
If you plan to contribute to simdjson, please read our
|
||||
* CONTRIBUTING guide: https://github.com/simdjson/simdjson/blob/master/CONTRIBUTING.md and our
|
||||
* HACKING guide: https://github.com/simdjson/simdjson/blob/master/HACKING.md
|
||||
|
|
|
@ -40,7 +40,7 @@ Table of Contents
|
|||
Quick Start
|
||||
-----------
|
||||
|
||||
|
||||
|
||||
The simdjson library is easily consumable with a single .h and .cpp file.
|
||||
|
||||
0. Prerequisites: `g++` (version 7 or better) or `clang++` (version 6 or better), and a 64-bit system with a command-line shell (e.g., Linux, macOS, FreeBSD). We also support programming environments like Visual Studio and Xcode, but different steps are needed.
|
||||
|
@ -168,7 +168,7 @@ instructions, reducing branch misprediction, and reducing data dependency to tak
|
|||
CPU's multiple execution cores.
|
||||
|
||||
Some people [enjoy reading our paper](https://arxiv.org/abs/1902.08318): A description of the design
|
||||
and implementation of simdjson is in our research article:
|
||||
and implementation of simdjson is in our research article:
|
||||
- Geoff Langdale, Daniel Lemire, [Parsing Gigabytes of JSON per Second](https://arxiv.org/abs/1902.08318), VLDB Journal 28 (6), 2019.
|
||||
|
||||
We have an in-depth paper focused on the UTF-8 validation:
|
||||
|
|
|
@ -87,8 +87,8 @@ static void serialize_big_string_to_string(State& state) {
|
|||
std::vector<char> content;
|
||||
content.push_back('\"');
|
||||
for(size_t i = 0 ; i < 100000; i ++) {
|
||||
content.push_back('0' + char(i%10)); // we add what looks like a long list of digits
|
||||
}
|
||||
content.push_back('0' + char(i%10)); // we add what looks like a long list of digits
|
||||
}
|
||||
content.push_back('\"');
|
||||
dom::element doc;
|
||||
simdjson::error_code error;
|
||||
|
@ -139,7 +139,7 @@ static void serialize_twitter_to_string(State& state) {
|
|||
}
|
||||
// we validate the result
|
||||
{
|
||||
auto serial = simdjson::to_string(doc);
|
||||
auto serial = simdjson::to_string(doc);
|
||||
dom::element doc2; // we parse the stringify output
|
||||
if ((error = parser.parse(serial).get(doc2))) { throw std::runtime_error("serialization error"); }
|
||||
auto serial2 = simdjson::to_string(doc2); // we stringify again
|
||||
|
@ -211,7 +211,7 @@ static void numbers_scan(State& state) {
|
|||
}
|
||||
benchmark::DoNotOptimize(container.data());
|
||||
benchmark::ClobberMemory();
|
||||
}
|
||||
}
|
||||
}
|
||||
BENCHMARK(numbers_scan);
|
||||
|
||||
|
@ -236,7 +236,7 @@ static void numbers_size_scan(State& state) {
|
|||
if(pos != container.size()) { cerr << "bad count" << endl; }
|
||||
benchmark::DoNotOptimize(container.data());
|
||||
benchmark::ClobberMemory();
|
||||
}
|
||||
}
|
||||
}
|
||||
BENCHMARK(numbers_size_scan);
|
||||
|
||||
|
@ -315,7 +315,7 @@ static void numbers_load_scan(State& state) {
|
|||
}
|
||||
benchmark::DoNotOptimize(container.data());
|
||||
benchmark::ClobberMemory();
|
||||
}
|
||||
}
|
||||
}
|
||||
BENCHMARK(numbers_load_scan);
|
||||
|
||||
|
@ -341,7 +341,7 @@ static void numbers_load_size_scan(State& state) {
|
|||
if(pos != container.size()) { cerr << "bad count" << endl; }
|
||||
benchmark::DoNotOptimize(container.data());
|
||||
benchmark::ClobberMemory();
|
||||
}
|
||||
}
|
||||
}
|
||||
BENCHMARK(numbers_load_size_scan);
|
||||
|
||||
|
@ -360,7 +360,7 @@ static void numbers_exceptions_scan(State& state) {
|
|||
}
|
||||
benchmark::DoNotOptimize(container.data());
|
||||
benchmark::ClobberMemory();
|
||||
}
|
||||
}
|
||||
}
|
||||
BENCHMARK(numbers_exceptions_scan);
|
||||
|
||||
|
@ -378,7 +378,7 @@ static void numbers_exceptions_size_scan(State& state) {
|
|||
if(pos != container.size()) { cerr << "bad count" << endl; }
|
||||
benchmark::DoNotOptimize(container.data());
|
||||
benchmark::ClobberMemory();
|
||||
}
|
||||
}
|
||||
}
|
||||
BENCHMARK(numbers_exceptions_size_scan);
|
||||
|
||||
|
@ -437,7 +437,7 @@ static void numbers_exceptions_load_scan(State& state) {
|
|||
}
|
||||
benchmark::DoNotOptimize(container.data());
|
||||
benchmark::ClobberMemory();
|
||||
}
|
||||
}
|
||||
}
|
||||
BENCHMARK(numbers_exceptions_load_scan);
|
||||
|
||||
|
@ -456,7 +456,7 @@ static void numbers_exceptions_load_size_scan(State& state) {
|
|||
if(pos != container.size()) { cerr << "bad count" << endl; }
|
||||
benchmark::DoNotOptimize(container.data());
|
||||
benchmark::ClobberMemory();
|
||||
}
|
||||
}
|
||||
}
|
||||
BENCHMARK(numbers_exceptions_load_size_scan);
|
||||
|
||||
|
@ -711,7 +711,7 @@ static void iterator_twitter_image_sizes(State& state) {
|
|||
if (!iter.up()) { return; } // back to entities
|
||||
}
|
||||
if (!iter.up()) { return; } // back to status
|
||||
}
|
||||
}
|
||||
} while (iter.next()); // next status
|
||||
}
|
||||
|
||||
|
|
|
@ -228,7 +228,7 @@ struct progress_bar {
|
|||
/**
|
||||
* The speed at which we can allocate memory is strictly system specific.
|
||||
* It depends on the OS and the runtime library. It is subject to various
|
||||
* system-specific knobs. It is not something that we can reasonably
|
||||
* system-specific knobs. It is not something that we can reasonably
|
||||
* benchmark with crude timings.
|
||||
* If someone wants to optimize how simdjson allocates memory, then it will
|
||||
* almost surely require a distinct benchmarking tool. What is meant by
|
||||
|
|
|
@ -95,7 +95,7 @@ if (SIMDJSON_IS_UNDER_GIT AND SIMDJSON_GIT AND Git_FOUND AND (GIT_VERSION_STRING
|
|||
else()
|
||||
if (CMAKE_GENERATOR MATCHES Ninja)
|
||||
message(STATUS "We disable the checkperf targets under Ninja.")
|
||||
else()
|
||||
else()
|
||||
message(STATUS "Either git is unavailable or else it is too old. We are disabling checkperf targets.")
|
||||
endif()
|
||||
endif ()
|
||||
|
|
|
@ -20,7 +20,7 @@ void remove_duplicates(std::vector<int64_t> &v) {
|
|||
|
||||
namespace distinct_user_id {
|
||||
template<typename T> static void DistinctUserID(benchmark::State &state);
|
||||
} // namespace
|
||||
} // namespace
|
||||
|
||||
//
|
||||
// Implementation
|
||||
|
|
|
@ -12,12 +12,12 @@ using namespace simdjson::builtin;
|
|||
|
||||
class OnDemand {
|
||||
public:
|
||||
OnDemand() {
|
||||
OnDemand() {
|
||||
if(!displayed_implementation) {
|
||||
std::cout << "On Demand implementation: " << builtin_implementation()->name() << std::endl;
|
||||
std::cout << "On Demand implementation: " << builtin_implementation()->name() << std::endl;
|
||||
displayed_implementation = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
simdjson_really_inline bool Run(const padded_string &json);
|
||||
simdjson_really_inline const std::vector<int64_t> &Result() { return ids; }
|
||||
simdjson_really_inline size_t ItemCount() { return ids.size(); }
|
||||
|
@ -39,20 +39,20 @@ simdjson_really_inline bool OnDemand::Run(const padded_string &json) {
|
|||
//
|
||||
// You might think that you do not need the braces, but
|
||||
// you do, otherwise you will get the wrong answer. That is
|
||||
// because you can only have one active object or array
|
||||
// at a time.
|
||||
// because you can only have one active object or array
|
||||
// at a time.
|
||||
{
|
||||
ondemand::object user = tweet["user"];
|
||||
int64_t id = user["id"];
|
||||
int64_t id = user["id"];
|
||||
ids.push_back(id);
|
||||
}
|
||||
// Not all tweets have a "retweeted_status", but when they do
|
||||
// Not all tweets have a "retweeted_status", but when they do
|
||||
// we want to go and find the user within.
|
||||
auto retweet = tweet["retweeted_status"];
|
||||
if(!retweet.error()) {
|
||||
ondemand::object retweet_content = retweet;
|
||||
ondemand::object reuser = retweet_content["user"];
|
||||
int64_t rid = reuser["id"];
|
||||
int64_t rid = reuser["id"];
|
||||
ids.push_back(rid);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -29,7 +29,7 @@ int main(int argc, char *argv[]) {
|
|||
<< std::endl;
|
||||
}
|
||||
simdjson::padded_string p;
|
||||
bench(filename, p);
|
||||
bench(filename, p);
|
||||
double meanval = 0;
|
||||
double maxval = 0;
|
||||
double minval = 10000;
|
||||
|
|
|
@ -45,7 +45,7 @@ static std::string build_json_array(size_t N) {
|
|||
myss << R"( "info": "some info")" << endl;
|
||||
myss << R"(})" << endl;
|
||||
string answer = myss.str();
|
||||
cout << "Creating a source file spanning " << (answer.size() + 512) / 1024 << " KB " << endl;
|
||||
cout << "Creating a source file spanning " << (answer.size() + 512) / 1024 << " KB " << endl;
|
||||
return answer;
|
||||
}
|
||||
|
||||
|
|
|
@ -29,7 +29,7 @@ static std::string build_json_array(size_t N) {
|
|||
myss << std::endl;
|
||||
myss << "]" << std::endl;
|
||||
std::string answer = myss.str();
|
||||
std::cout << "Creating a source file spanning " << (answer.size() + 512) / 1024 << " KB " << std::endl;
|
||||
std::cout << "Creating a source file spanning " << (answer.size() + 512) / 1024 << " KB " << std::endl;
|
||||
return answer;
|
||||
}
|
||||
|
||||
|
|
|
@ -52,7 +52,7 @@ public:
|
|||
simdjson_really_inline error_code visit_key(json_iterator &, const uint8_t * key) {
|
||||
switch(key[1]) {
|
||||
// Technically, we should check the other characters
|
||||
// in the key, but we are cheating to go as fast
|
||||
// in the key, but we are cheating to go as fast
|
||||
// as possible.
|
||||
case 'x':
|
||||
idx = GOT_X;
|
||||
|
@ -62,11 +62,11 @@ public:
|
|||
break;
|
||||
case 'z':
|
||||
idx = GOT_Z;
|
||||
break;
|
||||
break;
|
||||
default:
|
||||
idx = GOT_SOMETHING_ELSE;
|
||||
idx = GOT_SOMETHING_ELSE;
|
||||
}
|
||||
return SUCCESS;
|
||||
return SUCCESS;
|
||||
}
|
||||
simdjson_really_inline error_code visit_array_start(json_iterator &) { return SUCCESS; }
|
||||
simdjson_really_inline error_code visit_array_end(json_iterator &) { return SUCCESS; }
|
||||
|
|
|
@ -10,7 +10,7 @@
|
|||
#else // no __has_include
|
||||
// Please ensure that Linux headers have been installed.
|
||||
#include <asm/unistd.h> // for __NR_perf_event_open
|
||||
#endif
|
||||
#endif
|
||||
#include <linux/perf_event.h> // for perf event constants
|
||||
#include <sys/ioctl.h> // for ioctl
|
||||
#include <unistd.h> // for syscall
|
||||
|
|
|
@ -122,7 +122,7 @@ int main(int argc, char *argv[]) {
|
|||
BEST_TIME_NOCHECK(
|
||||
"despacing with std::minify", simdjson_stringme(p),, repeat, volume, !just_data);
|
||||
|
||||
|
||||
|
||||
memcpy(buffer, p.data(), p.size());
|
||||
size_t outlength;
|
||||
uint8_t *cbuffer = (uint8_t *)buffer;
|
||||
|
|
|
@ -192,14 +192,14 @@ bool bench(const char *filename, bool verbose, bool just_data,
|
|||
BEST_TIME("Boost.json", execute(sv), false, , repeat, volume, !just_data);
|
||||
}
|
||||
{
|
||||
|
||||
|
||||
auto execute = [&p]() -> bool {
|
||||
yyjson_doc *doc = yyjson_read(p.data(), p.size(), 0);
|
||||
bool is_ok = doc != nullptr;
|
||||
yyjson_doc_free(doc);
|
||||
return is_ok;
|
||||
};
|
||||
|
||||
|
||||
BEST_TIME("yyjson", execute(), true, , repeat, volume, !just_data);
|
||||
}
|
||||
#ifndef ALLPARSER
|
||||
|
|
|
@ -37,7 +37,7 @@ simdjson_really_inline bool Iter::Run(const padded_string &json) {
|
|||
|
||||
// Walk the document, parsing the tweets as we go
|
||||
|
||||
// { "statuses":
|
||||
// { "statuses":
|
||||
auto iter = parser.iterate_raw(json).value();
|
||||
if (!iter.start_object() || !iter.find_field_raw("statuses")) { return false; }
|
||||
// { "statuses": [
|
||||
|
|
|
@ -12,12 +12,12 @@ using namespace simdjson::builtin;
|
|||
|
||||
class OnDemand {
|
||||
public:
|
||||
OnDemand() {
|
||||
OnDemand() {
|
||||
if(!displayed_implementation) {
|
||||
std::cout << "On Demand implementation: " << builtin_implementation()->name() << std::endl;
|
||||
std::cout << "On Demand implementation: " << builtin_implementation()->name() << std::endl;
|
||||
displayed_implementation = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
simdjson_really_inline bool Run(const padded_string &json);
|
||||
simdjson_really_inline const std::vector<tweet> &Result() { return tweets; }
|
||||
simdjson_really_inline size_t ItemCount() { return tweets.size(); }
|
||||
|
|
|
@ -7,28 +7,28 @@
|
|||
// //////////////////////////////////////////////////////////////////////
|
||||
|
||||
/*
|
||||
The JsonCpp library's source code, including accompanying documentation,
|
||||
The JsonCpp library's source code, including accompanying documentation,
|
||||
tests and demonstration applications, are licensed under the following
|
||||
conditions...
|
||||
|
||||
Baptiste Lepilleur and The JsonCpp Authors explicitly disclaim copyright in all
|
||||
jurisdictions which recognize such a disclaimer. In such jurisdictions,
|
||||
Baptiste Lepilleur and The JsonCpp Authors explicitly disclaim copyright in all
|
||||
jurisdictions which recognize such a disclaimer. In such jurisdictions,
|
||||
this software is released into the Public Domain.
|
||||
|
||||
In jurisdictions which do not recognize Public Domain property (e.g. Germany as of
|
||||
2010), this software is Copyright (c) 2007-2010 by Baptiste Lepilleur and
|
||||
The JsonCpp Authors, and is released under the terms of the MIT License (see below).
|
||||
|
||||
In jurisdictions which recognize Public Domain property, the user of this
|
||||
software may choose to accept it either as 1) Public Domain, 2) under the
|
||||
conditions of the MIT License (see below), or 3) under the terms of dual
|
||||
In jurisdictions which recognize Public Domain property, the user of this
|
||||
software may choose to accept it either as 1) Public Domain, 2) under the
|
||||
conditions of the MIT License (see below), or 3) under the terms of dual
|
||||
Public Domain/MIT License conditions described here, as they choose.
|
||||
|
||||
The MIT License is about as close to Public Domain as a license can get, and is
|
||||
described in clear, concise terms at:
|
||||
|
||||
http://en.wikipedia.org/wiki/MIT_License
|
||||
|
||||
|
||||
The full text of the MIT License follows:
|
||||
|
||||
========================================================================
|
||||
|
|
|
@ -6,28 +6,28 @@
|
|||
// //////////////////////////////////////////////////////////////////////
|
||||
|
||||
/*
|
||||
The JsonCpp library's source code, including accompanying documentation,
|
||||
The JsonCpp library's source code, including accompanying documentation,
|
||||
tests and demonstration applications, are licensed under the following
|
||||
conditions...
|
||||
|
||||
Baptiste Lepilleur and The JsonCpp Authors explicitly disclaim copyright in all
|
||||
jurisdictions which recognize such a disclaimer. In such jurisdictions,
|
||||
Baptiste Lepilleur and The JsonCpp Authors explicitly disclaim copyright in all
|
||||
jurisdictions which recognize such a disclaimer. In such jurisdictions,
|
||||
this software is released into the Public Domain.
|
||||
|
||||
In jurisdictions which do not recognize Public Domain property (e.g. Germany as of
|
||||
2010), this software is Copyright (c) 2007-2010 by Baptiste Lepilleur and
|
||||
The JsonCpp Authors, and is released under the terms of the MIT License (see below).
|
||||
|
||||
In jurisdictions which recognize Public Domain property, the user of this
|
||||
software may choose to accept it either as 1) Public Domain, 2) under the
|
||||
conditions of the MIT License (see below), or 3) under the terms of dual
|
||||
In jurisdictions which recognize Public Domain property, the user of this
|
||||
software may choose to accept it either as 1) Public Domain, 2) under the
|
||||
conditions of the MIT License (see below), or 3) under the terms of dual
|
||||
Public Domain/MIT License conditions described here, as they choose.
|
||||
|
||||
The MIT License is about as close to Public Domain as a license can get, and is
|
||||
described in clear, concise terms at:
|
||||
|
||||
http://en.wikipedia.org/wiki/MIT_License
|
||||
|
||||
|
||||
The full text of the MIT License follows:
|
||||
|
||||
========================================================================
|
||||
|
|
|
@ -6,28 +6,28 @@
|
|||
// //////////////////////////////////////////////////////////////////////
|
||||
|
||||
/*
|
||||
The JsonCpp library's source code, including accompanying documentation,
|
||||
The JsonCpp library's source code, including accompanying documentation,
|
||||
tests and demonstration applications, are licensed under the following
|
||||
conditions...
|
||||
|
||||
Baptiste Lepilleur and The JsonCpp Authors explicitly disclaim copyright in all
|
||||
jurisdictions which recognize such a disclaimer. In such jurisdictions,
|
||||
Baptiste Lepilleur and The JsonCpp Authors explicitly disclaim copyright in all
|
||||
jurisdictions which recognize such a disclaimer. In such jurisdictions,
|
||||
this software is released into the Public Domain.
|
||||
|
||||
In jurisdictions which do not recognize Public Domain property (e.g. Germany as of
|
||||
2010), this software is Copyright (c) 2007-2010 by Baptiste Lepilleur and
|
||||
The JsonCpp Authors, and is released under the terms of the MIT License (see below).
|
||||
|
||||
In jurisdictions which recognize Public Domain property, the user of this
|
||||
software may choose to accept it either as 1) Public Domain, 2) under the
|
||||
conditions of the MIT License (see below), or 3) under the terms of dual
|
||||
In jurisdictions which recognize Public Domain property, the user of this
|
||||
software may choose to accept it either as 1) Public Domain, 2) under the
|
||||
conditions of the MIT License (see below), or 3) under the terms of dual
|
||||
Public Domain/MIT License conditions described here, as they choose.
|
||||
|
||||
The MIT License is about as close to Public Domain as a license can get, and is
|
||||
described in clear, concise terms at:
|
||||
|
||||
http://en.wikipedia.org/wiki/MIT_License
|
||||
|
||||
|
||||
The full text of the MIT License follows:
|
||||
|
||||
========================================================================
|
||||
|
|
|
@ -112,7 +112,7 @@ dom::element doc = parser.parse("[1,2,3]"_padded); // parse a string, the _padde
|
|||
```
|
||||
|
||||
The parsed document resulting from the `parser.load` and `parser.parse` calls depends on the `parser` instance. Thus the `parser` instance must remain in scope. Furthermore, you must have at most one parsed document in play per `parser` instance.
|
||||
You cannot copy a `parser` instance, you may only move it.
|
||||
You cannot copy a `parser` instance, you may only move it.
|
||||
|
||||
If you need to keep a document around long term, you can keep or move the parser instance. Note that moving a parser instance, or keeping one in a movable data structure like vector or map, can cause any outstanding `element`, `object` or `array` instances to be invalidated. If you need to store a parser in a movable data structure, you should use a `std::unique_ptr` to avoid this invalidation (e.g., `std::unique_ptr<dom::parser> parser(new dom::parser{})`).
|
||||
|
||||
|
|
|
@ -93,7 +93,7 @@ dom::element doc = parser.parse("[1,2,3]"_padded); // parse a string, the _padde
|
|||
```
|
||||
|
||||
The parsed document resulting from the `parser.load` and `parser.parse` calls depends on the `parser` instance. Thus the `parser` instance must remain in scope. Furthermore, you must have at most one parsed document in play per `parser` instance.
|
||||
You cannot copy a `parser` instance, you may only move it.
|
||||
You cannot copy a `parser` instance, you may only move it.
|
||||
|
||||
If you need to keep a document around long term, you can keep or move the parser instance. Note that moving a parser instance, or keeping one in a movable data structure like vector or map, can cause any outstanding `element`, `object` or `array` instances to be invalidated. If you need to store a parser in a movable data structure, you should use a `std::unique_ptr` to avoid this invalidation (e.g., `std::unique_ptr<dom::parser> parser(new dom::parser{})`).
|
||||
|
||||
|
|
18
doc/tape.md
18
doc/tape.md
|
@ -1,7 +1,7 @@
|
|||
|
||||
# Tape structure in simdjson
|
||||
# Tape structure in simdjson
|
||||
|
||||
We parse a JSON document to a tape. A tape is an array of 64-bit values. Each node encountered in the JSON document is written to the tape using one or more 64-bit tape elements; the layout of the tape is in "document order": elements are stored as they are encountered in the JSON document.
|
||||
We parse a JSON document to a tape. A tape is an array of 64-bit values. Each node encountered in the JSON document is written to the tape using one or more 64-bit tape elements; the layout of the tape is in "document order": elements are stored as they are encountered in the JSON document.
|
||||
|
||||
Throughout, little endian encoding is assumed. The tape is indexed starting at 0 (the first element is at index 0).
|
||||
|
||||
|
@ -70,7 +70,7 @@ The following is a dump of the content of the tape, with the first number of eac
|
|||
Most tape elements are written as `('c' << 56) + x` where `'c'` is some ASCII character determining the type of the element (out of 't', 'f', 'n', 'l', 'u', 'd', '"', '{', '}', '[', ']' ,'r') and where `x` is a 56-bit value called the payload. The payload is normally interpreted as an unsigned 56-bit integer. Note that 56-bit integers can be quite large.
|
||||
|
||||
|
||||
Performance consideration: We believe that accessing the tape in regular units of 64 bits is more important for performance than saving memory.
|
||||
Performance consideration: We believe that accessing the tape in regular units of 64 bits is more important for performance than saving memory.
|
||||
|
||||
## Simple JSON values
|
||||
|
||||
|
@ -91,7 +91,7 @@ Integer values are represented as two 64-bit tape elements:
|
|||
Float values are represented as two 64-bit tape elements:
|
||||
- The 64-bit value `('d' << 56)` followed by the 64-bit double value literally in standard IEEE 754 notation.
|
||||
|
||||
Performance consideration: We store numbers on the main tape because we believe that locality of reference is helpful for performance.
|
||||
Performance consideration: We store numbers on the main tape because we believe that locality of reference is helpful for performance.
|
||||
|
||||
## Root node
|
||||
|
||||
|
@ -109,20 +109,20 @@ Hint: We can read the first tape element to determine the length of the tape.
|
|||
|
||||
We prefix the string data itself by a 32-bit header to be interpreted as a 32-bit integer. It indicates the length of the string. The actual string data starts at an offset of 4 bytes.
|
||||
|
||||
We store string values using UTF-8 encoding with null termination on a separate tape. A string value is represented on the main tape as the 64-bit tape element `('"' << 56) + x` where the payload `x` is the location on the string tape of the null-terminated string.
|
||||
We store string values using UTF-8 encoding with null termination on a separate tape. A string value is represented on the main tape as the 64-bit tape element `('"' << 56) + x` where the payload `x` is the location on the string tape of the null-terminated string.
|
||||
|
||||
## Arrays
|
||||
## Arrays
|
||||
|
||||
JSON arrays are represented using two 64-bit tape elements.
|
||||
|
||||
- The first 64-bit tape element contains the value `('[' << 56) + x` where the payload `x` is 1 + the index of the second 64-bit tape element on the tape.
|
||||
- The first 64-bit tape element contains the value `('[' << 56) + x` where the payload `x` is 1 + the index of the second 64-bit tape element on the tape.
|
||||
- The second 64-bit tape element contains the value `(']' << 56) + x` where the payload `x` contains the index of the first 64-bit tape element on the tape.
|
||||
|
||||
All the content of the array is located between these two tape elements, including arrays and objects.
|
||||
|
||||
Performance consideration: We can skip the content of an array entirely by accessing the first 64-bit tape element, reading the payload and moving to the corresponding index on the tape.
|
||||
|
||||
## Objects
|
||||
## Objects
|
||||
|
||||
JSON objects are represented using two 64-bit tape elements.
|
||||
|
||||
|
@ -131,6 +131,6 @@ JSON objects are represented using two 64-bit tape elements.
|
|||
|
||||
In-between these two tape elements, we alternate between keys (which must be strings) and values. A value could be an object or an array.
|
||||
|
||||
All the content of the object is located between these two tape elements, including arrays and objects.
|
||||
All the content of the object is located between these two tape elements, including arrays and objects.
|
||||
|
||||
Performance consideration: We can skip the content of an object entirely by accessing the first 64-bit tape element, reading the payload and moving to the corresponding index on the tape.
|
||||
|
|
|
@ -31,12 +31,12 @@ variant=replay
|
|||
if [ ! -d build-$variant ] ; then
|
||||
mkdir build-$variant
|
||||
cd build-$variant
|
||||
|
||||
|
||||
cmake .. \
|
||||
$COMMON \
|
||||
-DCMAKE_BUILD_TYPE=Debug \
|
||||
-DSIMDJSON_FUZZ_LINKMAIN=On
|
||||
|
||||
|
||||
ninja all_fuzzers
|
||||
cd ..
|
||||
fi
|
||||
|
@ -58,7 +58,7 @@ fi
|
|||
variant=sanitizers-O3
|
||||
|
||||
if [ ! -d build-$variant ] ; then
|
||||
|
||||
|
||||
mkdir build-$variant
|
||||
cd build-$variant
|
||||
cmake .. \
|
||||
|
@ -68,7 +68,7 @@ variant=sanitizers-O3
|
|||
-DCMAKE_BUILD_TYPE=Debug \
|
||||
-DSIMDJSON_FUZZ_LINKMAIN=Off \
|
||||
-DSIMDJSON_FUZZ_LDFLAGS="-fsanitize=fuzzer"
|
||||
|
||||
|
||||
ninja all_fuzzers
|
||||
cd ..
|
||||
fi
|
||||
|
@ -76,7 +76,7 @@ variant=sanitizers-O3
|
|||
variant=sanitizers-O0
|
||||
|
||||
if [ ! -d build-$variant ] ; then
|
||||
|
||||
|
||||
mkdir build-$variant
|
||||
cd build-$variant
|
||||
cmake .. \
|
||||
|
@ -86,7 +86,7 @@ variant=sanitizers-O0
|
|||
-DCMAKE_BUILD_TYPE=Debug \
|
||||
-DSIMDJSON_FUZZ_LINKMAIN=Off \
|
||||
-DSIMDJSON_FUZZ_LDFLAGS="-fsanitize=fuzzer"
|
||||
|
||||
|
||||
ninja all_fuzzers
|
||||
cd ..
|
||||
fi
|
||||
|
@ -95,10 +95,10 @@ variant=sanitizers-O0
|
|||
# A fast fuzzer, for fast exploration rather than finding bugs.
|
||||
variant=fast
|
||||
if [ ! -d build-$variant ] ; then
|
||||
|
||||
|
||||
mkdir build-$variant
|
||||
cd build-$variant
|
||||
|
||||
|
||||
cmake .. \
|
||||
$COMMON \
|
||||
-DCMAKE_CXX_FLAGS="-fsanitize=fuzzer-no-link" \
|
||||
|
@ -106,7 +106,7 @@ variant=fast
|
|||
-DCMAKE_BUILD_TYPE=Release \
|
||||
-DSIMDJSON_FUZZ_LINKMAIN=Off \
|
||||
-DSIMDJSON_FUZZ_LDFLAGS="-fsanitize=fuzzer"
|
||||
|
||||
|
||||
ninja all_fuzzers
|
||||
cd ..
|
||||
fi
|
||||
|
|
|
@ -73,7 +73,7 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
|
|||
// or what hardware it runs on
|
||||
constexpr std::size_t Nimplementations_max=3;
|
||||
const std::size_t Nimplementations = supported_implementations.size();
|
||||
|
||||
|
||||
if(Nimplementations>Nimplementations_max) {
|
||||
//there is another backend added, please bump Nimplementations_max!
|
||||
std::abort();
|
||||
|
|
|
@ -12,7 +12,7 @@ NO_SANITIZE_UNDEFINED
|
|||
simdjson_really_inline int trailing_zeroes(uint64_t input_num) {
|
||||
#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO
|
||||
unsigned long ret;
|
||||
// Search the mask data from least significant bit (LSB)
|
||||
// Search the mask data from least significant bit (LSB)
|
||||
// to the most significant bit (MSB) for a set bit (1).
|
||||
_BitScanForward64(&ret, input_num);
|
||||
return (int)ret;
|
||||
|
@ -30,7 +30,7 @@ simdjson_really_inline uint64_t clear_lowest_bit(uint64_t input_num) {
|
|||
simdjson_really_inline int leading_zeroes(uint64_t input_num) {
|
||||
#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO
|
||||
unsigned long leading_zero = 0;
|
||||
// Search the mask data from most significant bit (MSB)
|
||||
// Search the mask data from most significant bit (MSB)
|
||||
// to least significant bit (LSB) for a set bit (1).
|
||||
if (_BitScanReverse64(&leading_zero, input_num))
|
||||
return (int)(63 - leading_zero);
|
||||
|
|
|
@ -13,15 +13,15 @@ namespace {
|
|||
simdjson_really_inline uint64_t prefix_xor(uint64_t bitmask) {
|
||||
/////////////
|
||||
// We could do this with PMULL, but it is apparently slow.
|
||||
//
|
||||
//
|
||||
//#ifdef __ARM_FEATURE_CRYPTO // some ARM processors lack this extension
|
||||
//return vmull_p64(-1ULL, bitmask);
|
||||
//#else
|
||||
// Analysis by @sebpop:
|
||||
// When diffing the assembly for src/stage1_find_marks.cpp I see that the eors are all spread out
|
||||
// in between other vector code, so effectively the extra cycles of the sequence do not matter
|
||||
// in between other vector code, so effectively the extra cycles of the sequence do not matter
|
||||
// because the GPR units are idle otherwise and the critical path is on the FP side.
|
||||
// Also the PMULL requires two extra fmovs: GPR->FP (3 cycles in N1, 5 cycles in A72 )
|
||||
// Also the PMULL requires two extra fmovs: GPR->FP (3 cycles in N1, 5 cycles in A72 )
|
||||
// and FP->GPR (2 cycles on N1 and 5 cycles on A72.)
|
||||
///////////
|
||||
bitmask ^= bitmask << 1;
|
||||
|
|
|
@ -423,7 +423,7 @@ simdjson_really_inline int8x16_t make_int8x16_t(int8_t x1, int8_t x2, int8_t x
|
|||
simd8x64(const simd8x64<T>& o) = delete; // no copy allowed
|
||||
simd8x64<T>& operator=(const simd8<T> other) = delete; // no assignment allowed
|
||||
simd8x64() = delete; // no default constructor allowed
|
||||
|
||||
|
||||
simdjson_really_inline simd8x64(const simd8<T> chunk0, const simd8<T> chunk1, const simd8<T> chunk2, const simd8<T> chunk3) : chunks{chunk0, chunk1, chunk2, chunk3} {}
|
||||
simdjson_really_inline simd8x64(const T ptr[64]) : chunks{simd8<T>::load(ptr), simd8<T>::load(ptr+16), simd8<T>::load(ptr+32), simd8<T>::load(ptr+48)} {}
|
||||
|
||||
|
|
|
@ -106,7 +106,7 @@ public:
|
|||
/**
|
||||
* Get the value at the given index. This function has linear-time complexity and
|
||||
* is equivalent to the following:
|
||||
*
|
||||
*
|
||||
* size_t i=0;
|
||||
* for (auto element : *this) {
|
||||
* if (i == index) { return element; }
|
||||
|
@ -115,7 +115,7 @@ public:
|
|||
* return INDEX_OUT_OF_BOUNDS;
|
||||
*
|
||||
* Avoid calling the at() function repeatedly.
|
||||
*
|
||||
*
|
||||
* @return The value at the given index, or:
|
||||
* - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length
|
||||
*/
|
||||
|
|
|
@ -94,7 +94,7 @@ simdjson_really_inline document_stream::document_stream() noexcept
|
|||
error{UNINITIALIZED}
|
||||
#ifdef SIMDJSON_THREADS_ENABLED
|
||||
, use_thread(false)
|
||||
#endif
|
||||
#endif
|
||||
{
|
||||
}
|
||||
|
||||
|
|
|
@ -22,13 +22,13 @@ struct stage1_worker {
|
|||
stage1_worker(stage1_worker&&) = delete;
|
||||
stage1_worker operator=(const stage1_worker&) = delete;
|
||||
~stage1_worker();
|
||||
/**
|
||||
/**
|
||||
* We only start the thread when it is needed, not at object construction, this may throw.
|
||||
* You should only call this once.
|
||||
* You should only call this once.
|
||||
**/
|
||||
void start_thread();
|
||||
/**
|
||||
* Start a stage 1 job. You should first call 'run', then 'finish'.
|
||||
/**
|
||||
* Start a stage 1 job. You should first call 'run', then 'finish'.
|
||||
* You must call start_thread once before.
|
||||
*/
|
||||
void run(document_stream * ds, dom::parser * stage1, size_t next_batch_start);
|
||||
|
@ -37,10 +37,10 @@ struct stage1_worker {
|
|||
|
||||
private:
|
||||
|
||||
/**
|
||||
/**
|
||||
* Normally, we would never stop the thread. But we do in the destructor.
|
||||
* This function is only safe assuming that you are not waiting for results. You
|
||||
* should have called run, then finish, and be done.
|
||||
* This function is only safe assuming that you are not waiting for results. You
|
||||
* should have called run, then finish, and be done.
|
||||
**/
|
||||
void stop_thread();
|
||||
|
||||
|
@ -49,8 +49,8 @@ private:
|
|||
dom::parser * stage1_thread_parser{};
|
||||
size_t _next_batch_start{};
|
||||
document_stream * owner{};
|
||||
/**
|
||||
* We have two state variables. This could be streamlined to one variable in the future but
|
||||
/**
|
||||
* We have two state variables. This could be streamlined to one variable in the future but
|
||||
* we use two for clarity.
|
||||
*/
|
||||
bool has_work{false};
|
||||
|
@ -108,7 +108,7 @@ public:
|
|||
simdjson_really_inline bool operator!=(const iterator &other) const noexcept;
|
||||
/**
|
||||
* @private
|
||||
*
|
||||
*
|
||||
* Gives the current index in the input document in bytes.
|
||||
*
|
||||
* document_stream stream = parser.parse_many(json,window);
|
||||
|
@ -116,15 +116,15 @@ public:
|
|||
* auto doc = *i;
|
||||
* size_t index = i.current_index();
|
||||
* }
|
||||
*
|
||||
*
|
||||
* This function (current_index()) is experimental and the usage
|
||||
* may change in future versions of simdjson: we find the API somewhat
|
||||
* awkward and we would like to offer something friendlier.
|
||||
* awkward and we would like to offer something friendlier.
|
||||
*/
|
||||
simdjson_really_inline size_t current_index() const noexcept;
|
||||
/**
|
||||
* @private
|
||||
*
|
||||
*
|
||||
* Gives a view of the current document.
|
||||
*
|
||||
* document_stream stream = parser.parse_many(json,window);
|
||||
|
@ -132,14 +132,14 @@ public:
|
|||
* auto doc = *i;
|
||||
* std::string_view v = i->source();
|
||||
* }
|
||||
*
|
||||
*
|
||||
* The returned string_view instance is simply a map to the (unparsed)
|
||||
* source string: it may thus include white-space characters and all manner
|
||||
* of padding.
|
||||
*
|
||||
*
|
||||
* This function (source()) is experimental and the usage
|
||||
* may change in future versions of simdjson: we find the API somewhat
|
||||
* awkward and we would like to offer something friendlier.
|
||||
* awkward and we would like to offer something friendlier.
|
||||
*/
|
||||
simdjson_really_inline std::string_view source() const noexcept;
|
||||
|
||||
|
@ -169,7 +169,7 @@ private:
|
|||
/**
|
||||
* Construct a document_stream. Does not allocate or parse anything until the iterator is
|
||||
* used.
|
||||
*
|
||||
*
|
||||
* @param parser is a reference to the parser instance used to generate this document_stream
|
||||
* @param buf is the raw byte buffer we need to process
|
||||
* @param len is the length of the raw byte buffer in bytes
|
||||
|
@ -237,7 +237,7 @@ private:
|
|||
|
||||
#ifdef SIMDJSON_THREADS_ENABLED
|
||||
/** Indicates whether we use threads. Note that this needs to be a constant during the execution of the parsing. */
|
||||
bool use_thread;
|
||||
bool use_thread;
|
||||
|
||||
inline void load_from_stage1_thread() noexcept;
|
||||
|
||||
|
|
|
@ -64,14 +64,14 @@ public:
|
|||
*/
|
||||
inline simdjson_result<object> get_object() const noexcept;
|
||||
/**
|
||||
* Cast this element to a null-terminated C string.
|
||||
*
|
||||
* Cast this element to a null-terminated C string.
|
||||
*
|
||||
* The string is guaranteed to be valid UTF-8.
|
||||
*
|
||||
* The get_c_str() function is equivalent to get<const char *>().
|
||||
*
|
||||
*
|
||||
* The length of the string is given by get_string_length(). Because JSON strings
|
||||
* may contain null characters, it may be incorrect to use strlen to determine the
|
||||
* may contain null characters, it may be incorrect to use strlen to determine the
|
||||
* string length.
|
||||
*
|
||||
* It is possible to get a single string_view instance which represents both the string
|
||||
|
@ -84,7 +84,7 @@ public:
|
|||
inline simdjson_result<const char *> get_c_str() const noexcept;
|
||||
/**
|
||||
* Gives the length in bytes of the string.
|
||||
*
|
||||
*
|
||||
* It is possible to get a single string_view instance which represents both the string
|
||||
* content and its length: see get_string().
|
||||
*
|
||||
|
@ -93,8 +93,8 @@ public:
|
|||
*/
|
||||
inline simdjson_result<size_t> get_string_length() const noexcept;
|
||||
/**
|
||||
* Cast this element to a string.
|
||||
*
|
||||
* Cast this element to a string.
|
||||
*
|
||||
* The string is guaranteed to be valid UTF-8.
|
||||
*
|
||||
* Equivalent to get<std::string_view>().
|
||||
|
@ -279,7 +279,7 @@ public:
|
|||
|
||||
/**
|
||||
* Read this element as a null-terminated UTF-8 string.
|
||||
*
|
||||
*
|
||||
* Be mindful that JSON allows strings to contain null characters.
|
||||
*
|
||||
* Does *not* convert other types to a string; requires that the JSON type of the element was
|
||||
|
@ -402,7 +402,7 @@ public:
|
|||
* dom::parser parser;
|
||||
* object obj = parser.parse(R"({ "": { "a": [ 10, 20, 30 ] }})"_padded);
|
||||
* obj.at_pointer("//a/1") == 20
|
||||
*
|
||||
*
|
||||
* @return The value associated with the given JSON pointer, or:
|
||||
* - NO_SUCH_FIELD if a field does not exist in an object
|
||||
* - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length
|
||||
|
@ -411,21 +411,21 @@ public:
|
|||
*/
|
||||
inline simdjson_result<element> at_pointer(const std::string_view json_pointer) const noexcept;
|
||||
|
||||
#ifndef SIMDJSON_DISABLE_DEPRECATED_API
|
||||
#ifndef SIMDJSON_DISABLE_DEPRECATED_API
|
||||
/**
|
||||
*
|
||||
*
|
||||
* Version 0.4 of simdjson used an incorrect interpretation of the JSON Pointer standard
|
||||
* and allowed the following :
|
||||
*
|
||||
*
|
||||
* dom::parser parser;
|
||||
* element doc = parser.parse(R"({ "foo": { "a": [ 10, 20, 30 ] }})"_padded);
|
||||
* doc.at("foo/a/1") == 20
|
||||
*
|
||||
*
|
||||
* Though it is intuitive, it is not compliant with RFC 6901
|
||||
* https://tools.ietf.org/html/rfc6901
|
||||
*
|
||||
* https://tools.ietf.org/html/rfc6901
|
||||
*
|
||||
* For standard compliance, use the at_pointer function instead.
|
||||
*
|
||||
*
|
||||
* @return The value associated with the given JSON pointer, or:
|
||||
* - NO_SUCH_FIELD if a field does not exist in an object
|
||||
* - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length
|
||||
|
|
|
@ -29,14 +29,14 @@ static constexpr size_t DEFAULT_BATCH_SIZE = 1000000;
|
|||
* as well as memory for a single document. The parsed document is overwritten on each parse.
|
||||
*
|
||||
* This class cannot be copied, only moved, to avoid unintended allocations.
|
||||
*
|
||||
* @note Moving a parser instance may invalidate "dom::element" instances. If you need to
|
||||
*
|
||||
* @note Moving a parser instance may invalidate "dom::element" instances. If you need to
|
||||
* preserve both the "dom::element" instances and the parser, consider wrapping the parser
|
||||
* instance in a std::unique_ptr instance:
|
||||
*
|
||||
*
|
||||
* std::unique_ptr<dom::parser> parser(new dom::parser{});
|
||||
* auto error = parser->load(f).get(root);
|
||||
*
|
||||
*
|
||||
* You can then move std::unique_ptr safely.
|
||||
*
|
||||
* @note This is not thread safe: one parser cannot produce two documents at the same time!
|
||||
|
@ -78,10 +78,10 @@ public:
|
|||
*
|
||||
* dom::parser parser;
|
||||
* const element doc = parser.load("jsonexamples/twitter.json");
|
||||
*
|
||||
*
|
||||
* The function is eager: the file's content is loaded in memory inside the parser instance
|
||||
* and immediately parsed. The file can be deleted after the `parser.load` call.
|
||||
*
|
||||
*
|
||||
* ### IMPORTANT: Document Lifetime
|
||||
*
|
||||
* The JSON document still lives in the parser: this is the most efficient way to parse JSON
|
||||
|
@ -90,8 +90,8 @@ public:
|
|||
*
|
||||
* Moving the parser instance is safe, but it invalidates the element instances. You may store
|
||||
* the parser instance without moving it by wrapping it inside an `unique_ptr` instance like
|
||||
* so: `std::unique_ptr<dom::parser> parser(new dom::parser{});`.
|
||||
*
|
||||
* so: `std::unique_ptr<dom::parser> parser(new dom::parser{});`.
|
||||
*
|
||||
* ### Parser Capacity
|
||||
*
|
||||
* If the parser's current capacity is less than the file length, it will allocate enough capacity
|
||||
|
@ -112,7 +112,7 @@ public:
|
|||
*
|
||||
* dom::parser parser;
|
||||
* element doc = parser.parse(buf, len);
|
||||
*
|
||||
*
|
||||
* The function eagerly parses the input: the input can be modified and discarded after
|
||||
* the `parser.parse(buf, len)` call has completed.
|
||||
*
|
||||
|
@ -121,10 +121,10 @@ public:
|
|||
* The JSON document still lives in the parser: this is the most efficient way to parse JSON
|
||||
* documents because it reuses the same buffers, but you *must* use the document before you
|
||||
* destroy the parser or call parse() again.
|
||||
*
|
||||
*
|
||||
* Moving the parser instance is safe, but it invalidates the element instances. You may store
|
||||
* the parser instance without moving it by wrapping it inside an `unique_ptr` instance like
|
||||
* so: `std::unique_ptr<dom::parser> parser(new dom::parser{});`.
|
||||
* so: `std::unique_ptr<dom::parser> parser(new dom::parser{});`.
|
||||
*
|
||||
* ### REQUIRED: Buffer Padding
|
||||
*
|
||||
|
@ -132,22 +132,22 @@ public:
|
|||
* those bytes are initialized to, as long as they are allocated.
|
||||
*
|
||||
* If realloc_if_needed is true (the default), it is assumed that the buffer does *not* have enough padding,
|
||||
* and it is copied into an enlarged temporary buffer before parsing. Thus the following is safe:
|
||||
*
|
||||
* and it is copied into an enlarged temporary buffer before parsing. Thus the following is safe:
|
||||
*
|
||||
* const char *json = R"({"key":"value"})";
|
||||
* const size_t json_len = std::strlen(json);
|
||||
* simdjson::dom::parser parser;
|
||||
* simdjson::dom::element element = parser.parse(json, json_len);
|
||||
*
|
||||
* If you set realloc_if_needed to false (e.g., parser.parse(json, json_len, false)),
|
||||
*
|
||||
* If you set realloc_if_needed to false (e.g., parser.parse(json, json_len, false)),
|
||||
* you must provide a buffer with at least SIMDJSON_PADDING extra bytes at the end.
|
||||
* The benefit of setting realloc_if_needed to false is that you avoid a temporary
|
||||
* memory allocation and a copy.
|
||||
*
|
||||
*
|
||||
* The padded bytes may be read. It is not important how you initialize
|
||||
* these bytes though we recommend a sensible default like null character values or spaces.
|
||||
* For example, the following low-level code is safe:
|
||||
*
|
||||
*
|
||||
* const char *json = R"({"key":"value"})";
|
||||
* const size_t json_len = std::strlen(json);
|
||||
* std::unique_ptr<char[]> padded_json_copy{new char[json_len + SIMDJSON_PADDING]};
|
||||
|
@ -197,11 +197,11 @@ public:
|
|||
*
|
||||
* The file is loaded in memory and can be safely deleted after the `parser.load_many(path)`
|
||||
* function has returned. The memory is held by the `parser` instance.
|
||||
*
|
||||
*
|
||||
* The function is lazy: it may be that no more than one JSON document at a time is parsed.
|
||||
* And, possibly, no document may have been parsed when the `parser.load_many(path)` function
|
||||
* returned.
|
||||
*
|
||||
*
|
||||
* ### Format
|
||||
*
|
||||
* The file must contain a series of one or more JSON documents, concatenated into a single
|
||||
|
@ -212,7 +212,7 @@ public:
|
|||
* Documents that consist of an object or array may omit the whitespace between them, concatenating
|
||||
* with no separator. Documents that consist of a single primitive (i.e. documents that are not
|
||||
* arrays or objects) MUST be separated with whitespace.
|
||||
*
|
||||
*
|
||||
* The documents must not exceed batch_size bytes (by default 1MB) or they will fail to parse.
|
||||
* Setting batch_size to excessively large or excessively small values may negatively impact the
|
||||
* performance.
|
||||
|
@ -245,7 +245,7 @@ public:
|
|||
* If the parser's current capacity is less than batch_size, it will allocate enough capacity
|
||||
* to handle it (up to max_capacity).
|
||||
*
|
||||
* @param path File name pointing at the concatenated JSON to parse.
|
||||
* @param path File name pointing at the concatenated JSON to parse.
|
||||
* @param batch_size The batch size to use. MUST be larger than the largest document. The sweet
|
||||
* spot is cache-related: small enough to fit in cache, yet big enough to
|
||||
* parse as many documents as possible in one tight loop.
|
||||
|
@ -272,25 +272,25 @@ public:
|
|||
* The function is lazy: it may be that no more than one JSON document at a time is parsed.
|
||||
* And, possibly, no document may have been parsed when the `parser.load_many(path)` function
|
||||
* returned.
|
||||
*
|
||||
*
|
||||
* The caller is responsible for ensuring that the input string data remains unchanged and is
|
||||
* not deleted during the loop. In particular, the following is unsafe and will not compile:
|
||||
*
|
||||
*
|
||||
* auto docs = parser.parse_many("[\"temporary data\"]"_padded);
|
||||
* // here the string "[\"temporary data\"]" may no longer exist in memory
|
||||
* // the parser instance may not have even accessed the input yet
|
||||
* for (element doc : docs) {
|
||||
* cout << std::string(doc["title"]) << endl;
|
||||
* }
|
||||
*
|
||||
* The following is safe:
|
||||
*
|
||||
*
|
||||
* The following is safe:
|
||||
*
|
||||
* auto json = "[\"temporary data\"]"_padded;
|
||||
* auto docs = parser.parse_many(json);
|
||||
* for (element doc : docs) {
|
||||
* cout << std::string(doc["title"]) << endl;
|
||||
* }
|
||||
*
|
||||
*
|
||||
* ### Format
|
||||
*
|
||||
* The buffer must contain a series of one or more JSON documents, concatenated into a single
|
||||
|
@ -301,7 +301,7 @@ public:
|
|||
* documents that consist of an object or array may omit the whitespace between them, concatenating
|
||||
* with no separator. Documents that consist of a single primitive (i.e. documents that are not
|
||||
* arrays or objects) MUST be separated with whitespace.
|
||||
*
|
||||
*
|
||||
* The documents must not exceed batch_size bytes (by default 1MB) or they will fail to parse.
|
||||
* Setting batch_size to excessively large or excessively small values may negatively impact the
|
||||
* performance.
|
||||
|
@ -360,7 +360,7 @@ public:
|
|||
/** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */
|
||||
inline simdjson_result<document_stream> parse_many(const padded_string &s, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept;
|
||||
inline simdjson_result<document_stream> parse_many(const padded_string &&s, size_t batch_size) = delete;// unsafe
|
||||
|
||||
|
||||
/** @private We do not want to allow implicit conversion from C string to std::string. */
|
||||
simdjson_result<document_stream> parse_many(const char *buf, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept = delete;
|
||||
|
||||
|
|
|
@ -122,7 +122,7 @@ simdjson_really_inline void mini_formatter::number(int64_t x) {
|
|||
simdjson_really_inline void mini_formatter::number(double x) {
|
||||
char number_buffer[24];
|
||||
// Currently, passing the nullptr to the second argument is
|
||||
// safe because our implementation does not check the second
|
||||
// safe because our implementation does not check the second
|
||||
// argument.
|
||||
char *newp = internal::to_chars(number_buffer, nullptr, x);
|
||||
buffer.insert(buffer.end(), number_buffer, newp);
|
||||
|
@ -135,7 +135,7 @@ simdjson_really_inline void mini_formatter::end_object() { one_char('}'); }
|
|||
simdjson_really_inline void mini_formatter::comma() { one_char(','); }
|
||||
|
||||
|
||||
simdjson_really_inline void mini_formatter::true_atom() {
|
||||
simdjson_really_inline void mini_formatter::true_atom() {
|
||||
const char * s = "true";
|
||||
buffer.insert(buffer.end(), s, s + 4);
|
||||
}
|
||||
|
@ -157,29 +157,29 @@ simdjson_really_inline void mini_formatter::string(std::string_view unescaped) {
|
|||
size_t i = 0;
|
||||
// Fast path for the case where we have no control character, no ", and no backslash.
|
||||
// This should include most keys.
|
||||
constexpr static bool needs_escaping[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
|
||||
constexpr static bool needs_escaping[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
|
||||
for(;i + 8 <= unescaped.length(); i += 8) {
|
||||
for(;i + 8 <= unescaped.length(); i += 8) {
|
||||
// Poor man's vectorization. This could get much faster if we used SIMD.
|
||||
if(needs_escaping[uint8_t(unescaped[i])] | needs_escaping[uint8_t(unescaped[i+1])]
|
||||
if(needs_escaping[uint8_t(unescaped[i])] | needs_escaping[uint8_t(unescaped[i+1])]
|
||||
| needs_escaping[uint8_t(unescaped[i+2])] | needs_escaping[uint8_t(unescaped[i+3])]
|
||||
| needs_escaping[uint8_t(unescaped[i+4])] | needs_escaping[uint8_t(unescaped[i+5])]
|
||||
| needs_escaping[uint8_t(unescaped[i+4])] | needs_escaping[uint8_t(unescaped[i+5])]
|
||||
| needs_escaping[uint8_t(unescaped[i+6])] | needs_escaping[uint8_t(unescaped[i+7])]
|
||||
) { break; }
|
||||
}
|
||||
for(;i < unescaped.length(); i++) {
|
||||
for(;i < unescaped.length(); i++) {
|
||||
if(needs_escaping[uint8_t(unescaped[i])]) { break; }
|
||||
}
|
||||
// The following is also possible and omits a 256-byte table, but it is slower:
|
||||
// for (; (i < unescaped.length()) && (uint8_t(unescaped[i]) > 0x1F)
|
||||
// for (; (i < unescaped.length()) && (uint8_t(unescaped[i]) > 0x1F)
|
||||
// && (unescaped[i] != '\"') && (unescaped[i] != '\\'); i++) {}
|
||||
|
||||
// At least for long strings, the following should be fast. We could
|
||||
|
|
|
@ -14,7 +14,7 @@ namespace simdjson {
|
|||
|
||||
/**
|
||||
* The string_builder template and mini_formatter class
|
||||
* are not part of our public API and are subject to change
|
||||
* are not part of our public API and are subject to change
|
||||
* at any time!
|
||||
*/
|
||||
namespace internal {
|
||||
|
@ -28,7 +28,7 @@ class mini_formatter;
|
|||
* the string_builder template could support both minification
|
||||
* and prettification, and various other tradeoffs.
|
||||
*/
|
||||
template <class formatter = mini_formatter>
|
||||
template <class formatter = mini_formatter>
|
||||
class string_builder {
|
||||
public:
|
||||
/** Construct an initially empty builder, would print the empty string **/
|
||||
|
@ -41,12 +41,12 @@ public:
|
|||
inline void append(simdjson::dom::object value);
|
||||
/** Reset the builder (so that it would print the empty string) **/
|
||||
simdjson_really_inline void clear();
|
||||
/**
|
||||
/**
|
||||
* Get access to the string. The string_view is owned by the builder
|
||||
* and it is invalid to use it after the string_builder has been
|
||||
* and it is invalid to use it after the string_builder has been
|
||||
* destroyed.
|
||||
* However you can make a copy of the string_view on memory that you
|
||||
* own.
|
||||
* own.
|
||||
*/
|
||||
simdjson_really_inline std::string_view str() const;
|
||||
/** Append a key_value_pair to the builder (to be printed) **/
|
||||
|
@ -91,9 +91,9 @@ public:
|
|||
simdjson_really_inline void string(std::string_view unescaped);
|
||||
/** Clears out the content. **/
|
||||
simdjson_really_inline void clear();
|
||||
/**
|
||||
/**
|
||||
* Get access to the buffer; it is owned by the instance, but
|
||||
* the user can make a copy.
|
||||
* the user can make a copy.
|
||||
**/
|
||||
simdjson_really_inline std::string_view str() const;
|
||||
|
||||
|
@ -116,13 +116,13 @@ namespace dom {
|
|||
* @param value The element.
|
||||
* @throw if there is an error with the underlying output stream. simdjson itself will not throw.
|
||||
*/
|
||||
inline std::ostream& operator<<(std::ostream& out, simdjson::dom::element value) {
|
||||
inline std::ostream& operator<<(std::ostream& out, simdjson::dom::element value) {
|
||||
simdjson::internal::string_builder<> sb;
|
||||
sb.append(value);
|
||||
return (out << sb.str());
|
||||
}
|
||||
#if SIMDJSON_EXCEPTIONS
|
||||
inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result<simdjson::dom::element> x) {
|
||||
inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result<simdjson::dom::element> x) {
|
||||
if (x.error()) { throw simdjson::simdjson_error(x.error()); }
|
||||
return (out << x.value());
|
||||
}
|
||||
|
@ -134,13 +134,13 @@ inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result<sim
|
|||
* @param value The array.
|
||||
* @throw if there is an error with the underlying output stream. simdjson itself will not throw.
|
||||
*/
|
||||
inline std::ostream& operator<<(std::ostream& out, simdjson::dom::array value) {
|
||||
inline std::ostream& operator<<(std::ostream& out, simdjson::dom::array value) {
|
||||
simdjson::internal::string_builder<> sb;
|
||||
sb.append(value);
|
||||
return (out << sb.str());
|
||||
}
|
||||
#if SIMDJSON_EXCEPTIONS
|
||||
inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result<simdjson::dom::array> x) {
|
||||
inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result<simdjson::dom::array> x) {
|
||||
if (x.error()) { throw simdjson::simdjson_error(x.error()); }
|
||||
return (out << x.value());
|
||||
}
|
||||
|
@ -152,17 +152,17 @@ inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result<sim
|
|||
* @param value The object.
|
||||
* @throw if there is an error with the underlying output stream. simdjson itself will not throw.
|
||||
*/
|
||||
inline std::ostream& operator<<(std::ostream& out, simdjson::dom::object value) {
|
||||
inline std::ostream& operator<<(std::ostream& out, simdjson::dom::object value) {
|
||||
simdjson::internal::string_builder<> sb;
|
||||
sb.append(value);
|
||||
return (out << sb.str());
|
||||
}
|
||||
#if SIMDJSON_EXCEPTIONS
|
||||
inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result<simdjson::dom::object> x) {
|
||||
inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result<simdjson::dom::object> x) {
|
||||
if (x.error()) { throw simdjson::simdjson_error(x.error()); }
|
||||
return (out << x.value());
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
} // namespace dom
|
||||
|
||||
/**
|
||||
|
@ -173,10 +173,10 @@ inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result<si
|
|||
* cout << to_string(doc) << endl; // prints [1,2,3]
|
||||
*
|
||||
*/
|
||||
template <class T>
|
||||
template <class T>
|
||||
std::string to_string(T x) {
|
||||
// in C++, to_string is standard: http://www.cplusplus.com/reference/string/to_string/
|
||||
// Currently minify and to_string are identical but in the future, they may
|
||||
// Currently minify and to_string are identical but in the future, they may
|
||||
// differ.
|
||||
simdjson::internal::string_builder<> sb;
|
||||
sb.append(x);
|
||||
|
@ -184,12 +184,12 @@ std::string to_string(T x) {
|
|||
return std::string(answer.data(), answer.size());
|
||||
}
|
||||
#if SIMDJSON_EXCEPTIONS
|
||||
template <class T>
|
||||
template <class T>
|
||||
std::string to_string(simdjson_result<T> x) {
|
||||
if (x.error()) { throw simdjson_error(x.error()); }
|
||||
return to_string(x.value());
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Minifies a JSON element or document, printing the smallest possible valid JSON.
|
||||
|
@ -199,18 +199,18 @@ std::string to_string(simdjson_result<T> x) {
|
|||
* cout << minify(doc) << endl; // prints [1,2,3]
|
||||
*
|
||||
*/
|
||||
template <class T>
|
||||
template <class T>
|
||||
std::string minify(T x) {
|
||||
return to_string(x);
|
||||
}
|
||||
|
||||
#if SIMDJSON_EXCEPTIONS
|
||||
template <class T>
|
||||
template <class T>
|
||||
std::string minify(simdjson_result<T> x) {
|
||||
if (x.error()) { throw simdjson_error(x.error()); }
|
||||
return to_string(x.value());
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
} // namespace simdjson
|
||||
|
|
|
@ -29,7 +29,7 @@ static unsigned char _BitScanReverse64(unsigned long* ret, uint64_t x) {
|
|||
simdjson_really_inline int leading_zeroes(uint64_t input_num) {
|
||||
#ifdef _MSC_VER
|
||||
unsigned long leading_zero = 0;
|
||||
// Search the mask data from most significant bit (MSB)
|
||||
// Search the mask data from most significant bit (MSB)
|
||||
// to least significant bit (LSB) for a set bit (1).
|
||||
if (_BitScanReverse64(&leading_zero, input_num))
|
||||
return (int)(63 - leading_zero);
|
||||
|
|
|
@ -25,7 +25,7 @@ namespace numberparsing {
|
|||
namespace {
|
||||
// Convert a mantissa, an exponent and a sign bit into an ieee64 double.
|
||||
// The real_exponent needs to be in [0, 2046] (technically real_exponent = 2047 would be acceptable).
|
||||
// The mantissa should be in [0,1<<53). The bit at index (1ULL << 52) will be zeroed.
|
||||
// The mantissa should be in [0,1<<53). The bit at index (1ULL << 52) will be zeroed.
|
||||
simdjson_really_inline double to_double(uint64_t mantissa, uint64_t real_exponent, bool negative) {
|
||||
double d;
|
||||
mantissa &= ~(1ULL << 52);
|
||||
|
@ -149,7 +149,7 @@ simdjson_really_inline bool compute_float_64(int64_t power, uint64_t i, bool neg
|
|||
// We want the most significant 64 bits of the product. We know
|
||||
// this will be non-zero because the most significant bit of i is
|
||||
// 1.
|
||||
const uint32_t index = 2 * uint32_t(power - simdjson::internal::smallest_power);
|
||||
const uint32_t index = 2 * uint32_t(power - simdjson::internal::smallest_power);
|
||||
// Optimization: It may be that materializing the index as a variable might confuse some compilers and prevent effective complex-addressing loads. (Done for code clarity.)
|
||||
//
|
||||
// The full_multiplication function computes the 128-bit product of two 64-bit words
|
||||
|
@ -158,7 +158,7 @@ simdjson_really_inline bool compute_float_64(int64_t power, uint64_t i, bool neg
|
|||
// to the 64-bit most significant bits of the product.
|
||||
simdjson::internal::value128 firstproduct = jsoncharutils::full_multiplication(i, simdjson::internal::power_of_five_128[index]);
|
||||
// Both i and power_of_five_128[index] have their most significant bit set to 1 which
|
||||
// implies that either the most or the second most significant bit of the product
|
||||
// implies that either the most or the second most significant bit of the product
|
||||
// is 1. We pack values in this manner for efficiency reasons: it maximizes the use
|
||||
// we make of the product. It also makes it easy to reason about the product: there is
|
||||
// 0 or 1 leading zero in the product.
|
||||
|
@ -173,17 +173,17 @@ simdjson_really_inline bool compute_float_64(int64_t power, uint64_t i, bool neg
|
|||
// Consider the scenario where q>=0. Then 5^q may not fit in 64-bits. Doing
|
||||
// the full computation is wasteful. So we do what is called a "truncated
|
||||
// multiplication".
|
||||
// We take the most significant 64-bits, and we put them in
|
||||
// We take the most significant 64-bits, and we put them in
|
||||
// power_of_five_128[index]. Usually, that's good enough to approximate i * 5^q
|
||||
// to the desired approximation using one multiplication. Sometimes it does not suffice.
|
||||
// to the desired approximation using one multiplication. Sometimes it does not suffice.
|
||||
// Then we store the next most significant 64 bits in power_of_five_128[index + 1], and
|
||||
// then we get a better approximation to i * 5^q. In very rare cases, even that
|
||||
// will not suffice, though it is seemingly very hard to find such a scenario.
|
||||
//
|
||||
//
|
||||
// That's for when q>=0. The logic for q<0 is somewhat similar but it is somewhat
|
||||
// more complicated.
|
||||
//
|
||||
// There is an extra layer of complexity in that we need more than 55 bits of
|
||||
// There is an extra layer of complexity in that we need more than 55 bits of
|
||||
// accuracy in the round-to-even scenario.
|
||||
//
|
||||
// The full_multiplication function computes the 128-bit product of two 64-bit words
|
||||
|
@ -216,7 +216,7 @@ simdjson_really_inline bool compute_float_64(int64_t power, uint64_t i, bool neg
|
|||
if(-real_exponent + 1 >= 64) { // if we have more than 64 bits below the minimum exponent, you have a zero for sure.
|
||||
d = 0.0;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
// next line is safe because -real_exponent + 1 < 64 (larger values returned above)
|
||||
mantissa >>= -real_exponent + 1;
|
||||
// Thankfully, we can't have both "round-to-even" and subnormals because
|
||||
|
@ -229,7 +229,7 @@ simdjson_really_inline bool compute_float_64(int64_t power, uint64_t i, bool neg
|
|||
// whereas 0x40000000000000 x 2^-1023-53 is normal. Now, we need to round
|
||||
// up 0x3fffffffffffff x 2^-1023-53 and once we do, we are no longer
|
||||
// subnormal, but we can only know this after rounding.
|
||||
// So we only declare a subnormal if we are smaller than the threshold.
|
||||
// So we only declare a subnormal if we are smaller than the threshold.
|
||||
real_exponent = (mantissa < (uint64_t(1) << 52)) ? 0 : 1;
|
||||
d = to_double(mantissa, real_exponent, negative);
|
||||
return true;
|
||||
|
@ -239,7 +239,7 @@ simdjson_really_inline bool compute_float_64(int64_t power, uint64_t i, bool neg
|
|||
// which we guard against.
|
||||
// If we have lots of trailing zeros, we may fall right between two
|
||||
// floating-point values.
|
||||
//
|
||||
//
|
||||
// The round-to-even cases take the form of a number 2m+1 which is in (2^53,2^54]
|
||||
// times a power of two. That is, it is right between a number with binary significand
|
||||
// m and another number with binary significand m+1; and it must be the case
|
||||
|
@ -250,11 +250,11 @@ simdjson_really_inline bool compute_float_64(int64_t power, uint64_t i, bool neg
|
|||
// When q >= 0, we must have that (2m+1) is divisible by 5^q, so 5^q <= 2^54. We have that
|
||||
// 5^23 <= 2^54 and it is the last power of five to qualify, so q <= 23.
|
||||
// When q<0, we have w >= (2m+1) x 5^{-q}. We must have that w<2^{64} so
|
||||
// (2m+1) x 5^{-q} < 2^{64}. We have that 2m+1>2^{53}. Hence, we must have
|
||||
// (2m+1) x 5^{-q} < 2^{64}. We have that 2m+1>2^{53}. Hence, we must have
|
||||
// 2^{53} x 5^{-q} < 2^{64}.
|
||||
// Hence we have 5^{-q} < 2^{11} or q>= -4.
|
||||
// Hence we have 5^{-q} < 2^{11} or q>= -4.
|
||||
//
|
||||
// We require lower <= 1 and not lower == 0 because we could not prove that
|
||||
// We require lower <= 1 and not lower == 0 because we could not prove that
|
||||
// lower == 0 is implied; but we could prove that lower <= 1 is a necessary and sufficient test.
|
||||
if (simdjson_unlikely((lower <= 1) && (power >= -4) && (power <= 23) && ((mantissa & 3) == 1))) {
|
||||
if((mantissa << (upperbit + 64 - 53 - 2)) == upper) {
|
||||
|
@ -462,7 +462,7 @@ simdjson_really_inline error_code write_float(const uint8_t *const src, bool neg
|
|||
// Observe that 18446744073709551615e-343 == 0, i.e. (2**64 - 1) e -343 is zero
|
||||
// so something x 10^-343 goes to zero, but not so with something x 10^-342.
|
||||
static_assert(simdjson::internal::smallest_power <= -342, "smallest_power is not small enough");
|
||||
//
|
||||
//
|
||||
if((exponent < simdjson::internal::smallest_power) || (i == 0)) {
|
||||
WRITE_DOUBLE(0, src, writer);
|
||||
return SUCCESS;
|
||||
|
|
|
@ -2,7 +2,7 @@ namespace simdjson {
|
|||
namespace SIMDJSON_IMPLEMENTATION {
|
||||
/**
|
||||
* A fast, simple, DOM-like interface that parses JSON as you use it.
|
||||
*
|
||||
*
|
||||
* Designed for maximum speed and a lower memory profile.
|
||||
*/
|
||||
namespace ondemand {
|
||||
|
|
|
@ -18,7 +18,7 @@ namespace ondemand {
|
|||
// Next state. In this state, depth > iter->depth, at_start == false, and error == SUCCESS.
|
||||
//
|
||||
// ## Error States
|
||||
//
|
||||
//
|
||||
// In error states, we will yield exactly one more value before stopping. iter->depth == depth
|
||||
// and at_start is always false. We decrement after yielding the error, moving to the Finished
|
||||
// state.
|
||||
|
|
|
@ -14,7 +14,7 @@ class array {
|
|||
public:
|
||||
/**
|
||||
* Create a new invalid array.
|
||||
*
|
||||
*
|
||||
* Exists so you can declare a variable and later assign to it before use.
|
||||
*/
|
||||
simdjson_really_inline array() noexcept = default;
|
||||
|
@ -52,7 +52,7 @@ protected:
|
|||
static simdjson_really_inline simdjson_result<array> start(json_iterator_ref &&iter) noexcept;
|
||||
/**
|
||||
* Begin array iteration.
|
||||
*
|
||||
*
|
||||
* This version of the method should be called after the initial [ has been verified, and is
|
||||
* intended for use by switch statements that check the type of a value.
|
||||
*
|
||||
|
@ -79,7 +79,7 @@ protected:
|
|||
|
||||
/**
|
||||
* Iterator marking current position.
|
||||
*
|
||||
*
|
||||
* iter.is_alive() == false indicates iteration is complete.
|
||||
*/
|
||||
json_iterator_ref iter{};
|
||||
|
|
|
@ -10,7 +10,7 @@ class document;
|
|||
|
||||
/**
|
||||
* A forward-only JSON array.
|
||||
*
|
||||
*
|
||||
* This is an input_iterator, meaning:
|
||||
* - It is forward-only
|
||||
* - * must be called exactly once per element.
|
||||
|
@ -30,7 +30,7 @@ public:
|
|||
|
||||
/**
|
||||
* Get the current element.
|
||||
*
|
||||
*
|
||||
* Part of the std::iterator interface.
|
||||
*/
|
||||
simdjson_really_inline simdjson_result<value> operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION.
|
||||
|
@ -38,7 +38,7 @@ public:
|
|||
* Check if we are at the end of the JSON.
|
||||
*
|
||||
* Part of the std::iterator interface.
|
||||
*
|
||||
*
|
||||
* @return true if there are no more elements in the JSON array.
|
||||
*/
|
||||
simdjson_really_inline bool operator==(const array_iterator<T> &) noexcept;
|
||||
|
@ -46,13 +46,13 @@ public:
|
|||
* Check if there are more elements in the JSON array.
|
||||
*
|
||||
* Part of the std::iterator interface.
|
||||
*
|
||||
*
|
||||
* @return true if there are more elements in the JSON array.
|
||||
*/
|
||||
simdjson_really_inline bool operator!=(const array_iterator<T> &) noexcept;
|
||||
/**
|
||||
* Move to the next element.
|
||||
*
|
||||
*
|
||||
* Part of the std::iterator interface.
|
||||
*/
|
||||
simdjson_really_inline array_iterator<T> &operator++() noexcept;
|
||||
|
|
|
@ -25,7 +25,7 @@ public:
|
|||
|
||||
/**
|
||||
* Create a new invalid document.
|
||||
*
|
||||
*
|
||||
* Exists so you can declare a variable and later assign to it before use.
|
||||
*/
|
||||
simdjson_really_inline document() noexcept = default;
|
||||
|
@ -73,7 +73,7 @@ public:
|
|||
simdjson_really_inline simdjson_result<double> get_double() noexcept;
|
||||
/**
|
||||
* Cast this JSON value to a string.
|
||||
*
|
||||
*
|
||||
* The string is guaranteed to be valid UTF-8.
|
||||
*
|
||||
* Equivalent to get<std::string_view>().
|
||||
|
@ -85,7 +85,7 @@ public:
|
|||
simdjson_really_inline simdjson_result<std::string_view> get_string() & noexcept;
|
||||
/**
|
||||
* Cast this JSON value to a raw_json_string.
|
||||
*
|
||||
*
|
||||
* The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n).
|
||||
*
|
||||
* @returns A pointer to the raw JSON for the given string.
|
||||
|
@ -101,7 +101,7 @@ public:
|
|||
simdjson_really_inline simdjson_result<bool> get_bool() noexcept;
|
||||
/**
|
||||
* Checks if this JSON value is null.
|
||||
*
|
||||
*
|
||||
* @returns Whether the value is null.
|
||||
*/
|
||||
simdjson_really_inline bool is_null() noexcept;
|
||||
|
@ -110,7 +110,7 @@ public:
|
|||
* Get this value as the given type.
|
||||
*
|
||||
* Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool
|
||||
*
|
||||
*
|
||||
* @returns A value of the given type, parsed from the JSON.
|
||||
* @returns INCORRECT_TYPE If the JSON value is not the given type.
|
||||
*/
|
||||
|
@ -122,7 +122,7 @@ public:
|
|||
* Get this value as the given type.
|
||||
*
|
||||
* Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool
|
||||
*
|
||||
*
|
||||
* @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized.
|
||||
* @returns INCORRECT_TYPE If the JSON value is not an object.
|
||||
* @returns SUCCESS If the parse succeeded and the out parameter was set to the value.
|
||||
|
@ -169,7 +169,7 @@ public:
|
|||
simdjson_really_inline operator double() noexcept(false);
|
||||
/**
|
||||
* Cast this JSON value to a string.
|
||||
*
|
||||
*
|
||||
* The string is guaranteed to be valid UTF-8.
|
||||
*
|
||||
* Equivalent to get<std::string_view>().
|
||||
|
@ -181,7 +181,7 @@ public:
|
|||
simdjson_really_inline operator std::string_view() & noexcept(false);
|
||||
/**
|
||||
* Cast this JSON value to a raw_json_string.
|
||||
*
|
||||
*
|
||||
* The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n).
|
||||
*
|
||||
* @returns A pointer to the raw JSON for the given string.
|
||||
|
@ -215,7 +215,7 @@ public:
|
|||
*
|
||||
* This method may only be called once on a given value. If you want to look up multiple fields,
|
||||
* you must first get the object using value.get_object() or object(value).
|
||||
*
|
||||
*
|
||||
* @param key The key to look up.
|
||||
* @returns INCORRECT_TYPE If the JSON value is not an array.
|
||||
*/
|
||||
|
@ -225,7 +225,7 @@ public:
|
|||
*
|
||||
* This method may only be called once on a given value. If you want to look up multiple fields,
|
||||
* you must first get the object using value.get_object() or object(value).
|
||||
*
|
||||
*
|
||||
* @param key The key to look up.
|
||||
* @returns INCORRECT_TYPE If the JSON value is not an array.
|
||||
*/
|
||||
|
@ -239,7 +239,7 @@ protected:
|
|||
static simdjson_really_inline document start(ondemand::json_iterator &&iter) noexcept;
|
||||
/**
|
||||
* Set json to null if the result is successful.
|
||||
*
|
||||
*
|
||||
* Convenience function for value-getters.
|
||||
*/
|
||||
template<typename T>
|
||||
|
|
|
@ -6,16 +6,16 @@ namespace ondemand {
|
|||
|
||||
/**
|
||||
* A JSON field (key/value pair) in an object.
|
||||
*
|
||||
*
|
||||
* Returned from object iteration.
|
||||
*
|
||||
*
|
||||
* Extends from std::pair<raw_json_string, value> so you can use C++ algorithms that rely on pairs.
|
||||
*/
|
||||
class field : public std::pair<raw_json_string, value> {
|
||||
public:
|
||||
/**
|
||||
* Create a new invalid field.
|
||||
*
|
||||
*
|
||||
* Exists so you can declare a variable and later assign to it before use.
|
||||
*/
|
||||
simdjson_really_inline field() noexcept;
|
||||
|
@ -29,7 +29,7 @@ public:
|
|||
* Get the key as a string_view (for higher speed, consider raw_key).
|
||||
* We deliberately use a more cumbersome name (unescaped_key) to force users
|
||||
* to think twice about using it.
|
||||
*
|
||||
*
|
||||
* This consumes the key: once you have called unescaped_key(), you cannot
|
||||
* call it again nor can you call key().
|
||||
*/
|
||||
|
|
|
@ -117,7 +117,7 @@ simdjson_warn_unused simdjson_really_inline bool json_iterator::started_array()
|
|||
advance();
|
||||
return false;
|
||||
}
|
||||
logger::log_start_value(*this, "array");
|
||||
logger::log_start_value(*this, "array");
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -223,7 +223,7 @@ simdjson_warn_unused simdjson_result<uint64_t> json_iterator::consume_root_uint6
|
|||
return parse_root_uint64(advance());
|
||||
}
|
||||
simdjson_warn_unused simdjson_result<int64_t> json_iterator::parse_root_int64(const uint8_t *json) noexcept {
|
||||
uint8_t tmpbuf[20+1]; // -<19 digits> is the longest possible integer
|
||||
uint8_t tmpbuf[20+1]; // -<19 digits> is the longest possible integer
|
||||
if (!copy_to_buffer(json, tmpbuf)) { logger::log_error(*this, "Root number more than 20 characters"); return NUMBER_ERROR; }
|
||||
logger::log_value(*this, "int64", "");
|
||||
auto result = numberparsing::parse_integer(tmpbuf);
|
||||
|
|
|
@ -12,7 +12,7 @@ class json_iterator_ref;
|
|||
|
||||
/**
|
||||
* Iterates through JSON, with structure-sensitive algorithms.
|
||||
*
|
||||
*
|
||||
* @private This is not intended for external use.
|
||||
*/
|
||||
class json_iterator : public token_iterator {
|
||||
|
@ -48,17 +48,17 @@ public:
|
|||
* Start an object iteration after the user has already checked and moved past the {.
|
||||
*
|
||||
* Does not move the iterator.
|
||||
*
|
||||
*
|
||||
* @returns Whether the object had any fields (returns false for empty).
|
||||
*/
|
||||
simdjson_warn_unused simdjson_really_inline bool started_object() noexcept;
|
||||
|
||||
/**
|
||||
* Moves to the next field in an object.
|
||||
*
|
||||
*
|
||||
* Looks for , and }. If } is found, the object is finished and the iterator advances past it.
|
||||
* Otherwise, it advances to the next value.
|
||||
*
|
||||
*
|
||||
* @return whether there is another field in the object.
|
||||
* @error TAPE_ERROR If there is a comma missing between fields.
|
||||
*/
|
||||
|
@ -78,7 +78,7 @@ public:
|
|||
* Find the next field with the given key.
|
||||
*
|
||||
* Assumes you have called next_field() or otherwise matched the previous value.
|
||||
*
|
||||
*
|
||||
* Key is *raw JSON,* meaning it will be matched against the verbatim JSON without attempting to
|
||||
* unescape it. This works well for typical ASCII and UTF-8 keys (almost all of them), but may
|
||||
* fail to match some keys with escapes (\u, \n, etc.).
|
||||
|
@ -112,10 +112,10 @@ public:
|
|||
|
||||
/**
|
||||
* Moves to the next element in an array.
|
||||
*
|
||||
*
|
||||
* Looks for , and ]. If ] is found, the array is finished and the iterator advances past it.
|
||||
* Otherwise, it advances to the next value.
|
||||
*
|
||||
*
|
||||
* @return Whether there is another element in the array.
|
||||
* @error TAPE_ERROR If there is a comma missing between elements.
|
||||
*/
|
||||
|
@ -154,7 +154,7 @@ public:
|
|||
|
||||
/**
|
||||
* Skips to the end of a JSON object or array.
|
||||
*
|
||||
*
|
||||
* @return true if this was the end of an array, false if it was the end of an object.
|
||||
*/
|
||||
simdjson_warn_unused simdjson_really_inline error_code skip_container() noexcept;
|
||||
|
@ -176,7 +176,7 @@ public:
|
|||
|
||||
/**
|
||||
* Report an error, preventing further iteration.
|
||||
*
|
||||
*
|
||||
* @param error The error to report. Must not be SUCCESS, UNINITIALIZED, INCORRECT_TYPE, or NO_SUCH_FIELD.
|
||||
* @param message An error message to report with the error.
|
||||
*/
|
||||
|
@ -191,13 +191,13 @@ protected:
|
|||
ondemand::parser *parser{};
|
||||
/**
|
||||
* Next free location in the string buffer.
|
||||
*
|
||||
*
|
||||
* Used by raw_json_string::unescape() to have a place to unescape strings to.
|
||||
*/
|
||||
uint8_t *current_string_buf_loc{};
|
||||
/**
|
||||
* JSON error, if there is one.
|
||||
*
|
||||
*
|
||||
* INCORRECT_TYPE and NO_SUCH_FIELD are *not* stored here, ever.
|
||||
*
|
||||
* PERF NOTE: we *hope* this will be elided into control flow, as it is only used (a) in the first
|
||||
|
|
|
@ -18,7 +18,7 @@ namespace ondemand {
|
|||
// Next state. In this state, depth > iter->depth, at_start == false, and error == SUCCESS.
|
||||
//
|
||||
// ## Error States
|
||||
//
|
||||
//
|
||||
// In error states, we will yield exactly one more value before stopping. iter->depth == depth
|
||||
// and at_start is always false. We decrement after yielding the error, moving to the Finished
|
||||
// state.
|
||||
|
|
|
@ -11,7 +11,7 @@ class object {
|
|||
public:
|
||||
/**
|
||||
* Create a new invalid object.
|
||||
*
|
||||
*
|
||||
* Exists so you can declare a variable and later assign to it before use.
|
||||
*/
|
||||
simdjson_really_inline object() noexcept = default;
|
||||
|
@ -57,7 +57,7 @@ protected:
|
|||
json_iterator_ref iter{};
|
||||
/**
|
||||
* Whether we are at the start.
|
||||
*
|
||||
*
|
||||
* PERF NOTE: this should be elided into inline control flow: it is only used for the first []
|
||||
* or * call, and SSA optimizers commonly do first-iteration loop optimization.
|
||||
*/
|
||||
|
|
|
@ -10,7 +10,7 @@ class object_iterator {
|
|||
public:
|
||||
/**
|
||||
* Create a new invalid object_iterator.
|
||||
*
|
||||
*
|
||||
* Exists so you can declare a variable and later assign to it before use.
|
||||
*/
|
||||
simdjson_really_inline object_iterator() noexcept = default;
|
||||
|
|
|
@ -24,7 +24,7 @@ simdjson_warn_unused simdjson_really_inline simdjson_result<document> parser::it
|
|||
}
|
||||
|
||||
// Run stage 1.
|
||||
SIMDJSON_TRY( dom_parser.stage1((const uint8_t *)buf.data(), buf.size(), false) );
|
||||
SIMDJSON_TRY( dom_parser.stage1((const uint8_t *)buf.data(), buf.size(), false) );
|
||||
return document::start(this);
|
||||
}
|
||||
|
||||
|
@ -35,7 +35,7 @@ simdjson_warn_unused simdjson_really_inline simdjson_result<json_iterator> parse
|
|||
}
|
||||
|
||||
// Run stage 1.
|
||||
SIMDJSON_TRY( dom_parser.stage1((const uint8_t *)buf.data(), buf.size(), false) );
|
||||
SIMDJSON_TRY( dom_parser.stage1((const uint8_t *)buf.data(), buf.size(), false) );
|
||||
return json_iterator(this);
|
||||
}
|
||||
|
||||
|
|
|
@ -32,17 +32,17 @@ public:
|
|||
|
||||
/**
|
||||
* Start iterating an on-demand JSON document.
|
||||
*
|
||||
*
|
||||
* ondemand::parser parser;
|
||||
* document doc = parser.iterate(json);
|
||||
*
|
||||
*
|
||||
* ### IMPORTANT: Buffer Lifetime
|
||||
*
|
||||
*
|
||||
* Because parsing is done while you iterate, you *must* keep the JSON buffer around at least as
|
||||
* long as the document iteration.
|
||||
*
|
||||
*
|
||||
* ### IMPORTANT: Document Lifetime
|
||||
*
|
||||
*
|
||||
* Only one iteration at a time can happen per parser, and the parser *must* be kept alive during
|
||||
* iteration to ensure intermediate buffers can be accessed. Any document must be destroyed before
|
||||
* you call parse() again or destroy the parser.
|
||||
|
@ -53,7 +53,7 @@ public:
|
|||
* those bytes are initialized to, as long as they are allocated.
|
||||
*
|
||||
* @param json The JSON to parse.
|
||||
*
|
||||
*
|
||||
* @return The document, or an error:
|
||||
* - MEMALLOC if realloc_if_needed the parser does not have enough capacity, and memory
|
||||
* allocation fails.
|
||||
|
@ -66,19 +66,19 @@ public:
|
|||
simdjson_warn_unused simdjson_result<document> iterate(const std::string &json) & noexcept = delete;
|
||||
/**
|
||||
* @private
|
||||
*
|
||||
*
|
||||
* Start iterating an on-demand JSON document.
|
||||
*
|
||||
*
|
||||
* ondemand::parser parser;
|
||||
* json_iterator doc = parser.iterate(json);
|
||||
*
|
||||
*
|
||||
* ### IMPORTANT: Buffer Lifetime
|
||||
*
|
||||
*
|
||||
* Because parsing is done while you iterate, you *must* keep the JSON buffer around at least as
|
||||
* long as the document iteration.
|
||||
*
|
||||
*
|
||||
* ### IMPORTANT: Document Lifetime
|
||||
*
|
||||
*
|
||||
* Only one iteration at a time can happen per parser, and the parser *must* be kept alive during
|
||||
* iteration to ensure intermediate buffers can be accessed. Any document must be destroyed before
|
||||
* you call parse() again or destroy the parser.
|
||||
|
@ -89,7 +89,7 @@ public:
|
|||
* those bytes are initialized to, as long as they are allocated.
|
||||
*
|
||||
* @param json The JSON to parse.
|
||||
*
|
||||
*
|
||||
* @return The iterator, or an error:
|
||||
* - MEMALLOC if realloc_if_needed the parser does not have enough capacity, and memory
|
||||
* allocation fails.
|
||||
|
|
|
@ -13,11 +13,11 @@ class parser;
|
|||
*
|
||||
* (In other words, a pointer to the beginning of a string, just after the start quote, inside a
|
||||
* JSON file.)
|
||||
*
|
||||
*
|
||||
* This class is deliberately simplistic and has little functionality. You can
|
||||
* compare two raw_json_string instances, or compare a raw_json_string with a string_view, but
|
||||
* that is pretty much all you can do.
|
||||
*
|
||||
*
|
||||
* They originate typically from field instance which in turn represent key-value pairs from
|
||||
* object instances. From a field instance, you get the raw_json_string instance by calling key().
|
||||
* You can, if you want a more usable string_view instance, call the unescaped_key() method
|
||||
|
@ -27,7 +27,7 @@ class raw_json_string {
|
|||
public:
|
||||
/**
|
||||
* Create a new invalid raw_json_string.
|
||||
*
|
||||
*
|
||||
* Exists so you can declare a variable and later assign to it before use.
|
||||
*/
|
||||
simdjson_really_inline raw_json_string() noexcept = default;
|
||||
|
@ -37,15 +37,15 @@ public:
|
|||
|
||||
/**
|
||||
* Create a new invalid raw_json_string pointed at the given location in the JSON.
|
||||
*
|
||||
*
|
||||
* The given location must be just *after* the beginning quote (") in the JSON file.
|
||||
*
|
||||
*
|
||||
* It *must* be terminated by a ", and be a valid JSON string.
|
||||
*/
|
||||
simdjson_really_inline raw_json_string(const uint8_t * _buf) noexcept;
|
||||
/**
|
||||
* Get the raw pointer to the beginning of the string in the JSON (just after the ").
|
||||
*
|
||||
*
|
||||
* It is possible for this function to return a null pointer if the instance
|
||||
* has outlived its existence.
|
||||
*/
|
||||
|
@ -65,11 +65,11 @@ private:
|
|||
|
||||
/**
|
||||
* Unescape this JSON string, replacing \\ with \, \n with newline, etc.
|
||||
*
|
||||
*
|
||||
* ## IMPORTANT: string_view lifetime
|
||||
*
|
||||
*
|
||||
* The string_view is only valid as long as the bytes in dst.
|
||||
*
|
||||
*
|
||||
* @param dst A pointer to a buffer at least large enough to write this string as well as a \0.
|
||||
* dst will be updated to the next unused location (just after the \0 written out at
|
||||
* the end of this string).
|
||||
|
@ -79,11 +79,11 @@ private:
|
|||
simdjson_really_inline simdjson_warn_unused simdjson_result<std::string_view> unescape(uint8_t *&dst) const noexcept;
|
||||
/**
|
||||
* Unescape this JSON string, replacing \\ with \, \n with newline, etc.
|
||||
*
|
||||
*
|
||||
* ## IMPORTANT: string_view lifetime
|
||||
*
|
||||
*
|
||||
* The string_view is only valid until the next parse() call on the parser.
|
||||
*
|
||||
*
|
||||
* @param iter A json_iterator, which contains a buffer where the string will be written.
|
||||
*/
|
||||
simdjson_really_inline simdjson_warn_unused simdjson_result<std::string_view> unescape(json_iterator &iter) const noexcept;
|
||||
|
|
|
@ -12,7 +12,7 @@ class token_iterator {
|
|||
public:
|
||||
/**
|
||||
* Create a new invalid token_iterator.
|
||||
*
|
||||
*
|
||||
* Exists so you can declare a variable and later assign to it before use.
|
||||
*/
|
||||
simdjson_really_inline token_iterator() noexcept = default;
|
||||
|
|
|
@ -17,7 +17,7 @@ class value {
|
|||
public:
|
||||
/**
|
||||
* Create a new invalid value.
|
||||
*
|
||||
*
|
||||
* Exists so you can declare a variable and later assign to it before use.
|
||||
*/
|
||||
simdjson_really_inline value() noexcept = default;
|
||||
|
@ -36,7 +36,7 @@ public:
|
|||
* Get this value as the given type.
|
||||
*
|
||||
* Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool
|
||||
*
|
||||
*
|
||||
* @returns A value of the given type, parsed from the JSON.
|
||||
* @returns INCORRECT_TYPE If the JSON value is not the given type.
|
||||
*/
|
||||
|
@ -48,7 +48,7 @@ public:
|
|||
* Get this value as the given type.
|
||||
*
|
||||
* Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool
|
||||
*
|
||||
*
|
||||
* @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized.
|
||||
* @returns INCORRECT_TYPE If the JSON value is not an object.
|
||||
* @returns SUCCESS If the parse succeeded and the out parameter was set to the value.
|
||||
|
@ -109,7 +109,7 @@ public:
|
|||
|
||||
/**
|
||||
* Cast this JSON value to a string.
|
||||
*
|
||||
*
|
||||
* The string is guaranteed to be valid UTF-8.
|
||||
*
|
||||
* Equivalent to get<std::string_view>().
|
||||
|
@ -124,7 +124,7 @@ public:
|
|||
|
||||
/**
|
||||
* Cast this JSON value to a raw_json_string.
|
||||
*
|
||||
*
|
||||
* The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n).
|
||||
*
|
||||
* @returns A pointer to the raw JSON for the given string.
|
||||
|
@ -146,7 +146,7 @@ public:
|
|||
|
||||
/**
|
||||
* Checks if this JSON value is null.
|
||||
*
|
||||
*
|
||||
* @returns Whether the value is null.
|
||||
*/
|
||||
simdjson_really_inline bool is_null() && noexcept;
|
||||
|
@ -197,7 +197,7 @@ public:
|
|||
simdjson_really_inline operator double() & noexcept(false);
|
||||
/**
|
||||
* Cast this JSON value to a string.
|
||||
*
|
||||
*
|
||||
* The string is guaranteed to be valid UTF-8.
|
||||
*
|
||||
* Equivalent to get<std::string_view>().
|
||||
|
@ -211,7 +211,7 @@ public:
|
|||
simdjson_really_inline operator std::string_view() & noexcept(false);
|
||||
/**
|
||||
* Cast this JSON value to a raw_json_string.
|
||||
*
|
||||
*
|
||||
* The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n).
|
||||
*
|
||||
* @returns A pointer to the raw JSON for the given string.
|
||||
|
@ -235,7 +235,7 @@ public:
|
|||
* Begin array iteration.
|
||||
*
|
||||
* Part of the std::iterable interface.
|
||||
*
|
||||
*
|
||||
* @returns INCORRECT_TYPE If the JSON value is not an array.
|
||||
*/
|
||||
simdjson_really_inline simdjson_result<array_iterator<value>> begin() & noexcept;
|
||||
|
|
|
@ -14,7 +14,7 @@ simdjson_really_inline int trailing_zeroes(uint64_t input_num) {
|
|||
return (int)_tzcnt_u64(input_num);
|
||||
#else // SIMDJSON_REGULAR_VISUAL_STUDIO
|
||||
////////
|
||||
// You might expect the next line to be equivalent to
|
||||
// You might expect the next line to be equivalent to
|
||||
// return (int)_tzcnt_u64(input_num);
|
||||
// but the generated code differs and might be less efficient?
|
||||
////////
|
||||
|
|
|
@ -132,11 +132,11 @@ namespace simd {
|
|||
// next line just loads the 64-bit values thintable_epi8[mask1] and
|
||||
// thintable_epi8[mask2] into a 128-bit register, using only
|
||||
// two instructions on most compilers.
|
||||
__m256i shufmask = _mm256_set_epi64x(thintable_epi8[mask4], thintable_epi8[mask3],
|
||||
__m256i shufmask = _mm256_set_epi64x(thintable_epi8[mask4], thintable_epi8[mask3],
|
||||
thintable_epi8[mask2], thintable_epi8[mask1]);
|
||||
// we increment by 0x08 the second half of the mask and so forth
|
||||
shufmask =
|
||||
_mm256_add_epi8(shufmask, _mm256_set_epi32(0x18181818, 0x18181818,
|
||||
_mm256_add_epi8(shufmask, _mm256_set_epi32(0x18181818, 0x18181818,
|
||||
0x10101010, 0x10101010, 0x08080808, 0x08080808, 0, 0));
|
||||
// this is the version "nearly pruned"
|
||||
__m256i pruned = _mm256_shuffle_epi8(*this, shufmask);
|
||||
|
|
|
@ -72,12 +72,12 @@ public:
|
|||
* @return the name of the implementation, e.g. "haswell", "westmere", "arm64"
|
||||
*/
|
||||
virtual const std::string &description() const { return _description; }
|
||||
|
||||
|
||||
/**
|
||||
* The instruction sets this implementation is compiled against
|
||||
* and the current CPU match. This function may poll the current CPU/system
|
||||
* and should therefore not be called too often if performance is a concern.
|
||||
*
|
||||
*
|
||||
*
|
||||
* @return true if the implementation can be safely used on the current system (determined at runtime)
|
||||
*/
|
||||
|
@ -123,9 +123,9 @@ public:
|
|||
* @return the error code, or SUCCESS if there was no error.
|
||||
*/
|
||||
simdjson_warn_unused virtual error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept = 0;
|
||||
|
||||
|
||||
/**
|
||||
|
||||
|
||||
/**
|
||||
* Validate the UTF-8 string.
|
||||
*
|
||||
* Overridden by each implementation.
|
||||
|
|
|
@ -26,7 +26,7 @@ public:
|
|||
* @private For internal implementation use
|
||||
*
|
||||
* Run a full JSON parse on a single document (stage1 + stage2).
|
||||
*
|
||||
*
|
||||
* Guaranteed only to be called when capacity > document length.
|
||||
*
|
||||
* Overridden by each implementation.
|
||||
|
@ -41,7 +41,7 @@ public:
|
|||
* @private For internal implementation use
|
||||
*
|
||||
* Stage 1 of the document parser.
|
||||
*
|
||||
*
|
||||
* Guaranteed only to be called when capacity > document length.
|
||||
*
|
||||
* Overridden by each implementation.
|
||||
|
@ -57,7 +57,7 @@ public:
|
|||
* @private For internal implementation use
|
||||
*
|
||||
* Stage 2 of the document parser.
|
||||
*
|
||||
*
|
||||
* Called after stage1().
|
||||
*
|
||||
* Overridden by each implementation.
|
||||
|
@ -82,7 +82,7 @@ public:
|
|||
|
||||
/**
|
||||
* Change the capacity of this parser.
|
||||
*
|
||||
*
|
||||
* Generally used for reallocation.
|
||||
*
|
||||
* @param capacity The new capacity.
|
||||
|
|
|
@ -9,12 +9,12 @@ namespace internal {
|
|||
* The smallest non-zero float (binary64) is 2^-1074.
|
||||
* We take as input numbers of the form w x 10^q where w < 2^64.
|
||||
* We have that w * 10^-343 < 2^(64-344) 5^-343 < 2^-1076.
|
||||
* However, we have that
|
||||
* However, we have that
|
||||
* (2^64-1) * 10^-342 = (2^64-1) * 2^-342 * 5^-342 > 2^-1074.
|
||||
* Thus it is possible for a number of the form w * 10^-342 where
|
||||
* Thus it is possible for a number of the form w * 10^-342 where
|
||||
* w is a 64-bit value to be a non-zero floating-point number.
|
||||
*********
|
||||
* Any number of form w * 10^309 where w>= 1 is going to be
|
||||
* Any number of form w * 10^309 where w>= 1 is going to be
|
||||
* infinite in binary64 so we never need to worry about powers
|
||||
* of 5 greater than 308.
|
||||
*/
|
||||
|
@ -46,7 +46,7 @@ extern SIMDJSON_DLLIMPORTEXPORT const double power_of_ten[];
|
|||
* are not a concern since they can be represented
|
||||
* exactly using the binary notation, only the powers of five
|
||||
* affect the binary significand.
|
||||
*/
|
||||
*/
|
||||
|
||||
|
||||
// The truncated powers of five from 5^-342 all the way to 5^308
|
||||
|
|
|
@ -14,6 +14,6 @@ extern SIMDJSON_DLLIMPORTEXPORT const uint8_t pshufb_combine_table[272];
|
|||
extern SIMDJSON_DLLIMPORTEXPORT const uint64_t thintable_epi8[256];
|
||||
|
||||
} // namespace internal
|
||||
} // namespace simdjson
|
||||
} // namespace simdjson
|
||||
|
||||
#endif // SIMDJSON_INTERNAL_SIMDPRUNE_TABLES_H
|
||||
|
|
|
@ -15,7 +15,7 @@ namespace simdjson {
|
|||
*
|
||||
* Minify the input string assuming that it represents a JSON string, does not parse or validate.
|
||||
* This function is much faster than parsing a JSON string and then writing a minified version of it.
|
||||
* However, it does not validate the input. It will merely return an error in simple cases (e.g., if
|
||||
* However, it does not validate the input. It will merely return an error in simple cases (e.g., if
|
||||
* there is a string that was never terminated).
|
||||
*
|
||||
*
|
||||
|
|
|
@ -12,11 +12,11 @@
|
|||
namespace simdjson {
|
||||
namespace internal {
|
||||
|
||||
// The allocate_padded_buffer function is a low-level function to allocate memory
|
||||
// with padding so we can read past the "length" bytes safely. It is used by
|
||||
// The allocate_padded_buffer function is a low-level function to allocate memory
|
||||
// with padding so we can read past the "length" bytes safely. It is used by
|
||||
// the padded_string class automatically. It returns nullptr in case
|
||||
// of error: the caller should check for a null pointer.
|
||||
// The length parameter is the maximum size in bytes of the string.
|
||||
// The length parameter is the maximum size in bytes of the string.
|
||||
// The caller is responsible to free the memory (e.g., delete[] (...)).
|
||||
inline char *allocate_padded_buffer(size_t length) noexcept {
|
||||
size_t totalpaddedlength = length + SIMDJSON_PADDING;
|
||||
|
@ -24,8 +24,8 @@ inline char *allocate_padded_buffer(size_t length) noexcept {
|
|||
if (padded_buffer == nullptr) {
|
||||
return nullptr;
|
||||
}
|
||||
// We write zeroes in the padded region to avoid having uninitialized
|
||||
// garbage. If nothing else, garbage getting read might trigger a
|
||||
// We write zeroes in the padded region to avoid having uninitialized
|
||||
// garbage. If nothing else, garbage getting read might trigger a
|
||||
// warning in a memory checker.
|
||||
std::memset(padded_buffer + length, 0, totalpaddedlength - length);
|
||||
return padded_buffer;
|
||||
|
|
|
@ -145,11 +145,11 @@ inline simdjson::padded_string operator "" _padded(const char *str, size_t len)
|
|||
namespace simdjson {
|
||||
namespace internal {
|
||||
|
||||
// The allocate_padded_buffer function is a low-level function to allocate memory
|
||||
// with padding so we can read past the "length" bytes safely. It is used by
|
||||
// The allocate_padded_buffer function is a low-level function to allocate memory
|
||||
// with padding so we can read past the "length" bytes safely. It is used by
|
||||
// the padded_string class automatically. It returns nullptr in case
|
||||
// of error: the caller should check for a null pointer.
|
||||
// The length parameter is the maximum size in bytes of the string.
|
||||
// The length parameter is the maximum size in bytes of the string.
|
||||
// The caller is responsible to free the memory (e.g., delete[] (...)).
|
||||
inline char *allocate_padded_buffer(size_t length) noexcept;
|
||||
|
||||
|
|
|
@ -12,7 +12,7 @@ NO_SANITIZE_UNDEFINED
|
|||
simdjson_really_inline int trailing_zeroes(uint64_t input_num) {
|
||||
#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO
|
||||
unsigned long ret;
|
||||
// Search the mask data from least significant bit (LSB)
|
||||
// Search the mask data from least significant bit (LSB)
|
||||
// to the most significant bit (MSB) for a set bit (1).
|
||||
_BitScanForward64(&ret, input_num);
|
||||
return (int)ret;
|
||||
|
@ -30,7 +30,7 @@ simdjson_really_inline uint64_t clear_lowest_bit(uint64_t input_num) {
|
|||
simdjson_really_inline int leading_zeroes(uint64_t input_num) {
|
||||
#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO
|
||||
unsigned long leading_zero = 0;
|
||||
// Search the mask data from most significant bit (MSB)
|
||||
// Search the mask data from most significant bit (MSB)
|
||||
// to least significant bit (LSB) for a set bit (1).
|
||||
if (_BitScanReverse64(&leading_zero, input_num))
|
||||
return (int)(63 - leading_zero);
|
||||
|
|
|
@ -298,7 +298,7 @@ namespace simd {
|
|||
uint64_t r3 = this->chunks[3].to_bitmask() ;
|
||||
return r0 | (r1 << 16) | (r2 << 32) | (r3 << 48);
|
||||
}
|
||||
|
||||
|
||||
simdjson_really_inline uint64_t eq(const T m) const {
|
||||
const simd8<T> mask = simd8<T>::splat(m);
|
||||
return simd8x64<bool>(
|
||||
|
|
|
@ -12,7 +12,7 @@ def verifyContent(f,filename):
|
|||
except UnicodeEncodeError as e:
|
||||
#print(f"a: found problem {e} at line {linenumber+1} in {filename}:")
|
||||
print(f"Found problem at line {linenumber+1} in {filename}:")
|
||||
print(line.rstrip())
|
||||
print(line.rstrip())
|
||||
for col, char in enumerate(line.encode('utf-8')):
|
||||
if char>=127:
|
||||
offender=char
|
||||
|
@ -27,7 +27,7 @@ def verifyContent(f,filename):
|
|||
sys.exit(1)
|
||||
|
||||
|
||||
|
||||
|
||||
for filename in sys.argv[1:]:
|
||||
with open(filename,encoding='utf-8') as f:
|
||||
#print(f"file {filename} was possible to open as utf-8")
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P )"
|
||||
cd $SCRIPTPATH/..
|
||||
make jsonstats
|
||||
echo
|
||||
echo
|
||||
for i in $SCRIPTPATH/../jsonexamples/*.json; do
|
||||
[ -f "$i" ] || break
|
||||
echo $i
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P )"
|
||||
cd $SCRIPTPATH/..
|
||||
make minifiercompetition
|
||||
echo
|
||||
echo
|
||||
for i in $SCRIPTPATH/../jsonexamples/*.json; do
|
||||
[ -f "$i" ] || break
|
||||
echo $i
|
||||
|
|
|
@ -3,7 +3,7 @@ SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P )"
|
|||
cd $SCRIPTPATH/..
|
||||
make parseandstatcompetition
|
||||
echo "parsing and collecting basic stats on json documents as quickly as possible"
|
||||
echo
|
||||
echo
|
||||
for i in $SCRIPTPATH/../jsonexamples/*.json; do
|
||||
[ -f "$i" ] || break
|
||||
echo $i
|
||||
|
@ -13,7 +13,7 @@ done
|
|||
|
||||
make distinctuseridcompetition
|
||||
echo "parsing and finding all user.id"
|
||||
echo
|
||||
echo
|
||||
|
||||
for i in $SCRIPTPATH/../jsonexamples/twitter.json; do
|
||||
[ -f "$i" ] || break
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P )"
|
||||
cd $SCRIPTPATH/..
|
||||
make parsingcompetition
|
||||
echo
|
||||
echo
|
||||
for i in $SCRIPTPATH/../jsonexamples/*.json; do
|
||||
[ -f "$i" ] || break
|
||||
echo $i
|
||||
|
|
|
@ -10,7 +10,7 @@ os=$(uname)
|
|||
|
||||
make parsingcompetition allparsingcompetition
|
||||
echo "parsing (with competition)"
|
||||
echo
|
||||
echo
|
||||
for i in $SCRIPTPATH/../jsonexamples/*.json; do
|
||||
[ -f "$i" ] || break
|
||||
echo $i
|
||||
|
@ -23,4 +23,4 @@ done
|
|||
|
||||
echo "see results in "$datadirectory
|
||||
|
||||
cd $datadirectory && gnuplot bar.gnuplot
|
||||
cd $datadirectory && gnuplot bar.gnuplot
|
||||
|
|
|
@ -47,7 +47,7 @@ fi
|
|||
|
||||
make parsingcompetition
|
||||
echo "parsing (with competition)"
|
||||
echo
|
||||
echo
|
||||
for i in $SCRIPTPATH/../jsonexamples/*.json; do
|
||||
[ -f "$i" ] || break
|
||||
echo $i
|
||||
|
@ -64,7 +64,7 @@ done
|
|||
|
||||
make parseandstatcompetition
|
||||
echo "parsing and collecting basic stats on json documents as quickly as possible"
|
||||
echo
|
||||
echo
|
||||
for i in $SCRIPTPATH/../jsonexamples/*.json; do
|
||||
[ -f "$i" ] || break
|
||||
echo $i
|
||||
|
@ -80,7 +80,7 @@ done
|
|||
|
||||
make distinctuseridcompetition
|
||||
echo "parsing and finding all user.id"
|
||||
echo
|
||||
echo
|
||||
|
||||
for i in $SCRIPTPATH/../jsonexamples/twitter.json; do
|
||||
[ -f "$i" ] || break
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P )"
|
||||
cd $SCRIPTPATH/..
|
||||
make parsingcompetition
|
||||
echo
|
||||
echo
|
||||
for i in "$SCRIPTPATH/../jsonexamples/twitter.json" "$SCRIPTPATH/../jsonexamples/update-center.json" "$SCRIPTPATH/../jsonexamples/github_events.json" "$SCRIPTPATH/../jsonexamples/gsoc-2018.json" ; do
|
||||
[ -f "$i" ] || break
|
||||
echo $i
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
#######
|
||||
# taken from http://hbfs.wordpress.com/2013/06/18/fast-path-finding-part-ii/
|
||||
# might require sudo apt-get install cpufrequtils
|
||||
# invoke with performance or ondemand
|
||||
# invoke with performance or ondemand
|
||||
# type cpufreq-info to check results, you can also verify with cat /proc/cpuinfo
|
||||
# enumerate found CPUs
|
||||
cpus=$( grep processor /proc/cpuinfo | cut -d: -f 2 )
|
||||
|
@ -23,7 +23,7 @@ else
|
|||
exit -1
|
||||
fi
|
||||
|
||||
echo "chosen policy " $1
|
||||
echo "chosen policy " $1
|
||||
# set governor for each CPU
|
||||
#
|
||||
for cpu in ${cpus[@]}
|
||||
|
|
|
@ -2,5 +2,5 @@ cd "${0%/*}"
|
|||
export CXX=g++-7
|
||||
export CC=gcc-7
|
||||
#./powerpolicy.sh performance
|
||||
./disablehyperthreading.sh
|
||||
./disablehyperthreading.sh
|
||||
./turboboost.sh on
|
||||
|
|
|
@ -18,7 +18,7 @@ do
|
|||
echo -n "| $file Cycles | $file Instructions | $file Missed Branches "
|
||||
done
|
||||
echo "|"
|
||||
|
||||
|
||||
git checkout jkeiser/lookup2_simpler_intel
|
||||
make parse
|
||||
report_perf lookup2 "$@"
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
/* auto-generated on 2020-11-03 06:07:17 +0100. Do not edit! */
|
||||
/* auto-generated on 2020-11-03 21:40:10 +0100. Do not edit! */
|
||||
/* begin file src/simdjson.cpp */
|
||||
#include "simdjson.h"
|
||||
|
||||
|
@ -960,7 +960,7 @@ namespace simdjson {
|
|||
namespace internal {
|
||||
|
||||
/**
|
||||
* The code in the internal::from_chars function is meant to handle the floating-point number parsing
|
||||
* The code in the internal::from_chars function is meant to handle the floating-point number parsing
|
||||
* when we have more than 19 digits in the decimal mantissa. This should only be seen
|
||||
* in adversarial scenarios: we do not expect production systems to even produce
|
||||
* such floating-point numbers.
|
||||
|
@ -1032,7 +1032,7 @@ decimal parse_decimal(const char *&p) noexcept {
|
|||
while (is_integer(*p)) {
|
||||
if (answer.num_digits < max_digits) {
|
||||
answer.digits[answer.num_digits] = uint8_t(*p - '0');
|
||||
}
|
||||
}
|
||||
answer.num_digits++;
|
||||
++p;
|
||||
}
|
||||
|
@ -1049,7 +1049,7 @@ decimal parse_decimal(const char *&p) noexcept {
|
|||
while (is_integer(*p)) {
|
||||
if (answer.num_digits < max_digits) {
|
||||
answer.digits[answer.num_digits] = uint8_t(*p - '0');
|
||||
}
|
||||
}
|
||||
answer.num_digits++;
|
||||
++p;
|
||||
}
|
||||
|
@ -1314,21 +1314,21 @@ template <typename binary> adjusted_mantissa compute_float(decimal &d) {
|
|||
}
|
||||
// At this point, going further, we can assume that d.num_digits > 0.
|
||||
// We want to guard against excessive decimal point values because
|
||||
// they can result in long running times. Indeed, we do
|
||||
// they can result in long running times. Indeed, we do
|
||||
// shifts by at most 60 bits. We have that log(10**400)/log(2**60) ~= 22
|
||||
// which is fine, but log(10**299995)/log(2**60) ~= 16609 which is not
|
||||
// fine (runs for a long time).
|
||||
//
|
||||
if(d.decimal_point < -324) {
|
||||
// We have something smaller than 1e-324 which is always zero
|
||||
// in binary64 and binary32.
|
||||
// in binary64 and binary32.
|
||||
// It should be zero.
|
||||
answer.power2 = 0;
|
||||
answer.mantissa = 0;
|
||||
return answer;
|
||||
} else if(d.decimal_point >= 310) {
|
||||
// We have something at least as large as 0.1e310 which is
|
||||
// always infinite.
|
||||
// always infinite.
|
||||
answer.power2 = binary::infinite_power();
|
||||
answer.mantissa = 0;
|
||||
return answer;
|
||||
|
@ -1690,7 +1690,7 @@ SIMDJSON_DLLIMPORTEXPORT const double power_of_ten[] = {
|
|||
* are not a concern since they can be represented
|
||||
* exactly using the binary notation, only the powers of five
|
||||
* affect the binary significand.
|
||||
*/
|
||||
*/
|
||||
|
||||
|
||||
// The truncated powers of five from 5^-342 all the way to 5^308
|
||||
|
@ -2927,7 +2927,7 @@ using namespace simd;
|
|||
}
|
||||
|
||||
// The only problem that can happen at EOF is that a multibyte character is too short
|
||||
// or a byte value too large in the last bytes: check_special_cases only checks for bytes
|
||||
// or a byte value too large in the last bytes: check_special_cases only checks for bytes
|
||||
// too large in the first of two bytes.
|
||||
simdjson_really_inline void check_eof() {
|
||||
// If the previous block had incomplete UTF-8 characters at the end, an ASCII block can't
|
||||
|
@ -3229,7 +3229,7 @@ namespace stage1 {
|
|||
* We seek to identify pseudo-structural characters. Anything that is inside
|
||||
* a string must be omitted (hence & ~_string.string_tail()).
|
||||
* Otherwise, pseudo-structural characters come in two forms.
|
||||
* 1. We have the structural characters ([,],{,},:, comma). The
|
||||
* 1. We have the structural characters ([,],{,},:, comma). The
|
||||
* term 'structural character' is from the JSON RFC.
|
||||
* 2. We have the 'scalar pseudo-structural characters'.
|
||||
* Scalars are quotes, and any character except structural characters and white space.
|
||||
|
@ -3439,7 +3439,7 @@ error_code json_minifier::minify(const uint8_t *buf, size_t len, uint8_t *dst, s
|
|||
|
||||
// Index the last (remainder) block, padded with spaces
|
||||
uint8_t block[STEP_SIZE];
|
||||
size_t remaining_bytes = reader.get_remainder(block);
|
||||
size_t remaining_bytes = reader.get_remainder(block);
|
||||
if (remaining_bytes > 0) {
|
||||
// We do not want to write directly to the output stream. Rather, we write
|
||||
// to a local buffer (for safety).
|
||||
|
@ -4220,7 +4220,7 @@ namespace stage2 {
|
|||
struct tape_writer {
|
||||
/** The next place to write to tape */
|
||||
uint64_t *next_tape_loc;
|
||||
|
||||
|
||||
/** Write a signed 64-bit value to tape. */
|
||||
simdjson_really_inline void append_s64(int64_t value) noexcept;
|
||||
|
||||
|
@ -5007,7 +5007,7 @@ simdjson_warn_unused error_code implementation::minify(const uint8_t *buf, size_
|
|||
}
|
||||
|
||||
// credit: based on code from Google Fuchsia (Apache Licensed)
|
||||
simdjson_warn_unused bool implementation::validate_utf8(const char *buf, size_t len) const noexcept {
|
||||
simdjson_warn_unused bool implementation::validate_utf8(const char *buf, size_t len) const noexcept {
|
||||
const uint8_t *data = (const uint8_t *)buf;
|
||||
uint64_t pos = 0;
|
||||
uint32_t code_point = 0;
|
||||
|
@ -5493,7 +5493,7 @@ namespace stage2 {
|
|||
struct tape_writer {
|
||||
/** The next place to write to tape */
|
||||
uint64_t *next_tape_loc;
|
||||
|
||||
|
||||
/** Write a signed 64-bit value to tape. */
|
||||
simdjson_really_inline void append_s64(int64_t value) noexcept;
|
||||
|
||||
|
@ -6015,7 +6015,7 @@ simdjson_really_inline json_character_block json_character_block::classify(const
|
|||
_mm256_shuffle_epi8(op_table, in.chunks[0]),
|
||||
_mm256_shuffle_epi8(op_table, in.chunks[1])
|
||||
});
|
||||
|
||||
|
||||
return { whitespace, op };
|
||||
}
|
||||
|
||||
|
@ -6186,7 +6186,7 @@ using namespace simd;
|
|||
}
|
||||
|
||||
// The only problem that can happen at EOF is that a multibyte character is too short
|
||||
// or a byte value too large in the last bytes: check_special_cases only checks for bytes
|
||||
// or a byte value too large in the last bytes: check_special_cases only checks for bytes
|
||||
// too large in the first of two bytes.
|
||||
simdjson_really_inline void check_eof() {
|
||||
// If the previous block had incomplete UTF-8 characters at the end, an ASCII block can't
|
||||
|
@ -6488,7 +6488,7 @@ namespace stage1 {
|
|||
* We seek to identify pseudo-structural characters. Anything that is inside
|
||||
* a string must be omitted (hence & ~_string.string_tail()).
|
||||
* Otherwise, pseudo-structural characters come in two forms.
|
||||
* 1. We have the structural characters ([,],{,},:, comma). The
|
||||
* 1. We have the structural characters ([,],{,},:, comma). The
|
||||
* term 'structural character' is from the JSON RFC.
|
||||
* 2. We have the 'scalar pseudo-structural characters'.
|
||||
* Scalars are quotes, and any character except structural characters and white space.
|
||||
|
@ -6698,7 +6698,7 @@ error_code json_minifier::minify(const uint8_t *buf, size_t len, uint8_t *dst, s
|
|||
|
||||
// Index the last (remainder) block, padded with spaces
|
||||
uint8_t block[STEP_SIZE];
|
||||
size_t remaining_bytes = reader.get_remainder(block);
|
||||
size_t remaining_bytes = reader.get_remainder(block);
|
||||
if (remaining_bytes > 0) {
|
||||
// We do not want to write directly to the output stream. Rather, we write
|
||||
// to a local buffer (for safety).
|
||||
|
@ -7478,7 +7478,7 @@ namespace stage2 {
|
|||
struct tape_writer {
|
||||
/** The next place to write to tape */
|
||||
uint64_t *next_tape_loc;
|
||||
|
||||
|
||||
/** Write a signed 64-bit value to tape. */
|
||||
simdjson_really_inline void append_s64(int64_t value) noexcept;
|
||||
|
||||
|
@ -8161,7 +8161,7 @@ using namespace simd;
|
|||
}
|
||||
|
||||
// The only problem that can happen at EOF is that a multibyte character is too short
|
||||
// or a byte value too large in the last bytes: check_special_cases only checks for bytes
|
||||
// or a byte value too large in the last bytes: check_special_cases only checks for bytes
|
||||
// too large in the first of two bytes.
|
||||
simdjson_really_inline void check_eof() {
|
||||
// If the previous block had incomplete UTF-8 characters at the end, an ASCII block can't
|
||||
|
@ -8463,7 +8463,7 @@ namespace stage1 {
|
|||
* We seek to identify pseudo-structural characters. Anything that is inside
|
||||
* a string must be omitted (hence & ~_string.string_tail()).
|
||||
* Otherwise, pseudo-structural characters come in two forms.
|
||||
* 1. We have the structural characters ([,],{,},:, comma). The
|
||||
* 1. We have the structural characters ([,],{,},:, comma). The
|
||||
* term 'structural character' is from the JSON RFC.
|
||||
* 2. We have the 'scalar pseudo-structural characters'.
|
||||
* Scalars are quotes, and any character except structural characters and white space.
|
||||
|
@ -8673,7 +8673,7 @@ error_code json_minifier::minify(const uint8_t *buf, size_t len, uint8_t *dst, s
|
|||
|
||||
// Index the last (remainder) block, padded with spaces
|
||||
uint8_t block[STEP_SIZE];
|
||||
size_t remaining_bytes = reader.get_remainder(block);
|
||||
size_t remaining_bytes = reader.get_remainder(block);
|
||||
if (remaining_bytes > 0) {
|
||||
// We do not want to write directly to the output stream. Rather, we write
|
||||
// to a local buffer (for safety).
|
||||
|
@ -9454,7 +9454,7 @@ namespace stage2 {
|
|||
struct tape_writer {
|
||||
/** The next place to write to tape */
|
||||
uint64_t *next_tape_loc;
|
||||
|
||||
|
||||
/** Write a signed 64-bit value to tape. */
|
||||
simdjson_really_inline void append_s64(int64_t value) noexcept;
|
||||
|
||||
|
@ -10173,7 +10173,7 @@ using namespace simd;
|
|||
}
|
||||
|
||||
// The only problem that can happen at EOF is that a multibyte character is too short
|
||||
// or a byte value too large in the last bytes: check_special_cases only checks for bytes
|
||||
// or a byte value too large in the last bytes: check_special_cases only checks for bytes
|
||||
// too large in the first of two bytes.
|
||||
simdjson_really_inline void check_eof() {
|
||||
// If the previous block had incomplete UTF-8 characters at the end, an ASCII block can't
|
||||
|
@ -10475,7 +10475,7 @@ namespace stage1 {
|
|||
* We seek to identify pseudo-structural characters. Anything that is inside
|
||||
* a string must be omitted (hence & ~_string.string_tail()).
|
||||
* Otherwise, pseudo-structural characters come in two forms.
|
||||
* 1. We have the structural characters ([,],{,},:, comma). The
|
||||
* 1. We have the structural characters ([,],{,},:, comma). The
|
||||
* term 'structural character' is from the JSON RFC.
|
||||
* 2. We have the 'scalar pseudo-structural characters'.
|
||||
* Scalars are quotes, and any character except structural characters and white space.
|
||||
|
@ -10685,7 +10685,7 @@ error_code json_minifier::minify(const uint8_t *buf, size_t len, uint8_t *dst, s
|
|||
|
||||
// Index the last (remainder) block, padded with spaces
|
||||
uint8_t block[STEP_SIZE];
|
||||
size_t remaining_bytes = reader.get_remainder(block);
|
||||
size_t remaining_bytes = reader.get_remainder(block);
|
||||
if (remaining_bytes > 0) {
|
||||
// We do not want to write directly to the output stream. Rather, we write
|
||||
// to a local buffer (for safety).
|
||||
|
@ -11465,7 +11465,7 @@ namespace stage2 {
|
|||
struct tape_writer {
|
||||
/** The next place to write to tape */
|
||||
uint64_t *next_tape_loc;
|
||||
|
||||
|
||||
/** Write a signed 64-bit value to tape. */
|
||||
simdjson_really_inline void append_s64(int64_t value) noexcept;
|
||||
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -244,7 +244,7 @@ simdjson_warn_unused error_code implementation::minify(const uint8_t *buf, size_
|
|||
}
|
||||
|
||||
// credit: based on code from Google Fuchsia (Apache Licensed)
|
||||
simdjson_warn_unused bool implementation::validate_utf8(const char *buf, size_t len) const noexcept {
|
||||
simdjson_warn_unused bool implementation::validate_utf8(const char *buf, size_t len) const noexcept {
|
||||
const uint8_t *data = (const uint8_t *)buf;
|
||||
uint64_t pos = 0;
|
||||
uint32_t code_point = 0;
|
||||
|
|
|
@ -4,7 +4,7 @@ namespace simdjson {
|
|||
namespace internal {
|
||||
|
||||
/**
|
||||
* The code in the internal::from_chars function is meant to handle the floating-point number parsing
|
||||
* The code in the internal::from_chars function is meant to handle the floating-point number parsing
|
||||
* when we have more than 19 digits in the decimal mantissa. This should only be seen
|
||||
* in adversarial scenarios: we do not expect production systems to even produce
|
||||
* such floating-point numbers.
|
||||
|
@ -76,7 +76,7 @@ decimal parse_decimal(const char *&p) noexcept {
|
|||
while (is_integer(*p)) {
|
||||
if (answer.num_digits < max_digits) {
|
||||
answer.digits[answer.num_digits] = uint8_t(*p - '0');
|
||||
}
|
||||
}
|
||||
answer.num_digits++;
|
||||
++p;
|
||||
}
|
||||
|
@ -93,7 +93,7 @@ decimal parse_decimal(const char *&p) noexcept {
|
|||
while (is_integer(*p)) {
|
||||
if (answer.num_digits < max_digits) {
|
||||
answer.digits[answer.num_digits] = uint8_t(*p - '0');
|
||||
}
|
||||
}
|
||||
answer.num_digits++;
|
||||
++p;
|
||||
}
|
||||
|
@ -358,21 +358,21 @@ template <typename binary> adjusted_mantissa compute_float(decimal &d) {
|
|||
}
|
||||
// At this point, going further, we can assume that d.num_digits > 0.
|
||||
// We want to guard against excessive decimal point values because
|
||||
// they can result in long running times. Indeed, we do
|
||||
// they can result in long running times. Indeed, we do
|
||||
// shifts by at most 60 bits. We have that log(10**400)/log(2**60) ~= 22
|
||||
// which is fine, but log(10**299995)/log(2**60) ~= 16609 which is not
|
||||
// fine (runs for a long time).
|
||||
//
|
||||
if(d.decimal_point < -324) {
|
||||
// We have something smaller than 1e-324 which is always zero
|
||||
// in binary64 and binary32.
|
||||
// in binary64 and binary32.
|
||||
// It should be zero.
|
||||
answer.power2 = 0;
|
||||
answer.mantissa = 0;
|
||||
return answer;
|
||||
} else if(d.decimal_point >= 310) {
|
||||
// We have something at least as large as 0.1e310 which is
|
||||
// always infinite.
|
||||
// always infinite.
|
||||
answer.power2 = binary::infinite_power();
|
||||
answer.mantissa = 0;
|
||||
return answer;
|
||||
|
|
|
@ -69,7 +69,7 @@ error_code json_minifier::minify(const uint8_t *buf, size_t len, uint8_t *dst, s
|
|||
|
||||
// Index the last (remainder) block, padded with spaces
|
||||
uint8_t block[STEP_SIZE];
|
||||
size_t remaining_bytes = reader.get_remainder(block);
|
||||
size_t remaining_bytes = reader.get_remainder(block);
|
||||
if (remaining_bytes > 0) {
|
||||
// We do not want to write directly to the output stream. Rather, we write
|
||||
// to a local buffer (for safety).
|
||||
|
|
|
@ -9,7 +9,7 @@ namespace stage1 {
|
|||
* We seek to identify pseudo-structural characters. Anything that is inside
|
||||
* a string must be omitted (hence & ~_string.string_tail()).
|
||||
* Otherwise, pseudo-structural characters come in two forms.
|
||||
* 1. We have the structural characters ([,],{,},:, comma). The
|
||||
* 1. We have the structural characters ([,],{,},:, comma). The
|
||||
* term 'structural character' is from the JSON RFC.
|
||||
* 2. We have the 'scalar pseudo-structural characters'.
|
||||
* Scalars are quotes, and any character except structural characters and white space.
|
||||
|
|
|
@ -93,7 +93,7 @@ using namespace simd;
|
|||
static const int TOO_LARGE = 0x10; // 11110100 (1001|101_)____
|
||||
static const int TOO_LARGE_2 = 0x20; // 1111(1___|011_|0101) 10______
|
||||
|
||||
// New with lookup3. We want to catch the case where a non-continuation
|
||||
// New with lookup3. We want to catch the case where a non-continuation
|
||||
// follows a leading byte
|
||||
static const int TOO_SHORT_2_3_4 = 0x40; // (110_|1110|1111) ____ (0___|110_|1111) ____
|
||||
// We also want to catch a continuation that is preceded by an ASCII byte
|
||||
|
@ -226,7 +226,7 @@ using namespace simd;
|
|||
this->check_utf8_bytes(input.chunks[1], input.chunks[0]);
|
||||
this->check_utf8_bytes(input.chunks[2], input.chunks[1]);
|
||||
this->check_utf8_bytes(input.chunks[3], input.chunks[2]);
|
||||
}
|
||||
}
|
||||
this->prev_incomplete = is_incomplete(input.chunks[simd8x64<uint8_t>::NUM_CHUNKS-1]);
|
||||
this->prev_input_block = input.chunks[simd8x64<uint8_t>::NUM_CHUNKS-1];
|
||||
}
|
||||
|
|
|
@ -141,7 +141,7 @@ using namespace simd;
|
|||
}
|
||||
|
||||
// The only problem that can happen at EOF is that a multibyte character is too short
|
||||
// or a byte value too large in the last bytes: check_special_cases only checks for bytes
|
||||
// or a byte value too large in the last bytes: check_special_cases only checks for bytes
|
||||
// too large in the first of two bytes.
|
||||
simdjson_really_inline void check_eof() {
|
||||
// If the previous block had incomplete UTF-8 characters at the end, an ASCII block can't
|
||||
|
|
|
@ -11,7 +11,7 @@ namespace utf8_validation {
|
|||
// are straight up concatenated into the final value. The first byte of a multibyte character is a
|
||||
// "leading byte" and starts with N 1's, where N is the total number of bytes (110_____ = 2 byte
|
||||
// lead). The remaining bytes of a multibyte character all start with 10. 1-byte characters just
|
||||
// start with 0, because that's what ASCII looks like. Here's what each size looks like:
|
||||
// start with 0, because that's what ASCII looks like. Here's what each size looks like:
|
||||
//
|
||||
// - ASCII (7 bits): 0_______
|
||||
// - 2 byte character (11 bits): 110_____ 10______
|
||||
|
@ -52,9 +52,9 @@ namespace utf8_validation {
|
|||
// support values with more than 21 bits (which a 4-byte character supports).
|
||||
//
|
||||
// e.g. 11111000 10100000 10000000 10000000 10000000 (U+800000)
|
||||
//
|
||||
//
|
||||
// Legal utf-8 byte sequences per http://www.unicode.org/versions/Unicode6.0.0/ch03.pdf - page 94:
|
||||
//
|
||||
//
|
||||
// Code Points 1st 2s 3s 4s
|
||||
// U+0000..U+007F 00..7F
|
||||
// U+0080..U+07FF C2..DF 80..BF
|
||||
|
|
|
@ -14,7 +14,7 @@ namespace SIMDJSON_IMPLEMENTATION {
|
|||
// are straight up concatenated into the final value. The first byte of a multibyte character is a
|
||||
// "leading byte" and starts with N 1's, where N is the total number of bytes (110_____ = 2 byte
|
||||
// lead). The remaining bytes of a multibyte character all start with 10. 1-byte characters just
|
||||
// start with 0, because that's what ASCII looks like. Here's what each size looks like:
|
||||
// start with 0, because that's what ASCII looks like. Here's what each size looks like:
|
||||
//
|
||||
// | Character Length | UTF-8 Byte Sequence |
|
||||
// |-----------------------------|---------------------------------------|
|
||||
|
@ -69,7 +69,7 @@ namespace SIMDJSON_IMPLEMENTATION {
|
|||
// e.g. `11101101 10100000 10000000` (U+D800)
|
||||
//
|
||||
// ### 5+ byte characters
|
||||
//
|
||||
//
|
||||
// INVALID_5_BYTE: 5-byte, 6-byte, 7-byte and 8-byte characters are unsupported; Unicode does not
|
||||
// support values with more than 21 bits (which a 4-byte character supports).
|
||||
//
|
||||
|
@ -77,9 +77,9 @@ namespace SIMDJSON_IMPLEMENTATION {
|
|||
// Unicode max value), or overlong (could fit in 4+ bytes).
|
||||
//
|
||||
// e.g. `11111000 10100000 10000000 10000000 10000000` (U+800000)
|
||||
//
|
||||
//
|
||||
// Legal utf-8 byte sequences per http://www.unicode.org/versions/Unicode6.0.0/ch03.pdf - page 94:
|
||||
//
|
||||
//
|
||||
// | Code Points | 1st | 2nd | 3s | 4s |
|
||||
// |--------------------|--------|--------|--------|--------|
|
||||
// | U+0000..U+007F | 00..7F | | | |
|
||||
|
@ -267,7 +267,7 @@ struct utf8_checker {
|
|||
|
||||
// Look up error masks for three consecutive nibbles. We need to
|
||||
// AND with 0x0F for each one, because vpshufb has the neat
|
||||
// "feature" that negative values in an index byte will result in
|
||||
// "feature" that negative values in an index byte will result in
|
||||
// a zero.
|
||||
simd8<uint8_t> nibble_1_error = shifted_bytes.shr<4>().lookup_16<uint8_t>(
|
||||
0, 0, 0, 0,
|
||||
|
@ -294,7 +294,7 @@ struct utf8_checker {
|
|||
TOO_LARGE_2, // 1111[0101..1111] ________ > U+10FFFF
|
||||
TOO_LARGE_2,
|
||||
TOO_LARGE_2,
|
||||
|
||||
|
||||
TOO_LARGE_2,
|
||||
TOO_LARGE_2,
|
||||
TOO_LARGE_2,
|
||||
|
|
|
@ -9,7 +9,7 @@ public:
|
|||
uint32_t *next_structural;
|
||||
dom_parser_implementation &dom_parser;
|
||||
|
||||
// Start a structural
|
||||
// Start a structural
|
||||
simdjson_really_inline structural_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index)
|
||||
: buf{_dom_parser.buf},
|
||||
next_structural{&_dom_parser.structural_indexes[start_structural_index]},
|
||||
|
|
|
@ -6,7 +6,7 @@ namespace stage2 {
|
|||
struct tape_writer {
|
||||
/** The next place to write to tape */
|
||||
uint64_t *next_tape_loc;
|
||||
|
||||
|
||||
/** Write a signed 64-bit value to tape. */
|
||||
simdjson_really_inline void append_s64(int64_t value) noexcept;
|
||||
|
||||
|
|
|
@ -78,7 +78,7 @@ simdjson_really_inline json_character_block json_character_block::classify(const
|
|||
_mm256_shuffle_epi8(op_table, in.chunks[0]),
|
||||
_mm256_shuffle_epi8(op_table, in.chunks[1])
|
||||
});
|
||||
|
||||
|
||||
return { whitespace, op };
|
||||
}
|
||||
|
||||
|
|
|
@ -18,7 +18,7 @@ SIMDJSON_DLLIMPORTEXPORT const double power_of_ten[] = {
|
|||
* are not a concern since they can be represented
|
||||
* exactly using the binary notation, only the powers of five
|
||||
* affect the binary significand.
|
||||
*/
|
||||
*/
|
||||
|
||||
|
||||
// The truncated powers of five from 5^-342 all the way to 5^308
|
||||
|
|
|
@ -19,7 +19,7 @@
|
|||
/**
|
||||
* Some systems have bad floating-point parsing. We want to exclude them.
|
||||
*/
|
||||
#if defined(SIMDJSON_REGULAR_VISUAL_STUDIO) || defined (__linux__) || defined (__APPLE__) || defined(__FreeBSD__)
|
||||
#if defined(SIMDJSON_REGULAR_VISUAL_STUDIO) || defined (__linux__) || defined (__APPLE__) || defined(__FreeBSD__)
|
||||
// Finally, we want to exclude legacy 32-bit systems.
|
||||
#ifndef SIMDJSON_IS_32BITS
|
||||
// So we only run some of the floating-point tests under 64-bit linux, apple, regular visual studio, freebsd.
|
||||
|
@ -35,7 +35,7 @@ namespace number_tests {
|
|||
bool ground_truth() {
|
||||
std::cout << __func__ << std::endl;
|
||||
std::pair<std::string,double> ground_truth[] = {
|
||||
{"9355950000000000000.00000000000000000000000000000000001844674407370955161600000184467440737095516161844674407370955161407370955161618446744073709551616000184467440737095516166000001844674407370955161618446744073709551614073709551616184467440737095516160001844674407370955161601844674407370955674451616184467440737095516140737095516161844674407370955161600018446744073709551616018446744073709551611616000184467440737095001844674407370955161600184467440737095516160018446744073709551168164467440737095516160001844073709551616018446744073709551616184467440737095516160001844674407536910751601611616000184467440737095001844674407370955161600184467440737095516160018446744073709551616184467440737095516160001844955161618446744073709551616000184467440753691075160018446744073709",0x1.03ae05e8fca1cp+63},
|
||||
{"9355950000000000000.00000000000000000000000000000000001844674407370955161600000184467440737095516161844674407370955161407370955161618446744073709551616000184467440737095516166000001844674407370955161618446744073709551614073709551616184467440737095516160001844674407370955161601844674407370955674451616184467440737095516140737095516161844674407370955161600018446744073709551616018446744073709551611616000184467440737095001844674407370955161600184467440737095516160018446744073709551168164467440737095516160001844073709551616018446744073709551616184467440737095516160001844674407536910751601611616000184467440737095001844674407370955161600184467440737095516160018446744073709551616184467440737095516160001844955161618446744073709551616000184467440753691075160018446744073709",0x1.03ae05e8fca1cp+63},
|
||||
{"2.2250738585072013e-308",0x1p-1022},
|
||||
{"-92666518056446206563E3", -0x1.39f764644154dp+76},
|
||||
{"-92666518056446206563E3", -0x1.39f764644154dp+76},
|
||||
|
@ -128,7 +128,7 @@ namespace number_tests {
|
|||
if (n >= sizeof(buf)) { abort(); }
|
||||
double actual;
|
||||
auto error = parser.parse(buf, n).get(actual);
|
||||
if (error) { std::cerr << error << std::endl; return false; }
|
||||
if (error) { std::cerr << error << std::endl; return false; }
|
||||
if(actual!=expected) {
|
||||
std::cerr << "JSON '" << buf << " parsed to ";
|
||||
fprintf( stderr," %18.18g instead of %18.18g\n", actual, expected); // formatting numbers is easier with printf
|
||||
|
@ -283,7 +283,7 @@ namespace number_tests {
|
|||
|
||||
bool specific_tests() {
|
||||
std::cout << __func__ << std::endl;
|
||||
return basic_test_64bit("-2402844368454405395.2",-2402844368454405395.2) &&
|
||||
return basic_test_64bit("-2402844368454405395.2",-2402844368454405395.2) &&
|
||||
basic_test_64bit("4503599627370496.5", 4503599627370496.5) &&
|
||||
basic_test_64bit("4503599627475352.5", 4503599627475352.5) &&
|
||||
basic_test_64bit("4503599627475353.5", 4503599627475353.5) &&
|
||||
|
@ -322,7 +322,7 @@ namespace parse_api_tests {
|
|||
std::cout << "Running " << __func__ << std::endl;
|
||||
typedef std::tuple<std::string, std::unique_ptr<parser>,element> simdjson_tuple;
|
||||
std::vector<simdjson_tuple> results;
|
||||
std::vector<std::string> my_data = {"[1,2,3]", "[1,2,3]", "[1,2,3]"};
|
||||
std::vector<std::string> my_data = {"[1,2,3]", "[1,2,3]", "[1,2,3]"};
|
||||
|
||||
for (std::string s : my_data) {
|
||||
std::unique_ptr<dom::parser> parser(new dom::parser{});
|
||||
|
@ -1311,7 +1311,7 @@ namespace type_tests {
|
|||
#else
|
||||
// We don't trust the underlying system so we only run the test_cast
|
||||
// exact test when the expected_value is within the 53-bit range.
|
||||
&& ((expected_value<-9007199254740992) || (expected_value>9007199254740992) || test_cast<double>(result, static_cast<double>(expected_value)))
|
||||
&& ((expected_value<-9007199254740992) || (expected_value>9007199254740992) || test_cast<double>(result, static_cast<double>(expected_value)))
|
||||
#endif
|
||||
&& test_cast_error<bool>(result, INCORRECT_TYPE)
|
||||
&& test_is_null(result, false);
|
||||
|
@ -1338,7 +1338,7 @@ namespace type_tests {
|
|||
#else
|
||||
// We don't trust the underlying system so we only run the test_cast
|
||||
// exact test when the expected_value is within the 53-bit range.
|
||||
&& ((expected_value>9007199254740992) || test_cast<double>(result, static_cast<double>(expected_value)))
|
||||
&& ((expected_value>9007199254740992) || test_cast<double>(result, static_cast<double>(expected_value)))
|
||||
#endif
|
||||
&& test_cast_error<bool>(result, INCORRECT_TYPE)
|
||||
&& test_is_null(result, false);
|
||||
|
|
|
@ -11,10 +11,10 @@ bool single_document() {
|
|||
|
||||
#if COMPILATION_TEST_USE_FAILING_CODE
|
||||
auto error = parser.parse_many(json).get(R"({"hello": "world"})"_padded);
|
||||
#else
|
||||
#else
|
||||
auto json = R"({"hello": "world"})"_padded;
|
||||
auto error = parser.parse_many(json).get(stream);
|
||||
#endif
|
||||
#endif
|
||||
if(error) {
|
||||
std::cerr << error << std::endl;
|
||||
return false;
|
||||
|
|
|
@ -178,7 +178,7 @@ namespace document_stream_tests {
|
|||
}
|
||||
return true;
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
bool large_window() {
|
||||
std::cout << "Running " << __func__ << std::endl;
|
||||
|
@ -310,11 +310,11 @@ namespace document_stream_tests {
|
|||
}
|
||||
|
||||
bool run() {
|
||||
return test_current_index() &&
|
||||
return test_current_index() &&
|
||||
single_document() &&
|
||||
#if SIMDJSON_EXCEPTIONS
|
||||
single_document_exceptions() &&
|
||||
issue1133() &&
|
||||
issue1133() &&
|
||||
#endif
|
||||
#ifdef SIMDJSON_THREADS_ENABLED
|
||||
threaded_disabled() &&
|
||||
|
|
|
@ -191,8 +191,8 @@ namespace adversarial {
|
|||
int main() {
|
||||
// this is put here deliberately to check that the documentation is correct (README),
|
||||
// should this fail to compile, you should update the documentation:
|
||||
if (simdjson::active_implementation->name() == "unsupported") {
|
||||
printf("unsupported CPU\n");
|
||||
if (simdjson::active_implementation->name() == "unsupported") {
|
||||
printf("unsupported CPU\n");
|
||||
}
|
||||
std::cout << "Running error tests." << std::endl;
|
||||
if (!(true
|
||||
|
|
|
@ -26,9 +26,9 @@ void found_unsigned_integer(uint64_t result, const uint8_t *buf);
|
|||
/**
|
||||
* Some systems have bad floating-point parsing. We want to exclude them.
|
||||
*/
|
||||
#if defined(SIMDJSON_REGULAR_VISUAL_STUDIO) || defined (__linux__) || defined (__APPLE__) || defined(__FreeBSD__)
|
||||
#if defined(SIMDJSON_REGULAR_VISUAL_STUDIO) || defined (__linux__) || defined (__APPLE__) || defined(__FreeBSD__)
|
||||
// Ok. So under Visual Studio, linux, apple and freebsd systems, we have a good chance of having a decent
|
||||
// enough strtod. It is not certain, but it is maybe a good enough heuristics. We exclude systems like msys2
|
||||
// enough strtod. It is not certain, but it is maybe a good enough heuristics. We exclude systems like msys2
|
||||
// or cygwin.
|
||||
//
|
||||
// Finally, we want to exclude legacy 32-bit systems.
|
||||
|
@ -37,7 +37,7 @@ void found_unsigned_integer(uint64_t result, const uint8_t *buf);
|
|||
#define TEST_FLOATS
|
||||
// Apple and freebsd need a special header, typically.
|
||||
#if defined __APPLE__ || defined(__FreeBSD__)
|
||||
# include <xlocale.h>
|
||||
# include <xlocale.h>
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
@ -84,7 +84,7 @@ void found_invalid_number(const uint8_t *buf) {
|
|||
#else
|
||||
static locale_t c_locale = newlocale(LC_ALL_MASK, "C", NULL);
|
||||
double expected = strtod_l((const char *)buf, &endptr, c_locale);
|
||||
#endif
|
||||
#endif
|
||||
if (endptr != (const char *)buf) {
|
||||
if (!is_in_bad_list((const char *)buf)) {
|
||||
printf("Warning: found_invalid_number %.32s whereas strtod parses it to "
|
||||
|
@ -140,7 +140,7 @@ void found_float(double result, const uint8_t *buf) {
|
|||
#else
|
||||
static locale_t c_locale = newlocale(LC_ALL_MASK, "C", NULL);
|
||||
double expected = strtod_l((const char *)buf, &endptr, c_locale);
|
||||
#endif
|
||||
#endif
|
||||
if (endptr == (const char *)buf) {
|
||||
fprintf(stderr,
|
||||
"parsed %f from %.32s whereas strtod refuses to parse a float, ",
|
||||
|
|
|
@ -105,7 +105,7 @@ namespace key_string_tests {
|
|||
}
|
||||
#endif
|
||||
bool run() {
|
||||
return
|
||||
return
|
||||
#if SIMDJSON_EXCEPTIONS
|
||||
parser_key_value() &&
|
||||
#endif
|
||||
|
@ -124,7 +124,7 @@ namespace active_tests {
|
|||
ondemand::object parent = doc["parent"];
|
||||
{
|
||||
ondemand::object c1 = parent["child1"];
|
||||
if(std::string_view(c1["name"]) != "John") { return false; }
|
||||
if(std::string_view(c1["name"]) != "John") { return false; }
|
||||
}
|
||||
{
|
||||
ondemand::object c2 = parent["child2"];
|
||||
|
@ -161,7 +161,7 @@ namespace active_tests {
|
|||
}
|
||||
#endif
|
||||
bool run() {
|
||||
return
|
||||
return
|
||||
#if SIMDJSON_EXCEPTIONS
|
||||
parser_child() &&
|
||||
parser_doc_correct() &&
|
||||
|
@ -818,7 +818,7 @@ namespace ordering_tests {
|
|||
y += double(point_object["y"]);
|
||||
z += double(point_object["z"]);
|
||||
}
|
||||
return (x == 1.1) && (y == 2.2) && (z == 3.3);
|
||||
return (x == 1.1) && (y == 2.2) && (z == 3.3);
|
||||
}
|
||||
|
||||
bool out_of_order() {
|
||||
|
@ -839,7 +839,7 @@ namespace ordering_tests {
|
|||
return false;
|
||||
} catch(simdjson_error&) {}
|
||||
}
|
||||
return (x == 0) && (y == 0) && (z == 3.3);
|
||||
return (x == 0) && (y == 0) && (z == 3.3);
|
||||
}
|
||||
|
||||
bool robust_order() {
|
||||
|
@ -856,7 +856,7 @@ namespace ordering_tests {
|
|||
else if (field.key() == "y") { y += double(field.value()); }
|
||||
}
|
||||
}
|
||||
return (x == 1.1) && (y == 2.2) && (z == 3.3);
|
||||
return (x == 1.1) && (y == 2.2) && (z == 3.3);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/***************
|
||||
* We refer the programmer to
|
||||
* We refer the programmer to
|
||||
* JavaScript Object Notation (JSON) Pointer
|
||||
* https://tools.ietf.org/html/rfc6901
|
||||
*/
|
||||
|
@ -165,7 +165,7 @@ bool issue1142() {
|
|||
ASSERT_EQUAL(std::string("1"), simdjson::minify(e0))
|
||||
auto o = dom::array(example).at(2).at_pointer("");
|
||||
ASSERT_EQUAL(std::string(R"({"1":"bla"})"), simdjson::minify(o))
|
||||
std::string_view s0 = dom::array(example).at(2).at_pointer("/1").at_pointer("");
|
||||
std::string_view s0 = dom::array(example).at(2).at_pointer("/1").at_pointer("");
|
||||
if(s0 != "bla") {
|
||||
std::cerr << s0 << std::endl;
|
||||
return false;
|
||||
|
|
|
@ -14,9 +14,9 @@
|
|||
/**
|
||||
* Some systems have bad floating-point parsing. We want to exclude them.
|
||||
*/
|
||||
#if defined(SIMDJSON_REGULAR_VISUAL_STUDIO) || defined (__linux__) || defined (__APPLE__) || defined(__FreeBSD__)
|
||||
#if defined(SIMDJSON_REGULAR_VISUAL_STUDIO) || defined (__linux__) || defined (__APPLE__) || defined(__FreeBSD__)
|
||||
// Ok. So under Visual Studio, linux, apple and freebsd systems, we have a good chance of having a decent
|
||||
// enough strtod. It is not certain, but it is maybe a good enough heuristics. We exclude systems like msys2
|
||||
// enough strtod. It is not certain, but it is maybe a good enough heuristics. We exclude systems like msys2
|
||||
// or cygwin.
|
||||
//
|
||||
// Finally, we want to exclude legacy 32-bit systems.
|
||||
|
@ -25,7 +25,7 @@
|
|||
#define TEST_FLOATS
|
||||
// Apple and freebsd need a special header, typically.
|
||||
#if defined __APPLE__ || defined(__FreeBSD__)
|
||||
# include <xlocale.h>
|
||||
# include <xlocale.h>
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
@ -35,7 +35,7 @@
|
|||
|
||||
struct RandomEngine {
|
||||
RandomEngine() = delete;
|
||||
RandomEngine(uint32_t seed) : one_zero_generator(0,1), digit_generator(0,9), nonzero_digit_generator(1,9), digit_count_generator (1,40),exp_count_generator (1,3), generator(seed) {}
|
||||
RandomEngine(uint32_t seed) : one_zero_generator(0,1), digit_generator(0,9), nonzero_digit_generator(1,9), digit_count_generator (1,40),exp_count_generator (1,3), generator(seed) {}
|
||||
std::uniform_int_distribution<int> one_zero_generator;
|
||||
std::uniform_int_distribution<int> digit_generator;
|
||||
std::uniform_int_distribution<int> nonzero_digit_generator;
|
||||
|
@ -62,7 +62,7 @@ size_t build_random_string(RandomEngine &rand, char *buffer) {
|
|||
for (size_t i = 0; i < number_of_digits; i++) {
|
||||
if (i == location_of_decimal_separator) {
|
||||
buffer[pos++] = '.';
|
||||
}
|
||||
}
|
||||
if (( i == 0) && (location_of_decimal_separator != 1)) {
|
||||
buffer[pos++] = char(rand.next_nonzero_digit() + '0');
|
||||
} else {
|
||||
|
@ -111,7 +111,7 @@ bool check_float(double result, const char *buf) {
|
|||
#else
|
||||
static locale_t c_locale = newlocale(LC_ALL_MASK, "C", NULL);
|
||||
double expected = strtod_l((const char *)buf, &endptr, c_locale);
|
||||
#endif
|
||||
#endif
|
||||
if (endptr == (const char *)buf) {
|
||||
fprintf(stderr,
|
||||
"parsed %f from %.32s whereas strtod refuses to parse a float, ",
|
||||
|
@ -137,13 +137,13 @@ bool tester(int seed, size_t volume) {
|
|||
char buffer[1024]; // large buffer (can't overflow)
|
||||
simdjson::dom::parser parser;
|
||||
RandomEngine rand(seed);
|
||||
double result;
|
||||
double result;
|
||||
for (size_t i = 0; i < volume; i++) {
|
||||
if((i%100000) == 0) { std::cout << "."; std::cout.flush(); }
|
||||
size_t length = build_random_string(rand, buffer);
|
||||
auto error = parser.parse(buffer, length).get(result);
|
||||
// When we parse a (finite) number, it better match strtod.
|
||||
if ((!error) && (!check_float(result, buffer))) { return false; }
|
||||
if ((!error) && (!check_float(result, buffer))) { return false; }
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
|
|
@ -95,7 +95,7 @@ simdjson_really_inline bool assert_true(bool value, const char *operation = "res
|
|||
#define ASSERT_SUCCESS(ACTUAL) do { if (!::assert_success((ACTUAL), #ACTUAL)) { return false; } } while (0);
|
||||
#define ASSERT_ERROR(ACTUAL, EXPECTED) do { if (!::assert_error ((ACTUAL), (EXPECTED), #ACTUAL)) { return false; } } while (0);
|
||||
#define ASSERT_TRUE(ACTUAL) do { if (!::assert_true ((ACTUAL), #ACTUAL)) { return false; } } while (0);
|
||||
#define ASSERT(ACTUAL, MESSAGE) do { if (!::assert_true ((ACTUAL), (MESSAGE))) { return false; } } while (0);
|
||||
#define ASSERT(ACTUAL, MESSAGE) do { if (!::assert_true ((ACTUAL), (MESSAGE))) { return false; } } while (0);
|
||||
#define RUN_TEST(ACTUAL) do { if (!(ACTUAL)) { return false; } } while (0);
|
||||
#define TEST_FAIL(MESSAGE) do { std::cerr << "FAIL: " << (MESSAGE) << std::endl; return false; } while (0);
|
||||
#define TEST_SUCCEED() do { return true; } while (0);
|
||||
|
|
|
@ -9,7 +9,7 @@ SIMDJSON_PUSH_DISABLE_ALL_WARNINGS
|
|||
#include "cxxopts.hpp"
|
||||
SIMDJSON_POP_DISABLE_WARNINGS
|
||||
|
||||
#if CXXOPTS__VERSION_MAJOR < 3
|
||||
#if CXXOPTS__VERSION_MAJOR < 3
|
||||
int main(int argc, char *argv[]) {
|
||||
#else
|
||||
int main(int argc, const char *argv[]) {
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue