remove trailing whitespace (#1284)

This commit is contained in:
Paul Dreik 2020-11-03 21:48:09 +01:00 committed by GitHub
parent 9f78559cc8
commit af4db55e66
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
104 changed files with 1029 additions and 1029 deletions

View File

@ -18,7 +18,7 @@ Before submitting an issue, please ensure that you have read the documentation:
**Describe the bug**
A clear and concise description of what the bug is.
Note that a compiler warning is not a bug.
Note that a compiler warning is not a bug.
**To Reproduce**
Steps to reproduce the behaviour: provide a code sample if possible.
@ -32,10 +32,10 @@ Note that a stack trace from your own program is not enough.
- Compiler [e.g. Apple clang version 11.0.3 (clang-1103.0.32.59) x86_64-apple-darwin19.4.0]
- Version [e.g. 22]
We support up-to-date 64-bit ARM and x64 FreeBSD, macOS, Windows and Linux systems. Please ensure that your configuration is supported before labelling the issue as a bug. In particular, we do not support legacy 32-bit systems.
We support up-to-date 64-bit ARM and x64 FreeBSD, macOS, Windows and Linux systems. Please ensure that your configuration is supported before labelling the issue as a bug. In particular, we do not support legacy 32-bit systems.
**Indicate whether you are willing or able to provide a bug fix as a pull request**
If you plan to contribute to simdjson, please read our
If you plan to contribute to simdjson, please read our
* CONTRIBUTING guide: https://github.com/simdjson/simdjson/blob/master/CONTRIBUTING.md and our
* HACKING guide: https://github.com/simdjson/simdjson/blob/master/HACKING.md

View File

@ -32,6 +32,6 @@ A clear and concise description of any alternative solutions or features you've
Add any other context or screenshots about the feature request here.
**Are you willing to contribute code or documentation toward this new feature?**
If you plan to contribute to simdjson, please read our
If you plan to contribute to simdjson, please read our
* CONTRIBUTING guide: https://github.com/simdjson/simdjson/blob/master/CONTRIBUTING.md and our
* HACKING guide: https://github.com/simdjson/simdjson/blob/master/HACKING.md

View File

@ -27,6 +27,6 @@ Is your issue:
4. A documentation issue? Can you suggest an improvement?
If you plan to contribute to simdjson, please read our
If you plan to contribute to simdjson, please read our
* CONTRIBUTING guide: https://github.com/simdjson/simdjson/blob/master/CONTRIBUTING.md and our
* HACKING guide: https://github.com/simdjson/simdjson/blob/master/HACKING.md

View File

@ -40,7 +40,7 @@ Table of Contents
Quick Start
-----------
The simdjson library is easily consumable with a single .h and .cpp file.
0. Prerequisites: `g++` (version 7 or better) or `clang++` (version 6 or better), and a 64-bit system with a command-line shell (e.g., Linux, macOS, FreeBSD). We also support programming environments like Visual Studio and Xcode, but different steps are needed.
@ -168,7 +168,7 @@ instructions, reducing branch misprediction, and reducing data dependency to tak
CPU's multiple execution cores.
Some people [enjoy reading our paper](https://arxiv.org/abs/1902.08318): A description of the design
and implementation of simdjson is in our research article:
and implementation of simdjson is in our research article:
- Geoff Langdale, Daniel Lemire, [Parsing Gigabytes of JSON per Second](https://arxiv.org/abs/1902.08318), VLDB Journal 28 (6), 2019.
We have an in-depth paper focused on the UTF-8 validation:

View File

@ -87,8 +87,8 @@ static void serialize_big_string_to_string(State& state) {
std::vector<char> content;
content.push_back('\"');
for(size_t i = 0 ; i < 100000; i ++) {
content.push_back('0' + char(i%10)); // we add what looks like a long list of digits
}
content.push_back('0' + char(i%10)); // we add what looks like a long list of digits
}
content.push_back('\"');
dom::element doc;
simdjson::error_code error;
@ -139,7 +139,7 @@ static void serialize_twitter_to_string(State& state) {
}
// we validate the result
{
auto serial = simdjson::to_string(doc);
auto serial = simdjson::to_string(doc);
dom::element doc2; // we parse the stringify output
if ((error = parser.parse(serial).get(doc2))) { throw std::runtime_error("serialization error"); }
auto serial2 = simdjson::to_string(doc2); // we stringify again
@ -211,7 +211,7 @@ static void numbers_scan(State& state) {
}
benchmark::DoNotOptimize(container.data());
benchmark::ClobberMemory();
}
}
}
BENCHMARK(numbers_scan);
@ -236,7 +236,7 @@ static void numbers_size_scan(State& state) {
if(pos != container.size()) { cerr << "bad count" << endl; }
benchmark::DoNotOptimize(container.data());
benchmark::ClobberMemory();
}
}
}
BENCHMARK(numbers_size_scan);
@ -315,7 +315,7 @@ static void numbers_load_scan(State& state) {
}
benchmark::DoNotOptimize(container.data());
benchmark::ClobberMemory();
}
}
}
BENCHMARK(numbers_load_scan);
@ -341,7 +341,7 @@ static void numbers_load_size_scan(State& state) {
if(pos != container.size()) { cerr << "bad count" << endl; }
benchmark::DoNotOptimize(container.data());
benchmark::ClobberMemory();
}
}
}
BENCHMARK(numbers_load_size_scan);
@ -360,7 +360,7 @@ static void numbers_exceptions_scan(State& state) {
}
benchmark::DoNotOptimize(container.data());
benchmark::ClobberMemory();
}
}
}
BENCHMARK(numbers_exceptions_scan);
@ -378,7 +378,7 @@ static void numbers_exceptions_size_scan(State& state) {
if(pos != container.size()) { cerr << "bad count" << endl; }
benchmark::DoNotOptimize(container.data());
benchmark::ClobberMemory();
}
}
}
BENCHMARK(numbers_exceptions_size_scan);
@ -437,7 +437,7 @@ static void numbers_exceptions_load_scan(State& state) {
}
benchmark::DoNotOptimize(container.data());
benchmark::ClobberMemory();
}
}
}
BENCHMARK(numbers_exceptions_load_scan);
@ -456,7 +456,7 @@ static void numbers_exceptions_load_size_scan(State& state) {
if(pos != container.size()) { cerr << "bad count" << endl; }
benchmark::DoNotOptimize(container.data());
benchmark::ClobberMemory();
}
}
}
BENCHMARK(numbers_exceptions_load_size_scan);
@ -711,7 +711,7 @@ static void iterator_twitter_image_sizes(State& state) {
if (!iter.up()) { return; } // back to entities
}
if (!iter.up()) { return; } // back to status
}
}
} while (iter.next()); // next status
}

View File

@ -228,7 +228,7 @@ struct progress_bar {
/**
* The speed at which we can allocate memory is strictly system specific.
* It depends on the OS and the runtime library. It is subject to various
* system-specific knobs. It is not something that we can reasonably
* system-specific knobs. It is not something that we can reasonably
* benchmark with crude timings.
* If someone wants to optimize how simdjson allocates memory, then it will
* almost surely require a distinct benchmarking tool. What is meant by

View File

@ -95,7 +95,7 @@ if (SIMDJSON_IS_UNDER_GIT AND SIMDJSON_GIT AND Git_FOUND AND (GIT_VERSION_STRING
else()
if (CMAKE_GENERATOR MATCHES Ninja)
message(STATUS "We disable the checkperf targets under Ninja.")
else()
else()
message(STATUS "Either git is unavailable or else it is too old. We are disabling checkperf targets.")
endif()
endif ()

View File

@ -20,7 +20,7 @@ void remove_duplicates(std::vector<int64_t> &v) {
namespace distinct_user_id {
template<typename T> static void DistinctUserID(benchmark::State &state);
} // namespace
} // namespace
//
// Implementation

View File

@ -12,12 +12,12 @@ using namespace simdjson::builtin;
class OnDemand {
public:
OnDemand() {
OnDemand() {
if(!displayed_implementation) {
std::cout << "On Demand implementation: " << builtin_implementation()->name() << std::endl;
std::cout << "On Demand implementation: " << builtin_implementation()->name() << std::endl;
displayed_implementation = true;
}
}
}
simdjson_really_inline bool Run(const padded_string &json);
simdjson_really_inline const std::vector<int64_t> &Result() { return ids; }
simdjson_really_inline size_t ItemCount() { return ids.size(); }
@ -39,20 +39,20 @@ simdjson_really_inline bool OnDemand::Run(const padded_string &json) {
//
// You might think that you do not need the braces, but
// you do, otherwise you will get the wrong answer. That is
// because you can only have one active object or array
// at a time.
// because you can only have one active object or array
// at a time.
{
ondemand::object user = tweet["user"];
int64_t id = user["id"];
int64_t id = user["id"];
ids.push_back(id);
}
// Not all tweets have a "retweeted_status", but when they do
// Not all tweets have a "retweeted_status", but when they do
// we want to go and find the user within.
auto retweet = tweet["retweeted_status"];
if(!retweet.error()) {
ondemand::object retweet_content = retweet;
ondemand::object reuser = retweet_content["user"];
int64_t rid = reuser["id"];
int64_t rid = reuser["id"];
ids.push_back(rid);
}
}

View File

@ -29,7 +29,7 @@ int main(int argc, char *argv[]) {
<< std::endl;
}
simdjson::padded_string p;
bench(filename, p);
bench(filename, p);
double meanval = 0;
double maxval = 0;
double minval = 10000;

View File

@ -45,7 +45,7 @@ static std::string build_json_array(size_t N) {
myss << R"( "info": "some info")" << endl;
myss << R"(})" << endl;
string answer = myss.str();
cout << "Creating a source file spanning " << (answer.size() + 512) / 1024 << " KB " << endl;
cout << "Creating a source file spanning " << (answer.size() + 512) / 1024 << " KB " << endl;
return answer;
}

View File

@ -29,7 +29,7 @@ static std::string build_json_array(size_t N) {
myss << std::endl;
myss << "]" << std::endl;
std::string answer = myss.str();
std::cout << "Creating a source file spanning " << (answer.size() + 512) / 1024 << " KB " << std::endl;
std::cout << "Creating a source file spanning " << (answer.size() + 512) / 1024 << " KB " << std::endl;
return answer;
}

View File

@ -52,7 +52,7 @@ public:
simdjson_really_inline error_code visit_key(json_iterator &, const uint8_t * key) {
switch(key[1]) {
// Technically, we should check the other characters
// in the key, but we are cheating to go as fast
// in the key, but we are cheating to go as fast
// as possible.
case 'x':
idx = GOT_X;
@ -62,11 +62,11 @@ public:
break;
case 'z':
idx = GOT_Z;
break;
break;
default:
idx = GOT_SOMETHING_ELSE;
idx = GOT_SOMETHING_ELSE;
}
return SUCCESS;
return SUCCESS;
}
simdjson_really_inline error_code visit_array_start(json_iterator &) { return SUCCESS; }
simdjson_really_inline error_code visit_array_end(json_iterator &) { return SUCCESS; }

View File

@ -10,7 +10,7 @@
#else // no __has_include
// Please ensure that linux headers have been installed.
#include <asm/unistd.h> // for __NR_perf_event_open
#endif
#endif
#include <linux/perf_event.h> // for perf event constants
#include <sys/ioctl.h> // for ioctl
#include <unistd.h> // for syscall

View File

@ -122,7 +122,7 @@ int main(int argc, char *argv[]) {
BEST_TIME_NOCHECK(
"despacing with std::minify", simdjson_stringme(p),, repeat, volume, !just_data);
memcpy(buffer, p.data(), p.size());
size_t outlength;
uint8_t *cbuffer = (uint8_t *)buffer;

View File

@ -192,14 +192,14 @@ bool bench(const char *filename, bool verbose, bool just_data,
BEST_TIME("Boost.json", execute(sv), false, , repeat, volume, !just_data);
}
{
auto execute = [&p]() -> bool {
yyjson_doc *doc = yyjson_read(p.data(), p.size(), 0);
bool is_ok = doc != nullptr;
yyjson_doc_free(doc);
return is_ok;
};
BEST_TIME("yyjson", execute(), true, , repeat, volume, !just_data);
}
#ifndef ALLPARSER

View File

@ -37,7 +37,7 @@ simdjson_really_inline bool Iter::Run(const padded_string &json) {
// Walk the document, parsing the tweets as we go
// { "statuses":
// { "statuses":
auto iter = parser.iterate_raw(json).value();
if (!iter.start_object() || !iter.find_field_raw("statuses")) { return false; }
// { "statuses": [

View File

@ -12,12 +12,12 @@ using namespace simdjson::builtin;
class OnDemand {
public:
OnDemand() {
OnDemand() {
if(!displayed_implementation) {
std::cout << "On Demand implementation: " << builtin_implementation()->name() << std::endl;
std::cout << "On Demand implementation: " << builtin_implementation()->name() << std::endl;
displayed_implementation = true;
}
}
}
simdjson_really_inline bool Run(const padded_string &json);
simdjson_really_inline const std::vector<tweet> &Result() { return tweets; }
simdjson_really_inline size_t ItemCount() { return tweets.size(); }

View File

@ -7,28 +7,28 @@
// //////////////////////////////////////////////////////////////////////
/*
The JsonCpp library's source code, including accompanying documentation,
The JsonCpp library's source code, including accompanying documentation,
tests and demonstration applications, are licensed under the following
conditions...
Baptiste Lepilleur and The JsonCpp Authors explicitly disclaim copyright in all
jurisdictions which recognize such a disclaimer. In such jurisdictions,
Baptiste Lepilleur and The JsonCpp Authors explicitly disclaim copyright in all
jurisdictions which recognize such a disclaimer. In such jurisdictions,
this software is released into the Public Domain.
In jurisdictions which do not recognize Public Domain property (e.g. Germany as of
2010), this software is Copyright (c) 2007-2010 by Baptiste Lepilleur and
The JsonCpp Authors, and is released under the terms of the MIT License (see below).
In jurisdictions which recognize Public Domain property, the user of this
software may choose to accept it either as 1) Public Domain, 2) under the
conditions of the MIT License (see below), or 3) under the terms of dual
In jurisdictions which recognize Public Domain property, the user of this
software may choose to accept it either as 1) Public Domain, 2) under the
conditions of the MIT License (see below), or 3) under the terms of dual
Public Domain/MIT License conditions described here, as they choose.
The MIT License is about as close to Public Domain as a license can get, and is
described in clear, concise terms at:
http://en.wikipedia.org/wiki/MIT_License
The full text of the MIT License follows:
========================================================================

View File

@ -6,28 +6,28 @@
// //////////////////////////////////////////////////////////////////////
/*
The JsonCpp library's source code, including accompanying documentation,
The JsonCpp library's source code, including accompanying documentation,
tests and demonstration applications, are licensed under the following
conditions...
Baptiste Lepilleur and The JsonCpp Authors explicitly disclaim copyright in all
jurisdictions which recognize such a disclaimer. In such jurisdictions,
Baptiste Lepilleur and The JsonCpp Authors explicitly disclaim copyright in all
jurisdictions which recognize such a disclaimer. In such jurisdictions,
this software is released into the Public Domain.
In jurisdictions which do not recognize Public Domain property (e.g. Germany as of
2010), this software is Copyright (c) 2007-2010 by Baptiste Lepilleur and
The JsonCpp Authors, and is released under the terms of the MIT License (see below).
In jurisdictions which recognize Public Domain property, the user of this
software may choose to accept it either as 1) Public Domain, 2) under the
conditions of the MIT License (see below), or 3) under the terms of dual
In jurisdictions which recognize Public Domain property, the user of this
software may choose to accept it either as 1) Public Domain, 2) under the
conditions of the MIT License (see below), or 3) under the terms of dual
Public Domain/MIT License conditions described here, as they choose.
The MIT License is about as close to Public Domain as a license can get, and is
described in clear, concise terms at:
http://en.wikipedia.org/wiki/MIT_License
The full text of the MIT License follows:
========================================================================

View File

@ -6,28 +6,28 @@
// //////////////////////////////////////////////////////////////////////
/*
The JsonCpp library's source code, including accompanying documentation,
The JsonCpp library's source code, including accompanying documentation,
tests and demonstration applications, are licensed under the following
conditions...
Baptiste Lepilleur and The JsonCpp Authors explicitly disclaim copyright in all
jurisdictions which recognize such a disclaimer. In such jurisdictions,
Baptiste Lepilleur and The JsonCpp Authors explicitly disclaim copyright in all
jurisdictions which recognize such a disclaimer. In such jurisdictions,
this software is released into the Public Domain.
In jurisdictions which do not recognize Public Domain property (e.g. Germany as of
2010), this software is Copyright (c) 2007-2010 by Baptiste Lepilleur and
The JsonCpp Authors, and is released under the terms of the MIT License (see below).
In jurisdictions which recognize Public Domain property, the user of this
software may choose to accept it either as 1) Public Domain, 2) under the
conditions of the MIT License (see below), or 3) under the terms of dual
In jurisdictions which recognize Public Domain property, the user of this
software may choose to accept it either as 1) Public Domain, 2) under the
conditions of the MIT License (see below), or 3) under the terms of dual
Public Domain/MIT License conditions described here, as they choose.
The MIT License is about as close to Public Domain as a license can get, and is
described in clear, concise terms at:
http://en.wikipedia.org/wiki/MIT_License
The full text of the MIT License follows:
========================================================================

View File

@ -112,7 +112,7 @@ dom::element doc = parser.parse("[1,2,3]"_padded); // parse a string, the _padde
```
The parsed document resulting from the `parser.load` and `parser.parse` calls depends on the `parser` instance. Thus the `parser` instance must remain in scope. Furthermore, you must have at most one parsed document in play per `parser` instance.
You cannot copy a `parser` instance, you may only move it.
You cannot copy a `parser` instance, you may only move it.
If you need to keep a document around long term, you can keep or move the parser instance. Note that moving a parser instance, or keeping one in a movable data structure like vector or map, can cause any outstanding `element`, `object` or `array` instances to be invalidated. If you need to store a parser in a movable data structure, you should use a `std::unique_ptr` to avoid this invalidation (e.g., `std::unique_ptr<dom::parser> parser(new dom::parser{})`).

View File

@ -93,7 +93,7 @@ dom::element doc = parser.parse("[1,2,3]"_padded); // parse a string, the _padde
```
The parsed document resulting from the `parser.load` and `parser.parse` calls depends on the `parser` instance. Thus the `parser` instance must remain in scope. Furthermore, you must have at most one parsed document in play per `parser` instance.
You cannot copy a `parser` instance, you may only move it.
You cannot copy a `parser` instance, you may only move it.
If you need to keep a document around long term, you can keep or move the parser instance. Note that moving a parser instance, or keeping one in a movable data structure like vector or map, can cause any outstanding `element`, `object` or `array` instances to be invalidated. If you need to store a parser in a movable data structure, you should use a `std::unique_ptr` to avoid this invalidation (e.g., `std::unique_ptr<dom::parser> parser(new dom::parser{})`).

View File

@ -1,7 +1,7 @@
# Tape structure in simdjson
# Tape structure in simdjson
We parse a JSON document to a tape. A tape is an array of 64-bit values. Each node encountered in the JSON document is written to the tape using one or more 64-bit tape elements; the layout of the tape is in "document order": elements are stored as they are encountered in the JSON document.
We parse a JSON document to a tape. A tape is an array of 64-bit values. Each node encountered in the JSON document is written to the tape using one or more 64-bit tape elements; the layout of the tape is in "document order": elements are stored as they are encountered in the JSON document.
Throughout, little endian encoding is assumed. The tape is indexed starting at 0 (the first element is at index 0).
@ -70,7 +70,7 @@ The following is a dump of the content of the tape, with the first number of eac
Most tape elements are written as `('c' << 56) + x` where `'c'` is some ASCII character determining the type of the element (out of 't', 'f', 'n', 'l', 'u', 'd', '"', '{', '}', '[', ']' ,'r') and where `x` is a 56-bit value called the payload. The payload is normally interpreted as an unsigned 56-bit integer. Note that 56-bit integers can be quite large.
Performance consideration: We believe that accessing the tape in regular units of 64 bits is more important for performance than saving memory.
Performance consideration: We believe that accessing the tape in regular units of 64 bits is more important for performance than saving memory.
## Simple JSON values
@ -91,7 +91,7 @@ Integer values are represented as two 64-bit tape elements:
Float values are represented as two 64-bit tape elements:
- The 64-bit value `('d' << 56)` followed by the 64-bit double value literally in standard IEEE 754 notation.
Performance consideration: We store numbers on the main tape because we believe that locality of reference is helpful for performance.
Performance consideration: We store numbers on the main tape because we believe that locality of reference is helpful for performance.
## Root node
@ -109,20 +109,20 @@ Hint: We can read the first tape element to determine the length of the tape.
We prefix the string data itself by a 32-bit header to be interpreted as a 32-bit integer. It indicates the length of the string. The actual string data starts at an offset of 4 bytes.
We store string values using UTF-8 encoding with null termination on a separate tape. A string value is represented on the main tape as the 64-bit tape element `('"' << 56) + x` where the payload `x` is the location on the string tape of the null-terminated string.
We store string values using UTF-8 encoding with null termination on a separate tape. A string value is represented on the main tape as the 64-bit tape element `('"' << 56) + x` where the payload `x` is the location on the string tape of the null-terminated string.
## Arrays
## Arrays
JSON arrays are represented using two 64-bit tape elements.
- The first 64-bit tape element contains the value `('[' << 56) + x` where the payload `x` is 1 + the index of the second 64-bit tape element on the tape.
- The first 64-bit tape element contains the value `('[' << 56) + x` where the payload `x` is 1 + the index of the second 64-bit tape element on the tape.
- The second 64-bit tape element contains the value `(']' << 56) + x` where the payload `x` contains the index of the first 64-bit tape element on the tape.
All the content of the array is located between these two tape elements, including arrays and objects.
Performance consideration: We can skip the content of an array entirely by accessing the first 64-bit tape element, reading the payload and moving to the corresponding index on the tape.
## Objects
## Objects
JSON objects are represented using two 64-bit tape elements.
@ -131,6 +131,6 @@ JSON objects are represented using two 64-bit tape elements.
In-between these two tape elements, we alternate between keys (which must be strings) and values. A value could be an object or an array.
All the content of the object is located between these two tape elements, including arrays and objects.
All the content of the object is located between these two tape elements, including arrays and objects.
Performance consideration: We can skip the content of an object entirely by accessing the first 64-bit tape element, reading the payload and moving to the corresponding index on the tape.

View File

@ -31,12 +31,12 @@ variant=replay
if [ ! -d build-$variant ] ; then
mkdir build-$variant
cd build-$variant
cmake .. \
$COMMON \
-DCMAKE_BUILD_TYPE=Debug \
-DSIMDJSON_FUZZ_LINKMAIN=On
ninja all_fuzzers
cd ..
fi
@ -58,7 +58,7 @@ fi
variant=sanitizers-O3
if [ ! -d build-$variant ] ; then
mkdir build-$variant
cd build-$variant
cmake .. \
@ -68,7 +68,7 @@ variant=sanitizers-O3
-DCMAKE_BUILD_TYPE=Debug \
-DSIMDJSON_FUZZ_LINKMAIN=Off \
-DSIMDJSON_FUZZ_LDFLAGS="-fsanitize=fuzzer"
ninja all_fuzzers
cd ..
fi
@ -76,7 +76,7 @@ variant=sanitizers-O3
variant=sanitizers-O0
if [ ! -d build-$variant ] ; then
mkdir build-$variant
cd build-$variant
cmake .. \
@ -86,7 +86,7 @@ variant=sanitizers-O0
-DCMAKE_BUILD_TYPE=Debug \
-DSIMDJSON_FUZZ_LINKMAIN=Off \
-DSIMDJSON_FUZZ_LDFLAGS="-fsanitize=fuzzer"
ninja all_fuzzers
cd ..
fi
@ -95,10 +95,10 @@ variant=sanitizers-O0
# A fast fuzzer, for fast exploration rather than finding bugs.
variant=fast
if [ ! -d build-$variant ] ; then
mkdir build-$variant
cd build-$variant
cmake .. \
$COMMON \
-DCMAKE_CXX_FLAGS="-fsanitize=fuzzer-no-link" \
@ -106,7 +106,7 @@ variant=fast
-DCMAKE_BUILD_TYPE=Release \
-DSIMDJSON_FUZZ_LINKMAIN=Off \
-DSIMDJSON_FUZZ_LDFLAGS="-fsanitize=fuzzer"
ninja all_fuzzers
cd ..
fi

View File

@ -73,7 +73,7 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
// or what hardware it runs on
constexpr std::size_t Nimplementations_max=3;
const std::size_t Nimplementations = supported_implementations.size();
if(Nimplementations>Nimplementations_max) {
//there is another backend added, please bump Nimplementations_max!
std::abort();

View File

@ -12,7 +12,7 @@ NO_SANITIZE_UNDEFINED
simdjson_really_inline int trailing_zeroes(uint64_t input_num) {
#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO
unsigned long ret;
// Search the mask data from least significant bit (LSB)
// Search the mask data from least significant bit (LSB)
// to the most significant bit (MSB) for a set bit (1).
_BitScanForward64(&ret, input_num);
return (int)ret;
@ -30,7 +30,7 @@ simdjson_really_inline uint64_t clear_lowest_bit(uint64_t input_num) {
simdjson_really_inline int leading_zeroes(uint64_t input_num) {
#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO
unsigned long leading_zero = 0;
// Search the mask data from most significant bit (MSB)
// Search the mask data from most significant bit (MSB)
// to least significant bit (LSB) for a set bit (1).
if (_BitScanReverse64(&leading_zero, input_num))
return (int)(63 - leading_zero);

View File

@ -13,15 +13,15 @@ namespace {
simdjson_really_inline uint64_t prefix_xor(uint64_t bitmask) {
/////////////
// We could do this with PMULL, but it is apparently slow.
//
//
//#ifdef __ARM_FEATURE_CRYPTO // some ARM processors lack this extension
//return vmull_p64(-1ULL, bitmask);
//#else
// Analysis by @sebpop:
// When diffing the assembly for src/stage1_find_marks.cpp I see that the eors are all spread out
// in between other vector code, so effectively the extra cycles of the sequence do not matter
// in between other vector code, so effectively the extra cycles of the sequence do not matter
// because the GPR units are idle otherwise and the critical path is on the FP side.
// Also the PMULL requires two extra fmovs: GPR->FP (3 cycles in N1, 5 cycles in A72 )
// Also the PMULL requires two extra fmovs: GPR->FP (3 cycles in N1, 5 cycles in A72 )
// and FP->GPR (2 cycles on N1 and 5 cycles on A72.)
///////////
bitmask ^= bitmask << 1;

View File

@ -423,7 +423,7 @@ simdjson_really_inline int8x16_t make_int8x16_t(int8_t x1, int8_t x2, int8_t x
simd8x64(const simd8x64<T>& o) = delete; // no copy allowed
simd8x64<T>& operator=(const simd8<T> other) = delete; // no assignment allowed
simd8x64() = delete; // no default constructor allowed
simdjson_really_inline simd8x64(const simd8<T> chunk0, const simd8<T> chunk1, const simd8<T> chunk2, const simd8<T> chunk3) : chunks{chunk0, chunk1, chunk2, chunk3} {}
simdjson_really_inline simd8x64(const T ptr[64]) : chunks{simd8<T>::load(ptr), simd8<T>::load(ptr+16), simd8<T>::load(ptr+32), simd8<T>::load(ptr+48)} {}

View File

@ -106,7 +106,7 @@ public:
/**
* Get the value at the given index. This function has linear-time complexity and
* is equivalent to the following:
*
*
* size_t i=0;
* for (auto element : *this) {
* if (i == index) { return element; }
@ -115,7 +115,7 @@ public:
* return INDEX_OUT_OF_BOUNDS;
*
* Avoid calling the at() function repeatedly.
*
*
* @return The value at the given index, or:
* - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length
*/

View File

@ -94,7 +94,7 @@ simdjson_really_inline document_stream::document_stream() noexcept
error{UNINITIALIZED}
#ifdef SIMDJSON_THREADS_ENABLED
, use_thread(false)
#endif
#endif
{
}

View File

@ -22,13 +22,13 @@ struct stage1_worker {
stage1_worker(stage1_worker&&) = delete;
stage1_worker operator=(const stage1_worker&) = delete;
~stage1_worker();
/**
/**
* We only start the thread when it is needed, not at object construction, this may throw.
* You should only call this once.
* You should only call this once.
**/
void start_thread();
/**
* Start a stage 1 job. You should first call 'run', then 'finish'.
/**
* Start a stage 1 job. You should first call 'run', then 'finish'.
* You must call start_thread once before.
*/
void run(document_stream * ds, dom::parser * stage1, size_t next_batch_start);
@ -37,10 +37,10 @@ struct stage1_worker {
private:
/**
/**
* Normally, we would never stop the thread. But we do in the destructor.
* This function is only safe assuming that you are not waiting for results. You
* should have called run, then finish, and be done.
* This function is only safe assuming that you are not waiting for results. You
* should have called run, then finish, and be done.
**/
void stop_thread();
@ -49,8 +49,8 @@ private:
dom::parser * stage1_thread_parser{};
size_t _next_batch_start{};
document_stream * owner{};
/**
* We have two state variables. This could be streamlined to one variable in the future but
/**
* We have two state variables. This could be streamlined to one variable in the future but
* we use two for clarity.
*/
bool has_work{false};
@ -108,7 +108,7 @@ public:
simdjson_really_inline bool operator!=(const iterator &other) const noexcept;
/**
* @private
*
*
* Gives the current index in the input document in bytes.
*
* document_stream stream = parser.parse_many(json,window);
@ -116,15 +116,15 @@ public:
* auto doc = *i;
* size_t index = i.current_index();
* }
*
*
* This function (current_index()) is experimental and the usage
* may change in future versions of simdjson: we find the API somewhat
* awkward and we would like to offer something friendlier.
* awkward and we would like to offer something friendlier.
*/
simdjson_really_inline size_t current_index() const noexcept;
/**
* @private
*
*
* Gives a view of the current document.
*
* document_stream stream = parser.parse_many(json,window);
@ -132,14 +132,14 @@ public:
* auto doc = *i;
* std::string_view v = i->source();
* }
*
*
* The returned string_view instance is simply a map to the (unparsed)
* source string: it may thus include white-space characters and all manner
* of padding.
*
*
* This function (source()) is experimental and the usage
* may change in future versions of simdjson: we find the API somewhat
* awkward and we would like to offer something friendlier.
* awkward and we would like to offer something friendlier.
*/
simdjson_really_inline std::string_view source() const noexcept;
@ -169,7 +169,7 @@ private:
/**
* Construct a document_stream. Does not allocate or parse anything until the iterator is
* used.
*
*
* @param parser is a reference to the parser instance used to generate this document_stream
* @param buf is the raw byte buffer we need to process
* @param len is the length of the raw byte buffer in bytes
@ -237,7 +237,7 @@ private:
#ifdef SIMDJSON_THREADS_ENABLED
/** Indicates whether we use threads. Note that this needs to be a constant during the execution of the parsing. */
bool use_thread;
bool use_thread;
inline void load_from_stage1_thread() noexcept;

View File

@ -64,14 +64,14 @@ public:
*/
inline simdjson_result<object> get_object() const noexcept;
/**
* Cast this element to a null-terminated C string.
*
* Cast this element to a null-terminated C string.
*
* The string is guaranteed to be valid UTF-8.
*
* The get_c_str() function is equivalent to get<const char *>().
*
*
* The length of the string is given by get_string_length(). Because JSON strings
* may contain null characters, it may be incorrect to use strlen to determine the
* may contain null characters, it may be incorrect to use strlen to determine the
* string length.
*
* It is possible to get a single string_view instance which represents both the string
@ -84,7 +84,7 @@ public:
inline simdjson_result<const char *> get_c_str() const noexcept;
/**
* Gives the length in bytes of the string.
*
*
* It is possible to get a single string_view instance which represents both the string
* content and its length: see get_string().
*
@ -93,8 +93,8 @@ public:
*/
inline simdjson_result<size_t> get_string_length() const noexcept;
/**
* Cast this element to a string.
*
* Cast this element to a string.
*
* The string is guaranteed to be valid UTF-8.
*
* Equivalent to get<std::string_view>().
@ -279,7 +279,7 @@ public:
/**
* Read this element as a null-terminated UTF-8 string.
*
*
* Be mindful that JSON allows strings to contain null characters.
*
* Does *not* convert other types to a string; requires that the JSON type of the element was
@ -402,7 +402,7 @@ public:
* dom::parser parser;
* object obj = parser.parse(R"({ "": { "a": [ 10, 20, 30 ] }})"_padded);
* obj.at_pointer("//a/1") == 20
*
*
* @return The value associated with the given JSON pointer, or:
* - NO_SUCH_FIELD if a field does not exist in an object
* - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length
@ -411,21 +411,21 @@ public:
*/
inline simdjson_result<element> at_pointer(const std::string_view json_pointer) const noexcept;
#ifndef SIMDJSON_DISABLE_DEPRECATED_API
#ifndef SIMDJSON_DISABLE_DEPRECATED_API
/**
*
*
* Version 0.4 of simdjson used an incorrect interpretation of the JSON Pointer standard
* and allowed the following :
*
*
* dom::parser parser;
* element doc = parser.parse(R"({ "foo": { "a": [ 10, 20, 30 ] }})"_padded);
* doc.at("foo/a/1") == 20
*
*
* Though it is intuitive, it is not compliant with RFC 6901
* https://tools.ietf.org/html/rfc6901
*
* https://tools.ietf.org/html/rfc6901
*
* For standard compliance, use the at_pointer function instead.
*
*
* @return The value associated with the given JSON pointer, or:
* - NO_SUCH_FIELD if a field does not exist in an object
* - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length

View File

@ -29,14 +29,14 @@ static constexpr size_t DEFAULT_BATCH_SIZE = 1000000;
* as well as memory for a single document. The parsed document is overwritten on each parse.
*
* This class cannot be copied, only moved, to avoid unintended allocations.
*
* @note Moving a parser instance may invalidate "dom::element" instances. If you need to
*
* @note Moving a parser instance may invalidate "dom::element" instances. If you need to
* preserve both the "dom::element" instances and the parser, consider wrapping the parser
* instance in a std::unique_ptr instance:
*
*
* std::unique_ptr<dom::parser> parser(new dom::parser{});
* auto error = parser->load(f).get(root);
*
*
* You can then move std::unique_ptr safely.
*
* @note This is not thread safe: one parser cannot produce two documents at the same time!
@ -78,10 +78,10 @@ public:
*
* dom::parser parser;
* const element doc = parser.load("jsonexamples/twitter.json");
*
*
* The function is eager: the file's content is loaded in memory inside the parser instance
* and immediately parsed. The file can be deleted after the `parser.load` call.
*
*
* ### IMPORTANT: Document Lifetime
*
* The JSON document still lives in the parser: this is the most efficient way to parse JSON
@ -90,8 +90,8 @@ public:
*
* Moving the parser instance is safe, but it invalidates the element instances. You may store
* the parser instance without moving it by wrapping it inside an `unique_ptr` instance like
* so: `std::unique_ptr<dom::parser> parser(new dom::parser{});`.
*
* so: `std::unique_ptr<dom::parser> parser(new dom::parser{});`.
*
* ### Parser Capacity
*
* If the parser's current capacity is less than the file length, it will allocate enough capacity
@ -112,7 +112,7 @@ public:
*
* dom::parser parser;
* element doc = parser.parse(buf, len);
*
*
* The function eagerly parses the input: the input can be modified and discarded after
* the `parser.parse(buf, len)` call has completed.
*
@ -121,10 +121,10 @@ public:
* The JSON document still lives in the parser: this is the most efficient way to parse JSON
* documents because it reuses the same buffers, but you *must* use the document before you
* destroy the parser or call parse() again.
*
*
* Moving the parser instance is safe, but it invalidates the element instances. You may store
* the parser instance without moving it by wrapping it inside an `unique_ptr` instance like
* so: `std::unique_ptr<dom::parser> parser(new dom::parser{});`.
* so: `std::unique_ptr<dom::parser> parser(new dom::parser{});`.
*
* ### REQUIRED: Buffer Padding
*
@ -132,22 +132,22 @@ public:
* those bytes are initialized to, as long as they are allocated.
*
* If realloc_if_needed is true (the default), it is assumed that the buffer does *not* have enough padding,
* and it is copied into an enlarged temporary buffer before parsing. Thus the following is safe:
*
* and it is copied into an enlarged temporary buffer before parsing. Thus the following is safe:
*
* const char *json = R"({"key":"value"})";
* const size_t json_len = std::strlen(json);
* simdjson::dom::parser parser;
* simdjson::dom::element element = parser.parse(json, json_len);
*
* If you set realloc_if_needed to false (e.g., parser.parse(json, json_len, false)),
*
* If you set realloc_if_needed to false (e.g., parser.parse(json, json_len, false)),
* you must provide a buffer with at least SIMDJSON_PADDING extra bytes at the end.
* The benefit of setting realloc_if_needed to false is that you avoid a temporary
* memory allocation and a copy.
*
*
* The padded bytes may be read. It is not important how you initialize
* these bytes though we recommend a sensible default like null character values or spaces.
* For example, the following low-level code is safe:
*
*
* const char *json = R"({"key":"value"})";
* const size_t json_len = std::strlen(json);
* std::unique_ptr<char[]> padded_json_copy{new char[json_len + SIMDJSON_PADDING]};
@ -197,11 +197,11 @@ public:
*
* The file is loaded in memory and can be safely deleted after the `parser.load_many(path)`
* function has returned. The memory is held by the `parser` instance.
*
*
* The function is lazy: it may be that no more than one JSON document at a time is parsed.
* And, possibly, no document may have been parsed when the `parser.load_many(path)` function
* returned.
*
*
* ### Format
*
* The file must contain a series of one or more JSON documents, concatenated into a single
@ -212,7 +212,7 @@ public:
* Documents that consist of an object or array may omit the whitespace between them, concatenating
* with no separator. documents that consist of a single primitive (i.e. documents that are not
* arrays or objects) MUST be separated with whitespace.
*
*
* The documents must not exceed batch_size bytes (by default 1MB) or they will fail to parse.
* Setting batch_size to excessively large or excessively small values may negatively impact the
* performance.
@ -245,7 +245,7 @@ public:
* If the parser's current capacity is less than batch_size, it will allocate enough capacity
* to handle it (up to max_capacity).
*
* @param path File name pointing at the concatenated JSON to parse.
* @param path File name pointing at the concatenated JSON to parse.
* @param batch_size The batch size to use. MUST be larger than the largest document. The sweet
* spot is cache-related: small enough to fit in cache, yet big enough to
* parse as many documents as possible in one tight loop.
@ -272,25 +272,25 @@ public:
* The function is lazy: it may be that no more than one JSON document at a time is parsed.
* And, possibly, no document may have been parsed when the `parser.load_many(path)` function
* returned.
*
*
* The caller is responsible for ensuring that the input string data remains unchanged and is
* not deleted during the loop. In particular, the following is unsafe and will not compile:
*
*
* auto docs = parser.parse_many("[\"temporary data\"]"_padded);
* // here the string "[\"temporary data\"]" may no longer exist in memory
* // the parser instance may not have even accessed the input yet
* for (element doc : docs) {
* cout << std::string(doc["title"]) << endl;
* }
*
* The following is safe:
*
*
* The following is safe:
*
* auto json = "[\"temporary data\"]"_padded;
* auto docs = parser.parse_many(json);
* for (element doc : docs) {
* cout << std::string(doc["title"]) << endl;
* }
*
*
* ### Format
*
* The buffer must contain a series of one or more JSON documents, concatenated into a single
@ -301,7 +301,7 @@ public:
* documents that consist of an object or array may omit the whitespace between them, concatenating
* with no separator. documents that consist of a single primitive (i.e. documents that are not
* arrays or objects) MUST be separated with whitespace.
*
*
* The documents must not exceed batch_size bytes (by default 1MB) or they will fail to parse.
* Setting batch_size to excessively large or excessively small values may negatively impact the
* performance.
@ -360,7 +360,7 @@ public:
/** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */
inline simdjson_result<document_stream> parse_many(const padded_string &s, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept;
inline simdjson_result<document_stream> parse_many(const padded_string &&s, size_t batch_size) = delete;// unsafe
/** @private We do not want to allow implicit conversion from C string to std::string. */
simdjson_result<document_stream> parse_many(const char *buf, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept = delete;

View File

@ -122,7 +122,7 @@ simdjson_really_inline void mini_formatter::number(int64_t x) {
simdjson_really_inline void mini_formatter::number(double x) {
char number_buffer[24];
// Currently, passing the nullptr to the second argument is
// safe because our implementation does not check the second
// safe because our implementation does not check the second
// argument.
char *newp = internal::to_chars(number_buffer, nullptr, x);
buffer.insert(buffer.end(), number_buffer, newp);
@ -135,7 +135,7 @@ simdjson_really_inline void mini_formatter::end_object() { one_char('}'); }
simdjson_really_inline void mini_formatter::comma() { one_char(','); }
simdjson_really_inline void mini_formatter::true_atom() {
simdjson_really_inline void mini_formatter::true_atom() {
const char * s = "true";
buffer.insert(buffer.end(), s, s + 4);
}
@ -157,29 +157,29 @@ simdjson_really_inline void mini_formatter::string(std::string_view unescaped) {
size_t i = 0;
// Fast path for the case where we have no control character, no ", and no backslash.
// This should include most keys.
constexpr static bool needs_escaping[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
constexpr static bool needs_escaping[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
for(;i + 8 <= unescaped.length(); i += 8) {
for(;i + 8 <= unescaped.length(); i += 8) {
// Poor man's vectorization. This could get much faster if we used SIMD.
if(needs_escaping[uint8_t(unescaped[i])] | needs_escaping[uint8_t(unescaped[i+1])]
if(needs_escaping[uint8_t(unescaped[i])] | needs_escaping[uint8_t(unescaped[i+1])]
| needs_escaping[uint8_t(unescaped[i+2])] | needs_escaping[uint8_t(unescaped[i+3])]
| needs_escaping[uint8_t(unescaped[i+4])] | needs_escaping[uint8_t(unescaped[i+5])]
| needs_escaping[uint8_t(unescaped[i+4])] | needs_escaping[uint8_t(unescaped[i+5])]
| needs_escaping[uint8_t(unescaped[i+6])] | needs_escaping[uint8_t(unescaped[i+7])]
) { break; }
}
for(;i < unescaped.length(); i++) {
for(;i < unescaped.length(); i++) {
if(needs_escaping[uint8_t(unescaped[i])]) { break; }
}
// The following is also possible and omits a 256-byte table, but it is slower:
// for (; (i < unescaped.length()) && (uint8_t(unescaped[i]) > 0x1F)
// for (; (i < unescaped.length()) && (uint8_t(unescaped[i]) > 0x1F)
// && (unescaped[i] != '\"') && (unescaped[i] != '\\'); i++) {}
// At least for long strings, the following should be fast. We could

View File

@ -14,7 +14,7 @@ namespace simdjson {
/**
* The string_builder template and mini_formatter class
* are not part of our public API and are subject to change
* are not part of our public API and are subject to change
* at any time!
*/
namespace internal {
@ -28,7 +28,7 @@ class mini_formatter;
* the string_builder template could support both minification
* and prettification, and various other tradeoffs.
*/
template <class formatter = mini_formatter>
template <class formatter = mini_formatter>
class string_builder {
public:
/** Construct an initially empty builder, would print the empty string **/
@ -41,12 +41,12 @@ public:
inline void append(simdjson::dom::object value);
/** Reset the builder (so that it would print the empty string) **/
simdjson_really_inline void clear();
/**
/**
* Get access to the string. The string_view is owned by the builder
* and it is invalid to use it after the string_builder has been
* and it is invalid to use it after the string_builder has been
* destroyed.
* However you can make a copy of the string_view on memory that you
* own.
* own.
*/
simdjson_really_inline std::string_view str() const;
/** Append a key_value_pair to the builder (to be printed) **/
@ -91,9 +91,9 @@ public:
simdjson_really_inline void string(std::string_view unescaped);
/** Clears out the content. **/
simdjson_really_inline void clear();
/**
/**
* Get access to the buffer; it is owned by the instance, but
* the user can make a copy.
* the user can make a copy.
**/
simdjson_really_inline std::string_view str() const;
@ -116,13 +116,13 @@ namespace dom {
* @param value The element.
* @throw if there is an error with the underlying output stream. simdjson itself will not throw.
*/
inline std::ostream& operator<<(std::ostream& out, simdjson::dom::element value) {
inline std::ostream& operator<<(std::ostream& out, simdjson::dom::element value) {
simdjson::internal::string_builder<> sb;
sb.append(value);
return (out << sb.str());
}
#if SIMDJSON_EXCEPTIONS
inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result<simdjson::dom::element> x) {
inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result<simdjson::dom::element> x) {
if (x.error()) { throw simdjson::simdjson_error(x.error()); }
return (out << x.value());
}
@ -134,13 +134,13 @@ inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result<sim
* @param value The array.
* @throw if there is an error with the underlying output stream. simdjson itself will not throw.
*/
inline std::ostream& operator<<(std::ostream& out, simdjson::dom::array value) {
inline std::ostream& operator<<(std::ostream& out, simdjson::dom::array value) {
simdjson::internal::string_builder<> sb;
sb.append(value);
return (out << sb.str());
}
#if SIMDJSON_EXCEPTIONS
inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result<simdjson::dom::array> x) {
inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result<simdjson::dom::array> x) {
if (x.error()) { throw simdjson::simdjson_error(x.error()); }
return (out << x.value());
}
@ -152,17 +152,17 @@ inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result<sim
* @param value The object.
* @throw if there is an error with the underlying output stream. simdjson itself will not throw.
*/
inline std::ostream& operator<<(std::ostream& out, simdjson::dom::object value) {
inline std::ostream& operator<<(std::ostream& out, simdjson::dom::object value) {
simdjson::internal::string_builder<> sb;
sb.append(value);
return (out << sb.str());
}
#if SIMDJSON_EXCEPTIONS
inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result<simdjson::dom::object> x) {
inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result<simdjson::dom::object> x) {
if (x.error()) { throw simdjson::simdjson_error(x.error()); }
return (out << x.value());
}
#endif
#endif
} // namespace dom
/**
@ -173,10 +173,10 @@ inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result<si
* cout << to_string(doc) << endl; // prints [1,2,3]
*
*/
template <class T>
template <class T>
std::string to_string(T x) {
// in C++, to_string is standard: http://www.cplusplus.com/reference/string/to_string/
// Currently minify and to_string are identical but in the future, they may
// Currently minify and to_string are identical but in the future, they may
// differ.
simdjson::internal::string_builder<> sb;
sb.append(x);
@ -184,12 +184,12 @@ std::string to_string(T x) {
return std::string(answer.data(), answer.size());
}
#if SIMDJSON_EXCEPTIONS
template <class T>
template <class T>
std::string to_string(simdjson_result<T> x) {
if (x.error()) { throw simdjson_error(x.error()); }
return to_string(x.value());
}
#endif
#endif
/**
* Minifies a JSON element or document, printing the smallest possible valid JSON.
@ -199,18 +199,18 @@ std::string to_string(simdjson_result<T> x) {
* cout << minify(doc) << endl; // prints [1,2,3]
*
*/
template <class T>
template <class T>
std::string minify(T x) {
return to_string(x);
}
#if SIMDJSON_EXCEPTIONS
template <class T>
template <class T>
std::string minify(simdjson_result<T> x) {
if (x.error()) { throw simdjson_error(x.error()); }
return to_string(x.value());
}
#endif
#endif
} // namespace simdjson

View File

@ -29,7 +29,7 @@ static unsigned char _BitScanReverse64(unsigned long* ret, uint64_t x) {
simdjson_really_inline int leading_zeroes(uint64_t input_num) {
#ifdef _MSC_VER
unsigned long leading_zero = 0;
// Search the mask data from most significant bit (MSB)
// Search the mask data from most significant bit (MSB)
// to least significant bit (LSB) for a set bit (1).
if (_BitScanReverse64(&leading_zero, input_num))
return (int)(63 - leading_zero);

View File

@ -25,7 +25,7 @@ namespace numberparsing {
namespace {
// Convert a mantissa, an exponent and a sign bit into an ieee64 double.
// The real_exponent needs to be in [0, 2046] (technically real_exponent = 2047 would be acceptable).
// The mantissa should be in [0,1<<53). The bit at index (1ULL << 52) while be zeroed.
// The mantissa should be in [0,1<<53). The bit at index (1ULL << 52) while be zeroed.
simdjson_really_inline double to_double(uint64_t mantissa, uint64_t real_exponent, bool negative) {
double d;
mantissa &= ~(1ULL << 52);
@ -149,7 +149,7 @@ simdjson_really_inline bool compute_float_64(int64_t power, uint64_t i, bool neg
// We want the most significant 64 bits of the product. We know
// this will be non-zero because the most significant bit of i is
// 1.
const uint32_t index = 2 * uint32_t(power - simdjson::internal::smallest_power);
const uint32_t index = 2 * uint32_t(power - simdjson::internal::smallest_power);
// Optimization: It may be that materializing the index as a variable might confuse some compilers and prevent effective complex-addressing loads. (Done for code clarity.)
//
// The full_multiplication function computes the 128-bit product of two 64-bit words
@ -158,7 +158,7 @@ simdjson_really_inline bool compute_float_64(int64_t power, uint64_t i, bool neg
// to the 64-bit most significant bits of the product.
simdjson::internal::value128 firstproduct = jsoncharutils::full_multiplication(i, simdjson::internal::power_of_five_128[index]);
// Both i and power_of_five_128[index] have their most significant bit set to 1 which
// implies that the either the most or the second most significant bit of the product
// implies that the either the most or the second most significant bit of the product
// is 1. We pack values in this manner for efficiency reasons: it maximizes the use
// we make of the product. It also makes it easy to reason about the product: there is
// 0 or 1 leading zero in the product.
@ -173,17 +173,17 @@ simdjson_really_inline bool compute_float_64(int64_t power, uint64_t i, bool neg
// Consider the scenario where q>=0. Then 5^q may not fit in 64-bits. Doing
// the full computation is wasteful. So we do what is called a "truncated
// multiplication".
// We take the most significant 64-bits, and we put them in
// We take the most significant 64-bits, and we put them in
// power_of_five_128[index]. Usually, that's good enough to approximate i * 5^q
// to the desired approximation using one multiplication. Sometimes it does not suffice.
// to the desired approximation using one multiplication. Sometimes it does not suffice.
// Then we store the next most significant 64 bits in power_of_five_128[index + 1], and
// then we get a better approximation to i * 5^q. In very rare cases, even that
// will not suffice, though it is seemingly very hard to find such a scenario.
//
//
// That's for when q>=0. The logic for q<0 is somewhat similar but it is somewhat
// more complicated.
//
// There is an extra layer of complexity in that we need more than 55 bits of
// There is an extra layer of complexity in that we need more than 55 bits of
// accuracy in the round-to-even scenario.
//
// The full_multiplication function computes the 128-bit product of two 64-bit words
@ -216,7 +216,7 @@ simdjson_really_inline bool compute_float_64(int64_t power, uint64_t i, bool neg
if(-real_exponent + 1 >= 64) { // if we have more than 64 bits below the minimum exponent, you have a zero for sure.
d = 0.0;
return true;
}
}
// next line is safe because -real_exponent + 1 < 64
mantissa >>= -real_exponent + 1;
// Thankfully, we can't have both "round-to-even" and subnormals because
@ -229,7 +229,7 @@ simdjson_really_inline bool compute_float_64(int64_t power, uint64_t i, bool neg
// whereas 0x40000000000000 x 2^-1023-53 is normal. Now, we need to round
// up 0x3fffffffffffff x 2^-1023-53 and once we do, we are no longer
// subnormal, but we can only know this after rounding.
// So we only declare a subnormal if we are smaller than the threshold.
// So we only declare a subnormal if we are smaller than the threshold.
real_exponent = (mantissa < (uint64_t(1) << 52)) ? 0 : 1;
d = to_double(mantissa, real_exponent, negative);
return true;
@ -239,7 +239,7 @@ simdjson_really_inline bool compute_float_64(int64_t power, uint64_t i, bool neg
// which we guard against.
// If we have lots of trailing zeros, we may fall right between two
// floating-point values.
//
//
// The round-to-even cases take the form of a number 2m+1 which is in (2^53,2^54]
// times a power of two. That is, it is right between a number with binary significand
// m and another number with binary significand m+1; and it must be the case
@ -250,11 +250,11 @@ simdjson_really_inline bool compute_float_64(int64_t power, uint64_t i, bool neg
// When q >= 0, we must have that (2m+1) is divisible by 5^q, so 5^q <= 2^54. We have that
// 5^23 <= 2^54 and it is the last power of five to qualify, so q <= 23.
// When q<0, we have w >= (2m+1) x 5^{-q}. We must have that w<2^{64} so
// (2m+1) x 5^{-q} < 2^{64}. We have that 2m+1>2^{53}. Hence, we must have
// (2m+1) x 5^{-q} < 2^{64}. We have that 2m+1>2^{53}. Hence, we must have
// 2^{53} x 5^{-q} < 2^{64}.
// Hence we have 5^{-q} < 2^{11}$ or q>= -4.
// Hence we have 5^{-q} < 2^{11}$ or q>= -4.
//
// We require lower <= 1 and not lower == 0 because we could not prove that
// We require lower <= 1 and not lower == 0 because we could not prove that
// lower == 0 is implied; but we could prove that lower <= 1 is a necessary and sufficient test.
if (simdjson_unlikely((lower <= 1) && (power >= -4) && (power <= 23) && ((mantissa & 3) == 1))) {
if((mantissa << (upperbit + 64 - 53 - 2)) == upper) {
@ -462,7 +462,7 @@ simdjson_really_inline error_code write_float(const uint8_t *const src, bool neg
// Observe that 18446744073709551615e-343 == 0, i.e. (2**64 - 1) e -343 is zero
// so something x 10^-343 goes to zero, but not so with something x 10^-342.
static_assert(simdjson::internal::smallest_power <= -342, "smallest_power is not small enough");
//
//
if((exponent < simdjson::internal::smallest_power) || (i == 0)) {
WRITE_DOUBLE(0, src, writer);
return SUCCESS;

View File

@ -2,7 +2,7 @@ namespace simdjson {
namespace SIMDJSON_IMPLEMENTATION {
/**
* A fast, simple, DOM-like interface that parses JSON as you use it.
*
*
* Designed for maximum speed and a lower memory profile.
*/
namespace ondemand {

View File

@ -18,7 +18,7 @@ namespace ondemand {
// Next state. In this state, depth > iter->depth, at_start == false, and error == SUCCESS.
//
// ## Error States
//
//
// In error states, we will yield exactly one more value before stopping. iter->depth == depth
// and at_start is always false. We decrement after yielding the error, moving to the Finished
// state.

View File

@ -14,7 +14,7 @@ class array {
public:
/**
* Create a new invalid array.
*
*
* Exists so you can declare a variable and later assign to it before use.
*/
simdjson_really_inline array() noexcept = default;
@ -52,7 +52,7 @@ protected:
static simdjson_really_inline simdjson_result<array> start(json_iterator_ref &&iter) noexcept;
/**
* Begin array iteration.
*
*
* This version of the method should be called after the initial [ has been verified, and is
* intended for use by switch statements that check the type of a value.
*
@ -79,7 +79,7 @@ protected:
/**
* Iterator marking current position.
*
*
* iter.is_alive() == false indicates iteration is complete.
*/
json_iterator_ref iter{};

View File

@ -10,7 +10,7 @@ class document;
/**
* A forward-only JSON array.
*
*
* This is an input_iterator, meaning:
* - It is forward-only
* - * must be called exactly once per element.
@ -30,7 +30,7 @@ public:
/**
* Get the current element.
*
*
* Part of the std::iterator interface.
*/
simdjson_really_inline simdjson_result<value> operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION.
@ -38,7 +38,7 @@ public:
* Check if we are at the end of the JSON.
*
* Part of the std::iterator interface.
*
*
* @return true if there are no more elements in the JSON array.
*/
simdjson_really_inline bool operator==(const array_iterator<T> &) noexcept;
@ -46,13 +46,13 @@ public:
* Check if there are more elements in the JSON array.
*
* Part of the std::iterator interface.
*
*
* @return true if there are more elements in the JSON array.
*/
simdjson_really_inline bool operator!=(const array_iterator<T> &) noexcept;
/**
* Move to the next element.
*
*
* Part of the std::iterator interface.
*/
simdjson_really_inline array_iterator<T> &operator++() noexcept;

View File

@ -25,7 +25,7 @@ public:
/**
* Create a new invalid document.
*
*
* Exists so you can declare a variable and later assign to it before use.
*/
simdjson_really_inline document() noexcept = default;
@ -73,7 +73,7 @@ public:
simdjson_really_inline simdjson_result<double> get_double() noexcept;
/**
* Cast this JSON value to a string.
*
*
* The string is guaranteed to be valid UTF-8.
*
* Equivalent to get<std::string_view>().
@ -85,7 +85,7 @@ public:
simdjson_really_inline simdjson_result<std::string_view> get_string() & noexcept;
/**
* Cast this JSON value to a raw_json_string.
*
*
* The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n).
*
* @returns A pointer to the raw JSON for the given string.
@ -101,7 +101,7 @@ public:
simdjson_really_inline simdjson_result<bool> get_bool() noexcept;
/**
* Checks if this JSON value is null.
*
*
* @returns Whether the value is null.
*/
simdjson_really_inline bool is_null() noexcept;
@ -110,7 +110,7 @@ public:
* Get this value as the given type.
*
* Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool
*
*
* @returns A value of the given type, parsed from the JSON.
* @returns INCORRECT_TYPE If the JSON value is not the given type.
*/
@ -122,7 +122,7 @@ public:
* Get this value as the given type.
*
* Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool
*
*
* @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized.
* @returns INCORRECT_TYPE If the JSON value is not an object.
* @returns SUCCESS If the parse succeeded and the out parameter was set to the value.
@ -169,7 +169,7 @@ public:
simdjson_really_inline operator double() noexcept(false);
/**
* Cast this JSON value to a string.
*
*
* The string is guaranteed to be valid UTF-8.
*
* Equivalent to get<std::string_view>().
@ -181,7 +181,7 @@ public:
simdjson_really_inline operator std::string_view() & noexcept(false);
/**
* Cast this JSON value to a raw_json_string.
*
*
* The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n).
*
* @returns A pointer to the raw JSON for the given string.
@ -215,7 +215,7 @@ public:
*
* This method may only be called once on a given value. If you want to look up multiple fields,
* you must first get the object using value.get_object() or object(value).
*
*
* @param key The key to look up.
* @returns INCORRECT_TYPE If the JSON value is not an array.
*/
@ -225,7 +225,7 @@ public:
*
* This method may only be called once on a given value. If you want to look up multiple fields,
* you must first get the object using value.get_object() or object(value).
*
*
* @param key The key to look up.
* @returns INCORRECT_TYPE If the JSON value is not an array.
*/
@ -239,7 +239,7 @@ protected:
static simdjson_really_inline document start(ondemand::json_iterator &&iter) noexcept;
/**
* Set json to null if the result is successful.
*
*
* Convenience function for value-getters.
*/
template<typename T>

View File

@ -6,16 +6,16 @@ namespace ondemand {
/**
* A JSON field (key/value pair) in an object.
*
*
* Returned from object iteration.
*
*
* Extends from std::pair<raw_json_string, value> so you can use C++ algorithms that rely on pairs.
*/
class field : public std::pair<raw_json_string, value> {
public:
/**
* Create a new invalid field.
*
*
* Exists so you can declare a variable and later assign to it before use.
*/
simdjson_really_inline field() noexcept;
@ -29,7 +29,7 @@ public:
* Get the key as a string_view (for higher speed, consider raw_key).
* We deliberately use a more cumbersome name (unescaped_key) to force users
* to think twice about using it.
*
*
* This consumes the key: once you have called unescaped_key(), you cannot
* call it again nor can you call key().
*/

View File

@ -117,7 +117,7 @@ simdjson_warn_unused simdjson_really_inline bool json_iterator::started_array()
advance();
return false;
}
logger::log_start_value(*this, "array");
logger::log_start_value(*this, "array");
return true;
}
@ -223,7 +223,7 @@ simdjson_warn_unused simdjson_result<uint64_t> json_iterator::consume_root_uint6
return parse_root_uint64(advance());
}
simdjson_warn_unused simdjson_result<int64_t> json_iterator::parse_root_int64(const uint8_t *json) noexcept {
uint8_t tmpbuf[20+1]; // -<19 digits> is the longest possible integer
uint8_t tmpbuf[20+1]; // -<19 digits> is the longest possible integer
if (!copy_to_buffer(json, tmpbuf)) { logger::log_error(*this, "Root number more than 20 characters"); return NUMBER_ERROR; }
logger::log_value(*this, "int64", "");
auto result = numberparsing::parse_integer(tmpbuf);

View File

@ -12,7 +12,7 @@ class json_iterator_ref;
/**
* Iterates through JSON, with structure-sensitive algorithms.
*
*
* @private This is not intended for external use.
*/
class json_iterator : public token_iterator {
@ -48,17 +48,17 @@ public:
* Start an object iteration after the user has already checked and moved past the {.
*
* Does not move the iterator.
*
*
* @returns Whether the object had any fields (returns false for empty).
*/
simdjson_warn_unused simdjson_really_inline bool started_object() noexcept;
/**
* Moves to the next field in an object.
*
*
* Looks for , and }. If } is found, the object is finished and the iterator advances past it.
* Otherwise, it advances to the next value.
*
*
* @return whether there is another field in the object.
* @error TAPE_ERROR If there is a comma missing between fields.
*/
@ -78,7 +78,7 @@ public:
* Find the next field with the given key.
*
* Assumes you have called next_field() or otherwise matched the previous value.
*
*
* Key is *raw JSON,* meaning it will be matched against the verbatim JSON without attempting to
* unescape it. This works well for typical ASCII and UTF-8 keys (almost all of them), but may
* fail to match some keys with escapes (\u, \n, etc.).
@ -112,10 +112,10 @@ public:
/**
* Moves to the next element in an array.
*
*
* Looks for , and ]. If ] is found, the array is finished and the iterator advances past it.
* Otherwise, it advances to the next value.
*
*
* @return Whether there is another element in the array.
* @error TAPE_ERROR If there is a comma missing between elements.
*/
@ -154,7 +154,7 @@ public:
/**
* Skips to the end of a JSON object or array.
*
*
* @return true if this was the end of an array, false if it was the end of an object.
*/
simdjson_warn_unused simdjson_really_inline error_code skip_container() noexcept;
@ -176,7 +176,7 @@ public:
/**
* Report an error, preventing further iteration.
*
*
* @param error The error to report. Must not be SUCCESS, UNINITIALIZED, INCORRECT_TYPE, or NO_SUCH_FIELD.
* @param message An error message to report with the error.
*/
@ -191,13 +191,13 @@ protected:
ondemand::parser *parser{};
/**
* Next free location in the string buffer.
*
*
* Used by raw_json_string::unescape() to have a place to unescape strings to.
*/
uint8_t *current_string_buf_loc{};
/**
* JSON error, if there is one.
*
*
* INCORRECT_TYPE and NO_SUCH_FIELD are *not* stored here, ever.
*
* PERF NOTE: we *hope* this will be elided into control flow, as it is only used (a) in the first

View File

@ -18,7 +18,7 @@ namespace ondemand {
// Next state. In this state, depth > iter->depth, at_start == false, and error == SUCCESS.
//
// ## Error States
//
//
// In error states, we will yield exactly one more value before stopping. iter->depth == depth
// and at_start is always false. We decrement after yielding the error, moving to the Finished
// state.

View File

@ -11,7 +11,7 @@ class object {
public:
/**
* Create a new invalid object.
*
*
* Exists so you can declare a variable and later assign to it before use.
*/
simdjson_really_inline object() noexcept = default;
@ -57,7 +57,7 @@ protected:
json_iterator_ref iter{};
/**
* Whether we are at the start.
*
*
* PERF NOTE: this should be elided into inline control flow: it is only used for the first []
* or * call, and SSA optimizers commonly do first-iteration loop optimization.
*/

View File

@ -10,7 +10,7 @@ class object_iterator {
public:
/**
* Create a new invalid object_iterator.
*
*
* Exists so you can declare a variable and later assign to it before use.
*/
simdjson_really_inline object_iterator() noexcept = default;

View File

@ -24,7 +24,7 @@ simdjson_warn_unused simdjson_really_inline simdjson_result<document> parser::it
}
// Run stage 1.
SIMDJSON_TRY( dom_parser.stage1((const uint8_t *)buf.data(), buf.size(), false) );
SIMDJSON_TRY( dom_parser.stage1((const uint8_t *)buf.data(), buf.size(), false) );
return document::start(this);
}
@ -35,7 +35,7 @@ simdjson_warn_unused simdjson_really_inline simdjson_result<json_iterator> parse
}
// Run stage 1.
SIMDJSON_TRY( dom_parser.stage1((const uint8_t *)buf.data(), buf.size(), false) );
SIMDJSON_TRY( dom_parser.stage1((const uint8_t *)buf.data(), buf.size(), false) );
return json_iterator(this);
}

View File

@ -32,17 +32,17 @@ public:
/**
* Start iterating an on-demand JSON document.
*
*
* ondemand::parser parser;
* document doc = parser.iterate(json);
*
*
* ### IMPORTANT: Buffer Lifetime
*
*
* Because parsing is done while you iterate, you *must* keep the JSON buffer around at least as
* long as the document iteration.
*
*
* ### IMPORTANT: Document Lifetime
*
*
* Only one iteration at a time can happen per parser, and the parser *must* be kept alive during
* iteration to ensure intermediate buffers can be accessed. Any document must be destroyed before
* you call parse() again or destroy the parser.
@ -53,7 +53,7 @@ public:
* those bytes are initialized to, as long as they are allocated.
*
* @param json The JSON to parse.
*
*
* @return The document, or an error:
* - MEMALLOC if the parser does not have enough capacity, and memory
* allocation fails.
@ -66,19 +66,19 @@ public:
simdjson_warn_unused simdjson_result<document> iterate(const std::string &json) & noexcept = delete;
/**
* @private
*
*
* Start iterating an on-demand JSON document.
*
*
* ondemand::parser parser;
* json_iterator doc = parser.iterate(json);
*
*
* ### IMPORTANT: Buffer Lifetime
*
*
* Because parsing is done while you iterate, you *must* keep the JSON buffer around at least as
* long as the document iteration.
*
*
* ### IMPORTANT: Document Lifetime
*
*
* Only one iteration at a time can happen per parser, and the parser *must* be kept alive during
* iteration to ensure intermediate buffers can be accessed. Any document must be destroyed before
* you call parse() again or destroy the parser.
@ -89,7 +89,7 @@ public:
* those bytes are initialized to, as long as they are allocated.
*
* @param json The JSON to parse.
*
*
* @return The iterator, or an error:
* - MEMALLOC if the parser does not have enough capacity, and memory
* allocation fails.

View File

@ -13,11 +13,11 @@ class parser;
*
* (In other words, a pointer to the beginning of a string, just after the start quote, inside a
* JSON file.)
*
*
* This class is deliberately simplistic and has little functionality. You can
* compare two raw_json_string instances, or compare a raw_json_string with a string_view, but
* that is pretty much all you can do.
*
*
* They typically originate from field instances, which in turn represent key-value pairs from
* object instances. From a field instance, you get the raw_json_string instance by calling key().
* You can, if you want a more usable string_view instance, call the unescaped_key() method
@ -27,7 +27,7 @@ class raw_json_string {
public:
/**
* Create a new invalid raw_json_string.
*
*
* Exists so you can declare a variable and later assign to it before use.
*/
simdjson_really_inline raw_json_string() noexcept = default;
@ -37,15 +37,15 @@ public:
/**
* Create a new raw_json_string pointed at the given location in the JSON.
*
*
* The given location must be just *after* the beginning quote (") in the JSON file.
*
*
* It *must* be terminated by a ", and be a valid JSON string.
*/
simdjson_really_inline raw_json_string(const uint8_t * _buf) noexcept;
/**
* Get the raw pointer to the beginning of the string in the JSON (just after the ").
*
*
* It is possible for this function to return a null pointer if the instance
* has outlived its existence.
*/
@ -65,11 +65,11 @@ private:
/**
* Unescape this JSON string, replacing \\ with \, \n with newline, etc.
*
*
* ## IMPORTANT: string_view lifetime
*
*
* The string_view is only valid as long as the bytes in dst remain valid.
*
*
* @param dst A pointer to a buffer at least large enough to write this string as well as a \0.
* dst will be updated to the next unused location (just after the \0 written out at
* the end of this string).
@ -79,11 +79,11 @@ private:
simdjson_really_inline simdjson_warn_unused simdjson_result<std::string_view> unescape(uint8_t *&dst) const noexcept;
/**
* Unescape this JSON string, replacing \\ with \, \n with newline, etc.
*
*
* ## IMPORTANT: string_view lifetime
*
*
* The string_view is only valid until the next parse() call on the parser.
*
*
* @param iter A json_iterator, which contains a buffer where the string will be written.
*/
simdjson_really_inline simdjson_warn_unused simdjson_result<std::string_view> unescape(json_iterator &iter) const noexcept;

View File

@ -12,7 +12,7 @@ class token_iterator {
public:
/**
* Create a new invalid token_iterator.
*
*
* Exists so you can declare a variable and later assign to it before use.
*/
simdjson_really_inline token_iterator() noexcept = default;

View File

@ -17,7 +17,7 @@ class value {
public:
/**
* Create a new invalid value.
*
*
* Exists so you can declare a variable and later assign to it before use.
*/
simdjson_really_inline value() noexcept = default;
@ -36,7 +36,7 @@ public:
* Get this value as the given type.
*
* Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool
*
*
* @returns A value of the given type, parsed from the JSON.
* @returns INCORRECT_TYPE If the JSON value is not the given type.
*/
@ -48,7 +48,7 @@ public:
* Get this value as the given type.
*
* Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool
*
*
* @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized.
* @returns INCORRECT_TYPE If the JSON value is not the given type.
* @returns SUCCESS If the parse succeeded and the out parameter was set to the value.
@ -109,7 +109,7 @@ public:
/**
* Cast this JSON value to a string.
*
*
* The string is guaranteed to be valid UTF-8.
*
* Equivalent to get<std::string_view>().
@ -124,7 +124,7 @@ public:
/**
* Cast this JSON value to a raw_json_string.
*
*
* The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n).
*
* @returns A pointer to the raw JSON for the given string.
@ -146,7 +146,7 @@ public:
/**
* Checks if this JSON value is null.
*
*
* @returns Whether the value is null.
*/
simdjson_really_inline bool is_null() && noexcept;
@ -197,7 +197,7 @@ public:
simdjson_really_inline operator double() & noexcept(false);
/**
* Cast this JSON value to a string.
*
*
* The string is guaranteed to be valid UTF-8.
*
* Equivalent to get<std::string_view>().
@ -211,7 +211,7 @@ public:
simdjson_really_inline operator std::string_view() & noexcept(false);
/**
* Cast this JSON value to a raw_json_string.
*
*
* The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n).
*
* @returns A pointer to the raw JSON for the given string.
@ -235,7 +235,7 @@ public:
* Begin array iteration.
*
* Part of the std::iterable interface.
*
*
* @returns INCORRECT_TYPE If the JSON value is not an array.
*/
simdjson_really_inline simdjson_result<array_iterator<value>> begin() & noexcept;

View File

@ -14,7 +14,7 @@ simdjson_really_inline int trailing_zeroes(uint64_t input_num) {
return (int)_tzcnt_u64(input_num);
#else // SIMDJSON_REGULAR_VISUAL_STUDIO
////////
// You might expect the next line to be equivalent to
// You might expect the next line to be equivalent to
// return (int)_tzcnt_u64(input_num);
// but the generated code differs and might be less efficient?
////////

View File

@ -132,11 +132,11 @@ namespace simd {
// next line just loads the 64-bit values thintable_epi8[mask1] through
// thintable_epi8[mask4] into a 256-bit register, using only
// a few instructions on most compilers.
__m256i shufmask = _mm256_set_epi64x(thintable_epi8[mask4], thintable_epi8[mask3],
__m256i shufmask = _mm256_set_epi64x(thintable_epi8[mask4], thintable_epi8[mask3],
thintable_epi8[mask2], thintable_epi8[mask1]);
// we increment by 0x08 the second half of the mask and so forth
shufmask =
_mm256_add_epi8(shufmask, _mm256_set_epi32(0x18181818, 0x18181818,
_mm256_add_epi8(shufmask, _mm256_set_epi32(0x18181818, 0x18181818,
0x10101010, 0x10101010, 0x08080808, 0x08080808, 0, 0));
// this is the version "nearly pruned"
__m256i pruned = _mm256_shuffle_epi8(*this, shufmask);

View File

@ -72,12 +72,12 @@ public:
* @return the name of the implementation, e.g. "haswell", "westmere", "arm64"
*/
virtual const std::string &description() const { return _description; }
/**
* The instruction sets this implementation is compiled against
* and the current CPU match. This function may poll the current CPU/system
* and should therefore not be called too often if performance is a concern.
*
*
*
* @return true if the implementation can be safely used on the current system (determined at runtime)
*/
@ -123,9 +123,9 @@ public:
* @return the error code, or SUCCESS if there was no error.
*/
simdjson_warn_unused virtual error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept = 0;
/**
/**
* Validate the UTF-8 string.
*
* Overridden by each implementation.

View File

@ -26,7 +26,7 @@ public:
* @private For internal implementation use
*
* Run a full JSON parse on a single document (stage1 + stage2).
*
*
* Guaranteed only to be called when capacity > document length.
*
* Overridden by each implementation.
@ -41,7 +41,7 @@ public:
* @private For internal implementation use
*
* Stage 1 of the document parser.
*
*
* Guaranteed only to be called when capacity > document length.
*
* Overridden by each implementation.
@ -57,7 +57,7 @@ public:
* @private For internal implementation use
*
* Stage 2 of the document parser.
*
*
* Called after stage1().
*
* Overridden by each implementation.
@ -82,7 +82,7 @@ public:
/**
* Change the capacity of this parser.
*
*
* Generally used for reallocation.
*
* @param capacity The new capacity.

View File

@ -9,12 +9,12 @@ namespace internal {
* The smallest non-zero float (binary64) is 2^-1074.
* We take as input numbers of the form w x 10^q where w < 2^64.
* We have that w * 10^-343 < 2^(64-344) 5^-343 < 2^-1076.
* However, we have that
* However, we have that
* (2^64-1) * 10^-342 = (2^64-1) * 2^-342 * 5^-342 > 2^-1074.
* Thus it is possible for a number of the form w * 10^-342 where
* Thus it is possible for a number of the form w * 10^-342 where
* w is a 64-bit value to be a non-zero floating-point number.
*********
* Any number of form w * 10^309 where w>= 1 is going to be
* Any number of form w * 10^309 where w>= 1 is going to be
* infinite in binary64 so we never need to worry about powers
* of 5 greater than 308.
*/
@ -46,7 +46,7 @@ extern SIMDJSON_DLLIMPORTEXPORT const double power_of_ten[];
* are not a concern since they can be represented
* exactly using the binary notation, only the powers of five
* affect the binary significand.
*/
*/
// The truncated powers of five from 5^-342 all the way to 5^308

View File

@ -14,6 +14,6 @@ extern SIMDJSON_DLLIMPORTEXPORT const uint8_t pshufb_combine_table[272];
extern SIMDJSON_DLLIMPORTEXPORT const uint64_t thintable_epi8[256];
} // namespace internal
} // namespace simdjson
} // namespace simdjson
#endif // SIMDJSON_INTERNAL_SIMDPRUNE_TABLES_H

View File

@ -15,7 +15,7 @@ namespace simdjson {
*
* Minify the input string assuming that it represents a JSON string, does not parse or validate.
* This function is much faster than parsing a JSON string and then writing a minified version of it.
* However, it does not validate the input. It will merely return an error in simple cases (e.g., if
* However, it does not validate the input. It will merely return an error in simple cases (e.g., if
* there is a string that was never terminated).
*
*

View File

@ -12,11 +12,11 @@
namespace simdjson {
namespace internal {
// The allocate_padded_buffer function is a low-level function to allocate memory
// with padding so we can read past the "length" bytes safely. It is used by
// The allocate_padded_buffer function is a low-level function to allocate memory
// with padding so we can read past the "length" bytes safely. It is used by
// the padded_string class automatically. It returns nullptr in case
// of error: the caller should check for a null pointer.
// The length parameter is the maximum size in bytes of the string.
// The length parameter is the maximum size in bytes of the string.
// The caller is responsible to free the memory (e.g., delete[] (...)).
inline char *allocate_padded_buffer(size_t length) noexcept {
size_t totalpaddedlength = length + SIMDJSON_PADDING;
@ -24,8 +24,8 @@ inline char *allocate_padded_buffer(size_t length) noexcept {
if (padded_buffer == nullptr) {
return nullptr;
}
// We write zeroes in the padded region to avoid having uninitialized
// garbage. If nothing else, garbage getting read might trigger a
// We write zeroes in the padded region to avoid having uninitialized
// garbage. If nothing else, garbage getting read might trigger a
// warning in a memory checker.
std::memset(padded_buffer + length, 0, totalpaddedlength - length);
return padded_buffer;

View File

@ -145,11 +145,11 @@ inline simdjson::padded_string operator "" _padded(const char *str, size_t len)
namespace simdjson {
namespace internal {
// The allocate_padded_buffer function is a low-level function to allocate memory
// with padding so we can read past the "length" bytes safely. It is used by
// The allocate_padded_buffer function is a low-level function to allocate memory
// with padding so we can read past the "length" bytes safely. It is used by
// the padded_string class automatically. It returns nullptr in case
// of error: the caller should check for a null pointer.
// The length parameter is the maximum size in bytes of the string.
// The length parameter is the maximum size in bytes of the string.
// The caller is responsible to free the memory (e.g., delete[] (...)).
inline char *allocate_padded_buffer(size_t length) noexcept;

View File

@ -12,7 +12,7 @@ NO_SANITIZE_UNDEFINED
simdjson_really_inline int trailing_zeroes(uint64_t input_num) {
#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO
unsigned long ret;
// Search the mask data from least significant bit (LSB)
// Search the mask data from least significant bit (LSB)
// to the most significant bit (MSB) for a set bit (1).
_BitScanForward64(&ret, input_num);
return (int)ret;
@ -30,7 +30,7 @@ simdjson_really_inline uint64_t clear_lowest_bit(uint64_t input_num) {
simdjson_really_inline int leading_zeroes(uint64_t input_num) {
#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO
unsigned long leading_zero = 0;
// Search the mask data from most significant bit (MSB)
// Search the mask data from most significant bit (MSB)
// to least significant bit (LSB) for a set bit (1).
if (_BitScanReverse64(&leading_zero, input_num))
return (int)(63 - leading_zero);

View File

@ -298,7 +298,7 @@ namespace simd {
uint64_t r3 = this->chunks[3].to_bitmask() ;
return r0 | (r1 << 16) | (r2 << 32) | (r3 << 48);
}
simdjson_really_inline uint64_t eq(const T m) const {
const simd8<T> mask = simd8<T>::splat(m);
return simd8x64<bool>(

View File

@ -12,7 +12,7 @@ def verifyContent(f,filename):
except UnicodeEncodeError as e:
#print(f"a: found problem {e} at line {linenumber+1} in {filename}:")
print(f"Found problem at line {linenumber+1} in {filename}:")
print(line.rstrip())
print(line.rstrip())
for col, char in enumerate(line.encode('utf-8')):
if char>=127:
offender=char
@ -27,7 +27,7 @@ def verifyContent(f,filename):
sys.exit(1)
for filename in sys.argv[1:]:
with open(filename,encoding='utf-8') as f:
#print(f"file {filename} was possible to open as utf-8")

View File

@ -2,7 +2,7 @@
SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P )"
cd $SCRIPTPATH/..
make jsonstats
echo
echo
for i in $SCRIPTPATH/../jsonexamples/*.json; do
[ -f "$i" ] || break
echo $i

View File

@ -2,7 +2,7 @@
SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P )"
cd $SCRIPTPATH/..
make minifiercompetition
echo
echo
for i in $SCRIPTPATH/../jsonexamples/*.json; do
[ -f "$i" ] || break
echo $i

View File

@ -3,7 +3,7 @@ SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P )"
cd $SCRIPTPATH/..
make parseandstatcompetition
echo "parsing and collecting basic stats on json documents as quickly as possible"
echo
echo
for i in $SCRIPTPATH/../jsonexamples/*.json; do
[ -f "$i" ] || break
echo $i
@ -13,7 +13,7 @@ done
make distinctuseridcompetition
echo "parsing and finding all user.id"
echo
echo
for i in $SCRIPTPATH/../jsonexamples/twitter.json; do
[ -f "$i" ] || break

View File

@ -2,7 +2,7 @@
SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P )"
cd $SCRIPTPATH/..
make parsingcompetition
echo
echo
for i in $SCRIPTPATH/../jsonexamples/*.json; do
[ -f "$i" ] || break
echo $i

View File

@ -10,7 +10,7 @@ os=$(uname)
make parsingcompetition allparsingcompetition
echo "parsing (with competition)"
echo
echo
for i in $SCRIPTPATH/../jsonexamples/*.json; do
[ -f "$i" ] || break
echo $i
@ -23,4 +23,4 @@ done
echo "see results in "$datadirectory
cd $datadirectory && gnuplot bar.gnuplot
cd $datadirectory && gnuplot bar.gnuplot

View File

@ -47,7 +47,7 @@ fi
make parsingcompetition
echo "parsing (with competition)"
echo
echo
for i in $SCRIPTPATH/../jsonexamples/*.json; do
[ -f "$i" ] || break
echo $i
@ -64,7 +64,7 @@ done
make parseandstatcompetition
echo "parsing and collecting basic stats on json documents as quickly as possible"
echo
echo
for i in $SCRIPTPATH/../jsonexamples/*.json; do
[ -f "$i" ] || break
echo $i
@ -80,7 +80,7 @@ done
make distinctuseridcompetition
echo "parsing and finding all user.id"
echo
echo
for i in $SCRIPTPATH/../jsonexamples/twitter.json; do
[ -f "$i" ] || break

View File

@ -2,7 +2,7 @@
SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P )"
cd $SCRIPTPATH/..
make parsingcompetition
echo
echo
for i in "$SCRIPTPATH/../jsonexamples/twitter.json" "$SCRIPTPATH/../jsonexamples/update-center.json" "$SCRIPTPATH/../jsonexamples/github_events.json" "$SCRIPTPATH/../jsonexamples/gsoc-2018.json" ; do
[ -f "$i" ] || break
echo $i

View File

@ -3,7 +3,7 @@
#######
# taken from http://hbfs.wordpress.com/2013/06/18/fast-path-finding-part-ii/
# might require sudo apt-get install cpufrequtils
# invoke with performance or ondemand
# invoke with performance or ondemand
# type cpufreq-info to check results, you can also verify with cat /proc/cpuinfo
# enumerate found CPUs
cpus=$( grep processor /proc/cpuinfo | cut -d: -f 2 )
@ -23,7 +23,7 @@ else
exit -1
fi
echo "chosen policy " $1
echo "chosen policy " $1
# set governor for each CPU
#
for cpu in ${cpus[@]}

View File

@ -2,5 +2,5 @@ cd "${0%/*}"
export CXX=g++-7
export CC=gcc-7
#./powerpolicy.sh performance
./disablehyperthreading.sh
./disablehyperthreading.sh
./turboboost.sh on

View File

@ -18,7 +18,7 @@ do
echo -n "| $file Cycles | $file Instructions | $file Missed Branches "
done
echo "|"
git checkout jkeiser/lookup2_simpler_intel
make parse
report_perf lookup2 "$@"

View File

@ -1,4 +1,4 @@
/* auto-generated on 2020-11-03 06:07:17 +0100. Do not edit! */
/* auto-generated on 2020-11-03 21:40:10 +0100. Do not edit! */
/* begin file src/simdjson.cpp */
#include "simdjson.h"
@ -960,7 +960,7 @@ namespace simdjson {
namespace internal {
/**
* The code in the internal::from_chars function is meant to handle the floating-point number parsing
* The code in the internal::from_chars function is meant to handle the floating-point number parsing
* when we have more than 19 digits in the decimal mantissa. This should only be seen
* in adversarial scenarios: we do not expect production systems to even produce
* such floating-point numbers.
@ -1032,7 +1032,7 @@ decimal parse_decimal(const char *&p) noexcept {
while (is_integer(*p)) {
if (answer.num_digits < max_digits) {
answer.digits[answer.num_digits] = uint8_t(*p - '0');
}
}
answer.num_digits++;
++p;
}
@ -1049,7 +1049,7 @@ decimal parse_decimal(const char *&p) noexcept {
while (is_integer(*p)) {
if (answer.num_digits < max_digits) {
answer.digits[answer.num_digits] = uint8_t(*p - '0');
}
}
answer.num_digits++;
++p;
}
@ -1314,21 +1314,21 @@ template <typename binary> adjusted_mantissa compute_float(decimal &d) {
}
// At this point, going further, we can assume that d.num_digits > 0.
// We want to guard against excessive decimal point values because
// they can result in long running times. Indeed, we do
// they can result in long running times. Indeed, we do
// shifts by at most 60 bits. We have that log(10**400)/log(2**60) ~= 22
// which is fine, but log(10**299995)/log(2**60) ~= 16609 which is not
// fine (runs for a long time).
//
if(d.decimal_point < -324) {
// We have something smaller than 1e-324 which is always zero
// in binary64 and binary32.
// in binary64 and binary32.
// It should be zero.
answer.power2 = 0;
answer.mantissa = 0;
return answer;
} else if(d.decimal_point >= 310) {
// We have something at least as large as 0.1e310 which is
// always infinite.
// always infinite.
answer.power2 = binary::infinite_power();
answer.mantissa = 0;
return answer;
@ -1690,7 +1690,7 @@ SIMDJSON_DLLIMPORTEXPORT const double power_of_ten[] = {
* are not a concern since they can be represented
* exactly using the binary notation, only the powers of five
* affect the binary significand.
*/
*/
// The truncated powers of five from 5^-342 all the way to 5^308
@ -2927,7 +2927,7 @@ using namespace simd;
}
// The only problem that can happen at EOF is that a multibyte character is too short
// or a byte value too large in the last bytes: check_special_cases only checks for bytes
// or a byte value too large in the last bytes: check_special_cases only checks for bytes
// too large in the first of two bytes.
simdjson_really_inline void check_eof() {
// If the previous block had incomplete UTF-8 characters at the end, an ASCII block can't
@ -3229,7 +3229,7 @@ namespace stage1 {
* We seek to identify pseudo-structural characters. Anything that is inside
* a string must be omitted (hence & ~_string.string_tail()).
* Otherwise, pseudo-structural characters come in two forms.
* 1. We have the structural characters ([,],{,},:, comma). The
* 1. We have the structural characters ([,],{,},:, comma). The
* term 'structural character' is from the JSON RFC.
* 2. We have the 'scalar pseudo-structural characters'.
* Scalars are quotes, and any character except structural characters and white space.
@ -3439,7 +3439,7 @@ error_code json_minifier::minify(const uint8_t *buf, size_t len, uint8_t *dst, s
// Index the last (remainder) block, padded with spaces
uint8_t block[STEP_SIZE];
size_t remaining_bytes = reader.get_remainder(block);
size_t remaining_bytes = reader.get_remainder(block);
if (remaining_bytes > 0) {
// We do not want to write directly to the output stream. Rather, we write
// to a local buffer (for safety).
@ -4220,7 +4220,7 @@ namespace stage2 {
struct tape_writer {
/** The next place to write to tape */
uint64_t *next_tape_loc;
/** Write a signed 64-bit value to tape. */
simdjson_really_inline void append_s64(int64_t value) noexcept;
@ -5007,7 +5007,7 @@ simdjson_warn_unused error_code implementation::minify(const uint8_t *buf, size_
}
// credit: based on code from Google Fuchsia (Apache Licensed)
simdjson_warn_unused bool implementation::validate_utf8(const char *buf, size_t len) const noexcept {
simdjson_warn_unused bool implementation::validate_utf8(const char *buf, size_t len) const noexcept {
const uint8_t *data = (const uint8_t *)buf;
uint64_t pos = 0;
uint32_t code_point = 0;
@ -5493,7 +5493,7 @@ namespace stage2 {
struct tape_writer {
/** The next place to write to tape */
uint64_t *next_tape_loc;
/** Write a signed 64-bit value to tape. */
simdjson_really_inline void append_s64(int64_t value) noexcept;
@ -6015,7 +6015,7 @@ simdjson_really_inline json_character_block json_character_block::classify(const
_mm256_shuffle_epi8(op_table, in.chunks[0]),
_mm256_shuffle_epi8(op_table, in.chunks[1])
});
return { whitespace, op };
}
@ -6186,7 +6186,7 @@ using namespace simd;
}
// The only problem that can happen at EOF is that a multibyte character is too short
// or a byte value too large in the last bytes: check_special_cases only checks for bytes
// or a byte value too large in the last bytes: check_special_cases only checks for bytes
// too large in the first of two bytes.
simdjson_really_inline void check_eof() {
// If the previous block had incomplete UTF-8 characters at the end, an ASCII block can't
@ -6488,7 +6488,7 @@ namespace stage1 {
* We seek to identify pseudo-structural characters. Anything that is inside
* a string must be omitted (hence & ~_string.string_tail()).
* Otherwise, pseudo-structural characters come in two forms.
* 1. We have the structural characters ([,],{,},:, comma). The
* 1. We have the structural characters ([,],{,},:, comma). The
* term 'structural character' is from the JSON RFC.
* 2. We have the 'scalar pseudo-structural characters'.
* Scalars are quotes, and any character except structural characters and white space.
@ -6698,7 +6698,7 @@ error_code json_minifier::minify(const uint8_t *buf, size_t len, uint8_t *dst, s
// Index the last (remainder) block, padded with spaces
uint8_t block[STEP_SIZE];
size_t remaining_bytes = reader.get_remainder(block);
size_t remaining_bytes = reader.get_remainder(block);
if (remaining_bytes > 0) {
// We do not want to write directly to the output stream. Rather, we write
// to a local buffer (for safety).
@ -7478,7 +7478,7 @@ namespace stage2 {
struct tape_writer {
/** The next place to write to tape */
uint64_t *next_tape_loc;
/** Write a signed 64-bit value to tape. */
simdjson_really_inline void append_s64(int64_t value) noexcept;
@ -8161,7 +8161,7 @@ using namespace simd;
}
// The only problem that can happen at EOF is that a multibyte character is too short
// or a byte value too large in the last bytes: check_special_cases only checks for bytes
// or a byte value too large in the last bytes: check_special_cases only checks for bytes
// too large in the first of two bytes.
simdjson_really_inline void check_eof() {
// If the previous block had incomplete UTF-8 characters at the end, an ASCII block can't
@ -8463,7 +8463,7 @@ namespace stage1 {
* We seek to identify pseudo-structural characters. Anything that is inside
* a string must be omitted (hence & ~_string.string_tail()).
* Otherwise, pseudo-structural characters come in two forms.
* 1. We have the structural characters ([,],{,},:, comma). The
* 1. We have the structural characters ([,],{,},:, comma). The
* term 'structural character' is from the JSON RFC.
* 2. We have the 'scalar pseudo-structural characters'.
* Scalars are quotes, and any character except structural characters and white space.
@ -8673,7 +8673,7 @@ error_code json_minifier::minify(const uint8_t *buf, size_t len, uint8_t *dst, s
// Index the last (remainder) block, padded with spaces
uint8_t block[STEP_SIZE];
size_t remaining_bytes = reader.get_remainder(block);
size_t remaining_bytes = reader.get_remainder(block);
if (remaining_bytes > 0) {
// We do not want to write directly to the output stream. Rather, we write
// to a local buffer (for safety).
@ -9454,7 +9454,7 @@ namespace stage2 {
struct tape_writer {
/** The next place to write to tape */
uint64_t *next_tape_loc;
/** Write a signed 64-bit value to tape. */
simdjson_really_inline void append_s64(int64_t value) noexcept;
@ -10173,7 +10173,7 @@ using namespace simd;
}
// The only problem that can happen at EOF is that a multibyte character is too short
// or a byte value too large in the last bytes: check_special_cases only checks for bytes
// or a byte value too large in the last bytes: check_special_cases only checks for bytes
// too large in the first of two bytes.
simdjson_really_inline void check_eof() {
// If the previous block had incomplete UTF-8 characters at the end, an ASCII block can't
@ -10475,7 +10475,7 @@ namespace stage1 {
* We seek to identify pseudo-structural characters. Anything that is inside
* a string must be omitted (hence & ~_string.string_tail()).
* Otherwise, pseudo-structural characters come in two forms.
* 1. We have the structural characters ([,],{,},:, comma). The
* 1. We have the structural characters ([,],{,},:, comma). The
* term 'structural character' is from the JSON RFC.
* 2. We have the 'scalar pseudo-structural characters'.
* Scalars are quotes, and any character except structural characters and white space.
@ -10685,7 +10685,7 @@ error_code json_minifier::minify(const uint8_t *buf, size_t len, uint8_t *dst, s
// Index the last (remainder) block, padded with spaces
uint8_t block[STEP_SIZE];
size_t remaining_bytes = reader.get_remainder(block);
size_t remaining_bytes = reader.get_remainder(block);
if (remaining_bytes > 0) {
// We do not want to write directly to the output stream. Rather, we write
// to a local buffer (for safety).
@ -11465,7 +11465,7 @@ namespace stage2 {
struct tape_writer {
/** The next place to write to tape */
uint64_t *next_tape_loc;
/** Write a signed 64-bit value to tape. */
simdjson_really_inline void append_s64(int64_t value) noexcept;

File diff suppressed because it is too large Load Diff

View File

@ -244,7 +244,7 @@ simdjson_warn_unused error_code implementation::minify(const uint8_t *buf, size_
}
// credit: based on code from Google Fuchsia (Apache Licensed)
simdjson_warn_unused bool implementation::validate_utf8(const char *buf, size_t len) const noexcept {
simdjson_warn_unused bool implementation::validate_utf8(const char *buf, size_t len) const noexcept {
const uint8_t *data = (const uint8_t *)buf;
uint64_t pos = 0;
uint32_t code_point = 0;

View File

@ -4,7 +4,7 @@ namespace simdjson {
namespace internal {
/**
* The code in the internal::from_chars function is meant to handle the floating-point number parsing
* The code in the internal::from_chars function is meant to handle the floating-point number parsing
* when we have more than 19 digits in the decimal mantissa. This should only be seen
* in adversarial scenarios: we do not expect production systems to even produce
* such floating-point numbers.
@ -76,7 +76,7 @@ decimal parse_decimal(const char *&p) noexcept {
while (is_integer(*p)) {
if (answer.num_digits < max_digits) {
answer.digits[answer.num_digits] = uint8_t(*p - '0');
}
}
answer.num_digits++;
++p;
}
@ -93,7 +93,7 @@ decimal parse_decimal(const char *&p) noexcept {
while (is_integer(*p)) {
if (answer.num_digits < max_digits) {
answer.digits[answer.num_digits] = uint8_t(*p - '0');
}
}
answer.num_digits++;
++p;
}
@ -358,21 +358,21 @@ template <typename binary> adjusted_mantissa compute_float(decimal &d) {
}
// At this point, going further, we can assume that d.num_digits > 0.
// We want to guard against excessive decimal point values because
// they can result in long running times. Indeed, we do
// they can result in long running times. Indeed, we do
// shifts by at most 60 bits. We have that log(10**400)/log(2**60) ~= 22
// which is fine, but log(10**299995)/log(2**60) ~= 16609 which is not
// fine (runs for a long time).
//
if(d.decimal_point < -324) {
// We have something smaller than 1e-324 which is always zero
// in binary64 and binary32.
// in binary64 and binary32.
// It should be zero.
answer.power2 = 0;
answer.mantissa = 0;
return answer;
} else if(d.decimal_point >= 310) {
// We have something at least as large as 0.1e310 which is
// always infinite.
// always infinite.
answer.power2 = binary::infinite_power();
answer.mantissa = 0;
return answer;

View File

@ -69,7 +69,7 @@ error_code json_minifier::minify(const uint8_t *buf, size_t len, uint8_t *dst, s
// Index the last (remainder) block, padded with spaces
uint8_t block[STEP_SIZE];
size_t remaining_bytes = reader.get_remainder(block);
size_t remaining_bytes = reader.get_remainder(block);
if (remaining_bytes > 0) {
// We do not want to write directly to the output stream. Rather, we write
// to a local buffer (for safety).

View File

@ -9,7 +9,7 @@ namespace stage1 {
* We seek to identify pseudo-structural characters. Anything that is inside
* a string must be omitted (hence & ~_string.string_tail()).
* Otherwise, pseudo-structural characters come in two forms.
* 1. We have the structural characters ([,],{,},:, comma). The
* 1. We have the structural characters ([,],{,},:, comma). The
* term 'structural character' is from the JSON RFC.
* 2. We have the 'scalar pseudo-structural characters'.
* Scalars are quotes, and any character except structural characters and white space.

View File

@ -93,7 +93,7 @@ using namespace simd;
static const int TOO_LARGE = 0x10; // 11110100 (1001|101_)____
static const int TOO_LARGE_2 = 0x20; // 1111(1___|011_|0101) 10______
// New with lookup3. We want to catch the case where a non-continuation
// New with lookup3. We want to catch the case where a non-continuation
// follows a leading byte
static const int TOO_SHORT_2_3_4 = 0x40; // (110_|1110|1111) ____ (0___|110_|1111) ____
// We also want to catch a continuation that is preceded by an ASCII byte
@ -226,7 +226,7 @@ using namespace simd;
this->check_utf8_bytes(input.chunks[1], input.chunks[0]);
this->check_utf8_bytes(input.chunks[2], input.chunks[1]);
this->check_utf8_bytes(input.chunks[3], input.chunks[2]);
}
}
this->prev_incomplete = is_incomplete(input.chunks[simd8x64<uint8_t>::NUM_CHUNKS-1]);
this->prev_input_block = input.chunks[simd8x64<uint8_t>::NUM_CHUNKS-1];
}

View File

@ -141,7 +141,7 @@ using namespace simd;
}
// The only problem that can happen at EOF is that a multibyte character is too short
// or a byte value too large in the last bytes: check_special_cases only checks for bytes
// or a byte value too large in the last bytes: check_special_cases only checks for bytes
// too large in the first of two bytes.
simdjson_really_inline void check_eof() {
// If the previous block had incomplete UTF-8 characters at the end, an ASCII block can't

View File

@ -11,7 +11,7 @@ namespace utf8_validation {
// are straight up concatenated into the final value. The first byte of a multibyte character is a
// "leading byte" and starts with N 1's, where N is the total number of bytes (110_____ = 2 byte
// lead). The remaining bytes of a multibyte character all start with 10. 1-byte characters just
// start with 0, because that's what ASCII looks like. Here's what each size looks like:
// start with 0, because that's what ASCII looks like. Here's what each size looks like:
//
// - ASCII (7 bits): 0_______
// - 2 byte character (11 bits): 110_____ 10______
@ -52,9 +52,9 @@ namespace utf8_validation {
// support values with more than 23 bits (which a 4-byte character supports).
//
// e.g. 11111000 10100000 10000000 10000000 10000000 (U+800000)
//
//
// Legal utf-8 byte sequences per http://www.unicode.org/versions/Unicode6.0.0/ch03.pdf - page 94:
//
//
// Code Points 1st 2nd 3rd 4th
// U+0000..U+007F 00..7F
// U+0080..U+07FF C2..DF 80..BF

View File

@ -14,7 +14,7 @@ namespace SIMDJSON_IMPLEMENTATION {
// are straight up concatenated into the final value. The first byte of a multibyte character is a
// "leading byte" and starts with N 1's, where N is the total number of bytes (110_____ = 2 byte
// lead). The remaining bytes of a multibyte character all start with 10. 1-byte characters just
// start with 0, because that's what ASCII looks like. Here's what each size looks like:
// start with 0, because that's what ASCII looks like. Here's what each size looks like:
//
// | Character Length | UTF-8 Byte Sequence |
// |-----------------------------|---------------------------------------|
@ -69,7 +69,7 @@ namespace SIMDJSON_IMPLEMENTATION {
// e.g. `11101101 10100000 10000000` (U+D800)
//
// ### 5+ byte characters
//
//
// INVALID_5_BYTE: 5-byte, 6-byte, 7-byte and 8-byte characters are unsupported; Unicode does not
// support values with more than 23 bits (which a 4-byte character supports).
//
@ -77,9 +77,9 @@ namespace SIMDJSON_IMPLEMENTATION {
// Unicode max value), or overlong (could fit in 4+ bytes).
//
// e.g. `11111000 10100000 10000000 10000000 10000000` (U+800000)
//
//
// Legal utf-8 byte sequences per http://www.unicode.org/versions/Unicode6.0.0/ch03.pdf - page 94:
//
//
// | Code Points | 1st | 2nd | 3rd | 4th |
// |--------------------|--------|--------|--------|--------|
// | U+0000..U+007F | 00..7F | | | |
@ -267,7 +267,7 @@ struct utf8_checker {
// Look up error masks for three consecutive nibbles. We need to
// AND with 0x0F for each one, because vpshufb has the neat
// "feature" that negative values in an index byte will result in
// "feature" that negative values in an index byte will result in
// a zero.
simd8<uint8_t> nibble_1_error = shifted_bytes.shr<4>().lookup_16<uint8_t>(
0, 0, 0, 0,
@ -294,7 +294,7 @@ struct utf8_checker {
TOO_LARGE_2, // 1111[0101..1111] ________ > U+10FFFF
TOO_LARGE_2,
TOO_LARGE_2,
TOO_LARGE_2,
TOO_LARGE_2,
TOO_LARGE_2,

View File

@ -9,7 +9,7 @@ public:
uint32_t *next_structural;
dom_parser_implementation &dom_parser;
// Start a structural iterator at the given structural index
// Start a structural iterator at the given structural index
simdjson_really_inline structural_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index)
: buf{_dom_parser.buf},
next_structural{&_dom_parser.structural_indexes[start_structural_index]},

View File

@ -6,7 +6,7 @@ namespace stage2 {
struct tape_writer {
/** The next place to write to tape */
uint64_t *next_tape_loc;
/** Write a signed 64-bit value to tape. */
simdjson_really_inline void append_s64(int64_t value) noexcept;

View File

@ -78,7 +78,7 @@ simdjson_really_inline json_character_block json_character_block::classify(const
_mm256_shuffle_epi8(op_table, in.chunks[0]),
_mm256_shuffle_epi8(op_table, in.chunks[1])
});
return { whitespace, op };
}

View File

@ -18,7 +18,7 @@ SIMDJSON_DLLIMPORTEXPORT const double power_of_ten[] = {
* are not a concern since they can be represented
* exactly using the binary notation, only the powers of five
* affect the binary significand.
*/
*/
// The truncated powers of five from 5^-342 all the way to 5^308

View File

@ -19,7 +19,7 @@
/**
* Some systems have bad floating-point parsing. We want to exclude them.
*/
#if defined(SIMDJSON_REGULAR_VISUAL_STUDIO) || defined (__linux__) || defined (__APPLE__) || defined(__FreeBSD__)
#if defined(SIMDJSON_REGULAR_VISUAL_STUDIO) || defined (__linux__) || defined (__APPLE__) || defined(__FreeBSD__)
// Finally, we want to exclude legacy 32-bit systems.
#ifndef SIMDJSON_IS_32BITS
// So we only run some of the floating-point tests under 64-bit linux, apple, regular visual studio, freebsd.
@ -35,7 +35,7 @@ namespace number_tests {
bool ground_truth() {
std::cout << __func__ << std::endl;
std::pair<std::string,double> ground_truth[] = {
{x1.03ae05e8fca1cp+63},
{x1.03ae05e8fca1cp+63},
{"2.2250738585072013e-308",0x1p-1022},
{"-92666518056446206563E3", -0x1.39f764644154dp+76},
{"-92666518056446206563E3", -0x1.39f764644154dp+76},
@ -128,7 +128,7 @@ namespace number_tests {
if (n >= sizeof(buf)) { abort(); }
double actual;
auto error = parser.parse(buf, n).get(actual);
if (error) { std::cerr << error << std::endl; return false; }
if (error) { std::cerr << error << std::endl; return false; }
if(actual!=expected) {
std::cerr << "JSON '" << buf << " parsed to ";
fprintf( stderr," %18.18g instead of %18.18g\n", actual, expected); // formatting numbers is easier with printf
@ -283,7 +283,7 @@ namespace number_tests {
bool specific_tests() {
std::cout << __func__ << std::endl;
return basic_test_64bit("-2402844368454405395.2",-2402844368454405395.2) &&
return basic_test_64bit("-2402844368454405395.2",-2402844368454405395.2) &&
basic_test_64bit("4503599627370496.5", 4503599627370496.5) &&
basic_test_64bit("4503599627475352.5", 4503599627475352.5) &&
basic_test_64bit("4503599627475353.5", 4503599627475353.5) &&
@ -322,7 +322,7 @@ namespace parse_api_tests {
std::cout << "Running " << __func__ << std::endl;
typedef std::tuple<std::string, std::unique_ptr<parser>,element> simdjson_tuple;
std::vector<simdjson_tuple> results;
std::vector<std::string> my_data = {"[1,2,3]", "[1,2,3]", "[1,2,3]"};
std::vector<std::string> my_data = {"[1,2,3]", "[1,2,3]", "[1,2,3]"};
for (std::string s : my_data) {
std::unique_ptr<dom::parser> parser(new dom::parser{});
@ -1311,7 +1311,7 @@ namespace type_tests {
#else
// We don't trust the underlying system so we only run the test_cast
// exact test when the expected_value is within the 53-bit range.
&& ((expected_value<-9007199254740992) || (expected_value>9007199254740992) || test_cast<double>(result, static_cast<double>(expected_value)))
&& ((expected_value<-9007199254740992) || (expected_value>9007199254740992) || test_cast<double>(result, static_cast<double>(expected_value)))
#endif
&& test_cast_error<bool>(result, INCORRECT_TYPE)
&& test_is_null(result, false);
@ -1338,7 +1338,7 @@ namespace type_tests {
#else
// We don't trust the underlying system so we only run the test_cast
// exact test when the expected_value is within the 53-bit range.
&& ((expected_value>9007199254740992) || test_cast<double>(result, static_cast<double>(expected_value)))
&& ((expected_value>9007199254740992) || test_cast<double>(result, static_cast<double>(expected_value)))
#endif
&& test_cast_error<bool>(result, INCORRECT_TYPE)
&& test_is_null(result, false);

View File

@ -11,10 +11,10 @@ bool single_document() {
#if COMPILATION_TEST_USE_FAILING_CODE
auto error = parser.parse_many(json).get(R"({"hello": "world"})"_padded);
#else
#else
auto json = R"({"hello": "world"})"_padded;
auto error = parser.parse_many(json).get(stream);
#endif
#endif
if(error) {
std::cerr << error << std::endl;
return false;

View File

@ -178,7 +178,7 @@ namespace document_stream_tests {
}
return true;
}
#endif
#endif
bool large_window() {
std::cout << "Running " << __func__ << std::endl;
@ -310,11 +310,11 @@ namespace document_stream_tests {
}
bool run() {
return test_current_index() &&
return test_current_index() &&
single_document() &&
#if SIMDJSON_EXCEPTIONS
single_document_exceptions() &&
issue1133() &&
issue1133() &&
#endif
#ifdef SIMDJSON_THREADS_ENABLED
threaded_disabled() &&

View File

@ -191,8 +191,8 @@ namespace adversarial {
int main() {
// this is put here deliberately to check that the documentation is correct (README),
// should this fail to compile, you should update the documentation:
if (simdjson::active_implementation->name() == "unsupported") {
printf("unsupported CPU\n");
if (simdjson::active_implementation->name() == "unsupported") {
printf("unsupported CPU\n");
}
std::cout << "Running error tests." << std::endl;
if (!(true

View File

@ -26,9 +26,9 @@ void found_unsigned_integer(uint64_t result, const uint8_t *buf);
/**
* Some systems have bad floating-point parsing. We want to exclude them.
*/
#if defined(SIMDJSON_REGULAR_VISUAL_STUDIO) || defined (__linux__) || defined (__APPLE__) || defined(__FreeBSD__)
#if defined(SIMDJSON_REGULAR_VISUAL_STUDIO) || defined (__linux__) || defined (__APPLE__) || defined(__FreeBSD__)
// Ok. So under Visual Studio, linux, apple and freebsd systems, we have a good chance of having a decent
// enough strtod. It is not certain, but it is maybe a good enough heuristic. We exclude systems like msys2
// enough strtod. It is not certain, but it is maybe a good enough heuristic. We exclude systems like msys2
// or cygwin.
//
// Finally, we want to exclude legacy 32-bit systems.
@ -37,7 +37,7 @@ void found_unsigned_integer(uint64_t result, const uint8_t *buf);
#define TEST_FLOATS
// Apple and freebsd need a special header, typically.
#if defined __APPLE__ || defined(__FreeBSD__)
# include <xlocale.h>
# include <xlocale.h>
#endif
#endif
@ -84,7 +84,7 @@ void found_invalid_number(const uint8_t *buf) {
#else
static locale_t c_locale = newlocale(LC_ALL_MASK, "C", NULL);
double expected = strtod_l((const char *)buf, &endptr, c_locale);
#endif
#endif
if (endptr != (const char *)buf) {
if (!is_in_bad_list((const char *)buf)) {
printf("Warning: found_invalid_number %.32s whereas strtod parses it to "
@ -140,7 +140,7 @@ void found_float(double result, const uint8_t *buf) {
#else
static locale_t c_locale = newlocale(LC_ALL_MASK, "C", NULL);
double expected = strtod_l((const char *)buf, &endptr, c_locale);
#endif
#endif
if (endptr == (const char *)buf) {
fprintf(stderr,
"parsed %f from %.32s whereas strtod refuses to parse a float, ",

View File

@ -105,7 +105,7 @@ namespace key_string_tests {
}
#endif
bool run() {
return
return
#if SIMDJSON_EXCEPTIONS
parser_key_value() &&
#endif
@ -124,7 +124,7 @@ namespace active_tests {
ondemand::object parent = doc["parent"];
{
ondemand::object c1 = parent["child1"];
if(std::string_view(c1["name"]) != "John") { return false; }
if(std::string_view(c1["name"]) != "John") { return false; }
}
{
ondemand::object c2 = parent["child2"];
@ -161,7 +161,7 @@ namespace active_tests {
}
#endif
bool run() {
return
return
#if SIMDJSON_EXCEPTIONS
parser_child() &&
parser_doc_correct() &&
@ -818,7 +818,7 @@ namespace ordering_tests {
y += double(point_object["y"]);
z += double(point_object["z"]);
}
return (x == 1.1) && (y == 2.2) && (z == 3.3);
return (x == 1.1) && (y == 2.2) && (z == 3.3);
}
bool out_of_order() {
@ -839,7 +839,7 @@ namespace ordering_tests {
return false;
} catch(simdjson_error&) {}
}
return (x == 0) && (y == 0) && (z == 3.3);
return (x == 0) && (y == 0) && (z == 3.3);
}
bool robust_order() {
@ -856,7 +856,7 @@ namespace ordering_tests {
else if (field.key() == "y") { y += double(field.value()); }
}
}
return (x == 1.1) && (y == 2.2) && (z == 3.3);
return (x == 1.1) && (y == 2.2) && (z == 3.3);
}
#endif

View File

@ -1,5 +1,5 @@
/***************
* We refer the programmer to
* We refer the programmer to
* JavaScript Object Notation (JSON) Pointer
* https://tools.ietf.org/html/rfc6901
*/
@ -165,7 +165,7 @@ bool issue1142() {
ASSERT_EQUAL(std::string("1"), simdjson::minify(e0))
auto o = dom::array(example).at(2).at_pointer("");
ASSERT_EQUAL(std::string(R"({"1":"bla"})"), simdjson::minify(o))
std::string_view s0 = dom::array(example).at(2).at_pointer("/1").at_pointer("");
std::string_view s0 = dom::array(example).at(2).at_pointer("/1").at_pointer("");
if(s0 != "bla") {
std::cerr << s0 << std::endl;
return false;

View File

@ -14,9 +14,9 @@
/**
* Some systems have bad floating-point parsing. We want to exclude them.
*/
#if defined(SIMDJSON_REGULAR_VISUAL_STUDIO) || defined (__linux__) || defined (__APPLE__) || defined(__FreeBSD__)
#if defined(SIMDJSON_REGULAR_VISUAL_STUDIO) || defined (__linux__) || defined (__APPLE__) || defined(__FreeBSD__)
// Ok. So under Visual Studio, linux, apple and freebsd systems, we have a good chance of having a decent
// enough strtod. It is not certain, but it is maybe a good enough heuristic. We exclude systems like msys2
// enough strtod. It is not certain, but it is maybe a good enough heuristic. We exclude systems like msys2
// or cygwin.
//
// Finally, we want to exclude legacy 32-bit systems.
@ -25,7 +25,7 @@
#define TEST_FLOATS
// Apple and freebsd need a special header, typically.
#if defined __APPLE__ || defined(__FreeBSD__)
# include <xlocale.h>
# include <xlocale.h>
#endif
#endif
@ -35,7 +35,7 @@
struct RandomEngine {
RandomEngine() = delete;
RandomEngine(uint32_t seed) : one_zero_generator(0,1), digit_generator(0,9), nonzero_digit_generator(1,9), digit_count_generator (1,40),exp_count_generator (1,3), generator(seed) {}
RandomEngine(uint32_t seed) : one_zero_generator(0,1), digit_generator(0,9), nonzero_digit_generator(1,9), digit_count_generator (1,40),exp_count_generator (1,3), generator(seed) {}
std::uniform_int_distribution<int> one_zero_generator;
std::uniform_int_distribution<int> digit_generator;
std::uniform_int_distribution<int> nonzero_digit_generator;
@ -62,7 +62,7 @@ size_t build_random_string(RandomEngine &rand, char *buffer) {
for (size_t i = 0; i < number_of_digits; i++) {
if (i == location_of_decimal_separator) {
buffer[pos++] = '.';
}
}
if (( i == 0) && (location_of_decimal_separator != 1)) {
buffer[pos++] = char(rand.next_nonzero_digit() + '0');
} else {
@ -111,7 +111,7 @@ bool check_float(double result, const char *buf) {
#else
static locale_t c_locale = newlocale(LC_ALL_MASK, "C", NULL);
double expected = strtod_l((const char *)buf, &endptr, c_locale);
#endif
#endif
if (endptr == (const char *)buf) {
fprintf(stderr,
"parsed %f from %.32s whereas strtod refuses to parse a float, ",
@ -137,13 +137,13 @@ bool tester(int seed, size_t volume) {
char buffer[1024]; // large buffer (can't overflow)
simdjson::dom::parser parser;
RandomEngine rand(seed);
double result;
double result;
for (size_t i = 0; i < volume; i++) {
if((i%100000) == 0) { std::cout << "."; std::cout.flush(); }
size_t length = build_random_string(rand, buffer);
auto error = parser.parse(buffer, length).get(result);
// When we parse a (finite) number, it better match strtod.
if ((!error) && (!check_float(result, buffer))) { return false; }
if ((!error) && (!check_float(result, buffer))) { return false; }
}
return true;
}

View File

@ -95,7 +95,7 @@ simdjson_really_inline bool assert_true(bool value, const char *operation = "res
#define ASSERT_SUCCESS(ACTUAL) do { if (!::assert_success((ACTUAL), #ACTUAL)) { return false; } } while (0);
#define ASSERT_ERROR(ACTUAL, EXPECTED) do { if (!::assert_error ((ACTUAL), (EXPECTED), #ACTUAL)) { return false; } } while (0);
#define ASSERT_TRUE(ACTUAL) do { if (!::assert_true ((ACTUAL), #ACTUAL)) { return false; } } while (0);
#define ASSERT(ACTUAL, MESSAGE) do { if (!::assert_true ((ACTUAL), (MESSAGE))) { return false; } } while (0);
#define ASSERT(ACTUAL, MESSAGE) do { if (!::assert_true ((ACTUAL), (MESSAGE))) { return false; } } while (0);
#define RUN_TEST(ACTUAL) do { if (!(ACTUAL)) { return false; } } while (0);
#define TEST_FAIL(MESSAGE) do { std::cerr << "FAIL: " << (MESSAGE) << std::endl; return false; } while (0);
#define TEST_SUCCEED() do { return true; } while (0);

View File

@ -9,7 +9,7 @@ SIMDJSON_PUSH_DISABLE_ALL_WARNINGS
#include "cxxopts.hpp"
SIMDJSON_POP_DISABLE_WARNINGS
#if CXXOPTS__VERSION_MAJOR < 3
#if CXXOPTS__VERSION_MAJOR < 3
int main(int argc, char *argv[]) {
#else
int main(int argc, const char *argv[]) {

Some files were not shown because too many files have changed in this diff Show More