Change parse_json return type from bool to int (#82)
* Added simdjerr namespace * Updated jsonparser files * updated stage1 and stage2 * removed stage2 inline function * Added forgotten return statements * Updated tools and benchmarks * Corrected parenthesis * Removed extra = * Accidentally undid reinterpret_cast * Better comments, undid a header name fuckup * Added an errorMsg method, updated readme * Removed useless header from stage2 * Updated single-header file * added simdjerr.cpp contents to simdjson.cpp * Made single header version work * Updated singleheader test, fixed simdjson.cpp * Renamed simdjerr namespace and files to simdjson * Updating the amalgamation.
This commit is contained in:
parent
10b6b0445e
commit
352dd5e7fa
|
@ -21,7 +21,7 @@ jobs:
|
|||
|
||||
- run:
|
||||
name: Running tests (gcc)
|
||||
command: make quiettest
|
||||
command: make quiettest amalgamate
|
||||
|
||||
- run:
|
||||
name: Building (gcc, cmake)
|
||||
|
@ -58,7 +58,7 @@ jobs:
|
|||
|
||||
- run:
|
||||
name: Running tests (clang)
|
||||
command: make quiettest
|
||||
command: make quiettest amalgamate
|
||||
|
||||
- run:
|
||||
name: Building (clang, cmake)
|
||||
|
|
|
@ -7,3 +7,4 @@ steps:
|
|||
commands:
|
||||
- make -j2
|
||||
- make quiettest -j2
|
||||
- make amalgamate
|
||||
|
|
6
Makefile
6
Makefile
|
@ -28,7 +28,7 @@ COMPARISONEXECUTABLES=minifiercompetition parsingcompetition parseandstatcompeti
|
|||
SUPPLEMENTARYEXECUTABLES=parse_noutf8validation parse_nonumberparsing parse_nostringparsing
|
||||
|
||||
HEADERS= include/simdjson/simdutf8check.h include/simdjson/stringparsing.h include/simdjson/numberparsing.h include/simdjson/jsonparser.h include/simdjson/common_defs.h include/simdjson/jsonioutil.h benchmark/benchmark.h benchmark/linux/linux-perf-events.h include/simdjson/parsedjson.h include/simdjson/stage1_find_marks.h include/simdjson/stage2_build_tape.h include/simdjson/jsoncharutils.h include/simdjson/jsonformatutils.h
|
||||
LIBFILES=src/jsonioutil.cpp src/jsonparser.cpp src/stage1_find_marks.cpp src/stage2_build_tape.cpp src/parsedjson.cpp src/parsedjsoniterator.cpp
|
||||
LIBFILES=src/jsonioutil.cpp src/jsonparser.cpp src/simdjson.cpp src/stage1_find_marks.cpp src/stage2_build_tape.cpp src/parsedjson.cpp src/parsedjsoniterator.cpp
|
||||
MINIFIERHEADERS=include/simdjson/jsonminifier.h include/simdjson/simdprune_tables.h
|
||||
MINIFIERLIBFILES=src/jsonminifier.cpp
|
||||
|
||||
|
@ -116,11 +116,11 @@ jsoncheck:tests/jsoncheck.cpp $(HEADERS) $(LIBFILES)
|
|||
$(CXX) $(CXXFLAGS) -o jsoncheck $(LIBFILES) tests/jsoncheck.cpp -I. $(LIBFLAGS)
|
||||
|
||||
numberparsingcheck:tests/numberparsingcheck.cpp $(HEADERS) $(LIBFILES)
|
||||
$(CXX) $(CXXFLAGS) -o numberparsingcheck tests/numberparsingcheck.cpp src/jsonioutil.cpp src/jsonparser.cpp src/stage1_find_marks.cpp src/parsedjson.cpp -I. $(LIBFLAGS) -DJSON_TEST_NUMBERS
|
||||
$(CXX) $(CXXFLAGS) -o numberparsingcheck tests/numberparsingcheck.cpp src/jsonioutil.cpp src/jsonparser.cpp src/simdjson.cpp src/stage1_find_marks.cpp src/parsedjson.cpp -I. $(LIBFLAGS) -DJSON_TEST_NUMBERS
|
||||
|
||||
|
||||
stringparsingcheck:tests/stringparsingcheck.cpp $(HEADERS) $(LIBFILES)
|
||||
$(CXX) $(CXXFLAGS) -o stringparsingcheck tests/stringparsingcheck.cpp src/jsonioutil.cpp src/jsonparser.cpp src/stage1_find_marks.cpp src/parsedjson.cpp -I. $(LIBFLAGS) -DJSON_TEST_STRINGS
|
||||
$(CXX) $(CXXFLAGS) -o stringparsingcheck tests/stringparsingcheck.cpp src/jsonioutil.cpp src/jsonparser.cpp src/simdjson.cpp src/stage1_find_marks.cpp src/parsedjson.cpp -I. $(LIBFLAGS) -DJSON_TEST_STRINGS
|
||||
|
||||
|
||||
minifiercompetition: benchmark/minifiercompetition.cpp $(HEADERS) $(LIBS) $(MINIFIERHEADERS) $(LIBFILES) $(MINIFIERLIBFILES)
|
||||
|
|
114
README.md
114
README.md
|
@ -1,51 +1,51 @@
|
|||
# simdjson : Parsing gigabytes of JSON per second
|
||||
|
||||
[![Build Status](https://cloud.drone.io/api/badges/lemire/simdjson/status.svg)](https://cloud.drone.io/lemire/simdjson/)
|
||||
[![CircleCI](https://circleci.com/gh/lemire/simdjson.svg?style=svg)](https://circleci.com/gh/lemire/simdjson)
|
||||
[![CircleCI](https://circleci.com/gh/lemire/simdjson.svg?style=svg)](https://circleci.com/gh/lemire/simdjson)
|
||||
[![][license img]][license]
|
||||
|
||||
|
||||
## A C++ library to see how fast we can parse JSON with complete validation.
|
||||
## A C++ library to see how fast we can parse JSON with complete validation.
|
||||
|
||||
JSON documents are everywhere on the Internet. Servers spend a lot of time parsing these documents. We want to accelerate the parsing of JSON per se using commonly available SIMD instructions as much as possible while doing full validation (including character encoding).
|
||||
|
||||
## Paper
|
||||
|
||||
A description of the design and implementation of simdjson appears at https://arxiv.org/abs/1902.08318 and an informal blog post providing some background and context is at https://branchfree.org/2019/02/25/paper-parsing-gigabytes-of-json-per-second/.
|
||||
|
||||
## Some performance results
|
||||
|
||||
We can use a quarter or fewer instructions than a state-of-the-art parser like RapidJSON, and half as many as sajson. To our knowledge, simdjson is the first fully-validating JSON parser to run at gigabytes per second on commodity processors.
|
||||
|
||||
<img src="doc/gbps.png" width="90%">
|
||||
|
||||
|
||||
On a Skylake processor, the parsing speeds (in GB/s) of various parsers on the twitter.json file are as follows.
|
||||
|
||||
| parser | GB/s |
|
||||
|---|---|
|
||||
| simdjson | 2.2 |
|
||||
| RapidJSON encoding-validation | 0.51|
|
||||
| RapidJSON encoding-validation, insitu | 0.71|
|
||||
| sajson (insitu, dynamic) | 0.70|
|
||||
| sajson (insitu, static) | 0.97|
|
||||
| dropbox | 0.14|
|
||||
| fastjson | 0.26|
|
||||
| gason | 0.85|
|
||||
| ultrajson | 0.42|
|
||||
| jsmn | 0.28|
|
||||
|cJSON | 0.34|
|
||||
| parser | GB/s |
|
||||
| ------------------------------------- | ---- |
|
||||
| simdjson | 2.2 |
|
||||
| RapidJSON encoding-validation | 0.51 |
|
||||
| RapidJSON encoding-validation, insitu | 0.71 |
|
||||
| sajson (insitu, dynamic) | 0.70 |
|
||||
| sajson (insitu, static) | 0.97 |
|
||||
| dropbox | 0.14 |
|
||||
| fastjson | 0.26 |
|
||||
| gason | 0.85 |
|
||||
| ultrajson | 0.42 |
|
||||
| jsmn | 0.28 |
|
||||
| cJSON | 0.34 |
|
||||
|
||||
## Requirements
|
||||
|
||||
- We support platforms like Linux or macOS, as well as Windows through Visual Studio 2017 or later.
|
||||
- A processor with AVX2 (i.e., Intel processors starting with the Haswell microarchitecture released 2013, and processors from AMD starting with the Ryzen)
|
||||
- A recent C++ compiler (e.g., GNU GCC or LLVM CLANG or Visual Studio 2017), we assume C++17. GNU GCC 7 or better or LLVM's clang 6 or better.
|
||||
- Some benchmark scripts assume bash and other common utilities, but they are optional.
|
||||
- Some benchmark scripts assume bash and other common utilities, but they are optional.
|
||||
|
||||
## License
|
||||
|
||||
This code is made available under the Apache License 2.0.
|
||||
This code is made available under the Apache License 2.0.
|
||||
|
||||
Under Windows, we build some tools using the windows/dirent_portable.h file (which is outside our library code): it is under the liberal (business-friendly) MIT license.
|
||||
Under Windows, we build some tools using the windows/dirent_portable.h file (which is outside our library code): it is under the liberal (business-friendly) MIT license.
|
||||
|
||||
## Code example
|
||||
|
||||
|
@ -60,8 +60,12 @@ const char * filename = ... //
|
|||
std::string_view p = get_corpus(filename);
|
||||
ParsedJson pj;
|
||||
pj.allocateCapacity(p.size()); // allocate memory for parsing up to p.size() bytes
|
||||
bool is_ok = json_parse(p, pj); // do the parsing, return false on error
|
||||
const int res = json_parse(p, pj); // do the parsing, return 0 on success
|
||||
// parsing is done!
|
||||
if (res != 0) {
|
||||
// You can use the "simdjson/simdjson.h" header to access the error message
|
||||
std::cout << "Error parsing:" << simdjson::errorMsg(res) << std::endl;
|
||||
}
|
||||
// You can safely delete the string content
|
||||
free((void*)p.data());
|
||||
// the ParsedJson document can be used here
|
||||
|
@ -96,7 +100,7 @@ copy the files in your project in your include path. You can then include them q
|
|||
#include "simdjson.h"
|
||||
#include "simdjson.cpp"
|
||||
int main(int argc, char *argv[]) {
|
||||
const char * filename = argv[1];
|
||||
const char * filename = argv[1];
|
||||
std::string_view p = get_corpus(filename);
|
||||
ParsedJson pj = build_parsed_json(p); // do the parsing
|
||||
if( ! pj.isValid() ) {
|
||||
|
@ -121,12 +125,13 @@ make
|
|||
make test
|
||||
```
|
||||
|
||||
|
||||
To run benchmarks:
|
||||
|
||||
```
|
||||
make parse
|
||||
./parse jsonexamples/twitter.json
|
||||
```
|
||||
|
||||
Under Linux, the `parse` command gives a detailed analysis of the performance counters.
|
||||
|
||||
To run comparative benchmarks (with other parsers):
|
||||
|
@ -137,7 +142,7 @@ make benchmark
|
|||
|
||||
## Usage (CMake on platforms like Linux or macOS)
|
||||
|
||||
Requirements: We require a recent version of cmake. On macOS, the easiest way to install cmake might be to use [brew](https://brew.sh) and then type
|
||||
Requirements: We require a recent version of cmake. On macOS, the easiest way to install cmake might be to use [brew](https://brew.sh) and then type
|
||||
|
||||
```
|
||||
brew install cmake
|
||||
|
@ -145,15 +150,13 @@ brew install cmake
|
|||
|
||||
There is an [equivalent brew on Linux which works the same way as well](https://linuxbrew.sh).
|
||||
|
||||
You need a recent compiler like clang or gcc. We recommend at least GNU GCC/G++ 7 or LLVM clang 6. For example, you can install a recent compiler with brew:
|
||||
You need a recent compiler like clang or gcc. We recommend at least GNU GCC/G++ 7 or LLVM clang 6. For example, you can install a recent compiler with brew:
|
||||
|
||||
```
|
||||
brew install gcc@8
|
||||
```
|
||||
|
||||
Optional: You need to tell cmake which compiler you wish to use by setting the CC and CXX variables. Under bash, you can do so with commands such as ``export CC=gcc-7`` and ``export CXX=g++-7``.
|
||||
|
||||
|
||||
Optional: You need to tell cmake which compiler you wish to use by setting the CC and CXX variables. Under bash, you can do so with commands such as `export CC=gcc-7` and `export CXX=g++-7`.
|
||||
|
||||
Building: While in the project repository, do the following:
|
||||
|
||||
|
@ -173,11 +176,10 @@ You can build a static library:
|
|||
mkdir buildstatic
|
||||
cd buildstatic
|
||||
cmake -DSIMDJSON_BUILD_STATIC=ON ..
|
||||
make
|
||||
make
|
||||
make test
|
||||
```
|
||||
|
||||
|
||||
In some cases, you may want to specify your compiler, especially if the default compiler on your system is too old. You may proceed as follows:
|
||||
|
||||
```
|
||||
|
@ -190,19 +192,16 @@ make
|
|||
make test
|
||||
```
|
||||
|
||||
|
||||
## Usage (CMake on Windows using Visual Studio)
|
||||
|
||||
|
||||
We are assuming that you have a common Windows PC with at least Visual Studio 2017, and an x64 processor with AVX2 support (2013 Haswell or later).
|
||||
|
||||
- Grab the simdjson code from GitHub, e.g., by cloning it using [GitHub Desktop](https://desktop.github.com/).
|
||||
- Install [CMake](https://cmake.org/download/). When you install it, make sure to ask that ``cmake`` be made available from the command line. Please choose a recent version of cmake.
|
||||
- Create a subdirectory within simdjson, such as ``VisualStudio``.
|
||||
- Using a shell, go to this newly created directory.
|
||||
- Type ``cmake -DCMAKE_GENERATOR_PLATFORM=x64 ..`` in the shell while in the ``VisualStudio`` repository. (Alternatively, if you want to build a DLL, you may use the command line ``cmake -DCMAKE_GENERATOR_PLATFORM=x64 -DSIMDJSON_BUILD_STATIC=OFF ..``.)
|
||||
- This last command created a Visual Studio solution file in the newly created directory (e.g., ``simdjson.sln``). Open this file in Visual Studio. You should now be able to build the project and run the tests. For example, in the ``Solution Explorer`` window (available from the ``View`` menu), right-click ``ALL_BUILD`` and select ``Build``. To test the code, still in the ``Solution Explorer`` window, select ``RUN_TESTS`` and select ``Build``.
|
||||
|
||||
- Install [CMake](https://cmake.org/download/). When you install it, make sure to ask that `cmake` be made available from the command line. Please choose a recent version of cmake.
|
||||
- Create a subdirectory within simdjson, such as `VisualStudio`.
|
||||
- Using a shell, go to this newly created directory.
|
||||
- Type `cmake -DCMAKE_GENERATOR_PLATFORM=x64 ..` in the shell while in the `VisualStudio` repository. (Alternatively, if you want to build a DLL, you may use the command line `cmake -DCMAKE_GENERATOR_PLATFORM=x64 -DSIMDJSON_BUILD_STATIC=OFF ..`.)
|
||||
- This last command created a Visual Studio solution file in the newly created directory (e.g., `simdjson.sln`). Open this file in Visual Studio. You should now be able to build the project and run the tests. For example, in the `Solution Explorer` window (available from the `View` menu), right-click `ALL_BUILD` and select `Build`. To test the code, still in the `Solution Explorer` window, select `RUN_TESTS` and select `Build`.
|
||||
|
||||
## Tools
|
||||
|
||||
|
@ -224,13 +223,12 @@ To simplify the engineering, we make some assumptions.
|
|||
- As allowed by the specification, we allow repeated keys within an object (other parsers like sajson do the same).
|
||||
- Performance is optimized for JSON documents spanning at least tens of kilobytes up to many megabytes: the performance issues with having to parse many tiny JSON documents or one truly enormous JSON document are different.
|
||||
|
||||
*We do not aim to provide a general-purpose JSON library.* A library like RapidJSON offers much more than just parsing, it helps you generate JSON and offers various other convenient functions. We merely parse the document.
|
||||
|
||||
_We do not aim to provide a general-purpose JSON library._ A library like RapidJSON offers much more than just parsing, it helps you generate JSON and offers various other convenient functions. We merely parse the document.
|
||||
|
||||
## Features
|
||||
|
||||
- The input string is unmodified. (Parsers like sajson and RapidJSON use the input string as a buffer.)
|
||||
- We parse integers and floating-point numbers as separate types which allows us to support large 64-bit integers in [-9223372036854775808,9223372036854775808), like a Java `long` or a C/C++ `long long`. Among the parsers that differentiate between integers and floating-point numbers, not all support 64-bit integers. (For example, sajson rejects JSON files with integers larger than or equal to 2147483648. RapidJSON will parse a file containing an overly long integer like 18446744073709551616 as a floating-point number.) When we cannot represent exactly an integer as a signed 64-bit value, we reject the JSON document.
|
||||
- We parse integers and floating-point numbers as separate types which allows us to support large 64-bit integers in [-9223372036854775808,9223372036854775808), like a Java `long` or a C/C++ `long long`. Among the parsers that differentiate between integers and floating-point numbers, not all support 64-bit integers. (For example, sajson rejects JSON files with integers larger than or equal to 2147483648. RapidJSON will parse a file containing an overly long integer like 18446744073709551616 as a floating-point number.) When we cannot represent exactly an integer as a signed 64-bit value, we reject the JSON document.
|
||||
- We do full UTF-8 validation as part of the parsing. (Parsers like fastjson, gason and dropbox json11 do not do UTF-8 validation.)
|
||||
- We fully validate the numbers. (Parsers like gason and ultrajson will accept `[0e+]` as valid JSON.)
|
||||
- We validate string content for unescaped characters. (Parsers like fastjson and ultrajson accept unescaped line breaks and tabs in strings.)
|
||||
|
@ -242,7 +240,6 @@ The parser works in two stages:
|
|||
- Stage 1. (Find marks) Identifies quickly structure elements, strings, and so forth. We validate UTF-8 encoding at that stage.
|
||||
- Stage 2. (Structure building) Involves constructing a "tree" of sort (materialized as a tape) to navigate through the data. Strings and numbers are parsed at this stage.
|
||||
|
||||
|
||||
## Navigating the parsed document
|
||||
|
||||
Here is a code sample to dump back the parsed JSON to a string:
|
||||
|
@ -338,7 +335,6 @@ void simdjson_traverse(std::vector<int64_t> &answer, ParsedJson::iterator &i) {
|
|||
}
|
||||
```
|
||||
|
||||
|
||||
## In-depth comparisons
|
||||
|
||||
If you want to see how a wide range of parsers validate a given JSON file:
|
||||
|
@ -368,14 +364,13 @@ make allparsingcompetition
|
|||
- [SimdJsonSharp](https://github.com/EgorBo/SimdJsonSharp): C# version for .NET Core
|
||||
- [simdjson_nodejs](https://github.com/luizperes/simdjson_nodejs): Node.js bindings for the simdjson project.
|
||||
|
||||
|
||||
## Various References
|
||||
|
||||
- [Google double-conv](https://github.com/google/double-conversion/)
|
||||
- [How to implement atoi using SIMD?](https://stackoverflow.com/questions/35127060/how-to-implement-atoi-using-simd)
|
||||
- [Parsing JSON is a Minefield 💣](http://seriot.ch/parsing_json.php)
|
||||
- https://tools.ietf.org/html/rfc7159
|
||||
- The Mison implementation in rust https://github.com/pikkr/pikkr
|
||||
- The Mison implementation in rust https://github.com/pikkr/pikkr
|
||||
- http://rapidjson.org/md_doc_sax.html
|
||||
- https://github.com/Geal/parser_benchmarks/tree/master/json
|
||||
- Gron: A command line tool that makes JSON greppable https://news.ycombinator.com/item?id=16727665
|
||||
|
@ -385,25 +380,25 @@ make allparsingcompetition
|
|||
- RapidJSON. http://rapidjson.org/
|
||||
|
||||
Inspiring links:
|
||||
|
||||
- https://auth0.com/blog/beating-json-performance-with-protobuf/
|
||||
- https://gist.github.com/shijuvar/25ad7de9505232c87034b8359543404a
|
||||
- https://github.com/frankmcsherry/blog/blob/master/posts/2018-02-11.md
|
||||
|
||||
|
||||
Validating UTF-8 takes no more than 0.7 cycles per byte:
|
||||
- https://github.com/lemire/fastvalidate-utf-8 https://lemire.me/blog/2018/05/16/validating-utf-8-strings-using-as-little-as-0-7-cycles-per-byte/
|
||||
|
||||
- https://github.com/lemire/fastvalidate-utf-8 https://lemire.me/blog/2018/05/16/validating-utf-8-strings-using-as-little-as-0-7-cycles-per-byte/
|
||||
|
||||
## Remarks on JSON parsing
|
||||
|
||||
- The JSON spec defines what a JSON parser is:
|
||||
> A JSON parser transforms a JSON text into another representation. A JSON parser MUST accept all texts that conform to the JSON grammar. A JSON parser MAY accept non-JSON forms or extensions. An implementation may set limits on the size of texts that it accepts. An implementation may set limits on the maximum depth of nesting. An implementation may set limits on the range and precision of numbers. An implementation may set limits on the length and character contents of strings.
|
||||
> A JSON parser transforms a JSON text into another representation. A JSON parser MUST accept all texts that conform to the JSON grammar. A JSON parser MAY accept non-JSON forms or extensions. An implementation may set limits on the size of texts that it accepts. An implementation may set limits on the maximum depth of nesting. An implementation may set limits on the range and precision of numbers. An implementation may set limits on the length and character contents of strings.
|
||||
|
||||
* JSON is not JavaScript:
|
||||
|
||||
- JSON is not JavaScript:
|
||||
> All JSON is Javascript but NOT all Javascript is JSON. So {property:1} is invalid because property does not have double quotes around it. {'property':1} is also invalid, because it's single quoted while the only thing that can placate the JSON specification is double quoting. JSON is even fussy enough that {"property":.1} is invalid too, because you should have of course written {"property":0.1}. Also, don't even think about having comments or semicolons, you guessed it: they're invalid. (credit:https://github.com/elzr/vim-json)
|
||||
> All JSON is Javascript but NOT all Javascript is JSON. So {property:1} is invalid because property does not have double quotes around it. {'property':1} is also invalid, because it's single quoted while the only thing that can placate the JSON specification is double quoting. JSON is even fussy enough that {"property":.1} is invalid too, because you should have of course written {"property":0.1}. Also, don't even think about having comments or semicolons, you guessed it: they're invalid. (credit:https://github.com/elzr/vim-json)
|
||||
|
||||
- The structural characters are:
|
||||
* The structural characters are:
|
||||
|
||||
|
||||
begin-array = [ left square bracket
|
||||
|
@ -413,7 +408,6 @@ Validating UTF-8 takes no more than 0.7 cycles per byte:
|
|||
name-separator = : colon
|
||||
value-separator = , comma
|
||||
|
||||
|
||||
### Pseudo-structural elements
|
||||
|
||||
A character is pseudo-structural if and only if:
|
||||
|
@ -421,15 +415,12 @@ A character is pseudo-structural if and only if:
|
|||
1. Not enclosed in quotes, AND
|
||||
2. Is a non-whitespace character, AND
|
||||
3. Its preceding character is either:
|
||||
(a) a structural character, OR
|
||||
(b) whitespace.
|
||||
(a) a structural character, OR
|
||||
(b) whitespace.
|
||||
|
||||
This helps as we redefine some new characters as pseudo-structural such as the characters 1, 1, G, n in the following:
|
||||
|
||||
> { "foo" : 1.5, "bar" : 1.5 GEOFF_IS_A_DUMMY bla bla , "baz", null }
|
||||
|
||||
|
||||
|
||||
> { "foo" : 1.5, "bar" : 1.5 GEOFF_IS_A_DUMMY bla bla , "baz", null }
|
||||
|
||||
## Academic References
|
||||
|
||||
|
@ -453,8 +444,7 @@ This helps as we redefine some new characters as pseudo-structural such as the c
|
|||
- Cameron, Robert D., et al. "Fast Regular Expression Matching with Bit-parallel Data Streams."
|
||||
- Lin, Dan. Bits filter: a high-performance multiple string pattern matching algorithm for malware detection. Diss. School of Computing Science-Simon Fraser University, 2010.
|
||||
- Yang, Shiyang. Validation of XML Document Based on Parallel Bit Stream Technology. Diss. Applied Sciences: School of Computing Science, 2013.
|
||||
- N. Nakasato, "Implementation of a parallel tree method on a GPU", Journal of Computational Science, vol. 3, no. 3, pp. 132-141, 2012.
|
||||
- N. Nakasato, "Implementation of a parallel tree method on a GPU", Journal of Computational Science, vol. 3, no. 3, pp. 132-141, 2012.
|
||||
|
||||
|
||||
[license]:LICENSE
|
||||
[license img]:https://img.shields.io/badge/License-Apache%202-blue.svg
|
||||
[license]: LICENSE
|
||||
[license img]: https://img.shields.io/badge/License-Apache%202-blue.svg
|
||||
|
|
|
@ -25,6 +25,7 @@ $SCRIPTPATH/src/parsedjsoniterator.cpp
|
|||
# order matters
|
||||
ALLCHEADERS="
|
||||
$SCRIPTPATH/include/simdjson/simdjson_version.h
|
||||
$SCRIPTPATH/include/simdjson/simdjson.h
|
||||
$SCRIPTPATH/include/simdjson/portability.h
|
||||
$SCRIPTPATH/include/simdjson/common_defs.h
|
||||
$SCRIPTPATH/include/simdjson/jsoncharutils.h
|
||||
|
@ -54,10 +55,11 @@ function stripinc()
|
|||
}
|
||||
function dofile()
|
||||
{
|
||||
echo "/* begin file $1 */"
|
||||
RELFILE=${1#"$SCRIPTPATH/"}
|
||||
echo "/* begin file $RELFILE */"
|
||||
# echo "#line 8 \"$1\"" ## redefining the line/file is not nearly as useful as it sounds for debugging. It breaks IDEs.
|
||||
stripinc < $1
|
||||
echo "/* end file $1 */"
|
||||
echo "/* end file $RELFILE */"
|
||||
}
|
||||
|
||||
timestamp=$(date)
|
||||
|
|
|
@ -171,7 +171,7 @@ int main(int argc, char *argv[]) {
|
|||
unified.start();
|
||||
#endif
|
||||
|
||||
isok = isok && unified_machine(p.data(), p.size(), pj);
|
||||
isok = isok && !unified_machine(p.data(), p.size(), pj);
|
||||
#ifndef SQUASH_COUNTERS
|
||||
unified.end(results);
|
||||
cy2 += results[0];
|
||||
|
|
|
@ -6,20 +6,18 @@
|
|||
#include "simdjson/parsedjson.h"
|
||||
#include "simdjson/stage1_find_marks.h"
|
||||
#include "simdjson/stage2_build_tape.h"
|
||||
|
||||
|
||||
|
||||
#include "simdjson/simdjson.h"
|
||||
|
||||
// Parse a document found in buf, need to preallocate ParsedJson.
|
||||
// Return false in case of a failure. You can also check validity
|
||||
// by calling pj.isValid(). The same ParsedJson can be reused for other documents.
|
||||
// Return 0 on success, an error code from simdjson/simdjson.h otherwise
|
||||
// You can also check validity by calling pj.isValid(). The same ParsedJson can be reused for other documents.
|
||||
//
|
||||
// If reallocifneeded is true (default) then a temporary buffer is created when needed during processing
|
||||
// (a copy of the input string is made).
|
||||
// The input buf should be readable up to buf + len + SIMDJSON_PADDING if reallocifneeded is false,
|
||||
// all bytes at and after buf + len are ignored (can be garbage).
|
||||
WARN_UNUSED
|
||||
bool json_parse(const uint8_t *buf, size_t len, ParsedJson &pj, bool reallocifneeded = true);
|
||||
int json_parse(const uint8_t *buf, size_t len, ParsedJson &pj, bool reallocifneeded = true);
|
||||
|
||||
// Parse a document found in buf, need to preallocate ParsedJson.
|
||||
// Return 0 on success, an error code from simdjson/simdjson.h otherwise. You can also check validity
|
||||
|
@ -30,7 +28,7 @@ bool json_parse(const uint8_t *buf, size_t len, ParsedJson &pj, bool reallocifne
|
|||
// The input buf should be readable up to buf + len + SIMDJSON_PADDING if reallocifneeded is false,
|
||||
// all bytes at and after buf + len are ignored (can be garbage).
|
||||
WARN_UNUSED
|
||||
inline bool json_parse(const char * buf, size_t len, ParsedJson &pj, bool reallocifneeded = true) {
|
||||
inline int json_parse(const char * buf, size_t len, ParsedJson &pj, bool reallocifneeded = true) {
|
||||
return json_parse(reinterpret_cast<const uint8_t *>(buf), len, pj, reallocifneeded);
|
||||
}
|
||||
|
||||
|
@ -43,7 +41,7 @@ inline bool json_parse(const char * buf, size_t len, ParsedJson &pj, bool reallo
|
|||
// the input s should be readable up to s.data() + s.size() + SIMDJSON_PADDING if reallocifneeded is false,
|
||||
// all bytes at and after s.data()+s.size() are ignored (can be garbage).
|
||||
WARN_UNUSED
|
||||
inline bool json_parse(const std::string_view &s, ParsedJson &pj, bool reallocifneeded = true) {
|
||||
inline int json_parse(const std::string_view &s, ParsedJson &pj, bool reallocifneeded = true) {
|
||||
return json_parse(s.data(), s.size(), pj, reallocifneeded);
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,16 @@
|
|||
#ifndef SIMDJSON_ERR_H
|
||||
# define SIMDJSON_ERR_H
|
||||
|
||||
#include <string>
|
||||
|
||||
struct simdjson {
|
||||
enum errorValues {
|
||||
SUCCESS = 0,
|
||||
CAPACITY, // This ParsedJson can't support a document that big
|
||||
MEMALLOC, // Error allocating memory, most likely out of memory
|
||||
TAPE_ERROR, // Something went wrong while writing to the tape
|
||||
};
|
||||
static const std::string& errorMsg(const int);
|
||||
};
|
||||
|
||||
#endif
|
|
@ -2,14 +2,13 @@
|
|||
#define SIMDJSON_STAGE1_FIND_MARKS_H
|
||||
|
||||
#include "simdjson/common_defs.h"
|
||||
#include "simdjson/parsedjson.h"
|
||||
|
||||
struct ParsedJson;
|
||||
|
||||
WARN_UNUSED
|
||||
bool find_structural_bits(const uint8_t *buf, size_t len, ParsedJson &pj);
|
||||
|
||||
WARN_UNUSED
|
||||
static inline bool find_structural_bits(const char *buf, size_t len, ParsedJson &pj) {
|
||||
return find_structural_bits(reinterpret_cast<const uint8_t *>(buf), len, pj);
|
||||
}
|
||||
bool find_structural_bits(const char *buf, size_t len, ParsedJson &pj);
|
||||
|
||||
#endif
|
||||
|
|
|
@ -2,17 +2,15 @@
|
|||
#define SIMDJSON_STAGE2_BUILD_TAPE_H
|
||||
|
||||
#include "simdjson/common_defs.h"
|
||||
#include "simdjson/parsedjson.h"
|
||||
|
||||
struct ParsedJson;
|
||||
|
||||
void init_state_machine();
|
||||
|
||||
WARN_UNUSED
|
||||
bool unified_machine(const uint8_t *buf, size_t len, ParsedJson &pj);
|
||||
|
||||
int unified_machine(const uint8_t *buf, size_t len, ParsedJson &pj);
|
||||
|
||||
WARN_UNUSED
|
||||
static inline bool unified_machine(const char *buf, size_t len, ParsedJson &pj) {
|
||||
return unified_machine(reinterpret_cast<const uint8_t *>(buf),len,pj);
|
||||
}
|
||||
int unified_machine(const char *buf, size_t len, ParsedJson &pj);
|
||||
|
||||
#endif
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
/* auto-generated on Tue 26 Feb 2019 10:14:31 EST. Do not edit! */
|
||||
/* auto-generated on Fri 1 Mar 2019 16:20:33 EST. Do not edit! */
|
||||
|
||||
#include <iostream>
|
||||
#include "simdjson.h"
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
/* auto-generated on Tue 26 Feb 2019 10:14:31 EST. Do not edit! */
|
||||
/* auto-generated on Fri 1 Mar 2019 16:20:33 EST. Do not edit! */
|
||||
#include "simdjson.h"
|
||||
|
||||
/* used for http://dmalloc.com/ Dmalloc - Debug Malloc Library */
|
||||
|
@ -6,7 +6,7 @@
|
|||
#include "dmalloc.h"
|
||||
#endif
|
||||
|
||||
/* begin file /Users/lemire/CVS/github/simdjson/src/jsonioutil.cpp */
|
||||
/* begin file src/jsonioutil.cpp */
|
||||
#include <cstring>
|
||||
#include <cstdlib>
|
||||
|
||||
|
@ -48,8 +48,8 @@ std::string_view get_corpus(const std::string& filename) {
|
|||
}
|
||||
throw std::runtime_error("could not load corpus");
|
||||
}
|
||||
/* end file /Users/lemire/CVS/github/simdjson/src/jsonioutil.cpp */
|
||||
/* begin file /Users/lemire/CVS/github/simdjson/src/jsonminifier.cpp */
|
||||
/* end file src/jsonioutil.cpp */
|
||||
/* begin file src/jsonminifier.cpp */
|
||||
#include <cstdint>
|
||||
#ifndef __AVX2__
|
||||
|
||||
|
@ -299,8 +299,8 @@ size_t jsonminify(const uint8_t *buf, size_t len, uint8_t *out) {
|
|||
}
|
||||
|
||||
#endif
|
||||
/* end file /Users/lemire/CVS/github/simdjson/src/jsonminifier.cpp */
|
||||
/* begin file /Users/lemire/CVS/github/simdjson/src/jsonparser.cpp */
|
||||
/* end file src/jsonminifier.cpp */
|
||||
/* begin file src/jsonparser.cpp */
|
||||
#ifdef _MSC_VER
|
||||
#include <windows.h>
|
||||
#include <sysinfoapi.h>
|
||||
|
@ -308,20 +308,11 @@ size_t jsonminify(const uint8_t *buf, size_t len, uint8_t *out) {
|
|||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
// parse a document found in buf, need to preallocate ParsedJson.
|
||||
WARN_UNUSED
|
||||
bool json_parse(const uint8_t *buf, size_t len, ParsedJson &pj, bool reallocifneeded) {
|
||||
int json_parse(const uint8_t *buf, size_t len, ParsedJson &pj, bool reallocifneeded) {
|
||||
if (pj.bytecapacity < len) {
|
||||
std::cerr << "Your ParsedJson cannot support documents that big: " << len
|
||||
<< std::endl;
|
||||
return false;
|
||||
return simdjson::CAPACITY;
|
||||
}
|
||||
bool reallocated = false;
|
||||
if(reallocifneeded) {
|
||||
|
@ -335,24 +326,19 @@ bool json_parse(const uint8_t *buf, size_t len, ParsedJson &pj, bool reallocifne
|
|||
#endif
|
||||
if ( (reinterpret_cast<uintptr_t>(buf + len - 1) % pagesize ) < SIMDJSON_PADDING ) {
|
||||
const uint8_t *tmpbuf = buf;
|
||||
buf = reinterpret_cast<uint8_t *>(allocate_padded_buffer(len));
|
||||
if(buf == nullptr) { return false;
|
||||
}
|
||||
buf = (uint8_t *) allocate_padded_buffer(len);
|
||||
if(buf == NULL) return simdjson::MEMALLOC;
|
||||
memcpy((void*)buf,tmpbuf,len);
|
||||
reallocated = true;
|
||||
}
|
||||
}
|
||||
bool isok = find_structural_bits(buf, len, pj);
|
||||
if (isok) {
|
||||
isok = unified_machine(buf, len, pj);
|
||||
} else {
|
||||
if(reallocated) { free((void*)buf);
|
||||
}
|
||||
return false;
|
||||
// find_structural_bits returns a boolean, not an int, we invert its result to keep consistent with res == 0 meaning success
|
||||
int res = !find_structural_bits(buf, len, pj);
|
||||
if (!res) {
|
||||
res = unified_machine(buf, len, pj);
|
||||
}
|
||||
if(reallocated) { free((void*)buf);
|
||||
}
|
||||
return isok;
|
||||
if(reallocated) { aligned_free((void*)buf);}
|
||||
return res;
|
||||
}
|
||||
|
||||
WARN_UNUSED
|
||||
|
@ -360,15 +346,16 @@ ParsedJson build_parsed_json(const uint8_t *buf, size_t len, bool reallocifneede
|
|||
ParsedJson pj;
|
||||
bool ok = pj.allocateCapacity(len);
|
||||
if(ok) {
|
||||
ok = json_parse(buf, len, pj, reallocifneeded);
|
||||
int res = json_parse(buf, len, pj, reallocifneeded);
|
||||
ok = res == simdjson::SUCCESS;
|
||||
assert(ok == pj.isValid());
|
||||
} else {
|
||||
std::cerr << "failure during memory allocation " << std::endl;
|
||||
}
|
||||
return pj;
|
||||
}
|
||||
/* end file /Users/lemire/CVS/github/simdjson/src/jsonparser.cpp */
|
||||
/* begin file /Users/lemire/CVS/github/simdjson/src/stage1_find_marks.cpp */
|
||||
/* end file src/jsonparser.cpp */
|
||||
/* begin file src/stage1_find_marks.cpp */
|
||||
#include <cassert>
|
||||
|
||||
#ifndef SIMDJSON_SKIPUTF8VALIDATION
|
||||
|
@ -821,8 +808,12 @@ WARN_UNUSED
|
|||
return true;
|
||||
#endif
|
||||
}
|
||||
/* end file /Users/lemire/CVS/github/simdjson/src/stage1_find_marks.cpp */
|
||||
/* begin file /Users/lemire/CVS/github/simdjson/src/stage2_build_tape.cpp */
|
||||
|
||||
bool find_structural_bits(const char *buf, size_t len, ParsedJson &pj) {
|
||||
return find_structural_bits(reinterpret_cast<const uint8_t*>(buf), len, pj);
|
||||
}
|
||||
/* end file src/stage1_find_marks.cpp */
|
||||
/* begin file src/stage2_build_tape.cpp */
|
||||
#ifdef _MSC_VER
|
||||
/* Microsoft C/C++-compatible compiler */
|
||||
#include <intrin.h>
|
||||
|
@ -882,7 +873,7 @@ really_inline bool is_valid_null_atom(const uint8_t *loc) {
|
|||
* for documentation.
|
||||
***********/
|
||||
WARN_UNUSED
|
||||
bool unified_machine(const uint8_t *buf, size_t len, ParsedJson &pj) {
|
||||
int unified_machine(const uint8_t *buf, size_t len, ParsedJson &pj) {
|
||||
uint32_t i = 0; // index of the structural character (0,1,2,3...)
|
||||
uint32_t idx; // location of the structural character in the input (buf)
|
||||
uint8_t c; // used to track the (structural) character we are looking at, updated
|
||||
|
@ -890,8 +881,7 @@ bool unified_machine(const uint8_t *buf, size_t len, ParsedJson &pj) {
|
|||
uint32_t depth = 0; // could have an arbitrary starting depth
|
||||
pj.init();
|
||||
if(pj.bytecapacity < len) {
|
||||
fprintf(stderr, "insufficient capacity\n");
|
||||
return false;
|
||||
return simdjson::CAPACITY;
|
||||
}
|
||||
// this macro reads the next structural character, updating idx, i and c.
|
||||
#define UPDATE_CHAR() \
|
||||
|
@ -1329,13 +1319,17 @@ succeed:
|
|||
|
||||
|
||||
pj.isvalid = true;
|
||||
return true;
|
||||
return simdjson::SUCCESS;
|
||||
|
||||
fail:
|
||||
return false;
|
||||
return simdjson::TAPE_ERROR;
|
||||
}
|
||||
/* end file /Users/lemire/CVS/github/simdjson/src/stage2_build_tape.cpp */
|
||||
/* begin file /Users/lemire/CVS/github/simdjson/src/parsedjson.cpp */
|
||||
|
||||
int unified_machine(const char *buf, size_t len, ParsedJson &pj) {
|
||||
return unified_machine(reinterpret_cast<const uint8_t*>(buf), len, pj);
|
||||
}
|
||||
/* end file src/stage2_build_tape.cpp */
|
||||
/* begin file src/parsedjson.cpp */
|
||||
|
||||
ParsedJson::ParsedJson() :
|
||||
structural_indexes(nullptr), tape(nullptr), containing_scope_offset(nullptr),
|
||||
|
@ -1375,12 +1369,9 @@ bool ParsedJson::allocateCapacity(size_t len, size_t maxdepth) {
|
|||
std::cerr << "capacities must be non-zero " << std::endl;
|
||||
return false;
|
||||
}
|
||||
if (len > 0) {
|
||||
if ((len <= bytecapacity) && (depthcapacity < maxdepth)) {
|
||||
return true;
|
||||
}
|
||||
deallocate();
|
||||
}
|
||||
if ((len <= bytecapacity) && (depthcapacity < maxdepth))
|
||||
return true;
|
||||
deallocate();
|
||||
isvalid = false;
|
||||
bytecapacity = 0; // will only set it to len after allocations are a success
|
||||
n_structural_indexes = 0;
|
||||
|
@ -1424,16 +1415,11 @@ void ParsedJson::deallocate() {
|
|||
depthcapacity = 0;
|
||||
tapecapacity = 0;
|
||||
stringcapacity = 0;
|
||||
{delete[] ret_address;
|
||||
}
|
||||
{delete[] containing_scope_offset;
|
||||
}
|
||||
{delete[] tape;
|
||||
}
|
||||
{delete[] string_buf;
|
||||
}
|
||||
{delete[] structural_indexes;
|
||||
}
|
||||
delete[] ret_address;
|
||||
delete[] containing_scope_offset;
|
||||
delete[] tape;
|
||||
delete[] string_buf;
|
||||
delete[] structural_indexes;
|
||||
isvalid = false;
|
||||
}
|
||||
|
||||
|
@ -1445,8 +1431,9 @@ void ParsedJson::init() {
|
|||
|
||||
WARN_UNUSED
|
||||
bool ParsedJson::printjson(std::ostream &os) {
|
||||
if(!isvalid) { return false;
|
||||
}
|
||||
if(!isvalid) {
|
||||
return false;
|
||||
}
|
||||
size_t tapeidx = 0;
|
||||
uint64_t tape_val = tape[tapeidx];
|
||||
uint8_t type = (tape_val >> 56);
|
||||
|
@ -1475,16 +1462,16 @@ bool ParsedJson::printjson(std::ostream &os) {
|
|||
if (!inobject[depth]) {
|
||||
if ((inobjectidx[depth] > 0) && (type != ']')) {
|
||||
os << ",";
|
||||
}
|
||||
}
|
||||
inobjectidx[depth]++;
|
||||
} else { // if (inobject) {
|
||||
if ((inobjectidx[depth] > 0) && ((inobjectidx[depth] & 1) == 0) &&
|
||||
(type != '}')) {
|
||||
os << ",";
|
||||
}
|
||||
}
|
||||
if (((inobjectidx[depth] & 1) == 1)) {
|
||||
os << ":";
|
||||
}
|
||||
}
|
||||
inobjectidx[depth]++;
|
||||
}
|
||||
switch (type) {
|
||||
|
@ -1495,14 +1482,18 @@ bool ParsedJson::printjson(std::ostream &os) {
|
|||
break;
|
||||
case 'l': // we have a long int
|
||||
if (tapeidx + 1 >= howmany) {
|
||||
delete[] inobject;
|
||||
delete[] inobjectidx;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
os << static_cast<int64_t>(tape[++tapeidx]);
|
||||
break;
|
||||
case 'd': // we have a double
|
||||
if (tapeidx + 1 >= howmany) {
|
||||
if (tapeidx + 1 >= howmany){
|
||||
delete[] inobject;
|
||||
delete[] inobjectidx;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
double answer;
|
||||
memcpy(&answer, &tape[++tapeidx], sizeof(answer));
|
||||
os << answer;
|
||||
|
@ -1586,14 +1577,14 @@ bool ParsedJson::dump_raw_tape(std::ostream &os) {
|
|||
case 'l': // we have a long int
|
||||
if (tapeidx + 1 >= howmany) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
os << "integer " << static_cast<int64_t>(tape[++tapeidx]) << "\n";
|
||||
break;
|
||||
case 'd': // we have a double
|
||||
os << "float ";
|
||||
if (tapeidx + 1 >= howmany) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
double answer;
|
||||
memcpy(&answer, &tape[++tapeidx], sizeof(answer));
|
||||
os << answer << '\n';
|
||||
|
@ -1632,8 +1623,8 @@ bool ParsedJson::dump_raw_tape(std::ostream &os) {
|
|||
os << tapeidx << " : "<< type <<"\t// pointing to " << payload <<" (start root)\n";
|
||||
return true;
|
||||
}
|
||||
/* end file /Users/lemire/CVS/github/simdjson/src/parsedjson.cpp */
|
||||
/* begin file /Users/lemire/CVS/github/simdjson/src/parsedjsoniterator.cpp */
|
||||
/* end file src/parsedjson.cpp */
|
||||
/* begin file src/parsedjsoniterator.cpp */
|
||||
|
||||
ParsedJson::iterator::iterator(ParsedJson &pj_) : pj(pj_), depth(0), location(0), tape_length(0), depthindex(nullptr) {
|
||||
if(pj.isValid()) {
|
||||
|
@ -1933,4 +1924,4 @@ bool ParsedJson::iterator::print(std::ostream &os, bool escape_strings) const {
|
|||
}
|
||||
return true;
|
||||
}
|
||||
/* end file /Users/lemire/CVS/github/simdjson/src/parsedjsoniterator.cpp */
|
||||
/* end file src/parsedjsoniterator.cpp */
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/* auto-generated on Tue 26 Feb 2019 10:14:31 EST. Do not edit! */
|
||||
/* begin file /Users/lemire/CVS/github/simdjson/include/simdjson/simdjson_version.h */
|
||||
/* auto-generated on Fri 1 Mar 2019 16:20:33 EST. Do not edit! */
|
||||
/* begin file include/simdjson/simdjson_version.h */
|
||||
// /include/simdjson/simdjson_version.h automatically generated by release.py, do not change by hand
|
||||
#ifndef SIMDJSON_INCLUDE_SIMDJSON_VERSION
|
||||
#define SIMDJSON_INCLUDE_SIMDJSON_VERSION
|
||||
|
@ -10,8 +10,26 @@ enum {
|
|||
SIMDJSON_VERSION_REVISION = 1
|
||||
};
|
||||
#endif // SIMDJSON_INCLUDE_SIMDJSON_VERSION
|
||||
/* end file /Users/lemire/CVS/github/simdjson/include/simdjson/simdjson_version.h */
|
||||
/* begin file /Users/lemire/CVS/github/simdjson/include/simdjson/portability.h */
|
||||
/* end file include/simdjson/simdjson_version.h */
|
||||
/* begin file include/simdjson/simdjson.h */
|
||||
#ifndef SIMDJSON_ERR_H
|
||||
# define SIMDJSON_ERR_H
|
||||
|
||||
#include <string>
|
||||
|
||||
// Holds the integer status codes used by the parser together with a helper
// that maps a code to a message string.
struct simdjson {
  // SUCCESS is 0; the remaining codes count up from it.
  enum errorValues {
    SUCCESS = 0,
    CAPACITY = 1,   // This ParsedJson can't support a document that big
    MEMALLOC = 2,   // Error allocating memory, most likely out of memory
    TAPE_ERROR = 3  // Something went wrong while writing to the tape
  };
  // Declared here, defined out of line: message string for an error code.
  static const std::string &errorMsg(int errorCode);
};
|
||||
|
||||
#endif
|
||||
/* end file include/simdjson/simdjson.h */
|
||||
/* begin file include/simdjson/portability.h */
|
||||
#ifndef SIMDJSON_PORTABILITY_H
|
||||
#define SIMDJSON_PORTABILITY_H
|
||||
|
||||
|
@ -139,8 +157,8 @@ static inline void aligned_free(void *memblock) {
|
|||
}
|
||||
|
||||
#endif // SIMDJSON_PORTABILITY_H
|
||||
/* end file /Users/lemire/CVS/github/simdjson/include/simdjson/portability.h */
|
||||
/* begin file /Users/lemire/CVS/github/simdjson/include/simdjson/common_defs.h */
|
||||
/* end file include/simdjson/portability.h */
|
||||
/* begin file include/simdjson/common_defs.h */
|
||||
#ifndef SIMDJSON_COMMON_DEFS_H
|
||||
#define SIMDJSON_COMMON_DEFS_H
|
||||
|
||||
|
@ -199,8 +217,8 @@ static inline void aligned_free(void *memblock) {
|
|||
#endif // MSC_VER
|
||||
|
||||
#endif // SIMDJSON_COMMON_DEFS_H
|
||||
/* end file /Users/lemire/CVS/github/simdjson/include/simdjson/common_defs.h */
|
||||
/* begin file /Users/lemire/CVS/github/simdjson/include/simdjson/jsoncharutils.h */
|
||||
/* end file include/simdjson/common_defs.h */
|
||||
/* begin file include/simdjson/jsoncharutils.h */
|
||||
#ifndef SIMDJSON_JSONCHARUTILS_H
|
||||
#define SIMDJSON_JSONCHARUTILS_H
|
||||
|
||||
|
@ -322,8 +340,8 @@ inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) {
|
|||
}
|
||||
|
||||
#endif
|
||||
/* end file /Users/lemire/CVS/github/simdjson/include/simdjson/jsoncharutils.h */
|
||||
/* begin file /Users/lemire/CVS/github/simdjson/include/simdjson/jsonformatutils.h */
|
||||
/* end file include/simdjson/jsoncharutils.h */
|
||||
/* begin file include/simdjson/jsonformatutils.h */
|
||||
#ifndef SIMDJSON_JSONFORMATUTILS_H
|
||||
#define SIMDJSON_JSONFORMATUTILS_H
|
||||
|
||||
|
@ -422,8 +440,8 @@ static inline void print_with_escapes(const char *src, std::ostream &os) {
|
|||
}
|
||||
|
||||
#endif
|
||||
/* end file /Users/lemire/CVS/github/simdjson/include/simdjson/jsonformatutils.h */
|
||||
/* begin file /Users/lemire/CVS/github/simdjson/include/simdjson/jsonioutil.h */
|
||||
/* end file include/simdjson/jsonformatutils.h */
|
||||
/* begin file include/simdjson/jsonioutil.h */
|
||||
#ifndef SIMDJSON_JSONIOUTIL_H
|
||||
#define SIMDJSON_JSONIOUTIL_H
|
||||
|
||||
|
@ -463,8 +481,8 @@ std::string_view get_corpus(const std::string& filename);
|
|||
|
||||
|
||||
#endif
|
||||
/* end file /Users/lemire/CVS/github/simdjson/include/simdjson/jsonioutil.h */
|
||||
/* begin file /Users/lemire/CVS/github/simdjson/include/simdjson/simdprune_tables.h */
|
||||
/* end file include/simdjson/jsonioutil.h */
|
||||
/* begin file include/simdjson/simdprune_tables.h */
|
||||
#ifndef SIMDJSON_SIMDPRUNE_TABLES_H
|
||||
#define SIMDJSON_SIMDPRUNE_TABLES_H
|
||||
|
||||
|
@ -35543,8 +35561,8 @@ static const uint32_t mask256_epi32[] = {
|
|||
#endif //__AVX2__
|
||||
|
||||
#endif
|
||||
/* end file /Users/lemire/CVS/github/simdjson/include/simdjson/simdprune_tables.h */
|
||||
/* begin file /Users/lemire/CVS/github/simdjson/include/simdjson/simdutf8check.h */
|
||||
/* end file include/simdjson/simdprune_tables.h */
|
||||
/* begin file include/simdjson/simdutf8check.h */
|
||||
|
||||
#ifndef SIMDJSON_SIMDUTF8CHECK_H
|
||||
#define SIMDJSON_SIMDUTF8CHECK_H
|
||||
|
@ -35739,8 +35757,8 @@ avxcheckUTF8Bytes(__m256i current_bytes,
|
|||
#warning "We require AVX2 support!"
|
||||
#endif // __AVX2__
|
||||
#endif
|
||||
/* end file /Users/lemire/CVS/github/simdjson/include/simdjson/simdutf8check.h */
|
||||
/* begin file /Users/lemire/CVS/github/simdjson/include/simdjson/jsonminifier.h */
|
||||
/* end file include/simdjson/simdutf8check.h */
|
||||
/* begin file include/simdjson/jsonminifier.h */
|
||||
#ifndef SIMDJSON_JSONMINIFIER_H
|
||||
#define SIMDJSON_JSONMINIFIER_H
|
||||
|
||||
|
@ -35763,8 +35781,8 @@ static inline size_t jsonminify(const std::string_view & p, char *out) {
|
|||
}
|
||||
|
||||
#endif
|
||||
/* end file /Users/lemire/CVS/github/simdjson/include/simdjson/jsonminifier.h */
|
||||
/* begin file /Users/lemire/CVS/github/simdjson/include/simdjson/parsedjson.h */
|
||||
/* end file include/simdjson/jsonminifier.h */
|
||||
/* begin file include/simdjson/parsedjson.h */
|
||||
#ifndef SIMDJSON_PARSEDJSON_H
|
||||
#define SIMDJSON_PARSEDJSON_H
|
||||
|
||||
|
@ -36016,23 +36034,23 @@ inline void dumpbits32_always(uint32_t v, const std::string &msg) {
|
|||
|
||||
|
||||
#endif
|
||||
/* end file /Users/lemire/CVS/github/simdjson/include/simdjson/parsedjson.h */
|
||||
/* begin file /Users/lemire/CVS/github/simdjson/include/simdjson/stage1_find_marks.h */
|
||||
/* end file include/simdjson/parsedjson.h */
|
||||
/* begin file include/simdjson/stage1_find_marks.h */
|
||||
#ifndef SIMDJSON_STAGE1_FIND_MARKS_H
|
||||
#define SIMDJSON_STAGE1_FIND_MARKS_H
|
||||
|
||||
|
||||
struct ParsedJson;
|
||||
|
||||
WARN_UNUSED
|
||||
bool find_structural_bits(const uint8_t *buf, size_t len, ParsedJson &pj);
|
||||
|
||||
WARN_UNUSED
|
||||
static inline bool find_structural_bits(const char *buf, size_t len, ParsedJson &pj) {
|
||||
return find_structural_bits(reinterpret_cast<const uint8_t *>(buf), len, pj);
|
||||
}
|
||||
bool find_structural_bits(const char *buf, size_t len, ParsedJson &pj);
|
||||
|
||||
#endif
|
||||
/* end file /Users/lemire/CVS/github/simdjson/include/simdjson/stage1_find_marks.h */
|
||||
/* begin file /Users/lemire/CVS/github/simdjson/include/simdjson/stringparsing.h */
|
||||
/* end file include/simdjson/stage1_find_marks.h */
|
||||
/* begin file include/simdjson/stringparsing.h */
|
||||
#ifndef SIMDJSON_STRINGPARSING_H
|
||||
#define SIMDJSON_STRINGPARSING_H
|
||||
|
||||
|
@ -36223,8 +36241,8 @@ really_inline bool parse_string(const uint8_t *buf, UNUSED size_t len,
|
|||
|
||||
|
||||
#endif
|
||||
/* end file /Users/lemire/CVS/github/simdjson/include/simdjson/stringparsing.h */
|
||||
/* begin file /Users/lemire/CVS/github/simdjson/include/simdjson/numberparsing.h */
|
||||
/* end file include/simdjson/stringparsing.h */
|
||||
/* begin file include/simdjson/numberparsing.h */
|
||||
#ifndef SIMDJSON_NUMBERPARSING_H
|
||||
#define SIMDJSON_NUMBERPARSING_H
|
||||
|
||||
|
@ -36741,43 +36759,39 @@ static really_inline bool parse_number(const uint8_t *const buf,
|
|||
}
|
||||
|
||||
#endif
|
||||
/* end file /Users/lemire/CVS/github/simdjson/include/simdjson/numberparsing.h */
|
||||
/* begin file /Users/lemire/CVS/github/simdjson/include/simdjson/stage2_build_tape.h */
|
||||
/* end file include/simdjson/numberparsing.h */
|
||||
/* begin file include/simdjson/stage2_build_tape.h */
|
||||
#ifndef SIMDJSON_STAGE2_BUILD_TAPE_H
|
||||
#define SIMDJSON_STAGE2_BUILD_TAPE_H
|
||||
|
||||
|
||||
struct ParsedJson;
|
||||
|
||||
void init_state_machine();
|
||||
|
||||
WARN_UNUSED
|
||||
bool unified_machine(const uint8_t *buf, size_t len, ParsedJson &pj);
|
||||
|
||||
int unified_machine(const uint8_t *buf, size_t len, ParsedJson &pj);
|
||||
|
||||
WARN_UNUSED
|
||||
static inline bool unified_machine(const char *buf, size_t len, ParsedJson &pj) {
|
||||
return unified_machine(reinterpret_cast<const uint8_t *>(buf),len,pj);
|
||||
}
|
||||
int unified_machine(const char *buf, size_t len, ParsedJson &pj);
|
||||
|
||||
#endif
|
||||
/* end file /Users/lemire/CVS/github/simdjson/include/simdjson/stage2_build_tape.h */
|
||||
/* begin file /Users/lemire/CVS/github/simdjson/include/simdjson/jsonparser.h */
|
||||
/* end file include/simdjson/stage2_build_tape.h */
|
||||
/* begin file include/simdjson/jsonparser.h */
|
||||
#ifndef SIMDJSON_JSONPARSER_H
|
||||
#define SIMDJSON_JSONPARSER_H
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
// Parse a document found in buf, need to preallocate ParsedJson.
|
||||
// Return false in case of a failure. You can also check validity
|
||||
// by calling pj.isValid(). The same ParsedJson can be reused for other documents.
|
||||
// Return 0 on success, an error code from simdjson/simdjson.h otherwise
|
||||
// You can also check validity by calling pj.isValid(). The same ParsedJson can be reused for other documents.
|
||||
//
|
||||
// If reallocifneeded is true (default) then a temporary buffer is created when needed during processing
|
||||
// (a copy of the input string is made).
|
||||
// The input buf should be readable up to buf + len + SIMDJSON_PADDING if reallocifneeded is false,
|
||||
// all bytes at and after buf + len are ignored (can be garbage).
|
||||
WARN_UNUSED
|
||||
bool json_parse(const uint8_t *buf, size_t len, ParsedJson &pj, bool reallocifneeded = true);
|
||||
int json_parse(const uint8_t *buf, size_t len, ParsedJson &pj, bool reallocifneeded = true);
|
||||
|
||||
// Parse a document found in buf, need to preallocate ParsedJson.
|
||||
// Return false in case of a failure. You can also check validity
|
||||
|
@ -36788,7 +36802,7 @@ bool json_parse(const uint8_t *buf, size_t len, ParsedJson &pj, bool reallocifne
|
|||
// The input buf should be readable up to buf + len + SIMDJSON_PADDING if reallocifneeded is false,
|
||||
// all bytes at and after buf + len are ignored (can be garbage).
|
||||
WARN_UNUSED
|
||||
inline bool json_parse(const char * buf, size_t len, ParsedJson &pj, bool reallocifneeded = true) {
|
||||
inline int json_parse(const char * buf, size_t len, ParsedJson &pj, bool reallocifneeded = true) {
|
||||
return json_parse(reinterpret_cast<const uint8_t *>(buf), len, pj, reallocifneeded);
|
||||
}
|
||||
|
||||
|
@ -36801,7 +36815,7 @@ inline bool json_parse(const char * buf, size_t len, ParsedJson &pj, bool reallo
|
|||
// the input s should be readable up to s.data() + s.size() + SIMDJSON_PADDING if reallocifneeded is false,
|
||||
// all bytes at and after s.data()+s.size() are ignored (can be garbage).
|
||||
WARN_UNUSED
|
||||
inline bool json_parse(const std::string_view &s, ParsedJson &pj, bool reallocifneeded = true) {
|
||||
inline int json_parse(const std::string_view &s, ParsedJson &pj, bool reallocifneeded = true) {
|
||||
return json_parse(s.data(), s.size(), pj, reallocifneeded);
|
||||
}
|
||||
|
||||
|
@ -36840,4 +36854,4 @@ inline ParsedJson build_parsed_json(const std::string_view &s, bool reallocifnee
|
|||
}
|
||||
|
||||
#endif
|
||||
/* end file /Users/lemire/CVS/github/simdjson/include/simdjson/jsonparser.h */
|
||||
/* end file include/simdjson/jsonparser.h */
|
||||
|
|
|
@ -21,7 +21,9 @@ set(SIMDJSON_SRC
|
|||
stage1_find_marks.cpp
|
||||
stage2_build_tape.cpp
|
||||
parsedjson.cpp
|
||||
parsedjsoniterator.cpp)
|
||||
parsedjsoniterator.cpp
|
||||
simdjson.cpp
|
||||
)
|
||||
|
||||
add_library(${SIMDJSON_LIB_NAME} ${SIMDJSON_LIB_TYPE} ${SIMDJSON_SRC})
|
||||
target_include_directories(${SIMDJSON_LIB_NAME}
|
||||
|
|
|
@ -5,21 +5,13 @@
|
|||
#else
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
#include "simdjson/simdjson.h"
|
||||
|
||||
// parse a document found in buf, need to preallocate ParsedJson.
|
||||
WARN_UNUSED
|
||||
bool json_parse(const uint8_t *buf, size_t len, ParsedJson &pj, bool reallocifneeded) {
|
||||
int json_parse(const uint8_t *buf, size_t len, ParsedJson &pj, bool reallocifneeded) {
|
||||
if (pj.bytecapacity < len) {
|
||||
std::cerr << "Your ParsedJson cannot support documents that big: " << len
|
||||
<< std::endl;
|
||||
return false;
|
||||
return simdjson::CAPACITY;
|
||||
}
|
||||
bool reallocated = false;
|
||||
if(reallocifneeded) {
|
||||
|
@ -33,24 +25,19 @@ bool json_parse(const uint8_t *buf, size_t len, ParsedJson &pj, bool reallocifne
|
|||
#endif
|
||||
if ( (reinterpret_cast<uintptr_t>(buf + len - 1) % pagesize ) < SIMDJSON_PADDING ) {
|
||||
const uint8_t *tmpbuf = buf;
|
||||
buf = reinterpret_cast<uint8_t *>(allocate_padded_buffer(len));
|
||||
if(buf == nullptr) { return false;
|
||||
}
|
||||
buf = (uint8_t *) allocate_padded_buffer(len);
|
||||
if(buf == NULL) return simdjson::MEMALLOC;
|
||||
memcpy((void*)buf,tmpbuf,len);
|
||||
reallocated = true;
|
||||
}
|
||||
}
|
||||
bool isok = find_structural_bits(buf, len, pj);
|
||||
if (isok) {
|
||||
isok = unified_machine(buf, len, pj);
|
||||
} else {
|
||||
if(reallocated) { aligned_free((void*)buf);
|
||||
}
|
||||
return false;
|
||||
// find_structural_bits returns a boolean, not an int, we invert its result to keep consistent with res == 0 meaning success
|
||||
int res = !find_structural_bits(buf, len, pj);
|
||||
if (!res) {
|
||||
res = unified_machine(buf, len, pj);
|
||||
}
|
||||
if(reallocated) { aligned_free((void*)buf);
|
||||
}
|
||||
return isok;
|
||||
if(reallocated) { aligned_free((void*)buf);}
|
||||
return res;
|
||||
}
|
||||
|
||||
WARN_UNUSED
|
||||
|
@ -58,7 +45,8 @@ ParsedJson build_parsed_json(const uint8_t *buf, size_t len, bool reallocifneede
|
|||
ParsedJson pj;
|
||||
bool ok = pj.allocateCapacity(len);
|
||||
if(ok) {
|
||||
ok = json_parse(buf, len, pj, reallocifneeded);
|
||||
int res = json_parse(buf, len, pj, reallocifneeded);
|
||||
ok = res == simdjson::SUCCESS;
|
||||
assert(ok == pj.isValid());
|
||||
} else {
|
||||
std::cerr << "failure during memory allocation " << std::endl;
|
||||
|
|
|
@ -0,0 +1,13 @@
|
|||
#include <map>
|
||||
#include "simdjson/simdjson.h"
|
||||
|
||||
const std::map<int, const std::string> errorStrings = {
|
||||
{simdjson::SUCCESS, "No errors"},
|
||||
{simdjson::CAPACITY, "This ParsedJson can't support a document that big"},
|
||||
{simdjson::MEMALLOC, "Error allocating memory, we're most likely out of memory"},
|
||||
{simdjson::TAPE_ERROR, "Something went wrong while writing to the tape"}
|
||||
};
|
||||
|
||||
const std::string& simdjson::errorMsg(const int errorCode) {
|
||||
return errorStrings.at(errorCode);
|
||||
}
|
|
@ -454,3 +454,7 @@ WARN_UNUSED
|
|||
return true;
|
||||
#endif
|
||||
}
|
||||
|
||||
bool find_structural_bits(const char *buf, size_t len, ParsedJson &pj) {
|
||||
return find_structural_bits(reinterpret_cast<const uint8_t*>(buf), len, pj);
|
||||
}
|
|
@ -13,6 +13,7 @@
|
|||
#include "simdjson/numberparsing.h"
|
||||
#include "simdjson/parsedjson.h"
|
||||
#include "simdjson/stringparsing.h"
|
||||
#include "simdjson/simdjson.h"
|
||||
|
||||
#include <iostream>
|
||||
#define PATH_SEP '/'
|
||||
|
@ -62,7 +63,7 @@ really_inline bool is_valid_null_atom(const uint8_t *loc) {
|
|||
* for documentation.
|
||||
***********/
|
||||
WARN_UNUSED
|
||||
bool unified_machine(const uint8_t *buf, size_t len, ParsedJson &pj) {
|
||||
int unified_machine(const uint8_t *buf, size_t len, ParsedJson &pj) {
|
||||
uint32_t i = 0; // index of the structural character (0,1,2,3...)
|
||||
uint32_t idx; // location of the structural character in the input (buf)
|
||||
uint8_t c; // used to track the (structural) character we are looking at, updated
|
||||
|
@ -70,8 +71,7 @@ bool unified_machine(const uint8_t *buf, size_t len, ParsedJson &pj) {
|
|||
uint32_t depth = 0; // could have an arbitrary starting depth
|
||||
pj.init();
|
||||
if(pj.bytecapacity < len) {
|
||||
fprintf(stderr, "insufficient capacity\n");
|
||||
return false;
|
||||
return simdjson::CAPACITY;
|
||||
}
|
||||
// this macro reads the next structural character, updating idx, i and c.
|
||||
#define UPDATE_CHAR() \
|
||||
|
@ -509,8 +509,12 @@ succeed:
|
|||
|
||||
|
||||
pj.isvalid = true;
|
||||
return true;
|
||||
return simdjson::SUCCESS;
|
||||
|
||||
fail:
|
||||
return false;
|
||||
return simdjson::TAPE_ERROR;
|
||||
}
|
||||
|
||||
int unified_machine(const char *buf, size_t len, ParsedJson &pj) {
|
||||
return unified_machine(reinterpret_cast<const uint8_t*>(buf), len, pj);
|
||||
}
|
||||
|
|
|
@ -79,24 +79,20 @@ bool validate(const char *dirname) {
|
|||
return false;
|
||||
}
|
||||
++howmany;
|
||||
bool isok = json_parse(p, pj);
|
||||
const int parseRes = json_parse(p, pj);
|
||||
aligned_free((void*)p.data());
|
||||
printf("%s\n", isok ? "ok" : "invalid");
|
||||
printf("%s\n", parseRes == 0 ? "ok" : "invalid");
|
||||
if(contains("EXCLUDE",name)) {
|
||||
// skipping
|
||||
howmany--;
|
||||
} else if (startsWith("pass", name)) {
|
||||
if (!isok) {
|
||||
} else if (startsWith("pass", name) && parseRes != 0) {
|
||||
isfileasexpected[i] = false;
|
||||
printf("warning: file %s should pass but it fails.\n", name);
|
||||
printf("warning: file %s should pass but it fails. Error is: %s\n", name, simdjson::errorMsg(parseRes).data());
|
||||
everythingfine = false;
|
||||
}
|
||||
} else if (startsWith("fail", name)) {
|
||||
if (isok) {
|
||||
} else if (startsWith("fail", name) && parseRes == 0) {
|
||||
isfileasexpected[i] = false;
|
||||
printf("warning: file %s should fail but it passes.\n", name);
|
||||
everythingfine = false;
|
||||
}
|
||||
}
|
||||
free(fullpath);
|
||||
}
|
||||
|
|
|
@ -9,8 +9,11 @@ int main() {
|
|||
return EXIT_FAILURE;
|
||||
}
|
||||
pj.allocateCapacity(p.size());
|
||||
bool is_ok = json_parse(p, pj);
|
||||
if (!is_ok) {return EXIT_FAILURE;}
|
||||
const int res = json_parse(p, pj);
|
||||
if (res) {
|
||||
std::cerr << simdjson::errorMsg(res) << std::endl;
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
free((void*)p.data());
|
||||
return EXIT_SUCCESS;
|
||||
}
|
|
@ -87,9 +87,9 @@ int main(int argc, char *argv[]) {
|
|||
std::cerr << "failed to allocate memory" << std::endl;
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
bool is_ok = json_parse(p, pj); // do the parsing, return false on error
|
||||
int res = json_parse(p, pj); // do the parsing, return false on error
|
||||
aligned_free((void *)p.data());
|
||||
if (!is_ok) {
|
||||
if (res) {
|
||||
std::cerr << " Parsing failed. " << std::endl;
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
@ -101,7 +101,7 @@ int main(int argc, char *argv[]) {
|
|||
}
|
||||
compute_dump(pjh);
|
||||
} else {
|
||||
is_ok = rawdump ? pj.dump_raw_tape(std::cout) : pj.printjson(std::cout);
|
||||
const bool is_ok = rawdump ? pj.dump_raw_tape(std::cout) : pj.printjson(std::cout);
|
||||
if (!is_ok) {
|
||||
std::cerr << " Could not print out parsed result. " << std::endl;
|
||||
return EXIT_FAILURE;
|
||||
|
|
Loading…
Reference in New Issue