Fix for issues 32, 50, 131, 137
* Improving portability. * Revisiting faulty logic regarding same-page overruns. * Disabling same-page overruns under VS. * Clarifying the documentation * Fix for issue 131 + being more explicit regarding memory realloc. * Fix for issue 137. * removing "using namespace std" throughout. Fix for 50 * Introducing typed malloc/free. * Introducing a custom class (padded_string) that solves several minor usability issues. * Updating amalgamation for testing.
This commit is contained in:
parent
c5a3f9ccd4
commit
e370a65383
47
README.md
47
README.md
|
@ -66,7 +66,7 @@ Under Windows, we build some tools using the windows/dirent_portable.h file (whi
|
|||
const char * filename = ... //
|
||||
|
||||
// use whatever means you want to get a string (UTF-8) of your JSON document
|
||||
std::string_view p = get_corpus(filename); // you are responsible for freeing p.data()
|
||||
padded_string p = get_corpus(filename);
|
||||
ParsedJson pj;
|
||||
pj.allocateCapacity(p.size()); // allocate memory for parsing up to p.size() bytes
|
||||
const int res = json_parse(p, pj); // do the parsing, return 0 on success
|
||||
|
@ -75,8 +75,6 @@ if (res != 0) {
|
|||
// You can use the "simdjson/simdjson.h" header to access the error message
|
||||
std::cout << "Error parsing:" << simdjson::errorMsg(res) << std::endl;
|
||||
}
|
||||
// You can safely delete the string content
|
||||
aligned_free((void*)p.data());
|
||||
// the ParsedJson document can be used here
|
||||
// pj can be reused with other json_parse calls.
|
||||
```
|
||||
|
@ -90,21 +88,49 @@ of memory allocation with each new JSON document:
|
|||
/...
|
||||
|
||||
const char * filename = ... //
|
||||
std::string_view p = get_corpus(filename);
|
||||
padded_string p = get_corpus(filename);
|
||||
ParsedJson pj = build_parsed_json(p); // do the parsing
|
||||
// you no longer need p at this point, can do aligned_free((void*)p.data())
|
||||
if( ! pj.isValid() ) {
|
||||
// something went wrong
|
||||
}
|
||||
aligned_free((void*)p.data());
|
||||
```
|
||||
|
||||
You can call `json_parse` and `build_parsed_json`, passing a standard `std::string` object.
|
||||
Though the `padded_string` class is recommended for best performance, you can call `json_parse` and `build_parsed_json`, passing a standard `std::string` object.
|
||||
|
||||
|
||||
## Memory overallocation
|
||||
```C
|
||||
#include "simdjson/jsonparser.h"
|
||||
|
||||
As needed, the `json_parse` and `build_parsed_json` functions copy the input data to a temporary buffer readable up to SIMDJSON_PADDING bytes beyond the end of the data. To avoid this potentially expensive copy, overallocate your own input data and then call the `json_parse` and `build_parsed_json` functions with an extra parameter value set to `false` (e.g., `build_parsed_json(p,false)` and `json_parse(p,pj,false)`). In such instances, no temporary copy is made. The `get_corpus` function does this overallocation automatically, as does the provided `char * allocate_padded_buffer(size_t length)` function.
|
||||
/...
|
||||
std::string mystring = ... //
|
||||
ParsedJson pj;
|
||||
pj.allocateCapacity(mystring.size()); // allocate memory for parsing up to mystring.size() bytes
|
||||
// std::string may not overallocate so a copy will be needed
|
||||
const int res = json_parse(mystring, pj); // do the parsing, return 0 on success
|
||||
// parsing is done!
|
||||
if (res != 0) {
|
||||
// You can use the "simdjson/simdjson.h" header to access the error message
|
||||
std::cout << "Error parsing:" << simdjson::errorMsg(res) << std::endl;
|
||||
}
|
||||
// pj can be reused with other json_parse calls.
|
||||
```
|
||||
|
||||
or
|
||||
|
||||
```C
|
||||
#include "simdjson/jsonparser.h"
|
||||
|
||||
/...
|
||||
|
||||
std::string mystring = ... //
|
||||
// std::string may not overallocate so a copy will be needed
|
||||
ParsedJson pj = build_parsed_json(mystring); // do the parsing
|
||||
if( ! pj.isValid() ) {
|
||||
// something went wrong
|
||||
}
|
||||
```
|
||||
|
||||
As needed, the `json_parse` and `build_parsed_json` functions copy the input data to a temporary buffer readable up to SIMDJSON_PADDING bytes beyond the end of the data.
|
||||
|
||||
## Usage: easy single-header version
|
||||
|
||||
|
@ -118,14 +144,13 @@ copy the files in your project in your include path. You can then include them q
|
|||
#include "simdjson.cpp"
|
||||
int main(int argc, char *argv[]) {
|
||||
const char * filename = argv[1];
|
||||
std::string_view p = get_corpus(filename);
|
||||
padded_string p = get_corpus(filename);
|
||||
ParsedJson pj = build_parsed_json(p); // do the parsing
|
||||
if( ! pj.isValid() ) {
|
||||
std::cout << "not valid" << std::endl;
|
||||
} else {
|
||||
std::cout << "valid" << std::endl;
|
||||
}
|
||||
aligned_free((void*)p.data());
|
||||
return EXIT_SUCCESS;
|
||||
}
|
||||
```
|
||||
|
|
|
@ -28,6 +28,7 @@ $SCRIPTPATH/include/simdjson/simdjson_version.h
|
|||
$SCRIPTPATH/include/simdjson/simdjson.h
|
||||
$SCRIPTPATH/include/simdjson/portability.h
|
||||
$SCRIPTPATH/include/simdjson/common_defs.h
|
||||
$SCRIPTPATH/include/simdjson/padded_string.h
|
||||
$SCRIPTPATH/include/simdjson/jsoncharutils.h
|
||||
$SCRIPTPATH/include/simdjson/jsonformatutils.h
|
||||
$SCRIPTPATH/include/simdjson/jsonioutil.h
|
||||
|
@ -100,7 +101,7 @@ cat <<< '
|
|||
#include "simdjson.cpp"
|
||||
int main(int argc, char *argv[]) {
|
||||
const char * filename = argv[1];
|
||||
std::string_view p = get_corpus(filename);
|
||||
padded_string p = get_corpus(filename);
|
||||
ParsedJson pj = build_parsed_json(p); // do the parsing
|
||||
if( ! pj.isValid() ) {
|
||||
std::cout << "not valid" << std::endl;
|
||||
|
|
|
@ -14,17 +14,16 @@
|
|||
#include "sajson.h"
|
||||
|
||||
using namespace rapidjson;
|
||||
using namespace std;
|
||||
|
||||
bool equals(const char *s1, const char *s2) { return strcmp(s1, s2) == 0; }
|
||||
|
||||
void remove_duplicates(vector<int64_t> &v) {
|
||||
void remove_duplicates(std::vector<int64_t> &v) {
|
||||
std::sort(v.begin(), v.end());
|
||||
auto last = std::unique(v.begin(), v.end());
|
||||
v.erase(last, v.end());
|
||||
}
|
||||
|
||||
void print_vec(vector<int64_t> &v) {
|
||||
void print_vec(const std::vector<int64_t> &v) {
|
||||
for (auto i : v) {
|
||||
std::cout << i << " ";
|
||||
}
|
||||
|
@ -73,7 +72,7 @@ void simdjson_traverse(std::vector<int64_t> &answer, ParsedJson::iterator &i) {
|
|||
}
|
||||
}
|
||||
|
||||
std::vector<int64_t> simdjson_computestats(const std::string_view &p) {
|
||||
std::vector<int64_t> simdjson_computestats(const padded_string &p) {
|
||||
std::vector<int64_t> answer;
|
||||
ParsedJson pj = build_parsed_json(p);
|
||||
if (!pj.isValid()) {
|
||||
|
@ -134,7 +133,7 @@ void sajson_traverse(std::vector<int64_t> &answer, const sajson::value &node) {
|
|||
}
|
||||
}
|
||||
|
||||
std::vector<int64_t> sasjon_computestats(const std::string_view &p) {
|
||||
std::vector<int64_t> sasjon_computestats(const padded_string &p) {
|
||||
std::vector<int64_t> answer;
|
||||
char *buffer = (char *)malloc(p.size());
|
||||
memcpy(buffer, p.data(), p.size());
|
||||
|
@ -187,7 +186,7 @@ void rapid_traverse(std::vector<int64_t> &answer, const rapidjson::Value &v) {
|
|||
}
|
||||
}
|
||||
|
||||
std::vector<int64_t> rapid_computestats(const std::string_view &p) {
|
||||
std::vector<int64_t> rapid_computestats(const padded_string &p) {
|
||||
std::vector<int64_t> answer;
|
||||
char *buffer = (char *)malloc(p.size() + 1);
|
||||
memcpy(buffer, p.data(), p.size());
|
||||
|
@ -220,19 +219,19 @@ int main(int argc, char *argv[]) {
|
|||
abort();
|
||||
}
|
||||
if (optind >= argc) {
|
||||
cerr << "Using different parsers, we compute the content statistics of "
|
||||
"JSON documents.\n";
|
||||
cerr << "Usage: " << argv[0] << " <jsonfile>\n";
|
||||
cerr << "Or " << argv[0] << " -v <jsonfile>\n";
|
||||
std::cerr << "Using different parsers, we compute the content statistics of "
|
||||
"JSON documents." << std::endl;
|
||||
std::cerr << "Usage: " << argv[0] << " <jsonfile>" << std::endl;
|
||||
std::cerr << "Or " << argv[0] << " -v <jsonfile>" << std::endl;
|
||||
exit(1);
|
||||
}
|
||||
const char *filename = argv[optind];
|
||||
if (optind + 1 < argc) {
|
||||
cerr << "warning: ignoring everything after " << argv[optind + 1] << endl;
|
||||
std::cerr << "warning: ignoring everything after " << argv[optind + 1] << std::endl;
|
||||
}
|
||||
std::string_view p;
|
||||
padded_string p;
|
||||
try {
|
||||
p = get_corpus(filename);
|
||||
get_corpus(filename).swap(p);
|
||||
} catch (const std::exception &e) { // caught by reference to base
|
||||
std::cout << "Could not load the file " << filename << std::endl;
|
||||
return EXIT_FAILURE;
|
||||
|
@ -279,5 +278,4 @@ int main(int argc, char *argv[]) {
|
|||
!justdata);
|
||||
BEST_TIME("sasjon ", sasjon_computestats(p).size(), size, , repeat, volume,
|
||||
!justdata);
|
||||
aligned_free((void*)p.data());
|
||||
}
|
||||
|
|
|
@ -16,7 +16,6 @@
|
|||
|
||||
|
||||
using namespace rapidjson;
|
||||
using namespace std;
|
||||
|
||||
std::string rapidstringmeInsitu(char *json) {
|
||||
Document d;
|
||||
|
@ -62,13 +61,13 @@ int main(int argc, char *argv[]) {
|
|||
abort ();
|
||||
}
|
||||
if (optind >= argc) {
|
||||
cerr << "Usage: " << argv[0] << " <jsonfile>" << endl;
|
||||
std::cerr << "Usage: " << argv[0] << " <jsonfile>" << std::endl;
|
||||
exit(1);
|
||||
}
|
||||
const char * filename = argv[optind];
|
||||
std::string_view p;
|
||||
padded_string p;
|
||||
try {
|
||||
p = get_corpus(filename);
|
||||
get_corpus(filename).swap(p);
|
||||
} catch (const std::exception& e) { // caught by reference to base
|
||||
std::cout << "Could not load the file " << filename << std::endl;
|
||||
return EXIT_FAILURE;
|
||||
|
@ -140,7 +139,8 @@ int main(int argc, char *argv[]) {
|
|||
fprintf(stderr, "failed to allocate memory\n");
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
BEST_TIME("simdjson orig", json_parse((const uint8_t*)buffer, p.size(), pj), true, memcpy(buffer, p.data(), p.size()), repeat, volume, !justdata);
|
||||
bool automated_reallocation = false;
|
||||
BEST_TIME("simdjson orig", json_parse((const uint8_t*)buffer, p.size(), pj, automated_reallocation), true, memcpy(buffer, p.data(), p.size()), repeat, volume, !justdata);
|
||||
|
||||
ParsedJson pj2;
|
||||
bool isallocok2 = pj2.allocateCapacity(p.size(), 1024);
|
||||
|
@ -148,9 +148,8 @@ int main(int argc, char *argv[]) {
|
|||
fprintf(stderr, "failed to allocate memory\n");
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
BEST_TIME("simdjson despaced", json_parse((const uint8_t*)buffer, minisize, pj2), true, memcpy(buffer, minibuffer, p.size()), repeat, volume, !justdata);
|
||||
aligned_free((void*)p.data());
|
||||
automated_reallocation = false;
|
||||
BEST_TIME("simdjson despaced", json_parse((const uint8_t*)buffer, minisize, pj2, automated_reallocation), true, memcpy(buffer, minibuffer, p.size()), repeat, volume, !justdata);
|
||||
free(buffer);
|
||||
free(ast_buffer);
|
||||
free(minibuffer);
|
||||
|
|
|
@ -33,7 +33,6 @@
|
|||
#include "simdjson/parsedjson.h"
|
||||
#include "simdjson/stage1_find_marks.h"
|
||||
#include "simdjson/stage2_build_tape.h"
|
||||
using namespace std;
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
bool verbose = false;
|
||||
|
@ -69,26 +68,26 @@ int main(int argc, char *argv[]) {
|
|||
int optind = 1;
|
||||
#endif
|
||||
if (optind >= argc) {
|
||||
cerr << "Usage: " << argv[0] << " <jsonfile>" << endl;
|
||||
std::cerr << "Usage: " << argv[0] << " <jsonfile>" << std::endl;
|
||||
exit(1);
|
||||
}
|
||||
const char *filename = argv[optind];
|
||||
if (optind + 1 < argc) {
|
||||
cerr << "warning: ignoring everything after " << argv[optind + 1] << endl;
|
||||
std::cerr << "warning: ignoring everything after " << argv[optind + 1] << std::endl;
|
||||
}
|
||||
if (verbose) {
|
||||
cout << "[verbose] loading " << filename << endl;
|
||||
}
|
||||
std::string_view p;
|
||||
std::cout << "[verbose] loading " << filename << std::endl;
|
||||
}
|
||||
padded_string p;
|
||||
try {
|
||||
p = get_corpus(filename);
|
||||
get_corpus(filename).swap(p);
|
||||
} catch (const std::exception &e) { // caught by reference to base
|
||||
std::cout << "Could not load the file " << filename << std::endl;
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
if (verbose) {
|
||||
cout << "[verbose] loaded " << filename << " (" << p.size() << " bytes)"
|
||||
<< endl;
|
||||
std::cout << "[verbose] loaded " << filename << " (" << p.size() << " bytes)"
|
||||
<< std::endl;
|
||||
}
|
||||
#if defined(DEBUG)
|
||||
const uint32_t iterations = 1;
|
||||
|
@ -96,7 +95,7 @@ int main(int argc, char *argv[]) {
|
|||
const uint32_t iterations =
|
||||
forceoneiteration ? 1 : (p.size() < 1 * 1000 * 1000 ? 1000 : 10);
|
||||
#endif
|
||||
vector<double> res;
|
||||
std::vector<double> res;
|
||||
res.resize(iterations);
|
||||
|
||||
#if !defined(__linux__)
|
||||
|
@ -107,14 +106,14 @@ int main(int argc, char *argv[]) {
|
|||
#endif
|
||||
|
||||
#ifndef SQUASH_COUNTERS
|
||||
vector<int> evts;
|
||||
std::vector<int> evts;
|
||||
evts.push_back(PERF_COUNT_HW_CPU_CYCLES);
|
||||
evts.push_back(PERF_COUNT_HW_INSTRUCTIONS);
|
||||
evts.push_back(PERF_COUNT_HW_BRANCH_MISSES);
|
||||
evts.push_back(PERF_COUNT_HW_CACHE_REFERENCES);
|
||||
evts.push_back(PERF_COUNT_HW_CACHE_MISSES);
|
||||
LinuxEvents<PERF_TYPE_HARDWARE> unified(evts);
|
||||
vector<unsigned long long> results;
|
||||
std::vector<unsigned long long> results;
|
||||
results.resize(evts.size());
|
||||
unsigned long cy0 = 0, cy1 = 0, cy2 = 0;
|
||||
unsigned long cl0 = 0, cl1 = 0, cl2 = 0;
|
||||
|
@ -126,8 +125,8 @@ int main(int argc, char *argv[]) {
|
|||
|
||||
for (uint32_t i = 0; i < iterations; i++) {
|
||||
if (verbose) {
|
||||
cout << "[verbose] iteration # " << i << endl;
|
||||
}
|
||||
std::cout << "[verbose] iteration # " << i << std::endl;
|
||||
}
|
||||
#ifndef SQUASH_COUNTERS
|
||||
unified.start();
|
||||
#endif
|
||||
|
@ -135,7 +134,6 @@ int main(int argc, char *argv[]) {
|
|||
bool allocok = pj.allocateCapacity(p.size());
|
||||
if (!allocok) {
|
||||
std::cerr << "failed to allocate memory" << std::endl;
|
||||
aligned_free((void *)p.data());
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
#ifndef SQUASH_COUNTERS
|
||||
|
@ -147,7 +145,7 @@ int main(int argc, char *argv[]) {
|
|||
cmis0 += results[4];
|
||||
#endif
|
||||
if (verbose) {
|
||||
cout << "[verbose] allocated memory for parsed JSON " << endl;
|
||||
std::cout << "[verbose] allocated memory for parsed JSON " << std::endl;
|
||||
}
|
||||
|
||||
auto start = std::chrono::steady_clock::now();
|
||||
|
@ -163,7 +161,7 @@ int main(int argc, char *argv[]) {
|
|||
cref1 += results[3];
|
||||
cmis1 += results[4];
|
||||
if (!isok) {
|
||||
cout << "Failed during stage 1\n";
|
||||
std::cout << "Failed during stage 1" << std::endl;
|
||||
break;
|
||||
}
|
||||
unified.start();
|
||||
|
@ -178,7 +176,7 @@ int main(int argc, char *argv[]) {
|
|||
cref2 += results[3];
|
||||
cmis2 += results[4];
|
||||
if (!isok) {
|
||||
cout << "Failed during stage 2\n";
|
||||
std::cout << "Failed during stage 2" << std::endl;
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
|
@ -190,7 +188,6 @@ int main(int argc, char *argv[]) {
|
|||
ParsedJson pj = build_parsed_json(p); // do the parsing again to get the stats
|
||||
if (!pj.isValid()) {
|
||||
std::cerr << "Could not parse. " << std::endl;
|
||||
aligned_free((void *)p.data());
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
#ifndef SQUASH_COUNTERS
|
||||
|
@ -202,7 +199,6 @@ int main(int argc, char *argv[]) {
|
|||
float cpbtotal = (double)total / (iterations * p.size());
|
||||
char *newfile = (char *)malloc(strlen(filename) + 1);
|
||||
if (newfile == NULL) {
|
||||
aligned_free((void *)p.data());
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
::strcpy(newfile, filename);
|
||||
|
@ -255,9 +251,9 @@ int main(int argc, char *argv[]) {
|
|||
#endif
|
||||
double min_result = *min_element(res.begin(), res.end());
|
||||
if (!justdata) {
|
||||
cout << "Min: " << min_result << " bytes read: " << p.size()
|
||||
std::cout << "Min: " << min_result << " bytes read: " << p.size()
|
||||
<< " Gigabytes/second: " << (p.size()) / (min_result * 1000000000.0)
|
||||
<< "\n";
|
||||
<< std::endl;
|
||||
}
|
||||
if (jsonoutput) {
|
||||
isok = isok && pj.printjson(std::cout);
|
||||
|
@ -265,7 +261,6 @@ int main(int argc, char *argv[]) {
|
|||
if (dump) {
|
||||
isok = isok && pj.dump_raw_tape(std::cout);
|
||||
}
|
||||
aligned_free((void *)p.data());
|
||||
if (!isok) {
|
||||
fprintf(stderr, " Parsing failed. \n ");
|
||||
return EXIT_FAILURE;
|
||||
|
|
|
@ -12,7 +12,6 @@
|
|||
#include "sajson.h"
|
||||
|
||||
using namespace rapidjson;
|
||||
using namespace std;
|
||||
|
||||
struct stat_s {
|
||||
size_t number_count;
|
||||
|
@ -45,7 +44,7 @@ void print_stat(const stat_t &s) {
|
|||
}
|
||||
|
||||
__attribute__ ((noinline))
|
||||
stat_t simdjson_computestats(const std::string_view &p) {
|
||||
stat_t simdjson_computestats(const padded_string &p) {
|
||||
stat_t answer;
|
||||
ParsedJson pj = build_parsed_json(p);
|
||||
answer.valid = pj.isValid();
|
||||
|
@ -147,7 +146,7 @@ void sajson_traverse(stat_t &stats, const sajson::value &node) {
|
|||
}
|
||||
|
||||
__attribute__ ((noinline))
|
||||
stat_t sasjon_computestats(const std::string_view &p) {
|
||||
stat_t sasjon_computestats(const padded_string &p) {
|
||||
stat_t answer;
|
||||
char *buffer = (char *)malloc(p.size());
|
||||
memcpy(buffer, p.data(), p.size());
|
||||
|
@ -205,7 +204,7 @@ void rapid_traverse(stat_t &stats, const rapidjson::Value &v) {
|
|||
}
|
||||
|
||||
__attribute__ ((noinline))
|
||||
stat_t rapid_computestats(const std::string_view &p) {
|
||||
stat_t rapid_computestats(const padded_string &p) {
|
||||
stat_t answer;
|
||||
char *buffer = (char *)malloc(p.size() + 1);
|
||||
memcpy(buffer, p.data(), p.size());
|
||||
|
@ -244,19 +243,19 @@ int main(int argc, char *argv[]) {
|
|||
abort();
|
||||
}
|
||||
if (optind >= argc) {
|
||||
cerr << "Using different parsers, we compute the content statistics of "
|
||||
"JSON documents.\n";
|
||||
cerr << "Usage: " << argv[0] << " <jsonfile>\n";
|
||||
cerr << "Or " << argv[0] << " -v <jsonfile>\n";
|
||||
std::cerr << "Using different parsers, we compute the content statistics of "
|
||||
"JSON documents." << std::endl;
|
||||
std::cerr << "Usage: " << argv[0] << " <jsonfile>" << std::endl;
|
||||
std::cerr << "Or " << argv[0] << " -v <jsonfile>" << std::endl;
|
||||
exit(1);
|
||||
}
|
||||
const char *filename = argv[optind];
|
||||
if (optind + 1 < argc) {
|
||||
cerr << "warning: ignoring everything after " << argv[optind + 1] << endl;
|
||||
std::cerr << "warning: ignoring everything after " << argv[optind + 1] << std::endl;
|
||||
}
|
||||
std::string_view p;
|
||||
padded_string p;
|
||||
try {
|
||||
p = get_corpus(filename);
|
||||
get_corpus(filename).swap(p);
|
||||
} catch (const std::exception &e) { // caught by reference to base
|
||||
std::cout << "Could not load the file " << filename << std::endl;
|
||||
return EXIT_FAILURE;
|
||||
|
@ -300,5 +299,4 @@ int main(int argc, char *argv[]) {
|
|||
!justdata);
|
||||
BEST_TIME("sasjon ", sasjon_computestats(p).valid, true, , repeat, volume,
|
||||
!justdata);
|
||||
aligned_free((void*)p.data());
|
||||
}
|
||||
|
|
|
@ -44,7 +44,6 @@ extern "C" {
|
|||
#endif
|
||||
|
||||
using namespace rapidjson;
|
||||
using namespace std;
|
||||
|
||||
|
||||
#ifdef ALLPARSER
|
||||
|
@ -77,19 +76,19 @@ int main(int argc, char *argv[]) {
|
|||
abort();
|
||||
}
|
||||
if (optind >= argc) {
|
||||
cerr << "Usage: " << argv[0] << " <jsonfile>\n";
|
||||
cerr << "Or " << argv[0] << " -v <jsonfile>\n";
|
||||
cerr << "To enable parsers that are not standard compliant, use the -a "
|
||||
"flag\n";
|
||||
std::cerr << "Usage: " << argv[0] << " <jsonfile>" << std::endl;
|
||||
std::cerr << "Or " << argv[0] << " -v <jsonfile>" << std::endl;
|
||||
std::cerr << "To enable parsers that are not standard compliant, use the -a "
|
||||
"flag" << std::endl;
|
||||
exit(1);
|
||||
}
|
||||
const char *filename = argv[optind];
|
||||
if (optind + 1 < argc) {
|
||||
cerr << "warning: ignoring everything after " << argv[optind + 1] << endl;
|
||||
std::cerr << "warning: ignoring everything after " << argv[optind + 1] << std::endl;
|
||||
}
|
||||
std::string_view p;
|
||||
padded_string p;
|
||||
try {
|
||||
p = get_corpus(filename);
|
||||
get_corpus(filename).swap(p);
|
||||
} catch (const std::exception &e) { // caught by reference to base
|
||||
std::cout << "Could not load the file " << filename << std::endl;
|
||||
return EXIT_FAILURE;
|
||||
|
@ -158,15 +157,15 @@ int main(int argc, char *argv[]) {
|
|||
true, memcpy(buffer, p.data(), p.size()), repeat, volume, !justdata);
|
||||
#ifdef __linux__
|
||||
if(!justdata) {
|
||||
vector<int> evts;
|
||||
std::vector<int> evts;
|
||||
evts.push_back(PERF_COUNT_HW_CPU_CYCLES);
|
||||
evts.push_back(PERF_COUNT_HW_INSTRUCTIONS);
|
||||
evts.push_back(PERF_COUNT_HW_BRANCH_MISSES);
|
||||
evts.push_back(PERF_COUNT_HW_CACHE_REFERENCES);
|
||||
evts.push_back(PERF_COUNT_HW_CACHE_MISSES);
|
||||
LinuxEvents<PERF_TYPE_HARDWARE> unified(evts);
|
||||
vector<unsigned long long> results;
|
||||
vector<unsigned long long> stats;
|
||||
std::vector<unsigned long long> results;
|
||||
std::vector<unsigned long long> stats;
|
||||
results.resize(evts.size());
|
||||
stats.resize(evts.size());
|
||||
std::fill(stats.begin(), stats.end(), 0);// unnecessary
|
||||
|
@ -227,10 +226,8 @@ int main(int argc, char *argv[]) {
|
|||
|
||||
|
||||
|
||||
auto * tokens = make_unique<jsmntok_t[](p.size());
|
||||
if(tokens == NULL) {
|
||||
printf("Failed to alloc memory for jsmn\n");
|
||||
} else {
|
||||
{
|
||||
std::unique_ptr<jsmntok_t[]> tokens = std::make_unique<jsmntok_t[]>(p.size());
|
||||
jsmn_parser parser;
|
||||
jsmn_init(&parser);
|
||||
memcpy(buffer, p.data(), p.size());
|
||||
|
@ -239,7 +236,6 @@ int main(int argc, char *argv[]) {
|
|||
(jsmn_parse(&parser, buffer, p.size(), tokens.get(), p.size()) > 0), true,
|
||||
jsmn_init(&parser), repeat, volume, !justdata);
|
||||
}
|
||||
|
||||
memcpy(buffer, p.data(), p.size());
|
||||
buffer[p.size()] = '\0';
|
||||
cJSON * tree = cJSON_Parse(buffer);
|
||||
|
@ -260,7 +256,6 @@ int main(int argc, char *argv[]) {
|
|||
if(!justdata) BEST_TIME("memcpy ",
|
||||
(memcpy(buffer, p.data(), p.size()) == buffer), true, , repeat,
|
||||
volume, !justdata);
|
||||
aligned_free((void *)p.data());
|
||||
free(ast_buffer);
|
||||
free(buffer);
|
||||
}
|
||||
|
|
|
@ -8,8 +8,6 @@
|
|||
#include "linux-perf-events.h"
|
||||
#endif
|
||||
|
||||
using namespace std;
|
||||
|
||||
size_t count_nonasciibytes(const uint8_t *input, size_t length) {
|
||||
size_t count = 0;
|
||||
for (size_t i = 0; i < length; i++) {
|
||||
|
@ -44,7 +42,7 @@ struct stat_s {
|
|||
|
||||
using stat_t = struct stat_s;
|
||||
|
||||
stat_t simdjson_computestats(const std::string_view &p) {
|
||||
stat_t simdjson_computestats(const padded_string &p) {
|
||||
stat_t answer;
|
||||
ParsedJson pj = build_parsed_json(p);
|
||||
answer.valid = pj.isValid();
|
||||
|
@ -126,8 +124,8 @@ int main(int argc, char *argv[]) {
|
|||
int optind = 1;
|
||||
#endif
|
||||
if (optind >= argc) {
|
||||
cerr << "Reads json, prints stats. " << endl;
|
||||
cerr << "Usage: " << argv[0] << " <jsonfile>" << endl;
|
||||
std::cerr << "Reads json, prints stats. " << std::endl;
|
||||
std::cerr << "Usage: " << argv[0] << " <jsonfile>" << std::endl;
|
||||
|
||||
exit(1);
|
||||
}
|
||||
|
@ -136,9 +134,9 @@ int main(int argc, char *argv[]) {
|
|||
std::cerr << "warning: ignoring everything after " << argv[optind + 1]
|
||||
<< std::endl;
|
||||
}
|
||||
std::string_view p;
|
||||
padded_string p;
|
||||
try {
|
||||
p = get_corpus(filename);
|
||||
get_corpus(filename).swap(p);
|
||||
} catch (const std::exception &e) { // caught by reference to base
|
||||
std::cerr << "Could not load the file " << filename << std::endl;
|
||||
return EXIT_FAILURE;
|
||||
|
@ -172,13 +170,13 @@ int main(int argc, char *argv[]) {
|
|||
return EXIT_FAILURE;
|
||||
}
|
||||
const uint32_t iterations = p.size() < 1 * 1000 * 1000 ? 1000 : 50;
|
||||
vector<int> evts;
|
||||
std::vector<int> evts;
|
||||
evts.push_back(PERF_COUNT_HW_CPU_CYCLES);
|
||||
evts.push_back(PERF_COUNT_HW_INSTRUCTIONS);
|
||||
LinuxEvents<PERF_TYPE_HARDWARE> unified(evts);
|
||||
unsigned long cy1 = 0, cy2 = 0;
|
||||
unsigned long cl1 = 0, cl2 = 0;
|
||||
vector<unsigned long long> results;
|
||||
std::vector<unsigned long long> results;
|
||||
results.resize(evts.size());
|
||||
for (uint32_t i = 0; i < iterations; i++) {
|
||||
unified.start();
|
||||
|
|
|
@ -50,20 +50,22 @@
|
|||
|
||||
#else
|
||||
|
||||
// The following is likely unnecessarily complex.
|
||||
#ifdef __SANITIZE_ADDRESS__
|
||||
#define ALLOW_SAME_PAGE_BUFFER_OVERRUN_QUALIFIER __attribute__((no_sanitize("address")))
|
||||
#elif defined(__has_feature)
|
||||
# if (__has_feature(address_sanitizer))
|
||||
#define ALLOW_SAME_PAGE_BUFFER_OVERRUN_QUALIFIER __attribute__((no_sanitize("address")))
|
||||
# endif
|
||||
#endif
|
||||
|
||||
// for non-Visual Studio compilers, we assume that same-page buffer overrun is fine:
|
||||
#ifndef ALLOW_SAME_PAGE_BUFFER_OVERRUN
|
||||
#define ALLOW_SAME_PAGE_BUFFER_OVERRUN
|
||||
#endif
|
||||
|
||||
// The following is likely unnecessarily complex.
|
||||
#ifdef __SANITIZE_ADDRESS__
|
||||
// we have GCC, stuck with https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67368
|
||||
#undef ALLOW_SAME_PAGE_BUFFER_OVERRUN
|
||||
#elif defined(__has_feature)
|
||||
// we have CLANG?
|
||||
# if (__has_feature(address_sanitizer))
|
||||
#define ALLOW_SAME_PAGE_BUFFER_OVERRUN_QUALIFIER __attribute__((no_sanitize("address")))
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#define really_inline inline __attribute__((always_inline, unused))
|
||||
#define never_inline inline __attribute__((noinline, unused))
|
||||
|
||||
|
|
|
@ -9,13 +9,7 @@
|
|||
#include <string>
|
||||
|
||||
|
||||
// low-level function to allocate memory with padding so we can read past the "length" bytes
|
||||
// safely.
|
||||
// if you must provide a pointer to some data, create it with this function:
|
||||
// length is the max. size in bytes of the string
|
||||
// caller is responsible to free the memory (free(...))
|
||||
char * allocate_padded_buffer(size_t length);
|
||||
|
||||
#include "simdjson/padded_string.h"
|
||||
|
||||
|
||||
|
||||
|
@ -34,7 +28,7 @@ char * allocate_padded_buffer(size_t length);
|
|||
// aligned_free((void*)p.data());
|
||||
// std::cout << "Could not load the file " << filename << std::endl;
|
||||
// }
|
||||
std::string_view get_corpus(const std::string& filename);
|
||||
padded_string get_corpus(const std::string& filename);
|
||||
|
||||
|
||||
#endif
|
||||
|
|
|
@ -19,4 +19,8 @@ static inline size_t jsonminify(const std::string_view & p, char *out) {
|
|||
return jsonminify(p.data(), p.size(), out);
|
||||
}
|
||||
|
||||
static inline size_t jsonminify(const padded_string & p, char *out) {
|
||||
return jsonminify(p.data(), p.size(), out);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
#define SIMDJSON_JSONPARSER_H
|
||||
#include <string>
|
||||
#include "simdjson/common_defs.h"
|
||||
#include "simdjson/padded_string.h"
|
||||
#include "simdjson/jsonioutil.h"
|
||||
#include "simdjson/parsedjson.h"
|
||||
#include "simdjson/stage1_find_marks.h"
|
||||
|
@ -46,10 +47,10 @@ inline int json_parse(const char * buf, size_t len, ParsedJson &pj, bool realloc
|
|||
// The input s should be readable up to s.data() + s.size() + SIMDJSON_PADDING if reallocifneeded is false,
|
||||
// all bytes at and after s.data()+s.size() are ignored (can be garbage).
|
||||
// The ParsedJson object can be reused.
|
||||
WARN_UNUSED
|
||||
inline int json_parse(const std::string_view &s, ParsedJson &pj, bool reallocifneeded = true) {
|
||||
return json_parse(s.data(), s.size(), pj, reallocifneeded);
|
||||
}
|
||||
//WARN_UNUSED
|
||||
//inline int json_parse(const std::string_view &s, ParsedJson &pj, bool reallocifneeded = true) {
|
||||
// return json_parse(s.data(), s.size(), pj, reallocifneeded);
|
||||
//}
|
||||
|
||||
|
||||
|
||||
|
@ -65,6 +66,14 @@ inline int json_parse(const std::string &s, ParsedJson &pj) {
|
|||
return json_parse(s.data(), s.length(), pj, true);
|
||||
}
|
||||
|
||||
// Parse a document found in string s.
|
||||
// You need to preallocate ParsedJson with a capacity of len (e.g., pj.allocateCapacity(len)).
|
||||
// Return SUCCESS (an integer = 1) in case of a success. You can also check validity
|
||||
// by calling pj.isValid(). The same ParsedJson can be reused for other documents.
|
||||
WARN_UNUSED
|
||||
inline int json_parse(const padded_string &s, ParsedJson &pj) {
|
||||
return json_parse(s.data(), s.length(), pj, false);
|
||||
}
|
||||
|
||||
|
||||
// Build a ParsedJson object. You can check validity
|
||||
|
@ -96,9 +105,9 @@ WARN_UNUSED
|
|||
// (a copy of the input string is made).
|
||||
// The input s should be readable up to s.data() + s.size() + SIMDJSON_PADDING if reallocifneeded is false,
|
||||
// all bytes at and after s.data()+s.size() are ignored (can be garbage).
|
||||
inline ParsedJson build_parsed_json(const std::string_view &s, bool reallocifneeded = true) {
|
||||
return build_parsed_json(s.data(), s.size(), reallocifneeded);
|
||||
}
|
||||
//inline ParsedJson build_parsed_json(const std::string_view &s, bool reallocifneeded = true) {
|
||||
// return build_parsed_json(s.data(), s.size(), reallocifneeded);
|
||||
//}
|
||||
|
||||
// Parse a document found in string s.
|
||||
// You need to preallocate ParsedJson with a capacity of len (e.g., pj.allocateCapacity(len)).
|
||||
|
@ -113,6 +122,15 @@ inline ParsedJson build_parsed_json(const std::string &s) {
|
|||
}
|
||||
|
||||
|
||||
// Parse a document found in string s.
|
||||
// You need to preallocate ParsedJson with a capacity of len (e.g., pj.allocateCapacity(len)).
|
||||
// Return SUCCESS (an integer = 1) in case of a success. You can also check validity
|
||||
// by calling pj.isValid(). The same ParsedJson can be reused for other documents.
|
||||
WARN_UNUSED
|
||||
inline ParsedJson build_parsed_json(const padded_string &s) {
|
||||
return build_parsed_json(s.data(), s.length(), false);
|
||||
}
|
||||
|
||||
|
||||
|
||||
#endif
|
||||
|
|
|
@ -99,7 +99,7 @@ const bool structural_or_whitespace_or_exponent_or_decimal_negated[256] = {
|
|||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
|
||||
|
||||
really_inline bool
|
||||
is_not_structural_or_whitespace_or_exponent_or_decimal(unsigned char c) {
|
||||
is_not_structural_or_whitespace_or_exponent_or_decimal_or_null(unsigned char c) {
|
||||
return structural_or_whitespace_or_exponent_or_decimal_negated[c];
|
||||
}
|
||||
|
||||
|
@ -115,6 +115,9 @@ is_not_structural_or_whitespace_or_exponent_or_decimal(unsigned char c) {
|
|||
// http://0x80.pl/articles/swar-digits-validate.html
|
||||
static inline bool is_made_of_eight_digits_fast(const char *chars) {
|
||||
uint64_t val;
|
||||
// this can read up to 7 bytes beyond the buffer size, but we require
|
||||
// SIMDJSON_PADDING of padding
|
||||
static_assert(7 <= SIMDJSON_PADDING);
|
||||
memcpy(&val, chars, 8);
|
||||
// a branchy method might be faster:
|
||||
// return (( val & 0xF0F0F0F0F0F0F0F0 ) == 0x3030303030303030)
|
||||
|
@ -128,6 +131,9 @@ static inline bool is_made_of_eight_digits_fast(const char *chars) {
|
|||
// this is more efficient apparently than the scalar code above (fewer instructions)
|
||||
static inline bool is_made_of_eight_digits_fast(const char *chars) {
|
||||
__m64 val;
|
||||
// this can read up to 7 bytes beyond the buffer size, but we require
|
||||
// SIMDJSON_PADDING of padding
|
||||
static_assert(7 <= SIMDJSON_PADDING);
|
||||
memcpy(&val, chars, 8);
|
||||
__m64 base = _mm_sub_pi8(val,_mm_set1_pi8('0'));
|
||||
__m64 basecmp = _mm_subs_pu8(base,_mm_set1_pi8(9));
|
||||
|
@ -388,7 +394,7 @@ static really_inline bool parse_number(const uint8_t *const buf,
|
|||
uint64_t i; // an unsigned int avoids signed overflows (which are bad)
|
||||
if (*p == '0') { // 0 cannot be followed by an integer
|
||||
++p;
|
||||
if (is_not_structural_or_whitespace_or_exponent_or_decimal(*p)) {
|
||||
if (is_not_structural_or_whitespace_or_exponent_or_decimal_or_null(*p)) {
|
||||
#ifdef JSON_TEST_NUMBERS // for unit testing
|
||||
foundInvalidNumber(buf + offset);
|
||||
#endif
|
||||
|
|
|
@ -0,0 +1,67 @@
|
|||
#ifndef SIMDJSON_PADDING_STRING_H
|
||||
#define SIMDJSON_PADDING_STRING_H
|
||||
#include "simdjson/portability.h"
#include <cstring>
#include <memory>
#include <string>
|
||||
// low-level function to allocate memory with padding so we can read past the
|
||||
// "length" bytes safely. if you must provide a pointer to some data, create it
|
||||
// with this function: length is the max. size in bytes of the string. The caller is
|
||||
// responsible for freeing the memory (with aligned_free(...), not free(...))
|
||||
char *allocate_padded_buffer(size_t length);
|
||||
|
||||
// Simple string with padded allocation.
|
||||
// We deliberately forbid copies, users should rely on swap or move
|
||||
// constructors.
|
||||
class padded_string {
|
||||
public:
|
||||
explicit padded_string() noexcept : viable_size(0), data_ptr(nullptr) {}
|
||||
explicit padded_string(size_t length) noexcept
|
||||
: viable_size(length), data_ptr(allocate_padded_buffer(length)) {
|
||||
|
||||
if (data_ptr != nullptr)
|
||||
data_ptr[length] = '\0'; // easier when you need a c_str
|
||||
}
|
||||
explicit padded_string(char *data, size_t length) noexcept
|
||||
: viable_size(length), data_ptr(allocate_padded_buffer(length)) {
|
||||
if (data_ptr != nullptr) {
|
||||
memcpy(data_ptr, data, length);
|
||||
data_ptr[length] = '\0'; // easier when you need a c_str
|
||||
}
|
||||
}
|
||||
padded_string(std::string s) noexcept
|
||||
: viable_size(s.size()), data_ptr(allocate_padded_buffer(s.size())) {
|
||||
if (data_ptr != nullptr) {
|
||||
memcpy(data_ptr, s.data(), s.size());
|
||||
data_ptr[s.size()] = '\0'; // easier when you need a c_str
|
||||
}
|
||||
}
|
||||
padded_string(padded_string &&o) noexcept
|
||||
: viable_size(o.viable_size), data_ptr(o.data_ptr) {
|
||||
o.data_ptr = nullptr; // we take ownership
|
||||
}
|
||||
void swap(padded_string &o) {
|
||||
size_t tmp_viable_size = viable_size;
|
||||
char *tmp_data_ptr = data_ptr;
|
||||
viable_size = o.viable_size;
|
||||
data_ptr = o.data_ptr;
|
||||
o.data_ptr = tmp_data_ptr;
|
||||
o.viable_size = tmp_viable_size;
|
||||
}
|
||||
|
||||
~padded_string() { aligned_free_char(data_ptr); }
|
||||
|
||||
size_t size() const { return viable_size; }
|
||||
|
||||
size_t length() const { return viable_size; }
|
||||
|
||||
char *data() const { return data_ptr; }
|
||||
|
||||
private:
|
||||
padded_string &operator=(const padded_string &o) = delete;
|
||||
padded_string(const padded_string &o) = delete;
|
||||
|
||||
size_t viable_size;
|
||||
char *data_ptr;
|
||||
};
|
||||
|
||||
#endif
|
|
@ -95,6 +95,9 @@ static inline void *aligned_malloc(size_t alignment, size_t size) {
|
|||
return p;
|
||||
}
|
||||
|
||||
static inline char *aligned_malloc_char(size_t alignment, size_t size) {
|
||||
return (char*)aligned_malloc(alignment, size);
|
||||
}
|
||||
|
||||
#ifdef __AVX2__
|
||||
|
||||
|
@ -131,4 +134,10 @@ static inline void aligned_free(void *memblock) {
|
|||
#endif
|
||||
}
|
||||
|
||||
|
||||
|
||||
// Typed front end for aligned_free: passes a char buffer on as an untyped
// pointer.
static inline void aligned_free_char(char *memblock) {
  void *const raw_block = memblock; // char* converts to void* implicitly
  aligned_free(raw_block);
}
|
||||
|
||||
#endif // SIMDJSON_PORTABILITY_H
|
||||
|
|
|
@ -89,6 +89,9 @@ really_inline bool parse_string(UNUSED const uint8_t *buf, UNUSED size_t len,
|
|||
const uint8_t *const start_of_string = dst;
|
||||
while (1) {
|
||||
#ifdef __AVX2__
|
||||
// this can read up to 31 bytes beyond the buffer size, but we require
|
||||
// SIMDJSON_PADDING of padding
|
||||
static_assert(sizeof(__m256i) - 1 <= SIMDJSON_PADDING);
|
||||
__m256i v = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(src));
|
||||
// store to dest unconditionally - we can overwrite the bits we don't like
|
||||
// later
|
||||
|
@ -99,6 +102,9 @@ really_inline bool parse_string(UNUSED const uint8_t *buf, UNUSED size_t len,
|
|||
auto quote_bits =
|
||||
static_cast<uint32_t>(_mm256_movemask_epi8(quote_mask));
|
||||
#else
|
||||
// this can read up to 31 bytes beyond the buffer size, but we require
|
||||
// SIMDJSON_PADDING of padding
|
||||
static_assert(2 * sizeof(uint8x16_t) - 1 <= SIMDJSON_PADDING);
|
||||
uint8x16_t v0 = vld1q_u8(src);
|
||||
uint8x16_t v1 = vld1q_u8(src+16);
|
||||
vst1q_u8(dst, v0);
|
||||
|
|
|
@ -1 +1 @@
|
|||
0
|
||||
0
|
|
@ -1,11 +1,11 @@
|
|||
/* auto-generated on Wed 13 Mar 2019 21:02:37 EDT. Do not edit! */
|
||||
/* auto-generated on Thu May 9 17:40:56 EDT 2019. Do not edit! */
|
||||
|
||||
#include <iostream>
|
||||
#include "simdjson.h"
|
||||
#include "simdjson.cpp"
|
||||
int main(int argc, char *argv[]) {
|
||||
const char * filename = argv[1];
|
||||
std::string_view p = get_corpus(filename);
|
||||
padded_string p = get_corpus(filename);
|
||||
ParsedJson pj = build_parsed_json(p); // do the parsing
|
||||
if( ! pj.isValid() ) {
|
||||
std::cout << "not valid" << std::endl;
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
/* auto-generated on Wed 13 Mar 2019 21:02:37 EDT. Do not edit! */
|
||||
/* auto-generated on Thu May 9 17:40:56 EDT 2019. Do not edit! */
|
||||
#include "simdjson.h"
|
||||
|
||||
/* used for http://dmalloc.com/ Dmalloc - Debug Malloc Library */
|
||||
|
@ -15,28 +15,27 @@ char * allocate_padded_buffer(size_t length) {
|
|||
//return (char *) malloc(length + SIMDJSON_PADDING);
|
||||
// However, we might as well align to cache lines...
|
||||
size_t totalpaddedlength = length + SIMDJSON_PADDING;
|
||||
char *padded_buffer = (char *) aligned_malloc(64, totalpaddedlength);
|
||||
char *padded_buffer = aligned_malloc_char(64, totalpaddedlength);
|
||||
return padded_buffer;
|
||||
}
|
||||
|
||||
std::string_view get_corpus(const std::string& filename) {
|
||||
padded_string get_corpus(const std::string& filename) {
|
||||
std::FILE *fp = std::fopen(filename.c_str(), "rb");
|
||||
if (fp != nullptr) {
|
||||
std::fseek(fp, 0, SEEK_END);
|
||||
size_t len = std::ftell(fp);
|
||||
char * buf = allocate_padded_buffer(len);
|
||||
if(buf == nullptr) {
|
||||
padded_string s(len);
|
||||
if(s.data() == nullptr) {
|
||||
std::fclose(fp);
|
||||
throw std::runtime_error("could not allocate memory");
|
||||
}
|
||||
std::rewind(fp);
|
||||
size_t readb = std::fread(buf, 1, len, fp);
|
||||
size_t readb = std::fread(s.data(), 1, len, fp);
|
||||
std::fclose(fp);
|
||||
if(readb != len) {
|
||||
aligned_free(buf);
|
||||
throw std::runtime_error("could not read the data");
|
||||
}
|
||||
return std::string_view(buf,len);
|
||||
return s;
|
||||
}
|
||||
throw std::runtime_error("could not load corpus");
|
||||
}
|
||||
|
@ -308,16 +307,27 @@ int json_parse(const uint8_t *buf, size_t len, ParsedJson &pj, bool reallocifnee
|
|||
}
|
||||
bool reallocated = false;
|
||||
if(reallocifneeded) {
|
||||
// realloc is needed if the end of the memory crosses a page
|
||||
#ifdef ALLOW_SAME_PAGE_BUFFER_OVERRUN
|
||||
// realloc is needed if the end of the memory crosses a page
|
||||
#ifdef _MSC_VER
|
||||
SYSTEM_INFO sysInfo;
|
||||
GetSystemInfo(&sysInfo);
|
||||
long pagesize = sysInfo.dwPageSize;
|
||||
#else
|
||||
long pagesize = sysconf (_SC_PAGESIZE);
|
||||
long pagesize = sysconf (_SC_PAGESIZE);
|
||||
#endif
|
||||
if ( (reinterpret_cast<uintptr_t>(buf + len - 1) % pagesize ) < SIMDJSON_PADDING ) {
|
||||
const uint8_t *tmpbuf = buf;
|
||||
//////////////
|
||||
// We want to check that buf + len - 1 and buf + len - 1 + SIMDJSON_PADDING
|
||||
// are in the same page.
|
||||
// That is, we want to check that
|
||||
// (buf + len - 1) / pagesize == (buf + len - 1 + SIMDJSON_PADDING) / pagesize
|
||||
// That's true if (buf + len - 1) % pagesize + SIMDJSON_PADDING < pagesize.
|
||||
///////////
|
||||
if ( (reinterpret_cast<uintptr_t>(buf + len - 1) % pagesize ) + SIMDJSON_PADDING < static_cast<uintptr_t>(pagesize) ) {
|
||||
#else // SIMDJSON_SAFE_SAME_PAGE_READ_OVERRUN
|
||||
if(true) { // if not SIMDJSON_SAFE_SAME_PAGE_READ_OVERRUN, we always reallocate
|
||||
#endif
|
||||
const uint8_t *tmpbuf = buf;
|
||||
buf = (uint8_t *) allocate_padded_buffer(len);
|
||||
if(buf == NULL) return simdjson::MEMALLOC;
|
||||
memcpy((void*)buf,tmpbuf,len);
|
||||
|
@ -350,8 +360,18 @@ ParsedJson build_parsed_json(const uint8_t *buf, size_t len, bool reallocifneede
|
|||
/* begin file src/stage1_find_marks.cpp */
|
||||
#include <cassert>
|
||||
|
||||
|
||||
#ifdef __AVX2__
|
||||
|
||||
#ifndef SIMDJSON_SKIPUTF8VALIDATION
|
||||
#define SIMDJSON_UTF8VALIDATE
|
||||
|
||||
#endif
|
||||
#else
|
||||
// currently we don't UTF8 validate for ARM
|
||||
// also we assume that if you're not __AVX2__
|
||||
// you're ARM, which is a bit dumb. TODO: Fix...
|
||||
#include <arm_neon.h>
|
||||
#endif
|
||||
|
||||
// It seems that many parsers do UTF-8 validation.
|
||||
|
@ -359,13 +379,51 @@ ParsedJson build_parsed_json(const uint8_t *buf, size_t len, bool reallocifneede
|
|||
// allows it.
|
||||
#ifdef SIMDJSON_UTF8VALIDATE
|
||||
#endif
|
||||
using namespace std;
|
||||
|
||||
really_inline void check_utf8(__m256i input_lo, __m256i input_hi,
|
||||
#define TRANSPOSE
|
||||
|
||||
// One 64-byte block of input held in SIMD registers, as produced by
// fill_input. The member layout depends on the target instruction set; builds
// with neither AVX2 nor NEON are rejected at compile time.
struct simd_input {
#if defined(__AVX2__)
  __m256i lo; // bytes 0..31 of the block
  __m256i hi; // bytes 32..63 of the block
#elif defined(__ARM_NEON)
#ifdef TRANSPOSE
  uint8x16x4_t i; // all 64 bytes, loaded with a single vld4q_u8
#else
  uint8x16_t i0; // bytes 0..15
  uint8x16_t i1; // bytes 16..31
  uint8x16_t i2; // bytes 32..47
  uint8x16_t i3; // bytes 48..63
#endif
#else
#error "It's called SIMDjson for a reason, bro"
#endif
};
|
||||
|
||||
// Loads the 64 bytes starting at ptr into a simd_input. The loads are
// unaligned, so ptr needs no particular alignment, but all 64 bytes must be
// readable.
really_inline simd_input fill_input(const uint8_t * ptr) {
  simd_input block;
#if defined(__AVX2__)
  const __m256i *const lo_src = reinterpret_cast<const __m256i *>(ptr + 0);
  const __m256i *const hi_src = reinterpret_cast<const __m256i *>(ptr + 32);
  block.lo = _mm256_loadu_si256(lo_src);
  block.hi = _mm256_loadu_si256(hi_src);
#elif defined(__ARM_NEON)
#ifdef TRANSPOSE
  // single 4-way load of the whole block
  block.i = vld4q_u8(ptr);
#else
  // four consecutive 16-byte loads
  block.i0 = vld1q_u8(ptr + 0);
  block.i1 = vld1q_u8(ptr + 16);
  block.i2 = vld1q_u8(ptr + 32);
  block.i3 = vld1q_u8(ptr + 48);
#endif
#endif
  return block;
}
|
||||
|
||||
#ifdef SIMDJSON_UTF8VALIDATE
|
||||
really_inline void check_utf8(simd_input in,
|
||||
__m256i &has_error,
|
||||
struct avx_processed_utf_bytes &previous) {
|
||||
__m256i highbit = _mm256_set1_epi8(0x80);
|
||||
if ((_mm256_testz_si256(_mm256_or_si256(input_lo, input_hi), highbit)) == 1) {
|
||||
if ((_mm256_testz_si256(_mm256_or_si256(in.lo, in.hi), highbit)) == 1) {
|
||||
// it is ascii, we just check continuation
|
||||
has_error = _mm256_or_si256(
|
||||
_mm256_cmpgt_epi8(
|
||||
|
@ -375,30 +433,101 @@ really_inline void check_utf8(__m256i input_lo, __m256i input_hi,
|
|||
has_error);
|
||||
} else {
|
||||
// it is not ascii so we have to do heavy work
|
||||
previous = avxcheckUTF8Bytes(input_lo, &previous, &has_error);
|
||||
previous = avxcheckUTF8Bytes(input_hi, &previous, &has_error);
|
||||
previous = avxcheckUTF8Bytes(in.lo, &previous, &has_error);
|
||||
previous = avxcheckUTF8Bytes(in.hi, &previous, &has_error);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef __ARM_NEON
|
||||
// NEON counterpart of a 16-lane byte movemask: bit i of the result is taken
// from bit (i mod 8) of byte i of input. For comparison results, whose lanes
// are either 0x00 or 0xFF, that is one bit per lane.
uint16_t neonmovemask(uint8x16_t input) {
  const uint8x16_t lane_bit = { 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80,
                                0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80};
  // keep a single, lane-specific bit per byte
  uint8x16_t folded = vandq_u8(input, lane_bit);
  // three rounds of pairwise adds squeeze each group of eight bytes into one
  for (int round = 0; round < 3; ++round) {
    folded = vpaddq_u8(folded, folded);
  }
  // the two packed bytes now sit in the lowest 16-bit lane
  return vgetq_lane_u16(vreinterpretq_u16_u8(folded), 0);
}
|
||||
|
||||
// Packs four 16-lane byte masks into one 64-bit mask. The inputs are expected
// to be comparison results (each lane 0x00 or 0xFF).
//
// With TRANSPOSE defined, p0..p3 are combined lane by lane: lane j of p_k
// supplies bit k of byte j when j is even and bit k + 4 when j is odd, and one
// pairwise add then merges neighbouring lanes into a full byte.
// Without TRANSPOSE, p0..p3 are treated as four consecutive 16-byte chunks and
// reduced with pairwise adds only.
really_inline
uint64_t neonmovemask_bulk(uint8x16_t p0, uint8x16_t p1, uint8x16_t p2, uint8x16_t p3) {
#ifdef TRANSPOSE
  const uint8x16_t pick_from_p0 = { 0x01, 0x10, 0x01, 0x10, 0x01, 0x10, 0x01, 0x10,
                                    0x01, 0x10, 0x01, 0x10, 0x01, 0x10, 0x01, 0x10};
  const uint8x16_t pick_from_p1 = { 0x02, 0x20, 0x02, 0x20, 0x02, 0x20, 0x02, 0x20,
                                    0x02, 0x20, 0x02, 0x20, 0x02, 0x20, 0x02, 0x20};
  const uint8x16_t pick_from_p2 = { 0x04, 0x40, 0x04, 0x40, 0x04, 0x40, 0x04, 0x40,
                                    0x04, 0x40, 0x04, 0x40, 0x04, 0x40, 0x04, 0x40};
  const uint8x16_t pick_from_p3 = { 0x08, 0x80, 0x08, 0x80, 0x08, 0x80, 0x08, 0x80,
                                    0x08, 0x80, 0x08, 0x80, 0x08, 0x80, 0x08, 0x80};
  // Start from the bits selected out of p0, then use bitwise selects to drop
  // in the bits owned by p1, p2 and p3. The masks are disjoint, so this gives
  // the same result as AND-ing each input with its mask and OR-ing the four.
  uint8x16_t merged = vandq_u8(p0, pick_from_p0);
  merged = vbslq_u8(pick_from_p1, p1, merged);
  merged = vbslq_u8(pick_from_p2, p2, merged);
  merged = vbslq_u8(pick_from_p3, p3, merged);
  // each pair of neighbouring lanes holds complementary nibbles
  const uint8x16_t packed = vpaddq_u8(merged, merged);
  return vgetq_lane_u64(vreinterpretq_u64_u8(packed), 0);
#else
  const uint8x16_t lane_bit = { 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80,
                                0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80};
  // keep a single, lane-specific bit per byte of every chunk
  const uint8x16_t bits0 = vandq_u8(p0, lane_bit);
  const uint8x16_t bits1 = vandq_u8(p1, lane_bit);
  const uint8x16_t bits2 = vandq_u8(p2, lane_bit);
  const uint8x16_t bits3 = vandq_u8(p3, lane_bit);
  // three levels of pairwise adds: 64 bytes -> 32 -> 16 -> 8
  const uint8x16_t pair01 = vpaddq_u8(bits0, bits1);
  const uint8x16_t pair23 = vpaddq_u8(bits2, bits3);
  uint8x16_t packed = vpaddq_u8(pair01, pair23);
  packed = vpaddq_u8(packed, packed);
  return vgetq_lane_u64(vreinterpretq_u64_u8(packed), 0);
#endif
}
|
||||
#endif
|
||||
|
||||
// a straightforward comparison of a mask against input. 5 uops; would be
|
||||
// cheaper in AVX512.
|
||||
really_inline uint64_t cmp_mask_against_input(__m256i input_lo,
|
||||
__m256i input_hi, __m256i mask) {
|
||||
__m256i cmp_res_0 = _mm256_cmpeq_epi8(input_lo, mask);
|
||||
really_inline uint64_t cmp_mask_against_input(simd_input in, uint8_t m) {
|
||||
#ifdef __AVX2__
|
||||
const __m256i mask = _mm256_set1_epi8(m);
|
||||
__m256i cmp_res_0 = _mm256_cmpeq_epi8(in.lo, mask);
|
||||
uint64_t res_0 = static_cast<uint32_t>(_mm256_movemask_epi8(cmp_res_0));
|
||||
__m256i cmp_res_1 = _mm256_cmpeq_epi8(input_hi, mask);
|
||||
__m256i cmp_res_1 = _mm256_cmpeq_epi8(in.hi, mask);
|
||||
uint64_t res_1 = _mm256_movemask_epi8(cmp_res_1);
|
||||
return res_0 | (res_1 << 32);
|
||||
#elif defined(__ARM_NEON)
|
||||
const uint8x16_t mask = vmovq_n_u8(m);
|
||||
uint8x16_t cmp_res_0 = vceqq_u8(in.i.val[0], mask);
|
||||
uint8x16_t cmp_res_1 = vceqq_u8(in.i.val[1], mask);
|
||||
uint8x16_t cmp_res_2 = vceqq_u8(in.i.val[2], mask);
|
||||
uint8x16_t cmp_res_3 = vceqq_u8(in.i.val[3], mask);
|
||||
return neonmovemask_bulk(cmp_res_0, cmp_res_1, cmp_res_2, cmp_res_3);
|
||||
#endif
|
||||
}
|
||||
|
||||
// find all values less than or equal to the content of maxval (using unsigned arithmetic)
|
||||
really_inline uint64_t unsigned_lteq_against_input(__m256i input_lo,
|
||||
__m256i input_hi, __m256i maxval) {
|
||||
__m256i cmp_res_0 = _mm256_cmpeq_epi8(_mm256_max_epu8(maxval,input_lo),maxval);
|
||||
really_inline uint64_t unsigned_lteq_against_input(simd_input in, uint8_t m) {
|
||||
#ifdef __AVX2__
|
||||
const __m256i maxval = _mm256_set1_epi8(m);
|
||||
__m256i cmp_res_0 = _mm256_cmpeq_epi8(_mm256_max_epu8(maxval,in.lo),maxval);
|
||||
uint64_t res_0 = static_cast<uint32_t>(_mm256_movemask_epi8(cmp_res_0));
|
||||
__m256i cmp_res_1 = _mm256_cmpeq_epi8(_mm256_max_epu8(maxval,input_hi),maxval);
|
||||
__m256i cmp_res_1 = _mm256_cmpeq_epi8(_mm256_max_epu8(maxval,in.hi),maxval);
|
||||
uint64_t res_1 = _mm256_movemask_epi8(cmp_res_1);
|
||||
return res_0 | (res_1 << 32);
|
||||
#elif defined(__ARM_NEON)
|
||||
const uint8x16_t mask = vmovq_n_u8(m);
|
||||
uint8x16_t cmp_res_0 = vcleq_u8(in.i.val[0], mask);
|
||||
uint8x16_t cmp_res_1 = vcleq_u8(in.i.val[1], mask);
|
||||
uint8x16_t cmp_res_2 = vcleq_u8(in.i.val[2], mask);
|
||||
uint8x16_t cmp_res_3 = vcleq_u8(in.i.val[3], mask);
|
||||
return neonmovemask_bulk(cmp_res_0, cmp_res_1, cmp_res_2, cmp_res_3);
|
||||
#endif
|
||||
}
|
||||
|
||||
// return a bitvector indicating where we have characters that end an odd-length
|
||||
|
@ -411,12 +540,11 @@ really_inline uint64_t unsigned_lteq_against_input(__m256i input_lo,
|
|||
// backslashes, which modifies our subsequent search for odd-length
|
||||
// sequences of backslashes in an obvious way.
|
||||
really_inline uint64_t
|
||||
find_odd_backslash_sequences(__m256i input_lo, __m256i input_hi,
|
||||
find_odd_backslash_sequences(simd_input in,
|
||||
uint64_t &prev_iter_ends_odd_backslash) {
|
||||
const uint64_t even_bits = 0x5555555555555555ULL;
|
||||
const uint64_t odd_bits = ~even_bits;
|
||||
uint64_t bs_bits =
|
||||
cmp_mask_against_input(input_lo, input_hi, _mm256_set1_epi8('\\'));
|
||||
uint64_t bs_bits = cmp_mask_against_input(in, '\\');
|
||||
uint64_t start_edges = bs_bits & ~(bs_bits << 1);
|
||||
// flip lowest if we have an odd-length run at the end of the prior
|
||||
// iteration
|
||||
|
@ -457,22 +585,24 @@ find_odd_backslash_sequences(__m256i input_lo, __m256i input_hi,
|
|||
// Note that we don't do any error checking to see if we have backslash
|
||||
// sequences outside quotes; these
|
||||
// backslash sequences (of any length) will be detected elsewhere.
|
||||
really_inline uint64_t find_quote_mask_and_bits(
|
||||
__m256i input_lo, __m256i input_hi, uint64_t odd_ends,
|
||||
really_inline uint64_t find_quote_mask_and_bits(simd_input in, uint64_t odd_ends,
|
||||
uint64_t &prev_iter_inside_quote, uint64_t "e_bits, uint64_t &error_mask) {
|
||||
quote_bits =
|
||||
cmp_mask_against_input(input_lo, input_hi, _mm256_set1_epi8('"'));
|
||||
quote_bits = cmp_mask_against_input(in, '"');
|
||||
quote_bits = quote_bits & ~odd_ends;
|
||||
// remove from the valid quoted region the unescaped characters.
|
||||
#ifdef __AVX2__
|
||||
uint64_t quote_mask = _mm_cvtsi128_si64(_mm_clmulepi64_si128(
|
||||
_mm_set_epi64x(0ULL, quote_bits), _mm_set1_epi8(0xFF), 0));
|
||||
#elif defined(__ARM_NEON)
|
||||
uint64_t quote_mask = vmull_p64( -1ULL, quote_bits);
|
||||
#endif
|
||||
quote_mask ^= prev_iter_inside_quote;
|
||||
// All Unicode characters may be placed within the
|
||||
// quotation marks, except for the characters that MUST be escaped:
|
||||
// quotation mark, reverse solidus, and the control characters (U+0000
|
||||
//through U+001F).
|
||||
// https://tools.ietf.org/html/rfc8259
|
||||
uint64_t unescaped = unsigned_lteq_against_input(input_lo, input_hi, _mm256_set1_epi8(0x1F));
|
||||
uint64_t unescaped = unsigned_lteq_against_input(in, 0x1F);
|
||||
error_mask |= quote_mask & unescaped;
|
||||
// right shift of a signed value expected to be well-defined and standard
|
||||
// compliant as of C++20,
|
||||
|
@ -482,8 +612,7 @@ really_inline uint64_t find_quote_mask_and_bits(
|
|||
return quote_mask;
|
||||
}
|
||||
|
||||
really_inline void find_whitespace_and_structurals(const __m256i input_lo,
|
||||
__m256i input_hi,
|
||||
really_inline void find_whitespace_and_structurals(simd_input in,
|
||||
uint64_t &whitespace,
|
||||
uint64_t &structurals) {
|
||||
// do a 'shufti' to detect structural JSON characters
|
||||
|
@ -493,26 +622,27 @@ really_inline void find_whitespace_and_structurals(const __m256i input_lo,
|
|||
// we are also interested in the four whitespace characters
|
||||
// space 0x20, linefeed 0x0a, horizontal tab 0x09 and carriage return 0x0d
|
||||
// these go into the next 2 buckets of the comparison (8/16)
|
||||
#ifdef __AVX2__
|
||||
const __m256i low_nibble_mask = _mm256_setr_epi8(
|
||||
16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 8, 12, 1, 2, 9, 0, 0);
|
||||
16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0,
|
||||
16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0);
|
||||
const __m256i high_nibble_mask = _mm256_setr_epi8(
|
||||
8, 0, 18, 4, 0, 1, 0, 1, 0, 0, 0, 3, 2, 1, 0, 0, 8, 0, 18, 4, 0, 1, 0, 1,
|
||||
0, 0, 0, 3, 2, 1, 0, 0);
|
||||
8, 0, 18, 4, 0, 1, 0, 1, 0, 0, 0, 3, 2, 1, 0, 0,
|
||||
8, 0, 18, 4, 0, 1, 0, 1, 0, 0, 0, 3, 2, 1, 0, 0);
|
||||
|
||||
__m256i structural_shufti_mask = _mm256_set1_epi8(0x7);
|
||||
__m256i whitespace_shufti_mask = _mm256_set1_epi8(0x18);
|
||||
|
||||
__m256i v_lo = _mm256_and_si256(
|
||||
_mm256_shuffle_epi8(low_nibble_mask, input_lo),
|
||||
_mm256_shuffle_epi8(low_nibble_mask, in.lo),
|
||||
_mm256_shuffle_epi8(high_nibble_mask,
|
||||
_mm256_and_si256(_mm256_srli_epi32(input_lo, 4),
|
||||
_mm256_and_si256(_mm256_srli_epi32(in.lo, 4),
|
||||
_mm256_set1_epi8(0x7f))));
|
||||
|
||||
__m256i v_hi = _mm256_and_si256(
|
||||
_mm256_shuffle_epi8(low_nibble_mask, input_hi),
|
||||
_mm256_shuffle_epi8(low_nibble_mask, in.hi),
|
||||
_mm256_shuffle_epi8(high_nibble_mask,
|
||||
_mm256_and_si256(_mm256_srli_epi32(input_hi, 4),
|
||||
_mm256_and_si256(_mm256_srli_epi32(in.hi, 4),
|
||||
_mm256_set1_epi8(0x7f))));
|
||||
__m256i tmp_lo = _mm256_cmpeq_epi8(
|
||||
_mm256_and_si256(v_lo, structural_shufti_mask), _mm256_set1_epi8(0));
|
||||
|
@ -532,6 +662,124 @@ really_inline void find_whitespace_and_structurals(const __m256i input_lo,
|
|||
uint64_t ws_res_0 = static_cast<uint32_t>(_mm256_movemask_epi8(tmp_ws_lo));
|
||||
uint64_t ws_res_1 = _mm256_movemask_epi8(tmp_ws_hi);
|
||||
whitespace = ~(ws_res_0 | (ws_res_1 << 32));
|
||||
#elif defined(__ARM_NEON)
|
||||
#ifndef FUNKY_BAD_TABLE
|
||||
const uint8x16_t low_nibble_mask = (uint8x16_t){
|
||||
16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0};
|
||||
const uint8x16_t high_nibble_mask = (uint8x16_t){
|
||||
8, 0, 18, 4, 0, 1, 0, 1, 0, 0, 0, 3, 2, 1, 0, 0};
|
||||
const uint8x16_t structural_shufti_mask = vmovq_n_u8(0x7);
|
||||
const uint8x16_t whitespace_shufti_mask = vmovq_n_u8(0x18);
|
||||
const uint8x16_t low_nib_and_mask = vmovq_n_u8(0xf);
|
||||
|
||||
uint8x16_t nib_0_lo = vandq_u8(in.i.val[0], low_nib_and_mask);
|
||||
uint8x16_t nib_0_hi = vshrq_n_u8(in.i.val[0], 4);
|
||||
uint8x16_t shuf_0_lo = vqtbl1q_u8(low_nibble_mask, nib_0_lo);
|
||||
uint8x16_t shuf_0_hi = vqtbl1q_u8(high_nibble_mask, nib_0_hi);
|
||||
uint8x16_t v_0 = vandq_u8(shuf_0_lo, shuf_0_hi);
|
||||
|
||||
uint8x16_t nib_1_lo = vandq_u8(in.i.val[1], low_nib_and_mask);
|
||||
uint8x16_t nib_1_hi = vshrq_n_u8(in.i.val[1], 4);
|
||||
uint8x16_t shuf_1_lo = vqtbl1q_u8(low_nibble_mask, nib_1_lo);
|
||||
uint8x16_t shuf_1_hi = vqtbl1q_u8(high_nibble_mask, nib_1_hi);
|
||||
uint8x16_t v_1 = vandq_u8(shuf_1_lo, shuf_1_hi);
|
||||
|
||||
uint8x16_t nib_2_lo = vandq_u8(in.i.val[2], low_nib_and_mask);
|
||||
uint8x16_t nib_2_hi = vshrq_n_u8(in.i.val[2], 4);
|
||||
uint8x16_t shuf_2_lo = vqtbl1q_u8(low_nibble_mask, nib_2_lo);
|
||||
uint8x16_t shuf_2_hi = vqtbl1q_u8(high_nibble_mask, nib_2_hi);
|
||||
uint8x16_t v_2 = vandq_u8(shuf_2_lo, shuf_2_hi);
|
||||
|
||||
uint8x16_t nib_3_lo = vandq_u8(in.i.val[3], low_nib_and_mask);
|
||||
uint8x16_t nib_3_hi = vshrq_n_u8(in.i.val[3], 4);
|
||||
uint8x16_t shuf_3_lo = vqtbl1q_u8(low_nibble_mask, nib_3_lo);
|
||||
uint8x16_t shuf_3_hi = vqtbl1q_u8(high_nibble_mask, nib_3_hi);
|
||||
uint8x16_t v_3 = vandq_u8(shuf_3_lo, shuf_3_hi);
|
||||
|
||||
uint8x16_t tmp_0 = vtstq_u8(v_0, structural_shufti_mask);
|
||||
uint8x16_t tmp_1 = vtstq_u8(v_1, structural_shufti_mask);
|
||||
uint8x16_t tmp_2 = vtstq_u8(v_2, structural_shufti_mask);
|
||||
uint8x16_t tmp_3 = vtstq_u8(v_3, structural_shufti_mask);
|
||||
structurals = neonmovemask_bulk(tmp_0, tmp_1, tmp_2, tmp_3);
|
||||
|
||||
uint8x16_t tmp_ws_0 = vtstq_u8(v_0, whitespace_shufti_mask);
|
||||
uint8x16_t tmp_ws_1 = vtstq_u8(v_1, whitespace_shufti_mask);
|
||||
uint8x16_t tmp_ws_2 = vtstq_u8(v_2, whitespace_shufti_mask);
|
||||
uint8x16_t tmp_ws_3 = vtstq_u8(v_3, whitespace_shufti_mask);
|
||||
whitespace = neonmovemask_bulk(tmp_ws_0, tmp_ws_1, tmp_ws_2, tmp_ws_3);
|
||||
#else
|
||||
// I think this one is garbage. In order to save the expense
|
||||
// of another shuffle, I use an equally expensive shift, and
|
||||
// this gets glued to the end of the dependency chain. Seems a bit
|
||||
// slower for no good reason.
|
||||
//
|
||||
// need to use a weird arrangement. Bytes in this bitvector
|
||||
// are in conventional order, but bits are reversed as we are
|
||||
// using a signed left shift (that is a +ve value from 0..7) to
|
||||
// shift upwards to 0x80 in the bit. So we need to reverse bits.
|
||||
|
||||
// note no structural/whitespace has the high bit on
|
||||
// so it's OK to put the high 5 bits into our TBL shuffle
|
||||
//
|
||||
|
||||
// structurals are { 0x7b } 0x7d : 0x3a [ 0x5b ] 0x5d , 0x2c
|
||||
// or in 5 bit, 3 bit form thats
|
||||
// (15,3) (15, 5) (7,2) (11,3) (11,5) (5,4)
|
||||
// bit-reversing (subtract low 3 bits from 7) yields:
|
||||
// (15,4) (15, 2) (7,5) (11,4) (11,2) (5,3)
|
||||
|
||||
const uint8x16_t structural_bitvec = (uint8x16_t){
|
||||
0, 0, 0, 0,
|
||||
0, 8, 0, 32,
|
||||
0, 0, 0, 20,
|
||||
0, 0, 0, 20};
|
||||
// we are also interested in the four whitespace characters
|
||||
// space 0x20, linefeed 0x0a, horizontal tab 0x09 and carriage return 0x0d
|
||||
// (4,0) (1, 2) (1, 1) (1, 5)
|
||||
// bit-reversing (subtract low 3 bits from 7) yields:
|
||||
// (4,7) (1, 5) (1, 6) (1, 2)
|
||||
|
||||
const uint8x16_t whitespace_bitvec = (uint8x16_t){
|
||||
0, 100, 0, 0,
|
||||
128, 0, 0, 0,
|
||||
0, 0, 0, 0,
|
||||
0, 0, 0, 0};
|
||||
const uint8x16_t low_3bits_and_mask = vmovq_n_u8(0x7);
|
||||
const uint8x16_t high_1bit_tst_mask = vmovq_n_u8(0x80);
|
||||
|
||||
int8x16_t low_3bits_0 = vreinterpretq_s8_u8(vandq_u8(in.i.val[0], low_3bits_and_mask));
|
||||
uint8x16_t high_5bits_0 = vshrq_n_u8(in.i.val[0], 3);
|
||||
uint8x16_t shuffle_structural_0 = vshlq_u8(vqtbl1q_u8(structural_bitvec, high_5bits_0), low_3bits_0);
|
||||
uint8x16_t shuffle_ws_0 = vshlq_u8(vqtbl1q_u8(whitespace_bitvec, high_5bits_0), low_3bits_0);
|
||||
uint8x16_t tmp_0 = vtstq_u8(shuffle_structural_0, high_1bit_tst_mask);
|
||||
uint8x16_t tmp_ws_0 = vtstq_u8(shuffle_ws_0, high_1bit_tst_mask);
|
||||
|
||||
int8x16_t low_3bits_1 = vreinterpretq_s8_u8(vandq_u8(in.i.val[1], low_3bits_and_mask));
|
||||
uint8x16_t high_5bits_1 = vshrq_n_u8(in.i.val[1], 3);
|
||||
uint8x16_t shuffle_structural_1 = vshlq_u8(vqtbl1q_u8(structural_bitvec, high_5bits_1), low_3bits_1);
|
||||
uint8x16_t shuffle_ws_1 = vshlq_u8(vqtbl1q_u8(whitespace_bitvec, high_5bits_1), low_3bits_1);
|
||||
uint8x16_t tmp_1 = vtstq_u8(shuffle_structural_1, high_1bit_tst_mask);
|
||||
uint8x16_t tmp_ws_1 = vtstq_u8(shuffle_ws_1, high_1bit_tst_mask);
|
||||
|
||||
int8x16_t low_3bits_2 = vreinterpretq_s8_u8(vandq_u8(in.i.val[2], low_3bits_and_mask));
|
||||
uint8x16_t high_5bits_2 = vshrq_n_u8(in.i.val[2], 3);
|
||||
uint8x16_t shuffle_structural_2 = vshlq_u8(vqtbl1q_u8(structural_bitvec, high_5bits_2), low_3bits_2);
|
||||
uint8x16_t shuffle_ws_2 = vshlq_u8(vqtbl1q_u8(whitespace_bitvec, high_5bits_2), low_3bits_2);
|
||||
uint8x16_t tmp_2 = vtstq_u8(shuffle_structural_2, high_1bit_tst_mask);
|
||||
uint8x16_t tmp_ws_2 = vtstq_u8(shuffle_ws_2, high_1bit_tst_mask);
|
||||
|
||||
int8x16_t low_3bits_3 = vreinterpretq_s8_u8(vandq_u8(in.i.val[3], low_3bits_and_mask));
|
||||
uint8x16_t high_5bits_3 = vshrq_n_u8(in.i.val[3], 3);
|
||||
uint8x16_t shuffle_structural_3 = vshlq_u8(vqtbl1q_u8(structural_bitvec, high_5bits_3), low_3bits_3);
|
||||
uint8x16_t shuffle_ws_3 = vshlq_u8(vqtbl1q_u8(whitespace_bitvec, high_5bits_3), low_3bits_3);
|
||||
uint8x16_t tmp_3 = vtstq_u8(shuffle_structural_3, high_1bit_tst_mask);
|
||||
uint8x16_t tmp_ws_3 = vtstq_u8(shuffle_ws_3, high_1bit_tst_mask);
|
||||
|
||||
structurals = neonmovemask_bulk(tmp_0, tmp_1, tmp_2, tmp_3);
|
||||
whitespace = neonmovemask_bulk(tmp_ws_0, tmp_ws_1, tmp_ws_2, tmp_ws_3);
|
||||
#endif
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
// flatten out values in 'bits' assuming that they are to have values of idx
|
||||
|
@ -608,9 +856,9 @@ WARN_UNUSED
|
|||
/*never_inline*/ bool find_structural_bits(const uint8_t *buf, size_t len,
|
||||
ParsedJson &pj) {
|
||||
if (len > pj.bytecapacity) {
|
||||
cerr << "Your ParsedJson object only supports documents up to "
|
||||
std::cerr << "Your ParsedJson object only supports documents up to "
|
||||
<< pj.bytecapacity << " bytes but you are trying to process " << len
|
||||
<< " bytes\n";
|
||||
<< " bytes" << std::endl;
|
||||
return false;
|
||||
}
|
||||
uint32_t *base_ptr = pj.structural_indexes;
|
||||
|
@ -654,32 +902,26 @@ WARN_UNUSED
|
|||
#ifndef _MSC_VER
|
||||
__builtin_prefetch(buf + idx + 128);
|
||||
#endif
|
||||
__m256i input_lo =
|
||||
_mm256_loadu_si256(reinterpret_cast<const __m256i *>(buf + idx + 0));
|
||||
__m256i input_hi =
|
||||
_mm256_loadu_si256(reinterpret_cast<const __m256i *>(buf + idx + 32));
|
||||
|
||||
simd_input in = fill_input(buf+idx);
|
||||
#ifdef SIMDJSON_UTF8VALIDATE
|
||||
check_utf8(input_lo, input_hi, has_error, previous);
|
||||
check_utf8(in, has_error, previous);
|
||||
#endif
|
||||
|
||||
// detect odd sequences of backslashes
|
||||
uint64_t odd_ends = find_odd_backslash_sequences(
|
||||
input_lo, input_hi, prev_iter_ends_odd_backslash);
|
||||
in, prev_iter_ends_odd_backslash);
|
||||
|
||||
// detect insides of quote pairs ("quote_mask") and also our quote_bits
|
||||
// themselves
|
||||
uint64_t quote_bits;
|
||||
uint64_t quote_mask = find_quote_mask_and_bits(
|
||||
input_lo, input_hi, odd_ends, prev_iter_inside_quote, quote_bits, error_mask);
|
||||
in, odd_ends, prev_iter_inside_quote, quote_bits, error_mask);
|
||||
|
||||
// take the previous iterations structural bits, not our current iteration,
|
||||
// and flatten
|
||||
flatten_bits(base_ptr, base, idx, structurals);
|
||||
|
||||
uint64_t whitespace;
|
||||
find_whitespace_and_structurals(input_lo, input_hi, whitespace,
|
||||
structurals);
|
||||
find_whitespace_and_structurals(in, whitespace, structurals);
|
||||
|
||||
// fixup structurals to reflect quotes and add pseudo-structural characters
|
||||
structurals = finalize_structurals(structurals, whitespace, quote_mask,
|
||||
|
@ -695,38 +937,39 @@ WARN_UNUSED
|
|||
uint8_t tmpbuf[64];
|
||||
memset(tmpbuf, 0x20, 64);
|
||||
memcpy(tmpbuf, buf + idx, len - idx);
|
||||
__m256i input_lo =
|
||||
_mm256_loadu_si256(reinterpret_cast<const __m256i *>(tmpbuf + 0));
|
||||
__m256i input_hi =
|
||||
_mm256_loadu_si256(reinterpret_cast<const __m256i *>(tmpbuf + 32));
|
||||
|
||||
simd_input in = fill_input(tmpbuf);
|
||||
#ifdef SIMDJSON_UTF8VALIDATE
|
||||
check_utf8(input_lo, input_hi, has_error, previous);
|
||||
check_utf8(in, has_error, previous);
|
||||
#endif
|
||||
|
||||
// detect odd sequences of backslashes
|
||||
uint64_t odd_ends = find_odd_backslash_sequences(
|
||||
input_lo, input_hi, prev_iter_ends_odd_backslash);
|
||||
in, prev_iter_ends_odd_backslash);
|
||||
|
||||
// detect insides of quote pairs ("quote_mask") and also our quote_bits
|
||||
// themselves
|
||||
uint64_t quote_bits;
|
||||
uint64_t quote_mask = find_quote_mask_and_bits(
|
||||
input_lo, input_hi, odd_ends, prev_iter_inside_quote, quote_bits, error_mask);
|
||||
in, odd_ends, prev_iter_inside_quote, quote_bits, error_mask);
|
||||
|
||||
// take the previous iterations structural bits, not our current iteration,
|
||||
// and flatten
|
||||
flatten_bits(base_ptr, base, idx, structurals);
|
||||
|
||||
uint64_t whitespace;
|
||||
find_whitespace_and_structurals(input_lo, input_hi, whitespace,
|
||||
structurals);
|
||||
find_whitespace_and_structurals(in, whitespace, structurals);
|
||||
|
||||
// fixup structurals to reflect quotes and add pseudo-structural characters
|
||||
structurals = finalize_structurals(structurals, whitespace, quote_mask,
|
||||
quote_bits, prev_iter_ends_pseudo_pred);
|
||||
idx += 64;
|
||||
}
|
||||
|
||||
// is last string quote closed?
|
||||
if (prev_iter_inside_quote) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// finally, flatten out the remaining structurals from the last iteration
|
||||
flatten_bits(base_ptr, base, idx, structurals);
|
||||
|
||||
|
@ -734,6 +977,7 @@ WARN_UNUSED
|
|||
// a valid JSON file cannot have zero structural indexes - we should have
|
||||
// found something
|
||||
if (pj.n_structural_indexes == 0u) {
|
||||
printf("wacky exit\n");
|
||||
return false;
|
||||
}
|
||||
if (base_ptr[pj.n_structural_indexes - 1] > len) {
|
||||
|
@ -748,6 +992,7 @@ WARN_UNUSED
|
|||
// make it safe to dereference one beyond this array
|
||||
base_ptr[pj.n_structural_indexes] = 0;
|
||||
if (error_mask) {
|
||||
printf("had error mask\n");
|
||||
return false;
|
||||
}
|
||||
#ifdef SIMDJSON_UTF8VALIDATE
|
||||
|
@ -762,13 +1007,6 @@ bool find_structural_bits(const char *buf, size_t len, ParsedJson &pj) {
|
|||
}
|
||||
/* end file src/stage1_find_marks.cpp */
|
||||
/* begin file src/stage2_build_tape.cpp */
|
||||
#ifdef _MSC_VER
|
||||
/* Microsoft C/C++-compatible compiler */
|
||||
#include <intrin.h>
|
||||
#else
|
||||
#include <x86intrin.h>
|
||||
#endif
|
||||
|
||||
#include <cassert>
|
||||
#include <cstring>
|
||||
|
||||
|
@ -777,14 +1015,15 @@ bool find_structural_bits(const char *buf, size_t len, ParsedJson &pj) {
|
|||
#define PATH_SEP '/'
|
||||
|
||||
|
||||
using namespace std;
|
||||
|
||||
WARN_UNUSED
|
||||
really_inline bool is_valid_true_atom(const uint8_t *loc) {
|
||||
uint64_t tv = *reinterpret_cast<const uint64_t *>("true ");
|
||||
uint64_t mask4 = 0x00000000ffffffff;
|
||||
uint32_t error = 0;
|
||||
uint64_t locval; // we want to avoid unaligned 64-bit loads (undefined in C/C++)
|
||||
// this can read up to 7 bytes beyond the buffer size, but we require
|
||||
// SIMDJSON_PADDING of padding
|
||||
static_assert(sizeof(uint64_t) - 1 <= SIMDJSON_PADDING);
|
||||
std::memcpy(&locval, loc, sizeof(uint64_t));
|
||||
error = (locval & mask4) ^ tv;
|
||||
error |= is_not_structural_or_whitespace(loc[4]);
|
||||
|
@ -793,10 +1032,21 @@ really_inline bool is_valid_true_atom(const uint8_t *loc) {
|
|||
|
||||
WARN_UNUSED
|
||||
really_inline bool is_valid_false_atom(const uint8_t *loc) {
|
||||
uint64_t fv = *reinterpret_cast<const uint64_t *>("false ");
|
||||
// We have to use an integer constant because the space in the cast
|
||||
// below would lead to values illegally being qualified
|
||||
// uint64_t fv = *reinterpret_cast<const uint64_t *>("false ");
|
||||
// using this constant (that is the same false) but nulls out the
|
||||
// unused bits solves that
|
||||
uint64_t fv = 0x00000065736c6166; // takes into account endianness
|
||||
uint64_t mask5 = 0x000000ffffffffff;
|
||||
uint32_t error = 0;
|
||||
// we can't use the 32 bit value for checking for errors otherwise
|
||||
// the last character of false (it being 5 byte long!) would be
|
||||
// ignored
|
||||
uint64_t error = 0;
|
||||
uint64_t locval; // we want to avoid unaligned 64-bit loads (undefined in C/C++)
|
||||
// this can read up to 7 bytes beyond the buffer size, but we require
|
||||
// SIMDJSON_PADDING of padding
|
||||
static_assert(sizeof(uint64_t) - 1 <= SIMDJSON_PADDING);
|
||||
std::memcpy(&locval, loc, sizeof(uint64_t));
|
||||
error = (locval & mask5) ^ fv;
|
||||
error |= is_not_structural_or_whitespace(loc[5]);
|
||||
|
@ -809,6 +1059,9 @@ really_inline bool is_valid_null_atom(const uint8_t *loc) {
|
|||
uint64_t mask4 = 0x00000000ffffffff;
|
||||
uint32_t error = 0;
|
||||
uint64_t locval; // we want to avoid unaligned 64-bit loads (undefined in C/C++)
|
||||
// this can read up to 7 bytes beyond the buffer size, but we require
|
||||
// SIMDJSON_PADDING of padding
|
||||
static_assert(sizeof(uint64_t) - 1 <= SIMDJSON_PADDING);
|
||||
std::memcpy(&locval, loc, sizeof(uint64_t));
|
||||
error = (locval & mask4) ^ nv;
|
||||
error |= is_not_structural_or_whitespace(loc[4]);
|
||||
|
@ -820,7 +1073,7 @@ really_inline bool is_valid_null_atom(const uint8_t *loc) {
|
|||
* The JSON is parsed to a tape, see the accompanying tape.md file
|
||||
* for documentation.
|
||||
***********/
|
||||
WARN_UNUSED
|
||||
WARN_UNUSED ALLOW_SAME_PAGE_BUFFER_OVERRUN_QUALIFIER
|
||||
int unified_machine(const uint8_t *buf, size_t len, ParsedJson &pj) {
|
||||
uint32_t i = 0; // index of the structural character (0,1,2,3...)
|
||||
uint32_t idx; // location of the structural character in the input (buf)
|
||||
|
@ -1587,26 +1840,32 @@ bool ParsedJson::dump_raw_tape(std::ostream &os) {
|
|||
#include <iterator>
|
||||
|
||||
// Positions a new iterator at the start of the tape of pj_.
// Throws InvalidJSON when pj_ is not valid, or when the first tape word does
// not carry the 'r' type byte. A failed allocation surfaces as the exception
// thrown by new[].
ParsedJson::iterator::iterator(ParsedJson &pj_) : pj(pj_), depth(0), location(0), tape_length(0), depthindex(nullptr) {
  if (!pj.isValid()) {
    throw InvalidJSON();
  }
  // memory allocation would throw, so no nullptr check is needed
  depthindex = new scopeindex_t[pj.depthcapacity];
  depthindex[0].start_of_scope = location;
  current_val = pj.tape[location++];
  current_type = (current_val >> 56);
  depthindex[0].scope_type = current_type;
  if (current_type != 'r') {
    // should never happen
    // The destructor is not run for an object whose constructor throws, so
    // the scope stack has to be released here to avoid leaking it.
    delete[] depthindex;
    depthindex = nullptr;
    throw InvalidJSON();
  }
  tape_length = current_val & JSONVALUEMASK;
  if (location < tape_length) {
    // step onto the first element below the 'r' word and open its scope
    current_val = pj.tape[location];
    current_type = (current_val >> 56);
    depth++;
    depthindex[depth].start_of_scope = location;
    depthindex[depth].scope_type = current_type;
  }
}
|
||||
|
||||
ParsedJson::iterator::~iterator() {
|
||||
delete[] depthindex;
|
||||
|
@ -1614,14 +1873,12 @@ ParsedJson::iterator::~iterator() {
|
|||
|
||||
// Copy constructor: duplicates the position of o and takes a private copy of
// its scope stack (pj.depthcapacity entries).
ParsedJson::iterator::iterator(const iterator &o):
    pj(o.pj), depth(o.depth), location(o.location),
    tape_length(o.tape_length), current_type(o.current_type),
    current_val(o.current_val), depthindex(nullptr) {
  // new[] reports failure by throwing, so there is no nullptr case to handle;
  // if it throws, this object is never constructed.
  depthindex = new scopeindex_t[pj.depthcapacity];
  memcpy(depthindex, o.depthindex, pj.depthcapacity * sizeof(depthindex[0]));
}
|
||||
|
||||
ParsedJson::iterator::iterator(iterator &&o):
|
||||
|
@ -1741,6 +1998,18 @@ bool ParsedJson::iterator::is_double() const {
|
|||
return get_type() == 'd';
|
||||
}
|
||||
|
||||
bool ParsedJson::iterator::is_true() const {
|
||||
return get_type() == 't';
|
||||
}
|
||||
|
||||
bool ParsedJson::iterator::is_false() const {
|
||||
return get_type() == 'f';
|
||||
}
|
||||
|
||||
bool ParsedJson::iterator::is_null() const {
|
||||
return get_type() == 'n';
|
||||
}
|
||||
|
||||
// True when type is one of the two opening brackets, '[' or '{'.
bool ParsedJson::iterator::is_object_or_array(uint8_t type) {
  switch (type) {
  case '[':
  case '{':
    return true;
  default:
    return false;
  }
}
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
/* auto-generated on Wed 13 Mar 2019 21:02:37 EDT. Do not edit! */
|
||||
/* auto-generated on Thu May 9 17:40:56 EDT 2019. Do not edit! */
|
||||
/* begin file include/simdjson/simdjson_version.h */
|
||||
// /include/simdjson/simdjson_version.h automatically generated by release.py, do not change by hand
|
||||
#ifndef SIMDJSON_INCLUDE_SIMDJSON_VERSION
|
||||
|
@ -27,18 +27,11 @@ struct simdjson {
|
|||
static const std::string& errorMsg(const int);
|
||||
};
|
||||
|
||||
#endif
|
||||
/* end file include/simdjson/simdjson.h */
|
||||
#endif/* end file include/simdjson/simdjson.h */
|
||||
/* begin file include/simdjson/portability.h */
|
||||
#ifndef SIMDJSON_PORTABILITY_H
|
||||
#define SIMDJSON_PORTABILITY_H
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#include <intrin.h>
|
||||
#else
|
||||
#include <x86intrin.h>
|
||||
#endif
|
||||
|
||||
#ifdef _MSC_VER
|
||||
/* Microsoft C/C++-compatible compiler */
|
||||
#include <intrin.h>
|
||||
|
@ -75,7 +68,11 @@ static inline int hamming(uint64_t input_num) {
|
|||
|
||||
#else
|
||||
#include <cstdint>
|
||||
#include <cstdlib>
|
||||
|
||||
#if defined(__BMI2__) || defined(__POPCOUNT__) || defined(__AVX2__)
|
||||
#include <x86intrin.h>
|
||||
#endif
|
||||
|
||||
static inline bool add_overflow(uint64_t value1, uint64_t value2, uint64_t *result) {
|
||||
return __builtin_uaddll_overflow(value1, value2, (unsigned long long*)result);
|
||||
|
@ -86,28 +83,34 @@ static inline bool mul_overflow(uint64_t value1, uint64_t value2, uint64_t *re
|
|||
|
||||
/* Number of consecutive zero bits starting at the least significant bit.
 * result might be undefined when input_num is zero */
static inline int trailingzeroes(uint64_t input_num) {
// _tzcnt_u64 is a BMI1 intrinsic, so it is only usable when __BMI__ is
// defined. <x86intrin.h> is pulled in above only under __BMI2__ / __AVX2__,
// hence __BMI2__ is required as well so the declaration is guaranteed to be
// visible.
#if defined(__BMI__) && defined(__BMI2__)
  return _tzcnt_u64(input_num);
#else
  return __builtin_ctzll(input_num);
#endif
}
|
||||
|
||||
/* Number of consecutive zero bits starting at the most significant bit.
 * result might be undefined when input_num is zero */
static inline int leadingzeroes(uint64_t input_num) {
// _lzcnt_u64 belongs to the LZCNT extension, not to BMI2: building with BMI2
// but without LZCNT would reject the intrinsic. __BMI2__ is still required
// because <x86intrin.h> is pulled in above only under __BMI2__ / __AVX2__.
#if defined(__LZCNT__) && defined(__BMI2__)
  return _lzcnt_u64(input_num);
#else
  return __builtin_clzll(input_num);
#endif
}
|
||||
|
||||
/* Number of bits set to 1 in input_num; well defined for every input,
 * including zero. */
static inline int hamming(uint64_t input_num) {
  // The previous guard tested __POPCOUNT__, which GCC and Clang never define
  // (their macro is __POPCNT__), so the intrinsic branch was dead code. The
  // builtin is what was effectively always used.
  return __builtin_popcountll(input_num);
}
|
||||
|
||||
#endif // _MSC_VER
|
||||
|
||||
|
||||
|
||||
// portable version of posix_memalign
|
||||
static inline void *aligned_malloc(size_t alignment, size_t size) {
|
||||
void *p;
|
||||
|
@ -123,6 +126,11 @@ static inline void *aligned_malloc(size_t alignment, size_t size) {
|
|||
return p;
|
||||
}
|
||||
|
||||
static inline char *aligned_malloc_char(size_t alignment, size_t size) {
|
||||
return (char*)aligned_malloc(alignment, size);
|
||||
}
|
||||
|
||||
#ifdef __AVX2__
|
||||
|
||||
#ifndef __clang__
|
||||
#ifndef _MSC_VER
|
||||
|
@ -144,6 +152,7 @@ static inline void _mm256_storeu2_m128i(__m128i *__addr_hi, __m128i *__addr_lo,
|
|||
#endif
|
||||
#endif
|
||||
|
||||
#endif // AVX_2
|
||||
|
||||
static inline void aligned_free(void *memblock) {
|
||||
if(memblock == nullptr) { return; }
|
||||
|
@ -156,6 +165,12 @@ static inline void aligned_free(void *memblock) {
|
|||
#endif
|
||||
}
|
||||
|
||||
|
||||
|
||||
// Typed convenience wrapper: forwards a char buffer to aligned_free.
static inline void aligned_free_char(char *memblock) {
  void *untyped_block = memblock;
  aligned_free(untyped_block);
}
|
||||
|
||||
#endif // SIMDJSON_PORTABILITY_H
|
||||
/* end file include/simdjson/portability.h */
|
||||
/* begin file include/simdjson/common_defs.h */
|
||||
|
@ -169,7 +184,13 @@ static inline void aligned_free(void *memblock) {
|
|||
#define SIMDJSON_MAXSIZE_BYTES 0xFFFFFFFF
|
||||
|
||||
// the input buf should be readable up to buf + SIMDJSON_PADDING
|
||||
#ifdef __AVX2__
|
||||
#define SIMDJSON_PADDING sizeof(__m256i)
|
||||
#else
|
||||
// this is a stopgap; there should be a better description of the
|
||||
// main loop and its behavior that abstracts over this
|
||||
#define SIMDJSON_PADDING 32
|
||||
#endif
|
||||
|
||||
#ifndef _MSC_VER
|
||||
// Implemented using Labels as Values which works in GCC and CLANG (and maybe
|
||||
|
@ -187,8 +208,8 @@ static inline void aligned_free(void *memblock) {
|
|||
#define ISALIGNED_N(ptr, n) (((uintptr_t)(ptr) & ((n)-1)) == 0)
|
||||
|
||||
#ifdef _MSC_VER
|
||||
|
||||
|
||||
// Visual Studio won't allow it:
|
||||
//#define ALLOW_SAME_PAGE_BUFFER_OVERRUN
|
||||
#define really_inline inline
|
||||
#define never_inline __declspec(noinline)
|
||||
|
||||
|
@ -204,6 +225,22 @@ static inline void aligned_free(void *memblock) {
|
|||
|
||||
#else
|
||||
|
||||
// for non-Visual Studio compilers, we assume that same-page buffer overrun is fine:
|
||||
#ifndef ALLOW_SAME_PAGE_BUFFER_OVERRUN
|
||||
#define ALLOW_SAME_PAGE_BUFFER_OVERRUN
|
||||
#endif
|
||||
|
||||
// The following is likely unnecessarily complex.
|
||||
#ifdef __SANITIZE_ADDRESS__
|
||||
// we have GCC, stuck with https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67368
|
||||
#undef ALLOW_SAME_PAGE_BUFFER_OVERRUN
|
||||
#elif defined(__has_feature)
|
||||
// we have CLANG?
|
||||
# if (__has_feature(address_sanitizer))
|
||||
#define ALLOW_SAME_PAGE_BUFFER_OVERRUN_QUALIFIER __attribute__((no_sanitize("address")))
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#define really_inline inline __attribute__((always_inline, unused))
|
||||
#define never_inline inline __attribute__((noinline, unused))
|
||||
|
||||
|
@ -219,8 +256,80 @@ static inline void aligned_free(void *memblock) {
|
|||
|
||||
#endif // MSC_VER
|
||||
|
||||
// if it does not apply, make it an empty macro
|
||||
#ifndef ALLOW_SAME_PAGE_BUFFER_OVERRUN_QUALIFIER
|
||||
#define ALLOW_SAME_PAGE_BUFFER_OVERRUN_QUALIFIER
|
||||
#endif
|
||||
#endif // SIMDJSON_COMMON_DEFS_H
|
||||
/* end file include/simdjson/common_defs.h */
|
||||
/* begin file include/simdjson/padded_string.h */
|
||||
#ifndef SIMDJSON_PADDING_STRING_H
|
||||
#define SIMDJSON_PADDING_STRING_H
|
||||
#include <memory>
|
||||
#include <cstring>
|
||||
// Low-level function to allocate memory with padding so we can read past the
// "length" bytes safely. If you must provide a pointer to some data, create it
// with this function: length is the max. size in bytes of the string. The
// caller is responsible for freeing the memory (aligned_free(...)).
|
||||
char *allocate_padded_buffer(size_t length);
|
||||
|
||||
// Simple string with padded allocation.
|
||||
// We deliberately forbid copies, users should rely on swap or move
|
||||
// constructors.
|
||||
class padded_string {
|
||||
public:
|
||||
explicit padded_string() noexcept : viable_size(0), data_ptr(nullptr) {}
|
||||
explicit padded_string(size_t length) noexcept
|
||||
: viable_size(length), data_ptr(allocate_padded_buffer(length)) {
|
||||
|
||||
if (data_ptr != nullptr)
|
||||
data_ptr[length] = '\0'; // easier when you need a c_str
|
||||
}
|
||||
explicit padded_string(char *data, size_t length) noexcept
|
||||
: viable_size(length), data_ptr(allocate_padded_buffer(length)) {
|
||||
if (data_ptr != nullptr) {
|
||||
memcpy(data_ptr, data, length);
|
||||
data_ptr[length] = '\0'; // easier when you need a c_str
|
||||
}
|
||||
}
|
||||
padded_string(std::string s) noexcept
|
||||
: viable_size(s.size()), data_ptr(allocate_padded_buffer(s.size())) {
|
||||
if (data_ptr != nullptr) {
|
||||
memcpy(data_ptr, s.data(), s.size());
|
||||
data_ptr[s.size()] = '\0'; // easier when you need a c_str
|
||||
}
|
||||
}
|
||||
padded_string(padded_string &&o) noexcept
|
||||
: viable_size(o.viable_size), data_ptr(o.data_ptr) {
|
||||
o.data_ptr = nullptr; // we take ownership
|
||||
}
|
||||
void swap(padded_string &o) {
|
||||
size_t tmp_viable_size = viable_size;
|
||||
char *tmp_data_ptr = data_ptr;
|
||||
viable_size = o.viable_size;
|
||||
data_ptr = o.data_ptr;
|
||||
o.data_ptr = tmp_data_ptr;
|
||||
o.viable_size = tmp_viable_size;
|
||||
}
|
||||
|
||||
~padded_string() { aligned_free_char(data_ptr); }
|
||||
|
||||
size_t size() const { return viable_size; }
|
||||
|
||||
size_t length() const { return viable_size; }
|
||||
|
||||
char *data() const { return data_ptr; }
|
||||
|
||||
private:
|
||||
padded_string &operator=(const padded_string &o) = delete;
|
||||
padded_string(const padded_string &o) = delete;
|
||||
|
||||
size_t viable_size;
|
||||
char *data_ptr;
|
||||
};
|
||||
|
||||
#endif
|
||||
/* end file include/simdjson/padded_string.h */
|
||||
/* begin file include/simdjson/jsoncharutils.h */
|
||||
#ifndef SIMDJSON_JSONCHARUTILS_H
|
||||
#define SIMDJSON_JSONCHARUTILS_H
|
||||
|
@ -273,34 +382,166 @@ really_inline uint32_t is_structural_or_whitespace(uint8_t c) {
|
|||
return structural_or_whitespace[c];
|
||||
}
|
||||
|
||||
const signed char digittoval[256] = {
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8,
|
||||
9, -1, -1, -1, -1, -1, -1, -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1};
|
||||
|
||||
const uint32_t digittoval32[886] = {
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0x0, 0x1, 0x2, 0x3, 0x4, 0x5,
|
||||
0x6, 0x7, 0x8, 0x9, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xa,
|
||||
0xb, 0xc, 0xd, 0xe, 0xf, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xa, 0xb, 0xc, 0xd, 0xe,
|
||||
0xf, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0x0, 0x10, 0x20, 0x30, 0x40, 0x50,
|
||||
0x60, 0x70, 0x80, 0x90, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xa0,
|
||||
0xb0, 0xc0, 0xd0, 0xe0, 0xf0, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xa0, 0xb0, 0xc0, 0xd0, 0xe0,
|
||||
0xf0, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0x0, 0x100, 0x200, 0x300, 0x400, 0x500,
|
||||
0x600, 0x700, 0x800, 0x900, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xa00,
|
||||
0xb00, 0xc00, 0xd00, 0xe00, 0xf00, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xa00, 0xb00, 0xc00, 0xd00, 0xe00,
|
||||
0xf00, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0x0, 0x1000, 0x2000, 0x3000, 0x4000, 0x5000,
|
||||
0x6000, 0x7000, 0x8000, 0x9000, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xa000,
|
||||
0xb000, 0xc000, 0xd000, 0xe000, 0xf000, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xa000, 0xb000, 0xc000, 0xd000, 0xe000,
|
||||
0xf000, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
|
||||
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF};
|
||||
// returns a value with the high 16 bits set if not valid
|
||||
// otherwise returns the conversion of the 4 hex digits at src into the bottom 16 bits of the 32-bit
|
||||
// return register
|
||||
//
|
||||
// see https://lemire.me/blog/2019/04/17/parsing-short-hexadecimal-strings-efficiently/
|
||||
static inline uint32_t hex_to_u32_nocheck(const uint8_t *src) {// strictly speaking, static inline is a C-ism
|
||||
// all these will sign-extend the chars looked up, placing 1-bits into the high 28 bits of every
|
||||
// invalid value. After the shifts, this will *still* result in the outcome that the high 16 bits of any
|
||||
// value with any invalid char will be all 1's. We check for this in the caller.
|
||||
int32_t v1 = digittoval[src[0]];
|
||||
int32_t v2 = digittoval[src[1]];
|
||||
int32_t v3 = digittoval[src[2]];
|
||||
int32_t v4 = digittoval[src[3]];
|
||||
return static_cast<uint32_t>(v1 << 12 | v2 << 8 | v3 << 4 | v4);
|
||||
uint32_t v1 = digittoval32[630 + src[0]];
|
||||
uint32_t v2 = digittoval32[420 + src[1]];
|
||||
uint32_t v3 = digittoval32[210 + src[2]];
|
||||
uint32_t v4 = digittoval32[0 + src[3]];
|
||||
return v1 | v2 | v3 | v4;
|
||||
}
|
||||
|
||||
// given a code point cp, writes to c
|
||||
|
@ -557,13 +798,6 @@ static inline void print_with_escapes(const char *src, std::ostream &os,
|
|||
#include <string>
|
||||
|
||||
|
||||
// low-level function to allocate memory with padding so we can read passed the "length" bytes
|
||||
// safely.
|
||||
// if you must provide a pointer to some data, create it with this function:
|
||||
// length is the max. size in bytes of the string
|
||||
// caller is responsible to free the memory (free(...))
|
||||
char * allocate_padded_buffer(size_t length);
|
||||
|
||||
|
||||
|
||||
|
||||
|
@ -573,16 +807,16 @@ char * allocate_padded_buffer(size_t length);
|
|||
// throws exceptions in case of failure
|
||||
// first element of the pair is a string (null terminated)
|
||||
// whereas the second element is the length.
|
||||
// caller is responsible to free (free((void*)result.data())))
|
||||
// caller is responsible to free (aligned_free((void*)result.data())))
|
||||
//
|
||||
// throws an exception if the file cannot be opened, use try/catch
|
||||
// try {
|
||||
// p = get_corpus(filename);
|
||||
// } catch (const std::exception& e) {
|
||||
// free((void*)p.data());//use aligned_free if you plan to use VisualStudio
|
||||
// aligned_free((void*)p.data());
|
||||
// std::cout << "Could not load the file " << filename << std::endl;
|
||||
// }
|
||||
std::string_view get_corpus(const std::string& filename);
|
||||
padded_string get_corpus(const std::string& filename);
|
||||
|
||||
|
||||
#endif
|
||||
|
@ -35789,29 +36023,31 @@ static inline void avxcheckOverlong(__m256i current_bytes,
|
|||
__m256i *has_error) {
|
||||
__m256i off1_hibits = push_last_byte_of_a_to_b(previous_hibits, hibits);
|
||||
__m256i initial_mins = _mm256_shuffle_epi8(
|
||||
_mm256_setr_epi8(-128, -128, -128, -128, -128, -128, -128, -128, -128,
|
||||
-128, -128, -128, // 10xx => false
|
||||
0xC2, -128, // 110x
|
||||
0xE1, // 1110
|
||||
0xF1, -128, -128, -128, -128, -128, -128, -128, -128,
|
||||
_mm256_setr_epi8(-128, -128, -128, -128, -128, -128, -128, -128,
|
||||
-128, -128, -128, -128, // 10xx => false
|
||||
0xC2, -128, // 110x
|
||||
0xE1, // 1110
|
||||
0xF1),
|
||||
0xF1, // 1111
|
||||
-128, -128, -128, -128, -128, -128, -128, -128,
|
||||
-128, -128, -128, -128, // 10xx => false
|
||||
0xC2, -128, // 110x
|
||||
0xE1, // 1110
|
||||
0xF1), // 1111
|
||||
off1_hibits);
|
||||
|
||||
__m256i initial_under = _mm256_cmpgt_epi8(initial_mins, off1_current_bytes);
|
||||
|
||||
__m256i second_mins = _mm256_shuffle_epi8(
|
||||
_mm256_setr_epi8(-128, -128, -128, -128, -128, -128, -128, -128, -128,
|
||||
-128, -128, -128, // 10xx => false
|
||||
127, 127, // 110x => true
|
||||
0xA0, // 1110
|
||||
0x90, -128, -128, -128, -128, -128, -128, -128, -128,
|
||||
_mm256_setr_epi8(-128, -128, -128, -128, -128, -128, -128, -128,
|
||||
-128, -128, -128, -128, // 10xx => false
|
||||
127, 127, // 110x => true
|
||||
0xA0, // 1110
|
||||
0x90),
|
||||
0x90, // 1111
|
||||
-128, -128, -128, -128, -128, -128, -128, -128,
|
||||
-128, -128, -128, -128, // 10xx => false
|
||||
127, 127, // 110x => true
|
||||
0xA0, // 1110
|
||||
0x90), // 1111
|
||||
off1_hibits);
|
||||
__m256i second_under = _mm256_cmpgt_epi8(second_mins, current_bytes);
|
||||
*has_error = _mm256_or_si256(*has_error,
|
||||
|
@ -35885,6 +36121,10 @@ static inline size_t jsonminify(const std::string_view & p, char *out) {
|
|||
return jsonminify(p.data(), p.size(), out);
|
||||
}
|
||||
|
||||
static inline size_t jsonminify(const padded_string & p, char *out) {
|
||||
return jsonminify(p.data(), p.size(), out);
|
||||
}
|
||||
|
||||
#endif
|
||||
/* end file include/simdjson/jsonminifier.h */
|
||||
/* begin file include/simdjson/parsedjson.h */
|
||||
|
@ -35976,7 +36216,14 @@ public:
|
|||
tape[saved_loc] |= val;
|
||||
}
|
||||
|
||||
struct InvalidJSON : public std::exception {
|
||||
const char * what () const throw () {
|
||||
return "JSON document is invalid";
|
||||
}
|
||||
};
|
||||
|
||||
struct iterator {
|
||||
// might throw InvalidJSON if ParsedJson is invalid
|
||||
explicit iterator(ParsedJson &pj_);
|
||||
~iterator();
|
||||
|
||||
|
@ -36034,6 +36281,12 @@ public:
|
|||
|
||||
bool is_double() const;
|
||||
|
||||
bool is_true() const;
|
||||
|
||||
bool is_false() const;
|
||||
|
||||
bool is_null() const;
|
||||
|
||||
static bool is_object_or_array(uint8_t type);
|
||||
|
||||
// when at {, go one level deep, looking for a given key
|
||||
|
@ -36234,9 +36487,13 @@ really_inline bool handle_unicode_codepoint(const uint8_t **src_ptr, uint8_t **d
|
|||
return offset > 0;
|
||||
}
|
||||
|
||||
WARN_UNUSED
|
||||
really_inline bool parse_string(const uint8_t *buf, UNUSED size_t len,
|
||||
ParsedJson &pj, UNUSED const uint32_t depth, uint32_t offset) {
|
||||
#ifdef __ARM_NEON
|
||||
#include <arm_neon.h>
|
||||
#endif
|
||||
|
||||
WARN_UNUSED ALLOW_SAME_PAGE_BUFFER_OVERRUN_QUALIFIER
|
||||
really_inline bool parse_string(UNUSED const uint8_t *buf, UNUSED size_t len,
|
||||
ParsedJson &pj, UNUSED const uint32_t depth, UNUSED uint32_t offset) {
|
||||
#ifdef SIMDJSON_SKIPSTRINGPARSING // for performance analysis, it is sometimes useful to skip parsing
|
||||
pj.write_tape(0, '"');// don't bother with the string parsing at all
|
||||
return true; // always succeeds
|
||||
|
@ -36246,6 +36503,10 @@ really_inline bool parse_string(const uint8_t *buf, UNUSED size_t len,
|
|||
uint8_t *dst = pj.current_string_buf_loc + sizeof(uint32_t);
|
||||
const uint8_t *const start_of_string = dst;
|
||||
while (1) {
|
||||
#ifdef __AVX2__
|
||||
// this can read up to 31 bytes beyond the buffer size, but we require
|
||||
// SIMDJSON_PADDING of padding
|
||||
static_assert(sizeof(__m256i) - 1 <= SIMDJSON_PADDING);
|
||||
__m256i v = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(src));
|
||||
// store to dest unconditionally - we can overwrite the bits we don't like
|
||||
// later
|
||||
|
@ -36255,6 +36516,36 @@ really_inline bool parse_string(const uint8_t *buf, UNUSED size_t len,
|
|||
auto quote_mask = _mm256_cmpeq_epi8(v, _mm256_set1_epi8('"'));
|
||||
auto quote_bits =
|
||||
static_cast<uint32_t>(_mm256_movemask_epi8(quote_mask));
|
||||
#else
|
||||
// this can read up to 31 bytes beyond the buffer size, but we require
|
||||
// SIMDJSON_PADDING of padding
|
||||
static_assert(2 * sizeof(uint8x16_t) - 1 <= SIMDJSON_PADDING);
|
||||
uint8x16_t v0 = vld1q_u8(src);
|
||||
uint8x16_t v1 = vld1q_u8(src+16);
|
||||
vst1q_u8(dst, v0);
|
||||
vst1q_u8(dst+16, v1);
|
||||
|
||||
uint8x16_t bs_mask = vmovq_n_u8('\\');
|
||||
uint8x16_t qt_mask = vmovq_n_u8('"');
|
||||
const uint8x16_t bitmask = { 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80,
|
||||
0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80};
|
||||
uint8x16_t cmp_bs_0 = vceqq_u8(v0, bs_mask);
|
||||
uint8x16_t cmp_bs_1 = vceqq_u8(v1, bs_mask);
|
||||
uint8x16_t cmp_qt_0 = vceqq_u8(v0, qt_mask);
|
||||
uint8x16_t cmp_qt_1 = vceqq_u8(v1, qt_mask);
|
||||
|
||||
cmp_bs_0 = vandq_u8(cmp_bs_0, bitmask);
|
||||
cmp_bs_1 = vandq_u8(cmp_bs_1, bitmask);
|
||||
cmp_qt_0 = vandq_u8(cmp_qt_0, bitmask);
|
||||
cmp_qt_1 = vandq_u8(cmp_qt_1, bitmask);
|
||||
|
||||
uint8x16_t sum0 = vpaddq_u8(cmp_bs_0, cmp_bs_1);
|
||||
uint8x16_t sum1 = vpaddq_u8(cmp_qt_0, cmp_qt_1);
|
||||
sum0 = vpaddq_u8(sum0, sum1);
|
||||
sum0 = vpaddq_u8(sum0, sum0);
|
||||
auto bs_bits = vgetq_lane_u32(vreinterpretq_u32_u8(sum0), 0);
|
||||
auto quote_bits = vgetq_lane_u32(vreinterpretq_u32_u8(sum0), 1);
|
||||
#endif
|
||||
if(((bs_bits - 1) & quote_bits) != 0 ) {
|
||||
// we encountered quotes first. Move dst to point to quotes and exit
|
||||
|
||||
|
@ -36414,7 +36705,7 @@ static inline bool is_integer(char c) {
|
|||
// probably frequent and it is harder than it looks. We are building all of this
|
||||
// just to differentiate between 0x1 (invalid), 0,1 (valid) 0e1 (valid)...
|
||||
const bool structural_or_whitespace_or_exponent_or_decimal_negated[256] = {
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1,
|
||||
|
@ -36427,11 +36718,13 @@ const bool structural_or_whitespace_or_exponent_or_decimal_negated[256] = {
|
|||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
|
||||
|
||||
really_inline bool
|
||||
is_not_structural_or_whitespace_or_exponent_or_decimal(unsigned char c) {
|
||||
is_not_structural_or_whitespace_or_exponent_or_decimal_or_null(unsigned char c) {
|
||||
return structural_or_whitespace_or_exponent_or_decimal_negated[c];
|
||||
}
|
||||
|
||||
#ifdef __AVX2__
|
||||
#define SWAR_NUMBER_PARSING
|
||||
#endif
|
||||
|
||||
#ifdef SWAR_NUMBER_PARSING
|
||||
|
||||
|
@ -36441,6 +36734,9 @@ is_not_structural_or_whitespace_or_exponent_or_decimal(unsigned char c) {
|
|||
// http://0x80.pl/articles/swar-digits-validate.html
|
||||
static inline bool is_made_of_eight_digits_fast(const char *chars) {
|
||||
uint64_t val;
|
||||
// this can read up to 7 bytes beyond the buffer size, but we require
|
||||
// SIMDJSON_PADDING of padding
|
||||
static_assert(7 <= SIMDJSON_PADDING);
|
||||
memcpy(&val, chars, 8);
|
||||
// a branchy method might be faster:
|
||||
// return (( val & 0xF0F0F0F0F0F0F0F0 ) == 0x3030303030303030)
|
||||
|
@ -36454,6 +36750,9 @@ static inline bool is_made_of_eight_digits_fast(const char *chars) {
|
|||
// this is more efficient apparently than the scalar code above (fewer instructions)
|
||||
static inline bool is_made_of_eight_digits_fast(const char *chars) {
|
||||
__m64 val;
|
||||
// this can read up to 7 bytes beyond the buffer size, but we require
|
||||
// SIMDJSON_PADDING of padding
|
||||
static_assert(7 <= SIMDJSON_PADDING);
|
||||
memcpy(&val, chars, 8);
|
||||
__m64 base = _mm_sub_pi8(val,_mm_set1_pi8('0'));
|
||||
__m64 basecmp = _mm_subs_pu8(base,_mm_set1_pi8(9));
|
||||
|
@ -36461,6 +36760,23 @@ static inline bool is_made_of_eight_digits_fast(const char *chars) {
|
|||
}
|
||||
#endif
|
||||
|
||||
// clang-format off
|
||||
/***
|
||||
Should parse_eight_digits_unrolled be out of the question, one could
|
||||
use a standard approach like the following:
|
||||
|
||||
static inline uint32_t newparse_eight_digits_unrolled(const char *chars) {
|
||||
uint64_t val;
|
||||
memcpy(&val, chars, sizeof(uint64_t));
|
||||
val = (val & 0x0F0F0F0F0F0F0F0F) * 2561 >> 8;
|
||||
val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16;
|
||||
return (val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32;
|
||||
}
|
||||
|
||||
credit: https://johnnylee-sde.github.io/Fast-numeric-string-to-int/
|
||||
*/
|
||||
// clang-format on
|
||||
|
||||
static inline uint32_t parse_eight_digits_unrolled(const char *chars) {
|
||||
// this actually computes *16* values so we are being wasteful.
|
||||
const __m128i ascii0 = _mm_set1_epi8('0');
|
||||
|
@ -36575,14 +36891,14 @@ parse_float(const uint8_t *const buf,
|
|||
#endif
|
||||
return false;
|
||||
}
|
||||
int exponent = (negexp ? -expnumber : expnumber);
|
||||
if ((exponent > 308) || (exponent < -308)) {
|
||||
if (expnumber > 308) {
|
||||
// we refuse to parse this
|
||||
#ifdef JSON_TEST_NUMBERS // for unit testing
|
||||
foundInvalidNumber(buf + offset);
|
||||
#endif
|
||||
return false;
|
||||
}
|
||||
int exponent = (negexp ? -expnumber : expnumber);
|
||||
i *= power_of_ten[308 + exponent];
|
||||
}
|
||||
if(is_not_structural_or_whitespace(*p)) {
|
||||
|
@ -36694,10 +37010,10 @@ static really_inline bool parse_number(const uint8_t *const buf,
|
|||
}
|
||||
const char *const startdigits = p;
|
||||
|
||||
int64_t i;
|
||||
uint64_t i; // an unsigned int avoids signed overflows (which are bad)
|
||||
if (*p == '0') { // 0 cannot be followed by an integer
|
||||
++p;
|
||||
if (is_not_structural_or_whitespace_or_exponent_or_decimal(*p)) {
|
||||
if (is_not_structural_or_whitespace_or_exponent_or_decimal_or_null(*p)) {
|
||||
#ifdef JSON_TEST_NUMBERS // for unit testing
|
||||
foundInvalidNumber(buf + offset);
|
||||
#endif
|
||||
|
@ -36744,7 +37060,6 @@ static really_inline bool parse_number(const uint8_t *const buf,
|
|||
if (is_made_of_eight_digits_fast(p)) {
|
||||
i = i * 100000000 + parse_eight_digits_unrolled(p);
|
||||
p += 8;
|
||||
// exponent -= 8;
|
||||
}
|
||||
#endif
|
||||
while (is_integer(*p)) {
|
||||
|
@ -36792,9 +37107,15 @@ static really_inline bool parse_number(const uint8_t *const buf,
|
|||
#endif
|
||||
return false;
|
||||
}
|
||||
if(expnumber > 308) {
|
||||
// we refuse to parse this
|
||||
#ifdef JSON_TEST_NUMBERS // for unit testing
|
||||
foundInvalidNumber(buf + offset);
|
||||
#endif
|
||||
return false;
|
||||
}
|
||||
exponent += (negexp ? -expnumber : expnumber);
|
||||
}
|
||||
i = negative ? -i : i;
|
||||
if ((exponent != 0) || (expnumber != 0)) {
|
||||
if (unlikely(digitcount >= 19)) { // this is uncommon!!!
|
||||
// this is almost never going to get called!!!
|
||||
|
@ -36811,16 +37132,9 @@ static really_inline bool parse_number(const uint8_t *const buf,
|
|||
foundFloat(0.0, buf + offset);
|
||||
#endif
|
||||
} else {
|
||||
if ((exponent > 308) || (exponent < -308)) {
|
||||
// we refuse to parse this
|
||||
#ifdef JSON_TEST_NUMBERS // for unit testing
|
||||
foundInvalidNumber(buf + offset);
|
||||
#endif
|
||||
return false;
|
||||
}
|
||||
double d = i;
|
||||
d = negative ? -d : d;
|
||||
d *= power_of_ten[308 + exponent];
|
||||
// d = negative ? -d : d;
|
||||
pj.write_tape_double(d);
|
||||
#ifdef JSON_TEST_NUMBERS // for unit testing
|
||||
foundFloat(d, buf + offset);
|
||||
|
@ -36831,6 +37145,7 @@ static really_inline bool parse_number(const uint8_t *const buf,
|
|||
return parse_large_integer(buf, pj, offset,
|
||||
found_minus);
|
||||
}
|
||||
i = negative ? 0-i : i;
|
||||
pj.write_tape_s64(i);
|
||||
#ifdef JSON_TEST_NUMBERS // for unit testing
|
||||
foundInteger(i, buf + offset);
|
||||
|
@ -36862,20 +37177,23 @@ int unified_machine(const char *buf, size_t len, ParsedJson &pj);
|
|||
/* begin file include/simdjson/jsonparser.h */
|
||||
#ifndef SIMDJSON_JSONPARSER_H
|
||||
#define SIMDJSON_JSONPARSER_H
|
||||
#include <string>
|
||||
|
||||
|
||||
// Parse a document found in buf, need to preallocate ParsedJson.
|
||||
// Parse a document found in buf.
|
||||
// You need to preallocate ParsedJson with a capacity of len (e.g., pj.allocateCapacity(len)).
|
||||
// Return 0 on success, an error code from simdjson/simdjson.h otherwise
|
||||
// You can also check validit by calling pj.isValid(). The same ParsedJson can be reused for other documents.
|
||||
// You can also check validity by calling pj.isValid(). The same ParsedJson can be reused for other documents.
|
||||
//
|
||||
// If reallocifneeded is true (default) then a temporary buffer is created when needed during processing
|
||||
// (a copy of the input string is made).
|
||||
// The input buf should be readable up to buf + len + SIMDJSON_PADDING if reallocifneeded is false,
|
||||
// all bytes at and after buf + len are ignored (can be garbage).
|
||||
// The ParsedJson object can be reused.
|
||||
WARN_UNUSED
|
||||
int json_parse(const uint8_t *buf, size_t len, ParsedJson &pj, bool reallocifneeded = true);
|
||||
|
||||
// Parse a document found in buf, need to preallocate ParsedJson.
|
||||
// Parse a document found in buf.
|
||||
// You need to preallocate ParsedJson with a capacity of len (e.g., pj.allocateCapacity(len)).
|
||||
// Return 0 on success, an error code from simdjson/simdjson.h otherwise. You can also check validity
|
||||
// by calling pj.isValid(). The same ParsedJson can be reused for other documents.
|
||||
//
|
||||
|
@ -36883,22 +37201,48 @@ int json_parse(const uint8_t *buf, size_t len, ParsedJson &pj, bool reallocifnee
|
|||
// (a copy of the input string is made).
|
||||
// The input buf should be readable up to buf + len + SIMDJSON_PADDING if reallocifneeded is false,
|
||||
// all bytes at and after buf + len are ignored (can be garbage).
|
||||
// The ParsedJson object can be reused.
|
||||
WARN_UNUSED
|
||||
inline int json_parse(const char * buf, size_t len, ParsedJson &pj, bool reallocifneeded = true) {
|
||||
return json_parse(reinterpret_cast<const uint8_t *>(buf), len, pj, reallocifneeded);
|
||||
}
|
||||
|
||||
// Parse a document found in buf, need to preallocate ParsedJson.
|
||||
// Parse a document found in buf.
|
||||
// You need to preallocate ParsedJson with a capacity of len (e.g., pj.allocateCapacity(len)).
|
||||
// Return 0 on success, an error code from simdjson/simdjson.h otherwise. You can also check validity
|
||||
// by calling pj.isValid(). The same ParsedJson can be reused for other documents.
|
||||
//
|
||||
// If reallocifneeded is true (default) then a temporary buffer is created when needed during processing
|
||||
// (a copy of the input string is made).
|
||||
// the input s should be readable up to s.data() + s.size() + SIMDJSON_PADDING if reallocifneeded is false,
|
||||
// The input s should be readable up to s.data() + s.size() + SIMDJSON_PADDING if reallocifneeded is false,
|
||||
// all bytes at and after s.data()+s.size() are ignored (can be garbage).
|
||||
// The ParsedJson object can be reused.
|
||||
//WARN_UNUSED
|
||||
//inline int json_parse(const std::string_view &s, ParsedJson &pj, bool reallocifneeded = true) {
|
||||
// return json_parse(s.data(), s.size(), pj, reallocifneeded);
|
||||
//}
|
||||
|
||||
|
||||
|
||||
// Parse a document found in string s.
|
||||
// You need to preallocate ParsedJson with a capacity of len (e.g., pj.allocateCapacity(len)).
|
||||
// Return 0 on success, an error code from simdjson/simdjson.h otherwise. You can also check validity
|
||||
// by calling pj.isValid(). The same ParsedJson can be reused for other documents.
|
||||
//
|
||||
// A temporary buffer is created when needed during processing
|
||||
// (a copy of the input string is made).
|
||||
WARN_UNUSED
|
||||
inline int json_parse(const std::string_view &s, ParsedJson &pj, bool reallocifneeded = true) {
|
||||
return json_parse(s.data(), s.size(), pj, reallocifneeded);
|
||||
inline int json_parse(const std::string &s, ParsedJson &pj) {
|
||||
return json_parse(s.data(), s.length(), pj, true);
|
||||
}
|
||||
|
||||
// Parse a document found in string s.
|
||||
// You need to preallocate ParsedJson with a capacity of len (e.g., pj.allocateCapacity(len)).
|
||||
// Return 0 on success, an error code from simdjson/simdjson.h otherwise. You can also check validity
|
||||
// by calling pj.isValid(). The same ParsedJson can be reused for other documents.
|
||||
WARN_UNUSED
|
||||
inline int json_parse(const padded_string &s, ParsedJson &pj) {
|
||||
return json_parse(s.data(), s.length(), pj, false);
|
||||
}
|
||||
|
||||
|
||||
|
@ -36931,9 +37275,33 @@ WARN_UNUSED
|
|||
// (a copy of the input string is made).
|
||||
// The input s should be readable up to s.data() + s.size() + SIMDJSON_PADDING if reallocifneeded is false,
|
||||
// all bytes at and after s.data()+s.size() are ignored (can be garbage).
|
||||
inline ParsedJson build_parsed_json(const std::string_view &s, bool reallocifneeded = true) {
|
||||
return build_parsed_json(s.data(), s.size(), reallocifneeded);
|
||||
//inline ParsedJson build_parsed_json(const std::string_view &s, bool reallocifneeded = true) {
|
||||
// return build_parsed_json(s.data(), s.size(), reallocifneeded);
|
||||
//}
|
||||
|
||||
// Parse a document found in string s.
|
||||
// Unlike json_parse, the ParsedJson is created and returned by this function, so the caller does not pass in a preallocated one.
|
||||
// Returns the ParsedJson (call it pj) holding the result of the parse. You can check validity
|
||||
// by calling pj.isValid(). The same ParsedJson can be reused for other documents.
|
||||
//
|
||||
// A temporary buffer is created when needed during processing
|
||||
// (a copy of the input string is made).
|
||||
WARN_UNUSED
|
||||
inline ParsedJson build_parsed_json(const std::string &s) {
|
||||
return build_parsed_json(s.data(), s.length(), true);
|
||||
}
|
||||
|
||||
|
||||
// Parse a document found in string s.
|
||||
// Unlike json_parse, the ParsedJson is created and returned by this function, so the caller does not pass in a preallocated one.
|
||||
// Returns the ParsedJson (call it pj) holding the result of the parse. You can check validity
|
||||
// by calling pj.isValid(). The same ParsedJson can be reused for other documents.
|
||||
WARN_UNUSED
|
||||
inline ParsedJson build_parsed_json(const padded_string &s) {
|
||||
return build_parsed_json(s.data(), s.length(), false);
|
||||
}
|
||||
|
||||
|
||||
|
||||
#endif
|
||||
/* end file include/simdjson/jsonparser.h */
|
||||
|
|
|
@ -7,28 +7,27 @@ char * allocate_padded_buffer(size_t length) {
|
|||
//return (char *) malloc(length + SIMDJSON_PADDING);
|
||||
// However, we might as well align to cache lines...
|
||||
size_t totalpaddedlength = length + SIMDJSON_PADDING;
|
||||
char *padded_buffer = (char *) aligned_malloc(64, totalpaddedlength);
|
||||
char *padded_buffer = aligned_malloc_char(64, totalpaddedlength);
|
||||
return padded_buffer;
|
||||
}
|
||||
|
||||
std::string_view get_corpus(const std::string& filename) {
|
||||
padded_string get_corpus(const std::string& filename) {
|
||||
std::FILE *fp = std::fopen(filename.c_str(), "rb");
|
||||
if (fp != nullptr) {
|
||||
std::fseek(fp, 0, SEEK_END);
|
||||
size_t len = std::ftell(fp);
|
||||
char * buf = allocate_padded_buffer(len);
|
||||
if(buf == nullptr) {
|
||||
padded_string s(len);
|
||||
if(s.data() == nullptr) {
|
||||
std::fclose(fp);
|
||||
throw std::runtime_error("could not allocate memory");
|
||||
}
|
||||
std::rewind(fp);
|
||||
size_t readb = std::fread(buf, 1, len, fp);
|
||||
size_t readb = std::fread(s.data(), 1, len, fp);
|
||||
std::fclose(fp);
|
||||
if(readb != len) {
|
||||
aligned_free(buf);
|
||||
throw std::runtime_error("could not read the data");
|
||||
}
|
||||
return std::string_view(buf,len);
|
||||
return s;
|
||||
}
|
||||
throw std::runtime_error("could not load corpus");
|
||||
}
|
||||
|
|
|
@ -23,7 +23,6 @@
|
|||
#ifdef SIMDJSON_UTF8VALIDATE
|
||||
#include "simdjson/simdutf8check.h"
|
||||
#endif
|
||||
using namespace std;
|
||||
|
||||
#define TRANSPOSE
|
||||
|
||||
|
@ -501,9 +500,9 @@ WARN_UNUSED
|
|||
/*never_inline*/ bool find_structural_bits(const uint8_t *buf, size_t len,
|
||||
ParsedJson &pj) {
|
||||
if (len > pj.bytecapacity) {
|
||||
cerr << "Your ParsedJson object only supports documents up to "
|
||||
std::cerr << "Your ParsedJson object only supports documents up to "
|
||||
<< pj.bytecapacity << " bytes but you are trying to process " << len
|
||||
<< " bytes\n";
|
||||
<< " bytes" << std::endl;
|
||||
return false;
|
||||
}
|
||||
uint32_t *base_ptr = pj.structural_indexes;
|
||||
|
|
|
@ -12,14 +12,15 @@
|
|||
#define PATH_SEP '/'
|
||||
|
||||
|
||||
using namespace std;
|
||||
|
||||
WARN_UNUSED
|
||||
really_inline bool is_valid_true_atom(const uint8_t *loc) {
|
||||
uint64_t tv = *reinterpret_cast<const uint64_t *>("true ");
|
||||
uint64_t mask4 = 0x00000000ffffffff;
|
||||
uint32_t error = 0;
|
||||
uint64_t locval; // we want to avoid unaligned 64-bit loads (undefined in C/C++)
|
||||
// this can read up to 7 bytes beyond the buffer size, but we require
|
||||
// SIMDJSON_PADDING of padding
|
||||
static_assert(sizeof(uint64_t) - 1 <= SIMDJSON_PADDING);
|
||||
std::memcpy(&locval, loc, sizeof(uint64_t));
|
||||
error = (locval & mask4) ^ tv;
|
||||
error |= is_not_structural_or_whitespace(loc[4]);
|
||||
|
@ -40,6 +41,9 @@ really_inline bool is_valid_false_atom(const uint8_t *loc) {
|
|||
// ignored
|
||||
uint64_t error = 0;
|
||||
uint64_t locval; // we want to avoid unaligned 64-bit loads (undefined in C/C++)
|
||||
// this can read up to 7 bytes beyond the buffer size, but we require
|
||||
// SIMDJSON_PADDING of padding
|
||||
static_assert(sizeof(uint64_t) - 1 <= SIMDJSON_PADDING);
|
||||
std::memcpy(&locval, loc, sizeof(uint64_t));
|
||||
error = (locval & mask5) ^ fv;
|
||||
error |= is_not_structural_or_whitespace(loc[5]);
|
||||
|
@ -52,6 +56,9 @@ really_inline bool is_valid_null_atom(const uint8_t *loc) {
|
|||
uint64_t mask4 = 0x00000000ffffffff;
|
||||
uint32_t error = 0;
|
||||
uint64_t locval; // we want to avoid unaligned 64-bit loads (undefined in C/C++)
|
||||
// this can read up to 7 bytes beyond the buffer size, but we require
|
||||
// SIMDJSON_PADDING of padding
|
||||
static_assert(sizeof(uint64_t) - 1 <= SIMDJSON_PADDING);
|
||||
std::memcpy(&locval, loc, sizeof(uint64_t));
|
||||
error = (locval & mask4) ^ nv;
|
||||
error |= is_not_structural_or_whitespace(loc[4]);
|
||||
|
|
|
@ -40,7 +40,6 @@ bool fastjson_parse(const char *input) {
|
|||
|
||||
|
||||
using namespace rapidjson;
|
||||
using namespace std;
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
bool verbose = false;
|
||||
|
@ -55,14 +54,14 @@ int main(int argc, char *argv[]) {
|
|||
abort ();
|
||||
}
|
||||
if (optind >= argc) {
|
||||
cerr << "Usage: " << argv[0] << " <jsonfile>\n";
|
||||
cerr << "Or " << argv[0] << " -v <jsonfile>\n";
|
||||
std::cerr << "Usage: " << argv[0] << " <jsonfile>" << std::endl;
|
||||
std::cerr << "Or " << argv[0] << " -v <jsonfile>" << std::endl;
|
||||
exit(1);
|
||||
}
|
||||
const char * filename = argv[optind];
|
||||
std::string_view p;
|
||||
padded_string p;
|
||||
try {
|
||||
p = get_corpus(filename);
|
||||
get_corpus(filename).swap(p);
|
||||
} catch (const std::exception& e) { // caught by reference to base
|
||||
std::cout << "Could not load the file " << filename << std::endl;
|
||||
return EXIT_FAILURE;
|
||||
|
@ -83,7 +82,7 @@ int main(int argc, char *argv[]) {
|
|||
std::cerr << "can't allocate memory" << std::endl;
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
bool ours_correct = json_parse(p, pj) == 0; // returns 0 on success
|
||||
bool ours_correct = (json_parse(p, pj) == 0); // returns 0 on success
|
||||
|
||||
rapidjson::Document d;
|
||||
|
||||
|
@ -103,7 +102,7 @@ int main(int argc, char *argv[]) {
|
|||
void *state;
|
||||
bool ultrajson_correct = ((UJDecode(buffer, p.size(), NULL, &state) == NULL) == false);
|
||||
|
||||
auto tokens = make_unique<jsmntok_t[]>(p.size());
|
||||
auto tokens = std::make_unique<jsmntok_t[]>(p.size());
|
||||
bool jsmn_correct = false;
|
||||
if(tokens == nullptr) {
|
||||
printf("Failed to alloc memory for jsmn\n");
|
||||
|
@ -145,7 +144,6 @@ int main(int argc, char *argv[]) {
|
|||
printf("cjson : %s \n", cjson_correct ? "correct":"invalid");
|
||||
printf("jsoncpp : %s \n", isjsoncppok ? "correct":"invalid");
|
||||
|
||||
aligned_free((void*)p.data());
|
||||
free(buffer);
|
||||
return EXIT_SUCCESS;
|
||||
}
|
||||
|
|
|
@ -65,9 +65,9 @@ bool validate(const char *dirname) {
|
|||
} else {
|
||||
strcpy(fullpath + dirlen, name);
|
||||
}
|
||||
std::string_view p;
|
||||
padded_string p;
|
||||
try {
|
||||
p = get_corpus(fullpath);
|
||||
get_corpus(fullpath).swap(p);
|
||||
} catch (const std::exception& e) {
|
||||
std::cerr << "Could not load the file " << fullpath << std::endl;
|
||||
return EXIT_FAILURE;
|
||||
|
@ -80,7 +80,6 @@ bool validate(const char *dirname) {
|
|||
}
|
||||
++howmany;
|
||||
const int parseRes = json_parse(p, pj);
|
||||
aligned_free((void*)p.data());
|
||||
printf("%s\n", parseRes == 0 ? "ok" : "invalid");
|
||||
if(contains("EXCLUDE",name)) {
|
||||
// skipping
|
||||
|
|
|
@ -79,8 +79,8 @@ inline void foundFloat(double result, const uint8_t *buf) {
|
|||
parse_error |= PARSE_ERROR;
|
||||
}
|
||||
// we want to get some reasonable relative accuracy
|
||||
else if (fabs(expected - result) / fmin(fabs(expected), fabs(result)) >
|
||||
1e-14) {
|
||||
else if (fabs(expected - result) >
|
||||
1e-14 * fmin(fabs(expected), fabs(result))) {
|
||||
fprintf(stderr, "parsed %.128e from \n", result);
|
||||
fprintf(stderr, " %.32s whereas strtod gives\n", buf);
|
||||
fprintf(stderr, " %.128e,", expected);
|
||||
|
@ -128,9 +128,9 @@ bool validate(const char *dirname) {
|
|||
} else {
|
||||
strcpy(fullpath + dirlen, name);
|
||||
}
|
||||
std::string_view p;
|
||||
padded_string p;
|
||||
try {
|
||||
p = get_corpus(fullpath);
|
||||
get_corpus(fullpath).swap(p);
|
||||
} catch (const std::exception& e) {
|
||||
std::cout << "Could not load the file " << fullpath << std::endl;
|
||||
return EXIT_FAILURE;
|
||||
|
@ -154,7 +154,6 @@ bool validate(const char *dirname) {
|
|||
float_count, invalid_count,
|
||||
int_count + float_count + invalid_count);
|
||||
}
|
||||
aligned_free((void*)p.data());
|
||||
free(fullpath);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
|
||||
int main() {
|
||||
const char * filename = JSON_TEST_PATH;
|
||||
std::string_view p = get_corpus(filename);
|
||||
padded_string p = get_corpus(filename);
|
||||
ParsedJson pj = build_parsed_json(p); // do the parsing
|
||||
if( ! pj.isValid() ) {
|
||||
return EXIT_FAILURE;
|
||||
|
@ -16,6 +16,5 @@ int main() {
|
|||
std::cerr << simdjson::errorMsg(res) << std::endl;
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
aligned_free((void*)p.data());
|
||||
return EXIT_SUCCESS;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -325,9 +325,9 @@ bool validate(const char *dirname) {
|
|||
} else {
|
||||
strcpy(fullpath + dirlen, name);
|
||||
}
|
||||
std::string_view p;
|
||||
padded_string p;
|
||||
try {
|
||||
p = get_corpus(fullpath);
|
||||
get_corpus(fullpath).swap(p);
|
||||
} catch (const std::exception& e) {
|
||||
std::cout << "Could not load the file " << fullpath << std::endl;
|
||||
return EXIT_FAILURE;
|
||||
|
@ -341,7 +341,6 @@ bool validate(const char *dirname) {
|
|||
bigbuffer = (char *) malloc(p.size());
|
||||
if(bigbuffer == NULL) {
|
||||
std::cerr << "can't allocate memory" << std::endl;
|
||||
aligned_free((void*)p.data());
|
||||
return false;
|
||||
}
|
||||
bad_string = 0;
|
||||
|
@ -350,7 +349,6 @@ bool validate(const char *dirname) {
|
|||
empty_string = 0;
|
||||
bool isok = json_parse(p, pj);
|
||||
free(bigbuffer);
|
||||
aligned_free((void*)p.data());
|
||||
if (good_string > 0) {
|
||||
printf("File %40s %s --- bad strings: %10zu \tgood strings: %10zu\t "
|
||||
"empty strings: %10zu "
|
||||
|
|
|
@ -5,8 +5,6 @@
|
|||
#include "simdjson/jsonioutil.h"
|
||||
#include "simdjson/jsonparser.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
void compute_dump(ParsedJson::iterator &pjh) {
|
||||
if (pjh.is_object()) {
|
||||
std::cout << "{";
|
||||
|
@ -64,19 +62,19 @@ int main(int argc, char *argv[]) {
|
|||
int optind = 1;
|
||||
#endif
|
||||
if (optind >= argc) {
|
||||
cerr << "Reads json in, out the result of the parsing. " << endl;
|
||||
cerr << "Usage: " << argv[0] << " <jsonfile>" << endl;
|
||||
cerr << "The -d flag dumps the raw content of the tape." << endl;
|
||||
std::cerr << "Reads json in, out the result of the parsing. " << std::endl;
|
||||
std::cerr << "Usage: " << argv[0] << " <jsonfile>" << std::endl;
|
||||
std::cerr << "The -d flag dumps the raw content of the tape." << std::endl;
|
||||
|
||||
exit(1);
|
||||
}
|
||||
const char *filename = argv[optind];
|
||||
if (optind + 1 < argc) {
|
||||
cerr << "warning: ignoring everything after " << argv[optind + 1] << endl;
|
||||
std::cerr << "warning: ignoring everything after " << argv[optind + 1] << std::endl;
|
||||
}
|
||||
std::string_view p;
|
||||
padded_string p;
|
||||
try {
|
||||
p = get_corpus(filename);
|
||||
get_corpus(filename).swap(p);
|
||||
} catch (const std::exception &e) { // caught by reference to base
|
||||
std::cout << "Could not load the file " << filename << std::endl;
|
||||
return EXIT_FAILURE;
|
||||
|
@ -88,7 +86,6 @@ int main(int argc, char *argv[]) {
|
|||
return EXIT_FAILURE;
|
||||
}
|
||||
int res = json_parse(p, pj); // do the parsing, return false on error
|
||||
aligned_free((void *)p.data());
|
||||
if (res) {
|
||||
std::cerr << " Parsing failed. " << std::endl;
|
||||
return EXIT_FAILURE;
|
||||
|
|
|
@ -3,8 +3,6 @@
|
|||
#include "simdjson/jsonioutil.h"
|
||||
#include "simdjson/jsonparser.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
size_t count_nonasciibytes(const uint8_t* input, size_t length) {
|
||||
size_t count = 0;
|
||||
for(size_t i = 0; i < length; i++) {
|
||||
|
@ -43,7 +41,7 @@ using stat_t = struct stat_s;
|
|||
|
||||
|
||||
|
||||
stat_t simdjson_computestats(const std::string_view &p) {
|
||||
stat_t simdjson_computestats(const padded_string &p) {
|
||||
stat_t answer;
|
||||
ParsedJson pj = build_parsed_json(p);
|
||||
answer.valid = pj.isValid();
|
||||
|
@ -119,18 +117,17 @@ stat_t simdjson_computestats(const std::string_view &p) {
|
|||
int main(int argc, char *argv[]) {
|
||||
int optind = 1;
|
||||
if (optind >= argc) {
|
||||
cerr << "Reads json, prints stats. " << endl;
|
||||
cerr << "Usage: " << argv[0] << " <jsonfile>" << endl;
|
||||
|
||||
std::cerr << "Reads json, prints stats. " << std::endl;
|
||||
std::cerr << "Usage: " << argv[0] << " <jsonfile>" << std::endl;
|
||||
exit(1);
|
||||
}
|
||||
const char *filename = argv[optind];
|
||||
if (optind + 1 < argc) {
|
||||
std::cerr << "warning: ignoring everything after " << argv[optind + 1] << std::endl;
|
||||
}
|
||||
std::string_view p;
|
||||
padded_string p;
|
||||
try {
|
||||
p = get_corpus(filename);
|
||||
get_corpus(filename).swap(p);
|
||||
} catch (const std::exception &e) { // caught by reference to base
|
||||
std::cerr << "Could not load the file " << filename << std::endl;
|
||||
return EXIT_FAILURE;
|
||||
|
|
|
@ -8,15 +8,14 @@ int main(int argc, char *argv[]) {
|
|||
std::cerr << "Usage: " << argv[0] << " <jsonfile>\n";
|
||||
exit(1);
|
||||
}
|
||||
std::string_view p;
|
||||
padded_string p;
|
||||
std::string filename = argv[argc - 1];
|
||||
try{
|
||||
p = get_corpus(filename);
|
||||
get_corpus(filename).swap(p);
|
||||
} catch (const std::exception& e) {
|
||||
std::cout << "Could not load the file " << filename << std::endl;
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
jsonminify(p, const_cast<char *>(p.data()));
|
||||
jsonminify(p, p.data());
|
||||
printf("%s",p.data());
|
||||
aligned_free((void*)p.data());
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue