Merge branch 'master' of https://github.com/lemire/simdjson into HEAD
This commit is contained in:
commit
c2e4b8ca9a
11
README.md
11
README.md
|
@ -12,6 +12,11 @@ JSON documents are everywhere on the Internet. Servers spend a lot of time parsi
|
|||
<img src="images/logo.png" width="10%">
|
||||
|
||||
|
||||
## Real-world usage
|
||||
|
||||
- [Microsoft FishStore](https://github.com/microsoft/FishStore)
|
||||
- [Yandex ClickHouse](https://github.com/yandex/ClickHouse)
|
||||
|
||||
## Paper
|
||||
|
||||
A description of the design and implementation of simdjson appears at https://arxiv.org/abs/1902.08318 and an informal blog post providing some background and context is at https://branchfree.org/2019/02/25/paper-parsing-gigabytes-of-json-per-second/.
|
||||
|
@ -517,5 +522,11 @@ This helps as we redefine some new characters as pseudo-structural such as the c
|
|||
- Yang, Shiyang. Validation of XML Document Based on Parallel Bit Stream Technology. Diss. Applied Sciences: School of Computing Science, 2013.
|
||||
- N. Nakasato, "Implementation of a parallel tree method on a GPU", Journal of Computational Science, vol. 3, no. 3, pp. 132-141, 2012.
|
||||
|
||||
|
||||
## Funding
|
||||
|
||||
The work is supported by the Natural Sciences and Engineering Research Council of Canada under grant number RGPIN-2017-03910.
|
||||
|
||||
|
||||
[license]: LICENSE
|
||||
[license img]: https://img.shields.io/badge/License-Apache%202-blue.svg
|
||||
|
|
|
@ -105,8 +105,10 @@ int main(int argc, char *argv[]) {
|
|||
rapid_correct_checkencoding ? "correct" : "invalid");
|
||||
printf("sajson : %s \n",
|
||||
sajson_correct ? "correct" : "invalid");
|
||||
if(oursreturn == simdjson::DEPTH_ERROR) {
|
||||
printf("simdjson encountered a DEPTH_ERROR, it was parametrized to reject documents with depth exceeding %zu.\n", maxdepth);
|
||||
if (oursreturn == simdjson::DEPTH_ERROR) {
|
||||
printf("simdjson encountered a DEPTH_ERROR, it was parametrized to "
|
||||
"reject documents with depth exceeding %zu.\n",
|
||||
maxdepth);
|
||||
}
|
||||
if ((ours_correct != rapid_correct_checkencoding) ||
|
||||
(rapid_correct_checkencoding != sajson_correct) ||
|
||||
|
|
|
@ -15,10 +15,10 @@ bool skyprophet_test() {
|
|||
std::vector<std::string> data;
|
||||
char buf[1024];
|
||||
for (size_t i = 0; i < n_records; ++i) {
|
||||
auto n = sprintf(buf,
|
||||
"{\"id\": %zu, \"name\": \"name%zu\", \"gender\": \"%s\", "
|
||||
auto n =
|
||||
sprintf(buf, "{\"id\": %zu, \"name\": \"name%zu\", \"gender\": \"%s\", "
|
||||
"\"school\": {\"id\": %zu, \"name\": \"school%zu\"}}",
|
||||
i, i, (i % 2) ? "male" : "female", i % 10, i % 10);
|
||||
i, i, (i % 2) ? "male" : "female", i % 10, i % 10);
|
||||
data.emplace_back(std::string(buf, n));
|
||||
}
|
||||
for (size_t i = 0; i < n_records; ++i) {
|
||||
|
|
|
@ -31,7 +31,6 @@ bool contains(const char *pre, const char *str) {
|
|||
return (strstr(str, pre) != nullptr);
|
||||
}
|
||||
|
||||
|
||||
bool validate(const char *dirname) {
|
||||
bool everythingfine = true;
|
||||
const char *extension = ".json";
|
||||
|
@ -46,9 +45,10 @@ bool validate(const char *dirname) {
|
|||
printf("nothing in dir %s \n", dirname);
|
||||
return false;
|
||||
}
|
||||
bool * isfileasexpected = new bool[c];
|
||||
for(int i = 0; i < c; i++) { isfileasexpected[i] = true;
|
||||
}
|
||||
bool *isfileasexpected = new bool[c];
|
||||
for (int i = 0; i < c; i++) {
|
||||
isfileasexpected[i] = true;
|
||||
}
|
||||
size_t howmany = 0;
|
||||
bool needsep = (strlen(dirname) > 1) && (dirname[strlen(dirname) - 1] != '/');
|
||||
for (int i = 0; i < c; i++) {
|
||||
|
@ -68,47 +68,50 @@ bool validate(const char *dirname) {
|
|||
padded_string p;
|
||||
try {
|
||||
get_corpus(fullpath).swap(p);
|
||||
} catch (const std::exception& e) {
|
||||
} catch (const std::exception &e) {
|
||||
std::cerr << "Could not load the file " << fullpath << std::endl;
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
ParsedJson pj;
|
||||
bool allocok = pj.allocateCapacity(p.size(), 1024);
|
||||
if(!allocok) {
|
||||
std::cerr << "can't allocate memory"<<std::endl;
|
||||
if (!allocok) {
|
||||
std::cerr << "can't allocate memory" << std::endl;
|
||||
return false;
|
||||
}
|
||||
++howmany;
|
||||
const int parseRes = json_parse(p, pj);
|
||||
printf("%s\n", parseRes == 0 ? "ok" : "invalid");
|
||||
if(contains("EXCLUDE",name)) {
|
||||
if (contains("EXCLUDE", name)) {
|
||||
// skipping
|
||||
howmany--;
|
||||
} else if (startsWith("pass", name) && parseRes != 0) {
|
||||
isfileasexpected[i] = false;
|
||||
printf("warning: file %s should pass but it fails. Error is: %s\n", name, simdjson::errorMsg(parseRes).data());
|
||||
everythingfine = false;
|
||||
isfileasexpected[i] = false;
|
||||
printf("warning: file %s should pass but it fails. Error is: %s\n",
|
||||
name, simdjson::errorMsg(parseRes).data());
|
||||
everythingfine = false;
|
||||
} else if (startsWith("fail", name) && parseRes == 0) {
|
||||
isfileasexpected[i] = false;
|
||||
printf("warning: file %s should fail but it passes.\n", name);
|
||||
everythingfine = false;
|
||||
isfileasexpected[i] = false;
|
||||
printf("warning: file %s should fail but it passes.\n", name);
|
||||
everythingfine = false;
|
||||
}
|
||||
free(fullpath);
|
||||
}
|
||||
}
|
||||
printf("%zu files checked.\n", howmany);
|
||||
if(everythingfine) {
|
||||
if (everythingfine) {
|
||||
printf("All ok!\n");
|
||||
} else {
|
||||
fprintf(stderr, "There were problems! Consider reviewing the following files:\n");
|
||||
for(int i = 0; i < c; i++) {
|
||||
if(!isfileasexpected[i]) { fprintf(stderr, "%s \n", entry_list[i]->d_name);
|
||||
}
|
||||
fprintf(stderr,
|
||||
"There were problems! Consider reviewing the following files:\n");
|
||||
for (int i = 0; i < c; i++) {
|
||||
if (!isfileasexpected[i]) {
|
||||
fprintf(stderr, "%s \n", entry_list[i]->d_name);
|
||||
}
|
||||
}
|
||||
}
|
||||
for (int i = 0; i < c; ++i) {
|
||||
free(entry_list[i]);
|
||||
}
|
||||
}
|
||||
free(entry_list);
|
||||
delete[] isfileasexpected;
|
||||
return everythingfine;
|
||||
|
@ -124,9 +127,8 @@ int main(int argc, char *argv[]) {
|
|||
<< std::endl;
|
||||
return validate("jsonchecker/") ? EXIT_SUCCESS : EXIT_FAILURE;
|
||||
#else
|
||||
std::cout
|
||||
<< "We are going to assume you mean to use the '"<< SIMDJSON_TEST_DATA_DIR <<"' directory."
|
||||
<< std::endl;
|
||||
std::cout << "We are going to assume you mean to use the '"
|
||||
<< SIMDJSON_TEST_DATA_DIR << "' directory." << std::endl;
|
||||
return validate(SIMDJSON_TEST_DATA_DIR) ? EXIT_SUCCESS : EXIT_FAILURE;
|
||||
#endif
|
||||
}
|
||||
|
|
|
@ -30,7 +30,8 @@ bool startsWith(const char *pre, const char *str) {
|
|||
}
|
||||
|
||||
bool is_in_bad_list(const char *buf) {
|
||||
if(buf[0] != '0') return false;
|
||||
if (buf[0] != '0')
|
||||
return false;
|
||||
for (size_t i = 0; i < sizeof(really_bad) / sizeof(really_bad[0]); i++)
|
||||
if (startsWith(really_bad[i], buf))
|
||||
return true;
|
||||
|
@ -68,19 +69,22 @@ inline void foundFloat(double result, const uint8_t *buf) {
|
|||
float_count++;
|
||||
double expected = strtod((const char *)buf, &endptr);
|
||||
if (endptr == (const char *)buf) {
|
||||
fprintf(stderr, "parsed %f from %.32s whereas strtod refuses to parse a float, ",
|
||||
result, buf);
|
||||
fprintf(stderr,
|
||||
"parsed %f from %.32s whereas strtod refuses to parse a float, ",
|
||||
result, buf);
|
||||
fprintf(stderr, " while parsing %s \n", fullpath);
|
||||
parse_error |= PARSE_ERROR;
|
||||
}
|
||||
if( fpclassify(expected) != fpclassify(result) ) {
|
||||
fprintf(stderr, "floats not in the same category expected: %f observed: %f \n", expected, result);
|
||||
if (fpclassify(expected) != fpclassify(result)) {
|
||||
fprintf(stderr,
|
||||
"floats not in the same category expected: %f observed: %f \n",
|
||||
expected, result);
|
||||
fprintf(stderr, "%.32s\n", buf);
|
||||
parse_error |= PARSE_ERROR;
|
||||
}
|
||||
// we want to get some reasonable relative accuracy
|
||||
else if (fabs(expected - result) >
|
||||
1e-14 * fmin(fabs(expected), fabs(result))) {
|
||||
else if (fabs(expected - result) >
|
||||
1e-14 * fmin(fabs(expected), fabs(result))) {
|
||||
fprintf(stderr, "parsed %.128e from \n", result);
|
||||
fprintf(stderr, " %.32s whereas strtod gives\n", buf);
|
||||
fprintf(stderr, " %.128e,", expected);
|
||||
|
@ -131,7 +135,7 @@ bool validate(const char *dirname) {
|
|||
padded_string p;
|
||||
try {
|
||||
get_corpus(fullpath).swap(p);
|
||||
} catch (const std::exception& e) {
|
||||
} catch (const std::exception &e) {
|
||||
std::cout << "Could not load the file " << fullpath << std::endl;
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
@ -172,12 +176,14 @@ int main(int argc, char *argv[]) {
|
|||
if (argc != 2) {
|
||||
std::cerr << "Usage: " << argv[0] << " <directorywithjsonfiles>"
|
||||
<< std::endl;
|
||||
#if defined(SIMDJSON_TEST_DATA_DIR) && defined(SIMDJSON_BENCHMARK_DATA_DIR)
|
||||
std::cout
|
||||
<< "We are going to assume you mean to use the '"<< SIMDJSON_TEST_DATA_DIR <<"' and '"<< SIMDJSON_BENCHMARK_DATA_DIR <<"'directories."
|
||||
<< std::endl;
|
||||
return validate(SIMDJSON_TEST_DATA_DIR) && validate(SIMDJSON_BENCHMARK_DATA_DIR) ? EXIT_SUCCESS
|
||||
: EXIT_FAILURE;
|
||||
#if defined(SIMDJSON_TEST_DATA_DIR) && defined(SIMDJSON_BENCHMARK_DATA_DIR)
|
||||
std::cout << "We are going to assume you mean to use the '"
|
||||
<< SIMDJSON_TEST_DATA_DIR << "' and '"
|
||||
<< SIMDJSON_BENCHMARK_DATA_DIR << "'directories." << std::endl;
|
||||
return validate(SIMDJSON_TEST_DATA_DIR) &&
|
||||
validate(SIMDJSON_BENCHMARK_DATA_DIR)
|
||||
? EXIT_SUCCESS
|
||||
: EXIT_FAILURE;
|
||||
#else
|
||||
std::cout << "We are going to assume you mean to use the 'jsonchecker' and "
|
||||
"'jsonexamples' directories."
|
||||
|
|
|
@ -1,14 +1,14 @@
|
|||
#include <iostream>
|
||||
#include "../singleheader/simdjson.h"
|
||||
#include <iostream>
|
||||
|
||||
int main() {
|
||||
const char * filename = JSON_TEST_PATH;
|
||||
const char *filename = JSON_TEST_PATH;
|
||||
padded_string p = get_corpus(filename);
|
||||
ParsedJson pj = build_parsed_json(p); // do the parsing
|
||||
if( ! pj.isValid() ) {
|
||||
if (!pj.isValid()) {
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
if( ! pj.allocateCapacity(p.size()) ) {
|
||||
if (!pj.allocateCapacity(p.size())) {
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
const int res = json_parse(p, pj);
|
||||
|
|
|
@ -2,11 +2,11 @@
|
|||
#include <cstring>
|
||||
#include <dirent.h>
|
||||
#include <inttypes.h>
|
||||
#include <iostream>
|
||||
#include <math.h>
|
||||
#include <stdbool.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <iostream>
|
||||
|
||||
#ifndef JSON_TEST_STRINGS
|
||||
#define JSON_TEST_STRINGS
|
||||
|
@ -201,7 +201,7 @@ static bool parse_string(const char *p, char *output, char **end) {
|
|||
}
|
||||
}
|
||||
// end of borrowed code
|
||||
char * bigbuffer; // global variable
|
||||
char *bigbuffer; // global variable
|
||||
|
||||
inline void foundBadString(const uint8_t *buf) {
|
||||
bad_string++;
|
||||
|
@ -328,18 +328,18 @@ bool validate(const char *dirname) {
|
|||
padded_string p;
|
||||
try {
|
||||
get_corpus(fullpath).swap(p);
|
||||
} catch (const std::exception& e) {
|
||||
} catch (const std::exception &e) {
|
||||
std::cout << "Could not load the file " << fullpath << std::endl;
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
}
|
||||
ParsedJson pj;
|
||||
bool allocok = pj.allocateCapacity(p.size(), 1024);
|
||||
if (!allocok) {
|
||||
std::cerr << "can't allocate memory" << std::endl;
|
||||
return false;
|
||||
}
|
||||
bigbuffer = (char *) malloc(p.size());
|
||||
if(bigbuffer == NULL) {
|
||||
bigbuffer = (char *)malloc(p.size());
|
||||
if (bigbuffer == NULL) {
|
||||
std::cerr << "can't allocate memory" << std::endl;
|
||||
return false;
|
||||
}
|
||||
|
@ -380,12 +380,14 @@ int main(int argc, char *argv[]) {
|
|||
if (argc != 2) {
|
||||
std::cerr << "Usage: " << argv[0] << " <directorywithjsonfiles>"
|
||||
<< std::endl;
|
||||
#if defined(SIMDJSON_TEST_DATA_DIR) && defined(SIMDJSON_BENCHMARK_DATA_DIR)
|
||||
std::cout
|
||||
<< "We are going to assume you mean to use the '"<< SIMDJSON_TEST_DATA_DIR <<"' and '"<< SIMDJSON_BENCHMARK_DATA_DIR <<"'directories."
|
||||
<< std::endl;
|
||||
return validate(SIMDJSON_TEST_DATA_DIR) && validate(SIMDJSON_BENCHMARK_DATA_DIR) ? EXIT_SUCCESS
|
||||
: EXIT_FAILURE;
|
||||
#if defined(SIMDJSON_TEST_DATA_DIR) && defined(SIMDJSON_BENCHMARK_DATA_DIR)
|
||||
std::cout << "We are going to assume you mean to use the '"
|
||||
<< SIMDJSON_TEST_DATA_DIR << "' and '"
|
||||
<< SIMDJSON_BENCHMARK_DATA_DIR << "'directories." << std::endl;
|
||||
return validate(SIMDJSON_TEST_DATA_DIR) &&
|
||||
validate(SIMDJSON_BENCHMARK_DATA_DIR)
|
||||
? EXIT_SUCCESS
|
||||
: EXIT_FAILURE;
|
||||
#else
|
||||
std::cout << "We are going to assume you mean to use the 'jsonchecker' and "
|
||||
"'jsonexamples' directories."
|
||||
|
|
Loading…
Reference in New Issue