Merge branch 'master' of https://github.com/lemire/simdjson into HEAD

This commit is contained in:
ioioioio 2019-06-28 15:18:05 -04:00
commit c2e4b8ca9a
7 changed files with 81 additions and 58 deletions

View File

@ -12,6 +12,11 @@ JSON documents are everywhere on the Internet. Servers spend a lot of time parsi
<img src="images/logo.png" width="10%">
## Real-world usage
- [Microsoft FishStore](https://github.com/microsoft/FishStore)
- [Yandex ClickHouse](https://github.com/yandex/ClickHouse)
## Paper
A description of the design and implementation of simdjson appears at https://arxiv.org/abs/1902.08318, and an informal blog post providing some background and context is available at https://branchfree.org/2019/02/25/paper-parsing-gigabytes-of-json-per-second/.
@ -517,5 +522,11 @@ This helps as we redefine some new characters as pseudo-structural such as the c
- Yang, Shiyang. Validation of XML Document Based on Parallel Bit Stream Technology. Diss. Applied Sciences: School of Computing Science, 2013.
- N. Nakasato, "Implementation of a parallel tree method on a GPU", Journal of Computational Science, vol. 3, no. 3, pp. 132-141, 2012.
## Funding
The work is supported by the Natural Sciences and Engineering Research Council of Canada under grant number RGPIN-2017-03910.
[license]: LICENSE
[license img]: https://img.shields.io/badge/License-Apache%202-blue.svg

View File

@ -106,7 +106,9 @@ int main(int argc, char *argv[]) {
printf("sajson : %s \n",
sajson_correct ? "correct" : "invalid");
if (oursreturn == simdjson::DEPTH_ERROR) {
printf("simdjson encountered a DEPTH_ERROR, it was parametrized to reject documents with depth exceeding %zu.\n", maxdepth);
printf("simdjson encountered a DEPTH_ERROR, it was parametrized to "
"reject documents with depth exceeding %zu.\n",
maxdepth);
}
if ((ours_correct != rapid_correct_checkencoding) ||
(rapid_correct_checkencoding != sajson_correct) ||

View File

@ -15,8 +15,8 @@ bool skyprophet_test() {
std::vector<std::string> data;
char buf[1024];
for (size_t i = 0; i < n_records; ++i) {
auto n = sprintf(buf,
"{\"id\": %zu, \"name\": \"name%zu\", \"gender\": \"%s\", "
auto n =
sprintf(buf, "{\"id\": %zu, \"name\": \"name%zu\", \"gender\": \"%s\", "
"\"school\": {\"id\": %zu, \"name\": \"school%zu\"}}",
i, i, (i % 2) ? "male" : "female", i % 10, i % 10);
data.emplace_back(std::string(buf, n));

View File

@ -31,7 +31,6 @@ bool contains(const char *pre, const char *str) {
return (strstr(str, pre) != nullptr);
}
bool validate(const char *dirname) {
bool everythingfine = true;
const char *extension = ".json";
@ -47,7 +46,8 @@ bool validate(const char *dirname) {
return false;
}
bool *isfileasexpected = new bool[c];
for(int i = 0; i < c; i++) { isfileasexpected[i] = true;
for (int i = 0; i < c; i++) {
isfileasexpected[i] = true;
}
size_t howmany = 0;
bool needsep = (strlen(dirname) > 1) && (dirname[strlen(dirname) - 1] != '/');
@ -86,7 +86,8 @@ bool validate(const char *dirname) {
howmany--;
} else if (startsWith("pass", name) && parseRes != 0) {
isfileasexpected[i] = false;
printf("warning: file %s should pass but it fails. Error is: %s\n", name, simdjson::errorMsg(parseRes).data());
printf("warning: file %s should pass but it fails. Error is: %s\n",
name, simdjson::errorMsg(parseRes).data());
everythingfine = false;
} else if (startsWith("fail", name) && parseRes == 0) {
isfileasexpected[i] = false;
@ -100,9 +101,11 @@ bool validate(const char *dirname) {
if (everythingfine) {
printf("All ok!\n");
} else {
fprintf(stderr, "There were problems! Consider reviewing the following files:\n");
fprintf(stderr,
"There were problems! Consider reviewing the following files:\n");
for (int i = 0; i < c; i++) {
if(!isfileasexpected[i]) { fprintf(stderr, "%s \n", entry_list[i]->d_name);
if (!isfileasexpected[i]) {
fprintf(stderr, "%s \n", entry_list[i]->d_name);
}
}
}
@ -124,9 +127,8 @@ int main(int argc, char *argv[]) {
<< std::endl;
return validate("jsonchecker/") ? EXIT_SUCCESS : EXIT_FAILURE;
#else
std::cout
<< "We are going to assume you mean to use the '"<< SIMDJSON_TEST_DATA_DIR <<"' directory."
<< std::endl;
std::cout << "We are going to assume you mean to use the '"
<< SIMDJSON_TEST_DATA_DIR << "' directory." << std::endl;
return validate(SIMDJSON_TEST_DATA_DIR) ? EXIT_SUCCESS : EXIT_FAILURE;
#endif
}

View File

@ -30,7 +30,8 @@ bool startsWith(const char *pre, const char *str) {
}
bool is_in_bad_list(const char *buf) {
if(buf[0] != '0') return false;
if (buf[0] != '0')
return false;
for (size_t i = 0; i < sizeof(really_bad) / sizeof(really_bad[0]); i++)
if (startsWith(really_bad[i], buf))
return true;
@ -68,13 +69,16 @@ inline void foundFloat(double result, const uint8_t *buf) {
float_count++;
double expected = strtod((const char *)buf, &endptr);
if (endptr == (const char *)buf) {
fprintf(stderr, "parsed %f from %.32s whereas strtod refuses to parse a float, ",
fprintf(stderr,
"parsed %f from %.32s whereas strtod refuses to parse a float, ",
result, buf);
fprintf(stderr, " while parsing %s \n", fullpath);
parse_error |= PARSE_ERROR;
}
if (fpclassify(expected) != fpclassify(result)) {
fprintf(stderr, "floats not in the same category expected: %f observed: %f \n", expected, result);
fprintf(stderr,
"floats not in the same category expected: %f observed: %f \n",
expected, result);
fprintf(stderr, "%.32s\n", buf);
parse_error |= PARSE_ERROR;
}
@ -173,10 +177,12 @@ int main(int argc, char *argv[]) {
std::cerr << "Usage: " << argv[0] << " <directorywithjsonfiles>"
<< std::endl;
#if defined(SIMDJSON_TEST_DATA_DIR) && defined(SIMDJSON_BENCHMARK_DATA_DIR)
std::cout
<< "We are going to assume you mean to use the '"<< SIMDJSON_TEST_DATA_DIR <<"' and '"<< SIMDJSON_BENCHMARK_DATA_DIR <<"'directories."
<< std::endl;
return validate(SIMDJSON_TEST_DATA_DIR) && validate(SIMDJSON_BENCHMARK_DATA_DIR) ? EXIT_SUCCESS
std::cout << "We are going to assume you mean to use the '"
<< SIMDJSON_TEST_DATA_DIR << "' and '"
<< SIMDJSON_BENCHMARK_DATA_DIR << "'directories." << std::endl;
return validate(SIMDJSON_TEST_DATA_DIR) &&
validate(SIMDJSON_BENCHMARK_DATA_DIR)
? EXIT_SUCCESS
: EXIT_FAILURE;
#else
std::cout << "We are going to assume you mean to use the 'jsonchecker' and "

View File

@ -1,5 +1,5 @@
#include <iostream>
#include "../singleheader/simdjson.h"
#include <iostream>
int main() {
const char *filename = JSON_TEST_PATH;

View File

@ -2,11 +2,11 @@
#include <cstring>
#include <dirent.h>
#include <inttypes.h>
#include <iostream>
#include <math.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <iostream>
#ifndef JSON_TEST_STRINGS
#define JSON_TEST_STRINGS
@ -381,10 +381,12 @@ int main(int argc, char *argv[]) {
std::cerr << "Usage: " << argv[0] << " <directorywithjsonfiles>"
<< std::endl;
#if defined(SIMDJSON_TEST_DATA_DIR) && defined(SIMDJSON_BENCHMARK_DATA_DIR)
std::cout
<< "We are going to assume you mean to use the '"<< SIMDJSON_TEST_DATA_DIR <<"' and '"<< SIMDJSON_BENCHMARK_DATA_DIR <<"'directories."
<< std::endl;
return validate(SIMDJSON_TEST_DATA_DIR) && validate(SIMDJSON_BENCHMARK_DATA_DIR) ? EXIT_SUCCESS
std::cout << "We are going to assume you mean to use the '"
<< SIMDJSON_TEST_DATA_DIR << "' and '"
<< SIMDJSON_BENCHMARK_DATA_DIR << "'directories." << std::endl;
return validate(SIMDJSON_TEST_DATA_DIR) &&
validate(SIMDJSON_BENCHMARK_DATA_DIR)
? EXIT_SUCCESS
: EXIT_FAILURE;
#else
std::cout << "We are going to assume you mean to use the 'jsonchecker' and "