597 lines
17 KiB
C++
597 lines
17 KiB
C++
#include <cassert>
|
|
#include <cinttypes>
|
|
#include <cstdio>
|
|
#include <cstdlib>
|
|
#include <cstring>
|
|
#include <iostream>
|
|
#include <string>
|
|
#include <vector>
|
|
#include <cmath>
|
|
|
|
#include "simdjson/jsonparser.h"
|
|
#include "simdjson/jsonstream.h"
|
|
#include "simdjson/document.h"
|
|
|
|
// ulp distance
|
|
// Marc B. Reynolds, 2016-2019
|
|
// Public Domain under http://unlicense.org, see link for details.
|
|
// adapted by D. Lemire
|
|
inline uint64_t f64_ulp_dist(double a, double b) {
|
|
uint64_t ua, ub;
|
|
memcpy(&ua, &a, sizeof(ua));
|
|
memcpy(&ub, &b, sizeof(ub));
|
|
if ((int64_t)(ub ^ ua) >= 0)
|
|
return (int64_t)(ua - ub) >= 0 ? (ua - ub) : (ub - ua);
|
|
return ua + ub + 0x80000000;
|
|
}
|
|
|
|
|
|
bool number_test_small_integers() {
|
|
char buf[1024];
|
|
simdjson::document::parser parser;
|
|
if (!parser.allocate_capacity(1024)) {
|
|
printf("allocation failure in number_test_small_integers\n");
|
|
return false;
|
|
}
|
|
for (int m = 10; m < 20; m++) {
|
|
for (int i = -1024; i < 1024; i++) {
|
|
auto n = sprintf(buf, "%*d", m, i);
|
|
buf[n] = '\0';
|
|
fflush(NULL);
|
|
auto ok1 = json_parse(buf, n, parser);
|
|
if (ok1 != 0 || !parser.is_valid()) {
|
|
printf("Could not parse: %s.\n", buf);
|
|
return false;
|
|
}
|
|
simdjson::document::iterator iter(parser);
|
|
if(!iter.is_number()) {
|
|
printf("Root should be number\n");
|
|
return false;
|
|
}
|
|
if(!iter.is_integer()) {
|
|
printf("Root should be an integer\n");
|
|
return false;
|
|
}
|
|
int64_t x = iter.get_integer();
|
|
if(x != i) {
|
|
printf("failed to parse %s. \n", buf);
|
|
return false;
|
|
}
|
|
}
|
|
}
|
|
printf("Small integers can be parsed.\n");
|
|
return true;
|
|
}
|
|
|
|
|
|
bool number_test_powers_of_two() {
|
|
char buf[1024];
|
|
simdjson::document::parser parser;
|
|
if (!parser.allocate_capacity(1024)) {
|
|
printf("allocation failure in number_test\n");
|
|
return false;
|
|
}
|
|
int maxulp = 0;
|
|
for (int i = -1075; i < 1024; ++i) {// large negative values should be zero.
|
|
double expected = pow(2, i);
|
|
auto n = sprintf(buf, "%.*e", std::numeric_limits<double>::max_digits10 - 1, expected);
|
|
buf[n] = '\0';
|
|
fflush(NULL);
|
|
auto ok1 = json_parse(buf, n, parser);
|
|
if (ok1 != 0 || !parser.is_valid()) {
|
|
printf("Could not parse: %s.\n", buf);
|
|
return false;
|
|
}
|
|
simdjson::document::iterator iter(parser);
|
|
if(!iter.is_number()) {
|
|
printf("Root should be number\n");
|
|
return false;
|
|
}
|
|
if(iter.is_integer()) {
|
|
int64_t x = iter.get_integer();
|
|
int power = 0;
|
|
while(x > 1) {
|
|
if((x % 2) != 0) {
|
|
printf("failed to parse %s. \n", buf);
|
|
return false;
|
|
}
|
|
x = x / 2;
|
|
power ++;
|
|
}
|
|
if(power != i) {
|
|
printf("failed to parse %s. \n", buf);
|
|
return false;
|
|
}
|
|
} else if(iter.is_unsigned_integer()) {
|
|
uint64_t x = iter.get_unsigned_integer();
|
|
int power = 0;
|
|
while(x > 1) {
|
|
if((x % 2) != 0) {
|
|
printf("failed to parse %s. \n", buf);
|
|
return false;
|
|
}
|
|
x = x / 2;
|
|
power ++;
|
|
}
|
|
if(power != i) {
|
|
printf("failed to parse %s. \n", buf);
|
|
return false;
|
|
}
|
|
} else {
|
|
double x = iter.get_double();
|
|
int ulp = f64_ulp_dist(x,expected);
|
|
if(ulp > maxulp) maxulp = ulp;
|
|
if(ulp > 3) {
|
|
printf("failed to parse %s. ULP = %d i = %d \n", buf, ulp, i);
|
|
return false;
|
|
}
|
|
}
|
|
}
|
|
printf("Powers of 2 can be parsed, maxulp = %d.\n", maxulp);
|
|
return true;
|
|
}
|
|
|
|
bool number_test_powers_of_ten() {
|
|
char buf[1024];
|
|
simdjson::document::parser parser;
|
|
if (!parser.allocate_capacity(1024)) {
|
|
printf("allocation failure in number_test\n");
|
|
return false;
|
|
}
|
|
for (int i = -1000000; i <= 308; ++i) {// large negative values should be zero.
|
|
auto n = sprintf(buf,"1e%d", i);
|
|
buf[n] = '\0';
|
|
fflush(NULL);
|
|
auto ok1 = json_parse(buf, n, parser);
|
|
if (ok1 != 0 || !parser.is_valid()) {
|
|
printf("Could not parse: %s.\n", buf);
|
|
return false;
|
|
}
|
|
simdjson::document::iterator iter(parser);
|
|
if(!iter.is_number()) {
|
|
printf("Root should be number\n");
|
|
return false;
|
|
}
|
|
if(iter.is_integer()) {
|
|
int64_t x = iter.get_integer();
|
|
int power = 0;
|
|
while(x > 1) {
|
|
if((x % 10) != 0) {
|
|
printf("failed to parse %s. \n", buf);
|
|
return false;
|
|
}
|
|
x = x / 10;
|
|
power ++;
|
|
}
|
|
if(power != i) {
|
|
printf("failed to parse %s. \n", buf);
|
|
return false;
|
|
}
|
|
} else if(iter.is_unsigned_integer()) {
|
|
uint64_t x = iter.get_unsigned_integer();
|
|
int power = 0;
|
|
while(x > 1) {
|
|
if((x % 10) != 0) {
|
|
printf("failed to parse %s. \n", buf);
|
|
return false;
|
|
}
|
|
x = x / 10;
|
|
power ++;
|
|
}
|
|
if(power != i) {
|
|
printf("failed to parse %s. \n", buf);
|
|
return false;
|
|
}
|
|
} else {
|
|
double x = iter.get_double();
|
|
double expected = std::pow(10, i);
|
|
int ulp = (int) f64_ulp_dist(x, expected);
|
|
if(ulp > 1) {
|
|
printf("failed to parse %s. \n", buf);
|
|
printf("actual: %.20g expected: %.20g \n", x, expected);
|
|
printf("ULP: %d \n", ulp);
|
|
return false;
|
|
}
|
|
}
|
|
}
|
|
printf("Powers of 10 can be parsed.\n");
|
|
return true;
|
|
}
|
|
|
|
|
|
// adversarial example that once triggred overruns, see https://github.com/lemire/simdjson/issues/345
|
|
bool bad_example() {
|
|
std::string badjson = "[7,7,7,7,6,7,7,7,6,7,7,6,[7,7,7,7,6,7,7,7,6,7,7,6,7,7,7,7,7,7,6";
|
|
simdjson::document::parser parser = simdjson::build_parsed_json(badjson);
|
|
if(parser.is_valid()) {
|
|
printf("This json should not be valid %s.\n", badjson.c_str());
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
// returns true if successful
|
|
bool stable_test() {
|
|
std::string json = "{"
|
|
"\"Image\":{"
|
|
"\"Width\":800,"
|
|
"\"Height\":600,"
|
|
"\"Title\":\"View from 15th Floor\","
|
|
"\"Thumbnail\":{"
|
|
"\"Url\":\"http://www.example.com/image/481989943\","
|
|
"\"Height\":125,"
|
|
"\"Width\":100"
|
|
"},"
|
|
"\"Animated\":false,"
|
|
"\"IDs\":[116,943.3,234,38793]"
|
|
"}"
|
|
"}";
|
|
simdjson::document::parser parser = simdjson::build_parsed_json(json);
|
|
std::ostringstream myStream;
|
|
if( ! parser.print_json(myStream) ) {
|
|
std::cout << "cannot print it out? " << std::endl;
|
|
return false;
|
|
}
|
|
std::string newjson = myStream.str();
|
|
if(json != newjson) {
|
|
std::cout << "serialized json differs!" << std::endl;
|
|
std::cout << json << std::endl;
|
|
std::cout << newjson << std::endl;
|
|
}
|
|
return newjson == json;
|
|
}
|
|
|
|
static bool parse_json_message_issue467(char const* message, std::size_t len, size_t expectedcount) {
|
|
simdjson::document::parser parser;
|
|
size_t count = 0;
|
|
if (!parser.allocate_capacity(len)) {
|
|
std::cerr << "Failed to allocated memory for simdjson::document::parser" << std::endl;
|
|
return false;
|
|
}
|
|
int res;
|
|
simdjson::padded_string str(message,len);
|
|
simdjson::JsonStream<simdjson::padded_string> js(str, parser.capacity());
|
|
do {
|
|
res = js.json_parse(parser);
|
|
count++;
|
|
} while (res == simdjson::SUCCESS_AND_HAS_MORE);
|
|
if (res != simdjson::SUCCESS) {
|
|
std::cerr << "Failed with simdjson error= " << simdjson::error_message(res) << std::endl;
|
|
return false;
|
|
}
|
|
if(count != expectedcount) {
|
|
std::cerr << "bad count" << std::endl;
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
bool json_issue467() {
|
|
const char * single_message = "{\"error\":[],\"result\":{\"token\":\"xxx\"}}";
|
|
const char* two_messages = "{\"error\":[],\"result\":{\"token\":\"xxx\"}}{\"error\":[],\"result\":{\"token\":\"xxx\"}}";
|
|
|
|
if(!parse_json_message_issue467(single_message, strlen(single_message),1)) {
|
|
return false;
|
|
}
|
|
if(!parse_json_message_issue467(two_messages, strlen(two_messages),2)) {
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
// returns true if successful
|
|
bool navigate_test() {
|
|
std::string json = "{"
|
|
"\"Image\": {"
|
|
"\"Width\": 800,"
|
|
"\"Height\": 600,"
|
|
"\"Title\": \"View from 15th Floor\","
|
|
"\"Thumbnail\": {"
|
|
" \"Url\": \"http://www.example.com/image/481989943\","
|
|
" \"Height\": 125,"
|
|
" \"Width\": 100"
|
|
"},"
|
|
"\"Animated\" : false,"
|
|
"\"IDs\": [116, 943, 234, 38793]"
|
|
"}"
|
|
"}";
|
|
|
|
simdjson::document::parser parser = simdjson::build_parsed_json(json);
|
|
if (!parser.is_valid()) {
|
|
printf("Something is wrong in navigate: %s.\n", json.c_str());
|
|
return false;
|
|
}
|
|
simdjson::document::iterator iter(parser);
|
|
if(!iter.is_object()) {
|
|
printf("Root should be object\n");
|
|
return false;
|
|
}
|
|
if(iter.move_to_key("bad key")) {
|
|
printf("We should not move to a non-existing key\n");
|
|
return false;
|
|
}
|
|
if(!iter.is_object()) {
|
|
printf("We should have remained at the object.\n");
|
|
return false;
|
|
}
|
|
if(iter.move_to_key_insensitive("bad key")) {
|
|
printf("We should not move to a non-existing key\n");
|
|
return false;
|
|
}
|
|
if(!iter.is_object()) {
|
|
printf("We should have remained at the object.\n");
|
|
return false;
|
|
}
|
|
if(iter.move_to_key("bad key", 7)) {
|
|
printf("We should not move to a non-existing key\n");
|
|
return false;
|
|
}
|
|
if(!iter.is_object()) {
|
|
printf("We should have remained at the object.\n");
|
|
return false;
|
|
}
|
|
if(!iter.down()) {
|
|
printf("Root should not be emtpy\n");
|
|
return false;
|
|
}
|
|
if(!iter.is_string()) {
|
|
printf("Object should start with string key\n");
|
|
return false;
|
|
}
|
|
if(iter.prev()) {
|
|
printf("We should not be able to go back from the start of the scope.\n");
|
|
return false;
|
|
}
|
|
if(strcmp(iter.get_string(),"Image")!=0) {
|
|
printf("There should be a single key, image.\n");
|
|
return false;
|
|
}
|
|
iter.move_to_value();
|
|
if(!iter.is_object()) {
|
|
printf("Value of image should be object\n");
|
|
return false;
|
|
}
|
|
if(!iter.down()) {
|
|
printf("Image key should not be emtpy\n");
|
|
return false;
|
|
}
|
|
if(!iter.next()) {
|
|
printf("key should have a value\n");
|
|
return false;
|
|
}
|
|
if(!iter.prev()) {
|
|
printf("We should go back to the key.\n");
|
|
return false;
|
|
}
|
|
if(strcmp(iter.get_string(),"Width")!=0) {
|
|
printf("There should be a key Width.\n");
|
|
return false;
|
|
}
|
|
if(!iter.up()) {
|
|
return false;
|
|
}
|
|
if(!iter.move_to_key("IDs")) {
|
|
printf("We should be able to move to an existing key\n");
|
|
return false;
|
|
}
|
|
if(!iter.is_array()) {
|
|
printf("Value of IDs should be array, it is %c \n", iter.get_type());
|
|
return false;
|
|
}
|
|
if(iter.move_to_index(4)) {
|
|
printf("We should not be able to move to a non-existing index\n");
|
|
return false;
|
|
}
|
|
if(!iter.is_array()) {
|
|
printf("We should have remained at the array\n");
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
// returns true if successful
|
|
bool stream_utf8_test() {
|
|
printf("Running stream_utf8_test");
|
|
fflush(NULL);
|
|
const size_t n_records = 10000;
|
|
std::string data;
|
|
char buf[1024];
|
|
for (size_t i = 0; i < n_records; ++i) {
|
|
auto n = sprintf(buf,
|
|
"{\"id\": %zu, \"name\": \"name%zu\", \"gender\": \"%s\", "
|
|
"\"été\": {\"id\": %zu, \"name\": \"éventail%zu\"}}",
|
|
i, i, (i % 2) ? "⺃" : "⺕", i % 10, i % 10);
|
|
data += std::string(buf, n);
|
|
}
|
|
for(size_t i = 1000; i < 2000; i += (i>1050?10:1)) {
|
|
printf(".");
|
|
fflush(NULL);
|
|
simdjson::padded_string str(data);
|
|
simdjson::JsonStream<simdjson::padded_string> js{str, i};
|
|
int parse_res = simdjson::SUCCESS_AND_HAS_MORE;
|
|
size_t count = 0;
|
|
simdjson::document::parser parser;
|
|
while (parse_res == simdjson::SUCCESS_AND_HAS_MORE) {
|
|
parse_res = js.json_parse(parser);
|
|
simdjson::document::iterator iter(parser);
|
|
if(!iter.is_object()) {
|
|
printf("Root should be object\n");
|
|
return false;
|
|
}
|
|
if(!iter.down()) {
|
|
printf("Root should not be emtpy\n");
|
|
return false;
|
|
}
|
|
if(!iter.is_string()) {
|
|
printf("Object should start with string key\n");
|
|
return false;
|
|
}
|
|
if(strcmp(iter.get_string(),"id")!=0) {
|
|
printf("There should a single key, id.\n");
|
|
return false;
|
|
}
|
|
iter.move_to_value();
|
|
if(!iter.is_integer()) {
|
|
printf("Value of image should be integer\n");
|
|
return false;
|
|
}
|
|
int64_t keyid = iter.get_integer();
|
|
if(keyid != (int64_t)count) {
|
|
printf("key does not match %d, expected %d\n",(int) keyid, (int) count);
|
|
return false;
|
|
}
|
|
count++;
|
|
}
|
|
if(count != n_records) {
|
|
printf("Something is wrong in stream_test at window size = %zu.\n", i);
|
|
return false;
|
|
}
|
|
}
|
|
printf("ok\n");
|
|
return true;
|
|
}
|
|
|
|
// returns true if successful
|
|
bool stream_test() {
|
|
printf("Running stream_test");
|
|
fflush(NULL);
|
|
const size_t n_records = 10000;
|
|
std::string data;
|
|
char buf[1024];
|
|
for (size_t i = 0; i < n_records; ++i) {
|
|
auto n = sprintf(buf,
|
|
"{\"id\": %zu, \"name\": \"name%zu\", \"gender\": \"%s\", "
|
|
"\"ete\": {\"id\": %zu, \"name\": \"eventail%zu\"}}",
|
|
i, i, (i % 2) ? "homme" : "femme", i % 10, i % 10);
|
|
data += std::string(buf, n);
|
|
}
|
|
for(size_t i = 1000; i < 2000; i += (i>1050?10:1)) {
|
|
printf(".");
|
|
fflush(NULL);
|
|
simdjson::padded_string str(data);
|
|
simdjson::JsonStream<simdjson::padded_string> js{str, i};
|
|
int parse_res = simdjson::SUCCESS_AND_HAS_MORE;
|
|
size_t count = 0;
|
|
simdjson::document::parser parser;
|
|
while (parse_res == simdjson::SUCCESS_AND_HAS_MORE) {
|
|
parse_res = js.json_parse(parser);
|
|
simdjson::document::iterator iter(parser);
|
|
if(!iter.is_object()) {
|
|
printf("Root should be object\n");
|
|
return false;
|
|
}
|
|
if(!iter.down()) {
|
|
printf("Root should not be emtpy\n");
|
|
return false;
|
|
}
|
|
if(!iter.is_string()) {
|
|
printf("Object should start with string key\n");
|
|
return false;
|
|
}
|
|
if(strcmp(iter.get_string(),"id")!=0) {
|
|
printf("There should a single key, id.\n");
|
|
return false;
|
|
}
|
|
iter.move_to_value();
|
|
if(!iter.is_integer()) {
|
|
printf("Value of image should be integer\n");
|
|
return false;
|
|
}
|
|
int64_t keyid = iter.get_integer();
|
|
if(keyid != (int64_t)count) {
|
|
printf("key does not match %d, expected %d\n",(int) keyid, (int) count);
|
|
return false;
|
|
}
|
|
count++;
|
|
}
|
|
if(count != n_records) {
|
|
printf("Something is wrong in stream_test at window size = %zu.\n", i);
|
|
return false;
|
|
}
|
|
}
|
|
printf("ok\n");
|
|
return true;
|
|
}
|
|
|
|
// returns true if successful
|
|
bool skyprophet_test() {
|
|
const size_t n_records = 100000;
|
|
std::vector<std::string> data;
|
|
char buf[1024];
|
|
for (size_t i = 0; i < n_records; ++i) {
|
|
auto n = sprintf(buf,
|
|
"{\"id\": %zu, \"name\": \"name%zu\", \"gender\": \"%s\", "
|
|
"\"school\": {\"id\": %zu, \"name\": \"school%zu\"}}",
|
|
i, i, (i % 2) ? "male" : "female", i % 10, i % 10);
|
|
data.emplace_back(std::string(buf, n));
|
|
}
|
|
for (size_t i = 0; i < n_records; ++i) {
|
|
auto n = sprintf(buf, "{\"counter\": %f, \"array\": [%s]}", i * 3.1416,
|
|
(i % 2) ? "true" : "false");
|
|
data.emplace_back(std::string(buf, n));
|
|
}
|
|
for (size_t i = 0; i < n_records; ++i) {
|
|
auto n = sprintf(buf, "{\"number\": %e}", i * 10000.31321321);
|
|
data.emplace_back(std::string(buf, n));
|
|
}
|
|
data.emplace_back(std::string("true"));
|
|
data.emplace_back(std::string("false"));
|
|
data.emplace_back(std::string("null"));
|
|
data.emplace_back(std::string("0.1"));
|
|
size_t maxsize = 0;
|
|
for (auto &s : data) {
|
|
if (maxsize < s.size())
|
|
maxsize = s.size();
|
|
}
|
|
simdjson::document::parser parser;
|
|
if (!parser.allocate_capacity(maxsize)) {
|
|
printf("allocation failure in skyprophet_test\n");
|
|
return false;
|
|
}
|
|
size_t counter = 0;
|
|
for (auto &rec : data) {
|
|
if ((counter % 10000) == 0) {
|
|
printf(".");
|
|
fflush(NULL);
|
|
}
|
|
counter++;
|
|
auto ok1 = json_parse(rec.c_str(), rec.length(), parser);
|
|
if (ok1 != 0 || !parser.is_valid()) {
|
|
printf("Something is wrong in skyprophet_test: %s.\n", rec.c_str());
|
|
return false;
|
|
}
|
|
auto ok2 = json_parse(rec, parser);
|
|
if (ok2 != 0 || !parser.is_valid()) {
|
|
printf("Something is wrong in skyprophet_test: %s.\n", rec.c_str());
|
|
return false;
|
|
}
|
|
}
|
|
printf("\n");
|
|
return true;
|
|
}
|
|
|
|
int main() {
|
|
std::cout << "Running basic tests." << std::endl;
|
|
if(!json_issue467())
|
|
return EXIT_FAILURE;
|
|
if(!stream_test())
|
|
return EXIT_FAILURE;
|
|
if(!stream_utf8_test())
|
|
return EXIT_FAILURE;
|
|
if(!number_test_small_integers())
|
|
return EXIT_FAILURE;
|
|
if(!stable_test())
|
|
return EXIT_FAILURE;
|
|
if(!bad_example())
|
|
return EXIT_FAILURE;
|
|
if(!number_test_powers_of_two())
|
|
return EXIT_FAILURE;
|
|
if(!number_test_powers_of_ten())
|
|
return EXIT_FAILURE;
|
|
if (!navigate_test())
|
|
return EXIT_FAILURE;
|
|
if (!skyprophet_test())
|
|
return EXIT_FAILURE;
|
|
std::cout << "Basic tests are ok." << std::endl;
|
|
return EXIT_SUCCESS;
|
|
}
|