simdjson/tests/basictests.cpp

1136 lines
38 KiB
C++
Raw Normal View History

#include <cinttypes>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <string>
#include <vector>
#include <cmath>
2020-02-25 12:59:38 +08:00
#include <set>
#include <string_view>
#include <sstream>
#include "simdjson.h"
2020-02-25 12:59:38 +08:00
#ifndef JSON_TEST_PATH
#define JSON_TEST_PATH "jsonexamples/twitter.json"
#endif
2019-10-17 05:47:52 +08:00
// ulp distance
// Marc B. Reynolds, 2016-2019
// Public Domain under http://unlicense.org, see link for details.
// adapted by D. Lemire
inline uint64_t f64_ulp_dist(double a, double b) {
uint64_t ua, ub;
memcpy(&ua, &a, sizeof(ua));
memcpy(&ub, &b, sizeof(ub));
if ((int64_t)(ub ^ ua) >= 0)
return (int64_t)(ua - ub) >= 0 ? (ua - ub) : (ub - ua);
return ua + ub + 0x80000000;
}
bool number_test_small_integers() {
char buf[1024];
simdjson::document::parser parser;
for (int m = 10; m < 20; m++) {
for (int i = -1024; i < 1024; i++) {
auto n = sprintf(buf, "%*d", m, i);
buf[n] = '\0';
fflush(NULL);
auto ok1 = json_parse(buf, n, parser);
if (ok1 != 0 || !parser.is_valid()) {
printf("Could not parse '%s': %s\n", buf, simdjson::error_message(ok1).c_str());
return false;
}
simdjson::document::iterator iter(parser);
if(!iter.is_number()) {
printf("Root should be number\n");
return false;
}
if(!iter.is_integer()) {
printf("Root should be an integer\n");
return false;
}
int64_t x = iter.get_integer();
if(x != i) {
printf("failed to parse %s. \n", buf);
return false;
}
}
}
printf("Small integers can be parsed.\n");
return true;
}
2019-10-17 05:47:52 +08:00
bool number_test_powers_of_two() {
char buf[1024];
simdjson::document::parser parser;
2019-10-17 05:47:52 +08:00
int maxulp = 0;
for (int i = -1075; i < 1024; ++i) {// large negative values should be zero.
double expected = pow(2, i);
auto n = sprintf(buf, "%.*e", std::numeric_limits<double>::max_digits10 - 1, expected);
buf[n] = '\0';
fflush(NULL);
auto ok1 = json_parse(buf, n, parser);
if (ok1 != 0 || !parser.is_valid()) {
2019-10-17 05:47:52 +08:00
printf("Could not parse: %s.\n", buf);
return false;
}
simdjson::document::iterator iter(parser);
if(!iter.is_number()) {
2019-10-17 05:47:52 +08:00
printf("Root should be number\n");
return false;
}
if(iter.is_integer()) {
int64_t x = iter.get_integer();
2019-10-17 05:47:52 +08:00
int power = 0;
while(x > 1) {
if((x % 2) != 0) {
printf("failed to parse %s. \n", buf);
return false;
}
x = x / 2;
power ++;
}
if(power != i) {
printf("failed to parse %s. \n", buf);
return false;
}
} else if(iter.is_unsigned_integer()) {
uint64_t x = iter.get_unsigned_integer();
2019-10-17 05:47:52 +08:00
int power = 0;
while(x > 1) {
if((x % 2) != 0) {
printf("failed to parse %s. \n", buf);
return false;
}
x = x / 2;
power ++;
}
if(power != i) {
printf("failed to parse %s. \n", buf);
return false;
}
} else {
double x = iter.get_double();
2019-10-17 05:47:52 +08:00
int ulp = f64_ulp_dist(x,expected);
if(ulp > maxulp) maxulp = ulp;
2019-10-19 05:30:29 +08:00
if(ulp > 3) {
2019-10-17 05:47:52 +08:00
printf("failed to parse %s. ULP = %d i = %d \n", buf, ulp, i);
return false;
}
}
}
printf("Powers of 2 can be parsed, maxulp = %d.\n", maxulp);
return true;
}
bool number_test_powers_of_ten() {
char buf[1024];
simdjson::document::parser parser;
for (int i = -1000000; i <= 308; ++i) {// large negative values should be zero.
auto n = sprintf(buf,"1e%d", i);
buf[n] = '\0';
fflush(NULL);
auto ok1 = json_parse(buf, n, parser);
if (ok1 != 0 || !parser.is_valid()) {
printf("Could not parse: %s.\n", buf);
return false;
}
simdjson::document::iterator iter(parser);
if(!iter.is_number()) {
printf("Root should be number\n");
return false;
}
if(iter.is_integer()) {
int64_t x = iter.get_integer();
int power = 0;
while(x > 1) {
if((x % 10) != 0) {
printf("failed to parse %s. \n", buf);
return false;
}
x = x / 10;
power ++;
}
if(power != i) {
printf("failed to parse %s. \n", buf);
return false;
}
} else if(iter.is_unsigned_integer()) {
uint64_t x = iter.get_unsigned_integer();
int power = 0;
while(x > 1) {
if((x % 10) != 0) {
printf("failed to parse %s. \n", buf);
return false;
}
x = x / 10;
power ++;
}
if(power != i) {
printf("failed to parse %s. \n", buf);
return false;
}
} else {
double x = iter.get_double();
2019-10-25 04:40:40 +08:00
double expected = std::pow(10, i);
2019-10-25 06:27:24 +08:00
int ulp = (int) f64_ulp_dist(x, expected);
if(ulp > 1) {
printf("failed to parse %s. \n", buf);
2019-10-25 06:27:24 +08:00
printf("actual: %.20g expected: %.20g \n", x, expected);
2019-10-25 04:40:40 +08:00
printf("ULP: %d \n", ulp);
return false;
}
}
}
printf("Powers of 10 can be parsed.\n");
return true;
}
// adversarial example that once triggred overruns, see https://github.com/lemire/simdjson/issues/345
bool bad_example() {
std::string badjson = "[7,7,7,7,6,7,7,7,6,7,7,6,[7,7,7,7,6,7,7,7,6,7,7,6,7,7,7,7,7,7,6";
simdjson::document::parser parser = simdjson::build_parsed_json(badjson);
if(parser.is_valid()) {
printf("This json should not be valid %s.\n", badjson.c_str());
return false;
}
return true;
}
2019-12-11 21:13:29 +08:00
// returns true if successful
bool stable_test() {
std::string json = "{"
"\"Image\":{"
"\"Width\":800,"
"\"Height\":600,"
"\"Title\":\"View from 15th Floor\","
"\"Thumbnail\":{"
"\"Url\":\"http://www.example.com/image/481989943\","
"\"Height\":125,"
"\"Width\":100"
"},"
"\"Animated\":false,"
"\"IDs\":[116,943.3,234,38793]"
"}"
"}";
simdjson::document::parser parser = simdjson::build_parsed_json(json);
2019-12-11 21:13:29 +08:00
std::ostringstream myStream;
if( ! parser.print_json(myStream) ) {
2019-12-11 21:13:29 +08:00
std::cout << "cannot print it out? " << std::endl;
return false;
}
std::string newjson = myStream.str();
if(json != newjson) {
std::cout << "serialized json differs!" << std::endl;
std::cout << json << std::endl;
std::cout << newjson << std::endl;
}
return newjson == json;
}
static simdjson::document::stream parse_many_stream_return(simdjson::document::parser &parser, simdjson::padded_string &str) {
return parser.parse_many(str);
}
// this is a compilation test
UNUSED static void parse_many_stream_assign() {
simdjson::document::parser parser;
simdjson::padded_string str("{}",2);
simdjson::document::stream s1 = parse_many_stream_return(parser, str);
}
static bool parse_json_message_issue467(char const* message, std::size_t len, size_t expectedcount) {
simdjson::document::parser parser;
size_t count = 0;
simdjson::padded_string str(message,len);
for (auto [doc, error] : parser.parse_many(str, len)) {
if (error) {
std::cerr << "Failed with simdjson error= " << error << std::endl;
return false;
}
count++;
}
if(count != expectedcount) {
std::cerr << "bad count" << std::endl;
return false;
}
return true;
}
bool json_issue467() {
std::cout << "Running " << __func__ << std::endl;
const char * single_message = "{\"error\":[],\"result\":{\"token\":\"xxx\"}}";
const char* two_messages = "{\"error\":[],\"result\":{\"token\":\"xxx\"}}{\"error\":[],\"result\":{\"token\":\"xxx\"}}";
if(!parse_json_message_issue467(single_message, strlen(single_message),1)) {
return false;
}
if(!parse_json_message_issue467(two_messages, strlen(two_messages),2)) {
return false;
}
return true;
}
// returns true if successful
bool navigate_test() {
std::cout << "Running " << __func__ << std::endl;
std::string json = "{"
"\"Image\": {"
"\"Width\": 800,"
"\"Height\": 600,"
"\"Title\": \"View from 15th Floor\","
"\"Thumbnail\": {"
" \"Url\": \"http://www.example.com/image/481989943\","
" \"Height\": 125,"
" \"Width\": 100"
"},"
"\"Animated\" : false,"
"\"IDs\": [116, 943, 234, 38793]"
"}"
"}";
simdjson::document::parser parser = simdjson::build_parsed_json(json);
if (!parser.is_valid()) {
printf("Something is wrong in navigate: %s.\n", json.c_str());
return false;
}
simdjson::document::iterator iter(parser);
if(!iter.is_object()) {
printf("Root should be object\n");
return false;
}
if(iter.move_to_key("bad key")) {
2020-01-09 22:55:54 +08:00
printf("We should not move to a non-existing key\n");
return false;
}
if(!iter.is_object()) {
2020-01-09 22:55:54 +08:00
printf("We should have remained at the object.\n");
return false;
}
if(iter.move_to_key_insensitive("bad key")) {
2020-01-09 22:55:54 +08:00
printf("We should not move to a non-existing key\n");
return false;
}
if(!iter.is_object()) {
2020-01-09 22:55:54 +08:00
printf("We should have remained at the object.\n");
return false;
}
if(iter.move_to_key("bad key", 7)) {
2020-01-09 22:55:54 +08:00
printf("We should not move to a non-existing key\n");
return false;
}
if(!iter.is_object()) {
2020-01-09 22:55:54 +08:00
printf("We should have remained at the object.\n");
return false;
}
if(!iter.down()) {
printf("Root should not be emtpy\n");
return false;
}
if(!iter.is_string()) {
printf("Object should start with string key\n");
return false;
}
if(iter.prev()) {
printf("We should not be able to go back from the start of the scope.\n");
return false;
}
if(strcmp(iter.get_string(),"Image")!=0) {
printf("There should be a single key, image.\n");
return false;
}
iter.move_to_value();
if(!iter.is_object()) {
printf("Value of image should be object\n");
return false;
}
if(!iter.down()) {
printf("Image key should not be emtpy\n");
return false;
}
if(!iter.next()) {
printf("key should have a value\n");
return false;
}
if(!iter.prev()) {
printf("We should go back to the key.\n");
return false;
}
if(strcmp(iter.get_string(),"Width")!=0) {
printf("There should be a key Width.\n");
return false;
}
if(!iter.up()) {
2020-01-09 22:55:54 +08:00
return false;
}
if(!iter.move_to_key("IDs")) {
2020-01-09 22:55:54 +08:00
printf("We should be able to move to an existing key\n");
return false;
}
if(!iter.is_array()) {
printf("Value of IDs should be array, it is %c \n", iter.get_type());
2020-01-09 22:55:54 +08:00
return false;
}
if(iter.move_to_index(4)) {
2020-01-09 22:55:54 +08:00
printf("We should not be able to move to a non-existing index\n");
return false;
}
if(!iter.is_array()) {
2020-01-09 22:55:54 +08:00
printf("We should have remained at the array\n");
return false;
}
return true;
}
// returns true if successful
2020-03-05 00:42:29 +08:00
bool JsonStream_utf8_test() {
std::cout << "Running " << __func__ << std::endl;
fflush(NULL);
const size_t n_records = 10000;
std::string data;
char buf[1024];
for (size_t i = 0; i < n_records; ++i) {
auto n = sprintf(buf,
"{\"id\": %zu, \"name\": \"name%zu\", \"gender\": \"%s\", "
"\"été\": {\"id\": %zu, \"name\": \"éventail%zu\"}}",
i, i, (i % 2) ? "" : "", i % 10, i % 10);
data += std::string(buf, n);
}
2020-03-06 03:05:37 +08:00
const size_t batch_size = 1000;
printf(".");
fflush(NULL);
simdjson::padded_string str(data);
simdjson::JsonStream<simdjson::padded_string> js{str, batch_size};
int parse_res = simdjson::SUCCESS_AND_HAS_MORE;
size_t count = 0;
simdjson::document::parser parser;
while (parse_res == simdjson::SUCCESS_AND_HAS_MORE) {
parse_res = js.json_parse(parser);
simdjson::document::iterator iter(parser);
if(!iter.is_object()) {
printf("Root should be object\n");
return false;
}
2020-03-06 03:05:37 +08:00
if(!iter.down()) {
printf("Root should not be emtpy\n");
return false;
}
2020-03-06 03:05:37 +08:00
if(!iter.is_string()) {
printf("Object should start with string key\n");
return false;
}
if(strcmp(iter.get_string(),"id")!=0) {
printf("There should a single key, id.\n");
return false;
}
iter.move_to_value();
if(!iter.is_integer()) {
printf("Value of image should be integer\n");
return false;
}
int64_t keyid = iter.get_integer();
if(keyid != (int64_t)count) {
printf("key does not match %d, expected %d\n",(int) keyid, (int) count);
return false;
}
count++;
}
if(count != n_records) {
printf("Something is wrong in JsonStream_utf8_test at window size = %zu.\n", batch_size);
return false;
}
printf("ok\n");
return true;
}
// returns true if successful
2020-03-05 00:42:29 +08:00
bool JsonStream_test() {
std::cout << "Running " << __func__ << std::endl;
fflush(NULL);
const size_t n_records = 10000;
std::string data;
char buf[1024];
for (size_t i = 0; i < n_records; ++i) {
auto n = sprintf(buf,
"{\"id\": %zu, \"name\": \"name%zu\", \"gender\": \"%s\", "
"\"ete\": {\"id\": %zu, \"name\": \"eventail%zu\"}}",
i, i, (i % 2) ? "homme" : "femme", i % 10, i % 10);
data += std::string(buf, n);
}
2020-03-06 03:05:37 +08:00
const size_t batch_size = 1000;
printf(".");
fflush(NULL);
simdjson::padded_string str(data);
simdjson::JsonStream<simdjson::padded_string> js{str, batch_size};
int parse_res = simdjson::SUCCESS_AND_HAS_MORE;
size_t count = 0;
simdjson::document::parser parser;
while (parse_res == simdjson::SUCCESS_AND_HAS_MORE) {
parse_res = js.json_parse(parser);
simdjson::document::iterator iter(parser);
if(!iter.is_object()) {
printf("Root should be object\n");
return false;
}
if(!iter.down()) {
printf("Root should not be emtpy\n");
return false;
}
if(!iter.is_string()) {
printf("Object should start with string key\n");
return false;
}
if(strcmp(iter.get_string(),"id")!=0) {
printf("There should a single key, id.\n");
return false;
}
iter.move_to_value();
if(!iter.is_integer()) {
printf("Value of image should be integer\n");
return false;
}
int64_t keyid = iter.get_integer();
if(keyid != (int64_t)count) {
printf("key does not match %d, expected %d\n",(int) keyid, (int) count);
return false;
}
count++;
}
if(count != n_records) {
printf("Something is wrong in JsonStream_test at window size = %zu.\n", batch_size);
return false;
}
printf("ok\n");
return true;
}
// returns true if successful
bool document_stream_test() {
std::cout << "Running " << __func__ << std::endl;
2020-03-06 03:05:37 +08:00
fflush(NULL);
const size_t n_records = 10000;
std::string data;
char buf[1024];
for (size_t i = 0; i < n_records; ++i) {
auto n = sprintf(buf,
"{\"id\": %zu, \"name\": \"name%zu\", \"gender\": \"%s\", "
"\"ete\": {\"id\": %zu, \"name\": \"eventail%zu\"}}",
i, i, (i % 2) ? "homme" : "femme", i % 10, i % 10);
data += std::string(buf, n);
}
2020-03-05 00:42:29 +08:00
for(size_t batch_size = 1000; batch_size < 2000; batch_size += (batch_size>1050?10:1)) {
printf(".");
fflush(NULL);
simdjson::padded_string str(data);
simdjson::document::parser parser;
2020-03-06 03:05:37 +08:00
size_t count = 0;
for (auto [doc, error] : parser.parse_many(str, batch_size)) {
if (error) {
printf("Error at on document %zd at batch size %zu: %s\n", count, batch_size, simdjson::error_message(error));
return false;
}
2020-03-06 03:05:37 +08:00
auto [keyid, error2] = doc["id"].as_int64_t();
if (error2) {
printf("Error getting id as int64 on document %zd at batch size %zu: %s\n", count, batch_size, simdjson::error_message(error2));
return false;
}
2020-03-06 03:05:37 +08:00
if (keyid != int64_t(count)) {
2020-03-14 02:44:21 +08:00
printf("key does not match %" PRId64 ", expected %zd on document %zd at batch size %zu\n", keyid, count, count, batch_size);
return false;
}
2020-03-06 03:05:37 +08:00
count++;
}
if(count != n_records) {
2020-03-06 03:05:37 +08:00
printf("Found wrong number of documents %zd, expected %zd at batch size %zu\n", count, n_records, batch_size);
2020-03-05 00:42:29 +08:00
return false;
}
}
printf("ok\n");
return true;
}
// returns true if successful
2020-03-06 03:05:37 +08:00
bool document_stream_utf8_test() {
std::cout << "Running " << __func__ << std::endl;
2020-03-05 00:42:29 +08:00
fflush(NULL);
const size_t n_records = 10000;
std::string data;
char buf[1024];
for (size_t i = 0; i < n_records; ++i) {
auto n = sprintf(buf,
"{\"id\": %zu, \"name\": \"name%zu\", \"gender\": \"%s\", "
2020-03-06 03:05:37 +08:00
"\"été\": {\"id\": %zu, \"name\": \"éventail%zu\"}}",
i, i, (i % 2) ? "" : "", i % 10, i % 10);
2020-03-05 00:42:29 +08:00
data += std::string(buf, n);
}
for(size_t batch_size = 1000; batch_size < 2000; batch_size += (batch_size>1050?10:1)) {
printf(".");
fflush(NULL);
simdjson::padded_string str(data);
simdjson::document::parser parser;
size_t count = 0;
for (auto [doc, error] : parser.parse_many(str, batch_size)) {
if (error) {
printf("Error at on document %zd at batch size %zu: %s\n", count, batch_size, simdjson::error_message(error));
2020-03-05 00:42:29 +08:00
return false;
}
auto [keyid, error2] = doc["id"].as_int64_t();
if (error2) {
printf("Error getting id as int64 on document %zd at batch size %zu: %s\n", count, batch_size, simdjson::error_message(error2));
2020-03-05 00:42:29 +08:00
return false;
}
if (keyid != int64_t(count)) {
2020-03-14 02:44:21 +08:00
printf("key does not match %" PRId64 ", expected %zd on document %zd at batch size %zu\n", keyid, count, count, batch_size);
2020-03-05 00:42:29 +08:00
return false;
}
count++;
}
if(count != n_records) {
printf("Found wrong number of documents %zd, expected %zd at batch size %zu\n", count, n_records, batch_size);
return false;
}
}
printf("ok\n");
return true;
}
// returns true if successful
bool skyprophet_test() {
std::cout << "Running " << __func__ << std::endl;
const size_t n_records = 100000;
std::vector<std::string> data;
char buf[1024];
for (size_t i = 0; i < n_records; ++i) {
auto n = sprintf(buf,
"{\"id\": %zu, \"name\": \"name%zu\", \"gender\": \"%s\", "
"\"school\": {\"id\": %zu, \"name\": \"school%zu\"}}",
i, i, (i % 2) ? "male" : "female", i % 10, i % 10);
data.emplace_back(std::string(buf, n));
}
for (size_t i = 0; i < n_records; ++i) {
auto n = sprintf(buf, "{\"counter\": %f, \"array\": [%s]}", i * 3.1416,
(i % 2) ? "true" : "false");
data.emplace_back(std::string(buf, n));
}
for (size_t i = 0; i < n_records; ++i) {
auto n = sprintf(buf, "{\"number\": %e}", i * 10000.31321321);
data.emplace_back(std::string(buf, n));
}
data.emplace_back(std::string("true"));
data.emplace_back(std::string("false"));
data.emplace_back(std::string("null"));
data.emplace_back(std::string("0.1"));
size_t maxsize = 0;
for (auto &s : data) {
if (maxsize < s.size())
maxsize = s.size();
}
simdjson::document::parser parser;
size_t counter = 0;
for (auto &rec : data) {
if ((counter % 10000) == 0) {
printf(".");
fflush(NULL);
}
counter++;
auto ok1 = json_parse(rec.c_str(), rec.length(), parser);
if (ok1 != 0 || !parser.is_valid()) {
printf("Something is wrong in skyprophet_test: %s.\n", rec.c_str());
return false;
}
auto ok2 = json_parse(rec, parser);
if (ok2 != 0 || !parser.is_valid()) {
printf("Something is wrong in skyprophet_test: %s.\n", rec.c_str());
return false;
}
}
printf("\n");
return true;
}
2020-02-25 12:59:38 +08:00
namespace dom_api {
using namespace std;
using namespace simdjson;
bool object_iterator() {
std::cout << "Running " << __func__ << std::endl;
2020-02-25 12:59:38 +08:00
string json(R"({ "a": 1, "b": 2, "c": 3 })");
const char* expected_key[] = { "a", "b", "c" };
uint64_t expected_value[] = { 1, 2, 3 };
int i = 0;
document doc = document::parse(json);
for (auto [key, value] : document::object(doc)) {
if (key != expected_key[i] || uint64_t(value) != expected_value[i]) { cerr << "Expected " << expected_key[i] << " = " << expected_value[i] << ", got " << key << "=" << uint64_t(value) << endl; return false; }
i++;
}
if (i*sizeof(uint64_t) != sizeof(expected_value)) { cout << "Expected " << sizeof(expected_value) << " values, got " << i << endl; return false; }
return true;
}
bool array_iterator() {
std::cout << "Running " << __func__ << std::endl;
2020-02-25 12:59:38 +08:00
string json(R"([ 1, 10, 100 ])");
uint64_t expected_value[] = { 1, 10, 100 };
int i=0;
document doc = document::parse(json);
for (uint64_t value : doc.as_array()) {
if (value != expected_value[i]) { cerr << "Expected " << expected_value[i] << ", got " << value << endl; return false; }
i++;
}
if (i*sizeof(uint64_t) != sizeof(expected_value)) { cout << "Expected " << sizeof(expected_value) << " values, got " << i << endl; return false; }
return true;
}
bool object_iterator_empty() {
std::cout << "Running " << __func__ << std::endl;
2020-02-25 12:59:38 +08:00
string json(R"({})");
int i = 0;
document doc = document::parse(json);
for (auto [key, value] : doc.as_object()) {
cout << "Unexpected " << key << " = " << uint64_t(value) << endl;
i++;
}
if (i > 0) { cout << "Expected 0 values, got " << i << endl; return false; }
return true;
}
bool array_iterator_empty() {
std::cout << "Running " << __func__ << std::endl;
2020-02-25 12:59:38 +08:00
string json(R"([])");
int i=0;
document doc = document::parse(json);
for (uint64_t value : doc.as_array()) {
cout << "Unexpected value " << value << endl;
i++;
}
if (i > 0) { cout << "Expected 0 values, got " << i << endl; return false; }
return true;
}
bool string_value() {
std::cout << "Running " << __func__ << std::endl;
2020-02-25 12:59:38 +08:00
string json(R"([ "hi", "has backslash\\" ])");
document doc = document::parse(json);
auto val = document::array(doc).begin();
if (strcmp((const char*)*val, "hi")) { cerr << "Expected const char*(\"hi\") to be \"hi\", was " << (const char*)*val << endl; return false; }
if (string_view(*val) != "hi") { cerr << "Expected string_view(\"hi\") to be \"hi\", was " << string_view(*val) << endl; return false; }
++val;
if (strcmp((const char*)*val, "has backslash\\")) { cerr << "Expected const char*(\"has backslash\\\\\") to be \"has backslash\\\", was " << (const char*)*val << endl; return false; }
if (string_view(*val) != "has backslash\\") { cerr << "Expected string_view(\"has backslash\\\\\") to be \"has backslash\\\", was " << string_view(*val) << endl; return false; }
return true;
}
bool numeric_values() {
std::cout << "Running " << __func__ << std::endl;
2020-02-25 12:59:38 +08:00
string json(R"([ 0, 1, -1, 1.1 ])");
document doc = document::parse(json);
auto val = document::array(doc).begin();
if (uint64_t(*val) != 0) { cerr << "Expected uint64_t(0) to be 0, was " << uint64_t(*val) << endl; return false; }
if (int64_t(*val) != 0) { cerr << "Expected int64_t(0) to be 0, was " << int64_t(*val) << endl; return false; }
if (double(*val) != 0) { cerr << "Expected double(0) to be 0, was " << double(*val) << endl; return false; }
++val;
if (uint64_t(*val) != 1) { cerr << "Expected uint64_t(1) to be 1, was " << uint64_t(*val) << endl; return false; }
if (int64_t(*val) != 1) { cerr << "Expected int64_t(1) to be 1, was " << int64_t(*val) << endl; return false; }
if (double(*val) != 1) { cerr << "Expected double(1) to be 1, was " << double(*val) << endl; return false; }
++val;
if (int64_t(*val) != -1) { cerr << "Expected int64_t(-1) to be -1, was " << int64_t(*val) << endl; return false; }
if (double(*val) != -1) { cerr << "Expected double(-1) to be -1, was " << double(*val) << endl; return false; }
++val;
if (double(*val) != 1.1) { cerr << "Expected double(1.1) to be 1.1, was " << double(*val) << endl; return false; }
return true;
}
bool boolean_values() {
std::cout << "Running " << __func__ << std::endl;
2020-02-25 12:59:38 +08:00
string json(R"([ true, false ])");
document doc = document::parse(json);
auto val = document::array(doc).begin();
if (bool(*val) != true) { cerr << "Expected bool(true) to be true, was " << bool(*val) << endl; return false; }
++val;
if (bool(*val) != false) { cerr << "Expected bool(false) to be false, was " << bool(*val) << endl; return false; }
return true;
}
bool null_value() {
std::cout << "Running " << __func__ << std::endl;
2020-02-25 12:59:38 +08:00
string json(R"([ null ])");
document doc = document::parse(json);
auto val = document::array(doc).begin();
if (!(*val).is_null()) { cerr << "Expected null to be null!" << endl; return false; }
return true;
}
bool document_object_index() {
std::cout << "Running " << __func__ << std::endl;
2020-02-25 12:59:38 +08:00
string json(R"({ "a": 1, "b": 2, "c": 3})");
document doc = document::parse(json);
if (uint64_t(doc["a"]) != 1) { cerr << "Expected uint64_t(doc[\"a\"]) to be 1, was " << uint64_t(doc["a"]) << endl; return false; }
if (uint64_t(doc["b"]) != 2) { cerr << "Expected uint64_t(doc[\"b\"]) to be 2, was " << uint64_t(doc["b"]) << endl; return false; }
if (uint64_t(doc["c"]) != 3) { cerr << "Expected uint64_t(doc[\"c\"]) to be 3, was " << uint64_t(doc["c"]) << endl; return false; }
// Check all three again in backwards order, to ensure we can go backwards
if (uint64_t(doc["c"]) != 3) { cerr << "Expected uint64_t(doc[\"c\"]) to be 3, was " << uint64_t(doc["c"]) << endl; return false; }
if (uint64_t(doc["b"]) != 2) { cerr << "Expected uint64_t(doc[\"b\"]) to be 2, was " << uint64_t(doc["b"]) << endl; return false; }
if (uint64_t(doc["a"]) != 1) { cerr << "Expected uint64_t(doc[\"a\"]) to be 1, was " << uint64_t(doc["a"]) << endl; return false; }
auto [val, error] = doc["d"];
2020-03-07 04:14:23 +08:00
if (error != simdjson::NO_SUCH_FIELD) { cerr << "Expected NO_SUCH_FIELD error for uint64_t(doc[\"d\"]), got " << error << endl; return false; }
2020-02-25 12:59:38 +08:00
return true;
}
bool object_index() {
std::cout << "Running " << __func__ << std::endl;
2020-02-25 12:59:38 +08:00
string json(R"({ "obj": { "a": 1, "b": 2, "c": 3 } })");
document doc = document::parse(json);
if (uint64_t(doc["obj"]["a"]) != 1) { cerr << "Expected uint64_t(doc[\"obj\"][\"a\"]) to be 1, was " << uint64_t(doc["obj"]["a"]) << endl; return false; }
document::object obj = doc["obj"];
if (uint64_t(obj["a"]) != 1) { cerr << "Expected uint64_t(obj[\"a\"]) to be 1, was " << uint64_t(obj["a"]) << endl; return false; }
if (uint64_t(obj["b"]) != 2) { cerr << "Expected uint64_t(obj[\"b\"]) to be 2, was " << uint64_t(obj["b"]) << endl; return false; }
if (uint64_t(obj["c"]) != 3) { cerr << "Expected uint64_t(obj[\"c\"]) to be 3, was " << uint64_t(obj["c"]) << endl; return false; }
// Check all three again in backwards order, to ensure we can go backwards
if (uint64_t(obj["c"]) != 3) { cerr << "Expected uint64_t(obj[\"c\"]) to be 3, was " << uint64_t(obj["c"]) << endl; return false; }
if (uint64_t(obj["b"]) != 2) { cerr << "Expected uint64_t(obj[\"b\"]) to be 2, was " << uint64_t(obj["b"]) << endl; return false; }
if (uint64_t(obj["a"]) != 1) { cerr << "Expected uint64_t(obj[\"a\"]) to be 1, was " << uint64_t(obj["a"]) << endl; return false; }
auto [val, error] = obj["d"];
2020-03-07 04:14:23 +08:00
if (error != simdjson::NO_SUCH_FIELD) { cerr << "Expected NO_SUCH_FIELD error for uint64_t(obj[\"d\"]), got " << error << endl; return false; }
2020-02-25 12:59:38 +08:00
return true;
}
bool twitter_count() {
std::cout << "Running " << __func__ << std::endl;
2020-02-25 12:59:38 +08:00
// Prints the number of results in twitter.json
document doc = document::load(JSON_TEST_PATH);
2020-02-25 12:59:38 +08:00
uint64_t result_count = doc["search_metadata"]["count"];
if (result_count != 100) { cerr << "Expected twitter.json[metadata_count][count] = 100, got " << result_count << endl; return false; }
return true;
}
bool twitter_default_profile() {
std::cout << "Running " << __func__ << std::endl;
2020-02-25 12:59:38 +08:00
// Print users with a default profile.
set<string_view> default_users;
document doc = document::load(JSON_TEST_PATH);
2020-02-25 12:59:38 +08:00
for (document::object tweet : doc["statuses"].as_array()) {
document::object user = tweet["user"];
if (user["default_profile"]) {
default_users.insert(user["screen_name"]);
}
}
if (default_users.size() != 86) { cerr << "Expected twitter.json[statuses][user] to contain 86 default_profile users, got " << default_users.size() << endl; return false; }
return true;
}
bool twitter_image_sizes() {
std::cout << "Running " << __func__ << std::endl;
2020-02-25 12:59:38 +08:00
// Print image names and sizes
set<tuple<uint64_t, uint64_t>> image_sizes;
document doc = document::load(JSON_TEST_PATH);
2020-02-25 12:59:38 +08:00
for (document::object tweet : doc["statuses"].as_array()) {
auto [media, not_found] = tweet["entities"]["media"];
if (!not_found) {
for (document::object image : media.as_array()) {
for (auto [key, size] : image["sizes"].as_object()) {
image_sizes.insert({ size["w"], size["h"] });
}
}
}
}
if (image_sizes.size() != 15) { cerr << "Expected twitter.json[statuses][entities][media][sizes] to contain 15 different sizes, got " << image_sizes.size() << endl; return false; }
return true;
}
bool run_tests() {
if (!object_iterator()) { return false; }
if (!array_iterator()) { return false; }
if (!object_iterator_empty()) { return false; }
if (!array_iterator_empty()) { return false; }
if (!string_value()) { return false; }
if (!numeric_values()) { return false; }
if (!boolean_values()) { return false; }
if (!null_value()) { return false; }
if (!document_object_index()) { return false; }
if (!object_index()) { return false; }
if (!twitter_count()) { return false; }
if (!twitter_default_profile()) { return false; }
if (!twitter_image_sizes()) { return false; }
return true;
}
}
namespace format_tests {
using namespace simdjson;
using namespace std;
const padded_string DOCUMENT(string(R"({ "foo" : 1, "bar" : [ 1, 2, 3 ], "baz": { "a": 1, "b": 2, "c": 3 } })"));
const string MINIFIED(R"({"foo":1,"bar":[1,2,3],"baz":{"a":1,"b":2,"c":3}})");
bool assert_minified(ostringstream &actual, const std::string &expected=MINIFIED) {
if (actual.str() != expected) {
cerr << "Failed to correctly minify " << DOCUMENT.data() << endl;
cerr << "Expected: " << expected << endl;
cerr << "Actual: " << actual.str() << endl;
return false;
}
return true;
}
bool print_document_parse() {
std::cout << "Running " << __func__ << std::endl;
ostringstream s;
s << document::parse(DOCUMENT);
return assert_minified(s);
}
bool print_minify_document_parse() {
std::cout << "Running " << __func__ << std::endl;
ostringstream s;
s << minify(document::parse(DOCUMENT));
return assert_minified(s);
}
bool print_parser_parse() {
std::cout << "Running " << __func__ << std::endl;
document::parser parser;
if (!parser.allocate_capacity(DOCUMENT.length())) { cerr << "Couldn't allocate!" << endl; return false; }
ostringstream s;
s << parser.parse(DOCUMENT);
return assert_minified(s);
}
bool print_minify_parser_parse() {
std::cout << "Running " << __func__ << std::endl;
document::parser parser;
if (!parser.allocate_capacity(DOCUMENT.length())) { cerr << "Couldn't allocate!" << endl; return false; }
ostringstream s;
s << minify(parser.parse(DOCUMENT));
return assert_minified(s);
}
bool print_document() {
std::cout << "Running " << __func__ << std::endl;
document doc = document::parse(DOCUMENT);
ostringstream s;
s << doc;
return assert_minified(s);
}
bool print_minify_document() {
std::cout << "Running " << __func__ << std::endl;
document doc = document::parse(DOCUMENT);
ostringstream s;
s << minify(doc);
return assert_minified(s);
}
bool print_document_ref() {
std::cout << "Running " << __func__ << std::endl;
document::parser parser;
if (!parser.allocate_capacity(DOCUMENT.length())) { cerr << "Couldn't allocate!" << endl; return false; }
const document &doc_ref = parser.parse(DOCUMENT);
ostringstream s;
s << doc_ref;
return assert_minified(s);
}
bool print_minify_document_ref() {
std::cout << "Running " << __func__ << std::endl;
document::parser parser;
if (!parser.allocate_capacity(DOCUMENT.length())) { cerr << "Couldn't allocate!" << endl; return false; }
const document &doc_ref = parser.parse(DOCUMENT);
ostringstream s;
s << minify(doc_ref);
return assert_minified(s);
}
bool print_element_result() {
std::cout << "Running " << __func__ << std::endl;
const document &doc = document::parse(DOCUMENT);
ostringstream s;
s << doc["foo"];
return assert_minified(s, "1");
}
bool print_minify_element_result() {
std::cout << "Running " << __func__ << std::endl;
const document &doc = document::parse(DOCUMENT);
ostringstream s;
s << minify(doc["foo"]);
return assert_minified(s, "1");
}
bool print_element() {
std::cout << "Running " << __func__ << std::endl;
const document &doc = document::parse(DOCUMENT);
document::element value = doc["foo"];
ostringstream s;
s << value;
return assert_minified(s, "1");
}
bool print_minify_element() {
std::cout << "Running " << __func__ << std::endl;
const document &doc = document::parse(DOCUMENT);
document::element value = doc["foo"];
ostringstream s;
s << minify(value);
return assert_minified(s, "1");
}
bool print_array_result() {
std::cout << "Running " << __func__ << std::endl;
const document &doc = document::parse(DOCUMENT);
ostringstream s;
s << doc["bar"].as_array();
return assert_minified(s, "[1,2,3]");
}
bool print_minify_array_result() {
std::cout << "Running " << __func__ << std::endl;
const document &doc = document::parse(DOCUMENT);
ostringstream s;
s << minify(doc["bar"].as_array());
return assert_minified(s, "[1,2,3]");
}
bool print_array() {
std::cout << "Running " << __func__ << std::endl;
const document &doc = document::parse(DOCUMENT);
document::array value = doc["bar"];
ostringstream s;
s << value;
return assert_minified(s, "[1,2,3]");
}
bool print_minify_array() {
std::cout << "Running " << __func__ << std::endl;
const document &doc = document::parse(DOCUMENT);
document::array value = doc["bar"];
ostringstream s;
s << minify(value);
return assert_minified(s, "[1,2,3]");
}
bool print_object_result() {
std::cout << "Running " << __func__ << std::endl;
const document &doc = document::parse(DOCUMENT);
ostringstream s;
s << doc["baz"].as_object();
return assert_minified(s, R"({"a":1,"b":2,"c":3})");
}
bool print_minify_object_result() {
std::cout << "Running " << __func__ << std::endl;
const document &doc = document::parse(DOCUMENT);
ostringstream s;
s << minify(doc["baz"].as_object());
return assert_minified(s, R"({"a":1,"b":2,"c":3})");
}
bool print_object() {
std::cout << "Running " << __func__ << std::endl;
const document &doc = document::parse(DOCUMENT);
document::object value = doc["baz"];
ostringstream s;
s << value;
return assert_minified(s, R"({"a":1,"b":2,"c":3})");
}
bool print_minify_object() {
std::cout << "Running " << __func__ << std::endl;
const document &doc = document::parse(DOCUMENT);
document::object value = doc["baz"];
ostringstream s;
s << minify(value);
return assert_minified(s, R"({"a":1,"b":2,"c":3})");
}
bool run_tests() {
return print_document_parse() && print_minify_document_parse() &&
print_parser_parse() && print_minify_parser_parse() &&
print_document() && print_minify_document() &&
print_document_ref() && print_minify_document_ref() &&
print_element_result() && print_minify_element_result() &&
print_array_result() && print_minify_array_result() &&
print_object_result() && print_minify_object_result() &&
print_element() && print_minify_element() &&
print_array() && print_minify_array() &&
print_object() && print_minify_object();
}
}
bool error_messages_in_correct_order() {
std::cout << "Running " << __func__ << std::endl;
using namespace simdjson;
using namespace simdjson::internal;
using namespace std;
if ((sizeof(error_codes)/sizeof(error_code_info)) != NUM_ERROR_CODES) {
cerr << "error_codes does not have all codes in error_code enum (or too many)" << endl;
return false;
}
for (int i=0; i<NUM_ERROR_CODES; i++) {
if (error_codes[i].code != i) {
cerr << "Error " << int(error_codes[i].code) << " at wrong position (" << i << "): " << error_codes[i].message << endl;
return false;
}
}
return true;
}
2020-03-14 04:07:05 +08:00
bool lots_of_brackets() {
std::string input;
for(size_t i = 0; i < 1000; i++) {
input += "[";
}
for(size_t i = 0; i < 1000; i++) {
input += "]";
}
auto [doc, error] = simdjson::document::parse(input);
if (error) { std::cerr << "Error: " << simdjson::error_message(error) << std::endl; return false; }
std::cout << doc;
std::cout << std::endl;
return true;
}
int main() {
// this is put here deliberately to check that the documentation is correct (README),
// should this fail to compile, you should update the documentation:
if (simdjson::active_implementation->name() == "unsupported") {
printf("unsupported CPU\n");
}
std::cout << "Running basic tests." << std::endl;
2020-03-14 04:07:05 +08:00
if(!lots_of_brackets())
return EXIT_FAILURE;
if(!json_issue467())
return EXIT_FAILURE;
if(!number_test_small_integers())
return EXIT_FAILURE;
2019-12-11 21:13:29 +08:00
if(!stable_test())
return EXIT_FAILURE;
if(!bad_example())
return EXIT_FAILURE;
2019-10-17 05:47:52 +08:00
if(!number_test_powers_of_two())
return EXIT_FAILURE;
if(!number_test_powers_of_ten())
return EXIT_FAILURE;
if (!navigate_test())
return EXIT_FAILURE;
if (!skyprophet_test())
return EXIT_FAILURE;
2020-02-25 12:59:38 +08:00
if (!dom_api::run_tests())
return EXIT_FAILURE;
if (!format_tests::run_tests())
return EXIT_FAILURE;
2020-03-06 03:05:37 +08:00
if(!document_stream_test())
return EXIT_FAILURE;
if(!document_stream_utf8_test())
return EXIT_FAILURE;
if(!JsonStream_test())
return EXIT_FAILURE;
if(!JsonStream_utf8_test())
return EXIT_FAILURE;
if(!error_messages_in_correct_order())
return EXIT_FAILURE;
std::cout << "Basic tests are ok." << std::endl;
return EXIT_SUCCESS;
}