Returning basictests to its original function: basic tests (only) (#1010)

* The initial motivation behind basictests was to provide a quick set of sanity tests to check whether your code made sense. It
was not meant for thorough testing to find corner cases. However, over time, it grew to include expensive tests of that kind.
This PR takes them out. It also allows us to bring basictests back to the MinGW tests, since it is now cheap.

This is not an exercise in software engineering and making things prettier. This is a pragmatic change to improve our
test coverage and quality of life.

* Adds many more cheap tests.

Co-authored-by: Daniel Lemire <lemire@gmai.com>
This commit is contained in:
Daniel Lemire 2020-07-13 09:39:35 -04:00 committed by GitHub
parent a2f0933d01
commit 039d82ff1b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 483 additions and 395 deletions

View File

@ -50,9 +50,5 @@ jobs:
mkdir build32 mkdir build32
cd build32 cd build32
cmake -DSIMDJSON_BUILD_STATIC=ON -DSIMDJSON_COMPETITION=OFF -DSIMDJSON_GOOGLE_BENCHMARKS=OFF -DSIMDJSON_ENABLE_THREADS=OFF .. cmake -DSIMDJSON_BUILD_STATIC=ON -DSIMDJSON_COMPETITION=OFF -DSIMDJSON_GOOGLE_BENCHMARKS=OFF -DSIMDJSON_ENABLE_THREADS=OFF ..
cmake --build . --target basictests numberparsingcheck stringparsingcheck errortests integer_tests pointercheck --verbose cmake --build . --target parse_many_test jsoncheck basictests numberparsingcheck stringparsingcheck errortests integer_tests pointercheck --verbose
ctest . -R stringparsingcheck --output-on-failure ctest . -R "(parse_many_test|jsoncheck|basictests|stringparsingcheck|numberparsingcheck|errortests|integer_tests|pointercheck)" --output-on-failure
ctest . -R numberparsingcheck --output-on-failure
ctest . -R errortests --output-on-failure
ctest . -R integer_tests --output-on-failure
ctest . -R pointercheck --output-on-failure

View File

@ -51,10 +51,6 @@ jobs:
mkdir build64 mkdir build64
cd build64 cd build64
cmake -DSIMDJSON_BUILD_STATIC=ON -DSIMDJSON_COMPETITION=OFF -DSIMDJSON_GOOGLE_BENCHMARKS=OFF -DSIMDJSON_ENABLE_THREADS=OFF .. cmake -DSIMDJSON_BUILD_STATIC=ON -DSIMDJSON_COMPETITION=OFF -DSIMDJSON_GOOGLE_BENCHMARKS=OFF -DSIMDJSON_ENABLE_THREADS=OFF ..
cmake --build . --target basictests numberparsingcheck stringparsingcheck errortests integer_tests pointercheck --verbose cmake --build . --target parse_many_test jsoncheck basictests numberparsingcheck stringparsingcheck errortests integer_tests pointercheck --verbose
ctest . -R stringparsingcheck --output-on-failure ctest . -R "(parse_many_test|jsoncheck|basictests|stringparsingcheck|numberparsingcheck|errortests|integer_tests|pointercheck)" --output-on-failure
ctest . -R numberparsingcheck --output-on-failure
ctest . -R errortests --output-on-failure
ctest . -R integer_tests --output-on-failure
ctest . -R pointercheck --output-on-failure

View File

@ -52,6 +52,8 @@ target_compile_definitions(stringparsingcheck PRIVATE NOMINMAX)
# All remaining tests link with simdjson proper # All remaining tests link with simdjson proper
link_libraries(simdjson) link_libraries(simdjson)
add_cpp_test(basictests LABELS acceptance per_implementation) add_cpp_test(basictests LABELS acceptance per_implementation)
add_cpp_test(document_stream_tests LABELS acceptance per_implementation)
add_cpp_test(document_tests LABELS acceptance per_implementation)
add_cpp_test(errortests LABELS acceptance per_implementation) add_cpp_test(errortests LABELS acceptance per_implementation)
add_cpp_test(integer_tests LABELS acceptance per_implementation) add_cpp_test(integer_tests LABELS acceptance per_implementation)
add_cpp_test(jsoncheck LABELS acceptance per_implementation) add_cpp_test(jsoncheck LABELS acceptance per_implementation)

View File

@ -7,7 +7,6 @@
#include <vector> #include <vector>
#include <cmath> #include <cmath>
#include <set> #include <set>
#include <string_view>
#include <sstream> #include <sstream>
#include <utility> #include <utility>
#include <ciso646> #include <ciso646>
@ -187,386 +186,7 @@ namespace number_tests {
} }
} }
namespace document_tests {
bool issue938() {
std::vector<std::string> json_strings{"[true,false]", "[1,2,3,null]",
R"({"yay":"json!"})"};
simdjson::dom::parser parser1;
for (simdjson::padded_string str : json_strings) {
simdjson::dom::element element;
ASSERT_SUCCESS( parser1.parse(str).get(element) );
std::cout << element << std::endl;
}
std::vector<std::string> file_paths{
ADVERSARIAL_JSON, FLATADVERSARIAL_JSON, DEMO_JSON,
TWITTER_TIMELINE_JSON, REPEAT_JSON, SMALLDEMO_JSON,
TRUENULL_JSON};
for (auto path : file_paths) {
simdjson::dom::parser parser2;
simdjson::dom::element element;
std::cout << "file: " << path << std::endl;
ASSERT_SUCCESS( parser2.load(path).get(element) );
std::cout << element.type() << std::endl;
}
simdjson::dom::parser parser3;
for (auto path : file_paths) {
simdjson::dom::element element;
std::cout << "file: " << path << std::endl;
ASSERT_SUCCESS( parser3.load(path).get(element) );
std::cout << element.type() << std::endl;
}
return true;
}
// adversarial example that once triggred overruns, see https://github.com/lemire/simdjson/issues/345
bool bad_example() {
std::cout << __func__ << std::endl;
simdjson::padded_string badjson = "[7,7,7,7,6,7,7,7,6,7,7,6,[7,7,7,7,6,7,7,7,6,7,7,6,7,7,7,7,7,7,6"_padded;
simdjson::dom::parser parser;
ASSERT_ERROR( parser.parse(badjson), simdjson::TAPE_ERROR );
return true;
}
bool count_array_example() {
std::cout << __func__ << std::endl;
simdjson::padded_string smalljson = "[1,2,3]"_padded;
simdjson::dom::parser parser;
simdjson::dom::array array;
ASSERT_SUCCESS( parser.parse(smalljson).get(array) );
ASSERT_EQUAL( array.size(), 3 );
return true;
}
bool count_object_example() {
std::cout << __func__ << std::endl;
simdjson::padded_string smalljson = "{\"1\":1,\"2\":1,\"3\":1}"_padded;
simdjson::dom::parser parser;
simdjson::dom::object object;
ASSERT_SUCCESS( parser.parse(smalljson).get(object) );
ASSERT_EQUAL( object.size(), 3 );
return true;
}
bool padded_with_open_bracket() {
std::cout << __func__ << std::endl;
simdjson::dom::parser parser;
// This is an invalid document padded with open braces.
ASSERT_ERROR( parser.parse("[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[", 2, false), simdjson::TAPE_ERROR);
// This is a valid document padded with open braces.
ASSERT_SUCCESS( parser.parse("[][[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[", 2, false) );
return true;
}
// returns true if successful
bool stable_test() {
std::cout << __func__ << std::endl;
simdjson::padded_string json = "{"
"\"Image\":{"
"\"Width\":800,"
"\"Height\":600,"
"\"Title\":\"View from 15th Floor\","
"\"Thumbnail\":{"
"\"Url\":\"http://www.example.com/image/481989943\","
"\"Height\":125,"
"\"Width\":100"
"},"
"\"Animated\":false,"
"\"IDs\":[116,943.3,234,38793]"
"}"
"}"_padded;
simdjson::dom::parser parser;
std::ostringstream myStream;
#if SIMDJSON_EXCEPTIONS
myStream << parser.parse(json);
#else
simdjson::dom::element doc;
UNUSED auto error = parser.parse(json).get(doc);
myStream << doc;
#endif
std::string newjson = myStream.str();
if(static_cast<std::string>(json) != newjson) {
std::cout << "serialized json differs!" << std::endl;
std::cout << static_cast<std::string>(json) << std::endl;
std::cout << newjson << std::endl;
}
return newjson == static_cast<std::string>(json);
}
// returns true if successful
bool skyprophet_test() {
std::cout << "Running " << __func__ << std::endl;
const size_t n_records = 100000;
std::vector<std::string> data;
char buf[1024];
for (size_t i = 0; i < n_records; ++i) {
size_t n = snprintf(buf, sizeof(buf),
"{\"id\": %zu, \"name\": \"name%zu\", \"gender\": \"%s\", "
"\"school\": {\"id\": %zu, \"name\": \"school%zu\"}}",
i, i, (i % 2) ? "male" : "female", i % 10, i % 10);
if (n >= sizeof(buf)) { abort(); }
data.emplace_back(std::string(buf, n));
}
for (size_t i = 0; i < n_records; ++i) {
size_t n = snprintf(buf, sizeof(buf), "{\"counter\": %f, \"array\": [%s]}", static_cast<double>(i) * 3.1416,
(i % 2) ? "true" : "false");
if (n >= sizeof(buf)) { abort(); }
data.emplace_back(std::string(buf, n));
}
for (size_t i = 0; i < n_records; ++i) {
size_t n = snprintf(buf, sizeof(buf), "{\"number\": %e}", static_cast<double>(i) * 10000.31321321);
if (n >= sizeof(buf)) { abort(); }
data.emplace_back(std::string(buf, n));
}
data.emplace_back(std::string("true"));
data.emplace_back(std::string("false"));
data.emplace_back(std::string("null"));
data.emplace_back(std::string("0.1"));
size_t maxsize = 0;
for (auto &s : data) {
if (maxsize < s.size())
maxsize = s.size();
}
simdjson::dom::parser parser;
size_t counter = 0;
for (auto &rec : data) {
if ((counter % 10000) == 0) {
printf(".");
fflush(NULL);
}
counter++;
auto error = parser.parse(rec.c_str(), rec.length()).error();
if (error != simdjson::error_code::SUCCESS) {
printf("Something is wrong in skyprophet_test: %s.\n", rec.c_str());
printf("Parsing failed. Error is %s\n", simdjson::error_message(error));
return false;
}
error = parser.parse(rec.c_str(), rec.length()).error();
if (error != simdjson::error_code::SUCCESS) {
printf("Something is wrong in skyprophet_test: %s.\n", rec.c_str());
printf("Parsing failed. Error is %s\n", simdjson::error_message(error));
return false;
}
}
printf("\n");
return true;
}
bool lots_of_brackets() {
std::string input;
for(size_t i = 0; i < 200; i++) {
input += "[";
}
for(size_t i = 0; i < 200; i++) {
input += "]";
}
simdjson::dom::parser parser;
auto error = parser.parse(input).error();
if (error) { std::cerr << "Error: " << simdjson::error_message(error) << std::endl; return false; }
return true;
}
bool run() {
return issue938() &&
padded_with_open_bracket() &&
bad_example() &&
count_array_example() &&
count_object_example() &&
stable_test() &&
skyprophet_test() &&
lots_of_brackets();
}
}
namespace document_stream_tests {
static simdjson::dom::document_stream parse_many_stream_return(simdjson::dom::parser &parser, simdjson::padded_string &str) {
simdjson::dom::document_stream stream;
UNUSED auto error = parser.parse_many(str).get(stream);
return stream;
}
// this is a compilation test
UNUSED static void parse_many_stream_assign() {
simdjson::dom::parser parser;
simdjson::padded_string str("{}",2);
simdjson::dom::document_stream s1 = parse_many_stream_return(parser, str);
}
bool test_current_index() {
std::cout << "Running " << __func__ << std::endl;
std::string base("1 ");// one JSON!
std::string json;
for(size_t k = 0; k < 1000; k++) {
json += base;
}
simdjson::dom::parser parser;
const size_t window = 32; // deliberately small
simdjson::dom::document_stream stream;
ASSERT_SUCCESS( parser.parse_many(json,window).get(stream) );
auto i = stream.begin();
size_t count = 0;
for(; i != stream.end(); ++i) {
auto doc = *i;
ASSERT_SUCCESS(doc);
if( i.current_index() != count) {
std::cout << "index:" << i.current_index() << std::endl;
std::cout << "expected index:" << count << std::endl;
return false;
}
count += base.size();
}
return true;
}
bool small_window() {
std::cout << "Running " << __func__ << std::endl;
auto json = R"({"error":[],"result":{"token":"xxx"}}{"error":[],"result":{"token":"xxx"}})"_padded;
simdjson::dom::parser parser;
size_t count = 0;
size_t window_size = 10; // deliberately too small
simdjson::dom::document_stream stream;
ASSERT_SUCCESS( parser.parse_many(json, window_size).get(stream) );
for (auto doc : stream) {
if (!doc.error()) {
std::cerr << "Expected a capacity error " << doc.error() << std::endl;
return false;
}
count++;
}
if(count == 2) {
std::cerr << "Expected a capacity error " << std::endl;
return false;
}
return true;
}
bool large_window() {
std::cout << "Running " << __func__ << std::endl;
#if SIZE_MAX > 17179869184
auto json = R"({"error":[],"result":{"token":"xxx"}}{"error":[],"result":{"token":"xxx"}})"_padded;
simdjson::dom::parser parser;
size_t count = 0;
uint64_t window_size{17179869184}; // deliberately too big
simdjson::dom::document_stream stream;
ASSERT_SUCCESS( parser.parse_many(json, size_t(window_size)).get(stream) );
for (auto doc : stream) {
if (!doc.error()) {
std::cerr << "I expected a failure (too big) but got " << doc.error() << std::endl;
return false;
}
count++;
}
#endif
return true;
}
static bool parse_json_message_issue467(simdjson::padded_string &json, size_t expectedcount) {
simdjson::dom::parser parser;
size_t count = 0;
simdjson::dom::document_stream stream;
ASSERT_SUCCESS( parser.parse_many(json).get(stream) );
for (auto doc : stream) {
if (doc.error()) {
std::cerr << "Failed with simdjson error= " << doc.error() << std::endl;
return false;
}
count++;
}
if(count != expectedcount) {
std::cerr << "bad count" << std::endl;
return false;
}
return true;
}
bool json_issue467() {
std::cout << "Running " << __func__ << std::endl;
auto single_message = R"({"error":[],"result":{"token":"xxx"}})"_padded;
auto two_messages = R"({"error":[],"result":{"token":"xxx"}}{"error":[],"result":{"token":"xxx"}})"_padded;
if(!parse_json_message_issue467(single_message, 1)) {
return false;
}
if(!parse_json_message_issue467(two_messages, 2)) {
return false;
}
return true;
}
// returns true if successful
bool document_stream_test() {
std::cout << "Running " << __func__ << std::endl;
fflush(NULL);
const size_t n_records = 10000;
std::string data;
char buf[1024];
for (size_t i = 0; i < n_records; ++i) {
size_t n = snprintf(buf,
sizeof(buf),
"{\"id\": %zu, \"name\": \"name%zu\", \"gender\": \"%s\", "
"\"ete\": {\"id\": %zu, \"name\": \"eventail%zu\"}}",
i, i, (i % 2) ? "homme" : "femme", i % 10, i % 10);
if (n >= sizeof(buf)) { abort(); }
data += std::string(buf, n);
}
for(size_t batch_size = 1000; batch_size < 2000; batch_size += (batch_size>1050?10:1)) {
printf(".");
fflush(NULL);
simdjson::padded_string str(data);
simdjson::dom::parser parser;
size_t count = 0;
simdjson::dom::document_stream stream;
ASSERT_SUCCESS( parser.parse_many(str, batch_size).get(stream) );
for (auto doc : stream) {
int64_t keyid;
ASSERT_SUCCESS( doc["id"].get(keyid) );
ASSERT_EQUAL( keyid, int64_t(count) );
count++;
}
if(count != n_records) {
printf("Found wrong number of documents %zd, expected %zd at batch size %zu\n", count, n_records, batch_size);
return false;
}
}
printf("ok\n");
return true;
}
// returns true if successful
bool document_stream_utf8_test() {
std::cout << "Running " << __func__ << std::endl;
fflush(NULL);
const size_t n_records = 10000;
std::string data;
char buf[1024];
for (size_t i = 0; i < n_records; ++i) {
size_t n = snprintf(buf,
sizeof(buf),
"{\"id\": %zu, \"name\": \"name%zu\", \"gender\": \"%s\", "
"\"\xC3\xA9t\xC3\xA9\": {\"id\": %zu, \"name\": \"\xC3\xA9ventail%zu\"}}",
i, i, (i % 2) ? "\xE2\xBA\x83" : "\xE2\xBA\x95", i % 10, i % 10);
if (n >= sizeof(buf)) { abort(); }
data += std::string(buf, n);
}
for(size_t batch_size = 1000; batch_size < 2000; batch_size += (batch_size>1050?10:1)) {
printf(".");
fflush(NULL);
simdjson::padded_string str(data);
simdjson::dom::parser parser;
size_t count = 0;
simdjson::dom::document_stream stream;
ASSERT_SUCCESS( parser.parse_many(str, batch_size).get(stream) );
for (auto doc : stream) {
int64_t keyid;
ASSERT_SUCCESS( doc["id"].get(keyid) );
ASSERT_EQUAL( keyid, int64_t(count) );
count++;
}
ASSERT_EQUAL( count, n_records )
}
printf("ok\n");
return true;
}
bool run() {
return test_current_index() &&
small_window() &&
large_window() &&
json_issue467() &&
document_stream_test() &&
document_stream_utf8_test();
}
}
namespace parse_api_tests { namespace parse_api_tests {
using namespace std; using namespace std;
@ -1972,9 +1592,7 @@ int main(int argc, char *argv[]) {
dom_api_tests::run() && dom_api_tests::run() &&
type_tests::run() && type_tests::run() &&
format_tests::run() && format_tests::run() &&
document_tests::run() && number_tests::run()
number_tests::run() &&
document_stream_tests::run()
) { ) {
std::cout << "Basic tests are ok." << std::endl; std::cout << "Basic tests are ok." << std::endl;
return EXIT_SUCCESS; return EXIT_SUCCESS;

View File

@ -0,0 +1,245 @@
#include <string>
#include <vector>
#include <unistd.h>
#include "simdjson.h"
#include "test_macros.h"
namespace document_stream_tests {
// Helper for the compilation test below: starts a parse_many() stream over
// `str` using `parser` and returns the resulting document_stream by value.
// The error code produced by get() is deliberately ignored (hence UNUSED);
// callers receive whatever state `stream` is left in.
static simdjson::dom::document_stream parse_many_stream_return(simdjson::dom::parser &parser, simdjson::padded_string &str) {
simdjson::dom::document_stream stream;
// The result is not inspected: this helper only needs to compile.
UNUSED auto error = parser.parse_many(str).get(stream);
return stream;
}
// This is a compilation test: it is never called (hence UNUSED) and exists
// only to verify that a document_stream returned by value from a function
// can be used to initialize a local variable.
UNUSED static void parse_many_stream_assign() {
simdjson::dom::parser parser;
// Two-byte document "{}" given with an explicit length.
simdjson::padded_string str("{}",2);
simdjson::dom::document_stream s1 = parse_many_stream_return(parser, str);
}
// Streams 1000 copies of the tiny document "1 " through parse_many() with a
// small window and checks, for every document, that the iterator's
// current_index() equals the running total of the sizes of the documents
// that precede it. Returns true on success.
bool test_current_index() {
  std::cout << "Running " << __func__ << std::endl;
  const std::string one_document("1 "); // one JSON document plus its separator
  std::string json;
  size_t remaining = 1000;
  while (remaining-- > 0) {
    json.append(one_document);
  }
  simdjson::dom::parser parser;
  const size_t window = 32; // deliberately small
  simdjson::dom::document_stream stream;
  ASSERT_SUCCESS( parser.parse_many(json,window).get(stream) );
  size_t expected_index = 0;
  for (auto it = stream.begin(); it != stream.end(); ++it) {
    auto doc = *it;
    ASSERT_SUCCESS(doc);
    if (it.current_index() == expected_index) {
      // Matches: the next document is expected one document-length further on.
      expected_index += one_document.size();
      continue;
    }
    std::cout << "index:" << it.current_index() << std::endl;
    std::cout << "expected index:" << expected_index << std::endl;
    return false;
  }
  return true;
}
// Parses two concatenated documents with a window of only 10 bytes, which is
// smaller than either document. Every document yielded by the stream must
// carry an error, and the stream must not yield exactly two documents.
// Returns true when both conditions hold.
bool small_window() {
  std::cout << "Running " << __func__ << std::endl;
  auto json = R"({"error":[],"result":{"token":"xxx"}}{"error":[],"result":{"token":"xxx"}})"_padded;
  simdjson::dom::parser parser;
  size_t window_size = 10; // deliberately too small
  simdjson::dom::document_stream stream;
  ASSERT_SUCCESS( parser.parse_many(json, window_size).get(stream) );
  size_t documents_seen = 0;
  for (auto doc : stream) {
    if (doc.error()) {
      // An error is what we want here; keep counting.
      ++documents_seen;
      continue;
    }
    std::cerr << "Expected a capacity error " << doc.error() << std::endl;
    return false;
  }
  if (documents_seen == 2) {
    std::cerr << "Expected a capacity error " << std::endl;
    return false;
  }
  return true;
}
// Parses two concatenated documents with a 16 GiB window and expects every
// document yielded by the stream to carry an error. The body is compiled only
// when size_t can represent the window size (SIZE_MAX > 17179869184); on
// other platforms the test trivially returns true.
bool large_window() {
  std::cout << "Running " << __func__ << std::endl;
#if SIZE_MAX > 17179869184
  auto json = R"({"error":[],"result":{"token":"xxx"}}{"error":[],"result":{"token":"xxx"}})"_padded;
  simdjson::dom::parser parser;
  uint64_t window_size{17179869184}; // deliberately too big
  simdjson::dom::document_stream stream;
  ASSERT_SUCCESS( parser.parse_many(json, size_t(window_size)).get(stream) );
  for (auto doc : stream) {
    if (!doc.error()) {
      std::cerr << "I expected a failure (too big) but got " << doc.error() << std::endl;
      return false;
    }
  }
#endif
  return true;
}
// Runs parse_many() over `json` with the default window and checks that the
// stream yields exactly `expectedcount` documents, none of which carries an
// error. Returns true on success; prints a diagnostic and returns false
// otherwise.
static bool parse_json_message_issue467(simdjson::padded_string &json, size_t expectedcount) {
  simdjson::dom::parser parser;
  simdjson::dom::document_stream stream;
  ASSERT_SUCCESS( parser.parse_many(json).get(stream) );
  size_t parsed_documents = 0;
  for (auto doc : stream) {
    if (!doc.error()) {
      ++parsed_documents;
      continue;
    }
    std::cerr << "Failed with simdjson error= " << doc.error() << std::endl;
    return false;
  }
  if (parsed_documents == expectedcount) {
    return true;
  }
  std::cerr << "bad count" << std::endl;
  return false;
}
// Regression test for issue 467: a stream holding one message must yield one
// document, and a stream holding two back-to-back messages must yield two.
// The two-message case is only attempted when the single-message case passes.
bool json_issue467() {
  std::cout << "Running " << __func__ << std::endl;
  auto single_message = R"({"error":[],"result":{"token":"xxx"}})"_padded;
  auto two_messages = R"({"error":[],"result":{"token":"xxx"}}{"error":[],"result":{"token":"xxx"}})"_padded;
  return parse_json_message_issue467(single_message, 1)
      && parse_json_message_issue467(two_messages, 2);
}
// returns true if successful
bool document_stream_test() {
std::cout << "Running " << __func__ << std::endl;
fflush(NULL);
const size_t n_records = 10000;
std::string data;
char buf[1024];
for (size_t i = 0; i < n_records; ++i) {
size_t n = snprintf(buf,
sizeof(buf),
"{\"id\": %zu, \"name\": \"name%zu\", \"gender\": \"%s\", "
"\"ete\": {\"id\": %zu, \"name\": \"eventail%zu\"}}",
i, i, (i % 2) ? "homme" : "femme", i % 10, i % 10);
if (n >= sizeof(buf)) { abort(); }
data += std::string(buf, n);
}
for(size_t batch_size = 1000; batch_size < 2000; batch_size += (batch_size>1050?10:1)) {
printf(".");
fflush(NULL);
simdjson::padded_string str(data);
simdjson::dom::parser parser;
size_t count = 0;
simdjson::dom::document_stream stream;
ASSERT_SUCCESS( parser.parse_many(str, batch_size).get(stream) );
for (auto doc : stream) {
int64_t keyid;
ASSERT_SUCCESS( doc["id"].get(keyid) );
ASSERT_EQUAL( keyid, int64_t(count) );
count++;
}
if(count != n_records) {
printf("Found wrong number of documents %zd, expected %zd at batch size %zu\n", count, n_records, batch_size);
return false;
}
}
printf("ok\n");
return true;
}
// returns true if successful
bool document_stream_utf8_test() {
std::cout << "Running " << __func__ << std::endl;
fflush(NULL);
const size_t n_records = 10000;
std::string data;
char buf[1024];
for (size_t i = 0; i < n_records; ++i) {
size_t n = snprintf(buf,
sizeof(buf),
"{\"id\": %zu, \"name\": \"name%zu\", \"gender\": \"%s\", "
"\"\xC3\xA9t\xC3\xA9\": {\"id\": %zu, \"name\": \"\xC3\xA9ventail%zu\"}}",
i, i, (i % 2) ? "\xE2\xBA\x83" : "\xE2\xBA\x95", i % 10, i % 10);
if (n >= sizeof(buf)) { abort(); }
data += std::string(buf, n);
}
for(size_t batch_size = 1000; batch_size < 2000; batch_size += (batch_size>1050?10:1)) {
printf(".");
fflush(NULL);
simdjson::padded_string str(data);
simdjson::dom::parser parser;
size_t count = 0;
simdjson::dom::document_stream stream;
ASSERT_SUCCESS( parser.parse_many(str, batch_size).get(stream) );
for (auto doc : stream) {
int64_t keyid;
ASSERT_SUCCESS( doc["id"].get(keyid) );
ASSERT_EQUAL( keyid, int64_t(count) );
count++;
}
ASSERT_EQUAL( count, n_records )
}
printf("ok\n");
return true;
}
// Runs every document_stream test in order, stopping at the first failure.
// Returns true only if all of them succeed.
bool run() {
  if (!test_current_index()) { return false; }
  if (!small_window()) { return false; }
  if (!large_window()) { return false; }
  if (!json_issue467()) { return false; }
  if (!document_stream_test()) { return false; }
  return document_stream_utf8_test();
}
}
// Entry point of the document_stream test program.
// Accepts an optional "-a <name>" argument selecting the simdjson
// implementation to test, prints which implementation is active, runs the
// document_stream tests and returns EXIT_SUCCESS or EXIT_FAILURE accordingly.
int main(int argc, char *argv[]) {
  std::cout << std::unitbuf;
  int c;
  while ((c = getopt(argc, argv, "a:")) != -1) {
    switch (c) {
    case 'a': {
      const simdjson::implementation *impl = simdjson::available_implementations[optarg];
      if (!impl) {
        fprintf(stderr, "Unsupported architecture value -a %s\n", optarg);
        return EXIT_FAILURE;
      }
      simdjson::active_implementation = impl;
      break;
    }
    default:
      // For an unrecognized option getopt() returns '?' and stores the
      // offending option character in optopt, so report optopt, not c.
      fprintf(stderr, "Unexpected argument %c\n", optopt);
      return EXIT_FAILURE;
    }
  }

  // this is put here deliberately to check that the documentation is correct (README),
  // should this fail to compile, you should update the documentation:
  if (simdjson::active_implementation->name() == "unsupported") {
    printf("unsupported CPU\n");
  }
  // We want to know what we are testing.
  std::cout << "Running tests against this implementation: " << simdjson::active_implementation->name();
  std::cout << "(" << simdjson::active_implementation->description() << ")" << std::endl;
  std::cout << "------------------------------------------------------------" << std::endl;

  std::cout << "Running document_stream tests." << std::endl;
  if (document_stream_tests::run()) {
    std::cout << "document_stream tests are ok." << std::endl;
    return EXIT_SUCCESS;
  } else {
    return EXIT_FAILURE;
  }
}

231
tests/document_tests.cpp Normal file
View File

@ -0,0 +1,231 @@
#include <string>
#include <vector>
#include <unistd.h>
#include "simdjson.h"
#include "test_macros.h"
namespace document_tests {
// Regression test for issue 938. Exercises three parser usage patterns:
//  1. one parser reused to parse several small in-memory documents,
//  2. a fresh parser per file for a list of JSON files,
//  3. one parser reused to load the same list of files.
// Every parse/load must succeed. Returns true on success.
bool issue938() {
  const std::vector<std::string> json_strings{"[true,false]", "[1,2,3,null]",
                                              R"({"yay":"json!"})"};
  simdjson::dom::parser parser1;
  // Each std::string is converted to a padded_string, so a by-value loop
  // variable is required here.
  for (simdjson::padded_string str : json_strings) {
    simdjson::dom::element element;
    ASSERT_SUCCESS( parser1.parse(str).get(element) );
    std::cout << element << std::endl;
  }
  const std::vector<std::string> file_paths{
      ADVERSARIAL_JSON,      FLATADVERSARIAL_JSON, DEMO_JSON,
      TWITTER_TIMELINE_JSON, REPEAT_JSON,          SMALLDEMO_JSON,
      TRUENULL_JSON};
  // Paths are only read, so iterate by const reference instead of copying
  // each std::string.
  for (const auto &path : file_paths) {
    simdjson::dom::parser parser2;
    simdjson::dom::element element;
    std::cout << "file: " << path << std::endl;
    ASSERT_SUCCESS( parser2.load(path).get(element) );
    std::cout << element.type() << std::endl;
  }
  simdjson::dom::parser parser3;
  for (const auto &path : file_paths) {
    simdjson::dom::element element;
    std::cout << "file: " << path << std::endl;
    ASSERT_SUCCESS( parser3.load(path).get(element) );
    std::cout << element.type() << std::endl;
  }
  return true;
}
// adversarial example that once triggred overruns, see https://github.com/lemire/simdjson/issues/345
bool bad_example() {
std::cout << __func__ << std::endl;
simdjson::padded_string badjson = "[7,7,7,7,6,7,7,7,6,7,7,6,[7,7,7,7,6,7,7,7,6,7,7,6,7,7,7,7,7,7,6"_padded;
simdjson::dom::parser parser;
ASSERT_ERROR( parser.parse(badjson), simdjson::TAPE_ERROR );
return true;
}
bool count_array_example() {
std::cout << __func__ << std::endl;
simdjson::padded_string smalljson = "[1,2,3]"_padded;
simdjson::dom::parser parser;
simdjson::dom::array array;
ASSERT_SUCCESS( parser.parse(smalljson).get(array) );
ASSERT_EQUAL( array.size(), 3 );
return true;
}
bool count_object_example() {
std::cout << __func__ << std::endl;
simdjson::padded_string smalljson = "{\"1\":1,\"2\":1,\"3\":1}"_padded;
simdjson::dom::parser parser;
simdjson::dom::object object;
ASSERT_SUCCESS( parser.parse(smalljson).get(object) );
ASSERT_EQUAL( object.size(), 3 );
return true;
}
// Checks that bytes following the declared end of a document do not affect
// parsing. Both calls pass a length of 2, so only the first two characters of
// each literal form the document; the remaining '[' characters sit past the
// declared length.
// NOTE(review): the third argument (false) presumably disables reallocation
// into a padded buffer, which would make the trailing '[' run serve as the
// padding - confirm against the parser::parse signature.
bool padded_with_open_bracket() {
std::cout << __func__ << std::endl;
simdjson::dom::parser parser;
// This is an invalid document ("[[") followed by open brackets.
ASSERT_ERROR( parser.parse("[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[", 2, false), simdjson::TAPE_ERROR);
// This is a valid document ("[]") followed by open brackets.
ASSERT_SUCCESS( parser.parse("[][[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[", 2, false) );
return true;
}
// returns true if successful
bool stable_test() {
std::cout << __func__ << std::endl;
simdjson::padded_string json = "{"
"\"Image\":{"
"\"Width\":800,"
"\"Height\":600,"
"\"Title\":\"View from 15th Floor\","
"\"Thumbnail\":{"
"\"Url\":\"http://www.example.com/image/481989943\","
"\"Height\":125,"
"\"Width\":100"
"},"
"\"Animated\":false,"
"\"IDs\":[116,943.3,234,38793]"
"}"
"}"_padded;
simdjson::dom::parser parser;
std::ostringstream myStream;
#if SIMDJSON_EXCEPTIONS
myStream << parser.parse(json);
#else
simdjson::dom::element doc;
UNUSED auto error = parser.parse(json).get(doc);
myStream << doc;
#endif
std::string newjson = myStream.str();
if(static_cast<std::string>(json) != newjson) {
std::cout << "serialized json differs!" << std::endl;
std::cout << static_cast<std::string>(json) << std::endl;
std::cout << newjson << std::endl;
}
return newjson == static_cast<std::string>(json);
}
// Generates 300000 small JSON records in three shapes (nested objects with
// integer ids, a %f-formatted number with a boolean array, and a %e-formatted
// number) plus four scalar documents, then parses every record twice with one
// reused parser. A progress dot is printed every 10000 records.
// Returns true if successful; prints the offending record and the error
// message and returns false on the first parse failure.
bool skyprophet_test() {
  std::cout << "Running " << __func__ << std::endl;
  const size_t n_records = 100000;
  std::vector<std::string> data;
  // Three generated batches plus the four scalar documents appended below.
  data.reserve(3 * n_records + 4);
  char buf[1024];
  for (size_t i = 0; i < n_records; ++i) {
    size_t n = snprintf(buf, sizeof(buf),
                        "{\"id\": %zu, \"name\": \"name%zu\", \"gender\": \"%s\", "
                        "\"school\": {\"id\": %zu, \"name\": \"school%zu\"}}",
                        i, i, (i % 2) ? "male" : "female", i % 10, i % 10);
    // A negative snprintf result converts to a huge size_t, so this also
    // catches encoding errors, not just truncation.
    if (n >= sizeof(buf)) { abort(); }
    data.emplace_back(buf, n);
  }
  for (size_t i = 0; i < n_records; ++i) {
    size_t n = snprintf(buf, sizeof(buf), "{\"counter\": %f, \"array\": [%s]}", static_cast<double>(i) * 3.1416,
                        (i % 2) ? "true" : "false");
    if (n >= sizeof(buf)) { abort(); }
    data.emplace_back(buf, n);
  }
  for (size_t i = 0; i < n_records; ++i) {
    size_t n = snprintf(buf, sizeof(buf), "{\"number\": %e}", static_cast<double>(i) * 10000.31321321);
    if (n >= sizeof(buf)) { abort(); }
    data.emplace_back(buf, n);
  }
  data.emplace_back("true");
  data.emplace_back("false");
  data.emplace_back("null");
  data.emplace_back("0.1");
  simdjson::dom::parser parser;
  size_t counter = 0;
  for (const auto &rec : data) {
    if ((counter % 10000) == 0) {
      printf(".");
      fflush(nullptr);
    }
    counter++;
    // Each record is parsed twice in a row with the same parser.
    for (int pass = 0; pass < 2; ++pass) {
      auto error = parser.parse(rec.c_str(), rec.length()).error();
      if (error != simdjson::error_code::SUCCESS) {
        printf("Something is wrong in skyprophet_test: %s.\n", rec.c_str());
        printf("Parsing failed. Error is %s\n", simdjson::error_message(error));
        return false;
      }
    }
  }
  printf("\n");
  return true;
}
bool lots_of_brackets() {
std::string input;
for(size_t i = 0; i < 200; i++) {
input += "[";
}
for(size_t i = 0; i < 200; i++) {
input += "]";
}
simdjson::dom::parser parser;
auto error = parser.parse(input).error();
if (error) { std::cerr << "Error: " << simdjson::error_message(error) << std::endl; return false; }
return true;
}
// Runs every document test in order, stopping at the first failure.
// Returns true only if all of them succeed.
bool run() {
  if (!issue938()) { return false; }
  if (!padded_with_open_bracket()) { return false; }
  if (!bad_example()) { return false; }
  if (!count_array_example()) { return false; }
  if (!count_object_example()) { return false; }
  if (!stable_test()) { return false; }
  if (!skyprophet_test()) { return false; }
  return lots_of_brackets();
}
}
// Entry point of the document test program.
// Accepts an optional "-a <name>" argument selecting the simdjson
// implementation to test, prints which implementation is active, runs the
// document tests and returns EXIT_SUCCESS or EXIT_FAILURE accordingly.
int main(int argc, char *argv[]) {
  std::cout << std::unitbuf;
  int c;
  while ((c = getopt(argc, argv, "a:")) != -1) {
    switch (c) {
    case 'a': {
      const simdjson::implementation *impl = simdjson::available_implementations[optarg];
      if (!impl) {
        fprintf(stderr, "Unsupported architecture value -a %s\n", optarg);
        return EXIT_FAILURE;
      }
      simdjson::active_implementation = impl;
      break;
    }
    default:
      // For an unrecognized option getopt() returns '?' and stores the
      // offending option character in optopt, so report optopt, not c.
      fprintf(stderr, "Unexpected argument %c\n", optopt);
      return EXIT_FAILURE;
    }
  }

  // this is put here deliberately to check that the documentation is correct (README),
  // should this fail to compile, you should update the documentation:
  if (simdjson::active_implementation->name() == "unsupported") {
    printf("unsupported CPU\n");
  }
  // We want to know what we are testing.
  std::cout << "Running tests against this implementation: " << simdjson::active_implementation->name();
  std::cout << "(" << simdjson::active_implementation->description() << ")" << std::endl;
  std::cout << "------------------------------------------------------------" << std::endl;

  std::cout << "Running document tests." << std::endl;
  if (document_tests::run()) {
    std::cout << "document tests are ok." << std::endl;
    return EXIT_SUCCESS;
  } else {
    return EXIT_FAILURE;
  }
}