Returning basictests to its original function: basic tests (only) (#1010)
* The initial motivation behind basictests was for a quick set of sanity tests to check whether your code made sense. It was not meant for thorough testing to find corner cases. However, over time, it grew to include such expensive tests. This PR takes them out. It also allows us to bring back basictests to MinGW tests, since it is now cheap. This is not an exercise in software engineering and making things prettier. This is a pragmatic change to improve our test coverage and quality of life. * Adds many more cheap tests. Co-authored-by: Daniel Lemire <lemire@gmai.com>
This commit is contained in:
parent
a2f0933d01
commit
039d82ff1b
|
@ -50,9 +50,5 @@ jobs:
|
|||
mkdir build32
|
||||
cd build32
|
||||
cmake -DSIMDJSON_BUILD_STATIC=ON -DSIMDJSON_COMPETITION=OFF -DSIMDJSON_GOOGLE_BENCHMARKS=OFF -DSIMDJSON_ENABLE_THREADS=OFF ..
|
||||
cmake --build . --target basictests numberparsingcheck stringparsingcheck errortests integer_tests pointercheck --verbose
|
||||
ctest . -R stringparsingcheck --output-on-failure
|
||||
ctest . -R numberparsingcheck --output-on-failure
|
||||
ctest . -R errortests --output-on-failure
|
||||
ctest . -R integer_tests --output-on-failure
|
||||
ctest . -R pointercheck --output-on-failure
|
||||
cmake --build . --target parse_many_test jsoncheck basictests numberparsingcheck stringparsingcheck errortests integer_tests pointercheck --verbose
|
||||
ctest . -R "(parse_many_test|jsoncheck|basictests|stringparsingcheck|numberparsingcheck|errortests|integer_tests|pointercheck)" --output-on-failure
|
||||
|
|
|
@ -51,10 +51,6 @@ jobs:
|
|||
mkdir build64
|
||||
cd build64
|
||||
cmake -DSIMDJSON_BUILD_STATIC=ON -DSIMDJSON_COMPETITION=OFF -DSIMDJSON_GOOGLE_BENCHMARKS=OFF -DSIMDJSON_ENABLE_THREADS=OFF ..
|
||||
cmake --build . --target basictests numberparsingcheck stringparsingcheck errortests integer_tests pointercheck --verbose
|
||||
ctest . -R stringparsingcheck --output-on-failure
|
||||
ctest . -R numberparsingcheck --output-on-failure
|
||||
ctest . -R errortests --output-on-failure
|
||||
ctest . -R integer_tests --output-on-failure
|
||||
ctest . -R pointercheck --output-on-failure
|
||||
cmake --build . --target parse_many_test jsoncheck basictests numberparsingcheck stringparsingcheck errortests integer_tests pointercheck --verbose
|
||||
ctest . -R "(parse_many_test|jsoncheck|basictests|stringparsingcheck|numberparsingcheck|errortests|integer_tests|pointercheck)" --output-on-failure
|
||||
|
||||
|
|
|
@ -52,6 +52,8 @@ target_compile_definitions(stringparsingcheck PRIVATE NOMINMAX)
|
|||
# All remaining tests link with simdjson proper
|
||||
link_libraries(simdjson)
|
||||
add_cpp_test(basictests LABELS acceptance per_implementation)
|
||||
add_cpp_test(document_stream_tests LABELS acceptance per_implementation)
|
||||
add_cpp_test(document_tests LABELS acceptance per_implementation)
|
||||
add_cpp_test(errortests LABELS acceptance per_implementation)
|
||||
add_cpp_test(integer_tests LABELS acceptance per_implementation)
|
||||
add_cpp_test(jsoncheck LABELS acceptance per_implementation)
|
||||
|
|
|
@ -7,7 +7,6 @@
|
|||
#include <vector>
|
||||
#include <cmath>
|
||||
#include <set>
|
||||
#include <string_view>
|
||||
#include <sstream>
|
||||
#include <utility>
|
||||
#include <ciso646>
|
||||
|
@ -187,386 +186,7 @@ namespace number_tests {
|
|||
}
|
||||
}
|
||||
|
||||
namespace document_tests {
|
||||
bool issue938() {
|
||||
std::vector<std::string> json_strings{"[true,false]", "[1,2,3,null]",
|
||||
R"({"yay":"json!"})"};
|
||||
simdjson::dom::parser parser1;
|
||||
for (simdjson::padded_string str : json_strings) {
|
||||
simdjson::dom::element element;
|
||||
ASSERT_SUCCESS( parser1.parse(str).get(element) );
|
||||
std::cout << element << std::endl;
|
||||
}
|
||||
std::vector<std::string> file_paths{
|
||||
ADVERSARIAL_JSON, FLATADVERSARIAL_JSON, DEMO_JSON,
|
||||
TWITTER_TIMELINE_JSON, REPEAT_JSON, SMALLDEMO_JSON,
|
||||
TRUENULL_JSON};
|
||||
for (auto path : file_paths) {
|
||||
simdjson::dom::parser parser2;
|
||||
simdjson::dom::element element;
|
||||
std::cout << "file: " << path << std::endl;
|
||||
ASSERT_SUCCESS( parser2.load(path).get(element) );
|
||||
std::cout << element.type() << std::endl;
|
||||
}
|
||||
simdjson::dom::parser parser3;
|
||||
for (auto path : file_paths) {
|
||||
simdjson::dom::element element;
|
||||
std::cout << "file: " << path << std::endl;
|
||||
ASSERT_SUCCESS( parser3.load(path).get(element) );
|
||||
std::cout << element.type() << std::endl;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// adversarial example that once triggred overruns, see https://github.com/lemire/simdjson/issues/345
|
||||
bool bad_example() {
|
||||
std::cout << __func__ << std::endl;
|
||||
simdjson::padded_string badjson = "[7,7,7,7,6,7,7,7,6,7,7,6,[7,7,7,7,6,7,7,7,6,7,7,6,7,7,7,7,7,7,6"_padded;
|
||||
simdjson::dom::parser parser;
|
||||
ASSERT_ERROR( parser.parse(badjson), simdjson::TAPE_ERROR );
|
||||
return true;
|
||||
}
|
||||
bool count_array_example() {
|
||||
std::cout << __func__ << std::endl;
|
||||
simdjson::padded_string smalljson = "[1,2,3]"_padded;
|
||||
simdjson::dom::parser parser;
|
||||
simdjson::dom::array array;
|
||||
ASSERT_SUCCESS( parser.parse(smalljson).get(array) );
|
||||
ASSERT_EQUAL( array.size(), 3 );
|
||||
return true;
|
||||
}
|
||||
bool count_object_example() {
|
||||
std::cout << __func__ << std::endl;
|
||||
simdjson::padded_string smalljson = "{\"1\":1,\"2\":1,\"3\":1}"_padded;
|
||||
simdjson::dom::parser parser;
|
||||
simdjson::dom::object object;
|
||||
ASSERT_SUCCESS( parser.parse(smalljson).get(object) );
|
||||
ASSERT_EQUAL( object.size(), 3 );
|
||||
return true;
|
||||
}
|
||||
// Parses only the first two bytes of buffers whose remaining bytes are all
// open brackets, checking that the trailing bytes do not affect the result.
// NOTE(review): the third argument (false) presumably tells the parser not to
// copy the input into its own padded buffer - confirm against parser::parse.
bool padded_with_open_bracket() {
  std::cout << __func__ << std::endl;
  // Two-byte prefix "[[" is an invalid document; the rest is open brackets.
  const char *invalid_prefix = "[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[";
  // Two-byte prefix "[]" is a valid document; the rest is open brackets.
  const char *valid_prefix = "[][[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[";
  simdjson::dom::parser parser;
  ASSERT_ERROR( parser.parse(invalid_prefix, 2, false), simdjson::TAPE_ERROR);
  ASSERT_SUCCESS( parser.parse(valid_prefix, 2, false) );
  return true;
}
|
||||
// returns true if successful
|
||||
bool stable_test() {
|
||||
std::cout << __func__ << std::endl;
|
||||
simdjson::padded_string json = "{"
|
||||
"\"Image\":{"
|
||||
"\"Width\":800,"
|
||||
"\"Height\":600,"
|
||||
"\"Title\":\"View from 15th Floor\","
|
||||
"\"Thumbnail\":{"
|
||||
"\"Url\":\"http://www.example.com/image/481989943\","
|
||||
"\"Height\":125,"
|
||||
"\"Width\":100"
|
||||
"},"
|
||||
"\"Animated\":false,"
|
||||
"\"IDs\":[116,943.3,234,38793]"
|
||||
"}"
|
||||
"}"_padded;
|
||||
simdjson::dom::parser parser;
|
||||
std::ostringstream myStream;
|
||||
#if SIMDJSON_EXCEPTIONS
|
||||
myStream << parser.parse(json);
|
||||
#else
|
||||
simdjson::dom::element doc;
|
||||
UNUSED auto error = parser.parse(json).get(doc);
|
||||
myStream << doc;
|
||||
#endif
|
||||
std::string newjson = myStream.str();
|
||||
if(static_cast<std::string>(json) != newjson) {
|
||||
std::cout << "serialized json differs!" << std::endl;
|
||||
std::cout << static_cast<std::string>(json) << std::endl;
|
||||
std::cout << newjson << std::endl;
|
||||
}
|
||||
return newjson == static_cast<std::string>(json);
|
||||
}
|
||||
// returns true if successful
|
||||
bool skyprophet_test() {
|
||||
std::cout << "Running " << __func__ << std::endl;
|
||||
const size_t n_records = 100000;
|
||||
std::vector<std::string> data;
|
||||
char buf[1024];
|
||||
for (size_t i = 0; i < n_records; ++i) {
|
||||
size_t n = snprintf(buf, sizeof(buf),
|
||||
"{\"id\": %zu, \"name\": \"name%zu\", \"gender\": \"%s\", "
|
||||
"\"school\": {\"id\": %zu, \"name\": \"school%zu\"}}",
|
||||
i, i, (i % 2) ? "male" : "female", i % 10, i % 10);
|
||||
if (n >= sizeof(buf)) { abort(); }
|
||||
data.emplace_back(std::string(buf, n));
|
||||
}
|
||||
for (size_t i = 0; i < n_records; ++i) {
|
||||
size_t n = snprintf(buf, sizeof(buf), "{\"counter\": %f, \"array\": [%s]}", static_cast<double>(i) * 3.1416,
|
||||
(i % 2) ? "true" : "false");
|
||||
if (n >= sizeof(buf)) { abort(); }
|
||||
data.emplace_back(std::string(buf, n));
|
||||
}
|
||||
for (size_t i = 0; i < n_records; ++i) {
|
||||
size_t n = snprintf(buf, sizeof(buf), "{\"number\": %e}", static_cast<double>(i) * 10000.31321321);
|
||||
if (n >= sizeof(buf)) { abort(); }
|
||||
data.emplace_back(std::string(buf, n));
|
||||
}
|
||||
data.emplace_back(std::string("true"));
|
||||
data.emplace_back(std::string("false"));
|
||||
data.emplace_back(std::string("null"));
|
||||
data.emplace_back(std::string("0.1"));
|
||||
size_t maxsize = 0;
|
||||
for (auto &s : data) {
|
||||
if (maxsize < s.size())
|
||||
maxsize = s.size();
|
||||
}
|
||||
simdjson::dom::parser parser;
|
||||
size_t counter = 0;
|
||||
for (auto &rec : data) {
|
||||
if ((counter % 10000) == 0) {
|
||||
printf(".");
|
||||
fflush(NULL);
|
||||
}
|
||||
counter++;
|
||||
auto error = parser.parse(rec.c_str(), rec.length()).error();
|
||||
if (error != simdjson::error_code::SUCCESS) {
|
||||
printf("Something is wrong in skyprophet_test: %s.\n", rec.c_str());
|
||||
printf("Parsing failed. Error is %s\n", simdjson::error_message(error));
|
||||
return false;
|
||||
}
|
||||
error = parser.parse(rec.c_str(), rec.length()).error();
|
||||
if (error != simdjson::error_code::SUCCESS) {
|
||||
printf("Something is wrong in skyprophet_test: %s.\n", rec.c_str());
|
||||
printf("Parsing failed. Error is %s\n", simdjson::error_message(error));
|
||||
return false;
|
||||
}
|
||||
}
|
||||
printf("\n");
|
||||
return true;
|
||||
}
|
||||
bool lots_of_brackets() {
|
||||
std::string input;
|
||||
for(size_t i = 0; i < 200; i++) {
|
||||
input += "[";
|
||||
}
|
||||
for(size_t i = 0; i < 200; i++) {
|
||||
input += "]";
|
||||
}
|
||||
simdjson::dom::parser parser;
|
||||
auto error = parser.parse(input).error();
|
||||
if (error) { std::cerr << "Error: " << simdjson::error_message(error) << std::endl; return false; }
|
||||
return true;
|
||||
}
|
||||
bool run() {
|
||||
return issue938() &&
|
||||
padded_with_open_bracket() &&
|
||||
bad_example() &&
|
||||
count_array_example() &&
|
||||
count_object_example() &&
|
||||
stable_test() &&
|
||||
skyprophet_test() &&
|
||||
lots_of_brackets();
|
||||
}
|
||||
}
|
||||
|
||||
namespace document_stream_tests {
|
||||
// Helper for the compilation test below: starts a parse_many over `str` and
// returns the resulting stream by value. The error code is deliberately
// discarded.
static simdjson::dom::document_stream parse_many_stream_return(simdjson::dom::parser &parser, simdjson::padded_string &str) {
  simdjson::dom::document_stream result;
  UNUSED auto ignored_error = parser.parse_many(str).get(result);
  return result;
}
|
||||
// this is a compilation test
|
||||
UNUSED static void parse_many_stream_assign() {
|
||||
simdjson::dom::parser parser;
|
||||
simdjson::padded_string str("{}",2);
|
||||
simdjson::dom::document_stream s1 = parse_many_stream_return(parser, str);
|
||||
}
|
||||
// Streams 1000 copies of "1 " through a deliberately small window and checks
// that the iterator's current_index() equals the byte offset of each document.
bool test_current_index() {
  std::cout << "Running " << __func__ << std::endl;
  const std::string base("1 ");// one JSON!
  std::string json;
  for (size_t copies = 0; copies != 1000; ++copies) {
    json.append(base);
  }
  simdjson::dom::parser parser;
  const size_t window = 32; // deliberately small
  simdjson::dom::document_stream stream;
  ASSERT_SUCCESS( parser.parse_many(json,window).get(stream) );
  // Offset at which the next document is expected to start.
  size_t expected_index = 0;
  for (auto it = stream.begin(); it != stream.end(); ++it) {
    auto doc = *it;
    ASSERT_SUCCESS(doc);
    if (it.current_index() != expected_index) {
      std::cout << "index:" << it.current_index() << std::endl;
      std::cout << "expected index:" << expected_index << std::endl;
      return false;
    }
    expected_index += base.size();
  }
  return true;
}
|
||||
bool small_window() {
|
||||
std::cout << "Running " << __func__ << std::endl;
|
||||
auto json = R"({"error":[],"result":{"token":"xxx"}}{"error":[],"result":{"token":"xxx"}})"_padded;
|
||||
simdjson::dom::parser parser;
|
||||
size_t count = 0;
|
||||
size_t window_size = 10; // deliberately too small
|
||||
simdjson::dom::document_stream stream;
|
||||
ASSERT_SUCCESS( parser.parse_many(json, window_size).get(stream) );
|
||||
for (auto doc : stream) {
|
||||
if (!doc.error()) {
|
||||
std::cerr << "Expected a capacity error " << doc.error() << std::endl;
|
||||
return false;
|
||||
}
|
||||
count++;
|
||||
}
|
||||
if(count == 2) {
|
||||
std::cerr << "Expected a capacity error " << std::endl;
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool large_window() {
|
||||
std::cout << "Running " << __func__ << std::endl;
|
||||
#if SIZE_MAX > 17179869184
|
||||
auto json = R"({"error":[],"result":{"token":"xxx"}}{"error":[],"result":{"token":"xxx"}})"_padded;
|
||||
simdjson::dom::parser parser;
|
||||
size_t count = 0;
|
||||
uint64_t window_size{17179869184}; // deliberately too big
|
||||
simdjson::dom::document_stream stream;
|
||||
ASSERT_SUCCESS( parser.parse_many(json, size_t(window_size)).get(stream) );
|
||||
for (auto doc : stream) {
|
||||
if (!doc.error()) {
|
||||
std::cerr << "I expected a failure (too big) but got " << doc.error() << std::endl;
|
||||
return false;
|
||||
}
|
||||
count++;
|
||||
}
|
||||
#endif
|
||||
return true;
|
||||
}
|
||||
// Streams `json` with the default window and checks that exactly
// `expectedcount` documents are produced, none of them with an error.
static bool parse_json_message_issue467(simdjson::padded_string &json, size_t expectedcount) {
  simdjson::dom::parser parser;
  simdjson::dom::document_stream stream;
  ASSERT_SUCCESS( parser.parse_many(json).get(stream) );
  size_t parsed_docs = 0;
  for (auto doc : stream) {
    if (doc.error()) {
      std::cerr << "Failed with simdjson error= " << doc.error() << std::endl;
      return false;
    }
    ++parsed_docs;
  }
  if (parsed_docs == expectedcount) {
    return true;
  }
  std::cerr << "bad count" << std::endl;
  return false;
}
|
||||
|
||||
bool json_issue467() {
|
||||
std::cout << "Running " << __func__ << std::endl;
|
||||
auto single_message = R"({"error":[],"result":{"token":"xxx"}})"_padded;
|
||||
auto two_messages = R"({"error":[],"result":{"token":"xxx"}}{"error":[],"result":{"token":"xxx"}})"_padded;
|
||||
|
||||
if(!parse_json_message_issue467(single_message, 1)) {
|
||||
return false;
|
||||
}
|
||||
if(!parse_json_message_issue467(two_messages, 2)) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// returns true if successful
|
||||
bool document_stream_test() {
|
||||
std::cout << "Running " << __func__ << std::endl;
|
||||
fflush(NULL);
|
||||
const size_t n_records = 10000;
|
||||
std::string data;
|
||||
char buf[1024];
|
||||
for (size_t i = 0; i < n_records; ++i) {
|
||||
size_t n = snprintf(buf,
|
||||
sizeof(buf),
|
||||
"{\"id\": %zu, \"name\": \"name%zu\", \"gender\": \"%s\", "
|
||||
"\"ete\": {\"id\": %zu, \"name\": \"eventail%zu\"}}",
|
||||
i, i, (i % 2) ? "homme" : "femme", i % 10, i % 10);
|
||||
if (n >= sizeof(buf)) { abort(); }
|
||||
data += std::string(buf, n);
|
||||
}
|
||||
for(size_t batch_size = 1000; batch_size < 2000; batch_size += (batch_size>1050?10:1)) {
|
||||
printf(".");
|
||||
fflush(NULL);
|
||||
simdjson::padded_string str(data);
|
||||
simdjson::dom::parser parser;
|
||||
size_t count = 0;
|
||||
simdjson::dom::document_stream stream;
|
||||
ASSERT_SUCCESS( parser.parse_many(str, batch_size).get(stream) );
|
||||
for (auto doc : stream) {
|
||||
int64_t keyid;
|
||||
ASSERT_SUCCESS( doc["id"].get(keyid) );
|
||||
ASSERT_EQUAL( keyid, int64_t(count) );
|
||||
|
||||
count++;
|
||||
}
|
||||
if(count != n_records) {
|
||||
printf("Found wrong number of documents %zd, expected %zd at batch size %zu\n", count, n_records, batch_size);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
printf("ok\n");
|
||||
return true;
|
||||
}
|
||||
|
||||
// returns true if successful
|
||||
bool document_stream_utf8_test() {
|
||||
std::cout << "Running " << __func__ << std::endl;
|
||||
fflush(NULL);
|
||||
const size_t n_records = 10000;
|
||||
std::string data;
|
||||
char buf[1024];
|
||||
for (size_t i = 0; i < n_records; ++i) {
|
||||
size_t n = snprintf(buf,
|
||||
sizeof(buf),
|
||||
"{\"id\": %zu, \"name\": \"name%zu\", \"gender\": \"%s\", "
|
||||
"\"\xC3\xA9t\xC3\xA9\": {\"id\": %zu, \"name\": \"\xC3\xA9ventail%zu\"}}",
|
||||
i, i, (i % 2) ? "\xE2\xBA\x83" : "\xE2\xBA\x95", i % 10, i % 10);
|
||||
if (n >= sizeof(buf)) { abort(); }
|
||||
data += std::string(buf, n);
|
||||
}
|
||||
for(size_t batch_size = 1000; batch_size < 2000; batch_size += (batch_size>1050?10:1)) {
|
||||
printf(".");
|
||||
fflush(NULL);
|
||||
simdjson::padded_string str(data);
|
||||
simdjson::dom::parser parser;
|
||||
size_t count = 0;
|
||||
simdjson::dom::document_stream stream;
|
||||
ASSERT_SUCCESS( parser.parse_many(str, batch_size).get(stream) );
|
||||
for (auto doc : stream) {
|
||||
int64_t keyid;
|
||||
ASSERT_SUCCESS( doc["id"].get(keyid) );
|
||||
ASSERT_EQUAL( keyid, int64_t(count) );
|
||||
|
||||
count++;
|
||||
}
|
||||
ASSERT_EQUAL( count, n_records )
|
||||
}
|
||||
printf("ok\n");
|
||||
return true;
|
||||
}
|
||||
|
||||
bool run() {
|
||||
return test_current_index() &&
|
||||
small_window() &&
|
||||
large_window() &&
|
||||
json_issue467() &&
|
||||
document_stream_test() &&
|
||||
document_stream_utf8_test();
|
||||
}
|
||||
}
|
||||
|
||||
namespace parse_api_tests {
|
||||
using namespace std;
|
||||
|
@ -1972,9 +1592,7 @@ int main(int argc, char *argv[]) {
|
|||
dom_api_tests::run() &&
|
||||
type_tests::run() &&
|
||||
format_tests::run() &&
|
||||
document_tests::run() &&
|
||||
number_tests::run() &&
|
||||
document_stream_tests::run()
|
||||
number_tests::run()
|
||||
) {
|
||||
std::cout << "Basic tests are ok." << std::endl;
|
||||
return EXIT_SUCCESS;
|
||||
|
|
|
@ -0,0 +1,245 @@
|
|||
#include <string>
|
||||
#include <vector>
|
||||
#include <unistd.h>
|
||||
|
||||
#include "simdjson.h"
|
||||
#include "test_macros.h"
|
||||
|
||||
namespace document_stream_tests {
|
||||
// Helper for the compilation test below: starts a parse_many over `str` and
// returns the resulting stream by value. The error code is deliberately
// discarded.
static simdjson::dom::document_stream parse_many_stream_return(simdjson::dom::parser &parser, simdjson::padded_string &str) {
  simdjson::dom::document_stream result;
  UNUSED auto ignored_error = parser.parse_many(str).get(result);
  return result;
}
|
||||
// this is a compilation test
|
||||
UNUSED static void parse_many_stream_assign() {
|
||||
simdjson::dom::parser parser;
|
||||
simdjson::padded_string str("{}",2);
|
||||
simdjson::dom::document_stream s1 = parse_many_stream_return(parser, str);
|
||||
}
|
||||
// Streams 1000 copies of "1 " through a deliberately small window and checks
// that the iterator's current_index() equals the byte offset of each document.
bool test_current_index() {
  std::cout << "Running " << __func__ << std::endl;
  const std::string base("1 ");// one JSON!
  std::string json;
  for (size_t copies = 0; copies != 1000; ++copies) {
    json.append(base);
  }
  simdjson::dom::parser parser;
  const size_t window = 32; // deliberately small
  simdjson::dom::document_stream stream;
  ASSERT_SUCCESS( parser.parse_many(json,window).get(stream) );
  // Offset at which the next document is expected to start.
  size_t expected_index = 0;
  for (auto it = stream.begin(); it != stream.end(); ++it) {
    auto doc = *it;
    ASSERT_SUCCESS(doc);
    if (it.current_index() != expected_index) {
      std::cout << "index:" << it.current_index() << std::endl;
      std::cout << "expected index:" << expected_index << std::endl;
      return false;
    }
    expected_index += base.size();
  }
  return true;
}
|
||||
bool small_window() {
|
||||
std::cout << "Running " << __func__ << std::endl;
|
||||
auto json = R"({"error":[],"result":{"token":"xxx"}}{"error":[],"result":{"token":"xxx"}})"_padded;
|
||||
simdjson::dom::parser parser;
|
||||
size_t count = 0;
|
||||
size_t window_size = 10; // deliberately too small
|
||||
simdjson::dom::document_stream stream;
|
||||
ASSERT_SUCCESS( parser.parse_many(json, window_size).get(stream) );
|
||||
for (auto doc : stream) {
|
||||
if (!doc.error()) {
|
||||
std::cerr << "Expected a capacity error " << doc.error() << std::endl;
|
||||
return false;
|
||||
}
|
||||
count++;
|
||||
}
|
||||
if(count == 2) {
|
||||
std::cerr << "Expected a capacity error " << std::endl;
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool large_window() {
|
||||
std::cout << "Running " << __func__ << std::endl;
|
||||
#if SIZE_MAX > 17179869184
|
||||
auto json = R"({"error":[],"result":{"token":"xxx"}}{"error":[],"result":{"token":"xxx"}})"_padded;
|
||||
simdjson::dom::parser parser;
|
||||
size_t count = 0;
|
||||
uint64_t window_size{17179869184}; // deliberately too big
|
||||
simdjson::dom::document_stream stream;
|
||||
ASSERT_SUCCESS( parser.parse_many(json, size_t(window_size)).get(stream) );
|
||||
for (auto doc : stream) {
|
||||
if (!doc.error()) {
|
||||
std::cerr << "I expected a failure (too big) but got " << doc.error() << std::endl;
|
||||
return false;
|
||||
}
|
||||
count++;
|
||||
}
|
||||
#endif
|
||||
return true;
|
||||
}
|
||||
// Streams `json` with the default window and checks that exactly
// `expectedcount` documents are produced, none of them with an error.
static bool parse_json_message_issue467(simdjson::padded_string &json, size_t expectedcount) {
  simdjson::dom::parser parser;
  simdjson::dom::document_stream stream;
  ASSERT_SUCCESS( parser.parse_many(json).get(stream) );
  size_t parsed_docs = 0;
  for (auto doc : stream) {
    if (doc.error()) {
      std::cerr << "Failed with simdjson error= " << doc.error() << std::endl;
      return false;
    }
    ++parsed_docs;
  }
  if (parsed_docs == expectedcount) {
    return true;
  }
  std::cerr << "bad count" << std::endl;
  return false;
}
|
||||
|
||||
bool json_issue467() {
|
||||
std::cout << "Running " << __func__ << std::endl;
|
||||
auto single_message = R"({"error":[],"result":{"token":"xxx"}})"_padded;
|
||||
auto two_messages = R"({"error":[],"result":{"token":"xxx"}}{"error":[],"result":{"token":"xxx"}})"_padded;
|
||||
|
||||
if(!parse_json_message_issue467(single_message, 1)) {
|
||||
return false;
|
||||
}
|
||||
if(!parse_json_message_issue467(two_messages, 2)) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// returns true if successful
|
||||
bool document_stream_test() {
|
||||
std::cout << "Running " << __func__ << std::endl;
|
||||
fflush(NULL);
|
||||
const size_t n_records = 10000;
|
||||
std::string data;
|
||||
char buf[1024];
|
||||
for (size_t i = 0; i < n_records; ++i) {
|
||||
size_t n = snprintf(buf,
|
||||
sizeof(buf),
|
||||
"{\"id\": %zu, \"name\": \"name%zu\", \"gender\": \"%s\", "
|
||||
"\"ete\": {\"id\": %zu, \"name\": \"eventail%zu\"}}",
|
||||
i, i, (i % 2) ? "homme" : "femme", i % 10, i % 10);
|
||||
if (n >= sizeof(buf)) { abort(); }
|
||||
data += std::string(buf, n);
|
||||
}
|
||||
for(size_t batch_size = 1000; batch_size < 2000; batch_size += (batch_size>1050?10:1)) {
|
||||
printf(".");
|
||||
fflush(NULL);
|
||||
simdjson::padded_string str(data);
|
||||
simdjson::dom::parser parser;
|
||||
size_t count = 0;
|
||||
simdjson::dom::document_stream stream;
|
||||
ASSERT_SUCCESS( parser.parse_many(str, batch_size).get(stream) );
|
||||
for (auto doc : stream) {
|
||||
int64_t keyid;
|
||||
ASSERT_SUCCESS( doc["id"].get(keyid) );
|
||||
ASSERT_EQUAL( keyid, int64_t(count) );
|
||||
|
||||
count++;
|
||||
}
|
||||
if(count != n_records) {
|
||||
printf("Found wrong number of documents %zd, expected %zd at batch size %zu\n", count, n_records, batch_size);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
printf("ok\n");
|
||||
return true;
|
||||
}
|
||||
|
||||
// returns true if successful
|
||||
bool document_stream_utf8_test() {
|
||||
std::cout << "Running " << __func__ << std::endl;
|
||||
fflush(NULL);
|
||||
const size_t n_records = 10000;
|
||||
std::string data;
|
||||
char buf[1024];
|
||||
for (size_t i = 0; i < n_records; ++i) {
|
||||
size_t n = snprintf(buf,
|
||||
sizeof(buf),
|
||||
"{\"id\": %zu, \"name\": \"name%zu\", \"gender\": \"%s\", "
|
||||
"\"\xC3\xA9t\xC3\xA9\": {\"id\": %zu, \"name\": \"\xC3\xA9ventail%zu\"}}",
|
||||
i, i, (i % 2) ? "\xE2\xBA\x83" : "\xE2\xBA\x95", i % 10, i % 10);
|
||||
if (n >= sizeof(buf)) { abort(); }
|
||||
data += std::string(buf, n);
|
||||
}
|
||||
for(size_t batch_size = 1000; batch_size < 2000; batch_size += (batch_size>1050?10:1)) {
|
||||
printf(".");
|
||||
fflush(NULL);
|
||||
simdjson::padded_string str(data);
|
||||
simdjson::dom::parser parser;
|
||||
size_t count = 0;
|
||||
simdjson::dom::document_stream stream;
|
||||
ASSERT_SUCCESS( parser.parse_many(str, batch_size).get(stream) );
|
||||
for (auto doc : stream) {
|
||||
int64_t keyid;
|
||||
ASSERT_SUCCESS( doc["id"].get(keyid) );
|
||||
ASSERT_EQUAL( keyid, int64_t(count) );
|
||||
|
||||
count++;
|
||||
}
|
||||
ASSERT_EQUAL( count, n_records )
|
||||
}
|
||||
printf("ok\n");
|
||||
return true;
|
||||
}
|
||||
|
||||
bool run() {
|
||||
return test_current_index() &&
|
||||
small_window() &&
|
||||
large_window() &&
|
||||
json_issue467() &&
|
||||
document_stream_test() &&
|
||||
document_stream_utf8_test();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
std::cout << std::unitbuf;
|
||||
int c;
|
||||
while ((c = getopt(argc, argv, "a:")) != -1) {
|
||||
switch (c) {
|
||||
case 'a': {
|
||||
const simdjson::implementation *impl = simdjson::available_implementations[optarg];
|
||||
if (!impl) {
|
||||
fprintf(stderr, "Unsupported architecture value -a %s\n", optarg);
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
simdjson::active_implementation = impl;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
fprintf(stderr, "Unexpected argument %c\n", c);
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
}
|
||||
|
||||
// this is put here deliberately to check that the documentation is correct (README),
|
||||
// should this fail to compile, you should update the documentation:
|
||||
if (simdjson::active_implementation->name() == "unsupported") {
|
||||
printf("unsupported CPU\n");
|
||||
}
|
||||
// We want to know what we are testing.
|
||||
std::cout << "Running tests against this implementation: " << simdjson::active_implementation->name();
|
||||
std::cout << "(" << simdjson::active_implementation->description() << ")" << std::endl;
|
||||
std::cout << "------------------------------------------------------------" << std::endl;
|
||||
|
||||
std::cout << "Running document_stream tests." << std::endl;
|
||||
if (document_stream_tests::run()) {
|
||||
std::cout << "document_stream tests are ok." << std::endl;
|
||||
return EXIT_SUCCESS;
|
||||
} else {
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,231 @@
|
|||
#include <string>
|
||||
#include <vector>
|
||||
#include <unistd.h>
|
||||
|
||||
#include "simdjson.h"
|
||||
#include "test_macros.h"
|
||||
|
||||
namespace document_tests {
|
||||
bool issue938() {
|
||||
std::vector<std::string> json_strings{"[true,false]", "[1,2,3,null]",
|
||||
R"({"yay":"json!"})"};
|
||||
simdjson::dom::parser parser1;
|
||||
for (simdjson::padded_string str : json_strings) {
|
||||
simdjson::dom::element element;
|
||||
ASSERT_SUCCESS( parser1.parse(str).get(element) );
|
||||
std::cout << element << std::endl;
|
||||
}
|
||||
std::vector<std::string> file_paths{
|
||||
ADVERSARIAL_JSON, FLATADVERSARIAL_JSON, DEMO_JSON,
|
||||
TWITTER_TIMELINE_JSON, REPEAT_JSON, SMALLDEMO_JSON,
|
||||
TRUENULL_JSON};
|
||||
for (auto path : file_paths) {
|
||||
simdjson::dom::parser parser2;
|
||||
simdjson::dom::element element;
|
||||
std::cout << "file: " << path << std::endl;
|
||||
ASSERT_SUCCESS( parser2.load(path).get(element) );
|
||||
std::cout << element.type() << std::endl;
|
||||
}
|
||||
simdjson::dom::parser parser3;
|
||||
for (auto path : file_paths) {
|
||||
simdjson::dom::element element;
|
||||
std::cout << "file: " << path << std::endl;
|
||||
ASSERT_SUCCESS( parser3.load(path).get(element) );
|
||||
std::cout << element.type() << std::endl;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// adversarial example that once triggred overruns, see https://github.com/lemire/simdjson/issues/345
|
||||
bool bad_example() {
|
||||
std::cout << __func__ << std::endl;
|
||||
simdjson::padded_string badjson = "[7,7,7,7,6,7,7,7,6,7,7,6,[7,7,7,7,6,7,7,7,6,7,7,6,7,7,7,7,7,7,6"_padded;
|
||||
simdjson::dom::parser parser;
|
||||
ASSERT_ERROR( parser.parse(badjson), simdjson::TAPE_ERROR );
|
||||
return true;
|
||||
}
|
||||
bool count_array_example() {
|
||||
std::cout << __func__ << std::endl;
|
||||
simdjson::padded_string smalljson = "[1,2,3]"_padded;
|
||||
simdjson::dom::parser parser;
|
||||
simdjson::dom::array array;
|
||||
ASSERT_SUCCESS( parser.parse(smalljson).get(array) );
|
||||
ASSERT_EQUAL( array.size(), 3 );
|
||||
return true;
|
||||
}
|
||||
bool count_object_example() {
|
||||
std::cout << __func__ << std::endl;
|
||||
simdjson::padded_string smalljson = "{\"1\":1,\"2\":1,\"3\":1}"_padded;
|
||||
simdjson::dom::parser parser;
|
||||
simdjson::dom::object object;
|
||||
ASSERT_SUCCESS( parser.parse(smalljson).get(object) );
|
||||
ASSERT_EQUAL( object.size(), 3 );
|
||||
return true;
|
||||
}
|
||||
// Parses only the first two bytes of buffers whose remaining bytes are all
// open brackets, checking that the trailing bytes do not affect the result.
// NOTE(review): the third argument (false) presumably tells the parser not to
// copy the input into its own padded buffer - confirm against parser::parse.
bool padded_with_open_bracket() {
  std::cout << __func__ << std::endl;
  // Two-byte prefix "[[" is an invalid document; the rest is open brackets.
  const char *invalid_prefix = "[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[";
  // Two-byte prefix "[]" is a valid document; the rest is open brackets.
  const char *valid_prefix = "[][[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[";
  simdjson::dom::parser parser;
  ASSERT_ERROR( parser.parse(invalid_prefix, 2, false), simdjson::TAPE_ERROR);
  ASSERT_SUCCESS( parser.parse(valid_prefix, 2, false) );
  return true;
}
|
||||
// returns true if successful
|
||||
bool stable_test() {
|
||||
std::cout << __func__ << std::endl;
|
||||
simdjson::padded_string json = "{"
|
||||
"\"Image\":{"
|
||||
"\"Width\":800,"
|
||||
"\"Height\":600,"
|
||||
"\"Title\":\"View from 15th Floor\","
|
||||
"\"Thumbnail\":{"
|
||||
"\"Url\":\"http://www.example.com/image/481989943\","
|
||||
"\"Height\":125,"
|
||||
"\"Width\":100"
|
||||
"},"
|
||||
"\"Animated\":false,"
|
||||
"\"IDs\":[116,943.3,234,38793]"
|
||||
"}"
|
||||
"}"_padded;
|
||||
simdjson::dom::parser parser;
|
||||
std::ostringstream myStream;
|
||||
#if SIMDJSON_EXCEPTIONS
|
||||
myStream << parser.parse(json);
|
||||
#else
|
||||
simdjson::dom::element doc;
|
||||
UNUSED auto error = parser.parse(json).get(doc);
|
||||
myStream << doc;
|
||||
#endif
|
||||
std::string newjson = myStream.str();
|
||||
if(static_cast<std::string>(json) != newjson) {
|
||||
std::cout << "serialized json differs!" << std::endl;
|
||||
std::cout << static_cast<std::string>(json) << std::endl;
|
||||
std::cout << newjson << std::endl;
|
||||
}
|
||||
return newjson == static_cast<std::string>(json);
|
||||
}
|
||||
// returns true if successful
|
||||
bool skyprophet_test() {
|
||||
std::cout << "Running " << __func__ << std::endl;
|
||||
const size_t n_records = 100000;
|
||||
std::vector<std::string> data;
|
||||
char buf[1024];
|
||||
for (size_t i = 0; i < n_records; ++i) {
|
||||
size_t n = snprintf(buf, sizeof(buf),
|
||||
"{\"id\": %zu, \"name\": \"name%zu\", \"gender\": \"%s\", "
|
||||
"\"school\": {\"id\": %zu, \"name\": \"school%zu\"}}",
|
||||
i, i, (i % 2) ? "male" : "female", i % 10, i % 10);
|
||||
if (n >= sizeof(buf)) { abort(); }
|
||||
data.emplace_back(std::string(buf, n));
|
||||
}
|
||||
for (size_t i = 0; i < n_records; ++i) {
|
||||
size_t n = snprintf(buf, sizeof(buf), "{\"counter\": %f, \"array\": [%s]}", static_cast<double>(i) * 3.1416,
|
||||
(i % 2) ? "true" : "false");
|
||||
if (n >= sizeof(buf)) { abort(); }
|
||||
data.emplace_back(std::string(buf, n));
|
||||
}
|
||||
for (size_t i = 0; i < n_records; ++i) {
|
||||
size_t n = snprintf(buf, sizeof(buf), "{\"number\": %e}", static_cast<double>(i) * 10000.31321321);
|
||||
if (n >= sizeof(buf)) { abort(); }
|
||||
data.emplace_back(std::string(buf, n));
|
||||
}
|
||||
data.emplace_back(std::string("true"));
|
||||
data.emplace_back(std::string("false"));
|
||||
data.emplace_back(std::string("null"));
|
||||
data.emplace_back(std::string("0.1"));
|
||||
size_t maxsize = 0;
|
||||
for (auto &s : data) {
|
||||
if (maxsize < s.size())
|
||||
maxsize = s.size();
|
||||
}
|
||||
simdjson::dom::parser parser;
|
||||
size_t counter = 0;
|
||||
for (auto &rec : data) {
|
||||
if ((counter % 10000) == 0) {
|
||||
printf(".");
|
||||
fflush(NULL);
|
||||
}
|
||||
counter++;
|
||||
auto error = parser.parse(rec.c_str(), rec.length()).error();
|
||||
if (error != simdjson::error_code::SUCCESS) {
|
||||
printf("Something is wrong in skyprophet_test: %s.\n", rec.c_str());
|
||||
printf("Parsing failed. Error is %s\n", simdjson::error_message(error));
|
||||
return false;
|
||||
}
|
||||
error = parser.parse(rec.c_str(), rec.length()).error();
|
||||
if (error != simdjson::error_code::SUCCESS) {
|
||||
printf("Something is wrong in skyprophet_test: %s.\n", rec.c_str());
|
||||
printf("Parsing failed. Error is %s\n", simdjson::error_message(error));
|
||||
return false;
|
||||
}
|
||||
}
|
||||
printf("\n");
|
||||
return true;
|
||||
}
|
||||
bool lots_of_brackets() {
|
||||
std::string input;
|
||||
for(size_t i = 0; i < 200; i++) {
|
||||
input += "[";
|
||||
}
|
||||
for(size_t i = 0; i < 200; i++) {
|
||||
input += "]";
|
||||
}
|
||||
simdjson::dom::parser parser;
|
||||
auto error = parser.parse(input).error();
|
||||
if (error) { std::cerr << "Error: " << simdjson::error_message(error) << std::endl; return false; }
|
||||
return true;
|
||||
}
|
||||
bool run() {
|
||||
return issue938() &&
|
||||
padded_with_open_bracket() &&
|
||||
bad_example() &&
|
||||
count_array_example() &&
|
||||
count_object_example() &&
|
||||
stable_test() &&
|
||||
skyprophet_test() &&
|
||||
lots_of_brackets();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
std::cout << std::unitbuf;
|
||||
int c;
|
||||
while ((c = getopt(argc, argv, "a:")) != -1) {
|
||||
switch (c) {
|
||||
case 'a': {
|
||||
const simdjson::implementation *impl = simdjson::available_implementations[optarg];
|
||||
if (!impl) {
|
||||
fprintf(stderr, "Unsupported architecture value -a %s\n", optarg);
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
simdjson::active_implementation = impl;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
fprintf(stderr, "Unexpected argument %c\n", c);
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
}
|
||||
|
||||
// this is put here deliberately to check that the documentation is correct (README),
|
||||
// should this fail to compile, you should update the documentation:
|
||||
if (simdjson::active_implementation->name() == "unsupported") {
|
||||
printf("unsupported CPU\n");
|
||||
}
|
||||
// We want to know what we are testing.
|
||||
std::cout << "Running tests against this implementation: " << simdjson::active_implementation->name();
|
||||
std::cout << "(" << simdjson::active_implementation->description() << ")" << std::endl;
|
||||
std::cout << "------------------------------------------------------------" << std::endl;
|
||||
|
||||
std::cout << "Running document tests." << std::endl;
|
||||
if (document_tests::run()) {
|
||||
std::cout << "document tests are ok." << std::endl;
|
||||
return EXIT_SUCCESS;
|
||||
} else {
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue