#include "simdjson.h" #include "test_ondemand.h" using namespace simdjson; namespace document_stream_tests { std::string my_string(ondemand::document& doc) { std::stringstream ss; ss << doc; return ss.str(); } bool simple_document_iteration() { TEST_START(); auto json = R"([1,[1,2]] {"a":1,"b":2} {"o":{"1":1,"2":2}} [1,2,3])"_padded; ondemand::parser parser; ondemand::document_stream stream; ASSERT_SUCCESS(parser.iterate_many(json).get(stream)); std::string_view expected[4] = {"[1,[1,2]]", "{\"a\":1,\"b\":2}", "{\"o\":{\"1\":1,\"2\":2}}", "[1,2,3]"}; size_t counter{0}; for(auto & doc : stream) { ASSERT_TRUE(counter < 4); ASSERT_EQUAL(my_string(doc), expected[counter++]); } ASSERT_EQUAL(counter, 4); TEST_SUCCEED(); } bool simple_document_iteration_multiple_batches() { TEST_START(); auto json = R"([1,[1,2]] {"a":1,"b":2} {"o":{"1":1,"2":2}} [1,2,3])"_padded; ondemand::parser parser; ondemand::document_stream stream; ASSERT_SUCCESS(parser.iterate_many(json,32).get(stream)); std::string_view expected[4] = {"[1,[1,2]]", "{\"a\":1,\"b\":2}", "{\"o\":{\"1\":1,\"2\":2}}", "[1,2,3]"}; size_t counter{0}; for(auto i = stream.begin(); i != stream.end(); ++i) { ASSERT_TRUE(counter < 4); ASSERT_EQUAL(i.source(), expected[counter++]); } ASSERT_EQUAL(counter, 4); TEST_SUCCEED(); } bool simple_document_iteration_with_parsing() { TEST_START(); auto json = R"([1,[1,2]] {"a":1,"b":2} {"o":{"1":1,"2":2}} [1,2,3])"_padded; ondemand::parser parser; ondemand::document_stream stream; ASSERT_SUCCESS(parser.iterate_many(json).get(stream)); std::string_view expected[4] = {"[1,[1,2]]", "{\"a\":1,\"b\":2}", "{\"o\":{\"1\":1,\"2\":2}}", "[1,2,3]"}; size_t counter{0}; auto i = stream.begin(); int64_t x; ASSERT_EQUAL(i.source(),expected[counter++]); ASSERT_SUCCESS( (*i).at_pointer("/1/1").get(x) ); ASSERT_EQUAL(x,2); ++i; ASSERT_EQUAL(i.source(),expected[counter++]); ASSERT_SUCCESS( (*i).find_field("a").get(x) ); ASSERT_EQUAL(x,1); ++i; ASSERT_EQUAL(i.source(),expected[counter++]); ASSERT_SUCCESS( (*i).at_pointer("/o/2").get(x) ); ASSERT_EQUAL(x,2); ++i; ASSERT_EQUAL(i.source(),expected[counter++]); ASSERT_SUCCESS( (*i).at_pointer("/2").get(x) ); ASSERT_EQUAL(x,3); ++i; if (i != stream.end()) { return false; } TEST_SUCCEED(); } bool atoms_json() { TEST_START(); auto json = R"(5 true 20.3 "string" )"_padded; ondemand::parser parser; ondemand::document_stream stream; ASSERT_SUCCESS(parser.iterate_many(json).get(stream)); std::string_view expected[4] = {"5", "true", "20.3", "\"string\""}; size_t counter{0}; for (auto i = stream.begin(); i != stream.end(); ++i) { ASSERT_EQUAL(i.source(), expected[counter++]); } ASSERT_EQUAL(counter,4); TEST_SUCCEED(); } bool doc_index() { TEST_START(); auto json = R"({"z":5} {"1":1,"2":2,"4":4} [7, 10, 9] [15, 11, 12, 13] [154, 110, 112, 1311])"_padded; std::string_view expected[5] = {R"({"z":5})",R"({"1":1,"2":2,"4":4})","[7, 10, 9]","[15, 11, 12, 13]","[154, 110, 112, 1311]"}; size_t expected_indexes[5] = {0, 9, 29, 44, 65}; ondemand::parser parser; ondemand::document_stream stream; size_t counter{0}; ASSERT_SUCCESS(parser.iterate_many(json).get(stream)); for(auto i = stream.begin(); i != stream.end(); ++i) { ASSERT_TRUE(counter < 5); ASSERT_EQUAL(i.current_index(), expected_indexes[counter]); ASSERT_EQUAL(i.source(), expected[counter]); counter++; } ASSERT_EQUAL(counter, 5); TEST_SUCCEED(); } bool doc_index_multiple_batches() { TEST_START(); auto json = R"({"z":5} {"1":1,"2":2,"4":4} [7, 10, 9] [15, 11, 12, 13] [154, 110, 112, 1311])"_padded; std::string_view expected[5] = {R"({"z":5})",R"({"1":1,"2":2,"4":4})","[7, 10, 9]","[15, 11, 12, 13]","[154, 110, 112, 1311]"}; size_t expected_indexes[5] = {0, 9, 29, 44, 65}; ondemand::parser parser; ondemand::document_stream stream; size_t counter{0}; ASSERT_SUCCESS(parser.iterate_many(json,32).get(stream)); for(auto i = stream.begin(); i != stream.end(); ++i) { ASSERT_TRUE(counter < 5); ASSERT_EQUAL(i.current_index(), expected_indexes[counter]); ASSERT_EQUAL(i.source(), expected[counter]); counter++; } ASSERT_EQUAL(counter, 5); TEST_SUCCEED(); } bool source_test() { TEST_START(); auto json = R"([1,[1,2]] {"a":1,"b":2} {"o":{"1":1,"2":2}} [1,2,3] )"_padded; ondemand::parser parser; ondemand::document_stream stream; ASSERT_SUCCESS(parser.iterate_many(json).get(stream)); std::string_view expected[4] = {"[1,[1,2]]", "{\"a\":1,\"b\":2}", "{\"o\":{\"1\":1,\"2\":2}}", "[1,2,3]"}; size_t counter{0}; for (auto i = stream.begin(); i != stream.end(); ++i) { ASSERT_EQUAL(i.source(), expected[counter++]); } ASSERT_EQUAL(counter,4); TEST_SUCCEED(); } bool truncated() { TEST_START(); // The last JSON document is intentionally truncated. auto json = R"([1,2,3] {"1":1,"2":3,"4":4} [1,2 )"_padded; ondemand::parser parser; ondemand::document_stream stream; ASSERT_SUCCESS(parser.iterate_many(json).get(stream)); size_t counter{0}; for (auto i = stream.begin(); i != stream.end(); ++i) { counter++; } size_t truncated = stream.truncated_bytes(); ASSERT_EQUAL(truncated, 6); ASSERT_EQUAL(counter,2); TEST_SUCCEED(); } bool truncated_complete_docs() { TEST_START(); auto json = R"([1,2,3] {"1":1,"2":3,"4":4} [1,2] )"_padded; ondemand::parser parser; ondemand::document_stream stream; ASSERT_SUCCESS(parser.iterate_many(json).get(stream)); size_t counter{0}; for (auto i = stream.begin(); i != stream.end(); ++i) { counter++; } size_t truncated = stream.truncated_bytes(); ASSERT_EQUAL(truncated, 0); ASSERT_EQUAL(counter,3); TEST_SUCCEED(); } bool truncated_unclosed_string() { TEST_START(); // The last JSON document is intentionally truncated. auto json = R"([1,2,3] {"1":1,"2":3,"4":4} "intentionally unclosed string )"_padded; ondemand::parser parser; ondemand::document_stream stream; // We use a window of json.size() though any large value would do. ASSERT_SUCCESS( parser.iterate_many(json).get(stream) ); size_t counter{0}; for(auto i = stream.begin(); i != stream.end(); ++i) { counter++; } size_t truncated = stream.truncated_bytes(); ASSERT_EQUAL(counter,2); ASSERT_EQUAL(truncated,32); TEST_SUCCEED(); } bool truncated_unclosed_string_in_object() { // The last JSON document is intentionally truncated. auto json = R"([1,2,3] {"1":1,"2":3,"4":4} {"key":"intentionally unclosed string )"_padded; ondemand::parser parser; ondemand::document_stream stream; ASSERT_SUCCESS( parser.iterate_many(json).get(stream) ); size_t counter{0}; for(auto i = stream.begin(); i != stream.end(); ++i) { counter++; } size_t truncated = stream.truncated_bytes(); ASSERT_EQUAL(counter,2); ASSERT_EQUAL(truncated,39); TEST_SUCCEED(); } bool small_window() { TEST_START(); std::vector input; input.push_back('['); for(size_t i = 1; i < 1024; i++) { input.push_back('1'); input.push_back(i < 1023 ? ',' : ']'); } auto json = simdjson::padded_string(input.data(),input.size()); ondemand::parser parser; size_t window_size{1024}; // deliberately too small ondemand::document_stream stream; ASSERT_SUCCESS( parser.iterate_many(json, window_size).get(stream) ); auto i = stream.begin(); ASSERT_ERROR(i.error(), CAPACITY); TEST_SUCCEED(); } bool large_window() { TEST_START(); #if SIZE_MAX > 17179869184 auto json = R"({"error":[],"result":{"token":"xxx"}}{"error":[],"result":{"token":"xxx"}})"_padded; ondemand::parser parser; uint64_t window_size{17179869184}; // deliberately too big ondemand::document_stream stream; ASSERT_SUCCESS( parser.iterate_many(json, size_t(window_size)).get(stream) ); auto i = stream.begin(); ASSERT_ERROR(i.error(),CAPACITY); #endif TEST_SUCCEED(); } bool test_leading_spaces() { TEST_START(); auto input = R"( [1,1] [1,2] [1,3] [1,4] [1,5] [1,6] [1,7] [1,8] [1,9] [1,10] [1,11] [1,12] [1,13] [1,14] [1,15] )"_padded;; size_t count{0}; ondemand::parser parser; ondemand::document_stream stream; ASSERT_SUCCESS(parser.iterate_many(input, 32).get(stream)); for(auto i = stream.begin(); i != stream.end(); ++i) { ASSERT_SUCCESS(i.error()); count++; } ASSERT_EQUAL(count,15); TEST_SUCCEED(); } bool test_crazy_leading_spaces() { TEST_START(); auto input = R"( [1,1] [1,2] [1,3] [1,4] [1,5] [1,6] [1,7] [1,8] [1,9] [1,10] [1,11] [1,12] [1,13] [1,14] [1,15] )"_padded;; size_t count{0}; ondemand::parser parser; ondemand::document_stream stream; ASSERT_SUCCESS(parser.iterate_many(input, 32).get(stream)); for(auto i = stream.begin(); i != stream.end(); ++i) { ASSERT_SUCCESS(i.error()); count++; } ASSERT_EQUAL(count,15); TEST_SUCCEED(); } bool adversarial_single_document() { TEST_START(); auto json = R"({"f[)"_padded; ondemand::parser parser; ondemand::document_stream stream; ASSERT_SUCCESS(parser.iterate_many(json).get(stream)); size_t count{0}; for (auto & doc : stream) { (void)doc; count++; } ASSERT_EQUAL(count,0); TEST_SUCCEED(); } bool adversarial_single_document_array() { TEST_START(); auto json = R"(["this is an unclosed string ])"_padded; ondemand::parser parser; ondemand::document_stream stream; ASSERT_SUCCESS(parser.iterate_many(json).get(stream)); size_t count{0}; for (auto & doc : stream) { (void)doc; count++; } ASSERT_EQUAL(count,0); TEST_SUCCEED(); } bool document_stream_test() { TEST_START(); fflush(NULL); const size_t n_records = 10000; std::string data; std::vector buf(1024); // Generating data for (size_t i = 0; i < n_records; ++i) { size_t n = snprintf(buf.data(), buf.size(), "{\"id\": %zu, \"name\": \"name%zu\", \"gender\": \"%s\", " "\"ete\": {\"id\": %zu, \"name\": \"eventail%zu\"}}", i, i, (i % 2) ? "homme" : "femme", i % 10, i % 10); if (n >= buf.size()) { abort(); } data += std::string(buf.data(), n); } for(size_t batch_size = 1000; batch_size < 2000; batch_size += (batch_size>1050?10:1)) { fflush(NULL); simdjson::padded_string str(data); ondemand::parser parser; ondemand::document_stream stream; size_t count{0}; ASSERT_SUCCESS( parser.iterate_many(str, batch_size).get(stream) ); for (auto & doc : stream) { int64_t keyid; ASSERT_SUCCESS( doc["id"].get(keyid) ); ASSERT_EQUAL( keyid, int64_t(count) ); count++; } ASSERT_EQUAL(count,n_records); } TEST_SUCCEED(); } bool document_stream_utf8_test() { TEST_START(); fflush(NULL); const size_t n_records = 10000; std::string data; std::vector buf(1024); // Generating data for (size_t i = 0; i < n_records; ++i) { size_t n = snprintf(buf.data(), buf.size(), "{\"id\": %zu, \"name\": \"name%zu\", \"gender\": \"%s\", " "\"\xC3\xA9t\xC3\xA9\": {\"id\": %zu, \"name\": \"\xC3\xA9ventail%zu\"}}", i, i, (i % 2) ? "\xE2\xBA\x83" : "\xE2\xBA\x95", i % 10, i % 10); if (n >= buf.size()) { abort(); } data += std::string(buf.data(), n); } for(size_t batch_size = 1000; batch_size < 2000; batch_size += (batch_size>1050?10:1)) { fflush(NULL); simdjson::padded_string str(data); ondemand::parser parser; ondemand::document_stream stream; size_t count{0}; ASSERT_SUCCESS( parser.iterate_many(str, batch_size).get(stream) ); for (auto & doc : stream) { int64_t keyid; ASSERT_SUCCESS( doc["id"].get(keyid) ); ASSERT_EQUAL( keyid, int64_t(count) ); count++; } ASSERT_EQUAL( count, n_records ) } TEST_SUCCEED(); } bool stress_data_race() { TEST_START(); // Correct JSON. auto input = R"([1,23] [1,23] [1,23] [1,23] [1,23] [1,23] [1,23] [1,23] [1,23] [1,23] [1,23] [1,23] [1,23] [1,23] [1,23] )"_padded;; ondemand::parser parser; ondemand::document_stream stream; ASSERT_SUCCESS(parser.iterate_many(input, 32).get(stream)); for(auto i = stream.begin(); i != stream.end(); ++i) { ASSERT_SUCCESS(i.error()); } TEST_SUCCEED(); } bool stress_data_race_with_error() { TEST_START(); #if SIMDJSON_THREAD_ENABLED std::cout << "ENABLED" << std::endl; #endif // Intentionally broken auto input = R"([1,23] [1,23] [1,23] [1,23 [1,23] [1,23] [1,23] [1,23] [1,23] [1,23] [1,23] [1,23] [1,23] [1,23] [1,23] )"_padded; ondemand::parser parser; ondemand::document_stream stream; ASSERT_SUCCESS(parser.iterate_many(input, 32).get(stream)); size_t count{0}; for(auto i = stream.begin(); i != stream.end(); ++i) { auto error = i.error(); if(count <= 3) { ASSERT_SUCCESS(error); } else { ASSERT_ERROR(error,TAPE_ERROR); break; } count++; } TEST_SUCCEED(); } bool run() { return simple_document_iteration() && simple_document_iteration_multiple_batches() && simple_document_iteration_with_parsing() && atoms_json() && doc_index() && doc_index_multiple_batches() && source_test() && truncated() && truncated_complete_docs() && truncated_unclosed_string() && small_window() && large_window() && test_leading_spaces() && test_crazy_leading_spaces() && adversarial_single_document() && adversarial_single_document_array() && document_stream_test() && document_stream_utf8_test() && stress_data_race() && stress_data_race_with_error() && true; } } // document_stream_tests int main (int argc, char *argv[]) { return test_main(argc, argv, document_stream_tests::run); }