simdjson/tests/basictests.cpp

1989 lines
67 KiB
C++
Raw Normal View History

#include <cinttypes>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <string>
#include <vector>
#include <cmath>
2020-02-25 12:59:38 +08:00
#include <set>
#include <string_view>
#include <sstream>
#include <utility>
#include <ciso646>
#include <unistd.h>
#include "simdjson.h"
2020-06-20 02:48:13 +08:00
#include "cast_tester.h"
2020-06-20 04:27:00 +08:00
#include "test_macros.h"
2020-04-03 03:14:29 +08:00
// Number of documents the load_many tests below expect to iterate over when
// reading the AMAZON_CELLPHONES_NDJSON file.
const size_t AMAZON_CELLPHONES_NDJSON_DOC_COUNT = 793;
namespace number_tests {
// ulp distance
// Marc B. Reynolds, 2016-2019
// Public Domain under http://unlicense.org, see link for details.
// adapted by D. Lemire
//
// Returns the distance between a and b measured in steps of their raw 64-bit
// patterns: 0 when the two values are identical (+0.0 and -0.0 count as
// identical), 1 when they are adjacent doubles, and so on.
// No special handling is done for NaN or infinity bit patterns.
inline uint64_t f64_ulp_dist(double a, double b) {
  uint64_t ua, ub;
  memcpy(&ua, &a, sizeof(ua));
  memcpy(&ub, &b, sizeof(ub));
  // Same sign bit: the distance is the absolute difference of the patterns.
  if ((int64_t)(ub ^ ua) >= 0)
    return (int64_t)(ua - ub) >= 0 ? (ua - ub) : (ub - ua);
  // Opposite signs: exactly one operand carries the sign bit (2^63). Adding
  // another 2^63 cancels it modulo 2^64 and leaves magnitude(a) + magnitude(b).
  // The constant must therefore be the 64-bit sign mask, not the 32-bit one.
  return ua + ub + 0x8000000000000000ULL;
}
2019-10-17 05:47:52 +08:00
bool small_integers() {
std::cout << __func__ << std::endl;
2020-03-29 02:43:41 +08:00
simdjson::dom::parser parser;
for (int m = 10; m < 20; m++) {
for (int i = -1024; i < 1024; i++) {
auto str = std::to_string(i);
int64_t actual;
ASSERT_SUCCESS(parser.parse(str).get(actual));
if (actual != i) {
std::cerr << "JSON '" << str << "' parsed to " << actual << " instead of " << i << std::endl;
return false;
}
}
}
return true;
}
bool powers_of_two() {
std::cout << __func__ << std::endl;
char buf[1024];
2020-03-29 02:43:41 +08:00
simdjson::dom::parser parser;
uint64_t maxulp = 0;
for (int i = -1075; i < 1024; ++i) {// large negative values should be zero.
double expected = pow(2, i);
2020-05-02 04:53:23 +08:00
size_t n = snprintf(buf, sizeof(buf), "%.*e", std::numeric_limits<double>::max_digits10 - 1, expected);
if (n >= sizeof(buf)) { abort(); }
fflush(NULL);
double actual;
auto error = parser.parse(buf, n).get(actual);
if (error) { std::cerr << error << std::endl; return false; }
uint64_t ulp = f64_ulp_dist(actual,expected);
if(ulp > maxulp) maxulp = ulp;
if(ulp > 0) {
std::cerr << "JSON '" << buf << " parsed to " << actual << " instead of " << expected << std::endl;
return false;
}
}
return true;
}
// Reference values for powers of ten written as floating-point literals, one
// entry per exponent from 1e-307 up to 1e308. powers_of_ten() reads the value
// for exponent e at index e + 307.
static const double testing_power_of_ten[] = {
1e-307, 1e-306, 1e-305, 1e-304, 1e-303, 1e-302, 1e-301, 1e-300, 1e-299,
1e-298, 1e-297, 1e-296, 1e-295, 1e-294, 1e-293, 1e-292, 1e-291, 1e-290,
1e-289, 1e-288, 1e-287, 1e-286, 1e-285, 1e-284, 1e-283, 1e-282, 1e-281,
1e-280, 1e-279, 1e-278, 1e-277, 1e-276, 1e-275, 1e-274, 1e-273, 1e-272,
1e-271, 1e-270, 1e-269, 1e-268, 1e-267, 1e-266, 1e-265, 1e-264, 1e-263,
1e-262, 1e-261, 1e-260, 1e-259, 1e-258, 1e-257, 1e-256, 1e-255, 1e-254,
1e-253, 1e-252, 1e-251, 1e-250, 1e-249, 1e-248, 1e-247, 1e-246, 1e-245,
1e-244, 1e-243, 1e-242, 1e-241, 1e-240, 1e-239, 1e-238, 1e-237, 1e-236,
1e-235, 1e-234, 1e-233, 1e-232, 1e-231, 1e-230, 1e-229, 1e-228, 1e-227,
1e-226, 1e-225, 1e-224, 1e-223, 1e-222, 1e-221, 1e-220, 1e-219, 1e-218,
1e-217, 1e-216, 1e-215, 1e-214, 1e-213, 1e-212, 1e-211, 1e-210, 1e-209,
1e-208, 1e-207, 1e-206, 1e-205, 1e-204, 1e-203, 1e-202, 1e-201, 1e-200,
1e-199, 1e-198, 1e-197, 1e-196, 1e-195, 1e-194, 1e-193, 1e-192, 1e-191,
1e-190, 1e-189, 1e-188, 1e-187, 1e-186, 1e-185, 1e-184, 1e-183, 1e-182,
1e-181, 1e-180, 1e-179, 1e-178, 1e-177, 1e-176, 1e-175, 1e-174, 1e-173,
1e-172, 1e-171, 1e-170, 1e-169, 1e-168, 1e-167, 1e-166, 1e-165, 1e-164,
1e-163, 1e-162, 1e-161, 1e-160, 1e-159, 1e-158, 1e-157, 1e-156, 1e-155,
1e-154, 1e-153, 1e-152, 1e-151, 1e-150, 1e-149, 1e-148, 1e-147, 1e-146,
1e-145, 1e-144, 1e-143, 1e-142, 1e-141, 1e-140, 1e-139, 1e-138, 1e-137,
1e-136, 1e-135, 1e-134, 1e-133, 1e-132, 1e-131, 1e-130, 1e-129, 1e-128,
1e-127, 1e-126, 1e-125, 1e-124, 1e-123, 1e-122, 1e-121, 1e-120, 1e-119,
1e-118, 1e-117, 1e-116, 1e-115, 1e-114, 1e-113, 1e-112, 1e-111, 1e-110,
1e-109, 1e-108, 1e-107, 1e-106, 1e-105, 1e-104, 1e-103, 1e-102, 1e-101,
1e-100, 1e-99, 1e-98, 1e-97, 1e-96, 1e-95, 1e-94, 1e-93, 1e-92,
1e-91, 1e-90, 1e-89, 1e-88, 1e-87, 1e-86, 1e-85, 1e-84, 1e-83,
1e-82, 1e-81, 1e-80, 1e-79, 1e-78, 1e-77, 1e-76, 1e-75, 1e-74,
1e-73, 1e-72, 1e-71, 1e-70, 1e-69, 1e-68, 1e-67, 1e-66, 1e-65,
1e-64, 1e-63, 1e-62, 1e-61, 1e-60, 1e-59, 1e-58, 1e-57, 1e-56,
1e-55, 1e-54, 1e-53, 1e-52, 1e-51, 1e-50, 1e-49, 1e-48, 1e-47,
1e-46, 1e-45, 1e-44, 1e-43, 1e-42, 1e-41, 1e-40, 1e-39, 1e-38,
1e-37, 1e-36, 1e-35, 1e-34, 1e-33, 1e-32, 1e-31, 1e-30, 1e-29,
1e-28, 1e-27, 1e-26, 1e-25, 1e-24, 1e-23, 1e-22, 1e-21, 1e-20,
1e-19, 1e-18, 1e-17, 1e-16, 1e-15, 1e-14, 1e-13, 1e-12, 1e-11,
1e-10, 1e-9, 1e-8, 1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2,
1e-1, 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7,
1e8, 1e9, 1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16,
1e17, 1e18, 1e19, 1e20, 1e21, 1e22, 1e23, 1e24, 1e25,
1e26, 1e27, 1e28, 1e29, 1e30, 1e31, 1e32, 1e33, 1e34,
1e35, 1e36, 1e37, 1e38, 1e39, 1e40, 1e41, 1e42, 1e43,
1e44, 1e45, 1e46, 1e47, 1e48, 1e49, 1e50, 1e51, 1e52,
1e53, 1e54, 1e55, 1e56, 1e57, 1e58, 1e59, 1e60, 1e61,
1e62, 1e63, 1e64, 1e65, 1e66, 1e67, 1e68, 1e69, 1e70,
1e71, 1e72, 1e73, 1e74, 1e75, 1e76, 1e77, 1e78, 1e79,
1e80, 1e81, 1e82, 1e83, 1e84, 1e85, 1e86, 1e87, 1e88,
1e89, 1e90, 1e91, 1e92, 1e93, 1e94, 1e95, 1e96, 1e97,
1e98, 1e99, 1e100, 1e101, 1e102, 1e103, 1e104, 1e105, 1e106,
1e107, 1e108, 1e109, 1e110, 1e111, 1e112, 1e113, 1e114, 1e115,
1e116, 1e117, 1e118, 1e119, 1e120, 1e121, 1e122, 1e123, 1e124,
1e125, 1e126, 1e127, 1e128, 1e129, 1e130, 1e131, 1e132, 1e133,
1e134, 1e135, 1e136, 1e137, 1e138, 1e139, 1e140, 1e141, 1e142,
1e143, 1e144, 1e145, 1e146, 1e147, 1e148, 1e149, 1e150, 1e151,
1e152, 1e153, 1e154, 1e155, 1e156, 1e157, 1e158, 1e159, 1e160,
1e161, 1e162, 1e163, 1e164, 1e165, 1e166, 1e167, 1e168, 1e169,
1e170, 1e171, 1e172, 1e173, 1e174, 1e175, 1e176, 1e177, 1e178,
1e179, 1e180, 1e181, 1e182, 1e183, 1e184, 1e185, 1e186, 1e187,
1e188, 1e189, 1e190, 1e191, 1e192, 1e193, 1e194, 1e195, 1e196,
1e197, 1e198, 1e199, 1e200, 1e201, 1e202, 1e203, 1e204, 1e205,
1e206, 1e207, 1e208, 1e209, 1e210, 1e211, 1e212, 1e213, 1e214,
1e215, 1e216, 1e217, 1e218, 1e219, 1e220, 1e221, 1e222, 1e223,
1e224, 1e225, 1e226, 1e227, 1e228, 1e229, 1e230, 1e231, 1e232,
1e233, 1e234, 1e235, 1e236, 1e237, 1e238, 1e239, 1e240, 1e241,
1e242, 1e243, 1e244, 1e245, 1e246, 1e247, 1e248, 1e249, 1e250,
1e251, 1e252, 1e253, 1e254, 1e255, 1e256, 1e257, 1e258, 1e259,
1e260, 1e261, 1e262, 1e263, 1e264, 1e265, 1e266, 1e267, 1e268,
1e269, 1e270, 1e271, 1e272, 1e273, 1e274, 1e275, 1e276, 1e277,
1e278, 1e279, 1e280, 1e281, 1e282, 1e283, 1e284, 1e285, 1e286,
1e287, 1e288, 1e289, 1e290, 1e291, 1e292, 1e293, 1e294, 1e295,
1e296, 1e297, 1e298, 1e299, 1e300, 1e301, 1e302, 1e303, 1e304,
1e305, 1e306, 1e307, 1e308};
bool powers_of_ten() {
std::cout << __func__ << std::endl;
char buf[1024];
2020-03-29 02:43:41 +08:00
simdjson::dom::parser parser;
for (int i = -1000000; i <= 308; ++i) {// large negative values should be zero.
2020-05-02 04:53:23 +08:00
size_t n = snprintf(buf, sizeof(buf), "1e%d", i);
if (n >= sizeof(buf)) { abort(); }
fflush(NULL);
double actual;
auto error = parser.parse(buf, n).get(actual);
if (error) { std::cerr << error << std::endl; return false; }
double expected = ((i >= -307) ? testing_power_of_ten[i + 307]: std::pow(10, i));
int ulp = (int) f64_ulp_dist(actual, expected);
if(ulp > 0) {
std::cerr << "JSON '" << buf << " parsed to " << actual << " instead of " << expected << std::endl;
return false;
}
2019-10-17 05:47:52 +08:00
}
printf("Powers of 10 can be parsed.\n");
return true;
}
// Runs the number tests in order and stops at the first one that fails.
bool run() {
  if (!small_integers()) { return false; }
  if (!powers_of_two()) { return false; }
  return powers_of_ten();
}
}
namespace document_tests {
2020-06-17 08:04:09 +08:00
// Exercises three usage patterns: parsing several in-memory strings with one
// parser, loading each test file with a fresh parser, and loading all test
// files with a single reused parser. Parse/load errors are only printed; the
// function always returns true.
int issue938() {
  std::vector<std::string> json_strings{"[true,false]", "[1,2,3,null]",
                                        R"({"yay":"json!"})"};
  simdjson::dom::parser parser1;
  // Each std::string is converted to a padded_string by the loop variable.
  for (simdjson::padded_string str : json_strings) {
    auto [element, error] = parser1.parse(str);
    if (!error) {
      std::cout << element << std::endl;
    } else {
      std::cerr << error << std::endl;
    }
  }

  std::vector<std::string> file_paths{
      ADVERSARIAL_JSON, FLATADVERSARIAL_JSON, DEMO_JSON,
      TWITTER_TIMELINE_JSON, REPEAT_JSON, SMALLDEMO_JSON,
      TRUENULL_JSON};

  // One new parser per file.
  for (const auto &path : file_paths) {
    simdjson::dom::parser parser2;
    std::cout << "file: " << path << std::endl;
    auto [element, error] = parser2.load(path);
    if (!error) {
      std::cout << element.type() << std::endl;
    } else {
      std::cerr << error << std::endl;
    }
  }

  // One parser shared by all files.
  simdjson::dom::parser parser3;
  for (const auto &path : file_paths) {
    std::cout << "file: " << path << std::endl;
    auto [element, error] = parser3.load(path);
    if (!error) {
      std::cout << element.type() << std::endl;
    } else {
      std::cerr << error << std::endl;
    }
  }
  return true;
}
// adversarial example that once triggred overruns, see https://github.com/lemire/simdjson/issues/345
bool bad_example() {
std::cout << __func__ << std::endl;
2020-03-27 07:40:29 +08:00
simdjson::padded_string badjson = "[7,7,7,7,6,7,7,7,6,7,7,6,[7,7,7,7,6,7,7,7,6,7,7,6,7,7,7,7,7,7,6"_padded;
2020-03-29 02:43:41 +08:00
simdjson::dom::parser parser;
auto error = parser.parse(badjson).error();
if (!error) {
2020-03-27 07:40:29 +08:00
printf("This json should not be valid %s.\n", badjson.data());
return false;
}
return true;
}
bool count_array_example() {
std::cout << __func__ << std::endl;
simdjson::padded_string smalljson = "[1,2,3]"_padded;
simdjson::dom::parser parser;
simdjson::dom::array array;
ASSERT_SUCCESS( parser.parse(smalljson).get(array) );
ASSERT_EQUAL( array.size(), 3 );
return true;
}
bool count_object_example() {
std::cout << __func__ << std::endl;
simdjson::padded_string smalljson = "{\"1\":1,\"2\":1,\"3\":1}"_padded;
simdjson::dom::parser parser;
simdjson::dom::object object;
ASSERT_SUCCESS( parser.parse(smalljson).get(object) );
ASSERT_EQUAL( object.size(), 3 );
return true;
}
bool padded_with_open_bracket() {
std::cout << __func__ << std::endl;
simdjson::dom::parser parser;
// This is an invalid document padded with open braces.
ASSERT_ERROR( parser.parse("[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[", 2, false).error(), simdjson::TAPE_ERROR);
// This is a valid document padded with open braces.
ASSERT_SUCCESS( parser.parse("[][[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[", 2, false).error() );
return true;
}
// returns true if successful
bool stable_test() {
std::cout << __func__ << std::endl;
2020-03-27 07:40:29 +08:00
simdjson::padded_string json = "{"
"\"Image\":{"
"\"Width\":800,"
"\"Height\":600,"
"\"Title\":\"View from 15th Floor\","
"\"Thumbnail\":{"
"\"Url\":\"http://www.example.com/image/481989943\","
"\"Height\":125,"
"\"Width\":100"
"},"
"\"Animated\":false,"
"\"IDs\":[116,943.3,234,38793]"
"}"
2020-03-27 07:40:29 +08:00
"}"_padded;
2020-03-29 02:43:41 +08:00
simdjson::dom::parser parser;
std::ostringstream myStream;
#if SIMDJSON_EXCEPTIONS
myStream << parser.parse(json);
#else
simdjson::dom::element doc;
2020-06-21 03:04:23 +08:00
UNUSED auto error = parser.parse(json).get(doc);
myStream << doc;
#endif
std::string newjson = myStream.str();
2020-03-27 07:40:29 +08:00
if(static_cast<std::string>(json) != newjson) {
std::cout << "serialized json differs!" << std::endl;
2020-03-27 07:40:29 +08:00
std::cout << static_cast<std::string>(json) << std::endl;
std::cout << newjson << std::endl;
}
2020-03-27 07:40:29 +08:00
return newjson == static_cast<std::string>(json);
}
// returns true if successful
// Generates 300000 small JSON records in three shapes plus four scalar
// documents, then parses every record twice in a row with one shared parser.
// A dot is printed every 10000 records as a progress indicator.
bool skyprophet_test() {
  std::cout << "Running " << __func__ << std::endl;
  const size_t n_records = 100000;
  std::vector<std::string> data;
  data.reserve(3 * n_records + 4);
  char buf[1024];
  // In each generator, snprintf returns int; a negative (error) result
  // converts to a huge size_t, so the bounds check also catches failures.
  for (size_t i = 0; i < n_records; ++i) {
    size_t n = snprintf(buf, sizeof(buf),
                        "{\"id\": %zu, \"name\": \"name%zu\", \"gender\": \"%s\", "
                        "\"school\": {\"id\": %zu, \"name\": \"school%zu\"}}",
                        i, i, (i % 2) ? "male" : "female", i % 10, i % 10);
    if (n >= sizeof(buf)) { abort(); }
    data.emplace_back(buf, n);
  }
  for (size_t i = 0; i < n_records; ++i) {
    size_t n = snprintf(buf, sizeof(buf), "{\"counter\": %f, \"array\": [%s]}", static_cast<double>(i) * 3.1416,
                        (i % 2) ? "true" : "false");
    if (n >= sizeof(buf)) { abort(); }
    data.emplace_back(buf, n);
  }
  for (size_t i = 0; i < n_records; ++i) {
    size_t n = snprintf(buf, sizeof(buf), "{\"number\": %e}", static_cast<double>(i) * 10000.31321321);
    if (n >= sizeof(buf)) { abort(); }
    data.emplace_back(buf, n);
  }
  data.emplace_back("true");
  data.emplace_back("false");
  data.emplace_back("null");
  data.emplace_back("0.1");

  simdjson::dom::parser parser;
  size_t counter = 0;
  for (const auto &rec : data) {
    if ((counter % 10000) == 0) {
      printf(".");
      fflush(NULL);
    }
    counter++;
    // Parse the same record twice back-to-back with the same parser.
    for (int attempt = 0; attempt < 2; ++attempt) {
      auto error = parser.parse(rec.c_str(), rec.length()).error();
      if (error != simdjson::error_code::SUCCESS) {
        printf("Something is wrong in skyprophet_test: %s.\n", rec.c_str());
        printf("Parsing failed. Error is %s\n", simdjson::error_message(error));
        return false;
      }
    }
  }
  printf("\n");
  return true;
}
bool lots_of_brackets() {
std::string input;
for(size_t i = 0; i < 200; i++) {
input += "[";
}
for(size_t i = 0; i < 200; i++) {
input += "]";
}
2020-03-29 02:43:41 +08:00
simdjson::dom::parser parser;
auto error = parser.parse(input).error();
if (error) { std::cerr << "Error: " << simdjson::error_message(error) << std::endl; return false; }
return true;
}
bool run() {
2020-06-17 08:04:09 +08:00
return issue938() &&
padded_with_open_bracket() &&
bad_example() &&
count_array_example() &&
count_object_example() &&
stable_test() &&
skyprophet_test() &&
lots_of_brackets();
}
}
namespace document_stream_tests {
// Starts a parse_many over str and hands the resulting document_stream back
// by value. The error code from get() is deliberately ignored.
static simdjson::dom::document_stream parse_many_stream_return(simdjson::dom::parser &parser, simdjson::padded_string &str) {
  simdjson::dom::document_stream result;
  UNUSED auto status = parser.parse_many(str).get(result);
  return result;
}
// This is a compilation test: it only needs to build, showing that a
// document_stream can be initialized from a function's return value.
UNUSED static void parse_many_stream_assign() {
  simdjson::padded_string str("{}",2);
  simdjson::dom::parser parser;
  simdjson::dom::document_stream s1 = parse_many_stream_return(parser, str);
}
// Streams 1000 copies of "1 " through parse_many with a 32-byte window and
// checks that the iterator's current_index() advances by the size of one copy
// per document, starting from 0.
bool test_current_index() {
  std::cout << "Running " << __func__ << std::endl;
  std::string base("1 ");// one JSON!
  std::string json;
  for (size_t copies = 0; copies < 1000; copies++) {
    json += base;
  }
  simdjson::dom::parser parser;
  const size_t window = 32; // deliberately small
  simdjson::dom::document_stream stream;
  ASSERT_SUCCESS( parser.parse_many(json,window).get(stream) );
  size_t expected_index = 0;
  for (auto it = stream.begin(); it != stream.end(); ++it) {
    auto doc = *it;
    ASSERT_SUCCESS(doc.error());
    if (it.current_index() != expected_index) {
      std::cout << "index:" << it.current_index() << std::endl;
      std::cout << "expected index:" << expected_index << std::endl;
      return false;
    }
    expected_index += base.size();
  }
  return true;
}
Using a worker instead of a thread per batch (#920) In the parse_many function, we have one thread doing the stage 1, while the main thread does stage 2. So if stage 1 and stage 2 take half the time, the parse_many could run at twice the speed. It is unlikely to do so. Still, we see benefits of about 40% due to threading. To achieve this interleaving, we load the data in batches (blocks) of some size. In the current code (master), we create a new thread for each batch. Thread creation is expensive so our approach only works over sizeable batches. This PR improves things and makes parse_many faster when using small batches. This fixes our parse_stream benchmark which is just busted. This replaces the one-thread per batch routine by a worker object that reuses the same thread. In benchmarks, this allows us to get the same maximal speed, but with smaller processing blocks. It does not help much with larger blocks because the cost of the thread create gets amortized efficiently. This PR makes parse_many beneficial over small datasets. It also makes us less dependent on the thread creation time. Unfortunately, it is going to be difficult to say anything definitive in general. The cost of creating a thread varies widely depending on the OS. On some systems, it might be cheap, in others very expensive. It should be expected that the new code will depend less drastically on the performances of the underlying system, since we create juste one thread. Co-authored-by: John Keiser <john@johnkeiser.com> Co-authored-by: Daniel Lemire <lemire@gmai.com>
2020-06-13 04:51:18 +08:00
bool small_window() {
std::cout << "Running " << __func__ << std::endl;
auto json = R"({"error":[],"result":{"token":"xxx"}}{"error":[],"result":{"token":"xxx"}})"_padded;
simdjson::dom::parser parser;
size_t count = 0;
size_t window_size = 10; // deliberately too small
2020-06-21 13:03:57 +08:00
simdjson::dom::document_stream stream;
ASSERT_SUCCESS( parser.parse_many(json, window_size).get(stream) );
for (auto doc : stream) {
Using a worker instead of a thread per batch (#920) In the parse_many function, we have one thread doing the stage 1, while the main thread does stage 2. So if stage 1 and stage 2 take half the time, the parse_many could run at twice the speed. It is unlikely to do so. Still, we see benefits of about 40% due to threading. To achieve this interleaving, we load the data in batches (blocks) of some size. In the current code (master), we create a new thread for each batch. Thread creation is expensive so our approach only works over sizeable batches. This PR improves things and makes parse_many faster when using small batches. This fixes our parse_stream benchmark which is just busted. This replaces the one-thread per batch routine by a worker object that reuses the same thread. In benchmarks, this allows us to get the same maximal speed, but with smaller processing blocks. It does not help much with larger blocks because the cost of the thread create gets amortized efficiently. This PR makes parse_many beneficial over small datasets. It also makes us less dependent on the thread creation time. Unfortunately, it is going to be difficult to say anything definitive in general. The cost of creating a thread varies widely depending on the OS. On some systems, it might be cheap, in others very expensive. It should be expected that the new code will depend less drastically on the performances of the underlying system, since we create juste one thread. Co-authored-by: John Keiser <john@johnkeiser.com> Co-authored-by: Daniel Lemire <lemire@gmai.com>
2020-06-13 04:51:18 +08:00
if (!doc.error()) {
std::cerr << "Expected a capacity error " << doc.error() << std::endl;
return false;
}
count++;
}
if(count == 2) {
std::cerr << "Expected a capacity error " << std::endl;
return false;
}
return true;
}
bool large_window() {
std::cout << "Running " << __func__ << std::endl;
#if SIZE_MAX > 17179869184
auto json = R"({"error":[],"result":{"token":"xxx"}}{"error":[],"result":{"token":"xxx"}})"_padded;
simdjson::dom::parser parser;
size_t count = 0;
uint64_t window_size{17179869184}; // deliberately too big
2020-06-21 13:03:57 +08:00
simdjson::dom::document_stream stream;
ASSERT_SUCCESS( parser.parse_many(json, size_t(window_size)).get(stream) );
for (auto doc : stream) {
Using a worker instead of a thread per batch (#920) In the parse_many function, we have one thread doing the stage 1, while the main thread does stage 2. So if stage 1 and stage 2 take half the time, the parse_many could run at twice the speed. It is unlikely to do so. Still, we see benefits of about 40% due to threading. To achieve this interleaving, we load the data in batches (blocks) of some size. In the current code (master), we create a new thread for each batch. Thread creation is expensive so our approach only works over sizeable batches. This PR improves things and makes parse_many faster when using small batches. This fixes our parse_stream benchmark which is just busted. This replaces the one-thread per batch routine by a worker object that reuses the same thread. In benchmarks, this allows us to get the same maximal speed, but with smaller processing blocks. It does not help much with larger blocks because the cost of the thread create gets amortized efficiently. This PR makes parse_many beneficial over small datasets. It also makes us less dependent on the thread creation time. Unfortunately, it is going to be difficult to say anything definitive in general. The cost of creating a thread varies widely depending on the OS. On some systems, it might be cheap, in others very expensive. It should be expected that the new code will depend less drastically on the performances of the underlying system, since we create juste one thread. Co-authored-by: John Keiser <john@johnkeiser.com> Co-authored-by: Daniel Lemire <lemire@gmai.com>
2020-06-13 04:51:18 +08:00
if (!doc.error()) {
std::cerr << "I expected a failure (too big) but got " << doc.error() << std::endl;
return false;
}
count++;
}
#endif
return true;
}
// Streams json through parse_many with the default window and checks that it
// yields exactly expectedcount documents, none of which carries an error.
static bool parse_json_message_issue467(simdjson::padded_string &json, size_t expectedcount) {
  simdjson::dom::parser parser;
  simdjson::dom::document_stream stream;
  ASSERT_SUCCESS( parser.parse_many(json).get(stream) );
  size_t seen = 0;
  for (auto doc : stream) {
    if (!doc.error()) {
      seen++;
      continue;
    }
    std::cerr << "Failed with simdjson error= " << doc.error() << std::endl;
    return false;
  }
  if (seen == expectedcount) {
    return true;
  }
  std::cerr << "bad count" << std::endl;
  return false;
}
// Checks that a buffer holding one message yields one document and that a
// buffer holding two messages back-to-back yields two.
bool json_issue467() {
  std::cout << "Running " << __func__ << std::endl;
  auto single_message = R"({"error":[],"result":{"token":"xxx"}})"_padded;
  auto two_messages = R"({"error":[],"result":{"token":"xxx"}}{"error":[],"result":{"token":"xxx"}})"_padded;
  return parse_json_message_issue467(single_message, 1) &&
         parse_json_message_issue467(two_messages, 2);
}
// returns true if successful
bool document_stream_test() {
std::cout << "Running " << __func__ << std::endl;
fflush(NULL);
const size_t n_records = 10000;
std::string data;
char buf[1024];
for (size_t i = 0; i < n_records; ++i) {
2020-05-02 04:53:23 +08:00
size_t n = snprintf(buf,
sizeof(buf),
"{\"id\": %zu, \"name\": \"name%zu\", \"gender\": \"%s\", "
"\"ete\": {\"id\": %zu, \"name\": \"eventail%zu\"}}",
i, i, (i % 2) ? "homme" : "femme", i % 10, i % 10);
2020-05-02 04:53:23 +08:00
if (n >= sizeof(buf)) { abort(); }
data += std::string(buf, n);
}
for(size_t batch_size = 1000; batch_size < 2000; batch_size += (batch_size>1050?10:1)) {
printf(".");
fflush(NULL);
simdjson::padded_string str(data);
2020-03-29 02:43:41 +08:00
simdjson::dom::parser parser;
size_t count = 0;
2020-06-21 13:03:57 +08:00
simdjson::dom::document_stream stream;
ASSERT_SUCCESS( parser.parse_many(str, batch_size).get(stream) );
for (auto [doc, error] : stream) {
if (error) {
printf("Error at on document %zd at batch size %zu: %s\n", count, batch_size, simdjson::error_message(error));
return false;
}
int64_t keyid;
ASSERT_SUCCESS( doc["id"].get(keyid) );
ASSERT_EQUAL( keyid, int64_t(count) );
count++;
}
if(count != n_records) {
printf("Found wrong number of documents %zd, expected %zd at batch size %zu\n", count, n_records, batch_size);
return false;
}
}
printf("ok\n");
return true;
}
// returns true if successful
bool document_stream_utf8_test() {
std::cout << "Running " << __func__ << std::endl;
fflush(NULL);
const size_t n_records = 10000;
std::string data;
char buf[1024];
for (size_t i = 0; i < n_records; ++i) {
2020-05-02 04:53:23 +08:00
size_t n = snprintf(buf,
sizeof(buf),
"{\"id\": %zu, \"name\": \"name%zu\", \"gender\": \"%s\", "
"\"\xC3\xA9t\xC3\xA9\": {\"id\": %zu, \"name\": \"\xC3\xA9ventail%zu\"}}",
i, i, (i % 2) ? "\xE2\xBA\x83" : "\xE2\xBA\x95", i % 10, i % 10);
2020-05-02 04:53:23 +08:00
if (n >= sizeof(buf)) { abort(); }
data += std::string(buf, n);
}
for(size_t batch_size = 1000; batch_size < 2000; batch_size += (batch_size>1050?10:1)) {
printf(".");
fflush(NULL);
simdjson::padded_string str(data);
2020-03-29 02:43:41 +08:00
simdjson::dom::parser parser;
size_t count = 0;
2020-06-21 13:03:57 +08:00
simdjson::dom::document_stream stream;
ASSERT_SUCCESS( parser.parse_many(str, batch_size).get(stream) );
for (auto [doc, error] : stream) {
if (error) {
printf("Error at on document %zd at batch size %zu: %s\n", count, batch_size, simdjson::error_message(error));
return false;
}
int64_t keyid;
ASSERT_SUCCESS( doc["id"].get(keyid) );
ASSERT_EQUAL( keyid, int64_t(count) );
count++;
}
ASSERT_EQUAL( count, n_records )
}
printf("ok\n");
return true;
}
bool run() {
return test_current_index() &&
small_window() &&
Using a worker instead of a thread per batch (#920) In the parse_many function, we have one thread doing the stage 1, while the main thread does stage 2. So if stage 1 and stage 2 take half the time, the parse_many could run at twice the speed. It is unlikely to do so. Still, we see benefits of about 40% due to threading. To achieve this interleaving, we load the data in batches (blocks) of some size. In the current code (master), we create a new thread for each batch. Thread creation is expensive so our approach only works over sizeable batches. This PR improves things and makes parse_many faster when using small batches. This fixes our parse_stream benchmark which is just busted. This replaces the one-thread per batch routine by a worker object that reuses the same thread. In benchmarks, this allows us to get the same maximal speed, but with smaller processing blocks. It does not help much with larger blocks because the cost of the thread create gets amortized efficiently. This PR makes parse_many beneficial over small datasets. It also makes us less dependent on the thread creation time. Unfortunately, it is going to be difficult to say anything definitive in general. The cost of creating a thread varies widely depending on the OS. On some systems, it might be cheap, in others very expensive. It should be expected that the new code will depend less drastically on the performances of the underlying system, since we create juste one thread. Co-authored-by: John Keiser <john@johnkeiser.com> Co-authored-by: Daniel Lemire <lemire@gmai.com>
2020-06-13 04:51:18 +08:00
large_window() &&
json_issue467() &&
document_stream_test() &&
document_stream_utf8_test();
}
}
namespace parse_api_tests {
using namespace std;
using namespace simdjson;
2020-03-29 02:43:41 +08:00
using namespace simdjson::dom;
2020-06-05 08:40:15 +08:00
// Shared inputs for the parse API tests below.
// A single document: one array.
const padded_string BASIC_JSON = "[1,2,3]"_padded;
// Two array documents separated by a newline; the parse_many tests expect
// exactly two documents from it.
const padded_string BASIC_NDJSON = "[1,2,3]\n[4,5,6]"_padded;
// Empty input; the parse_many test expects zero documents from it.
const padded_string EMPTY_NDJSON = ""_padded;
bool parser_parse() {
std::cout << "Running " << __func__ << std::endl;
2020-03-29 02:43:41 +08:00
dom::parser parser;
auto [doc, error] = parser.parse(BASIC_JSON);
if (error) { cerr << error << endl; return false; }
if (!doc.is<dom::array>()) { cerr << "Document did not parse as an array" << endl; return false; }
return true;
}
bool parser_parse_many() {
2020-06-21 13:03:57 +08:00
std::cout << "Running " << __func__ << std::endl;
dom::parser parser;
int count = 0;
simdjson::dom::document_stream stream;
ASSERT_SUCCESS( parser.parse_many(BASIC_NDJSON).get(stream) );
for (auto [doc, error] : stream) {
if (error) { cerr << "Error in parse_many: " << endl; return false; }
if (!doc.is<dom::array>()) { cerr << "Document did not parse as an array" << endl; return false; }
count++;
}
if (count != 2) { cerr << "parse_many returned " << count << " documents, expected 2" << endl; return false; }
return true;
}
SIMDJSON_PUSH_DISABLE_WARNINGS
SIMDJSON_DISABLE_DEPRECATED_WARNING
bool parser_parse_many_deprecated() {
std::cout << "Running " << __func__ << std::endl;
2020-03-29 02:43:41 +08:00
dom::parser parser;
int count = 0;
for (auto [doc, error] : parser.parse_many(BASIC_NDJSON)) {
2020-06-05 08:40:15 +08:00
if (error) { cerr << "Error in parse_many: " << endl; return false; }
if (!doc.is<dom::array>()) { cerr << "Document did not parse as an array" << endl; return false; }
count++;
}
if (count != 2) { cerr << "parse_many returned " << count << " documents, expected 2" << endl; return false; }
return true;
}
2020-06-21 13:03:57 +08:00
SIMDJSON_POP_DISABLE_WARNINGS
2020-06-05 08:40:15 +08:00
bool parser_parse_many_empty() {
std::cout << "Running " << __func__ << std::endl;
dom::parser parser;
int count = 0;
2020-06-21 13:03:57 +08:00
simdjson::dom::document_stream stream;
ASSERT_SUCCESS( parser.parse_many(EMPTY_NDJSON).get(stream) );
for (auto doc : stream) {
ASSERT_SUCCESS(doc.error());
2020-06-05 08:40:15 +08:00
count++;
}
ASSERT_EQUAL(count, 0);
2020-06-05 08:40:15 +08:00
return true;
}
bool parser_parse_many_empty_batches() {
std::cout << "Running " << __func__ << std::endl;
dom::parser parser;
uint64_t count = 0;
constexpr const int BATCH_SIZE = 128;
uint8_t empty_batches_ndjson[BATCH_SIZE*16+SIMDJSON_PADDING];
memset(&empty_batches_ndjson[0], ' ', BATCH_SIZE*16+SIMDJSON_PADDING);
memcpy(&empty_batches_ndjson[BATCH_SIZE*3+2], "1", 1);
memcpy(&empty_batches_ndjson[BATCH_SIZE*10+4], "2", 1);
memcpy(&empty_batches_ndjson[BATCH_SIZE*11+6], "3", 1);
2020-06-21 13:03:57 +08:00
simdjson::dom::document_stream stream;
ASSERT_SUCCESS( parser.parse_many(empty_batches_ndjson, BATCH_SIZE*16).get(stream) );
for (auto [doc, error] : stream) {
ASSERT_SUCCESS(error);
2020-06-05 08:40:15 +08:00
count++;
uint64_t val;
ASSERT_SUCCESS( doc.get(val) );
ASSERT_EQUAL( val, count );
2020-06-05 08:40:15 +08:00
}
ASSERT_EQUAL(count, 3);
2020-06-05 08:40:15 +08:00
return true;
}
bool parser_load() {
2020-04-07 00:45:45 +08:00
std::cout << "Running " << __func__ << " on " << TWITTER_JSON << std::endl;
2020-03-29 02:43:41 +08:00
dom::parser parser;
dom::object object;
ASSERT_SUCCESS( parser.load(TWITTER_JSON).get(object) );
return true;
}
// Streams the AMAZON_CELLPHONES_NDJSON file with load_many. Every document
// must be an array whose size() and iterated element count are both 9, and
// the number of documents must equal AMAZON_CELLPHONES_NDJSON_DOC_COUNT.
bool parser_load_many() {
  std::cout << "Running " << __func__ << " on " << AMAZON_CELLPHONES_NDJSON << std::endl;
  dom::parser parser;
  simdjson::dom::document_stream stream;
  ASSERT_SUCCESS( parser.load_many(AMAZON_CELLPHONES_NDJSON).get(stream) );
  int count = 0;
  for (auto [doc, error] : stream) {
    ASSERT_SUCCESS( error );
    dom::array arr;
    ASSERT_SUCCESS( doc.get(arr) ); // let us get the array
    ASSERT_EQUAL(arr.size(), 9);
    // Count the elements by iteration as well.
    size_t arr_count = 0;
    for (auto v : arr) {
      (void)v;
      arr_count++;
    }
    ASSERT_EQUAL(arr_count, 9);
    count++;
  }
  ASSERT_EQUAL(count, AMAZON_CELLPHONES_NDJSON_DOC_COUNT);
  return true;
}
SIMDJSON_PUSH_DISABLE_WARNINGS
SIMDJSON_DISABLE_DEPRECATED_WARNING
bool parser_load_many_deprecated() {
2020-04-07 00:45:45 +08:00
std::cout << "Running " << __func__ << " on " << AMAZON_CELLPHONES_NDJSON << std::endl;
2020-03-29 02:43:41 +08:00
dom::parser parser;
int count = 0;
2020-04-07 00:45:45 +08:00
for (auto [doc, error] : parser.load_many(AMAZON_CELLPHONES_NDJSON)) {
if (error) { cerr << error << endl; return false; }
dom::array arr;
ASSERT_SUCCESS( doc.get(arr) );
ASSERT_EQUAL( arr.size(), 9 );
size_t arr_count = 0;
for (auto v : arr) { arr_count++; (void)v; }
ASSERT_EQUAL( arr_count, 9 );
count++;
}
ASSERT_EQUAL( count, AMAZON_CELLPHONES_NDJSON_DOC_COUNT );
return true;
}
2020-06-21 13:03:57 +08:00
SIMDJSON_POP_DISABLE_WARNINGS
#if SIMDJSON_EXCEPTIONS
bool parser_parse_exception() {
std::cout << "Running " << __func__ << std::endl;
2020-03-29 02:43:41 +08:00
dom::parser parser;
UNUSED dom::array array = parser.parse(BASIC_JSON);
return true;
}
bool parser_parse_many_exception() {
std::cout << "Running " << __func__ << std::endl;
2020-03-29 02:43:41 +08:00
dom::parser parser;
int count = 0;
for (UNUSED dom::array doc : parser.parse_many(BASIC_NDJSON)) {
count++;
}
ASSERT_EQUAL(count, 2);
return true;
}
bool parser_load_exception() {
std::cout << "Running " << __func__ << std::endl;
2020-03-29 02:43:41 +08:00
dom::parser parser;
size_t count = 0;
dom::object object = parser.load(TWITTER_JSON);
for (UNUSED auto field : object) {
count++;
}
ASSERT_EQUAL( count, object.size() );
return true;
}
bool parser_load_many_exception() {
std::cout << "Running " << __func__ << std::endl;
2020-03-29 02:43:41 +08:00
dom::parser parser;
int count = 0;
for (UNUSED dom::array doc : parser.load_many(AMAZON_CELLPHONES_NDJSON)) {
count++;
}
ASSERT_EQUAL( count, AMAZON_CELLPHONES_NDJSON_DOC_COUNT );
return true;
}
#endif
bool run() {
return parser_parse() &&
parser_parse_many() &&
2020-06-21 13:03:57 +08:00
parser_parse_many_deprecated() &&
2020-06-05 08:40:15 +08:00
parser_parse_many_empty() &&
parser_parse_many_empty_batches() &&
parser_load() &&
parser_load_many() &&
2020-06-21 13:03:57 +08:00
parser_load_many_deprecated() &&
#if SIMDJSON_EXCEPTIONS
parser_parse_exception() &&
parser_parse_many_exception() &&
parser_load_exception() &&
parser_load_many_exception() &&
#endif
true;
}
}
namespace dom_api_tests {
2020-02-25 12:59:38 +08:00
using namespace std;
using namespace simdjson;
2020-03-29 02:43:41 +08:00
using namespace simdjson::dom;
SIMDJSON_PUSH_DISABLE_WARNINGS
SIMDJSON_DISABLE_DEPRECATED_WARNING
// Walks a small nested document with the deprecated ParsedJson::Iterator API.
// Lookups of a missing key or index must fail and leave the cursor on the same
// kind of node, while down()/next()/prev()/up()/move_to_key()/move_to_value()
// must land on the expected nodes. A diagnostic is printed at the first
// mismatch.
// returns true if successful
bool ParsedJson_Iterator_test() {
  std::cout << "Running " << __func__ << std::endl;
  // Prints a fixed diagnostic on stdout and yields the failure result.
  const auto fail = [](const char *message) {
    printf("%s", message);
    return false;
  };
  simdjson::padded_string json = R"({
"Image": {
"Width": 800,
"Height": 600,
"Title": "View from 15th Floor",
"Thumbnail": {
"Url": "http://www.example.com/image/481989943",
"Height": 125,
"Width": 100
},
"Animated" : false,
"IDs": [116, 943, 234, 38793]
}
})"_padded;
  simdjson::ParsedJson pj = build_parsed_json(json);
  if (pj.error) {
    printf("Could not parse '%s': %s\n", json.data(), simdjson::error_message(pj.error));
    return false;
  }
  simdjson::ParsedJson::Iterator iter(pj);
  if (!iter.is_object()) { return fail("Root should be object\n"); }

  // Failed key lookups (exact and case-insensitive) must keep us on the root object.
  if (iter.move_to_key("bad key")) { return fail("We should not move to a non-existing key\n"); }
  if (!iter.is_object()) { return fail("We should have remained at the object.\n"); }
  if (iter.move_to_key_insensitive("bad key")) { return fail("We should not move to a non-existing key\n"); }
  if (!iter.is_object()) { return fail("We should have remained at the object.\n"); }

  // Step into the root object: its first node is the key "Image".
  if (!iter.down()) { return fail("Root should not be emtpy\n"); }
  if (!iter.is_string()) { return fail("Object should start with string key\n"); }
  if (iter.prev()) { return fail("We should not be able to go back from the start of the scope.\n"); }
  if (strcmp(iter.get_string(),"Image")!=0) { return fail("There should be a single key, image.\n"); }

  // Step into the "Image" object and move back and forth around its first key.
  iter.move_to_value();
  if (!iter.is_object()) { return fail("Value of image should be object\n"); }
  if (!iter.down()) { return fail("Image key should not be emtpy\n"); }
  if (!iter.next()) { return fail("key should have a value\n"); }
  if (!iter.prev()) { return fail("We should go back to the key.\n"); }
  if (strcmp(iter.get_string(),"Width")!=0) { return fail("There should be a key Width.\n"); }

  // Back up one level, then look up the "IDs" array by key.
  if (!iter.up()) { return false; }
  if (!iter.move_to_key("IDs")) { return fail("We should be able to move to an existing key\n"); }
  if (!iter.is_array()) {
    printf("Value of IDs should be array, it is %c \n", iter.get_type());
    return false;
  }
  // The array has four entries, so index 4 does not exist.
  if (iter.move_to_index(4)) { return fail("We should not be able to move to a non-existing index\n"); }
  if (!iter.is_array()) { return fail("We should have remained at the array\n"); }
  return true;
}
SIMDJSON_POP_DISABLE_WARNINGS
2020-02-25 12:59:38 +08:00
bool object_iterator() {
std::cout << "Running " << __func__ << std::endl;
string json(R"({ "a": 1, "b": 2, "c": 3 })");
2020-02-25 12:59:38 +08:00
const char* expected_key[] = { "a", "b", "c" };
uint64_t expected_value[] = { 1, 2, 3 };
2020-03-29 02:43:41 +08:00
dom::parser parser;
dom::object object;
ASSERT_SUCCESS( parser.parse(json).get(object) );
int i = 0;
for (auto [key, value] : object) {
ASSERT_EQUAL( key, expected_key[i] );
ASSERT_EQUAL( value.get<uint64_t>().value(), expected_value[i] );
2020-02-25 12:59:38 +08:00
i++;
}
ASSERT_EQUAL( i*sizeof(uint64_t), sizeof(expected_value) );
2020-02-25 12:59:38 +08:00
return true;
}
bool array_iterator() {
std::cout << "Running " << __func__ << std::endl;
string json(R"([ 1, 10, 100 ])");
2020-02-25 12:59:38 +08:00
uint64_t expected_value[] = { 1, 10, 100 };
2020-03-29 02:43:41 +08:00
dom::parser parser;
dom::array array;
ASSERT_SUCCESS( parser.parse(json).get(array) );
int i=0;
for (auto value : array) {
uint64_t v;
ASSERT_SUCCESS( value.get(v) );
ASSERT_EQUAL( v, expected_value[i] );
2020-02-25 12:59:38 +08:00
i++;
}
ASSERT_EQUAL( i*sizeof(uint64_t), sizeof(expected_value) );
2020-02-25 12:59:38 +08:00
return true;
}
bool object_iterator_empty() {
std::cout << "Running " << __func__ << std::endl;
string json(R"({})");
2020-02-25 12:59:38 +08:00
int i = 0;
2020-03-29 02:43:41 +08:00
dom::parser parser;
dom::object object;
ASSERT_SUCCESS( parser.parse(json).get(object) );
for (UNUSED auto field : object) {
TEST_FAIL("Unexpected field");
2020-02-25 12:59:38 +08:00
i++;
}
ASSERT_EQUAL(i, 0);
2020-02-25 12:59:38 +08:00
return true;
}
bool array_iterator_empty() {
std::cout << "Running " << __func__ << std::endl;
string json(R"([])");
2020-02-25 12:59:38 +08:00
int i=0;
2020-03-29 02:43:41 +08:00
dom::parser parser;
dom::array array;
ASSERT_SUCCESS( parser.parse(json).get(array) );
for (UNUSED auto value : array) {
TEST_FAIL("Unexpected value");
2020-02-25 12:59:38 +08:00
i++;
}
ASSERT_EQUAL(i, 0);
2020-02-25 12:59:38 +08:00
return true;
}
bool string_value() {
std::cout << "Running " << __func__ << std::endl;
string json(R"([ "hi", "has backslash\\" ])");
2020-03-29 02:43:41 +08:00
dom::parser parser;
dom::array array;
ASSERT_SUCCESS( parser.parse(json).get(array) );
auto iter = array.begin();
std::string_view val;
ASSERT_SUCCESS( (*iter).get(val) );
ASSERT_EQUAL( val, "hi" );
++iter;
ASSERT_SUCCESS( (*iter).get(val) );
ASSERT_EQUAL( val, "has backslash\\" );
return true;
}
bool numeric_values() {
std::cout << "Running " << __func__ << std::endl;
string json(R"([ 0, 1, -1, 1.1 ])");
2020-03-29 02:43:41 +08:00
dom::parser parser;
dom::array array;
ASSERT_SUCCESS( parser.parse(json).get(array) );
auto iter = array.begin();
ASSERT_EQUAL( (*iter).get<uint64_t>().value(), 0 );
ASSERT_EQUAL( (*iter).get<int64_t>().value(), 0 );
ASSERT_EQUAL( (*iter).get<double>().value(), 0 );
++iter;
ASSERT_EQUAL( (*iter).get<uint64_t>().value(), 1 );
ASSERT_EQUAL( (*iter).get<int64_t>().value(), 1 );
ASSERT_EQUAL( (*iter).get<double>().value(), 1 );
++iter;
ASSERT_EQUAL( (*iter).get<int64_t>().value(), -1 );
ASSERT_EQUAL( (*iter).get<double>().value(), -1 );
++iter;
ASSERT_EQUAL( (*iter).get<double>().value(), 1.1 );
return true;
}
// Reads both elements of [ true, false ] as bool.
// The error code of every conversion is asserted explicitly: reading `.first`
// alone would ignore a failed conversion, and the `false` expectation could
// then be satisfied by whatever placeholder value the failed result carries.
bool boolean_values() {
  std::cout << "Running " << __func__ << std::endl;
  string json(R"([ true, false ])");
  dom::parser parser;
  dom::array array;
  ASSERT_SUCCESS( parser.parse(json).get(array) );
  auto val = array.begin();
  bool actual;
  ASSERT_SUCCESS( (*val).get(actual) );
  ASSERT_EQUAL( actual, true );
  ++val;
  ASSERT_SUCCESS( (*val).get(actual) );
  ASSERT_EQUAL( actual, false );
  return true;
}
bool null_value() {
std::cout << "Running " << __func__ << std::endl;
string json(R"([ null ])");
2020-03-29 02:43:41 +08:00
dom::parser parser;
dom::array array;
ASSERT_SUCCESS( parser.parse(json).get(array) );
auto val = array.begin();
ASSERT_EQUAL( !(*val).is_null(), 0 );
return true;
}
bool document_object_index() {
std::cout << "Running " << __func__ << std::endl;
string json(R"({ "a": 1, "b": 2, "c/d": 3})");
2020-03-29 02:43:41 +08:00
dom::parser parser;
dom::object object;
ASSERT_SUCCESS( parser.parse(json).get(object) );
ASSERT_EQUAL( object["a"].get<uint64_t>().first, 1 );
ASSERT_EQUAL( object["b"].get<uint64_t>().first, 2 );
ASSERT_EQUAL( object["c/d"].get<uint64_t>().first, 3 );
// Check all three again in backwards order, to ensure we can go backwards
ASSERT_EQUAL( object["c/d"].get<uint64_t>().first, 3 );
ASSERT_EQUAL( object["b"].get<uint64_t>().first, 2 );
ASSERT_EQUAL( object["a"].get<uint64_t>().first, 1 );
simdjson::error_code error;
2020-03-29 02:43:41 +08:00
UNUSED element val;
#ifndef _LIBCPP_VERSION // should work everywhere but with libc++, must include the <ciso646> header.
std::tie(val,error) = object["d"];
ASSERT_ERROR( error, NO_SUCH_FIELD );
std::tie(std::ignore,error) = object["d"];
ASSERT_ERROR( error, NO_SUCH_FIELD );
#endif
// tie(val, error) = object["d"]; fails with "no viable overloaded '='" on Apple clang version 11.0.0 tie(val, error) = doc["d"];
object["d"].tie(val, error);
ASSERT_ERROR( error, NO_SUCH_FIELD );
ASSERT_ERROR( object["d"].get(val), NO_SUCH_FIELD );
ASSERT_ERROR( object["d"].error(), NO_SUCH_FIELD );
return true;
}
bool object_index() {
std::cout << "Running " << __func__ << std::endl;
string json(R"({ "obj": { "a": 1, "b": 2, "c/d": 3 } })");
2020-03-29 02:43:41 +08:00
dom::parser parser;
dom::element doc;
ASSERT_SUCCESS( parser.parse(json).get(doc) );
ASSERT_EQUAL( doc["obj"]["a"].get<uint64_t>().first, 1);
2020-03-29 02:43:41 +08:00
object obj;
ASSERT_SUCCESS( doc.get(obj) );
ASSERT_EQUAL( obj["obj"]["a"].get<uint64_t>().first, 1);
ASSERT_SUCCESS( obj["obj"].get(obj) );
ASSERT_EQUAL( obj["a"].get<uint64_t>().first, 1 );
ASSERT_EQUAL( obj["b"].get<uint64_t>().first, 2 );
ASSERT_EQUAL( obj["c/d"].get<uint64_t>().first, 3 );
// Check all three again in backwards order, to ensure we can go backwards
ASSERT_EQUAL( obj["c/d"].get<uint64_t>().first, 3 );
ASSERT_EQUAL( obj["b"].get<uint64_t>().first, 2 );
ASSERT_EQUAL( obj["a"].get<uint64_t>().first, 1 );
2020-03-29 02:43:41 +08:00
UNUSED element val;
ASSERT_ERROR( doc["d"].get(val), NO_SUCH_FIELD);
return true;
}
bool twitter_count() {
std::cout << "Running " << __func__ << std::endl;
// Prints the number of results in twitter.json
2020-03-29 02:43:41 +08:00
dom::parser parser;
uint64_t result_count;
ASSERT_SUCCESS( parser.load(TWITTER_JSON)["search_metadata"]["count"].get(result_count) );
ASSERT_EQUAL( result_count, 100 );
return true;
}
bool twitter_default_profile() {
std::cout << "Running " << __func__ << std::endl;
// Print users with a default profile.
set<string_view> default_users;
2020-03-29 02:43:41 +08:00
dom::parser parser;
dom::array tweets;
ASSERT_SUCCESS( parser.load(TWITTER_JSON)["statuses"].get(tweets) );
for (auto tweet : tweets) {
2020-03-29 02:43:41 +08:00
object user;
ASSERT_SUCCESS( tweet["user"].get(user) );
bool default_profile;
ASSERT_SUCCESS( user["default_profile"].get(default_profile) );
if (default_profile) {
std::string_view screen_name;
ASSERT_SUCCESS( user["screen_name"].get(screen_name) );
default_users.insert(screen_name);
}
}
ASSERT_EQUAL( default_users.size(), 86 );
return true;
}
bool twitter_image_sizes() {
std::cout << "Running " << __func__ << std::endl;
// Print image names and sizes
set<pair<uint64_t, uint64_t>> image_sizes;
simdjson::error_code error;
2020-03-29 02:43:41 +08:00
dom::parser parser;
dom::array tweets;
ASSERT_SUCCESS( parser.load(TWITTER_JSON)["statuses"].get(tweets) );
for (auto tweet : tweets) {
dom::array media;
if (not (error = tweet["entities"]["media"].get(media))) {
for (auto image : media) {
2020-03-29 02:43:41 +08:00
object sizes;
ASSERT_SUCCESS( image["sizes"].get(sizes) );
for (auto size : sizes) {
uint64_t width, height;
ASSERT_SUCCESS( size.value["w"].get(width) );
ASSERT_SUCCESS( size.value["h"].get(height) );
image_sizes.insert(make_pair(width, height));
}
}
}
}
ASSERT_EQUAL( image_sizes.size(), 15 );
return true;
}
#if SIMDJSON_EXCEPTIONS
bool object_iterator_exception() {
std::cout << "Running " << __func__ << std::endl;
string json(R"({ "a": 1, "b": 2, "c": 3 })");
const char* expected_key[] = { "a", "b", "c" };
uint64_t expected_value[] = { 1, 2, 3 };
int i = 0;
2020-03-29 02:43:41 +08:00
dom::parser parser;
for (auto [key, value] : dom::object(parser.parse(json))) {
ASSERT_EQUAL( key, expected_key[i]);
ASSERT_EQUAL( uint64_t(value), expected_value[i] );
i++;
}
ASSERT_EQUAL( i*sizeof(uint64_t), sizeof(expected_value) );
return true;
}
bool array_iterator_exception() {
std::cout << "Running " << __func__ << std::endl;
string json(R"([ 1, 10, 100 ])");
uint64_t expected_value[] = { 1, 10, 100 };
int i=0;
2020-03-29 02:43:41 +08:00
dom::parser parser;
for (uint64_t value : parser.parse(json)) {
ASSERT_EQUAL( value, expected_value[i] );
i++;
}
ASSERT_EQUAL( i*sizeof(uint64_t), sizeof(expected_value) );
return true;
}
bool string_value_exception() {
std::cout << "Running " << __func__ << std::endl;
2020-03-29 02:43:41 +08:00
dom::parser parser;
ASSERT_EQUAL( (const char *)parser.parse(R"("hi")"_padded), "hi" );
ASSERT_EQUAL( string_view(parser.parse(R"("hi")"_padded)), "hi" );
ASSERT_EQUAL( (const char *)parser.parse(R"("has backslash\\")"_padded), "has backslash\\");
ASSERT_EQUAL( string_view(parser.parse(R"("has backslash\\")"_padded)), "has backslash\\" );
2020-02-25 12:59:38 +08:00
return true;
}
bool numeric_values_exception() {
std::cout << "Running " << __func__ << std::endl;
2020-03-29 02:43:41 +08:00
dom::parser parser;
ASSERT_EQUAL( uint64_t(parser.parse("0"_padded)), 0);
ASSERT_EQUAL( int64_t(parser.parse("0"_padded)), 0);
ASSERT_EQUAL( double(parser.parse("0"_padded)), 0);
ASSERT_EQUAL( uint64_t(parser.parse("1"_padded)), 1);
ASSERT_EQUAL( int64_t(parser.parse("1"_padded)), 1);
ASSERT_EQUAL( double(parser.parse("1"_padded)), 1);
ASSERT_EQUAL( int64_t(parser.parse("-1"_padded)), -1);
ASSERT_EQUAL( double(parser.parse("-1"_padded)), -1);
ASSERT_EQUAL( double(parser.parse("1.1"_padded)), 1.1);
2020-02-25 12:59:38 +08:00
return true;
}
bool boolean_values_exception() {
std::cout << "Running " << __func__ << std::endl;
2020-03-29 02:43:41 +08:00
dom::parser parser;
ASSERT_EQUAL( bool(parser.parse("true"_padded)), true);
ASSERT_EQUAL( bool(parser.parse("false"_padded)), false);
2020-02-25 12:59:38 +08:00
return true;
}
bool null_value_exception() {
std::cout << "Running " << __func__ << std::endl;
2020-03-29 02:43:41 +08:00
dom::parser parser;
ASSERT_EQUAL( bool(parser.parse("null"_padded).is_null()), true );
2020-02-25 12:59:38 +08:00
return true;
}
bool document_object_index_exception() {
std::cout << "Running " << __func__ << std::endl;
string json(R"({ "a": 1, "b": 2, "c": 3})");
2020-03-29 02:43:41 +08:00
dom::parser parser;
auto obj = parser.parse(json);
ASSERT_EQUAL(uint64_t(obj["a"]), 1);
2020-02-25 12:59:38 +08:00
return true;
}
bool object_index_exception() {
std::cout << "Running " << __func__ << std::endl;
string json(R"({ "obj": { "a": 1, "b": 2, "c": 3 } })");
2020-03-29 02:43:41 +08:00
dom::parser parser;
object obj = parser.parse(json)["obj"];
ASSERT_EQUAL( uint64_t(obj["a"]), 1);
2020-02-25 12:59:38 +08:00
return true;
}
bool twitter_count_exception() {
std::cout << "Running " << __func__ << std::endl;
2020-02-25 12:59:38 +08:00
// Prints the number of results in twitter.json
2020-03-29 02:43:41 +08:00
dom::parser parser;
2020-04-07 00:45:45 +08:00
element doc = parser.load(TWITTER_JSON);
2020-02-25 12:59:38 +08:00
uint64_t result_count = doc["search_metadata"]["count"];
if (result_count != 100) { cerr << "Expected twitter.json[metadata_count][count] = 100, got " << result_count << endl; return false; }
return true;
}
bool twitter_default_profile_exception() {
std::cout << "Running " << __func__ << std::endl;
2020-02-25 12:59:38 +08:00
// Print users with a default profile.
set<string_view> default_users;
2020-03-29 02:43:41 +08:00
dom::parser parser;
2020-04-07 00:45:45 +08:00
element doc = parser.load(TWITTER_JSON);
for (object tweet : doc["statuses"].get<dom::array>()) {
2020-03-29 02:43:41 +08:00
object user = tweet["user"];
2020-02-25 12:59:38 +08:00
if (user["default_profile"]) {
default_users.insert(user["screen_name"]);
}
}
if (default_users.size() != 86) { cerr << "Expected twitter.json[statuses][user] to contain 86 default_profile users, got " << default_users.size() << endl; return false; }
return true;
}
bool twitter_image_sizes_exception() {
std::cout << "Running " << __func__ << std::endl;
2020-02-25 12:59:38 +08:00
// Print image names and sizes
set<pair<uint64_t, uint64_t>> image_sizes;
2020-03-29 02:43:41 +08:00
dom::parser parser;
for (object tweet : parser.load(TWITTER_JSON)["statuses"]) {
auto media = tweet["entities"]["media"];
if (!media.error()) {
for (object image : media) {
for (auto size : object(image["sizes"])) {
image_sizes.insert(make_pair(size.value["w"], size.value["h"]));
2020-02-25 12:59:38 +08:00
}
}
}
}
ASSERT_EQUAL( image_sizes.size(), 15 );
2020-02-25 12:59:38 +08:00
return true;
}
#endif
// Runs every test of this namespace in order and stops at the first failure
// (short-circuit &&). The *_exception tests are only included when
// SIMDJSON_EXCEPTIONS is enabled.
bool run() {
  return ParsedJson_Iterator_test() &&
         object_iterator() &&
         array_iterator() &&
         object_iterator_empty() &&
         array_iterator_empty() &&
         string_value() &&
         numeric_values() &&
         boolean_values() &&
         null_value() &&
         document_object_index() &&
         object_index() &&
         twitter_count() &&
         twitter_default_profile() &&
         twitter_image_sizes() &&
#if SIMDJSON_EXCEPTIONS
         object_iterator_exception() &&
         array_iterator_exception() &&
         string_value_exception() &&
         numeric_values_exception() &&
         boolean_values_exception() &&
         null_value_exception() &&
         // Previously this slot repeated document_object_index(), so neither
         // of the two *_index_exception tests was ever executed.
         document_object_index_exception() &&
         object_index_exception() &&
         twitter_count_exception() &&
         twitter_default_profile_exception() &&
         twitter_image_sizes_exception() &&
#endif
         true;
}
}
2020-04-03 03:14:29 +08:00
namespace type_tests {
using namespace simdjson;
using namespace std;
// Fixture document shared by the cast_* tests in this namespace: one member per
// kind of JSON value under test. Numeric, boolean and null members are keyed by
// their own literal text, so a test selects a value by passing its spelling as
// the key. The integer members include the int64_t limits and two values that
// lie above the int64_t maximum.
const padded_string ALL_TYPES_JSON = R"(
{
"array": [],
"object": {},
"string": "foo",
"0": 0,
"1": 1,
"-1": -1,
"9223372036854775807": 9223372036854775807,
"-9223372036854775808": -9223372036854775808,
"9223372036854775808": 9223372036854775808,
"18446744073709551615": 18446744073709551615,
"0.0": 0.0,
"0.1": 0.1,
"1e0": 1e0,
"1e100": 1e100,
"true": true,
"false": false,
"null": null
}
)"_padded;
// Checks that `result` can be read as a T equal to `expected`, going through
// cast_tester<T> for both the unwrapped element and the simdjson_result
// wrapper: get<T>(), get(T&), is<T>() and, when exceptions are enabled,
// implicit conversion. Stops at the first failing check.
template<typename T>
bool test_cast(simdjson_result<dom::element> result, T expected) {
  std::cout << " test_cast<" << typeid(T).name() << "> expecting " << expected << std::endl;
  cast_tester<T> checker;
  // Unwrap the element so that each check runs on both forms.
  dom::element value = result.first;

  RUN_TEST( checker.test_get_t(value, expected) );
  RUN_TEST( checker.test_get_t(result, expected) );
  RUN_TEST( checker.test_get(value, expected) );
  RUN_TEST( checker.test_get(result, expected) );
  // The named-getter checks are disabled for the value-comparing overload:
  // RUN_TEST( checker.test_named_get(value, expected) );
  // RUN_TEST( checker.test_named_get(result, expected) );
  RUN_TEST( checker.test_is(value, true) );
  RUN_TEST( checker.test_is(result, true) );
  // RUN_TEST( checker.test_named_is(value, true) );
  // RUN_TEST( checker.test_named_is(result, true) );
#if SIMDJSON_EXCEPTIONS
  RUN_TEST( checker.test_implicit_cast(value, expected) );
  RUN_TEST( checker.test_implicit_cast(result, expected) );
#endif

  return true;
}
// Checks that `result` can be read as a T without comparing the value, going
// through cast_tester<T> for both the unwrapped element and the
// simdjson_result wrapper: get<T>(), get(T&), the named getter, is<T>(), the
// named is-check and, when exceptions are enabled, implicit conversion.
// Stops at the first failing check.
template<typename T>
bool test_cast(simdjson_result<dom::element> result) {
  std::cout << " test_cast<" << typeid(T).name() << ">" << std::endl;
  cast_tester<T> checker;
  // Unwrap the element so that each check runs on both forms.
  dom::element value = result.first;

  RUN_TEST( checker.test_get_t(value) );
  RUN_TEST( checker.test_get_t(result) );
  RUN_TEST( checker.test_get(value) );
  RUN_TEST( checker.test_get(result) );
  RUN_TEST( checker.test_named_get(value) );
  RUN_TEST( checker.test_named_get(result) );
  RUN_TEST( checker.test_is(value, true) );
  RUN_TEST( checker.test_is(result, true) );
  RUN_TEST( checker.test_named_is(value, true) );
  RUN_TEST( checker.test_named_is(result, true) );
#if SIMDJSON_EXCEPTIONS
  RUN_TEST( checker.test_implicit_cast(value) );
  RUN_TEST( checker.test_implicit_cast(result) );
#endif

  return true;
}
2020-06-20 02:59:13 +08:00
//
// Test that we get errors when we cast to the wrong type
//
template<typename T>
2020-06-20 02:59:13 +08:00
bool test_cast_error(simdjson_result<dom::element> result, simdjson::error_code expected_error) {
std::cout << " test_cast_error<" << typeid(T).name() << "> expecting error '" << expected_error << "'" << std::endl;
dom::element element = result.first;
2020-04-03 03:14:29 +08:00
2020-06-20 02:59:13 +08:00
cast_tester<T> tester;
2020-06-20 02:59:13 +08:00
RUN_TEST( tester.test_get_error(element, expected_error) );
RUN_TEST( tester.test_get_error(result, expected_error) );
2020-06-20 03:10:40 +08:00
RUN_TEST( tester.test_named_get_error(element, expected_error) );
RUN_TEST( tester.test_named_get_error(result, expected_error) );
2020-06-20 02:59:13 +08:00
RUN_TEST( tester.test_is(element, false) );
RUN_TEST( tester.test_is(result, false) );
2020-06-20 03:10:40 +08:00
RUN_TEST( tester.test_named_is(element, false) );
RUN_TEST( tester.test_named_is(result, false) );
#if SIMDJSON_EXCEPTIONS
2020-06-20 02:59:13 +08:00
RUN_TEST( tester.test_implicit_cast_error(element, expected_error) );
RUN_TEST( tester.test_implicit_cast_error(result, expected_error) );
#endif
2020-04-03 03:14:29 +08:00
return true;
}
// Checks that `result` reports `expected_type` through each access path:
// type() on the simdjson_result wrapper (error code checked), type() on the
// unwrapped element and, when exceptions are enabled, direct conversion of the
// wrapper's type() result.
bool test_type(simdjson_result<dom::element> result, dom::element_type expected_type) {
  std::cout << " test_type() expecting " << expected_type << std::endl;
  dom::element element = result.first;
  dom::element_type actual_type;
  auto error = result.type().get(actual_type);
  ASSERT_SUCCESS(error);
  ASSERT_EQUAL(actual_type, expected_type);

  // element.type() yields the type directly; there is no new error code to
  // check here (the earlier code re-asserted the stale `error` from above).
  actual_type = element.type();
  ASSERT_EQUAL(actual_type, expected_type);

#if SIMDJSON_EXCEPTIONS

  try {

    actual_type = result.type();
    ASSERT_EQUAL(actual_type, expected_type);

  } catch(simdjson_error &e) {
    std::cerr << e.error() << std::endl;
    return false;
  }

#endif // SIMDJSON_EXCEPTIONS

  return true;
}
// Checks that is_null() answers `expected_is_null` through each access path:
// on the simdjson_result wrapper (error code checked), on the unwrapped
// element and, when exceptions are enabled, by direct conversion of the
// wrapper's is_null() result.
bool test_is_null(simdjson_result<dom::element> result, bool expected_is_null) {
  std::cout << " test_is_null() expecting " << expected_is_null << std::endl;
  // Unwrap the element so that the check also runs without the wrapper.
  dom::element value = result.first;

  // Wrapper path: is_null() yields a result whose error must be clear.
  bool actual_is_null;
  auto error = result.is_null().get(actual_is_null);
  ASSERT_SUCCESS(error);
  ASSERT_EQUAL(actual_is_null, expected_is_null);

  // Element path.
  actual_is_null = value.is_null();
  ASSERT_EQUAL(actual_is_null, expected_is_null);

#if SIMDJSON_EXCEPTIONS
  // Conversion path: any simdjson_error is reported as a test failure.
  try {
    actual_is_null = result.is_null();
    ASSERT_EQUAL(actual_is_null, expected_is_null);
  } catch(simdjson_error &e) {
    std::cerr << e.error() << std::endl;
    return false;
  }
#endif // SIMDJSON_EXCEPTIONS

  return true;
}
bool cast_array() {
std::cout << "Running " << __func__ << std::endl;
2020-04-03 03:14:29 +08:00
dom::parser parser;
simdjson_result<dom::element> result = parser.parse(ALL_TYPES_JSON)["array"];
return true
&& test_type(result, dom::element_type::ARRAY)
&& test_cast<dom::array>(result)
2020-06-20 02:59:13 +08:00
&& test_cast_error<dom::object>(result, INCORRECT_TYPE)
&& test_cast_error<std::string_view>(result, INCORRECT_TYPE)
&& test_cast_error<const char *>(result, INCORRECT_TYPE)
&& test_cast_error<int64_t>(result, INCORRECT_TYPE)
&& test_cast_error<uint64_t>(result, INCORRECT_TYPE)
&& test_cast_error<double>(result, INCORRECT_TYPE)
&& test_cast_error<bool>(result, INCORRECT_TYPE)
&& test_is_null(result, false);
}
2020-04-03 03:14:29 +08:00
bool cast_object() {
std::cout << "Running " << __func__ << std::endl;
2020-04-03 03:14:29 +08:00
dom::parser parser;
simdjson_result<dom::element> result = parser.parse(ALL_TYPES_JSON)["object"];
return true
&& test_type(result, dom::element_type::OBJECT)
2020-06-20 02:59:13 +08:00
&& test_cast_error<dom::array>(result, INCORRECT_TYPE)
&& test_cast<dom::object>(result)
2020-06-20 02:59:13 +08:00
&& test_cast_error<std::string_view>(result, INCORRECT_TYPE)
&& test_cast_error<const char *>(result, INCORRECT_TYPE)
&& test_cast_error<int64_t>(result, INCORRECT_TYPE)
&& test_cast_error<uint64_t>(result, INCORRECT_TYPE)
&& test_cast_error<double>(result, INCORRECT_TYPE)
&& test_cast_error<bool>(result, INCORRECT_TYPE)
&& test_is_null(result, false);
2020-04-03 03:14:29 +08:00
}
bool cast_string() {
2020-04-03 03:14:29 +08:00
std::cout << "Running " << __func__ << std::endl;
dom::parser parser;
simdjson_result<dom::element> result = parser.parse(ALL_TYPES_JSON)["string"];
return true
&& test_type(result, dom::element_type::STRING)
2020-06-20 02:59:13 +08:00
&& test_cast_error<dom::array>(result, INCORRECT_TYPE)
&& test_cast_error<dom::object>(result, INCORRECT_TYPE)
&& test_cast<std::string_view>(result, "foo")
&& test_cast<const char *>(result, "foo")
2020-06-20 02:59:13 +08:00
&& test_cast_error<int64_t>(result, INCORRECT_TYPE)
&& test_cast_error<uint64_t>(result, INCORRECT_TYPE)
&& test_cast_error<double>(result, INCORRECT_TYPE)
&& test_cast_error<bool>(result, INCORRECT_TYPE)
&& test_is_null(result, false);
}
bool cast_int64(const char *key, int64_t expected_value) {
std::cout << "Running " << __func__ << "(" << key << ")" << std::endl;
2020-04-03 03:14:29 +08:00
dom::parser parser;
simdjson_result<dom::element> result = parser.parse(ALL_TYPES_JSON)[key];
return true
&& test_type(result, dom::element_type::INT64)
2020-06-20 02:59:13 +08:00
&& test_cast_error<dom::array>(result, INCORRECT_TYPE)
&& test_cast_error<dom::object>(result, INCORRECT_TYPE)
&& test_cast_error<std::string_view>(result, INCORRECT_TYPE)
&& test_cast_error<const char *>(result, INCORRECT_TYPE)
&& test_cast<int64_t>(result, expected_value)
&& (expected_value >= 0 ?
test_cast<uint64_t>(result, expected_value) :
2020-06-20 02:59:13 +08:00
test_cast_error<uint64_t>(result, NUMBER_OUT_OF_RANGE))
&& test_cast<double>(result, static_cast<double>(expected_value))
2020-06-20 02:59:13 +08:00
&& test_cast_error<bool>(result, INCORRECT_TYPE)
&& test_is_null(result, false);
}
2020-04-03 03:14:29 +08:00
bool cast_uint64(const char *key, uint64_t expected_value) {
std::cout << "Running " << __func__ << "(" << key << ")" << std::endl;
2020-04-03 03:14:29 +08:00
dom::parser parser;
simdjson_result<dom::element> result = parser.parse(ALL_TYPES_JSON)[key];
2020-04-03 03:14:29 +08:00
return true
&& test_type(result, dom::element_type::UINT64)
2020-06-20 02:59:13 +08:00
&& test_cast_error<dom::array>(result, INCORRECT_TYPE)
&& test_cast_error<dom::object>(result, INCORRECT_TYPE)
&& test_cast_error<std::string_view>(result, INCORRECT_TYPE)
&& test_cast_error<const char *>(result, INCORRECT_TYPE)
&& test_cast_error<int64_t>(result, NUMBER_OUT_OF_RANGE)
&& test_cast<uint64_t>(result, expected_value)
&& test_cast<double>(result, static_cast<double>(expected_value))
2020-06-20 02:59:13 +08:00
&& test_cast_error<bool>(result, INCORRECT_TYPE)
&& test_is_null(result, false);
}
bool cast_double(const char *key, double expected_value) {
std::cout << "Running " << __func__ << "(" << key << ")" << std::endl;
dom::parser parser;
simdjson_result<dom::element> result = parser.parse(ALL_TYPES_JSON)[key];
return true
&& test_type(result, dom::element_type::DOUBLE)
2020-06-20 02:59:13 +08:00
&& test_cast_error<dom::array>(result, INCORRECT_TYPE)
&& test_cast_error<dom::object>(result, INCORRECT_TYPE)
&& test_cast_error<std::string_view>(result, INCORRECT_TYPE)
&& test_cast_error<const char *>(result, INCORRECT_TYPE)
&& test_cast_error<int64_t>(result, INCORRECT_TYPE)
&& test_cast_error<uint64_t>(result, INCORRECT_TYPE)
&& test_cast<double>(result, expected_value)
2020-06-20 02:59:13 +08:00
&& test_cast_error<bool>(result, INCORRECT_TYPE)
&& test_is_null(result, false);
}
bool cast_bool(const char *key, bool expected_value) {
std::cout << "Running " << __func__ << "(" << key << ")" << std::endl;
dom::parser parser;
simdjson_result<dom::element> result = parser.parse(ALL_TYPES_JSON)[key];
return true
&& test_type(result, dom::element_type::BOOL)
2020-06-20 02:59:13 +08:00
&& test_cast_error<dom::array>(result, INCORRECT_TYPE)
&& test_cast_error<dom::object>(result, INCORRECT_TYPE)
&& test_cast_error<std::string_view>(result, INCORRECT_TYPE)
&& test_cast_error<const char *>(result, INCORRECT_TYPE)
&& test_cast_error<int64_t>(result, INCORRECT_TYPE)
&& test_cast_error<uint64_t>(result, INCORRECT_TYPE)
&& test_cast_error<double>(result, INCORRECT_TYPE)
&& test_cast<bool>(result, expected_value)
&& test_is_null(result, false);
}
bool cast_null() {
std::cout << "Running " << __func__ << std::endl;
dom::parser parser;
simdjson_result<dom::element> result = parser.parse(ALL_TYPES_JSON)["null"];
return true
&& test_type(result, dom::element_type::NULL_VALUE)
2020-06-20 02:59:13 +08:00
&& test_cast_error<dom::array>(result, INCORRECT_TYPE)
&& test_cast_error<dom::object>(result, INCORRECT_TYPE)
&& test_cast_error<std::string_view>(result, INCORRECT_TYPE)
&& test_cast_error<const char *>(result, INCORRECT_TYPE)
&& test_cast_error<int64_t>(result, INCORRECT_TYPE)
&& test_cast_error<uint64_t>(result, INCORRECT_TYPE)
&& test_cast_error<double>(result, INCORRECT_TYPE)
&& test_cast_error<bool>(result, INCORRECT_TYPE)
&& test_is_null(result, true);
2020-04-03 03:14:29 +08:00
}
bool run() {
return cast_array() &&
2020-04-03 03:14:29 +08:00
cast_object() &&
2020-04-03 03:14:29 +08:00
cast_string() &&
2020-04-03 03:14:29 +08:00
cast_int64("0", 0) &&
cast_int64("1", 1) &&
cast_int64("-1", -1) &&
cast_int64("9223372036854775807", 9223372036854775807LL) &&
cast_int64("-9223372036854775808", -1 - 9223372036854775807LL) &&
2020-04-03 03:14:29 +08:00
cast_uint64("9223372036854775808", 9223372036854775808ULL) &&
cast_uint64("18446744073709551615", 18446744073709551615ULL) &&
2020-04-03 03:14:29 +08:00
cast_double("0.0", 0.0) &&
cast_double("0.1", 0.1) &&
cast_double("1e0", 1e0) &&
cast_double("1e100", 1e100) &&
2020-04-03 03:14:29 +08:00
cast_bool("true", true) &&
cast_bool("false", false) &&
2020-04-03 03:14:29 +08:00
cast_null() &&
2020-04-03 03:14:29 +08:00
true;
}
}
2020-06-12 01:07:18 +08:00
namespace minify_tests {
2020-06-12 01:07:18 +08:00
// Minifies `length` bytes of `input` with simdjson::minify() into a scratch
// buffer of length + SIMDJSON_PADDING bytes and compares the outcome with
// `expected`: first the length against `expected_length`, then every byte.
// Returns false if the buffer cannot be allocated, if minify() reports an
// error, or at the first mismatch.
bool check_minification(const char * input, size_t length, const char * expected, size_t expected_length) {
  std::unique_ptr<char[]> buffer{new(std::nothrow) char[length + simdjson::SIMDJSON_PADDING]};
  if (!buffer) {
    std::cerr << "cannot alloc " << std::endl;
    return false;
  }
  char *const minified = buffer.get();
  size_t newlength{};
  ASSERT_SUCCESS( simdjson::minify(input, length, minified, newlength) );
  ASSERT_EQUAL( newlength, expected_length);
  for (size_t pos = 0; pos < newlength; ++pos) {
    ASSERT_EQUAL( minified[pos], expected[pos]);
  }
  return true;
}
// Minifies a document mixing a scalar, an array and a nested object, and
// expects all whitespace between tokens to be removed.
bool test_minify() {
  std::cout << "Running " << __func__ << std::endl;
  const std::string input = R"({ "foo" : 1, "bar" : [ 1, 2, 3 ], "baz": { "a": 1, "b": 2, "c": 3 } })";
  const std::string wanted = R"({"foo":1,"bar":[1,2,3],"baz":{"a":1,"b":2,"c":3}})";
  return check_minification(input.c_str(), input.size(), wanted.c_str(), wanted.size());
}
// Minifies a small array and expects the spaces between tokens to be removed.
bool test_minify_array() {
  std::cout << "Running " << __func__ << std::endl;
  std::string input = "[ 1, 2, 3]";
  std::string wanted = "[1,2,3]";
  return check_minification(input.c_str(), input.size(), wanted.c_str(), wanted.size());
}
bool test_minify_object() {
2020-06-12 01:07:18 +08:00
std::cout << "Running " << __func__ << std::endl;
std::string test(R"({ "foo " : 1, "b ar" : [ 1, 2, 3 ], "baz": { "a": 1, "b": 2, "c": 3 } })");
std::string minified(R"({"foo ":1,"b ar":[1,2,3],"baz":{"a":1,"b":2,"c":3}})");
return check_minification(test.c_str(), test.size(), minified.c_str(), minified.size());
}
bool run() {
return test_minify() &&
test_minify_array() &&
test_minify_object();
2020-06-12 01:07:18 +08:00
}
}
namespace format_tests {
using namespace simdjson;
2020-03-29 02:43:41 +08:00
using namespace simdjson::dom;
using namespace std;
2020-03-29 02:43:41 +08:00
const padded_string DOCUMENT = R"({ "foo" : 1, "bar" : [ 1, 2, 3 ], "baz": { "a": 1, "b": 2, "c": 3 } })"_padded;
const string MINIFIED(R"({"foo":1,"bar":[1,2,3],"baz":{"a":1,"b":2,"c":3}})");
bool assert_minified(ostringstream &actual, const std::string &expected=MINIFIED) {
if (actual.str() != expected) {
cerr << "Failed to correctly minify " << DOCUMENT << endl;
cerr << "Expected: " << expected << endl;
cerr << "Actual: " << actual.str() << endl;
return false;
}
return true;
}
bool print_parser_parse() {
std::cout << "Running " << __func__ << std::endl;
2020-03-29 02:43:41 +08:00
dom::parser parser;
dom::element doc;
ASSERT_SUCCESS( parser.parse(DOCUMENT).get(doc) );
ostringstream s;
2020-03-15 06:23:56 +08:00
s << doc;
return assert_minified(s);
}
bool print_minify_parser_parse() {
std::cout << "Running " << __func__ << std::endl;
2020-03-29 02:43:41 +08:00
dom::parser parser;
dom::element doc;
ASSERT_SUCCESS( parser.parse(DOCUMENT).get(doc) );
ostringstream s;
2020-03-15 06:23:56 +08:00
s << minify(doc);
return assert_minified(s);
}
2020-03-15 06:23:56 +08:00
bool print_element() {
std::cout << "Running " << __func__ << std::endl;
2020-03-29 02:43:41 +08:00
dom::parser parser;
dom::element value;
ASSERT_SUCCESS( parser.parse(DOCUMENT)["foo"].get(value) );
ostringstream s;
2020-03-15 06:23:56 +08:00
s << value;
return assert_minified(s, "1");
}
bool print_minify_element() {
std::cout << "Running " << __func__ << std::endl;
2020-03-29 02:43:41 +08:00
dom::parser parser;
dom::element value;
ASSERT_SUCCESS( parser.parse(DOCUMENT)["foo"].get(value) );
2020-03-15 06:23:56 +08:00
ostringstream s;
s << minify(value);
return assert_minified(s, "1");
}
bool print_array() {
std::cout << "Running " << __func__ << std::endl;
2020-03-29 02:43:41 +08:00
dom::parser parser;
dom::array array;
ASSERT_SUCCESS( parser.parse(DOCUMENT)["bar"].get(array) );
2020-03-15 06:23:56 +08:00
ostringstream s;
s << array;
2020-03-15 06:23:56 +08:00
return assert_minified(s, "[1,2,3]");
}
bool print_minify_array() {
std::cout << "Running " << __func__ << std::endl;
2020-03-29 02:43:41 +08:00
dom::parser parser;
dom::array array;
ASSERT_SUCCESS( parser.parse(DOCUMENT)["bar"].get(array) );
2020-03-15 06:23:56 +08:00
ostringstream s;
s << minify(array);
2020-03-15 06:23:56 +08:00
return assert_minified(s, "[1,2,3]");
}
bool print_object() {
std::cout << "Running " << __func__ << std::endl;
2020-03-29 02:43:41 +08:00
dom::parser parser;
dom::object object;
ASSERT_SUCCESS( parser.parse(DOCUMENT)["baz"].get(object) );
2020-03-15 06:23:56 +08:00
ostringstream s;
s << object;
2020-03-15 06:23:56 +08:00
return assert_minified(s, R"({"a":1,"b":2,"c":3})");
}
bool print_minify_object() {
std::cout << "Running " << __func__ << std::endl;
2020-03-29 02:43:41 +08:00
dom::parser parser;
dom::object object;
ASSERT_SUCCESS( parser.parse(DOCUMENT)["baz"].get(object) );
2020-03-15 06:23:56 +08:00
ostringstream s;
s << minify(object);
2020-03-15 06:23:56 +08:00
return assert_minified(s, R"({"a":1,"b":2,"c":3})");
}
#if SIMDJSON_EXCEPTIONS
bool print_parser_parse_exception() {
std::cout << "Running " << __func__ << std::endl;
2020-03-29 02:43:41 +08:00
dom::parser parser;
ostringstream s;
2020-03-15 06:23:56 +08:00
s << parser.parse(DOCUMENT);
return assert_minified(s);
}
2020-03-15 06:23:56 +08:00
bool print_minify_parser_parse_exception() {
std::cout << "Running " << __func__ << std::endl;
2020-03-29 02:43:41 +08:00
dom::parser parser;
ostringstream s;
2020-03-15 06:23:56 +08:00
s << minify(parser.parse(DOCUMENT));
return assert_minified(s);
}
2020-03-15 06:23:56 +08:00
bool print_element_result_exception() {
std::cout << "Running " << __func__ << std::endl;
2020-03-29 02:43:41 +08:00
dom::parser parser;
ostringstream s;
s << parser.parse(DOCUMENT)["foo"];
return assert_minified(s, "1");
}
2020-03-15 06:23:56 +08:00
bool print_minify_element_result_exception() {
std::cout << "Running " << __func__ << std::endl;
2020-03-29 02:43:41 +08:00
dom::parser parser;
ostringstream s;
s << minify(parser.parse(DOCUMENT)["foo"]);
return assert_minified(s, "1");
}
2020-03-15 06:23:56 +08:00
bool print_element_exception() {
std::cout << "Running " << __func__ << std::endl;
2020-03-29 02:43:41 +08:00
dom::parser parser;
element value = parser.parse(DOCUMENT)["foo"];
ostringstream s;
s << value;
return assert_minified(s, "1");
}
2020-03-15 06:23:56 +08:00
bool print_minify_element_exception() {
std::cout << "Running " << __func__ << std::endl;
2020-03-29 02:43:41 +08:00
dom::parser parser;
element value = parser.parse(DOCUMENT)["foo"];
ostringstream s;
s << minify(value);
return assert_minified(s, "1");
}
2020-03-15 06:23:56 +08:00
bool print_array_result_exception() {
std::cout << "Running " << __func__ << std::endl;
2020-03-29 02:43:41 +08:00
dom::parser parser;
ostringstream s;
s << parser.parse(DOCUMENT)["bar"].get<dom::array>();
return assert_minified(s, "[1,2,3]");
}
2020-03-15 06:23:56 +08:00
bool print_minify_array_result_exception() {
std::cout << "Running " << __func__ << std::endl;
2020-03-29 02:43:41 +08:00
dom::parser parser;
ostringstream s;
s << minify(parser.parse(DOCUMENT)["bar"].get<dom::array>());
return assert_minified(s, "[1,2,3]");
}
2020-03-15 06:23:56 +08:00
bool print_object_result_exception() {
std::cout << "Running " << __func__ << std::endl;
2020-03-29 02:43:41 +08:00
dom::parser parser;
ostringstream s;
s << parser.parse(DOCUMENT)["baz"].get<dom::object>();
return assert_minified(s, R"({"a":1,"b":2,"c":3})");
}
2020-03-15 06:23:56 +08:00
bool print_minify_object_result_exception() {
std::cout << "Running " << __func__ << std::endl;
2020-03-29 02:43:41 +08:00
dom::parser parser;
ostringstream s;
s << minify(parser.parse(DOCUMENT)["baz"].get<dom::object>());
return assert_minified(s, R"({"a":1,"b":2,"c":3})");
}
2020-03-15 06:23:56 +08:00
bool print_array_exception() {
std::cout << "Running " << __func__ << std::endl;
2020-03-29 02:43:41 +08:00
dom::parser parser;
dom::array array = parser.parse(DOCUMENT)["bar"];
ostringstream s;
s << array;
return assert_minified(s, "[1,2,3]");
}
2020-03-15 06:23:56 +08:00
bool print_minify_array_exception() {
std::cout << "Running " << __func__ << std::endl;
2020-03-29 02:43:41 +08:00
dom::parser parser;
dom::array array = parser.parse(DOCUMENT)["bar"];
ostringstream s;
s << minify(array);
return assert_minified(s, "[1,2,3]");
}
2020-03-15 06:23:56 +08:00
bool print_object_exception() {
std::cout << "Running " << __func__ << std::endl;
2020-03-29 02:43:41 +08:00
dom::parser parser;
dom::object object = parser.parse(DOCUMENT)["baz"];
ostringstream s;
s << object;
return assert_minified(s, R"({"a":1,"b":2,"c":3})");
}
2020-03-15 06:23:56 +08:00
bool print_minify_object_exception() {
std::cout << "Running " << __func__ << std::endl;
2020-03-29 02:43:41 +08:00
dom::parser parser;
dom::object object = parser.parse(DOCUMENT)["baz"];
ostringstream s;
s << minify(object);
return assert_minified(s, R"({"a":1,"b":2,"c":3})");
}
#endif // SIMDJSON_EXCEPTIONS
bool run() {
return print_parser_parse() && print_minify_parser_parse() &&
print_element() && print_minify_element() &&
print_array() && print_minify_array() &&
print_object() && print_minify_object() &&
2020-03-15 06:23:56 +08:00
#if SIMDJSON_EXCEPTIONS
print_parser_parse_exception() && print_minify_parser_parse_exception() &&
print_element_result_exception() && print_minify_element_result_exception() &&
print_array_result_exception() && print_minify_array_result_exception() &&
print_object_result_exception() && print_minify_object_result_exception() &&
print_element_exception() && print_minify_element_exception() &&
print_array_exception() && print_minify_array_exception() &&
print_object_exception() && print_minify_object_exception() &&
#endif
true;
}
}
int main(int argc, char *argv[]) {
std::cout << std::unitbuf;
int c;
while ((c = getopt(argc, argv, "a:")) != -1) {
switch (c) {
case 'a': {
const simdjson::implementation *impl = simdjson::available_implementations[optarg];
if (!impl) {
fprintf(stderr, "Unsupported architecture value -a %s\n", optarg);
return EXIT_FAILURE;
}
simdjson::active_implementation = impl;
break;
}
default:
fprintf(stderr, "Unexpected argument %c\n", c);
return EXIT_FAILURE;
}
}
// this is put here deliberately to check that the documentation is correct (README),
// should this fail to compile, you should update the documentation:
if (simdjson::active_implementation->name() == "unsupported") {
printf("unsupported CPU\n");
}
std::cout << "Running basic tests." << std::endl;
if (minify_tests::run() &&
2020-06-12 01:07:18 +08:00
parse_api_tests::run() &&
dom_api_tests::run() &&
2020-04-03 03:14:29 +08:00
type_tests::run() &&
format_tests::run() &&
document_tests::run() &&
number_tests::run() &&
2020-04-06 03:00:43 +08:00
document_stream_tests::run()
) {
std::cout << "Basic tests are ok." << std::endl;
return EXIT_SUCCESS;
} else {
return EXIT_FAILURE;
}
}