Fixed basictests

This commit is contained in:
Daniel Lemire 2020-03-26 19:40:29 -04:00
parent 8f3ddd3a73
commit abb0bf9247
5 changed files with 174 additions and 137 deletions

View File

@ -413,6 +413,11 @@ public:
really_inline bool is_number() const noexcept;
/** Whether this is a JSON integer (e.g. 1 or -1, but *not* 1.0 or 1e2) */
really_inline bool is_integer() const noexcept;
/**
 * Whether this is a JSON integer in [9223372036854775808, 18446744073709551616),
 * that is, a value too large for a signed 64-bit integer but that still fits
 * in a 64-bit word.
 *
 * Note: is_integer() is also true whenever is_unsigned_integer() is true.
 */
really_inline bool is_unsigned_integer() const noexcept;
/** Whether this is a JSON number but not an integer */
really_inline bool is_float() const noexcept;
/** Whether this is a JSON string (e.g. "abc") */
@ -889,17 +894,33 @@ public:
/**
* Get the value associated with the given key.
*
* Note: The key will be matched against **unescaped** JSON:
*
* document::parser parser;
* parser.parse(R"({ "a\n": 1 })")["a\n"].as_uint64_t().value == 1
* parser.parse(R"({ "a\n": 1 })")["a\\n"].as_uint64_t().error == NO_SUCH_FIELD
* Note: The key will be matched against **unescaped** JSON.
*
* @return The value associated with this field, or:
* - NO_SUCH_FIELD if the field does not exist in the object
*/
inline element_result at_key(const char *s) const noexcept;
/**
 * Get the value associated with the given key, where the key is passed as a
 * pointer plus an explicit length (exactly `length` characters of `s` are
 * compared, so `s` does not need to be null-terminated).
 *
 * Note: The key will be matched against **unescaped** JSON.
 *
 * @param s Pointer to the first character of the key to look up.
 * @param length Number of characters of `s` that make up the key.
 * @return The value associated with this field, or:
 * - NO_SUCH_FIELD if the field does not exist in the object
 */
inline element_result at_key(const char *s, size_t length) const noexcept;
/**
 * Get the value associated with the given key in a case-insensitive manner.
 * If several fields match, the first one encountered while iterating over
 * the object is returned.
 *
 * Note: The key will be matched against **unescaped** JSON.
 *
 * @param s The key to look up, as a C string.
 * @return The value associated with this field, or:
 * - NO_SUCH_FIELD if the field does not exist in the object
 */
inline element_result at_key_case_insensitive(const char *s) const noexcept;
private:
really_inline object(const document *_doc, size_t _json_index) noexcept;
friend class document::element;

View File

@ -783,6 +783,16 @@ inline document::element_result document::object::at(std::string_view json_point
return child;
}
// Linear search for a field whose key is exactly `length` bytes long and
// byte-equal to the first `length` bytes of `key`. Returns the value of the
// first such field, or NO_SUCH_FIELD when no field matches.
inline document::element_result document::object::at_key(const char *key, size_t length) const noexcept {
  iterator last = end();
  for (iterator it = begin(); it != last; ++it) {
    std::string_view candidate{it.key()};
    // Cheap rejection first: keys of a different size can never match.
    if (candidate.size() != length) {
      continue;
    }
    if (memcmp(candidate.data(), key, length) == 0) {
      return it.value();
    }
  }
  return NO_SUCH_FIELD;
}
inline document::element_result document::object::at_key(std::string_view key) const noexcept {
iterator end_field = end();
for (iterator field = begin(); field != end_field; ++field) {
@ -801,7 +811,18 @@ inline document::element_result document::object::at_key(const char *key) const
}
return NO_SUCH_FIELD;
}
// Case-insensitive key lookup: returns the value of the first field whose
// key compares equal to `key` under simdjson_strcasecmp, or NO_SUCH_FIELD.
// For the motivation, see https://github.com/simdjson/simdjson/issues/323
// (people do seek keys in a case-insensitive manner).
inline document::element_result document::object::at_key_case_insensitive(const char *key) const noexcept {
  iterator last = end();
  iterator it = begin();
  while (it != last) {
    const bool same_key = (simdjson_strcasecmp(key, it.key_c_str()) == 0);
    if (same_key) {
      return it.value();
    }
    ++it;
  }
  return NO_SUCH_FIELD;
}
//
// document::object::iterator inline implementation
//
@ -859,6 +880,9 @@ really_inline bool document::element::is_float() const noexcept {
// True when the element's tape type is one of the two 64-bit integer types
// (INT64 or UINT64).
really_inline bool document::element::is_integer() const noexcept {
  const auto kind = type();
  if (kind == internal::tape_type::UINT64) {
    return true;
  }
  return kind == internal::tape_type::INT64;
}
// True only when the element's tape type is UINT64.
really_inline bool document::element::is_unsigned_integer() const noexcept {
  const auto kind = type();
  return internal::tape_type::UINT64 == kind;
}
// True only when the element's tape type is STRING.
really_inline bool document::element::is_string() const noexcept {
  const auto kind = type();
  return internal::tape_type::STRING == kind;
}

View File

@ -98,6 +98,8 @@ inline const char *padded_string::data() const noexcept { return data_ptr; }
// Mutable access to the underlying character buffer.
inline char *padded_string::data() noexcept {
  return data_ptr;
}
// Conversion to std::string: copies length() characters starting at data().
inline padded_string::operator std::string() const {
  std::string copy(data(), length());
  return copy;
}
inline simdjson_result<padded_string> padded_string::load(const std::string &filename) noexcept {
// Open the file
std::FILE *fp = std::fopen(filename.c_str(), "rb");

View File

@ -89,6 +89,11 @@ struct padded_string final {
**/
char *data() noexcept;
/**
 * Create a new std::string with the same content.
 *
 * The returned string is an independent copy of this padded string's
 * characters.
 */
operator std::string() const;
/**
* Load this padded string from a file.
*

View File

@ -42,27 +42,26 @@ namespace number_tests {
bool small_integers() {
std::cout << __func__ << std::endl;
char buf[1024];
simdjson::ParsedJson pj;
simdjson::document::parser parser;
for (int m = 10; m < 20; m++) {
for (int i = -1024; i < 1024; i++) {
auto n = sprintf(buf, "%*d", m, i);
buf[n] = '\0';
fflush(NULL);
auto error = simdjson::json_parse(buf, n, pj);
auto [pj, error] = parser.parse(buf, n);
if (error) {
printf("Could not parse '%s': %s\n", buf, simdjson::error_message(error).c_str());
printf("Could not parse '%s': %s\n", buf, simdjson::error_message(error));
return false;
}
simdjson::ParsedJson::Iterator iter(pj);
if(!iter.is_number()) {
if(!pj.root().is_number()) {
printf("Root should be number\n");
return false;
}
if(!iter.is_integer()) {
if(!pj.root().is_integer()) {
printf("Root should be an integer\n");
return false;
}
int64_t x = iter.get_integer();
int64_t x = pj.root().as_int64_t();
if(x != i) {
printf("failed to parse %s. \n", buf);
return false;
@ -77,40 +76,24 @@ namespace number_tests {
bool powers_of_two() {
std::cout << __func__ << std::endl;
char buf[1024];
simdjson::ParsedJson pj;
simdjson::document::parser parser;
int maxulp = 0;
for (int i = -1075; i < 1024; ++i) {// large negative values should be zero.
double expected = pow(2, i);
auto n = sprintf(buf, "%.*e", std::numeric_limits<double>::max_digits10 - 1, expected);
buf[n] = '\0';
fflush(NULL);
auto error = simdjson::json_parse(buf, n, pj);
auto [pj, error] = parser.parse(buf, n);
if (error) {
printf("Could not parse '%s': %s\n", buf, simdjson::error_message(error).c_str());
printf("Could not parse '%s': %s\n", buf, simdjson::error_message(error));
return false;
}
simdjson::ParsedJson::Iterator iter(pj);
if(!iter.is_number()) {
if(!pj.root().is_number()) {
printf("Root should be number\n");
return false;
}
if(iter.is_integer()) {
int64_t x = iter.get_integer();
int power = 0;
while(x > 1) {
if((x % 2) != 0) {
printf("failed to parse %s. \n", buf);
return false;
}
x = x / 2;
power ++;
}
if(power != i) {
printf("failed to parse %s. \n", buf);
return false;
}
} else if(iter.is_unsigned_integer()) {
uint64_t x = iter.get_unsigned_integer();
if(pj.root().is_unsigned_integer()) {
uint64_t x = pj.root().as_uint64_t();
int power = 0;
while(x > 1) {
if((x % 2) != 0) {
@ -124,8 +107,23 @@ namespace number_tests {
printf("failed to parse %s. \n", buf);
return false;
}
} else if(pj.root().is_integer()) {
int64_t x = pj.root().as_int64_t();
int power = 0;
while(x > 1) {
if((x % 2) != 0) {
printf("failed to parse %s. \n", buf);
return false;
}
x = x / 2;
power ++;
}
if(power != i) {
printf("failed to parse %s. \n", buf);
return false;
}
} else {
double x = iter.get_double();
double x = pj.root().as_double();
int ulp = f64_ulp_dist(x,expected);
if(ulp > maxulp) maxulp = ulp;
if(ulp > 0) {
@ -219,33 +217,18 @@ namespace number_tests {
auto n = sprintf(buf,"1e%d", i);
buf[n] = '\0';
fflush(NULL);
auto error = simdjson::json_parse(buf, n, pj);
simdjson::document::parser parser;
auto [pj, error] = parser.parse(buf, n);
if (error) {
printf("Could not parse '%s': %s\n", buf, simdjson::error_message(error).c_str());
printf("Could not parse '%s': %s\n", buf, simdjson::error_message(error));
return false;
}
simdjson::ParsedJson::Iterator iter(pj);
if(!iter.is_number()) {
if(!pj.root().is_number()) {
printf("Root should be number\n");
return false;
}
if(iter.is_integer()) {
int64_t x = iter.get_integer();
int power = 0;
while(x > 1) {
if((x % 10) != 0) {
printf("failed to parse %s. \n", buf);
return false;
}
x = x / 10;
power ++;
}
if(power != i) {
printf("failed to parse %s. \n", buf);
return false;
}
} else if(iter.is_unsigned_integer()) {
uint64_t x = iter.get_unsigned_integer();
if(pj.root().is_unsigned_integer()) {
uint64_t x = pj.root().as_uint64_t();
int power = 0;
while(x > 1) {
if((x % 10) != 0) {
@ -259,8 +242,23 @@ namespace number_tests {
printf("failed to parse %s. \n", buf);
return false;
}
} else {
double x = iter.get_double();
} if(pj.root().is_integer()) {
int64_t x = pj.root().as_int64_t();
int power = 0;
while(x > 1) {
if((x % 10) != 0) {
printf("failed to parse %s. \n", buf);
return false;
}
x = x / 10;
power ++;
}
if(power != i) {
printf("failed to parse %s. \n", buf);
return false;
}
} else {
double x = pj.root().as_double();
double expected = ((i >= -307) ? testing_power_of_ten[i + 307]: std::pow(10, i));
int ulp = (int) f64_ulp_dist(x, expected);
if(ulp > 0) {
@ -285,10 +283,11 @@ namespace document_tests {
// adversarial example that once triggered overruns, see https://github.com/lemire/simdjson/issues/345
bool bad_example() {
std::cout << __func__ << std::endl;
std::string badjson = "[7,7,7,7,6,7,7,7,6,7,7,6,[7,7,7,7,6,7,7,7,6,7,7,6,7,7,7,7,7,7,6";
simdjson::document::parser parser = simdjson::build_parsed_json(badjson);
simdjson::padded_string badjson = "[7,7,7,7,6,7,7,7,6,7,7,6,[7,7,7,7,6,7,7,7,6,7,7,6,7,7,7,7,7,7,6"_padded;
simdjson::document::parser parser;
parser.parse(badjson);
if(parser.is_valid()) {
printf("This json should not be valid %s.\n", badjson.c_str());
printf("This json should not be valid %s.\n", badjson.data());
return false;
}
return true;
@ -296,7 +295,7 @@ namespace document_tests {
// returns true if successful
bool stable_test() {
std::cout << __func__ << std::endl;
std::string json = "{"
simdjson::padded_string json = "{"
"\"Image\":{"
"\"Width\":800,"
"\"Height\":600,"
@ -309,20 +308,18 @@ namespace document_tests {
"\"Animated\":false,"
"\"IDs\":[116,943.3,234,38793]"
"}"
"}";
simdjson::document::parser parser = simdjson::build_parsed_json(json);
"}"_padded;
simdjson::document::parser parser;
auto [pj, error] = parser.parse(json);
std::ostringstream myStream;
if( ! parser.print_json(myStream) ) {
std::cout << "cannot print it out? " << std::endl;
return false;
}
myStream << pj;
std::string newjson = myStream.str();
if(json != newjson) {
if(static_cast<std::string>(json) != newjson) {
std::cout << "serialized json differs!" << std::endl;
std::cout << json << std::endl;
std::cout << static_cast<std::string>(json) << std::endl;
std::cout << newjson << std::endl;
}
return newjson == json;
return newjson == static_cast<std::string>(json);
}
// returns true if successful
bool skyprophet_test() {
@ -363,14 +360,16 @@ namespace document_tests {
fflush(NULL);
}
counter++;
auto ok1 = simdjson::json_parse(rec.c_str(), rec.length(), parser);
if (ok1 != 0 || !parser.is_valid()) {
auto [doc1, res1] = parser.parse(rec.c_str(), rec.length());
if (res1 != simdjson::error_code::SUCCESS) {
printf("Something is wrong in skyprophet_test: %s.\n", rec.c_str());
printf("Parsing failed. Error is %s\n", simdjson::error_message(res1));
return false;
}
auto ok2 = simdjson::json_parse(rec, parser);
if (ok2 != 0 || !parser.is_valid()) {
auto [doc2, res2] = parser.parse(rec.c_str(), rec.length());
if (res2 != simdjson::error_code::SUCCESS) {
printf("Something is wrong in skyprophet_test: %s.\n", rec.c_str());
printf("Parsing failed. Error is %s\n", simdjson::error_message(res2));
return false;
}
}
@ -668,7 +667,7 @@ namespace dom_api_tests {
// returns true if successful
bool document_iterator_test() {
std::cout << "Running " << __func__ << std::endl;
std::string json = "{"
simdjson::padded_string json = "{"
"\"Image\": {"
"\"Width\": 800,"
"\"Height\": 600,"
@ -681,96 +680,82 @@ namespace dom_api_tests {
"\"Animated\" : false,"
"\"IDs\": [116, 943, 234, 38793]"
"}"
"}";
ParsedJson pj = build_parsed_json(json);
if (pj.error) {
printf("Could not parse '%s': %s\n", json.data(), simdjson::error_message(pj.error));
"}"_padded;
simdjson::document::parser parser;
auto [pj, error] = parser.parse(json);
if (error) {
printf("Could not parse '%s': %s\n", json.data(), simdjson::error_message(error));
return false;
}
simdjson::ParsedJson::Iterator iter(pj);
if(!iter.is_object()) {
if(!pj.root().is_object()) {
printf("Root should be object\n");
return false;
}
if(iter.move_to_key("bad key")) {
auto [object, err] = pj.root().as_object();
if(err) {
printf("can't convert to object?\n");
return false;
}
auto [b1, e1] = object.at_key("bad key");
if(!e1) {
printf("We should not move to a non-existing key\n");
return false;
}
if(!iter.is_object()) {
printf("We should have remained at the object.\n");
return false;
}
if(iter.move_to_key_insensitive("bad key")) {
auto [b2, e2] = object.at_key_case_insensitive("bad key");
if(!e2) {
printf("We should not move to a non-existing key\n");
return false;
}
if(!iter.is_object()) {
if(!pj.root().is_object()) {
printf("We should have remained at the object.\n");
return false;
}
if(iter.move_to_key("bad key", 7)) {
auto [b3, e3] = object.at_key("bad key", 7);
if(!e3) {
printf("We should not move to a non-existing key\n");
return false;
}
if(!iter.is_object()) {
if(!pj.root().is_object()) {
printf("We should have remained at the object.\n");
return false;
}
if(!iter.down()) {
printf("Root should not be emtpy\n");
return false;
}
if(!iter.is_string()) {
printf("Object should start with string key\n");
return false;
}
if(iter.prev()) {
printf("We should not be able to go back from the start of the scope.\n");
return false;
}
if(strcmp(iter.get_string(),"Image")!=0) {
printf("There should be a single key, image.\n");
return false;
}
iter.move_to_value();
if(!iter.is_object()) {
auto img_element = object.at_key("Image");
if(!img_element.get().is_object()) {
printf("Value of image should be object\n");
return false;
}
if(!iter.down()) {
printf("Image key should not be emtpy\n");
auto root_object = pj.root().as_object();
auto img_object = img_element.get().as_object();
size_t co = 0;
for(auto [k,v]: root_object) {
co++;
if(strcmp(k.data(),"Image")!=0) {
printf("There should be a single key, Image.\n");
return false;
}
}
if( co != 1 ) {
printf("There should be a single key, Image, I found %zu.\n", co);
return false;
}
if(!iter.next()) {
printf("key should have a value\n");
if( img_object.at_key("Width").as_int64_t() != 800) {
printf("There should be a Width element and its value should be 800\n");
return false;
}
if(!iter.prev()) {
printf("We should go back to the key.\n");
auto ids_element = img_object.at_key("IDs");
if(!ids_element.get().is_array()) {
printf("Value of IDs should be array.\n");
return false;
}
if(strcmp(iter.get_string(),"Width")!=0) {
printf("There should be a key Width.\n");
return false;
std::vector<int64_t> val;
for(int64_t v : ids_element.as_array()) {
val.push_back(v);
}
if(!iter.up()) {
return false;
}
if(!iter.move_to_key("IDs")) {
printf("We should be able to move to an existing key\n");
return false;
}
if(!iter.is_array()) {
printf("Value of IDs should be array, it is %c \n", iter.get_type());
return false;
}
if(iter.move_to_index(4)) {
printf("We should not be able to move to a non-existing index\n");
return false;
}
if(!iter.is_array()) {
printf("We should have remained at the array\n");
std::vector<int64_t> tv = {116, 943, 234, 38793};
if(val != tv) {
printf("The values do not match.\n");
return false;
}
return true;