diff --git a/benchmark/distinctuseridcompetition.cpp b/benchmark/distinctuseridcompetition.cpp index 28382045..f3adb356 100644 --- a/benchmark/distinctuseridcompetition.cpp +++ b/benchmark/distinctuseridcompetition.cpp @@ -30,28 +30,74 @@ void print_vec(const std::vector &v) { std::cout << std::endl; } -void simdjson_scan(std::vector &answer, simdjson::ParsedJson::Iterator i) { - while (i.move_forward()) { - if (i.get_scope_type() == '{') { - bool found_user = (i.get_string_length() == 4) && - (memcmp(i.get_string(), "user", 4) == 0); - i.move_to_value(); - if (found_user) { - if (i.is_object() && i.move_to_key("id", 2)) { - if (i.is_integer()) { - answer.push_back(i.get_integer()); +// clang-format off + +// simdjson_recurse below come be implemented like so but it is slow: +/*void simdjson_recurse(std::vector & v, simdjson::document::element element) { + if (element.is_array()) { + auto [array, array_error] = element.as_array(); + for (auto child : array) { + if (child.is_array() || child.is_object()) { + simdjson_recurse(v, child); + } + } + } else if (element.is_object()) { + auto [object, error] = element.as_object(); + int64_t id; + object["user"]["id"].as_int64_t().tie(id,error); + if(!error) { + v.push_back(id); + } + for (auto [key, value] : object) { + if (value.is_array() || value.is_object()) { + simdjson_recurse(v, value); + } + } + } +}*/ +// clang-format on + + +void simdjson_recurse(std::vector & v, simdjson::document::element element) { + if (element.is_array()) { + auto array = element.as_array(); + for (auto child : array) { + if (child.is_array() || child.is_object()) { + simdjson_recurse(v, child); + } + } + } else if (element.is_object()) { + auto object = element.as_object(); + for (auto [key, value] : object) { + if((key.size() == 4) && (memcmp(key.data(), "user", 4) == 0)) { + // we are in an object under the key "user" + if(value.is_object()) { + auto child_object = value.as_object(); + for (auto [child_key, child_value] : child_object) { + if((child_key.size() == 2) && (memcmp(child_key.data(), "id", 2) == 0)) { + if(child_value.is_integer()) { + v.push_back(child_value.as_int64_t()); + } + } + if (child_value.is_array() || child_value.is_object()) { + simdjson_recurse(v, child_value); + } } - i.up(); + } else if (value.is_array()) { + simdjson_recurse(v, value); } + // end of: we are in an object under the key "user" + } else if (value.is_array() || value.is_object()) { + simdjson_recurse(v, value); } } } } __attribute__((noinline)) std::vector -simdjson_just_dom(simdjson::ParsedJson &pj) { +simdjson_just_dom(simdjson::document &doc) { std::vector answer; - simdjson_scan(answer, pj); + simdjson_recurse(answer, doc.root()); remove_duplicates(answer); return answer; } @@ -59,15 +105,16 @@ simdjson_just_dom(simdjson::ParsedJson &pj) { __attribute__((noinline)) std::vector simdjson_compute_stats(const simdjson::padded_string &p) { std::vector answer; - ParsedJson pj = simdjson::build_parsed_json(p); - simdjson_scan(answer, pj); + simdjson::document::parser parser; + simdjson::document &doc = parser.parse(p); + simdjson_recurse(answer, doc.root()); remove_duplicates(answer); return answer; } __attribute__((noinline)) bool simdjson_just_parse(const simdjson::padded_string &p) { - return simdjson::document::parse(p).error() != simdjson::SUCCESS; + return simdjson::document::parse(p).error() == simdjson::SUCCESS; } void sajson_traverse(std::vector &answer, const sajson::value &node) { @@ -307,7 +354,7 @@ int main(int argc, char *argv[]) { printf( "name cycles_per_byte cycles_per_byte_err gb_per_s gb_per_s_err \n"); } - BEST_TIME("simdjson ", simdjson_compute_stats(p).size(), size, , repeat, + BEST_TIME("simdjson ", simdjson_compute_stats(p).size(), size, , repeat, volume, !just_data); BEST_TIME("rapid ", rapid_compute_stats(p).size(), size, , repeat, volume, !just_data); @@ -319,10 +366,12 @@ int main(int argc, char *argv[]) { volume, !just_data); BEST_TIME("sasjon (just parse) ", sasjon_just_parse(p), false, , repeat, volume, !just_data); - ParsedJson dsimdjson = build_parsed_json(p); - BEST_TIME("simdjson (just dom) ", simdjson_just_dom(dsimdjson).size(), size, + simdjson::document::parser parser; + simdjson::document &doc = parser.parse(p); + BEST_TIME("simdjson (just dom)", simdjson_just_dom(doc).size(), size, , repeat, volume, !just_data); - char *buffer = (char *)malloc(p.size()); + char *buffer = (char *)malloc(p.size() + 1); + buffer[p.size()] = '\0'; memcpy(buffer, p.data(), p.size()); rapidjson::Document drapid; drapid.ParseInsitu(buffer);