#include #include #include "simdjson/jsonioutil.h" #include "simdjson/jsonparser.h" #ifdef __linux__ #include "linux-perf-events.h" #endif using namespace std; size_t count_nonasciibytes(const u8 *input, size_t length) { size_t count = 0; for (size_t i = 0; i < length; i++) { count += input[i] >> 7; } return count; } size_t count_backslash(const u8 *input, size_t length) { size_t count = 0; for (size_t i = 0; i < length; i++) { count += (input[i] == '\\') ? 1 : 0; } return count; } struct stat_s { size_t integer_count; size_t float_count; size_t string_count; size_t backslash_count; size_t nonasciibyte_count; size_t object_count; size_t array_count; size_t null_count; size_t true_count; size_t false_count; size_t byte_count; size_t structural_indexes_count; bool valid; }; typedef struct stat_s stat_t; stat_t simdjson_computestats(const std::string_view &p) { stat_t answer; ParsedJson pj = build_parsed_json(p); answer.valid = pj.isValid(); if (!answer.valid) { return answer; } answer.backslash_count = count_backslash((const u8 *)p.data(), p.size()); answer.nonasciibyte_count = count_nonasciibytes((const u8 *)p.data(), p.size()); answer.byte_count = p.size(); answer.integer_count = 0; answer.float_count = 0; answer.object_count = 0; answer.array_count = 0; answer.null_count = 0; answer.true_count = 0; answer.false_count = 0; answer.string_count = 0; answer.structural_indexes_count = pj.n_structural_indexes; size_t tapeidx = 0; u64 tape_val = pj.tape[tapeidx++]; u8 type = (tape_val >> 56); size_t howmany = 0; assert(type == 'r'); howmany = tape_val & JSONVALUEMASK; for (; tapeidx < howmany; tapeidx++) { tape_val = pj.tape[tapeidx]; // u64 payload = tape_val & JSONVALUEMASK; type = (tape_val >> 56); switch (type) { case 'l': // we have a long int answer.integer_count++; tapeidx++; // skipping the integer break; case 'd': // we have a double answer.float_count++; tapeidx++; // skipping the double break; case 'n': // we have a null answer.null_count++; break; case 't': // we have a true answer.true_count++; break; case 'f': // we have a false answer.false_count++; break; case '{': // we have an object answer.object_count++; break; case '}': // we end an object break; case '[': // we start an array answer.array_count++; break; case ']': // we end an array break; case '"': // we have a string answer.string_count++; break; default: break; // ignore } } return answer; } int main(int argc, char *argv[]) { int c; while ((c = getopt(argc, argv, "")) != -1) switch (c) { default: abort(); } if (optind >= argc) { cerr << "Reads json, prints stats. " << endl; cerr << "Usage: " << argv[0] << " " << endl; exit(1); } const char *filename = argv[optind]; if (optind + 1 < argc) { std::cerr << "warning: ignoring everything after " << argv[optind + 1] << std::endl; } std::string_view p; try { p = get_corpus(filename); } catch (const std::exception &e) { // caught by reference to base std::cerr << "Could not load the file " << filename << std::endl; return EXIT_FAILURE; } stat_t s = simdjson_computestats(p); if (!s.valid) { std::cerr << "not a valid JSON" << std::endl; return EXIT_FAILURE; } printf("# integer_count float_count string_count backslash_count " "nonasciibyte_count object_count array_count null_count true_count " "false_count byte_count structural_indexes_count "); #ifdef __linux__ printf( " stage1_instruction_count stage1_cycle_count stage2_instruction_count " "stage2_cycle_count stage3_instruction_count stage3_cycle_count "); #else printf("(you are not under linux, so perf counters are disaabled)"); #endif printf("\n"); printf("%zu %zu %zu %zu %zu %zu %zu %zu %zu %zu %zu %zu", s.integer_count, s.float_count, s.string_count, s.backslash_count, s.nonasciibyte_count, s.object_count, s.array_count, s.null_count, s.true_count, s.false_count, s.byte_count, s.structural_indexes_count); #ifdef __linux__ ParsedJson pj; bool allocok = pj.allocateCapacity(p.size()); if (!allocok) { std::cerr << "failed to allocate memory" << std::endl; return EXIT_FAILURE; } const u32 iterations = p.size() < 1 * 1000 * 1000 ? 1000 : 50; vector evts; evts.push_back(PERF_COUNT_HW_CPU_CYCLES); evts.push_back(PERF_COUNT_HW_INSTRUCTIONS); LinuxEvents unified(evts); unsigned long cy1 = 0, cy2 = 0, cy3 = 0; unsigned long cl1 = 0, cl2 = 0, cl3 = 0; vector results; results.resize(evts.size()); for (u32 i = 0; i < iterations; i++) { unified.start(); bool isok = find_structural_bits(p.data(), p.size(), pj); unified.end(results); cy1 += results[0]; cl1 += results[1]; unified.start(); isok = isok && flatten_indexes(p.size(), pj); unified.end(results); cy2 += results[0]; cl2 += results[1]; isok = isok && unified_machine(p.data(), p.size(), pj); unified.end(results); cy3 += results[0]; cl3 += results[1]; if(!isok) { std::cerr << "failure?" << std::endl; } } printf("%f %f %f %f %f %f", cy1 * 1.0 / iterations, cl1 * 1.0 / iterations, cy2 * 1.0 / iterations, cl2 * 1.0 / iterations, cy3 * 1.0 / iterations, cl3 * 1.0 / iterations); #endif // __linux__ printf("\n"); return EXIT_SUCCESS; }