2018-12-25 01:28:27 +08:00
|
|
|
#include <iostream>
|
2019-07-31 05:18:10 +08:00
|
|
|
#ifndef _MSC_VER
|
2018-12-25 01:28:27 +08:00
|
|
|
#include <unistd.h>
|
2018-12-31 10:00:19 +08:00
|
|
|
#endif
|
2018-12-25 01:28:27 +08:00
|
|
|
#include "simdjson/jsonioutil.h"
|
|
|
|
#include "simdjson/jsonparser.h"
|
|
|
|
#ifdef __linux__
|
|
|
|
#include "linux-perf-events.h"
|
|
|
|
#endif
|
|
|
|
|
2018-12-28 09:09:25 +08:00
|
|
|
// Returns how many of the `length` bytes starting at `input` have their most
// significant bit set, i.e. hold a value of 0x80 or above (outside 7-bit
// ASCII).
size_t count_nonasciibytes(const uint8_t *input, size_t length) {
  size_t total = 0;
  const uint8_t *const end = input + length;
  for (const uint8_t *cursor = input; cursor != end; ++cursor) {
    // Shifting right by 7 leaves only the top bit: 1 for non-ASCII, else 0.
    total += static_cast<size_t>(*cursor >> 7);
  }
  return total;
}
|
|
|
|
|
2018-12-28 09:09:25 +08:00
|
|
|
// Returns the number of backslash ('\\') bytes found among the `length` bytes
// starting at `input`.
size_t count_backslash(const uint8_t *input, size_t length) {
  size_t hits = 0;
  size_t pos = 0;
  while (pos < length) {
    if (input[pos] == '\\') {
      ++hits;
    }
    ++pos;
  }
  return hits;
}
|
|
|
|
|
|
|
|
// Statistics collected for a single JSON document by
// simdjson_compute_stats().
//
// Every member has a default initializer so that a default-constructed
// object is fully defined. simdjson_compute_stats() returns early for an
// invalid document after setting only `valid`; without the initializers the
// remaining counters would hold indeterminate values.
struct stat_s {
  size_t integer_count{0};            // tape entries tagged 'l' or 'u'
  size_t float_count{0};              // tape entries tagged 'd'
  size_t string_count{0};             // tape entries tagged '"'
  size_t backslash_count{0};          // '\\' bytes in the raw input
  size_t non_ascii_byte_count{0};     // input bytes with the top bit set
  size_t object_count{0};             // tape entries tagged '{'
  size_t array_count{0};              // tape entries tagged '['
  size_t null_count{0};               // tape entries tagged 'n'
  size_t true_count{0};               // tape entries tagged 't'
  size_t false_count{0};              // tape entries tagged 'f'
  size_t byte_count{0};               // size of the input in bytes
  size_t structural_indexes_count{0}; // copied from pj.n_structural_indexes
  bool valid{false};                  // result of ParsedJson::is_valid()
};

using stat_t = struct stat_s;
|
2018-12-25 01:28:27 +08:00
|
|
|
|
2019-07-31 05:18:10 +08:00
|
|
|
// Parses `p` with simdjson and gathers per-document statistics.
//
// Returns a stat_t whose `valid` member reflects ParsedJson::is_valid(). When
// the document is invalid the function returns immediately and every counter
// is zero. Otherwise the raw-byte counters are computed from the input buffer
// and the per-type counters are computed by walking the parsed tape.
stat_t simdjson_compute_stats(const simdjson::padded_string &p) {
  // Value-initialize so that every counter is zero, including on the early
  // return below (previously those fields were left indeterminate).
  stat_t answer{};
  simdjson::ParsedJson pj = simdjson::build_parsed_json(p);
  answer.valid = pj.is_valid();
  if (!answer.valid) {
    return answer;
  }

  // Counters derived directly from the raw input bytes.
  const uint8_t *const bytes = reinterpret_cast<const uint8_t *>(p.data());
  answer.backslash_count = count_backslash(bytes, p.size());
  answer.non_ascii_byte_count = count_nonasciibytes(bytes, p.size());
  answer.byte_count = p.size();
  answer.structural_indexes_count = pj.n_structural_indexes;

  // Each tape word carries a one-byte type tag in its top 8 bits; the rest is
  // extracted with JSON_VALUE_MASK. The first word must be the root ('r').
  size_t tape_idx = 0;
  const uint64_t root_val = pj.doc.tape[tape_idx++];
  assert((root_val >> 56) == 'r');
  // The root payload is used as the exclusive upper bound of the walk
  // (presumably the tape index of the closing root entry -- confirm against
  // the simdjson tape format).
  const size_t how_many = root_val & JSON_VALUE_MASK;

  for (; tape_idx < how_many; tape_idx++) {
    const uint64_t tape_val = pj.doc.tape[tape_idx];
    const uint8_t type = static_cast<uint8_t>(tape_val >> 56);
    switch (type) {
    case 'l': // we have a long int
    case 'u': // we have a long uint
      answer.integer_count++;
      tape_idx++; // skipping the integer stored in the following tape word
      break;
    case 'd': // we have a double
      answer.float_count++;
      tape_idx++; // skipping the double stored in the following tape word
      break;
    case 'n': // we have a null
      answer.null_count++;
      break;
    case 't': // we have a true
      answer.true_count++;
      break;
    case 'f': // we have a false
      answer.false_count++;
      break;
    case '{': // we have an object
      answer.object_count++;
      break;
    case '[': // we start an array
      answer.array_count++;
      break;
    case '"': // we have a string
      answer.string_count++;
      break;
    case '}': // we end an object
    case ']': // we end an array
    default:
      break; // nothing to count
    }
  }
  return answer;
}
|
|
|
|
|
|
|
|
int main(int argc, char *argv[]) {
|
2018-12-31 10:00:19 +08:00
|
|
|
#ifndef _MSC_VER
|
2019-07-31 05:18:10 +08:00
|
|
|
int c;
|
|
|
|
while ((c = getopt(argc, argv, "")) != -1) {
|
2018-12-25 01:28:27 +08:00
|
|
|
switch (c) {
|
|
|
|
|
|
|
|
default:
|
|
|
|
abort();
|
|
|
|
}
|
2019-07-31 05:18:10 +08:00
|
|
|
}
|
2018-12-31 10:00:19 +08:00
|
|
|
#else
|
|
|
|
int optind = 1;
|
|
|
|
#endif
|
2018-12-25 01:28:27 +08:00
|
|
|
if (optind >= argc) {
|
2019-05-10 05:59:51 +08:00
|
|
|
std::cerr << "Reads json, prints stats. " << std::endl;
|
|
|
|
std::cerr << "Usage: " << argv[0] << " <jsonfile>" << std::endl;
|
2018-12-25 01:28:27 +08:00
|
|
|
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
const char *filename = argv[optind];
|
|
|
|
if (optind + 1 < argc) {
|
|
|
|
std::cerr << "warning: ignoring everything after " << argv[optind + 1]
|
|
|
|
<< std::endl;
|
|
|
|
}
|
2019-07-03 03:21:00 +08:00
|
|
|
simdjson::padded_string p;
|
2018-12-25 01:28:27 +08:00
|
|
|
try {
|
2019-07-03 03:21:00 +08:00
|
|
|
simdjson::get_corpus(filename).swap(p);
|
2019-08-01 05:43:45 +08:00
|
|
|
} catch (const std::exception &) { // caught by reference to base
|
2018-12-25 01:28:27 +08:00
|
|
|
std::cerr << "Could not load the file " << filename << std::endl;
|
|
|
|
return EXIT_FAILURE;
|
|
|
|
}
|
2019-07-31 05:18:10 +08:00
|
|
|
stat_t s = simdjson_compute_stats(p);
|
2018-12-25 01:28:27 +08:00
|
|
|
if (!s.valid) {
|
|
|
|
std::cerr << "not a valid JSON" << std::endl;
|
|
|
|
return EXIT_FAILURE;
|
|
|
|
}
|
|
|
|
|
|
|
|
printf("# integer_count float_count string_count backslash_count "
|
2019-07-31 05:18:10 +08:00
|
|
|
"non_ascii_byte_count object_count array_count null_count true_count "
|
2018-12-25 01:28:27 +08:00
|
|
|
"false_count byte_count structural_indexes_count ");
|
|
|
|
#ifdef __linux__
|
2019-07-31 05:18:10 +08:00
|
|
|
printf(" stage1_cycle_count stage1_instruction_count stage2_cycle_count "
|
|
|
|
" stage2_instruction_count stage3_cycle_count "
|
|
|
|
"stage3_instruction_count ");
|
2018-12-25 01:28:27 +08:00
|
|
|
#else
|
|
|
|
printf("(you are not under linux, so perf counters are disaabled)");
|
|
|
|
#endif
|
|
|
|
printf("\n");
|
2018-12-25 02:18:19 +08:00
|
|
|
printf("%zu %zu %zu %zu %zu %zu %zu %zu %zu %zu %zu %zu ", s.integer_count,
|
2019-07-31 05:18:10 +08:00
|
|
|
s.float_count, s.string_count, s.backslash_count,
|
|
|
|
s.non_ascii_byte_count, s.object_count, s.array_count, s.null_count,
|
|
|
|
s.true_count, s.false_count, s.byte_count, s.structural_indexes_count);
|
2018-12-25 01:28:27 +08:00
|
|
|
#ifdef __linux__
|
2019-07-03 03:21:00 +08:00
|
|
|
simdjson::ParsedJson pj;
|
2019-07-31 05:18:10 +08:00
|
|
|
bool allocok = pj.allocate_capacity(p.size());
|
2018-12-25 01:28:27 +08:00
|
|
|
if (!allocok) {
|
|
|
|
std::cerr << "failed to allocate memory" << std::endl;
|
|
|
|
return EXIT_FAILURE;
|
|
|
|
}
|
2018-12-29 02:13:10 +08:00
|
|
|
const uint32_t iterations = p.size() < 1 * 1000 * 1000 ? 1000 : 50;
|
2019-05-10 05:59:51 +08:00
|
|
|
std::vector<int> evts;
|
2018-12-25 01:28:27 +08:00
|
|
|
evts.push_back(PERF_COUNT_HW_CPU_CYCLES);
|
|
|
|
evts.push_back(PERF_COUNT_HW_INSTRUCTIONS);
|
|
|
|
LinuxEvents<PERF_TYPE_HARDWARE> unified(evts);
|
2019-01-03 05:47:35 +08:00
|
|
|
unsigned long cy1 = 0, cy2 = 0;
|
|
|
|
unsigned long cl1 = 0, cl2 = 0;
|
2019-05-10 05:59:51 +08:00
|
|
|
std::vector<unsigned long long> results;
|
2018-12-25 01:36:45 +08:00
|
|
|
results.resize(evts.size());
|
2018-12-29 02:13:10 +08:00
|
|
|
for (uint32_t i = 0; i < iterations; i++) {
|
2018-12-25 01:28:27 +08:00
|
|
|
unified.start();
|
2020-02-15 07:21:28 +08:00
|
|
|
// The default template is simdjson::architecture::NATIVE.
|
2019-07-31 05:18:10 +08:00
|
|
|
bool isok = (simdjson::find_structural_bits<>(p.data(), p.size(), pj) ==
|
|
|
|
simdjson::SUCCESS);
|
2018-12-25 01:28:27 +08:00
|
|
|
unified.end(results);
|
2019-07-31 05:18:10 +08:00
|
|
|
|
2018-12-25 01:28:27 +08:00
|
|
|
cy1 += results[0];
|
|
|
|
cl1 += results[1];
|
2019-07-31 05:18:10 +08:00
|
|
|
|
2018-12-25 01:28:27 +08:00
|
|
|
unified.start();
|
2019-07-31 05:18:10 +08:00
|
|
|
isok =
|
|
|
|
isok && (simdjson::SUCCESS == unified_machine(p.data(), p.size(), pj));
|
2018-12-25 01:28:27 +08:00
|
|
|
unified.end(results);
|
2019-07-31 05:18:10 +08:00
|
|
|
|
2018-12-25 01:28:27 +08:00
|
|
|
cy2 += results[0];
|
|
|
|
cl2 += results[1];
|
2019-07-31 05:18:10 +08:00
|
|
|
if (!isok) {
|
2018-12-25 01:28:27 +08:00
|
|
|
std::cerr << "failure?" << std::endl;
|
|
|
|
}
|
|
|
|
}
|
2019-01-03 05:47:35 +08:00
|
|
|
printf("%f %f %f %f ", cy1 * 1.0 / iterations, cl1 * 1.0 / iterations,
|
|
|
|
cy2 * 1.0 / iterations, cl2 * 1.0 / iterations);
|
2018-12-25 01:28:27 +08:00
|
|
|
#endif // __linux__
|
|
|
|
printf("\n");
|
|
|
|
return EXIT_SUCCESS;
|
|
|
|
}
|