simdjson/benchmark/parse.cpp

206 lines
6.4 KiB
C++
Raw Normal View History

#include "jsonparser/common_defs.h"
2018-08-21 05:27:25 +08:00
#include "linux-perf-events.h"
#include <algorithm>
#include <assert.h>
#include <chrono>
#include <cstring>
2018-07-26 10:59:40 +08:00
#include <dirent.h>
2018-08-21 05:27:25 +08:00
#include <fstream>
2018-07-26 10:59:40 +08:00
#include <inttypes.h>
2018-08-21 05:27:25 +08:00
#include <iomanip>
#include <iostream>
#include <map>
#include <set>
#include <sstream>
2018-07-26 10:59:40 +08:00
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
2018-03-23 12:05:32 +08:00
#include <string>
2018-08-21 05:27:25 +08:00
#include <unistd.h>
2018-03-23 12:05:32 +08:00
#include <vector>
#include <x86intrin.h>
2018-11-10 10:31:14 +08:00
#include <ctype.h>
2018-05-31 10:46:28 +08:00
//#define DEBUG
2018-11-10 10:31:14 +08:00
#include "jsonparser/jsonparser.h"
#include "jsonparser/jsonioutil.h"
#include "jsonparser/simdjson_internal.h"
#include "jsonparser/stage1_find_marks.h"
#include "jsonparser/stage2_flatten.h"
#include "jsonparser/stage34_unified.h"
2018-08-07 15:24:05 +08:00
using namespace std;
2018-03-23 12:05:32 +08:00
2018-08-21 05:27:25 +08:00
int main(int argc, char *argv[]) {
2018-11-10 10:31:14 +08:00
bool verbose = false;
2018-11-24 11:20:57 +08:00
bool dump = false;
2018-11-28 03:37:59 +08:00
bool forceoneiteration = false;
2018-11-24 11:20:57 +08:00
2018-11-10 10:31:14 +08:00
int c;
2018-11-28 03:37:59 +08:00
while ((c = getopt (argc, argv, "1vd")) != -1)
2018-11-10 10:31:14 +08:00
switch (c)
{
case 'v':
verbose = true;
break;
2018-11-24 11:20:57 +08:00
case 'd':
dump = true;
break;
2018-11-28 03:37:59 +08:00
case '1':
forceoneiteration = true;
break;
2018-11-10 10:31:14 +08:00
default:
abort ();
}
if (optind >= argc) {
2018-08-21 05:27:25 +08:00
cerr << "Usage: " << argv[0] << " <jsonfile>" << endl;
exit(1);
}
2018-11-10 10:31:14 +08:00
const char * filename = argv[optind];
if(optind + 1 < argc) {
cerr << "warning: ignoring everything after " << argv[optind + 1] << endl;
2018-08-21 05:27:25 +08:00
}
2018-11-10 10:31:14 +08:00
if(verbose) cout << "[verbose] loading " << filename << endl;
2018-11-28 03:37:59 +08:00
pair<u8 *, size_t> p;
try {
p = get_corpus(filename);
} catch (const std::exception& e) { // caught by reference to base
std::cout << "Could not load the file " << filename << std::endl;
return EXIT_FAILURE;
}
2018-11-10 10:31:14 +08:00
if(verbose) cout << "[verbose] loaded " << filename << " ("<< p.second << " bytes)" << endl;
2018-11-27 23:10:39 +08:00
ParsedJson *pj_ptr = allocate_ParsedJson(p.second, 1024);
2018-11-10 10:31:14 +08:00
ParsedJson &pj(*pj_ptr);
if(verbose) cout << "[verbose] allocated memory for parsed JSON " << endl;
2018-03-23 12:05:32 +08:00
2018-07-14 10:22:30 +08:00
#if defined(DEBUG)
2018-08-21 05:27:25 +08:00
const u32 iterations = 1;
2018-03-23 12:05:32 +08:00
#else
2018-11-28 03:37:59 +08:00
const u32 iterations = forceoneiteration ? 1 : ( p.second < 1 * 1000 * 1000? 1000 : 10);
2018-03-23 12:05:32 +08:00
#endif
2018-08-21 05:27:25 +08:00
vector<double> res;
res.resize(iterations);
#if !defined(__linux__)
#define SQUASH_COUNTERS
#endif
#ifndef SQUASH_COUNTERS
2018-08-21 05:27:25 +08:00
vector<int> evts;
evts.push_back(PERF_COUNT_HW_CPU_CYCLES);
evts.push_back(PERF_COUNT_HW_INSTRUCTIONS);
2018-10-04 21:47:34 +08:00
evts.push_back(PERF_COUNT_HW_BRANCH_MISSES);
2018-11-28 23:53:57 +08:00
evts.push_back(PERF_COUNT_HW_CACHE_REFERENCES);
evts.push_back(PERF_COUNT_HW_CACHE_MISSES);
2018-08-21 05:27:25 +08:00
LinuxEvents<PERF_TYPE_HARDWARE> unified(evts);
vector<u64> results;
results.resize(evts.size());
2018-09-26 13:22:55 +08:00
unsigned long cy1 = 0, cy2 = 0, cy3 = 0;
unsigned long cl1 = 0, cl2 = 0, cl3 = 0;
2018-10-04 21:47:34 +08:00
unsigned long mis1 = 0, mis2 = 0, mis3 = 0;
2018-11-28 23:53:57 +08:00
unsigned long cref1 = 0, cref2 = 0, cref3 = 0;
unsigned long cmis1 = 0, cmis2 = 0, cmis3 = 0;
#endif
2018-08-21 05:27:25 +08:00
bool isok = true;
2018-11-10 10:31:14 +08:00
2018-08-21 05:27:25 +08:00
for (u32 i = 0; i < iterations; i++) {
2018-11-10 10:31:14 +08:00
if(verbose) cout << "[verbose] iteration # " << i << endl;
2018-08-21 05:27:25 +08:00
auto start = std::chrono::steady_clock::now();
#ifndef SQUASH_COUNTERS
2018-08-21 05:27:25 +08:00
unified.start();
#endif
2018-08-21 05:27:25 +08:00
isok = find_structural_bits(p.first, p.second, pj);
#ifndef SQUASH_COUNTERS
2018-08-21 05:27:25 +08:00
unified.end(results);
cy1 += results[0];
cl1 += results[1];
2018-10-04 21:47:34 +08:00
mis1 += results[2];
2018-11-28 23:53:57 +08:00
cref1 += results[3];
cmis1 += results[4];
if (!isok) {
cout << "Failed out during stage 1\n";
2018-08-21 05:27:25 +08:00
break;
}
2018-08-21 05:27:25 +08:00
unified.start();
#endif
2018-11-28 03:37:59 +08:00
isok = isok && flatten_indexes(p.second, pj);
#ifndef SQUASH_COUNTERS
2018-08-21 05:27:25 +08:00
unified.end(results);
cy2 += results[0];
cl2 += results[1];
2018-10-04 21:47:34 +08:00
mis2 += results[2];
2018-11-28 23:53:57 +08:00
cref2 += results[3];
cmis2 += results[4];
if (!isok) {
cout << "Failed out during stage 2\n";
2018-08-21 05:27:25 +08:00
break;
}
2018-08-21 05:27:25 +08:00
unified.start();
#endif
2018-11-28 03:37:59 +08:00
isok = isok && unified_machine(p.first, p.second, pj);
#ifndef SQUASH_COUNTERS
unified.end(results);
cy3 += results[0];
cl3 += results[1];
2018-10-04 21:47:34 +08:00
mis3 += results[2];
2018-11-28 23:53:57 +08:00
cref3 += results[3];
cmis3 += results[4];
if (!isok) {
cout << "Failed out during stage 34\n";
2018-08-21 05:27:25 +08:00
break;
}
#endif
2018-08-21 05:27:25 +08:00
auto end = std::chrono::steady_clock::now();
std::chrono::duration<double> secs = end - start;
res[i] = secs.count();
}
#ifndef SQUASH_COUNTERS
2018-11-28 04:05:50 +08:00
printf("number of bytes %ld number of structural chars %u ratio %.3f\n",
2018-08-21 05:27:25 +08:00
p.second, pj.n_structural_indexes,
(double)pj.n_structural_indexes / p.second);
2018-09-26 13:22:55 +08:00
unsigned long total = cy1 + cy2 + cy3;
2018-08-21 05:27:25 +08:00
printf(
2018-11-28 23:53:57 +08:00
"stage 1 instructions: %10lu cycles: %10lu (%.2f %%) ins/cycles: %.2f mis. branches: %10lu (cycles/mis.branch %.2f) cache accesses: %10lu (failure %10lu)\n",
cl1 / iterations, cy1 / iterations, 100. * cy1 / total, (double)cl1 / cy1, mis1/iterations, (double)cy1/mis1, cref1 / iterations, cmis1 / iterations);
2018-08-21 05:27:25 +08:00
printf(" stage 1 runs at %.2f cycles per input byte.\n",
(double)cy1 / (iterations * p.second));
printf(
2018-11-28 23:53:57 +08:00
"stage 2 instructions: %10lu cycles: %10lu (%.2f %%) ins/cycles: %.2f mis. branches: %10lu (cycles/mis.branch %.2f) cache accesses: %10lu (failure %10lu)\n",
cl2 / iterations, cy2 / iterations, 100. * cy2 / total, (double)cl2 / cy2, mis2/iterations, (double)cy2/mis2, cref2 /iterations, cmis2 / iterations);
2018-08-21 05:27:25 +08:00
printf(" stage 2 runs at %.2f cycles per input byte and ",
(double)cy2 / (iterations * p.second));
printf("%.2f cycles per structural character.\n",
(double)cy2 / (iterations * pj.n_structural_indexes));
printf(
2018-11-28 23:53:57 +08:00
"stage 3 instructions: %10lu cycles: %10lu (%.2f %%) ins/cycles: %.2f mis. branches: %10lu (cycles/mis.branch %.2f) cache accesses: %10lu (failure %10lu)\n",
cl3 / iterations, cy3 /iterations, 100. * cy3 / total, (double)cl3 / cy3, mis3/iterations, (double)cy3/mis3, cref3 / iterations, cmis3 / iterations);
2018-08-21 05:27:25 +08:00
printf(" stage 3 runs at %.2f cycles per input byte and ",
(double)cy3 / (iterations * p.second));
printf("%.2f cycles per structural character.\n",
(double)cy3 / (iterations * pj.n_structural_indexes));
printf(" all stages: %.2f cycles per input byte.\n",
(double)total / (iterations * p.second));
#endif
2018-08-21 05:27:25 +08:00
// colorfuldisplay(pj, p.first);
double min_result = *min_element(res.begin(), res.end());
cout << "Min: " << min_result << " bytes read: " << p.second
<< " Gigabytes/second: " << (p.second) / (min_result * 1000000000.0)
<< "\n";
2018-11-28 09:42:35 +08:00
if(dump) pj_ptr->printjson();
2018-08-21 05:27:25 +08:00
free(p.first);
2018-11-10 10:31:14 +08:00
deallocate_ParsedJson(pj_ptr);
2018-08-21 05:27:25 +08:00
if (!isok) {
printf(" Parsing failed. \n ");
return EXIT_FAILURE;
}
return EXIT_SUCCESS;
2018-03-23 12:05:32 +08:00
}