2018-08-21 05:51:38 +08:00
|
|
|
#include "jsonparser/common_defs.h"
|
2018-08-21 05:27:25 +08:00
|
|
|
#include "linux-perf-events.h"
|
|
|
|
#include <algorithm>
|
|
|
|
#include <assert.h>
|
|
|
|
#include <chrono>
|
|
|
|
#include <cstring>
|
2018-07-26 10:59:40 +08:00
|
|
|
#include <dirent.h>
|
2018-08-21 05:27:25 +08:00
|
|
|
#include <fstream>
|
2018-07-26 10:59:40 +08:00
|
|
|
#include <inttypes.h>
|
2018-08-21 05:27:25 +08:00
|
|
|
#include <iomanip>
|
|
|
|
#include <iostream>
|
|
|
|
#include <map>
|
|
|
|
#include <set>
|
|
|
|
#include <sstream>
|
2018-07-26 10:59:40 +08:00
|
|
|
#include <stdbool.h>
|
|
|
|
#include <stdio.h>
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include <string.h>
|
2018-03-23 12:05:32 +08:00
|
|
|
#include <string>
|
2018-08-21 05:27:25 +08:00
|
|
|
#include <unistd.h>
|
2018-03-23 12:05:32 +08:00
|
|
|
#include <vector>
|
|
|
|
#include <x86intrin.h>
|
2018-11-10 10:31:14 +08:00
|
|
|
#include <ctype.h>
|
2018-07-29 09:13:09 +08:00
|
|
|
|
2018-05-31 10:46:28 +08:00
|
|
|
//#define DEBUG
|
2018-11-10 10:31:14 +08:00
|
|
|
#include "jsonparser/jsonparser.h"
|
2018-08-21 05:51:38 +08:00
|
|
|
#include "jsonparser/jsonioutil.h"
|
|
|
|
#include "jsonparser/simdjson_internal.h"
|
|
|
|
#include "jsonparser/stage1_find_marks.h"
|
|
|
|
#include "jsonparser/stage2_flatten.h"
|
2018-09-24 08:42:30 +08:00
|
|
|
#include "jsonparser/stage34_unified.h"
|
2018-08-07 15:24:05 +08:00
|
|
|
using namespace std;
|
2018-03-23 12:05:32 +08:00
|
|
|
|
2018-04-06 22:53:51 +08:00
|
|
|
// https://stackoverflow.com/questions/2616906/how-do-i-output-coloured-text-to-a-linux-terminal
|
|
|
|
namespace Color {
/// Numeric parameters for the terminal escape sequence "\033[<code>m".
/// FG_* values select a foreground (text) color, BG_* values a background
/// color; FG_DEFAULT / BG_DEFAULT restore the terminal's default.
enum Code {
  FG_DEFAULT = 39,
  FG_BLACK = 30,
  FG_RED = 31,
  FG_GREEN = 32,
  FG_YELLOW = 33,
  FG_BLUE = 34,
  FG_MAGENTA = 35,
  FG_CYAN = 36,
  FG_LIGHT_GRAY = 37,
  FG_DARK_GRAY = 90,
  FG_LIGHT_RED = 91,
  FG_LIGHT_GREEN = 92,
  FG_LIGHT_YELLOW = 93,
  FG_LIGHT_BLUE = 94,
  FG_LIGHT_MAGENTA = 95,
  FG_LIGHT_CYAN = 96,
  FG_WHITE = 97,
  BG_RED = 41,
  BG_GREEN = 42,
  BG_BLUE = 44,
  BG_DEFAULT = 49
};

/// Small value type that, when inserted into a std::ostream, writes the
/// escape sequence selecting the wrapped color code:
///   std::cout << Color::Modifier(Color::FG_RED) << "text";
class Modifier {
  Code code;

public:
  /// Wraps the color code that operator<< will emit.
  Modifier(Code pCode) : code(pCode) {}

  /// Writes "\033[<code>m" to `os` and returns `os`.
  friend std::ostream &operator<<(std::ostream &os, const Modifier &mod) {
    // Format the number ourselves: inserting the integer directly would obey
    // the stream's current base/showbase flags (e.g. after std::hex) and
    // produce an invalid escape sequence. std::to_string is always decimal.
    return os << "\033[" << std::to_string(static_cast<int>(mod.code)) << "m";
  }
};
} // namespace Color
|
2018-04-06 22:53:51 +08:00
|
|
|
|
2018-08-21 05:27:25 +08:00
|
|
|
// Debug helper: writes the bytes of `buf` to stdout, starting at the first
// meaningful structural index, with every byte that pj.structural_indexes
// points at highlighted. Brackets and braces are printed in green, every
// other indexed byte in yellow; the bytes between two consecutive indexed
// positions are printed without color. Nothing past the last indexed byte is
// printed. A newline (with flush) terminates the output.
void colorfuldisplay(ParsedJson &pj, const u8 *buf) {
  const Color::Modifier bracket_color(Color::FG_GREEN);
  const Color::Modifier other_color(Color::FG_YELLOW);
  const Color::Modifier reset_color(Color::FG_DEFAULT);

  // A run of identical offsets at the front of the index array is filler;
  // advance to the last entry of that run.
  size_t cursor = 0;
  while ((cursor + 1 < pj.n_structural_indexes) &&
         (pj.structural_indexes[cursor] == pj.structural_indexes[cursor + 1])) {
    cursor++;
  }

  for (; cursor < pj.n_structural_indexes; cursor++) {
    const u32 here = pj.structural_indexes[cursor];
    const u8 ch = buf[here];

    // Clearing bit 0x20 maps '{' (0x7b) onto '[' (0x5b) and '}' (0x7d) onto
    // ']' (0x5d), so one comparison covers both members of each pair.
    const auto folded = ch & 0xdf;
    const bool is_bracket = (folded == 0x5b) || (folded == 0x5d);
    std::cout << (is_bracket ? bracket_color : other_color) << buf[here]
              << reset_color;

    // The last indexed byte has no successor, hence no gap to print.
    if (!(cursor + 1 < pj.n_structural_indexes)) {
      continue;
    }

    // Uncolored bytes strictly between this indexed byte and the next one.
    const u32 stop = pj.structural_indexes[cursor + 1];
    for (u32 pos = here + 1; pos < stop; pos++) {
      std::cout << buf[pos];
    }
  }
  std::cout << std::endl;
}
|
2018-07-26 10:59:40 +08:00
|
|
|
|
2018-08-21 05:27:25 +08:00
|
|
|
int main(int argc, char *argv[]) {
|
2018-11-10 10:31:14 +08:00
|
|
|
bool verbose = false;
|
|
|
|
int c;
|
|
|
|
|
|
|
|
while ((c = getopt (argc, argv, "v")) != -1)
|
|
|
|
switch (c)
|
|
|
|
{
|
|
|
|
case 'v':
|
|
|
|
verbose = true;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
abort ();
|
|
|
|
}
|
|
|
|
if (optind >= argc) {
|
2018-08-21 05:27:25 +08:00
|
|
|
cerr << "Usage: " << argv[0] << " <jsonfile>" << endl;
|
|
|
|
exit(1);
|
|
|
|
}
|
2018-11-10 10:31:14 +08:00
|
|
|
const char * filename = argv[optind];
|
|
|
|
if(optind + 1 < argc) {
|
|
|
|
cerr << "warning: ignoring everything after " << argv[optind + 1] << endl;
|
2018-08-21 05:27:25 +08:00
|
|
|
}
|
2018-11-10 10:31:14 +08:00
|
|
|
if(verbose) cout << "[verbose] loading " << filename << endl;
|
|
|
|
pair<u8 *, size_t> p = get_corpus(filename);
|
|
|
|
if(verbose) cout << "[verbose] loaded " << filename << " ("<< p.second << " bytes)" << endl;
|
|
|
|
ParsedJson *pj_ptr = allocate_ParsedJson(p.second);
|
|
|
|
ParsedJson &pj(*pj_ptr);
|
|
|
|
if(verbose) cout << "[verbose] allocated memory for parsed JSON " << endl;
|
2018-03-23 12:05:32 +08:00
|
|
|
|
2018-07-14 10:22:30 +08:00
|
|
|
#if defined(DEBUG)
|
2018-08-21 05:27:25 +08:00
|
|
|
const u32 iterations = 1;
|
2018-03-23 12:05:32 +08:00
|
|
|
#else
|
2018-11-10 10:31:14 +08:00
|
|
|
const u32 iterations = p.second < 1 * 1000 * 1000? 1000 : 10;
|
2018-03-23 12:05:32 +08:00
|
|
|
#endif
|
2018-08-21 05:27:25 +08:00
|
|
|
vector<double> res;
|
|
|
|
res.resize(iterations);
|
2018-04-26 09:36:07 +08:00
|
|
|
|
2018-05-07 19:33:23 +08:00
|
|
|
#if !defined(__linux__)
|
|
|
|
#define SQUASH_COUNTERS
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#ifndef SQUASH_COUNTERS
|
2018-08-21 05:27:25 +08:00
|
|
|
vector<int> evts;
|
|
|
|
evts.push_back(PERF_COUNT_HW_CPU_CYCLES);
|
|
|
|
evts.push_back(PERF_COUNT_HW_INSTRUCTIONS);
|
2018-10-04 21:47:34 +08:00
|
|
|
evts.push_back(PERF_COUNT_HW_BRANCH_MISSES);
|
2018-08-21 05:27:25 +08:00
|
|
|
LinuxEvents<PERF_TYPE_HARDWARE> unified(evts);
|
|
|
|
vector<u64> results;
|
|
|
|
results.resize(evts.size());
|
2018-09-26 13:22:55 +08:00
|
|
|
unsigned long cy1 = 0, cy2 = 0, cy3 = 0;
|
|
|
|
unsigned long cl1 = 0, cl2 = 0, cl3 = 0;
|
2018-10-04 21:47:34 +08:00
|
|
|
unsigned long mis1 = 0, mis2 = 0, mis3 = 0;
|
2018-04-26 09:36:07 +08:00
|
|
|
#endif
|
2018-08-21 05:27:25 +08:00
|
|
|
bool isok = true;
|
2018-11-10 10:31:14 +08:00
|
|
|
|
2018-08-21 05:27:25 +08:00
|
|
|
for (u32 i = 0; i < iterations; i++) {
|
2018-11-10 10:31:14 +08:00
|
|
|
if(verbose) cout << "[verbose] iteration # " << i << endl;
|
2018-08-21 05:27:25 +08:00
|
|
|
auto start = std::chrono::steady_clock::now();
|
2018-05-07 19:33:23 +08:00
|
|
|
#ifndef SQUASH_COUNTERS
|
2018-08-21 05:27:25 +08:00
|
|
|
unified.start();
|
2018-04-26 09:36:07 +08:00
|
|
|
#endif
|
2018-08-21 05:27:25 +08:00
|
|
|
isok = find_structural_bits(p.first, p.second, pj);
|
2018-05-07 19:33:23 +08:00
|
|
|
#ifndef SQUASH_COUNTERS
|
2018-08-21 05:27:25 +08:00
|
|
|
unified.end(results);
|
|
|
|
cy1 += results[0];
|
|
|
|
cl1 += results[1];
|
2018-10-04 21:47:34 +08:00
|
|
|
mis1 += results[2];
|
2018-09-24 08:42:30 +08:00
|
|
|
if (!isok) {
|
2018-09-24 08:54:29 +08:00
|
|
|
cout << "Failed out during stage 1\n";
|
2018-08-21 05:27:25 +08:00
|
|
|
break;
|
2018-09-24 08:42:30 +08:00
|
|
|
}
|
2018-08-21 05:27:25 +08:00
|
|
|
unified.start();
|
2018-04-26 09:36:07 +08:00
|
|
|
#endif
|
2018-08-21 05:27:25 +08:00
|
|
|
isok = flatten_indexes(p.second, pj);
|
2018-05-07 19:33:23 +08:00
|
|
|
#ifndef SQUASH_COUNTERS
|
2018-08-21 05:27:25 +08:00
|
|
|
unified.end(results);
|
|
|
|
cy2 += results[0];
|
|
|
|
cl2 += results[1];
|
2018-10-04 21:47:34 +08:00
|
|
|
mis2 += results[2];
|
2018-09-24 08:42:30 +08:00
|
|
|
if (!isok) {
|
2018-09-24 08:54:29 +08:00
|
|
|
cout << "Failed out during stage 2\n";
|
2018-08-21 05:27:25 +08:00
|
|
|
break;
|
2018-09-24 08:42:30 +08:00
|
|
|
}
|
2018-08-21 05:27:25 +08:00
|
|
|
unified.start();
|
2018-04-26 09:36:07 +08:00
|
|
|
#endif
|
2018-09-24 08:42:30 +08:00
|
|
|
|
|
|
|
isok = unified_machine(p.first, p.second, pj);
|
|
|
|
#ifndef SQUASH_COUNTERS
|
|
|
|
unified.end(results);
|
|
|
|
cy3 += results[0];
|
|
|
|
cl3 += results[1];
|
2018-10-04 21:47:34 +08:00
|
|
|
mis3 += results[2];
|
2018-09-24 08:42:30 +08:00
|
|
|
if (!isok) {
|
2018-09-24 08:54:29 +08:00
|
|
|
cout << "Failed out during stage 34\n";
|
2018-08-21 05:27:25 +08:00
|
|
|
break;
|
2018-09-24 08:42:30 +08:00
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2018-08-21 05:27:25 +08:00
|
|
|
auto end = std::chrono::steady_clock::now();
|
|
|
|
std::chrono::duration<double> secs = end - start;
|
|
|
|
res[i] = secs.count();
|
|
|
|
}
|
2018-08-18 07:57:31 +08:00
|
|
|
|
2018-05-07 19:33:23 +08:00
|
|
|
#ifndef SQUASH_COUNTERS
|
2018-08-21 05:27:25 +08:00
|
|
|
printf("number of bytes %ld number of structural chars %d ratio %.3f\n",
|
|
|
|
p.second, pj.n_structural_indexes,
|
|
|
|
(double)pj.n_structural_indexes / p.second);
|
2018-09-26 13:22:55 +08:00
|
|
|
unsigned long total = cy1 + cy2 + cy3;
|
2018-08-21 05:27:25 +08:00
|
|
|
|
|
|
|
printf(
|
2018-10-04 21:47:34 +08:00
|
|
|
"stage 1 instructions: %10lu cycles: %10lu (%.2f %%) ins/cycles: %.2f mis. branches: %10lu (cycles/mis.branch %.2f) \n",
|
|
|
|
cl1 / iterations, cy1 / iterations, 100. * cy1 / total, (double)cl1 / cy1, mis1/iterations, (double)cy1/mis1);
|
2018-08-21 05:27:25 +08:00
|
|
|
printf(" stage 1 runs at %.2f cycles per input byte.\n",
|
|
|
|
(double)cy1 / (iterations * p.second));
|
|
|
|
|
|
|
|
printf(
|
2018-10-04 21:47:34 +08:00
|
|
|
"stage 2 instructions: %10lu cycles: %10lu (%.2f %%) ins/cycles: %.2f mis. branches: %10lu (cycles/mis.branch %.2f) \n",
|
|
|
|
cl2 / iterations, cy2 / iterations, 100. * cy2 / total, (double)cl2 / cy2, mis2/iterations, (double)cy2/mis2);
|
2018-08-21 05:27:25 +08:00
|
|
|
printf(" stage 2 runs at %.2f cycles per input byte and ",
|
|
|
|
(double)cy2 / (iterations * p.second));
|
|
|
|
printf("%.2f cycles per structural character.\n",
|
|
|
|
(double)cy2 / (iterations * pj.n_structural_indexes));
|
|
|
|
|
|
|
|
printf(
|
2018-10-04 21:47:34 +08:00
|
|
|
"stage 3 instructions: %10lu cycles: %10lu (%.2f %%) ins/cycles: %.2f mis. branches: %10lu (cycles/mis.branch %.2f)\n",
|
|
|
|
cl3 / iterations, cy3 /iterations, 100. * cy3 / total, (double)cl3 / cy3, mis3/iterations, (double)cy3/mis3);
|
2018-08-21 05:27:25 +08:00
|
|
|
printf(" stage 3 runs at %.2f cycles per input byte and ",
|
|
|
|
(double)cy3 / (iterations * p.second));
|
|
|
|
printf("%.2f cycles per structural character.\n",
|
|
|
|
(double)cy3 / (iterations * pj.n_structural_indexes));
|
|
|
|
|
|
|
|
printf(" all stages: %.2f cycles per input byte.\n",
|
|
|
|
(double)total / (iterations * p.second));
|
2018-04-26 09:36:07 +08:00
|
|
|
#endif
|
2018-08-21 05:27:25 +08:00
|
|
|
// colorfuldisplay(pj, p.first);
|
|
|
|
double min_result = *min_element(res.begin(), res.end());
|
|
|
|
cout << "Min: " << min_result << " bytes read: " << p.second
|
|
|
|
<< " Gigabytes/second: " << (p.second) / (min_result * 1000000000.0)
|
|
|
|
<< "\n";
|
|
|
|
|
|
|
|
free(p.first);
|
2018-11-10 10:31:14 +08:00
|
|
|
deallocate_ParsedJson(pj_ptr);
|
2018-08-21 05:27:25 +08:00
|
|
|
if (!isok) {
|
|
|
|
printf(" Parsing failed. \n ");
|
|
|
|
return EXIT_FAILURE;
|
|
|
|
}
|
|
|
|
return EXIT_SUCCESS;
|
2018-03-23 12:05:32 +08:00
|
|
|
}
|