Instead of emulating the whole parsing as stage 1 + stage 2, let us benchmark the real thing. (#441)

* Instead of emulating the whole parsing as stage 1 + stage 2, let us benchmark the real thing.
* Adding explicit constructor.
* Adding warning to the benchmark user.
* Making re-running optional.
2020-01-11 10:14:22 -05:00 · 2020-01-11 10:14:22 -05:00 · f97b655f02
parent 1498b78342
commit f97b655f02
4 changed files with 89 additions and 27 deletions
--- a/benchmark/benchmarker.h
+++ b/benchmark/benchmarker.h
@ -280,7 +280,7 @@ struct benchmarker {
    return all_stages.iterations;
  }

-  really_inline void run_iteration(bool stage1_only=false) {
+  really_inline void run_iteration(bool stage1_only, bool rerunbothstages) {
    // Allocate ParsedJson
    collector.start();
    ParsedJson pj;
@ -303,34 +303,50 @@ struct benchmarker {
      exit_error(string("Failed to parse ") + filename + " during stage 1: " + pj.get_error_message());
    }

-    // Stage 2 (unified machine)
-    event_count stage2_count;
-    if (!stage1_only || stats == NULL) {
-      if (!stage1_only) {
-        collector.start();
-      }
-      result = parser.stage2((const uint8_t *)json.data(), json.size(), pj);
-      if (!stage1_only) {
-        stage2_count = collector.end();
-        stage2 << stage2_count;
-      }
+    // Stage 2 (unified machine) and the rest

+    if (stage1_only) {
+      all_stages << stage1_count;
+    } else {
+      event_count stage2_count;
+      collector.start();
+      result = parser.stage2((const uint8_t *)json.data(), json.size(), pj);
      if (result != simdjson::SUCCESS) {
-        exit_error(string("Failed to parse ") + filename + " during stage 2: " + pj.get_error_message());
+        exit_error(string("Failed to parse ") + filename + " during stage 2 parsing " + pj.get_error_message());
+      }
+      stage2_count = collector.end();
+      stage2 << stage2_count;
+      if(rerunbothstages) {
+        // You would think that the entire processing is just stage 1 + stage 2, but
+        // empirically, that's not true! Not even close to be true in some instances.
+        event_count allstages_count;
+        collector.start();
+        result = parser.parse((const uint8_t *)json.data(), json.size(), pj);
+        if (result != simdjson::SUCCESS) {
+          exit_error(string("Failed to parse ") + filename + " during overall parsing " + pj.get_error_message());
+        }
+        allstages_count = collector.end();
+        all_stages << allstages_count;
+      } else {
+        // we are optimistic
+        all_stages << stage1_count + stage2_count;
      }
    }
-
-    all_stages << (stage1_count + stage2_count);
-
    // Calculate stats the first time we parse
    if (stats == NULL) {
+      if (stage1_only) { //  we need stage 2 once
+        result = parser.stage2((const uint8_t *)json.data(), json.size(), pj);
+        if (result != simdjson::SUCCESS) {
+          printf("Warning: failed to parse during stage 2. Unable to acquire statistics.\n");
+        }
+      }
      stats = new json_stats(json, pj);
    }
  }

-  really_inline void run_iterations(size_t iterations, bool stage1_only=false) {
+  really_inline void run_iterations(size_t iterations, bool stage1_only, bool rerunbothstages) {
    for (size_t i = 0; i<iterations; i++) {
-      run_iteration(stage1_only);
+      run_iteration(stage1_only, rerunbothstages);
    }
  }

@ -439,6 +455,19 @@ struct benchmarker {
      print_aggregate("|    ", stage1.best);
              printf("|- Stage 2\n");
      print_aggregate("|    ", stage2.best);
+      if (collector.has_events()) {
+        double freq1 = (stage1.best.cycles() / stage1.best.elapsed_sec()) / 1000000000.0;
+        double freq2 = (stage2.best.cycles() / stage2.best.elapsed_sec()) / 1000000000.0;
+        double freqall = (all_stages.best.cycles() / all_stages.best.elapsed_sec()) / 1000000000.0;
+        double freqmin = std::min(freq1, freq2);
+        double freqmax = std::max(freq1, freq2);
+        if((freqall < 0.95 * freqmin) or (freqall > 1.05 * freqmax)) {
+          printf("\nWarning: The processor frequency fluctuates in an expected way!!!\n"
+          "Expect the overall speed not to match stage 1 and stage 2 speeds.\n"
+          "Range for stage 1 and stage 2 : [%.3f GHz, %.3f GHz], overall: %.3f GHz.\n",
+          freqmin, freqmax, freqall);
+        }
+      }
    }
  }
 };
--- a/benchmark/event_counter.h
+++ b/benchmark/event_counter.h
@ -128,6 +128,7 @@ struct event_collector {
    return linux_events.is_working();
  }
 #else
+  event_collector() {}
  bool has_events() {
    return false;
  }
--- a/benchmark/json_parser.h
+++ b/benchmark/json_parser.h
@ -43,6 +43,7 @@ using std::string;

 using stage2_functype = int(const uint8_t *buf, size_t len, ParsedJson &pj);
 using stage1_functype = int(const uint8_t *buf, size_t len, ParsedJson &pj);
+using jsonparse_functype = int(const uint8_t *buf, size_t len, ParsedJson &pj, bool streaming);

 stage1_functype* get_stage1_func(const Architecture architecture) {
  switch (architecture) {
@ -83,30 +84,52 @@ stage2_functype* get_stage2_func(const Architecture architecture) {
  }
 }

+jsonparse_functype* get_jsonparse_func(const Architecture architecture) {
+  switch (architecture) {
+#ifdef IS_X86_64
+  case Architecture::HASWELL:
+    return &json_parse_implementation<Architecture::HASWELL>;
+    break;
+  case Architecture::WESTMERE:
+    return &json_parse_implementation<Architecture::WESTMERE>;
+    break;
+#endif
+#ifdef IS_ARM64
+  case Architecture::ARM64:
+    return &json_parse_implementation<Architecture::ARM64>;
+    break;
+#endif
+  default:
+    std::cerr << "The processor is not supported by simdjson." << std::endl;
+    exit(EXIT_FAILURE);
+  }
+}
+
 struct json_parser {
  const Architecture architecture;
  const stage1_functype *stage1_func;
  const stage2_functype *stage2_func;
+  const jsonparse_functype *jsonparse_func;

  json_parser(const Architecture _architecture) : architecture(_architecture) {
    this->stage1_func = get_stage1_func(architecture);
    this->stage2_func = get_stage2_func(architecture);
+    this->jsonparse_func = get_jsonparse_func(architecture);
  }
  json_parser() : json_parser(find_best_supported_architecture()) {}

  int stage1(const uint8_t *buf, const size_t len, ParsedJson &pj) const {
    return this->stage1_func(buf, len, pj);
  }
+
  int stage2(const uint8_t *buf, const size_t len, ParsedJson &pj) const {
    return this->stage2_func(buf, len, pj);
  }

  int parse(const uint8_t *buf, const size_t len, ParsedJson &pj) const {
-    int result = this->stage1(buf, len, pj);
-    if (result == SUCCESS) {
-      result = this->stage2(buf, len, pj);
-    }
-    return result;
+    // yes, you can construct jsonparse from stage 1 and stage 2,
+    // but why emulate it when we have the real thing?
+    return this->jsonparse_func(buf, len, pj, false);
  }
 };

--- a/benchmark/parse.cpp
+++ b/benchmark/parse.cpp
@ -68,10 +68,14 @@ void print_usage(ostream& out) {
  out << "-t         - Tabbed data output" << endl;
  out << "-v         - Verbose output." << endl;
  out << "-s STAGE   - Stop after the given stage." << endl;
-  out << "             -s stage1 - Stop after find_structural_bits." << endl;
-  out << "             -s all    - Run all stages." << endl;
+  out << "             -s stage1  - Stop after find_structural_bits." << endl;
+  out << "             -s all     - Run all stages." << endl;
+  out << "             -s allfast - Run all stages." << endl;
+
  out << "-a ARCH    - Use the parser with the designated architecture (HASWELL, WESTMERE" << endl;
  out << "             or ARM64). By default, detects best supported architecture." << endl;
+  out << "-o         - Estimate the overall speed as stage 1 + stage 2 instead of a rerun of both" << endl;
+
 }

 void exit_usage(string message) {
@ -91,6 +95,7 @@ struct option_struct {

  bool verbose = false;
  bool tabbed_output = false;
+  bool rerunbothstages = true;

  option_struct(int argc, char **argv) {
    #ifndef _MSC_VER
@ -121,6 +126,10 @@ struct option_struct {
            stage1_only = true;
          } else if (!strcmp(optarg, "all")) {
            stage1_only = false;
+            rerunbothstages = true; // for safety
+          } else if (!strcmp(optarg, "allfast")) {
+            stage1_only = false;
+            rerunbothstages = false;
          } else {
            exit_usage(string("Unsupported option value -s ") + optarg + ": expected -s stage1 or all");
          }
@ -195,7 +204,7 @@ int main(int argc, char *argv[]) {
      // Benchmark each file once per iteration
      for (size_t f=0; f<options.files.size(); f++) {
        verbose() << "[verbose] " << benchmarkers[f]->filename << " iterations #" << iteration << "-" << (iteration+options.iteration_step-1) << endl;
-        benchmarkers[f]->run_iterations(options.iteration_step, true);
+        benchmarkers[f]->run_iterations(options.iteration_step, true, false);
      }
    }
  } else {
@ -204,7 +213,7 @@ int main(int argc, char *argv[]) {
      // Benchmark each file once per iteration
      for (size_t f=0; f<options.files.size(); f++) {
        verbose() << "[verbose] " << benchmarkers[f]->filename << " iterations #" << iteration << "-" << (iteration+options.iteration_step-1) << endl;
-        benchmarkers[f]->run_iterations(options.iteration_step, false);
+        benchmarkers[f]->run_iterations(options.iteration_step, false, options.rerunbothstages);
      }
    }
  }