check for performance degradation in CI (#270)

* Add -n and -w arguments * Add Dockerfile that compares perf against master * Add checkperf to .drone.yml * Clone from github instead of .git since CI doesn't have .git
2019-08-12 13:03:56 -07:00 · 2019-08-12 13:03:56 -07:00 · 875e2f9d0d
parent 3fb82502f7
commit 875e2f9d0d
7 changed files with 189 additions and 9 deletions
--- a/.drone.yml
+++ b/.drone.yml
@ -12,6 +12,10 @@ steps:
  - make
  - make quiettest
  - make amalgamate
+- name: checkperf
+  image: gcc:8
+  commands:
+  - make checkperf
 ---
 kind: pipeline
 name: arm64
@ -27,6 +31,10 @@ steps:
  - make
  - make quiettest
  - make amalgamate
+- name: checkperf
+  image: gcc:8
+  commands:
+  - make checkperf
 ---
 kind: pipeline
 name: stylecheck
--- a/Makefile
+++ b/Makefile
@ -123,6 +123,12 @@ $(JSON_INCLUDE) $(SAJSON_INCLUDE) $(RAPIDJSON_INCLUDE) $(JSON11_INCLUDE) $(FASTJ
 parse: benchmark/parse.cpp $(HEADERS) $(LIBFILES)
 	$(CXX) $(CXXFLAGS) -o parse $(LIBFILES) benchmark/parse.cpp $(LIBFLAGS)

+perfdiff: benchmark/perfdiff.cpp
+	$(CXX) $(CXXFLAGS) -o perfdiff benchmark/perfdiff.cpp $(LIBFLAGS)
+
+checkperf:
+	bash ./scripts/checkperf.sh
+
 statisticalmodel: benchmark/statisticalmodel.cpp $(HEADERS) $(LIBFILES)
 	$(CXX) $(CXXFLAGS) -o statisticalmodel $(LIBFILES) benchmark/statisticalmodel.cpp $(LIBFLAGS)

--- a/benchmark/CMakeLists.txt
+++ b/benchmark/CMakeLists.txt
@ -6,3 +6,4 @@ target_include_directories(${SIMDJSON_LIB_NAME}

 add_cpp_benchmark(parse)
 add_cpp_benchmark(statisticalmodel)
+add_executable(perfdiff perfdiff.cpp)
--- a/benchmark/Dockerfile
+++ b/benchmark/Dockerfile
@ -0,0 +1,19 @@
+# Image that copies the current source tree into /usr/src/current/ and runs
+# `make checkperf` there.
+#
+# From the ROOT, run:
+# docker build -t simdjsonbench -f benchmark/Dockerfile . && docker run --privileged -t simdjsonbench
+#
+# NOTE(review): `make checkperf` below is a RUN step, so it executes during
+# `docker build`. This file sets no CMD or ENTRYPOINT, so the `docker run`
+# above only starts whatever default command the base image defines --
+# confirm that is intended.
+FROM gcc:8.3
+
+# The two sections below are disabled; they would build `parse` from a tagged
+# release and from master respectively.
+# # Build latest
+# ENV latest_release=v0.2.1
+# WORKDIR /usr/src/$latest_release/
+# RUN git clone --depth 1 https://github.com/lemire/simdjson/ -b $latest_release .
+# RUN make parse
+
+# # Build master
+# WORKDIR /usr/src/master/
+# RUN git clone --depth 1 https://github.com/lemire/simdjson/ .
+# RUN make parse
+
+# Build the current source
+COPY . /usr/src/current/
+WORKDIR /usr/src/current/
+RUN make checkperf
--- a/benchmark/parse.cpp
+++ b/benchmark/parse.cpp
@ -124,11 +124,20 @@ int main(int argc, char *argv[]) {
  bool json_output = false;
  bool force_one_iteration = false;
  bool just_data = false;
+  int32_t iterations = -1;
+  int32_t warmup_iterations = -1;
+
 #ifndef _MSC_VER
  int c;

-  while ((c = getopt(argc, argv, "1vdt")) != -1) {
+  while ((c = getopt(argc, argv, "1vdtn:w:")) != -1) {
    switch (c) {
+    case 'n':
+      iterations = atoi(optarg);
+      break;
+    case 'w':
+      warmup_iterations = atoi(optarg);
+      break;
    case 't':
      just_data = true;
      break;
@ -174,12 +183,21 @@ int main(int argc, char *argv[]) {
    std::cout << "[verbose] loaded " << filename << " (" << p.size()
              << " bytes)" << std::endl;
  }
-#if defined(DEBUG)
-  const uint32_t iterations = 1;
-#else
-  const uint32_t iterations =
-      force_one_iteration ? 1 : (p.size() < 1 * 1000 * 1000 ? 1000 : 10);
-#endif
+  if (iterations == -1) {
+    #if defined(DEBUG)
+      iterations = 1;
+    #else
+      iterations = force_one_iteration ? 1 : (p.size() < 1 * 1000 * 1000 ? 1000 : 10);
+    #endif
+  }
+  if (warmup_iterations == -1) {
+    #if defined(DEBUG)
+      warmup_iterations = 0;
+    #else
+      warmup_iterations = (p.size() < 1 * 1000 * 1000) ? 10 : 1;
+    #endif
+  }
+
  std::vector<double> res;
  res.resize(iterations);
  if (!just_data)
@ -224,9 +242,33 @@ int main(int argc, char *argv[]) {
  unsigned long cref0 = 0, cref1 = 0, cref2 = 0;
  unsigned long cmis0 = 0, cmis1 = 0, cmis2 = 0;
 #endif
+
+  // Do warmup iterations
  bool isok = true;
+  for (int32_t i = 0; i < warmup_iterations; i++) {
+    if (verbose) {
+      std::cout << "[verbose] warmup iteration # " << i << std::endl;
+    }
+    simdjson::ParsedJson pj;
+    bool allocok = pj.allocate_capacity(p.size());
+    if (!allocok) {
+      std::cerr << "failed to allocate memory" << std::endl;
+      return EXIT_FAILURE;
+    }
+    isok = (simdjson::stage1_ptr((const uint8_t *)p.data(), p.size(), pj) ==
+            simdjson::SUCCESS);
+    isok = isok &&
+           (simdjson::SUCCESS ==
+            simdjson::unified_ptr((const uint8_t *)p.data(), p.size(), pj));
+    if (!isok) {
+      std::cerr << pj.get_error_message() << std::endl;
+      std::cerr << "Could not parse. " << std::endl;
+      return EXIT_FAILURE;
+    }
+  }
+
 #ifndef SQUASH_COUNTERS
-  for (uint32_t i = 0; i < iterations; i++) {
+  for (int32_t i = 0; i < iterations; i++) {
    if (verbose) {
      std::cout << "[verbose] iteration # " << i << std::endl;
    }
@ -275,8 +317,9 @@ int main(int argc, char *argv[]) {
    }
  }
 #endif
+
  // we do it again, this time just measuring the elapsed time
-  for (uint32_t i = 0; i < iterations; i++) {
+  for (int32_t i = 0; i < iterations; i++) {
    if (verbose) {
      std::cout << "[verbose] iteration # " << i << std::endl;
    }
--- a/benchmark/perfdiff.cpp
+++ b/benchmark/perfdiff.cpp
@ -0,0 +1,73 @@
+#include <cstdio>
+#include <iostream>
+#include <memory>
+#include <stdexcept>
+#include <string>
+#include <sstream>
+#include <array>
+
+#ifdef _WIN32
+#define popen _popen
+#define pclose _pclose
+#endif
+
+/**
+ * Runs a shell command and returns everything it wrote to standard output.
+ *
+ * @param cmd command line handed to popen() in read mode.
+ * @return the captured standard output of the command.
+ * @throws std::runtime_error if popen() fails, or if pclose() reports a
+ *         nonzero status for the command.
+ */
+std::string exec(const char* cmd) {
+    std::array<char, 128> buffer;
+    std::string result;
+    // The unique_ptr still closes the pipe if appending to result throws.
+    std::unique_ptr<FILE, decltype(&pclose)> pipe(popen(cmd, "r"), pclose);
+    if (!pipe) {
+        throw std::runtime_error("popen() failed!");
+    }
+    while (fgets(buffer.data(), static_cast<int>(buffer.size()), pipe.get()) != nullptr) {
+        result += buffer.data();
+    }
+    // Close explicitly so the status can be inspected: output from a command
+    // that failed must not be treated as a valid (possibly empty) measurement.
+    const int status = pclose(pipe.release());
+    if (status != 0) {
+        throw std::runtime_error(std::string("command failed: ") + cmd);
+    }
+    return result;
+}
+
+/**
+ * Averages the numeric value found after the fifth tab on every line of
+ * parseOutput (presumably the throughput column of `parse -t` -- confirm
+ * against the benchmark's output format).
+ *
+ * @param parseOutput newline-separated, tab-delimited text.
+ * @return arithmetic mean of the parsed values, one per line.
+ * @throws std::runtime_error if a line has fewer than five tabs or if there
+ *         are no lines at all.
+ * @throws std::invalid_argument / std::out_of_range from std::stod when the
+ *         text after the fifth tab is not a representable number.
+ */
+double readThroughput(std::string parseOutput) {
+    std::istringstream output(parseOutput);
+    std::string line;
+    double total = 0;
+    int numResults = 0;
+    while (std::getline(output, line)) {
+        // Keep the position as size_type so npos is compared exactly rather
+        // than being narrowed into an int.
+        std::string::size_type pos = 0;
+        for (int i = 0; i < 5; i++) {
+            pos = line.find('\t', pos);
+            if (pos == std::string::npos) {
+                // Stop here: continuing would restart the search from the
+                // beginning of the line and parse an unrelated field.
+                std::cerr << "Command printed out a line with less than 5 fields in it:\n" << line << std::endl;
+                throw std::runtime_error("malformed line in command output");
+            }
+            pos++;
+        }
+        total += std::stod(line.substr(pos));
+        numResults++;
+    }
+    if (numResults == 0) {
+        // Avoid returning 0/0 (NaN), which compares false against any
+        // threshold and would make a broken run look like a pass.
+        throw std::runtime_error("command produced no output lines");
+    }
+    return total / numResults;
+}
+
+const double ERROR_MARGIN = 10; // 10%
+// Number of times each command is run; new and reference runs alternate.
+const int INTERLEAVED_ATTEMPTS = 4;
+
+/**
+ * Runs the two commands given on the command line INTERLEAVED_ATTEMPTS times
+ * each, averages the value readThroughput() extracts from their output, and
+ * prints the relative difference.
+ *
+ * @param argv argv[1] is the new command, argv[2] the reference command.
+ * @return 0 when the new average is within ERROR_MARGIN percent of the
+ *         reference (or better); 1 on wrong usage, on a failed or unparsable
+ *         run, or when the new average is more than ERROR_MARGIN percent lower.
+ */
+int main(int argc, char *argv[]) {
+    if (argc != 3) {
+        std::cerr << "Usage: " << argv[0] << " <new parse cmd> <reference parse cmd>" << std::endl;
+        return 1;
+    }
+    double newThroughput = 0;
+    double referenceThroughput = 0;
+    try {
+        for (int attempt = 0; attempt < INTERLEAVED_ATTEMPTS; attempt++) {
+            newThroughput += readThroughput(exec(argv[1]));
+            referenceThroughput += readThroughput(exec(argv[2]));
+        }
+    } catch (const std::exception& e) {
+        // Report and fail cleanly instead of terminating on an uncaught exception.
+        std::cerr << "perfdiff failed: " << e.what() << std::endl;
+        return 1;
+    }
+    newThroughput /= INTERLEAVED_ATTEMPTS;
+    referenceThroughput /= INTERLEAVED_ATTEMPTS;
+
+    std::cout << "New throughput: " << newThroughput << std::endl;
+    std::cout << "Ref throughput: " << referenceThroughput << std::endl;
+    // A zero, negative or NaN reference makes the ratio below meaningless.
+    if (!(referenceThroughput > 0)) {
+        std::cerr << "Reference throughput is not a positive number; cannot compare." << std::endl;
+        return 1;
+    }
+    double percentDifference = ((newThroughput / referenceThroughput) - 1.0) * 100;
+    std::cout << "Difference: " << percentDifference << "%" << std::endl;
+    // Written as a negated >= so that a NaN difference fails the check rather
+    // than slipping through a `<` comparison that is always false for NaN.
+    if (!(percentDifference >= -ERROR_MARGIN)) {
+        std::cerr << "New throughput is more than " << ERROR_MARGIN << "% degraded from reference throughput!" << std::endl;
+        return 1;
+    }
+    return 0;
+}
--- a/scripts/checkperf.sh
+++ b/scripts/checkperf.sh
@ -0,0 +1,30 @@
+#!/bin/bash
+#
+# Builds the `parse` benchmark from a reference branch and from the current
+# tree, then runs perfdiff on the two binaries.
+#
+# Arguments: checkperf.sh <branch> <test json files>
+#   <branch>           reference branch to clone (default: master)
+#   <test json files>  files passed to `parse -t` (default: jsonexamples/twitter.json)
+
+set -e
+SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P )"
+# Root of the current checkout. This must be assigned before $reference is
+# derived from it; with $current empty, $reference would expand to
+# /benchbranch/<branch> and the rm -rf below would target the filesystem root.
+current="$SCRIPTPATH/.."
+
+if [ -z "$1" ]; then reference_branch="master"; else reference_branch=$1; shift; fi
+if [ -z "$*" ]; then perftests="jsonexamples/twitter.json"; else perftests=$*; fi
+
+# Clone and build the reference branch's parse
+echo "Cloning and build the reference branch ($reference_branch) ..."
+reference="$current/benchbranch/$reference_branch"
+rm -rf "$reference"
+mkdir -p "$reference"
+git clone --depth 1 -b "$reference_branch" https://github.com/lemire/simdjson "$reference"
+cd "$reference"
+make parse
+
+# Build the current branch's parse
+echo "Building the current branch ..."
+cd "$current"
+make clean
+make parse
+
+# Run them and diff performance
+make perfdiff
+
+echo "Running perfdiff:"
+# Each argument is a full command line that perfdiff executes itself.
+./perfdiff "$current/parse -t $perftests" "$reference/parse -t $perftests"