check for performance degradation in CI (#270)

* Add -n and -w arguments * Add Dockerfile that compares perf against master * Add checkperf to .drone.yml * Clone from github instead of .git since CI doesn't have .git
2019-08-12 13:03:56 -07:00 · 2019-08-12 13:03:56 -07:00 · 875e2f9d0d
parent 3fb82502f7
commit 875e2f9d0d
7 changed files with 189 additions and 9 deletions
--- a/.drone.yml
+++ b/.drone.yml
@ -12,6 +12,10 @@ steps:
  - make
  - make quiettest
  - make amalgamate
 - name: checkperf
  image: gcc:8
  commands:
  - make checkperf
 ---
 kind: pipeline
 name: arm64
@ -27,6 +31,10 @@ steps:
  - make
  - make quiettest
  - make amalgamate
 - name: checkperf
  image: gcc:8
  commands:
  - make checkperf
 ---
 kind: pipeline
 name: stylecheck
--- a/6
+++ b/6
@ -123,6 +123,12 @@ $(JSON_INCLUDE) $(SAJSON_INCLUDE) $(RAPIDJSON_INCLUDE) $(JSON11_INCLUDE) $(FASTJ
 parse: benchmark/parse.cpp $(HEADERS) $(LIBFILES)
 	$(CXX) $(CXXFLAGS) -o parse $(LIBFILES) benchmark/parse.cpp $(LIBFLAGS)
 perfdiff: benchmark/perfdiff.cpp
 	$(CXX) $(CXXFLAGS) -o perfdiff benchmark/perfdiff.cpp $(LIBFLAGS)
 checkperf:
 	bash ./scripts/checkperf.sh
 statisticalmodel: benchmark/statisticalmodel.cpp $(HEADERS) $(LIBFILES)
 	$(CXX) $(CXXFLAGS) -o statisticalmodel $(LIBFILES) benchmark/statisticalmodel.cpp $(LIBFLAGS)
--- a/benchmark/CMakeLists.txt
+++ b/benchmark/CMakeLists.txt
@ -6,3 +6,4 @@ target_include_directories(${SIMDJSON_LIB_NAME}
 add_cpp_benchmark(parse)
 add_cpp_benchmark(statisticalmodel)
 add_executable(perfdiff perfdiff.cpp)
--- a/benchmark/Dockerfile
+++ b/benchmark/Dockerfile
@ -0,0 +1,19 @@
 # From the ROOT, run:
 # docker build -t simdjsonbench -f benchmark/Dockerfile . && docker run --privileged -t simdjsonbench
 FROM gcc:8.3
 # # Build latest
 # ENV latest_release=v0.2.1
 # WORKDIR /usr/src/$latest_release/
 # RUN git clone --depth 1 https://github.com/lemire/simdjson/ -b $latest_release .
 # RUN make parse
 # # Build master
 # WORKDIR /usr/src/master/
 # RUN git clone --depth 1 https://github.com/lemire/simdjson/ .
 # RUN make parse
 # Build the current source
 COPY . /usr/src/current/
 WORKDIR /usr/src/current/
 RUN make checkperf
--- a/benchmark/parse.cpp
+++ b/benchmark/parse.cpp
@ -124,11 +124,20 @@ int main(int argc, char *argv[]) {
  bool json_output = false;
  bool force_one_iteration = false;
  bool just_data = false;
  int32_t iterations = -1;
  int32_t warmup_iterations = -1;
 #ifndef _MSC_VER
  int c;
-  while ((c = getopt(argc, argv, "1vdt")) != -1) {
+  while ((c = getopt(argc, argv, "1vdtn:w:")) != -1) {
    switch (c) {
    case 'n':
      iterations = atoi(optarg);
      break;
    case 'w':
      warmup_iterations = atoi(optarg);
      break;
    case 't':
      just_data = true;
      break;
@ -174,12 +183,21 @@ int main(int argc, char *argv[]) {
    std::cout << "[verbose] loaded " << filename << " (" << p.size()
              << " bytes)" << std::endl;
  }
-#if defined(DEBUG)
+  if (iterations == -1) {
-  const uint32_t iterations = 1;
+    #if defined(DEBUG)
-#else
+      iterations = 1;
-  const uint32_t iterations =
+    #else
-      force_one_iteration ? 1 : (p.size() < 1 * 1000 * 1000 ? 1000 : 10);
+      iterations = force_one_iteration ? 1 : (p.size() < 1 * 1000 * 1000 ? 1000 : 10);
-#endif
+    #endif
  }
  if (warmup_iterations == -1) {
    #if defined(DEBUG)
      warmup_iterations = 0;
    #else
      warmup_iterations = (p.size() < 1 * 1000 * 1000) ? 10 : 1;
    #endif
  }
  std::vector<double> res;
  res.resize(iterations);
  if (!just_data)
@ -224,9 +242,33 @@ int main(int argc, char *argv[]) {
  unsigned long cref0 = 0, cref1 = 0, cref2 = 0;
  unsigned long cmis0 = 0, cmis1 = 0, cmis2 = 0;
 #endif
  // Do warmup iterations
  bool isok = true;
  for (int32_t i = 0; i < warmup_iterations; i++) {
    if (verbose) {
      std::cout << "[verbose] warmup iteration # " << i << std::endl;
    }
    simdjson::ParsedJson pj;
    bool allocok = pj.allocate_capacity(p.size());
    if (!allocok) {
      std::cerr << "failed to allocate memory" << std::endl;
      return EXIT_FAILURE;
    }
    isok = (simdjson::stage1_ptr((const uint8_t *)p.data(), p.size(), pj) ==
            simdjson::SUCCESS);
    isok = isok &&
           (simdjson::SUCCESS ==
            simdjson::unified_ptr((const uint8_t *)p.data(), p.size(), pj));
    if (!isok) {
      std::cerr << pj.get_error_message() << std::endl;
      std::cerr << "Could not parse. " << std::endl;
      return EXIT_FAILURE;
    }
  }
 #ifndef SQUASH_COUNTERS
-  for (uint32_t i = 0; i < iterations; i++) {
+  for (int32_t i = 0; i < iterations; i++) {
    if (verbose) {
      std::cout << "[verbose] iteration # " << i << std::endl;
    }
@ -275,8 +317,9 @@ int main(int argc, char *argv[]) {
    }
  }
 #endif
  // we do it again, this time just measuring the elapsed time
-  for (uint32_t i = 0; i < iterations; i++) {
+  for (int32_t i = 0; i < iterations; i++) {
    if (verbose) {
      std::cout << "[verbose] iteration # " << i << std::endl;
    }
--- a/benchmark/perfdiff.cpp
+++ b/benchmark/perfdiff.cpp
@ -0,0 +1,73 @@
 #include <cstdio>
 #include <iostream>
 #include <memory>
 #include <stdexcept>
 #include <string>
 #include <sstream>
 #include <array>
 #ifdef _WIN32
 #define popen _popen
 #define pclose _pclose
 #endif
 // Runs `cmd` through popen() in read mode and returns everything the command
 // wrote to the pipe. Throws std::runtime_error if the pipe cannot be opened.
 // The pipe is closed by pclose() when the function exits; the command's exit
 // status is not inspected.
 std::string exec(const char* cmd) {
    FILE* rawStream = popen(cmd, "r");
    std::unique_ptr<FILE, decltype(&pclose)> stream(rawStream, pclose);
    if (stream == nullptr) {
        throw std::runtime_error("popen() failed!");
    }
    std::string captured;
    std::array<char, 128> chunk;
    // fgets() NUL-terminates each chunk, so it can be appended as a C string.
    for (;;) {
        if (fgets(chunk.data(), chunk.size(), stream.get()) == nullptr) {
            break;
        }
        captured.append(chunk.data());
    }
    return captured;
 }
 // Averages the sixth tab-separated field over every line of `parseOutput`.
 //
 // Each line is expected to contain at least five tab characters; the text
 // after the fifth tab is parsed with std::stod and treated as that line's
 // throughput value. Lines with fewer than five tabs are reported on stderr
 // and skipped instead of being parsed as garbage.
 //
 // Returns the arithmetic mean of the parsed values.
 // Throws std::runtime_error if no line yields a value (e.g. empty output),
 // and propagates std::invalid_argument / std::out_of_range from std::stod
 // when the sixth field is not a valid number.
 double readThroughput(std::string parseOutput) {
    std::istringstream output(parseOutput);
    std::string line;
    double total = 0;
    int numResults = 0;
    while (std::getline(output, line)) {
        // Advance past five tabs so that `pos` lands on the sixth field.
        std::string::size_type pos = 0;
        bool malformed = false;
        for (int i = 0; i < 5; i++) {
            pos = line.find('\t', pos);
            if (pos == std::string::npos) {
                malformed = true;
                break;
            }
            pos++;
        }
        if (malformed) {
            std::cerr << "Command printed out a line with fewer than 6 tab-separated fields in it:\n" << line << std::endl;
            continue;
        }
        total += std::stod(line.substr(pos));
        numResults++;
    }
    // Without this guard an empty output would yield 0/0 (NaN), which callers
    // comparing against a threshold would silently treat as "no regression".
    if (numResults == 0) {
        throw std::runtime_error("no throughput results found in command output");
    }
    return total / numResults;
 }
 // Maximum tolerated throughput drop, in percent, before the run is failed.
 const double ERROR_MARGIN = 10; // 10%
 // Number of times each command is run; runs of the two commands alternate.
 const int INTERLEAVED_ATTEMPTS = 4;
 // Usage: perfdiff <new parse cmd> <reference parse cmd>
 //
 // Runs both commands INTERLEAVED_ATTEMPTS times, alternating between them,
 // averages the throughput reported by each, and prints the percent difference.
 // Returns 0 when the new throughput is within ERROR_MARGIN percent of the
 // reference (or better). Returns 1 on a usage error, when a measurement fails
 // or is unusable, or when the new throughput is degraded beyond the margin.
 int main(int argc, char *argv[]) {
    if (argc != 3) {
        std::cerr << "Usage: " << argv[0] << " <new parse cmd> <reference parse cmd>" << std::endl;
        return 1;
    }
    double newThroughput = 0;
    double referenceThroughput = 0;
    try {
        for (int attempt = 0; attempt < INTERLEAVED_ATTEMPTS; attempt++) {
            newThroughput += readThroughput(exec(argv[1]));
            referenceThroughput += readThroughput(exec(argv[2]));
        }
    } catch (const std::exception &e) {
        // Report the failure instead of aborting through std::terminate.
        std::cerr << "Failed to measure throughput: " << e.what() << std::endl;
        return 1;
    }
    newThroughput /= INTERLEAVED_ATTEMPTS;
    referenceThroughput /= INTERLEAVED_ATTEMPTS;
    std::cout << "New throughput: " << newThroughput << std::endl;
    std::cout << "Ref throughput: " << referenceThroughput << std::endl;
    // The negated comparisons are also true for NaN, so a failed measurement
    // cannot slip through the threshold check below as "no regression".
    if (!(newThroughput >= 0) || !(referenceThroughput > 0)) {
        std::cerr << "Could not compute a valid throughput comparison." << std::endl;
        return 1;
    }
    double percentDifference = ((newThroughput / referenceThroughput) - 1.0) * 100;
    std::cout << "Difference: " << percentDifference << "%" << std::endl;
    if (percentDifference < -ERROR_MARGIN) {
        std::cerr << "New throughput is more than " << ERROR_MARGIN << "% degraded from reference throughput!" << std::endl;
        return 1;
    }
    return 0;
 }
--- a/scripts/checkperf.sh
+++ b/scripts/checkperf.sh
@ -0,0 +1,30 @@
 #!/bin/bash
 # Builds `parse` from a reference branch and from the current checkout, then
 # runs perfdiff to compare the two on the given JSON test files.
 set -e
 SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P )"
 # Arguments: checkperf.sh <branch> <test json files>
 if [ -z "$1" ]; then reference_branch="master"; else reference_branch=$1; shift; fi
 if [ -z "$*" ]; then perftests="jsonexamples/twitter.json"; else perftests=$*; fi
 # Resolve the checkout root first: the reference clone lives underneath it, so
 # $current must be set before $reference is derived from it. With $current
 # unset, $reference would expand to /benchbranch/<branch> and the rm -rf below
 # would run against the filesystem root.
 current=$SCRIPTPATH/..
 # Clone and build the reference branch's parse
 echo "Cloning and build the reference branch ($reference_branch) ..."
 reference=$current/benchbranch/$reference_branch
 rm -rf "$reference"
 mkdir -p "$reference"
 git clone --depth 1 -b "$reference_branch" https://github.com/lemire/simdjson "$reference"
 cd "$reference"
 make parse
 # Build the current branch's parse
 echo "Building the current branch ..."
 cd "$current"
 make clean
 make parse
 # Run them and diff performance
 make perfdiff
 echo "Running perfdiff:"
 # $perftests is deliberately expanded inside each quoted command string:
 # perfdiff takes exactly two arguments, one full command line per build.
 ./perfdiff "$current/parse -t $perftests" "$reference/parse -t $perftests"