Benchmark rapidjson next to the SIMD and scalar JSON parsers.

  * scalarvssimd/Makefile: `bench` now depends on rapidjson/license.txt, and a
    new rule fetches the rapidjson submodule when that file is missing.
  * scalarvssimd/benchmarks/bench.cpp: sort the includes, remove the MyHandler
    struct and its bogus counters (main() never used them), reformat main(),
    hand rapidjson the NUL-terminated copy of the input instead of the raw
    corpus pointer, and add a ParseInsitu run.
  * scalarvssimd/include/benchmark.h: the timing macro reports its numbers
    "per input byte" and prints the average next to the best timing.

Review amendments, all on the added ("+") side; hunk sizes are unchanged:
  * bench.cpp: the `-v` test used the raw strcmp() result, which is non-zero
    when the strings differ, so verbose mode was switched on by every first
    argument except `-v`. It now compares the result against 0.
  * bench.cpp: comment typo "sapce" corrected to "space".
  * Makefile: `git submodule foreach` only visits submodules that are already
    checked out, so it could not create rapidjson/license.txt on a fresh
    clone. The rule now runs `git submodule update --init --recursive`.
  Because of these amendments the post-image blob ids on the `index` lines no
  longer describe the resulting files.

Reconstruction notes: this patch was recovered from a copy whose whitespace
had been collapsed. Line breaks, indentation and the position of blank
context lines were rebuilt so that every hunk matches the line counts in its
header; verify against the pre-image before applying. The three standard
header names were restored from their trailing comments. The template
arguments of `pair` and any placeholder in the usage strings were missing from
that copy and are left as found.

diff --git a/scalarvssimd/Makefile b/scalarvssimd/Makefile
index 739fa806..ea27af69 100644
--- a/scalarvssimd/Makefile
+++ b/scalarvssimd/Makefile
@@ -1,6 +1,10 @@
 HEADERS:=include/avxprocessing.h include/benchmark.h include/common_defs.h include/jsonstruct.h include/scalarprocessing.h include/util.h
 
-bench: benchmarks/bench.cpp $(HEADERS)
+bench: benchmarks/bench.cpp rapidjson/license.txt $(HEADERS)
 	$(CXX) -std=c++11 -O3 -o $@ benchmarks/bench.cpp -Irapidjson/include -Iinclude -march=native -lm -Wall -Wextra
+rapidjson/license.txt:
+	git submodule update --init --recursive
+
+
 clean:
 	rm -f bench
diff --git a/scalarvssimd/benchmarks/bench.cpp b/scalarvssimd/benchmarks/bench.cpp
index 02bc2286..fb12488a 100644
--- a/scalarvssimd/benchmarks/bench.cpp
+++ b/scalarvssimd/benchmarks/bench.cpp
@@ -1,107 +1,78 @@
-#include "jsonstruct.h"
-#include "rapidjson/reader.h" // you have to check in the submodule
-#include "rapidjson/document.h"
-#include "rapidjson/writer.h"
-#include "rapidjson/stringbuffer.h"
-#include "scalarprocessing.h"
 #include "avxprocessing.h"
 #include "benchmark.h"
+#include "jsonstruct.h"
+#include "rapidjson/document.h"
+#include "rapidjson/reader.h" // you have to check in the submodule
+#include "rapidjson/stringbuffer.h"
+#include "rapidjson/writer.h"
+#include "scalarprocessing.h"
 #include "util.h"
 
-#include <string> // std::string
-#include <iostream> // std::cout
-#include <sstream> // std::stringstream
-
-//colorfuldisplay(ParsedJson & pj, const u8 * buf)
-//BEST_TIME_NOCHECK(dividearray32(array, N), , repeat, N, timings,true);
+// colorfuldisplay(ParsedJson & pj, const u8 * buf)
 
 using namespace rapidjson;
 using namespace std;
 
-size_t bogus1 = 0;
-size_t bogus2 = 0;
-size_t bogus3 = 0;
+int main(int argc, char *argv[]) {
+  if (argc < 2) {
+    cerr << "Usage: " << argv[0] << " \n";
+    cerr << "Or " << argv[0] << " -v \n";
+    exit(1);
+  }
+  bool verbose = false;
+  if (argc > 2) {
+    if (strcmp(argv[1], "-v") == 0)
+      verbose = true;
+  }
+  pair p = get_corpus(argv[argc - 1]);
+  ParsedJson pj;
+  std::cout << "Input has ";
+  if (p.second > 1024 * 1024)
+    std::cout << p.second / (1024 * 1024) << " MB ";
+  else if (p.second > 1024)
+    std::cout << p.second / 1024 << " KB ";
+  else
+    std::cout << p.second << " B ";
+  std::cout << std::endl;
 
-struct MyHandler {
-    bool Null() { bogus1++; return true; }
-    bool Bool(bool b) { bogus2++; return true; }
-    bool Int(int i) { bogus3++; return true; }
-    bool Uint(unsigned u) { bogus2++; return true; }
-    bool Int64(int64_t i) { bogus2++; return true; }
-    bool Uint64(uint64_t u) { bogus2++; return true; }
-    bool Double(double d) { bogus2++; return true; }
-    bool RawNumber(const char* str, SizeType length, bool copy) {
-        bogus3++;
-        return true;
-    }
-    bool String(const char* str, SizeType length, bool copy) {
-        bogus2++;
-        return true;
-    }
-    bool StartObject() { bogus1++; return true; }
-    bool Key(const char* str, SizeType length, bool copy) {
-        bogus2++;
-        return true;
-    }
-    bool EndObject(SizeType memberCount) { bogus2++; return true; }
-    bool StartArray() { bogus2++; return true; }
-    bool EndArray(SizeType elementCount) { bogus1++; return true; }
-};
+  if (posix_memalign((void **)&pj.structurals, 8,
+                     ROUNDUP_N(p.second, 64) / 8)) {
+    throw "Allocation failed";
+  };
 
+  pj.n_structural_indexes = 0;
+  // we have potentially 1 structure per byte of input
+  // as well as a dummy structure and a root structure
+  // we also potentially write up to 7 iterations beyond
+  // in our 'cheesy flatten', so make some worst-case
+  // space for that too
+  u32 max_structures = ROUNDUP_N(p.second, 64) + 2 + 7;
+  pj.structural_indexes = new u32[max_structures];
+  pj.nodes = new JsonNode[max_structures];
+  if (verbose) {
+    std::cout << "Parsing SIMD (once) " << std::endl;
+    avx_json_parse(p.first, p.second, pj);
+    colorfuldisplay(pj, p.first);
+    debugdisplay(pj, p.first);
+    std::cout << "Parsing scalar (once) " << std::endl;
+    scalar_json_parse(p.first, p.second, pj);
+    colorfuldisplay(pj, p.first);
+    debugdisplay(pj, p.first);
+  }
 
-int main(int argc, char * argv[]) {
-    if (argc < 2) {
-        cerr << "Usage: " << argv[0] << " \n";
-        cerr << "Or " << argv[0] << " -v \n";
-        exit(1);
-    }
-    bool verbose = false;
-    if (argc > 2) {
-        if(strcmp(argv[1],"-v")) verbose = true;
-    }
-    pair p = get_corpus(argv[argc - 1]);
-    ParsedJson pj;
-    std::cout << "Input has ";
-    if(p.second > 1024 * 1024)
-        std::cout << p.second / (1024*1024) << " MB ";
-    else if (p.second > 1024)
-        std::cout << p.second / 1024 << " KB ";
-    else
-        std::cout << p.second << " B ";
-    std::cout << std::endl;
+  int repeat = 10;
+  int volume = p.second;
+  BEST_TIME_NOCHECK(avx_json_parse(p.first, p.second, pj), , repeat, volume,
+                    true);
+  BEST_TIME_NOCHECK(scalar_json_parse(p.first, p.second, pj), , repeat, volume,
+                    true);
 
-    if (posix_memalign( (void **)&pj.structurals, 8, ROUNDUP_N(p.second, 64)/8)) {
-        throw "Allocation failed";
-    };
-
-    pj.n_structural_indexes = 0;
-    // we have potentially 1 structure per byte of input
-    // as well as a dummy structure and a root structure
-    // we also potentially write up to 7 iterations beyond
-    // in our 'cheesy flatten', so make some worst-case
-    // sapce for that too
-    u32 max_structures = ROUNDUP_N(p.second, 64) + 2 + 7;
-    pj.structural_indexes = new u32[max_structures];
-    pj.nodes = new JsonNode[max_structures];
-    if(verbose) {
-        std::cout << "Parsing SIMD (once) " << std::endl;
-        avx_json_parse(p.first, p.second, pj);
-        colorfuldisplay(pj, p.first);
-        debugdisplay(pj,p.first);
-        std::cout << "Parsing scalar (once) " << std::endl;
-        scalar_json_parse(p.first, p.second, pj);
-        colorfuldisplay(pj, p.first);
-        debugdisplay(pj,p.first);
-    }
-
-    int repeat = 10;
-    int volume = p.second;
-    BEST_TIME_NOCHECK(avx_json_parse(p.first, p.second, pj), , repeat, volume, true);
-    BEST_TIME_NOCHECK(scalar_json_parse(p.first, p.second, pj), , repeat, volume, true);
-
-    rapidjson::Document d;
-    char buffer[p.second+1024];
-    memcpy(buffer, p.first, p.second);
-    buffer[p.second]='\0';
-    BEST_TIME(d.Parse((const char *)p.first).HasParseError(), false, memcpy(buffer, p.first, p.second) , repeat, volume, true);
+  rapidjson::Document d;
+  char buffer[p.second + 1024];
+  memcpy(buffer, p.first, p.second);
+  buffer[p.second] = '\0';
+  BEST_TIME(d.Parse((const char *)buffer).HasParseError(), false,
+            memcpy(buffer, p.first, p.second), repeat, volume, true);
+  BEST_TIME(d.ParseInsitu(buffer).HasParseError(), false,
+            memcpy(buffer, p.first, p.second), repeat, volume, true);
 }
diff --git a/scalarvssimd/include/benchmark.h b/scalarvssimd/include/benchmark.h
index f503e221..3dbbf7a7 100644
--- a/scalarvssimd/include/benchmark.h
+++ b/scalarvssimd/include/benchmark.h
@@ -104,8 +104,11 @@ uint64_t global_rdtsc_overhead = (uint64_t)UINT64_MAX;
     } \
     uint64_t S = size; \
     float cycle_per_op = (min_diff) / (double)S; \
+    float avg_cycle_per_op = (sum_diff) / ((double)S * repeat); \
     if (verbose) \
-      printf(" %.3f %s per operation (best) ", cycle_per_op, unitname); \
+      printf(" %.3f %s per input byte (best) ", cycle_per_op, unitname); \
+    if (verbose) \
+      printf(" %.3f %s per input byte (avg) ", avg_cycle_per_op, unitname); \
     if (verbose) \
       printf("\n"); \
     if (!verbose) \