Updated some performance evaluation code.

This commit is contained in:
Geoff Langdale 2018-07-24 14:41:45 +10:00
parent 9ed3a4a735
commit fe9c79df9d
3 changed files with 56 additions and 29 deletions

View File

@ -15,7 +15,7 @@ EXECUTABLES=parse
all: $(EXECUTABLES) all: $(EXECUTABLES)
parse: main.cpp common_defs.h parse: main.cpp common_defs.h linux-perf-events.h
$(CXX) $(CXXFLAGS) -o parse main.cpp $(CXX) $(CXXFLAGS) -o parse main.cpp

View File

@ -11,58 +11,75 @@
#include <cstring> // for memset #include <cstring> // for memset
#include <stdexcept> #include <stdexcept>
#include <vector>
template <int TYPE = PERF_TYPE_HARDWARE> template <int TYPE = PERF_TYPE_HARDWARE>
class LinuxEvents { class LinuxEvents {
int fd; int fd;
perf_event_attr attribs; perf_event_attr attribs;
int num_events;
std::vector<uint64_t> temp_result_vec;
std::vector<uint64_t> ids;
public: public:
LinuxEvents(int config) : fd(0) { LinuxEvents(std::vector<int> config_vec) : fd(0) {
memset(&attribs, 0, sizeof(attribs)); memset(&attribs, 0, sizeof(attribs));
attribs.type = TYPE; attribs.type = TYPE;
attribs.size = sizeof(attribs); attribs.size = sizeof(attribs);
attribs.config = config;
attribs.disabled = 1; attribs.disabled = 1;
attribs.exclude_kernel = 1; attribs.exclude_kernel = 1;
attribs.exclude_hv = 1; attribs.exclude_hv = 1;
attribs.sample_period = 0;
attribs.read_format = PERF_FORMAT_GROUP | PERF_FORMAT_ID;
const int pid = 0; // the current process const int pid = 0; // the current process
const int cpu = -1; // all CPUs const int cpu = -1; // all CPUs
const int group = -1; // no group
const unsigned long flags = 0; const unsigned long flags = 0;
fd = syscall(__NR_perf_event_open, &attribs, pid, cpu, group, flags);
if (fd == -1) { int group = -1; // no group
report_error("perf_event_open"); num_events = config_vec.size();
u32 i = 0;
for (auto config: config_vec) {
attribs.config = config;
fd = syscall(__NR_perf_event_open, &attribs, pid, cpu, group, flags);
if (fd == -1) {
report_error("perf_event_open");
}
ioctl(fd, PERF_EVENT_IOC_ID, &ids[i++]);
if (group == -1) {
group = fd;
}
} }
temp_result_vec.resize(num_events*2 + 1);
} }
~LinuxEvents() { ~LinuxEvents() {
close(fd); close(fd);
} }
void start() { really_inline void start() {
if (ioctl(fd, PERF_EVENT_IOC_RESET, 0) == -1) { if (ioctl(fd, PERF_EVENT_IOC_RESET, PERF_IOC_FLAG_GROUP) == -1) {
report_error("ioctl(PERF_EVENT_IOC_RESET)"); report_error("ioctl(PERF_EVENT_IOC_RESET)");
} }
if (ioctl(fd, PERF_EVENT_IOC_ENABLE, 0) == -1) { if (ioctl(fd, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP) == -1) {
report_error("ioctl(PERF_EVENT_IOC_ENABLE)"); report_error("ioctl(PERF_EVENT_IOC_ENABLE)");
} }
} }
unsigned long end() { really_inline void end(std::vector<unsigned long long> & results) {
if (ioctl(fd, PERF_EVENT_IOC_DISABLE, 0) == -1) { if (ioctl(fd, PERF_EVENT_IOC_DISABLE, PERF_IOC_FLAG_GROUP) == -1) {
report_error("ioctl(PERF_EVENT_IOC_DISABLE)"); report_error("ioctl(PERF_EVENT_IOC_DISABLE)");
} }
unsigned long result; if (read(fd, &temp_result_vec[0], temp_result_vec.size() * 8) == -1) {
if (read(fd, &result, sizeof(result)) == -1) {
report_error("read"); report_error("read");
} }
// our actual results are in slots 1,3,5, ... of this structure
return result; // we really should be checking our ids obtained earlier to be safe
for (u32 i = 1; i < temp_result_vec.size(); i+=2) {
results[i/2] = temp_result_vec[i];
}
} }
private: private:
@ -72,3 +89,5 @@ private:
}; };
#endif #endif

View File

@ -1,4 +1,3 @@
#include "linux-perf-events.h"
#include <iostream> #include <iostream>
#include <iomanip> #include <iomanip>
#include <chrono> #include <chrono>
@ -13,6 +12,7 @@
#include <x86intrin.h> #include <x86intrin.h>
#include <assert.h> #include <assert.h>
#include "common_defs.h" #include "common_defs.h"
#include "linux-perf-events.h"
using namespace std; using namespace std;
@ -1200,34 +1200,42 @@ int main(int argc, char * argv[]) {
#endif #endif
#ifndef SQUASH_COUNTERS #ifndef SQUASH_COUNTERS
LinuxEvents<PERF_TYPE_HARDWARE> cycles(PERF_COUNT_HW_CPU_CYCLES); vector<int> evts;
LinuxEvents<PERF_TYPE_HARDWARE> instructions(PERF_COUNT_HW_INSTRUCTIONS); evts.push_back(PERF_COUNT_HW_CPU_CYCLES);
evts.push_back(PERF_COUNT_HW_INSTRUCTIONS);
LinuxEvents<PERF_TYPE_HARDWARE> unified(evts);
vector<u64> results;
results.resize(evts.size());
unsigned long cy1 = 0, cy2 = 0, cy3 = 0, cy4 = 0; unsigned long cy1 = 0, cy2 = 0, cy3 = 0, cy4 = 0;
unsigned long cl1 = 0, cl2 = 0, cl3 = 0, cl4 = 0; unsigned long cl1 = 0, cl2 = 0, cl3 = 0, cl4 = 0;
#endif #endif
for (u32 i = 0; i < iterations; i++) { for (u32 i = 0; i < iterations; i++) {
auto start = std::chrono::steady_clock::now(); auto start = std::chrono::steady_clock::now();
#ifndef SQUASH_COUNTERS #ifndef SQUASH_COUNTERS
cycles.start(); instructions.start(); unified.start();
#endif #endif
find_structural_bits(p.first, p.second, pj); find_structural_bits(p.first, p.second, pj);
#ifndef SQUASH_COUNTERS #ifndef SQUASH_COUNTERS
cl1 += instructions.end(); cy1 += cycles.end(); unified.end(results);
cycles.start(); instructions.start(); cy1 += results[0]; cl1 += results[1];
unified.start();
#endif #endif
flatten_indexes(p.second, pj); flatten_indexes(p.second, pj);
#ifndef SQUASH_COUNTERS #ifndef SQUASH_COUNTERS
cl2 += instructions.end(); cy2 += cycles.end(); unified.end(results);
cycles.start(); instructions.start(); cy2 += results[0]; cl2 += results[1];
unified.start();
#endif #endif
ape_machine(p.first, p.second, pj); ape_machine(p.first, p.second, pj);
#ifndef SQUASH_COUNTERS #ifndef SQUASH_COUNTERS
cl3 += instructions.end(); cy3 += cycles.end(); unified.end(results);
cycles.start(); instructions.start(); cy3 += results[0]; cl3 += results[1];
unified.start();
#endif #endif
shovel_machine(p.first, p.second, pj); shovel_machine(p.first, p.second, pj);
#ifndef SQUASH_COUNTERS #ifndef SQUASH_COUNTERS
cl4 += instructions.end(); cy4 += cycles.end(); unified.end(results);
cy4 += results[0]; cl4 += results[1];
#endif #endif
auto end = std::chrono::steady_clock::now(); auto end = std::chrono::steady_clock::now();
std::chrono::duration<double> secs = end - start; std::chrono::duration<double> secs = end - start;