Updated some performance evaluation code.

This commit is contained in:
Geoff Langdale 2018-07-24 14:41:45 +10:00
parent 9ed3a4a735
commit fe9c79df9d
3 changed files with 56 additions and 29 deletions

View File

@ -15,7 +15,7 @@ EXECUTABLES=parse
all: $(EXECUTABLES)
parse: main.cpp common_defs.h
parse: main.cpp common_defs.h linux-perf-events.h
$(CXX) $(CXXFLAGS) -o parse main.cpp

View File

@ -11,58 +11,75 @@
#include <cstring> // for memset
#include <stdexcept>
#include <vector>
template <int TYPE = PERF_TYPE_HARDWARE>
class LinuxEvents {
int fd;
perf_event_attr attribs;
int num_events;
std::vector<uint64_t> temp_result_vec;
std::vector<uint64_t> ids;
public:
LinuxEvents(int config) : fd(0) {
LinuxEvents(std::vector<int> config_vec) : fd(0) {
memset(&attribs, 0, sizeof(attribs));
attribs.type = TYPE;
attribs.size = sizeof(attribs);
attribs.config = config;
attribs.disabled = 1;
attribs.exclude_kernel = 1;
attribs.exclude_hv = 1;
attribs.sample_period = 0;
attribs.read_format = PERF_FORMAT_GROUP | PERF_FORMAT_ID;
const int pid = 0; // the current process
const int cpu = -1; // all CPUs
const int group = -1; // no group
const unsigned long flags = 0;
fd = syscall(__NR_perf_event_open, &attribs, pid, cpu, group, flags);
if (fd == -1) {
report_error("perf_event_open");
int group = -1; // no group
num_events = config_vec.size();
u32 i = 0;
for (auto config: config_vec) {
attribs.config = config;
fd = syscall(__NR_perf_event_open, &attribs, pid, cpu, group, flags);
if (fd == -1) {
report_error("perf_event_open");
}
ioctl(fd, PERF_EVENT_IOC_ID, &ids[i++]);
if (group == -1) {
group = fd;
}
}
temp_result_vec.resize(num_events*2 + 1);
}
// Closes the descriptor currently stored in fd; the result of close() is
// ignored.
// NOTE(review): only this single descriptor is closed. When the constructor
// opens several events, fd is overwritten on each open, so the earlier
// descriptors are not closed here.
~LinuxEvents() {
  (void)close(fd);
}
// Resets and then enables the counter(s) reachable through fd using two ioctl
// calls; report_error is invoked when either ioctl returns -1.
// NOTE(review): this span looks like a diff rendering whose +/- markers were
// stripped, so two revisions of the signature and of each ioctl line appear
// back to back (third argument 0 versus PERF_IOC_FLAG_GROUP). Confirm which
// lines belong to the current revision before compiling.
void start() {
if (ioctl(fd, PERF_EVENT_IOC_RESET, 0) == -1) {
really_inline void start() {
// Zero the counts first so a start()/end() pair measures only the code between them.
if (ioctl(fd, PERF_EVENT_IOC_RESET, PERF_IOC_FLAG_GROUP) == -1) {
report_error("ioctl(PERF_EVENT_IOC_RESET)");
}
if (ioctl(fd, PERF_EVENT_IOC_ENABLE, 0) == -1) {
// Per perf_event_open(2), PERF_IOC_FLAG_GROUP applies the request to the whole event group.
if (ioctl(fd, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP) == -1) {
report_error("ioctl(PERF_EVENT_IOC_ENABLE)");
}
}
unsigned long end() {
if (ioctl(fd, PERF_EVENT_IOC_DISABLE, 0) == -1) {
really_inline void end(std::vector<unsigned long long> & results) {
if (ioctl(fd, PERF_EVENT_IOC_DISABLE, PERF_IOC_FLAG_GROUP) == -1) {
report_error("ioctl(PERF_EVENT_IOC_DISABLE)");
}
unsigned long result;
if (read(fd, &result, sizeof(result)) == -1) {
if (read(fd, &temp_result_vec[0], temp_result_vec.size() * 8) == -1) {
report_error("read");
}
return result;
// our actual results are in slots 1,3,5, ... of this structure
// we really should be checking our ids obtained earlier to be safe
for (u32 i = 1; i < temp_result_vec.size(); i+=2) {
results[i/2] = temp_result_vec[i];
}
}
private:
@ -72,3 +89,5 @@ private:
};
#endif

View File

@ -1,4 +1,3 @@
#include "linux-perf-events.h"
#include <iostream>
#include <iomanip>
#include <chrono>
@ -13,6 +12,7 @@
#include <x86intrin.h>
#include <assert.h>
#include "common_defs.h"
#include "linux-perf-events.h"
using namespace std;
@ -1200,34 +1200,42 @@ int main(int argc, char * argv[]) {
#endif
#ifndef SQUASH_COUNTERS
LinuxEvents<PERF_TYPE_HARDWARE> cycles(PERF_COUNT_HW_CPU_CYCLES);
LinuxEvents<PERF_TYPE_HARDWARE> instructions(PERF_COUNT_HW_INSTRUCTIONS);
vector<int> evts;
evts.push_back(PERF_COUNT_HW_CPU_CYCLES);
evts.push_back(PERF_COUNT_HW_INSTRUCTIONS);
LinuxEvents<PERF_TYPE_HARDWARE> unified(evts);
vector<u64> results;
results.resize(evts.size());
unsigned long cy1 = 0, cy2 = 0, cy3 = 0, cy4 = 0;
unsigned long cl1 = 0, cl2 = 0, cl3 = 0, cl4 = 0;
#endif
for (u32 i = 0; i < iterations; i++) {
auto start = std::chrono::steady_clock::now();
#ifndef SQUASH_COUNTERS
cycles.start(); instructions.start();
unified.start();
#endif
find_structural_bits(p.first, p.second, pj);
#ifndef SQUASH_COUNTERS
cl1 += instructions.end(); cy1 += cycles.end();
cycles.start(); instructions.start();
unified.end(results);
cy1 += results[0]; cl1 += results[1];
unified.start();
#endif
flatten_indexes(p.second, pj);
#ifndef SQUASH_COUNTERS
cl2 += instructions.end(); cy2 += cycles.end();
cycles.start(); instructions.start();
unified.end(results);
cy2 += results[0]; cl2 += results[1];
unified.start();
#endif
ape_machine(p.first, p.second, pj);
#ifndef SQUASH_COUNTERS
cl3 += instructions.end(); cy3 += cycles.end();
cycles.start(); instructions.start();
unified.end(results);
cy3 += results[0]; cl3 += results[1];
unified.start();
#endif
shovel_machine(p.first, p.second, pj);
#ifndef SQUASH_COUNTERS
cl4 += instructions.end(); cy4 += cycles.end();
unified.end(results);
cy4 += results[0]; cl4 += results[1];
#endif
auto end = std::chrono::steady_clock::now();
std::chrono::duration<double> secs = end - start;