Updated some performance evaluation code.
This commit is contained in:
parent
9ed3a4a735
commit
fe9c79df9d
2
Makefile
2
Makefile
|
@ -15,7 +15,7 @@ EXECUTABLES=parse
|
|||
|
||||
all: $(EXECUTABLES)
|
||||
|
||||
parse: main.cpp common_defs.h
|
||||
parse: main.cpp common_defs.h linux-perf-events.h
|
||||
$(CXX) $(CXXFLAGS) -o parse main.cpp
|
||||
|
||||
|
||||
|
|
|
@ -11,58 +11,75 @@
|
|||
#include <cstring> // for memset
|
||||
#include <stdexcept>
|
||||
|
||||
#include <vector>
|
||||
|
||||
template <int TYPE = PERF_TYPE_HARDWARE>
|
||||
class LinuxEvents {
|
||||
|
||||
int fd;
|
||||
perf_event_attr attribs;
|
||||
|
||||
int num_events;
|
||||
std::vector<uint64_t> temp_result_vec;
|
||||
std::vector<uint64_t> ids;
|
||||
public:
|
||||
LinuxEvents(int config) : fd(0) {
|
||||
LinuxEvents(std::vector<int> config_vec) : fd(0) {
|
||||
memset(&attribs, 0, sizeof(attribs));
|
||||
attribs.type = TYPE;
|
||||
attribs.size = sizeof(attribs);
|
||||
attribs.config = config;
|
||||
attribs.disabled = 1;
|
||||
attribs.exclude_kernel = 1;
|
||||
attribs.exclude_hv = 1;
|
||||
|
||||
attribs.sample_period = 0;
|
||||
attribs.read_format = PERF_FORMAT_GROUP | PERF_FORMAT_ID;
|
||||
const int pid = 0; // the current process
|
||||
const int cpu = -1; // all CPUs
|
||||
const int group = -1; // no group
|
||||
const unsigned long flags = 0;
|
||||
fd = syscall(__NR_perf_event_open, &attribs, pid, cpu, group, flags);
|
||||
if (fd == -1) {
|
||||
report_error("perf_event_open");
|
||||
|
||||
int group = -1; // no group
|
||||
num_events = config_vec.size();
|
||||
u32 i = 0;
|
||||
for (auto config: config_vec) {
|
||||
attribs.config = config;
|
||||
fd = syscall(__NR_perf_event_open, &attribs, pid, cpu, group, flags);
|
||||
if (fd == -1) {
|
||||
report_error("perf_event_open");
|
||||
}
|
||||
ioctl(fd, PERF_EVENT_IOC_ID, &ids[i++]);
|
||||
if (group == -1) {
|
||||
group = fd;
|
||||
}
|
||||
}
|
||||
|
||||
temp_result_vec.resize(num_events*2 + 1);
|
||||
}
|
||||
|
||||
// Closes the descriptor currently stored in `fd`; the outcome of close() is
// not inspected.
~LinuxEvents() {
  static_cast<void>(close(fd));
}
|
||||
|
||||
void start() {
|
||||
if (ioctl(fd, PERF_EVENT_IOC_RESET, 0) == -1) {
|
||||
really_inline void start() {
|
||||
if (ioctl(fd, PERF_EVENT_IOC_RESET, PERF_IOC_FLAG_GROUP) == -1) {
|
||||
report_error("ioctl(PERF_EVENT_IOC_RESET)");
|
||||
}
|
||||
|
||||
if (ioctl(fd, PERF_EVENT_IOC_ENABLE, 0) == -1) {
|
||||
if (ioctl(fd, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP) == -1) {
|
||||
report_error("ioctl(PERF_EVENT_IOC_ENABLE)");
|
||||
}
|
||||
}
|
||||
|
||||
unsigned long end() {
|
||||
if (ioctl(fd, PERF_EVENT_IOC_DISABLE, 0) == -1) {
|
||||
really_inline void end(std::vector<unsigned long long> & results) {
|
||||
if (ioctl(fd, PERF_EVENT_IOC_DISABLE, PERF_IOC_FLAG_GROUP) == -1) {
|
||||
report_error("ioctl(PERF_EVENT_IOC_DISABLE)");
|
||||
}
|
||||
|
||||
unsigned long result;
|
||||
if (read(fd, &result, sizeof(result)) == -1) {
|
||||
if (read(fd, &temp_result_vec[0], temp_result_vec.size() * 8) == -1) {
|
||||
report_error("read");
|
||||
}
|
||||
|
||||
return result;
|
||||
// our actual results are in slots 1,3,5, ... of this structure
|
||||
// we really should be checking our ids obtained earlier to be safe
|
||||
for (u32 i = 1; i < temp_result_vec.size(); i+=2) {
|
||||
results[i/2] = temp_result_vec[i];
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
|
@ -72,3 +89,5 @@ private:
|
|||
|
||||
};
|
||||
#endif
|
||||
|
||||
|
||||
|
|
30
main.cpp
30
main.cpp
|
@ -1,4 +1,3 @@
|
|||
#include "linux-perf-events.h"
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
#include <chrono>
|
||||
|
@ -13,6 +12,7 @@
|
|||
#include <x86intrin.h>
|
||||
#include <assert.h>
|
||||
#include "common_defs.h"
|
||||
#include "linux-perf-events.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
|
@ -1200,34 +1200,42 @@ int main(int argc, char * argv[]) {
|
|||
#endif
|
||||
|
||||
#ifndef SQUASH_COUNTERS
|
||||
LinuxEvents<PERF_TYPE_HARDWARE> cycles(PERF_COUNT_HW_CPU_CYCLES);
|
||||
LinuxEvents<PERF_TYPE_HARDWARE> instructions(PERF_COUNT_HW_INSTRUCTIONS);
|
||||
vector<int> evts;
|
||||
evts.push_back(PERF_COUNT_HW_CPU_CYCLES);
|
||||
evts.push_back(PERF_COUNT_HW_INSTRUCTIONS);
|
||||
LinuxEvents<PERF_TYPE_HARDWARE> unified(evts);
|
||||
vector<u64> results;
|
||||
results.resize(evts.size());
|
||||
unsigned long cy1 = 0, cy2 = 0, cy3 = 0, cy4 = 0;
|
||||
unsigned long cl1 = 0, cl2 = 0, cl3 = 0, cl4 = 0;
|
||||
#endif
|
||||
for (u32 i = 0; i < iterations; i++) {
|
||||
auto start = std::chrono::steady_clock::now();
|
||||
#ifndef SQUASH_COUNTERS
|
||||
cycles.start(); instructions.start();
|
||||
unified.start();
|
||||
#endif
|
||||
find_structural_bits(p.first, p.second, pj);
|
||||
#ifndef SQUASH_COUNTERS
|
||||
cl1 += instructions.end(); cy1 += cycles.end();
|
||||
cycles.start(); instructions.start();
|
||||
unified.end(results);
|
||||
cy1 += results[0]; cl1 += results[1];
|
||||
unified.start();
|
||||
#endif
|
||||
flatten_indexes(p.second, pj);
|
||||
#ifndef SQUASH_COUNTERS
|
||||
cl2 += instructions.end(); cy2 += cycles.end();
|
||||
cycles.start(); instructions.start();
|
||||
unified.end(results);
|
||||
cy2 += results[0]; cl2 += results[1];
|
||||
unified.start();
|
||||
#endif
|
||||
ape_machine(p.first, p.second, pj);
|
||||
#ifndef SQUASH_COUNTERS
|
||||
cl3 += instructions.end(); cy3 += cycles.end();
|
||||
cycles.start(); instructions.start();
|
||||
unified.end(results);
|
||||
cy3 += results[0]; cl3 += results[1];
|
||||
unified.start();
|
||||
#endif
|
||||
shovel_machine(p.first, p.second, pj);
|
||||
#ifndef SQUASH_COUNTERS
|
||||
cl4 += instructions.end(); cy4 += cycles.end();
|
||||
unified.end(results);
|
||||
cy4 += results[0]; cl4 += results[1];
|
||||
#endif
|
||||
auto end = std::chrono::steady_clock::now();
|
||||
std::chrono::duration<double> secs = end - start;
|
||||
|
|
Loading…
Reference in New Issue