Updated some performance evaluation code.
This commit is contained in:
parent
9ed3a4a735
commit
fe9c79df9d
2
Makefile
2
Makefile
|
@ -15,7 +15,7 @@ EXECUTABLES=parse
|
||||||
|
|
||||||
all: $(EXECUTABLES)
|
all: $(EXECUTABLES)
|
||||||
|
|
||||||
parse: main.cpp common_defs.h
|
parse: main.cpp common_defs.h linux-perf-events.h
|
||||||
$(CXX) $(CXXFLAGS) -o parse main.cpp
|
$(CXX) $(CXXFLAGS) -o parse main.cpp
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -11,58 +11,75 @@
|
||||||
#include <cstring> // for memset
|
#include <cstring> // for memset
|
||||||
#include <stdexcept>
|
#include <stdexcept>
|
||||||
|
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
template <int TYPE = PERF_TYPE_HARDWARE>
|
template <int TYPE = PERF_TYPE_HARDWARE>
|
||||||
class LinuxEvents {
|
class LinuxEvents {
|
||||||
|
|
||||||
int fd;
|
int fd;
|
||||||
perf_event_attr attribs;
|
perf_event_attr attribs;
|
||||||
|
int num_events;
|
||||||
|
std::vector<uint64_t> temp_result_vec;
|
||||||
|
std::vector<uint64_t> ids;
|
||||||
public:
|
public:
|
||||||
LinuxEvents(int config) : fd(0) {
|
LinuxEvents(std::vector<int> config_vec) : fd(0) {
|
||||||
memset(&attribs, 0, sizeof(attribs));
|
memset(&attribs, 0, sizeof(attribs));
|
||||||
attribs.type = TYPE;
|
attribs.type = TYPE;
|
||||||
attribs.size = sizeof(attribs);
|
attribs.size = sizeof(attribs);
|
||||||
attribs.config = config;
|
|
||||||
attribs.disabled = 1;
|
attribs.disabled = 1;
|
||||||
attribs.exclude_kernel = 1;
|
attribs.exclude_kernel = 1;
|
||||||
attribs.exclude_hv = 1;
|
attribs.exclude_hv = 1;
|
||||||
|
|
||||||
|
attribs.sample_period = 0;
|
||||||
|
attribs.read_format = PERF_FORMAT_GROUP | PERF_FORMAT_ID;
|
||||||
const int pid = 0; // the current process
|
const int pid = 0; // the current process
|
||||||
const int cpu = -1; // all CPUs
|
const int cpu = -1; // all CPUs
|
||||||
const int group = -1; // no group
|
|
||||||
const unsigned long flags = 0;
|
const unsigned long flags = 0;
|
||||||
fd = syscall(__NR_perf_event_open, &attribs, pid, cpu, group, flags);
|
|
||||||
if (fd == -1) {
|
int group = -1; // no group
|
||||||
report_error("perf_event_open");
|
num_events = config_vec.size();
|
||||||
|
u32 i = 0;
|
||||||
|
for (auto config: config_vec) {
|
||||||
|
attribs.config = config;
|
||||||
|
fd = syscall(__NR_perf_event_open, &attribs, pid, cpu, group, flags);
|
||||||
|
if (fd == -1) {
|
||||||
|
report_error("perf_event_open");
|
||||||
|
}
|
||||||
|
ioctl(fd, PERF_EVENT_IOC_ID, &ids[i++]);
|
||||||
|
if (group == -1) {
|
||||||
|
group = fd;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
temp_result_vec.resize(num_events*2 + 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
~LinuxEvents() {
|
~LinuxEvents() {
|
||||||
close(fd);
|
close(fd);
|
||||||
}
|
}
|
||||||
|
|
||||||
void start() {
|
really_inline void start() {
|
||||||
if (ioctl(fd, PERF_EVENT_IOC_RESET, 0) == -1) {
|
if (ioctl(fd, PERF_EVENT_IOC_RESET, PERF_IOC_FLAG_GROUP) == -1) {
|
||||||
report_error("ioctl(PERF_EVENT_IOC_RESET)");
|
report_error("ioctl(PERF_EVENT_IOC_RESET)");
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ioctl(fd, PERF_EVENT_IOC_ENABLE, 0) == -1) {
|
if (ioctl(fd, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP) == -1) {
|
||||||
report_error("ioctl(PERF_EVENT_IOC_ENABLE)");
|
report_error("ioctl(PERF_EVENT_IOC_ENABLE)");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned long end() {
|
really_inline void end(std::vector<unsigned long long> & results) {
|
||||||
if (ioctl(fd, PERF_EVENT_IOC_DISABLE, 0) == -1) {
|
if (ioctl(fd, PERF_EVENT_IOC_DISABLE, PERF_IOC_FLAG_GROUP) == -1) {
|
||||||
report_error("ioctl(PERF_EVENT_IOC_DISABLE)");
|
report_error("ioctl(PERF_EVENT_IOC_DISABLE)");
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned long result;
|
if (read(fd, &temp_result_vec[0], temp_result_vec.size() * 8) == -1) {
|
||||||
if (read(fd, &result, sizeof(result)) == -1) {
|
|
||||||
report_error("read");
|
report_error("read");
|
||||||
}
|
}
|
||||||
|
// our actual results are in slots 1,3,5, ... of this structure
|
||||||
return result;
|
// we really should be checking our ids obtained earlier to be safe
|
||||||
|
for (u32 i = 1; i < temp_result_vec.size(); i+=2) {
|
||||||
|
results[i/2] = temp_result_vec[i];
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
@ -72,3 +89,5 @@ private:
|
||||||
|
|
||||||
};
|
};
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
|
30
main.cpp
30
main.cpp
|
@ -1,4 +1,3 @@
|
||||||
#include "linux-perf-events.h"
|
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <iomanip>
|
#include <iomanip>
|
||||||
#include <chrono>
|
#include <chrono>
|
||||||
|
@ -13,6 +12,7 @@
|
||||||
#include <x86intrin.h>
|
#include <x86intrin.h>
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
#include "common_defs.h"
|
#include "common_defs.h"
|
||||||
|
#include "linux-perf-events.h"
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
|
@ -1200,34 +1200,42 @@ int main(int argc, char * argv[]) {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifndef SQUASH_COUNTERS
|
#ifndef SQUASH_COUNTERS
|
||||||
LinuxEvents<PERF_TYPE_HARDWARE> cycles(PERF_COUNT_HW_CPU_CYCLES);
|
vector<int> evts;
|
||||||
LinuxEvents<PERF_TYPE_HARDWARE> instructions(PERF_COUNT_HW_INSTRUCTIONS);
|
evts.push_back(PERF_COUNT_HW_CPU_CYCLES);
|
||||||
|
evts.push_back(PERF_COUNT_HW_INSTRUCTIONS);
|
||||||
|
LinuxEvents<PERF_TYPE_HARDWARE> unified(evts);
|
||||||
|
vector<u64> results;
|
||||||
|
results.resize(evts.size());
|
||||||
unsigned long cy1 = 0, cy2 = 0, cy3 = 0, cy4 = 0;
|
unsigned long cy1 = 0, cy2 = 0, cy3 = 0, cy4 = 0;
|
||||||
unsigned long cl1 = 0, cl2 = 0, cl3 = 0, cl4 = 0;
|
unsigned long cl1 = 0, cl2 = 0, cl3 = 0, cl4 = 0;
|
||||||
#endif
|
#endif
|
||||||
for (u32 i = 0; i < iterations; i++) {
|
for (u32 i = 0; i < iterations; i++) {
|
||||||
auto start = std::chrono::steady_clock::now();
|
auto start = std::chrono::steady_clock::now();
|
||||||
#ifndef SQUASH_COUNTERS
|
#ifndef SQUASH_COUNTERS
|
||||||
cycles.start(); instructions.start();
|
unified.start();
|
||||||
#endif
|
#endif
|
||||||
find_structural_bits(p.first, p.second, pj);
|
find_structural_bits(p.first, p.second, pj);
|
||||||
#ifndef SQUASH_COUNTERS
|
#ifndef SQUASH_COUNTERS
|
||||||
cl1 += instructions.end(); cy1 += cycles.end();
|
unified.end(results);
|
||||||
cycles.start(); instructions.start();
|
cy1 += results[0]; cl1 += results[1];
|
||||||
|
unified.start();
|
||||||
#endif
|
#endif
|
||||||
flatten_indexes(p.second, pj);
|
flatten_indexes(p.second, pj);
|
||||||
#ifndef SQUASH_COUNTERS
|
#ifndef SQUASH_COUNTERS
|
||||||
cl2 += instructions.end(); cy2 += cycles.end();
|
unified.end(results);
|
||||||
cycles.start(); instructions.start();
|
cy2 += results[0]; cl2 += results[1];
|
||||||
|
unified.start();
|
||||||
#endif
|
#endif
|
||||||
ape_machine(p.first, p.second, pj);
|
ape_machine(p.first, p.second, pj);
|
||||||
#ifndef SQUASH_COUNTERS
|
#ifndef SQUASH_COUNTERS
|
||||||
cl3 += instructions.end(); cy3 += cycles.end();
|
unified.end(results);
|
||||||
cycles.start(); instructions.start();
|
cy3 += results[0]; cl3 += results[1];
|
||||||
|
unified.start();
|
||||||
#endif
|
#endif
|
||||||
shovel_machine(p.first, p.second, pj);
|
shovel_machine(p.first, p.second, pj);
|
||||||
#ifndef SQUASH_COUNTERS
|
#ifndef SQUASH_COUNTERS
|
||||||
cl4 += instructions.end(); cy4 += cycles.end();
|
unified.end(results);
|
||||||
|
cy4 += results[0]; cl4 += results[1];
|
||||||
#endif
|
#endif
|
||||||
auto end = std::chrono::steady_clock::now();
|
auto end = std::chrono::steady_clock::now();
|
||||||
std::chrono::duration<double> secs = end - start;
|
std::chrono::duration<double> secs = end - start;
|
||||||
|
|
Loading…
Reference in New Issue