Tweaking.

This commit is contained in:
Daniel Lemire 2018-12-27 17:39:17 -05:00
parent c5a49e8f99
commit 8db5e6d044
1 changed files with 20 additions and 12 deletions

View File

@ -2,6 +2,7 @@
#define _BENCHMARK_H_
#include <stdint.h>
#include <time.h>
#include <float.h>
#ifdef __x86_64__
const char *unitname = "cycles";
@ -71,6 +72,11 @@ uint64_t global_rdtsc_overhead = (uint64_t)UINT64_MAX;
global_rdtsc_overhead = min_diff; \
} while (0)
/*
 * Returns the elapsed time from `start` to `end`, in seconds.
 *
 * The seconds and nanoseconds fields are subtracted separately (in 64-bit
 * arithmetic) before being scaled, so the intermediate value is the length
 * of the interval rather than an absolute timestamp expressed in
 * nanoseconds; the latter overflows when tv_sec is only 32 bits wide.
 * The result is negative when `end` precedes `start`.
 */
double diff(struct timespec start, struct timespec end) {
  int64_t delta_sec = (int64_t)end.tv_sec - (int64_t)start.tv_sec;
  int64_t delta_nsec = (int64_t)end.tv_nsec - (int64_t)start.tv_nsec;
  /* Total interval in nanoseconds, converted to seconds only at the end. */
  int64_t total_nsec = delta_sec * (int64_t)1000000000 + delta_nsec;
  return (double)total_nsec / 1000000000.0;
}
/*
* Prints the best number of operations per cycle where
* test is the function call, answer is the expected answer generated by
@ -88,23 +94,25 @@ uint64_t global_rdtsc_overhead = (uint64_t)UINT64_MAX;
fflush(NULL); \
uint64_t cycles_start, cycles_final, cycles_diff; \
uint64_t min_diff = (uint64_t)-1; \
uint64_t min_sumclockdiff = (uint64_t)-1; \
double min_sumclockdiff = DBL_MAX; \
uint64_t sum_diff = 0; \
uint64_t sumclockdiff = 0; \
double sumclockdiff = 0; \
struct timespec time1, time2;\
for (int i = 0; i < repeat; i++) { \
pre; \
__asm volatile("" ::: /* pretend to clobber */ "memory"); \
uint64_t bef = clock(); \
RDTSC_START(cycles_start); \
if (test != expected) { \
clock_gettime(CLOCK_REALTIME, &time1); \
RDTSC_START(cycles_start); \
if (test != expected) { \
printf("not expected (%d , %d )", (int)test, (int)expected); \
break; \
} \
RDTSC_STOP(cycles_final); \
uint64_t aft = clock(); \
sumclockdiff += (aft - bef) ; \
if (sumclockdiff < min_sumclockdiff) \
min_sumclockdiff = sumclockdiff; \
clock_gettime(CLOCK_REALTIME, &time2); \
double thistiming = diff(time1,time2) ;\
sumclockdiff += thistiming ; \
if ( thistiming < min_sumclockdiff) \
min_sumclockdiff = thistiming; \
cycles_diff = (cycles_final - cycles_start - global_rdtsc_overhead); \
if (cycles_diff < min_diff) \
min_diff = cycles_diff; \
@ -113,11 +121,11 @@ uint64_t global_rdtsc_overhead = (uint64_t)UINT64_MAX;
uint64_t S = size; \
float cycle_per_op = (min_diff) / (double)S; \
float avg_cycle_per_op = (sum_diff) / ((double)S * repeat); \
float avg_gb_per_s = (CLOCKS_PER_SEC * (double)S * repeat) / ((sumclockdiff) * 1000.0 * 1000.0 * 1000.0); \
float max_gb_per_s = (CLOCKS_PER_SEC * (double)S) / ((min_sumclockdiff) * 1000.0 * 1000.0 * 1000.0); \
double avg_gb_per_s = ((double)S * repeat) / ((sumclockdiff) * 1000.0 * 1000.0 * 1000.0); \
double max_gb_per_s = ((double)S) / ((min_sumclockdiff) * 1000.0 * 1000.0 * 1000.0); \
if (verbose) printf(" %.3f %s per input byte (best) ", cycle_per_op, unitname); \
if (verbose) printf(" %.3f %s per input byte (avg) ", avg_cycle_per_op, unitname); \
if (!verbose) printf(" %.3f %.3f %.3f %.3f ", cycle_per_op, avg_cycle_per_op-cycle_per_op ,max_gb_per_s,avg_gb_per_s-max_gb_per_s); \
if (!verbose) printf(" %.3f %.3f %.3f %.3f ", cycle_per_op, avg_cycle_per_op-cycle_per_op ,max_gb_per_s,-avg_gb_per_s+max_gb_per_s); \
printf("\n"); \
fflush(NULL); \
} while (0)