Tweaking.
This commit is contained in:
parent
c5a49e8f99
commit
8db5e6d044
|
@ -2,6 +2,7 @@
|
|||
#define _BENCHMARK_H_
|
||||
#include <stdint.h>
|
||||
#include <time.h>
|
||||
#include <float.h>
|
||||
#ifdef __x86_64__
|
||||
|
||||
const char *unitname = "cycles";
|
||||
|
@ -71,6 +72,11 @@ uint64_t global_rdtsc_overhead = (uint64_t)UINT64_MAX;
|
|||
global_rdtsc_overhead = min_diff; \
|
||||
} while (0)
|
||||
|
||||
/*
 * Returns the elapsed time from `start` to `end`, in seconds.
 *
 * The result is negative when `end` precedes `start`.
 *
 * The seconds and nanoseconds fields are subtracted separately before being
 * combined, so the arithmetic never multiplies an absolute tv_sec value by
 * 1e9 (which can overflow when time_t/long are 32-bit wide).
 */
static inline double diff(struct timespec start, struct timespec end) {
  const double nanos_per_second = 1000000000.0;
  double whole_seconds = (double)(end.tv_sec - start.tv_sec);
  /* May be negative when end.tv_nsec < start.tv_nsec; the sum stays correct. */
  double fractional_seconds =
      (double)(end.tv_nsec - start.tv_nsec) / nanos_per_second;
  return whole_seconds + fractional_seconds;
}
|
||||
|
||||
/*
|
||||
* Prints the best number of operations per cycle where
|
||||
* test is the function call, answer is the expected answer generated by
|
||||
|
@ -88,23 +94,25 @@ uint64_t global_rdtsc_overhead = (uint64_t)UINT64_MAX;
|
|||
fflush(NULL); \
|
||||
uint64_t cycles_start, cycles_final, cycles_diff; \
|
||||
uint64_t min_diff = (uint64_t)-1; \
|
||||
uint64_t min_sumclockdiff = (uint64_t)-1; \
|
||||
double min_sumclockdiff = DBL_MAX; \
|
||||
uint64_t sum_diff = 0; \
|
||||
uint64_t sumclockdiff = 0; \
|
||||
double sumclockdiff = 0; \
|
||||
struct timespec time1, time2;\
|
||||
for (int i = 0; i < repeat; i++) { \
|
||||
pre; \
|
||||
__asm volatile("" ::: /* pretend to clobber */ "memory"); \
|
||||
uint64_t bef = clock(); \
|
||||
RDTSC_START(cycles_start); \
|
||||
if (test != expected) { \
|
||||
clock_gettime(CLOCK_REALTIME, &time1); \
|
||||
RDTSC_START(cycles_start); \
|
||||
if (test != expected) { \
|
||||
printf("not expected (%d , %d )", (int)test, (int)expected); \
|
||||
break; \
|
||||
} \
|
||||
RDTSC_STOP(cycles_final); \
|
||||
uint64_t aft = clock(); \
|
||||
sumclockdiff += (aft - bef) ; \
|
||||
if (sumclockdiff < min_sumclockdiff) \
|
||||
min_sumclockdiff = sumclockdiff; \
|
||||
clock_gettime(CLOCK_REALTIME, &time2); \
|
||||
double thistiming = diff(time1,time2) ;\
|
||||
sumclockdiff += thistiming ; \
|
||||
if ( thistiming < min_sumclockdiff) \
|
||||
min_sumclockdiff = thistiming; \
|
||||
cycles_diff = (cycles_final - cycles_start - global_rdtsc_overhead); \
|
||||
if (cycles_diff < min_diff) \
|
||||
min_diff = cycles_diff; \
|
||||
|
@ -113,11 +121,11 @@ uint64_t global_rdtsc_overhead = (uint64_t)UINT64_MAX;
|
|||
uint64_t S = size; \
|
||||
float cycle_per_op = (min_diff) / (double)S; \
|
||||
float avg_cycle_per_op = (sum_diff) / ((double)S * repeat); \
|
||||
float avg_gb_per_s = (CLOCKS_PER_SEC * (double)S * repeat) / ((sumclockdiff) * 1000.0 * 1000.0 * 1000.0); \
|
||||
float max_gb_per_s = (CLOCKS_PER_SEC * (double)S) / ((min_sumclockdiff) * 1000.0 * 1000.0 * 1000.0); \
|
||||
double avg_gb_per_s = ((double)S * repeat) / ((sumclockdiff) * 1000.0 * 1000.0 * 1000.0); \
|
||||
double max_gb_per_s = ((double)S) / ((min_sumclockdiff) * 1000.0 * 1000.0 * 1000.0); \
|
||||
if (verbose) printf(" %.3f %s per input byte (best) ", cycle_per_op, unitname); \
|
||||
if (verbose) printf(" %.3f %s per input byte (avg) ", avg_cycle_per_op, unitname); \
|
||||
if (!verbose) printf(" %.3f %.3f %.3f %.3f ", cycle_per_op, avg_cycle_per_op-cycle_per_op ,max_gb_per_s,avg_gb_per_s-max_gb_per_s); \
|
||||
if (!verbose) printf(" %.3f %.3f %.3f %.3f ", cycle_per_op, avg_cycle_per_op-cycle_per_op ,max_gb_per_s,-avg_gb_per_s+max_gb_per_s); \
|
||||
printf("\n"); \
|
||||
fflush(NULL); \
|
||||
} while (0)
|
||||
|
|
Loading…
Reference in New Issue