Tweaking.
This commit is contained in:
parent
c5a49e8f99
commit
8db5e6d044
|
@ -2,6 +2,7 @@
|
||||||
#define _BENCHMARK_H_
|
#define _BENCHMARK_H_
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
#include <time.h>
|
#include <time.h>
|
||||||
|
#include <float.h>
|
||||||
#ifdef __x86_64__
|
#ifdef __x86_64__
|
||||||
|
|
||||||
const char *unitname = "cycles";
|
const char *unitname = "cycles";
|
||||||
|
@ -71,6 +72,11 @@ uint64_t global_rdtsc_overhead = (uint64_t)UINT64_MAX;
|
||||||
global_rdtsc_overhead = min_diff; \
|
global_rdtsc_overhead = min_diff; \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
|
/*
 * Returns the elapsed time from start to end, in seconds (negative when end
 * precedes start).
 *
 * The seconds are subtracted before being scaled to nanoseconds so the
 * intermediate value stays small: scaling an absolute CLOCK_REALTIME tv_sec
 * by 1e9 overflows wherever time_t/long is 32 bits wide. The parameter type
 * is spelled `struct timespec` so the header is valid C as well as C++.
 */
double diff(struct timespec start, struct timespec end) {
  int64_t elapsed_ns = (int64_t)(end.tv_sec - start.tv_sec) * 1000000000 +
                       (int64_t)(end.tv_nsec - start.tv_nsec);
  return elapsed_ns / 1000000000.0;
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Prints the best number of operations per cycle where
|
* Prints the best number of operations per cycle where
|
||||||
* test is the function call, answer is the expected answer generated by
|
* test is the function call, answer is the expected answer generated by
|
||||||
|
@ -88,23 +94,25 @@ uint64_t global_rdtsc_overhead = (uint64_t)UINT64_MAX;
|
||||||
fflush(NULL); \
|
fflush(NULL); \
|
||||||
uint64_t cycles_start, cycles_final, cycles_diff; \
|
uint64_t cycles_start, cycles_final, cycles_diff; \
|
||||||
uint64_t min_diff = (uint64_t)-1; \
|
uint64_t min_diff = (uint64_t)-1; \
|
||||||
uint64_t min_sumclockdiff = (uint64_t)-1; \
|
double min_sumclockdiff = DBL_MAX; \
|
||||||
uint64_t sum_diff = 0; \
|
uint64_t sum_diff = 0; \
|
||||||
uint64_t sumclockdiff = 0; \
|
double sumclockdiff = 0; \
|
||||||
|
struct timespec time1, time2;\
|
||||||
for (int i = 0; i < repeat; i++) { \
|
for (int i = 0; i < repeat; i++) { \
|
||||||
pre; \
|
pre; \
|
||||||
__asm volatile("" ::: /* pretend to clobber */ "memory"); \
|
__asm volatile("" ::: /* pretend to clobber */ "memory"); \
|
||||||
uint64_t bef = clock(); \
|
clock_gettime(CLOCK_REALTIME, &time1); \
|
||||||
RDTSC_START(cycles_start); \
|
RDTSC_START(cycles_start); \
|
||||||
if (test != expected) { \
|
if (test != expected) { \
|
||||||
printf("not expected (%d , %d )", (int)test, (int)expected); \
|
printf("not expected (%d , %d )", (int)test, (int)expected); \
|
||||||
break; \
|
break; \
|
||||||
} \
|
} \
|
||||||
RDTSC_STOP(cycles_final); \
|
RDTSC_STOP(cycles_final); \
|
||||||
uint64_t aft = clock(); \
|
clock_gettime(CLOCK_REALTIME, &time2); \
|
||||||
sumclockdiff += (aft - bef) ; \
|
double thistiming = diff(time1,time2) ;\
|
||||||
if (sumclockdiff < min_sumclockdiff) \
|
sumclockdiff += thistiming ; \
|
||||||
min_sumclockdiff = sumclockdiff; \
|
if ( thistiming < min_sumclockdiff) \
|
||||||
|
min_sumclockdiff = thistiming; \
|
||||||
cycles_diff = (cycles_final - cycles_start - global_rdtsc_overhead); \
|
cycles_diff = (cycles_final - cycles_start - global_rdtsc_overhead); \
|
||||||
if (cycles_diff < min_diff) \
|
if (cycles_diff < min_diff) \
|
||||||
min_diff = cycles_diff; \
|
min_diff = cycles_diff; \
|
||||||
|
@ -113,11 +121,11 @@ uint64_t global_rdtsc_overhead = (uint64_t)UINT64_MAX;
|
||||||
uint64_t S = size; \
|
uint64_t S = size; \
|
||||||
float cycle_per_op = (min_diff) / (double)S; \
|
float cycle_per_op = (min_diff) / (double)S; \
|
||||||
float avg_cycle_per_op = (sum_diff) / ((double)S * repeat); \
|
float avg_cycle_per_op = (sum_diff) / ((double)S * repeat); \
|
||||||
float avg_gb_per_s = (CLOCKS_PER_SEC * (double)S * repeat) / ((sumclockdiff) * 1000.0 * 1000.0 * 1000.0); \
|
double avg_gb_per_s = ((double)S * repeat) / ((sumclockdiff) * 1000.0 * 1000.0 * 1000.0); \
|
||||||
float max_gb_per_s = (CLOCKS_PER_SEC * (double)S) / ((min_sumclockdiff) * 1000.0 * 1000.0 * 1000.0); \
|
double max_gb_per_s = ((double)S) / ((min_sumclockdiff) * 1000.0 * 1000.0 * 1000.0); \
|
||||||
if (verbose) printf(" %.3f %s per input byte (best) ", cycle_per_op, unitname); \
|
if (verbose) printf(" %.3f %s per input byte (best) ", cycle_per_op, unitname); \
|
||||||
if (verbose) printf(" %.3f %s per input byte (avg) ", avg_cycle_per_op, unitname); \
|
if (verbose) printf(" %.3f %s per input byte (avg) ", avg_cycle_per_op, unitname); \
|
||||||
if (!verbose) printf(" %.3f %.3f %.3f %.3f ", cycle_per_op, avg_cycle_per_op-cycle_per_op ,max_gb_per_s,avg_gb_per_s-max_gb_per_s); \
|
if (!verbose) printf(" %.3f %.3f %.3f %.3f ", cycle_per_op, avg_cycle_per_op-cycle_per_op ,max_gb_per_s,-avg_gb_per_s+max_gb_per_s); \
|
||||||
printf("\n"); \
|
printf("\n"); \
|
||||||
fflush(NULL); \
|
fflush(NULL); \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
Loading…
Reference in New Issue