From 8db5e6d0448c1db53eb7f195b1d3c30d5aee8aa0 Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Thu, 27 Dec 2018 17:39:17 -0500 Subject: [PATCH] Tweaking. --- benchmark/benchmark.h | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/benchmark/benchmark.h b/benchmark/benchmark.h index 212ca166..0ad5dc6c 100644 --- a/benchmark/benchmark.h +++ b/benchmark/benchmark.h @@ -2,6 +2,7 @@ #define _BENCHMARK_H_ #include #include +#include #ifdef __x86_64__ const char *unitname = "cycles"; @@ -71,6 +72,11 @@ uint64_t global_rdtsc_overhead = (uint64_t)UINT64_MAX; global_rdtsc_overhead = min_diff; \ } while (0) +double diff(timespec start, timespec end) { + return ((end.tv_nsec + 1000000000 * end.tv_sec) + - (start.tv_nsec + 1000000000 * start.tv_sec)) / 1000000000.0; +} + /* * Prints the best number of operations per cycle where * test is the function call, answer is the expected answer generated by @@ -88,23 +94,25 @@ uint64_t global_rdtsc_overhead = (uint64_t)UINT64_MAX; fflush(NULL); \ uint64_t cycles_start, cycles_final, cycles_diff; \ uint64_t min_diff = (uint64_t)-1; \ - uint64_t min_sumclockdiff = (uint64_t)-1; \ + double min_sumclockdiff = DBL_MAX; \ uint64_t sum_diff = 0; \ - uint64_t sumclockdiff = 0; \ + double sumclockdiff = 0; \ + struct timespec time1, time2;\ for (int i = 0; i < repeat; i++) { \ pre; \ __asm volatile("" ::: /* pretend to clobber */ "memory"); \ - uint64_t bef = clock(); \ - RDTSC_START(cycles_start); \ - if (test != expected) { \ + clock_gettime(CLOCK_REALTIME, &time1); \ +RDTSC_START(cycles_start); \ + if (test != expected) { \ printf("not expected (%d , %d )", (int)test, (int)expected); \ break; \ } \ RDTSC_STOP(cycles_final); \ - uint64_t aft = clock(); \ - sumclockdiff += (aft - bef) ; \ - if (sumclockdiff < min_sumclockdiff) \ - min_sumclockdiff = sumclockdiff; \ + clock_gettime(CLOCK_REALTIME, &time2); \ + double thistiming = diff(time1,time2) ;\ + sumclockdiff += thistiming ; \ + if ( thistiming < min_sumclockdiff) \ + min_sumclockdiff = thistiming; \ cycles_diff = (cycles_final - cycles_start - global_rdtsc_overhead); \ if (cycles_diff < min_diff) \ min_diff = cycles_diff; \ @@ -113,11 +121,11 @@ uint64_t global_rdtsc_overhead = (uint64_t)UINT64_MAX; uint64_t S = size; \ float cycle_per_op = (min_diff) / (double)S; \ float avg_cycle_per_op = (sum_diff) / ((double)S * repeat); \ - float avg_gb_per_s = (CLOCKS_PER_SEC * (double)S * repeat) / ((sumclockdiff) * 1000.0 * 1000.0 * 1000.0); \ - float max_gb_per_s = (CLOCKS_PER_SEC * (double)S) / ((min_sumclockdiff) * 1000.0 * 1000.0 * 1000.0); \ + double avg_gb_per_s = ((double)S * repeat) / ((sumclockdiff) * 1000.0 * 1000.0 * 1000.0); \ + double max_gb_per_s = ((double)S) / ((min_sumclockdiff) * 1000.0 * 1000.0 * 1000.0); \ if (verbose) printf(" %.3f %s per input byte (best) ", cycle_per_op, unitname); \ if (verbose) printf(" %.3f %s per input byte (avg) ", avg_cycle_per_op, unitname); \ - if (!verbose) printf(" %.3f %.3f %.3f %.3f ", cycle_per_op, avg_cycle_per_op-cycle_per_op ,max_gb_per_s,avg_gb_per_s-max_gb_per_s); \ + if (!verbose) printf(" %.3f %.3f %.3f %.3f ", cycle_per_op, avg_cycle_per_op-cycle_per_op ,max_gb_per_s,-avg_gb_per_s+max_gb_per_s); \ printf("\n"); \ fflush(NULL); \ } while (0)