Changeset b4a4ad94 in mainline
 Timestamp:
 20190102T21:17:05Z (3 years ago)
 Branches:
 lfn, master
 Children:
 b9f1585
 Parents:
 79bb48e
 gitauthor:
 Vojtech Horky <vojtech.horky@…> (20190102 21:10:03)
 gitcommitter:
 Vojtech Horky <vojtech.horky@…> (20190102 21:17:05)
 File:

 1 edited
Legend:
 Unmodified
 Added
 Removed

uspace/app/perf/perf.c
r79bb48e rb4a4ad94 36 36 37 37 #include <assert.h> 38 #include <math.h> 38 39 #include <stdio.h> 39 40 #include <stddef.h> … … 61 62 double nanos = stopwatch_get_nanos(stopwatch); 62 63 double thruput = (double) workload_size / (nanos / 1000000000.0l); 63 printf(", %.0f cycles/s.\n", thruput);64 printf(", %.0f ops/s.\n", thruput); 64 65 } else { 65 66 printf(".\n"); … … 67 68 } 68 69 70 /* 71 * This is a temporary solution until we have proper sqrt() implementation 72 * in libmath. 73 * 74 * The algorithm uses Babylonian method [1]. 75 * 76 * [1] https://en.wikipedia.org/wiki/Methods_of_computing_square_roots#Babylonian_method 77 */ 78 static double estimate_square_root(double value, double precision) 79 { 80 double estimate = 1.; 81 double prev_estimate = estimate + 10 * precision; 82 83 while (fabs(estimate  prev_estimate) > precision) { 84 prev_estimate = estimate; 85 estimate = (prev_estimate + value / prev_estimate) / 2.; 86 } 87 88 return estimate; 89 } 90 91 /* 92 * Compute available statistics from given stopwatches. 93 * 94 * We compute normal mean for average duration of the workload and geometric 95 * mean for average thruput. Note that geometric mean is necessary to compute 96 * average throughput correctly  consider the following example: 97 *  we run always 60 operations, 98 *  first run executes in 30 s (i.e. 2 ops/s) 99 *  and second one in 10 s (6 ops/s). 100 * Then, naively, average throughput would be (2+6)/2 = 4 [ops/s]. However, we 101 * actually executed 60 + 60 ops in 30 + 10 seconds. So the actual average 102 * throughput is 3 ops/s (which is exactly what geometric mean means). 103 * 104 */ 105 static void compute_stats(stopwatch_t *stopwatch, size_t stopwatch_count, 106 uint64_t workload_size, double precision, double *out_duration_avg, 107 double *out_duration_sigma, double *out_thruput_avg) 108 { 109 double inv_thruput_sum = 0.0; 110 double nanos_sum = 0.0; 111 double nanos_sum2 = 0.0; 112 113 for (size_t i = 0; i < stopwatch_count; i++) { 114 double nanos = stopwatch_get_nanos(&stopwatch[i]); 115 double thruput = (double) workload_size / nanos; 116 117 inv_thruput_sum += 1.0 / thruput; 118 nanos_sum += nanos; 119 nanos_sum2 += nanos * nanos; 120 } 121 *out_duration_avg = nanos_sum / stopwatch_count; 122 double sigma2 = (nanos_sum2  nanos_sum * (*out_duration_avg)) / 123 ((double) stopwatch_count  1); 124 // FIXME: implement sqrt properly 125 *out_duration_sigma = estimate_square_root(sigma2, precision); 126 *out_thruput_avg = 1.0 / (inv_thruput_sum / stopwatch_count); 127 } 128 69 129 static void summary_stats(stopwatch_t *stopwatch, size_t stopwatch_count, 70 130 benchmark_t *bench, uint64_t workload_size) 71 131 { 72 double sum = 0.0; 73 double sum_square = 0.0; 74 75 for (size_t i = 0; i < stopwatch_count; i++) { 76 double nanos = stopwatch_get_nanos(&stopwatch[i]); 77 double thruput = (double) workload_size / (nanos / 1000000000.0l); 78 sum += thruput; 79 sum_square += thruput * thruput; 80 } 81 82 double avg = sum / stopwatch_count; 83 84 double sd_numer = sum_square + stopwatch_count * avg * avg  2 * sum * avg; 85 double sd_square = sd_numer / ((double) stopwatch_count  1); 86 87 printf("Average: %.0f cycles/s Std.dev^2: %.0f cycles/s Samples: %zu\n", 88 avg, sd_square, stopwatch_count); 132 double duration_avg, duration_sigma, thruput_avg; 133 compute_stats(stopwatch, stopwatch_count, workload_size, 0.001, 134 &duration_avg, &duration_sigma, &thruput_avg); 135 136 printf("Average: %" PRIu64 " ops in %.0f us (sd %.0f us); " 137 "%.0f ops/s; Samples: %zu\n", 138 workload_size, duration_avg / 1000.0, duration_sigma / 1000.0, 139 thruput_avg * 1000000000.0, stopwatch_count); 89 140 } 90 141
Note: See TracChangeset
for help on using the changeset viewer.