提交 c3305257 编写于 作者: I Ingo Molnar

perf stat: Add more cache-miss percentage printouts

Print out the cache-miss percentage as well if the cache refs were
collected, for all the generic cache event types.

Before:

   11,103,723,230 dTLB-loads                #  622.471 M/sec                    ( +-  0.30% )
       87,065,337 dTLB-load-misses          #    4.881 M/sec                    ( +-  0.90% )

After:

   11,353,713,242 dTLB-loads                #  626.020 M/sec                    ( +-  0.35% )
      113,393,472 dTLB-load-misses          #    1.00% of all dTLB cache hits   ( +-  0.49% )

Also highlight percentages that are too high using ASCII colors, when it's executed on the console.

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Steven Rostedt <rostedt@goodmis.org>
Link: http://lkml.kernel.org/n/tip-lkhwxsevdbd9a8nymx0vxc3y@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
上级 2cba3ffb
...@@ -261,6 +261,10 @@ struct stats runtime_stalled_cycles_back_stats[MAX_NR_CPUS]; ...@@ -261,6 +261,10 @@ struct stats runtime_stalled_cycles_back_stats[MAX_NR_CPUS];
struct stats runtime_branches_stats[MAX_NR_CPUS]; struct stats runtime_branches_stats[MAX_NR_CPUS];
struct stats runtime_cacherefs_stats[MAX_NR_CPUS]; struct stats runtime_cacherefs_stats[MAX_NR_CPUS];
struct stats runtime_l1_dcache_stats[MAX_NR_CPUS]; struct stats runtime_l1_dcache_stats[MAX_NR_CPUS];
/*
 * Shadow stats for the remaining generic cache event types (L1-icache,
 * last-level cache, iTLB, dTLB).  update_shadow_stats() feeds element [0]
 * of each array from the matching HW_CACHE counter; the print_*_misses()
 * helpers read the per-cpu element to turn a miss count into a percentage.
 */
struct stats runtime_l1_icache_stats[MAX_NR_CPUS];
struct stats runtime_ll_cache_stats[MAX_NR_CPUS];
struct stats runtime_itlb_cache_stats[MAX_NR_CPUS];
struct stats runtime_dtlb_cache_stats[MAX_NR_CPUS];
struct stats walltime_nsecs_stats; struct stats walltime_nsecs_stats;
static int create_perf_stat_counter(struct perf_evsel *evsel) static int create_perf_stat_counter(struct perf_evsel *evsel)
...@@ -317,6 +321,14 @@ static void update_shadow_stats(struct perf_evsel *counter, u64 *count) ...@@ -317,6 +321,14 @@ static void update_shadow_stats(struct perf_evsel *counter, u64 *count)
update_stats(&runtime_cacherefs_stats[0], count[0]); update_stats(&runtime_cacherefs_stats[0], count[0]);
else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D)) else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D))
update_stats(&runtime_l1_dcache_stats[0], count[0]); update_stats(&runtime_l1_dcache_stats[0], count[0]);
else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I))
update_stats(&runtime_l1_icache_stats[0], count[0]);
else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL))
update_stats(&runtime_ll_cache_stats[0], count[0]);
else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB))
update_stats(&runtime_dtlb_cache_stats[0], count[0]);
else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
update_stats(&runtime_itlb_cache_stats[0], count[0]);
} }
/* /*
...@@ -630,6 +642,98 @@ static void print_l1_dcache_misses(int cpu, struct perf_evsel *evsel __used, dou ...@@ -630,6 +642,98 @@ static void print_l1_dcache_misses(int cpu, struct perf_evsel *evsel __used, dou
fprintf(stderr, " of all L1-dcache hits "); fprintf(stderr, " of all L1-dcache hits ");
} }
/*
 * Print @avg as a percentage of the value avg_stats() reports for
 * runtime_l1_icache_stats[@cpu], written to stderr.
 *
 * The percentage is 0.0 when that reference value is zero.  It is shown
 * in red above 20%, magenta above 10%, yellow above 5% and in the normal
 * color otherwise.  @evsel is unused.
 */
static void print_l1_icache_misses(int cpu, struct perf_evsel *evsel __used, double avg)
{
	const double refs = avg_stats(&runtime_l1_icache_stats[cpu]);
	/* Guard against dividing by a zero reference count. */
	const double pct = refs ? avg / refs * 100.0 : 0.0;
	const char *shade = PERF_COLOR_NORMAL;

	/* Pick the highlight from the most severe threshold downwards. */
	if (pct > 20.0) {
		shade = PERF_COLOR_RED;
	} else if (pct > 10.0) {
		shade = PERF_COLOR_MAGENTA;
	} else if (pct > 5.0) {
		shade = PERF_COLOR_YELLOW;
	}

	fprintf(stderr, " # ");
	color_fprintf(stderr, shade, "%6.2f%%", pct);
	fprintf(stderr, " of all L1-icache hits ");
}
/*
 * Print @avg as a percentage of the value avg_stats() reports for
 * runtime_dtlb_cache_stats[@cpu], written to stderr.
 *
 * The percentage is 0.0 when that reference value is zero.  It is shown
 * in red above 20%, magenta above 10%, yellow above 5% and in the normal
 * color otherwise.  @evsel is unused.
 */
static void print_dtlb_cache_misses(int cpu, struct perf_evsel *evsel __used, double avg)
{
	const double refs = avg_stats(&runtime_dtlb_cache_stats[cpu]);
	/* Guard against dividing by a zero reference count. */
	const double pct = refs ? avg / refs * 100.0 : 0.0;
	const char *shade = PERF_COLOR_NORMAL;

	/* Pick the highlight from the most severe threshold downwards. */
	if (pct > 20.0) {
		shade = PERF_COLOR_RED;
	} else if (pct > 10.0) {
		shade = PERF_COLOR_MAGENTA;
	} else if (pct > 5.0) {
		shade = PERF_COLOR_YELLOW;
	}

	fprintf(stderr, " # ");
	color_fprintf(stderr, shade, "%6.2f%%", pct);
	fprintf(stderr, " of all dTLB cache hits ");
}
/*
 * Print @avg as a percentage of the value avg_stats() reports for
 * runtime_itlb_cache_stats[@cpu], written to stderr.
 *
 * The percentage is 0.0 when that reference value is zero.  It is shown
 * in red above 20%, magenta above 10%, yellow above 5% and in the normal
 * color otherwise.  @evsel is unused.
 */
static void print_itlb_cache_misses(int cpu, struct perf_evsel *evsel __used, double avg)
{
	const double refs = avg_stats(&runtime_itlb_cache_stats[cpu]);
	/* Guard against dividing by a zero reference count. */
	const double pct = refs ? avg / refs * 100.0 : 0.0;
	const char *shade = PERF_COLOR_NORMAL;

	/* Pick the highlight from the most severe threshold downwards. */
	if (pct > 20.0) {
		shade = PERF_COLOR_RED;
	} else if (pct > 10.0) {
		shade = PERF_COLOR_MAGENTA;
	} else if (pct > 5.0) {
		shade = PERF_COLOR_YELLOW;
	}

	fprintf(stderr, " # ");
	color_fprintf(stderr, shade, "%6.2f%%", pct);
	fprintf(stderr, " of all iTLB cache hits ");
}
/*
 * Print @avg as a percentage of the value avg_stats() reports for
 * runtime_ll_cache_stats[@cpu], written to stderr.
 *
 * The percentage is 0.0 when that reference value is zero.  It is shown
 * in red above 20%, magenta above 10%, yellow above 5% and in the normal
 * color otherwise.  @evsel is unused.
 */
static void print_ll_cache_misses(int cpu, struct perf_evsel *evsel __used, double avg)
{
	const double refs = avg_stats(&runtime_ll_cache_stats[cpu]);
	/* Guard against dividing by a zero reference count. */
	const double pct = refs ? avg / refs * 100.0 : 0.0;
	const char *shade = PERF_COLOR_NORMAL;

	/* Pick the highlight from the most severe threshold downwards. */
	if (pct > 20.0) {
		shade = PERF_COLOR_RED;
	} else if (pct > 10.0) {
		shade = PERF_COLOR_MAGENTA;
	} else if (pct > 5.0) {
		shade = PERF_COLOR_YELLOW;
	}

	fprintf(stderr, " # ");
	color_fprintf(stderr, shade, "%6.2f%%", pct);
	fprintf(stderr, " of all LL-cache hits ");
}
static void abs_printout(int cpu, struct perf_evsel *evsel, double avg) static void abs_printout(int cpu, struct perf_evsel *evsel, double avg)
{ {
double total, ratio = 0.0; double total, ratio = 0.0;
...@@ -684,6 +788,34 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg) ...@@ -684,6 +788,34 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg)
((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
runtime_l1_dcache_stats[cpu].n != 0) { runtime_l1_dcache_stats[cpu].n != 0) {
print_l1_dcache_misses(cpu, evsel, avg); print_l1_dcache_misses(cpu, evsel, avg);
} else if (
evsel->attr.type == PERF_TYPE_HW_CACHE &&
evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1I |
((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
runtime_l1_icache_stats[cpu].n != 0) {
print_l1_icache_misses(cpu, evsel, avg);
} else if (
evsel->attr.type == PERF_TYPE_HW_CACHE &&
evsel->attr.config == ( PERF_COUNT_HW_CACHE_DTLB |
((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
runtime_dtlb_cache_stats[cpu].n != 0) {
print_dtlb_cache_misses(cpu, evsel, avg);
} else if (
evsel->attr.type == PERF_TYPE_HW_CACHE &&
evsel->attr.config == ( PERF_COUNT_HW_CACHE_ITLB |
((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
runtime_itlb_cache_stats[cpu].n != 0) {
print_itlb_cache_misses(cpu, evsel, avg);
} else if (
evsel->attr.type == PERF_TYPE_HW_CACHE &&
evsel->attr.config == ( PERF_COUNT_HW_CACHE_LL |
((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
runtime_ll_cache_stats[cpu].n != 0) {
print_ll_cache_misses(cpu, evsel, avg);
} else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES) && } else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES) &&
runtime_cacherefs_stats[cpu].n != 0) { runtime_cacherefs_stats[cpu].n != 0) {
total = avg_stats(&runtime_cacherefs_stats[cpu]); total = avg_stats(&runtime_cacherefs_stats[cpu]);
...@@ -842,10 +974,12 @@ static void print_stat(int argc, const char **argv) ...@@ -842,10 +974,12 @@ static void print_stat(int argc, const char **argv)
} }
if (!csv_output) { if (!csv_output) {
if (!null_run)
fprintf(stderr, "\n"); fprintf(stderr, "\n");
fprintf(stderr, " %18.9f seconds time elapsed", fprintf(stderr, " %17.9f seconds time elapsed",
avg_stats(&walltime_nsecs_stats)/1e9); avg_stats(&walltime_nsecs_stats)/1e9);
if (run_count > 1) { if (run_count > 1) {
fprintf(stderr, " ");
print_noise_pct(stddev_stats(&walltime_nsecs_stats), print_noise_pct(stddev_stats(&walltime_nsecs_stats),
avg_stats(&walltime_nsecs_stats)); avg_stats(&walltime_nsecs_stats));
} }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册