From 1e181b92a2da30ba1f80c61a41cfb9ef02f43b79 Mon Sep 17 00:00:00 2001 From: Jiri Olsa <jolsa@kernel.org> Date: Fri, 3 Jun 2016 15:40:28 +0200 Subject: [PATCH] perf c2c report: Add 'node' sort key It is to be displayed in the single cacheline output: node It displays node hits related to cacheline accesses. The node field comes in 3 flavors: - node IDs separated by ',' - node IDs with stats for each ID, in the following format: Node{cpus %hitms %stores} - node IDs with list of affected CPUs in the following format: Node{cpu list} User can switch the flavor with -N option (-NN,-NNN). It will be available in TUI to switch this with 'n' key. Signed-off-by: Jiri Olsa <jolsa@kernel.org> Cc: Andi Kleen <andi@firstfloor.org> Cc: David Ahern <dsahern@gmail.com> Cc: Don Zickus <dzickus@redhat.com> Cc: Joe Mario <jmario@redhat.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Link: http://lkml.kernel.org/n/tip-6742e6g0r7n63y5wc4rrgxx5@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> --- tools/perf/builtin-c2c.c | 219 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 219 insertions(+) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index ffd41744886e..ca2f37479e6d 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -1,6 +1,7 @@ #include <linux/compiler.h> #include <linux/kernel.h> #include <linux/stringify.h> +#include <asm/bug.h> #include "util.h" #include "debug.h" #include "builtin.h" @@ -22,6 +23,8 @@ struct c2c_hists { struct c2c_hist_entry { struct c2c_hists *hists; struct c2c_stats stats; + unsigned long *cpuset; + struct c2c_stats *node_stats; /* * must be at the end, * because of its callchain dynamic entry @@ -32,6 +35,12 @@ struct c2c_hist_entry { struct perf_c2c { struct perf_tool tool; struct c2c_hists hists; + + unsigned long **nodes; + int nodes_cnt; + int cpus_cnt; + int *cpu2node; + int node_info; }; static struct perf_c2c c2c; @@ -44,6 +53,14 @@ static void 
*c2c_he_zalloc(size_t size) if (!c2c_he) return NULL; + c2c_he->cpuset = bitmap_alloc(c2c.cpus_cnt); + if (!c2c_he->cpuset) + return NULL; + + c2c_he->node_stats = zalloc(c2c.nodes_cnt * sizeof(*c2c_he->node_stats)); + if (!c2c_he->node_stats) + return NULL; + return &c2c_he->he; } @@ -57,6 +74,8 @@ static void c2c_he_free(void *he) free(c2c_he->hists); } + free(c2c_he->cpuset); + free(c2c_he->node_stats); free(c2c_he); } @@ -93,6 +112,16 @@ he__get_c2c_hists(struct hist_entry *he, return hists; } +static void c2c_he__set_cpu(struct c2c_hist_entry *c2c_he, + struct perf_sample *sample) +{ + if (WARN_ONCE(sample->cpu == (unsigned int) -1, + "WARNING: no sample cpu value")) + return; + + set_bit(sample->cpu, c2c_he->cpuset); +} + static int process_sample_event(struct perf_tool *tool __maybe_unused, union perf_event *event, struct perf_sample *sample, @@ -133,10 +162,23 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused, c2c_add_stats(&c2c_he->stats, &stats); c2c_add_stats(&c2c_hists->stats, &stats); + c2c_he__set_cpu(c2c_he, sample); + hists__inc_nr_samples(&c2c_hists->hists, he->filtered); ret = hist_entry__append_callchain(he, sample); if (!ret) { + /* + * There's already been warning about missing + * sample's cpu value. Let's account all to + * node 0 in this case, without any further + * warning. + * + * Doing node stats only for single callchain data. + */ + int cpu = sample->cpu == (unsigned int) -1 ? 
0 : sample->cpu; + int node = c2c.cpu2node[cpu]; + mi = mi_dup; mi_dup = memdup(mi, sizeof(*mi)); @@ -156,6 +198,9 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused, c2c_he = container_of(he, struct c2c_hist_entry, he); c2c_add_stats(&c2c_he->stats, &stats); c2c_add_stats(&c2c_hists->stats, &stats); + c2c_add_stats(&c2c_he->node_stats[node], &stats); + + c2c_he__set_cpu(c2c_he, sample); hists__inc_nr_samples(&c2c_hists->hists, he->filtered); ret = hist_entry__append_callchain(he, sample); @@ -826,6 +871,97 @@ pid_cmp(struct perf_hpp_fmt *fmt __maybe_unused, return left->thread->pid_ - right->thread->pid_; } +static int64_t +empty_cmp(struct perf_hpp_fmt *fmt __maybe_unused, + struct hist_entry *left __maybe_unused, + struct hist_entry *right __maybe_unused) +{ + return 0; +} + +static int +node_entry(struct perf_hpp_fmt *fmt __maybe_unused, struct perf_hpp *hpp, + struct hist_entry *he) +{ + struct c2c_hist_entry *c2c_he; + bool first = true; + int node; + int ret = 0; + + c2c_he = container_of(he, struct c2c_hist_entry, he); + + for (node = 0; node < c2c.nodes_cnt; node++) { + DECLARE_BITMAP(set, c2c.cpus_cnt); + + bitmap_zero(set, c2c.cpus_cnt); + bitmap_and(set, c2c_he->cpuset, c2c.nodes[node], c2c.cpus_cnt); + + if (!bitmap_weight(set, c2c.cpus_cnt)) { + if (c2c.node_info == 1) { + ret = scnprintf(hpp->buf, hpp->size, "%21s", " "); + advance_hpp(hpp, ret); + } + continue; + } + + if (!first) { + ret = scnprintf(hpp->buf, hpp->size, " "); + advance_hpp(hpp, ret); + } + + switch (c2c.node_info) { + case 0: + ret = scnprintf(hpp->buf, hpp->size, "%2d", node); + advance_hpp(hpp, ret); + break; + case 1: + { + int num = bitmap_weight(c2c_he->cpuset, c2c.cpus_cnt); + struct c2c_stats *stats = &c2c_he->node_stats[node]; + + ret = scnprintf(hpp->buf, hpp->size, "%2d{%2d ", node, num); + advance_hpp(hpp, ret); + + + if (c2c_he->stats.rmt_hitm > 0) { + ret = scnprintf(hpp->buf, hpp->size, "%5.1f%% ", + percent(stats->rmt_hitm, 
c2c_he->stats.rmt_hitm)); + } else { + ret = scnprintf(hpp->buf, hpp->size, "%6s ", "n/a"); + } + + advance_hpp(hpp, ret); + + if (c2c_he->stats.store > 0) { + ret = scnprintf(hpp->buf, hpp->size, "%5.1f%%}", + percent(stats->store, c2c_he->stats.store)); + } else { + ret = scnprintf(hpp->buf, hpp->size, "%6s}", "n/a"); + } + + advance_hpp(hpp, ret); + break; + } + case 2: + ret = scnprintf(hpp->buf, hpp->size, "%2d{", node); + advance_hpp(hpp, ret); + + ret = bitmap_scnprintf(set, c2c.cpus_cnt, hpp->buf, hpp->size); + advance_hpp(hpp, ret); + + ret = scnprintf(hpp->buf, hpp->size, "}"); + advance_hpp(hpp, ret); + break; + default: + break; + } + + first = false; + } + + return 0; +} + #define HEADER_LOW(__h) \ { \ .line[1] = { \ @@ -1115,6 +1251,19 @@ static struct c2c_dimension dim_dso = { .se = &sort_dso, }; +static struct c2c_header header_node[3] = { + HEADER_LOW("Node"), + HEADER_LOW("Node{cpus %hitms %stores}"), + HEADER_LOW("Node{cpu list}"), +}; + +static struct c2c_dimension dim_node = { + .name = "node", + .cmp = empty_cmp, + .entry = node_entry, + .width = 4, +}; + static struct c2c_dimension *dimensions[] = { &dim_dcacheline, &dim_offset, @@ -1148,6 +1297,7 @@ static struct c2c_dimension *dimensions[] = { &dim_tid, &dim_symbol, &dim_dso, + &dim_node, NULL, }; @@ -1374,6 +1524,68 @@ static int resort_cl_cb(struct hist_entry *he) return 0; } +static void setup_nodes_header(void) +{ + dim_node.header = header_node[c2c.node_info]; +} + +static int setup_nodes(struct perf_session *session) +{ + struct numa_node *n; + unsigned long **nodes; + int node, cpu; + int *cpu2node; + + if (c2c.node_info > 2) + c2c.node_info = 2; + + c2c.nodes_cnt = session->header.env.nr_numa_nodes; + c2c.cpus_cnt = session->header.env.nr_cpus_online; + + n = session->header.env.numa_nodes; + if (!n) + return -EINVAL; + + nodes = zalloc(sizeof(unsigned long *) * c2c.nodes_cnt); + if (!nodes) + return -ENOMEM; + + c2c.nodes = nodes; + + cpu2node = zalloc(sizeof(int) * c2c.cpus_cnt); 
+ if (!cpu2node) + return -ENOMEM; + + for (cpu = 0; cpu < c2c.cpus_cnt; cpu++) + cpu2node[cpu] = -1; + + c2c.cpu2node = cpu2node; + + for (node = 0; node < c2c.nodes_cnt; node++) { + struct cpu_map *map = n[node].map; + unsigned long *set; + + set = bitmap_alloc(c2c.cpus_cnt); + if (!set) + return -ENOMEM; + + for (cpu = 0; cpu < map->nr; cpu++) { + set_bit(map->map[cpu], set); + + if (WARN_ONCE(cpu2node[map->map[cpu]] != -1, "node/cpu topology bug")) + return -EINVAL; + + cpu2node[map->map[cpu]] = node; + } + + nodes[node] = set; + } + + setup_nodes_header(); + return 0; +} + + static int perf_c2c__report(int argc, const char **argv) { struct perf_session *session; @@ -1388,6 +1600,8 @@ static int perf_c2c__report(int argc, const char **argv) "be more verbose (show counter open errors, etc)"), OPT_STRING('i', "input", &input_name, "file", "the input file to process"), + OPT_INCR('N', "node-info", &c2c.node_info, + "show extra node info in report (repeat for more info)"), OPT_END() }; int err = 0; @@ -1413,6 +1627,11 @@ static int perf_c2c__report(int argc, const char **argv) pr_debug("No memory for session\n"); goto out; } + err = setup_nodes(session); + if (err) { + pr_err("Failed setup nodes\n"); + goto out; + } if (symbol__init(&session->header.env) < 0) goto out_session; -- GitLab