perf evsel: move the counter reading routines out of builtin-stat.c

  Summary of what this patch does (free text before the first file header
  is skipped by patch tools):

  tools/perf/builtin-stat.c
    * struct cpu_counts and the scaled/cpu_counts[] members of
      struct perf_stat are removed; per-evsel results now live in
      evsel->counts.
    * read_counter_aggr() and read_counter() return int and delegate the
      actual read and scaling to __perf_evsel__read() and
      __perf_evsel__read_on_cpu().
    * run_perf_stat() closes the descriptors with perf_evsel__close_fd()
      after each counter has been read.
    * cmd_stat() additionally calls perf_evsel__alloc_counts().

  tools/perf/util/evsel.c, tools/perf/util/evsel.h
    * new struct perf_counts_values (val/ena/run, also addressable as
      values[3]) and struct perf_counts (scaled flag, aggregate, per-cpu
      array); struct perf_evsel gains a counts pointer.
    * new perf_evsel__alloc_counts(), perf_evsel__close_fd(),
      __perf_evsel__read_on_cpu(), __perf_evsel__read() and their inline
      scaled/unscaled wrappers.

  Review fix folded in:
    __perf_evsel__read() accumulates into evsel->counts->aggr with "+=",
    but only aggr->val was reset on entry.  The code it replaces zeroed
    all three accumulators on every call, so aggr->ena and aggr->run are
    now cleared as well; otherwise a second read of the same evsel would
    scale against stale enabled/running totals.

  NOTE(review): this text was recovered from a whitespace-collapsed copy.
    * Indentation, alignment and blank lines are reconstructed; if context
      does not match, retry while ignoring whitespace.
    * The <...> targets of the two bare #include lines in evsel.h were
      lost; <linux/list.h> and <stdbool.h> are inferred - confirm.
    * The first evsel.h hunk header announces 15 pre-image lines but only
      14 could be recovered; one context line (presumably between the
      <stdbool.h> and "types.h" includes) seems to be missing - verify
      against the tree before applying.
    * The index hashes are those of the original patch and do not reflect
      the review fix.

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 589ba3a92423a247943c5a5b78d10357c9567e81..a8b00b44b3cdb8c0fb7d295aa2663217d467174c 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -93,12 +93,6 @@ static const char *cpu_list;
 static const char *csv_sep = NULL;
 static bool csv_output = false;
 
-struct cpu_counts {
-	u64 val;
-	u64 ena;
-	u64 run;
-};
-
 static volatile int done = 0;
 
 struct stats
@@ -108,15 +102,11 @@ struct stats
 
 struct perf_stat {
 	struct stats res_stats[3];
-	int scaled;
-	struct cpu_counts cpu_counts[];
 };
 
-static int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel, int ncpus)
+static int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel)
 {
-	size_t priv_size = (sizeof(struct perf_stat) +
-			    (ncpus * sizeof(struct cpu_counts)));
-	evsel->priv = zalloc(priv_size);
+	evsel->priv = zalloc(sizeof(struct perf_stat));
 	return evsel->priv == NULL ? -ENOMEM : 0;
 }
 
@@ -238,52 +228,14 @@ static inline int nsec_counter(struct perf_evsel *evsel)
  * Read out the results of a single counter:
  * aggregate counts across CPUs in system-wide mode
  */
-static void read_counter_aggr(struct perf_evsel *counter)
+static int read_counter_aggr(struct perf_evsel *counter)
 {
 	struct perf_stat *ps = counter->priv;
-	u64 count[3], single_count[3];
-	int cpu;
-	size_t res, nv;
-	int scaled;
-	int i, thread;
-
-	count[0] = count[1] = count[2] = 0;
-
-	nv = scale ? 3 : 1;
-	for (cpu = 0; cpu < nr_cpus; cpu++) {
-		for (thread = 0; thread < thread_num; thread++) {
-			if (FD(counter, cpu, thread) < 0)
-				continue;
-
-			res = read(FD(counter, cpu, thread),
-					single_count, nv * sizeof(u64));
-			assert(res == nv * sizeof(u64));
-
-			close(FD(counter, cpu, thread));
-			FD(counter, cpu, thread) = -1;
-
-			count[0] += single_count[0];
-			if (scale) {
-				count[1] += single_count[1];
-				count[2] += single_count[2];
-			}
-		}
-	}
-
-	scaled = 0;
-	if (scale) {
-		if (count[2] == 0) {
-			ps->scaled = -1;
-			count[0] = 0;
-			return;
-		}
+	u64 *count = counter->counts->aggr.values;
+	int i;
 
-		if (count[2] < count[1]) {
-			ps->scaled = 1;
-			count[0] = (unsigned long long)
-				((double)count[0] * count[1] / count[2] + 0.5);
-		}
-	}
+	if (__perf_evsel__read(counter, nr_cpus, thread_num, scale) < 0)
+		return -1;
 
 	for (i = 0; i < 3; i++)
 		update_stats(&ps->res_stats[i], count[i]);
@@ -302,46 +254,24 @@ static void read_counter_aggr(struct perf_evsel *counter)
 		update_stats(&runtime_cycles_stats[0], count[0]);
 	if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
 		update_stats(&runtime_branches_stats[0], count[0]);
+
+	return 0;
 }
 
 /*
  * Read out the results of a single counter:
  * do not aggregate counts across CPUs in system-wide mode
  */
-static void read_counter(struct perf_evsel *counter)
+static int read_counter(struct perf_evsel *counter)
 {
-	struct cpu_counts *cpu_counts = counter->priv;
-	u64 count[3];
+	u64 *count;
 	int cpu;
-	size_t res, nv;
-
-	count[0] = count[1] = count[2] = 0;
-
-	nv = scale ? 3 : 1;
 
 	for (cpu = 0; cpu < nr_cpus; cpu++) {
+		if (__perf_evsel__read_on_cpu(counter, cpu, 0, scale) < 0)
+			return -1;
 
-		if (FD(counter, cpu, 0) < 0)
-			continue;
-
-		res = read(FD(counter, cpu, 0), count, nv * sizeof(u64));
-
-		assert(res == nv * sizeof(u64));
-
-		close(FD(counter, cpu, 0));
-		FD(counter, cpu, 0) = -1;
-
-		if (scale) {
-			if (count[2] == 0) {
-				count[0] = 0;
-			} else if (count[2] < count[1]) {
-				count[0] = (unsigned long long)
-				((double)count[0] * count[1] / count[2] + 0.5);
-			}
-		}
-		cpu_counts[cpu].val = count[0]; /* scaled count */
-		cpu_counts[cpu].ena = count[1];
-		cpu_counts[cpu].run = count[2];
+		count = counter->counts->cpu[cpu].values;
 
 		if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK))
 			update_stats(&runtime_nsecs_stats[cpu], count[0]);
@@ -350,6 +280,8 @@ static void read_counter(struct perf_evsel *counter)
 		if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
 			update_stats(&runtime_branches_stats[cpu], count[0]);
 	}
+
+	return 0;
 }
 
 static int run_perf_stat(int argc __used, const char **argv)
@@ -449,12 +381,17 @@ static int run_perf_stat(int argc __used, const char **argv)
 	update_stats(&walltime_nsecs_stats, t1 - t0);
 
 	if (no_aggr) {
-		list_for_each_entry(counter, &evsel_list, node)
+		list_for_each_entry(counter, &evsel_list, node) {
 			read_counter(counter);
+			perf_evsel__close_fd(counter, nr_cpus, 1);
+		}
 	} else {
-		list_for_each_entry(counter, &evsel_list, node)
+		list_for_each_entry(counter, &evsel_list, node) {
 			read_counter_aggr(counter);
+			perf_evsel__close_fd(counter, nr_cpus, thread_num);
+		}
 	}
+
 	return WEXITSTATUS(status);
 }
 
@@ -550,7 +487,7 @@ static void print_counter_aggr(struct perf_evsel *counter)
 {
 	struct perf_stat *ps = counter->priv;
 	double avg = avg_stats(&ps->res_stats[0]);
-	int scaled = ps->scaled;
+	int scaled = counter->counts->scaled;
 
 	if (scaled == -1) {
 		fprintf(stderr, "%*s%s%-24s\n",
@@ -590,14 +527,13 @@ static void print_counter_aggr(struct perf_evsel *counter)
  */
 static void print_counter(struct perf_evsel *counter)
 {
-	struct perf_stat *ps = counter->priv;
 	u64 ena, run, val;
 	int cpu;
 
 	for (cpu = 0; cpu < nr_cpus; cpu++) {
-		val = ps->cpu_counts[cpu].val;
-		ena = ps->cpu_counts[cpu].ena;
-		run = ps->cpu_counts[cpu].run;
+		val = counter->counts->cpu[cpu].val;
+		ena = counter->counts->cpu[cpu].ena;
+		run = counter->counts->cpu[cpu].run;
 		if (run == 0 || ena == 0) {
 			fprintf(stderr, "CPU%*d%s%*s%s%-24s",
 				csv_output ? 0 : -4,
@@ -818,7 +754,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used)
 	}
 
 	list_for_each_entry(pos, &evsel_list, node) {
-		if (perf_evsel__alloc_stat_priv(pos, nr_cpus) < 0 ||
+		if (perf_evsel__alloc_stat_priv(pos) < 0 ||
+		    perf_evsel__alloc_counts(pos, nr_cpus) < 0 ||
 		    perf_evsel__alloc_fd(pos, nr_cpus, thread_num) < 0)
 			goto out_free_fd;
 	}
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 6539ec912c7068f83a449ee3203baa2fe1e049c3..3f5de5196231e423de78509ffbaf132865c933dd 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -1,6 +1,8 @@
 #include "evsel.h"
 #include "util.h"
 
+#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
+
 struct perf_evsel *perf_evsel__new(u32 type, u64 config, int idx)
 {
 	struct perf_evsel *evsel = zalloc(sizeof(*evsel));
@@ -21,15 +23,101 @@ int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
 	return evsel->fd != NULL ? 0 : -ENOMEM;
 }
 
+int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus)
+{
+	evsel->counts = zalloc((sizeof(*evsel->counts) +
+				(ncpus * sizeof(struct perf_counts_values))));
+	return evsel->counts != NULL ? 0 : -ENOMEM;
+}
+
 void perf_evsel__free_fd(struct perf_evsel *evsel)
 {
 	xyarray__delete(evsel->fd);
 	evsel->fd = NULL;
 }
 
+void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
+{
+	int cpu, thread;
+
+	for (cpu = 0; cpu < ncpus; cpu++)
+		for (thread = 0; thread < nthreads; ++thread) {
+			close(FD(evsel, cpu, thread));
+			FD(evsel, cpu, thread) = -1;
+		}
+}
+
 void perf_evsel__delete(struct perf_evsel *evsel)
 {
 	assert(list_empty(&evsel->node));
 	xyarray__delete(evsel->fd);
 	free(evsel);
 }
+
+int __perf_evsel__read_on_cpu(struct perf_evsel *evsel,
+			      int cpu, int thread, bool scale)
+{
+	struct perf_counts_values count;
+	size_t nv = scale ? 3 : 1;
+
+	if (FD(evsel, cpu, thread) < 0)
+		return -EINVAL;
+
+	if (readn(FD(evsel, cpu, thread), &count, nv * sizeof(u64)) < 0)
+		return -errno;
+
+	if (scale) {
+		if (count.run == 0)
+			count.val = 0;
+		else if (count.run < count.ena)
+			count.val = (u64)((double)count.val * count.ena / count.run + 0.5);
+	} else
+		count.ena = count.run = 0;
+
+	evsel->counts->cpu[cpu] = count;
+	return 0;
+}
+
+int __perf_evsel__read(struct perf_evsel *evsel,
+		       int ncpus, int nthreads, bool scale)
+{
+	size_t nv = scale ? 3 : 1;
+	int cpu, thread;
+	struct perf_counts_values *aggr = &evsel->counts->aggr, count;
+
+	aggr->val = aggr->ena = aggr->run = 0;
+
+	for (cpu = 0; cpu < ncpus; cpu++) {
+		for (thread = 0; thread < nthreads; thread++) {
+			if (FD(evsel, cpu, thread) < 0)
+				continue;
+
+			if (readn(FD(evsel, cpu, thread),
+				  &count, nv * sizeof(u64)) < 0)
+				return -errno;
+
+			aggr->val += count.val;
+			if (scale) {
+				aggr->ena += count.ena;
+				aggr->run += count.run;
+			}
+		}
+	}
+
+	evsel->counts->scaled = 0;
+	if (scale) {
+		if (aggr->run == 0) {
+			evsel->counts->scaled = -1;
+			aggr->val = 0;
+			return 0;
+		}
+
+		if (aggr->run < aggr->ena) {
+			evsel->counts->scaled = 1;
+			aggr->val = (u64)((double)aggr->val * aggr->ena / aggr->run + 0.5);
+		}
+	} else
+		aggr->ena = aggr->run = 0;
+
+	return 0;
+}
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 8a5cfb656674336c9056d11a465e3b94776b9ee1..8b48ef1e672cf27f7fa2d075b7596a1c7a76c8ef 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -2,15 +2,34 @@
 #define __PERF_EVSEL_H 1
 
 #include <linux/list.h>
+#include <stdbool.h>
 #include "types.h"
 #include "xyarray.h"
+
+struct perf_counts_values {
+	union {
+		struct {
+			u64 val;
+			u64 ena;
+			u64 run;
+		};
+		u64 values[3];
+	};
+};
+
+struct perf_counts {
+	s8 scaled;
+	struct perf_counts_values aggr;
+	struct perf_counts_values cpu[];
+};
 
 struct perf_evsel {
 	struct list_head node;
 	struct perf_event_attr attr;
 	char *filter;
 	struct xyarray *fd;
+	struct perf_counts *counts;
 	int idx;
 	void *priv;
 };
@@ -19,10 +38,70 @@ struct perf_evsel *perf_evsel__new(u32 type, u64 config, int idx);
 void perf_evsel__delete(struct perf_evsel *evsel);
 
 int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads);
+int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus);
 void perf_evsel__free_fd(struct perf_evsel *evsel);
+void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads);
 
 #define perf_evsel__match(evsel, t, c) \
 	(evsel->attr.type == PERF_TYPE_##t && \
 	 evsel->attr.config == PERF_COUNT_##c)
 
+int __perf_evsel__read_on_cpu(struct perf_evsel *evsel,
+			      int cpu, int thread, bool scale);
+
+/**
+ * perf_evsel__read_on_cpu - Read out the results on a CPU and thread
+ *
+ * @evsel - event selector to read value
+ * @cpu - CPU of interest
+ * @thread - thread of interest
+ */
+static inline int perf_evsel__read_on_cpu(struct perf_evsel *evsel,
+					  int cpu, int thread)
+{
+	return __perf_evsel__read_on_cpu(evsel, cpu, thread, false);
+}
+
+/**
+ * perf_evsel__read_on_cpu_scaled - Read out the results on a CPU and thread, scaled
+ *
+ * @evsel - event selector to read value
+ * @cpu - CPU of interest
+ * @thread - thread of interest
+ */
+static inline int perf_evsel__read_on_cpu_scaled(struct perf_evsel *evsel,
+						 int cpu, int thread)
+{
+	return __perf_evsel__read_on_cpu(evsel, cpu, thread, true);
+}
+
+int __perf_evsel__read(struct perf_evsel *evsel, int ncpus, int nthreads,
+		       bool scale);
+
+/**
+ * perf_evsel__read - Read the aggregate results on all CPUs
+ *
+ * @evsel - event selector to read value
+ * @ncpus - Number of cpus affected, from zero
+ * @nthreads - Number of threads affected, from zero
+ */
+static inline int perf_evsel__read(struct perf_evsel *evsel,
+				   int ncpus, int nthreads)
+{
+	return __perf_evsel__read(evsel, ncpus, nthreads, false);
+}
+
+/**
+ * perf_evsel__read_scaled - Read the aggregate results on all CPUs, scaled
+ *
+ * @evsel - event selector to read value
+ * @ncpus - Number of cpus affected, from zero
+ * @nthreads - Number of threads affected, from zero
+ */
+static inline int perf_evsel__read_scaled(struct perf_evsel *evsel,
+					  int ncpus, int nthreads)
+{
+	return __perf_evsel__read(evsel, ncpus, nthreads, true);
+}
+
 #endif /* __PERF_EVSEL_H */