提交 4cabc3d1 编写于 作者: A Andi Kleen 提交者: Ingo Molnar

tools/perf/stat: Add perf stat --transaction

Add support to perf stat to print the basic transactional execution statistics:
Total cycles, Cycles in Transaction, Cycles in aborted transactions
using the in_tx and in_tx_checkpoint qualifiers.
Transaction Starts and Elision Starts, to compute the average transaction
length.

This is a reasonable overview over the success of the transactions.

Also support architectures that have a transaction aborted cycles
counter like POWER8. Since that is awkward to abstract in the kernel,
handle both cases here.

Enable with a new --transaction / -T option.

This requires measuring these events in a group, since they depend on each
other.

This is implemented by using TM sysfs events exported by the kernel.
Signed-off-by: Andi Kleen <ak@linux.intel.com>
Acked-by: Arnaldo Carvalho de Melo <acme@infradead.org>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1377128846-977-5-git-send-email-andi@firstfloor.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
上级 723478c8
...@@ -137,6 +137,11 @@ core number and the number of online logical processors on that physical process ...@@ -137,6 +137,11 @@ core number and the number of online logical processors on that physical process
After starting the program, wait msecs before measuring. This is useful to After starting the program, wait msecs before measuring. This is useful to
filter out the startup phase of the program, which is often very different. filter out the startup phase of the program, which is often very different.
-T::
--transaction::
Print statistics of transactional execution if supported.
EXAMPLES EXAMPLES
-------- --------
......
...@@ -46,6 +46,7 @@ ...@@ -46,6 +46,7 @@
#include "util/util.h" #include "util/util.h"
#include "util/parse-options.h" #include "util/parse-options.h"
#include "util/parse-events.h" #include "util/parse-events.h"
#include "util/pmu.h"
#include "util/event.h" #include "util/event.h"
#include "util/evlist.h" #include "util/evlist.h"
#include "util/evsel.h" #include "util/evsel.h"
...@@ -70,6 +71,41 @@ static void print_counter_aggr(struct perf_evsel *counter, char *prefix); ...@@ -70,6 +71,41 @@ static void print_counter_aggr(struct perf_evsel *counter, char *prefix);
static void print_counter(struct perf_evsel *counter, char *prefix); static void print_counter(struct perf_evsel *counter, char *prefix);
static void print_aggr(char *prefix); static void print_aggr(char *prefix);
/*
 * Event specifications parsed for "perf stat -T" when the cpu PMU provides
 * both cycles-ct and el-start: a stand-alone task-clock followed by one
 * event group.
 */
static const char * const transaction_attrs[] = {
	"task-clock",
	/* generic events first, then the cpu PMU transaction events */
	"{instructions,cycles,"
	"cpu/cycles-t/,cpu/tx-start/,cpu/el-start/,cpu/cycles-ct/}"
};
/*
 * Reduced event specifications for "perf stat -T", used when the cpu PMU
 * lacks cycles-ct or el-start: task-clock plus a group without those two.
 */
static const char * const transaction_limited_attrs[] = {
	"task-clock",
	/* generic events first, then the cpu PMU transaction events */
	"{instructions,cycles,"
	"cpu/cycles-t/,cpu/tx-start/}"
};
/*
 * Index of each transaction event within evsel_list, as looked up through
 * nth_evsel(). The order must match the events listed in transaction_attrs;
 * transaction_limited_attrs only provides the leading entries, up to and
 * including T_TRANSACTION_START.
 */
enum {
	T_TASK_CLOCK = 0,	/* task-clock */
	T_INSTRUCTIONS,		/* instructions */
	T_CYCLES,		/* cycles */
	T_CYCLES_IN_TX,		/* cpu/cycles-t/ */
	T_TRANSACTION_START,	/* cpu/tx-start/ */
	T_ELISION_START,	/* cpu/el-start/ */
	T_CYCLES_IN_TX_CP,	/* cpu/cycles-ct/ */
};
static struct perf_evlist *evsel_list; static struct perf_evlist *evsel_list;
static struct perf_target target = { static struct perf_target target = {
...@@ -90,6 +126,7 @@ static enum aggr_mode aggr_mode = AGGR_GLOBAL; ...@@ -90,6 +126,7 @@ static enum aggr_mode aggr_mode = AGGR_GLOBAL;
static volatile pid_t child_pid = -1; static volatile pid_t child_pid = -1;
static bool null_run = false; static bool null_run = false;
static int detailed_run = 0; static int detailed_run = 0;
static bool transaction_run;
static bool big_num = true; static bool big_num = true;
static int big_num_opt = -1; static int big_num_opt = -1;
static const char *csv_sep = NULL; static const char *csv_sep = NULL;
...@@ -214,7 +251,10 @@ static struct stats runtime_l1_icache_stats[MAX_NR_CPUS]; ...@@ -214,7 +251,10 @@ static struct stats runtime_l1_icache_stats[MAX_NR_CPUS];
static struct stats runtime_ll_cache_stats[MAX_NR_CPUS]; static struct stats runtime_ll_cache_stats[MAX_NR_CPUS];
static struct stats runtime_itlb_cache_stats[MAX_NR_CPUS]; static struct stats runtime_itlb_cache_stats[MAX_NR_CPUS];
static struct stats runtime_dtlb_cache_stats[MAX_NR_CPUS]; static struct stats runtime_dtlb_cache_stats[MAX_NR_CPUS];
static struct stats runtime_cycles_in_tx_stats[MAX_NR_CPUS];
static struct stats walltime_nsecs_stats; static struct stats walltime_nsecs_stats;
static struct stats runtime_transaction_stats[MAX_NR_CPUS];
static struct stats runtime_elision_stats[MAX_NR_CPUS];
static void perf_stat__reset_stats(struct perf_evlist *evlist) static void perf_stat__reset_stats(struct perf_evlist *evlist)
{ {
...@@ -236,6 +276,11 @@ static void perf_stat__reset_stats(struct perf_evlist *evlist) ...@@ -236,6 +276,11 @@ static void perf_stat__reset_stats(struct perf_evlist *evlist)
memset(runtime_ll_cache_stats, 0, sizeof(runtime_ll_cache_stats)); memset(runtime_ll_cache_stats, 0, sizeof(runtime_ll_cache_stats));
memset(runtime_itlb_cache_stats, 0, sizeof(runtime_itlb_cache_stats)); memset(runtime_itlb_cache_stats, 0, sizeof(runtime_itlb_cache_stats));
memset(runtime_dtlb_cache_stats, 0, sizeof(runtime_dtlb_cache_stats)); memset(runtime_dtlb_cache_stats, 0, sizeof(runtime_dtlb_cache_stats));
memset(runtime_cycles_in_tx_stats, 0,
sizeof(runtime_cycles_in_tx_stats));
memset(runtime_transaction_stats, 0,
sizeof(runtime_transaction_stats));
memset(runtime_elision_stats, 0, sizeof(runtime_elision_stats));
memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats)); memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats));
} }
...@@ -274,6 +319,29 @@ static inline int nsec_counter(struct perf_evsel *evsel) ...@@ -274,6 +319,29 @@ static inline int nsec_counter(struct perf_evsel *evsel)
return 0; return 0;
} }
/*
 * Return the n-th event of evsel_list (0-based), or NULL when n does not
 * name an entry of the list.
 *
 * On the first call the list is flattened into a lookup array that is kept
 * for the lifetime of the process, so this must only be called once
 * evsel_list does not change anymore.
 *
 * Exits the process with status ENOMEM if the array cannot be allocated.
 */
static struct perf_evsel *nth_evsel(int n)
{
	static struct perf_evsel **array;
	static int array_len;
	struct perf_evsel *ev;
	int j;

	if (!array) {
		list_for_each_entry(ev, &evsel_list->entries, node)
			array_len++;
		/*
		 * Always request at least one slot: malloc(0) is allowed to
		 * return NULL, which would be mistaken for an allocation
		 * failure when the list is empty.
		 */
		array = malloc((array_len ? array_len : 1) * sizeof(*array));
		if (!array)
			exit(ENOMEM);
		j = 0;
		list_for_each_entry(ev, &evsel_list->entries, node)
			array[j++] = ev;
	}
	/* Reject negative indices as well as indices past the end. */
	if (n >= 0 && n < array_len)
		return array[n];
	return NULL;
}
/* /*
* Update various tracking values we maintain to print * Update various tracking values we maintain to print
* more semantic information such as miss/hit ratios, * more semantic information such as miss/hit ratios,
...@@ -285,6 +353,15 @@ static void update_shadow_stats(struct perf_evsel *counter, u64 *count) ...@@ -285,6 +353,15 @@ static void update_shadow_stats(struct perf_evsel *counter, u64 *count)
update_stats(&runtime_nsecs_stats[0], count[0]); update_stats(&runtime_nsecs_stats[0], count[0]);
else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES)) else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
update_stats(&runtime_cycles_stats[0], count[0]); update_stats(&runtime_cycles_stats[0], count[0]);
else if (transaction_run &&
perf_evsel__cmp(counter, nth_evsel(T_CYCLES_IN_TX)))
update_stats(&runtime_cycles_in_tx_stats[0], count[0]);
else if (transaction_run &&
perf_evsel__cmp(counter, nth_evsel(T_TRANSACTION_START)))
update_stats(&runtime_transaction_stats[0], count[0]);
else if (transaction_run &&
perf_evsel__cmp(counter, nth_evsel(T_ELISION_START)))
update_stats(&runtime_elision_stats[0], count[0]);
else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
update_stats(&runtime_stalled_cycles_front_stats[0], count[0]); update_stats(&runtime_stalled_cycles_front_stats[0], count[0]);
else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND)) else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
...@@ -827,7 +904,7 @@ static void print_ll_cache_misses(int cpu, ...@@ -827,7 +904,7 @@ static void print_ll_cache_misses(int cpu,
static void abs_printout(int cpu, int nr, struct perf_evsel *evsel, double avg) static void abs_printout(int cpu, int nr, struct perf_evsel *evsel, double avg)
{ {
double total, ratio = 0.0; double total, ratio = 0.0, total2;
const char *fmt; const char *fmt;
if (csv_output) if (csv_output)
...@@ -923,6 +1000,43 @@ static void abs_printout(int cpu, int nr, struct perf_evsel *evsel, double avg) ...@@ -923,6 +1000,43 @@ static void abs_printout(int cpu, int nr, struct perf_evsel *evsel, double avg)
ratio = 1.0 * avg / total; ratio = 1.0 * avg / total;
fprintf(output, " # %8.3f GHz ", ratio); fprintf(output, " # %8.3f GHz ", ratio);
} else if (transaction_run &&
perf_evsel__cmp(evsel, nth_evsel(T_CYCLES_IN_TX))) {
total = avg_stats(&runtime_cycles_stats[cpu]);
if (total)
fprintf(output,
" # %5.2f%% transactional cycles ",
100.0 * (avg / total));
} else if (transaction_run &&
perf_evsel__cmp(evsel, nth_evsel(T_CYCLES_IN_TX_CP))) {
total = avg_stats(&runtime_cycles_stats[cpu]);
total2 = avg_stats(&runtime_cycles_in_tx_stats[cpu]);
if (total2 < avg)
total2 = avg;
if (total)
fprintf(output,
" # %5.2f%% aborted cycles ",
100.0 * ((total2-avg) / total));
} else if (transaction_run &&
perf_evsel__cmp(evsel, nth_evsel(T_TRANSACTION_START)) &&
avg > 0 &&
runtime_cycles_in_tx_stats[cpu].n != 0) {
total = avg_stats(&runtime_cycles_in_tx_stats[cpu]);
if (total)
ratio = total / avg;
fprintf(output, " # %8.0f cycles / transaction ", ratio);
} else if (transaction_run &&
perf_evsel__cmp(evsel, nth_evsel(T_ELISION_START)) &&
avg > 0 &&
runtime_cycles_in_tx_stats[cpu].n != 0) {
total = avg_stats(&runtime_cycles_in_tx_stats[cpu]);
if (total)
ratio = total / avg;
fprintf(output, " # %8.0f cycles / elision ", ratio);
} else if (runtime_nsecs_stats[cpu].n != 0) { } else if (runtime_nsecs_stats[cpu].n != 0) {
char unit = 'M'; char unit = 'M';
...@@ -1236,6 +1350,16 @@ static int perf_stat_init_aggr_mode(void) ...@@ -1236,6 +1350,16 @@ static int perf_stat_init_aggr_mode(void)
return 0; return 0;
} }
/*
 * Parse the @len event specification strings in @attrs into evsel_list,
 * in order, stopping at the first string parse_events() rejects.
 *
 * Returns 0 when every string was parsed, -1 otherwise.
 */
static int setup_events(const char * const *attrs, unsigned len)
{
	const char * const *spec = attrs;
	const char * const *end = attrs + len;

	while (spec != end) {
		if (parse_events(evsel_list, *spec) != 0)
			return -1;
		spec++;
	}
	return 0;
}
/* /*
* Add default attributes, if there were no attributes specified or * Add default attributes, if there were no attributes specified or
...@@ -1354,6 +1478,22 @@ static int add_default_attributes(void) ...@@ -1354,6 +1478,22 @@ static int add_default_attributes(void)
if (null_run) if (null_run)
return 0; return 0;
if (transaction_run) {
int err;
if (pmu_have_event("cpu", "cycles-ct") &&
pmu_have_event("cpu", "el-start"))
err = setup_events(transaction_attrs,
ARRAY_SIZE(transaction_attrs));
else
err = setup_events(transaction_limited_attrs,
ARRAY_SIZE(transaction_limited_attrs));
if (err < 0) {
fprintf(stderr, "Cannot set up transaction events\n");
return -1;
}
return 0;
}
if (!evsel_list->nr_entries) { if (!evsel_list->nr_entries) {
if (perf_evlist__add_default_attrs(evsel_list, default_attrs) < 0) if (perf_evlist__add_default_attrs(evsel_list, default_attrs) < 0)
return -1; return -1;
...@@ -1388,6 +1528,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) ...@@ -1388,6 +1528,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
int output_fd = 0; int output_fd = 0;
const char *output_name = NULL; const char *output_name = NULL;
const struct option options[] = { const struct option options[] = {
OPT_BOOLEAN('T', "transaction", &transaction_run,
"hardware transaction statistics"),
OPT_CALLBACK('e', "event", &evsel_list, "event", OPT_CALLBACK('e', "event", &evsel_list, "event",
"event selector. use 'perf list' to list available events", "event selector. use 'perf list' to list available events",
parse_events_option), parse_events_option),
......
...@@ -197,6 +197,12 @@ static inline bool perf_evsel__match2(struct perf_evsel *e1, ...@@ -197,6 +197,12 @@ static inline bool perf_evsel__match2(struct perf_evsel *e1,
(e1->attr.config == e2->attr.config); (e1->attr.config == e2->attr.config);
} }
/*
 * Non-zero when both @a and @b are non-NULL and have equal attr.type and
 * attr.config, zero otherwise.
 *
 * Each argument is evaluated exactly once, so passing a function call or an
 * expression with side effects is safe.
 */
#define perf_evsel__cmp(a, b)					\
({								\
	__typeof__(a) cmp_a_ = (a);				\
	__typeof__(b) cmp_b_ = (b);				\
	cmp_a_ && cmp_b_ &&					\
	cmp_a_->attr.type == cmp_b_->attr.type &&		\
	cmp_a_->attr.config == cmp_b_->attr.config;		\
})
int __perf_evsel__read_on_cpu(struct perf_evsel *evsel, int __perf_evsel__read_on_cpu(struct perf_evsel *evsel,
int cpu, int thread, bool scale); int cpu, int thread, bool scale);
......
...@@ -637,3 +637,19 @@ void print_pmu_events(const char *event_glob, bool name_only) ...@@ -637,3 +637,19 @@ void print_pmu_events(const char *event_glob, bool name_only)
printf("\n"); printf("\n");
free(aliases); free(aliases);
} }
bool pmu_have_event(const char *pname, const char *name)
{
struct perf_pmu *pmu;
struct perf_pmu_alias *alias;
pmu = NULL;
while ((pmu = perf_pmu__scan(pmu)) != NULL) {
if (strcmp(pname, pmu->name))
continue;
list_for_each_entry(alias, &pmu->aliases, list)
if (!strcmp(alias->name, name))
return true;
}
return false;
}
...@@ -42,6 +42,7 @@ int perf_pmu__format_parse(char *dir, struct list_head *head); ...@@ -42,6 +42,7 @@ int perf_pmu__format_parse(char *dir, struct list_head *head);
struct perf_pmu *perf_pmu__scan(struct perf_pmu *pmu); struct perf_pmu *perf_pmu__scan(struct perf_pmu *pmu);
void print_pmu_events(const char *event_glob, bool name_only); void print_pmu_events(const char *event_glob, bool name_only);
bool pmu_have_event(const char *pname, const char *name);
int perf_pmu__test(void); int perf_pmu__test(void);
#endif /* __PMU_H */ #endif /* __PMU_H */
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册