diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c index de341d4ec92a487b3af40f9cd036004963374f99..d5be06a5005e99eb9ac8dbe808f565013bddc929 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c +++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c @@ -12,6 +12,16 @@ enum { LBR_FORMAT_LIP = 0x01, LBR_FORMAT_EIP = 0x02, LBR_FORMAT_EIP_FLAGS = 0x03, + LBR_FORMAT_EIP_FLAGS2 = 0x04, + LBR_FORMAT_MAX_KNOWN = LBR_FORMAT_EIP_FLAGS2, +}; + +static enum { + LBR_EIP_FLAGS = 1, + LBR_TSX = 2, +} lbr_desc[LBR_FORMAT_MAX_KNOWN + 1] = { + [LBR_FORMAT_EIP_FLAGS] = LBR_EIP_FLAGS, + [LBR_FORMAT_EIP_FLAGS2] = LBR_EIP_FLAGS | LBR_TSX, }; /* @@ -56,6 +66,8 @@ enum { LBR_FAR) #define LBR_FROM_FLAG_MISPRED (1ULL << 63) +#define LBR_FROM_FLAG_IN_TX (1ULL << 62) +#define LBR_FROM_FLAG_ABORT (1ULL << 61) #define for_each_branch_sample_type(x) \ for ((x) = PERF_SAMPLE_BRANCH_USER; \ @@ -81,9 +93,13 @@ enum { X86_BR_JMP = 1 << 9, /* jump */ X86_BR_IRQ = 1 << 10,/* hw interrupt or trap or fault */ X86_BR_IND_CALL = 1 << 11,/* indirect calls */ + X86_BR_ABORT = 1 << 12,/* transaction abort */ + X86_BR_IN_TX = 1 << 13,/* in transaction */ + X86_BR_NO_TX = 1 << 14,/* not in transaction */ }; #define X86_BR_PLM (X86_BR_USER | X86_BR_KERNEL) +#define X86_BR_ANYTX (X86_BR_NO_TX | X86_BR_IN_TX) #define X86_BR_ANY \ (X86_BR_CALL |\ @@ -95,6 +111,7 @@ enum { X86_BR_JCC |\ X86_BR_JMP |\ X86_BR_IRQ |\ + X86_BR_ABORT |\ X86_BR_IND_CALL) #define X86_BR_ALL (X86_BR_PLM | X86_BR_ANY) @@ -270,21 +287,31 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc) for (i = 0; i < x86_pmu.lbr_nr; i++) { unsigned long lbr_idx = (tos - i) & mask; - u64 from, to, mis = 0, pred = 0; + u64 from, to, mis = 0, pred = 0, in_tx = 0, abort = 0; + int skip = 0; + int lbr_flags = lbr_desc[lbr_format]; rdmsrl(x86_pmu.lbr_from + lbr_idx, from); rdmsrl(x86_pmu.lbr_to + lbr_idx, to); - if (lbr_format == LBR_FORMAT_EIP_FLAGS) { + if (lbr_flags & LBR_EIP_FLAGS) { mis = !!(from & LBR_FROM_FLAG_MISPRED); pred = !mis; - from = (u64)((((s64)from) << 1) >> 1); + skip = 1; + } + if (lbr_flags & LBR_TSX) { + in_tx = !!(from & LBR_FROM_FLAG_IN_TX); + abort = !!(from & LBR_FROM_FLAG_ABORT); + skip = 3; } + from = (u64)((((s64)from) << skip) >> skip); cpuc->lbr_entries[i].from = from; cpuc->lbr_entries[i].to = to; cpuc->lbr_entries[i].mispred = mis; cpuc->lbr_entries[i].predicted = pred; + cpuc->lbr_entries[i].in_tx = in_tx; + cpuc->lbr_entries[i].abort = abort; cpuc->lbr_entries[i].reserved = 0; } cpuc->lbr_stack.nr = i; @@ -334,6 +361,16 @@ static void intel_pmu_setup_sw_lbr_filter(struct perf_event *event) if (br_type & PERF_SAMPLE_BRANCH_IND_CALL) mask |= X86_BR_IND_CALL; + + if (br_type & PERF_SAMPLE_BRANCH_ABORT_TX) + mask |= X86_BR_ABORT; + + if (br_type & PERF_SAMPLE_BRANCH_IN_TX) + mask |= X86_BR_IN_TX; + + if (br_type & PERF_SAMPLE_BRANCH_NO_TX) + mask |= X86_BR_NO_TX; + /* * stash actual user request into reg, it may * be used by fixup code for some CPU @@ -408,7 +445,7 @@ int intel_pmu_setup_lbr_filter(struct perf_event *event) * decoded (e.g., text page not present), then X86_BR_NONE is * returned. */ -static int branch_type(unsigned long from, unsigned long to) +static int branch_type(unsigned long from, unsigned long to, int abort) { struct insn insn; void *addr; @@ -428,6 +465,9 @@ static int branch_type(unsigned long from, unsigned long to) if (from == 0 || to == 0) return X86_BR_NONE; + if (abort) + return X86_BR_ABORT | to_plm; + if (from_plm == X86_BR_USER) { /* * can happen if measuring at the user level only @@ -574,7 +614,13 @@ intel_pmu_lbr_filter(struct cpu_hw_events *cpuc) from = cpuc->lbr_entries[i].from; to = cpuc->lbr_entries[i].to; - type = branch_type(from, to); + type = branch_type(from, to, cpuc->lbr_entries[i].abort); + if (type != X86_BR_NONE && (br_sel & X86_BR_ANYTX)) { + if (cpuc->lbr_entries[i].in_tx) + type |= X86_BR_IN_TX; + else + type |= X86_BR_NO_TX; + } /* if type does not correspond, then discard */ if (type == X86_BR_NONE || (br_sel & type) != type) { diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 33e8d65836d6d8f936479ff3204dfe53633dd39c..056f93a7990f856b2daf1860b7530e57053d98f3 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -73,13 +73,18 @@ struct perf_raw_record { * * support for mispred, predicted is optional. In case it * is not supported mispred = predicted = 0. + * + * in_tx: running in a hardware transaction + * abort: aborting a hardware transaction */ struct perf_branch_entry { __u64 from; __u64 to; __u64 mispred:1, /* target mispredicted */ predicted:1,/* target predicted */ - reserved:62; + in_tx:1, /* in transaction */ + abort:1, /* transaction abort */ + reserved:60; }; /* diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index fb104e51496ed0779159541865d42933b1a5e7a2..0b1df41691e8cc2c5436ceeab4812c4753de6653 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -157,8 +157,11 @@ enum perf_branch_sample_type { PERF_SAMPLE_BRANCH_ANY_CALL = 1U << 4, /* any call branch */ PERF_SAMPLE_BRANCH_ANY_RETURN = 1U << 5, /* any return branch */ PERF_SAMPLE_BRANCH_IND_CALL = 1U << 6, /* indirect calls */ + PERF_SAMPLE_BRANCH_ABORT_TX = 1U << 7, /* transaction aborts */ + PERF_SAMPLE_BRANCH_IN_TX = 1U << 8, /* in transaction */ + PERF_SAMPLE_BRANCH_NO_TX = 1U << 9, /* not in transaction */ - PERF_SAMPLE_BRANCH_MAX = 1U << 7, /* non-ABI */ + PERF_SAMPLE_BRANCH_MAX = 1U << 10, /* non-ABI */ }; #define PERF_SAMPLE_BRANCH_PLM_ALL \