diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c index a3aa67b4fd506b8dac78a57262b7763ede3ca347..d799628016c872b581fb60786515d44bcda9ca4e 100644 --- a/arch/x86/events/amd/core.c +++ b/arch/x86/events/amd/core.c @@ -620,7 +620,7 @@ static inline u64 amd_pmu_get_global_status(void) /* PerfCntrGlobalStatus is read-only */ rdmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS, status); - return status & amd_pmu_global_cntr_mask; + return status; } static inline void amd_pmu_ack_global_status(u64 status) @@ -631,8 +631,6 @@ static inline void amd_pmu_ack_global_status(u64 status) * clears the same bit in PerfCntrGlobalStatus */ - /* Only allow modifications to PerfCntrGlobalStatus.PerfCntrOvfl */ - status &= amd_pmu_global_cntr_mask; wrmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR, status); } @@ -742,11 +740,17 @@ static void amd_pmu_v2_enable_event(struct perf_event *event) __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE); } -static void amd_pmu_v2_enable_all(int added) +static __always_inline void amd_pmu_core_enable_all(void) { amd_pmu_set_global_ctl(amd_pmu_global_cntr_mask); } +static void amd_pmu_v2_enable_all(int added) +{ + amd_pmu_lbr_enable_all(); + amd_pmu_core_enable_all(); +} + static void amd_pmu_disable_event(struct perf_event *event) { x86_pmu_disable_event(event); @@ -771,10 +775,15 @@ static void amd_pmu_disable_all(void) amd_pmu_check_overflow(); } -static void amd_pmu_v2_disable_all(void) +static __always_inline void amd_pmu_core_disable_all(void) { - /* Disable all PMCs */ amd_pmu_set_global_ctl(0); +} + +static void amd_pmu_v2_disable_all(void) +{ + amd_pmu_core_disable_all(); + amd_pmu_lbr_disable_all(); amd_pmu_check_overflow(); } @@ -877,8 +886,8 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs) pmu_enabled = cpuc->enabled; cpuc->enabled = 0; - /* Stop counting */ - amd_pmu_v2_disable_all(); + /* Stop counting but do not disable LBR */ + amd_pmu_core_disable_all(); status = amd_pmu_get_global_status(); @@ -886,6 +895,12 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs) if (!status) goto done; + /* Read branch records before unfreezing */ + if (status & GLOBAL_STATUS_LBRS_FROZEN) { + amd_pmu_lbr_read(); + status &= ~GLOBAL_STATUS_LBRS_FROZEN; + } + for (idx = 0; idx < x86_pmu.num_counters; idx++) { if (!test_bit(idx, cpuc->active_mask)) continue; @@ -905,6 +920,9 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs) if (!x86_perf_event_set_period(event)) continue; + if (has_branch_stack(event)) + data.br_stack = &cpuc->lbr_stack; + if (perf_event_overflow(event, &data, regs)) x86_pmu_stop(event, 0); @@ -918,7 +936,7 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs) */ WARN_ON(status > 0); - /* Clear overflow bits */ + /* Clear overflow and freeze bits */ amd_pmu_ack_global_status(~status); /* @@ -932,7 +950,7 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs) /* Resume counting only if PMU is active */ if (pmu_enabled) - amd_pmu_v2_enable_all(0); + amd_pmu_core_enable_all(); return amd_pmu_adjust_nmi_window(handled); } @@ -1375,7 +1393,14 @@ static int __init amd_core_pmu_init(void) } /* LBR and BRS are mutually exclusive features */ - if (amd_pmu_lbr_init() && !amd_brs_init()) { + if (!amd_pmu_lbr_init()) { + /* LBR requires flushing on context switch */ + x86_pmu.sched_task = amd_pmu_lbr_sched_task; + static_call_update(amd_pmu_branch_hw_config, amd_pmu_lbr_hw_config); + static_call_update(amd_pmu_branch_reset, amd_pmu_lbr_reset); + static_call_update(amd_pmu_branch_add, amd_pmu_lbr_add); + static_call_update(amd_pmu_branch_del, amd_pmu_lbr_del); + } else if (!amd_brs_init()) { /* * BRS requires special event constraints and flushing on ctxsw. */ diff --git a/arch/x86/events/amd/lbr.c b/arch/x86/events/amd/lbr.c index 4e5b5d35f35a69b3c0dd6490e945f708c4b552e8..1dea66f332ae6857c8d8936f1853a7abfb7e925a 100644 --- a/arch/x86/events/amd/lbr.c +++ b/arch/x86/events/amd/lbr.c @@ -4,6 +4,209 @@ #include "../perf_event.h" +struct branch_entry { + union { + struct { + u64 ip:58; + u64 ip_sign_ext:5; + u64 mispredict:1; + } split; + u64 full; + } from; + + union { + struct { + u64 ip:58; + u64 ip_sign_ext:3; + u64 reserved:1; + u64 spec:1; + u64 valid:1; + } split; + u64 full; + } to; +}; + +static __always_inline void amd_pmu_lbr_set_from(unsigned int idx, u64 val) +{ + wrmsrl(MSR_AMD_SAMP_BR_FROM + idx * 2, val); +} + +static __always_inline void amd_pmu_lbr_set_to(unsigned int idx, u64 val) +{ + wrmsrl(MSR_AMD_SAMP_BR_FROM + idx * 2 + 1, val); +} + +static __always_inline u64 amd_pmu_lbr_get_from(unsigned int idx) +{ + u64 val; + + rdmsrl(MSR_AMD_SAMP_BR_FROM + idx * 2, val); + + return val; +} + +static __always_inline u64 amd_pmu_lbr_get_to(unsigned int idx) +{ + u64 val; + + rdmsrl(MSR_AMD_SAMP_BR_FROM + idx * 2 + 1, val); + + return val; +} + +static __always_inline u64 sign_ext_branch_ip(u64 ip) +{ + u32 shift = 64 - boot_cpu_data.x86_virt_bits; + + return (u64)(((s64)ip << shift) >> shift); +} + +void amd_pmu_lbr_read(void) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + struct perf_branch_entry *br = cpuc->lbr_entries; + struct branch_entry entry; + int out = 0, i; + + if (!cpuc->lbr_users) + return; + + for (i = 0; i < x86_pmu.lbr_nr; i++) { + entry.from.full = amd_pmu_lbr_get_from(i); + entry.to.full = amd_pmu_lbr_get_to(i); + + /* Check if a branch has been logged */ + if (!entry.to.split.valid) + continue; + + perf_clear_branch_entry_bitfields(br + out); + + br[out].from = sign_ext_branch_ip(entry.from.split.ip); + br[out].to = sign_ext_branch_ip(entry.to.split.ip); + br[out].mispred = entry.from.split.mispredict; + br[out].predicted = !br[out].mispred; + out++; + } + + cpuc->lbr_stack.nr = out; + + /* + * Internal register renaming always ensures that LBR From[0] and + * LBR To[0] always represent the TOS + */ + cpuc->lbr_stack.hw_idx = 0; +} + +static int amd_pmu_lbr_setup_filter(struct perf_event *event) +{ + /* No LBR support */ + if (!x86_pmu.lbr_nr) + return -EOPNOTSUPP; + + return 0; +} + +int amd_pmu_lbr_hw_config(struct perf_event *event) +{ + int ret = 0; + + /* LBR is not recommended in counting mode */ + if (!is_sampling_event(event)) + return -EINVAL; + + ret = amd_pmu_lbr_setup_filter(event); + if (!ret) + event->attach_state |= PERF_ATTACH_SCHED_CB; + + return ret; +} + +void amd_pmu_lbr_reset(void) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + int i; + + if (!x86_pmu.lbr_nr) + return; + + /* Reset all branch records individually */ + for (i = 0; i < x86_pmu.lbr_nr; i++) { + amd_pmu_lbr_set_from(i, 0); + amd_pmu_lbr_set_to(i, 0); + } + + cpuc->last_task_ctx = NULL; + cpuc->last_log_id = 0; +} + +void amd_pmu_lbr_add(struct perf_event *event) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + + if (!x86_pmu.lbr_nr) + return; + + perf_sched_cb_inc(event->ctx->pmu); + + if (!cpuc->lbr_users++ && !event->total_time_running) + amd_pmu_lbr_reset(); +} + +void amd_pmu_lbr_del(struct perf_event *event) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + + if (!x86_pmu.lbr_nr) + return; + + cpuc->lbr_users--; + WARN_ON_ONCE(cpuc->lbr_users < 0); + perf_sched_cb_dec(event->ctx->pmu); +} + +void amd_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + + /* + * A context switch can flip the address space and LBR entries are + * not tagged with an identifier. Hence, branches cannot be resolved + * from the old address space and the LBR records should be wiped. + */ + if (cpuc->lbr_users && sched_in) + amd_pmu_lbr_reset(); +} + +void amd_pmu_lbr_enable_all(void) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + u64 dbg_ctl, dbg_extn_cfg; + + if (!cpuc->lbr_users || !x86_pmu.lbr_nr) + return; + + rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl); + rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg); + + wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI); + wrmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg | DBG_EXTN_CFG_LBRV2EN); +} + +void amd_pmu_lbr_disable_all(void) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + u64 dbg_ctl, dbg_extn_cfg; + + if (!cpuc->lbr_users || !x86_pmu.lbr_nr) + return; + + rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg); + rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl); + + wrmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg & ~DBG_EXTN_CFG_LBRV2EN); + wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl & ~DEBUGCTLMSR_FREEZE_LBRS_ON_PMI); +} + __init int amd_pmu_lbr_init(void) { union cpuid_0x80000022_ebx ebx; diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 82e8a6d87adefdbbc0a65470c956c35cf95b2ab5..e8930414cba5cb94a78441c31b9660ae4b80d222 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -1233,6 +1233,14 @@ static inline bool fixed_counter_disabled(int i, struct pmu *pmu) int amd_pmu_init(void); int amd_pmu_lbr_init(void); +void amd_pmu_lbr_reset(void); +void amd_pmu_lbr_read(void); +void amd_pmu_lbr_add(struct perf_event *event); +void amd_pmu_lbr_del(struct perf_event *event); +void amd_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in); +void amd_pmu_lbr_enable_all(void); +void amd_pmu_lbr_disable_all(void); +int amd_pmu_lbr_hw_config(struct perf_event *event); #ifdef CONFIG_PERF_EVENTS_AMD_BRS diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 6674bdb096f346d940e353e3c4df7491fd5e0779..109c404e6137fca409e7764c26cf1e9827bbed4a 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -585,6 +585,9 @@ #define MSR_AMD64_PERF_CNTR_GLOBAL_CTL 0xc0000301 #define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR 0xc0000302 +/* AMD Last Branch Record MSRs */ +#define MSR_AMD64_LBR_SELECT 0xc000010e + /* Fam 17h MSRs */ #define MSR_F17H_IRPERF 0xc00000e9 @@ -756,6 +759,8 @@ #define MSR_AMD_DBG_EXTN_CFG 0xc000010f #define MSR_AMD_SAMP_BR_FROM 0xc0010300 +#define DBG_EXTN_CFG_LBRV2EN BIT_ULL(6) + #define MSR_IA32_MPERF 0x000000e7 #define MSR_IA32_APERF 0x000000e8