diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 2d9075e863a0cfc25e43b7d40e706e8016586859..93fe929d1cee20ec1dc183d5c390e5d9325b26ce 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -167,6 +167,7 @@ #define X86_FEATURE_TBM (6*32+21) /* trailing bit manipulations */ #define X86_FEATURE_TOPOEXT (6*32+22) /* topology extensions CPUID leafs */ #define X86_FEATURE_PERFCTR_CORE (6*32+23) /* core performance counter extensions */ +#define X86_FEATURE_PERFCTR_NB (6*32+24) /* NB performance counter extensions */ /* * Auxiliary flags: Linux defined - For features scattered in various @@ -309,6 +310,7 @@ extern const char * const x86_power_flags[32]; #define cpu_has_hypervisor boot_cpu_has(X86_FEATURE_HYPERVISOR) #define cpu_has_pclmulqdq boot_cpu_has(X86_FEATURE_PCLMULQDQ) #define cpu_has_perfctr_core boot_cpu_has(X86_FEATURE_PERFCTR_CORE) +#define cpu_has_perfctr_nb boot_cpu_has(X86_FEATURE_PERFCTR_NB) #define cpu_has_cx8 boot_cpu_has(X86_FEATURE_CX8) #define cpu_has_cx16 boot_cpu_has(X86_FEATURE_CX16) #define cpu_has_eager_fpu boot_cpu_has(X86_FEATURE_EAGER_FPU) diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 2234eaaecb525c9a1cd857033beee7db17e511c5..57cb634022136f39f3c9b4eebe9390b917717d89 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -29,9 +29,14 @@ #define ARCH_PERFMON_EVENTSEL_INV (1ULL << 23) #define ARCH_PERFMON_EVENTSEL_CMASK 0xFF000000ULL +#define AMD64_EVENTSEL_INT_CORE_ENABLE (1ULL << 36) #define AMD64_EVENTSEL_GUESTONLY (1ULL << 40) #define AMD64_EVENTSEL_HOSTONLY (1ULL << 41) +#define AMD64_EVENTSEL_INT_CORE_SEL_SHIFT 37 +#define AMD64_EVENTSEL_INT_CORE_SEL_MASK \ + (0xFULL << AMD64_EVENTSEL_INT_CORE_SEL_SHIFT) + #define AMD64_EVENTSEL_EVENT \ (ARCH_PERFMON_EVENTSEL_EVENT | (0x0FULL << 32)) #define INTEL_ARCH_EVENT_MASK \ @@ -46,8 +51,12 @@ #define AMD64_RAW_EVENT_MASK \ (X86_RAW_EVENT_MASK | \ AMD64_EVENTSEL_EVENT) +#define AMD64_RAW_EVENT_MASK_NB \ + (AMD64_EVENTSEL_EVENT | \ + ARCH_PERFMON_EVENTSEL_UMASK) #define AMD64_NUM_COUNTERS 4 #define AMD64_NUM_COUNTERS_CORE 6 +#define AMD64_NUM_COUNTERS_NB 4 #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 0x3c #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8) diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h index 433a59fb1a7411b97cd0db54d30a3bc727a4aeea..075a402555912541fe0b502c6c6b877812a5b089 100644 --- a/arch/x86/include/uapi/asm/msr-index.h +++ b/arch/x86/include/uapi/asm/msr-index.h @@ -194,6 +194,8 @@ /* Fam 15h MSRs */ #define MSR_F15H_PERF_CTL 0xc0010200 #define MSR_F15H_PERF_CTR 0xc0010201 +#define MSR_F15H_NB_PERF_CTL 0xc0010240 +#define MSR_F15H_NB_PERF_CTR 0xc0010241 /* Fam 10h MSRs */ #define MSR_FAM10H_MMIO_CONF_BASE 0xc0010058 diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c index 05462f0432d50d17e6658d9e59d1a973b5021310..dfdab42aed27372503d5fe33cd29a4ef97ae580b 100644 --- a/arch/x86/kernel/cpu/perf_event_amd.c +++ b/arch/x86/kernel/cpu/perf_event_amd.c @@ -132,11 +132,14 @@ static u64 amd_pmu_event_map(int hw_event) return amd_perfmon_event_map[hw_event]; } +static struct event_constraint *amd_nb_event_constraint; + /* * Previously calculated offsets */ static unsigned int event_offsets[X86_PMC_IDX_MAX] __read_mostly; static unsigned int count_offsets[X86_PMC_IDX_MAX] __read_mostly; +static unsigned int rdpmc_indexes[X86_PMC_IDX_MAX] __read_mostly; /* * Legacy CPUs: @@ -144,10 +147,14 @@ static unsigned int count_offsets[X86_PMC_IDX_MAX] __read_mostly; * * CPUs with core performance counter extensions: * 6 counters starting at 0xc0010200 each offset by 2 + * + * CPUs with north bridge performance counter extensions: + * 4 additional counters starting at 0xc0010240 each offset by 2 + * (indexed right above either one of the above core counters) */ static inline int amd_pmu_addr_offset(int index, bool eventsel) { - int offset; + int offset, first, base; if (!index) return index; @@ -160,7 +167,23 @@ static inline int amd_pmu_addr_offset(int index, bool eventsel) if (offset) return offset; - if (!cpu_has_perfctr_core) + if (amd_nb_event_constraint && + test_bit(index, amd_nb_event_constraint->idxmsk)) { + /* + * calculate the offset of NB counters with respect to + * base eventsel or perfctr + */ + + first = find_first_bit(amd_nb_event_constraint->idxmsk, + X86_PMC_IDX_MAX); + + if (eventsel) + base = MSR_F15H_NB_PERF_CTL - x86_pmu.eventsel; + else + base = MSR_F15H_NB_PERF_CTR - x86_pmu.perfctr; + + offset = base + ((index - first) << 1); + } else if (!cpu_has_perfctr_core) offset = index; else offset = index << 1; @@ -175,24 +198,36 @@ static inline int amd_pmu_addr_offset(int index, bool eventsel) static inline int amd_pmu_rdpmc_index(int index) { - return index; -} + int ret, first; -static int amd_pmu_hw_config(struct perf_event *event) -{ - int ret; + if (!index) + return index; - /* pass precise event sampling to ibs: */ - if (event->attr.precise_ip && get_ibs_caps()) - return -ENOENT; + ret = rdpmc_indexes[index]; - ret = x86_pmu_hw_config(event); if (ret) return ret; - if (has_branch_stack(event)) - return -EOPNOTSUPP; + if (amd_nb_event_constraint && + test_bit(index, amd_nb_event_constraint->idxmsk)) { + /* + * according to the mnual, ECX value of the NB counters is + * the index of the NB counter (0, 1, 2 or 3) plus 6 + */ + + first = find_first_bit(amd_nb_event_constraint->idxmsk, + X86_PMC_IDX_MAX); + ret = index - first + 6; + } else + ret = index; + + rdpmc_indexes[index] = ret; + return ret; +} + +static int amd_core_hw_config(struct perf_event *event) +{ if (event->attr.exclude_host && event->attr.exclude_guest) /* * When HO == GO == 1 the hardware treats that as GO == HO == 0 @@ -206,10 +241,33 @@ static int amd_pmu_hw_config(struct perf_event *event) else if (event->attr.exclude_guest) event->hw.config |= AMD64_EVENTSEL_HOSTONLY; - if (event->attr.type != PERF_TYPE_RAW) - return 0; + return 0; +} - event->hw.config |= event->attr.config & AMD64_RAW_EVENT_MASK; +/* + * NB counters do not support the following event select bits: + * Host/Guest only + * Counter mask + * Invert counter mask + * Edge detect + * OS/User mode + */ +static int amd_nb_hw_config(struct perf_event *event) +{ + /* for NB, we only allow system wide counting mode */ + if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK) + return -EINVAL; + + if (event->attr.exclude_user || event->attr.exclude_kernel || + event->attr.exclude_host || event->attr.exclude_guest) + return -EINVAL; + + event->hw.config &= ~(ARCH_PERFMON_EVENTSEL_USR | + ARCH_PERFMON_EVENTSEL_OS); + + if (event->hw.config & ~(AMD64_RAW_EVENT_MASK_NB | + ARCH_PERFMON_EVENTSEL_INT)) + return -EINVAL; return 0; } @@ -227,6 +285,11 @@ static inline int amd_is_nb_event(struct hw_perf_event *hwc) return (hwc->config & 0xe0) == 0xe0; } +static inline int amd_is_perfctr_nb_event(struct hw_perf_event *hwc) +{ + return amd_nb_event_constraint && amd_is_nb_event(hwc); +} + static inline int amd_has_nb(struct cpu_hw_events *cpuc) { struct amd_nb *nb = cpuc->amd_nb; @@ -234,6 +297,30 @@ static inline int amd_has_nb(struct cpu_hw_events *cpuc) return nb && nb->nb_id != -1; } +static int amd_pmu_hw_config(struct perf_event *event) +{ + int ret; + + /* pass precise event sampling to ibs: */ + if (event->attr.precise_ip && get_ibs_caps()) + return -ENOENT; + + if (has_branch_stack(event)) + return -EOPNOTSUPP; + + ret = x86_pmu_hw_config(event); + if (ret) + return ret; + + if (event->attr.type == PERF_TYPE_RAW) + event->hw.config |= event->attr.config & AMD64_RAW_EVENT_MASK; + + if (amd_is_perfctr_nb_event(&event->hw)) + return amd_nb_hw_config(event); + + return amd_core_hw_config(event); +} + static void __amd_put_nb_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) { @@ -254,6 +341,19 @@ static void __amd_put_nb_event_constraints(struct cpu_hw_events *cpuc, } } +static void amd_nb_interrupt_hw_config(struct hw_perf_event *hwc) +{ + int core_id = cpu_data(smp_processor_id()).cpu_core_id; + + /* deliver interrupts only to this core */ + if (hwc->config & ARCH_PERFMON_EVENTSEL_INT) { + hwc->config |= AMD64_EVENTSEL_INT_CORE_ENABLE; + hwc->config &= ~AMD64_EVENTSEL_INT_CORE_SEL_MASK; + hwc->config |= (u64)(core_id) << + AMD64_EVENTSEL_INT_CORE_SEL_SHIFT; + } +} + /* * AMD64 NorthBridge events need special treatment because * counter access needs to be synchronized across all cores @@ -299,6 +399,12 @@ __amd_get_nb_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *ev struct perf_event *old; int idx, new = -1; + if (!c) + c = &unconstrained; + + if (cpuc->is_fake) + return c; + /* * detect if already present, if so reuse * @@ -335,6 +441,9 @@ __amd_get_nb_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *ev if (new == -1) return &emptyconstraint; + if (amd_is_perfctr_nb_event(hwc)) + amd_nb_interrupt_hw_config(hwc); + return &nb->event_constraints[new]; } @@ -434,7 +543,8 @@ amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) if (!(amd_has_nb(cpuc) && amd_is_nb_event(&event->hw))) return &unconstrained; - return __amd_get_nb_event_constraints(cpuc, event, &unconstrained); + return __amd_get_nb_event_constraints(cpuc, event, + amd_nb_event_constraint); } static void amd_put_event_constraints(struct cpu_hw_events *cpuc, @@ -533,6 +643,9 @@ static struct event_constraint amd_f15_PMC30 = EVENT_CONSTRAINT_OVERLAP(0, 0x09, static struct event_constraint amd_f15_PMC50 = EVENT_CONSTRAINT(0, 0x3F, 0); static struct event_constraint amd_f15_PMC53 = EVENT_CONSTRAINT(0, 0x38, 0); +static struct event_constraint amd_NBPMC96 = EVENT_CONSTRAINT(0, 0x3C0, 0); +static struct event_constraint amd_NBPMC74 = EVENT_CONSTRAINT(0, 0xF0, 0); + static struct event_constraint * amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, struct perf_event *event) { @@ -598,8 +711,8 @@ amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, struct perf_event *ev return &amd_f15_PMC20; } case AMD_EVENT_NB: - /* not yet implemented */ - return &emptyconstraint; + return __amd_get_nb_event_constraints(cpuc, event, + amd_nb_event_constraint); default: return &emptyconstraint; } @@ -647,7 +760,7 @@ static __initconst const struct x86_pmu amd_pmu = { static int setup_event_constraints(void) { - if (boot_cpu_data.x86 >= 0x15) + if (boot_cpu_data.x86 == 0x15) x86_pmu.get_event_constraints = amd_get_event_constraints_f15h; return 0; } @@ -677,6 +790,23 @@ static int setup_perfctr_core(void) return 0; } +static int setup_perfctr_nb(void) +{ + if (!cpu_has_perfctr_nb) + return -ENODEV; + + x86_pmu.num_counters += AMD64_NUM_COUNTERS_NB; + + if (cpu_has_perfctr_core) + amd_nb_event_constraint = &amd_NBPMC96; + else + amd_nb_event_constraint = &amd_NBPMC74; + + printk(KERN_INFO "perf: AMD northbridge performance counters detected\n"); + + return 0; +} + __init int amd_pmu_init(void) { /* Performance-monitoring supported from K7 and later: */ @@ -687,6 +817,7 @@ __init int amd_pmu_init(void) setup_event_constraints(); setup_perfctr_core(); + setup_perfctr_nb(); /* Events are common for all AMDs */ memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,