From 59abbd1e7cfd6018fb8e58a96aa562aaff8711e7 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Thu, 10 Sep 2009 06:28:20 -0700
Subject: [PATCH] sparc64: Initial hw perf counter support.

Only supports one simple counter and only UltraSPARC-IIIi chips.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/sparc/include/asm/nmi.h          |    3 +
 arch/sparc/include/asm/perf_counter.h |    6 +
 arch/sparc/kernel/Makefile            |    3 +
 arch/sparc/kernel/nmi.c               |    8 +-
 arch/sparc/kernel/perf_counter.c      |  520 ++++++++++++++++++++++++++
 5 files changed, 538 insertions(+), 2 deletions(-)
 create mode 100644 arch/sparc/kernel/perf_counter.c

diff --git a/arch/sparc/include/asm/nmi.h b/arch/sparc/include/asm/nmi.h
index c7d11e435df9..72e6500e7ab0 100644
--- a/arch/sparc/include/asm/nmi.h
+++ b/arch/sparc/include/asm/nmi.h
@@ -7,4 +7,7 @@ extern void nmi_adjust_hz(unsigned int new_hz);
 
 extern atomic_t nmi_active;
 
+extern void start_nmi_watchdog(void *unused);
+extern void stop_nmi_watchdog(void *unused);
+
 #endif /* __NMI_H */
diff --git a/arch/sparc/include/asm/perf_counter.h b/arch/sparc/include/asm/perf_counter.h
index 38d644546435..5d7a8ca0e491 100644
--- a/arch/sparc/include/asm/perf_counter.h
+++ b/arch/sparc/include/asm/perf_counter.h
@@ -5,4 +5,10 @@ extern void set_perf_counter_pending(void);
 
 #define PERF_COUNTER_INDEX_OFFSET 0
 
+#ifdef CONFIG_PERF_COUNTERS
+extern void init_hw_perf_counters(void);
+#else
+static inline void init_hw_perf_counters(void) { }
+#endif
+
 #endif
diff --git a/arch/sparc/kernel/Makefile b/arch/sparc/kernel/Makefile
index b0041756c9f0..f96dc5761f74 100644
--- a/arch/sparc/kernel/Makefile
+++ b/arch/sparc/kernel/Makefile
@@ -103,3 +103,6 @@ obj-$(CONFIG_SUN_LDOMS) += ldc.o vio.o viohs.o ds.o
 obj-$(CONFIG_AUDIT) += audit.o
 audit--$(CONFIG_AUDIT) := compat_audit.o
 obj-$(CONFIG_COMPAT) += $(audit--y)
+
+pc--$(CONFIG_PERF_COUNTERS) := perf_counter.o
+obj-$(CONFIG_SPARC64) += $(pc--y)
diff --git a/arch/sparc/kernel/nmi.c b/arch/sparc/kernel/nmi.c
index 7de19dd30f40..391a6ed9a184 100644
--- a/arch/sparc/kernel/nmi.c
+++ b/arch/sparc/kernel/nmi.c
@@ -19,6 +19,7 @@
 #include
 #include
 
+#include <asm/perf_counter.h>
 #include
 #include
 #include
@@ -156,7 +157,7 @@ static void report_broken_nmi(int cpu, int *prev_nmi_count)
         atomic_dec(&nmi_active);
 }
 
-static void stop_nmi_watchdog(void *unused)
+void stop_nmi_watchdog(void *unused)
 {
         pcr_ops->write(PCR_PIC_PRIV);
         __get_cpu_var(wd_enabled) = 0;
@@ -210,7 +211,7 @@ static int __init check_nmi_watchdog(void)
         return err;
 }
 
-static void start_nmi_watchdog(void *unused)
+void start_nmi_watchdog(void *unused)
 {
         __get_cpu_var(wd_enabled) = 1;
         atomic_inc(&nmi_active);
@@ -263,6 +264,9 @@ int __init nmi_init(void)
                         atomic_set(&nmi_active, -1);
                 }
         }
+        if (!err)
+                init_hw_perf_counters();
+
         return err;
 }
 
diff --git a/arch/sparc/kernel/perf_counter.c b/arch/sparc/kernel/perf_counter.c
new file mode 100644
index 000000000000..f2c781450d08
--- /dev/null
+++ b/arch/sparc/kernel/perf_counter.c
@@ -0,0 +1,520 @@
+/* Performance counter support for sparc64.
+ *
+ * Copyright (C) 2009 David S. Miller
+ *
+ * This code is based almost entirely upon the x86 perf counter
+ * code, which is:
+ *
+ * Copyright (C) 2008 Thomas Gleixner
+ * Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
+ * Copyright (C) 2009 Jaswinder Singh Rajput
+ * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
+ * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra
+ */
+
+#include <linux/perf_counter.h>
+#include <linux/kprobes.h>
+#include <linux/kernel.h>
+#include <linux/kdebug.h>
+#include <linux/mutex.h>
+
+#include <asm/cpudata.h>
+#include <asm/atomic.h>
+#include <asm/nmi.h>
+#include <asm/pcr.h>
+
+/* Sparc64 chips have two performance counters, 32-bits each, with
+ * overflow interrupts generated on transition from 0xffffffff to 0.
+ * The counters are accessed in one go using a 64-bit register.
+ *
+ * Both counters are controlled using a single control register. The
+ * only way to stop all sampling is to clear all of the context (user,
+ * supervisor, hypervisor) sampling enable bits. But these bits apply
+ * to both counters, thus the two counters can't be enabled/disabled
+ * individually.
+ *
+ * The control register has two event fields, one for each of the two
+ * counters. It's thus nearly impossible to have one counter going
+ * while keeping the other one stopped. Therefore it is possible to
+ * get overflow interrupts for counters not currently "in use" and
+ * that condition must be checked in the overflow interrupt handler.
+ *
+ * So we use a hack, in that we program inactive counters with the
+ * "sw_count0" and "sw_count1" events. These count how many times
+ * the instruction "sethi %hi(0xfc000), %g0" is executed. It's an
+ * unusual way to encode a NOP and therefore will not trigger in
+ * normal code.
+ */
+
+#define MAX_HWCOUNTERS 2
+#define MAX_PERIOD ((1UL << 32) - 1)
+
+#define PIC_UPPER_INDEX 0
+#define PIC_LOWER_INDEX 1
+
+#define PIC_UPPER_NOP 0x1c
+#define PIC_LOWER_NOP 0x14
+
+struct cpu_hw_counters {
+        struct perf_counter *counters[MAX_HWCOUNTERS];
+        unsigned long used_mask[BITS_TO_LONGS(MAX_HWCOUNTERS)];
+        unsigned long active_mask[BITS_TO_LONGS(MAX_HWCOUNTERS)];
+        int enabled;
+};
+DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters) = { .enabled = 1, };
+
+struct perf_event_map {
+        u16 encoding;
+        u8 pic_mask;
+#define PIC_NONE 0x00
+#define PIC_UPPER 0x01
+#define PIC_LOWER 0x02
+};
+
+struct sparc_pmu {
+        const struct perf_event_map *(*event_map)(int);
+        int max_events;
+        int upper_shift;
+        int lower_shift;
+        int event_mask;
+};
+
+static const struct perf_event_map ultra3i_perfmon_event_map[] = {
+        [PERF_COUNT_HW_CPU_CYCLES] = { 0x0000, PIC_UPPER | PIC_LOWER },
+        [PERF_COUNT_HW_INSTRUCTIONS] = { 0x0001, PIC_UPPER | PIC_LOWER },
+        [PERF_COUNT_HW_CACHE_REFERENCES] = { 0x0009, PIC_LOWER },
+        [PERF_COUNT_HW_CACHE_MISSES] = { 0x0009, PIC_UPPER },
+};
+
+static const struct perf_event_map *ultra3i_event_map(int event)
+{
+        return &ultra3i_perfmon_event_map[event];
+}
+
+static const struct sparc_pmu ultra3i_pmu = {
+        .event_map = ultra3i_event_map,
+        .max_events = ARRAY_SIZE(ultra3i_perfmon_event_map),
+        .upper_shift = 11,
+        .lower_shift = 4,
+        .event_mask = 0x3f,
+};
+
+static const struct sparc_pmu *sparc_pmu __read_mostly;
+
+static u64 event_encoding(u64 event, int idx)
+{
+        if (idx == PIC_UPPER_INDEX)
+                event <<= sparc_pmu->upper_shift;
+        else
+                event <<= sparc_pmu->lower_shift;
+        return event;
+}
+
+static u64 mask_for_index(int idx)
+{
+        return event_encoding(sparc_pmu->event_mask, idx);
+}
+
+static u64 nop_for_index(int idx)
+{
+        return event_encoding(idx == PIC_UPPER_INDEX ?
+                              PIC_UPPER_NOP : PIC_LOWER_NOP, idx);
+}
+
+static inline void sparc_pmu_enable_counter(struct hw_perf_counter *hwc,
+                                            int idx)
+{
+        u64 val, mask = mask_for_index(idx);
+
+        val = pcr_ops->read();
+        pcr_ops->write((val & ~mask) | hwc->config);
+}
+
+static inline void sparc_pmu_disable_counter(struct hw_perf_counter *hwc,
+                                             int idx)
+{
+        u64 mask = mask_for_index(idx);
+        u64 nop = nop_for_index(idx);
+        u64 val = pcr_ops->read();
+
+        pcr_ops->write((val & ~mask) | nop);
+}
+
+void hw_perf_enable(void)
+{
+        struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
+        u64 val;
+        int i;
+
+        if (cpuc->enabled)
+                return;
+
+        cpuc->enabled = 1;
+        barrier();
+
+        val = pcr_ops->read();
+
+        for (i = 0; i < MAX_HWCOUNTERS; i++) {
+                struct perf_counter *cp = cpuc->counters[i];
+                struct hw_perf_counter *hwc;
+
+                if (!cp)
+                        continue;
+                hwc = &cp->hw;
+                val |= hwc->config_base;
+        }
+
+        pcr_ops->write(val);
+}
+
+void hw_perf_disable(void)
+{
+        struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
+        u64 val;
+
+        if (!cpuc->enabled)
+                return;
+
+        cpuc->enabled = 0;
+
+        val = pcr_ops->read();
+        val &= ~(PCR_UTRACE | PCR_STRACE);
+        pcr_ops->write(val);
+}
+
+static u32 read_pmc(int idx)
+{
+        u64 val;
+
+        read_pic(val);
+        if (idx == PIC_UPPER_INDEX)
+                val >>= 32;
+
+        return val & 0xffffffff;
+}
+
+static void write_pmc(int idx, u64 val)
+{
+        u64 shift, mask, pic;
+
+        shift = 0;
+        if (idx == PIC_UPPER_INDEX)
+                shift = 32;
+
+        mask = ((u64) 0xffffffff) << shift;
+        val <<= shift;
+
+        read_pic(pic);
+        pic &= ~mask;
+        pic |= val;
+        write_pic(pic);
+}
+
+static int sparc_perf_counter_set_period(struct perf_counter *counter,
+                                         struct hw_perf_counter *hwc, int idx)
+{
+        s64 left = atomic64_read(&hwc->period_left);
+        s64 period = hwc->sample_period;
+        int ret = 0;
+
+        if (unlikely(left <= -period)) {
+                left = period;
+                atomic64_set(&hwc->period_left, left);
+                hwc->last_period = period;
+                ret = 1;
+        }
+
+        if (unlikely(left <= 0)) {
+                left += period;
+                atomic64_set(&hwc->period_left, left);
+                hwc->last_period = period;
+                ret = 1;
+        }
+        if (left > MAX_PERIOD)
+                left = MAX_PERIOD;
+
+        atomic64_set(&hwc->prev_count, (u64)-left);
+
+        write_pmc(idx, (u64)(-left) & 0xffffffff);
+
+        perf_counter_update_userpage(counter);
+
+        return ret;
+}
+
+static int sparc_pmu_enable(struct perf_counter *counter)
+{
+        struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
+        struct hw_perf_counter *hwc = &counter->hw;
+        int idx = hwc->idx;
+
+        if (test_and_set_bit(idx, cpuc->used_mask))
+                return -EAGAIN;
+
+        sparc_pmu_disable_counter(hwc, idx);
+
+        cpuc->counters[idx] = counter;
+        set_bit(idx, cpuc->active_mask);
+
+        sparc_perf_counter_set_period(counter, hwc, idx);
+        sparc_pmu_enable_counter(hwc, idx);
+        perf_counter_update_userpage(counter);
+        return 0;
+}
+
+static u64 sparc_perf_counter_update(struct perf_counter *counter,
+                                     struct hw_perf_counter *hwc, int idx)
+{
+        int shift = 64 - 32;
+        u64 prev_raw_count, new_raw_count;
+        s64 delta;
+
+again:
+        prev_raw_count = atomic64_read(&hwc->prev_count);
+        new_raw_count = read_pmc(idx);
+
+        if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count,
+                             new_raw_count) != prev_raw_count)
+                goto again;
+
+        delta = (new_raw_count << shift) - (prev_raw_count << shift);
+        delta >>= shift;
+
+        atomic64_add(delta, &counter->count);
+        atomic64_sub(delta, &hwc->period_left);
+
+        return new_raw_count;
+}
+
+static void sparc_pmu_disable(struct perf_counter *counter)
+{
+        struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
+        struct hw_perf_counter *hwc = &counter->hw;
+        int idx = hwc->idx;
+
+        clear_bit(idx, cpuc->active_mask);
+        sparc_pmu_disable_counter(hwc, idx);
+
+        barrier();
+
+        sparc_perf_counter_update(counter, hwc, idx);
+        cpuc->counters[idx] = NULL;
+        clear_bit(idx, cpuc->used_mask);
+
+        perf_counter_update_userpage(counter);
+}
+
+static void sparc_pmu_read(struct perf_counter *counter)
+{
+        struct hw_perf_counter *hwc = &counter->hw;
+        sparc_perf_counter_update(counter, hwc, hwc->idx);
+}
+
+static void sparc_pmu_unthrottle(struct perf_counter *counter)
+{
+        struct hw_perf_counter *hwc = &counter->hw;
+        sparc_pmu_enable_counter(hwc, hwc->idx);
+}
+
+static atomic_t active_counters = ATOMIC_INIT(0);
+static DEFINE_MUTEX(pmc_grab_mutex);
+
+void perf_counter_grab_pmc(void)
+{
+        if (atomic_inc_not_zero(&active_counters))
+                return;
+
+        mutex_lock(&pmc_grab_mutex);
+        if (atomic_read(&active_counters) == 0) {
+                if (atomic_read(&nmi_active) > 0) {
+                        on_each_cpu(stop_nmi_watchdog, NULL, 1);
+                        BUG_ON(atomic_read(&nmi_active) != 0);
+                }
+                atomic_inc(&active_counters);
+        }
+        mutex_unlock(&pmc_grab_mutex);
+}
+
+void perf_counter_release_pmc(void)
+{
+        if (atomic_dec_and_mutex_lock(&active_counters, &pmc_grab_mutex)) {
+                if (atomic_read(&nmi_active) == 0)
+                        on_each_cpu(start_nmi_watchdog, NULL, 1);
+                mutex_unlock(&pmc_grab_mutex);
+        }
+}
+
+static void hw_perf_counter_destroy(struct perf_counter *counter)
+{
+        perf_counter_release_pmc();
+}
+
+static int __hw_perf_counter_init(struct perf_counter *counter)
+{
+        struct perf_counter_attr *attr = &counter->attr;
+        struct hw_perf_counter *hwc = &counter->hw;
+        const struct perf_event_map *pmap;
+        u64 enc;
+
+        if (atomic_read(&nmi_active) < 0)
+                return -ENODEV;
+
+        if (attr->type != PERF_TYPE_HARDWARE)
+                return -EOPNOTSUPP;
+
+        if (attr->config >= sparc_pmu->max_events)
+                return -EINVAL;
+
+        perf_counter_grab_pmc();
+        counter->destroy = hw_perf_counter_destroy;
+
+        /* We save the enable bits in the config_base. So to
+         * turn off sampling just write 'config', and to enable
+         * things write 'config | config_base'.
+         */
+        hwc->config_base = 0;
+        if (!attr->exclude_user)
+                hwc->config_base |= PCR_UTRACE;
+        if (!attr->exclude_kernel)
+                hwc->config_base |= PCR_STRACE;
+
+        if (!hwc->sample_period) {
+                hwc->sample_period = MAX_PERIOD;
+                hwc->last_period = hwc->sample_period;
+                atomic64_set(&hwc->period_left, hwc->sample_period);
+        }
+
+        pmap = sparc_pmu->event_map(attr->config);
+
+        enc = pmap->encoding;
+        if (pmap->pic_mask & PIC_UPPER) {
+                hwc->idx = PIC_UPPER_INDEX;
+                enc <<= sparc_pmu->upper_shift;
+        } else {
+                hwc->idx = PIC_LOWER_INDEX;
+                enc <<= sparc_pmu->lower_shift;
+        }
+
+        hwc->config |= enc;
+        return 0;
+}
+
+static const struct pmu pmu = {
+        .enable = sparc_pmu_enable,
+        .disable = sparc_pmu_disable,
+        .read = sparc_pmu_read,
+        .unthrottle = sparc_pmu_unthrottle,
+};
+
+const struct pmu *hw_perf_counter_init(struct perf_counter *counter)
+{
+        int err = __hw_perf_counter_init(counter);
+
+        if (err)
+                return ERR_PTR(err);
+        return &pmu;
+}
+
+void perf_counter_print_debug(void)
+{
+        unsigned long flags;
+        u64 pcr, pic;
+        int cpu;
+
+        if (!sparc_pmu)
+                return;
+
+        local_irq_save(flags);
+
+        cpu = smp_processor_id();
+
+        pcr = pcr_ops->read();
+        read_pic(pic);
+
+        pr_info("\n");
+        pr_info("CPU#%d: PCR[%016llx] PIC[%016llx]\n",
+                cpu, pcr, pic);
+
+        local_irq_restore(flags);
+}
+
+static int __kprobes perf_counter_nmi_handler(struct notifier_block *self,
+                                              unsigned long cmd, void *__args)
+{
+        struct die_args *args = __args;
+        struct perf_sample_data data;
+        struct cpu_hw_counters *cpuc;
+        struct pt_regs *regs;
+        int idx;
+
+        if (!atomic_read(&active_counters))
+                return NOTIFY_DONE;
+
+        switch (cmd) {
+        case DIE_NMI:
+                break;
+
+        default:
+                return NOTIFY_DONE;
+        }
+
+        regs = args->regs;
+
+        data.regs = regs;
+        data.addr = 0;
+
+        cpuc = &__get_cpu_var(cpu_hw_counters);
+        for (idx = 0; idx < MAX_HWCOUNTERS; idx++) {
+                struct perf_counter *counter = cpuc->counters[idx];
+                struct hw_perf_counter *hwc;
+                u64 val;
+
+                if (!test_bit(idx, cpuc->active_mask))
+                        continue;
+                hwc = &counter->hw;
+                val = sparc_perf_counter_update(counter, hwc, idx);
+                if (val & (1ULL << 31))
+                        continue;
+
+                data.period = counter->hw.last_period;
+                if (!sparc_perf_counter_set_period(counter, hwc, idx))
+                        continue;
+
+                if (perf_counter_overflow(counter, 1, &data))
+                        sparc_pmu_disable_counter(hwc, idx);
+        }
+
+        return NOTIFY_STOP;
+}
+
+static __read_mostly struct notifier_block perf_counter_nmi_notifier = {
+        .notifier_call = perf_counter_nmi_handler,
+};
+
+static bool __init supported_pmu(void)
+{
+        if (!strcmp(sparc_pmu_type, "ultra3i")) {
+                sparc_pmu = &ultra3i_pmu;
+                return true;
+        }
+        return false;
+}
+
+void __init init_hw_perf_counters(void)
+{
+        pr_info("Performance counters: ");
+
+        if (!supported_pmu()) {
+                pr_cont("No support for PMU type '%s'\n", sparc_pmu_type);
+                return;
+        }
+
+        pr_cont("Supported PMU type is '%s'\n", sparc_pmu_type);
+
+        /* All sparc64 PMUs currently have 2 counters. But this simple
+         * driver only supports one active counter at a time.
+         */
+        perf_max_counters = 1;
+
+        register_die_notifier(&perf_counter_nmi_notifier);
+}
-- 
GitLab
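
Not part of the patch: a small standalone userspace sketch of the two pieces of counter arithmetic the driver relies on, namely arming a 32-bit PIC field with the negated period so the hardware overflows after 'period' events (as sparc_perf_counter_set_period() does), and recovering a positive event delta from a possibly-wrapped 32-bit read by shifting through 64 bits (as sparc_perf_counter_update() does). The names fake_pic, arm_counter and counter_delta are invented here purely for illustration.

#include <stdint.h>
#include <stdio.h>

/* Stand-in for one 32-bit PIC field: it counts up and wraps from
 * 0xffffffff to 0, which is when the real hardware raises the
 * overflow interrupt.
 */
static uint32_t fake_pic;

/* Arm the counter so it wraps after 'period' events: write the
 * low 32 bits of -period, mirroring the driver's
 * write_pmc(idx, (u64)(-left) & 0xffffffff).
 */
static void arm_counter(uint64_t period)
{
        fake_pic = (uint32_t)(-period & 0xffffffff);
}

/* Events elapsed between two raw reads: subtract in 64 bits, then
 * shift left and arithmetic-shift right by 32 so a wrapped 32-bit
 * value still yields the correct positive delta, mirroring the
 * shift = 64 - 32 trick in the driver's update path.
 */
static int64_t counter_delta(uint64_t prev_raw, uint64_t new_raw)
{
        const int shift = 64 - 32;
        int64_t delta = (int64_t)((new_raw << shift) - (prev_raw << shift));

        return delta >> shift;
}

int main(void)
{
        arm_counter(1000);              /* overflow after 1000 events */
        uint64_t prev = fake_pic;

        fake_pic += 250;                /* pretend 250 events occurred */
        printf("delta = %lld\n", (long long)counter_delta(prev, fake_pic));

        prev = fake_pic;
        fake_pic += 800;                /* 800 more events: wraps past 0 */
        printf("delta = %lld\n", (long long)counter_delta(prev, fake_pic));
        return 0;
}

Both printf calls report the true event counts (250 and 800) even though the second interval crosses the 0xffffffff -> 0 wrap, which is why the driver can program -left into the counter and still accumulate correct totals.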