提交 5d70f79b 编写于 作者: L Linus Torvalds

Merge branch 'perf-core-for-linus' of...

Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (163 commits)
  tracing: Fix compile issue for trace_sched_wakeup.c
  [S390] hardirq: remove pointless header file includes
  [IA64] Move local_softirq_pending() definition
  perf, powerpc: Fix power_pmu_event_init to not use event->ctx
  ftrace: Remove recursion between recordmcount and scripts/mod/empty
  jump_label: Add COND_STMT(), reducer wrappery
  perf: Optimize sw events
  perf: Use jump_labels to optimize the scheduler hooks
  jump_label: Add atomic_t interface
  jump_label: Use more consistent naming
  perf, hw_breakpoint: Fix crash in hw_breakpoint creation
  perf: Find task before event alloc
  perf: Fix task refcount bugs
  perf: Fix group moving
  irq_work: Add generic hardirq context callbacks
  perf_events: Fix transaction recovery in group_sched_in()
  perf_events: Fix bogus AMD64 generic TLB events
  perf_events: Fix bogus context time tracking
  tracing: Remove parent recording in latency tracer graph options
  tracing: Use one prologue for the preempt irqs off tracer function tracers
  ...
......@@ -542,9 +542,11 @@ Kprobes does not use mutexes or allocate memory except during
registration and unregistration.
Probe handlers are run with preemption disabled. Depending on the
architecture, handlers may also run with interrupts disabled. In any
case, your handler should not yield the CPU (e.g., by attempting to
acquire a semaphore).
architecture and optimization state, handlers may also run with
interrupts disabled (e.g., kretprobe handlers and optimized kprobe
handlers run without interrupt disabled on x86/x86-64). In any case,
your handler should not yield the CPU (e.g., by attempting to acquire
a semaphore).
Since a return probe is implemented by replacing the return
address with the trampoline's address, stack backtraces and calls
......
......@@ -568,6 +568,12 @@ endif
ifdef CONFIG_FUNCTION_TRACER
KBUILD_CFLAGS += -pg
ifdef CONFIG_DYNAMIC_FTRACE
ifdef CONFIG_HAVE_C_RECORDMCOUNT
BUILD_C_RECORDMCOUNT := y
export BUILD_C_RECORDMCOUNT
endif
endif
endif
# We trigger additional mismatches with less inlining
......@@ -591,6 +597,11 @@ KBUILD_CFLAGS += $(call cc-option,-fno-strict-overflow)
# conserve stack if available
KBUILD_CFLAGS += $(call cc-option,-fconserve-stack)
# check for 'asm goto'
ifeq ($(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-goto.sh $(CC)), y)
KBUILD_CFLAGS += -DCC_HAVE_ASM_GOTO
endif
# Add user supplied CPPFLAGS, AFLAGS and CFLAGS as the last assignments
# But warn user when we do so
warn-assign = \
......
......@@ -158,4 +158,7 @@ config HAVE_PERF_EVENTS_NMI
subsystem. Also has support for calculating CPU cycle events
to determine how many clock cycles in a given period.
config HAVE_ARCH_JUMP_LABEL
bool
source "kernel/gcov/Kconfig"
......@@ -9,6 +9,7 @@ config ALPHA
select HAVE_IDE
select HAVE_OPROFILE
select HAVE_SYSCALL_WRAPPERS
select HAVE_IRQ_WORK
select HAVE_PERF_EVENTS
select HAVE_DMA_ATTRS
help
......
#ifndef __ASM_ALPHA_PERF_EVENT_H
#define __ASM_ALPHA_PERF_EVENT_H
/* Alpha only supports software events through this interface. */
extern void set_perf_event_pending(void);
#define PERF_EVENT_INDEX_OFFSET 0
#ifdef CONFIG_PERF_EVENTS
extern void init_hw_perf_events(void);
#else
......
......@@ -307,7 +307,7 @@ static unsigned long alpha_perf_event_update(struct perf_event *event,
new_raw_count) != prev_raw_count)
goto again;
delta = (new_raw_count - (prev_raw_count & alpha_pmu->pmc_count_mask[idx])) + ovf;
delta = (new_raw_count - (prev_raw_count & alpha_pmu->pmc_count_mask[idx])) + ovf;
/* It is possible on very rare occasions that the PMC has overflowed
* but the interrupt is yet to come. Detect and fix this situation.
......@@ -402,14 +402,13 @@ static void maybe_change_configuration(struct cpu_hw_events *cpuc)
struct hw_perf_event *hwc = &pe->hw;
int idx = hwc->idx;
if (cpuc->current_idx[j] != PMC_NO_INDEX) {
cpuc->idx_mask |= (1<<cpuc->current_idx[j]);
continue;
if (cpuc->current_idx[j] == PMC_NO_INDEX) {
alpha_perf_event_set_period(pe, hwc, idx);
cpuc->current_idx[j] = idx;
}
alpha_perf_event_set_period(pe, hwc, idx);
cpuc->current_idx[j] = idx;
cpuc->idx_mask |= (1<<cpuc->current_idx[j]);
if (!(hwc->state & PERF_HES_STOPPED))
cpuc->idx_mask |= (1<<cpuc->current_idx[j]);
}
cpuc->config = cpuc->event[0]->hw.config_base;
}
......@@ -420,12 +419,13 @@ static void maybe_change_configuration(struct cpu_hw_events *cpuc)
* - this function is called from outside this module via the pmu struct
* returned from perf event initialisation.
*/
static int alpha_pmu_enable(struct perf_event *event)
static int alpha_pmu_add(struct perf_event *event, int flags)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
struct hw_perf_event *hwc = &event->hw;
int n0;
int ret;
unsigned long flags;
unsigned long irq_flags;
/*
* The Sparc code has the IRQ disable first followed by the perf
......@@ -435,8 +435,8 @@ static int alpha_pmu_enable(struct perf_event *event)
* nevertheless we disable the PMCs first to enable a potential
* final PMI to occur before we disable interrupts.
*/
perf_disable();
local_irq_save(flags);
perf_pmu_disable(event->pmu);
local_irq_save(irq_flags);
/* Default to error to be returned */
ret = -EAGAIN;
......@@ -455,8 +455,12 @@ static int alpha_pmu_enable(struct perf_event *event)
}
}
local_irq_restore(flags);
perf_enable();
hwc->state = PERF_HES_UPTODATE;
if (!(flags & PERF_EF_START))
hwc->state |= PERF_HES_STOPPED;
local_irq_restore(irq_flags);
perf_pmu_enable(event->pmu);
return ret;
}
......@@ -467,15 +471,15 @@ static int alpha_pmu_enable(struct perf_event *event)
* - this function is called from outside this module via the pmu struct
* returned from perf event initialisation.
*/
static void alpha_pmu_disable(struct perf_event *event)
static void alpha_pmu_del(struct perf_event *event, int flags)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
struct hw_perf_event *hwc = &event->hw;
unsigned long flags;
unsigned long irq_flags;
int j;
perf_disable();
local_irq_save(flags);
perf_pmu_disable(event->pmu);
local_irq_save(irq_flags);
for (j = 0; j < cpuc->n_events; j++) {
if (event == cpuc->event[j]) {
......@@ -501,8 +505,8 @@ static void alpha_pmu_disable(struct perf_event *event)
}
}
local_irq_restore(flags);
perf_enable();
local_irq_restore(irq_flags);
perf_pmu_enable(event->pmu);
}
......@@ -514,13 +518,44 @@ static void alpha_pmu_read(struct perf_event *event)
}
static void alpha_pmu_unthrottle(struct perf_event *event)
static void alpha_pmu_stop(struct perf_event *event, int flags)
{
struct hw_perf_event *hwc = &event->hw;
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
if (!(hwc->state & PERF_HES_STOPPED)) {
cpuc->idx_mask &= ~(1UL<<hwc->idx);
hwc->state |= PERF_HES_STOPPED;
}
if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
alpha_perf_event_update(event, hwc, hwc->idx, 0);
hwc->state |= PERF_HES_UPTODATE;
}
if (cpuc->enabled)
wrperfmon(PERFMON_CMD_DISABLE, (1UL<<hwc->idx));
}
static void alpha_pmu_start(struct perf_event *event, int flags)
{
struct hw_perf_event *hwc = &event->hw;
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
return;
if (flags & PERF_EF_RELOAD) {
WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
alpha_perf_event_set_period(event, hwc, hwc->idx);
}
hwc->state = 0;
cpuc->idx_mask |= 1UL<<hwc->idx;
wrperfmon(PERFMON_CMD_ENABLE, (1UL<<hwc->idx));
if (cpuc->enabled)
wrperfmon(PERFMON_CMD_ENABLE, (1UL<<hwc->idx));
}
......@@ -642,39 +677,36 @@ static int __hw_perf_event_init(struct perf_event *event)
return 0;
}
static const struct pmu pmu = {
.enable = alpha_pmu_enable,
.disable = alpha_pmu_disable,
.read = alpha_pmu_read,
.unthrottle = alpha_pmu_unthrottle,
};
/*
* Main entry point to initialise a HW performance event.
*/
const struct pmu *hw_perf_event_init(struct perf_event *event)
static int alpha_pmu_event_init(struct perf_event *event)
{
int err;
switch (event->attr.type) {
case PERF_TYPE_RAW:
case PERF_TYPE_HARDWARE:
case PERF_TYPE_HW_CACHE:
break;
default:
return -ENOENT;
}
if (!alpha_pmu)
return ERR_PTR(-ENODEV);
return -ENODEV;
/* Do the real initialisation work. */
err = __hw_perf_event_init(event);
if (err)
return ERR_PTR(err);
return &pmu;
return err;
}
/*
* Main entry point - enable HW performance counters.
*/
void hw_perf_enable(void)
static void alpha_pmu_enable(struct pmu *pmu)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
......@@ -700,7 +732,7 @@ void hw_perf_enable(void)
* Main entry point - disable HW performance counters.
*/
void hw_perf_disable(void)
static void alpha_pmu_disable(struct pmu *pmu)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
......@@ -713,6 +745,17 @@ void hw_perf_disable(void)
wrperfmon(PERFMON_CMD_DISABLE, cpuc->idx_mask);
}
static struct pmu pmu = {
.pmu_enable = alpha_pmu_enable,
.pmu_disable = alpha_pmu_disable,
.event_init = alpha_pmu_event_init,
.add = alpha_pmu_add,
.del = alpha_pmu_del,
.start = alpha_pmu_start,
.stop = alpha_pmu_stop,
.read = alpha_pmu_read,
};
/*
* Main entry point - don't know when this is called but it
......@@ -766,7 +809,7 @@ static void alpha_perf_event_irq_handler(unsigned long la_ptr,
wrperfmon(PERFMON_CMD_DISABLE, cpuc->idx_mask);
/* la_ptr is the counter that overflowed. */
if (unlikely(la_ptr >= perf_max_events)) {
if (unlikely(la_ptr >= alpha_pmu->num_pmcs)) {
/* This should never occur! */
irq_err_count++;
pr_warning("PMI: silly index %ld\n", la_ptr);
......@@ -807,7 +850,7 @@ static void alpha_perf_event_irq_handler(unsigned long la_ptr,
/* Interrupts coming too quickly; "throttle" the
* counter, i.e., disable it for a little while.
*/
cpuc->idx_mask &= ~(1UL<<idx);
alpha_pmu_stop(event, 0);
}
}
wrperfmon(PERFMON_CMD_ENABLE, cpuc->idx_mask);
......@@ -837,6 +880,7 @@ void __init init_hw_perf_events(void)
/* And set up PMU specification */
alpha_pmu = &ev67_pmu;
perf_max_events = alpha_pmu->num_pmcs;
perf_pmu_register(&pmu);
}
......@@ -41,7 +41,7 @@
#include <linux/init.h>
#include <linux/bcd.h>
#include <linux/profile.h>
#include <linux/perf_event.h>
#include <linux/irq_work.h>
#include <asm/uaccess.h>
#include <asm/io.h>
......@@ -83,25 +83,25 @@ static struct {
unsigned long est_cycle_freq;
#ifdef CONFIG_PERF_EVENTS
#ifdef CONFIG_IRQ_WORK
DEFINE_PER_CPU(u8, perf_event_pending);
DEFINE_PER_CPU(u8, irq_work_pending);
#define set_perf_event_pending_flag() __get_cpu_var(perf_event_pending) = 1
#define test_perf_event_pending() __get_cpu_var(perf_event_pending)
#define clear_perf_event_pending() __get_cpu_var(perf_event_pending) = 0
#define set_irq_work_pending_flag() __get_cpu_var(irq_work_pending) = 1
#define test_irq_work_pending() __get_cpu_var(irq_work_pending)
#define clear_irq_work_pending() __get_cpu_var(irq_work_pending) = 0
void set_perf_event_pending(void)
void set_irq_work_pending(void)
{
set_perf_event_pending_flag();
set_irq_work_pending_flag();
}
#else /* CONFIG_PERF_EVENTS */
#else /* CONFIG_IRQ_WORK */
#define test_perf_event_pending() 0
#define clear_perf_event_pending()
#define test_irq_work_pending() 0
#define clear_irq_work_pending()
#endif /* CONFIG_PERF_EVENTS */
#endif /* CONFIG_IRQ_WORK */
static inline __u32 rpcc(void)
......@@ -191,9 +191,9 @@ irqreturn_t timer_interrupt(int irq, void *dev)
write_sequnlock(&xtime_lock);
if (test_perf_event_pending()) {
clear_perf_event_pending();
perf_event_do_pending();
if (test_irq_work_pending()) {
clear_irq_work_pending();
irq_work_run();
}
#ifndef CONFIG_SMP
......
......@@ -23,6 +23,7 @@ config ARM
select HAVE_KERNEL_GZIP
select HAVE_KERNEL_LZO
select HAVE_KERNEL_LZMA
select HAVE_IRQ_WORK
select HAVE_PERF_EVENTS
select PERF_USE_VMALLOC
select HAVE_REGS_AND_STACK_ACCESS_API
......
......@@ -12,18 +12,6 @@
#ifndef __ARM_PERF_EVENT_H__
#define __ARM_PERF_EVENT_H__
/*
* NOP: on *most* (read: all supported) ARM platforms, the performance
* counter interrupts are regular interrupts and not an NMI. This
* means that when we receive the interrupt we can call
* perf_event_do_pending() that handles all of the work with
* interrupts disabled.
*/
static inline void
set_perf_event_pending(void)
{
}
/* ARM performance counters start from 1 (in the cp15 accesses) so use the
* same indexes here for consistency. */
#define PERF_EVENT_INDEX_OFFSET 1
......
......@@ -123,6 +123,12 @@ armpmu_get_max_events(void)
}
EXPORT_SYMBOL_GPL(armpmu_get_max_events);
int perf_num_counters(void)
{
return armpmu_get_max_events();
}
EXPORT_SYMBOL_GPL(perf_num_counters);
#define HW_OP_UNSUPPORTED 0xFFFF
#define C(_x) \
......@@ -221,46 +227,56 @@ armpmu_event_update(struct perf_event *event,
}
static void
armpmu_disable(struct perf_event *event)
armpmu_read(struct perf_event *event)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
struct hw_perf_event *hwc = &event->hw;
int idx = hwc->idx;
WARN_ON(idx < 0);
clear_bit(idx, cpuc->active_mask);
armpmu->disable(hwc, idx);
barrier();
armpmu_event_update(event, hwc, idx);
cpuc->events[idx] = NULL;
clear_bit(idx, cpuc->used_mask);
/* Don't read disabled counters! */
if (hwc->idx < 0)
return;
perf_event_update_userpage(event);
armpmu_event_update(event, hwc, hwc->idx);
}
static void
armpmu_read(struct perf_event *event)
armpmu_stop(struct perf_event *event, int flags)
{
struct hw_perf_event *hwc = &event->hw;
/* Don't read disabled counters! */
if (hwc->idx < 0)
if (!armpmu)
return;
armpmu_event_update(event, hwc, hwc->idx);
/*
* ARM pmu always has to update the counter, so ignore
* PERF_EF_UPDATE, see comments in armpmu_start().
*/
if (!(hwc->state & PERF_HES_STOPPED)) {
armpmu->disable(hwc, hwc->idx);
barrier(); /* why? */
armpmu_event_update(event, hwc, hwc->idx);
hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
}
}
static void
armpmu_unthrottle(struct perf_event *event)
armpmu_start(struct perf_event *event, int flags)
{
struct hw_perf_event *hwc = &event->hw;
if (!armpmu)
return;
/*
* ARM pmu always has to reprogram the period, so ignore
* PERF_EF_RELOAD, see the comment below.
*/
if (flags & PERF_EF_RELOAD)
WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
hwc->state = 0;
/*
* Set the period again. Some counters can't be stopped, so when we
* were throttled we simply disabled the IRQ source and the counter
* were stopped we simply disabled the IRQ source and the counter
* may have been left counting. If we don't do this step then we may
* get an interrupt too soon or *way* too late if the overflow has
* happened since disabling.
......@@ -269,14 +285,33 @@ armpmu_unthrottle(struct perf_event *event)
armpmu->enable(hwc, hwc->idx);
}
static void
armpmu_del(struct perf_event *event, int flags)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
struct hw_perf_event *hwc = &event->hw;
int idx = hwc->idx;
WARN_ON(idx < 0);
clear_bit(idx, cpuc->active_mask);
armpmu_stop(event, PERF_EF_UPDATE);
cpuc->events[idx] = NULL;
clear_bit(idx, cpuc->used_mask);
perf_event_update_userpage(event);
}
static int
armpmu_enable(struct perf_event *event)
armpmu_add(struct perf_event *event, int flags)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
struct hw_perf_event *hwc = &event->hw;
int idx;
int err = 0;
perf_pmu_disable(event->pmu);
/* If we don't have a space for the counter then finish early. */
idx = armpmu->get_event_idx(cpuc, hwc);
if (idx < 0) {
......@@ -293,25 +328,19 @@ armpmu_enable(struct perf_event *event)
cpuc->events[idx] = event;
set_bit(idx, cpuc->active_mask);
/* Set the period for the event. */
armpmu_event_set_period(event, hwc, idx);
/* Enable the event. */
armpmu->enable(hwc, idx);
hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
if (flags & PERF_EF_START)
armpmu_start(event, PERF_EF_RELOAD);
/* Propagate our changes to the userspace mapping. */
perf_event_update_userpage(event);
out:
perf_pmu_enable(event->pmu);
return err;
}
static struct pmu pmu = {
.enable = armpmu_enable,
.disable = armpmu_disable,
.unthrottle = armpmu_unthrottle,
.read = armpmu_read,
};
static struct pmu pmu;
static int
validate_event(struct cpu_hw_events *cpuc,
......@@ -491,20 +520,29 @@ __hw_perf_event_init(struct perf_event *event)
return err;
}
const struct pmu *
hw_perf_event_init(struct perf_event *event)
static int armpmu_event_init(struct perf_event *event)
{
int err = 0;
switch (event->attr.type) {
case PERF_TYPE_RAW:
case PERF_TYPE_HARDWARE:
case PERF_TYPE_HW_CACHE:
break;
default:
return -ENOENT;
}
if (!armpmu)
return ERR_PTR(-ENODEV);
return -ENODEV;
event->destroy = hw_perf_event_destroy;
if (!atomic_inc_not_zero(&active_events)) {
if (atomic_read(&active_events) > perf_max_events) {
if (atomic_read(&active_events) > armpmu->num_events) {
atomic_dec(&active_events);
return ERR_PTR(-ENOSPC);
return -ENOSPC;
}
mutex_lock(&pmu_reserve_mutex);
......@@ -518,17 +556,16 @@ hw_perf_event_init(struct perf_event *event)
}
if (err)
return ERR_PTR(err);
return err;
err = __hw_perf_event_init(event);
if (err)
hw_perf_event_destroy(event);
return err ? ERR_PTR(err) : &pmu;
return err;
}
void
hw_perf_enable(void)
static void armpmu_enable(struct pmu *pmu)
{
/* Enable all of the perf events on hardware. */
int idx;
......@@ -549,13 +586,23 @@ hw_perf_enable(void)
armpmu->start();
}
void
hw_perf_disable(void)
static void armpmu_disable(struct pmu *pmu)
{
if (armpmu)
armpmu->stop();
}
static struct pmu pmu = {
.pmu_enable = armpmu_enable,
.pmu_disable = armpmu_disable,
.event_init = armpmu_event_init,
.add = armpmu_add,
.del = armpmu_del,
.start = armpmu_start,
.stop = armpmu_stop,
.read = armpmu_read,
};
/*
* ARMv6 Performance counter handling code.
*
......@@ -1045,7 +1092,7 @@ armv6pmu_handle_irq(int irq_num,
* platforms that can have the PMU interrupts raised as an NMI, this
* will not work.
*/
perf_event_do_pending();
irq_work_run();
return IRQ_HANDLED;
}
......@@ -2021,7 +2068,7 @@ static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev)
* platforms that can have the PMU interrupts raised as an NMI, this
* will not work.
*/
perf_event_do_pending();
irq_work_run();
return IRQ_HANDLED;
}
......@@ -2389,7 +2436,7 @@ xscale1pmu_handle_irq(int irq_num, void *dev)
armpmu->disable(hwc, idx);
}
perf_event_do_pending();
irq_work_run();
/*
* Re-enable the PMU.
......@@ -2716,7 +2763,7 @@ xscale2pmu_handle_irq(int irq_num, void *dev)
armpmu->disable(hwc, idx);
}
perf_event_do_pending();
irq_work_run();
/*
* Re-enable the PMU.
......@@ -2933,14 +2980,12 @@ init_hw_perf_events(void)
armpmu = &armv6pmu;
memcpy(armpmu_perf_cache_map, armv6_perf_cache_map,
sizeof(armv6_perf_cache_map));
perf_max_events = armv6pmu.num_events;
break;
case 0xB020: /* ARM11mpcore */
armpmu = &armv6mpcore_pmu;
memcpy(armpmu_perf_cache_map,
armv6mpcore_perf_cache_map,
sizeof(armv6mpcore_perf_cache_map));
perf_max_events = armv6mpcore_pmu.num_events;
break;
case 0xC080: /* Cortex-A8 */
armv7pmu.id = ARM_PERF_PMU_ID_CA8;
......@@ -2952,7 +2997,6 @@ init_hw_perf_events(void)
/* Reset PMNC and read the nb of CNTx counters
supported */
armv7pmu.num_events = armv7_reset_read_pmnc();
perf_max_events = armv7pmu.num_events;
break;
case 0xC090: /* Cortex-A9 */
armv7pmu.id = ARM_PERF_PMU_ID_CA9;
......@@ -2964,7 +3008,6 @@ init_hw_perf_events(void)
/* Reset PMNC and read the nb of CNTx counters
supported */
armv7pmu.num_events = armv7_reset_read_pmnc();
perf_max_events = armv7pmu.num_events;
break;
}
/* Intel CPUs [xscale]. */
......@@ -2975,13 +3018,11 @@ init_hw_perf_events(void)
armpmu = &xscale1pmu;
memcpy(armpmu_perf_cache_map, xscale_perf_cache_map,
sizeof(xscale_perf_cache_map));
perf_max_events = xscale1pmu.num_events;
break;
case 2:
armpmu = &xscale2pmu;
memcpy(armpmu_perf_cache_map, xscale_perf_cache_map,
sizeof(xscale_perf_cache_map));
perf_max_events = xscale2pmu.num_events;
break;
}
}
......@@ -2991,9 +3032,10 @@ init_hw_perf_events(void)
arm_pmu_names[armpmu->id], armpmu->num_events);
} else {
pr_info("no hardware support available\n");
perf_max_events = -1;
}
perf_pmu_register(&pmu);
return 0;
}
arch_initcall(init_hw_perf_events);
......@@ -3001,13 +3043,6 @@ arch_initcall(init_hw_perf_events);
/*
* Callchain handling code.
*/
static inline void
callchain_store(struct perf_callchain_entry *entry,
u64 ip)
{
if (entry->nr < PERF_MAX_STACK_DEPTH)
entry->ip[entry->nr++] = ip;
}
/*
* The registers we're interested in are at the end of the variable
......@@ -3039,7 +3074,7 @@ user_backtrace(struct frame_tail *tail,
if (__copy_from_user_inatomic(&buftail, tail, sizeof(buftail)))
return NULL;
callchain_store(entry, buftail.lr);
perf_callchain_store(entry, buftail.lr);
/*
* Frame pointers should strictly progress back up the stack
......@@ -3051,16 +3086,11 @@ user_backtrace(struct frame_tail *tail,
return buftail.fp - 1;
}
static void
perf_callchain_user(struct pt_regs *regs,
struct perf_callchain_entry *entry)
void
perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
{
struct frame_tail *tail;
callchain_store(entry, PERF_CONTEXT_USER);
if (!user_mode(regs))
regs = task_pt_regs(current);
tail = (struct frame_tail *)regs->ARM_fp - 1;
......@@ -3078,56 +3108,18 @@ callchain_trace(struct stackframe *fr,
void *data)
{
struct perf_callchain_entry *entry = data;
callchain_store(entry, fr->pc);
perf_callchain_store(entry, fr->pc);
return 0;
}
static void
perf_callchain_kernel(struct pt_regs *regs,
struct perf_callchain_entry *entry)
void
perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
{
struct stackframe fr;
callchain_store(entry, PERF_CONTEXT_KERNEL);
fr.fp = regs->ARM_fp;
fr.sp = regs->ARM_sp;
fr.lr = regs->ARM_lr;
fr.pc = regs->ARM_pc;
walk_stackframe(&fr, callchain_trace, entry);
}
static void
perf_do_callchain(struct pt_regs *regs,
struct perf_callchain_entry *entry)
{
int is_user;
if (!regs)
return;
is_user = user_mode(regs);
if (!current || !current->pid)
return;
if (is_user && current->state != TASK_RUNNING)
return;
if (!is_user)
perf_callchain_kernel(regs, entry);
if (current->mm)
perf_callchain_user(regs, entry);
}
static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry);
struct perf_callchain_entry *
perf_callchain(struct pt_regs *regs)
{
struct perf_callchain_entry *entry = &__get_cpu_var(pmc_irq_entry);
entry->nr = 0;
perf_do_callchain(regs, entry);
return entry;
}
......@@ -6,4 +6,8 @@ DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \
oprofilefs.o oprofile_stats.o \
timer_int.o )
ifeq ($(CONFIG_HW_PERF_EVENTS),y)
DRIVER_OBJS += $(addprefix ../../../drivers/oprofile/, oprofile_perf.o)
endif
oprofile-y := $(DRIVER_OBJS) common.o
......@@ -25,139 +25,10 @@
#include <asm/ptrace.h>
#ifdef CONFIG_HW_PERF_EVENTS
/*
* Per performance monitor configuration as set via oprofilefs.
*/
struct op_counter_config {
unsigned long count;
unsigned long enabled;
unsigned long event;
unsigned long unit_mask;
unsigned long kernel;
unsigned long user;
struct perf_event_attr attr;
};
static int op_arm_enabled;
static DEFINE_MUTEX(op_arm_mutex);
static struct op_counter_config *counter_config;
static struct perf_event **perf_events[nr_cpumask_bits];
static int perf_num_counters;
/*
* Overflow callback for oprofile.
*/
static void op_overflow_handler(struct perf_event *event, int unused,
struct perf_sample_data *data, struct pt_regs *regs)
char *op_name_from_perf_id(void)
{
int id;
u32 cpu = smp_processor_id();
for (id = 0; id < perf_num_counters; ++id)
if (perf_events[cpu][id] == event)
break;
if (id != perf_num_counters)
oprofile_add_sample(regs, id);
else
pr_warning("oprofile: ignoring spurious overflow "
"on cpu %u\n", cpu);
}
/*
* Called by op_arm_setup to create perf attributes to mirror the oprofile
* settings in counter_config. Attributes are created as `pinned' events and
* so are permanently scheduled on the PMU.
*/
static void op_perf_setup(void)
{
int i;
u32 size = sizeof(struct perf_event_attr);
struct perf_event_attr *attr;
for (i = 0; i < perf_num_counters; ++i) {
attr = &counter_config[i].attr;
memset(attr, 0, size);
attr->type = PERF_TYPE_RAW;
attr->size = size;
attr->config = counter_config[i].event;
attr->sample_period = counter_config[i].count;
attr->pinned = 1;
}
}
static int op_create_counter(int cpu, int event)
{
int ret = 0;
struct perf_event *pevent;
if (!counter_config[event].enabled || (perf_events[cpu][event] != NULL))
return ret;
pevent = perf_event_create_kernel_counter(&counter_config[event].attr,
cpu, -1,
op_overflow_handler);
if (IS_ERR(pevent)) {
ret = PTR_ERR(pevent);
} else if (pevent->state != PERF_EVENT_STATE_ACTIVE) {
perf_event_release_kernel(pevent);
pr_warning("oprofile: failed to enable event %d "
"on CPU %d\n", event, cpu);
ret = -EBUSY;
} else {
perf_events[cpu][event] = pevent;
}
return ret;
}
enum arm_perf_pmu_ids id = armpmu_get_pmu_id();
static void op_destroy_counter(int cpu, int event)
{
struct perf_event *pevent = perf_events[cpu][event];
if (pevent) {
perf_event_release_kernel(pevent);
perf_events[cpu][event] = NULL;
}
}
/*
* Called by op_arm_start to create active perf events based on the
* perviously configured attributes.
*/
static int op_perf_start(void)
{
int cpu, event, ret = 0;
for_each_online_cpu(cpu) {
for (event = 0; event < perf_num_counters; ++event) {
ret = op_create_counter(cpu, event);
if (ret)
goto out;
}
}
out:
return ret;
}
/*
* Called by op_arm_stop at the end of a profiling run.
*/
static void op_perf_stop(void)
{
int cpu, event;
for_each_online_cpu(cpu)
for (event = 0; event < perf_num_counters; ++event)
op_destroy_counter(cpu, event);
}
static char *op_name_from_perf_id(enum arm_perf_pmu_ids id)
{
switch (id) {
case ARM_PERF_PMU_ID_XSCALE1:
return "arm/xscale1";
......@@ -176,116 +47,6 @@ static char *op_name_from_perf_id(enum arm_perf_pmu_ids id)
}
}
static int op_arm_create_files(struct super_block *sb, struct dentry *root)
{
unsigned int i;
for (i = 0; i < perf_num_counters; i++) {
struct dentry *dir;
char buf[4];
snprintf(buf, sizeof buf, "%d", i);
dir = oprofilefs_mkdir(sb, root, buf);
oprofilefs_create_ulong(sb, dir, "enabled", &counter_config[i].enabled);
oprofilefs_create_ulong(sb, dir, "event", &counter_config[i].event);
oprofilefs_create_ulong(sb, dir, "count", &counter_config[i].count);
oprofilefs_create_ulong(sb, dir, "unit_mask", &counter_config[i].unit_mask);
oprofilefs_create_ulong(sb, dir, "kernel", &counter_config[i].kernel);
oprofilefs_create_ulong(sb, dir, "user", &counter_config[i].user);
}
return 0;
}
static int op_arm_setup(void)
{
spin_lock(&oprofilefs_lock);
op_perf_setup();
spin_unlock(&oprofilefs_lock);
return 0;
}
static int op_arm_start(void)
{
int ret = -EBUSY;
mutex_lock(&op_arm_mutex);
if (!op_arm_enabled) {
ret = 0;
op_perf_start();
op_arm_enabled = 1;
}
mutex_unlock(&op_arm_mutex);
return ret;
}
static void op_arm_stop(void)
{
mutex_lock(&op_arm_mutex);
if (op_arm_enabled)
op_perf_stop();
op_arm_enabled = 0;
mutex_unlock(&op_arm_mutex);
}
#ifdef CONFIG_PM
static int op_arm_suspend(struct platform_device *dev, pm_message_t state)
{
mutex_lock(&op_arm_mutex);
if (op_arm_enabled)
op_perf_stop();
mutex_unlock(&op_arm_mutex);
return 0;
}
static int op_arm_resume(struct platform_device *dev)
{
mutex_lock(&op_arm_mutex);
if (op_arm_enabled && op_perf_start())
op_arm_enabled = 0;
mutex_unlock(&op_arm_mutex);
return 0;
}
static struct platform_driver oprofile_driver = {
.driver = {
.name = "arm-oprofile",
},
.resume = op_arm_resume,
.suspend = op_arm_suspend,
};
static struct platform_device *oprofile_pdev;
static int __init init_driverfs(void)
{
int ret;
ret = platform_driver_register(&oprofile_driver);
if (ret)
goto out;
oprofile_pdev = platform_device_register_simple(
oprofile_driver.driver.name, 0, NULL, 0);
if (IS_ERR(oprofile_pdev)) {
ret = PTR_ERR(oprofile_pdev);
platform_driver_unregister(&oprofile_driver);
}
out:
return ret;
}
static void exit_driverfs(void)
{
platform_device_unregister(oprofile_pdev);
platform_driver_unregister(&oprofile_driver);
}
#else
static int __init init_driverfs(void) { return 0; }
#define exit_driverfs() do { } while (0)
#endif /* CONFIG_PM */
static int report_trace(struct stackframe *frame, void *d)
{
unsigned int *depth = d;
......@@ -350,74 +111,14 @@ static void arm_backtrace(struct pt_regs * const regs, unsigned int depth)
int __init oprofile_arch_init(struct oprofile_operations *ops)
{
int cpu, ret = 0;
perf_num_counters = armpmu_get_max_events();
counter_config = kcalloc(perf_num_counters,
sizeof(struct op_counter_config), GFP_KERNEL);
if (!counter_config) {
pr_info("oprofile: failed to allocate %d "
"counters\n", perf_num_counters);
return -ENOMEM;
}
ret = init_driverfs();
if (ret) {
kfree(counter_config);
counter_config = NULL;
return ret;
}
for_each_possible_cpu(cpu) {
perf_events[cpu] = kcalloc(perf_num_counters,
sizeof(struct perf_event *), GFP_KERNEL);
if (!perf_events[cpu]) {
pr_info("oprofile: failed to allocate %d perf events "
"for cpu %d\n", perf_num_counters, cpu);
while (--cpu >= 0)
kfree(perf_events[cpu]);
return -ENOMEM;
}
}
ops->backtrace = arm_backtrace;
ops->create_files = op_arm_create_files;
ops->setup = op_arm_setup;
ops->start = op_arm_start;
ops->stop = op_arm_stop;
ops->shutdown = op_arm_stop;
ops->cpu_type = op_name_from_perf_id(armpmu_get_pmu_id());
if (!ops->cpu_type)
ret = -ENODEV;
else
pr_info("oprofile: using %s\n", ops->cpu_type);
return ret;
return oprofile_perf_init(ops);
}
void oprofile_arch_exit(void)
void __exit oprofile_arch_exit(void)
{
int cpu, id;
struct perf_event *event;
if (*perf_events) {
for_each_possible_cpu(cpu) {
for (id = 0; id < perf_num_counters; ++id) {
event = perf_events[cpu][id];
if (event != NULL)
perf_event_release_kernel(event);
}
kfree(perf_events[cpu]);
}
}
if (counter_config) {
kfree(counter_config);
exit_driverfs();
}
oprofile_perf_exit();
}
#else
int __init oprofile_arch_init(struct oprofile_operations *ops)
......@@ -425,5 +126,5 @@ int __init oprofile_arch_init(struct oprofile_operations *ops)
pr_info("oprofile: hardware counters not available\n");
return -ENODEV;
}
void oprofile_arch_exit(void) {}
void __exit oprofile_arch_exit(void) {}
#endif /* CONFIG_HW_PERF_EVENTS */
......@@ -7,6 +7,7 @@ config FRV
default y
select HAVE_IDE
select HAVE_ARCH_TRACEHOOK
select HAVE_IRQ_WORK
select HAVE_PERF_EVENTS
config ZONE_DMA
......
......@@ -5,4 +5,4 @@
lib-y := \
__ashldi3.o __lshrdi3.o __muldi3.o __ashrdi3.o __negdi2.o __ucmpdi2.o \
checksum.o memcpy.o memset.o atomic-ops.o atomic64-ops.o \
outsl_ns.o outsl_sw.o insl_ns.o insl_sw.o cache.o perf_event.o
outsl_ns.o outsl_sw.o insl_ns.o insl_sw.o cache.o
/* Performance event handling
*
* Copyright (C) 2009 Red Hat, Inc. All Rights Reserved.
* Written by David Howells (dhowells@redhat.com)
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public Licence
* as published by the Free Software Foundation; either version
* 2 of the Licence, or (at your option) any later version.
*/
#include <linux/perf_event.h>
/*
* mark the performance event as pending
*/
void set_perf_event_pending(void)
{
}
......@@ -6,12 +6,6 @@
* David Mosberger-Tang <davidm@hpl.hp.com>
*/
#include <linux/threads.h>
#include <linux/irq.h>
#include <asm/processor.h>
/*
* No irq_cpustat_t for IA-64. The data is held in the per-CPU data structure.
*/
......@@ -20,6 +14,11 @@
#define local_softirq_pending() (local_cpu_data->softirq_pending)
#include <linux/threads.h>
#include <linux/irq.h>
#include <asm/processor.h>
extern void __iomem *ipi_base_addr;
void ack_bad_irq(unsigned int irq);
......
......@@ -16,6 +16,7 @@ config PARISC
select RTC_DRV_GENERIC
select INIT_ALL_POSSIBLE
select BUG
select HAVE_IRQ_WORK
select HAVE_PERF_EVENTS
select GENERIC_ATOMIC64 if !64BIT
help
......
#ifndef __ASM_PARISC_PERF_EVENT_H
#define __ASM_PARISC_PERF_EVENT_H
/* parisc only supports software events through this interface. */
static inline void set_perf_event_pending(void) { }
/* Empty, just to avoid compiling error */
#endif /* __ASM_PARISC_PERF_EVENT_H */
......@@ -138,6 +138,7 @@ config PPC
select HAVE_OPROFILE
select HAVE_SYSCALL_WRAPPERS if PPC64
select GENERIC_ATOMIC64 if PPC32
select HAVE_IRQ_WORK
select HAVE_PERF_EVENTS
select HAVE_REGS_AND_STACK_ACCESS_API
select HAVE_HW_BREAKPOINT if PERF_EVENTS && PPC_BOOK3S_64
......
......@@ -129,7 +129,7 @@ struct paca_struct {
u8 soft_enabled; /* irq soft-enable flag */
u8 hard_enabled; /* set if irqs are enabled in MSR */
u8 io_sync; /* writel() needs spin_unlock sync */
u8 perf_event_pending; /* PM interrupt while soft-disabled */
u8 irq_work_pending; /* IRQ_WORK interrupt while soft-disable */
/* Stuff for accurate time accounting */
u64 user_time; /* accumulated usermode TB ticks */
......
......@@ -23,18 +23,6 @@
#include "ppc32.h"
#endif
/*
* Store another value in a callchain_entry.
*/
static inline void callchain_store(struct perf_callchain_entry *entry, u64 ip)
{
unsigned int nr = entry->nr;
if (nr < PERF_MAX_STACK_DEPTH) {
entry->ip[nr] = ip;
entry->nr = nr + 1;
}
}
/*
* Is sp valid as the address of the next kernel stack frame after prev_sp?
......@@ -58,8 +46,8 @@ static int valid_next_sp(unsigned long sp, unsigned long prev_sp)
return 0;
}
static void perf_callchain_kernel(struct pt_regs *regs,
struct perf_callchain_entry *entry)
void
perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
{
unsigned long sp, next_sp;
unsigned long next_ip;
......@@ -69,8 +57,7 @@ static void perf_callchain_kernel(struct pt_regs *regs,
lr = regs->link;
sp = regs->gpr[1];
callchain_store(entry, PERF_CONTEXT_KERNEL);
callchain_store(entry, regs->nip);
perf_callchain_store(entry, regs->nip);
if (!validate_sp(sp, current, STACK_FRAME_OVERHEAD))
return;
......@@ -89,7 +76,7 @@ static void perf_callchain_kernel(struct pt_regs *regs,
next_ip = regs->nip;
lr = regs->link;
level = 0;
callchain_store(entry, PERF_CONTEXT_KERNEL);
perf_callchain_store(entry, PERF_CONTEXT_KERNEL);
} else {
if (level == 0)
......@@ -111,7 +98,7 @@ static void perf_callchain_kernel(struct pt_regs *regs,
++level;
}
callchain_store(entry, next_ip);
perf_callchain_store(entry, next_ip);
if (!valid_next_sp(next_sp, sp))
return;
sp = next_sp;
......@@ -233,8 +220,8 @@ static int sane_signal_64_frame(unsigned long sp)
puc == (unsigned long) &sf->uc;
}
static void perf_callchain_user_64(struct pt_regs *regs,
struct perf_callchain_entry *entry)
static void perf_callchain_user_64(struct perf_callchain_entry *entry,
struct pt_regs *regs)
{
unsigned long sp, next_sp;
unsigned long next_ip;
......@@ -246,8 +233,7 @@ static void perf_callchain_user_64(struct pt_regs *regs,
next_ip = regs->nip;
lr = regs->link;
sp = regs->gpr[1];
callchain_store(entry, PERF_CONTEXT_USER);
callchain_store(entry, next_ip);
perf_callchain_store(entry, next_ip);
for (;;) {
fp = (unsigned long __user *) sp;
......@@ -276,14 +262,14 @@ static void perf_callchain_user_64(struct pt_regs *regs,
read_user_stack_64(&uregs[PT_R1], &sp))
return;
level = 0;
callchain_store(entry, PERF_CONTEXT_USER);
callchain_store(entry, next_ip);
perf_callchain_store(entry, PERF_CONTEXT_USER);
perf_callchain_store(entry, next_ip);
continue;
}
if (level == 0)
next_ip = lr;
callchain_store(entry, next_ip);
perf_callchain_store(entry, next_ip);
++level;
sp = next_sp;
}
......@@ -315,8 +301,8 @@ static int read_user_stack_32(unsigned int __user *ptr, unsigned int *ret)
return __get_user_inatomic(*ret, ptr);
}
static inline void perf_callchain_user_64(struct pt_regs *regs,
struct perf_callchain_entry *entry)
static inline void perf_callchain_user_64(struct perf_callchain_entry *entry,
struct pt_regs *regs)
{
}
......@@ -435,8 +421,8 @@ static unsigned int __user *signal_frame_32_regs(unsigned int sp,
return mctx->mc_gregs;
}
static void perf_callchain_user_32(struct pt_regs *regs,
struct perf_callchain_entry *entry)
static void perf_callchain_user_32(struct perf_callchain_entry *entry,
struct pt_regs *regs)
{
unsigned int sp, next_sp;
unsigned int next_ip;
......@@ -447,8 +433,7 @@ static void perf_callchain_user_32(struct pt_regs *regs,
next_ip = regs->nip;
lr = regs->link;
sp = regs->gpr[1];
callchain_store(entry, PERF_CONTEXT_USER);
callchain_store(entry, next_ip);
perf_callchain_store(entry, next_ip);
while (entry->nr < PERF_MAX_STACK_DEPTH) {
fp = (unsigned int __user *) (unsigned long) sp;
......@@ -470,45 +455,24 @@ static void perf_callchain_user_32(struct pt_regs *regs,
read_user_stack_32(&uregs[PT_R1], &sp))
return;
level = 0;
callchain_store(entry, PERF_CONTEXT_USER);
callchain_store(entry, next_ip);
perf_callchain_store(entry, PERF_CONTEXT_USER);
perf_callchain_store(entry, next_ip);
continue;
}
if (level == 0)
next_ip = lr;
callchain_store(entry, next_ip);
perf_callchain_store(entry, next_ip);
++level;
sp = next_sp;
}
}
/*
* Since we can't get PMU interrupts inside a PMU interrupt handler,
* we don't need separate irq and nmi entries here.
*/
static DEFINE_PER_CPU(struct perf_callchain_entry, cpu_perf_callchain);
struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
void
perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
{
struct perf_callchain_entry *entry = &__get_cpu_var(cpu_perf_callchain);
entry->nr = 0;
if (!user_mode(regs)) {
perf_callchain_kernel(regs, entry);
if (current->mm)
regs = task_pt_regs(current);
else
regs = NULL;
}
if (regs) {
if (current_is_64bit())
perf_callchain_user_64(regs, entry);
else
perf_callchain_user_32(regs, entry);
}
return entry;
if (current_is_64bit())
perf_callchain_user_64(entry, regs);
else
perf_callchain_user_32(entry, regs);
}
......@@ -402,6 +402,9 @@ static void power_pmu_read(struct perf_event *event)
{
s64 val, delta, prev;
if (event->hw.state & PERF_HES_STOPPED)
return;
if (!event->hw.idx)
return;
/*
......@@ -517,7 +520,7 @@ static void write_mmcr0(struct cpu_hw_events *cpuhw, unsigned long mmcr0)
* Disable all events to prevent PMU interrupts and to allow
* events to be added or removed.
*/
void hw_perf_disable(void)
static void power_pmu_disable(struct pmu *pmu)
{
struct cpu_hw_events *cpuhw;
unsigned long flags;
......@@ -565,7 +568,7 @@ void hw_perf_disable(void)
* If we were previously disabled and events were added, then
* put the new config on the PMU.
*/
void hw_perf_enable(void)
static void power_pmu_enable(struct pmu *pmu)
{
struct perf_event *event;
struct cpu_hw_events *cpuhw;
......@@ -672,6 +675,8 @@ void hw_perf_enable(void)
}
local64_set(&event->hw.prev_count, val);
event->hw.idx = idx;
if (event->hw.state & PERF_HES_STOPPED)
val = 0;
write_pmc(idx, val);
perf_event_update_userpage(event);
}
......@@ -727,7 +732,7 @@ static int collect_events(struct perf_event *group, int max_count,
* re-enable the PMU in order to get hw_perf_enable to do the
* actual work of reconfiguring the PMU.
*/
static int power_pmu_enable(struct perf_event *event)
static int power_pmu_add(struct perf_event *event, int ef_flags)
{
struct cpu_hw_events *cpuhw;
unsigned long flags;
......@@ -735,7 +740,7 @@ static int power_pmu_enable(struct perf_event *event)
int ret = -EAGAIN;
local_irq_save(flags);
perf_disable();
perf_pmu_disable(event->pmu);
/*
* Add the event to the list (if there is room)
......@@ -749,6 +754,9 @@ static int power_pmu_enable(struct perf_event *event)
cpuhw->events[n0] = event->hw.config;
cpuhw->flags[n0] = event->hw.event_base;
if (!(ef_flags & PERF_EF_START))
event->hw.state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
/*
* If group events scheduling transaction was started,
* skip the schedulability test here, it will be peformed
......@@ -769,7 +777,7 @@ static int power_pmu_enable(struct perf_event *event)
ret = 0;
out:
perf_enable();
perf_pmu_enable(event->pmu);
local_irq_restore(flags);
return ret;
}
......@@ -777,14 +785,14 @@ static int power_pmu_enable(struct perf_event *event)
/*
* Remove a event from the PMU.
*/
static void power_pmu_disable(struct perf_event *event)
static void power_pmu_del(struct perf_event *event, int ef_flags)
{
struct cpu_hw_events *cpuhw;
long i;
unsigned long flags;
local_irq_save(flags);
perf_disable();
perf_pmu_disable(event->pmu);
power_pmu_read(event);
......@@ -821,34 +829,60 @@ static void power_pmu_disable(struct perf_event *event)
cpuhw->mmcr[0] &= ~(MMCR0_PMXE | MMCR0_FCECE);
}
perf_enable();
perf_pmu_enable(event->pmu);
local_irq_restore(flags);
}
/*
* Re-enable interrupts on a event after they were throttled
* because they were coming too fast.
* POWER-PMU does not support disabling individual counters, hence
* program their cycle counter to their max value and ignore the interrupts.
*/
static void power_pmu_unthrottle(struct perf_event *event)
static void power_pmu_start(struct perf_event *event, int ef_flags)
{
unsigned long flags;
s64 left;
if (!event->hw.idx || !event->hw.sample_period)
return;
if (!(event->hw.state & PERF_HES_STOPPED))
return;
if (ef_flags & PERF_EF_RELOAD)
WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
local_irq_save(flags);
perf_pmu_disable(event->pmu);
event->hw.state = 0;
left = local64_read(&event->hw.period_left);
write_pmc(event->hw.idx, left);
perf_event_update_userpage(event);
perf_pmu_enable(event->pmu);
local_irq_restore(flags);
}
static void power_pmu_stop(struct perf_event *event, int ef_flags)
{
s64 val, left;
unsigned long flags;
if (!event->hw.idx || !event->hw.sample_period)
return;
if (event->hw.state & PERF_HES_STOPPED)
return;
local_irq_save(flags);
perf_disable();
perf_pmu_disable(event->pmu);
power_pmu_read(event);
left = event->hw.sample_period;
event->hw.last_period = left;
val = 0;
if (left < 0x80000000L)
val = 0x80000000L - left;
write_pmc(event->hw.idx, val);
local64_set(&event->hw.prev_count, val);
local64_set(&event->hw.period_left, left);
event->hw.state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
write_pmc(event->hw.idx, 0);
perf_event_update_userpage(event);
perf_enable();
perf_pmu_enable(event->pmu);
local_irq_restore(flags);
}
......@@ -857,10 +891,11 @@ static void power_pmu_unthrottle(struct perf_event *event)
* Set the flag to make pmu::enable() not perform the
* schedulability test, it will be performed at commit time
*/
void power_pmu_start_txn(const struct pmu *pmu)
void power_pmu_start_txn(struct pmu *pmu)
{
struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
perf_pmu_disable(pmu);
cpuhw->group_flag |= PERF_EVENT_TXN;
cpuhw->n_txn_start = cpuhw->n_events;
}
......@@ -870,11 +905,12 @@ void power_pmu_start_txn(const struct pmu *pmu)
* Clear the flag and pmu::enable() will perform the
* schedulability test.
*/
void power_pmu_cancel_txn(const struct pmu *pmu)
void power_pmu_cancel_txn(struct pmu *pmu)
{
struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
cpuhw->group_flag &= ~PERF_EVENT_TXN;
perf_pmu_enable(pmu);
}
/*
......@@ -882,7 +918,7 @@ void power_pmu_cancel_txn(const struct pmu *pmu)
* Perform the group schedulability test as a whole
* Return 0 if success
*/
int power_pmu_commit_txn(const struct pmu *pmu)
int power_pmu_commit_txn(struct pmu *pmu)
{
struct cpu_hw_events *cpuhw;
long i, n;
......@@ -901,19 +937,10 @@ int power_pmu_commit_txn(const struct pmu *pmu)
cpuhw->event[i]->hw.config = cpuhw->events[i];
cpuhw->group_flag &= ~PERF_EVENT_TXN;
perf_pmu_enable(pmu);
return 0;
}
struct pmu power_pmu = {
.enable = power_pmu_enable,
.disable = power_pmu_disable,
.read = power_pmu_read,
.unthrottle = power_pmu_unthrottle,
.start_txn = power_pmu_start_txn,
.cancel_txn = power_pmu_cancel_txn,
.commit_txn = power_pmu_commit_txn,
};
/*
* Return 1 if we might be able to put event on a limited PMC,
* or 0 if not.
......@@ -1014,7 +1041,7 @@ static int hw_perf_cache_event(u64 config, u64 *eventp)
return 0;
}
const struct pmu *hw_perf_event_init(struct perf_event *event)
static int power_pmu_event_init(struct perf_event *event)
{
u64 ev;
unsigned long flags;
......@@ -1026,25 +1053,27 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
struct cpu_hw_events *cpuhw;
if (!ppmu)
return ERR_PTR(-ENXIO);
return -ENOENT;
switch (event->attr.type) {
case PERF_TYPE_HARDWARE:
ev = event->attr.config;
if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0)
return ERR_PTR(-EOPNOTSUPP);
return -EOPNOTSUPP;
ev = ppmu->generic_events[ev];
break;
case PERF_TYPE_HW_CACHE:
err = hw_perf_cache_event(event->attr.config, &ev);
if (err)
return ERR_PTR(err);
return err;
break;
case PERF_TYPE_RAW:
ev = event->attr.config;
break;
default:
return ERR_PTR(-EINVAL);
return -ENOENT;
}
event->hw.config_base = ev;
event->hw.idx = 0;
......@@ -1063,7 +1092,7 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
* XXX we should check if the task is an idle task.
*/
flags = 0;
if (event->ctx->task)
if (event->attach_state & PERF_ATTACH_TASK)
flags |= PPMU_ONLY_COUNT_RUN;
/*
......@@ -1081,7 +1110,7 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
*/
ev = normal_pmc_alternative(ev, flags);
if (!ev)
return ERR_PTR(-EINVAL);
return -EINVAL;
}
}
......@@ -1095,19 +1124,19 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
n = collect_events(event->group_leader, ppmu->n_counter - 1,
ctrs, events, cflags);
if (n < 0)
return ERR_PTR(-EINVAL);
return -EINVAL;
}
events[n] = ev;
ctrs[n] = event;
cflags[n] = flags;
if (check_excludes(ctrs, cflags, n, 1))
return ERR_PTR(-EINVAL);
return -EINVAL;
cpuhw = &get_cpu_var(cpu_hw_events);
err = power_check_constraints(cpuhw, events, cflags, n + 1);
put_cpu_var(cpu_hw_events);
if (err)
return ERR_PTR(-EINVAL);
return -EINVAL;
event->hw.config = events[n];
event->hw.event_base = cflags[n];
......@@ -1132,11 +1161,23 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
}
event->destroy = hw_perf_event_destroy;
if (err)
return ERR_PTR(err);
return &power_pmu;
return err;
}
struct pmu power_pmu = {
.pmu_enable = power_pmu_enable,
.pmu_disable = power_pmu_disable,
.event_init = power_pmu_event_init,
.add = power_pmu_add,
.del = power_pmu_del,
.start = power_pmu_start,
.stop = power_pmu_stop,
.read = power_pmu_read,
.start_txn = power_pmu_start_txn,
.cancel_txn = power_pmu_cancel_txn,
.commit_txn = power_pmu_commit_txn,
};
/*
* A counter has overflowed; update its count and record
* things if requested. Note that interrupts are hard-disabled
......@@ -1149,6 +1190,11 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
s64 prev, delta, left;
int record = 0;
if (event->hw.state & PERF_HES_STOPPED) {
write_pmc(event->hw.idx, 0);
return;
}
/* we don't have to worry about interrupts here */
prev = local64_read(&event->hw.prev_count);
delta = (val - prev) & 0xfffffffful;
......@@ -1171,6 +1217,11 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
val = 0x80000000LL - left;
}
write_pmc(event->hw.idx, val);
local64_set(&event->hw.prev_count, val);
local64_set(&event->hw.period_left, left);
perf_event_update_userpage(event);
/*
* Finally record data if requested.
*/
......@@ -1183,23 +1234,9 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
if (event->attr.sample_type & PERF_SAMPLE_ADDR)
perf_get_data_addr(regs, &data.addr);
if (perf_event_overflow(event, nmi, &data, regs)) {
/*
* Interrupts are coming too fast - throttle them
* by setting the event to 0, so it will be
* at least 2^30 cycles until the next interrupt
* (assuming each event counts at most 2 counts
* per cycle).
*/
val = 0;
left = ~0ULL >> 1;
}
if (perf_event_overflow(event, nmi, &data, regs))
power_pmu_stop(event, 0);
}
write_pmc(event->hw.idx, val);
local64_set(&event->hw.prev_count, val);
local64_set(&event->hw.period_left, left);
perf_event_update_userpage(event);
}
/*
......@@ -1342,6 +1379,7 @@ int register_power_pmu(struct power_pmu *pmu)
freeze_events_kernel = MMCR0_FCHV;
#endif /* CONFIG_PPC64 */
perf_pmu_register(&power_pmu);
perf_cpu_notifier(power_pmu_notifier);
return 0;
......
......@@ -156,6 +156,9 @@ static void fsl_emb_pmu_read(struct perf_event *event)
{
s64 val, delta, prev;
if (event->hw.state & PERF_HES_STOPPED)
return;
/*
* Performance monitor interrupts come even when interrupts
* are soft-disabled, as long as interrupts are hard-enabled.
......@@ -177,7 +180,7 @@ static void fsl_emb_pmu_read(struct perf_event *event)
* Disable all events to prevent PMU interrupts and to allow
* events to be added or removed.
*/
void hw_perf_disable(void)
static void fsl_emb_pmu_disable(struct pmu *pmu)
{
struct cpu_hw_events *cpuhw;
unsigned long flags;
......@@ -216,7 +219,7 @@ void hw_perf_disable(void)
* If we were previously disabled and events were added, then
* put the new config on the PMU.
*/
void hw_perf_enable(void)
static void fsl_emb_pmu_enable(struct pmu *pmu)
{
struct cpu_hw_events *cpuhw;
unsigned long flags;
......@@ -262,8 +265,8 @@ static int collect_events(struct perf_event *group, int max_count,
return n;
}
/* perf must be disabled, context locked on entry */
static int fsl_emb_pmu_enable(struct perf_event *event)
/* context locked on entry */
static int fsl_emb_pmu_add(struct perf_event *event, int flags)
{
struct cpu_hw_events *cpuhw;
int ret = -EAGAIN;
......@@ -271,6 +274,7 @@ static int fsl_emb_pmu_enable(struct perf_event *event)
u64 val;
int i;
perf_pmu_disable(event->pmu);
cpuhw = &get_cpu_var(cpu_hw_events);
if (event->hw.config & FSL_EMB_EVENT_RESTRICTED)
......@@ -301,6 +305,12 @@ static int fsl_emb_pmu_enable(struct perf_event *event)
val = 0x80000000L - left;
}
local64_set(&event->hw.prev_count, val);
if (!(flags & PERF_EF_START)) {
event->hw.state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
val = 0;
}
write_pmc(i, val);
perf_event_update_userpage(event);
......@@ -310,15 +320,17 @@ static int fsl_emb_pmu_enable(struct perf_event *event)
ret = 0;
out:
put_cpu_var(cpu_hw_events);
perf_pmu_enable(event->pmu);
return ret;
}
/* perf must be disabled, context locked on entry */
static void fsl_emb_pmu_disable(struct perf_event *event)
/* context locked on entry */
static void fsl_emb_pmu_del(struct perf_event *event, int flags)
{
struct cpu_hw_events *cpuhw;
int i = event->hw.idx;
perf_pmu_disable(event->pmu);
if (i < 0)
goto out;
......@@ -346,44 +358,57 @@ static void fsl_emb_pmu_disable(struct perf_event *event)
cpuhw->n_events--;
out:
perf_pmu_enable(event->pmu);
put_cpu_var(cpu_hw_events);
}
/*
* Re-enable interrupts on a event after they were throttled
* because they were coming too fast.
*
* Context is locked on entry, but perf is not disabled.
*/
static void fsl_emb_pmu_unthrottle(struct perf_event *event)
static void fsl_emb_pmu_start(struct perf_event *event, int ef_flags)
{
s64 val, left;
unsigned long flags;
s64 left;
if (event->hw.idx < 0 || !event->hw.sample_period)
return;
if (!(event->hw.state & PERF_HES_STOPPED))
return;
if (ef_flags & PERF_EF_RELOAD)
WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
local_irq_save(flags);
perf_disable();
fsl_emb_pmu_read(event);
left = event->hw.sample_period;
event->hw.last_period = left;
val = 0;
if (left < 0x80000000L)
val = 0x80000000L - left;
write_pmc(event->hw.idx, val);
local64_set(&event->hw.prev_count, val);
local64_set(&event->hw.period_left, left);
perf_pmu_disable(event->pmu);
event->hw.state = 0;
left = local64_read(&event->hw.period_left);
write_pmc(event->hw.idx, left);
perf_event_update_userpage(event);
perf_enable();
perf_pmu_enable(event->pmu);
local_irq_restore(flags);
}
static struct pmu fsl_emb_pmu = {
.enable = fsl_emb_pmu_enable,
.disable = fsl_emb_pmu_disable,
.read = fsl_emb_pmu_read,
.unthrottle = fsl_emb_pmu_unthrottle,
};
static void fsl_emb_pmu_stop(struct perf_event *event, int ef_flags)
{
unsigned long flags;
if (event->hw.idx < 0 || !event->hw.sample_period)
return;
if (event->hw.state & PERF_HES_STOPPED)
return;
local_irq_save(flags);
perf_pmu_disable(event->pmu);
fsl_emb_pmu_read(event);
event->hw.state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
write_pmc(event->hw.idx, 0);
perf_event_update_userpage(event);
perf_pmu_enable(event->pmu);
local_irq_restore(flags);
}
/*
* Release the PMU if this is the last perf_event.
......@@ -428,7 +453,7 @@ static int hw_perf_cache_event(u64 config, u64 *eventp)
return 0;
}
const struct pmu *hw_perf_event_init(struct perf_event *event)
static int fsl_emb_pmu_event_init(struct perf_event *event)
{
u64 ev;
struct perf_event *events[MAX_HWEVENTS];
......@@ -441,14 +466,14 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
case PERF_TYPE_HARDWARE:
ev = event->attr.config;
if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0)
return ERR_PTR(-EOPNOTSUPP);
return -EOPNOTSUPP;
ev = ppmu->generic_events[ev];
break;
case PERF_TYPE_HW_CACHE:
err = hw_perf_cache_event(event->attr.config, &ev);
if (err)
return ERR_PTR(err);
return err;
break;
case PERF_TYPE_RAW:
......@@ -456,12 +481,12 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
break;
default:
return ERR_PTR(-EINVAL);
return -ENOENT;
}
event->hw.config = ppmu->xlate_event(ev);
if (!(event->hw.config & FSL_EMB_EVENT_VALID))
return ERR_PTR(-EINVAL);
return -EINVAL;
/*
* If this is in a group, check if it can go on with all the
......@@ -473,7 +498,7 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
n = collect_events(event->group_leader,
ppmu->n_counter - 1, events);
if (n < 0)
return ERR_PTR(-EINVAL);
return -EINVAL;
}
if (event->hw.config & FSL_EMB_EVENT_RESTRICTED) {
......@@ -484,7 +509,7 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
}
if (num_restricted >= ppmu->n_restricted)
return ERR_PTR(-EINVAL);
return -EINVAL;
}
event->hw.idx = -1;
......@@ -497,7 +522,7 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
if (event->attr.exclude_kernel)
event->hw.config_base |= PMLCA_FCS;
if (event->attr.exclude_idle)
return ERR_PTR(-ENOTSUPP);
return -ENOTSUPP;
event->hw.last_period = event->hw.sample_period;
local64_set(&event->hw.period_left, event->hw.last_period);
......@@ -523,11 +548,20 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
}
event->destroy = hw_perf_event_destroy;
if (err)
return ERR_PTR(err);
return &fsl_emb_pmu;
return err;
}
static struct pmu fsl_emb_pmu = {
.pmu_enable = fsl_emb_pmu_enable,
.pmu_disable = fsl_emb_pmu_disable,
.event_init = fsl_emb_pmu_event_init,
.add = fsl_emb_pmu_add,
.del = fsl_emb_pmu_del,
.start = fsl_emb_pmu_start,
.stop = fsl_emb_pmu_stop,
.read = fsl_emb_pmu_read,
};
/*
* A counter has overflowed; update its count and record
* things if requested. Note that interrupts are hard-disabled
......@@ -540,6 +574,11 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
s64 prev, delta, left;
int record = 0;
if (event->hw.state & PERF_HES_STOPPED) {
write_pmc(event->hw.idx, 0);
return;
}
/* we don't have to worry about interrupts here */
prev = local64_read(&event->hw.prev_count);
delta = (val - prev) & 0xfffffffful;
......@@ -562,6 +601,11 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
val = 0x80000000LL - left;
}
write_pmc(event->hw.idx, val);
local64_set(&event->hw.prev_count, val);
local64_set(&event->hw.period_left, left);
perf_event_update_userpage(event);
/*
* Finally record data if requested.
*/
......@@ -571,23 +615,9 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
perf_sample_data_init(&data, 0);
data.period = event->hw.last_period;
if (perf_event_overflow(event, nmi, &data, regs)) {
/*
* Interrupts are coming too fast - throttle them
* by setting the event to 0, so it will be
* at least 2^30 cycles until the next interrupt
* (assuming each event counts at most 2 counts
* per cycle).
*/
val = 0;
left = ~0ULL >> 1;
}
if (perf_event_overflow(event, nmi, &data, regs))
fsl_emb_pmu_stop(event, 0);
}
write_pmc(event->hw.idx, val);
local64_set(&event->hw.prev_count, val);
local64_set(&event->hw.period_left, left);
perf_event_update_userpage(event);
}
static void perf_event_interrupt(struct pt_regs *regs)
......@@ -651,5 +681,7 @@ int register_fsl_emb_pmu(struct fsl_emb_pmu *pmu)
pr_info("%s performance monitor hardware support registered\n",
pmu->name);
perf_pmu_register(&fsl_emb_pmu);
return 0;
}
......@@ -53,7 +53,7 @@
#include <linux/posix-timers.h>
#include <linux/irq.h>
#include <linux/delay.h>
#include <linux/perf_event.h>
#include <linux/irq_work.h>
#include <asm/trace.h>
#include <asm/io.h>
......@@ -493,60 +493,60 @@ void __init iSeries_time_init_early(void)
}
#endif /* CONFIG_PPC_ISERIES */
#ifdef CONFIG_PERF_EVENTS
#ifdef CONFIG_IRQ_WORK
/*
* 64-bit uses a byte in the PACA, 32-bit uses a per-cpu variable...
*/
#ifdef CONFIG_PPC64
static inline unsigned long test_perf_event_pending(void)
static inline unsigned long test_irq_work_pending(void)
{
unsigned long x;
asm volatile("lbz %0,%1(13)"
: "=r" (x)
: "i" (offsetof(struct paca_struct, perf_event_pending)));
: "i" (offsetof(struct paca_struct, irq_work_pending)));
return x;
}
static inline void set_perf_event_pending_flag(void)
static inline void set_irq_work_pending_flag(void)
{
asm volatile("stb %0,%1(13)" : :
"r" (1),
"i" (offsetof(struct paca_struct, perf_event_pending)));
"i" (offsetof(struct paca_struct, irq_work_pending)));
}
static inline void clear_perf_event_pending(void)
static inline void clear_irq_work_pending(void)
{
asm volatile("stb %0,%1(13)" : :
"r" (0),
"i" (offsetof(struct paca_struct, perf_event_pending)));
"i" (offsetof(struct paca_struct, irq_work_pending)));
}
#else /* 32-bit */
DEFINE_PER_CPU(u8, perf_event_pending);
DEFINE_PER_CPU(u8, irq_work_pending);
#define set_perf_event_pending_flag() __get_cpu_var(perf_event_pending) = 1
#define test_perf_event_pending() __get_cpu_var(perf_event_pending)
#define clear_perf_event_pending() __get_cpu_var(perf_event_pending) = 0
#define set_irq_work_pending_flag() __get_cpu_var(irq_work_pending) = 1
#define test_irq_work_pending() __get_cpu_var(irq_work_pending)
#define clear_irq_work_pending() __get_cpu_var(irq_work_pending) = 0
#endif /* 32 vs 64 bit */
void set_perf_event_pending(void)
void set_irq_work_pending(void)
{
preempt_disable();
set_perf_event_pending_flag();
set_irq_work_pending_flag();
set_dec(1);
preempt_enable();
}
#else /* CONFIG_PERF_EVENTS */
#else /* CONFIG_IRQ_WORK */
#define test_perf_event_pending() 0
#define clear_perf_event_pending()
#define test_irq_work_pending() 0
#define clear_irq_work_pending()
#endif /* CONFIG_PERF_EVENTS */
#endif /* CONFIG_IRQ_WORK */
/*
* For iSeries shared processors, we have to let the hypervisor
......@@ -587,9 +587,9 @@ void timer_interrupt(struct pt_regs * regs)
calculate_steal_time();
if (test_perf_event_pending()) {
clear_perf_event_pending();
perf_event_do_pending();
if (test_irq_work_pending()) {
clear_irq_work_pending();
irq_work_run();
}
#ifdef CONFIG_PPC_ISERIES
......
......@@ -95,6 +95,7 @@ config S390
select HAVE_KVM if 64BIT
select HAVE_ARCH_TRACEHOOK
select INIT_ALL_POSSIBLE
select HAVE_IRQ_WORK
select HAVE_PERF_EVENTS
select HAVE_KERNEL_GZIP
select HAVE_KERNEL_BZIP2
......
......@@ -12,10 +12,6 @@
#ifndef __ASM_HARDIRQ_H
#define __ASM_HARDIRQ_H
#include <linux/threads.h>
#include <linux/sched.h>
#include <linux/cache.h>
#include <linux/interrupt.h>
#include <asm/lowcore.h>
#define local_softirq_pending() (S390_lowcore.softirq_pending)
......
......@@ -4,7 +4,6 @@
* Copyright 2009 Martin Schwidefsky, IBM Corporation.
*/
static inline void set_perf_event_pending(void) {}
static inline void clear_perf_event_pending(void) {}
/* Empty, just to avoid compiling error */
#define PERF_EVENT_INDEX_OFFSET 0
......@@ -16,6 +16,7 @@ config SUPERH
select HAVE_ARCH_TRACEHOOK
select HAVE_DMA_API_DEBUG
select HAVE_DMA_ATTRS
select HAVE_IRQ_WORK
select HAVE_PERF_EVENTS
select PERF_USE_VMALLOC
select HAVE_KERNEL_GZIP
......@@ -249,6 +250,11 @@ config ARCH_SHMOBILE
select PM
select PM_RUNTIME
config CPU_HAS_PMU
depends on CPU_SH4 || CPU_SH4A
default y
bool
if SUPERH32
choice
......@@ -738,6 +744,14 @@ config GUSA_RB
LLSC, this should be more efficient than the other alternative of
disabling interrupts around the atomic sequence.
config HW_PERF_EVENTS
bool "Enable hardware performance counter support for perf events"
depends on PERF_EVENTS && CPU_HAS_PMU
default y
help
Enable hardware performance counter support for perf events. If
disabled, perf events will use software events only.
source "drivers/sh/Kconfig"
endmenu
......
......@@ -26,11 +26,4 @@ extern int register_sh_pmu(struct sh_pmu *);
extern int reserve_pmc_hardware(void);
extern void release_pmc_hardware(void);
static inline void set_perf_event_pending(void)
{
/* Nothing to see here, move along. */
}
#define PERF_EVENT_INDEX_OFFSET 0
#endif /* __ASM_SH_PERF_EVENT_H */
......@@ -14,11 +14,6 @@
#include <asm/unwinder.h>
#include <asm/ptrace.h>
static inline void callchain_store(struct perf_callchain_entry *entry, u64 ip)
{
if (entry->nr < PERF_MAX_STACK_DEPTH)
entry->ip[entry->nr++] = ip;
}
static void callchain_warning(void *data, char *msg)
{
......@@ -39,7 +34,7 @@ static void callchain_address(void *data, unsigned long addr, int reliable)
struct perf_callchain_entry *entry = data;
if (reliable)
callchain_store(entry, addr);
perf_callchain_store(entry, addr);
}
static const struct stacktrace_ops callchain_ops = {
......@@ -49,47 +44,10 @@ static const struct stacktrace_ops callchain_ops = {
.address = callchain_address,
};
static void
perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry)
void
perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
{
callchain_store(entry, PERF_CONTEXT_KERNEL);
callchain_store(entry, regs->pc);
perf_callchain_store(entry, regs->pc);
unwind_stack(NULL, regs, NULL, &callchain_ops, entry);
}
static void
perf_do_callchain(struct pt_regs *regs, struct perf_callchain_entry *entry)
{
int is_user;
if (!regs)
return;
is_user = user_mode(regs);
if (is_user && current->state != TASK_RUNNING)
return;
/*
* Only the kernel side is implemented for now.
*/
if (!is_user)
perf_callchain_kernel(regs, entry);
}
/*
* No need for separate IRQ and NMI entries.
*/
static DEFINE_PER_CPU(struct perf_callchain_entry, callchain);
struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
{
struct perf_callchain_entry *entry = &__get_cpu_var(callchain);
entry->nr = 0;
perf_do_callchain(regs, entry);
return entry;
}
......@@ -59,6 +59,24 @@ static inline int sh_pmu_initialized(void)
return !!sh_pmu;
}
const char *perf_pmu_name(void)
{
if (!sh_pmu)
return NULL;
return sh_pmu->name;
}
EXPORT_SYMBOL_GPL(perf_pmu_name);
int perf_num_counters(void)
{
if (!sh_pmu)
return 0;
return sh_pmu->num_events;
}
EXPORT_SYMBOL_GPL(perf_num_counters);
/*
* Release the PMU if this is the last perf_event.
*/
......@@ -206,50 +224,80 @@ static void sh_perf_event_update(struct perf_event *event,
local64_add(delta, &event->count);
}
static void sh_pmu_disable(struct perf_event *event)
static void sh_pmu_stop(struct perf_event *event, int flags)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
struct hw_perf_event *hwc = &event->hw;
int idx = hwc->idx;
clear_bit(idx, cpuc->active_mask);
sh_pmu->disable(hwc, idx);
if (!(event->hw.state & PERF_HES_STOPPED)) {
sh_pmu->disable(hwc, idx);
cpuc->events[idx] = NULL;
event->hw.state |= PERF_HES_STOPPED;
}
if ((flags & PERF_EF_UPDATE) && !(event->hw.state & PERF_HES_UPTODATE)) {
sh_perf_event_update(event, &event->hw, idx);
event->hw.state |= PERF_HES_UPTODATE;
}
}
static void sh_pmu_start(struct perf_event *event, int flags)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
struct hw_perf_event *hwc = &event->hw;
int idx = hwc->idx;
if (WARN_ON_ONCE(idx == -1))
return;
if (flags & PERF_EF_RELOAD)
WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
barrier();
cpuc->events[idx] = event;
event->hw.state = 0;
sh_pmu->enable(hwc, idx);
}
sh_perf_event_update(event, &event->hw, idx);
static void sh_pmu_del(struct perf_event *event, int flags)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
cpuc->events[idx] = NULL;
clear_bit(idx, cpuc->used_mask);
sh_pmu_stop(event, PERF_EF_UPDATE);
__clear_bit(event->hw.idx, cpuc->used_mask);
perf_event_update_userpage(event);
}
static int sh_pmu_enable(struct perf_event *event)
static int sh_pmu_add(struct perf_event *event, int flags)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
struct hw_perf_event *hwc = &event->hw;
int idx = hwc->idx;
int ret = -EAGAIN;
perf_pmu_disable(event->pmu);
if (test_and_set_bit(idx, cpuc->used_mask)) {
if (__test_and_set_bit(idx, cpuc->used_mask)) {
idx = find_first_zero_bit(cpuc->used_mask, sh_pmu->num_events);
if (idx == sh_pmu->num_events)
return -EAGAIN;
goto out;
set_bit(idx, cpuc->used_mask);
__set_bit(idx, cpuc->used_mask);
hwc->idx = idx;
}
sh_pmu->disable(hwc, idx);
cpuc->events[idx] = event;
set_bit(idx, cpuc->active_mask);
sh_pmu->enable(hwc, idx);
event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
if (flags & PERF_EF_START)
sh_pmu_start(event, PERF_EF_RELOAD);
perf_event_update_userpage(event);
return 0;
ret = 0;
out:
perf_pmu_enable(event->pmu);
return ret;
}
static void sh_pmu_read(struct perf_event *event)
......@@ -257,24 +305,56 @@ static void sh_pmu_read(struct perf_event *event)
sh_perf_event_update(event, &event->hw, event->hw.idx);
}
static const struct pmu pmu = {
.enable = sh_pmu_enable,
.disable = sh_pmu_disable,
.read = sh_pmu_read,
};
const struct pmu *hw_perf_event_init(struct perf_event *event)
static int sh_pmu_event_init(struct perf_event *event)
{
int err = __hw_perf_event_init(event);
int err;
switch (event->attr.type) {
case PERF_TYPE_RAW:
case PERF_TYPE_HW_CACHE:
case PERF_TYPE_HARDWARE:
err = __hw_perf_event_init(event);
break;
default:
return -ENOENT;
}
if (unlikely(err)) {
if (event->destroy)
event->destroy(event);
return ERR_PTR(err);
}
return &pmu;
return err;
}
static void sh_pmu_enable(struct pmu *pmu)
{
if (!sh_pmu_initialized())
return;
sh_pmu->enable_all();
}
static void sh_pmu_disable(struct pmu *pmu)
{
if (!sh_pmu_initialized())
return;
sh_pmu->disable_all();
}
static struct pmu pmu = {
.pmu_enable = sh_pmu_enable,
.pmu_disable = sh_pmu_disable,
.event_init = sh_pmu_event_init,
.add = sh_pmu_add,
.del = sh_pmu_del,
.start = sh_pmu_start,
.stop = sh_pmu_stop,
.read = sh_pmu_read,
};
static void sh_pmu_setup(int cpu)
{
struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu);
......@@ -299,32 +379,17 @@ sh_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
return NOTIFY_OK;
}
void hw_perf_enable(void)
{
if (!sh_pmu_initialized())
return;
sh_pmu->enable_all();
}
void hw_perf_disable(void)
{
if (!sh_pmu_initialized())
return;
sh_pmu->disable_all();
}
int __cpuinit register_sh_pmu(struct sh_pmu *pmu)
int __cpuinit register_sh_pmu(struct sh_pmu *_pmu)
{
if (sh_pmu)
return -EBUSY;
sh_pmu = pmu;
sh_pmu = _pmu;
pr_info("Performance Events: %s support registered\n", pmu->name);
pr_info("Performance Events: %s support registered\n", _pmu->name);
WARN_ON(pmu->num_events > MAX_HWEVENTS);
WARN_ON(_pmu->num_events > MAX_HWEVENTS);
perf_pmu_register(&pmu);
perf_cpu_notifier(sh_pmu_notifier);
return 0;
}
......@@ -6,4 +6,8 @@ DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \
oprofilefs.o oprofile_stats.o \
timer_int.o )
ifeq ($(CONFIG_HW_PERF_EVENTS),y)
DRIVER_OBJS += $(addprefix ../../../drivers/oprofile/, oprofile_perf.o)
endif
oprofile-y := $(DRIVER_OBJS) common.o backtrace.o
......@@ -17,114 +17,45 @@
#include <linux/init.h>
#include <linux/errno.h>
#include <linux/smp.h>
#include <linux/perf_event.h>
#include <asm/processor.h>
#include "op_impl.h"
static struct op_sh_model *model;
static struct op_counter_config ctr[20];
#ifdef CONFIG_HW_PERF_EVENTS
extern void sh_backtrace(struct pt_regs * const regs, unsigned int depth);
static int op_sh_setup(void)
{
/* Pre-compute the values to stuff in the hardware registers. */
model->reg_setup(ctr);
/* Configure the registers on all cpus. */
on_each_cpu(model->cpu_setup, NULL, 1);
return 0;
}
static int op_sh_create_files(struct super_block *sb, struct dentry *root)
char *op_name_from_perf_id(void)
{
int i, ret = 0;
const char *pmu;
char buf[20];
int size;
for (i = 0; i < model->num_counters; i++) {
struct dentry *dir;
char buf[4];
pmu = perf_pmu_name();
if (!pmu)
return NULL;
snprintf(buf, sizeof(buf), "%d", i);
dir = oprofilefs_mkdir(sb, root, buf);
size = snprintf(buf, sizeof(buf), "sh/%s", pmu);
if (size > -1 && size < sizeof(buf))
return buf;
ret |= oprofilefs_create_ulong(sb, dir, "enabled", &ctr[i].enabled);
ret |= oprofilefs_create_ulong(sb, dir, "event", &ctr[i].event);
ret |= oprofilefs_create_ulong(sb, dir, "kernel", &ctr[i].kernel);
ret |= oprofilefs_create_ulong(sb, dir, "user", &ctr[i].user);
if (model->create_files)
ret |= model->create_files(sb, dir);
else
ret |= oprofilefs_create_ulong(sb, dir, "count", &ctr[i].count);
/* Dummy entries */
ret |= oprofilefs_create_ulong(sb, dir, "unit_mask", &ctr[i].unit_mask);
}
return ret;
return NULL;
}
static int op_sh_start(void)
int __init oprofile_arch_init(struct oprofile_operations *ops)
{
/* Enable performance monitoring for all counters. */
on_each_cpu(model->cpu_start, NULL, 1);
ops->backtrace = sh_backtrace;
return 0;
return oprofile_perf_init(ops);
}
static void op_sh_stop(void)
void __exit oprofile_arch_exit(void)
{
/* Disable performance monitoring for all counters. */
on_each_cpu(model->cpu_stop, NULL, 1);
oprofile_perf_exit();
}
#else
int __init oprofile_arch_init(struct oprofile_operations *ops)
{
struct op_sh_model *lmodel = NULL;
int ret;
/*
* Always assign the backtrace op. If the counter initialization
* fails, we fall back to the timer which will still make use of
* this.
*/
ops->backtrace = sh_backtrace;
/*
* XXX
*
* All of the SH7750/SH-4A counters have been converted to perf,
* this infrastructure hook is left for other users until they've
* had a chance to convert over, at which point all of this
* will be deleted.
*/
if (!lmodel)
return -ENODEV;
if (!(current_cpu_data.flags & CPU_HAS_PERF_COUNTER))
return -ENODEV;
ret = lmodel->init();
if (unlikely(ret != 0))
return ret;
model = lmodel;
ops->setup = op_sh_setup;
ops->create_files = op_sh_create_files;
ops->start = op_sh_start;
ops->stop = op_sh_stop;
ops->cpu_type = lmodel->cpu_type;
printk(KERN_INFO "oprofile: using %s performance monitoring.\n",
lmodel->cpu_type);
return 0;
}
void oprofile_arch_exit(void)
{
if (model && model->exit)
model->exit();
pr_info("oprofile: hardware counters not available\n");
return -ENODEV;
}
void __exit oprofile_arch_exit(void) {}
#endif /* CONFIG_HW_PERF_EVENTS */
#ifndef __OP_IMPL_H
#define __OP_IMPL_H
/* Per-counter configuration as set via oprofilefs. */
struct op_counter_config {
unsigned long enabled;
unsigned long event;
unsigned long count;
/* Dummy values for userspace tool compliance */
unsigned long kernel;
unsigned long user;
unsigned long unit_mask;
};
/* Per-architecture configury and hooks. */
struct op_sh_model {
void (*reg_setup)(struct op_counter_config *);
int (*create_files)(struct super_block *sb, struct dentry *dir);
void (*cpu_setup)(void *dummy);
int (*init)(void);
void (*exit)(void);
void (*cpu_start)(void *args);
void (*cpu_stop)(void *args);
char *cpu_type;
unsigned char num_counters;
};
/* arch/sh/oprofile/common.c */
extern void sh_backtrace(struct pt_regs * const regs, unsigned int depth);
#endif /* __OP_IMPL_H */
......@@ -26,10 +26,12 @@ config SPARC
select ARCH_WANT_OPTIONAL_GPIOLIB
select RTC_CLASS
select RTC_DRV_M48T59
select HAVE_IRQ_WORK
select HAVE_PERF_EVENTS
select PERF_USE_VMALLOC
select HAVE_DMA_ATTRS
select HAVE_DMA_API_DEBUG
select HAVE_ARCH_JUMP_LABEL
config SPARC32
def_bool !64BIT
......@@ -53,6 +55,7 @@ config SPARC64
select RTC_DRV_BQ4802
select RTC_DRV_SUN4V
select RTC_DRV_STARFIRE
select HAVE_IRQ_WORK
select HAVE_PERF_EVENTS
select PERF_USE_VMALLOC
......
#ifndef _ASM_SPARC_JUMP_LABEL_H
#define _ASM_SPARC_JUMP_LABEL_H
#ifdef __KERNEL__
#include <linux/types.h>
#include <asm/system.h>
#define JUMP_LABEL_NOP_SIZE 4
#define JUMP_LABEL(key, label) \
do { \
asm goto("1:\n\t" \
"nop\n\t" \
"nop\n\t" \
".pushsection __jump_table, \"a\"\n\t"\
".word 1b, %l[" #label "], %c0\n\t" \
".popsection \n\t" \
: : "i" (key) : : label);\
} while (0)
#endif /* __KERNEL__ */
typedef u32 jump_label_t;
struct jump_entry {
jump_label_t code;
jump_label_t target;
jump_label_t key;
};
#endif
#ifndef __ASM_SPARC_PERF_EVENT_H
#define __ASM_SPARC_PERF_EVENT_H
extern void set_perf_event_pending(void);
#define PERF_EVENT_INDEX_OFFSET 0
#ifdef CONFIG_PERF_EVENTS
#include <asm/ptrace.h>
......
......@@ -119,3 +119,5 @@ obj-$(CONFIG_COMPAT) += $(audit--y)
pc--$(CONFIG_PERF_EVENTS) := perf_event.o
obj-$(CONFIG_SPARC64) += $(pc--y)
obj-$(CONFIG_SPARC64) += jump_label.o
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/mutex.h>
#include <linux/cpu.h>
#include <linux/jump_label.h>
#include <linux/memory.h>
#ifdef HAVE_JUMP_LABEL
void arch_jump_label_transform(struct jump_entry *entry,
enum jump_label_type type)
{
u32 val;
u32 *insn = (u32 *) (unsigned long) entry->code;
if (type == JUMP_LABEL_ENABLE) {
s32 off = (s32)entry->target - (s32)entry->code;
#ifdef CONFIG_SPARC64
/* ba,pt %xcc, . + (off << 2) */
val = 0x10680000 | ((u32) off >> 2);
#else
/* ba . + (off << 2) */
val = 0x10800000 | ((u32) off >> 2);
#endif
} else {
val = 0x01000000;
}
get_online_cpus();
mutex_lock(&text_mutex);
*insn = val;
flushi(insn);
mutex_unlock(&text_mutex);
put_online_cpus();
}
void arch_jump_label_text_poke_early(jump_label_t addr)
{
u32 *insn_p = (u32 *) (unsigned long) addr;
*insn_p = 0x01000000;
flushi(insn_p);
}
#endif
......@@ -18,6 +18,9 @@
#include <asm/spitfire.h>
#ifdef CONFIG_SPARC64
#include <linux/jump_label.h>
static void *module_map(unsigned long size)
{
struct vm_struct *area;
......@@ -227,6 +230,9 @@ int module_finalize(const Elf_Ehdr *hdr,
const Elf_Shdr *sechdrs,
struct module *me)
{
/* make jump label nops */
jump_label_apply_nops(me);
/* Cheetah's I-cache is fully coherent. */
if (tlb_type == spitfire) {
unsigned long va;
......
......@@ -7,7 +7,7 @@
#include <linux/init.h>
#include <linux/irq.h>
#include <linux/perf_event.h>
#include <linux/irq_work.h>
#include <linux/ftrace.h>
#include <asm/pil.h>
......@@ -43,14 +43,14 @@ void __irq_entry deferred_pcr_work_irq(int irq, struct pt_regs *regs)
old_regs = set_irq_regs(regs);
irq_enter();
#ifdef CONFIG_PERF_EVENTS
perf_event_do_pending();
#ifdef CONFIG_IRQ_WORK
irq_work_run();
#endif
irq_exit();
set_irq_regs(old_regs);
}
void set_perf_event_pending(void)
void arch_irq_work_raise(void)
{
set_softint(1 << PIL_DEFERRED_PCR_WORK);
}
......
......@@ -658,13 +658,16 @@ static u64 maybe_change_configuration(struct cpu_hw_events *cpuc, u64 pcr)
enc = perf_event_get_enc(cpuc->events[i]);
pcr &= ~mask_for_index(idx);
pcr |= event_encoding(enc, idx);
if (hwc->state & PERF_HES_STOPPED)
pcr |= nop_for_index(idx);
else
pcr |= event_encoding(enc, idx);
}
out:
return pcr;
}
void hw_perf_enable(void)
static void sparc_pmu_enable(struct pmu *pmu)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
u64 pcr;
......@@ -691,7 +694,7 @@ void hw_perf_enable(void)
pcr_ops->write(cpuc->pcr);
}
void hw_perf_disable(void)
static void sparc_pmu_disable(struct pmu *pmu)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
u64 val;
......@@ -710,19 +713,65 @@ void hw_perf_disable(void)
pcr_ops->write(cpuc->pcr);
}
static void sparc_pmu_disable(struct perf_event *event)
static int active_event_index(struct cpu_hw_events *cpuc,
struct perf_event *event)
{
int i;
for (i = 0; i < cpuc->n_events; i++) {
if (cpuc->event[i] == event)
break;
}
BUG_ON(i == cpuc->n_events);
return cpuc->current_idx[i];
}
static void sparc_pmu_start(struct perf_event *event, int flags)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
int idx = active_event_index(cpuc, event);
if (flags & PERF_EF_RELOAD) {
WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
sparc_perf_event_set_period(event, &event->hw, idx);
}
event->hw.state = 0;
sparc_pmu_enable_event(cpuc, &event->hw, idx);
}
static void sparc_pmu_stop(struct perf_event *event, int flags)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
int idx = active_event_index(cpuc, event);
if (!(event->hw.state & PERF_HES_STOPPED)) {
sparc_pmu_disable_event(cpuc, &event->hw, idx);
event->hw.state |= PERF_HES_STOPPED;
}
if (!(event->hw.state & PERF_HES_UPTODATE) && (flags & PERF_EF_UPDATE)) {
sparc_perf_event_update(event, &event->hw, idx);
event->hw.state |= PERF_HES_UPTODATE;
}
}
static void sparc_pmu_del(struct perf_event *event, int _flags)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
struct hw_perf_event *hwc = &event->hw;
unsigned long flags;
int i;
local_irq_save(flags);
perf_disable();
perf_pmu_disable(event->pmu);
for (i = 0; i < cpuc->n_events; i++) {
if (event == cpuc->event[i]) {
int idx = cpuc->current_idx[i];
/* Absorb the final count and turn off the
* event.
*/
sparc_pmu_stop(event, PERF_EF_UPDATE);
/* Shift remaining entries down into
* the existing slot.
......@@ -734,13 +783,6 @@ static void sparc_pmu_disable(struct perf_event *event)
cpuc->current_idx[i];
}
/* Absorb the final count and turn off the
* event.
*/
sparc_pmu_disable_event(cpuc, hwc, idx);
barrier();
sparc_perf_event_update(event, hwc, idx);
perf_event_update_userpage(event);
cpuc->n_events--;
......@@ -748,23 +790,10 @@ static void sparc_pmu_disable(struct perf_event *event)
}
}
perf_enable();
perf_pmu_enable(event->pmu);
local_irq_restore(flags);
}
static int active_event_index(struct cpu_hw_events *cpuc,
struct perf_event *event)
{
int i;
for (i = 0; i < cpuc->n_events; i++) {
if (cpuc->event[i] == event)
break;
}
BUG_ON(i == cpuc->n_events);
return cpuc->current_idx[i];
}
static void sparc_pmu_read(struct perf_event *event)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
......@@ -774,15 +803,6 @@ static void sparc_pmu_read(struct perf_event *event)
sparc_perf_event_update(event, hwc, idx);
}
static void sparc_pmu_unthrottle(struct perf_event *event)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
int idx = active_event_index(cpuc, event);
struct hw_perf_event *hwc = &event->hw;
sparc_pmu_enable_event(cpuc, hwc, idx);
}
static atomic_t active_events = ATOMIC_INIT(0);
static DEFINE_MUTEX(pmc_grab_mutex);
......@@ -877,7 +897,7 @@ static int sparc_check_constraints(struct perf_event **evts,
if (!n_ev)
return 0;
if (n_ev > perf_max_events)
if (n_ev > MAX_HWEVENTS)
return -1;
msk0 = perf_event_get_msk(events[0]);
......@@ -984,23 +1004,27 @@ static int collect_events(struct perf_event *group, int max_count,
return n;
}
static int sparc_pmu_enable(struct perf_event *event)
static int sparc_pmu_add(struct perf_event *event, int ef_flags)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
int n0, ret = -EAGAIN;
unsigned long flags;
local_irq_save(flags);
perf_disable();
perf_pmu_disable(event->pmu);
n0 = cpuc->n_events;
if (n0 >= perf_max_events)
if (n0 >= MAX_HWEVENTS)
goto out;
cpuc->event[n0] = event;
cpuc->events[n0] = event->hw.event_base;
cpuc->current_idx[n0] = PIC_NO_INDEX;
event->hw.state = PERF_HES_UPTODATE;
if (!(ef_flags & PERF_EF_START))
event->hw.state |= PERF_HES_STOPPED;
/*
* If group events scheduling transaction was started,
* skip the schedulability test here, it will be peformed
......@@ -1020,12 +1044,12 @@ static int sparc_pmu_enable(struct perf_event *event)
ret = 0;
out:
perf_enable();
perf_pmu_enable(event->pmu);
local_irq_restore(flags);
return ret;
}
static int __hw_perf_event_init(struct perf_event *event)
static int sparc_pmu_event_init(struct perf_event *event)
{
struct perf_event_attr *attr = &event->attr;
struct perf_event *evts[MAX_HWEVENTS];
......@@ -1038,22 +1062,33 @@ static int __hw_perf_event_init(struct perf_event *event)
if (atomic_read(&nmi_active) < 0)
return -ENODEV;
pmap = NULL;
if (attr->type == PERF_TYPE_HARDWARE) {
switch (attr->type) {
case PERF_TYPE_HARDWARE:
if (attr->config >= sparc_pmu->max_events)
return -EINVAL;
pmap = sparc_pmu->event_map(attr->config);
} else if (attr->type == PERF_TYPE_HW_CACHE) {
break;
case PERF_TYPE_HW_CACHE:
pmap = sparc_map_cache_event(attr->config);
if (IS_ERR(pmap))
return PTR_ERR(pmap);
} else if (attr->type != PERF_TYPE_RAW)
return -EOPNOTSUPP;
break;
case PERF_TYPE_RAW:
pmap = NULL;
break;
default:
return -ENOENT;
}
if (pmap) {
hwc->event_base = perf_event_encode(pmap);
} else {
/* User gives us "(encoding << 16) | pic_mask" for
/*
* User gives us "(encoding << 16) | pic_mask" for
* PERF_TYPE_RAW events.
*/
hwc->event_base = attr->config;
......@@ -1071,7 +1106,7 @@ static int __hw_perf_event_init(struct perf_event *event)
n = 0;
if (event->group_leader != event) {
n = collect_events(event->group_leader,
perf_max_events - 1,
MAX_HWEVENTS - 1,
evts, events, current_idx_dmy);
if (n < 0)
return -EINVAL;
......@@ -1107,10 +1142,11 @@ static int __hw_perf_event_init(struct perf_event *event)
* Set the flag to make pmu::enable() not perform the
* schedulability test, it will be performed at commit time
*/
static void sparc_pmu_start_txn(const struct pmu *pmu)
static void sparc_pmu_start_txn(struct pmu *pmu)
{
struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
perf_pmu_disable(pmu);
cpuhw->group_flag |= PERF_EVENT_TXN;
}
......@@ -1119,11 +1155,12 @@ static void sparc_pmu_start_txn(const struct pmu *pmu)
* Clear the flag and pmu::enable() will perform the
* schedulability test.
*/
static void sparc_pmu_cancel_txn(const struct pmu *pmu)
static void sparc_pmu_cancel_txn(struct pmu *pmu)
{
struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
cpuhw->group_flag &= ~PERF_EVENT_TXN;
perf_pmu_enable(pmu);
}
/*
......@@ -1131,7 +1168,7 @@ static void sparc_pmu_cancel_txn(const struct pmu *pmu)
* Perform the group schedulability test as a whole
* Return 0 if success
*/
static int sparc_pmu_commit_txn(const struct pmu *pmu)
static int sparc_pmu_commit_txn(struct pmu *pmu)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
int n;
......@@ -1147,28 +1184,24 @@ static int sparc_pmu_commit_txn(const struct pmu *pmu)
return -EAGAIN;
cpuc->group_flag &= ~PERF_EVENT_TXN;
perf_pmu_enable(pmu);
return 0;
}
static const struct pmu pmu = {
.enable = sparc_pmu_enable,
.disable = sparc_pmu_disable,
static struct pmu pmu = {
.pmu_enable = sparc_pmu_enable,
.pmu_disable = sparc_pmu_disable,
.event_init = sparc_pmu_event_init,
.add = sparc_pmu_add,
.del = sparc_pmu_del,
.start = sparc_pmu_start,
.stop = sparc_pmu_stop,
.read = sparc_pmu_read,
.unthrottle = sparc_pmu_unthrottle,
.start_txn = sparc_pmu_start_txn,
.cancel_txn = sparc_pmu_cancel_txn,
.commit_txn = sparc_pmu_commit_txn,
};
const struct pmu *hw_perf_event_init(struct perf_event *event)
{
int err = __hw_perf_event_init(event);
if (err)
return ERR_PTR(err);
return &pmu;
}
void perf_event_print_debug(void)
{
unsigned long flags;
......@@ -1244,7 +1277,7 @@ static int __kprobes perf_event_nmi_handler(struct notifier_block *self,
continue;
if (perf_event_overflow(event, 1, &data, regs))
sparc_pmu_disable_event(cpuc, hwc, idx);
sparc_pmu_stop(event, 0);
}
return NOTIFY_STOP;
......@@ -1285,28 +1318,21 @@ void __init init_hw_perf_events(void)
pr_cont("Supported PMU type is '%s'\n", sparc_pmu_type);
/* All sparc64 PMUs currently have 2 events. */
perf_max_events = 2;
perf_pmu_register(&pmu);
register_die_notifier(&perf_event_nmi_notifier);
}
static inline void callchain_store(struct perf_callchain_entry *entry, u64 ip)
{
if (entry->nr < PERF_MAX_STACK_DEPTH)
entry->ip[entry->nr++] = ip;
}
static void perf_callchain_kernel(struct pt_regs *regs,
struct perf_callchain_entry *entry)
void perf_callchain_kernel(struct perf_callchain_entry *entry,
struct pt_regs *regs)
{
unsigned long ksp, fp;
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
int graph = 0;
#endif
callchain_store(entry, PERF_CONTEXT_KERNEL);
callchain_store(entry, regs->tpc);
stack_trace_flush();
perf_callchain_store(entry, regs->tpc);
ksp = regs->u_regs[UREG_I6];
fp = ksp + STACK_BIAS;
......@@ -1330,13 +1356,13 @@ static void perf_callchain_kernel(struct pt_regs *regs,
pc = sf->callers_pc;
fp = (unsigned long)sf->fp + STACK_BIAS;
}
callchain_store(entry, pc);
perf_callchain_store(entry, pc);
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
if ((pc + 8UL) == (unsigned long) &return_to_handler) {
int index = current->curr_ret_stack;
if (current->ret_stack && index >= graph) {
pc = current->ret_stack[index - graph].ret;
callchain_store(entry, pc);
perf_callchain_store(entry, pc);
graph++;
}
}
......@@ -1344,13 +1370,12 @@ static void perf_callchain_kernel(struct pt_regs *regs,
} while (entry->nr < PERF_MAX_STACK_DEPTH);
}
static void perf_callchain_user_64(struct pt_regs *regs,
struct perf_callchain_entry *entry)
static void perf_callchain_user_64(struct perf_callchain_entry *entry,
struct pt_regs *regs)
{
unsigned long ufp;
callchain_store(entry, PERF_CONTEXT_USER);
callchain_store(entry, regs->tpc);
perf_callchain_store(entry, regs->tpc);
ufp = regs->u_regs[UREG_I6] + STACK_BIAS;
do {
......@@ -1363,17 +1388,16 @@ static void perf_callchain_user_64(struct pt_regs *regs,
pc = sf.callers_pc;
ufp = (unsigned long)sf.fp + STACK_BIAS;
callchain_store(entry, pc);
perf_callchain_store(entry, pc);
} while (entry->nr < PERF_MAX_STACK_DEPTH);
}
static void perf_callchain_user_32(struct pt_regs *regs,
struct perf_callchain_entry *entry)
static void perf_callchain_user_32(struct perf_callchain_entry *entry,
struct pt_regs *regs)
{
unsigned long ufp;
callchain_store(entry, PERF_CONTEXT_USER);
callchain_store(entry, regs->tpc);
perf_callchain_store(entry, regs->tpc);
ufp = regs->u_regs[UREG_I6] & 0xffffffffUL;
do {
......@@ -1386,34 +1410,16 @@ static void perf_callchain_user_32(struct pt_regs *regs,
pc = sf.callers_pc;
ufp = (unsigned long)sf.fp;
callchain_store(entry, pc);
perf_callchain_store(entry, pc);
} while (entry->nr < PERF_MAX_STACK_DEPTH);
}
/* Like powerpc we can't get PMU interrupts within the PMU handler,
* so no need for separate NMI and IRQ chains as on x86.
*/
static DEFINE_PER_CPU(struct perf_callchain_entry, callchain);
struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
void
perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
{
struct perf_callchain_entry *entry = &__get_cpu_var(callchain);
entry->nr = 0;
if (!user_mode(regs)) {
stack_trace_flush();
perf_callchain_kernel(regs, entry);
if (current->mm)
regs = task_pt_regs(current);
else
regs = NULL;
}
if (regs) {
flushw_user();
if (test_thread_flag(TIF_32BIT))
perf_callchain_user_32(regs, entry);
else
perf_callchain_user_64(regs, entry);
}
return entry;
flushw_user();
if (test_thread_flag(TIF_32BIT))
perf_callchain_user_32(entry, regs);
else
perf_callchain_user_64(entry, regs);
}
......@@ -25,6 +25,7 @@ config X86
select HAVE_IDE
select HAVE_OPROFILE
select HAVE_PERF_EVENTS if (!M386 && !M486)
select HAVE_IRQ_WORK
select HAVE_IOREMAP_PROT
select HAVE_KPROBES
select ARCH_WANT_OPTIONAL_GPIOLIB
......@@ -33,6 +34,7 @@ config X86
select HAVE_KRETPROBES
select HAVE_OPTPROBES
select HAVE_FTRACE_MCOUNT_RECORD
select HAVE_C_RECORDMCOUNT
select HAVE_DYNAMIC_FTRACE
select HAVE_FUNCTION_TRACER
select HAVE_FUNCTION_GRAPH_TRACER
......@@ -59,6 +61,8 @@ config X86
select ANON_INODES
select HAVE_ARCH_KMEMCHECK
select HAVE_USER_RETURN_NOTIFIER
select HAVE_ARCH_JUMP_LABEL
select HAVE_TEXT_POKE_SMP
config INSTRUCTION_DECODER
def_bool (KPROBES || PERF_EVENTS)
......@@ -2125,6 +2129,10 @@ config HAVE_ATOMIC_IOMAP
def_bool y
depends on X86_32
config HAVE_TEXT_POKE_SMP
bool
select STOP_MACHINE if SMP
source "net/Kconfig"
source "drivers/Kconfig"
......
......@@ -4,6 +4,7 @@
#include <linux/types.h>
#include <linux/stddef.h>
#include <linux/stringify.h>
#include <linux/jump_label.h>
#include <asm/asm.h>
/*
......@@ -160,6 +161,8 @@ static inline void apply_paravirt(struct paravirt_patch_site *start,
#define __parainstructions_end NULL
#endif
extern void *text_poke_early(void *addr, const void *opcode, size_t len);
/*
* Clear and restore the kernel write-protection flag on the local CPU.
* Allows the kernel to edit read-only pages.
......@@ -180,4 +183,12 @@ static inline void apply_paravirt(struct paravirt_patch_site *start,
extern void *text_poke(void *addr, const void *opcode, size_t len);
extern void *text_poke_smp(void *addr, const void *opcode, size_t len);
#if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL)
#define IDEAL_NOP_SIZE_5 5
extern unsigned char ideal_nop5[IDEAL_NOP_SIZE_5];
extern void arch_init_ideal_nop5(void);
#else
static inline void arch_init_ideal_nop5(void) {}
#endif
#endif /* _ASM_X86_ALTERNATIVE_H */
......@@ -49,8 +49,8 @@ BUILD_INTERRUPT(apic_timer_interrupt,LOCAL_TIMER_VECTOR)
BUILD_INTERRUPT(error_interrupt,ERROR_APIC_VECTOR)
BUILD_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR)
#ifdef CONFIG_PERF_EVENTS
BUILD_INTERRUPT(perf_pending_interrupt, LOCAL_PENDING_VECTOR)
#ifdef CONFIG_IRQ_WORK
BUILD_INTERRUPT(irq_work_interrupt, IRQ_WORK_VECTOR)
#endif
#ifdef CONFIG_X86_THERMAL_VECTOR
......
......@@ -14,7 +14,7 @@ typedef struct {
#endif
unsigned int x86_platform_ipis; /* arch dependent */
unsigned int apic_perf_irqs;
unsigned int apic_pending_irqs;
unsigned int apic_irq_work_irqs;
#ifdef CONFIG_SMP
unsigned int irq_resched_count;
unsigned int irq_call_count;
......
......@@ -29,7 +29,7 @@
extern void apic_timer_interrupt(void);
extern void x86_platform_ipi(void);
extern void error_interrupt(void);
extern void perf_pending_interrupt(void);
extern void irq_work_interrupt(void);
extern void spurious_interrupt(void);
extern void thermal_interrupt(void);
......
......@@ -114,9 +114,9 @@
#define X86_PLATFORM_IPI_VECTOR 0xed
/*
* Performance monitoring pending work vector:
* IRQ work vector:
*/
#define LOCAL_PENDING_VECTOR 0xec
#define IRQ_WORK_VECTOR 0xec
#define UV_BAU_MESSAGE 0xea
......
#ifndef _ASM_X86_JUMP_LABEL_H
#define _ASM_X86_JUMP_LABEL_H
#ifdef __KERNEL__
#include <linux/types.h>
#include <asm/nops.h>
#define JUMP_LABEL_NOP_SIZE 5
# define JUMP_LABEL_INITIAL_NOP ".byte 0xe9 \n\t .long 0\n\t"
# define JUMP_LABEL(key, label) \
do { \
asm goto("1:" \
JUMP_LABEL_INITIAL_NOP \
".pushsection __jump_table, \"a\" \n\t"\
_ASM_PTR "1b, %l[" #label "], %c0 \n\t" \
".popsection \n\t" \
: : "i" (key) : : label); \
} while (0)
#endif /* __KERNEL__ */
#ifdef CONFIG_X86_64
typedef u64 jump_label_t;
#else
typedef u32 jump_label_t;
#endif
struct jump_entry {
jump_label_t code;
jump_label_t target;
jump_label_t key;
};
#endif
......@@ -36,19 +36,6 @@
#define P4_ESCR_EMASK(v) ((v) << P4_ESCR_EVENTMASK_SHIFT)
#define P4_ESCR_TAG(v) ((v) << P4_ESCR_TAG_SHIFT)
/* Non HT mask */
#define P4_ESCR_MASK \
(P4_ESCR_EVENT_MASK | \
P4_ESCR_EVENTMASK_MASK | \
P4_ESCR_TAG_MASK | \
P4_ESCR_TAG_ENABLE | \
P4_ESCR_T0_OS | \
P4_ESCR_T0_USR)
/* HT mask */
#define P4_ESCR_MASK_HT \
(P4_ESCR_MASK | P4_ESCR_T1_OS | P4_ESCR_T1_USR)
#define P4_CCCR_OVF 0x80000000U
#define P4_CCCR_CASCADE 0x40000000U
#define P4_CCCR_OVF_PMI_T0 0x04000000U
......@@ -70,23 +57,6 @@
#define P4_CCCR_THRESHOLD(v) ((v) << P4_CCCR_THRESHOLD_SHIFT)
#define P4_CCCR_ESEL(v) ((v) << P4_CCCR_ESCR_SELECT_SHIFT)
/* Non HT mask */
#define P4_CCCR_MASK \
(P4_CCCR_OVF | \
P4_CCCR_CASCADE | \
P4_CCCR_OVF_PMI_T0 | \
P4_CCCR_FORCE_OVF | \
P4_CCCR_EDGE | \
P4_CCCR_THRESHOLD_MASK | \
P4_CCCR_COMPLEMENT | \
P4_CCCR_COMPARE | \
P4_CCCR_ESCR_SELECT_MASK | \
P4_CCCR_ENABLE)
/* HT mask */
#define P4_CCCR_MASK_HT \
(P4_CCCR_MASK | P4_CCCR_OVF_PMI_T1 | P4_CCCR_THREAD_ANY)
#define P4_GEN_ESCR_EMASK(class, name, bit) \
class##__##name = ((1 << bit) << P4_ESCR_EVENTMASK_SHIFT)
#define P4_ESCR_EMASK_BIT(class, name) class##__##name
......@@ -127,6 +97,28 @@
#define P4_CONFIG_HT_SHIFT 63
#define P4_CONFIG_HT (1ULL << P4_CONFIG_HT_SHIFT)
/*
* The bits we allow to pass for RAW events
*/
#define P4_CONFIG_MASK_ESCR \
P4_ESCR_EVENT_MASK | \
P4_ESCR_EVENTMASK_MASK | \
P4_ESCR_TAG_MASK | \
P4_ESCR_TAG_ENABLE
#define P4_CONFIG_MASK_CCCR \
P4_CCCR_EDGE | \
P4_CCCR_THRESHOLD_MASK | \
P4_CCCR_COMPLEMENT | \
P4_CCCR_COMPARE | \
P4_CCCR_THREAD_ANY | \
P4_CCCR_RESERVED
/* some dangerous bits are reserved for kernel internals */
#define P4_CONFIG_MASK \
(p4_config_pack_escr(P4_CONFIG_MASK_ESCR)) | \
(p4_config_pack_cccr(P4_CONFIG_MASK_CCCR))
static inline bool p4_is_event_cascaded(u64 config)
{
u32 cccr = p4_config_unpack_cccr(config);
......
......@@ -34,7 +34,8 @@ GCOV_PROFILE_paravirt.o := n
obj-y := process_$(BITS).o signal.o entry_$(BITS).o
obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
obj-y += time.o ioport.o ldt.o dumpstack.o
obj-y += setup.o x86_init.o i8259.o irqinit.o
obj-y += setup.o x86_init.o i8259.o irqinit.o jump_label.o
obj-$(CONFIG_IRQ_WORK) += irq_work.o
obj-$(CONFIG_X86_VISWS) += visws_quirks.o
obj-$(CONFIG_X86_32) += probe_roms_32.o
obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o
......
......@@ -195,7 +195,7 @@ static void __init_or_module add_nops(void *insns, unsigned int len)
extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
extern s32 __smp_locks[], __smp_locks_end[];
static void *text_poke_early(void *addr, const void *opcode, size_t len);
void *text_poke_early(void *addr, const void *opcode, size_t len);
/* Replace instructions with better alternatives for this CPU type.
This runs before SMP is initialized to avoid SMP problems with
......@@ -522,7 +522,7 @@ void __init alternative_instructions(void)
* instructions. And on the local CPU you need to be protected again NMI or MCE
* handlers seeing an inconsistent instruction while you patch.
*/
static void *__init_or_module text_poke_early(void *addr, const void *opcode,
void *__init_or_module text_poke_early(void *addr, const void *opcode,
size_t len)
{
unsigned long flags;
......@@ -637,7 +637,72 @@ void *__kprobes text_poke_smp(void *addr, const void *opcode, size_t len)
tpp.len = len;
atomic_set(&stop_machine_first, 1);
wrote_text = 0;
stop_machine(stop_machine_text_poke, (void *)&tpp, NULL);
/* Use __stop_machine() because the caller already got online_cpus. */
__stop_machine(stop_machine_text_poke, (void *)&tpp, NULL);
return addr;
}
#if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL)
unsigned char ideal_nop5[IDEAL_NOP_SIZE_5];
void __init arch_init_ideal_nop5(void)
{
extern const unsigned char ftrace_test_p6nop[];
extern const unsigned char ftrace_test_nop5[];
extern const unsigned char ftrace_test_jmp[];
int faulted = 0;
/*
* There is no good nop for all x86 archs.
* We will default to using the P6_NOP5, but first we
* will test to make sure that the nop will actually
* work on this CPU. If it faults, we will then
* go to a lesser efficient 5 byte nop. If that fails
* we then just use a jmp as our nop. This isn't the most
* efficient nop, but we can not use a multi part nop
* since we would then risk being preempted in the middle
* of that nop, and if we enabled tracing then, it might
* cause a system crash.
*
* TODO: check the cpuid to determine the best nop.
*/
asm volatile (
"ftrace_test_jmp:"
"jmp ftrace_test_p6nop\n"
"nop\n"
"nop\n"
"nop\n" /* 2 byte jmp + 3 bytes */
"ftrace_test_p6nop:"
P6_NOP5
"jmp 1f\n"
"ftrace_test_nop5:"
".byte 0x66,0x66,0x66,0x66,0x90\n"
"1:"
".section .fixup, \"ax\"\n"
"2: movl $1, %0\n"
" jmp ftrace_test_nop5\n"
"3: movl $2, %0\n"
" jmp 1b\n"
".previous\n"
_ASM_EXTABLE(ftrace_test_p6nop, 2b)
_ASM_EXTABLE(ftrace_test_nop5, 3b)
: "=r"(faulted) : "0" (faulted));
switch (faulted) {
case 0:
pr_info("converting mcount calls to 0f 1f 44 00 00\n");
memcpy(ideal_nop5, ftrace_test_p6nop, IDEAL_NOP_SIZE_5);
break;
case 1:
pr_info("converting mcount calls to 66 66 66 66 90\n");
memcpy(ideal_nop5, ftrace_test_nop5, IDEAL_NOP_SIZE_5);
break;
case 2:
pr_info("converting mcount calls to jmp . + 5\n");
memcpy(ideal_nop5, ftrace_test_jmp, IDEAL_NOP_SIZE_5);
break;
}
}
#endif
......@@ -531,7 +531,7 @@ static int x86_pmu_hw_config(struct perf_event *event)
/*
* Setup the hardware configuration for a given attr_type
*/
static int __hw_perf_event_init(struct perf_event *event)
static int __x86_pmu_event_init(struct perf_event *event)
{
int err;
......@@ -584,7 +584,7 @@ static void x86_pmu_disable_all(void)
}
}
void hw_perf_disable(void)
static void x86_pmu_disable(struct pmu *pmu)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
......@@ -619,7 +619,7 @@ static void x86_pmu_enable_all(int added)
}
}
static const struct pmu pmu;
static struct pmu pmu;
static inline int is_x86_event(struct perf_event *event)
{
......@@ -801,10 +801,10 @@ static inline int match_prev_assignment(struct hw_perf_event *hwc,
hwc->last_tag == cpuc->tags[i];
}
static int x86_pmu_start(struct perf_event *event);
static void x86_pmu_stop(struct perf_event *event);
static void x86_pmu_start(struct perf_event *event, int flags);
static void x86_pmu_stop(struct perf_event *event, int flags);
void hw_perf_enable(void)
static void x86_pmu_enable(struct pmu *pmu)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
struct perf_event *event;
......@@ -840,7 +840,14 @@ void hw_perf_enable(void)
match_prev_assignment(hwc, cpuc, i))
continue;
x86_pmu_stop(event);
/*
* Ensure we don't accidentally enable a stopped
* counter simply because we rescheduled.
*/
if (hwc->state & PERF_HES_STOPPED)
hwc->state |= PERF_HES_ARCH;
x86_pmu_stop(event, PERF_EF_UPDATE);
}
for (i = 0; i < cpuc->n_events; i++) {
......@@ -852,7 +859,10 @@ void hw_perf_enable(void)
else if (i < n_running)
continue;
x86_pmu_start(event);
if (hwc->state & PERF_HES_ARCH)
continue;
x86_pmu_start(event, PERF_EF_RELOAD);
}
cpuc->n_added = 0;
perf_events_lapic_init();
......@@ -953,15 +963,12 @@ static void x86_pmu_enable_event(struct perf_event *event)
}
/*
* activate a single event
* Add a single event to the PMU.
*
* The event is added to the group of enabled events
* but only if it can be scehduled with existing events.
*
* Called with PMU disabled. If successful and return value 1,
* then guaranteed to call perf_enable() and hw_perf_enable()
*/
static int x86_pmu_enable(struct perf_event *event)
static int x86_pmu_add(struct perf_event *event, int flags)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
struct hw_perf_event *hwc;
......@@ -970,58 +977,67 @@ static int x86_pmu_enable(struct perf_event *event)
hwc = &event->hw;
perf_pmu_disable(event->pmu);
n0 = cpuc->n_events;
n = collect_events(cpuc, event, false);
if (n < 0)
return n;
ret = n = collect_events(cpuc, event, false);
if (ret < 0)
goto out;
hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
if (!(flags & PERF_EF_START))
hwc->state |= PERF_HES_ARCH;
/*
* If group events scheduling transaction was started,
* skip the schedulability test here, it will be peformed
* at commit time(->commit_txn) as a whole
* at commit time (->commit_txn) as a whole
*/
if (cpuc->group_flag & PERF_EVENT_TXN)
goto out;
goto done_collect;
ret = x86_pmu.schedule_events(cpuc, n, assign);
if (ret)
return ret;
goto out;
/*
* copy new assignment, now we know it is possible
* will be used by hw_perf_enable()
*/
memcpy(cpuc->assign, assign, n*sizeof(int));
out:
done_collect:
cpuc->n_events = n;
cpuc->n_added += n - n0;
cpuc->n_txn += n - n0;
return 0;
ret = 0;
out:
perf_pmu_enable(event->pmu);
return ret;
}
static int x86_pmu_start(struct perf_event *event)
static void x86_pmu_start(struct perf_event *event, int flags)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
int idx = event->hw.idx;
if (idx == -1)
return -EAGAIN;
if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
return;
if (WARN_ON_ONCE(idx == -1))
return;
if (flags & PERF_EF_RELOAD) {
WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
x86_perf_event_set_period(event);
}
event->hw.state = 0;
x86_perf_event_set_period(event);
cpuc->events[idx] = event;
__set_bit(idx, cpuc->active_mask);
__set_bit(idx, cpuc->running);
x86_pmu.enable(event);
perf_event_update_userpage(event);
return 0;
}
static void x86_pmu_unthrottle(struct perf_event *event)
{
int ret = x86_pmu_start(event);
WARN_ON_ONCE(ret);
}
void perf_event_print_debug(void)
......@@ -1078,27 +1094,29 @@ void perf_event_print_debug(void)
local_irq_restore(flags);
}
static void x86_pmu_stop(struct perf_event *event)
static void x86_pmu_stop(struct perf_event *event, int flags)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
struct hw_perf_event *hwc = &event->hw;
int idx = hwc->idx;
if (!__test_and_clear_bit(idx, cpuc->active_mask))
return;
x86_pmu.disable(event);
/*
* Drain the remaining delta count out of a event
* that we are disabling:
*/
x86_perf_event_update(event);
if (__test_and_clear_bit(hwc->idx, cpuc->active_mask)) {
x86_pmu.disable(event);
cpuc->events[hwc->idx] = NULL;
WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
hwc->state |= PERF_HES_STOPPED;
}
cpuc->events[idx] = NULL;
if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
/*
* Drain the remaining delta count out of a event
* that we are disabling:
*/
x86_perf_event_update(event);
hwc->state |= PERF_HES_UPTODATE;
}
}
static void x86_pmu_disable(struct perf_event *event)
static void x86_pmu_del(struct perf_event *event, int flags)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
int i;
......@@ -1111,7 +1129,7 @@ static void x86_pmu_disable(struct perf_event *event)
if (cpuc->group_flag & PERF_EVENT_TXN)
return;
x86_pmu_stop(event);
x86_pmu_stop(event, PERF_EF_UPDATE);
for (i = 0; i < cpuc->n_events; i++) {
if (event == cpuc->event_list[i]) {
......@@ -1134,7 +1152,6 @@ static int x86_pmu_handle_irq(struct pt_regs *regs)
struct perf_sample_data data;
struct cpu_hw_events *cpuc;
struct perf_event *event;
struct hw_perf_event *hwc;
int idx, handled = 0;
u64 val;
......@@ -1155,7 +1172,6 @@ static int x86_pmu_handle_irq(struct pt_regs *regs)
}
event = cpuc->events[idx];
hwc = &event->hw;
val = x86_perf_event_update(event);
if (val & (1ULL << (x86_pmu.cntval_bits - 1)))
......@@ -1171,7 +1187,7 @@ static int x86_pmu_handle_irq(struct pt_regs *regs)
continue;
if (perf_event_overflow(event, 1, &data, regs))
x86_pmu_stop(event);
x86_pmu_stop(event, 0);
}
if (handled)
......@@ -1180,25 +1196,6 @@ static int x86_pmu_handle_irq(struct pt_regs *regs)
return handled;
}
void smp_perf_pending_interrupt(struct pt_regs *regs)
{
irq_enter();
ack_APIC_irq();
inc_irq_stat(apic_pending_irqs);
perf_event_do_pending();
irq_exit();
}
void set_perf_event_pending(void)
{
#ifdef CONFIG_X86_LOCAL_APIC
if (!x86_pmu.apic || !x86_pmu_initialized())
return;
apic->send_IPI_self(LOCAL_PENDING_VECTOR);
#endif
}
void perf_events_lapic_init(void)
{
if (!x86_pmu.apic || !x86_pmu_initialized())
......@@ -1388,7 +1385,6 @@ void __init init_hw_perf_events(void)
x86_pmu.num_counters = X86_PMC_MAX_GENERIC;
}
x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1;
perf_max_events = x86_pmu.num_counters;
if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) {
WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!",
......@@ -1424,6 +1420,7 @@ void __init init_hw_perf_events(void)
pr_info("... fixed-purpose events: %d\n", x86_pmu.num_counters_fixed);
pr_info("... event mask: %016Lx\n", x86_pmu.intel_ctrl);
perf_pmu_register(&pmu);
perf_cpu_notifier(x86_pmu_notifier);
}
......@@ -1437,10 +1434,11 @@ static inline void x86_pmu_read(struct perf_event *event)
* Set the flag to make pmu::enable() not perform the
* schedulability test, it will be performed at commit time
*/
static void x86_pmu_start_txn(const struct pmu *pmu)
static void x86_pmu_start_txn(struct pmu *pmu)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
perf_pmu_disable(pmu);
cpuc->group_flag |= PERF_EVENT_TXN;
cpuc->n_txn = 0;
}
......@@ -1450,7 +1448,7 @@ static void x86_pmu_start_txn(const struct pmu *pmu)
* Clear the flag and pmu::enable() will perform the
* schedulability test.
*/
static void x86_pmu_cancel_txn(const struct pmu *pmu)
static void x86_pmu_cancel_txn(struct pmu *pmu)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
......@@ -1460,6 +1458,7 @@ static void x86_pmu_cancel_txn(const struct pmu *pmu)
*/
cpuc->n_added -= cpuc->n_txn;
cpuc->n_events -= cpuc->n_txn;
perf_pmu_enable(pmu);
}
/*
......@@ -1467,7 +1466,7 @@ static void x86_pmu_cancel_txn(const struct pmu *pmu)
* Perform the group schedulability test as a whole
* Return 0 if success
*/
static int x86_pmu_commit_txn(const struct pmu *pmu)
static int x86_pmu_commit_txn(struct pmu *pmu)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
int assign[X86_PMC_IDX_MAX];
......@@ -1489,22 +1488,10 @@ static int x86_pmu_commit_txn(const struct pmu *pmu)
memcpy(cpuc->assign, assign, n*sizeof(int));
cpuc->group_flag &= ~PERF_EVENT_TXN;
perf_pmu_enable(pmu);
return 0;
}
static const struct pmu pmu = {
.enable = x86_pmu_enable,
.disable = x86_pmu_disable,
.start = x86_pmu_start,
.stop = x86_pmu_stop,
.read = x86_pmu_read,
.unthrottle = x86_pmu_unthrottle,
.start_txn = x86_pmu_start_txn,
.cancel_txn = x86_pmu_cancel_txn,
.commit_txn = x86_pmu_commit_txn,
};
/*
* validate that we can schedule this event
*/
......@@ -1579,12 +1566,22 @@ static int validate_group(struct perf_event *event)
return ret;
}
const struct pmu *hw_perf_event_init(struct perf_event *event)
int x86_pmu_event_init(struct perf_event *event)
{
const struct pmu *tmp;
struct pmu *tmp;
int err;
err = __hw_perf_event_init(event);
switch (event->attr.type) {
case PERF_TYPE_RAW:
case PERF_TYPE_HARDWARE:
case PERF_TYPE_HW_CACHE:
break;
default:
return -ENOENT;
}
err = __x86_pmu_event_init(event);
if (!err) {
/*
* we temporarily connect event to its pmu
......@@ -1604,26 +1601,31 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
if (err) {
if (event->destroy)
event->destroy(event);
return ERR_PTR(err);
}
return &pmu;
return err;
}
/*
* callchain support
*/
static struct pmu pmu = {
.pmu_enable = x86_pmu_enable,
.pmu_disable = x86_pmu_disable,
static inline
void callchain_store(struct perf_callchain_entry *entry, u64 ip)
{
if (entry->nr < PERF_MAX_STACK_DEPTH)
entry->ip[entry->nr++] = ip;
}
.event_init = x86_pmu_event_init,
static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry);
static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_nmi_entry);
.add = x86_pmu_add,
.del = x86_pmu_del,
.start = x86_pmu_start,
.stop = x86_pmu_stop,
.read = x86_pmu_read,
.start_txn = x86_pmu_start_txn,
.cancel_txn = x86_pmu_cancel_txn,
.commit_txn = x86_pmu_commit_txn,
};
/*
* callchain support
*/
static void
backtrace_warning_symbol(void *data, char *msg, unsigned long symbol)
......@@ -1645,7 +1647,7 @@ static void backtrace_address(void *data, unsigned long addr, int reliable)
{
struct perf_callchain_entry *entry = data;
callchain_store(entry, addr);
perf_callchain_store(entry, addr);
}
static const struct stacktrace_ops backtrace_ops = {
......@@ -1656,11 +1658,15 @@ static const struct stacktrace_ops backtrace_ops = {
.walk_stack = print_context_stack_bp,
};
static void
perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry)
void
perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
{
callchain_store(entry, PERF_CONTEXT_KERNEL);
callchain_store(entry, regs->ip);
if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
/* TODO: We don't support guest os callchain now */
return;
}
perf_callchain_store(entry, regs->ip);
dump_trace(NULL, regs, NULL, regs->bp, &backtrace_ops, entry);
}
......@@ -1689,7 +1695,7 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
if (fp < compat_ptr(regs->sp))
break;
callchain_store(entry, frame.return_address);
perf_callchain_store(entry, frame.return_address);
fp = compat_ptr(frame.next_frame);
}
return 1;
......@@ -1702,19 +1708,20 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
}
#endif
static void
perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry)
void
perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
{
struct stack_frame frame;
const void __user *fp;
if (!user_mode(regs))
regs = task_pt_regs(current);
if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
/* TODO: We don't support guest os callchain now */
return;
}
fp = (void __user *)regs->bp;
callchain_store(entry, PERF_CONTEXT_USER);
callchain_store(entry, regs->ip);
perf_callchain_store(entry, regs->ip);
if (perf_callchain_user32(regs, entry))
return;
......@@ -1731,52 +1738,11 @@ perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry)
if ((unsigned long)fp < regs->sp)
break;
callchain_store(entry, frame.return_address);
perf_callchain_store(entry, frame.return_address);
fp = frame.next_frame;
}
}
static void
perf_do_callchain(struct pt_regs *regs, struct perf_callchain_entry *entry)
{
int is_user;
if (!regs)
return;
is_user = user_mode(regs);
if (is_user && current->state != TASK_RUNNING)
return;
if (!is_user)
perf_callchain_kernel(regs, entry);
if (current->mm)
perf_callchain_user(regs, entry);
}
struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
{
struct perf_callchain_entry *entry;
if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
/* TODO: We don't support guest os callchain now */
return NULL;
}
if (in_nmi())
entry = &__get_cpu_var(pmc_nmi_entry);
else
entry = &__get_cpu_var(pmc_irq_entry);
entry->nr = 0;
perf_do_callchain(regs, entry);
return entry;
}
unsigned long perf_instruction_pointer(struct pt_regs *regs)
{
unsigned long ip;
......
......@@ -52,7 +52,7 @@ static __initconst const u64 amd_hw_cache_event_ids
[ C(DTLB) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */
[ C(RESULT_MISS) ] = 0x0046, /* L1 DTLB and L2 DLTB Miss */
[ C(RESULT_MISS) ] = 0x0746, /* L1_DTLB_AND_L2_DLTB_MISS.ALL */
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = 0,
......@@ -66,7 +66,7 @@ static __initconst const u64 amd_hw_cache_event_ids
[ C(ITLB) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fecthes */
[ C(RESULT_MISS) ] = 0x0085, /* Instr. fetch ITLB misses */
[ C(RESULT_MISS) ] = 0x0385, /* L1_ITLB_AND_L2_ITLB_MISS.ALL */
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = -1,
......
......@@ -713,18 +713,18 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
struct cpu_hw_events *cpuc;
int bit, loops;
u64 status;
int handled = 0;
int handled;
perf_sample_data_init(&data, 0);
cpuc = &__get_cpu_var(cpu_hw_events);
intel_pmu_disable_all();
intel_pmu_drain_bts_buffer();
handled = intel_pmu_drain_bts_buffer();
status = intel_pmu_get_status();
if (!status) {
intel_pmu_enable_all(0);
return 0;
return handled;
}
loops = 0;
......@@ -763,7 +763,7 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
data.period = event->hw.last_period;
if (perf_event_overflow(event, 1, &data, regs))
x86_pmu_stop(event);
x86_pmu_stop(event, 0);
}
/*
......
......@@ -214,7 +214,7 @@ static void intel_pmu_disable_bts(void)
update_debugctlmsr(debugctlmsr);
}
static void intel_pmu_drain_bts_buffer(void)
static int intel_pmu_drain_bts_buffer(void)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
struct debug_store *ds = cpuc->ds;
......@@ -231,16 +231,16 @@ static void intel_pmu_drain_bts_buffer(void)
struct pt_regs regs;
if (!event)
return;
return 0;
if (!ds)
return;
return 0;
at = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
top = (struct bts_record *)(unsigned long)ds->bts_index;
if (top <= at)
return;
return 0;
ds->bts_index = ds->bts_buffer_base;
......@@ -256,7 +256,7 @@ static void intel_pmu_drain_bts_buffer(void)
perf_prepare_sample(&header, &data, event, &regs);
if (perf_output_begin(&handle, event, header.size * (top - at), 1, 1))
return;
return 1;
for (; at < top; at++) {
data.ip = at->from;
......@@ -270,6 +270,7 @@ static void intel_pmu_drain_bts_buffer(void)
/* There's new data available. */
event->hw.interrupts++;
event->pending_kill = POLL_IN;
return 1;
}
/*
......@@ -491,7 +492,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
regs.flags &= ~PERF_EFLAGS_EXACT;
if (perf_event_overflow(event, 1, &data, &regs))
x86_pmu_stop(event);
x86_pmu_stop(event, 0);
}
static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
......
此差异已折叠。
......@@ -1023,9 +1023,9 @@ apicinterrupt ERROR_APIC_VECTOR \
apicinterrupt SPURIOUS_APIC_VECTOR \
spurious_interrupt smp_spurious_interrupt
#ifdef CONFIG_PERF_EVENTS
apicinterrupt LOCAL_PENDING_VECTOR \
perf_pending_interrupt smp_perf_pending_interrupt
#ifdef CONFIG_IRQ_WORK
apicinterrupt IRQ_WORK_VECTOR \
irq_work_interrupt smp_irq_work_interrupt
#endif
/*
......
......@@ -257,14 +257,9 @@ do_ftrace_mod_code(unsigned long ip, void *new_code)
return mod_code_status;
}
static unsigned char ftrace_nop[MCOUNT_INSN_SIZE];
static unsigned char *ftrace_nop_replace(void)
{
return ftrace_nop;
return ideal_nop5;
}
static int
......@@ -338,62 +333,6 @@ int ftrace_update_ftrace_func(ftrace_func_t func)
int __init ftrace_dyn_arch_init(void *data)
{
extern const unsigned char ftrace_test_p6nop[];
extern const unsigned char ftrace_test_nop5[];
extern const unsigned char ftrace_test_jmp[];
int faulted = 0;
/*
* There is no good nop for all x86 archs.
* We will default to using the P6_NOP5, but first we
* will test to make sure that the nop will actually
* work on this CPU. If it faults, we will then
* go to a lesser efficient 5 byte nop. If that fails
* we then just use a jmp as our nop. This isn't the most
* efficient nop, but we can not use a multi part nop
* since we would then risk being preempted in the middle
* of that nop, and if we enabled tracing then, it might
* cause a system crash.
*
* TODO: check the cpuid to determine the best nop.
*/
asm volatile (
"ftrace_test_jmp:"
"jmp ftrace_test_p6nop\n"
"nop\n"
"nop\n"
"nop\n" /* 2 byte jmp + 3 bytes */
"ftrace_test_p6nop:"
P6_NOP5
"jmp 1f\n"
"ftrace_test_nop5:"
".byte 0x66,0x66,0x66,0x66,0x90\n"
"1:"
".section .fixup, \"ax\"\n"
"2: movl $1, %0\n"
" jmp ftrace_test_nop5\n"
"3: movl $2, %0\n"
" jmp 1b\n"
".previous\n"
_ASM_EXTABLE(ftrace_test_p6nop, 2b)
_ASM_EXTABLE(ftrace_test_nop5, 3b)
: "=r"(faulted) : "0" (faulted));
switch (faulted) {
case 0:
pr_info("converting mcount calls to 0f 1f 44 00 00\n");
memcpy(ftrace_nop, ftrace_test_p6nop, MCOUNT_INSN_SIZE);
break;
case 1:
pr_info("converting mcount calls to 66 66 66 66 90\n");
memcpy(ftrace_nop, ftrace_test_nop5, MCOUNT_INSN_SIZE);
break;
case 2:
pr_info("converting mcount calls to jmp . + 5\n");
memcpy(ftrace_nop, ftrace_test_jmp, MCOUNT_INSN_SIZE);
break;
}
/* The return code is retured via data */
*(unsigned long *)data = 0;
......
......@@ -67,10 +67,10 @@ static int show_other_interrupts(struct seq_file *p, int prec)
for_each_online_cpu(j)
seq_printf(p, "%10u ", irq_stats(j)->apic_perf_irqs);
seq_printf(p, " Performance monitoring interrupts\n");
seq_printf(p, "%*s: ", prec, "PND");
seq_printf(p, "%*s: ", prec, "IWI");
for_each_online_cpu(j)
seq_printf(p, "%10u ", irq_stats(j)->apic_pending_irqs);
seq_printf(p, " Performance pending work\n");
seq_printf(p, "%10u ", irq_stats(j)->apic_irq_work_irqs);
seq_printf(p, " IRQ work interrupts\n");
#endif
if (x86_platform_ipi_callback) {
seq_printf(p, "%*s: ", prec, "PLT");
......@@ -185,7 +185,7 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
sum += irq_stats(cpu)->apic_timer_irqs;
sum += irq_stats(cpu)->irq_spurious_count;
sum += irq_stats(cpu)->apic_perf_irqs;
sum += irq_stats(cpu)->apic_pending_irqs;
sum += irq_stats(cpu)->apic_irq_work_irqs;
#endif
if (x86_platform_ipi_callback)
sum += irq_stats(cpu)->x86_platform_ipis;
......
/*
* x86 specific code for irq_work
*
* Copyright (C) 2010 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
*/
#include <linux/kernel.h>
#include <linux/irq_work.h>
#include <linux/hardirq.h>
#include <asm/apic.h>
void smp_irq_work_interrupt(struct pt_regs *regs)
{
irq_enter();
ack_APIC_irq();
inc_irq_stat(apic_irq_work_irqs);
irq_work_run();
irq_exit();
}
void arch_irq_work_raise(void)
{
#ifdef CONFIG_X86_LOCAL_APIC
if (!cpu_has_apic)
return;
apic->send_IPI_self(IRQ_WORK_VECTOR);
apic_wait_icr_idle();
#endif
}
......@@ -224,9 +224,9 @@ static void __init apic_intr_init(void)
alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
/* Performance monitoring interrupts: */
# ifdef CONFIG_PERF_EVENTS
alloc_intr_gate(LOCAL_PENDING_VECTOR, perf_pending_interrupt);
/* IRQ work interrupts: */
# ifdef CONFIG_IRQ_WORK
alloc_intr_gate(IRQ_WORK_VECTOR, irq_work_interrupt);
# endif
#endif
......
/*
* jump label x86 support
*
* Copyright (C) 2009 Jason Baron <jbaron@redhat.com>
*
*/
#include <linux/jump_label.h>
#include <linux/memory.h>
#include <linux/uaccess.h>
#include <linux/module.h>
#include <linux/list.h>
#include <linux/jhash.h>
#include <linux/cpu.h>
#include <asm/kprobes.h>
#include <asm/alternative.h>
#ifdef HAVE_JUMP_LABEL
union jump_code_union {
char code[JUMP_LABEL_NOP_SIZE];
struct {
char jump;
int offset;
} __attribute__((packed));
};
void arch_jump_label_transform(struct jump_entry *entry,
enum jump_label_type type)
{
union jump_code_union code;
if (type == JUMP_LABEL_ENABLE) {
code.jump = 0xe9;
code.offset = entry->target -
(entry->code + JUMP_LABEL_NOP_SIZE);
} else
memcpy(&code, ideal_nop5, JUMP_LABEL_NOP_SIZE);
get_online_cpus();
mutex_lock(&text_mutex);
text_poke_smp((void *)entry->code, &code, JUMP_LABEL_NOP_SIZE);
mutex_unlock(&text_mutex);
put_online_cpus();
}
void arch_jump_label_text_poke_early(jump_label_t addr)
{
text_poke_early((void *)addr, ideal_nop5, JUMP_LABEL_NOP_SIZE);
}
#endif
此差异已折叠。
......@@ -239,6 +239,9 @@ int module_finalize(const Elf_Ehdr *hdr,
apply_paravirt(pseg, pseg + para->sh_size);
}
/* make jump label nops */
jump_label_apply_nops(me);
return 0;
}
......
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册