提交 475113d9 编写于 作者: J Jiri Olsa 提交者: Ingo Molnar

perf/x86/intel: Account interrupts for PEBS errors

It's possible to set up PEBS events to get only errors and not
any data, like on SNB-X (model 45) and IVB-EP (model 62)
via 2 perf commands running simultaneously:

    taskset -c 1 ./perf record -c 4 -e branches:pp -j any -C 10

This leads to a soft lock up, because the error path of the
intel_pmu_drain_pebs_nhm() does not account event->hw.interrupt
for error PEBS interrupts, so in case you're getting ONLY
errors you don't have a way to stop the event when it's over
the max_samples_per_tick limit:

  NMI watchdog: BUG: soft lockup - CPU#22 stuck for 22s! [perf_fuzzer:5816]
  ...
  RIP: 0010:[<ffffffff81159232>]  [<ffffffff81159232>] smp_call_function_single+0xe2/0x140
  ...
  Call Trace:
   ? trace_hardirqs_on_caller+0xf5/0x1b0
   ? perf_cgroup_attach+0x70/0x70
   perf_install_in_context+0x199/0x1b0
   ? ctx_resched+0x90/0x90
   SYSC_perf_event_open+0x641/0xf90
   SyS_perf_event_open+0x9/0x10
   do_syscall_64+0x6c/0x1f0
   entry_SYSCALL64_slow_path+0x25/0x25

Add perf_event_account_interrupt() which does the interrupt
and frequency checks and call it from intel_pmu_drain_pebs_nhm()'s
error path.

We keep the pending_kill and pending_wakeup logic only in the
__perf_event_overflow() path, because they make sense only if
there's any data to deliver.
Signed-off-by: NJiri Olsa <jolsa@kernel.org>
Signed-off-by: NPeter Zijlstra (Intel) <peterz@infradead.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vince@deater.net>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Link: http://lkml.kernel.org/r/1482931866-6018-2-git-send-email-jolsa@kernel.orgSigned-off-by: NIngo Molnar <mingo@kernel.org>
上级 321027c1
...@@ -1389,9 +1389,13 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) ...@@ -1389,9 +1389,13 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
continue; continue;
/* log dropped samples number */ /* log dropped samples number */
if (error[bit]) if (error[bit]) {
perf_log_lost_samples(event, error[bit]); perf_log_lost_samples(event, error[bit]);
if (perf_event_account_interrupt(event))
x86_pmu_stop(event, 0);
}
if (counts[bit]) { if (counts[bit]) {
__intel_pmu_pebs_event(event, iregs, base, __intel_pmu_pebs_event(event, iregs, base,
top, bit, counts[bit]); top, bit, counts[bit]);
......
...@@ -1259,6 +1259,7 @@ extern void perf_event_disable(struct perf_event *event); ...@@ -1259,6 +1259,7 @@ extern void perf_event_disable(struct perf_event *event);
extern void perf_event_disable_local(struct perf_event *event); extern void perf_event_disable_local(struct perf_event *event);
extern void perf_event_disable_inatomic(struct perf_event *event); extern void perf_event_disable_inatomic(struct perf_event *event);
extern void perf_event_task_tick(void); extern void perf_event_task_tick(void);
extern int perf_event_account_interrupt(struct perf_event *event);
#else /* !CONFIG_PERF_EVENTS: */ #else /* !CONFIG_PERF_EVENTS: */
static inline void * static inline void *
perf_aux_output_begin(struct perf_output_handle *handle, perf_aux_output_begin(struct perf_output_handle *handle,
......
...@@ -7060,25 +7060,12 @@ static void perf_log_itrace_start(struct perf_event *event) ...@@ -7060,25 +7060,12 @@ static void perf_log_itrace_start(struct perf_event *event)
perf_output_end(&handle); perf_output_end(&handle);
} }
/* static int
* Generic event overflow handling, sampling. __perf_event_account_interrupt(struct perf_event *event, int throttle)
*/
static int __perf_event_overflow(struct perf_event *event,
int throttle, struct perf_sample_data *data,
struct pt_regs *regs)
{ {
int events = atomic_read(&event->event_limit);
struct hw_perf_event *hwc = &event->hw; struct hw_perf_event *hwc = &event->hw;
u64 seq;
int ret = 0; int ret = 0;
u64 seq;
/*
* Non-sampling counters might still use the PMI to fold short
* hardware counters, ignore those.
*/
if (unlikely(!is_sampling_event(event)))
return 0;
seq = __this_cpu_read(perf_throttled_seq); seq = __this_cpu_read(perf_throttled_seq);
if (seq != hwc->interrupts_seq) { if (seq != hwc->interrupts_seq) {
...@@ -7106,6 +7093,34 @@ static int __perf_event_overflow(struct perf_event *event, ...@@ -7106,6 +7093,34 @@ static int __perf_event_overflow(struct perf_event *event,
perf_adjust_period(event, delta, hwc->last_period, true); perf_adjust_period(event, delta, hwc->last_period, true);
} }
return ret;
}
int perf_event_account_interrupt(struct perf_event *event)
{
return __perf_event_account_interrupt(event, 1);
}
/*
* Generic event overflow handling, sampling.
*/
static int __perf_event_overflow(struct perf_event *event,
int throttle, struct perf_sample_data *data,
struct pt_regs *regs)
{
int events = atomic_read(&event->event_limit);
int ret = 0;
/*
* Non-sampling counters might still use the PMI to fold short
* hardware counters, ignore those.
*/
if (unlikely(!is_sampling_event(event)))
return 0;
ret = __perf_event_account_interrupt(event, throttle);
/* /*
* XXX event_limit might not quite work as expected on inherited * XXX event_limit might not quite work as expected on inherited
* events * events
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册