Commit 52d168e2 authored by Andi Kleen, committed by H. Peter Anvin

x86, mce: switch machine check polling to per CPU timer

Impact: Higher priority bug fix

The machine check poller runs a single timer and then broadcasts an
IPI to all CPUs to check them. This leads to unnecessary
synchronization between CPUs: the CPU running the timer has to wait,
potentially for a long time, for all the other CPUs to answer. It is
also unfriendly to real time workloads and inefficient in general.

This was especially a problem on systems with a lot of events, where
the poller runs at a higher frequency after processing some events.
More and more CPU time could be wasted this way, to the point of
significantly slowing down machines.

The machine check polling is actually fully independent per CPU, so
there's no reason not to do it all with per CPU timers. This patch
implements that.
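As a minimal sketch of the pattern this converts to (the names
poll_timer, poll_interval and poll_timer_fn are illustrative, not from
the patch), a self-rearming per CPU timer looks like this with the
timer API of this kernel generation:

#include <linux/timer.h>
#include <linux/percpu.h>
#include <linux/jiffies.h>
#include <linux/smp.h>

static DEFINE_PER_CPU(struct timer_list, poll_timer);
static int poll_interval;		/* in jiffies */

static void poll_timer_fn(unsigned long data)
{
	/* data carries the owning CPU; each CPU re-arms only its own timer */
	struct timer_list *t = &per_cpu(poll_timer, data);

	WARN_ON(smp_processor_id() != data);

	/* ... do the per CPU polling work here ... */

	t->expires = jiffies + poll_interval;
	add_timer(t);			/* re-arms on the same CPU, no IPIs */
}

/* called once on each CPU during bringup, with preemption off */
static void poll_timer_start(void)
{
	struct timer_list *t = &__get_cpu_var(poll_timer);

	setup_timer(t, poll_timer_fn, smp_processor_id());
	t->expires = round_jiffies(jiffies + poll_interval);
	add_timer(t);
}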

Also switch the poller to use standard timers instead of work
queues. It used work queues to be able to execute a user program on
an event, but mce_notify_user() now handles that case with a
separate callback. So instead, always run the poll code in a
standard per CPU timer, which means less overhead in the common case
where no trigger has to be executed.
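The reason work queues were needed before is that a timer callback
runs in softirq context, where a user program cannot be spawned
directly; the separate mce_do_trigger() work item in the diff below
covers that. A sketch of the split, again with hypothetical names
(notify_work, found_events) rather than anything from the patch:

#include <linux/timer.h>
#include <linux/workqueue.h>

static void notify_work_fn(struct work_struct *work)
{
	/*
	 * Process context: it is safe here to do the slow, sleeping
	 * work, e.g. spawning the user space trigger program.
	 */
}
static DECLARE_WORK(notify_work, notify_work_fn);

static void poll_timer_fn(unsigned long data)
{
	int found_events = 0;

	/* ... poll the hardware, set found_events if something was logged ... */

	if (found_events)
		schedule_work(&notify_work);	/* defer the slow path */

	/* ... re-arm the per CPU timer as shown above ... */
}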

This also allows the initialization to be cleaned up significantly,
because standard timers are already up when machine checks get
initialized; there is no longer any need for multiple initialization
functions.

Thanks to Thomas Gleixner for some help.

Cc: thockin@google.com
v2: Use del_timer_sync() on cpu shutdown and don't try to handle
migrated timers.
v3: Add WARN_ON for timer running on unexpected CPU
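The v2/v3 handling corresponds to the hotplug notifier cases in the
diff below, distilled here with explanatory comments that are mine,
not part of the patch:

	case CPU_DOWN_PREPARE:
	case CPU_DOWN_PREPARE_FROZEN:
		/* synchronously stop the timer; waits for a running callback */
		del_timer_sync(t);
		break;
	case CPU_DOWN_FAILED:
	case CPU_DOWN_FAILED_FROZEN:
		/* offlining failed, so re-arm on that (still online) CPU */
		t->expires = round_jiffies_relative(jiffies + next_interval);
		add_timer_on(t, cpu);
		break;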
Signed-off-by: Andi Kleen <ak@linux.intel.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Parent 9bd98405
@@ -353,18 +353,17 @@ void mce_log_therm_throt_event(unsigned int cpu, __u64 status)
 
 static int check_interval = 5 * 60; /* 5 minutes */
 static int next_interval; /* in jiffies */
-static void mcheck_timer(struct work_struct *work);
-static DECLARE_DELAYED_WORK(mcheck_work, mcheck_timer);
+static void mcheck_timer(unsigned long);
+static DEFINE_PER_CPU(struct timer_list, mce_timer);
 
-static void mcheck_check_cpu(void *info)
+static void mcheck_timer(unsigned long data)
 {
+	struct timer_list *t = &per_cpu(mce_timer, data);
+
+	WARN_ON(smp_processor_id() != data);
+
 	if (mce_available(&current_cpu_data))
 		do_machine_check(NULL, 0);
-}
-
-static void mcheck_timer(struct work_struct *work)
-{
-	on_each_cpu(mcheck_check_cpu, NULL, 1);
 
 	/*
 	 * Alert userspace if needed. If we logged an MCE, reduce the
@@ -377,7 +376,8 @@ static void mcheck_timer(struct work_struct *work)
 		(int)round_jiffies_relative(check_interval*HZ));
 	}
 
-	schedule_delayed_work(&mcheck_work, next_interval);
+	t->expires = jiffies + next_interval;
+	add_timer(t);
 }
 
 static void mce_do_trigger(struct work_struct *work)
@@ -436,16 +436,11 @@ static struct notifier_block mce_idle_notifier = {
 
 static __init int periodic_mcheck_init(void)
 {
-	next_interval = check_interval * HZ;
-	if (next_interval)
-		schedule_delayed_work(&mcheck_work,
-				      round_jiffies_relative(next_interval));
-	idle_notifier_register(&mce_idle_notifier);
-	return 0;
+	idle_notifier_register(&mce_idle_notifier);
+	return 0;
 }
 __initcall(periodic_mcheck_init);
 
-
 /*
  * Initialize Machine Checks for a CPU.
  */
@@ -515,6 +510,20 @@ static void __cpuinit mce_cpu_features(struct cpuinfo_x86 *c)
 	}
 }
 
+static void mce_init_timer(void)
+{
+	struct timer_list *t = &__get_cpu_var(mce_timer);
+
+	/* data race harmless because everyone sets to the same value */
+	if (!next_interval)
+		next_interval = check_interval * HZ;
+	if (!next_interval)
+		return;
+	setup_timer(t, mcheck_timer, smp_processor_id());
+	t->expires = round_jiffies_relative(jiffies + next_interval);
+	add_timer(t);
+}
+
 /*
  * Called for each booted CPU to set up machine checks.
  * Must be called with preempt off.
@@ -529,6 +538,7 @@ void __cpuinit mcheck_init(struct cpuinfo_x86 *c)
 
 	mce_init(NULL);
 	mce_cpu_features(c);
+	mce_init_timer();
 }
 
 /*
@@ -758,17 +768,19 @@ static int mce_resume(struct sys_device *dev)
 	return 0;
 }
 
+static void mce_cpu_restart(void *data)
+{
+	del_timer_sync(&__get_cpu_var(mce_timer));
+	if (mce_available(&current_cpu_data))
+		mce_init(NULL);
+	mce_init_timer();
+}
+
 /* Reinit MCEs after user configuration changes */
 static void mce_restart(void)
 {
-	if (next_interval)
-		cancel_delayed_work(&mcheck_work);
-	/* Timer race is harmless here */
-	on_each_cpu(mce_init, NULL, 1);
 	next_interval = check_interval * HZ;
-	if (next_interval)
-		schedule_delayed_work(&mcheck_work,
-				      round_jiffies_relative(next_interval));
+	on_each_cpu(mce_cpu_restart, NULL, 1);
 }
 
 static struct sysdev_class mce_sysclass = {
@@ -899,6 +911,7 @@ static int __cpuinit mce_cpu_callback(struct notifier_block *nfb,
 				      unsigned long action, void *hcpu)
 {
 	unsigned int cpu = (unsigned long)hcpu;
+	struct timer_list *t = &per_cpu(mce_timer, cpu);
 
 	switch (action) {
 	case CPU_ONLINE:
@@ -913,6 +926,15 @@ static int __cpuinit mce_cpu_callback(struct notifier_block *nfb,
 		threshold_cpu_callback(action, cpu);
 		mce_remove_device(cpu);
 		break;
+	case CPU_DOWN_PREPARE:
+	case CPU_DOWN_PREPARE_FROZEN:
+		del_timer_sync(t);
+		break;
+	case CPU_DOWN_FAILED:
+	case CPU_DOWN_FAILED_FROZEN:
+		t->expires = round_jiffies_relative(jiffies + next_interval);
+		add_timer_on(t, cpu);
+		break;
 	}
 	return NOTIFY_OK;
 }