提交 553f265f 编写于 作者: A Andi Kleen 提交者: Linus Torvalds

[PATCH] x86_64: Don't run NMI watchdog during machine checks

Machine checks can stall the machine for a long time, and
it's not good to trigger the NMI watchdog while one is being handled.
Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
上级 be56db61
...@@ -29,6 +29,8 @@ ...@@ -29,6 +29,8 @@
#define MISC_MCELOG_MINOR 227 #define MISC_MCELOG_MINOR 227
#define NR_BANKS 6 #define NR_BANKS 6
atomic_t mce_entry;
static int mce_dont_init; static int mce_dont_init;
/* 0: always panic, 1: panic if deadlock possible, 2: try to avoid panic, /* 0: always panic, 1: panic if deadlock possible, 2: try to avoid panic,
...@@ -172,10 +174,12 @@ void do_machine_check(struct pt_regs * regs, long error_code) ...@@ -172,10 +174,12 @@ void do_machine_check(struct pt_regs * regs, long error_code)
int i; int i;
int panicm_found = 0; int panicm_found = 0;
atomic_inc(&mce_entry);
if (regs) if (regs)
notify_die(DIE_NMI, "machine check", regs, error_code, 18, SIGKILL); notify_die(DIE_NMI, "machine check", regs, error_code, 18, SIGKILL);
if (!banks) if (!banks)
return; goto out2;
memset(&m, 0, sizeof(struct mce)); memset(&m, 0, sizeof(struct mce));
m.cpu = safe_smp_processor_id(); m.cpu = safe_smp_processor_id();
...@@ -266,6 +270,8 @@ void do_machine_check(struct pt_regs * regs, long error_code) ...@@ -266,6 +270,8 @@ void do_machine_check(struct pt_regs * regs, long error_code)
out: out:
/* Last thing done in the machine check exception to clear state. */ /* Last thing done in the machine check exception to clear state. */
wrmsrl(MSR_IA32_MCG_STATUS, 0); wrmsrl(MSR_IA32_MCG_STATUS, 0);
out2:
atomic_dec(&mce_entry);
} }
/* /*
......
...@@ -34,6 +34,7 @@ ...@@ -34,6 +34,7 @@
#include <asm/proto.h> #include <asm/proto.h>
#include <asm/kdebug.h> #include <asm/kdebug.h>
#include <asm/local.h> #include <asm/local.h>
#include <asm/mce.h>
/* /*
* lapic_nmi_owner tracks the ownership of the lapic NMI hardware: * lapic_nmi_owner tracks the ownership of the lapic NMI hardware:
...@@ -480,6 +481,12 @@ void __kprobes nmi_watchdog_tick(struct pt_regs * regs, unsigned reason) ...@@ -480,6 +481,12 @@ void __kprobes nmi_watchdog_tick(struct pt_regs * regs, unsigned reason)
__get_cpu_var(nmi_touch) = 0; __get_cpu_var(nmi_touch) = 0;
touched = 1; touched = 1;
} }
#ifdef CONFIG_X86_MCE
/* Could check oops_in_progress here too, but it's safer
not to */
if (atomic_read(&mce_entry) > 0)
touched = 1;
#endif
if (!touched && __get_cpu_var(last_irq_sum) == sum) { if (!touched && __get_cpu_var(last_irq_sum) == sum) {
/* /*
* Ayiee, looks like this CPU is stuck ... * Ayiee, looks like this CPU is stuck ...
......
...@@ -70,6 +70,9 @@ struct mce_log { ...@@ -70,6 +70,9 @@ struct mce_log {
#define MCE_THRESHOLD_BASE MCE_EXTENDED_BANK + 1 /* MCE_AMD */ #define MCE_THRESHOLD_BASE MCE_EXTENDED_BANK + 1 /* MCE_AMD */
#define MCE_THRESHOLD_DRAM_ECC MCE_THRESHOLD_BASE + 4 #define MCE_THRESHOLD_DRAM_ECC MCE_THRESHOLD_BASE + 4
#ifdef __KERNEL__
#include <asm/atomic.h>
void mce_log(struct mce *m); void mce_log(struct mce *m);
#ifdef CONFIG_X86_MCE_INTEL #ifdef CONFIG_X86_MCE_INTEL
void mce_intel_feature_init(struct cpuinfo_x86 *c); void mce_intel_feature_init(struct cpuinfo_x86 *c);
...@@ -87,4 +90,8 @@ static inline void mce_amd_feature_init(struct cpuinfo_x86 *c) ...@@ -87,4 +90,8 @@ static inline void mce_amd_feature_init(struct cpuinfo_x86 *c)
} }
#endif #endif
extern atomic_t mce_entry;
#endif
#endif #endif
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册