提交 bf80bbd7 编写于 作者: A Aravind Gopalakrishnan 提交者: Borislav Petkov

x86/mce: Add an AMD severities-grading function

Add a severities function that caters to AMD processors. This allows us
to do some vendor-specific work within the function if necessary.

Also, introduce a vendor flag bitfield for vendor-specific settings. The
severities code uses this to define error scope based on the presence
of the flags field.

This is based off of work by Boris Petkov.

Testing details:
Fam10h, Model 9h (Greyhound)
Fam15h: Models 0h-0fh (Orochi), 30h-3fh (Kaveri) and 60h-6fh (Carrizo),
Fam16h Model 00h-0fh (Kabini)

Boris:
Intel SNB
AMD K8 (JH-E0)
Signed-off-by: Aravind Gopalakrishnan <aravind.gopalakrishnan@amd.com>
Acked-by: Tony Luck <tony.luck@intel.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Chen Yucong <slaoub@gmail.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: linux-edac@vger.kernel.org
Link: http://lkml.kernel.org/r/1427125373-2918-2-git-send-email-Aravind.Gopalakrishnan@amd.com
[ Fixup build, clean up comments. ]
Signed-off-by: Borislav Petkov <bp@suse.de>
上级 c9ce8712
...@@ -116,6 +116,12 @@ struct mca_config { ...@@ -116,6 +116,12 @@ struct mca_config {
u32 rip_msr; u32 rip_msr;
}; };
/*
 * Vendor-specific MCE feature flags, packed into a single 64-bit word.
 *
 * overflow_recov: set from bit 0 of cpuid_ebx(0x80000007) on AMD, and
 *                 forced on for F15h Models 00h-0fh, which have no CPUID
 *                 bit for it.
 * __reserved_0:   remaining 63 bits, currently unused.
 */
struct mce_vendor_flags {
	__u64 overflow_recov	: 1;
	__u64 __reserved_0	: 63;
};
extern struct mce_vendor_flags mce_flags;
extern struct mca_config mca_cfg; extern struct mca_config mca_cfg;
extern void mce_register_decode_chain(struct notifier_block *nb); extern void mce_register_decode_chain(struct notifier_block *nb);
extern void mce_unregister_decode_chain(struct notifier_block *nb); extern void mce_unregister_decode_chain(struct notifier_block *nb);
......
...@@ -186,12 +186,68 @@ static int error_context(struct mce *m) ...@@ -186,12 +186,68 @@ static int error_context(struct mce *m)
return ((m->cs & 3) == 3) ? IN_USER : IN_KERNEL; return ((m->cs & 3) == 3) ? IN_USER : IN_KERNEL;
} }
/*
 * Grade a machine check error on AMD processors.
 *
 * The grading follows the AMD Error Scope Hierarchy table found in a newer
 * BKDG, for example 49125_15h_Models_30h-3Fh_BKDG.pdf, section
 * "RAS Features".
 *
 * @m:   machine check record to grade (status and mcgstatus are examined)
 * @ctx: context the error was taken in, IN_USER or IN_KERNEL
 *
 * Returns one of the MCE_*_SEVERITY grades.
 */
static int mce_severity_amd(struct mce *m, enum context ctx)
{
	/* Processor Context Corrupt: nothing can be salvaged, panic. */
	if (m->status & MCI_STATUS_PCC)
		return MCE_PANIC_SEVERITY;

	if (!(m->status & MCI_STATUS_UC)) {
		/*
		 * Deferred error: the poll handler catches these and adds
		 * them to mce_ring so memory-failure can take recovery
		 * actions.
		 */
		if (m->status & MCI_STATUS_DEFERRED)
			return MCE_DEFERRED_SEVERITY;

		/*
		 * Corrected error: the poll handler catches these and hands
		 * decoding of the error over to EDAC.
		 */
		return MCE_KEEP_SEVERITY;
	}

	/*
	 * Uncorrected error from here on.
	 *
	 * With overflow_recov set, software can try to contain the error by
	 * at least killing the current process to prolong system operation.
	 * The only fatal case is a kernel-context error without a valid
	 * restart IP.
	 */
	if (mce_flags.overflow_recov) {
		if (ctx == IN_KERNEL && !(m->mcgstatus & MCG_STATUS_RIPV))
			return MCE_PANIC_SEVERITY;

		/* kill current process */
		return MCE_AR_SEVERITY;
	}

	/*
	 * Older systems without overflow_recov: an overflow means at least
	 * one error was not logged, so simply panic.
	 */
	if (m->status & MCI_STATUS_OVER)
		return MCE_PANIC_SEVERITY;

	/* Any other case: log the error and exit the #MC handler. */
	return MCE_UC_SEVERITY;
}
int mce_severity(struct mce *m, int tolerant, char **msg, bool is_excp) int mce_severity(struct mce *m, int tolerant, char **msg, bool is_excp)
{ {
enum exception excp = (is_excp ? EXCP_CONTEXT : NO_EXCP); enum exception excp = (is_excp ? EXCP_CONTEXT : NO_EXCP);
enum context ctx = error_context(m); enum context ctx = error_context(m);
struct severity *s; struct severity *s;
if (m->cpuvendor == X86_VENDOR_AMD)
return mce_severity_amd(m, ctx);
for (s = severities;; s++) { for (s = severities;; s++) {
if ((m->status & s->mask) != s->result) if ((m->status & s->mask) != s->result)
continue; continue;
......
...@@ -64,6 +64,7 @@ static DEFINE_MUTEX(mce_chrdev_read_mutex); ...@@ -64,6 +64,7 @@ static DEFINE_MUTEX(mce_chrdev_read_mutex);
DEFINE_PER_CPU(unsigned, mce_exception_count); DEFINE_PER_CPU(unsigned, mce_exception_count);
struct mce_bank *mce_banks __read_mostly; struct mce_bank *mce_banks __read_mostly;
struct mce_vendor_flags mce_flags __read_mostly;
struct mca_config mca_cfg __read_mostly = { struct mca_config mca_cfg __read_mostly = {
.bootlog = -1, .bootlog = -1,
...@@ -1534,6 +1535,13 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c) ...@@ -1534,6 +1535,13 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
if (c->x86 == 6 && cfg->banks > 0) if (c->x86 == 6 && cfg->banks > 0)
mce_banks[0].ctl = 0; mce_banks[0].ctl = 0;
/*
* overflow_recov is supported for F15h Models 00h-0fh
* even though we don't have a CPUID bit for it.
*/
if (c->x86 == 0x15 && c->x86_model <= 0xf)
mce_flags.overflow_recov = 1;
/* /*
* Turn off MC4_MISC thresholding banks on those models since * Turn off MC4_MISC thresholding banks on those models since
* they're not supported there. * they're not supported there.
...@@ -1633,6 +1641,7 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c) ...@@ -1633,6 +1641,7 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
break; break;
case X86_VENDOR_AMD: case X86_VENDOR_AMD:
mce_amd_feature_init(c); mce_amd_feature_init(c);
mce_flags.overflow_recov = cpuid_ebx(0x80000007) & 0x1;
break; break;
default: default:
break; break;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册