提交 c6708d50 编写于 作者: Y Yazen Ghannam 提交者: Thomas Gleixner

x86/MCE: Report only DRAM ECC as memory errors on AMD systems

The MCA_STATUS[ErrorCodeExt] field is very bank type specific.
We currently check if the ErrorCodeExt value is 0x0 or 0x8 in
mce_is_memory_error(), but we don't check the bank number. This means
that we could flag non-memory errors as memory errors.

We know that we want to flag DRAM ECC errors as memory errors, so let's do
those cases first. We can add more cases later when needed.

Define a wrapper function in mce_amd.c so we can use SMCA enums.

[ bp: Remove brackets around return statements. ]
Signed-off-by: NYazen Ghannam <yazen.ghannam@amd.com>
Signed-off-by: NBorislav Petkov <bp@suse.de>
Signed-off-by: NThomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20171207203955.118171-2-Yazen.Ghannam@amd.com
上级 11cf8877
...@@ -376,6 +376,7 @@ struct smca_bank { ...@@ -376,6 +376,7 @@ struct smca_bank {
extern struct smca_bank smca_banks[MAX_NR_BANKS]; extern struct smca_bank smca_banks[MAX_NR_BANKS];
extern const char *smca_get_long_name(enum smca_bank_types t); extern const char *smca_get_long_name(enum smca_bank_types t);
extern bool amd_mce_is_memory_error(struct mce *m);
extern int mce_threshold_create_device(unsigned int cpu); extern int mce_threshold_create_device(unsigned int cpu);
extern int mce_threshold_remove_device(unsigned int cpu); extern int mce_threshold_remove_device(unsigned int cpu);
...@@ -384,6 +385,7 @@ extern int mce_threshold_remove_device(unsigned int cpu); ...@@ -384,6 +385,7 @@ extern int mce_threshold_remove_device(unsigned int cpu);
static inline int mce_threshold_create_device(unsigned int cpu) { return 0; }; static inline int mce_threshold_create_device(unsigned int cpu) { return 0; };
static inline int mce_threshold_remove_device(unsigned int cpu) { return 0; }; static inline int mce_threshold_remove_device(unsigned int cpu) { return 0; };
static inline bool amd_mce_is_memory_error(struct mce *m) { return false; };
#endif #endif
......
...@@ -503,10 +503,8 @@ static int mce_usable_address(struct mce *m) ...@@ -503,10 +503,8 @@ static int mce_usable_address(struct mce *m)
bool mce_is_memory_error(struct mce *m) bool mce_is_memory_error(struct mce *m)
{ {
if (m->cpuvendor == X86_VENDOR_AMD) { if (m->cpuvendor == X86_VENDOR_AMD) {
/* ErrCodeExt[20:16] */ return amd_mce_is_memory_error(m);
u8 xec = (m->status >> 16) & 0x1f;
return (xec == 0x0 || xec == 0x8);
} else if (m->cpuvendor == X86_VENDOR_INTEL) { } else if (m->cpuvendor == X86_VENDOR_INTEL) {
/* /*
* Intel SDM Volume 3B - 15.9.2 Compound Error Codes * Intel SDM Volume 3B - 15.9.2 Compound Error Codes
......
...@@ -754,6 +754,17 @@ int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr) ...@@ -754,6 +754,17 @@ int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr)
} }
EXPORT_SYMBOL_GPL(umc_normaddr_to_sysaddr); EXPORT_SYMBOL_GPL(umc_normaddr_to_sysaddr);
bool amd_mce_is_memory_error(struct mce *m)
{
/* ErrCodeExt[20:16] */
u8 xec = (m->status >> 16) & 0x1f;
if (mce_flags.smca)
return smca_get_bank_type(m) == SMCA_UMC && xec == 0x0;
return m->bank == 4 && xec == 0x8;
}
static void __log_error(unsigned int bank, u64 status, u64 addr, u64 misc) static void __log_error(unsigned int bank, u64 status, u64 addr, u64 misc)
{ {
struct mce m; struct mce m;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册