diff --git a/drivers/edac/skx_common.c b/drivers/edac/skx_common.c index f0f8e98f6efb289f0c415be7885f7473e65b8ca3..5fdcb6d5b96b6e8f056e5a5a9ba0eda92564fdab 100644 --- a/drivers/edac/skx_common.c +++ b/drivers/edac/skx_common.c @@ -632,12 +632,18 @@ static bool skx_error_in_1st_level_mem(const struct mce *m) if (!skx_mem_cfg_2lm) return false; - errcode = GET_BITFIELD(m->status, 0, 15); + errcode = GET_BITFIELD(m->status, 0, 15) & MCACOD_MEM_ERR_MASK; - if ((errcode & 0xef80) != 0x280) - return false; + return errcode == MCACOD_EXT_MEM_ERR; +} - return true; +static bool skx_error_in_mem(const struct mce *m) +{ + u32 errcode; + + errcode = GET_BITFIELD(m->status, 0, 15) & MCACOD_MEM_ERR_MASK; + + return (errcode == MCACOD_MEM_CTL_ERR || errcode == MCACOD_EXT_MEM_ERR); } int skx_mce_check_error(struct notifier_block *nb, unsigned long val, @@ -651,8 +657,8 @@ int skx_mce_check_error(struct notifier_block *nb, unsigned long val, if (mce->kflags & MCE_HANDLED_CEC) return NOTIFY_DONE; - /* ignore unless this is memory related with an address */ - if ((mce->status & 0xefff) >> 7 != 1 || !(mce->status & MCI_STATUS_ADDRV)) + /* Ignore unless this is memory related with an address */ + if (!skx_error_in_mem(mce) || !(mce->status & MCI_STATUS_ADDRV)) return NOTIFY_DONE; memset(&res, 0, sizeof(res)); diff --git a/drivers/edac/skx_common.h b/drivers/edac/skx_common.h index 0cbadd3d2cd39044f438dfa6bc6431ac72826229..312032657264912352a5e804709f3da49978d73e 100644 --- a/drivers/edac/skx_common.h +++ b/drivers/edac/skx_common.h @@ -56,6 +56,30 @@ #define MCI_MISC_ECC_MODE(m) (((m) >> 59) & 15) #define MCI_MISC_ECC_DDRT 8 /* read from DDRT */ +/* + * According to Intel Architecture spec vol 3B, + * Table 15-10 "IA32_MCi_Status [15:0] Compound Error Code Encoding" + * memory errors should fit one of these masks: + * 000f 0000 1mmm cccc (binary) + * 000f 0010 1mmm cccc (binary) [RAM used as cache] + * where: + * f = Correction Report Filtering Bit. If 1, subsequent errors + * won't be shown + * mmm = error type + * cccc = channel + */ +#define MCACOD_MEM_ERR_MASK 0xef80 +/* + * Errors from either the memory of the 1-level memory system or the + * 2nd level memory (the slow "far" memory) of the 2-level memory system. + */ +#define MCACOD_MEM_CTL_ERR 0x80 +/* + * Errors from the 1st level memory (the fast "near" memory as cache) + * of the 2-level memory system. + */ +#define MCACOD_EXT_MEM_ERR 0x280 + /* * Each cpu socket contains some pci devices that provide global * information, and also some that are local to each of the two