提交 55789bce 编写于 作者: T Tony Luck 提交者: Caspar Zhang

x86/mce: Fix all mce notifiers to update the mce->kflags bitmask

fix #29415191

commit 23ba710a0864108910c7531dc4c73ef65eca5568 upstream

If the handler took any action to log or deal with the error, set a bit
in mce->kflags so that the default handler on the end of the machine
check chain can see what has been done.

Get rid of NOTIFY_STOP returns. Make the EDAC and dev-mcelog handlers
skip over errors already processed by CEC.
Signed-off-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Tested-by: Tony Luck <tony.luck@intel.com>
Link: https://lkml.kernel.org/r/20200214222720.13168-5-tony.luck@intel.com
Signed-off-by: Youquan Song <youquan.song@intel.com>
Signed-off-by: Wetp Zhang <wetp.zy@linux.alibaba.com>
Reviewed-by: Artie Ding <artie.ding@linux.alibaba.com>
上级 8e98e1ff
...@@ -47,6 +47,9 @@ static int dev_mce_log(struct notifier_block *nb, unsigned long val, ...@@ -47,6 +47,9 @@ static int dev_mce_log(struct notifier_block *nb, unsigned long val,
struct mce *mce = (struct mce *)data; struct mce *mce = (struct mce *)data;
unsigned int entry; unsigned int entry;
if (mce->kflags & MCE_HANDLED_CEC)
return NOTIFY_DONE;
mutex_lock(&mce_chrdev_read_mutex); mutex_lock(&mce_chrdev_read_mutex);
entry = mcelog.next; entry = mcelog.next;
...@@ -64,6 +67,7 @@ static int dev_mce_log(struct notifier_block *nb, unsigned long val, ...@@ -64,6 +67,7 @@ static int dev_mce_log(struct notifier_block *nb, unsigned long val,
memcpy(mcelog.entry + entry, mce, sizeof(struct mce)); memcpy(mcelog.entry + entry, mce, sizeof(struct mce));
mcelog.entry[entry].finished = 1; mcelog.entry[entry].finished = 1;
mcelog.entry[entry].kflags = 0;
/* wake processes polling /dev/mcelog */ /* wake processes polling /dev/mcelog */
wake_up_interruptible(&mce_chrdev_wait); wake_up_interruptible(&mce_chrdev_wait);
...@@ -71,6 +75,7 @@ static int dev_mce_log(struct notifier_block *nb, unsigned long val, ...@@ -71,6 +75,7 @@ static int dev_mce_log(struct notifier_block *nb, unsigned long val,
unlock: unlock:
mutex_unlock(&mce_chrdev_read_mutex); mutex_unlock(&mce_chrdev_read_mutex);
mce->kflags |= MCE_HANDLED_MCELOG;
return NOTIFY_OK; return NOTIFY_OK;
} }
......
...@@ -568,8 +568,10 @@ static bool cec_add_mce(struct mce *m) ...@@ -568,8 +568,10 @@ static bool cec_add_mce(struct mce *m)
if (mce_is_memory_error(m) && if (mce_is_memory_error(m) &&
mce_is_correctable(m) && mce_is_correctable(m) &&
mce_usable_address(m)) mce_usable_address(m))
if (!cec_add_elem(m->addr >> PAGE_SHIFT)) if (!cec_add_elem(m->addr >> PAGE_SHIFT)) {
m->kflags |= MCE_HANDLED_CEC;
return true; return true;
}
return false; return false;
} }
...@@ -611,8 +613,10 @@ static int srao_decode_notifier(struct notifier_block *nb, unsigned long val, ...@@ -611,8 +613,10 @@ static int srao_decode_notifier(struct notifier_block *nb, unsigned long val,
if (mce_usable_address(mce) && (mce->severity == MCE_AO_SEVERITY)) { if (mce_usable_address(mce) && (mce->severity == MCE_AO_SEVERITY)) {
pfn = mce->addr >> PAGE_SHIFT; pfn = mce->addr >> PAGE_SHIFT;
if (!memory_failure(pfn, 0)) if (!memory_failure(pfn, 0)) {
set_mce_nospec(pfn, whole_page(mce)); set_mce_nospec(pfn, whole_page(mce));
mce->kflags |= MCE_HANDLED_UC;
}
} }
return NOTIFY_OK; return NOTIFY_OK;
......
...@@ -147,7 +147,7 @@ static int extlog_print(struct notifier_block *nb, unsigned long val, ...@@ -147,7 +147,7 @@ static int extlog_print(struct notifier_block *nb, unsigned long val,
static u32 err_seq; static u32 err_seq;
estatus = extlog_elog_entry_check(cpu, bank); estatus = extlog_elog_entry_check(cpu, bank);
if (estatus == NULL) if (estatus == NULL || (mce->kflags & MCE_HANDLED_CEC))
return NOTIFY_DONE; return NOTIFY_DONE;
memcpy(elog_buf, (void *)estatus, ELOG_ENTRY_LEN); memcpy(elog_buf, (void *)estatus, ELOG_ENTRY_LEN);
...@@ -177,7 +177,8 @@ static int extlog_print(struct notifier_block *nb, unsigned long val, ...@@ -177,7 +177,8 @@ static int extlog_print(struct notifier_block *nb, unsigned long val,
} }
out: out:
return NOTIFY_STOP; mce->kflags |= MCE_HANDLED_EXTLOG;
return NOTIFY_OK;
} }
static bool __init extlog_get_l1addr(void) static bool __init extlog_get_l1addr(void)
......
...@@ -84,6 +84,7 @@ static int nfit_handle_mce(struct notifier_block *nb, unsigned long val, ...@@ -84,6 +84,7 @@ static int nfit_handle_mce(struct notifier_block *nb, unsigned long val,
*/ */
acpi_nfit_ars_rescan(acpi_desc, 0); acpi_nfit_ars_rescan(acpi_desc, 0);
} }
mce->kflags |= MCE_HANDLED_NFIT;
break; break;
} }
......
...@@ -1819,7 +1819,7 @@ static int i7core_mce_check_error(struct notifier_block *nb, unsigned long val, ...@@ -1819,7 +1819,7 @@ static int i7core_mce_check_error(struct notifier_block *nb, unsigned long val,
struct i7core_pvt *pvt; struct i7core_pvt *pvt;
i7_dev = get_i7core_dev(mce->socketid); i7_dev = get_i7core_dev(mce->socketid);
if (!i7_dev) if (!i7_dev || (mce->kflags & MCE_HANDLED_CEC))
return NOTIFY_DONE; return NOTIFY_DONE;
mci = i7_dev->mci; mci = i7_dev->mci;
...@@ -1839,7 +1839,8 @@ static int i7core_mce_check_error(struct notifier_block *nb, unsigned long val, ...@@ -1839,7 +1839,8 @@ static int i7core_mce_check_error(struct notifier_block *nb, unsigned long val,
i7core_check_error(mci, mce); i7core_check_error(mci, mce);
/* Advise mcelog that the errors were handled */ /* Advise mcelog that the errors were handled */
return NOTIFY_STOP; mce->kflags |= MCE_HANDLED_EDAC;
return NOTIFY_OK;
} }
static struct notifier_block i7_mce_dec = { static struct notifier_block i7_mce_dec = {
......
...@@ -1042,6 +1042,9 @@ amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data) ...@@ -1042,6 +1042,9 @@ amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data)
if (amd_filter_mce(m)) if (amd_filter_mce(m))
return NOTIFY_STOP; return NOTIFY_STOP;
if (m->kflags & MCE_HANDLED_CEC)
return NOTIFY_DONE;
pr_emerg(HW_ERR "%s\n", decode_error_status(m)); pr_emerg(HW_ERR "%s\n", decode_error_status(m));
pr_emerg(HW_ERR "CPU:%d (%x:%x:%x) MC%d_STATUS[%s|%s|%s|%s|%s", pr_emerg(HW_ERR "CPU:%d (%x:%x:%x) MC%d_STATUS[%s|%s|%s|%s|%s",
...@@ -1141,7 +1144,8 @@ amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data) ...@@ -1141,7 +1144,8 @@ amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data)
err_code: err_code:
amd_decode_err_code(m->status & 0xffff); amd_decode_err_code(m->status & 0xffff);
return NOTIFY_STOP; m->kflags |= MCE_HANDLED_EDAC;
return NOTIFY_OK;
} }
static struct notifier_block amd_mce_dec_nb = { static struct notifier_block amd_mce_dec_nb = {
......
...@@ -1408,7 +1408,7 @@ static int pnd2_mce_check_error(struct notifier_block *nb, unsigned long val, vo ...@@ -1408,7 +1408,7 @@ static int pnd2_mce_check_error(struct notifier_block *nb, unsigned long val, vo
return NOTIFY_DONE; return NOTIFY_DONE;
mci = pnd2_mci; mci = pnd2_mci;
if (!mci) if (!mci || (mce->kflags & MCE_HANDLED_CEC))
return NOTIFY_DONE; return NOTIFY_DONE;
/* /*
...@@ -1437,7 +1437,8 @@ static int pnd2_mce_check_error(struct notifier_block *nb, unsigned long val, vo ...@@ -1437,7 +1437,8 @@ static int pnd2_mce_check_error(struct notifier_block *nb, unsigned long val, vo
pnd2_mce_output_error(mci, mce, &daddr); pnd2_mce_output_error(mci, mce, &daddr);
/* Advice mcelog that the error were handled */ /* Advice mcelog that the error were handled */
return NOTIFY_STOP; mce->kflags |= MCE_HANDLED_EDAC;
return NOTIFY_OK;
} }
static struct notifier_block pnd2_mce_dec = { static struct notifier_block pnd2_mce_dec = {
......
...@@ -3042,6 +3042,8 @@ static int sbridge_mce_check_error(struct notifier_block *nb, unsigned long val, ...@@ -3042,6 +3042,8 @@ static int sbridge_mce_check_error(struct notifier_block *nb, unsigned long val,
if (edac_get_report_status() == EDAC_REPORTING_DISABLED) if (edac_get_report_status() == EDAC_REPORTING_DISABLED)
return NOTIFY_DONE; return NOTIFY_DONE;
if (mce->kflags & MCE_HANDLED_CEC)
return NOTIFY_DONE;
/* /*
* Just let mcelog handle it if the error is * Just let mcelog handle it if the error is
...@@ -3089,7 +3091,8 @@ static int sbridge_mce_check_error(struct notifier_block *nb, unsigned long val, ...@@ -3089,7 +3091,8 @@ static int sbridge_mce_check_error(struct notifier_block *nb, unsigned long val,
sbridge_mce_output_error(mci, mce); sbridge_mce_output_error(mci, mce);
/* Advice mcelog that the error were handled */ /* Advice mcelog that the error were handled */
return NOTIFY_STOP; mce->kflags |= MCE_HANDLED_EDAC;
return NOTIFY_OK;
} }
static struct notifier_block sbridge_mce_dec = { static struct notifier_block sbridge_mce_dec = {
......
...@@ -579,6 +579,9 @@ int skx_mce_check_error(struct notifier_block *nb, unsigned long val, ...@@ -579,6 +579,9 @@ int skx_mce_check_error(struct notifier_block *nb, unsigned long val,
if (edac_get_report_status() == EDAC_REPORTING_DISABLED) if (edac_get_report_status() == EDAC_REPORTING_DISABLED)
return NOTIFY_DONE; return NOTIFY_DONE;
if (mce->kflags & MCE_HANDLED_CEC)
return NOTIFY_DONE;
/* ignore unless this is memory related with an address */ /* ignore unless this is memory related with an address */
if ((mce->status & 0xefff) >> 7 != 1 || !(mce->status & MCI_STATUS_ADDRV)) if ((mce->status & 0xefff) >> 7 != 1 || !(mce->status & MCI_STATUS_ADDRV))
return NOTIFY_DONE; return NOTIFY_DONE;
...@@ -621,6 +624,7 @@ int skx_mce_check_error(struct notifier_block *nb, unsigned long val, ...@@ -621,6 +624,7 @@ int skx_mce_check_error(struct notifier_block *nb, unsigned long val,
skx_mce_output_error(mci, mce, &res); skx_mce_output_error(mci, mce, &res);
mce->kflags |= MCE_HANDLED_EDAC;
return NOTIFY_DONE; return NOTIFY_DONE;
} }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册