From 55789bced0327aefe261cdf776e013317ffc0b1c Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Mon, 6 Jul 2020 22:13:30 +0800 Subject: [PATCH] x86/mce: Fix all mce notifiers to update the mce->kflags bitmask fix #29415191 commit 23ba710a0864108910c7531dc4c73ef65eca5568 upstream If the handler took any action to log or deal with the error, set a bit in mce->kflags so that the default handler on the end of the machine check chain can see what has been done. Get rid of NOTIFY_STOP returns. Make the EDAC and dev-mcelog handlers skip over errors already processed by CEC. Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov Tested-by: Tony Luck Link: https://lkml.kernel.org/r/20200214222720.13168-5-tony.luck@intel.com Signed-off-by: Youquan Song Signed-off-by: Wetp Zhang Reviewed-by: Artie Ding --- arch/x86/kernel/cpu/mcheck/dev-mcelog.c | 5 +++++ arch/x86/kernel/cpu/mcheck/mce.c | 8 ++++++-- drivers/acpi/acpi_extlog.c | 5 +++-- drivers/acpi/nfit/mce.c | 1 + drivers/edac/i7core_edac.c | 5 +++-- drivers/edac/mce_amd.c | 6 +++++- drivers/edac/pnd2_edac.c | 5 +++-- drivers/edac/sb_edac.c | 5 ++++- drivers/edac/skx_common.c | 4 ++++ 9 files changed, 34 insertions(+), 10 deletions(-) diff --git a/arch/x86/kernel/cpu/mcheck/dev-mcelog.c b/arch/x86/kernel/cpu/mcheck/dev-mcelog.c index 97685a0c3175..d43576096f68 100644 --- a/arch/x86/kernel/cpu/mcheck/dev-mcelog.c +++ b/arch/x86/kernel/cpu/mcheck/dev-mcelog.c @@ -47,6 +47,9 @@ static int dev_mce_log(struct notifier_block *nb, unsigned long val, struct mce *mce = (struct mce *)data; unsigned int entry; + if (mce->kflags & MCE_HANDLED_CEC) + return NOTIFY_DONE; + mutex_lock(&mce_chrdev_read_mutex); entry = mcelog.next; @@ -64,6 +67,7 @@ static int dev_mce_log(struct notifier_block *nb, unsigned long val, memcpy(mcelog.entry + entry, mce, sizeof(struct mce)); mcelog.entry[entry].finished = 1; + mcelog.entry[entry].kflags = 0; /* wake processes polling /dev/mcelog */ wake_up_interruptible(&mce_chrdev_wait); @@ -71,6 +75,7 @@ static int dev_mce_log(struct notifier_block *nb, unsigned long val, unlock: mutex_unlock(&mce_chrdev_read_mutex); + mce->kflags |= MCE_HANDLED_MCELOG; return NOTIFY_OK; } diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index ec5d86910d47..f0484ff49652 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -568,8 +568,10 @@ static bool cec_add_mce(struct mce *m) if (mce_is_memory_error(m) && mce_is_correctable(m) && mce_usable_address(m)) - if (!cec_add_elem(m->addr >> PAGE_SHIFT)) + if (!cec_add_elem(m->addr >> PAGE_SHIFT)) { + m->kflags |= MCE_HANDLED_CEC; return true; + } return false; } @@ -611,8 +613,10 @@ static int srao_decode_notifier(struct notifier_block *nb, unsigned long val, if (mce_usable_address(mce) && (mce->severity == MCE_AO_SEVERITY)) { pfn = mce->addr >> PAGE_SHIFT; - if (!memory_failure(pfn, 0)) + if (!memory_failure(pfn, 0)) { set_mce_nospec(pfn, whole_page(mce)); + mce->kflags |= MCE_HANDLED_UC; + } } return NOTIFY_OK; diff --git a/drivers/acpi/acpi_extlog.c b/drivers/acpi/acpi_extlog.c index 560fdae8cc59..90b36ff13259 100644 --- a/drivers/acpi/acpi_extlog.c +++ b/drivers/acpi/acpi_extlog.c @@ -147,7 +147,7 @@ static int extlog_print(struct notifier_block *nb, unsigned long val, static u32 err_seq; estatus = extlog_elog_entry_check(cpu, bank); - if (estatus == NULL) + if (estatus == NULL || (mce->kflags & MCE_HANDLED_CEC)) return NOTIFY_DONE; memcpy(elog_buf, (void *)estatus, ELOG_ENTRY_LEN); @@ -177,7 +177,8 @@ static int extlog_print(struct notifier_block *nb, unsigned long val, } out: - return NOTIFY_STOP; + mce->kflags |= MCE_HANDLED_EXTLOG; + return NOTIFY_OK; } static bool __init extlog_get_l1addr(void) diff --git a/drivers/acpi/nfit/mce.c b/drivers/acpi/nfit/mce.c index d6c1b10f6c25..455ee7e018d7 100644 --- a/drivers/acpi/nfit/mce.c +++ b/drivers/acpi/nfit/mce.c @@ -84,6 +84,7 @@ static int nfit_handle_mce(struct notifier_block *nb, unsigned long val, */ acpi_nfit_ars_rescan(acpi_desc, 0); } + mce->kflags |= MCE_HANDLED_NFIT; break; } diff --git a/drivers/edac/i7core_edac.c b/drivers/edac/i7core_edac.c index 4a3300c2da33..5a4f47a501d9 100644 --- a/drivers/edac/i7core_edac.c +++ b/drivers/edac/i7core_edac.c @@ -1819,7 +1819,7 @@ static int i7core_mce_check_error(struct notifier_block *nb, unsigned long val, struct i7core_pvt *pvt; i7_dev = get_i7core_dev(mce->socketid); - if (!i7_dev) + if (!i7_dev || (mce->kflags & MCE_HANDLED_CEC)) return NOTIFY_DONE; mci = i7_dev->mci; @@ -1839,7 +1839,8 @@ static int i7core_mce_check_error(struct notifier_block *nb, unsigned long val, i7core_check_error(mci, mce); /* Advise mcelog that the errors were handled */ - return NOTIFY_STOP; + mce->kflags |= MCE_HANDLED_EDAC; + return NOTIFY_OK; } static struct notifier_block i7_mce_dec = { diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c index c1aa0064b1d9..96adc7607652 100644 --- a/drivers/edac/mce_amd.c +++ b/drivers/edac/mce_amd.c @@ -1042,6 +1042,9 @@ amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data) if (amd_filter_mce(m)) return NOTIFY_STOP; + if (m->kflags & MCE_HANDLED_CEC) + return NOTIFY_DONE; + pr_emerg(HW_ERR "%s\n", decode_error_status(m)); pr_emerg(HW_ERR "CPU:%d (%x:%x:%x) MC%d_STATUS[%s|%s|%s|%s|%s", @@ -1141,7 +1144,8 @@ amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data) err_code: amd_decode_err_code(m->status & 0xffff); - return NOTIFY_STOP; + m->kflags |= MCE_HANDLED_EDAC; + return NOTIFY_OK; } static struct notifier_block amd_mce_dec_nb = { diff --git a/drivers/edac/pnd2_edac.c b/drivers/edac/pnd2_edac.c index 0153c730750e..fbfb6cb768cc 100644 --- a/drivers/edac/pnd2_edac.c +++ b/drivers/edac/pnd2_edac.c @@ -1408,7 +1408,7 @@ static int pnd2_mce_check_error(struct notifier_block *nb, unsigned long val, vo return NOTIFY_DONE; mci = pnd2_mci; - if (!mci) + if (!mci || (mce->kflags & MCE_HANDLED_CEC)) return NOTIFY_DONE; /* @@ -1437,7 +1437,8 @@ static int pnd2_mce_check_error(struct notifier_block *nb, unsigned long val, vo pnd2_mce_output_error(mci, mce, &daddr); /* Advice mcelog that the error were handled */ - return NOTIFY_STOP; + mce->kflags |= MCE_HANDLED_EDAC; + return NOTIFY_OK; } static struct notifier_block pnd2_mce_dec = { diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c index 53074ad361e5..757eb83f913e 100644 --- a/drivers/edac/sb_edac.c +++ b/drivers/edac/sb_edac.c @@ -3042,6 +3042,8 @@ static int sbridge_mce_check_error(struct notifier_block *nb, unsigned long val, if (edac_get_report_status() == EDAC_REPORTING_DISABLED) return NOTIFY_DONE; + if (mce->kflags & MCE_HANDLED_CEC) + return NOTIFY_DONE; /* * Just let mcelog handle it if the error is @@ -3089,7 +3091,8 @@ static int sbridge_mce_check_error(struct notifier_block *nb, unsigned long val, sbridge_mce_output_error(mci, mce); /* Advice mcelog that the error were handled */ - return NOTIFY_STOP; + mce->kflags |= MCE_HANDLED_EDAC; + return NOTIFY_OK; } static struct notifier_block sbridge_mce_dec = { diff --git a/drivers/edac/skx_common.c b/drivers/edac/skx_common.c index 575342ac2151..2b081cb05424 100644 --- a/drivers/edac/skx_common.c +++ b/drivers/edac/skx_common.c @@ -579,6 +579,9 @@ int skx_mce_check_error(struct notifier_block *nb, unsigned long val, if (edac_get_report_status() == EDAC_REPORTING_DISABLED) return NOTIFY_DONE; + if (mce->kflags & MCE_HANDLED_CEC) + return NOTIFY_DONE; + /* ignore unless this is memory related with an address */ if ((mce->status & 0xefff) >> 7 != 1 || !(mce->status & MCI_STATUS_ADDRV)) return NOTIFY_DONE; @@ -621,6 +624,7 @@ int skx_mce_check_error(struct notifier_block *nb, unsigned long val, skx_mce_output_error(mci, mce, &res); + mce->kflags |= MCE_HANDLED_EDAC; return NOTIFY_DONE; } -- GitLab