diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 982dfc3679ad3d9cd741d75f8799bc1b8b8c5ef7..dfaa4de1dbb4f8a5ce1bcf3d0e4f73ca24595be3 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -140,7 +140,7 @@ struct mce_vendor_flags { extern struct mce_vendor_flags mce_flags; extern struct mca_config mca_cfg; -extern void mce_register_decode_chain(struct notifier_block *nb); +extern void mce_register_decode_chain(struct notifier_block *nb, bool drain); extern void mce_unregister_decode_chain(struct notifier_block *nb); #include diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 456f8d7b8fd38702d902f1f96825c760b983c1a3..82603690b65ced5b10a7c85de49d86a8ee56bd76 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -114,6 +114,7 @@ static struct work_struct mce_work; static struct irq_work mce_irq_work; static void (*quirk_no_way_out)(int bank, struct mce *m, struct pt_regs *regs); +static int mce_usable_address(struct mce *m); /* * CPU/chipset specific EDAC code can register a notifier call here to print @@ -234,11 +235,18 @@ static void drain_mcelog_buffer(void) } while (next != prev); } +static struct notifier_block mce_srao_nb; -void mce_register_decode_chain(struct notifier_block *nb) +void mce_register_decode_chain(struct notifier_block *nb, bool drain) { + /* Ensure SRAO notifier has the highest priority in the decode chain. */ + if (nb != &mce_srao_nb && nb->priority == INT_MAX) + nb->priority -= 1; + atomic_notifier_chain_register(&x86_mce_decoder_chain, nb); - drain_mcelog_buffer(); + + if (drain) + drain_mcelog_buffer(); } EXPORT_SYMBOL_GPL(mce_register_decode_chain); @@ -462,61 +470,6 @@ static inline void mce_gather_info(struct mce *m, struct pt_regs *regs) } } -/* - * Simple lockless ring to communicate PFNs from the exception handler with the - * process context work function. This is vastly simplified because there's - * only a single reader and a single writer. - */ -#define MCE_RING_SIZE 16 /* we use one entry less */ - -struct mce_ring { - unsigned short start; - unsigned short end; - unsigned long ring[MCE_RING_SIZE]; -}; -static DEFINE_PER_CPU(struct mce_ring, mce_ring); - -/* Runs with CPU affinity in workqueue */ -static int mce_ring_empty(void) -{ - struct mce_ring *r = this_cpu_ptr(&mce_ring); - - return r->start == r->end; -} - -static int mce_ring_get(unsigned long *pfn) -{ - struct mce_ring *r; - int ret = 0; - - *pfn = 0; - get_cpu(); - r = this_cpu_ptr(&mce_ring); - if (r->start == r->end) - goto out; - *pfn = r->ring[r->start]; - r->start = (r->start + 1) % MCE_RING_SIZE; - ret = 1; -out: - put_cpu(); - return ret; -} - -/* Always runs in MCE context with preempt off */ -static int mce_ring_add(unsigned long pfn) -{ - struct mce_ring *r = this_cpu_ptr(&mce_ring); - unsigned next; - - next = (r->end + 1) % MCE_RING_SIZE; - if (next == r->start) - return -1; - r->ring[r->end] = pfn; - wmb(); - r->end = next; - return 0; -} - int mce_available(struct cpuinfo_x86 *c) { if (mca_cfg.disabled) @@ -526,7 +479,7 @@ int mce_available(struct cpuinfo_x86 *c) static void mce_schedule_work(void) { - if (!mce_ring_empty()) + if (!mce_gen_pool_empty() && keventd_up()) schedule_work(&mce_work); } @@ -553,6 +506,27 @@ static void mce_report_event(struct pt_regs *regs) irq_work_queue(&mce_irq_work); } +static int srao_decode_notifier(struct notifier_block *nb, unsigned long val, + void *data) +{ + struct mce *mce = (struct mce *)data; + unsigned long pfn; + + if (!mce) + return NOTIFY_DONE; + + if (mce->usable_addr && (mce->severity == MCE_AO_SEVERITY)) { + pfn = mce->addr >> PAGE_SHIFT; + memory_failure(pfn, MCE_VECTOR, 0); + } + + return NOTIFY_OK; +} +static struct notifier_block mce_srao_nb = { + .notifier_call = srao_decode_notifier, + .priority = INT_MAX, +}; + /* * Read ADDR and MISC registers. */ @@ -671,8 +645,11 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b) */ if (severity == MCE_DEFERRED_SEVERITY && memory_error(&m)) { if (m.status & MCI_STATUS_ADDRV) { - mce_ring_add(m.addr >> PAGE_SHIFT); - mce_schedule_work(); + m.severity = severity; + m.usable_addr = mce_usable_address(&m); + + if (!mce_gen_pool_add(&m)) + mce_schedule_work(); } } @@ -1142,15 +1119,10 @@ void do_machine_check(struct pt_regs *regs, long error_code) mce_read_aux(&m, i); - /* - * Action optional error. Queue address for later processing. - * When the ring overflows we just ignore the AO error. - * RED-PEN add some logging mechanism when - * usable_address or mce_add_ring fails. - * RED-PEN don't ignore overflow for mca_cfg.tolerant == 0 - */ - if (severity == MCE_AO_SEVERITY && mce_usable_address(&m)) - mce_ring_add(m.addr >> PAGE_SHIFT); + /* assuming valid severity level != 0 */ + m.severity = severity; + m.usable_addr = mce_usable_address(&m); + mce_gen_pool_add(&m); mce_log(&m); @@ -1246,14 +1218,11 @@ int memory_failure(unsigned long pfn, int vector, int flags) /* * Action optional processing happens here (picking up * from the list of faulting pages that do_machine_check() - * placed into the "ring"). + * placed into the genpool). */ static void mce_process_work(struct work_struct *dummy) { - unsigned long pfn; - - while (mce_ring_get(&pfn)) - memory_failure(pfn, MCE_VECTOR, 0); + mce_gen_pool_process(); } #ifdef CONFIG_X86_MCE_INTEL @@ -2059,6 +2028,7 @@ __setup("mce", mcheck_enable); int __init mcheck_init(void) { mcheck_intel_therm_init(); + mce_register_decode_chain(&mce_srao_nb, false); mcheck_vendor_init_severity(); INIT_WORK(&mce_work, mce_process_work); @@ -2597,5 +2567,20 @@ static int __init mcheck_debugfs_init(void) return 0; } -late_initcall(mcheck_debugfs_init); +#else +static int __init mcheck_debugfs_init(void) { return -EINVAL; } #endif + +static int __init mcheck_late_init(void) +{ + mcheck_debugfs_init(); + + /* + * Flush out everything that has been logged during early boot, now that + * everything has been initialized (workqueues, decoders, ...). + */ + mce_schedule_work(); + + return 0; +} +late_initcall(mcheck_late_init); diff --git a/drivers/acpi/acpi_extlog.c b/drivers/acpi/acpi_extlog.c index b3842ffc19ba20a210bec8dd8dc5f56cf9119de9..07e012e74c1bbe4dab50adcb7d33b58dc1b4ff45 100644 --- a/drivers/acpi/acpi_extlog.c +++ b/drivers/acpi/acpi_extlog.c @@ -286,7 +286,7 @@ static int __init extlog_init(void) */ old_edac_report_status = get_edac_report_status(); set_edac_report_status(EDAC_REPORTING_DISABLED); - mce_register_decode_chain(&extlog_mce_dec); + mce_register_decode_chain(&extlog_mce_dec, true); /* enable OS to be involved to take over management from BIOS */ ((struct extlog_l1_head *)extlog_l1_addr)->flags |= FLAG_OS_OPTIN; diff --git a/drivers/edac/i7core_edac.c b/drivers/edac/i7core_edac.c index 01087a38da226d08bd7e08da5a183885fbf420b1..13d77f4a892c7244dcaac13ec3ddce1c95759325 100644 --- a/drivers/edac/i7core_edac.c +++ b/drivers/edac/i7core_edac.c @@ -2424,7 +2424,7 @@ static int __init i7core_init(void) pci_rc = pci_register_driver(&i7core_driver); if (pci_rc >= 0) { - mce_register_decode_chain(&i7_mce_dec); + mce_register_decode_chain(&i7_mce_dec, true); return 0; } diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c index 58586d59bf8ed2ff2b7a7abf20288105da43f2d3..aca31a2370732cd74cc6cee2c4c925c180fc02ba 100644 --- a/drivers/edac/mce_amd.c +++ b/drivers/edac/mce_amd.c @@ -895,7 +895,7 @@ static int __init mce_amd_init(void) pr_info("MCE: In-kernel MCE decoding enabled.\n"); - mce_register_decode_chain(&amd_mce_dec_nb); + mce_register_decode_chain(&amd_mce_dec_nb, true); return 0; } diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c index ca7831168298a444d1b2dd55fbdec48a287b5ab1..5780e26c3e5845c665bd684bb6f833a252436830 100644 --- a/drivers/edac/sb_edac.c +++ b/drivers/edac/sb_edac.c @@ -2591,7 +2591,7 @@ static int __init sbridge_init(void) pci_rc = pci_register_driver(&sbridge_driver); if (pci_rc >= 0) { - mce_register_decode_chain(&sbridge_mce_dec); + mce_register_decode_chain(&sbridge_mce_dec, true); if (get_edac_report_status() == EDAC_REPORTING_DISABLED) sbridge_printk(KERN_WARNING, "Loading driver, error reporting disabled.\n"); return 0;