提交 fd4cf79f 编写于 作者: C Chen, Gong 提交者: Ingo Molnar

x86/mce: Remove the MCE ring for Action Optional errors

Use unified genpool to save Action Optional error events and put
Action Optional error handling in the same notification chain as
MCE error decoding.
Signed-off-by: Chen, Gong <gong.chen@linux.intel.com>
[ Fold in subsequent patch from Boris for early boot logging. ]
Signed-off-by: Tony Luck <tony.luck@intel.com>
[ Correct a lot. ]
Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1439396985-12812-5-git-send-email-bp@alien8.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
上级 061120ae
......@@ -140,7 +140,7 @@ struct mce_vendor_flags {
extern struct mce_vendor_flags mce_flags;
extern struct mca_config mca_cfg;
/*
 * Prototypes for adding/removing a notifier block on the MCE decode chain.
 *
 * NOTE(review): two mce_register_decode_chain() prototypes appear back to
 * back. This text looks like a diff rendered without +/- markers: the
 * one-argument form is presumably the removed line and the (nb, drain)
 * form the added one -- confirm against the real patch.
 */
extern void mce_register_decode_chain(struct notifier_block *nb);
extern void mce_register_decode_chain(struct notifier_block *nb, bool drain);
extern void mce_unregister_decode_chain(struct notifier_block *nb);
#include <linux/percpu.h>
......
......@@ -114,6 +114,7 @@ static struct work_struct mce_work;
static struct irq_work mce_irq_work;
static void (*quirk_no_way_out)(int bank, struct mce *m, struct pt_regs *regs);
static int mce_usable_address(struct mce *m);
/*
* CPU/chipset specific EDAC code can register a notifier call here to print
......@@ -234,10 +235,17 @@ static void drain_mcelog_buffer(void)
} while (next != prev);
}
static struct notifier_block mce_srao_nb;
/*
 * Register a notifier block on x86_mce_decoder_chain.
 *
 * @nb:    notifier block to add. If it is not mce_srao_nb and asks for
 *         priority INT_MAX, its priority is lowered by one before it is
 *         registered.
 * @drain: when true, drain_mcelog_buffer() is called after registration.
 *
 * NOTE(review): two signature lines appear back to back. This text looks
 * like a diff rendered without +/- markers: the one-argument signature is
 * presumably the removed line and the (nb, drain) signature the added one
 * -- confirm against the real patch.
 */
void mce_register_decode_chain(struct notifier_block *nb)
void mce_register_decode_chain(struct notifier_block *nb, bool drain)
{
/* Ensure SRAO notifier has the highest priority in the decode chain. */
if (nb != &mce_srao_nb && nb->priority == INT_MAX)
nb->priority -= 1;
atomic_notifier_chain_register(&x86_mce_decoder_chain, nb);
if (drain)
drain_mcelog_buffer();
}
EXPORT_SYMBOL_GPL(mce_register_decode_chain);
......@@ -462,61 +470,6 @@ static inline void mce_gather_info(struct mce *m, struct pt_regs *regs)
}
}
/*
* Simple lockless ring to communicate PFNs from the exception handler with the
* process context work function. This is vastly simplified because there's
* only a single reader and a single writer.
*/
#define MCE_RING_SIZE 16 /* we use one entry less */
/*
 * Ring buffer of page frame numbers.
 *
 * The ring is empty when start == end. mce_ring_add() refuses to write
 * when advancing end would make it equal to start, so one slot always
 * stays unused.
 */
struct mce_ring {
/* Index of the entry that mce_ring_get() reads next. */
unsigned short start;
/* Index of the slot that mce_ring_add() writes next. */
unsigned short end;
/* Storage for the queued values. */
unsigned long ring[MCE_RING_SIZE];
};
/* One mce_ring instance per CPU. */
static DEFINE_PER_CPU(struct mce_ring, mce_ring);
/* Runs with CPU affinity in workqueue */
/*
 * Report whether the current CPU's mce_ring holds no entries.
 *
 * Returns 1 when start == end, 0 otherwise.
 */
static int mce_ring_empty(void)
{
struct mce_ring *r = this_cpu_ptr(&mce_ring);
return r->start == r->end;
}
/*
 * Remove the oldest entry from the current CPU's mce_ring.
 *
 * @pfn: output. Set to 0 first, then overwritten with the dequeued value
 *       if the ring is not empty.
 *
 * Returns 1 if an entry was dequeued, 0 if the ring was empty.
 */
static int mce_ring_get(unsigned long *pfn)
{
struct mce_ring *r;
int ret = 0;
*pfn = 0;
/* get_cpu()/put_cpu() bracket the per-CPU ring access below. */
get_cpu();
r = this_cpu_ptr(&mce_ring);
/* start == end means there is nothing queued. */
if (r->start == r->end)
goto out;
*pfn = r->ring[r->start];
/* Advance the read index, wrapping at MCE_RING_SIZE. */
r->start = (r->start + 1) % MCE_RING_SIZE;
ret = 1;
out:
put_cpu();
return ret;
}
/* Always runs in MCE context with preempt off */
/*
 * Append a value to the current CPU's mce_ring.
 *
 * @pfn: value to store at the ring's end index.
 *
 * Returns 0 on success, or -1 without storing anything when the advanced
 * end index would equal start (ring full).
 */
static int mce_ring_add(unsigned long pfn)
{
struct mce_ring *r = this_cpu_ptr(&mce_ring);
unsigned next;
/* Candidate new end index, wrapping at MCE_RING_SIZE. */
next = (r->end + 1) % MCE_RING_SIZE;
if (next == r->start)
return -1;
r->ring[r->end] = pfn;
/* Write barrier between storing the entry and publishing the new end. */
wmb();
r->end = next;
return 0;
}
int mce_available(struct cpuinfo_x86 *c)
{
if (mca_cfg.disabled)
......@@ -526,7 +479,7 @@ int mce_available(struct cpuinfo_x86 *c)
/*
 * Queue mce_work with schedule_work() when there is pending data.
 *
 * NOTE(review): two `if` conditions appear back to back. This text looks
 * like a diff rendered without +/- markers: the mce_ring_empty() test is
 * presumably the removed line and the mce_gen_pool_empty()/keventd_up()
 * test its replacement -- confirm against the real patch.
 */
static void mce_schedule_work(void)
{
if (!mce_ring_empty())
if (!mce_gen_pool_empty() && keventd_up())
schedule_work(&mce_work);
}
......@@ -553,6 +506,27 @@ static void mce_report_event(struct pt_regs *regs)
irq_work_queue(&mce_irq_work);
}
/*
 * Notifier callback installed through mce_srao_nb.
 *
 * @nb:   notifier block (not used in the body)
 * @val:  notifier event value (not used in the body)
 * @data: pointer to the struct mce being reported; may be NULL
 *
 * Returns NOTIFY_DONE when data is NULL, NOTIFY_OK otherwise.
 *
 * When the record has usable_addr set and its severity is MCE_AO_SEVERITY,
 * the page frame number derived from mce->addr is handed to
 * memory_failure(). The return value of memory_failure() is not checked.
 */
static int srao_decode_notifier(struct notifier_block *nb, unsigned long val,
void *data)
{
struct mce *mce = (struct mce *)data;
unsigned long pfn;
if (!mce)
return NOTIFY_DONE;
if (mce->usable_addr && (mce->severity == MCE_AO_SEVERITY)) {
/* Convert the reported address to a page frame number. */
pfn = mce->addr >> PAGE_SHIFT;
memory_failure(pfn, MCE_VECTOR, 0);
}
return NOTIFY_OK;
}
/*
 * Notifier block that routes decode-chain events to srao_decode_notifier().
 * It requests priority INT_MAX; mce_register_decode_chain() lowers any
 * other notifier that asks for INT_MAX, so this one keeps the top priority.
 */
static struct notifier_block mce_srao_nb = {
.notifier_call = srao_decode_notifier,
.priority = INT_MAX,
};
/*
* Read ADDR and MISC registers.
*/
......@@ -671,7 +645,10 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
*/
if (severity == MCE_DEFERRED_SEVERITY && memory_error(&m)) {
if (m.status & MCI_STATUS_ADDRV) {
mce_ring_add(m.addr >> PAGE_SHIFT);
m.severity = severity;
m.usable_addr = mce_usable_address(&m);
if (!mce_gen_pool_add(&m))
mce_schedule_work();
}
}
......@@ -1142,15 +1119,10 @@ void do_machine_check(struct pt_regs *regs, long error_code)
mce_read_aux(&m, i);
/*
* Action optional error. Queue address for later processing.
* When the ring overflows we just ignore the AO error.
* RED-PEN add some logging mechanism when
* usable_address or mce_add_ring fails.
* RED-PEN don't ignore overflow for mca_cfg.tolerant == 0
*/
if (severity == MCE_AO_SEVERITY && mce_usable_address(&m))
mce_ring_add(m.addr >> PAGE_SHIFT);
/* assuming valid severity level != 0 */
m.severity = severity;
m.usable_addr = mce_usable_address(&m);
mce_gen_pool_add(&m);
mce_log(&m);
......@@ -1246,14 +1218,11 @@ int memory_failure(unsigned long pfn, int vector, int flags)
/*
* Action optional processing happens here (picking up
* from the list of faulting pages that do_machine_check()
* placed into the "ring").
* placed into the genpool).
*/
/*
 * Work handler for mce_work (installed with INIT_WORK() in mcheck_init()).
 *
 * @dummy: the work item; not used in the body.
 *
 * NOTE(review): this text looks like a diff rendered without +/- markers.
 * The pfn local and the mce_ring_get()/memory_failure() loop are presumably
 * the removed body and the mce_gen_pool_process() call its replacement --
 * confirm against the real patch.
 */
static void mce_process_work(struct work_struct *dummy)
{
unsigned long pfn;
while (mce_ring_get(&pfn))
memory_failure(pfn, MCE_VECTOR, 0);
mce_gen_pool_process();
}
#ifdef CONFIG_X86_MCE_INTEL
......@@ -2059,6 +2028,7 @@ __setup("mce", mcheck_enable);
int __init mcheck_init(void)
{
mcheck_intel_therm_init();
mce_register_decode_chain(&mce_srao_nb, false);
mcheck_vendor_init_severity();
INIT_WORK(&mce_work, mce_process_work);
......@@ -2597,5 +2567,20 @@ static int __init mcheck_debugfs_init(void)
return 0;
}
late_initcall(mcheck_debugfs_init);
#else
/* Stub for the #else branch: does nothing and returns -EINVAL. */
static int __init mcheck_debugfs_init(void) { return -EINVAL; }
#endif
/*
 * Late initcall for the machine-check code.
 *
 * Calls mcheck_debugfs_init() (its return value is ignored) and then
 * mce_schedule_work() to process anything logged during early boot.
 *
 * Always returns 0.
 */
static int __init mcheck_late_init(void)
{
mcheck_debugfs_init();
/*
* Flush out everything that has been logged during early boot, now that
* everything has been initialized (workqueues, decoders, ...).
*/
mce_schedule_work();
return 0;
}
late_initcall(mcheck_late_init);
......@@ -286,7 +286,7 @@ static int __init extlog_init(void)
*/
old_edac_report_status = get_edac_report_status();
set_edac_report_status(EDAC_REPORTING_DISABLED);
mce_register_decode_chain(&extlog_mce_dec);
mce_register_decode_chain(&extlog_mce_dec, true);
/* enable OS to be involved to take over management from BIOS */
((struct extlog_l1_head *)extlog_l1_addr)->flags |= FLAG_OS_OPTIN;
......
......@@ -2424,7 +2424,7 @@ static int __init i7core_init(void)
pci_rc = pci_register_driver(&i7core_driver);
if (pci_rc >= 0) {
mce_register_decode_chain(&i7_mce_dec);
mce_register_decode_chain(&i7_mce_dec, true);
return 0;
}
......
......@@ -895,7 +895,7 @@ static int __init mce_amd_init(void)
pr_info("MCE: In-kernel MCE decoding enabled.\n");
mce_register_decode_chain(&amd_mce_dec_nb);
mce_register_decode_chain(&amd_mce_dec_nb, true);
return 0;
}
......
......@@ -2591,7 +2591,7 @@ static int __init sbridge_init(void)
pci_rc = pci_register_driver(&sbridge_driver);
if (pci_rc >= 0) {
mce_register_decode_chain(&sbridge_mce_dec);
mce_register_decode_chain(&sbridge_mce_dec, true);
if (get_edac_report_status() == EDAC_REPORTING_DISABLED)
sbridge_printk(KERN_WARNING, "Loading driver, error reporting disabled.\n");
return 0;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册