non-fatal.c 2.1 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3
/*
 * Non Fatal Machine Check Exception Reporting
 *
 * (C) Copyright 2002 Dave Jones. <davej@redhat.com>
 *
 * This file contains routines to check for non-fatal MCEs every 15s
 *
 */
#include <linux/interrupt.h>
I
Ingo Molnar 已提交
10 11 12
#include <linux/workqueue.h>
#include <linux/jiffies.h>
#include <linux/kernel.h>
L
Linus Torvalds 已提交
13
#include <linux/module.h>
I
Ingo Molnar 已提交
14 15 16
#include <linux/types.h>
#include <linux/init.h>
#include <linux/smp.h>
L
Linus Torvalds 已提交
17

18
#include <asm/processor.h>
L
Linus Torvalds 已提交
19
#include <asm/system.h>
H
Hidetoshi Seto 已提交
20
#include <asm/mce.h>
L
Linus Torvalds 已提交
21 22
#include <asm/msr.h>

I
Ingo Molnar 已提交
23
/*
 * Index of the first machine-check bank that mce_checkregs() polls.
 * Assigned once in init_nonfatal_mce_checker().
 */
static int		firstbank;
L
Linus Torvalds 已提交
24

I
Ingo Molnar 已提交
25
#define MCE_RATE	(15*HZ)	/* polling interval: 15 seconds, expressed in jiffies */
L
Linus Torvalds 已提交
26

27
static void mce_checkregs(void *info)
L
Linus Torvalds 已提交
28 29 30 31
{
	u32 low, high;
	int i;

32 33
	for (i = firstbank; i < nr_mce_banks; i++) {
		rdmsr(MSR_IA32_MC0_STATUS+i*4, low, high);
L
Linus Torvalds 已提交
34

I
Ingo Molnar 已提交
35 36 37 38 39
		if (!(high & (1<<31)))
			continue;

		printk(KERN_INFO "MCE: The hardware reports a non fatal, "
			"correctable incident occurred on CPU %d.\n",
L
Linus Torvalds 已提交
40
				smp_processor_id());
I
Ingo Molnar 已提交
41 42 43 44 45 46 47 48 49 50 51 52

		printk(KERN_INFO "Bank %d: %08x%08x\n", i, high, low);

		/*
		 * Scrub the error so we don't pick it up in MCE_RATE
		 * seconds time:
		 */
		wrmsr(MSR_IA32_MC0_STATUS+i*4, 0UL, 0UL);

		/* Serialize: */
		wmb();
		add_taint(TAINT_MACHINE_CHECK);
L
Linus Torvalds 已提交
53 54 55
	}
}

D
David Howells 已提交
56 57
/*
 * Forward declaration: the work item below must name its handler before
 * the handler's definition appears.
 */
static void mce_work_fn(struct work_struct *work);
/* Delayed work item whose handler is mce_work_fn() */
static DECLARE_DELAYED_WORK(mce_work, mce_work_fn);
L
Linus Torvalds 已提交
58

D
David Howells 已提交
59
static void mce_work_fn(struct work_struct *work)
60
{
61
	on_each_cpu(mce_checkregs, NULL, 1);
62
	schedule_delayed_work(&mce_work, round_jiffies_relative(MCE_RATE));
63
}
L
Linus Torvalds 已提交
64 65 66 67 68 69 70 71 72 73 74 75 76 77 78

/*
 * init_nonfatal_mce_checker - module init: start periodic non-fatal MCE polling
 *
 * Returns -ENODEV when the boot CPU lacks X86_FEATURE_MCE or
 * X86_FEATURE_MCA. Otherwise selects the first bank to poll, queues the
 * first run of mce_work, logs a message and returns 0.
 */
static int __init init_nonfatal_mce_checker(void)
{
	struct cpuinfo_x86 *cpu = &boot_cpu_data;
	int amd_family6;

	/* Need both MCE support and PPro style MCA */
	if (!cpu_has(cpu, X86_FEATURE_MCE) || !cpu_has(cpu, X86_FEATURE_MCA))
		return -ENODEV;

	/* Some Athlons misbehave when we frob bank 0, so skip it there */
	amd_family6 = cpu->x86_vendor == X86_VENDOR_AMD && cpu->x86 == 6;
	firstbank = amd_family6 ? 1 : 0;

	/* Check for non-fatal errors every MCE_RATE jiffies */
	schedule_delayed_work(&mce_work, round_jiffies_relative(MCE_RATE));
	printk(KERN_INFO "Machine check exception polling timer started.\n");

	return 0;
}
/* Register init_nonfatal_mce_checker() as this module's init routine */
module_init(init_nonfatal_mce_checker);

/* Declare the module's license string */
MODULE_LICENSE("GPL");