mce-severity.c 11.0 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12
/*
 * MCE grading rules.
 * Copyright 2008, 2009 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; version 2
 * of the License.
 *
 * Author: Andi Kleen
 */
#include <linux/kernel.h>
13 14 15
#include <linux/seq_file.h>
#include <linux/init.h>
#include <linux/debugfs.h>
16
#include <asm/mce.h>
17
#include <linux/uaccess.h>
18 19 20 21 22 23 24 25

#include "mce-internal.h"

/*
 * Grade an mce by severity. In general the most severe ones are processed
 * first. Since there are quite a lot of combinations test the bits in a
 * table-driven way. The rules are simply processed in order, first
 * match wins.
 *
 * Note this is only used for machine check exceptions, the corrected
 * errors use much simpler rules. The exceptions still check for the corrected
 * errors, but only to leave them alone for the CMCI handler (except for
 * panic situations)
 */

33
/* Execution context the machine check interrupted (see error_context()). */
enum context { IN_KERNEL = 1, IN_USER = 2, IN_KERNEL_RECOV = 3 };
/* Rule filter: applies only with (or only without) SER support; 0 = don't care. */
enum ser { SER_REQUIRED = 1, NO_SER = 2 };
/* Rule filter: applies only in (or only outside) #MC exception context; 0 = don't care. */
enum exception { EXCP_CONTEXT = 1, NO_EXCP = 2 };
A
Andi Kleen 已提交
36

37 38 39 40 41 42
/*
 * Severity grading rules, scanned in order by mce_severity_intel();
 * the first matching entry decides the grade.
 */
static struct severity {
	u64 mask;		/* bits of MCi_STATUS a rule looks at */
	u64 result;		/* required value of (status & mask) */
	unsigned char sev;	/* MCE_*_SEVERITY grade when the rule matches */
	unsigned char mcgmask;	/* bits of MCG_STATUS a rule looks at */
	unsigned char mcgres;	/* required value of (mcgstatus & mcgmask) */
	unsigned char ser;	/* enum ser filter, 0 = don't care */
	unsigned char context;	/* enum context filter, 0 = don't care */
	unsigned char excp;	/* enum exception filter, 0 = don't care */
	unsigned char covered;	/* set once this rule has matched (debugfs coverage) */
	char *msg;		/* human-readable description reported to the caller */
} severities[] = {
#define MCESEV(s, m, c...) { .sev = MCE_ ## s ## _SEVERITY, .msg = m, ## c }
#define  KERNEL		.context = IN_KERNEL
#define  USER		.context = IN_USER
#define  KERNEL_RECOV	.context = IN_KERNEL_RECOV
#define  SER		.ser = SER_REQUIRED
#define  NOSER		.ser = NO_SER
#define  EXCP		.excp = EXCP_CONTEXT
#define  NOEXCP		.excp = NO_EXCP
#define  BITCLR(x)	.mask = x, .result = 0
#define  BITSET(x)	.mask = x, .result = x
#define  MCGMASK(x, y)	.mcgmask = x, .mcgres = y
#define  MASK(x, y)	.mask = x, .result = y
#define MCI_UC_S (MCI_STATUS_UC|MCI_STATUS_S)
#define MCI_UC_AR (MCI_STATUS_UC|MCI_STATUS_AR)
#define MCI_UC_SAR (MCI_STATUS_UC|MCI_STATUS_S|MCI_STATUS_AR)
#define	MCI_ADDR (MCI_STATUS_ADDRV|MCI_STATUS_MISCV)

	MCESEV(
		NO, "Invalid",
		BITCLR(MCI_STATUS_VAL)
		),
	MCESEV(
		NO, "Not enabled",
		EXCP, BITCLR(MCI_STATUS_EN)
		),
	MCESEV(
		PANIC, "Processor context corrupt",
		BITSET(MCI_STATUS_PCC)
		),
	/* When MCIP is not set something is very confused */
	MCESEV(
		PANIC, "MCIP not set in MCA handler",
		EXCP, MCGMASK(MCG_STATUS_MCIP, 0)
		),
	/* Neither return nor error IP -- no chance to recover -> PANIC */
	MCESEV(
		PANIC, "Neither restart nor error IP",
		EXCP, MCGMASK(MCG_STATUS_RIPV|MCG_STATUS_EIPV, 0)
		),
	MCESEV(
		PANIC, "In kernel and no restart IP",
		EXCP, KERNEL, MCGMASK(MCG_STATUS_RIPV, 0)
		),
	MCESEV(
		PANIC, "In kernel and no restart IP",
		EXCP, KERNEL_RECOV, MCGMASK(MCG_STATUS_RIPV, 0)
		),
	MCESEV(
		DEFERRED, "Deferred error",
		NOSER, MASK(MCI_STATUS_UC|MCI_STATUS_DEFERRED|MCI_STATUS_POISON, MCI_STATUS_DEFERRED)
		),
	MCESEV(
		KEEP, "Corrected error",
		NOSER, BITCLR(MCI_STATUS_UC)
		),

	/*
	 * known AO MCACODs reported via MCE or CMC:
	 *
	 * SRAO could be signaled either via a machine check exception or
	 * CMCI with the corresponding bit S 1 or 0. So we don't need to
	 * check bit S for SRAO.
	 */
	MCESEV(
		AO, "Action optional: memory scrubbing error",
		SER, MASK(MCI_STATUS_OVER|MCI_UC_AR|MCACOD_SCRUBMSK, MCI_STATUS_UC|MCACOD_SCRUB)
		),
	MCESEV(
		AO, "Action optional: last level cache writeback error",
		SER, MASK(MCI_STATUS_OVER|MCI_UC_AR|MCACOD, MCI_STATUS_UC|MCACOD_L3WB)
		),

	/* ignore OVER for UCNA */
	MCESEV(
		UCNA, "Uncorrected no action required",
		SER, MASK(MCI_UC_SAR, MCI_STATUS_UC)
		),
	MCESEV(
		PANIC, "Illegal combination (UCNA with AR=1)",
		SER,
		MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_STATUS_UC|MCI_STATUS_AR)
		),
	MCESEV(
		KEEP, "Non signalled machine check",
		SER, BITCLR(MCI_STATUS_S)
		),

	MCESEV(
		PANIC, "Action required with lost events",
		SER, BITSET(MCI_STATUS_OVER|MCI_UC_SAR)
		),

	/* known AR MCACODs: */
#ifdef	CONFIG_MEMORY_FAILURE
	MCESEV(
		KEEP, "Action required but unaffected thread is continuable",
		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR, MCI_UC_SAR|MCI_ADDR),
		MCGMASK(MCG_STATUS_RIPV|MCG_STATUS_EIPV, MCG_STATUS_RIPV)
		),
	MCESEV(
		AR, "Action required: data load in error recoverable area of kernel",
		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
		KERNEL_RECOV
		),
	MCESEV(
		AR, "Action required: data load error in a user process",
		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
		USER
		),
	MCESEV(
		AR, "Action required: instruction fetch error in a user process",
		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_INSTR),
		USER
		),
	MCESEV(
		PANIC, "Data load in unrecoverable area of kernel",
		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
		KERNEL
		),
	MCESEV(
		PANIC, "Instruction fetch error in kernel",
		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_INSTR),
		KERNEL
		),
#endif
	MCESEV(
		PANIC, "Action required: unknown MCACOD",
		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_SAR)
		),

	MCESEV(
		SOME, "Action optional: unknown MCACOD",
		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_S)
		),
	MCESEV(
		SOME, "Action optional with lost events",
		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_STATUS_OVER|MCI_UC_S)
		),

	MCESEV(
		PANIC, "Overflowed uncorrected",
		BITSET(MCI_STATUS_OVER|MCI_STATUS_UC)
		),
	MCESEV(
		UC, "Uncorrected",
		BITSET(MCI_STATUS_UC)
		),
	MCESEV(
		SOME, "No match",
		BITSET(0)
		)	/* always matches. keep at end */
};

202 203 204
/* Both restart-IP (RIPV) and error-IP (EIPV) valid: recovery is possible. */
#define mc_recoverable(mcg) (((mcg) & (MCG_STATUS_RIPV|MCG_STATUS_EIPV)) == \
				(MCG_STATUS_RIPV|MCG_STATUS_EIPV))

A
Andi Kleen 已提交
205
/*
206 207 208 209 210 211 212 213 214
 * If mcgstatus indicated that ip/cs on the stack were
 * no good, then "m->cs" will be zero and we will have
 * to assume the worst case (IN_KERNEL) as we actually
 * have no idea what we were executing when the machine
 * check hit.
 * If we do have a good "m->cs" (or a faked one in the
 * case we were executing in VM86 mode) we can use it to
 * distinguish an exception taken in user from from one
 * taken in the kernel.
A
Andi Kleen 已提交
215 216 217
 */
static int error_context(struct mce *m)
{
218 219 220 221 222
	if ((m->cs & 3) == 3)
		return IN_USER;
	if (mc_recoverable(m->mcgstatus) && ex_has_fault_handler(m->ip))
		return IN_KERNEL_RECOV;
	return IN_KERNEL;
A
Andi Kleen 已提交
223 224
}

225
/*
 * Grade an error on an SMCA-enabled AMD part.
 *
 * Returns MCE_AR_SEVERITY when the error can be handed to the hwpoison
 * recovery machinery, MCE_PANIC_SEVERITY otherwise. Called only from
 * mce_severity_amd() for uncorrected errors taken outside the kernel.
 */
static int mce_severity_amd_smca(struct mce *m, enum context err_ctx)
{
	u32 addr = MSR_AMD64_SMCA_MCx_CONFIG(m->bank);
	u32 low, high;

	/*
	 * We need to look at:
	 * - the "succor" CPU feature (data poisoning support), and
	 * - the TCC bit (Task Context Corrupt) in MCi_STATUS,
	 * qualified by the McaX bit read from this bank's MCi_CONFIG MSR,
	 * to determine error severity.
	 */
	if (!mce_flags.succor)
		return MCE_PANIC_SEVERITY;

	/* rdmsr_safe() failing means we cannot trust the bank config: panic. */
	if (rdmsr_safe(addr, &low, &high))
		return MCE_PANIC_SEVERITY;

	/* TCC (Task context corrupt). If set and if IN_KERNEL, panic. */
	if ((low & MCI_CONFIG_MCAX) &&
	    (m->status & MCI_STATUS_TCC) &&
	    (err_ctx == IN_KERNEL))
		return MCE_PANIC_SEVERITY;

	 /* ...otherwise invoke hwpoison handler. */
	return MCE_AR_SEVERITY;
}

252 253 254 255
/*
 * See AMD Error Scope Hierarchy table in a newer BKDG. For example
 * 49125_15h_Models_30h-3Fh_BKDG.pdf, section "RAS Features"
 */
256
static int mce_severity_amd(struct mce *m, int tolerant, char **msg, bool is_excp)
257
{
258 259
	enum context ctx = error_context(m);

260 261 262 263 264 265
	/* Processor Context Corrupt, no need to fumble too much, die! */
	if (m->status & MCI_STATUS_PCC)
		return MCE_PANIC_SEVERITY;

	if (m->status & MCI_STATUS_UC) {

266 267 268
		if (ctx == IN_KERNEL)
			return MCE_PANIC_SEVERITY;

269 270 271 272 273 274 275
		/*
		 * On older systems where overflow_recov flag is not present, we
		 * should simply panic if an error overflow occurs. If
		 * overflow_recov flag is present and set, then software can try
		 * to at least kill process to prolong system operation.
		 */
		if (mce_flags.overflow_recov) {
276 277 278
			if (mce_flags.smca)
				return mce_severity_amd_smca(m, ctx);

279 280
			/* kill current process */
			return MCE_AR_SEVERITY;
281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307
		} else {
			/* at least one error was not logged */
			if (m->status & MCI_STATUS_OVER)
				return MCE_PANIC_SEVERITY;
		}

		/*
		 * For any other case, return MCE_UC_SEVERITY so that we log the
		 * error and exit #MC handler.
		 */
		return MCE_UC_SEVERITY;
	}

	/*
	 * deferred error: poll handler catches these and adds to mce_ring so
	 * memory-failure can take recovery actions.
	 */
	if (m->status & MCI_STATUS_DEFERRED)
		return MCE_DEFERRED_SEVERITY;

	/*
	 * corrected error: poll handler catches these and passes responsibility
	 * of decoding the error to EDAC
	 */
	return MCE_KEEP_SEVERITY;
}

308
/*
 * Grade an Intel machine check by scanning the severities[] table.
 * The terminating "No match" catch-all rule guarantees the scan exits.
 */
static int mce_severity_intel(struct mce *m, int tolerant, char **msg, bool is_excp)
{
	enum exception excp = is_excp ? EXCP_CONTEXT : NO_EXCP;
	enum context ctx = error_context(m);
	struct severity *s;

	for (s = severities;; s++) {
		/* Status/mcgstatus bit patterns must match exactly. */
		if ((m->status & s->mask) != s->result)
			continue;
		if ((m->mcgstatus & s->mcgmask) != s->mcgres)
			continue;
		/* Skip rules qualified for the opposite SER mode. */
		if (s->ser == (mca_cfg.ser ? NO_SER : SER_REQUIRED))
			continue;
		if (s->context && ctx != s->context)
			continue;
		if (s->excp && excp != s->excp)
			continue;

		if (msg)
			*msg = s->msg;
		s->covered = 1;

		/* With zero tolerance, kernel-context UC grades escalate to panic. */
		if (s->sev >= MCE_UC_SEVERITY && ctx == IN_KERNEL && tolerant < 1)
			return MCE_PANIC_SEVERITY;

		return s->sev;
	}
}
337

338 339 340 341 342 343
/* Default to mce_severity_intel */
int (*mce_severity)(struct mce *m, int tolerant, char **msg, bool is_excp) =
		    mce_severity_intel;

/* Install the vendor-specific grader for vendors that have one. */
void __init mcheck_vendor_init_severity(void)
{
	switch (boot_cpu_data.x86_vendor) {
	case X86_VENDOR_AMD:
	case X86_VENDOR_HYGON:
		mce_severity = mce_severity_amd;
		break;
	default:
		break;
	}
}

349
#ifdef CONFIG_DEBUG_FS
350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401
/* seq_file .start: position *pos into severities[], NULL past the end. */
static void *s_start(struct seq_file *f, loff_t *pos)
{
	return (*pos >= ARRAY_SIZE(severities)) ? NULL : &severities[*pos];
}

/* seq_file .next: advance to the following rule, NULL when exhausted. */
static void *s_next(struct seq_file *f, void *data, loff_t *pos)
{
	++*pos;

	return (*pos >= ARRAY_SIZE(severities)) ? NULL : &severities[*pos];
}

/* seq_file .stop: nothing was acquired in s_start(), nothing to release. */
static void s_stop(struct seq_file *f, void *data)
{
}

static int s_show(struct seq_file *f, void *data)
{
	struct severity *ser = data;
	seq_printf(f, "%d\t%s\n", ser->covered, ser->msg);
	return 0;
}

/* Iterator over severities[]: one grading rule per line of output. */
static const struct seq_operations severities_seq_ops = {
	.start	= s_start,
	.next	= s_next,
	.stop	= s_stop,
	.show	= s_show,
};

/* Open "severities-coverage": stream the rule table through seq_file. */
static int severities_coverage_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &severities_seq_ops);
}

/* Any write to the file resets every rule's coverage flag. */
static ssize_t severities_coverage_write(struct file *file,
					 const char __user *ubuf,
					 size_t count, loff_t *ppos)
{
	size_t idx;

	for (idx = 0; idx < ARRAY_SIZE(severities); idx++)
		severities[idx].covered = 0;

	/* The written data itself is ignored; claim it all was consumed. */
	return count;
}

/* "severities-coverage": read = dump rule hit flags, write = reset them. */
static const struct file_operations severities_coverage_fops = {
	.open		= severities_coverage_open,
	.release	= seq_release,
	.read		= seq_read,
	.write		= severities_coverage_write,
	.llseek		= seq_lseek,
};

/*
 * Expose "severities-coverage" under the MCE debugfs directory.
 *
 * Note: debugfs_create_file() returns an ERR_PTR(), never NULL, on
 * failure, so the old "if (!fsev)" check could never fire. Per kernel
 * convention, debugfs creation results need not (and should not) be
 * checked -- debugfs is a best-effort debugging aid, and a NULL/error
 * parent is handled gracefully by debugfs itself.
 */
static int __init severities_debugfs_init(void)
{
	struct dentry *dmce;

	dmce = mce_get_debugfs_dir();

	debugfs_create_file("severities-coverage", 0444, dmce, NULL,
			    &severities_coverage_fops);
	return 0;
}
late_initcall(severities_debugfs_init);
424
#endif /* CONFIG_DEBUG_FS */