mce-severity.c 10.8 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12
/*
 * MCE grading rules.
 * Copyright 2008, 2009 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; version 2
 * of the License.
 *
 * Author: Andi Kleen
 */
#include <linux/kernel.h>
13 14 15
#include <linux/seq_file.h>
#include <linux/init.h>
#include <linux/debugfs.h>
16
#include <asm/mce.h>
17
#include <linux/uaccess.h>
18 19 20 21 22 23 24 25

#include "mce-internal.h"

/*
 * Grade an mce by severity. In general the most severe ones are processed
 * first. Since there are quite a lot of combinations test the bits in a
 * table-driven way. The rules are simply processed in order, first
 * match wins.
 *
 * Note this is only used for machine check exceptions, the corrected
 * errors use much simpler rules. The exceptions still check for the corrected
 * errors, but only to leave them alone for the CMCI handler (except for
 * panic situations)
 */

33
/* Execution context the machine check was taken in (see error_context()). */
enum context { IN_KERNEL = 1, IN_USER = 2, IN_KERNEL_RECOV = 3 };
/* Rule restriction: applies only with (or only without) SER support. */
enum ser { SER_REQUIRED = 1, NO_SER = 2 };
/* Rule restriction: event arrived as a #MC exception vs. polled/CMCI. */
enum exception { EXCP_CONTEXT = 1, NO_EXCP = 2 };
A
Andi Kleen 已提交
36

37 38 39 40 41 42
/*
 * One grading rule.  A machine check matches the rule when all of the
 * mask/result pairs and the optional ser/context/excp restrictions
 * (0 == "don't care") are satisfied; the rule's sev is then the grade.
 * Rules are tried strictly in table order, first match wins.
 */
static struct severity {
	u64 mask;		/* MCi_STATUS bits to test */
	u64 result;		/* required value of the masked bits */
	unsigned char sev;	/* resulting MCE_*_SEVERITY grade */
	unsigned char mcgmask;	/* MCG_STATUS bits to test */
	unsigned char mcgres;	/* required value of the masked MCG bits */
	unsigned char ser;	/* SER restriction, 0 = any */
	unsigned char context;	/* context restriction, 0 = any */
	unsigned char excp;	/* exception-vs-poll restriction, 0 = any */
	unsigned char covered;	/* rule has matched at least once (debugfs) */
	char *msg;		/* human-readable description of the rule */
} severities[] = {
#define MCESEV(s, m, c...) { .sev = MCE_ ## s ## _SEVERITY, .msg = m, ## c }
#define  KERNEL		.context = IN_KERNEL
#define  USER		.context = IN_USER
#define  KERNEL_RECOV	.context = IN_KERNEL_RECOV
#define  SER		.ser = SER_REQUIRED
#define  NOSER		.ser = NO_SER
#define  EXCP		.excp = EXCP_CONTEXT
#define  NOEXCP		.excp = NO_EXCP
#define  BITCLR(x)	.mask = x, .result = 0
#define  BITSET(x)	.mask = x, .result = x
#define  MCGMASK(x, y)	.mcgmask = x, .mcgres = y
#define  MASK(x, y)	.mask = x, .result = y
#define MCI_UC_S (MCI_STATUS_UC|MCI_STATUS_S)
#define MCI_UC_AR (MCI_STATUS_UC|MCI_STATUS_AR)
#define MCI_UC_SAR (MCI_STATUS_UC|MCI_STATUS_S|MCI_STATUS_AR)
#define	MCI_ADDR (MCI_STATUS_ADDRV|MCI_STATUS_MISCV)

	MCESEV(
		NO, "Invalid",
		BITCLR(MCI_STATUS_VAL)
		),
	MCESEV(
		NO, "Not enabled",
		EXCP, BITCLR(MCI_STATUS_EN)
		),
	MCESEV(
		PANIC, "Processor context corrupt",
		BITSET(MCI_STATUS_PCC)
		),
	/* When MCIP is not set something is very confused */
	MCESEV(
		PANIC, "MCIP not set in MCA handler",
		EXCP, MCGMASK(MCG_STATUS_MCIP, 0)
		),
	/* Neither return nor error IP -- no chance to recover -> PANIC */
	MCESEV(
		PANIC, "Neither restart nor error IP",
		EXCP, MCGMASK(MCG_STATUS_RIPV|MCG_STATUS_EIPV, 0)
		),
	MCESEV(
		PANIC, "In kernel and no restart IP",
		EXCP, KERNEL, MCGMASK(MCG_STATUS_RIPV, 0)
		),
	MCESEV(
		PANIC, "In kernel and no restart IP",
		EXCP, KERNEL_RECOV, MCGMASK(MCG_STATUS_RIPV, 0)
		),
	MCESEV(
		DEFERRED, "Deferred error",
		NOSER, MASK(MCI_STATUS_UC|MCI_STATUS_DEFERRED|MCI_STATUS_POISON, MCI_STATUS_DEFERRED)
		),
	MCESEV(
		KEEP, "Corrected error",
		NOSER, BITCLR(MCI_STATUS_UC)
		),

	/*
	 * known AO MCACODs reported via MCE or CMC:
	 *
	 * SRAO could be signaled either via a machine check exception or
	 * CMCI with the corresponding bit S 1 or 0. So we don't need to
	 * check bit S for SRAO.
	 */
	MCESEV(
		AO, "Action optional: memory scrubbing error",
		SER, MASK(MCI_STATUS_OVER|MCI_UC_AR|MCACOD_SCRUBMSK, MCI_STATUS_UC|MCACOD_SCRUB)
		),
	MCESEV(
		AO, "Action optional: last level cache writeback error",
		SER, MASK(MCI_STATUS_OVER|MCI_UC_AR|MCACOD, MCI_STATUS_UC|MCACOD_L3WB)
		),

	/* ignore OVER for UCNA */
	MCESEV(
		UCNA, "Uncorrected no action required",
		SER, MASK(MCI_UC_SAR, MCI_STATUS_UC)
		),
	MCESEV(
		PANIC, "Illegal combination (UCNA with AR=1)",
		SER,
		MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_STATUS_UC|MCI_STATUS_AR)
		),
	MCESEV(
		KEEP, "Non signalled machine check",
		SER, BITCLR(MCI_STATUS_S)
		),

	MCESEV(
		PANIC, "Action required with lost events",
		SER, BITSET(MCI_STATUS_OVER|MCI_UC_SAR)
		),

	/* known AR MCACODs: */
#ifdef	CONFIG_MEMORY_FAILURE
	MCESEV(
		KEEP, "Action required but unaffected thread is continuable",
		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR, MCI_UC_SAR|MCI_ADDR),
		MCGMASK(MCG_STATUS_RIPV|MCG_STATUS_EIPV, MCG_STATUS_RIPV)
		),
	MCESEV(
		AR, "Action required: data load in error recoverable area of kernel",
		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
		KERNEL_RECOV
		),
	MCESEV(
		AR, "Action required: data load error in a user process",
		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
		USER
		),
	MCESEV(
		AR, "Action required: instruction fetch error in a user process",
		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_INSTR),
		USER
		),
	MCESEV(
		PANIC, "Data load in unrecoverable area of kernel",
		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
		KERNEL
		),
#endif
	MCESEV(
		PANIC, "Action required: unknown MCACOD",
		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_SAR)
		),

	MCESEV(
		SOME, "Action optional: unknown MCACOD",
		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_S)
		),
	MCESEV(
		SOME, "Action optional with lost events",
		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_STATUS_OVER|MCI_UC_S)
		),

	MCESEV(
		PANIC, "Overflowed uncorrected",
		BITSET(MCI_STATUS_OVER|MCI_STATUS_UC)
		),
	MCESEV(
		UC, "Uncorrected",
		BITSET(MCI_STATUS_UC)
		),
	MCESEV(
		SOME, "No match",
		BITSET(0)
		)	/* always matches. keep at end */
};

197 198 199
/* True when both the restart IP (RIPV) and the error IP (EIPV) are valid. */
#define mc_recoverable(mcg) (((mcg) & (MCG_STATUS_RIPV|MCG_STATUS_EIPV)) == \
				(MCG_STATUS_RIPV|MCG_STATUS_EIPV))

A
Andi Kleen 已提交
200
/*
 * If mcgstatus indicated that ip/cs on the stack were
 * no good, then "m->cs" will be zero and we will have
 * to assume the worst case (IN_KERNEL) as we actually
 * have no idea what we were executing when the machine
 * check hit.
 * If we do have a good "m->cs" (or a faked one in the
 * case we were executing in VM86 mode) we can use it to
 * distinguish an exception taken in user from one
 * taken in the kernel.
 */
static int error_context(struct mce *m)
{
	/* CPL 3 in the saved CS selector: fault was taken in user mode. */
	if ((m->cs & 3) == 3)
		return IN_USER;
	/*
	 * Kernel fault with valid restart/error IPs and an exception
	 * fixup entry for the faulting IP: the kernel can recover.
	 */
	if (mc_recoverable(m->mcgstatus) && ex_has_fault_handler(m->ip))
		return IN_KERNEL_RECOV;
	return IN_KERNEL;
}

220
/*
 * Grade an uncorrected error on a Scalable MCA (SMCA) AMD system.
 * Called from mce_severity_amd() for UC errors taken outside the kernel;
 * returns either MCE_PANIC_SEVERITY or MCE_AR_SEVERITY (kill process).
 */
static int mce_severity_amd_smca(struct mce *m, enum context err_ctx)
{
	u32 addr = MSR_AMD64_SMCA_MCx_CONFIG(m->bank);
	u32 low, high;

	/*
	 * We need to look at the following bits:
	 * - "succor" bit (data poisoning support), and
	 * - TCC bit (Task Context Corrupt)
	 * in MCi_STATUS to determine error severity.
	 */
	if (!mce_flags.succor)
		return MCE_PANIC_SEVERITY;

	/* Bank CONFIG register unreadable: assume the worst. */
	if (rdmsr_safe(addr, &low, &high))
		return MCE_PANIC_SEVERITY;

	/* TCC (Task context corrupt). If set and if IN_KERNEL, panic. */
	if ((low & MCI_CONFIG_MCAX) &&
	    (m->status & MCI_STATUS_TCC) &&
	    (err_ctx == IN_KERNEL))
		return MCE_PANIC_SEVERITY;

	 /* ...otherwise invoke hwpoison handler. */
	return MCE_AR_SEVERITY;
}

247 248 249 250
/*
 * See AMD Error Scope Hierarchy table in a newer BKDG. For example
 * 49125_15h_Models_30h-3Fh_BKDG.pdf, section "RAS Features"
 *
 * Note: @tolerant, @msg and @is_excp are unused here; the signature
 * matches the mce_severity function pointer shared with the Intel grader.
 */
static int mce_severity_amd(struct mce *m, int tolerant, char **msg, bool is_excp)
{
	enum context ctx = error_context(m);

	/* Processor Context Corrupt, no need to fumble too much, die! */
	if (m->status & MCI_STATUS_PCC)
		return MCE_PANIC_SEVERITY;

	if (m->status & MCI_STATUS_UC) {

		/* Uncorrected error taken in the kernel is unrecoverable. */
		if (ctx == IN_KERNEL)
			return MCE_PANIC_SEVERITY;

		/*
		 * On older systems where overflow_recov flag is not present, we
		 * should simply panic if an error overflow occurs. If
		 * overflow_recov flag is present and set, then software can try
		 * to at least kill process to prolong system operation.
		 */
		if (mce_flags.overflow_recov) {
			/* SMCA systems get the finer-grained grading. */
			if (mce_flags.smca)
				return mce_severity_amd_smca(m, ctx);

			/* kill current process */
			return MCE_AR_SEVERITY;
		} else {
			/* at least one error was not logged */
			if (m->status & MCI_STATUS_OVER)
				return MCE_PANIC_SEVERITY;
		}

		/*
		 * For any other case, return MCE_UC_SEVERITY so that we log the
		 * error and exit #MC handler.
		 */
		return MCE_UC_SEVERITY;
	}

	/*
	 * deferred error: poll handler catches these and adds to mce_ring so
	 * memory-failure can take recovery actions.
	 */
	if (m->status & MCI_STATUS_DEFERRED)
		return MCE_DEFERRED_SEVERITY;

	/*
	 * corrected error: poll handler catches these and passes responsibility
	 * of decoding the error to EDAC
	 */
	return MCE_KEEP_SEVERITY;
}

303
/*
 * Grade a machine check by walking the severities[] table in order;
 * the first rule whose masks and restrictions all match wins.
 * On a match *msg (if non-NULL) is set to the rule's description.
 */
static int mce_severity_intel(struct mce *m, int tolerant, char **msg, bool is_excp)
{
	enum exception excp = (is_excp ? EXCP_CONTEXT : NO_EXCP);
	enum context ctx = error_context(m);
	struct severity *s;

	/* The table ends with a catch-all rule, so the loop always returns. */
	for (s = severities;; s++) {
		/* MCi_STATUS bits must match the rule's mask/result pair */
		if ((m->status & s->mask) != s->result)
			continue;
		/* same check for the MCG_STATUS bits */
		if ((m->mcgstatus & s->mcgmask) != s->mcgres)
			continue;
		/* honor the rule's SER (software error recovery) restriction */
		if (s->ser == SER_REQUIRED && !mca_cfg.ser)
			continue;
		if (s->ser == NO_SER && mca_cfg.ser)
			continue;
		/* 0 means "any context" / "any delivery type" */
		if (s->context && ctx != s->context)
			continue;
		if (s->excp && excp != s->excp)
			continue;
		if (msg)
			*msg = s->msg;
		s->covered = 1;		/* coverage statistics for debugfs */
		/* tolerant == 0: uncorrected kernel errors always panic */
		if (s->sev >= MCE_UC_SEVERITY && ctx == IN_KERNEL) {
			if (tolerant < 1)
				return MCE_PANIC_SEVERITY;
		}
		return s->sev;
	}
}
332

333 334 335 336 337 338 339 340 341 342
/* Default to mce_severity_intel */
int (*mce_severity)(struct mce *m, int tolerant, char **msg, bool is_excp) =
		    mce_severity_intel;

/* Switch the grader to the AMD implementation when booting on an AMD CPU. */
void __init mcheck_vendor_init_severity(void)
{
	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
		mce_severity = mce_severity_amd;
}

343
#ifdef CONFIG_DEBUG_FS
344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395
/* seq_file start: return the rule at *pos, or NULL past the table end. */
static void *s_start(struct seq_file *f, loff_t *pos)
{
	return (*pos < ARRAY_SIZE(severities)) ? &severities[*pos] : NULL;
}

/* seq_file next: advance the cursor and return the next rule, if any. */
static void *s_next(struct seq_file *f, void *data, loff_t *pos)
{
	loff_t next = ++(*pos);

	if (next >= ARRAY_SIZE(severities))
		return NULL;
	return &severities[next];
}

/* seq_file stop: nothing was locked or allocated, so nothing to release. */
static void s_stop(struct seq_file *f, void *data)
{
}

static int s_show(struct seq_file *f, void *data)
{
	struct severity *ser = data;
	seq_printf(f, "%d\t%s\n", ser->covered, ser->msg);
	return 0;
}

/* Iterate the severities[] table, one rule per output line. */
static const struct seq_operations severities_seq_ops = {
	.start	= s_start,
	.next	= s_next,
	.stop	= s_stop,
	.show	= s_show,
};

/* Open "severities-coverage": attach the seq_file iterator. */
static int severities_coverage_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &severities_seq_ops);
}

/*
 * Any write to "severities-coverage" resets all coverage counters, so a
 * test run can start from a clean slate.  The written data is ignored.
 */
static ssize_t severities_coverage_write(struct file *file,
					 const char __user *ubuf,
					 size_t count, loff_t *ppos)
{
	size_t i;

	/* size_t matches ARRAY_SIZE() and avoids a signed/unsigned compare */
	for (i = 0; i < ARRAY_SIZE(severities); i++)
		severities[i].covered = 0;
	return count;
}

/* debugfs ops: reads dump the coverage counters, any write resets them. */
static const struct file_operations severities_coverage_fops = {
	.open		= severities_coverage_open,
	.release	= seq_release,
	.read		= seq_read,
	.write		= severities_coverage_write,
	.llseek		= seq_lseek,
};

/*
 * Create <debugfs>/mce/severities-coverage at boot.  Returns -ENOMEM
 * when either the mce directory or the file cannot be created.
 */
static int __init severities_debugfs_init(void)
{
	struct dentry *dmce, *fsev;

	dmce = mce_get_debugfs_dir();
	if (!dmce)
		return -ENOMEM;

	fsev = debugfs_create_file("severities-coverage", 0444, dmce, NULL,
				   &severities_coverage_fops);
	if (!fsev)
		return -ENOMEM;

	return 0;
}
late_initcall(severities_debugfs_init);
418
#endif /* CONFIG_DEBUG_FS */