mce_amd.c 18.3 KB
Newer Older
1
#include <linux/module.h>
2 3
#include <linux/slab.h>

B
Borislav Petkov 已提交
4
#include "mce_amd.h"
D
Doug Thompson 已提交
5

6 7
static struct amd_decoder_ops *fam_ops;

8
static u8 xec_mask	 = 0xf;
9

10
static bool report_gart_errors;
11
static void (*nb_bus_decoder)(int node_id, struct mce *m);
12 13 14 15 16 17 18

/*
 * Enable (@v == true) or disable (@v == false) reporting of the bank 4
 * errors that amd_filter_mce() otherwise suppresses.
 */
void amd_report_gart_errors(bool v)
{
	report_gart_errors = v;
}
EXPORT_SYMBOL_GPL(amd_report_gart_errors);

19
void amd_register_ecc_decoder(void (*f)(int, struct mce *))
20 21 22 23 24
{
	nb_bus_decoder = f;
}
EXPORT_SYMBOL_GPL(amd_register_ecc_decoder);

25
void amd_unregister_ecc_decoder(void (*f)(int, struct mce *))
26 27 28 29 30 31 32 33 34
{
	if (nb_bus_decoder) {
		WARN_ON(nb_bus_decoder != f);

		nb_bus_decoder = NULL;
	}
}
EXPORT_SYMBOL_GPL(amd_unregister_ecc_decoder);

D
Doug Thompson 已提交
35 36 37 38
/*
 * string representation for the different MCA reported error types, see F3x48
 * or MSR0000_0411.
 */
B
Borislav Petkov 已提交
39 40

/* transaction type */
static const char * const tt_msgs[] = { "INSN", "DATA", "GEN", "RESV" };
D
Doug Thompson 已提交
42

B
Borislav Petkov 已提交
43
/* cache level */
static const char * const ll_msgs[] = { "RESV", "L1", "L2", "L3/GEN" };
D
Doug Thompson 已提交
45

B
Borislav Petkov 已提交
46
/* memory transaction type */
static const char * const rrrr_msgs[] = {
       "GEN", "RD", "WR", "DRD", "DWR", "IRD", "PRF", "EV", "SNP"
};

B
Borislav Petkov 已提交
51
/* participating processor; exported for use outside this file */
const char * const pp_msgs[] = { "SRC", "RES", "OBS", "GEN" };
EXPORT_SYMBOL_GPL(pp_msgs);
D
Doug Thompson 已提交
54

B
Borislav Petkov 已提交
55
/* request timeout */
static const char * const to_msgs[] = { "no timeout", "timed out" };
D
Doug Thompson 已提交
57

B
Borislav Petkov 已提交
58
/* memory or i/o */
static const char * const ii_msgs[] = { "MEM", "RESV", "IO", "GEN" };
D
Doug Thompson 已提交
60

61
/* internal error type */
static const char * const uu_msgs[] = { "RESV", "RESV", "HWA", "RESV" };
63

64
/*
 * MC1 error descriptions used by f15h_mc1_mce(). The table is densely packed
 * while the extended error codes are not, so f15h_mc1_mce() subtracts an
 * offset for the codes after the gaps (xec 0xd -> index 11, xec 0x10..0x15 ->
 * index 12..17).
 */
static const char * const f15h_mc1_mce_desc[] = {
	"UC during a demand linefill from L2",		/* xec = 0x0 */
	"Parity error during data load from IC",
	"Parity error for IC valid bit",
	"Main tag parity error",
	"Parity error in prediction queue",
	"PFB data/address parity error",
	"Parity error in the branch status reg",
	"PFB promotion address error",
	"Tag error during probe/victimization",
	"Parity error for IC probe tag valid bit",
	"PFB non-cacheable bit parity error",
	"PFB valid bit parity error",			/* xec = 0xd */
	"Microcode Patch Buffer",			/* xec = 0x10 */
	"uop queue",
	"insn buffer",
	"predecode buffer",
	"fetch address FIFO",
	"dispatch uop queue"
};

85
/*
 * MC2 error descriptions used by f15h_mc2_mce(): xec 0x4..0xc map to index
 * 0..8 and xec 0x10..0x14 map to index 9..13.
 */
static const char * const f15h_mc2_mce_desc[] = {
	"Fill ECC error on data fills",			/* xec = 0x4 */
	"Fill parity error on insn fills",
	"Prefetcher request FIFO parity error",
	"PRQ address parity error",
	"PRQ data parity error",
	"WCC Tag ECC error",
	"WCC Data ECC error",
	"WCB Data parity error",
	"VB Data ECC or parity error",
	"L2 Tag ECC error",				/* xec = 0x10 */
	"Hard L2 Tag ECC error",
	"Multiple hits on L2 tag",
	"XAB parity error",
	"PRB address parity error"
};

102
/*
 * MC4 error descriptions used by decode_mc4_mce(): xec 0x0..0xe index the
 * table directly, xec 0x1c..0x1f are looked up at (xec - 13).
 */
static const char * const mc4_mce_desc[] = {
	"DRAM ECC error detected on the NB",		/* xec = 0x0 */
	"CRC error detected on HT link",
	"Link-defined sync error packets detected on HT link",
	"HT Master abort",
	"HT Target abort",
	"Invalid GART PTE entry during GART table walk",
	"Unsupported atomic RMW received from an IO link",
	"Watchdog timeout due to lack of progress",
	"DRAM ECC error detected on the NB",
	"SVM DMA Exclusion Vector error",
	"HT data error detected on link",
	"Protocol error (link, L3, probe filter)",
	"NB internal arrays parity error",
	"DRAM addr/ctl signals parity error",
	"IO link transmission error",
	"L3 data cache ECC error",			/* xec = 0x1c */
	"L3 cache tag error",
	"L3 LRU parity bits error",
	"ECC Error in the Probe Filter directory"
};

124
/*
 * MC5 error descriptions, indexed directly by xec (0x0..0xd) in
 * decode_mc5_mce().
 */
static const char * const mc5_mce_desc[] = {
	"CPU Watchdog timer expire",			/* xec = 0x0 */
	"Wakeup array dest tag",
	"AG payload array",
	"EX payload array",
	"IDRF array",
	"Retire dispatch queue",
	"Mapper checkpoint array",
	"Physical register file EX0 port",
	"Physical register file EX1 port",
	"Physical register file AG0 port",
	"Physical register file AG1 port",
	"Flag register file",
	"DE error occurred",				/* xec = 0xc */
	"Retire status queue"
};

141 142 143 144 145 146 147 148 149
/* MC6 error descriptions, indexed directly by xec in decode_mc6_mce(). */
static const char * const mc6_mce_desc[] = {
	"Hardware Assertion",		/* xec = 0x0 */
	"Free List",			/* xec = 0x1 */
	"Physical Register File",	/* xec = 0x2 */
	"Retire Queue",			/* xec = 0x3 */
	"Scheduler table",		/* xec = 0x4 */
	"Status Register File",		/* xec = 0x5 */
};

150
/*
 * MC0 callback installed for family 0x12; also the fallback of
 * f10h_mc0_mce().
 *
 * Handles only error codes for which MEM_ERROR(@ec) holds and picks the
 * message from the LL field of @ec. @xec is not used.
 *
 * Returns true if the signature was recognized and the rest of the
 * "MC0 Error: " line was printed, false otherwise (nothing printed).
 */
static bool f12h_mc0_mce(u16 ec, u8 xec)
{
	bool ret = false;

	if (MEM_ERROR(ec)) {
		u8 ll = LL(ec);
		ret = true;

		if (ll == LL_L2)
			pr_cont("during L1 linefill from L2.\n");
		else if (ll == LL_L1)
			pr_cont("Data/Tag %s error.\n", R4_MSG(ec));
		else
			ret = false;
	}
	return ret;
}
167

168
/*
 * MC0 callback installed for family 0x10; also the fallback of k8_mc0_mce().
 *
 * Recognizes the R4_GEN/LL_L1 combination itself and defers every other
 * signature to f12h_mc0_mce().
 *
 * Returns true if a message was printed, false otherwise.
 */
static bool f10h_mc0_mce(u16 ec, u8 xec)
{
	if (R4(ec) == R4_GEN && LL(ec) == LL_L1) {
		pr_cont("during data scrub.\n");
		return true;
	}
	return f12h_mc0_mce(ec, xec);
}

177
/*
 * MC0 callback installed for families 0xf and 0x11.
 *
 * Reports bus errors itself and defers every other signature to
 * f10h_mc0_mce().
 *
 * Returns true if a message was printed, false otherwise.
 */
static bool k8_mc0_mce(u16 ec, u8 xec)
{
	if (BUS_ERROR(ec)) {
		pr_cont("during system linefill.\n");
		return true;
	}

	return f10h_mc0_mce(ec, xec);
}

187
/*
 * MC0 callback installed for families 0x14 and 0x16.
 *
 * Memory errors are accepted only for TT_DATA/LL_L1 and bus errors only for
 * II_MEM or II_IO with LL_LG; within those, the R4 field of @ec selects the
 * message. @xec is not used.
 *
 * Returns true if the signature was recognized, false otherwise. Note that
 * for a bus error with an unexpected R4 value the leading
 * "System read data error on a " fragment has already been printed when
 * false is returned.
 */
static bool cat_mc0_mce(u16 ec, u8 xec)
{
	u8 r4	 = R4(ec);
	bool ret = true;

	if (MEM_ERROR(ec)) {

		if (TT(ec) != TT_DATA || LL(ec) != LL_L1)
			return false;

		switch (r4) {
		case R4_DRD:
		case R4_DWR:
			pr_cont("Data/Tag parity error due to %s.\n",
				(r4 == R4_DRD ? "load/hw prf" : "store"));
			break;
		case R4_EVICT:
			pr_cont("Copyback parity error on a tag miss.\n");
			break;
		case R4_SNOOP:
			pr_cont("Tag parity error during snoop.\n");
			break;
		default:
			ret = false;
		}
	} else if (BUS_ERROR(ec)) {

		if ((II(ec) != II_MEM && II(ec) != II_IO) || LL(ec) != LL_LG)
			return false;

		pr_cont("System read data error on a ");

		switch (r4) {
		case R4_RD:
			pr_cont("TLB reload.\n");
			break;
		case R4_DWR:
			pr_cont("store.\n");
			break;
		case R4_DRD:
			pr_cont("load.\n");
			break;
		default:
			ret = false;
		}
	} else {
		ret = false;
	}

	return ret;
}

239
/*
 * MC0 callback installed for family 0x15.
 *
 * Memory errors are decoded from @xec alone; bus errors are always accepted
 * (xec 0 is a system read data error, anything else is reported with its
 * numeric type); internal errors are accepted for xec <= 0x1f.
 *
 * Returns true if a message was printed, false otherwise.
 */
static bool f15h_mc0_mce(u16 ec, u8 xec)
{
	bool ret = true;

	if (MEM_ERROR(ec)) {

		switch (xec) {
		case 0x0:
			pr_cont("Data Array access error.\n");
			break;

		case 0x1:
			pr_cont("UC error during a linefill from L2/NB.\n");
			break;

		case 0x2:
		case 0x11:
			pr_cont("STQ access error.\n");
			break;

		case 0x3:
			pr_cont("SCB access error.\n");
			break;

		case 0x10:
			pr_cont("Tag error.\n");
			break;

		case 0x12:
			pr_cont("LDQ access error.\n");
			break;

		default:
			ret = false;
		}
	} else if (BUS_ERROR(ec)) {

		if (!xec)
			pr_cont("System Read Data Error.\n");
		else
			pr_cont(" Internal error condition type %d.\n", xec);
	} else if (INT_ERROR(ec)) {
		if (xec <= 0x1f)
			pr_cont("Hardware Assert.\n");
		else
			ret = false;

	} else
		ret = false;

	return ret;
}

292
static void decode_mc0_mce(struct mce *m)
293
{
294 295
	u16 ec = EC(m->status);
	u8 xec = XEC(m->status, xec_mask);
296

297
	pr_emerg(HW_ERR "MC0 Error: ");
298 299 300

	/* TLB error signatures are the same across families */
	if (TLB_ERROR(ec)) {
301
		if (TT(ec) == TT_DATA) {
302
			pr_cont("%s TLB %s.\n", LL_MSG(ec),
303 304
				((xec == 2) ? "locked miss"
					    : (xec ? "multimatch" : "parity")));
305 306
			return;
		}
307
	} else if (fam_ops->mc0_mce(ec, xec))
308 309
		;
	else
310
		pr_emerg(HW_ERR "Corrupted MC0 MCE info?\n");
311 312
}

313
/*
 * MC1 callback installed for families 0xf, 0x10, 0x11 and 0x12.
 *
 * Handles only error codes for which MEM_ERROR(@ec) holds. An LL value of
 * 0x2 is a linefill error; for an LL value of 0x1 the R4 field selects the
 * message. @xec is not used.
 *
 * Returns true if a message was printed, false otherwise.
 */
static bool k8_mc1_mce(u16 ec, u8 xec)
{
	u8 ll	 = LL(ec);
	bool ret = true;

	if (!MEM_ERROR(ec))
		return false;

	if (ll == 0x2)
		pr_cont("during a linefill from L2.\n");
	else if (ll == 0x1) {
		switch (R4(ec)) {
		case R4_IRD:
			pr_cont("Parity error during data load.\n");
			break;

		case R4_EVICT:
			pr_cont("Copyback Parity/Victim error.\n");
			break;

		case R4_SNOOP:
			pr_cont("Tag Snoop error.\n");
			break;

		default:
			ret = false;
			break;
		}
	} else
		ret = false;

	return ret;
}

347
/*
 * MC1 callback installed for families 0x14 and 0x16.
 *
 * Accepts only memory errors with transaction type TT_INSTR. The R4 field
 * is checked first (R4_IRD, R4_SNOOP); if neither matches, @xec values 0x0
 * and 0x2 are recognized.
 *
 * Returns true if a message was printed, false otherwise.
 */
static bool cat_mc1_mce(u16 ec, u8 xec)
{
	u8 r4    = R4(ec);
	bool ret = true;

	if (!MEM_ERROR(ec))
		return false;

	if (TT(ec) != TT_INSTR)
		return false;

	if (r4 == R4_IRD)
		pr_cont("Data/tag array parity error for a tag hit.\n");
	else if (r4 == R4_SNOOP)
		pr_cont("Tag error during snoop/victimization.\n");
	else if (xec == 0x0)
		pr_cont("Tag parity error from victim castout.\n");
	else if (xec == 0x2)
		pr_cont("Microcode patch RAM parity error.\n");
	else
		ret = false;

	return ret;
}

372
/*
 * MC1 callback installed for family 0x15.
 *
 * Accepts only memory errors and translates @xec into an index of
 * f15h_mc1_mce_desc[]. The table is packed, so the codes after the gaps in
 * the xec space are shifted: 0xd uses (xec - 2), 0x10..0x15 use (xec - 4).
 *
 * Returns true if a message was printed, false otherwise.
 */
static bool f15h_mc1_mce(u16 ec, u8 xec)
{
	bool ret = true;

	if (!MEM_ERROR(ec))
		return false;

	switch (xec) {
	case 0x0 ... 0xa:
		pr_cont("%s.\n", f15h_mc1_mce_desc[xec]);
		break;

	case 0xd:
		pr_cont("%s.\n", f15h_mc1_mce_desc[xec-2]);
		break;

	case 0x10:
		pr_cont("%s.\n", f15h_mc1_mce_desc[xec-4]);
		break;

	case 0x11 ... 0x15:
		pr_cont("Decoder %s parity error.\n", f15h_mc1_mce_desc[xec-4]);
		break;

	default:
		ret = false;
	}
	return ret;
}

402
static void decode_mc1_mce(struct mce *m)
403
{
404 405
	u16 ec = EC(m->status);
	u8 xec = XEC(m->status, xec_mask);
406

407
	pr_emerg(HW_ERR "MC1 Error: ");
408 409 410 411 412

	if (TLB_ERROR(ec))
		pr_cont("%s TLB %s.\n", LL_MSG(ec),
			(xec ? "multimatch" : "parity error"));
	else if (BUS_ERROR(ec)) {
413
		bool k8 = (boot_cpu_data.x86 == 0xf && (m->status & BIT_64(58)));
414 415

		pr_cont("during %s.\n", (k8 ? "system linefill" : "NB data read"));
416 417 418 419 420
	} else if (INT_ERROR(ec)) {
		if (xec <= 0x3f)
			pr_cont("Hardware Assert.\n");
		else
			goto wrong_mc1_mce;
421
	} else if (fam_ops->mc1_mce(ec, xec))
422 423
		;
	else
424 425 426 427 428 429
		goto wrong_mc1_mce;

	return;

wrong_mc1_mce:
	pr_emerg(HW_ERR "Corrupted MC1 MCE info?\n");
430 431
}

432
/*
 * MC2 callback installed for families 0xf, 0x10, 0x11, 0x12 and 0x14.
 *
 * @xec selects the error source; for xec 0x0 the error code class (TLB, bus
 * or memory) refines the message, and for memory errors only R4 values
 * >= 0x7 or <= 0x1 are accepted.
 *
 * Returns true if a message was printed, false otherwise.
 */
static bool k8_mc2_mce(u16 ec, u8 xec)
{
	bool ret = true;

	if (xec == 0x1)
		pr_cont(" in the write data buffers.\n");
	else if (xec == 0x3)
		pr_cont(" in the victim data buffers.\n");
	else if (xec == 0x2 && MEM_ERROR(ec))
		pr_cont(": %s error in the L2 cache tags.\n", R4_MSG(ec));
	else if (xec == 0x0) {
		if (TLB_ERROR(ec))
			pr_cont(": %s error in a Page Descriptor Cache or "
				"Guest TLB.\n", TT_MSG(ec));
		else if (BUS_ERROR(ec))
			pr_cont(": %s/ECC error in data read from NB: %s.\n",
				R4_MSG(ec), PP_MSG(ec));
		else if (MEM_ERROR(ec)) {
			u8 r4 = R4(ec);

			if (r4 >= 0x7)
				pr_cont(": %s error during data copyback.\n",
					R4_MSG(ec));
			else if (r4 <= 0x1)
				pr_cont(": %s parity/ECC error during data "
					"access from L2.\n", R4_MSG(ec));
			else
				ret = false;
		} else
			ret = false;
	} else
		ret = false;

	return ret;
}

468
/*
 * MC2 callback installed for family 0x15.
 *
 * TLB errors accept xec 0x0 and 0x1, bus errors accept xec <= 2, memory
 * errors are looked up in f15h_mc2_mce_desc[] (xec 0x4..0xc at xec - 0x4,
 * xec 0x10..0x14 at xec - 0x7) and internal errors accept xec <= 0x3f.
 *
 * Returns true if the signature was recognized and a message was printed,
 * false otherwise. Nothing is printed for rejected signatures, and an error
 * code that belongs to none of the four classes is rejected as well, so the
 * caller can report it as corrupted.
 */
static bool f15h_mc2_mce(u16 ec, u8 xec)
{
	bool ret = true;

	if (TLB_ERROR(ec)) {
		if (xec == 0x0)
			pr_cont("Data parity TLB read error.\n");
		else if (xec == 0x1)
			pr_cont("Poison data provided for TLB fill.\n");
		else
			ret = false;
	} else if (BUS_ERROR(ec)) {
		/* do not describe an error we are about to reject */
		if (xec > 2)
			ret = false;
		else
			pr_cont("Error during attempted NB data read.\n");
	} else if (MEM_ERROR(ec)) {
		switch (xec) {
		case 0x4 ... 0xc:
			pr_cont("%s.\n", f15h_mc2_mce_desc[xec - 0x4]);
			break;

		case 0x10 ... 0x14:
			pr_cont("%s.\n", f15h_mc2_mce_desc[xec - 0x7]);
			break;

		default:
			ret = false;
		}
	} else if (INT_ERROR(ec)) {
		if (xec <= 0x3f)
			pr_cont("Hardware Assert.\n");
		else
			ret = false;
	} else {
		/* unknown error code class: nothing was decoded */
		ret = false;
	}

	return ret;
}

507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547
/*
 * MC2 callback installed for family 0x16.
 *
 * Accepts only memory errors. @xec selects the failing structure and the R4
 * field of @ec selects the qualifier appended to the message.
 *
 * Returns true if a message was printed, false otherwise.
 */
static bool f16h_mc2_mce(u16 ec, u8 xec)
{
	u8 mem_tx = R4(ec);
	const char *origin;

	if (!MEM_ERROR(ec))
		return false;

	switch (xec) {
	case 0x04 ... 0x05: {
		char dir = 'O';

		if (mem_tx == R4_RD)
			dir = 'I';

		pr_cont("%cBUFF parity error.\n", dir);
		return true;
	}

	case 0x09 ... 0x0b:
	case 0x0d ... 0x0f:
		if (mem_tx == R4_GEN)
			origin = "BankReq";
		else if (mem_tx == R4_SNOOP)
			origin = "Prb";
		else
			origin = "Fill";

		pr_cont("ECC error in L2 tag (%s).\n", origin);
		return true;

	case 0x10 ... 0x19:
	case 0x1b:
		/* "Hit" additionally requires the two low xec bits to be clear */
		if (mem_tx == R4_RD && !(xec & 0x3))
			origin = "Hit";
		else if (mem_tx == R4_GEN)
			origin = "Attr";
		else if (mem_tx == R4_EVICT)
			origin = "Vict";
		else
			origin = "Fill";

		pr_cont("ECC error in L2 data array (%s).\n", origin);
		return true;

	case 0x1c ... 0x1d:
	case 0x1f:
		if (mem_tx == R4_RD)
			origin = "Hit";
		else if (mem_tx == R4_GEN)
			origin = "Attr";
		else
			origin = "Fill";

		pr_cont("Parity error in L2 attribute bits (%s).\n", origin);
		return true;

	default:
		return false;
	}
}

548 549 550 551
static void decode_mc2_mce(struct mce *m)
{
	u16 ec = EC(m->status);
	u8 xec = XEC(m->status, xec_mask);
552

553 554 555 556
	pr_emerg(HW_ERR "MC2 Error: ");

	if (!fam_ops->mc2_mce(ec, xec))
		pr_cont(HW_ERR "Corrupted MC2 MCE info?\n");
557 558
}

559
static void decode_mc3_mce(struct mce *m)
560
{
561 562
	u16 ec = EC(m->status);
	u8 xec = XEC(m->status, xec_mask);
563

564
	if (boot_cpu_data.x86 >= 0x14) {
565
		pr_emerg("You shouldn't be seeing MC3 MCE on this cpu family,"
566 567 568
			 " please report on LKML.\n");
		return;
	}
569

570
	pr_emerg(HW_ERR "MC3 Error");
571 572

	if (xec == 0x0) {
573
		u8 r4 = R4(ec);
574

575
		if (!BUS_ERROR(ec) || (r4 != R4_DRD && r4 != R4_DWR))
576
			goto wrong_mc3_mce;
577

578
		pr_cont(" during %s.\n", R4_MSG(ec));
579
	} else
580
		goto wrong_mc3_mce;
581

582 583
	return;

584 585
 wrong_mc3_mce:
	pr_emerg(HW_ERR "Corrupted MC3 MCE info?\n");
586 587
}

588
static void decode_mc4_mce(struct mce *m)
589
{
590 591 592 593 594
	struct cpuinfo_x86 *c = &boot_cpu_data;
	int node_id = amd_get_nb_id(m->extcpu);
	u16 ec = EC(m->status);
	u8 xec = XEC(m->status, 0x1f);
	u8 offset = 0;
595

596
	pr_emerg(HW_ERR "MC4 Error (node %d): ", node_id);
597

598 599
	switch (xec) {
	case 0x0 ... 0xe:
600

601 602 603 604
		/* special handling for DRAM ECCs */
		if (xec == 0x0 || xec == 0x8) {
			/* no ECCs on F11h */
			if (c->x86 == 0x11)
605
				goto wrong_mc4_mce;
606

607
			pr_cont("%s.\n", mc4_mce_desc[xec]);
608

609 610 611 612
			if (nb_bus_decoder)
				nb_bus_decoder(node_id, m);
			return;
		}
613 614 615 616 617 618 619 620
		break;

	case 0xf:
		if (TLB_ERROR(ec))
			pr_cont("GART Table Walk data error.\n");
		else if (BUS_ERROR(ec))
			pr_cont("DMA Exclusion Vector Table Walk error.\n");
		else
621
			goto wrong_mc4_mce;
622
		return;
623

624
	case 0x19:
625
		if (boot_cpu_data.x86 == 0x15 || boot_cpu_data.x86 == 0x16)
626 627
			pr_cont("Compute Unit Data Error.\n");
		else
628
			goto wrong_mc4_mce;
629
		return;
630

631
	case 0x1c ... 0x1f:
632
		offset = 13;
633 634 635
		break;

	default:
636
		goto wrong_mc4_mce;
637
	}
638

639
	pr_cont("%s.\n", mc4_mce_desc[xec - offset]);
640 641
	return;

642 643
 wrong_mc4_mce:
	pr_emerg(HW_ERR "Corrupted MC4 MCE info?\n");
644 645
}

646
static void decode_mc5_mce(struct mce *m)
B
Borislav Petkov 已提交
647
{
648
	struct cpuinfo_x86 *c = &boot_cpu_data;
649
	u16 ec = EC(m->status);
650
	u8 xec = XEC(m->status, xec_mask);
651 652

	if (c->x86 == 0xf || c->x86 == 0x11)
653
		goto wrong_mc5_mce;
B
Borislav Petkov 已提交
654

655
	pr_emerg(HW_ERR "MC5 Error: ");
656

657 658 659 660 661 662 663 664
	if (INT_ERROR(ec)) {
		if (xec <= 0x1f) {
			pr_cont("Hardware Assert.\n");
			return;
		} else
			goto wrong_mc5_mce;
	}

665
	if (xec == 0x0 || xec == 0xc)
666
		pr_cont("%s.\n", mc5_mce_desc[xec]);
667
	else if (xec <= 0xd)
668
		pr_cont("%s parity error.\n", mc5_mce_desc[xec]);
669
	else
670
		goto wrong_mc5_mce;
671 672

	return;
B
Borislav Petkov 已提交
673

674 675
 wrong_mc5_mce:
	pr_emerg(HW_ERR "Corrupted MC5 MCE info?\n");
B
Borislav Petkov 已提交
676 677
}

678
static void decode_mc6_mce(struct mce *m)
679
{
680
	u8 xec = XEC(m->status, xec_mask);
681

682
	pr_emerg(HW_ERR "MC6 Error: ");
683

684
	if (xec > 0x5)
685
		goto wrong_mc6_mce;
686

687
	pr_cont("%s parity error.\n", mc6_mce_desc[xec]);
688 689
	return;

690 691
 wrong_mc6_mce:
	pr_emerg(HW_ERR "Corrupted MC6 MCE info?\n");
692 693
}

B
Borislav Petkov 已提交
694
/*
 * Print the generic, bank-independent breakdown of the error code @ec.
 *
 * Internal errors print only their UU type. All other errors print the cache
 * level followed by either the mem/io field (bus errors) or the transaction
 * type, then the memory transaction type for memory and bus errors, and
 * finally the participating processor and timeout state for bus errors.
 */
static inline void amd_decode_err_code(u16 ec)
{
	if (INT_ERROR(ec)) {
		pr_emerg(HW_ERR "internal: %s\n", UU_MSG(ec));
		return;
	}

	pr_emerg(HW_ERR "cache level: %s", LL_MSG(ec));

	if (BUS_ERROR(ec))
		pr_cont(", mem/io: %s", II_MSG(ec));
	else
		pr_cont(", tx: %s", TT_MSG(ec));

	if (MEM_ERROR(ec) || BUS_ERROR(ec)) {
		pr_cont(", mem-tx: %s", R4_MSG(ec));

		if (BUS_ERROR(ec))
			pr_cont(", part-proc: %s (%s)", PP_MSG(ec), TO_MSG(ec));
	}

	pr_cont("\n");
}

718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733
/*
 * Decide whether the MCE in @m should be dropped without being decoded.
 *
 * Returns true for signatures that must not be reported, false otherwise.
 */
static bool amd_filter_mce(struct mce *m)
{
	u8 xec = (m->status >> 16) & 0x1f;
	bool gart_tlb_err = (m->bank == 4) && (xec == 0x5);

	/* NB GART TLB errors are only reported when explicitly enabled. */
	return gart_tlb_err && !report_gart_errors;
}

B
Borislav Petkov 已提交
734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749
/*
 * Map the severity bits of @m to a human-readable summary.
 *
 * Uncorrected errors (MCI_STATUS_UC) are classified further by
 * MCI_STATUS_PCC and MCG_STATUS_RIPV; otherwise the error is either
 * deferred (MCI_STATUS_DEFERRED) or corrected.
 *
 * Returns a pointer to a string literal.
 */
static const char *decode_error_status(struct mce *m)
{
	if (!(m->status & MCI_STATUS_UC))
		return (m->status & MCI_STATUS_DEFERRED)
			? "Deferred error."
			: "Corrected error, no action required.";

	if (m->status & MCI_STATUS_PCC)
		return "System Fatal error.";

	return (m->mcgstatus & MCG_STATUS_RIPV)
		? "Uncorrected, software restartable error."
		: "Uncorrected, software containable error.";
}

B
Borislav Petkov 已提交
750
int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data)
751
{
752
	struct mce *m = (struct mce *)data;
753
	struct cpuinfo_x86 *c = &cpu_data(m->extcpu);
754
	int ecc;
755

756 757 758
	if (amd_filter_mce(m))
		return NOTIFY_STOP;

759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788
	pr_emerg(HW_ERR "%s\n", decode_error_status(m));

	pr_emerg(HW_ERR "CPU:%d (%x:%x:%x) MC%d_STATUS[%s|%s|%s|%s|%s",
		m->extcpu,
		c->x86, c->x86_model, c->x86_mask,
		m->bank,
		((m->status & MCI_STATUS_OVER)	? "Over"  : "-"),
		((m->status & MCI_STATUS_UC)	? "UE"	  : "CE"),
		((m->status & MCI_STATUS_MISCV)	? "MiscV" : "-"),
		((m->status & MCI_STATUS_PCC)	? "PCC"	  : "-"),
		((m->status & MCI_STATUS_ADDRV)	? "AddrV" : "-"));

	if (c->x86 == 0x15 || c->x86 == 0x16)
		pr_cont("|%s|%s",
			((m->status & MCI_STATUS_DEFERRED) ? "Deferred" : "-"),
			((m->status & MCI_STATUS_POISON)   ? "Poison"   : "-"));

	/* do the two bits[14:13] together */
	ecc = (m->status >> 45) & 0x3;
	if (ecc)
		pr_cont("|%sECC", ((ecc == 2) ? "C" : "U"));

	pr_cont("]: 0x%016llx\n", m->status);

	if (m->status & MCI_STATUS_ADDRV)
		pr_emerg(HW_ERR "MC%d_ADDR: 0x%016llx\n", m->bank, m->addr);

	if (!fam_ops)
		goto err_code;

789 790
	switch (m->bank) {
	case 0:
791
		decode_mc0_mce(m);
792
		break;
793

794
	case 1:
795
		decode_mc1_mce(m);
796 797
		break;

798
	case 2:
799
		decode_mc2_mce(m);
800 801
		break;

802
	case 3:
803
		decode_mc3_mce(m);
804 805
		break;

806
	case 4:
807
		decode_mc4_mce(m);
808 809
		break;

B
Borislav Petkov 已提交
810
	case 5:
811
		decode_mc5_mce(m);
B
Borislav Petkov 已提交
812 813
		break;

814
	case 6:
815
		decode_mc6_mce(m);
816 817
		break;

818 819
	default:
		break;
820
	}
821

822
 err_code:
823
	amd_decode_err_code(m->status & 0xffff);
824 825

	return NOTIFY_STOP;
826
}
B
Borislav Petkov 已提交
827
EXPORT_SYMBOL_GPL(amd_decode_mce);
828

829 830 831 832
/*
 * Notifier block that hooks amd_decode_mce() into the MCE decode chain; it
 * is registered in mce_amd_init() and unregistered in mce_amd_exit().
 */
static struct notifier_block amd_mce_dec_nb = {
	.notifier_call	= amd_decode_mce,
};

833 834
/*
 * Set up the in-kernel MCE decoder.
 *
 * Allocates fam_ops, selects the MC0/MC1/MC2 callbacks (and, for families
 * 0x15 and 0x16, the xec mask) matching the boot CPU family, and registers
 * amd_mce_dec_nb on the MCE decode chain.
 *
 * On an unrecognized family fam_ops is freed and left NULL; the notifier is
 * still registered, and amd_decode_mce() then skips the bank-specific
 * decoding.
 *
 * Returns 0 on success, -ENODEV if the CPU vendor is not AMD, or -ENOMEM if
 * fam_ops cannot be allocated.
 */
static int __init mce_amd_init(void)
{
	struct cpuinfo_x86 *c = &boot_cpu_data;

	if (c->x86_vendor != X86_VENDOR_AMD)
		return -ENODEV;

	fam_ops = kzalloc(sizeof(struct amd_decoder_ops), GFP_KERNEL);
	if (!fam_ops)
		return -ENOMEM;

	switch (c->x86) {
	case 0xf:
		fam_ops->mc0_mce = k8_mc0_mce;
		fam_ops->mc1_mce = k8_mc1_mce;
		fam_ops->mc2_mce = k8_mc2_mce;
		break;

	case 0x10:
		fam_ops->mc0_mce = f10h_mc0_mce;
		fam_ops->mc1_mce = k8_mc1_mce;
		fam_ops->mc2_mce = k8_mc2_mce;
		break;

	case 0x11:
		fam_ops->mc0_mce = k8_mc0_mce;
		fam_ops->mc1_mce = k8_mc1_mce;
		fam_ops->mc2_mce = k8_mc2_mce;
		break;

	case 0x12:
		fam_ops->mc0_mce = f12h_mc0_mce;
		fam_ops->mc1_mce = k8_mc1_mce;
		fam_ops->mc2_mce = k8_mc2_mce;
		break;

	case 0x14:
		fam_ops->mc0_mce = cat_mc0_mce;
		fam_ops->mc1_mce = cat_mc1_mce;
		fam_ops->mc2_mce = k8_mc2_mce;
		break;

	case 0x15:
		/* model 0x60 uses a wider extended error code field */
		xec_mask = c->x86_model == 0x60 ? 0x3f : 0x1f;

		fam_ops->mc0_mce = f15h_mc0_mce;
		fam_ops->mc1_mce = f15h_mc1_mce;
		fam_ops->mc2_mce = f15h_mc2_mce;
		break;

	case 0x16:
		xec_mask = 0x1f;
		fam_ops->mc0_mce = cat_mc0_mce;
		fam_ops->mc1_mce = cat_mc1_mce;
		fam_ops->mc2_mce = f16h_mc2_mce;
		break;

	default:
		printk(KERN_WARNING "Huh? What family is it: 0x%x?!\n", c->x86);
		kfree(fam_ops);
		/* amd_decode_mce() checks for NULL and decodes generically */
		fam_ops = NULL;
	}

	pr_info("MCE: In-kernel MCE decoding enabled.\n");

	mce_register_decode_chain(&amd_mce_dec_nb);

	return 0;
}
early_initcall(mce_amd_init);
903 904 905 906

#ifdef MODULE
/*
 * Module teardown: take the decoder off the MCE decode chain first, then
 * release the family callbacks (kfree() accepts the NULL left behind for an
 * unrecognized family).
 */
static void __exit mce_amd_exit(void)
{
	mce_unregister_decode_chain(&amd_mce_dec_nb);
	kfree(fam_ops);
}

MODULE_DESCRIPTION("AMD MCE decoder");
MODULE_ALIAS("edac-mce-amd");
MODULE_LICENSE("GPL");
module_exit(mce_amd_exit);
#endif