#include "amd64_edac.h"
#include <asm/amd_nb.h>

static struct edac_pci_ctl_info *amd64_ctl_pci;

static int report_gart_errors;
module_param(report_gart_errors, int, 0644);

/*
 * Set by command line parameter. If BIOS has enabled the ECC, this override is
 * cleared to prevent re-enabling the hardware by this driver.
 */
static int ecc_enable_override;
module_param(ecc_enable_override, int, 0644);

static struct msr __percpu *msrs;

/*
 * count successfully initialized driver instances for setup_pci_device()
 */
static atomic_t drv_instances = ATOMIC_INIT(0);

/* Per-node driver instances */
static struct mem_ctl_info **mcis;
static struct ecc_settings **ecc_stngs;

/*
 * Valid scrub rates for the K8 hardware memory scrubber. We map the scrubbing
 * bandwidth to a valid bit pattern. The 'set' operation finds the matching or
 * higher value.
 *
 * FIXME: Produce a better mapping/linearisation.
 */
struct scrubrate {
       u32 scrubval;           /* bit pattern for scrub rate */
       u32 bandwidth;          /* bandwidth consumed (bytes/sec) */
} scrubrates[] = {
	{ 0x01, 1600000000UL},
	{ 0x02, 800000000UL},
	{ 0x03, 400000000UL},
	{ 0x04, 200000000UL},
	{ 0x05, 100000000UL},
	{ 0x06, 50000000UL},
	{ 0x07, 25000000UL},
	{ 0x08, 12284069UL},
	{ 0x09, 6274509UL},
	{ 0x0A, 3121951UL},
	{ 0x0B, 1560975UL},
	{ 0x0C, 781440UL},
	{ 0x0D, 390720UL},
	{ 0x0E, 195300UL},
	{ 0x0F, 97650UL},
	{ 0x10, 48854UL},
	{ 0x11, 24427UL},
	{ 0x12, 12213UL},
	{ 0x13, 6101UL},
	{ 0x14, 3051UL},
	{ 0x15, 1523UL},
	{ 0x16, 761UL},
	{ 0x00, 0UL},        /* scrubbing off */
};
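
/*
 * Illustrative example (editor's note, not from the BKDG): with the table
 * above, a request of new_bw = 10000000 bytes/sec and min_rate = 0x05 makes
 * __amd64_set_scrub_rate() below skip the non-recommended entries 0x01-0x04
 * and pick 0x09 (6274509 bytes/sec), the largest listed rate that does not
 * exceed the request; the function then returns 6274509.
 */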

static int __amd64_read_pci_cfg_dword(struct pci_dev *pdev, int offset,
				      u32 *val, const char *func)
{
	int err = 0;

	err = pci_read_config_dword(pdev, offset, val);
	if (err)
		amd64_warn("%s: error reading F%dx%03x.\n",
			   func, PCI_FUNC(pdev->devfn), offset);

	return err;
}

int __amd64_write_pci_cfg_dword(struct pci_dev *pdev, int offset,
				u32 val, const char *func)
{
	int err = 0;

	err = pci_write_config_dword(pdev, offset, val);
	if (err)
		amd64_warn("%s: error writing to F%dx%03x.\n",
			   func, PCI_FUNC(pdev->devfn), offset);

	return err;
}

/*
 * Depending on the family, F2 DCT reads need special handling:
 *
 * K8: has a single DCT only
 *
 * F10h: each DCT has its own set of regs
 *	DCT0 -> F2x040..
 *	DCT1 -> F2x140..
 *
 * F15h: we select which DCT we access using F1x10C[DctCfgSel]
 */
static int k8_read_dct_pci_cfg(struct amd64_pvt *pvt, int addr, u32 *val,
			       const char *func)
{
	if (addr >= 0x100)
		return -EINVAL;

	return __amd64_read_pci_cfg_dword(pvt->F2, addr, val, func);
}

static int f10_read_dct_pci_cfg(struct amd64_pvt *pvt, int addr, u32 *val,
				 const char *func)
{
	return __amd64_read_pci_cfg_dword(pvt->F2, addr, val, func);
}
/*
 * Select DCT to which PCI cfg accesses are routed
 */
static void f15h_select_dct(struct amd64_pvt *pvt, u8 dct)
{
	u32 reg = 0;

	amd64_read_pci_cfg(pvt->F1, DCT_CFG_SEL, &reg);
	reg &= 0xfffffffe;
	reg |= dct;
	amd64_write_pci_cfg(pvt->F1, DCT_CFG_SEL, reg);
}

static int f15_read_dct_pci_cfg(struct amd64_pvt *pvt, int addr, u32 *val,
				 const char *func)
{
	u8 dct  = 0;

	if (addr >= 0x140 && addr <= 0x1a0) {
		dct   = 1;
		addr -= 0x100;
	}

	f15h_select_dct(pvt, dct);

	return __amd64_read_pci_cfg_dword(pvt->F2, addr, val, func);
}
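
/*
 * Illustrative example (editor's note): on F15h, a read of the DCT1 copy of
 * F2x58 is requested as address 0x158.  f15_read_dct_pci_cfg() above folds
 * that into the DCT0 register window (0x158 - 0x100 = 0x58) and programs
 * F1x10C[DctCfgSel] = 1 via f15h_select_dct() before issuing the access.
 */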

/*
 * Memory scrubber control interface. For K8, memory scrubbing is handled by
 * hardware and can involve L2 cache, dcache as well as the main memory. With
 * F10, this is extended to L3 cache scrubbing on CPU models sporting that
 * functionality.
 *
 * This causes the "units" for the scrubbing speed to vary from 64 byte blocks
 * (dram) over to cache lines. This is nasty, so we will use bandwidth in
 * bytes/sec for the setting.
 *
 * Currently, we only do dram scrubbing. If the scrubbing is done in software on
 * other archs, we might not have access to the caches directly.
 */

/*
 * Scan the scrub rate mapping table for a close or matching bandwidth value to
 * issue. If the requested rate is too big, use the largest allowed value in
 * the table.
 */
static int __amd64_set_scrub_rate(struct pci_dev *ctl, u32 new_bw, u32 min_rate)
{
	u32 scrubval;
	int i;

	/*
	 * map the configured rate (new_bw) to a value specific to the AMD64
	 * memory controller and apply to register. Search for the first
	 * bandwidth entry that does not exceed the setting requested and
	 * program that. If no suitable bandwidth is found, turn off DRAM
	 * scrubbing entirely by falling back to the last element in
	 * scrubrates[].
	 */
	for (i = 0; i < ARRAY_SIZE(scrubrates) - 1; i++) {
		/*
		 * skip scrub rates which aren't recommended
		 * (see F10 BKDG, F3x58)
		 */
		if (scrubrates[i].scrubval < min_rate)
			continue;

		if (scrubrates[i].bandwidth <= new_bw)
			break;
	}

	scrubval = scrubrates[i].scrubval;

	pci_write_bits32(ctl, SCRCTRL, scrubval, 0x001F);

	if (scrubval)
		return scrubrates[i].bandwidth;

	return 0;
}

static int amd64_set_scrub_rate(struct mem_ctl_info *mci, u32 bw)
{
	struct amd64_pvt *pvt = mci->pvt_info;
	u32 min_scrubrate = 0x5;

	if (boot_cpu_data.x86 == 0xf)
		min_scrubrate = 0x0;

	/* F15h Erratum #505 */
	if (boot_cpu_data.x86 == 0x15)
		f15h_select_dct(pvt, 0);

	return __amd64_set_scrub_rate(pvt->F3, bw, min_scrubrate);
}

static int amd64_get_scrub_rate(struct mem_ctl_info *mci)
{
	struct amd64_pvt *pvt = mci->pvt_info;
	u32 scrubval = 0;
	int i, retval = -EINVAL;

	/* F15h Erratum #505 */
	if (boot_cpu_data.x86 == 0x15)
		f15h_select_dct(pvt, 0);

	amd64_read_pci_cfg(pvt->F3, SCRCTRL, &scrubval);

	scrubval = scrubval & 0x001F;

	for (i = 0; i < ARRAY_SIZE(scrubrates); i++) {
		if (scrubrates[i].scrubval == scrubval) {
			retval = scrubrates[i].bandwidth;
			break;
		}
	}
	return retval;
}

/*
 * returns true if the SysAddr given by sys_addr matches the
 * DRAM base/limit associated with node_id
 */
static bool amd64_base_limit_match(struct amd64_pvt *pvt, u64 sys_addr,
				   unsigned nid)
{
	u64 addr;

	/* The K8 treats this as a 40-bit value.  However, bits 63-40 will be
	 * all ones if the most significant implemented address bit is 1.
	 * Here we discard bits 63-40.  See section 3.4.2 of AMD publication
	 * 24592: AMD x86-64 Architecture Programmer's Manual Volume 1
	 * Application Programming.
	 */
	addr = sys_addr & 0x000000ffffffffffull;

	return ((addr >= get_dram_base(pvt, nid)) &&
		(addr <= get_dram_limit(pvt, nid)));
}

/*
 * Attempt to map a SysAddr to a node. On success, return a pointer to the
 * mem_ctl_info structure for the node that the SysAddr maps to.
 *
 * On failure, return NULL.
 */
static struct mem_ctl_info *find_mc_by_sys_addr(struct mem_ctl_info *mci,
						u64 sys_addr)
{
	struct amd64_pvt *pvt;
	unsigned node_id;
	u32 intlv_en, bits;

	/*
	 * Here we use the DRAM Base (section 3.4.4.1) and DRAM Limit (section
	 * 3.4.4.2) registers to map the SysAddr to a node ID.
	 */
	pvt = mci->pvt_info;

	/*
	 * The value of this field should be the same for all DRAM Base
	 * registers.  Therefore we arbitrarily choose to read it from the
	 * register for node 0.
	 */
	intlv_en = dram_intlv_en(pvt, 0);

	if (intlv_en == 0) {
		for (node_id = 0; node_id < DRAM_RANGES; node_id++) {
			if (amd64_base_limit_match(pvt, sys_addr, node_id))
				goto found;
		}
		goto err_no_match;
	}

	if (unlikely((intlv_en != 0x01) &&
		     (intlv_en != 0x03) &&
		     (intlv_en != 0x07))) {
		amd64_warn("DRAM Base[IntlvEn] junk value: 0x%x, BIOS bug?\n", intlv_en);
		return NULL;
	}

	bits = (((u32) sys_addr) >> 12) & intlv_en;

	for (node_id = 0; ; ) {
		if ((dram_intlv_sel(pvt, node_id) & intlv_en) == bits)
			break;	/* intlv_sel field matches */

		if (++node_id >= DRAM_RANGES)
			goto err_no_match;
	}

	/* sanity test for sys_addr */
	if (unlikely(!amd64_base_limit_match(pvt, sys_addr, node_id))) {
		amd64_warn("%s: sys_addr 0x%llx falls outside base/limit address "
			   "range for node %d with node interleaving enabled.\n",
			   __func__, sys_addr, node_id);
		return NULL;
	}

found:
	return edac_mc_find((int)node_id);

err_no_match:
	edac_dbg(2, "sys_addr 0x%lx doesn't match any node\n",
		 (unsigned long)sys_addr);

	return NULL;
}
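
/*
 * Illustrative example (editor's note): with two-way node interleaving,
 * DRAM Base[IntlvEn] = 0x01 and find_mc_by_sys_addr() above uses SysAddr
 * bit 12 as the interleave selector.  A SysAddr with bit 12 set maps to the
 * node whose DRAM Limit[IntlvSel] field, masked with 0x01, equals 1; the
 * base/limit comparison afterwards is only a sanity test.
 */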

/*
 * compute the CS base address of the @csrow on the DRAM controller @dct.
 * For details see F2x[5C:40] in the processor's BKDG
 */
static void get_cs_base_and_mask(struct amd64_pvt *pvt, int csrow, u8 dct,
				 u64 *base, u64 *mask)
{
	u64 csbase, csmask, base_bits, mask_bits;
	u8 addr_shift;

	if (boot_cpu_data.x86 == 0xf && pvt->ext_model < K8_REV_F) {
		csbase		= pvt->csels[dct].csbases[csrow];
		csmask		= pvt->csels[dct].csmasks[csrow];
		base_bits	= GENMASK(21, 31) | GENMASK(9, 15);
		mask_bits	= GENMASK(21, 29) | GENMASK(9, 15);
		addr_shift	= 4;
	} else {
		csbase		= pvt->csels[dct].csbases[csrow];
		csmask		= pvt->csels[dct].csmasks[csrow >> 1];
		addr_shift	= 8;

		if (boot_cpu_data.x86 == 0x15)
			base_bits = mask_bits = GENMASK(19, 30) | GENMASK(5, 13);
		else
			base_bits = mask_bits = GENMASK(19, 28) | GENMASK(5, 13);
	}

	*base  = (csbase & base_bits) << addr_shift;

	*mask  = ~0ULL;
	/* poke holes for the csmask */
	*mask &= ~(mask_bits << addr_shift);
	/* OR them in */
	*mask |= (csmask & mask_bits) << addr_shift;
}

#define for_each_chip_select(i, dct, pvt) \
	for (i = 0; i < pvt->csels[dct].b_cnt; i++)

#define chip_select_base(i, dct, pvt) \
	pvt->csels[dct].csbases[i]

#define for_each_chip_select_mask(i, dct, pvt) \
	for (i = 0; i < pvt->csels[dct].m_cnt; i++)

/*
 * @input_addr is an InputAddr associated with the node given by mci. Return the
 * csrow that input_addr maps to, or -1 on failure (no csrow claims input_addr).
 */
static int input_addr_to_csrow(struct mem_ctl_info *mci, u64 input_addr)
{
	struct amd64_pvt *pvt;
	int csrow;
	u64 base, mask;

	pvt = mci->pvt_info;

	for_each_chip_select(csrow, 0, pvt) {
		if (!csrow_enabled(csrow, 0, pvt))
			continue;

		get_cs_base_and_mask(pvt, csrow, 0, &base, &mask);

		mask = ~mask;

		if ((input_addr & mask) == (base & mask)) {
			edac_dbg(2, "InputAddr 0x%lx matches csrow %d (node %d)\n",
				 (unsigned long)input_addr, csrow,
				 pvt->mc_node_id);

			return csrow;
		}
	}
	edac_dbg(2, "no matching csrow for InputAddr 0x%lx (MC node %d)\n",
		 (unsigned long)input_addr, pvt->mc_node_id);

	return -1;
}

/*
 * Obtain info from the DRAM Hole Address Register (section 3.4.8, pub #26094)
 * for the node represented by mci. Info is passed back in *hole_base,
 * *hole_offset, and *hole_size.  Function returns 0 if info is valid or 1 if
 * info is invalid. Info may be invalid for either of the following reasons:
 *
 * - The revision of the node is not E or greater.  In this case, the DRAM Hole
 *   Address Register does not exist.
 *
 * - The DramHoleValid bit is cleared in the DRAM Hole Address Register,
 *   indicating that its contents are not valid.
 *
 * The values passed back in *hole_base, *hole_offset, and *hole_size are
 * complete 32-bit values despite the fact that the bitfields in the DHAR
 * only represent bits 31-24 of the base and offset values.
 */
int amd64_get_dram_hole_info(struct mem_ctl_info *mci, u64 *hole_base,
			     u64 *hole_offset, u64 *hole_size)
{
	struct amd64_pvt *pvt = mci->pvt_info;

	/* only revE and later have the DRAM Hole Address Register */
	if (boot_cpu_data.x86 == 0xf && pvt->ext_model < K8_REV_E) {
		edac_dbg(1, "  revision %d for node %d does not support DHAR\n",
			 pvt->ext_model, pvt->mc_node_id);
		return 1;
	}

	/* valid for Fam10h and above */
	if (boot_cpu_data.x86 >= 0x10 && !dhar_mem_hoist_valid(pvt)) {
		edac_dbg(1, "  Dram Memory Hoisting is DISABLED on this system\n");
		return 1;
	}

	if (!dhar_valid(pvt)) {
		edac_dbg(1, "  Dram Memory Hoisting is DISABLED on this node %d\n",
			 pvt->mc_node_id);
		return 1;
	}

	/* This node has Memory Hoisting */

	/* +------------------+--------------------+--------------------+-----
	 * | memory           | DRAM hole          | relocated          |
	 * | [0, (x - 1)]     | [x, 0xffffffff]    | addresses from     |
	 * |                  |                    | DRAM hole          |
	 * |                  |                    | [0x100000000,      |
	 * |                  |                    |  (0x100000000+     |
	 * |                  |                    |   (0xffffffff-x))] |
	 * +------------------+--------------------+--------------------+-----
	 *
	 * Above is a diagram of physical memory showing the DRAM hole and the
	 * relocated addresses from the DRAM hole.  As shown, the DRAM hole
	 * starts at address x (the base address) and extends through address
	 * 0xffffffff.  The DRAM Hole Address Register (DHAR) relocates the
	 * addresses in the hole so that they start at 0x100000000.
	 */

	*hole_base = dhar_base(pvt);
	*hole_size = (1ULL << 32) - *hole_base;

	if (boot_cpu_data.x86 > 0xf)
		*hole_offset = f10_dhar_offset(pvt);
	else
		*hole_offset = k8_dhar_offset(pvt);

	edac_dbg(1, "  DHAR info for node %d base 0x%lx offset 0x%lx size 0x%lx\n",
		 pvt->mc_node_id, (unsigned long)*hole_base,
		 (unsigned long)*hole_offset, (unsigned long)*hole_size);

	return 0;
}
EXPORT_SYMBOL_GPL(amd64_get_dram_hole_info);
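
/*
 * Illustrative example (editor's note): if the DHAR reports a hole base of
 * 0xc0000000, amd64_get_dram_hole_info() returns hole_base = 0xc0000000 and
 * hole_size = 0x100000000 - 0xc0000000 = 0x40000000 (1 GB), i.e. the DRAM
 * behind [0xc0000000, 0xffffffff] is re-addressed starting at 0x100000000.
 * hole_offset is the amount a relocated SysAddr must be lowered by to get
 * back to the corresponding DramAddr (see sys_addr_to_dram_addr() below).
 */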

/*
 * Return the DramAddr that the SysAddr given by @sys_addr maps to.  It is
 * assumed that sys_addr maps to the node given by mci.
 *
 * The first part of section 3.4.4 (p. 70) shows how the DRAM Base (section
 * 3.4.4.1) and DRAM Limit (section 3.4.4.2) registers are used to translate a
 * SysAddr to a DramAddr. If the DRAM Hole Address Register (DHAR) is enabled,
 * then it is also involved in translating a SysAddr to a DramAddr. Sections
 * 3.4.8 and 3.5.8.2 describe the DHAR and how it is used for memory hoisting.
 * These parts of the documentation are unclear. I interpret them as follows:
 *
 * When node n receives a SysAddr, it processes the SysAddr as follows:
 *
 * 1. It extracts the DRAMBase and DRAMLimit values from the DRAM Base and DRAM
 *    Limit registers for node n. If the SysAddr is not within the range
 *    specified by the base and limit values, then node n ignores the SysAddr
 *    (since it does not map to node n). Otherwise continue to step 2 below.
 *
 * 2. If the DramHoleValid bit of the DHAR for node n is clear, the DHAR is
 *    disabled so skip to step 3 below. Otherwise see if the SysAddr is within
 *    the range of relocated addresses (starting at 0x100000000) from the DRAM
 *    hole. If not, skip to step 3 below. Else get the value of the
 *    DramHoleOffset field from the DHAR. To obtain the DramAddr, subtract the
 *    offset defined by this value from the SysAddr.
 *
 * 3. Obtain the base address for node n from the DRAMBase field of the DRAM
 *    Base register for node n. To obtain the DramAddr, subtract the base
 *    address from the SysAddr, as shown near the start of section 3.4.4 (p.70).
 */
static u64 sys_addr_to_dram_addr(struct mem_ctl_info *mci, u64 sys_addr)
{
	struct amd64_pvt *pvt = mci->pvt_info;
	u64 dram_base, hole_base, hole_offset, hole_size, dram_addr;
	int ret;

	dram_base = get_dram_base(pvt, pvt->mc_node_id);

	ret = amd64_get_dram_hole_info(mci, &hole_base, &hole_offset,
				      &hole_size);
	if (!ret) {
		if ((sys_addr >= (1ULL << 32)) &&
		    (sys_addr < ((1ULL << 32) + hole_size))) {
			/* use DHAR to translate SysAddr to DramAddr */
			dram_addr = sys_addr - hole_offset;

			edac_dbg(2, "using DHAR to translate SysAddr 0x%lx to DramAddr 0x%lx\n",
				 (unsigned long)sys_addr,
				 (unsigned long)dram_addr);

			return dram_addr;
		}
	}

	/*
	 * Translate the SysAddr to a DramAddr as shown near the start of
	 * section 3.4.4 (p. 70).  Although sys_addr is a 64-bit value, the k8
	 * only deals with 40-bit values.  Therefore we discard bits 63-40 of
	 * sys_addr below.  If bit 39 of sys_addr is 1 then the bits we
	 * discard are all 1s.  Otherwise the bits we discard are all 0s.  See
	 * section 3.4.2 of AMD publication 24592: AMD x86-64 Architecture
	 * Programmer's Manual Volume 1 Application Programming.
	 */
	dram_addr = (sys_addr & GENMASK(0, 39)) - dram_base;

	edac_dbg(2, "using DRAM Base register to translate SysAddr 0x%lx to DramAddr 0x%lx\n",
		 (unsigned long)sys_addr, (unsigned long)dram_addr);
	return dram_addr;
}

/*
 * @intlv_en is the value of the IntlvEn field from a DRAM Base register
 * (section 3.4.4.1).  Return the number of bits from a SysAddr that are used
 * for node interleaving.
 */
static int num_node_interleave_bits(unsigned intlv_en)
{
	static const int intlv_shift_table[] = { 0, 1, 0, 2, 0, 0, 0, 3 };
	int n;

	BUG_ON(intlv_en > 7);
	n = intlv_shift_table[intlv_en];
	return n;
}

/* Translate the DramAddr given by @dram_addr to an InputAddr. */
static u64 dram_addr_to_input_addr(struct mem_ctl_info *mci, u64 dram_addr)
{
	struct amd64_pvt *pvt;
	int intlv_shift;
	u64 input_addr;

	pvt = mci->pvt_info;

	/*
	 * See the start of section 3.4.4 (p. 70, BKDG #26094, K8, revA-E)
	 * concerning translating a DramAddr to an InputAddr.
	 */
	intlv_shift = num_node_interleave_bits(dram_intlv_en(pvt, 0));
	input_addr = ((dram_addr >> intlv_shift) & GENMASK(12, 35)) +
		      (dram_addr & 0xfff);

	edac_dbg(2, "  Intlv Shift=%d DramAddr=0x%lx maps to InputAddr=0x%lx\n",
		 intlv_shift, (unsigned long)dram_addr,
		 (unsigned long)input_addr);

	return input_addr;
}
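
/*
 * Illustrative example (editor's note): with a two-node interleave
 * (intlv_shift = 1) a DramAddr of 0x8000 yields an InputAddr of 0x4000:
 * bits [35:12] are shifted right by one while the low 12 bits are carried
 * over unchanged.  input_addr_to_dram_addr() below performs the inverse and
 * re-inserts the discarded interleave bits from the IntlvSel field.
 */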

/*
 * Translate the SysAddr represented by @sys_addr to an InputAddr.  It is
 * assumed that @sys_addr maps to the node given by mci.
 */
static u64 sys_addr_to_input_addr(struct mem_ctl_info *mci, u64 sys_addr)
{
	u64 input_addr;

	input_addr =
	    dram_addr_to_input_addr(mci, sys_addr_to_dram_addr(mci, sys_addr));

	edac_dbg(2, "SysAddr 0x%lx translates to InputAddr 0x%lx\n",
		 (unsigned long)sys_addr, (unsigned long)input_addr);

	return input_addr;
}


/*
 * @input_addr is an InputAddr associated with the node represented by mci.
 * Translate @input_addr to a DramAddr and return the result.
 */
static u64 input_addr_to_dram_addr(struct mem_ctl_info *mci, u64 input_addr)
{
	struct amd64_pvt *pvt;
	unsigned node_id, intlv_shift;
	u64 bits, dram_addr;
	u32 intlv_sel;

	/*
	 * The start of section 3.4.4 (p. 70, BKDG #26094, K8, revA-E) shows
	 * how to translate a DramAddr to an InputAddr. Here we reverse this
	 * procedure. When translating from a DramAddr to an InputAddr, the
	 * bits used for node interleaving are discarded.  Here we recover these
	 * bits from the IntlvSel field of the DRAM Limit register (section
	 * 3.4.4.2) for the node that input_addr is associated with.
	 */
	pvt = mci->pvt_info;
	node_id = pvt->mc_node_id;

	BUG_ON(node_id > 7);

	intlv_shift = num_node_interleave_bits(dram_intlv_en(pvt, 0));
	if (intlv_shift == 0) {
		edac_dbg(1, "    InputAddr 0x%lx translates to DramAddr of same value\n",
			 (unsigned long)input_addr);

		return input_addr;
	}

	bits = ((input_addr & GENMASK(12, 35)) << intlv_shift) +
		(input_addr & 0xfff);

	intlv_sel = dram_intlv_sel(pvt, node_id) & ((1 << intlv_shift) - 1);
	dram_addr = bits + (intlv_sel << 12);

	edac_dbg(1, "InputAddr 0x%lx translates to DramAddr 0x%lx (%d node interleave bits)\n",
		 (unsigned long)input_addr,
		 (unsigned long)dram_addr, intlv_shift);

	return dram_addr;
}

/*
 * @dram_addr is a DramAddr that maps to the node represented by mci. Convert
 * @dram_addr to a SysAddr.
 */
static u64 dram_addr_to_sys_addr(struct mem_ctl_info *mci, u64 dram_addr)
{
	struct amd64_pvt *pvt = mci->pvt_info;
	u64 hole_base, hole_offset, hole_size, base, sys_addr;
	int ret = 0;

	ret = amd64_get_dram_hole_info(mci, &hole_base, &hole_offset,
				      &hole_size);
	if (!ret) {
		if ((dram_addr >= hole_base) &&
		    (dram_addr < (hole_base + hole_size))) {
			sys_addr = dram_addr + hole_offset;

			edac_dbg(1, "using DHAR to translate DramAddr 0x%lx to SysAddr 0x%lx\n",
				 (unsigned long)dram_addr,
				 (unsigned long)sys_addr);

			return sys_addr;
		}
	}

	base     = get_dram_base(pvt, pvt->mc_node_id);
	sys_addr = dram_addr + base;

	/*
	 * The sys_addr we have computed up to this point is a 40-bit value
	 * because the k8 deals with 40-bit values.  However, the value we are
	 * supposed to return is a full 64-bit physical address.  The AMD
	 * x86-64 architecture specifies that the most significant implemented
	 * address bit through bit 63 of a physical address must be either all
	 * 0s or all 1s.  Therefore we sign-extend the 40-bit sys_addr to a
	 * 64-bit value below.  See section 3.4.2 of AMD publication 24592:
	 * AMD x86-64 Architecture Programmer's Manual Volume 1 Application
	 * Programming.
	 */
	sys_addr |= ~((sys_addr & (1ull << 39)) - 1);

	edac_dbg(1, "    Node %d, DramAddr 0x%lx to SysAddr 0x%lx\n",
		 pvt->mc_node_id, (unsigned long)dram_addr,
		 (unsigned long)sys_addr);

	return sys_addr;
}
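
/*
 * Illustrative example (editor's note): the sign extension above turns a
 * 40-bit value with bit 39 set, e.g. 0x8000000000, into 0xffffff8000000000,
 * while a value with bit 39 clear is returned unchanged -- matching the
 * architectural requirement that bits [63:40] mirror bit 39.
 */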

/*
 * @input_addr is an InputAddr associated with the node given by mci. Translate
 * @input_addr to a SysAddr.
 */
static inline u64 input_addr_to_sys_addr(struct mem_ctl_info *mci,
					 u64 input_addr)
{
	return dram_addr_to_sys_addr(mci,
				     input_addr_to_dram_addr(mci, input_addr));
}

/* Map the Error address to a PAGE and PAGE OFFSET. */
static inline void error_address_to_page_and_offset(u64 error_address,
						    u32 *page, u32 *offset)
{
	*page = (u32) (error_address >> PAGE_SHIFT);
	*offset = ((u32) error_address) & ~PAGE_MASK;
}

/*
 * @sys_addr is an error address (a SysAddr) extracted from the MCA NB Address
 * Low (section 3.6.4.5) and MCA NB Address High (section 3.6.4.6) registers
 * of a node that detected an ECC memory error.  mci represents the node that
 * the error address maps to (possibly different from the node that detected
 * the error).  Return the number of the csrow that sys_addr maps to, or -1 on
 * error.
 */
static int sys_addr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr)
{
	int csrow;

	csrow = input_addr_to_csrow(mci, sys_addr_to_input_addr(mci, sys_addr));

	if (csrow == -1)
		amd64_mc_err(mci, "Failed to translate InputAddr to csrow for "
				  "address 0x%lx\n", (unsigned long)sys_addr);
	return csrow;
}

static int get_channel_from_ecc_syndrome(struct mem_ctl_info *, u16);

/*
 * Determine if the DIMMs have ECC enabled. ECC is enabled ONLY if all the DIMMs
 * are ECC capable.
 */
static unsigned long amd64_determine_edac_cap(struct amd64_pvt *pvt)
{
	u8 bit;
	unsigned long edac_cap = EDAC_FLAG_NONE;

	bit = (boot_cpu_data.x86 > 0xf || pvt->ext_model >= K8_REV_F)
		? 19
		: 17;

	if (pvt->dclr0 & BIT(bit))
		edac_cap = EDAC_FLAG_SECDED;

	return edac_cap;
}

static void amd64_debug_display_dimm_sizes(struct amd64_pvt *, u8);

static void amd64_dump_dramcfg_low(u32 dclr, int chan)
{
	edac_dbg(1, "F2x%d90 (DRAM Cfg Low): 0x%08x\n", chan, dclr);

	edac_dbg(1, "  DIMM type: %sbuffered; all DIMMs support ECC: %s\n",
		 (dclr & BIT(16)) ?  "un" : "",
		 (dclr & BIT(19)) ? "yes" : "no");

	edac_dbg(1, "  PAR/ERR parity: %s\n",
		 (dclr & BIT(8)) ?  "enabled" : "disabled");

	if (boot_cpu_data.x86 == 0x10)
		edac_dbg(1, "  DCT 128bit mode width: %s\n",
			 (dclr & BIT(11)) ?  "128b" : "64b");

	edac_dbg(1, "  x4 logical DIMMs present: L0: %s L1: %s L2: %s L3: %s\n",
		 (dclr & BIT(12)) ?  "yes" : "no",
		 (dclr & BIT(13)) ?  "yes" : "no",
		 (dclr & BIT(14)) ?  "yes" : "no",
		 (dclr & BIT(15)) ?  "yes" : "no");
}

/* Display and decode various NB registers for debug purposes. */
static void dump_misc_regs(struct amd64_pvt *pvt)
{
	edac_dbg(1, "F3xE8 (NB Cap): 0x%08x\n", pvt->nbcap);

	edac_dbg(1, "  NB two channel DRAM capable: %s\n",
		 (pvt->nbcap & NBCAP_DCT_DUAL) ? "yes" : "no");

	edac_dbg(1, "  ECC capable: %s, ChipKill ECC capable: %s\n",
		 (pvt->nbcap & NBCAP_SECDED) ? "yes" : "no",
		 (pvt->nbcap & NBCAP_CHIPKILL) ? "yes" : "no");

	amd64_dump_dramcfg_low(pvt->dclr0, 0);

	edac_dbg(1, "F3xB0 (Online Spare): 0x%08x\n", pvt->online_spare);

	edac_dbg(1, "F1xF0 (DRAM Hole Address): 0x%08x, base: 0x%08x, offset: 0x%08x\n",
		 pvt->dhar, dhar_base(pvt),
		 (boot_cpu_data.x86 == 0xf) ? k8_dhar_offset(pvt)
		 : f10_dhar_offset(pvt));

	edac_dbg(1, "  DramHoleValid: %s\n", dhar_valid(pvt) ? "yes" : "no");

	amd64_debug_display_dimm_sizes(pvt, 0);

	/* everything below this point is Fam10h and above */
	if (boot_cpu_data.x86 == 0xf)
		return;

	amd64_debug_display_dimm_sizes(pvt, 1);

	amd64_info("using %s syndromes.\n", ((pvt->ecc_sym_sz == 8) ? "x8" : "x4"));

	/* Only if NOT ganged does dclr1 have valid info */
	if (!dct_ganging_enabled(pvt))
		amd64_dump_dramcfg_low(pvt->dclr1, 1);
}

/*
 * see BKDG, F2x[1,0][5C:40], F2x[1,0][6C:60]
 */
static void prep_chip_selects(struct amd64_pvt *pvt)
{
	if (boot_cpu_data.x86 == 0xf && pvt->ext_model < K8_REV_F) {
		pvt->csels[0].b_cnt = pvt->csels[1].b_cnt = 8;
		pvt->csels[0].m_cnt = pvt->csels[1].m_cnt = 8;
	} else {
		pvt->csels[0].b_cnt = pvt->csels[1].b_cnt = 8;
		pvt->csels[0].m_cnt = pvt->csels[1].m_cnt = 4;
	}
}
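
/*
 * Editor's note: the counts above mean that pre-revF K8 parts expose one
 * mask register per chip select (8 bases / 8 masks per DCT), while revF and
 * later parts pair chip selects so that two bases share a single mask
 * (8 bases / 4 masks) -- which is why get_cs_base_and_mask() above indexes
 * csmasks[] with csrow >> 1 on those parts.
 */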

/*
 * Function 2 Offset F10_DCSB0; read in the DCS Base and DCS Mask registers
 */
static void read_dct_base_mask(struct amd64_pvt *pvt)
{
	int cs;

	prep_chip_selects(pvt);

	for_each_chip_select(cs, 0, pvt) {
		int reg0   = DCSB0 + (cs * 4);
		int reg1   = DCSB1 + (cs * 4);
		u32 *base0 = &pvt->csels[0].csbases[cs];
		u32 *base1 = &pvt->csels[1].csbases[cs];

		if (!amd64_read_dct_pci_cfg(pvt, reg0, base0))
			edac_dbg(0, "  DCSB0[%d]=0x%08x reg: F2x%x\n",
				 cs, *base0, reg0);

		if (boot_cpu_data.x86 == 0xf || dct_ganging_enabled(pvt))
			continue;

		if (!amd64_read_dct_pci_cfg(pvt, reg1, base1))
			edac_dbg(0, "  DCSB1[%d]=0x%08x reg: F2x%x\n",
				 cs, *base1, reg1);
	}

	for_each_chip_select_mask(cs, 0, pvt) {
		int reg0   = DCSM0 + (cs * 4);
		int reg1   = DCSM1 + (cs * 4);
		u32 *mask0 = &pvt->csels[0].csmasks[cs];
		u32 *mask1 = &pvt->csels[1].csmasks[cs];

		if (!amd64_read_dct_pci_cfg(pvt, reg0, mask0))
			edac_dbg(0, "    DCSM0[%d]=0x%08x reg: F2x%x\n",
				 cs, *mask0, reg0);

		if (boot_cpu_data.x86 == 0xf || dct_ganging_enabled(pvt))
			continue;

		if (!amd64_read_dct_pci_cfg(pvt, reg1, mask1))
			edac_dbg(0, "    DCSM1[%d]=0x%08x reg: F2x%x\n",
				 cs, *mask1, reg1);
	}
}

static enum mem_type amd64_determine_memory_type(struct amd64_pvt *pvt, int cs)
{
	enum mem_type type;

	/* F15h supports only DDR3 */
	if (boot_cpu_data.x86 >= 0x15)
		type = (pvt->dclr0 & BIT(16)) ? MEM_DDR3 : MEM_RDDR3;
	else if (boot_cpu_data.x86 == 0x10 || pvt->ext_model >= K8_REV_F) {
		if (pvt->dchr0 & DDR3_MODE)
			type = (pvt->dclr0 & BIT(16)) ? MEM_DDR3 : MEM_RDDR3;
		else
			type = (pvt->dclr0 & BIT(16)) ? MEM_DDR2 : MEM_RDDR2;
	} else {
		type = (pvt->dclr0 & BIT(18)) ? MEM_DDR : MEM_RDDR;
	}

	amd64_info("CS%d: %s\n", cs, edac_mem_types[type]);

	return type;
}

/* Get the number of DCT channels the memory controller is using. */
static int k8_early_channel_count(struct amd64_pvt *pvt)
{
	int flag;

	if (pvt->ext_model >= K8_REV_F)
		/* RevF (NPT) and later */
		flag = pvt->dclr0 & WIDTH_128;
	else
		/* RevE and earlier */
		flag = pvt->dclr0 & REVE_WIDTH_128;

	/* not used */
	pvt->dclr1 = 0;

	return (flag) ? 2 : 1;
}

/* On F10h and later ErrAddr is MC4_ADDR[47:1] */
static u64 get_error_address(struct mce *m)
{
	struct cpuinfo_x86 *c = &boot_cpu_data;
	u64 addr;
	u8 start_bit = 1;
	u8 end_bit   = 47;

	if (c->x86 == 0xf) {
		start_bit = 3;
		end_bit   = 39;
	}

	addr = m->addr & GENMASK(start_bit, end_bit);

	/*
	 * Erratum 637 workaround
	 */
	if (c->x86 == 0x15) {
		struct amd64_pvt *pvt;
		u64 cc6_base, tmp_addr;
		u32 tmp;
		u8 mce_nid, intlv_en;

		if ((addr & GENMASK(24, 47)) >> 24 != 0x00fdf7)
			return addr;

		mce_nid	= amd_get_nb_id(m->extcpu);
		pvt	= mcis[mce_nid]->pvt_info;

		amd64_read_pci_cfg(pvt->F1, DRAM_LOCAL_NODE_LIM, &tmp);
		intlv_en = tmp >> 21 & 0x7;

		/* add [47:27] + 3 trailing bits */
		cc6_base  = (tmp & GENMASK(0, 20)) << 3;

		/* reverse and add DramIntlvEn */
		cc6_base |= intlv_en ^ 0x7;

		/* pin at [47:24] */
		cc6_base <<= 24;

		if (!intlv_en)
			return cc6_base | (addr & GENMASK(0, 23));

		amd64_read_pci_cfg(pvt->F1, DRAM_LOCAL_NODE_BASE, &tmp);

							/* faster log2 */
		tmp_addr  = (addr & GENMASK(12, 23)) << __fls(intlv_en + 1);

		/* OR DramIntlvSel into bits [14:12] */
		tmp_addr |= (tmp & GENMASK(21, 23)) >> 9;

		/* add remaining [11:0] bits from original MC4_ADDR */
		tmp_addr |= addr & GENMASK(0, 11);

		return cc6_base | tmp_addr;
	}

	return addr;
}
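
/*
 * Illustrative note (editor's addition): get_error_address() above keeps
 * MC4_ADDR bits [39:3] on K8 and bits [47:1] on later families, so the low
 * one to three bits of the reported address are always discarded.  The
 * family 0x15 branch only rewrites addresses that decode to the CC6 save
 * area (bits [47:24] equal to 0x00fdf7).
 */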

static void read_dram_base_limit_regs(struct amd64_pvt *pvt, unsigned range)
{
	struct cpuinfo_x86 *c = &boot_cpu_data;
	int off = range << 3;

	amd64_read_pci_cfg(pvt->F1, DRAM_BASE_LO + off,  &pvt->ranges[range].base.lo);
	amd64_read_pci_cfg(pvt->F1, DRAM_LIMIT_LO + off, &pvt->ranges[range].lim.lo);

	if (c->x86 == 0xf)
		return;

	if (!dram_rw(pvt, range))
		return;

	amd64_read_pci_cfg(pvt->F1, DRAM_BASE_HI + off,  &pvt->ranges[range].base.hi);
	amd64_read_pci_cfg(pvt->F1, DRAM_LIMIT_HI + off, &pvt->ranges[range].lim.hi);

	/* Factor in CC6 save area by reading dst node's limit reg */
	if (c->x86 == 0x15) {
		struct pci_dev *f1 = NULL;
		u8 nid = dram_dst_node(pvt, range);
		u32 llim;

		f1 = pci_get_domain_bus_and_slot(0, 0, PCI_DEVFN(0x18 + nid, 1));
		if (WARN_ON(!f1))
			return;

		amd64_read_pci_cfg(f1, DRAM_LOCAL_NODE_LIM, &llim);

		pvt->ranges[range].lim.lo &= GENMASK(0, 15);

					    /* {[39:27],111b} */
		pvt->ranges[range].lim.lo |= ((llim & 0x1fff) << 3 | 0x7) << 16;

		pvt->ranges[range].lim.hi &= GENMASK(0, 7);

					    /* [47:40] */
		pvt->ranges[range].lim.hi |= llim >> 13;

		pci_dev_put(f1);
	}
}

static void k8_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr,
				    u16 syndrome)
{
	struct mem_ctl_info *src_mci;
	struct amd64_pvt *pvt = mci->pvt_info;
	int channel, csrow;
	u32 page, offset;

	error_address_to_page_and_offset(sys_addr, &page, &offset);

	/*
	 * Find out which node the error address belongs to. This may be
	 * different from the node that detected the error.
	 */
	src_mci = find_mc_by_sys_addr(mci, sys_addr);
	if (!src_mci) {
		amd64_mc_err(mci, "failed to map error addr 0x%lx to a node\n",
			     (unsigned long)sys_addr);
		edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
				     page, offset, syndrome,
				     -1, -1, -1,
				     "failed to map error addr to a node",
				     "");
		return;
	}

	/* Now map the sys_addr to a CSROW */
	csrow = sys_addr_to_csrow(src_mci, sys_addr);
	if (csrow < 0) {
		edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
				     page, offset, syndrome,
				     -1, -1, -1,
				     "failed to map error addr to a csrow",
				     "");
		return;
	}

	/* CHIPKILL enabled */
	if (pvt->nbcfg & NBCFG_CHIPKILL) {
		channel = get_channel_from_ecc_syndrome(mci, syndrome);
		if (channel < 0) {
			/*
			 * Syndrome didn't map, so we don't know which of the
			 * 2 DIMMs is in error. So we need to ID 'both' of them
			 * as suspect.
			 */
			amd64_mc_warn(src_mci, "unknown syndrome 0x%04x - "
				      "possible error reporting race\n",
				      syndrome);
			edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
					     page, offset, syndrome,
					     csrow, -1, -1,
					     "unknown syndrome - possible error reporting race",
					     "");
			return;
		}
	} else {
		/*
		 * non-chipkill ecc mode
		 *
		 * The k8 documentation is unclear about how to determine the
		 * channel number when using non-chipkill memory.  This method
		 * was obtained from email communication with someone at AMD.
		 * (Wish the email was placed in this comment - norsk)
		 */
		channel = ((sys_addr & BIT(3)) != 0);
	}

	edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, src_mci, 1,
			     page, offset, syndrome,
			     csrow, channel, -1,
			     "", "");
}

static int ddr2_cs_size(unsigned i, bool dct_width)
{
	unsigned shift = 0;

	if (i <= 2)
		shift = i;
	else if (!(i & 0x1))
		shift = i >> 1;
	else
		shift = (i + 1) >> 1;

	return 128 << (shift + !!dct_width);
}
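
/*
 * Illustrative example (editor's note): ddr2_cs_size(5, false) computes
 * shift = (5 + 1) >> 1 = 3 and returns 128 << 3 = 1024 (MB); passing
 * dct_width = true doubles the result, reflecting a 128-bit wide DCT.
 */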

static int k8_dbam_to_chip_select(struct amd64_pvt *pvt, u8 dct,
				  unsigned cs_mode)
{
	u32 dclr = dct ? pvt->dclr1 : pvt->dclr0;

	if (pvt->ext_model >= K8_REV_F) {
		WARN_ON(cs_mode > 11);
		return ddr2_cs_size(cs_mode, dclr & WIDTH_128);
	} else if (pvt->ext_model >= K8_REV_D) {
		unsigned diff;
		WARN_ON(cs_mode > 10);

		/*
		 * the below calculation, besides trying to win an obfuscated C
		 * contest, maps cs_mode values to DIMM chip select sizes. The
		 * mappings are:
		 *
		 * cs_mode	CS size (mb)
		 * =======	============
		 * 0		32
		 * 1		64
		 * 2		128
		 * 3		128
		 * 4		256
		 * 5		512
		 * 6		256
		 * 7		512
		 * 8		1024
		 * 9		1024
		 * 10		2048
		 *
		 * Basically, it calculates a value with which to shift the
		 * smallest CS size of 32MB.
		 *
		 * ddr[23]_cs_size have a similar purpose.
		 */
		diff = cs_mode/3 + (unsigned)(cs_mode > 5);

		return 32 << (cs_mode - diff);
	} else {
		WARN_ON(cs_mode > 6);
		return 32 << cs_mode;
	}
}

/*
 * Get the number of DCT channels in use.
 *
 * Return:
 *	number of Memory Channels in operation
 * Pass back:
 *	contents of the DCL0_LOW register
 */
static int f1x_early_channel_count(struct amd64_pvt *pvt)
{
	int i, j, channels = 0;

	/* On F10h, if we are in 128 bit mode, then we are using 2 channels */
	if (boot_cpu_data.x86 == 0x10 && (pvt->dclr0 & WIDTH_128))
		return 2;

	/*
	 * Need to check if in unganged mode: In such, there are 2 channels,
	 * but they are not in 128 bit mode and thus the above 'dclr0' status
	 * bit will be OFF.
	 *
	 * Need to check DCT0[0] and DCT1[0] to see if only one of them has
	 * their CSEnable bit on. If so, then SINGLE DIMM case.
	 */
	edac_dbg(0, "Data width is not 128 bits - need more decoding\n");

	/*
	 * Check DRAM Bank Address Mapping values for each DIMM to see if there
	 * is more than just one DIMM present in unganged mode. Need to check
	 * both controllers since DIMMs can be placed in either one.
	 */
	for (i = 0; i < 2; i++) {
		u32 dbam = (i ? pvt->dbam1 : pvt->dbam0);

		for (j = 0; j < 4; j++) {
			if (DBAM_DIMM(j, dbam) > 0) {
				channels++;
				break;
			}
		}
	}

	if (channels > 2)
		channels = 2;

	amd64_info("MCT channel count: %d\n", channels);

	return channels;
}

static int ddr3_cs_size(unsigned i, bool dct_width)
{
	unsigned shift = 0;
	int cs_size = 0;

	if (i == 0 || i == 3 || i == 4)
		cs_size = -1;
	else if (i <= 2)
		shift = i;
	else if (i == 12)
		shift = 7;
	else if (!(i & 0x1))
		shift = i >> 1;
	else
		shift = (i + 1) >> 1;

	if (cs_size != -1)
		cs_size = (128 * (1 << !!dct_width)) << shift;

	return cs_size;
}

static int f10_dbam_to_chip_select(struct amd64_pvt *pvt, u8 dct,
				   unsigned cs_mode)
{
	u32 dclr = dct ? pvt->dclr1 : pvt->dclr0;

	WARN_ON(cs_mode > 11);

	if (pvt->dchr0 & DDR3_MODE || pvt->dchr1 & DDR3_MODE)
		return ddr3_cs_size(cs_mode, dclr & WIDTH_128);
	else
		return ddr2_cs_size(cs_mode, dclr & WIDTH_128);
}

/*
 * F15h supports only 64bit DCT interfaces
 */
static int f15_dbam_to_chip_select(struct amd64_pvt *pvt, u8 dct,
				   unsigned cs_mode)
{
	WARN_ON(cs_mode > 12);

	return ddr3_cs_size(cs_mode, false);
}

static void read_dram_ctl_register(struct amd64_pvt *pvt)
{

	if (boot_cpu_data.x86 == 0xf)
		return;

	if (!amd64_read_dct_pci_cfg(pvt, DCT_SEL_LO, &pvt->dct_sel_lo)) {
		edac_dbg(0, "F2x110 (DCTSelLow): 0x%08x, High range addrs at: 0x%x\n",
			 pvt->dct_sel_lo, dct_sel_baseaddr(pvt));

		edac_dbg(0, "  DCTs operate in %s mode\n",
			 (dct_ganging_enabled(pvt) ? "ganged" : "unganged"));

		if (!dct_ganging_enabled(pvt))
			edac_dbg(0, "  Address range split per DCT: %s\n",
				 (dct_high_range_enabled(pvt) ? "yes" : "no"));

		edac_dbg(0, "  data interleave for ECC: %s, DRAM cleared since last warm reset: %s\n",
			 (dct_data_intlv_enabled(pvt) ? "enabled" : "disabled"),
			 (dct_memory_cleared(pvt) ? "yes" : "no"));

		edac_dbg(0, "  channel interleave: %s, "
			 "interleave bits selector: 0x%x\n",
			 (dct_interleave_enabled(pvt) ? "enabled" : "disabled"),
			 dct_sel_interleave_addr(pvt));
	}

	amd64_read_dct_pci_cfg(pvt, DCT_SEL_HI, &pvt->dct_sel_hi);
}

/*
 * Determine channel (DCT) based on the interleaving mode: F10h BKDG, 2.8.9
 * Memory Interleaving Modes.
 */
static u8 f1x_determine_channel(struct amd64_pvt *pvt, u64 sys_addr,
				bool hi_range_sel, u8 intlv_en)
{
	u8 dct_sel_high = (pvt->dct_sel_lo >> 1) & 1;

	if (dct_ganging_enabled(pvt))
		return 0;

	if (hi_range_sel)
		return dct_sel_high;

	/*
	 * see F2x110[DctSelIntLvAddr] - channel interleave mode
	 */
	if (dct_interleave_enabled(pvt)) {
		u8 intlv_addr = dct_sel_interleave_addr(pvt);

		/* return DCT select function: 0=DCT0, 1=DCT1 */
		if (!intlv_addr)
			return sys_addr >> 6 & 1;

		if (intlv_addr & 0x2) {
			u8 shift = intlv_addr & 0x1 ? 9 : 6;
			u32 temp = hweight_long((u32) ((sys_addr >> 16) & 0x1F)) % 2;

			return ((sys_addr >> shift) & 1) ^ temp;
		}

		return (sys_addr >> (12 + hweight8(intlv_en))) & 1;
	}

	if (dct_high_range_enabled(pvt))
		return ~dct_sel_high & 1;

	return 0;
}
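
/*
 * Illustrative example (editor's note): with channel interleaving enabled
 * and F2x110[DctSelIntLvAddr] = 0, f1x_determine_channel() above simply uses
 * address bit 6: a sys_addr with bit 6 clear is routed to DCT0 and one with
 * bit 6 set to DCT1.  The hash variants (DctSelIntLvAddr = 2 or 3) XOR in a
 * parity bit computed over sys_addr[20:16].
 */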

/* Convert the sys_addr to the normalized DCT address */
static u64 f1x_get_norm_dct_addr(struct amd64_pvt *pvt, unsigned range,
				 u64 sys_addr, bool hi_rng,
				 u32 dct_sel_base_addr)
{
	u64 chan_off;
	u64 dram_base		= get_dram_base(pvt, range);
	u64 hole_off		= f10_dhar_offset(pvt);
	u64 dct_sel_base_off	= (pvt->dct_sel_hi & 0xFFFFFC00) << 16;

	if (hi_rng) {
		/*
		 * if
		 * base address of high range is below 4Gb
		 * (bits [47:27] at [31:11])
		 * DRAM address space on this DCT is hoisted above 4Gb	&&
		 * sys_addr > 4Gb
		 *
		 *	remove hole offset from sys_addr
		 * else
		 *	remove high range offset from sys_addr
		 */
		if ((!(dct_sel_base_addr >> 16) ||
		     dct_sel_base_addr < dhar_base(pvt)) &&
		    dhar_valid(pvt) &&
		    (sys_addr >= BIT_64(32)))
			chan_off = hole_off;
		else
			chan_off = dct_sel_base_off;
	} else {
		/*
		 * if
		 * we have a valid hole		&&
		 * sys_addr > 4Gb
		 *
		 *	remove hole
		 * else
		 *	remove dram base to normalize to DCT address
		 */
		if (dhar_valid(pvt) && (sys_addr >= BIT_64(32)))
			chan_off = hole_off;
		else
			chan_off = dram_base;
	}

	return (sys_addr & GENMASK(6, 47)) - (chan_off & GENMASK(23, 47));
}

/*
 * Check if the csrow passed in is marked as SPARED; if so, return the new
 * spare row.
 */
static int f10_process_possible_spare(struct amd64_pvt *pvt, u8 dct, int csrow)
{
	int tmp_cs;

	if (online_spare_swap_done(pvt, dct) &&
	    csrow == online_spare_bad_dramcs(pvt, dct)) {

		for_each_chip_select(tmp_cs, dct, pvt) {
			if (chip_select_base(tmp_cs, dct, pvt) & 0x2) {
				csrow = tmp_cs;
				break;
			}
		}
	}
	return csrow;
}

/*
 * Iterate over the DRAM DCT "base" and "mask" registers looking for a
 * SystemAddr match on the specified 'ChannelSelect' and 'NodeID'
 *
 * Return:
 *	-EINVAL:  NOT FOUND
 *	0..csrow = Chip-Select Row
 */
static int f1x_lookup_addr_in_dct(u64 in_addr, u32 nid, u8 dct)
{
	struct mem_ctl_info *mci;
	struct amd64_pvt *pvt;
	u64 cs_base, cs_mask;
	int cs_found = -EINVAL;
	int csrow;

	mci = mcis[nid];
	if (!mci)
		return cs_found;

	pvt = mci->pvt_info;

	edac_dbg(1, "input addr: 0x%llx, DCT: %d\n", in_addr, dct);

	for_each_chip_select(csrow, dct, pvt) {
		if (!csrow_enabled(csrow, dct, pvt))
			continue;

		get_cs_base_and_mask(pvt, csrow, dct, &cs_base, &cs_mask);

		edac_dbg(1, "    CSROW=%d CSBase=0x%llx CSMask=0x%llx\n",
			 csrow, cs_base, cs_mask);

		cs_mask = ~cs_mask;

		edac_dbg(1, "    (InputAddr & ~CSMask)=0x%llx (CSBase & ~CSMask)=0x%llx\n",
			 (in_addr & cs_mask), (cs_base & cs_mask));

		if ((in_addr & cs_mask) == (cs_base & cs_mask)) {
			cs_found = f10_process_possible_spare(pvt, dct, csrow);

			edac_dbg(1, " MATCH csrow=%d\n", cs_found);
			break;
		}
	}
	return cs_found;
}

/*
 * See F2x10C. Non-interleaved graphics framebuffer memory below the 16G
 * boundary is swapped with a region located at the bottom of memory so that
 * the GPU can use the interleaved region and thus two channels.
 */
static u64 f1x_swap_interleaved_region(struct amd64_pvt *pvt, u64 sys_addr)
{
	u32 swap_reg, swap_base, swap_limit, rgn_size, tmp_addr;

	if (boot_cpu_data.x86 == 0x10) {
		/* only revC3 and revE have that feature */
		if (boot_cpu_data.x86_model < 4 ||
		    (boot_cpu_data.x86_model < 0xa &&
		     boot_cpu_data.x86_mask < 3))
			return sys_addr;
	}

	amd64_read_dct_pci_cfg(pvt, SWAP_INTLV_REG, &swap_reg);

	if (!(swap_reg & 0x1))
		return sys_addr;

	swap_base	= (swap_reg >> 3) & 0x7f;
	swap_limit	= (swap_reg >> 11) & 0x7f;
	rgn_size	= (swap_reg >> 20) & 0x7f;
	tmp_addr	= sys_addr >> 27;

	if (!(sys_addr >> 34) &&
	    (((tmp_addr >= swap_base) &&
	     (tmp_addr <= swap_limit)) ||
	     (tmp_addr < rgn_size)))
		return sys_addr ^ (u64)swap_base << 27;

	return sys_addr;
}

/* For a given @dram_range, check if @sys_addr falls within it. */
static int f1x_match_to_this_node(struct amd64_pvt *pvt, unsigned range,
				  u64 sys_addr, int *nid, int *chan_sel)
{
	int cs_found = -EINVAL;
	u64 chan_addr;
	u32 dct_sel_base;
	u8 channel;
	bool high_range = false;

	u8 node_id    = dram_dst_node(pvt, range);
	u8 intlv_en   = dram_intlv_en(pvt, range);
	u32 intlv_sel = dram_intlv_sel(pvt, range);

	edac_dbg(1, "(range %d) SystemAddr= 0x%llx Limit=0x%llx\n",
		 range, sys_addr, get_dram_limit(pvt, range));

	if (dhar_valid(pvt) &&
	    dhar_base(pvt) <= sys_addr &&
	    sys_addr < BIT_64(32)) {
		amd64_warn("Huh? Address is in the MMIO hole: 0x%016llx\n",
			    sys_addr);
		return -EINVAL;
	}

	if (intlv_en && (intlv_sel != ((sys_addr >> 12) & intlv_en)))
		return -EINVAL;

	sys_addr = f1x_swap_interleaved_region(pvt, sys_addr);

	dct_sel_base = dct_sel_baseaddr(pvt);

	/*
	 * check whether addresses >= DctSelBaseAddr[47:27] are to be used to
	 * select between DCT0 and DCT1.
	 */
	if (dct_high_range_enabled(pvt) &&
	   !dct_ganging_enabled(pvt) &&
	   ((sys_addr >> 27) >= (dct_sel_base >> 11)))
		high_range = true;

	channel = f1x_determine_channel(pvt, sys_addr, high_range, intlv_en);

	chan_addr = f1x_get_norm_dct_addr(pvt, range, sys_addr,
					  high_range, dct_sel_base);

	/* Remove node interleaving, see F1x120 */
	if (intlv_en)
		chan_addr = ((chan_addr >> (12 + hweight8(intlv_en))) << 12) |
			    (chan_addr & 0xfff);

	/* remove channel interleave */
	if (dct_interleave_enabled(pvt) &&
	   !dct_high_range_enabled(pvt) &&
	   !dct_ganging_enabled(pvt)) {

		if (dct_sel_interleave_addr(pvt) != 1) {
			if (dct_sel_interleave_addr(pvt) == 0x3)
				/* hash 9 */
				chan_addr = ((chan_addr >> 10) << 9) |
					     (chan_addr & 0x1ff);
			else
				/* A[6] or hash 6 */
				chan_addr = ((chan_addr >> 7) << 6) |
					     (chan_addr & 0x3f);
		} else
			/* A[12] */
			chan_addr = ((chan_addr >> 13) << 12) |
				     (chan_addr & 0xfff);
	}

	edac_dbg(1, "   Normalized DCT addr: 0x%llx\n", chan_addr);

	cs_found = f1x_lookup_addr_in_dct(chan_addr, node_id, channel);

	if (cs_found >= 0) {
		*nid = node_id;
		*chan_sel = channel;
	}
	return cs_found;
}

static int f1x_translate_sysaddr_to_cs(struct amd64_pvt *pvt, u64 sys_addr,
				       int *node, int *chan_sel)
{
	int cs_found = -EINVAL;
	unsigned range;

	for (range = 0; range < DRAM_RANGES; range++) {

		if (!dram_rw(pvt, range))
			continue;

		if ((get_dram_base(pvt, range)  <= sys_addr) &&
		    (get_dram_limit(pvt, range) >= sys_addr)) {

			cs_found = f1x_match_to_this_node(pvt, range,
							  sys_addr, node,
							  chan_sel);
			if (cs_found >= 0)
				break;
		}
	}
	return cs_found;
}

/*
 * For reference see "2.8.5 Routing DRAM Requests" in F10 BKDG. This code maps
 * a @sys_addr to NodeID, DCT (channel) and chip select (CSROW).
 *
 * The @sys_addr is usually an error address received from the hardware
 * (MCX_ADDR).
 */
static void f1x_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr,
				     u16 syndrome)
{
	struct amd64_pvt *pvt = mci->pvt_info;
	u32 page, offset;
	int nid, csrow, chan = 0;

	error_address_to_page_and_offset(sys_addr, &page, &offset);

	csrow = f1x_translate_sysaddr_to_cs(pvt, sys_addr, &nid, &chan);

	if (csrow < 0) {
		edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
				     page, offset, syndrome,
				     -1, -1, -1,
				     "failed to map error addr to a csrow",
				     "");
		return;
	}

	/*
	 * We need the syndromes for channel detection only when we're
	 * ganged. Otherwise @chan should already contain the channel at
	 * this point.
	 */
	if (dct_ganging_enabled(pvt))
		chan = get_channel_from_ecc_syndrome(mci, syndrome);

	edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
			     page, offset, syndrome,
			     csrow, chan, -1,
			     "", "");
}

/*
 * debug routine to display the memory sizes of all logical DIMMs and their
 * CSROWs
 */
static void amd64_debug_display_dimm_sizes(struct amd64_pvt *pvt, u8 ctrl)
{
	int dimm, size0, size1, factor = 0;
	u32 *dcsb = ctrl ? pvt->csels[1].csbases : pvt->csels[0].csbases;
	u32 dbam  = ctrl ? pvt->dbam1 : pvt->dbam0;

	if (boot_cpu_data.x86 == 0xf) {
		if (pvt->dclr0 & WIDTH_128)
			factor = 1;

		/* K8 families < revF not supported yet */
		if (pvt->ext_model < K8_REV_F)
			return;
		else
			WARN_ON(ctrl != 0);
	}

	dbam = (ctrl && !dct_ganging_enabled(pvt)) ? pvt->dbam1 : pvt->dbam0;
	dcsb = (ctrl && !dct_ganging_enabled(pvt)) ? pvt->csels[1].csbases
						   : pvt->csels[0].csbases;

	edac_dbg(1, "F2x%d80 (DRAM Bank Address Mapping): 0x%08x\n",
		 ctrl, dbam);

	edac_printk(KERN_DEBUG, EDAC_MC, "DCT%d chip selects:\n", ctrl);

	/* Dump memory sizes for DIMM and its CSROWs */
	for (dimm = 0; dimm < 4; dimm++) {

		size0 = 0;
		if (dcsb[dimm*2] & DCSB_CS_ENABLE)
			size0 = pvt->ops->dbam_to_cs(pvt, ctrl,
						     DBAM_DIMM(dimm, dbam));

		size1 = 0;
		if (dcsb[dimm*2 + 1] & DCSB_CS_ENABLE)
			size1 = pvt->ops->dbam_to_cs(pvt, ctrl,
						     DBAM_DIMM(dimm, dbam));

		amd64_info(EDAC_MC ": %d: %5dMB %d: %5dMB\n",
				dimm * 2,     size0 << factor,
				dimm * 2 + 1, size1 << factor);
	}
}

static struct amd64_family_type amd64_family_types[] = {
	[K8_CPUS] = {
		.ctl_name = "K8",
		.f1_id = PCI_DEVICE_ID_AMD_K8_NB_ADDRMAP,
		.f3_id = PCI_DEVICE_ID_AMD_K8_NB_MISC,
		.ops = {
			.early_channel_count	= k8_early_channel_count,
			.map_sysaddr_to_csrow	= k8_map_sysaddr_to_csrow,
			.dbam_to_cs		= k8_dbam_to_chip_select,
			.read_dct_pci_cfg	= k8_read_dct_pci_cfg,
		}
	},
	[F10_CPUS] = {
		.ctl_name = "F10h",
		.f1_id = PCI_DEVICE_ID_AMD_10H_NB_MAP,
		.f3_id = PCI_DEVICE_ID_AMD_10H_NB_MISC,
		.ops = {
			.early_channel_count	= f1x_early_channel_count,
			.map_sysaddr_to_csrow	= f1x_map_sysaddr_to_csrow,
			.dbam_to_cs		= f10_dbam_to_chip_select,
			.read_dct_pci_cfg	= f10_read_dct_pci_cfg,
		}
	},
	[F15_CPUS] = {
		.ctl_name = "F15h",
		.f1_id = PCI_DEVICE_ID_AMD_15H_NB_F1,
		.f3_id = PCI_DEVICE_ID_AMD_15H_NB_F3,
		.ops = {
			.early_channel_count	= f1x_early_channel_count,
			.map_sysaddr_to_csrow	= f1x_map_sysaddr_to_csrow,
			.dbam_to_cs		= f15_dbam_to_chip_select,
			.read_dct_pci_cfg	= f15_read_dct_pci_cfg,
		}
	},
};

static struct pci_dev *pci_get_related_function(unsigned int vendor,
						unsigned int device,
						struct pci_dev *related)
{
	struct pci_dev *dev = NULL;

	dev = pci_get_device(vendor, device, dev);
	while (dev) {
		if ((dev->bus->number == related->bus->number) &&
		    (PCI_SLOT(dev->devfn) == PCI_SLOT(related->devfn)))
			break;
		dev = pci_get_device(vendor, device, dev);
	}

	return dev;
}

1729
/*
1730 1731 1732
 * These are tables of eigenvectors (one per line) which can be used for the
 * construction of the syndrome tables. The modified syndrome search algorithm
 * uses those to find the symbol in error and thus the DIMM.
1733
 *
1734
 * Algorithm courtesy of Ross LaFetra from AMD.
1735
 */
1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772
static u16 x4_vectors[] = {
	0x2f57, 0x1afe, 0x66cc, 0xdd88,
	0x11eb, 0x3396, 0x7f4c, 0xeac8,
	0x0001, 0x0002, 0x0004, 0x0008,
	0x1013, 0x3032, 0x4044, 0x8088,
	0x106b, 0x30d6, 0x70fc, 0xe0a8,
	0x4857, 0xc4fe, 0x13cc, 0x3288,
	0x1ac5, 0x2f4a, 0x5394, 0xa1e8,
	0x1f39, 0x251e, 0xbd6c, 0x6bd8,
	0x15c1, 0x2a42, 0x89ac, 0x4758,
	0x2b03, 0x1602, 0x4f0c, 0xca08,
	0x1f07, 0x3a0e, 0x6b04, 0xbd08,
	0x8ba7, 0x465e, 0x244c, 0x1cc8,
	0x2b87, 0x164e, 0x642c, 0xdc18,
	0x40b9, 0x80de, 0x1094, 0x20e8,
	0x27db, 0x1eb6, 0x9dac, 0x7b58,
	0x11c1, 0x2242, 0x84ac, 0x4c58,
	0x1be5, 0x2d7a, 0x5e34, 0xa718,
	0x4b39, 0x8d1e, 0x14b4, 0x28d8,
	0x4c97, 0xc87e, 0x11fc, 0x33a8,
	0x8e97, 0x497e, 0x2ffc, 0x1aa8,
	0x16b3, 0x3d62, 0x4f34, 0x8518,
	0x1e2f, 0x391a, 0x5cac, 0xf858,
	0x1d9f, 0x3b7a, 0x572c, 0xfe18,
	0x15f5, 0x2a5a, 0x5264, 0xa3b8,
	0x1dbb, 0x3b66, 0x715c, 0xe3f8,
	0x4397, 0xc27e, 0x17fc, 0x3ea8,
	0x1617, 0x3d3e, 0x6464, 0xb8b8,
	0x23ff, 0x12aa, 0xab6c, 0x56d8,
	0x2dfb, 0x1ba6, 0x913c, 0x7328,
	0x185d, 0x2ca6, 0x7914, 0x9e28,
	0x171b, 0x3e36, 0x7d7c, 0xebe8,
	0x4199, 0x82ee, 0x19f4, 0x2e58,
	0x4807, 0xc40e, 0x130c, 0x3208,
	0x1905, 0x2e0a, 0x5804, 0xac08,
	0x213f, 0x132a, 0xadfc, 0x5ba8,
	0x19a9, 0x2efe, 0xb5cc, 0x6f88,
1773 1774
};

1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796
static u16 x8_vectors[] = {
	0x0145, 0x028a, 0x2374, 0x43c8, 0xa1f0, 0x0520, 0x0a40, 0x1480,
	0x0211, 0x0422, 0x0844, 0x1088, 0x01b0, 0x44e0, 0x23c0, 0xed80,
	0x1011, 0x0116, 0x022c, 0x0458, 0x08b0, 0x8c60, 0x2740, 0x4e80,
	0x0411, 0x0822, 0x1044, 0x0158, 0x02b0, 0x2360, 0x46c0, 0xab80,
	0x0811, 0x1022, 0x012c, 0x0258, 0x04b0, 0x4660, 0x8cc0, 0x2780,
	0x2071, 0x40e2, 0xa0c4, 0x0108, 0x0210, 0x0420, 0x0840, 0x1080,
	0x4071, 0x80e2, 0x0104, 0x0208, 0x0410, 0x0820, 0x1040, 0x2080,
	0x8071, 0x0102, 0x0204, 0x0408, 0x0810, 0x1020, 0x2040, 0x4080,
	0x019d, 0x03d6, 0x136c, 0x2198, 0x50b0, 0xb2e0, 0x0740, 0x0e80,
	0x0189, 0x03ea, 0x072c, 0x0e58, 0x1cb0, 0x56e0, 0x37c0, 0xf580,
	0x01fd, 0x0376, 0x06ec, 0x0bb8, 0x1110, 0x2220, 0x4440, 0x8880,
	0x0163, 0x02c6, 0x1104, 0x0758, 0x0eb0, 0x2be0, 0x6140, 0xc280,
	0x02fd, 0x01c6, 0x0b5c, 0x1108, 0x07b0, 0x25a0, 0x8840, 0x6180,
	0x0801, 0x012e, 0x025c, 0x04b8, 0x1370, 0x26e0, 0x57c0, 0xb580,
	0x0401, 0x0802, 0x015c, 0x02b8, 0x22b0, 0x13e0, 0x7140, 0xe280,
	0x0201, 0x0402, 0x0804, 0x01b8, 0x11b0, 0x31a0, 0x8040, 0x7180,
	0x0101, 0x0202, 0x0404, 0x0808, 0x1010, 0x2020, 0x4040, 0x8080,
	0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080,
	0x0100, 0x0200, 0x0400, 0x0800, 0x1000, 0x2000, 0x4000, 0x8000,
};

static int decode_syndrome(u16 syndrome, u16 *vectors, unsigned num_vecs,
			   unsigned v_dim)
{
	unsigned int i, err_sym;

	for (err_sym = 0; err_sym < num_vecs / v_dim; err_sym++) {
		u16 s = syndrome;
		unsigned v_idx =  err_sym * v_dim;
		unsigned v_end = (err_sym + 1) * v_dim;

		/* walk over all 16 bits of the syndrome */
		for (i = 1; i < (1U << 16); i <<= 1) {

			/* if bit is set in that eigenvector... */
			if (v_idx < v_end && vectors[v_idx] & i) {
				u16 ev_comp = vectors[v_idx++];

				/* ... and bit set in the modified syndrome, */
				if (s & i) {
					/* remove it. */
					s ^= ev_comp;

					if (!s)
						return err_sym;
				}

			} else if (s & i)
				/* can't get to zero, move to next symbol */
				break;
		}
	}

	edac_dbg(0, "syndrome(%x) not found\n", syndrome);
	return -1;
}

static int map_err_sym_to_channel(int err_sym, int sym_size)
{
	if (sym_size == 4)
		switch (err_sym) {
		case 0x20:
		case 0x21:
			return 0;
			break;
		case 0x22:
		case 0x23:
			return 1;
			break;
		default:
			return err_sym >> 4;
			break;
		}
	/* x8 symbols */
	else
		switch (err_sym) {
		/* imaginary bits not in a DIMM */
		case 0x10:
			WARN(1, KERN_ERR "Invalid error symbol: 0x%x\n",
					  err_sym);
			return -1;
			break;

		case 0x11:
			return 0;
			break;
		case 0x12:
			return 1;
			break;
		default:
			return err_sym >> 3;
			break;
		}
	return -1;
}

static int get_channel_from_ecc_syndrome(struct mem_ctl_info *mci, u16 syndrome)
{
	struct amd64_pvt *pvt = mci->pvt_info;
	int err_sym = -1;

	if (pvt->ecc_sym_sz == 8)
		err_sym = decode_syndrome(syndrome, x8_vectors,
					  ARRAY_SIZE(x8_vectors),
					  pvt->ecc_sym_sz);
	else if (pvt->ecc_sym_sz == 4)
		err_sym = decode_syndrome(syndrome, x4_vectors,
					  ARRAY_SIZE(x4_vectors),
					  pvt->ecc_sym_sz);
	else {
		amd64_warn("Illegal syndrome type: %u\n", pvt->ecc_sym_sz);
		return err_sym;
	}

	return map_err_sym_to_channel(err_sym, pvt->ecc_sym_sz);
}

/*
 * Handle any Correctable Errors (CEs) that have occurred. Check for valid ERROR
 * ADDRESS and process.
 */
static void amd64_handle_ce(struct mem_ctl_info *mci, struct mce *m)
{
	struct amd64_pvt *pvt = mci->pvt_info;
	u64 sys_addr;
	u16 syndrome;

	/* Ensure that the Error Address is VALID */
	if (!(m->status & MCI_STATUS_ADDRV)) {
		amd64_mc_err(mci, "HW has no ERROR_ADDRESS available\n");
		edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
				     0, 0, 0,
				     -1, -1, -1,
				     "HW has no ERROR_ADDRESS available",
				     "");
		return;
	}

	sys_addr = get_error_address(m);
	syndrome = extract_syndrome(m->status);

	amd64_mc_err(mci, "CE ERROR_ADDRESS= 0x%llx\n", sys_addr);

	pvt->ops->map_sysaddr_to_csrow(mci, sys_addr, syndrome);
}

/* Handle any Un-correctable Errors (UEs) */
static void amd64_handle_ue(struct mem_ctl_info *mci, struct mce *m)
{
	struct mem_ctl_info *log_mci, *src_mci = NULL;
	int csrow;
	u64 sys_addr;
	u32 page, offset;

	log_mci = mci;

	if (!(m->status & MCI_STATUS_ADDRV)) {
		amd64_mc_err(mci, "HW has no ERROR_ADDRESS available\n");
		edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
				     0, 0, 0,
				     -1, -1, -1,
				     "HW has no ERROR_ADDRESS available",
				     "");
		return;
	}

	sys_addr = get_error_address(m);
	error_address_to_page_and_offset(sys_addr, &page, &offset);

	/*
	 * Find out which node the error address belongs to. This may be
	 * different from the node that detected the error.
	 */
	src_mci = find_mc_by_sys_addr(mci, sys_addr);
	if (!src_mci) {
		amd64_mc_err(mci, "ERROR ADDRESS (0x%lx) NOT mapped to a MC\n",
				  (unsigned long)sys_addr);
		edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
				     page, offset, 0,
				     -1, -1, -1,
				     "ERROR ADDRESS NOT mapped to a MC",
				     "");
		return;
	}

	log_mci = src_mci;

	csrow = sys_addr_to_csrow(log_mci, sys_addr);
	if (csrow < 0) {
		amd64_mc_err(mci, "ERROR_ADDRESS (0x%lx) NOT mapped to CS\n",
				  (unsigned long)sys_addr);
		edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
				     page, offset, 0,
				     -1, -1, -1,
				     "ERROR ADDRESS NOT mapped to CS",
				     "");
	} else {
		edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
				     page, offset, 0,
				     csrow, -1, -1,
				     "", "");
	}
}

static inline void __amd64_decode_bus_error(struct mem_ctl_info *mci,
					    struct mce *m)
{
	u16 ec = EC(m->status);
	u8 xec = XEC(m->status, 0x1f);
	u8 ecc_type = (m->status >> 45) & 0x3;

	/* Bail out early if this was an 'observed' error */
	if (PP(ec) == NBSL_PP_OBS)
		return;

	/* Do only ECC errors */
	if (xec && xec != F10_NBSL_EXT_ERR_ECC)
		return;

	if (ecc_type == 2)
		amd64_handle_ce(mci, m);
	else if (ecc_type == 1)
		amd64_handle_ue(mci, m);
}

void amd64_decode_bus_error(int node_id, struct mce *m)
{
	__amd64_decode_bus_error(mcis[node_id], m);
}

/*
 * Use pvt->F2 which contains the F2 CPU PCI device to get the related
 * F1 (AddrMap) and F3 (Misc) devices. Return negative value on error.
 */
static int reserve_mc_sibling_devs(struct amd64_pvt *pvt, u16 f1_id, u16 f3_id)
{
	/* Reserve the ADDRESS MAP Device */
	pvt->F1 = pci_get_related_function(pvt->F2->vendor, f1_id, pvt->F2);
	if (!pvt->F1) {
		amd64_err("error address map device not found: "
			  "vendor %x device 0x%x (broken BIOS?)\n",
			  PCI_VENDOR_ID_AMD, f1_id);
		return -ENODEV;
	}

	/* Reserve the MISC Device */
	pvt->F3 = pci_get_related_function(pvt->F2->vendor, f3_id, pvt->F2);
	if (!pvt->F3) {
		pci_dev_put(pvt->F1);
		pvt->F1 = NULL;

		amd64_err("error F3 device not found: "
			  "vendor %x device 0x%x (broken BIOS?)\n",
			  PCI_VENDOR_ID_AMD, f3_id);

		return -ENODEV;
	}
	edac_dbg(1, "F1: %s\n", pci_name(pvt->F1));
	edac_dbg(1, "F2: %s\n", pci_name(pvt->F2));
	edac_dbg(1, "F3: %s\n", pci_name(pvt->F3));

	return 0;
}

static void free_mc_sibling_devs(struct amd64_pvt *pvt)
{
	pci_dev_put(pvt->F1);
	pci_dev_put(pvt->F3);
}

/*
 * Retrieve the hardware registers of the memory controller (this includes the
 * 'Address Map' and 'Misc' device regs)
 */
static void read_mc_regs(struct amd64_pvt *pvt)
{
	struct cpuinfo_x86 *c = &boot_cpu_data;
	u64 msr_val;
	u32 tmp;
	unsigned range;

	/*
	 * Retrieve TOP_MEM and TOP_MEM2; no masking off of reserved bits since
	 * those are Read-As-Zero
	 */
	rdmsrl(MSR_K8_TOP_MEM1, pvt->top_mem);
	edac_dbg(0, "  TOP_MEM:  0x%016llx\n", pvt->top_mem);

	/* check first whether TOP_MEM2 is enabled */
	rdmsrl(MSR_K8_SYSCFG, msr_val);
	if (msr_val & (1U << 21)) {
		rdmsrl(MSR_K8_TOP_MEM2, pvt->top_mem2);
		edac_dbg(0, "  TOP_MEM2: 0x%016llx\n", pvt->top_mem2);
	} else
		edac_dbg(0, "  TOP_MEM2 disabled\n");

	amd64_read_pci_cfg(pvt->F3, NBCAP, &pvt->nbcap);

	read_dram_ctl_register(pvt);

	for (range = 0; range < DRAM_RANGES; range++) {
		u8 rw;

		/* read settings for this DRAM range */
		read_dram_base_limit_regs(pvt, range);

		rw = dram_rw(pvt, range);
		if (!rw)
			continue;

		edac_dbg(1, "  DRAM range[%d], base: 0x%016llx; limit: 0x%016llx\n",
			 range,
			 get_dram_base(pvt, range),
			 get_dram_limit(pvt, range));

		edac_dbg(1, "   IntlvEn=%s; Range access: %s%s IntlvSel=%d DstNode=%d\n",
			 dram_intlv_en(pvt, range) ? "Enabled" : "Disabled",
			 (rw & 0x1) ? "R" : "-",
			 (rw & 0x2) ? "W" : "-",
			 dram_intlv_sel(pvt, range),
			 dram_dst_node(pvt, range));
	}

	read_dct_base_mask(pvt);

	amd64_read_pci_cfg(pvt->F1, DHAR, &pvt->dhar);
	amd64_read_dct_pci_cfg(pvt, DBAM0, &pvt->dbam0);

	amd64_read_pci_cfg(pvt->F3, F10_ONLINE_SPARE, &pvt->online_spare);

	amd64_read_dct_pci_cfg(pvt, DCLR0, &pvt->dclr0);
	amd64_read_dct_pci_cfg(pvt, DCHR0, &pvt->dchr0);

	if (!dct_ganging_enabled(pvt)) {
		amd64_read_dct_pci_cfg(pvt, DCLR1, &pvt->dclr1);
		amd64_read_dct_pci_cfg(pvt, DCHR1, &pvt->dchr1);
	}

	pvt->ecc_sym_sz = 4;

	if (c->x86 >= 0x10) {
		amd64_read_pci_cfg(pvt->F3, EXT_NB_MCA_CFG, &tmp);
		amd64_read_dct_pci_cfg(pvt, DBAM1, &pvt->dbam1);

		/* F10h, revD and later can do x8 ECC too */
		if ((c->x86 > 0x10 || c->x86_model > 7) && tmp & BIT(25))
			pvt->ecc_sym_sz = 8;
	}
	dump_misc_regs(pvt);
}

/*
 * NOTE: CPU Revision Dependent code
 *
 * Input:
 *	@csrow_nr ChipSelect Row Number (0..NUM_CHIPSELECTS-1)
 *	k8 private pointer to -->
 *			DRAM Bank Address mapping register
 *			node_id
 *			DCL register where dual_channel_active is
 *
 * The DBAM register consists of 4 sets of 4 bits each, defined as follows:
 *
 * Bits:	CSROWs
 * 0-3		CSROWs 0 and 1
 * 4-7		CSROWs 2 and 3
 * 8-11		CSROWs 4 and 5
 * 12-15	CSROWs 6 and 7
 *
 * Values range from: 0 to 15
 * The meaning of the values depends on CPU revision and dual-channel state,
 * see the relevant BKDG for more info.
 *
 * The memory controller provides for total of only 8 CSROWs in its current
 * architecture. Each "pair" of CSROWs normally represents just one DIMM in
 * single channel or two (2) DIMMs in dual channel mode.
 *
 * The following code logic collapses the various tables for CSROW based on CPU
 * revision.
 *
 * Returns:
 *	The number of PAGE_SIZE pages on the specified CSROW number it
 *	encompasses
 *
 */
static u32 amd64_csrow_nr_pages(struct amd64_pvt *pvt, u8 dct, int csrow_nr)
{
	u32 cs_mode, nr_pages;
	u32 dbam = dct ? pvt->dbam1 : pvt->dbam0;

	/*
	 * The math on this doesn't look right on the surface because x/2*4 can
	 * be simplified to x*2 but this expression makes use of the fact that
	 * it is integral math where 1/2=0. This intermediate value becomes the
	 * number of bits to shift the DBAM register to extract the proper CSROW
	 * field.
	 */
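	/*
	 * Worked example with made-up values: DBAM = 0x00004210 and
	 * csrow_nr = 5 give a shift of (5/2)*4 = 8, so
	 * cs_mode = (0x4210 >> 8) & 0xF = 0x2. ->dbam_to_cs() then turns
	 * cs_mode into the chip select size in MB for this family, and the
	 * << (20 - PAGE_SHIFT) below converts MB to PAGE_SIZE pages.
	 */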
	cs_mode =  (dbam >> ((csrow_nr / 2) * 4)) & 0xF;

	nr_pages = pvt->ops->dbam_to_cs(pvt, dct, cs_mode) << (20 - PAGE_SHIFT);

	edac_dbg(0, "  (csrow=%d) DBAM map index= %d\n", csrow_nr, cs_mode);
	edac_dbg(0, "    nr_pages/channel= %u  channel-count = %d\n",
		 nr_pages, pvt->channel_count);

	return nr_pages;
}

/*
 * Initialize the array of csrow attribute instances, based on the values
 * from pci config hardware registers.
 */
static int init_csrows(struct mem_ctl_info *mci)
{
	struct csrow_info *csrow;
	struct dimm_info *dimm;
	struct amd64_pvt *pvt = mci->pvt_info;
	u64 base, mask;
	u32 val;
	int i, j, empty = 1;
	enum mem_type mtype;
	enum edac_type edac_mode;
	int nr_pages = 0;

	amd64_read_pci_cfg(pvt->F3, NBCFG, &val);

	pvt->nbcfg = val;

	edac_dbg(0, "node %d, NBCFG=0x%08x[ChipKillEccCap: %d|DramEccEn: %d]\n",
		 pvt->mc_node_id, val,
		 !!(val & NBCFG_CHIPKILL), !!(val & NBCFG_ECC_ENABLE));

	for_each_chip_select(i, 0, pvt) {
		csrow = mci->csrows[i];

		if (!csrow_enabled(i, 0, pvt) && !csrow_enabled(i, 1, pvt)) {
			edac_dbg(1, "----CSROW %d VALID for MC node %d\n",
				 i, pvt->mc_node_id);
			continue;
		}

		empty = 0;
		if (csrow_enabled(i, 0, pvt))
			nr_pages = amd64_csrow_nr_pages(pvt, 0, i);
		if (csrow_enabled(i, 1, pvt))
			nr_pages += amd64_csrow_nr_pages(pvt, 1, i);

		get_cs_base_and_mask(pvt, i, 0, &base, &mask);
		/* 8 bytes of resolution */

		mtype = amd64_determine_memory_type(pvt, i);

		edac_dbg(1, "  for MC node %d csrow %d:\n", pvt->mc_node_id, i);
		edac_dbg(1, "    nr_pages: %u\n",
			 nr_pages * pvt->channel_count);

		/*
		 * determine whether CHIPKILL or JUST ECC or NO ECC is operating
		 */
		if (pvt->nbcfg & NBCFG_ECC_ENABLE)
			edac_mode = (pvt->nbcfg & NBCFG_CHIPKILL) ?
				    EDAC_S4ECD4ED : EDAC_SECDED;
		else
			edac_mode = EDAC_NONE;

		for (j = 0; j < pvt->channel_count; j++) {
			dimm = csrow->channels[j]->dimm;
			dimm->mtype = mtype;
			dimm->edac_mode = edac_mode;
			dimm->nr_pages = nr_pages;
		}
	}

	return empty;
}

/* get all cores on this DCT */
static void get_cpus_on_this_dct_cpumask(struct cpumask *mask, unsigned nid)
{
	int cpu;

	for_each_online_cpu(cpu)
		if (amd_get_nb_id(cpu) == nid)
			cpumask_set_cpu(cpu, mask);
}

/* check MCG_CTL on all the cpus on this node */
static bool amd64_nb_mce_bank_enabled_on_node(unsigned nid)
{
	cpumask_var_t mask;
	int cpu, nbe;
	bool ret = false;

	if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) {
		amd64_warn("%s: Error allocating mask\n", __func__);
		return false;
	}

	get_cpus_on_this_dct_cpumask(mask, nid);

	rdmsr_on_cpus(mask, MSR_IA32_MCG_CTL, msrs);

	for_each_cpu(cpu, mask) {
		struct msr *reg = per_cpu_ptr(msrs, cpu);
		nbe = reg->l & MSR_MCGCTL_NBE;

		edac_dbg(0, "core: %u, MCG_CTL: 0x%llx, NB MSR is %s\n",
			 cpu, reg->q,
			 (nbe ? "enabled" : "disabled"));

		if (!nbe)
			goto out;
	}
	ret = true;

out:
	free_cpumask_var(mask);
	return ret;
}

static int toggle_ecc_err_reporting(struct ecc_settings *s, u8 nid, bool on)
{
	cpumask_var_t cmask;
	int cpu;

	if (!zalloc_cpumask_var(&cmask, GFP_KERNEL)) {
		amd64_warn("%s: error allocating mask\n", __func__);
		return false;
	}

	get_cpus_on_this_dct_cpumask(cmask, nid);

	rdmsr_on_cpus(cmask, MSR_IA32_MCG_CTL, msrs);

	for_each_cpu(cpu, cmask) {

		struct msr *reg = per_cpu_ptr(msrs, cpu);

		if (on) {
			if (reg->l & MSR_MCGCTL_NBE)
				s->flags.nb_mce_enable = 1;

			reg->l |= MSR_MCGCTL_NBE;
		} else {
			/*
			 * Turn off NB MCE reporting only when it was off before
			 */
			if (!s->flags.nb_mce_enable)
				reg->l &= ~MSR_MCGCTL_NBE;
		}
	}
	wrmsr_on_cpus(cmask, MSR_IA32_MCG_CTL, msrs);

	free_cpumask_var(cmask);

	return 0;
}

static bool enable_ecc_error_reporting(struct ecc_settings *s, u8 nid,
				       struct pci_dev *F3)
{
	bool ret = true;
	u32 value, mask = 0x3;		/* UECC/CECC enable */

	if (toggle_ecc_err_reporting(s, nid, ON)) {
		amd64_warn("Error enabling ECC reporting over MCGCTL!\n");
		return false;
	}

	amd64_read_pci_cfg(F3, NBCTL, &value);

	s->old_nbctl   = value & mask;
	s->nbctl_valid = true;

	value |= mask;
	amd64_write_pci_cfg(F3, NBCTL, value);

	amd64_read_pci_cfg(F3, NBCFG, &value);

	edac_dbg(0, "1: node %d, NBCFG=0x%08x[DramEccEn: %d]\n",
		 nid, value, !!(value & NBCFG_ECC_ENABLE));

	if (!(value & NBCFG_ECC_ENABLE)) {
		amd64_warn("DRAM ECC disabled on this node, enabling...\n");

		s->flags.nb_ecc_prev = 0;

		/* Attempt to turn on DRAM ECC Enable */
		value |= NBCFG_ECC_ENABLE;
		amd64_write_pci_cfg(F3, NBCFG, value);

		amd64_read_pci_cfg(F3, NBCFG, &value);

		if (!(value & NBCFG_ECC_ENABLE)) {
			amd64_warn("Hardware rejected DRAM ECC enable,"
				   "check memory DIMM configuration.\n");
			ret = false;
		} else {
			amd64_info("Hardware accepted DRAM ECC Enable\n");
		}
	} else {
		s->flags.nb_ecc_prev = 1;
	}

	edac_dbg(0, "2: node %d, NBCFG=0x%08x[DramEccEn: %d]\n",
		 nid, value, !!(value & NBCFG_ECC_ENABLE));

	return ret;
}

static void restore_ecc_error_reporting(struct ecc_settings *s, u8 nid,
					struct pci_dev *F3)
{
	u32 value, mask = 0x3;		/* UECC/CECC enable */


	if (!s->nbctl_valid)
		return;

	amd64_read_pci_cfg(F3, NBCTL, &value);
	value &= ~mask;
	value |= s->old_nbctl;

	amd64_write_pci_cfg(F3, NBCTL, value);

	/* restore previous BIOS DRAM ECC "off" setting we force-enabled */
	if (!s->flags.nb_ecc_prev) {
		amd64_read_pci_cfg(F3, NBCFG, &value);
		value &= ~NBCFG_ECC_ENABLE;
		amd64_write_pci_cfg(F3, NBCFG, value);
	}

	/* restore the NB Enable MCGCTL bit */
	if (toggle_ecc_err_reporting(s, nid, OFF))
		amd64_warn("Error restoring NB MCGCTL settings!\n");
}

/*
 * EDAC requires that the BIOS have ECC enabled before
 * taking over the processing of ECC errors. A command line
 * option allows one to force-enable the hardware ECC later in
 * enable_ecc_error_reporting().
 */
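/*
 * Example (the module name depends on how the driver is built, typically
 * amd64_edac_mod):
 *
 *	modprobe amd64_edac_mod ecc_enable_override=1
 */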
static const char *ecc_msg =
	"ECC disabled in the BIOS or no ECC capability, module will not load.\n"
	" Either enable ECC checking or force module loading by setting "
	"'ecc_enable_override'.\n"
	" (Note that use of the override may cause unknown side effects.)\n";

static bool ecc_enabled(struct pci_dev *F3, u8 nid)
{
	u32 value;
	u8 ecc_en = 0;
	bool nb_mce_en = false;

	amd64_read_pci_cfg(F3, NBCFG, &value);

	ecc_en = !!(value & NBCFG_ECC_ENABLE);
	amd64_info("DRAM ECC %s.\n", (ecc_en ? "enabled" : "disabled"));

	nb_mce_en = amd64_nb_mce_bank_enabled_on_node(nid);
	if (!nb_mce_en)
		amd64_notice("NB MCE bank disabled, set MSR "
			     "0x%08x[4] on node %d to enable.\n",
			     MSR_IA32_MCG_CTL, nid);

	if (!ecc_en || !nb_mce_en) {
		amd64_notice("%s", ecc_msg);
		return false;
	}
	return true;
}

static int set_mc_sysfs_attrs(struct mem_ctl_info *mci)
{
	int rc;

	rc = amd64_create_sysfs_dbg_files(mci);
	if (rc < 0)
		return rc;

	if (boot_cpu_data.x86 >= 0x10) {
		rc = amd64_create_sysfs_inject_files(mci);
		if (rc < 0)
			return rc;
	}

	return 0;
}

static void del_mc_sysfs_attrs(struct mem_ctl_info *mci)
{
	amd64_remove_sysfs_dbg_files(mci);

	if (boot_cpu_data.x86 >= 0x10)
		amd64_remove_sysfs_inject_files(mci);
}

static void setup_mci_misc_attrs(struct mem_ctl_info *mci,
				 struct amd64_family_type *fam)
{
	struct amd64_pvt *pvt = mci->pvt_info;

	mci->mtype_cap		= MEM_FLAG_DDR2 | MEM_FLAG_RDDR2;
	mci->edac_ctl_cap	= EDAC_FLAG_NONE;

	if (pvt->nbcap & NBCAP_SECDED)
		mci->edac_ctl_cap |= EDAC_FLAG_SECDED;

	if (pvt->nbcap & NBCAP_CHIPKILL)
		mci->edac_ctl_cap |= EDAC_FLAG_S4ECD4ED;

	mci->edac_cap		= amd64_determine_edac_cap(pvt);
	mci->mod_name		= EDAC_MOD_STR;
	mci->mod_ver		= EDAC_AMD64_VERSION;
	mci->ctl_name		= fam->ctl_name;
	mci->dev_name		= pci_name(pvt->F2);
	mci->ctl_page_to_phys	= NULL;

	/* memory scrubber interface */
	mci->set_sdram_scrub_rate = amd64_set_scrub_rate;
	mci->get_sdram_scrub_rate = amd64_get_scrub_rate;
}

/*
 * returns a pointer to the family descriptor on success, NULL otherwise.
 */
static struct amd64_family_type *amd64_per_family_init(struct amd64_pvt *pvt)
{
	u8 fam = boot_cpu_data.x86;
	struct amd64_family_type *fam_type = NULL;

	switch (fam) {
	case 0xf:
		fam_type		= &amd64_family_types[K8_CPUS];
		pvt->ops		= &amd64_family_types[K8_CPUS].ops;
		break;

	case 0x10:
		fam_type		= &amd64_family_types[F10_CPUS];
		pvt->ops		= &amd64_family_types[F10_CPUS].ops;
		break;

	case 0x15:
		fam_type		= &amd64_family_types[F15_CPUS];
		pvt->ops		= &amd64_family_types[F15_CPUS].ops;
		break;

	default:
		amd64_err("Unsupported family!\n");
		return NULL;
	}

	pvt->ext_model = boot_cpu_data.x86_model >> 4;

	amd64_info("%s %sdetected (node %d).\n", fam_type->ctl_name,
		     (fam == 0xf ?
				(pvt->ext_model >= K8_REV_F  ? "revF or later "
							     : "revE or earlier ")
				 : ""), pvt->mc_node_id);
	return fam_type;
}

static int amd64_init_one_instance(struct pci_dev *F2)
{
	struct amd64_pvt *pvt = NULL;
	struct amd64_family_type *fam_type = NULL;
	struct mem_ctl_info *mci = NULL;
	struct edac_mc_layer layers[2];
	int err = 0, ret;
	u8 nid = get_node_id(F2);

	ret = -ENOMEM;
	pvt = kzalloc(sizeof(struct amd64_pvt), GFP_KERNEL);
	if (!pvt)
		goto err_ret;

	pvt->mc_node_id	= nid;
	pvt->F2 = F2;

	ret = -EINVAL;
	fam_type = amd64_per_family_init(pvt);
	if (!fam_type)
		goto err_free;

	ret = -ENODEV;
	err = reserve_mc_sibling_devs(pvt, fam_type->f1_id, fam_type->f3_id);
	if (err)
		goto err_free;

	read_mc_regs(pvt);

	/*
	 * We need to determine how many memory channels there are. Then use
	 * that information for calculating the size of the dynamic instance
	 * tables in the 'mci' structure.
	 */
	ret = -EINVAL;
	pvt->channel_count = pvt->ops->early_channel_count(pvt);
	if (pvt->channel_count < 0)
		goto err_siblings;

	ret = -ENOMEM;
	layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
	layers[0].size = pvt->csels[0].b_cnt;
	layers[0].is_virt_csrow = true;
	layers[1].type = EDAC_MC_LAYER_CHANNEL;
	layers[1].size = pvt->channel_count;
	layers[1].is_virt_csrow = false;
	mci = edac_mc_alloc(nid, ARRAY_SIZE(layers), layers, 0);
	if (!mci)
		goto err_siblings;

	mci->pvt_info = pvt;
	mci->pdev = &pvt->F2->dev;

	setup_mci_misc_attrs(mci, fam_type);

	if (init_csrows(mci))
		mci->edac_cap = EDAC_FLAG_NONE;

	ret = -ENODEV;
	if (edac_mc_add_mc(mci)) {
		edac_dbg(1, "failed edac_mc_add_mc()\n");
		goto err_add_mc;
	}
	if (set_mc_sysfs_attrs(mci)) {
		edac_dbg(1, "failed edac_mc_add_mc()\n");
		goto err_add_sysfs;
	}

	/* register stuff with EDAC MCE */
	if (report_gart_errors)
		amd_report_gart_errors(true);

	amd_register_ecc_decoder(amd64_decode_bus_error);

	mcis[nid] = mci;

	atomic_inc(&drv_instances);

	return 0;

err_add_sysfs:
	edac_mc_del_mc(mci->pdev);
err_add_mc:
	edac_mc_free(mci);

err_siblings:
	free_mc_sibling_devs(pvt);

err_free:
	kfree(pvt);

err_ret:
	return ret;
}

static int __devinit amd64_probe_one_instance(struct pci_dev *pdev,
					     const struct pci_device_id *mc_type)
{
	u8 nid = get_node_id(pdev);
	struct pci_dev *F3 = node_to_amd_nb(nid)->misc;
	struct ecc_settings *s;
	int ret = 0;

	ret = pci_enable_device(pdev);
	if (ret < 0) {
		edac_dbg(0, "ret=%d\n", ret);
		return -EIO;
	}

	ret = -ENOMEM;
	s = kzalloc(sizeof(struct ecc_settings), GFP_KERNEL);
	if (!s)
		goto err_out;

	ecc_stngs[nid] = s;

	if (!ecc_enabled(F3, nid)) {
		ret = -ENODEV;

		if (!ecc_enable_override)
			goto err_enable;

		amd64_warn("Forcing ECC on!\n");

		if (!enable_ecc_error_reporting(s, nid, F3))
			goto err_enable;
	}

	ret = amd64_init_one_instance(pdev);
	if (ret < 0) {
		amd64_err("Error probing instance: %d\n", nid);
		restore_ecc_error_reporting(s, nid, F3);
	}

	return ret;

err_enable:
	kfree(s);
	ecc_stngs[nid] = NULL;

err_out:
	return ret;
}

static void __devexit amd64_remove_one_instance(struct pci_dev *pdev)
{
	struct mem_ctl_info *mci;
	struct amd64_pvt *pvt;
	u8 nid = get_node_id(pdev);
	struct pci_dev *F3 = node_to_amd_nb(nid)->misc;
	struct ecc_settings *s = ecc_stngs[nid];

	mci = find_mci_by_dev(&pdev->dev);
	del_mc_sysfs_attrs(mci);
	/* Remove from EDAC CORE tracking list */
	mci = edac_mc_del_mc(&pdev->dev);
	if (!mci)
		return;

	pvt = mci->pvt_info;

	restore_ecc_error_reporting(s, nid, F3);

	free_mc_sibling_devs(pvt);

	/* unregister from EDAC MCE */
	amd_report_gart_errors(false);
	amd_unregister_ecc_decoder(amd64_decode_bus_error);

	kfree(ecc_stngs[nid]);
	ecc_stngs[nid] = NULL;

	/* Free the EDAC CORE resources */
	mci->pvt_info = NULL;
	mcis[nid] = NULL;

	kfree(pvt);
	edac_mc_free(mci);
}

/*
 * This table is part of the interface for loading drivers for PCI devices. The
 * PCI core identifies what devices are on a system during boot, and then
 * queries this table to see whether this driver handles a given device.
 */
static DEFINE_PCI_DEVICE_TABLE(amd64_pci_table) = {
	{
		.vendor		= PCI_VENDOR_ID_AMD,
		.device		= PCI_DEVICE_ID_AMD_K8_NB_MEMCTL,
		.subvendor	= PCI_ANY_ID,
		.subdevice	= PCI_ANY_ID,
		.class		= 0,
		.class_mask	= 0,
	},
	{
		.vendor		= PCI_VENDOR_ID_AMD,
		.device		= PCI_DEVICE_ID_AMD_10H_NB_DRAM,
		.subvendor	= PCI_ANY_ID,
		.subdevice	= PCI_ANY_ID,
		.class		= 0,
		.class_mask	= 0,
	},
	{
		.vendor		= PCI_VENDOR_ID_AMD,
		.device		= PCI_DEVICE_ID_AMD_15H_NB_F2,
		.subvendor	= PCI_ANY_ID,
		.subdevice	= PCI_ANY_ID,
		.class		= 0,
		.class_mask	= 0,
	},

	{0, }
};
MODULE_DEVICE_TABLE(pci, amd64_pci_table);

static struct pci_driver amd64_pci_driver = {
	.name		= EDAC_MOD_STR,
	.probe		= amd64_probe_one_instance,
	.remove		= __devexit_p(amd64_remove_one_instance),
	.id_table	= amd64_pci_table,
};

static void setup_pci_device(void)
{
	struct mem_ctl_info *mci;
	struct amd64_pvt *pvt;

	if (amd64_ctl_pci)
		return;

	mci = mcis[0];
	if (mci) {

		pvt = mci->pvt_info;
		amd64_ctl_pci =
			edac_pci_create_generic_ctl(&pvt->F2->dev, EDAC_MOD_STR);

		if (!amd64_ctl_pci) {
			pr_warning("%s(): Unable to create PCI control\n",
				   __func__);

			pr_warning("%s(): PCI error report via EDAC not set\n",
				   __func__);
		}
	}
}

static int __init amd64_edac_init(void)
{
	int err = -ENODEV;

	printk(KERN_INFO "AMD64 EDAC driver v%s\n", EDAC_AMD64_VERSION);

	opstate_init();

	if (amd_cache_northbridges() < 0)
		goto err_ret;

	err = -ENOMEM;
	mcis	  = kzalloc(amd_nb_num() * sizeof(mcis[0]), GFP_KERNEL);
	ecc_stngs = kzalloc(amd_nb_num() * sizeof(ecc_stngs[0]), GFP_KERNEL);
	if (!(mcis && ecc_stngs))
		goto err_free;

	msrs = msrs_alloc();
	if (!msrs)
		goto err_free;

	err = pci_register_driver(&amd64_pci_driver);
	if (err)
		goto err_pci;

	err = -ENODEV;
	if (!atomic_read(&drv_instances))
		goto err_no_instances;

	setup_pci_device();
	return 0;

err_no_instances:
	pci_unregister_driver(&amd64_pci_driver);

err_pci:
	msrs_free(msrs);
	msrs = NULL;

err_free:
	kfree(mcis);
	mcis = NULL;

	kfree(ecc_stngs);
	ecc_stngs = NULL;

err_ret:
	return err;
}

static void __exit amd64_edac_exit(void)
{
	if (amd64_ctl_pci)
		edac_pci_release_generic_ctl(amd64_ctl_pci);

	pci_unregister_driver(&amd64_pci_driver);

	kfree(ecc_stngs);
	ecc_stngs = NULL;

	kfree(mcis);
	mcis = NULL;

	msrs_free(msrs);
	msrs = NULL;
}

module_init(amd64_edac_init);
module_exit(amd64_edac_exit);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("SoftwareBitMaker: Doug Thompson, "
		"Dave Peterson, Thayne Harbaugh");
MODULE_DESCRIPTION("MC support for AMD64 memory controllers - "
		EDAC_AMD64_VERSION);

module_param(edac_op_state, int, 0444);
MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");