/*
 *  (c) 2005-2012 Advanced Micro Devices, Inc.
 *  Your use of this code is subject to the terms and conditions of the
 *  GNU general public license version 2. See "COPYING" or
 *  http://www.gnu.org/licenses/gpl.html
 *
 *  Written by Jacob Shin - AMD, Inc.
 *
 *  Maintained by: Borislav Petkov <bp@alien8.de>
 *
 *  April 2006
 *     - added support for AMD Family 0x10 processors
 *  May 2012
 *     - major scrubbing
 *  May 2015
 *     - add support for deferred error interrupts (Aravind Gopalakrishnan)
 *
 *  All MC4_MISCi registers are shared between the cores on a node.
 */
#include <linux/interrupt.h>
#include <linux/notifier.h>
#include <linux/kobject.h>
#include <linux/percpu.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/sysfs.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/cpu.h>
#include <linux/smp.h>

#include <asm/amd_nb.h>
#include <asm/apic.h>
#include <asm/idle.h>
#include <asm/mce.h>
#include <asm/msr.h>
#include <asm/trace/irq_vectors.h>

#define NR_BLOCKS         9
#define THRESHOLD_MAX     0xFFF
#define INT_TYPE_APIC     0x00020000
#define MASK_VALID_HI     0x80000000
#define MASK_CNTP_HI      0x40000000
#define MASK_LOCKED_HI    0x20000000
#define MASK_LVTOFF_HI    0x00F00000
#define MASK_COUNT_EN_HI  0x00080000
#define MASK_INT_TYPE_HI  0x00060000
#define MASK_OVERFLOW_HI  0x00010000
#define MASK_ERR_COUNT_HI 0x00000FFF
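
/*
 * A non-zero BlkPtr (the top byte of the low half of MISC0) indicates that
 * extended MCx_MISC thresholding registers exist; they live in a block
 * based at MCG_XBLK_ADDR (see the block discovery loops below).
 */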
#define MASK_BLKPTR_LO    0xFF000000
#define MCG_XBLK_ADDR     0xC0000400

/* Deferred error settings */
#define MSR_CU_DEF_ERR		0xC0000410
#define MASK_DEF_LVTOFF		0x000000F0
#define MASK_DEF_INT_TYPE	0x00000006
#define DEF_LVT_OFF		0x2
#define DEF_INT_TYPE_APIC	0x2
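
/*
 * Names of the per-bank sysfs directories.  Bank 3 is left unnamed (no
 * thresholding blocks are expected for it on these processors); the blocks
 * under the shared bank 4 are named individually by bank4_names() below.
 */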

static const char * const th_names[] = {
	"load_store",
	"insn_fetch",
	"combined_unit",
	"",
	"northbridge",
	"execution_unit",
};

static DEFINE_PER_CPU(struct threshold_bank **, threshold_banks);
static DEFINE_PER_CPU(unsigned char, bank_map);	/* see which banks are on */

static void amd_threshold_interrupt(void);
static void amd_deferred_error_interrupt(void);

static void default_deferred_error_interrupt(void)
{
	pr_err("Unexpected deferred interrupt at vector %x\n", DEFERRED_ERROR_VECTOR);
}
void (*deferred_error_int_vector)(void) = default_deferred_error_interrupt;

/*
 * CPU Initialization
 */

struct thresh_restart {
	struct threshold_block	*b;
	int			reset;
	int			set_lvt_off;
	int			lvt_off;
	u16			old_limit;
};

static inline bool is_shared_bank(int bank)
{
	/* Bank 4 is for northbridge reporting and is thus shared */
	return (bank == 4);
}

static const char *bank4_names(const struct threshold_block *b)
{
	switch (b->address) {
	/* MSR4_MISC0 */
	case 0x00000413:
		return "dram";

	case 0xc0000408:
		return "ht_links";

	case 0xc0000409:
		return "l3_cache";

	default:
		WARN(1, "Funny MSR: 0x%08x\n", b->address);
		return "";
	}
}

static bool lvt_interrupt_supported(unsigned int bank, u32 msr_high_bits)
{
	/*
	 * bank 4 supports APIC LVT interrupts implicitly since forever.
	 */
	if (bank == 4)
		return true;

	/*
	 * IntP: interrupt present; if this bit is set, the thresholding
	 * bank can generate APIC LVT interrupts
	 */
	return msr_high_bits & BIT(28);
}

static int lvt_off_valid(struct threshold_block *b, int apic, u32 lo, u32 hi)
{
	int msr = (hi & MASK_LVTOFF_HI) >> 20;

	if (apic < 0) {
		pr_err(FW_BUG "cpu %d, failed to setup threshold interrupt "
		       "for bank %d, block %d (MSR%08X=0x%x%08x)\n", b->cpu,
		       b->bank, b->block, b->address, hi, lo);
		return 0;
	}

	if (apic != msr) {
		pr_err(FW_BUG "cpu %d, invalid threshold interrupt offset %d "
		       "for bank %d, block %d (MSR%08X=0x%x%08x)\n",
		       b->cpu, apic, b->bank, b->block, b->address, hi, lo);
		return 0;
	}

	return 1;
}

/*
 * Called via smp_call_function_single(), must be called with correct
 * cpu affinity.
 */
static void threshold_restart_bank(void *_tr)
{
	struct thresh_restart *tr = _tr;
	u32 hi, lo;

	rdmsr(tr->b->address, lo, hi);

	if (tr->b->threshold_limit < (hi & THRESHOLD_MAX))
		tr->reset = 1;	/* limit cannot be lower than err count */

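	/*
	 * The error counter counts upward from a preset of
	 * THRESHOLD_MAX - threshold_limit; the interrupt fires on overflow,
	 * i.e. after threshold_limit further errors.
	 */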
	if (tr->reset) {		/* reset err count and overflow bit */
		hi =
		    (hi & ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI)) |
		    (THRESHOLD_MAX - tr->b->threshold_limit);
	} else if (tr->old_limit) {	/* change limit w/o reset */
		int new_count = (hi & THRESHOLD_MAX) +
		    (tr->old_limit - tr->b->threshold_limit);

		hi = (hi & ~MASK_ERR_COUNT_HI) |
		    (new_count & THRESHOLD_MAX);
	}

	/* clear IntType */
	hi &= ~MASK_INT_TYPE_HI;

	if (!tr->b->interrupt_capable)
		goto done;

	if (tr->set_lvt_off) {
		if (lvt_off_valid(tr->b, tr->lvt_off, lo, hi)) {
			/* set new lvt offset */
			hi &= ~MASK_LVTOFF_HI;
			hi |= tr->lvt_off << 20;
		}
	}

	if (tr->b->interrupt_enable)
		hi |= INT_TYPE_APIC;

 done:
	hi |= MASK_COUNT_EN_HI;
	wrmsr(tr->b->address, lo, hi);
}

static void mce_threshold_block_init(struct threshold_block *b, int offset)
{
	struct thresh_restart tr = {
		.b			= b,
		.set_lvt_off		= 1,
		.lvt_off		= offset,
	};

	b->threshold_limit		= THRESHOLD_MAX;
	threshold_restart_bank(&tr);
}

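/*
 * Try to reserve the extended APIC LVT entry @new for the threshold vector.
 * Returns the offset that ends up reserved: @new on success, otherwise
 * whatever was reserved before.
 */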
static int setup_APIC_mce_threshold(int reserved, int new)
{
	if (reserved < 0 && !setup_APIC_eilvt(new, THRESHOLD_APIC_VECTOR,
					      APIC_EILVT_MSG_FIX, 0))
		return new;

	return reserved;
}

static int setup_APIC_deferred_error(int reserved, int new)
{
	if (reserved < 0 && !setup_APIC_eilvt(new, DEFERRED_ERROR_VECTOR,
					      APIC_EILVT_MSG_FIX, 0))
		return new;

	return reserved;
}

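/*
 * Enable the deferred error interrupt: program the LVT offset from
 * MSR_CU_DEF_ERR (forcing offset 0x2 if the BIOS left it unset) and route
 * deferred errors through the APIC.
 */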
static void deferred_error_interrupt_enable(struct cpuinfo_x86 *c)
{
	u32 low = 0, high = 0;
	int def_offset = -1, def_new;

	if (rdmsr_safe(MSR_CU_DEF_ERR, &low, &high))
		return;

	def_new = (low & MASK_DEF_LVTOFF) >> 4;
	if (!(low & MASK_DEF_LVTOFF)) {
		pr_err(FW_BUG "Your BIOS is not setting up LVT offset 0x2 for deferred error IRQs correctly.\n");
		def_new = DEF_LVT_OFF;
		low = (low & ~MASK_DEF_LVTOFF) | (DEF_LVT_OFF << 4);
	}

	def_offset = setup_APIC_deferred_error(def_offset, def_new);
	if ((def_offset == def_new) &&
	    (deferred_error_int_vector != amd_deferred_error_interrupt))
		deferred_error_int_vector = amd_deferred_error_interrupt;

	low = (low & ~MASK_DEF_INT_TYPE) | DEF_INT_TYPE_APIC;
	wrmsr(MSR_CU_DEF_ERR, low, high);
}

/* cpu init entry point, called from mce.c with preempt off */
void mce_amd_feature_init(struct cpuinfo_x86 *c)
{
	struct threshold_block b;
	unsigned int cpu = smp_processor_id();
	u32 low = 0, high = 0, address = 0;
	unsigned int bank, block;
	int offset = -1, new;

	for (bank = 0; bank < mca_cfg.banks; ++bank) {
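		/*
		 * Block 0 is MCx_MISC itself; further blocks, if present,
		 * are found via the block pointer in MISC0 and are
		 * contiguous after it.
		 */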
		for (block = 0; block < NR_BLOCKS; ++block) {
			if (block == 0)
				address = MSR_IA32_MCx_MISC(bank);
			else if (block == 1) {
				address = (low & MASK_BLKPTR_LO) >> 21;
				if (!address)
					break;

				address += MCG_XBLK_ADDR;
			} else
				++address;

			if (rdmsr_safe(address, &low, &high))
				break;

			if (!(high & MASK_VALID_HI))
				continue;

			if (!(high & MASK_CNTP_HI)  ||
			     (high & MASK_LOCKED_HI))
				continue;

			if (!block)
				per_cpu(bank_map, cpu) |= (1 << bank);

			memset(&b, 0, sizeof(b));
			b.cpu			= cpu;
			b.bank			= bank;
			b.block			= block;
			b.address		= address;
			b.interrupt_capable	= lvt_interrupt_supported(bank, high);

			if (!b.interrupt_capable)
				goto init;

			b.interrupt_enable = 1;
			new	= (high & MASK_LVTOFF_HI) >> 20;
			offset  = setup_APIC_mce_threshold(offset, new);

			if ((offset == new) &&
			    (mce_threshold_vector != amd_threshold_interrupt))
				mce_threshold_vector = amd_threshold_interrupt;

init:
			mce_threshold_block_init(&b, offset);
		}
	}

	if (mce_flags.succor)
		deferred_error_interrupt_enable(c);
}

static void __log_error(unsigned int bank, bool threshold_err, u64 misc)
{
	struct mce m;
	u64 status;

	rdmsrl(MSR_IA32_MCx_STATUS(bank), status);
	if (!(status & MCI_STATUS_VAL))
		return;

	mce_setup(&m);

	m.status = status;
	m.bank = bank;

	if (threshold_err)
		m.misc = misc;

	if (m.status & MCI_STATUS_ADDRV)
		rdmsrl(MSR_IA32_MCx_ADDR(bank), m.addr);

	mce_log(&m);
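	/* Clear the bank's status so the error is not picked up again. */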
	wrmsrl(MSR_IA32_MCx_STATUS(bank), 0);
}

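/* Common body of the traced and untraced deferred error IRQ entry points. */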
static inline void __smp_deferred_error_interrupt(void)
{
	inc_irq_stat(irq_deferred_error_count);
	deferred_error_int_vector();
}

asmlinkage __visible void smp_deferred_error_interrupt(void)
{
	entering_irq();
	__smp_deferred_error_interrupt();
	exiting_ack_irq();
}

asmlinkage __visible void smp_trace_deferred_error_interrupt(void)
{
	entering_irq();
	trace_deferred_error_apic_entry(DEFERRED_ERROR_VECTOR);
	__smp_deferred_error_interrupt();
	trace_deferred_error_apic_exit(DEFERRED_ERROR_VECTOR);
	exiting_ack_irq();
}

/* APIC interrupt handler for deferred errors */
static void amd_deferred_error_interrupt(void)
{
	u64 status;
	unsigned int bank;

	for (bank = 0; bank < mca_cfg.banks; ++bank) {
		rdmsrl(MSR_IA32_MCx_STATUS(bank), status);

		if (!(status & MCI_STATUS_VAL) ||
		    !(status & MCI_STATUS_DEFERRED))
			continue;

		__log_error(bank, false, 0);
		break;
	}
}

/*
 * APIC Interrupt Handler
 */

/*
 * The threshold interrupt handler services THRESHOLD_APIC_VECTOR. The
 * interrupt fires when error_count reaches threshold_limit, at which point
 * the handler logs an MCE record with the software-defined bank number.
 */

static void amd_threshold_interrupt(void)
{
	u32 low = 0, high = 0, address = 0;
	int cpu = smp_processor_id();
	unsigned int bank, block;

	/* assume first bank caused it */
	for (bank = 0; bank < mca_cfg.banks; ++bank) {
		if (!(per_cpu(bank_map, cpu) & (1 << bank)))
			continue;
		for (block = 0; block < NR_BLOCKS; ++block) {
			if (block == 0) {
				address = MSR_IA32_MCx_MISC(bank);
			} else if (block == 1) {
				address = (low & MASK_BLKPTR_LO) >> 21;
				if (!address)
					break;
				address += MCG_XBLK_ADDR;
			} else {
				++address;
			}

			if (rdmsr_safe(address, &low, &high))
				break;

			if (!(high & MASK_VALID_HI)) {
				if (block)
					continue;
				else
					break;
			}

			if (!(high & MASK_CNTP_HI)  ||
			     (high & MASK_LOCKED_HI))
				continue;

			/*
			 * Log the machine check that caused the threshold
			 * event.
			 */
			if (high & MASK_OVERFLOW_HI)
				goto log;
		}
	}
	return;

log:
	__log_error(bank, true, ((u64)high << 32) | low);
}

/*
 * Sysfs Interface
 */

struct threshold_attr {
	struct attribute attr;
	ssize_t (*show) (struct threshold_block *, char *);
	ssize_t (*store) (struct threshold_block *, const char *, size_t count);
};

#define SHOW_FIELDS(name)						\
static ssize_t show_ ## name(struct threshold_block *b, char *buf)	\
{									\
	return sprintf(buf, "%lu\n", (unsigned long) b->name);		\
}
SHOW_FIELDS(interrupt_enable)
SHOW_FIELDS(threshold_limit)

static ssize_t
store_interrupt_enable(struct threshold_block *b, const char *buf, size_t size)
{
	struct thresh_restart tr;
	unsigned long new;

	if (!b->interrupt_capable)
		return -EINVAL;

	if (kstrtoul(buf, 0, &new) < 0)
		return -EINVAL;

	b->interrupt_enable = !!new;

	memset(&tr, 0, sizeof(tr));
	tr.b		= b;

	smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1);

	return size;
}

static ssize_t
store_threshold_limit(struct threshold_block *b, const char *buf, size_t size)
{
	struct thresh_restart tr;
	unsigned long new;

	if (kstrtoul(buf, 0, &new) < 0)
		return -EINVAL;

	if (new > THRESHOLD_MAX)
		new = THRESHOLD_MAX;
	if (new < 1)
		new = 1;

	memset(&tr, 0, sizeof(tr));
	tr.old_limit = b->threshold_limit;
	b->threshold_limit = new;
	tr.b = b;

	smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1);

	return size;
}

static ssize_t show_error_count(struct threshold_block *b, char *buf)
{
	u32 lo, hi;

	rdmsr_on_cpu(b->cpu, b->address, &lo, &hi);

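	/* Subtract the preset (THRESHOLD_MAX - limit) to get the real count. */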
	return sprintf(buf, "%u\n", ((hi & THRESHOLD_MAX) -
				     (THRESHOLD_MAX - b->threshold_limit)));
}

static struct threshold_attr error_count = {
	.attr = {.name = __stringify(error_count), .mode = 0444 },
	.show = show_error_count,
};

#define RW_ATTR(val)							\
static struct threshold_attr val = {					\
	.attr	= {.name = __stringify(val), .mode = 0644 },		\
	.show	= show_## val,						\
	.store	= store_## val,						\
};

RW_ATTR(interrupt_enable);
RW_ATTR(threshold_limit);

static struct attribute *default_attrs[] = {
	&threshold_limit.attr,
	&error_count.attr,
	NULL,	/* possibly interrupt_enable if supported, see below */
	NULL,
};

#define to_block(k)	container_of(k, struct threshold_block, kobj)
#define to_attr(a)	container_of(a, struct threshold_attr, attr)

static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
{
	struct threshold_block *b = to_block(kobj);
	struct threshold_attr *a = to_attr(attr);
	ssize_t ret;

	ret = a->show ? a->show(b, buf) : -EIO;

	return ret;
}

static ssize_t store(struct kobject *kobj, struct attribute *attr,
		     const char *buf, size_t count)
{
	struct threshold_block *b = to_block(kobj);
	struct threshold_attr *a = to_attr(attr);
	ssize_t ret;

	ret = a->store ? a->store(b, buf, count) : -EIO;

	return ret;
}

static const struct sysfs_ops threshold_ops = {
	.show			= show,
	.store			= store,
};

static struct kobj_type threshold_ktype = {
	.sysfs_ops		= &threshold_ops,
	.default_attrs		= default_attrs,
};

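/*
 * Create a threshold_block and its sysfs object for every valid,
 * counter-capable block of @bank, recursing with block + 1 until an
 * invalid block or NR_BLOCKS is reached.
 */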
static int allocate_threshold_blocks(unsigned int cpu, unsigned int bank,
				     unsigned int block, u32 address)
{
	struct threshold_block *b = NULL;
	u32 low, high;
	int err;

	if ((bank >= mca_cfg.banks) || (block >= NR_BLOCKS))
		return 0;

	if (rdmsr_safe_on_cpu(cpu, address, &low, &high))
		return 0;

	if (!(high & MASK_VALID_HI)) {
		if (block)
			goto recurse;
		else
			return 0;
	}

	if (!(high & MASK_CNTP_HI)  ||
	     (high & MASK_LOCKED_HI))
		goto recurse;

	b = kzalloc(sizeof(struct threshold_block), GFP_KERNEL);
	if (!b)
		return -ENOMEM;

	b->block		= block;
	b->bank			= bank;
	b->cpu			= cpu;
	b->address		= address;
	b->interrupt_enable	= 0;
	b->interrupt_capable	= lvt_interrupt_supported(bank, high);
	b->threshold_limit	= THRESHOLD_MAX;

	if (b->interrupt_capable) {
		threshold_ktype.default_attrs[2] = &interrupt_enable.attr;
		b->interrupt_enable = 1;
	} else {
		threshold_ktype.default_attrs[2] = NULL;
	}

	INIT_LIST_HEAD(&b->miscj);

	if (per_cpu(threshold_banks, cpu)[bank]->blocks) {
		list_add(&b->miscj,
			 &per_cpu(threshold_banks, cpu)[bank]->blocks->miscj);
	} else {
		per_cpu(threshold_banks, cpu)[bank]->blocks = b;
	}

	err = kobject_init_and_add(&b->kobj, &threshold_ktype,
				   per_cpu(threshold_banks, cpu)[bank]->kobj,
				   (bank == 4 ? bank4_names(b) : th_names[bank]));
	if (err)
		goto out_free;
recurse:
	if (!block) {
		address = (low & MASK_BLKPTR_LO) >> 21;
		if (!address)
			return 0;
		address += MCG_XBLK_ADDR;
	} else {
		++address;
	}

	err = allocate_threshold_blocks(cpu, bank, ++block, address);
	if (err)
		goto out_free;

	if (b)
		kobject_uevent(&b->kobj, KOBJ_ADD);

	return err;

out_free:
	if (b) {
		kobject_put(&b->kobj);
		list_del(&b->miscj);
		kfree(b);
	}
	return err;
}

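/*
 * Attach this CPU to an already initialized, shared bank: re-add the
 * per-block kobjects under the new bank directory.
 */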
static int __threshold_add_blocks(struct threshold_bank *b)
{
	struct list_head *head = &b->blocks->miscj;
	struct threshold_block *pos = NULL;
	struct threshold_block *tmp = NULL;
	int err = 0;

	err = kobject_add(&b->blocks->kobj, b->kobj, b->blocks->kobj.name);
	if (err)
		return err;

	list_for_each_entry_safe(pos, tmp, head, miscj) {

		err = kobject_add(&pos->kobj, b->kobj, pos->kobj.name);
		if (err) {
			list_for_each_entry_safe_reverse(pos, tmp, head, miscj)
				kobject_del(&pos->kobj);

			return err;
		}
	}
	return err;
}

static int threshold_create_bank(unsigned int cpu, unsigned int bank)
{
	struct device *dev = per_cpu(mce_device, cpu);
	struct amd_northbridge *nb = NULL;
	struct threshold_bank *b = NULL;
	const char *name = th_names[bank];
	int err = 0;

	if (is_shared_bank(bank)) {
		nb = node_to_amd_nb(amd_get_nb_id(cpu));

		/* threshold descriptor already initialized on this node? */
		if (nb && nb->bank4) {
			/* yes, use it */
			b = nb->bank4;
			err = kobject_add(b->kobj, &dev->kobj, name);
			if (err)
				goto out;

			per_cpu(threshold_banks, cpu)[bank] = b;
			atomic_inc(&b->cpus);

			err = __threshold_add_blocks(b);

			goto out;
		}
	}

	b = kzalloc(sizeof(struct threshold_bank), GFP_KERNEL);
	if (!b) {
		err = -ENOMEM;
		goto out;
	}

	b->kobj = kobject_create_and_add(name, &dev->kobj);
	if (!b->kobj) {
		err = -EINVAL;
		goto out_free;
	}

	per_cpu(threshold_banks, cpu)[bank] = b;

	if (is_shared_bank(bank)) {
		atomic_set(&b->cpus, 1);

		/* nb is already initialized, see above */
		if (nb) {
			WARN_ON(nb->bank4);
			nb->bank4 = b;
		}
	}

	err = allocate_threshold_blocks(cpu, bank, 0, MSR_IA32_MCx_MISC(bank));
	if (!err)
		goto out;

 out_free:
	kfree(b);

 out:
	return err;
}

/* create dir/files for all valid threshold banks */
static int threshold_create_device(unsigned int cpu)
{
	unsigned int bank;
	struct threshold_bank **bp;
	int err = 0;

	bp = kzalloc(sizeof(struct threshold_bank *) * mca_cfg.banks,
		     GFP_KERNEL);
	if (!bp)
		return -ENOMEM;

	per_cpu(threshold_banks, cpu) = bp;

	for (bank = 0; bank < mca_cfg.banks; ++bank) {
		if (!(per_cpu(bank_map, cpu) & (1 << bank)))
			continue;
		err = threshold_create_bank(cpu, bank);
		if (err)
			return err;
	}

	return err;
}

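/* Free all threshold blocks of one bank and drop their sysfs objects. */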
static void deallocate_threshold_block(unsigned int cpu,
						 unsigned int bank)
{
	struct threshold_block *pos = NULL;
	struct threshold_block *tmp = NULL;
	struct threshold_bank *head = per_cpu(threshold_banks, cpu)[bank];

	if (!head)
		return;

	list_for_each_entry_safe(pos, tmp, &head->blocks->miscj, miscj) {
		kobject_put(&pos->kobj);
		list_del(&pos->miscj);
		kfree(pos);
	}

	kfree(per_cpu(threshold_banks, cpu)[bank]->blocks);
	per_cpu(threshold_banks, cpu)[bank]->blocks = NULL;
}

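/*
 * Detach this CPU's view of a shared bank: delete the kobjects but leave
 * the blocks themselves to the remaining CPUs on the node.
 */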
static void __threshold_remove_blocks(struct threshold_bank *b)
{
	struct threshold_block *pos = NULL;
	struct threshold_block *tmp = NULL;

	kobject_del(b->kobj);

	list_for_each_entry_safe(pos, tmp, &b->blocks->miscj, miscj)
		kobject_del(&pos->kobj);
}

static void threshold_remove_bank(unsigned int cpu, int bank)
{
	struct amd_northbridge *nb;
	struct threshold_bank *b;

	b = per_cpu(threshold_banks, cpu)[bank];
	if (!b)
		return;

	if (!b->blocks)
		goto free_out;

	if (is_shared_bank(bank)) {
		if (!atomic_dec_and_test(&b->cpus)) {
			__threshold_remove_blocks(b);
			per_cpu(threshold_banks, cpu)[bank] = NULL;
			return;
		} else {
			/*
			 * the last CPU on this node using the shared bank is
			 * going away, remove that bank now.
			 */
			nb = node_to_amd_nb(amd_get_nb_id(cpu));
			nb->bank4 = NULL;
		}
	}

	deallocate_threshold_block(cpu, bank);

free_out:
	kobject_del(b->kobj);
	kobject_put(b->kobj);
	kfree(b);
	per_cpu(threshold_banks, cpu)[bank] = NULL;
}

static void threshold_remove_device(unsigned int cpu)
{
	unsigned int bank;

	for (bank = 0; bank < mca_cfg.banks; ++bank) {
		if (!(per_cpu(bank_map, cpu) & (1 << bank)))
			continue;
		threshold_remove_bank(cpu, bank);
	}
	kfree(per_cpu(threshold_banks, cpu));
}

/* get notified when a cpu comes on/off */
static void
amd_64_threshold_cpu_callback(unsigned long action, unsigned int cpu)
{
	switch (action) {
	case CPU_ONLINE:
	case CPU_ONLINE_FROZEN:
		threshold_create_device(cpu);
		break;
	case CPU_DEAD:
	case CPU_DEAD_FROZEN:
		threshold_remove_device(cpu);
		break;
	default:
		break;
	}
}

static __init int threshold_init_device(void)
{
	unsigned lcpu = 0;

	/* to hit CPUs online before the notifier is up */
	for_each_online_cpu(lcpu) {
		int err = threshold_create_device(lcpu);

		if (err)
			return err;
	}
	threshold_cpu_callback = amd_64_threshold_cpu_callback;

	return 0;
}
/*
 * There are three functions which need to be run as _initcalls in a fixed
 * logical order:
 * 1. xen_late_init_mcelog
 * 2. mcheck_init_device
 * 3. threshold_init_device
 *
 * xen_late_init_mcelog must register xen_mce_chrdev_device before the
 * native mce_chrdev_device registration when running on the Xen platform.
 *
 * mcheck_init_device must run before threshold_init_device in order to
 * initialize mce_device; otherwise a NULL pointer dereference will cause a
 * panic.
 *
 * Hence the following _initcalls are used:
 * 1. device_initcall(xen_late_init_mcelog);
 * 2. device_initcall_sync(mcheck_init_device);
 * 3. late_initcall(threshold_init_device);
 *
 * When running under Xen the initcall order is 1, 2, 3;
 * on bare metal, 1 is skipped and only 2 and 3 run.
 */
late_initcall(threshold_init_device);