/*
 *  (c) 2005-2015 Advanced Micro Devices, Inc.
 *  Your use of this code is subject to the terms and conditions of the
 *  GNU general public license version 2. See "COPYING" or
 *  http://www.gnu.org/licenses/gpl.html
 *
 *  Written by Jacob Shin - AMD, Inc.
 *  Maintained by: Borislav Petkov <bp@alien8.de>
 *
 *  All MC4_MISCi registers are shared between cores on a node.
 */
#include <linux/interrupt.h>
#include <linux/notifier.h>
#include <linux/kobject.h>
#include <linux/percpu.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/sysfs.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/cpu.h>
#include <linux/smp.h>

#include <asm/amd_nb.h>
#include <asm/apic.h>
#include <asm/idle.h>
#include <asm/mce.h>
#include <asm/msr.h>
#include <asm/trace/irq_vectors.h>

#define NR_BLOCKS         9
#define THRESHOLD_MAX     0xFFF
#define INT_TYPE_APIC     0x00020000
#define MASK_VALID_HI     0x80000000
#define MASK_CNTP_HI      0x40000000
#define MASK_LOCKED_HI    0x20000000
#define MASK_LVTOFF_HI    0x00F00000
#define MASK_COUNT_EN_HI  0x00080000
#define MASK_INT_TYPE_HI  0x00060000
#define MASK_OVERFLOW_HI  0x00010000
#define MASK_ERR_COUNT_HI 0x00000FFF
#define MASK_BLKPTR_LO    0xFF000000
#define MCG_XBLK_ADDR     0xC0000400

/* Deferred error settings */
#define MSR_CU_DEF_ERR		0xC0000410
#define MASK_DEF_LVTOFF		0x000000F0
#define MASK_DEF_INT_TYPE	0x00000006
#define DEF_LVT_OFF		0x2
#define DEF_INT_TYPE_APIC	0x2

static const char * const th_names[] = {
	"load_store",
	"insn_fetch",
	"combined_unit",
	"",
	"northbridge",
	"execution_unit",
};

static DEFINE_PER_CPU(struct threshold_bank **, threshold_banks);
static DEFINE_PER_CPU(unsigned char, bank_map);	/* see which banks are on */

static void amd_threshold_interrupt(void);
static void amd_deferred_error_interrupt(void);

static void default_deferred_error_interrupt(void)
{
	pr_err("Unexpected deferred interrupt at vector %x\n", DEFERRED_ERROR_VECTOR);
}
void (*deferred_error_int_vector)(void) = default_deferred_error_interrupt;

/*
 * CPU Initialization
 */

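/*
 * Bundle the arguments for threshold_restart_bank() so that they fit
 * through the single void * parameter of smp_call_function_single().
 */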
struct thresh_restart {
	struct threshold_block	*b;
	int			reset;
	int			set_lvt_off;
	int			lvt_off;
	u16			old_limit;
};

static inline bool is_shared_bank(int bank)
{
	/* Bank 4 is for northbridge reporting and is thus shared */
	return (bank == 4);
}

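/* Map bank 4's block MSR addresses to human-readable sysfs names. */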
static const char *bank4_names(const struct threshold_block *b)
{
	switch (b->address) {
	/* MSR4_MISC0 */
	case 0x00000413:
		return "dram";

	case 0xc0000408:
		return "ht_links";

	case 0xc0000409:
		return "l3_cache";

	default:
		WARN(1, "Funny MSR: 0x%08x\n", b->address);
		return "";
	}
}

static bool lvt_interrupt_supported(unsigned int bank, u32 msr_high_bits)
{
	/*
	 * bank 4 supports APIC LVT interrupts implicitly since forever.
	 */
	if (bank == 4)
		return true;

	/*
	 * IntP: interrupt present; if this bit is set, the thresholding
	 * bank can generate APIC LVT interrupts
	 */
	return msr_high_bits & BIT(28);
}

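/*
 * Sanity-check the APIC LVT offset we are about to program against the
 * one the firmware left in the MSR; a mismatch is a firmware bug.
 */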
static int lvt_off_valid(struct threshold_block *b, int apic, u32 lo, u32 hi)
{
	int msr = (hi & MASK_LVTOFF_HI) >> 20;

	if (apic < 0) {
		pr_err(FW_BUG "cpu %d, failed to setup threshold interrupt "
		       "for bank %d, block %d (MSR%08X=0x%x%08x)\n", b->cpu,
		       b->bank, b->block, b->address, hi, lo);
		return 0;
	}

	if (apic != msr) {
		pr_err(FW_BUG "cpu %d, invalid threshold interrupt offset %d "
		       "for bank %d, block %d (MSR%08X=0x%x%08x)\n",
		       b->cpu, apic, b->bank, b->block, b->address, hi, lo);
		return 0;
	}

	return 1;
}

/*
 * Called via smp_call_function_single(), must be called with correct
 * cpu affinity.
 */
static void threshold_restart_bank(void *_tr)
{
	struct thresh_restart *tr = _tr;
	u32 hi, lo;

	rdmsr(tr->b->address, lo, hi);

	if (tr->b->threshold_limit < (hi & THRESHOLD_MAX))
		tr->reset = 1;	/* limit cannot be lower than err count */

	if (tr->reset) {		/* reset err count and overflow bit */
		hi =
		    (hi & ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI)) |
		    (THRESHOLD_MAX - tr->b->threshold_limit);
	} else if (tr->old_limit) {	/* change limit w/o reset */
		int new_count = (hi & THRESHOLD_MAX) +
		    (tr->old_limit - tr->b->threshold_limit);

		hi = (hi & ~MASK_ERR_COUNT_HI) |
		    (new_count & THRESHOLD_MAX);
	}

	/* clear IntType */
	hi &= ~MASK_INT_TYPE_HI;

	if (!tr->b->interrupt_capable)
		goto done;

	if (tr->set_lvt_off) {
		if (lvt_off_valid(tr->b, tr->lvt_off, lo, hi)) {
			/* set new lvt offset */
			hi &= ~MASK_LVTOFF_HI;
			hi |= tr->lvt_off << 20;
		}
	}

	if (tr->b->interrupt_enable)
		hi |= INT_TYPE_APIC;

 done:
	hi |= MASK_COUNT_EN_HI;
	wrmsr(tr->b->address, lo, hi);
}

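/* Set a block's initial threshold limit and program its LVT offset. */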
static void mce_threshold_block_init(struct threshold_block *b, int offset)
{
	struct thresh_restart tr = {
		.b			= b,
		.set_lvt_off		= 1,
		.lvt_off		= offset,
	};

	b->threshold_limit		= THRESHOLD_MAX;
	threshold_restart_bank(&tr);
}

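/*
 * Reserve the threshold LVT offset once: the first successful
 * setup_APIC_eilvt() claim wins; later callers keep the reserved value.
 */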
static int setup_APIC_mce_threshold(int reserved, int new)
{
	if (reserved < 0 && !setup_APIC_eilvt(new, THRESHOLD_APIC_VECTOR,
					      APIC_EILVT_MSG_FIX, 0))
		return new;

	return reserved;
}

static int setup_APIC_deferred_error(int reserved, int new)
{
	if (reserved < 0 && !setup_APIC_eilvt(new, DEFERRED_ERROR_VECTOR,
					      APIC_EILVT_MSG_FIX, 0))
		return new;

	return reserved;
}

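/*
 * Program the LVT offset and APIC delivery mode for deferred error
 * interrupts, correcting firmware that left MSR_CU_DEF_ERR unconfigured.
 */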
static void deferred_error_interrupt_enable(struct cpuinfo_x86 *c)
{
	u32 low = 0, high = 0;
	int def_offset = -1, def_new;

	if (rdmsr_safe(MSR_CU_DEF_ERR, &low, &high))
		return;

	def_new = (low & MASK_DEF_LVTOFF) >> 4;
	if (!(low & MASK_DEF_LVTOFF)) {
		pr_err(FW_BUG "Your BIOS is not setting up LVT offset 0x2 for deferred error IRQs correctly.\n");
		def_new = DEF_LVT_OFF;
		low = (low & ~MASK_DEF_LVTOFF) | (DEF_LVT_OFF << 4);
	}

	def_offset = setup_APIC_deferred_error(def_offset, def_new);
	if ((def_offset == def_new) &&
	    (deferred_error_int_vector != amd_deferred_error_interrupt))
		deferred_error_int_vector = amd_deferred_error_interrupt;

	low = (low & ~MASK_DEF_INT_TYPE) | DEF_INT_TYPE_APIC;
	wrmsr(MSR_CU_DEF_ERR, low, high);
}

/* cpu init entry point, called from mce.c with preempt off */
void mce_amd_feature_init(struct cpuinfo_x86 *c)
{
	struct threshold_block b;
	unsigned int cpu = smp_processor_id();
	u32 low = 0, high = 0, address = 0;
	unsigned int bank, block;
	int offset = -1, new;

	for (bank = 0; bank < mca_cfg.banks; ++bank) {
		for (block = 0; block < NR_BLOCKS; ++block) {
			if (block == 0)
				address = MSR_IA32_MCx_MISC(bank);
			else if (block == 1) {
				address = (low & MASK_BLKPTR_LO) >> 21;
				if (!address)
					break;

				address += MCG_XBLK_ADDR;
			} else
				++address;

			if (rdmsr_safe(address, &low, &high))
				break;

			if (!(high & MASK_VALID_HI))
				continue;

			if (!(high & MASK_CNTP_HI)  ||
			     (high & MASK_LOCKED_HI))
				continue;

			if (!block)
				per_cpu(bank_map, cpu) |= (1 << bank);

			memset(&b, 0, sizeof(b));
			b.cpu			= cpu;
			b.bank			= bank;
			b.block			= block;
			b.address		= address;
			b.interrupt_capable	= lvt_interrupt_supported(bank, high);

			if (!b.interrupt_capable)
				goto init;

			b.interrupt_enable = 1;
			new	= (high & MASK_LVTOFF_HI) >> 20;
			offset  = setup_APIC_mce_threshold(offset, new);

			if ((offset == new) &&
			    (mce_threshold_vector != amd_threshold_interrupt))
				mce_threshold_vector = amd_threshold_interrupt;

init:
			mce_threshold_block_init(&b, offset);
		}
	}

	if (mce_flags.succor)
		deferred_error_interrupt_enable(c);
}

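/*
 * Read MCi_STATUS for @bank and, if it holds a valid error, assemble
 * and log a struct mce record, then clear the status MSR.
 */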
static void __log_error(unsigned int bank, bool threshold_err, u64 misc)
{
	struct mce m;
	u64 status;

	rdmsrl(MSR_IA32_MCx_STATUS(bank), status);
	if (!(status & MCI_STATUS_VAL))
		return;

	mce_setup(&m);

	m.status = status;
	m.bank = bank;

	if (threshold_err)
		m.misc = misc;

	if (m.status & MCI_STATUS_ADDRV)
		rdmsrl(MSR_IA32_MCx_ADDR(bank), m.addr);

	mce_log(&m);
	wrmsrl(MSR_IA32_MCx_STATUS(bank), 0);
}

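/*
 * Common deferred error interrupt bookkeeping, shared by the traced and
 * non-traced entry points below.
 */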
static inline void __smp_deferred_error_interrupt(void)
{
	inc_irq_stat(irq_deferred_error_count);
	deferred_error_int_vector();
}

asmlinkage __visible void smp_deferred_error_interrupt(void)
{
	entering_irq();
	__smp_deferred_error_interrupt();
	exiting_ack_irq();
}

asmlinkage __visible void smp_trace_deferred_error_interrupt(void)
{
	entering_irq();
	trace_deferred_error_apic_entry(DEFERRED_ERROR_VECTOR);
	__smp_deferred_error_interrupt();
	trace_deferred_error_apic_exit(DEFERRED_ERROR_VECTOR);
	exiting_ack_irq();
}

/* APIC interrupt handler for deferred errors */
static void amd_deferred_error_interrupt(void)
{
	u64 status;
	unsigned int bank;

	for (bank = 0; bank < mca_cfg.banks; ++bank) {
		rdmsrl(MSR_IA32_MCx_STATUS(bank), status);

		if (!(status & MCI_STATUS_VAL) ||
		    !(status & MCI_STATUS_DEFERRED))
			continue;

		__log_error(bank, false, 0);
		break;
	}
}

/*
 * APIC Interrupt Handler
 */

/*
 * The threshold interrupt handler services THRESHOLD_APIC_VECTOR.
 * The interrupt fires when error_count reaches threshold_limit.
 * The handler simply logs an MCE record with the software-defined
 * bank number.
 */

static void amd_threshold_interrupt(void)
{
	u32 low = 0, high = 0, address = 0;
	int cpu = smp_processor_id();
	unsigned int bank, block;

	/* assume first bank caused it */
	for (bank = 0; bank < mca_cfg.banks; ++bank) {
		if (!(per_cpu(bank_map, cpu) & (1 << bank)))
			continue;
		for (block = 0; block < NR_BLOCKS; ++block) {
			if (block == 0) {
				address = MSR_IA32_MCx_MISC(bank);
			} else if (block == 1) {
				address = (low & MASK_BLKPTR_LO) >> 21;
				if (!address)
					break;
				address += MCG_XBLK_ADDR;
			} else {
				++address;
			}

			if (rdmsr_safe(address, &low, &high))
				break;

			if (!(high & MASK_VALID_HI)) {
				if (block)
					continue;
				else
					break;
			}

			if (!(high & MASK_CNTP_HI)  ||
			     (high & MASK_LOCKED_HI))
				continue;

			/*
			 * Log the machine check that caused the threshold
			 * event.
			 */
			if (high & MASK_OVERFLOW_HI)
				goto log;
		}
	}
	return;

log:
	__log_error(bank, true, ((u64)high << 32) | low);
}

/*
 * Sysfs Interface
 */

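/*
 * Each sysfs file carries its own show/store callbacks; the generic
 * show()/store() dispatchers below route kobject accesses to them.
 */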
struct threshold_attr {
	struct attribute attr;
	ssize_t (*show) (struct threshold_block *, char *);
	ssize_t (*store) (struct threshold_block *, const char *, size_t count);
};

#define SHOW_FIELDS(name)						\
static ssize_t show_ ## name(struct threshold_block *b, char *buf)	\
{									\
	return sprintf(buf, "%lu\n", (unsigned long) b->name);		\
}
SHOW_FIELDS(interrupt_enable)
SHOW_FIELDS(threshold_limit)

static ssize_t
store_interrupt_enable(struct threshold_block *b, const char *buf, size_t size)
{
	struct thresh_restart tr;
	unsigned long new;

	if (!b->interrupt_capable)
		return -EINVAL;

	if (kstrtoul(buf, 0, &new) < 0)
		return -EINVAL;

	b->interrupt_enable = !!new;

	memset(&tr, 0, sizeof(tr));
	tr.b		= b;

	smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1);

	return size;
}

static ssize_t
store_threshold_limit(struct threshold_block *b, const char *buf, size_t size)
{
	struct thresh_restart tr;
	unsigned long new;

	if (kstrtoul(buf, 0, &new) < 0)
		return -EINVAL;

	if (new > THRESHOLD_MAX)
		new = THRESHOLD_MAX;
	if (new < 1)
		new = 1;

	memset(&tr, 0, sizeof(tr));
	tr.old_limit = b->threshold_limit;
	b->threshold_limit = new;
	tr.b = b;

	smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1);

	return size;
}

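/*
 * The hardware counter starts at THRESHOLD_MAX - threshold_limit and
 * interrupts when it overflows past THRESHOLD_MAX, so the number of
 * errors seen so far is the current count minus that starting value.
 */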
static ssize_t show_error_count(struct threshold_block *b, char *buf)
{
	u32 lo, hi;

	rdmsr_on_cpu(b->cpu, b->address, &lo, &hi);

	return sprintf(buf, "%u\n", ((hi & THRESHOLD_MAX) -
				     (THRESHOLD_MAX - b->threshold_limit)));
}

static struct threshold_attr error_count = {
	.attr = {.name = __stringify(error_count), .mode = 0444 },
	.show = show_error_count,
};

#define RW_ATTR(val)							\
static struct threshold_attr val = {					\
	.attr	= {.name = __stringify(val), .mode = 0644 },		\
	.show	= show_## val,						\
	.store	= store_## val,						\
};

RW_ATTR(interrupt_enable);
RW_ATTR(threshold_limit);

static struct attribute *default_attrs[] = {
	&threshold_limit.attr,
	&error_count.attr,
	NULL,	/* possibly interrupt_enable if supported, see below */
	NULL,
};

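/*
 * Each bank appears as a directory under the per-CPU mce_device, named
 * after th_names[] (bank4_names() for the shared northbridge bank), and
 * each block beneath it gets error_count, threshold_limit and, when
 * supported, interrupt_enable files, e.g. (illustrative path):
 *
 *   /sys/devices/system/machinecheck/machinecheck0/load_store/.../threshold_limit
 */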
#define to_block(k)	container_of(k, struct threshold_block, kobj)
#define to_attr(a)	container_of(a, struct threshold_attr, attr)

static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
{
	struct threshold_block *b = to_block(kobj);
	struct threshold_attr *a = to_attr(attr);
	ssize_t ret;

	ret = a->show ? a->show(b, buf) : -EIO;

	return ret;
}

static ssize_t store(struct kobject *kobj, struct attribute *attr,
		     const char *buf, size_t count)
{
	struct threshold_block *b = to_block(kobj);
	struct threshold_attr *a = to_attr(attr);
	ssize_t ret;

	ret = a->store ? a->store(b, buf, count) : -EIO;

	return ret;
}

static const struct sysfs_ops threshold_ops = {
	.show			= show,
	.store			= store,
};

static struct kobj_type threshold_ktype = {
	.sysfs_ops		= &threshold_ops,
	.default_attrs		= default_attrs,
};

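/*
 * Allocate and register one threshold_block per valid MISC block of a
 * bank, recursing along the BLKPTR chain until an invalid block is hit.
 */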
static int allocate_threshold_blocks(unsigned int cpu, unsigned int bank,
				     unsigned int block, u32 address)
{
	struct threshold_block *b = NULL;
	u32 low, high;
	int err;

	if ((bank >= mca_cfg.banks) || (block >= NR_BLOCKS))
		return 0;

	if (rdmsr_safe_on_cpu(cpu, address, &low, &high))
		return 0;

	if (!(high & MASK_VALID_HI)) {
		if (block)
			goto recurse;
		else
			return 0;
	}

	if (!(high & MASK_CNTP_HI)  ||
	     (high & MASK_LOCKED_HI))
		goto recurse;

	b = kzalloc(sizeof(struct threshold_block), GFP_KERNEL);
	if (!b)
		return -ENOMEM;

	b->block		= block;
	b->bank			= bank;
	b->cpu			= cpu;
	b->address		= address;
	b->interrupt_enable	= 0;
	b->interrupt_capable	= lvt_interrupt_supported(bank, high);
	b->threshold_limit	= THRESHOLD_MAX;

	if (b->interrupt_capable) {
		threshold_ktype.default_attrs[2] = &interrupt_enable.attr;
		b->interrupt_enable = 1;
	} else {
		threshold_ktype.default_attrs[2] = NULL;
	}

	INIT_LIST_HEAD(&b->miscj);

	if (per_cpu(threshold_banks, cpu)[bank]->blocks) {
		list_add(&b->miscj,
			 &per_cpu(threshold_banks, cpu)[bank]->blocks->miscj);
	} else {
		per_cpu(threshold_banks, cpu)[bank]->blocks = b;
	}

	err = kobject_init_and_add(&b->kobj, &threshold_ktype,
				   per_cpu(threshold_banks, cpu)[bank]->kobj,
				   (bank == 4 ? bank4_names(b) : th_names[bank]));
	if (err)
		goto out_free;
recurse:
	if (!block) {
		address = (low & MASK_BLKPTR_LO) >> 21;
		if (!address)
			return 0;
		address += MCG_XBLK_ADDR;
	} else {
		++address;
	}

	err = allocate_threshold_blocks(cpu, bank, ++block, address);
	if (err)
		goto out_free;

	if (b)
		kobject_uevent(&b->kobj, KOBJ_ADD);

	return err;

out_free:
	if (b) {
		kobject_put(&b->kobj);
		list_del(&b->miscj);
		kfree(b);
	}
	return err;
}

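/* Re-add an already initialized shared bank's block kobjects for this CPU. */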
static int __threshold_add_blocks(struct threshold_bank *b)
{
	struct list_head *head = &b->blocks->miscj;
	struct threshold_block *pos = NULL;
	struct threshold_block *tmp = NULL;
	int err = 0;

	err = kobject_add(&b->blocks->kobj, b->kobj, b->blocks->kobj.name);
	if (err)
		return err;

	list_for_each_entry_safe(pos, tmp, head, miscj) {

		err = kobject_add(&pos->kobj, b->kobj, pos->kobj.name);
		if (err) {
			list_for_each_entry_safe_reverse(pos, tmp, head, miscj)
				kobject_del(&pos->kobj);

			return err;
		}
	}
	return err;
}

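/*
 * Create the sysfs directory for one bank on one CPU; shared
 * (northbridge) banks reuse the threshold_bank a sibling core set up.
 */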
static int threshold_create_bank(unsigned int cpu, unsigned int bank)
{
	struct device *dev = per_cpu(mce_device, cpu);
	struct amd_northbridge *nb = NULL;
	struct threshold_bank *b = NULL;
	const char *name = th_names[bank];
	int err = 0;

	if (is_shared_bank(bank)) {
		nb = node_to_amd_nb(amd_get_nb_id(cpu));

		/* threshold descriptor already initialized on this node? */
		if (nb && nb->bank4) {
			/* yes, use it */
			b = nb->bank4;
			err = kobject_add(b->kobj, &dev->kobj, name);
			if (err)
				goto out;

			per_cpu(threshold_banks, cpu)[bank] = b;
			atomic_inc(&b->cpus);

			err = __threshold_add_blocks(b);

			goto out;
		}
	}

	b = kzalloc(sizeof(struct threshold_bank), GFP_KERNEL);
	if (!b) {
		err = -ENOMEM;
		goto out;
	}

	b->kobj = kobject_create_and_add(name, &dev->kobj);
	if (!b->kobj) {
		err = -EINVAL;
		goto out_free;
	}

	per_cpu(threshold_banks, cpu)[bank] = b;

	if (is_shared_bank(bank)) {
		atomic_set(&b->cpus, 1);

		/* nb is already initialized, see above */
		if (nb) {
			WARN_ON(nb->bank4);
			nb->bank4 = b;
		}
	}

	err = allocate_threshold_blocks(cpu, bank, 0, MSR_IA32_MCx_MISC(bank));
	if (!err)
		goto out;

 out_free:
	kfree(b);

 out:
	return err;
}

/* create dir/files for all valid threshold banks */
static int threshold_create_device(unsigned int cpu)
{
	unsigned int bank;
	struct threshold_bank **bp;
	int err = 0;

	bp = kzalloc(sizeof(struct threshold_bank *) * mca_cfg.banks,
		     GFP_KERNEL);
	if (!bp)
		return -ENOMEM;

	per_cpu(threshold_banks, cpu) = bp;

	for (bank = 0; bank < mca_cfg.banks; ++bank) {
		if (!(per_cpu(bank_map, cpu) & (1 << bank)))
			continue;
		err = threshold_create_bank(cpu, bank);
		if (err)
			return err;
	}

	return err;
}

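/* Free every threshold_block hanging off a bank's block list. */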
static void deallocate_threshold_block(unsigned int cpu,
						 unsigned int bank)
{
	struct threshold_block *pos = NULL;
	struct threshold_block *tmp = NULL;
	struct threshold_bank *head = per_cpu(threshold_banks, cpu)[bank];

	if (!head)
		return;

	list_for_each_entry_safe(pos, tmp, &head->blocks->miscj, miscj) {
		kobject_put(&pos->kobj);
		list_del(&pos->miscj);
		kfree(pos);
	}

	kfree(per_cpu(threshold_banks, cpu)[bank]->blocks);
	per_cpu(threshold_banks, cpu)[bank]->blocks = NULL;
}

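/* Unhook a shared bank's block kobjects without freeing them. */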
static void __threshold_remove_blocks(struct threshold_bank *b)
{
	struct threshold_block *pos = NULL;
	struct threshold_block *tmp = NULL;

	kobject_del(b->kobj);

	list_for_each_entry_safe(pos, tmp, &b->blocks->miscj, miscj)
		kobject_del(&pos->kobj);
}

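/*
 * Tear down one bank's sysfs objects; for a shared bank, only the last
 * CPU on the node frees the underlying threshold_bank.
 */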
static void threshold_remove_bank(unsigned int cpu, int bank)
{
	struct amd_northbridge *nb;
	struct threshold_bank *b;

	b = per_cpu(threshold_banks, cpu)[bank];
	if (!b)
		return;

	if (!b->blocks)
		goto free_out;

	if (is_shared_bank(bank)) {
		if (!atomic_dec_and_test(&b->cpus)) {
			__threshold_remove_blocks(b);
			per_cpu(threshold_banks, cpu)[bank] = NULL;
			return;
		} else {
			/*
			 * the last CPU on this node using the shared bank is
			 * going away, remove that bank now.
			 */
			nb = node_to_amd_nb(amd_get_nb_id(cpu));
			nb->bank4 = NULL;
		}
	}

	deallocate_threshold_block(cpu, bank);

free_out:
	kobject_del(b->kobj);
	kobject_put(b->kobj);
	kfree(b);
	per_cpu(threshold_banks, cpu)[bank] = NULL;
}

static void threshold_remove_device(unsigned int cpu)
{
	unsigned int bank;

	for (bank = 0; bank < mca_cfg.banks; ++bank) {
		if (!(per_cpu(bank_map, cpu) & (1 << bank)))
			continue;
		threshold_remove_bank(cpu, bank);
	}
	kfree(per_cpu(threshold_banks, cpu));
}

/* get notified when a cpu comes on/off */
static void
amd_64_threshold_cpu_callback(unsigned long action, unsigned int cpu)
{
	switch (action) {
	case CPU_ONLINE:
	case CPU_ONLINE_FROZEN:
		threshold_create_device(cpu);
		break;
	case CPU_DEAD:
	case CPU_DEAD_FROZEN:
		threshold_remove_device(cpu);
		break;
	default:
		break;
	}
}

static __init int threshold_init_device(void)
{
	unsigned lcpu = 0;

	/* to hit CPUs online before the notifier is up */
	for_each_online_cpu(lcpu) {
		int err = threshold_create_device(lcpu);

		if (err)
			return err;
	}
	threshold_cpu_callback = amd_64_threshold_cpu_callback;

	return 0;
}
/*
 * Three functions need to be _initcalled in a fixed sequence:
 * 1. xen_late_init_mcelog
 * 2. mcheck_init_device
 * 3. threshold_init_device
 *
 * xen_late_init_mcelog must register xen_mce_chrdev_device before the
 * native mce_chrdev_device registration when running under the xen
 * platform;
 *
 * mcheck_init_device must run before threshold_init_device so that
 * mce_device is initialized; otherwise a NULL pointer dereference
 * would cause a panic.
 *
 * So we use the following _initcalls:
 * 1. device_initcall(xen_late_init_mcelog);
 * 2. device_initcall_sync(mcheck_init_device);
 * 3. late_initcall(threshold_init_device);
 *
 * When running under xen, the initcall order is 1, 2, 3;
 * on baremetal, 1 is skipped and only 2 and 3 run.
 */
late_initcall(threshold_init_device);