mce_amd.c

/*
 *  (c) 2005-2015 Advanced Micro Devices, Inc.
 *  Your use of this code is subject to the terms and conditions of the
 *  GNU general public license version 2. See "COPYING" or
 *  http://www.gnu.org/licenses/gpl.html
 *
 *  Written by Jacob Shin - AMD, Inc.
 *  Maintained by: Borislav Petkov <bp@alien8.de>
 *
 *  All MC4_MISCi registers are shared between cores on a node.
 */
#include <linux/interrupt.h>
#include <linux/notifier.h>
#include <linux/kobject.h>
#include <linux/percpu.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/sysfs.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/cpu.h>
#include <linux/smp.h>

#include <asm/amd_nb.h>
#include <asm/apic.h>
#include <asm/idle.h>
#include <asm/mce.h>
#include <asm/msr.h>
#include <asm/trace/irq_vectors.h>

#define NR_BLOCKS         5
#define THRESHOLD_MAX     0xFFF
#define INT_TYPE_APIC     0x00020000
#define MASK_VALID_HI     0x80000000
#define MASK_CNTP_HI      0x40000000
#define MASK_LOCKED_HI    0x20000000
#define MASK_LVTOFF_HI    0x00F00000
#define MASK_COUNT_EN_HI  0x00080000
#define MASK_INT_TYPE_HI  0x00060000
#define MASK_OVERFLOW_HI  0x00010000
#define MASK_ERR_COUNT_HI 0x00000FFF
#define MASK_BLKPTR_LO    0xFF000000
#define MCG_XBLK_ADDR     0xC0000400

/* Deferred error settings */
#define MSR_CU_DEF_ERR		0xC0000410
#define MASK_DEF_LVTOFF		0x000000F0
#define MASK_DEF_INT_TYPE	0x00000006
#define DEF_LVT_OFF		0x2
#define DEF_INT_TYPE_APIC	0x2

/* Scalable MCA: */

/* Threshold LVT offset is at MSR0xC0000410[15:12] */
#define SMCA_THR_LVT_OFF	0xF000

static const char * const th_names[] = {
	"load_store",
	"insn_fetch",
	"combined_unit",
	"",
	"northbridge",
	"execution_unit",
};

static DEFINE_PER_CPU(struct threshold_bank **, threshold_banks);
static DEFINE_PER_CPU(unsigned char, bank_map);	/* see which banks are on */

static void amd_threshold_interrupt(void);
static void amd_deferred_error_interrupt(void);

static void default_deferred_error_interrupt(void)
{
	pr_err("Unexpected deferred interrupt at vector %x\n", DEFERRED_ERROR_VECTOR);
}
void (*deferred_error_int_vector)(void) = default_deferred_error_interrupt;

/*
 * CPU Initialization
 */

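/*
 * Argument block for threshold_restart_bank(), which runs on the CPU owning
 * the bank: @reset clears the error count and overflow bit, @old_limit lets
 * the accumulated error count carry over when only the limit changes.
 */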
struct thresh_restart {
	struct threshold_block	*b;
	int			reset;
	int			set_lvt_off;
	int			lvt_off;
	u16			old_limit;
};

static inline bool is_shared_bank(int bank)
{
	/*
	 * Scalable MCA provides for only one core to have access to the MSRs of
	 * a shared bank.
	 */
	if (mce_flags.smca)
		return false;

	/* Bank 4 is for northbridge reporting and is thus shared */
	return (bank == 4);
}

static const char *bank4_names(const struct threshold_block *b)
{
	switch (b->address) {
	/* MSR4_MISC0 */
	case 0x00000413:
		return "dram";

	case 0xc0000408:
		return "ht_links";

	case 0xc0000409:
		return "l3_cache";

	default:
		WARN(1, "Funny MSR: 0x%08x\n", b->address);
		return "";
	}
};


static bool lvt_interrupt_supported(unsigned int bank, u32 msr_high_bits)
{
	/*
	 * bank 4 supports APIC LVT interrupts implicitly since forever.
	 */
	if (bank == 4)
		return true;

	/*
	 * IntP: interrupt present; if this bit is set, the thresholding
	 * bank can generate APIC LVT interrupts
	 */
	return msr_high_bits & BIT(28);
}

static int lvt_off_valid(struct threshold_block *b, int apic, u32 lo, u32 hi)
{
	int msr = (hi & MASK_LVTOFF_HI) >> 20;

	if (apic < 0) {
		pr_err(FW_BUG "cpu %d, failed to setup threshold interrupt "
		       "for bank %d, block %d (MSR%08X=0x%x%08x)\n", b->cpu,
		       b->bank, b->block, b->address, hi, lo);
		return 0;
	}

	if (apic != msr) {
		/*
		 * On SMCA CPUs, LVT offset is programmed at a different MSR, and
		 * the BIOS provides the value. The original field where LVT offset
		 * was set is reserved. Return early here:
		 */
		if (mce_flags.smca)
			return 0;

		pr_err(FW_BUG "cpu %d, invalid threshold interrupt offset %d "
		       "for bank %d, block %d (MSR%08X=0x%x%08x)\n",
		       b->cpu, apic, b->bank, b->block, b->address, hi, lo);
		return 0;
	}

	return 1;
};

/*
 * Called via smp_call_function_single(), must be called with correct
 * cpu affinity.
 */
static void threshold_restart_bank(void *_tr)
{
	struct thresh_restart *tr = _tr;
	u32 hi, lo;

	rdmsr(tr->b->address, lo, hi);

	if (tr->b->threshold_limit < (hi & THRESHOLD_MAX))
		tr->reset = 1;	/* limit cannot be lower than err count */

	if (tr->reset) {		/* reset err count and overflow bit */
		hi =
		    (hi & ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI)) |
		    (THRESHOLD_MAX - tr->b->threshold_limit);
	} else if (tr->old_limit) {	/* change limit w/o reset */
		int new_count = (hi & THRESHOLD_MAX) +
		    (tr->old_limit - tr->b->threshold_limit);

		hi = (hi & ~MASK_ERR_COUNT_HI) |
		    (new_count & THRESHOLD_MAX);
	}

	/* clear IntType */
	hi &= ~MASK_INT_TYPE_HI;

	if (!tr->b->interrupt_capable)
		goto done;

	if (tr->set_lvt_off) {
		if (lvt_off_valid(tr->b, tr->lvt_off, lo, hi)) {
			/* set new lvt offset */
			hi &= ~MASK_LVTOFF_HI;
			hi |= tr->lvt_off << 20;
		}
	}

	if (tr->b->interrupt_enable)
		hi |= INT_TYPE_APIC;

 done:

	hi |= MASK_COUNT_EN_HI;
	wrmsr(tr->b->address, lo, hi);
}

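/*
 * Init a block at CPU bringup: raise its threshold limit to the maximum and
 * program the given LVT offset via threshold_restart_bank().
 */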
static void mce_threshold_block_init(struct threshold_block *b, int offset)
{
	struct thresh_restart tr = {
		.b			= b,
		.set_lvt_off		= 1,
		.lvt_off		= offset,
	};

	b->threshold_limit		= THRESHOLD_MAX;
	threshold_restart_bank(&tr);
};

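/*
 * Try to reserve LVT offset @new for the threshold vector. Returns @new on
 * success, otherwise whatever was reserved before.
 */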
static int setup_APIC_mce_threshold(int reserved, int new)
{
	if (reserved < 0 && !setup_APIC_eilvt(new, THRESHOLD_APIC_VECTOR,
					      APIC_EILVT_MSG_FIX, 0))
		return new;

	return reserved;
}

static int setup_APIC_deferred_error(int reserved, int new)
{
	if (reserved < 0 && !setup_APIC_eilvt(new, DEFERRED_ERROR_VECTOR,
					      APIC_EILVT_MSG_FIX, 0))
		return new;

	return reserved;
}

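/*
 * Enable the deferred error interrupt: validate (or fix up) the LVT offset
 * the BIOS left in MSR_CU_DEF_ERR, hook up the APIC vector and select APIC
 * delivery for deferred errors.
 */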
static void deferred_error_interrupt_enable(struct cpuinfo_x86 *c)
{
	u32 low = 0, high = 0;
	int def_offset = -1, def_new;

	if (rdmsr_safe(MSR_CU_DEF_ERR, &low, &high))
		return;

	def_new = (low & MASK_DEF_LVTOFF) >> 4;
	if (!(low & MASK_DEF_LVTOFF)) {
		pr_err(FW_BUG "Your BIOS is not setting up LVT offset 0x2 for deferred error IRQs correctly.\n");
		def_new = DEF_LVT_OFF;
		low = (low & ~MASK_DEF_LVTOFF) | (DEF_LVT_OFF << 4);
	}

	def_offset = setup_APIC_deferred_error(def_offset, def_new);
	if ((def_offset == def_new) &&
	    (deferred_error_int_vector != amd_deferred_error_interrupt))
		deferred_error_int_vector = amd_deferred_error_interrupt;

	low = (low & ~MASK_DEF_INT_TYPE) | DEF_INT_TYPE_APIC;
	wrmsr(MSR_CU_DEF_ERR, low, high);
}

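/*
 * Set up a single threshold block: mark its bank in bank_map, read the LVT
 * offset (from MSR_CU_DEF_ERR on SMCA systems, from MCi_MISC otherwise),
 * reserve it and program the block. Returns the LVT offset in use.
 */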
static int
prepare_threshold_block(unsigned int bank, unsigned int block, u32 addr,
			int offset, u32 misc_high)
{
	unsigned int cpu = smp_processor_id();
	struct threshold_block b;
	int new;

	if (!block)
		per_cpu(bank_map, cpu) |= (1 << bank);

	memset(&b, 0, sizeof(b));
	b.cpu			= cpu;
	b.bank			= bank;
	b.block			= block;
	b.address		= addr;
	b.interrupt_capable	= lvt_interrupt_supported(bank, misc_high);

	if (!b.interrupt_capable)
		goto done;

	b.interrupt_enable = 1;

	if (mce_flags.smca) {
		u32 smca_low, smca_high;

		/* Gather LVT offset for thresholding: */
		if (rdmsr_safe(MSR_CU_DEF_ERR, &smca_low, &smca_high))
			goto out;

		new = (smca_low & SMCA_THR_LVT_OFF) >> 12;
	} else {
		new = (misc_high & MASK_LVTOFF_HI) >> 20;
	}

	offset = setup_APIC_mce_threshold(offset, new);

	if ((offset == new) && (mce_threshold_vector != amd_threshold_interrupt))
		mce_threshold_vector = amd_threshold_interrupt;

done:
	mce_threshold_block_init(&b, offset);

out:
	return offset;
}

/* cpu init entry point, called from mce.c with preempt off */
void mce_amd_feature_init(struct cpuinfo_x86 *c)
{
	u32 low = 0, high = 0, address = 0;
	unsigned int bank, block;
	int offset = -1;

	for (bank = 0; bank < mca_cfg.banks; ++bank) {
		for (block = 0; block < NR_BLOCKS; ++block) {
			if (block == 0)
				address = MSR_IA32_MCx_MISC(bank);
			else if (block == 1) {
				address = (low & MASK_BLKPTR_LO) >> 21;
				if (!address)
					break;

				address += MCG_XBLK_ADDR;
			} else
				++address;

			if (rdmsr_safe(address, &low, &high))
				break;

			if (!(high & MASK_VALID_HI))
				continue;

			if (!(high & MASK_CNTP_HI)  ||
			     (high & MASK_LOCKED_HI))
				continue;

			offset = prepare_threshold_block(bank, block, address, offset, high);
		}
	}

	if (mce_flags.succor)
		deferred_error_interrupt_enable(c);
}

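/*
 * If the bank's MCi_STATUS holds a valid error, log it through the MCE
 * subsystem and clear the status MSR. @misc is recorded for threshold
 * events only.
 */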
static void __log_error(unsigned int bank, bool threshold_err, u64 misc)
{
	struct mce m;
	u64 status;

	rdmsrl(MSR_IA32_MCx_STATUS(bank), status);
	if (!(status & MCI_STATUS_VAL))
		return;

	mce_setup(&m);

	m.status = status;
	m.bank = bank;

	if (threshold_err)
		m.misc = misc;

	if (m.status & MCI_STATUS_ADDRV)
		rdmsrl(MSR_IA32_MCx_ADDR(bank), m.addr);

	mce_log(&m);
	wrmsrl(MSR_IA32_MCx_STATUS(bank), 0);
}

static inline void __smp_deferred_error_interrupt(void)
{
	inc_irq_stat(irq_deferred_error_count);
	deferred_error_int_vector();
}

asmlinkage __visible void smp_deferred_error_interrupt(void)
{
	entering_irq();
	__smp_deferred_error_interrupt();
	exiting_ack_irq();
}

asmlinkage __visible void smp_trace_deferred_error_interrupt(void)
{
	entering_irq();
	trace_deferred_error_apic_entry(DEFERRED_ERROR_VECTOR);
	__smp_deferred_error_interrupt();
	trace_deferred_error_apic_exit(DEFERRED_ERROR_VECTOR);
	exiting_ack_irq();
}

/* APIC interrupt handler for deferred errors */
static void amd_deferred_error_interrupt(void)
{
	u64 status;
	unsigned int bank;

	for (bank = 0; bank < mca_cfg.banks; ++bank) {
		rdmsrl(MSR_IA32_MCx_STATUS(bank), status);

		if (!(status & MCI_STATUS_VAL) ||
		    !(status & MCI_STATUS_DEFERRED))
			continue;

		__log_error(bank, false, 0);
		break;
	}
}

/*
 * APIC Interrupt Handler
 */

/*
 * threshold interrupt handler will service THRESHOLD_APIC_VECTOR.
 * the interrupt goes off when error_count reaches threshold_limit.
 * the handler will simply log mcelog w/ software defined bank number.
 */

static void amd_threshold_interrupt(void)
{
	u32 low = 0, high = 0, address = 0;
	int cpu = smp_processor_id();
	unsigned int bank, block;

	/* assume first bank caused it */
	for (bank = 0; bank < mca_cfg.banks; ++bank) {
		if (!(per_cpu(bank_map, cpu) & (1 << bank)))
			continue;
		for (block = 0; block < NR_BLOCKS; ++block) {
			if (block == 0) {
				address = MSR_IA32_MCx_MISC(bank);
			} else if (block == 1) {
				address = (low & MASK_BLKPTR_LO) >> 21;
				if (!address)
					break;
				address += MCG_XBLK_ADDR;
			} else {
				++address;
			}

			if (rdmsr_safe(address, &low, &high))
				break;

			if (!(high & MASK_VALID_HI)) {
				if (block)
					continue;
				else
					break;
			}

			if (!(high & MASK_CNTP_HI)  ||
			     (high & MASK_LOCKED_HI))
				continue;

			/*
			 * Log the machine check that caused the threshold
			 * event.
			 */
			if (high & MASK_OVERFLOW_HI)
				goto log;
		}
	}
	return;

log:
	__log_error(bank, true, ((u64)high << 32) | low);
}

/*
 * Sysfs Interface
 */

struct threshold_attr {
	struct attribute attr;
	ssize_t (*show) (struct threshold_block *, char *);
	ssize_t (*store) (struct threshold_block *, const char *, size_t count);
};

#define SHOW_FIELDS(name)						\
static ssize_t show_ ## name(struct threshold_block *b, char *buf)	\
{									\
	return sprintf(buf, "%lu\n", (unsigned long) b->name);		\
}
SHOW_FIELDS(interrupt_enable)
SHOW_FIELDS(threshold_limit)

static ssize_t
store_interrupt_enable(struct threshold_block *b, const char *buf, size_t size)
{
	struct thresh_restart tr;
	unsigned long new;

	if (!b->interrupt_capable)
		return -EINVAL;

	if (kstrtoul(buf, 0, &new) < 0)
		return -EINVAL;

	b->interrupt_enable = !!new;

	memset(&tr, 0, sizeof(tr));
	tr.b		= b;

	smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1);

	return size;
}

static ssize_t
store_threshold_limit(struct threshold_block *b, const char *buf, size_t size)
{
	struct thresh_restart tr;
	unsigned long new;

	if (kstrtoul(buf, 0, &new) < 0)
		return -EINVAL;

	if (new > THRESHOLD_MAX)
		new = THRESHOLD_MAX;
	if (new < 1)
		new = 1;

	memset(&tr, 0, sizeof(tr));
	tr.old_limit = b->threshold_limit;
	b->threshold_limit = new;
	tr.b = b;

	smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1);

	return size;
}

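/*
 * The counter is preloaded with THRESHOLD_MAX - threshold_limit so that it
 * overflows after threshold_limit errors; subtract the preload to report
 * the number of errors seen so far.
 */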
static ssize_t show_error_count(struct threshold_block *b, char *buf)
{
	u32 lo, hi;

	rdmsr_on_cpu(b->cpu, b->address, &lo, &hi);

	return sprintf(buf, "%u\n", ((hi & THRESHOLD_MAX) -
				     (THRESHOLD_MAX - b->threshold_limit)));
}

static struct threshold_attr error_count = {
	.attr = {.name = __stringify(error_count), .mode = 0444 },
	.show = show_error_count,
};

#define RW_ATTR(val)							\
static struct threshold_attr val = {					\
	.attr	= {.name = __stringify(val), .mode = 0644 },		\
	.show	= show_## val,						\
	.store	= store_## val,						\
};

RW_ATTR(interrupt_enable);
RW_ATTR(threshold_limit);

static struct attribute *default_attrs[] = {
	&threshold_limit.attr,
	&error_count.attr,
	NULL,	/* possibly interrupt_enable if supported, see below */
	NULL,
};

#define to_block(k)	container_of(k, struct threshold_block, kobj)
#define to_attr(a)	container_of(a, struct threshold_attr, attr)

static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
{
	struct threshold_block *b = to_block(kobj);
	struct threshold_attr *a = to_attr(attr);
	ssize_t ret;

	ret = a->show ? a->show(b, buf) : -EIO;

	return ret;
}

static ssize_t store(struct kobject *kobj, struct attribute *attr,
		     const char *buf, size_t count)
{
	struct threshold_block *b = to_block(kobj);
	struct threshold_attr *a = to_attr(attr);
	ssize_t ret;

	ret = a->store ? a->store(b, buf, count) : -EIO;

	return ret;
}

static const struct sysfs_ops threshold_ops = {
	.show			= show,
	.store			= store,
};

static struct kobj_type threshold_ktype = {
	.sysfs_ops		= &threshold_ops,
	.default_attrs		= default_attrs,
};

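/*
 * Allocate a threshold_block, hook it into the bank's block list and
 * register its kobject, then recurse to the next block. Invalid or locked
 * blocks are skipped; recursion ends at NR_BLOCKS or when the block pointer
 * chain runs out.
 */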
static int allocate_threshold_blocks(unsigned int cpu, unsigned int bank,
				     unsigned int block, u32 address)
{
	struct threshold_block *b = NULL;
	u32 low, high;
	int err;

	if ((bank >= mca_cfg.banks) || (block >= NR_BLOCKS))
		return 0;

	if (rdmsr_safe_on_cpu(cpu, address, &low, &high))
		return 0;

	if (!(high & MASK_VALID_HI)) {
		if (block)
			goto recurse;
		else
			return 0;
	}

	if (!(high & MASK_CNTP_HI)  ||
	     (high & MASK_LOCKED_HI))
		goto recurse;

	b = kzalloc(sizeof(struct threshold_block), GFP_KERNEL);
	if (!b)
		return -ENOMEM;

	b->block		= block;
	b->bank			= bank;
	b->cpu			= cpu;
	b->address		= address;
	b->interrupt_enable	= 0;
	b->interrupt_capable	= lvt_interrupt_supported(bank, high);
	b->threshold_limit	= THRESHOLD_MAX;

	if (b->interrupt_capable) {
		threshold_ktype.default_attrs[2] = &interrupt_enable.attr;
		b->interrupt_enable = 1;
	} else {
		threshold_ktype.default_attrs[2] = NULL;
	}

	INIT_LIST_HEAD(&b->miscj);

	if (per_cpu(threshold_banks, cpu)[bank]->blocks) {
		list_add(&b->miscj,
			 &per_cpu(threshold_banks, cpu)[bank]->blocks->miscj);
	} else {
		per_cpu(threshold_banks, cpu)[bank]->blocks = b;
	}

	err = kobject_init_and_add(&b->kobj, &threshold_ktype,
				   per_cpu(threshold_banks, cpu)[bank]->kobj,
				   (bank == 4 ? bank4_names(b) : th_names[bank]));
	if (err)
		goto out_free;
recurse:
	if (!block) {
		address = (low & MASK_BLKPTR_LO) >> 21;
		if (!address)
			return 0;
		address += MCG_XBLK_ADDR;
	} else {
		++address;
	}

	err = allocate_threshold_blocks(cpu, bank, ++block, address);
	if (err)
		goto out_free;

	if (b)
		kobject_uevent(&b->kobj, KOBJ_ADD);

	return err;

out_free:
	if (b) {
		kobject_put(&b->kobj);
		list_del(&b->miscj);
		kfree(b);
	}
	return err;
}

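/*
 * Re-add the block kobjects of an already initialized (shared) bank for
 * another CPU, unwinding the additions on failure.
 */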
static int __threshold_add_blocks(struct threshold_bank *b)
{
	struct list_head *head = &b->blocks->miscj;
	struct threshold_block *pos = NULL;
	struct threshold_block *tmp = NULL;
	int err = 0;

	err = kobject_add(&b->blocks->kobj, b->kobj, b->blocks->kobj.name);
	if (err)
		return err;

	list_for_each_entry_safe(pos, tmp, head, miscj) {

		err = kobject_add(&pos->kobj, b->kobj, pos->kobj.name);
		if (err) {
			list_for_each_entry_safe_reverse(pos, tmp, head, miscj)
				kobject_del(&pos->kobj);

			return err;
		}
	}
	return err;
}

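/*
 * Create the sysfs bank directory for @cpu. A shared bank that was already
 * set up by another core on the node is reused and refcounted instead of
 * being allocated again.
 */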
static int threshold_create_bank(unsigned int cpu, unsigned int bank)
{
	struct device *dev = per_cpu(mce_device, cpu);
	struct amd_northbridge *nb = NULL;
	struct threshold_bank *b = NULL;
	const char *name = th_names[bank];
	int err = 0;

	if (is_shared_bank(bank)) {
		nb = node_to_amd_nb(amd_get_nb_id(cpu));

		/* threshold descriptor already initialized on this node? */
		if (nb && nb->bank4) {
			/* yes, use it */
			b = nb->bank4;
			err = kobject_add(b->kobj, &dev->kobj, name);
			if (err)
				goto out;

			per_cpu(threshold_banks, cpu)[bank] = b;
			atomic_inc(&b->cpus);

			err = __threshold_add_blocks(b);

			goto out;
		}
	}

	b = kzalloc(sizeof(struct threshold_bank), GFP_KERNEL);
	if (!b) {
		err = -ENOMEM;
		goto out;
	}

	b->kobj = kobject_create_and_add(name, &dev->kobj);
	if (!b->kobj) {
		err = -EINVAL;
		goto out_free;
	}

	per_cpu(threshold_banks, cpu)[bank] = b;

	if (is_shared_bank(bank)) {
		atomic_set(&b->cpus, 1);

		/* nb is already initialized, see above */
		if (nb) {
			WARN_ON(nb->bank4);
			nb->bank4 = b;
		}
	}

	err = allocate_threshold_blocks(cpu, bank, 0, MSR_IA32_MCx_MISC(bank));
	if (!err)
		goto out;

 out_free:
	kfree(b);

 out:
	return err;
}

/* create dir/files for all valid threshold banks */
static int threshold_create_device(unsigned int cpu)
{
	unsigned int bank;
	struct threshold_bank **bp;
	int err = 0;

	bp = kzalloc(sizeof(struct threshold_bank *) * mca_cfg.banks,
		     GFP_KERNEL);
	if (!bp)
		return -ENOMEM;

	per_cpu(threshold_banks, cpu) = bp;

	for (bank = 0; bank < mca_cfg.banks; ++bank) {
		if (!(per_cpu(bank_map, cpu) & (1 << bank)))
			continue;
		err = threshold_create_bank(cpu, bank);
		if (err)
			return err;
	}

	return err;
}

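/*
 * Free all blocks hanging off a bank, dropping each block's kobject
 * reference first.
 */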
static void deallocate_threshold_block(unsigned int cpu,
				       unsigned int bank)
{
	struct threshold_block *pos = NULL;
	struct threshold_block *tmp = NULL;
	struct threshold_bank *head = per_cpu(threshold_banks, cpu)[bank];

	if (!head)
		return;

	list_for_each_entry_safe(pos, tmp, &head->blocks->miscj, miscj) {
		kobject_put(&pos->kobj);
		list_del(&pos->miscj);
		kfree(pos);
	}

	kfree(per_cpu(threshold_banks, cpu)[bank]->blocks);
	per_cpu(threshold_banks, cpu)[bank]->blocks = NULL;
}

static void __threshold_remove_blocks(struct threshold_bank *b)
{
	struct threshold_block *pos = NULL;
	struct threshold_block *tmp = NULL;

	kobject_del(b->kobj);

	list_for_each_entry_safe(pos, tmp, &b->blocks->miscj, miscj)
		kobject_del(&pos->kobj);
}

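/*
 * Tear down a bank's sysfs objects. A shared bank is freed only when the
 * last CPU on the node stops using it; other CPUs just unlink their block
 * kobjects.
 */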
static void threshold_remove_bank(unsigned int cpu, int bank)
{
	struct amd_northbridge *nb;
	struct threshold_bank *b;

	b = per_cpu(threshold_banks, cpu)[bank];
	if (!b)
		return;

	if (!b->blocks)
		goto free_out;

	if (is_shared_bank(bank)) {
		if (!atomic_dec_and_test(&b->cpus)) {
			__threshold_remove_blocks(b);
			per_cpu(threshold_banks, cpu)[bank] = NULL;
			return;
		} else {
			/*
			 * the last CPU on this node using the shared bank is
			 * going away, remove that bank now.
			 */
			nb = node_to_amd_nb(amd_get_nb_id(cpu));
			nb->bank4 = NULL;
		}
	}

	deallocate_threshold_block(cpu, bank);

free_out:
	kobject_del(b->kobj);
	kobject_put(b->kobj);
	kfree(b);
	per_cpu(threshold_banks, cpu)[bank] = NULL;
}

static void threshold_remove_device(unsigned int cpu)
{
	unsigned int bank;

	for (bank = 0; bank < mca_cfg.banks; ++bank) {
		if (!(per_cpu(bank_map, cpu) & (1 << bank)))
			continue;
		threshold_remove_bank(cpu, bank);
	}
	kfree(per_cpu(threshold_banks, cpu));
}

/* get notified when a cpu comes on/off */
static void
amd_64_threshold_cpu_callback(unsigned long action, unsigned int cpu)
{
	switch (action) {
	case CPU_ONLINE:
	case CPU_ONLINE_FROZEN:
		threshold_create_device(cpu);
		break;
	case CPU_DEAD:
	case CPU_DEAD_FROZEN:
		threshold_remove_device(cpu);
		break;
	default:
		break;
	}
}

static __init int threshold_init_device(void)
{
	unsigned lcpu = 0;

	/* to hit CPUs online before the notifier is up */
	for_each_online_cpu(lcpu) {
		int err = threshold_create_device(lcpu);

		if (err)
			return err;
	}
	threshold_cpu_callback = amd_64_threshold_cpu_callback;

	return 0;
}
/*
 * there are 3 funcs which need to be _initcalled in a logical sequence:
 * 1. xen_late_init_mcelog
 * 2. mcheck_init_device
 * 3. threshold_init_device
 *
 * xen_late_init_mcelog must register xen_mce_chrdev_device before
 * native mce_chrdev_device registration if running under the xen platform;
 *
 * mcheck_init_device should be initialized before threshold_init_device to
 * initialize mce_device, otherwise a NULL ptr dereference will cause panic.
 *
 * so we use the following _initcalls
 * 1. device_initcall(xen_late_init_mcelog);
 * 2. device_initcall_sync(mcheck_init_device);
 * 3. late_initcall(threshold_init_device);
 *
 * when running under xen, the initcall order is 1,2,3;
 * on baremetal, we skip 1 and we do only 2 and 3.
 */
late_initcall(threshold_init_device);