/*
 *  (c) 2005-2015 Advanced Micro Devices, Inc.
 *  Your use of this code is subject to the terms and conditions of the
 *  GNU general public license version 2. See "COPYING" or
 *  http://www.gnu.org/licenses/gpl.html
 *
 *  Written by Jacob Shin - AMD, Inc.
 *  Maintained by: Borislav Petkov <bp@alien8.de>
 *
 *  All MC4_MISCi registers are shared between cores on a node.
 */
#include <linux/interrupt.h>
#include <linux/notifier.h>
#include <linux/kobject.h>
#include <linux/percpu.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/sysfs.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/cpu.h>
#include <linux/smp.h>

#include <asm/amd_nb.h>
#include <asm/apic.h>
#include <asm/idle.h>
#include <asm/mce.h>
#include <asm/msr.h>
#include <asm/trace/irq_vectors.h>

#define NR_BLOCKS         5
#define THRESHOLD_MAX     0xFFF
#define INT_TYPE_APIC     0x00020000
#define MASK_VALID_HI     0x80000000
#define MASK_CNTP_HI      0x40000000
#define MASK_LOCKED_HI    0x20000000
#define MASK_LVTOFF_HI    0x00F00000
#define MASK_COUNT_EN_HI  0x00080000
#define MASK_INT_TYPE_HI  0x00060000
#define MASK_OVERFLOW_HI  0x00010000
#define MASK_ERR_COUNT_HI 0x00000FFF
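/*
 * Bits [31:24] of an MCi_MISC register ("BLKPTR") locate the bank's chain
 * of extended MISC blocks, addressed relative to MCG_XBLK_ADDR.
 */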
#define MASK_BLKPTR_LO    0xFF000000
#define MCG_XBLK_ADDR     0xC0000400

/* Deferred error settings */
#define MSR_CU_DEF_ERR		0xC0000410
#define MASK_DEF_LVTOFF		0x000000F0
#define MASK_DEF_INT_TYPE	0x00000006
#define DEF_LVT_OFF		0x2
#define DEF_INT_TYPE_APIC	0x2

/* Scalable MCA: */

/* Threshold LVT offset is at MSR0xC0000410[15:12] */
#define SMCA_THR_LVT_OFF	0xF000

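/* Bank names for the sysfs hierarchy; bank 4 block names come from bank4_names(). */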
static const char * const th_names[] = {
	"load_store",
	"insn_fetch",
	"combined_unit",
	"",
	"northbridge",
	"execution_unit",
};

static DEFINE_PER_CPU(struct threshold_bank **, threshold_banks);
static DEFINE_PER_CPU(unsigned char, bank_map);	/* see which banks are on */

static void amd_threshold_interrupt(void);
static void amd_deferred_error_interrupt(void);

static void default_deferred_error_interrupt(void)
{
	pr_err("Unexpected deferred interrupt at vector %x\n", DEFERRED_ERROR_VECTOR);
}
void (*deferred_error_int_vector)(void) = default_deferred_error_interrupt;

/*
 * CPU Initialization
 */

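/* Parameters for threshold_restart_bank(), which runs on the target CPU. */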
struct thresh_restart {
	struct threshold_block	*b;
	int			reset;
	int			set_lvt_off;
	int			lvt_off;
	u16			old_limit;
};

static inline bool is_shared_bank(int bank)
{
	/*
	 * Scalable MCA provides for only one core to have access to the MSRs of
	 * a shared bank.
	 */
	if (mce_flags.smca)
		return false;

	/* Bank 4 is for northbridge reporting and is thus shared */
	return (bank == 4);
}

static const char *bank4_names(const struct threshold_block *b)
{
	switch (b->address) {
	/* MSR4_MISC0 */
	case 0x00000413:
		return "dram";

	case 0xc0000408:
		return "ht_links";

	case 0xc0000409:
		return "l3_cache";

	default:
		WARN(1, "Funny MSR: 0x%08x\n", b->address);
		return "";
	}
}

static bool lvt_interrupt_supported(unsigned int bank, u32 msr_high_bits)
{
	/*
	 * bank 4 supports APIC LVT interrupts implicitly since forever.
	 */
	if (bank == 4)
		return true;

	/*
	 * IntP: interrupt present; if this bit is set, the thresholding
	 * bank can generate APIC LVT interrupts
	 */
	return msr_high_bits & BIT(28);
}

static int lvt_off_valid(struct threshold_block *b, int apic, u32 lo, u32 hi)
{
	int msr = (hi & MASK_LVTOFF_HI) >> 20;

	if (apic < 0) {
		pr_err(FW_BUG "cpu %d, failed to setup threshold interrupt "
		       "for bank %d, block %d (MSR%08X=0x%x%08x)\n", b->cpu,
		       b->bank, b->block, b->address, hi, lo);
		return 0;
	}

	if (apic != msr) {
		/*
		 * On SMCA CPUs, LVT offset is programmed at a different MSR, and
		 * the BIOS provides the value. The original field where LVT offset
		 * was set is reserved. Return early here:
		 */
		if (mce_flags.smca)
			return 0;

		pr_err(FW_BUG "cpu %d, invalid threshold interrupt offset %d "
		       "for bank %d, block %d (MSR%08X=0x%x%08x)\n",
		       b->cpu, apic, b->bank, b->block, b->address, hi, lo);
		return 0;
	}

	return 1;
}

/*
 * Called via smp_call_function_single(), must be called with correct
 * cpu affinity.
 */
static void threshold_restart_bank(void *_tr)
{
	struct thresh_restart *tr = _tr;
	u32 hi, lo;

	rdmsr(tr->b->address, lo, hi);

	if (tr->b->threshold_limit < (hi & THRESHOLD_MAX))
		tr->reset = 1;	/* limit cannot be lower than err count */

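	/*
	 * The hardware counter is seeded with THRESHOLD_MAX - threshold_limit
	 * so that the overflow bit (and, if enabled, the APIC LVT interrupt)
	 * fires after exactly threshold_limit errors.
	 */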
	if (tr->reset) {		/* reset err count and overflow bit */
		hi =
		    (hi & ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI)) |
		    (THRESHOLD_MAX - tr->b->threshold_limit);
	} else if (tr->old_limit) {	/* change limit w/o reset */
		int new_count = (hi & THRESHOLD_MAX) +
		    (tr->old_limit - tr->b->threshold_limit);

		hi = (hi & ~MASK_ERR_COUNT_HI) |
		    (new_count & THRESHOLD_MAX);
	}

	/* clear IntType */
	hi &= ~MASK_INT_TYPE_HI;

	if (!tr->b->interrupt_capable)
		goto done;

	if (tr->set_lvt_off) {
		if (lvt_off_valid(tr->b, tr->lvt_off, lo, hi)) {
			/* set new lvt offset */
			hi &= ~MASK_LVTOFF_HI;
			hi |= tr->lvt_off << 20;
		}
	}

	if (tr->b->interrupt_enable)
		hi |= INT_TYPE_APIC;

 done:
	hi |= MASK_COUNT_EN_HI;
	wrmsr(tr->b->address, lo, hi);
}

static void mce_threshold_block_init(struct threshold_block *b, int offset)
{
	struct thresh_restart tr = {
		.b			= b,
		.set_lvt_off		= 1,
		.lvt_off		= offset,
	};

	b->threshold_limit		= THRESHOLD_MAX;
	threshold_restart_bank(&tr);
}

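/*
 * Try to reserve the APIC EILVT entry @new for the threshold vector (and,
 * below, for the deferred error vector). Returns @new on success, otherwise
 * whatever offset was already reserved.
 */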
static int setup_APIC_mce_threshold(int reserved, int new)
{
	if (reserved < 0 && !setup_APIC_eilvt(new, THRESHOLD_APIC_VECTOR,
					      APIC_EILVT_MSG_FIX, 0))
		return new;

	return reserved;
}

static int setup_APIC_deferred_error(int reserved, int new)
{
	if (reserved < 0 && !setup_APIC_eilvt(new, DEFERRED_ERROR_VECTOR,
					      APIC_EILVT_MSG_FIX, 0))
		return new;

	return reserved;
}

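/*
 * Pick up the BIOS-programmed LVT offset for deferred errors from
 * MSR_CU_DEF_ERR (falling back to DEF_LVT_OFF if it is unset), reserve the
 * matching EILVT entry and route deferred error interrupts to the APIC.
 */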
static void deferred_error_interrupt_enable(struct cpuinfo_x86 *c)
{
	u32 low = 0, high = 0;
	int def_offset = -1, def_new;

	if (rdmsr_safe(MSR_CU_DEF_ERR, &low, &high))
		return;

	def_new = (low & MASK_DEF_LVTOFF) >> 4;
	if (!(low & MASK_DEF_LVTOFF)) {
		pr_err(FW_BUG "Your BIOS is not setting up LVT offset 0x2 for deferred error IRQs correctly.\n");
		def_new = DEF_LVT_OFF;
		low = (low & ~MASK_DEF_LVTOFF) | (DEF_LVT_OFF << 4);
	}

	def_offset = setup_APIC_deferred_error(def_offset, def_new);
	if ((def_offset == def_new) &&
	    (deferred_error_int_vector != amd_deferred_error_interrupt))
		deferred_error_int_vector = amd_deferred_error_interrupt;

	low = (low & ~MASK_DEF_INT_TYPE) | DEF_INT_TYPE_APIC;
	wrmsr(MSR_CU_DEF_ERR, low, high);
}

/* cpu init entry point, called from mce.c with preempt off */
void mce_amd_feature_init(struct cpuinfo_x86 *c)
{
	struct threshold_block b;
	unsigned int cpu = smp_processor_id();
	u32 low = 0, high = 0, address = 0;
	unsigned int bank, block;
	int offset = -1, new;

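	/*
	 * Scan all thresholding blocks of every bank: block 0 is MCi_MISC
	 * itself, block 1 is located via the BLKPTR field, and any further
	 * blocks follow at consecutive MSR addresses.
	 */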
	for (bank = 0; bank < mca_cfg.banks; ++bank) {
		for (block = 0; block < NR_BLOCKS; ++block) {
			if (block == 0)
				address = MSR_IA32_MCx_MISC(bank);
			else if (block == 1) {
				address = (low & MASK_BLKPTR_LO) >> 21;
				if (!address)
					break;

				address += MCG_XBLK_ADDR;
			} else
				++address;

			if (rdmsr_safe(address, &low, &high))
				break;

			if (!(high & MASK_VALID_HI))
				continue;

			if (!(high & MASK_CNTP_HI)  ||
			     (high & MASK_LOCKED_HI))
				continue;

			if (!block)
				per_cpu(bank_map, cpu) |= (1 << bank);

			memset(&b, 0, sizeof(b));
			b.cpu			= cpu;
			b.bank			= bank;
			b.block			= block;
			b.address		= address;
			b.interrupt_capable	= lvt_interrupt_supported(bank, high);

			if (!b.interrupt_capable)
				goto init;

			b.interrupt_enable = 1;

			if (mce_flags.smca) {
				u32 smca_low, smca_high;

				/* Gather LVT offset for thresholding: */
				if (rdmsr_safe(MSR_CU_DEF_ERR, &smca_low, &smca_high))
					break;

				new = (smca_low & SMCA_THR_LVT_OFF) >> 12;
			} else {
				new = (high & MASK_LVTOFF_HI) >> 20;
			}

			offset = setup_APIC_mce_threshold(offset, new);

			if ((offset == new) &&
			    (mce_threshold_vector != amd_threshold_interrupt))
				mce_threshold_vector = amd_threshold_interrupt;

init:
			mce_threshold_block_init(&b, offset);
		}
	}

	if (mce_flags.succor)
		deferred_error_interrupt_enable(c);
}

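/* Read, log and clear a valid error from @bank's status MSR. */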
static void __log_error(unsigned int bank, bool threshold_err, u64 misc)
{
	struct mce m;
	u64 status;

	rdmsrl(MSR_IA32_MCx_STATUS(bank), status);
	if (!(status & MCI_STATUS_VAL))
		return;

	mce_setup(&m);

	m.status = status;
	m.bank = bank;

	if (threshold_err)
		m.misc = misc;

	if (m.status & MCI_STATUS_ADDRV)
		rdmsrl(MSR_IA32_MCx_ADDR(bank), m.addr);

	mce_log(&m);
	wrmsrl(MSR_IA32_MCx_STATUS(bank), 0);
}

static inline void __smp_deferred_error_interrupt(void)
{
	inc_irq_stat(irq_deferred_error_count);
	deferred_error_int_vector();
}

asmlinkage __visible void smp_deferred_error_interrupt(void)
{
	entering_irq();
	__smp_deferred_error_interrupt();
	exiting_ack_irq();
}

asmlinkage __visible void smp_trace_deferred_error_interrupt(void)
{
	entering_irq();
	trace_deferred_error_apic_entry(DEFERRED_ERROR_VECTOR);
	__smp_deferred_error_interrupt();
	trace_deferred_error_apic_exit(DEFERRED_ERROR_VECTOR);
	exiting_ack_irq();
}

/* APIC interrupt handler for deferred errors */
static void amd_deferred_error_interrupt(void)
{
	u64 status;
	unsigned int bank;

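	/* Log only the first bank found with a valid deferred error. */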
	for (bank = 0; bank < mca_cfg.banks; ++bank) {
		rdmsrl(MSR_IA32_MCx_STATUS(bank), status);

		if (!(status & MCI_STATUS_VAL) ||
		    !(status & MCI_STATUS_DEFERRED))
			continue;

		__log_error(bank, false, 0);
		break;
	}
}

/*
 * APIC Interrupt Handler
 */

/*
 * The threshold interrupt handler services THRESHOLD_APIC_VECTOR. The
 * interrupt fires when error_count reaches threshold_limit, and the handler
 * simply logs an MCE record with the software-defined bank number.
 */

static void amd_threshold_interrupt(void)
{
	u32 low = 0, high = 0, address = 0;
	int cpu = smp_processor_id();
	unsigned int bank, block;

	/* assume first bank caused it */
	for (bank = 0; bank < mca_cfg.banks; ++bank) {
		if (!(per_cpu(bank_map, cpu) & (1 << bank)))
			continue;
		for (block = 0; block < NR_BLOCKS; ++block) {
			if (block == 0) {
				address = MSR_IA32_MCx_MISC(bank);
			} else if (block == 1) {
				address = (low & MASK_BLKPTR_LO) >> 21;
				if (!address)
					break;
				address += MCG_XBLK_ADDR;
			} else {
				++address;
			}

			if (rdmsr_safe(address, &low, &high))
				break;

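			/*
			 * Stop scanning this bank if block 0 is invalid;
			 * later blocks may be sparse, so keep going past an
			 * invalid intermediate block.
			 */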
			if (!(high & MASK_VALID_HI)) {
				if (block)
					continue;
				else
					break;
			}

			if (!(high & MASK_CNTP_HI)  ||
			     (high & MASK_LOCKED_HI))
				continue;

			/*
			 * Log the machine check that caused the threshold
			 * event.
			 */
			if (high & MASK_OVERFLOW_HI)
				goto log;
		}
	}
	return;

log:
	__log_error(bank, true, ((u64)high << 32) | low);
}

/*
 * Sysfs Interface
 */

struct threshold_attr {
	struct attribute attr;
	ssize_t (*show) (struct threshold_block *, char *);
	ssize_t (*store) (struct threshold_block *, const char *, size_t count);
};

#define SHOW_FIELDS(name)						\
static ssize_t show_ ## name(struct threshold_block *b, char *buf)	\
{									\
	return sprintf(buf, "%lu\n", (unsigned long) b->name);		\
}
SHOW_FIELDS(interrupt_enable)
SHOW_FIELDS(threshold_limit)

static ssize_t
store_interrupt_enable(struct threshold_block *b, const char *buf, size_t size)
{
	struct thresh_restart tr;
	unsigned long new;

	if (!b->interrupt_capable)
		return -EINVAL;

	if (kstrtoul(buf, 0, &new) < 0)
		return -EINVAL;

	b->interrupt_enable = !!new;

	memset(&tr, 0, sizeof(tr));
	tr.b		= b;

	smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1);

	return size;
}

static ssize_t
store_threshold_limit(struct threshold_block *b, const char *buf, size_t size)
{
	struct thresh_restart tr;
	unsigned long new;

	if (kstrtoul(buf, 0, &new) < 0)
		return -EINVAL;

	if (new > THRESHOLD_MAX)
		new = THRESHOLD_MAX;
	if (new < 1)
		new = 1;

	memset(&tr, 0, sizeof(tr));
	tr.old_limit = b->threshold_limit;
	b->threshold_limit = new;
	tr.b = b;

	smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1);

	return size;
}

static ssize_t show_error_count(struct threshold_block *b, char *buf)
{
	u32 lo, hi;

	rdmsr_on_cpu(b->cpu, b->address, &lo, &hi);

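	/* Subtract the counter's seed value to get the number of errors seen. */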
	return sprintf(buf, "%u\n", ((hi & THRESHOLD_MAX) -
				     (THRESHOLD_MAX - b->threshold_limit)));
}

static struct threshold_attr error_count = {
	.attr = {.name = __stringify(error_count), .mode = 0444 },
	.show = show_error_count,
};

#define RW_ATTR(val)							\
static struct threshold_attr val = {					\
	.attr	= {.name = __stringify(val), .mode = 0644 },		\
	.show	= show_## val,						\
	.store	= store_## val,						\
};

RW_ATTR(interrupt_enable);
RW_ATTR(threshold_limit);

static struct attribute *default_attrs[] = {
	&threshold_limit.attr,
	&error_count.attr,
	NULL,	/* possibly interrupt_enable if supported, see below */
	NULL,
};

#define to_block(k)	container_of(k, struct threshold_block, kobj)
#define to_attr(a)	container_of(a, struct threshold_attr, attr)

static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
{
	struct threshold_block *b = to_block(kobj);
	struct threshold_attr *a = to_attr(attr);
	ssize_t ret;

	ret = a->show ? a->show(b, buf) : -EIO;

	return ret;
}

static ssize_t store(struct kobject *kobj, struct attribute *attr,
		     const char *buf, size_t count)
{
	struct threshold_block *b = to_block(kobj);
	struct threshold_attr *a = to_attr(attr);
	ssize_t ret;

	ret = a->store ? a->store(b, buf, count) : -EIO;

	return ret;
}

static const struct sysfs_ops threshold_ops = {
	.show			= show,
	.store			= store,
};

static struct kobj_type threshold_ktype = {
	.sysfs_ops		= &threshold_ops,
	.default_attrs		= default_attrs,
};

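/*
 * Allocate and register a threshold_block for (@bank, @block), then recurse
 * into the next block in the chain. All blocks of a bank are linked on the
 * first block's miscj list.
 */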
static int allocate_threshold_blocks(unsigned int cpu, unsigned int bank,
				     unsigned int block, u32 address)
{
	struct threshold_block *b = NULL;
	u32 low, high;
	int err;

	if ((bank >= mca_cfg.banks) || (block >= NR_BLOCKS))
		return 0;

	if (rdmsr_safe_on_cpu(cpu, address, &low, &high))
		return 0;

	if (!(high & MASK_VALID_HI)) {
		if (block)
			goto recurse;
		else
			return 0;
	}

	if (!(high & MASK_CNTP_HI)  ||
	     (high & MASK_LOCKED_HI))
		goto recurse;

	b = kzalloc(sizeof(struct threshold_block), GFP_KERNEL);
	if (!b)
		return -ENOMEM;

	b->block		= block;
	b->bank			= bank;
	b->cpu			= cpu;
	b->address		= address;
	b->interrupt_enable	= 0;
	b->interrupt_capable	= lvt_interrupt_supported(bank, high);
	b->threshold_limit	= THRESHOLD_MAX;

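	/*
	 * Expose the interrupt_enable sysfs attribute only for blocks which
	 * can actually raise the APIC LVT interrupt.
	 */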
	if (b->interrupt_capable) {
		threshold_ktype.default_attrs[2] = &interrupt_enable.attr;
		b->interrupt_enable = 1;
	} else {
		threshold_ktype.default_attrs[2] = NULL;
	}

	INIT_LIST_HEAD(&b->miscj);

	if (per_cpu(threshold_banks, cpu)[bank]->blocks) {
		list_add(&b->miscj,
			 &per_cpu(threshold_banks, cpu)[bank]->blocks->miscj);
	} else {
		per_cpu(threshold_banks, cpu)[bank]->blocks = b;
	}

	err = kobject_init_and_add(&b->kobj, &threshold_ktype,
				   per_cpu(threshold_banks, cpu)[bank]->kobj,
				   (bank == 4 ? bank4_names(b) : th_names[bank]));
	if (err)
		goto out_free;
recurse:
	if (!block) {
		address = (low & MASK_BLKPTR_LO) >> 21;
		if (!address)
			return 0;
		address += MCG_XBLK_ADDR;
	} else {
		++address;
	}

	err = allocate_threshold_blocks(cpu, bank, ++block, address);
	if (err)
		goto out_free;

	if (b)
		kobject_uevent(&b->kobj, KOBJ_ADD);

	return err;

out_free:
	if (b) {
		kobject_put(&b->kobj);
		list_del(&b->miscj);
		kfree(b);
	}
	return err;
}

static int __threshold_add_blocks(struct threshold_bank *b)
{
	struct list_head *head = &b->blocks->miscj;
	struct threshold_block *pos = NULL;
	struct threshold_block *tmp = NULL;
	int err = 0;

	err = kobject_add(&b->blocks->kobj, b->kobj, b->blocks->kobj.name);
	if (err)
		return err;

	list_for_each_entry_safe(pos, tmp, head, miscj) {

		err = kobject_add(&pos->kobj, b->kobj, pos->kobj.name);
		if (err) {
			list_for_each_entry_safe_reverse(pos, tmp, head, miscj)
				kobject_del(&pos->kobj);

			return err;
		}
	}
	return err;
}

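/*
 * Create the sysfs hierarchy for @bank on @cpu. A shared bank (the
 * northbridge) gets a single hierarchy per node; other cores on the node
 * merely link to it and bump its refcount.
 */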
static int threshold_create_bank(unsigned int cpu, unsigned int bank)
{
	struct device *dev = per_cpu(mce_device, cpu);
	struct amd_northbridge *nb = NULL;
	struct threshold_bank *b = NULL;
	const char *name = th_names[bank];
	int err = 0;

	if (is_shared_bank(bank)) {
		nb = node_to_amd_nb(amd_get_nb_id(cpu));

		/* threshold descriptor already initialized on this node? */
		if (nb && nb->bank4) {
			/* yes, use it */
			b = nb->bank4;
			err = kobject_add(b->kobj, &dev->kobj, name);
			if (err)
				goto out;

			per_cpu(threshold_banks, cpu)[bank] = b;
			atomic_inc(&b->cpus);

			err = __threshold_add_blocks(b);

			goto out;
		}
	}

	b = kzalloc(sizeof(struct threshold_bank), GFP_KERNEL);
	if (!b) {
		err = -ENOMEM;
		goto out;
	}

	b->kobj = kobject_create_and_add(name, &dev->kobj);
	if (!b->kobj) {
		err = -EINVAL;
		goto out_free;
	}

	per_cpu(threshold_banks, cpu)[bank] = b;

	if (is_shared_bank(bank)) {
		atomic_set(&b->cpus, 1);

		/* nb is already initialized, see above */
		if (nb) {
			WARN_ON(nb->bank4);
			nb->bank4 = b;
		}
	}

	err = allocate_threshold_blocks(cpu, bank, 0, MSR_IA32_MCx_MISC(bank));
	if (!err)
		goto out;

 out_free:
	kfree(b);

 out:
	return err;
}

/* create dir/files for all valid threshold banks */
static int threshold_create_device(unsigned int cpu)
{
	unsigned int bank;
	struct threshold_bank **bp;
	int err = 0;

	bp = kzalloc(sizeof(struct threshold_bank *) * mca_cfg.banks,
		     GFP_KERNEL);
	if (!bp)
		return -ENOMEM;

	per_cpu(threshold_banks, cpu) = bp;

	for (bank = 0; bank < mca_cfg.banks; ++bank) {
		if (!(per_cpu(bank_map, cpu) & (1 << bank)))
			continue;
		err = threshold_create_bank(cpu, bank);
		if (err)
			return err;
	}

	return err;
}

static void deallocate_threshold_block(unsigned int cpu,
						 unsigned int bank)
{
	struct threshold_block *pos = NULL;
	struct threshold_block *tmp = NULL;
	struct threshold_bank *head = per_cpu(threshold_banks, cpu)[bank];

	if (!head)
		return;

	list_for_each_entry_safe(pos, tmp, &head->blocks->miscj, miscj) {
		kobject_put(&pos->kobj);
		list_del(&pos->miscj);
		kfree(pos);
	}

	kfree(per_cpu(threshold_banks, cpu)[bank]->blocks);
	per_cpu(threshold_banks, cpu)[bank]->blocks = NULL;
}

static void __threshold_remove_blocks(struct threshold_bank *b)
{
	struct threshold_block *pos = NULL;
	struct threshold_block *tmp = NULL;

	kobject_del(b->kobj);

	list_for_each_entry_safe(pos, tmp, &b->blocks->miscj, miscj)
		kobject_del(&pos->kobj);
}

static void threshold_remove_bank(unsigned int cpu, int bank)
{
	struct amd_northbridge *nb;
	struct threshold_bank *b;

	b = per_cpu(threshold_banks, cpu)[bank];
	if (!b)
		return;

	if (!b->blocks)
		goto free_out;

	if (is_shared_bank(bank)) {
		if (!atomic_dec_and_test(&b->cpus)) {
			__threshold_remove_blocks(b);
			per_cpu(threshold_banks, cpu)[bank] = NULL;
			return;
		} else {
			/*
			 * the last CPU on this node using the shared bank is
			 * going away, remove that bank now.
			 */
			nb = node_to_amd_nb(amd_get_nb_id(cpu));
			nb->bank4 = NULL;
		}
	}

	deallocate_threshold_block(cpu, bank);

free_out:
	kobject_del(b->kobj);
	kobject_put(b->kobj);
	kfree(b);
	per_cpu(threshold_banks, cpu)[bank] = NULL;
}

static void threshold_remove_device(unsigned int cpu)
{
	unsigned int bank;

	for (bank = 0; bank < mca_cfg.banks; ++bank) {
		if (!(per_cpu(bank_map, cpu) & (1 << bank)))
			continue;
		threshold_remove_bank(cpu, bank);
	}
	kfree(per_cpu(threshold_banks, cpu));
}

/* get notified when a cpu comes on/off */
static void
amd_64_threshold_cpu_callback(unsigned long action, unsigned int cpu)
{
	switch (action) {
	case CPU_ONLINE:
	case CPU_ONLINE_FROZEN:
		threshold_create_device(cpu);
		break;
	case CPU_DEAD:
	case CPU_DEAD_FROZEN:
		threshold_remove_device(cpu);
		break;
	default:
		break;
	}
}

static __init int threshold_init_device(void)
{
	unsigned lcpu = 0;

	/* to hit CPUs online before the notifier is up */
	for_each_online_cpu(lcpu) {
		int err = threshold_create_device(lcpu);

		if (err)
			return err;
	}
	threshold_cpu_callback = amd_64_threshold_cpu_callback;

	return 0;
}
/*
 * There are three functions which need to be _initcall'ed in a logical
 * sequence:
 * 1. xen_late_init_mcelog
 * 2. mcheck_init_device
 * 3. threshold_init_device
 *
 * xen_late_init_mcelog must register xen_mce_chrdev_device before the
 * native mce_chrdev_device registration when running under the Xen
 * platform;
 *
 * mcheck_init_device must run before threshold_init_device so that
 * mce_device is initialized; otherwise a NULL pointer dereference
 * will cause a panic.
 *
 * Hence the following _initcalls:
 * 1. device_initcall(xen_late_init_mcelog);
 * 2. device_initcall_sync(mcheck_init_device);
 * 3. late_initcall(threshold_init_device);
 *
 * When running under Xen the initcall order is 1, 2, 3; on bare metal
 * we skip 1 and do only 2 and 3.
 */
late_initcall(threshold_init_device);