mce_intel.c 9.8 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3
/*
 * Intel specific MCE features.
 * Copyright 2004 Zwane Mwaikambo <zwane@linuxpower.ca>
A
Andi Kleen 已提交
4 5
 * Copyright (C) 2008, 2009 Intel Corporation
 * Author: Andi Kleen
L
Linus Torvalds 已提交
6 7
 */

8
#include <linux/gfp.h>
L
Linus Torvalds 已提交
9 10
#include <linux/interrupt.h>
#include <linux/percpu.h>
11
#include <linux/sched.h>
12
#include <linux/cpumask.h>
13
#include <asm/apic.h>
L
Linus Torvalds 已提交
14 15 16 17
#include <asm/processor.h>
#include <asm/msr.h>
#include <asm/mce.h>

C
Chen Gong 已提交
18 19
#include "mce-internal.h"

A
Andi Kleen 已提交
20 21 22 23 24 25 26
/*
 * Support for Intel Corrected Machine Check Interrupts (CMCI). This allows
 * the CPU to raise an interrupt when a corrected machine check happens.
 * Normally we pick those up using a regular polling timer.
 * Also supports reliable discovery of shared banks.
 */

27 28 29 30 31 32 33 34 35 36 37 38
/*
 * CMCI can be delivered to multiple cpus that share a machine check bank
 * so we need to designate a single cpu to process errors logged in each bank
 * in the interrupt handler (otherwise we would have many races and potential
 * double reporting of the same error).
 * Note that this can change when a cpu is offlined or brought online since
 * some MCA banks are shared across cpus. When a cpu is offlined, cmci_clear()
 * disables CMCI on all banks owned by the cpu and clears this bitfield. At
 * this point, cmci_rediscover() kicks in and a different cpu may end up
 * taking ownership of some of the shared MCA banks that were previously
 * owned by the offlined cpu.
 */
A
Andi Kleen 已提交
39 40 41 42 43 44
/* Per-CPU bitmap: bit N is set while this CPU has claimed bank N for CMCI. */
static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned);

/*
 * cmci_discover_lock protects against parallel discovery attempts
 * which could race against each other.
 */
45
/*
 * Taken with interrupts disabled by cmci_discover(), cmci_clear(),
 * cmci_disable_bank() and cmci_storm_disable_banks().
 */
static DEFINE_RAW_SPINLOCK(cmci_discover_lock);
A
Andi Kleen 已提交
46

C
Chen Gong 已提交
47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62
/* Threshold value that cmci_discover() programs into MCi_CTL2. */
#define CMCI_THRESHOLD		1
/*
 * Poll interval used around a storm: passed to mce_timer_kick() when a
 * storm is detected and returned by mce_intel_adjust_timer() while the
 * CPU is in the ACTIVE or SUBSIDED state.
 */
#define CMCI_POLL_INTERVAL	(30 * HZ)
/* Length of the window over which cmci_storm_detect() counts interrupts. */
#define CMCI_STORM_INTERVAL	(1 * HZ)
/* More than this many interrupts inside one window is treated as a storm. */
#define CMCI_STORM_THRESHOLD	15

/* jiffies value recorded when the current counting window was opened. */
static DEFINE_PER_CPU(unsigned long, cmci_time_stamp);
/* Interrupts counted on this CPU within the current window. */
static DEFINE_PER_CPU(unsigned int, cmci_storm_cnt);
/* Current storm state of this CPU; holds one of the CMCI_STORM_* values. */
static DEFINE_PER_CPU(unsigned int, cmci_storm_state);

enum {
	/* No storm: CMCI is handled in the interrupt handler. */
	CMCI_STORM_NONE,
	/* Storm detected on this CPU: its CMCI banks were disabled. */
	CMCI_STORM_ACTIVE,
	/* This CPU has calmed down and waits for the others to do the same. */
	CMCI_STORM_SUBSIDED,
};

/* Number of CPUs that are currently in CMCI_STORM_ACTIVE. */
static atomic_t cmci_storm_on_cpus;
A
Andi Kleen 已提交
63

64
static int cmci_supported(int *banks)
A
Andi Kleen 已提交
65 66 67
{
	u64 cap;

68
	if (mca_cfg.cmci_disabled || mca_cfg.ignore_ce)
69 70
		return 0;

A
Andi Kleen 已提交
71 72 73 74 75 76 77 78 79 80 81 82 83 84
	/*
	 * Vendor check is not strictly needed, but the initial
	 * initialization is vendor keyed and this
	 * makes sure none of the backdoors are entered otherwise.
	 */
	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
		return 0;
	if (!cpu_has_apic || lapic_get_maxlvt() < 6)
		return 0;
	rdmsrl(MSR_IA32_MCG_CAP, cap);
	*banks = min_t(unsigned, MAX_NR_BANKS, cap & 0xff);
	return !!(cap & MCG_CMCI_P);
}

C
Chen Gong 已提交
85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140
/*
 * Poll the banks owned by this CPU, but only while its storm state is
 * something other than CMCI_STORM_NONE. Outside of a storm this is a
 * no-op.
 */
void mce_intel_cmci_poll(void)
{
	if (__this_cpu_read(cmci_storm_state) != CMCI_STORM_NONE)
		machine_check_poll(MCP_TIMESTAMP,
				   &__get_cpu_var(mce_banks_owned));
}

/*
 * Reset the storm state of @cpu to CMCI_STORM_NONE. If that CPU was
 * counted as being in an active storm, drop it from cmci_storm_on_cpus
 * first so the global count stays balanced.
 *
 * NOTE(review): the caller is not visible here; presumably this is the
 * CPU hotplug path - confirm.
 */
void mce_intel_hcpu_update(unsigned long cpu)
{
	unsigned int *state = &per_cpu(cmci_storm_state, cpu);

	if (*state == CMCI_STORM_ACTIVE)
		atomic_dec(&cmci_storm_on_cpus);

	*state = CMCI_STORM_NONE;
}

/*
 * Let the storm state machine of this CPU influence the poll interval.
 *
 * @interval: the interval the caller would otherwise use.
 *
 * Returns @interval unchanged when it is shorter than CMCI_POLL_INTERVAL
 * or when this CPU is not in a storm; returns CMCI_POLL_INTERVAL while
 * the CPU is in the ACTIVE or SUBSIDED state.
 *
 * Side effects: may move this CPU from ACTIVE to SUBSIDED and from
 * SUBSIDED to NONE, in the latter case re-enabling and rechecking CMCI.
 */
unsigned long mce_intel_adjust_timer(unsigned long interval)
{
	unsigned int state;

	if (interval < CMCI_POLL_INTERVAL)
		return interval;

	state = __this_cpu_read(cmci_storm_state);

	/* We have shiny weather. Let the poll do whatever it thinks. */
	if (state != CMCI_STORM_ACTIVE && state != CMCI_STORM_SUBSIDED)
		return interval;

	if (state == CMCI_STORM_ACTIVE) {
		/*
		 * The poll timer has silenced itself: no events were
		 * recorded and the interval is back at our poll interval.
		 * This CPU is done with its storm.
		 */
		__this_cpu_write(cmci_storm_state, CMCI_STORM_SUBSIDED);
		if (atomic_sub_return(1, &cmci_storm_on_cpus) == 0)
			pr_notice("CMCI storm subsided: switching to interrupt mode\n");
	}

	/*
	 * Reached for both ACTIVE (just demoted above) and SUBSIDED.
	 * Only once no CPU is left in an active storm do we go back to
	 * interrupt mode on this CPU.
	 */
	if (!atomic_read(&cmci_storm_on_cpus)) {
		__this_cpu_write(cmci_storm_state, CMCI_STORM_NONE);
		cmci_reenable();
		cmci_recheck();
	}

	return CMCI_POLL_INTERVAL;
}

141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156
/*
 * Clear the CMCI enable bit in MCi_CTL2 for every bank this CPU owns.
 *
 * Unlike __cmci_disable_bank() the ownership bitmap itself is left
 * alone, so the banks remain marked as owned by this CPU.
 */
static void cmci_storm_disable_banks(void)
{
	unsigned long *owned_banks;
	unsigned long irq_flags;
	u64 ctl2;
	int b;

	raw_spin_lock_irqsave(&cmci_discover_lock, irq_flags);

	owned_banks = __get_cpu_var(mce_banks_owned);
	for_each_set_bit(b, owned_banks, MAX_NR_BANKS) {
		rdmsrl(MSR_IA32_MCx_CTL2(b), ctl2);
		ctl2 &= ~MCI_CTL2_CMCI_EN;
		wrmsrl(MSR_IA32_MCx_CTL2(b), ctl2);
	}

	raw_spin_unlock_irqrestore(&cmci_discover_lock, irq_flags);
}

C
Chen Gong 已提交
157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177
static bool cmci_storm_detect(void)
{
	unsigned int cnt = __this_cpu_read(cmci_storm_cnt);
	unsigned long ts = __this_cpu_read(cmci_time_stamp);
	unsigned long now = jiffies;
	int r;

	if (__this_cpu_read(cmci_storm_state) != CMCI_STORM_NONE)
		return true;

	if (time_before_eq(now, ts + CMCI_STORM_INTERVAL)) {
		cnt++;
	} else {
		cnt = 1;
		__this_cpu_write(cmci_time_stamp, now);
	}
	__this_cpu_write(cmci_storm_cnt, cnt);

	if (cnt <= CMCI_STORM_THRESHOLD)
		return false;

178
	cmci_storm_disable_banks();
C
Chen Gong 已提交
179 180 181 182 183 184 185 186 187
	__this_cpu_write(cmci_storm_state, CMCI_STORM_ACTIVE);
	r = atomic_add_return(1, &cmci_storm_on_cpus);
	mce_timer_kick(CMCI_POLL_INTERVAL);

	if (r == 1)
		pr_notice("CMCI storm detected: switching to poll mode\n");
	return true;
}

A
Andi Kleen 已提交
188 189 190 191 192 193 194 195
/*
 * The interrupt handler. This is called on every event.
 * Just call the poller directly to log any events.
 * This could in theory increase the threshold under high load,
 * but doesn't for now.
 */
static void intel_threshold_interrupt(void)
{
C
Chen Gong 已提交
196 197
	if (cmci_storm_detect())
		return;
A
Andi Kleen 已提交
198
	machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned));
199
	mce_notify_irq();
A
Andi Kleen 已提交
200 201 202 203 204 205 206
}

/*
 * Enable CMCI (Corrected Machine Check Interrupt) for available MCE banks
 * on this CPU. Use the algorithm recommended in the SDM to discover shared
 * banks.
 */
T
Tony Luck 已提交
207
static void cmci_discover(int banks)
A
Andi Kleen 已提交
208 209
{
	unsigned long *owned = (void *)&__get_cpu_var(mce_banks_owned);
210
	unsigned long flags;
A
Andi Kleen 已提交
211
	int i;
212
	int bios_wrong_thresh = 0;
A
Andi Kleen 已提交
213

214
	raw_spin_lock_irqsave(&cmci_discover_lock, flags);
A
Andi Kleen 已提交
215 216
	for (i = 0; i < banks; i++) {
		u64 val;
217
		int bios_zero_thresh = 0;
A
Andi Kleen 已提交
218 219 220 221

		if (test_bit(i, owned))
			continue;

222 223 224 225
		/* Skip banks in firmware first mode */
		if (test_bit(i, mce_banks_ce_disabled))
			continue;

226
		rdmsrl(MSR_IA32_MCx_CTL2(i), val);
A
Andi Kleen 已提交
227 228

		/* Already owned by someone else? */
229
		if (val & MCI_CTL2_CMCI_EN) {
T
Tony Luck 已提交
230
			clear_bit(i, owned);
A
Andi Kleen 已提交
231 232 233 234
			__clear_bit(i, __get_cpu_var(mce_poll_banks));
			continue;
		}

235
		if (!mca_cfg.bios_cmci_threshold) {
236 237 238 239 240 241 242 243 244 245 246 247 248
			val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK;
			val |= CMCI_THRESHOLD;
		} else if (!(val & MCI_CTL2_CMCI_THRESHOLD_MASK)) {
			/*
			 * If bios_cmci_threshold boot option was specified
			 * but the threshold is zero, we'll try to initialize
			 * it to 1.
			 */
			bios_zero_thresh = 1;
			val |= CMCI_THRESHOLD;
		}

		val |= MCI_CTL2_CMCI_EN;
249 250
		wrmsrl(MSR_IA32_MCx_CTL2(i), val);
		rdmsrl(MSR_IA32_MCx_CTL2(i), val);
A
Andi Kleen 已提交
251 252

		/* Did the enable bit stick? -- the bank supports CMCI */
253
		if (val & MCI_CTL2_CMCI_EN) {
T
Tony Luck 已提交
254
			set_bit(i, owned);
A
Andi Kleen 已提交
255
			__clear_bit(i, __get_cpu_var(mce_poll_banks));
256 257 258 259 260 261
			/*
			 * We are able to set thresholds for some banks that
			 * had a threshold of 0. This means the BIOS has not
			 * set the thresholds properly or does not work with
			 * this boot option. Note down now and report later.
			 */
262
			if (mca_cfg.bios_cmci_threshold && bios_zero_thresh &&
263 264
					(val & MCI_CTL2_CMCI_THRESHOLD_MASK))
				bios_wrong_thresh = 1;
A
Andi Kleen 已提交
265 266 267 268
		} else {
			WARN_ON(!test_bit(i, __get_cpu_var(mce_poll_banks)));
		}
	}
269
	raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
270
	if (mca_cfg.bios_cmci_threshold && bios_wrong_thresh) {
271 272 273 274 275
		pr_info_once(
			"bios_cmci_threshold: Some banks do not have valid thresholds set\n");
		pr_info_once(
			"bios_cmci_threshold: Make sure your BIOS supports this boot option\n");
	}
A
Andi Kleen 已提交
276 277 278 279 280 281
}

/*
 * Just in case we missed an event during initialization check
 * all the CMCI owned banks.
 */
282
void cmci_recheck(void)
A
Andi Kleen 已提交
283 284 285 286
{
	unsigned long flags;
	int banks;

287
	if (!mce_available(__this_cpu_ptr(&cpu_info)) || !cmci_supported(&banks))
A
Andi Kleen 已提交
288 289 290 291 292 293
		return;
	local_irq_save(flags);
	machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned));
	local_irq_restore(flags);
}

294 295 296 297 298 299 300 301 302 303 304 305 306
/*
 * Give up ownership of @bank on this CPU: clear the CMCI enable bit in
 * its MCi_CTL2 and drop it from the ownership bitmap. Banks this CPU
 * does not own are left untouched.
 *
 * Caller must hold the lock on cmci_discover_lock.
 */
static void __cmci_disable_bank(int bank)
{
	unsigned long *owned = __get_cpu_var(mce_banks_owned);
	u64 ctl2;

	if (test_bit(bank, owned)) {
		rdmsrl(MSR_IA32_MCx_CTL2(bank), ctl2);
		ctl2 &= ~MCI_CTL2_CMCI_EN;
		wrmsrl(MSR_IA32_MCx_CTL2(bank), ctl2);
		__clear_bit(bank, owned);
	}
}

A
Andi Kleen 已提交
307 308 309 310
/*
 * Disable CMCI on this CPU for all banks it owns when it goes down.
 * This allows other CPUs to claim the banks on rediscovery.
 */
311
void cmci_clear(void)
A
Andi Kleen 已提交
312
{
313
	unsigned long flags;
A
Andi Kleen 已提交
314 315 316 317 318
	int i;
	int banks;

	if (!cmci_supported(&banks))
		return;
319
	raw_spin_lock_irqsave(&cmci_discover_lock, flags);
320 321
	for (i = 0; i < banks; i++)
		__cmci_disable_bank(i);
322
	raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
A
Andi Kleen 已提交
323 324
}

325
/*
 * Per-CPU callback used by cmci_rediscover(): run bank discovery on the
 * CPU executing it. @arg is unused.
 */
static void cmci_rediscover_work_func(void *arg)
{
	int nbanks;

	/* Recheck banks in case CPUs don't all have the same */
	if (!cmci_supported(&nbanks))
		return;

	cmci_discover(nbanks);
}

334 335
/* After a CPU went down cycle through all the others and rediscover */
void cmci_rediscover(void)
A
Andi Kleen 已提交
336
{
337
	int banks;
A
Andi Kleen 已提交
338 339 340 341

	if (!cmci_supported(&banks))
		return;

342
	on_each_cpu(cmci_rediscover_work_func, NULL, 1);
A
Andi Kleen 已提交
343 344 345 346 347 348 349 350 351
}

/*
 * Run bank discovery again on this CPU, e.g. in case a CPU down failed.
 * Also used when leaving storm mode. No-op when CMCI is not supported.
 */
void cmci_reenable(void)
{
	int nbanks;

	if (!cmci_supported(&nbanks))
		return;

	cmci_discover(nbanks);
}

355 356 357 358 359 360 361 362 363 364 365 366 367
/*
 * Disable CMCI for a single bank on this CPU and give up ownership of
 * it, taking cmci_discover_lock around the update.
 *
 * @bank: index of the bank to disable.
 *
 * Does nothing when CMCI is not supported, when @bank is outside the
 * range of banks reported for this CPU, or when this CPU does not own
 * the bank.
 */
void cmci_disable_bank(int bank)
{
	int banks;
	unsigned long flags;

	if (!cmci_supported(&banks))
		return;

	/*
	 * cmci_discover() only ever claims banks below the count reported
	 * by cmci_supported(), so a bank outside that range cannot be
	 * owned. Bail out instead of indexing the ownership bitmap with an
	 * unchecked value.
	 */
	if (bank < 0 || bank >= banks)
		return;

	raw_spin_lock_irqsave(&cmci_discover_lock, flags);
	__cmci_disable_bank(bank);
	raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
}

368
static void intel_init_cmci(void)
A
Andi Kleen 已提交
369 370 371 372 373 374 375
{
	int banks;

	if (!cmci_supported(&banks))
		return;

	mce_threshold_vector = intel_threshold_interrupt;
T
Tony Luck 已提交
376
	cmci_discover(banks);
A
Andi Kleen 已提交
377 378 379 380 381 382 383 384 385 386
	/*
	 * For CPU #0 this runs with still disabled APIC, but that's
	 * ok because only the vector is set up. We still do another
	 * check for the banks later for CPU #0 just to make sure
	 * to not miss any events.
	 */
	apic_write(APIC_LVTCMCI, THRESHOLD_APIC_VECTOR|APIC_DM_FIXED);
	cmci_recheck();
}

387
/*
 * Entry point for the Intel specific MCE features of this file.
 *
 * @c: CPU description, handed on to intel_init_thermal() (defined
 *     elsewhere). CMCI setup follows and works on the current CPU.
 */
void mce_intel_feature_init(struct cpuinfo_x86 *c)
{
	intel_init_thermal(c);
	intel_init_cmci();
}