nmi_64.c 10.9 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4 5 6 7 8 9 10 11 12
/*
 *  NMI watchdog support on APIC systems
 *
 *  Started by Ingo Molnar <mingo@redhat.com>
 *
 *  Fixes:
 *  Mikael Pettersson	: AMD K7 support for local APIC NMI watchdog.
 *  Mikael Pettersson	: Power Management for local APIC NMI watchdog.
 *  Pavel Machek and
 *  Mikael Pettersson	: PM converted to driver model. Disable/enable API.
 */

A
Andrew Morton 已提交
13
#include <linux/nmi.h>
L
Linus Torvalds 已提交
14 15 16 17 18 19
#include <linux/mm.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <linux/module.h>
#include <linux/sysdev.h>
#include <linux/sysctl.h>
20
#include <linux/kprobes.h>
A
Andrew Morton 已提交
21
#include <linux/cpumask.h>
22
#include <linux/kdebug.h>
L
Linus Torvalds 已提交
23 24 25 26

#include <asm/smp.h>
#include <asm/nmi.h>
#include <asm/proto.h>
27
#include <asm/mce.h>
L
Linus Torvalds 已提交
28

29 30
#include <mach_traps.h>

31 32 33 34
/* sysctl-visible knobs; unknown_nmi_panic and nmi_watchdog_enabled are
 * read/written by the CONFIG_SYSCTL code below.  panic_on_unrecovered_nmi
 * is presumably consumed elsewhere in the NMI path — not visible here. */
int unknown_nmi_panic;
int nmi_watchdog_enabled;
int panic_on_unrecovered_nmi;

/* CPUs that still owe a backtrace; set in __trigger_all_cpu_backtrace(),
 * each CPU clears its own bit from nmi_watchdog_tick(). */
static cpumask_t backtrace_mask = CPU_MASK_NONE;
36

L
Linus Torvalds 已提交
37
/* nmi_active:
 * >0: the lapic NMI watchdog is active, but can be disabled
 * <0: the lapic NMI watchdog has not been set up, and cannot
 *     be enabled
 *  0: the lapic NMI watchdog is disabled, but can be enabled
 */
atomic_t nmi_active = ATOMIC_INIT(0);		/* oprofile uses this */
/* set by the "nmi_watchdog=panic,..." boot option; passed to die_nmi() */
static int panic_on_timeout;

unsigned int nmi_watchdog = NMI_DEFAULT;
/* watchdog check frequency; lowered by check_nmi_watchdog() once verified */
static unsigned int nmi_hz = HZ;

/* per-CPU flag: non-zero when this CPU's watchdog NMI source is armed */
static DEFINE_PER_CPU(short, wd_enabled);
L
Linus Torvalds 已提交
50 51

/* Run after command line and cpu_init init, but before all other checks */
52
void nmi_watchdog_default(void)
L
Linus Torvalds 已提交
53 54 55
{
	if (nmi_watchdog != NMI_DEFAULT)
		return;
56
	nmi_watchdog = NMI_NONE;
L
Linus Torvalds 已提交
57 58
}

59 60
static int endflag __initdata = 0;

61 62 63 64 65 66
#ifdef CONFIG_SMP
/* The performance counters used by NMI_LOCAL_APIC don't trigger when
 * the CPU is idle. To make sure the NMI watchdog really ticks on all
 * CPUs during the test make them busy.
 */
static __init void nmi_cpu_busy(void *data)
L
Linus Torvalds 已提交
67
{
68
	local_irq_enable_in_hardirq();
69 70 71 72 73 74
	/* Intentionally don't use cpu_relax here. This is
	   to make sure that the performance counter really ticks,
	   even if there is a simulator or similar that catches the
	   pause instruction. On a real HT machine this is fine because
	   all other CPUs are busy with "useless" delay loops and don't
	   care if they get somewhat less cycles. */
75 76
	while (endflag == 0)
		mb();
L
Linus Torvalds 已提交
77
}
78
#endif
L
Linus Torvalds 已提交
79

H
Hiroshi Shimamoto 已提交
80
/*
 * Boot-time self test: verify that every CPU's enabled watchdog NMI
 * source actually fires.  CPUs whose NMI count fails to advance get
 * their watchdog disabled; if no CPU works, nmi_active is set to -1
 * (permanently disabled).
 *
 * Returns 0 on success, -1 on allocation failure or if no CPU's
 * watchdog ticks.
 */
int __init check_nmi_watchdog(void)
{
	int *prev_nmi_count;
	int rv = 0;
	int cpu;

	if ((nmi_watchdog == NMI_NONE) || (nmi_watchdog == NMI_DISABLED))
		return 0;

	if (!atomic_read(&nmi_active))
		return 0;

	prev_nmi_count = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL);
	if (!prev_nmi_count)
		return -1;

	printk(KERN_INFO "Testing NMI watchdog ... ");

#ifdef CONFIG_SMP
	/* keep the other CPUs busy so perf-counter based NMIs keep ticking */
	if (nmi_watchdog == NMI_LOCAL_APIC)
		smp_call_function(nmi_cpu_busy, (void *)&endflag, 0);
#endif

	for (cpu = 0; cpu < NR_CPUS; cpu++)
		prev_nmi_count[cpu] = cpu_pda(cpu)->__nmi_count;
	local_irq_enable();
	mdelay((20 * 1000) / nmi_hz); /* wait 20 ticks */

	for_each_online_cpu(cpu) {
		if (!per_cpu(wd_enabled, cpu))
			continue;
		/* fewer than ~5 NMIs in 20 ticks: this CPU's source is dead */
		if (cpu_pda(cpu)->__nmi_count - prev_nmi_count[cpu] <= 5) {
			printk(KERN_WARNING "WARNING: CPU#%d: NMI "
			       "appears to be stuck (%d->%d)!\n",
				cpu,
				prev_nmi_count[cpu],
				cpu_pda(cpu)->__nmi_count);
			per_cpu(wd_enabled, cpu) = 0;
			atomic_dec(&nmi_active);
		}
	}
	endflag = 1;	/* release the nmi_cpu_busy() loops */
	if (!atomic_read(&nmi_active)) {
		/* nothing works: mark the watchdog permanently unusable */
		atomic_set(&nmi_active, -1);
		rv = -1;
		goto out;
	}
	printk("OK.\n");

	/* now that we know it works we can reduce NMI frequency to
	   something more reasonable; makes a difference in some configs */
	if (nmi_watchdog == NMI_LOCAL_APIC)
		nmi_hz = lapic_adjust_nmi_hz(1);
out:
	kfree(prev_nmi_count);
	return rv;
}

A
Adrian Bunk 已提交
137
/*
 * Parse the "nmi_watchdog=" boot option: an optional leading "panic,"
 * enables panic-on-lockup, followed by the watchdog mode number.
 * Returns 1 when the option was consumed, 0 on an out-of-range mode.
 */
static int __init setup_nmi_watchdog(char *str)
{
	int opt;

	if (!strncmp(str, "panic", 5)) {
		panic_on_timeout = 1;
		str = strchr(str, ',');
		if (!str)
			return 1;
		++str;
	}

	get_option(&str, &opt);

	if (opt < NMI_NONE || opt >= NMI_INVALID)
		return 0;

	nmi_watchdog = opt;
	return 1;
}

__setup("nmi_watchdog=", setup_nmi_watchdog);

#ifdef CONFIG_PM

static int nmi_pm_active; /* nmi_active before suspend */

/*
 * PM suspend hook: remember whether the watchdog was running, then
 * stop it on the boot CPU.  Always returns 0.
 */
static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state)
{
	/* only CPU0 goes here, other CPUs should be offline */
	nmi_pm_active = atomic_read(&nmi_active);
	stop_apic_nmi_watchdog(NULL);
	BUG_ON(atomic_read(&nmi_active) != 0);
	return 0;
}

/*
 * PM resume hook: re-arm the watchdog iff it was active before suspend
 * (state saved in nmi_pm_active).  Always returns 0.
 */
static int lapic_nmi_resume(struct sys_device *dev)
{
	/* only CPU0 goes here, other CPUs should be offline */
	if (nmi_pm_active > 0) {
		setup_apic_nmi_watchdog(NULL);
		touch_nmi_watchdog();
	}
	return 0;
}

/* sysdev class wiring the suspend/resume hooks above into the PM core */
static struct sysdev_class nmi_sysclass = {
	.name		= "lapic_nmi",
	.resume		= lapic_nmi_resume,
	.suspend	= lapic_nmi_suspend,
};

/* single device instance registered under the class above */
static struct sys_device device_lapic_nmi = {
	.id	= 0,
	.cls	= &nmi_sysclass,
};

/*
 * Register the lapic_nmi sysdev so suspend/resume notifications reach
 * the watchdog.  A no-op unless the local-APIC watchdog is in use and
 * not permanently disabled.
 */
static int __init init_lapic_nmi_sysfs(void)
{
	int rc;

	/* should really be a BUG_ON but b/c this is an
	 * init call, it just doesn't work.  -dcz
	 */
	if (nmi_watchdog != NMI_LOCAL_APIC || atomic_read(&nmi_active) < 0)
		return 0;

	rc = sysdev_class_register(&nmi_sysclass);
	if (rc)
		return rc;
	return sysdev_register(&device_lapic_nmi);
}
/* must come after the local APIC's device_initcall() */
late_initcall(init_lapic_nmi_sysfs);

#endif	/* CONFIG_PM */

H
Hiroshi Shimamoto 已提交
217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244
/* Per-CPU helper: program LVT0 to deliver NMIs (unmasked). */
static void __acpi_nmi_enable(void *__unused)
{
	apic_write(APIC_LVT0, APIC_DM_NMI);
}

/*
 * Enable timer based NMIs on all CPUs:
 */
void acpi_nmi_enable(void)
{
	if (!atomic_read(&nmi_active) || nmi_watchdog != NMI_IO_APIC)
		return;
	on_each_cpu(__acpi_nmi_enable, NULL, 0, 1);
}

/* Per-CPU helper: program LVT0 as NMI but leave it masked. */
static void __acpi_nmi_disable(void *__unused)
{
	apic_write(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED);
}

/*
 * Disable timer based NMIs on all CPUs:
 */
void acpi_nmi_disable(void)
{
	if (!atomic_read(&nmi_active) || nmi_watchdog != NMI_IO_APIC)
		return;
	on_each_cpu(__acpi_nmi_disable, NULL, 0, 1);
}

245 246
/*
 * Arm the watchdog NMI source on the calling CPU and account for it in
 * nmi_active.  Safe to call repeatedly: a no-op if already enabled here.
 */
void setup_apic_nmi_watchdog(void *unused)
{
	if (__get_cpu_var(wd_enabled))
		return;

	/* cheap hack to support suspend/resume */
	/* if cpu0 is not active neither should the other cpus */
	if ((smp_processor_id() != 0) && (atomic_read(&nmi_active) <= 0))
		return;

	switch (nmi_watchdog) {
	case NMI_LOCAL_APIC:
		__get_cpu_var(wd_enabled) = 1;
		if (lapic_watchdog_init(nmi_hz) < 0) {
			/* hardware init failed: back out, stay disabled */
			__get_cpu_var(wd_enabled) = 0;
			return;
		}
		/* FALL THROUGH */
	case NMI_IO_APIC:
		__get_cpu_var(wd_enabled) = 1;
		atomic_inc(&nmi_active);
	}
}
268

269
void stop_apic_nmi_watchdog(void *unused)
270 271 272 273 274
{
	/* only support LOCAL and IO APICs for now */
	if ((nmi_watchdog != NMI_LOCAL_APIC) &&
	    (nmi_watchdog != NMI_IO_APIC))
	    	return;
275
	if (__get_cpu_var(wd_enabled) == 0)
276
		return;
277 278 279
	if (nmi_watchdog == NMI_LOCAL_APIC)
		lapic_watchdog_stop();
	__get_cpu_var(wd_enabled) = 0;
280
	atomic_dec(&nmi_active);
L
Linus Torvalds 已提交
281 282 283 284 285 286 287 288 289 290 291
}

/*
 * the best way to detect whether a CPU has a 'hard lockup' problem
 * is to check it's local APIC timer IRQ counts. If they are not
 * changing then that CPU has some problem.
 *
 * as these watchdog NMI IRQs are generated on every CPU, we only
 * have to check the current processor.
 */

/* apic_timer_irqs + irq0_irqs observed at the previous watchdog NMI */
static DEFINE_PER_CPU(unsigned, last_irq_sum);
/* consecutive stuck ticks; reaching 5*nmi_hz triggers die_nmi() */
static DEFINE_PER_CPU(local_t, alert_counter);
/* set by touch_nmi_watchdog() asking this CPU to reset its counter */
static DEFINE_PER_CPU(int, nmi_touch);
L
Linus Torvalds 已提交
295

296
/*
 * Tell the NMI watchdog (and the softlockup detector) that the system
 * is alive, e.g. during long legitimate delays such as early printk.
 */
void touch_nmi_watchdog(void)
{
	if (nmi_watchdog > 0) {
		unsigned cpu;

		/*
		 * Tell other CPUs to reset their alert counters. We cannot
		 * do it ourselves because the alert count increase is not
		 * atomic.
		 */
		for_each_present_cpu(cpu) {
			/* only write when needed — presumably to avoid
			 * dirtying remote cachelines; TODO confirm */
			if (per_cpu(nmi_touch, cpu) != 1)
				per_cpu(nmi_touch, cpu) = 1;
		}
	}

	touch_softlockup_watchdog();
}
EXPORT_SYMBOL(touch_nmi_watchdog);
L
Linus Torvalds 已提交
315

316 317
/*
 * Per-CPU watchdog NMI handler.  Checks whether this CPU's interrupt
 * counters are still advancing; if they stall for 5*nmi_hz consecutive
 * ticks the CPU is declared locked up via die_nmi().  Also services
 * backtrace requests from __trigger_all_cpu_backtrace().
 * Returns non-zero when the NMI was recognised as a watchdog event.
 */
notrace __kprobes int
nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
{
	int sum;
	int touched = 0;
	int cpu = smp_processor_id();
	int rc = 0;

	/* check for other users first */
	if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT)
			== NOTIFY_STOP) {
		rc = 1;
		touched = 1;
	}

	/* progress metric: local APIC timer plus irq0 interrupt counts */
	sum = read_pda(apic_timer_irqs) + read_pda(irq0_irqs);
	if (__get_cpu_var(nmi_touch)) {
		/* someone called touch_nmi_watchdog(): skip this check */
		__get_cpu_var(nmi_touch) = 0;
		touched = 1;
	}

	if (cpu_isset(cpu, backtrace_mask)) {
		static DEFINE_SPINLOCK(lock);	/* Serialise the printks */

		spin_lock(&lock);
		printk("NMI backtrace for cpu %d\n", cpu);
		dump_stack();
		spin_unlock(&lock);
		cpu_clear(cpu, backtrace_mask);
	}

#ifdef CONFIG_X86_MCE
	/* Could check oops_in_progress here too, but it's safer
	   not too */
	if (atomic_read(&mce_entry) > 0)
		touched = 1;
#endif
	/* if the apic timer isn't firing, this cpu isn't doing much */
	if (!touched && __get_cpu_var(last_irq_sum) == sum) {
		/*
		 * Ayiee, looks like this CPU is stuck ...
		 * wait a few IRQs (5 seconds) before doing the oops ...
		 */
		local_inc(&__get_cpu_var(alert_counter));
		if (local_read(&__get_cpu_var(alert_counter)) == 5*nmi_hz)
			die_nmi("NMI Watchdog detected LOCKUP on CPU %d\n", regs,
				panic_on_timeout);
	} else {
		/* progress was made: remember it and reset the alarm */
		__get_cpu_var(last_irq_sum) = sum;
		local_set(&__get_cpu_var(alert_counter), 0);
	}

	/* see if the nmi watchdog went off */
	if (!__get_cpu_var(wd_enabled))
		return rc;
	switch (nmi_watchdog) {
	case NMI_LOCAL_APIC:
		rc |= lapic_wd_event(nmi_hz);	/* also re-arms the counter */
		break;
	case NMI_IO_APIC:
		/* don't know how to accurately check for this.
		 * just assume it was a watchdog timer interrupt
		 * This matches the old behaviour.
		 */
		rc = 1;
		break;
	}
	return rc;
}

386 387
/* when non-zero (see stop_nmi()), incoming NMIs are counted but not acted on */
static unsigned ignore_nmis;

/*
 * Top-level NMI entry point: bump the per-CPU NMI count and dispatch to
 * default_do_nmi() unless NMIs are currently being ignored.
 */
asmlinkage notrace __kprobes void
do_nmi(struct pt_regs *regs, long error_code)
{
	nmi_enter();
	add_pda(__nmi_count,1);
	if (!ignore_nmis)
		default_do_nmi(regs);
	nmi_exit();
}

398 399 400 401 402 403 404 405 406 407 408 409
/* Mask chipset timer NMIs, then ignore anything that still arrives. */
void stop_nmi(void)
{
	acpi_nmi_disable();
	ignore_nmis++;
}

/* Undo stop_nmi(): resume handling first, then unmask the source. */
void restart_nmi(void)
{
	ignore_nmis--;
	acpi_nmi_enable();
}

L
Linus Torvalds 已提交
410 411 412 413 414 415 416
#ifdef CONFIG_SYSCTL

/*
 * Panic on an NMI nobody claimed: format the raw reason byte and hand
 * it to die_nmi() (which does not return here when panicking).
 */
static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu)
{
	unsigned char reason = get_nmi_reason();
	char buf[64];

	/* snprintf rather than sprintf: keep the write bounded to buf */
	snprintf(buf, sizeof(buf),
		 "NMI received for unknown reason %02x\n", reason);
	die_nmi(buf, regs, 1);	/* Always panic here */
	return 0;
}

422 423 424 425 426 427 428 429 430 431 432 433 434 435
/*
 * proc handler for /proc/sys/kernel/nmi
 *
 * Reflects nmi_active into nmi_watchdog_enabled, lets proc_dointvec
 * read/write the flag, and enables/disables the lapic watchdog when the
 * value actually changed.  Returns 0, a proc_dointvec error, or -EIO
 * when the watchdog cannot be (re)enabled.
 */
int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file,
			void __user *buffer, size_t *length, loff_t *ppos)
{
	int old_state;
	int rc;

	nmi_watchdog_enabled = (atomic_read(&nmi_active) > 0) ? 1 : 0;
	old_state = nmi_watchdog_enabled;
	/* propagate copy/parse errors instead of silently ignoring them */
	rc = proc_dointvec(table, write, file, buffer, length, ppos);
	if (rc)
		return rc;
	if (!!old_state == !!nmi_watchdog_enabled)
		return 0;

	if (atomic_read(&nmi_active) < 0 || nmi_watchdog == NMI_DISABLED) {
		printk(KERN_WARNING "NMI watchdog is permanently disabled\n");
		return -EIO;
	}

	/* if nmi_watchdog is not set yet, then set it */
	nmi_watchdog_default();

	if (nmi_watchdog == NMI_LOCAL_APIC) {
		if (nmi_watchdog_enabled)
			enable_lapic_nmi_watchdog();
		else
			disable_lapic_nmi_watchdog();
	} else {
		printk(KERN_WARNING
			"NMI watchdog doesn't know what hardware to touch\n");
		return -EIO;
	}
	return 0;
}

L
Linus Torvalds 已提交
457 458
#endif

459 460 461 462 463 464 465 466 467
/*
 * Hook for NMIs no other handler claimed.  With CONFIG_SYSCTL and the
 * unknown_nmi_panic sysctl set, defers to the panic callback (which
 * panics); otherwise reports the NMI as unhandled (0).
 */
int do_nmi_callback(struct pt_regs *regs, int cpu)
{
#ifdef CONFIG_SYSCTL
	if (unknown_nmi_panic)
		return unknown_nmi_panic_callback(regs, cpu);
#endif
	return 0;
}

A
Andrew Morton 已提交
468 469 470 471 472 473 474 475 476 477 478 479 480
/*
 * Ask every online CPU to dump its stack from its next watchdog NMI,
 * then poll (up to 10 seconds) until all have cleared their mask bit.
 */
void __trigger_all_cpu_backtrace(void)
{
	int ms_left;

	backtrace_mask = cpu_online_map;
	/* Wait for up to 10 seconds for all CPUs to do the backtrace */
	ms_left = 10 * 1000;
	while (ms_left-- > 0) {
		if (cpus_empty(backtrace_mask))
			break;
		mdelay(1);
	}
}

L
Linus Torvalds 已提交
481 482
/* exported for modular users such as oprofile (see nmi_active's comment) */
EXPORT_SYMBOL(nmi_active);
EXPORT_SYMBOL(nmi_watchdog);