/* Pseudo NMI support on sparc64 systems.
 *
 * Copyright (C) 2009 David S. Miller <davem@davemloft.net>
 *
 * The NMI watchdog support and infrastructure is based almost
 * entirely upon the x86 NMI support code.
 */
#include <linux/kernel.h>
#include <linux/param.h>
#include <linux/init.h>
#include <linux/percpu.h>
#include <linux/nmi.h>
#include <linux/export.h>
#include <linux/kprobes.h>
#include <linux/kernel_stat.h>
#include <linux/reboot.h>
#include <linux/slab.h>
#include <linux/kdebug.h>
#include <linux/delay.h>
#include <linux/smp.h>

#include <asm/perf_event.h>
#include <asm/ptrace.h>
#include <asm/pcr.h>

#include "kstack.h"

/* We don't have a real NMI on sparc64, but we can fake one
 * up using profiling counter overflow interrupts and interrupt
 * levels.
 *
 * The profile counter overflow interrupt arrives at level 15, so we
 * use level 14 as our IRQ off level.
 */

static int panic_on_timeout;

/* nmi_active:
 * >0: the NMI watchdog is active, but can be disabled
 * <0: the NMI watchdog has not been set up, and cannot be enabled
 *  0: the NMI watchdog is disabled, but can be enabled
 */
atomic_t nmi_active = ATOMIC_INIT(0);		/* oprofile uses this */
EXPORT_SYMBOL(nmi_active);
static int nmi_init_done;
static unsigned int nmi_hz = HZ;
static DEFINE_PER_CPU(short, wd_enabled);
static int endflag __initdata;

static DEFINE_PER_CPU(unsigned int, last_irq_sum);
static DEFINE_PER_CPU(long, alert_counter);
static DEFINE_PER_CPU(int, nmi_touch);

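/* Tell every present CPU's watchdog to skip its next lockup check.
 * Code that legitimately spins for a long time can call this to keep
 * the watchdog quiet; a hypothetical polling loop might look like:
 *
 *	while (!device_ready()) {
 *		touch_nmi_watchdog();
 *		cpu_relax();
 *	}
 */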
void touch_nmi_watchdog(void)
{
	if (atomic_read(&nmi_active)) {
		int cpu;

		for_each_present_cpu(cpu) {
			if (per_cpu(nmi_touch, cpu) != 1)
				per_cpu(nmi_touch, cpu) = 1;
		}
	}

	touch_softlockup_watchdog();
}
EXPORT_SYMBOL(touch_nmi_watchdog);

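/* A CPU stopped taking timer interrupts.  Give the die notifier
 * chain a chance to claim the event, then panic or WARN depending
 * on "nmi_watchdog=panic" and panic_on_oops.
 */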
static void die_nmi(const char *str, struct pt_regs *regs, int do_panic)
{
	int this_cpu = smp_processor_id();

	if (notify_die(DIE_NMIWATCHDOG, str, regs, 0,
		       pt_regs_trap_type(regs), SIGINT) == NOTIFY_STOP)
		return;

	if (do_panic || panic_on_oops)
		panic("Watchdog detected hard LOCKUP on cpu %d", this_cpu);
	else
		WARN(1, "Watchdog detected hard LOCKUP on cpu %d", this_cpu);
}

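/* The level 15 profile counter overflow interrupt: this is the fake
 * NMI.  Each time it fires we compare this CPU's timer interrupt
 * count (irq0_irqs) with the value seen on the previous overflow; if
 * it has not moved for 30 seconds worth of samples and nobody called
 * touch_nmi_watchdog(), the CPU is declared hard-locked via
 * die_nmi().  The counter is then rearmed for the next period.
 */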
notrace __kprobes void perfctr_irq(int irq, struct pt_regs *regs)
{
	unsigned int sum, touched = 0;
	void *orig_sp;

	clear_softint(1 << irq);

	local_cpu_data().__nmi_count++;

	nmi_enter();

	orig_sp = set_hardirq_stack();

	if (notify_die(DIE_NMI, "nmi", regs, 0,
		       pt_regs_trap_type(regs), SIGINT) == NOTIFY_STOP)
		touched = 1;
	else
		pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_disable);

	sum = local_cpu_data().irq0_irqs;
	if (__this_cpu_read(nmi_touch)) {
		__this_cpu_write(nmi_touch, 0);
		touched = 1;
	}
	if (!touched && __this_cpu_read(last_irq_sum) == sum) {
		__this_cpu_inc(alert_counter);
		if (__this_cpu_read(alert_counter) == 30 * nmi_hz)
			die_nmi("BUG: NMI Watchdog detected LOCKUP",
				regs, panic_on_timeout);
	} else {
		__this_cpu_write(last_irq_sum, sum);
		__this_cpu_write(alert_counter, 0);
	}
	if (__this_cpu_read(wd_enabled)) {
		pcr_ops->write_pic(0, pcr_ops->nmi_picl_value(nmi_hz));
		pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_enable);
	}

	restore_hardirq_stack(orig_sp);

	nmi_exit();
}

static inline unsigned int get_nmi_count(int cpu)
{
	return cpu_data(cpu).__nmi_count;
}

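/* Keep the remote CPUs busy while check_nmi_watchdog() samples their
 * NMI counts, so their performance counters keep advancing and the
 * overflow interrupts keep firing.
 */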
static __init void nmi_cpu_busy(void *data)
{
	while (endflag == 0)
		mb();
}

static void report_broken_nmi(int cpu, unsigned int *prev_nmi_count)
{
	printk(KERN_CONT "\n");

	printk(KERN_WARNING
		"WARNING: CPU#%d: NMI appears to be stuck (%d->%d)!\n",
			cpu, prev_nmi_count[cpu], get_nmi_count(cpu));

	printk(KERN_WARNING
		"Please report this to bugzilla.kernel.org,\n");
	printk(KERN_WARNING
		"and attach the output of the 'dmesg' command.\n");

	per_cpu(wd_enabled, cpu) = 0;
	atomic_dec(&nmi_active);
}

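/* Disable the watchdog on the calling CPU: stop the counter and drop
 * this CPU from the nmi_active count.
 */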
void stop_nmi_watchdog(void *unused)
{
	if (!__this_cpu_read(wd_enabled))
		return;
	pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_disable);
	__this_cpu_write(wd_enabled, 0);
	atomic_dec(&nmi_active);
}

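/* Boot-time self-test.  Let the counters tick for roughly 20 periods
 * while nmi_cpu_busy() keeps the other CPUs spinning, then verify
 * that every enabled CPU's NMI count actually advanced; a CPU whose
 * count moved by five or fewer is reported as stuck and disabled.
 */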
static int __init check_nmi_watchdog(void)
{
	unsigned int *prev_nmi_count;
	int cpu, err;

	if (!atomic_read(&nmi_active))
		return 0;

	prev_nmi_count = kmalloc(nr_cpu_ids * sizeof(unsigned int), GFP_KERNEL);
	if (!prev_nmi_count) {
		err = -ENOMEM;
		goto error;
	}

	printk(KERN_INFO "Testing NMI watchdog ... ");

	smp_call_function(nmi_cpu_busy, (void *)&endflag, 0);

	for_each_possible_cpu(cpu)
		prev_nmi_count[cpu] = get_nmi_count(cpu);
	local_irq_enable();
	mdelay((20 * 1000) / nmi_hz); /* wait 20 ticks */

	for_each_online_cpu(cpu) {
		if (!per_cpu(wd_enabled, cpu))
			continue;
		if (get_nmi_count(cpu) - prev_nmi_count[cpu] <= 5)
			report_broken_nmi(cpu, prev_nmi_count);
	}
	endflag = 1;
	if (!atomic_read(&nmi_active)) {
		kfree(prev_nmi_count);
		atomic_set(&nmi_active, -1);
		err = -ENODEV;
		goto error;
	}
	printk("OK.\n");

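	/* The test passed, so slow the sample rate down from HZ to
	 * once per second; that is plenty for lockup detection.
	 */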
	nmi_hz = 1;

	kfree(prev_nmi_count);
	return 0;
error:
	on_each_cpu(stop_nmi_watchdog, NULL, 1);
	return err;
}

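/* Enable the watchdog on the calling CPU: account it in nmi_active,
 * then program the counter to interrupt nmi_hz times per second.
 */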
void start_nmi_watchdog(void *unused)
{
	if (__this_cpu_read(wd_enabled))
		return;

	__this_cpu_write(wd_enabled, 1);
	atomic_inc(&nmi_active);

	pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_disable);
	pcr_ops->write_pic(0, pcr_ops->nmi_picl_value(nmi_hz));

	pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_enable);
}

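/* Reprogram the calling CPU's counter period to match the current
 * nmi_hz; called on every CPU from nmi_adjust_hz() below.
 */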
static void nmi_adjust_hz_one(void *unused)
{
	if (!__this_cpu_read(wd_enabled))
		return;

	pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_disable);
	pcr_ops->write_pic(0, pcr_ops->nmi_picl_value(nmi_hz));

	pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_enable);
}

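/* Change the watchdog sample rate on all CPUs.  A (hypothetical)
 * caller wanting 100 samples per second would simply do:
 *
 *	nmi_adjust_hz(100);
 */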
void nmi_adjust_hz(unsigned int new_hz)
{
	nmi_hz = new_hz;
	on_each_cpu(nmi_adjust_hz_one, NULL, 1);
}
EXPORT_SYMBOL_GPL(nmi_adjust_hz);

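/* Reboot notifier: quiesce the counters on every CPU so nothing is
 * still firing when control passes out of the kernel.
 */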
static int nmi_shutdown(struct notifier_block *nb, unsigned long cmd, void *p)
{
	on_each_cpu(stop_nmi_watchdog, NULL, 1);
	return 0;
}

static struct notifier_block nmi_reboot_notifier = {
	.notifier_call = nmi_shutdown,
};

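/* Bring the watchdog up on every CPU, run the self-test, and hook
 * the reboot notifier.  Any failure tears the watchdog back down.
 */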
int __init nmi_init(void)
{
	int err;

	on_each_cpu(start_nmi_watchdog, NULL, 1);

	err = check_nmi_watchdog();
	if (!err) {
		err = register_reboot_notifier(&nmi_reboot_notifier);
		if (err) {
			on_each_cpu(stop_nmi_watchdog, NULL, 1);
			atomic_set(&nmi_active, -1);
		}
	}

	nmi_init_done = 1;

	return err;
}

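/* Booting with "nmi_watchdog=panic" makes a detected lockup panic
 * the machine instead of just printing a warning.
 */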
static int __init setup_nmi_watchdog(char *str)
{
	if (!strncmp(str, "panic", 5))
		panic_on_timeout = 1;

	return 0;
}
__setup("nmi_watchdog=", setup_nmi_watchdog);

/*
 * sparc-specific NMI watchdog enable function.
 * Enables the watchdog if it is not already enabled.
 */
int watchdog_nmi_enable(unsigned int cpu)
{
	if (atomic_read(&nmi_active) == -1) {
		pr_warn("NMI watchdog cannot be enabled or disabled\n");
		return -1;
	}

	/*
	 * The watchdog thread could start even before nmi_init is called.
	 * Just return in that case; let nmi_init finish the init
	 * process first.
	 */
	if (!nmi_init_done)
		return 0;

	smp_call_function_single(cpu, start_nmi_watchdog, NULL, 1);

	return 0;
}
/*
 * sparc-specific NMI watchdog disable function.
 * Disables the watchdog if it is not already disabled.
 */
void watchdog_nmi_disable(unsigned int cpu)
{
	if (atomic_read(&nmi_active) == -1)
		pr_warn_once("NMI watchdog cannot be enabled or disabled\n");
	else
		smp_call_function_single(cpu, stop_nmi_watchdog, NULL, 1);
}