/*
 * Detect hard and soft lockups on a system
 *
 * started by Don Zickus, Copyright (C) 2010 Red Hat, Inc.
 *
 * Note: Most of this code is borrowed heavily from the original softlockup
 * detector, so thanks to Ingo for the initial implementation.
 * Some chunks also taken from the old x86-specific nmi watchdog code, thanks
 * to those contributors as well.
 */

#define pr_fmt(fmt) "NMI watchdog: " fmt

#include <linux/mm.h>
#include <linux/cpu.h>
#include <linux/nmi.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/sysctl.h>
#include <linux/smpboot.h>
#include <linux/sched/rt.h>

#include <asm/irq_regs.h>
#include <linux/kvm_para.h>
#include <linux/perf_event.h>

/*
 * The run state of the lockup detectors is controlled by the content of the
 * 'watchdog_enabled' variable. Each lockup detector has its dedicated bit -
 * bit 0 for the hard lockup detector and bit 1 for the soft lockup detector.
 *
 * 'watchdog_user_enabled', 'nmi_watchdog_enabled' and 'soft_watchdog_enabled'
 * are variables that are only used as an 'interface' between the parameters
 * in /proc/sys/kernel and the internal state bits in 'watchdog_enabled'. The
 * 'watchdog_thresh' variable is handled differently because its value is not
 * boolean, and the lockup detectors are 'suspended' while 'watchdog_thresh'
 * is equal to zero.
 */
#define NMI_WATCHDOG_ENABLED_BIT   0
#define SOFT_WATCHDOG_ENABLED_BIT  1
#define NMI_WATCHDOG_ENABLED      (1 << NMI_WATCHDOG_ENABLED_BIT)
#define SOFT_WATCHDOG_ENABLED     (1 << SOFT_WATCHDOG_ENABLED_BIT)
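/*
 * Worked example of the bit layout: with both detectors active,
 * watchdog_enabled == (NMI_WATCHDOG_ENABLED | SOFT_WATCHDOG_ENABLED) == 0x3.
 * Clearing only NMI_WATCHDOG_ENABLED (bit 0) leaves the soft lockup
 * detector running.
 */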

#ifdef CONFIG_HARDLOCKUP_DETECTOR
static unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED|NMI_WATCHDOG_ENABLED;
#else
static unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED;
#endif
int __read_mostly nmi_watchdog_enabled;
int __read_mostly soft_watchdog_enabled;
int __read_mostly watchdog_user_enabled;
int __read_mostly watchdog_thresh = 10;

#ifdef CONFIG_SMP
int __read_mostly sysctl_softlockup_all_cpu_backtrace;
#else
#define sysctl_softlockup_all_cpu_backtrace 0
#endif

static int __read_mostly watchdog_running;
static u64 __read_mostly sample_period;

static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts);
static DEFINE_PER_CPU(struct task_struct *, softlockup_watchdog);
static DEFINE_PER_CPU(struct hrtimer, watchdog_hrtimer);
static DEFINE_PER_CPU(bool, softlockup_touch_sync);
static DEFINE_PER_CPU(bool, soft_watchdog_warn);
static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts);
static DEFINE_PER_CPU(unsigned long, soft_lockup_hrtimer_cnt);
static DEFINE_PER_CPU(struct task_struct *, softlockup_task_ptr_saved);
#ifdef CONFIG_HARDLOCKUP_DETECTOR
static DEFINE_PER_CPU(bool, hard_watchdog_warn);
static DEFINE_PER_CPU(bool, watchdog_nmi_touch);
static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved);
static DEFINE_PER_CPU(struct perf_event *, watchdog_ev);
#endif
static unsigned long soft_lockup_nmi_warn;

/* boot commands */
/*
 * Should we panic when a soft-lockup or hard-lockup occurs:
 */
#ifdef CONFIG_HARDLOCKUP_DETECTOR
static int hardlockup_panic =
			CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE;

static bool hardlockup_detector_enabled = true;
/*
 * We may not want to enable hard lockup detection by default in all cases,
 * for example when running the kernel as a guest on a hypervisor. In these
 * cases this function can be called to disable hard lockup detection. This
 * function should only be executed once by the boot processor before the
 * kernel command line parameters are parsed, because otherwise it is not
 * possible to override this in hardlockup_panic_setup().
 */
void watchdog_enable_hardlockup_detector(bool val)
{
	hardlockup_detector_enabled = val;
}

bool watchdog_hardlockup_detector_is_enabled(void)
{
	return hardlockup_detector_enabled;
}
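
/*
 * For example, a hypervisor guest setup path (such as the x86 KVM guest
 * init code) can call watchdog_enable_hardlockup_detector(false) early in
 * boot, before the command line is parsed, so that PMU based hard lockup
 * detection stays off unless the user re-enables it with 'nmi_watchdog=1'.
 */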

static int __init hardlockup_panic_setup(char *str)
{
	if (!strncmp(str, "panic", 5))
		hardlockup_panic = 1;
	else if (!strncmp(str, "nopanic", 7))
		hardlockup_panic = 0;
	else if (!strncmp(str, "0", 1))
		watchdog_user_enabled = 0;
	else if (!strncmp(str, "1", 1) || !strncmp(str, "2", 1)) {
		/*
		 * Setting 'nmi_watchdog=1' or 'nmi_watchdog=2' (legacy option)
		 * has the same effect.
		 */
		watchdog_user_enabled = 1;
		watchdog_enable_hardlockup_detector(true);
	}
	return 1;
}
__setup("nmi_watchdog=", hardlockup_panic_setup);
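
/*
 * Command line values handled above:
 *   nmi_watchdog=panic    - panic the system on a detected hard lockup
 *   nmi_watchdog=nopanic  - warn only
 *   nmi_watchdog=0        - disable the watchdog
 *   nmi_watchdog=1        - enable it ('nmi_watchdog=2' is the legacy spelling)
 */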
#endif

unsigned int __read_mostly softlockup_panic =
			CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE;

static int __init softlockup_panic_setup(char *str)
{
	softlockup_panic = simple_strtoul(str, NULL, 0);

	return 1;
}
__setup("softlockup_panic=", softlockup_panic_setup);

static int __init nowatchdog_setup(char *str)
{
	watchdog_user_enabled = 0;
	return 1;
}
__setup("nowatchdog", nowatchdog_setup);

/* deprecated */
static int __init nosoftlockup_setup(char *str)
{
	watchdog_user_enabled = 0;
	return 1;
}
__setup("nosoftlockup", nosoftlockup_setup);
/*  */
#ifdef CONFIG_SMP
static int __init softlockup_all_cpu_backtrace_setup(char *str)
{
	sysctl_softlockup_all_cpu_backtrace =
		!!simple_strtol(str, NULL, 0);
	return 1;
}
__setup("softlockup_all_cpu_backtrace=", softlockup_all_cpu_backtrace_setup);
#endif

/*
 * Hard-lockup warnings should be triggered after just a few seconds. Soft
 * lockups can have false positives under extreme conditions, so we generally
 * want a higher threshold for soft lockups than for hard lockups. We couple
 * the thresholds with a factor: the soft lockup threshold is twice the hard
 * lockup threshold.
 */
static int get_softlockup_thresh(void)
{
	return watchdog_thresh * 2;
}
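
/*
 * Example: with the default watchdog_thresh of 10, the hard lockup
 * threshold is 10 seconds and the soft lockup threshold is
 * 2 * 10 = 20 seconds.
 */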

/*
 * Returns seconds, approximately.  We don't need nanosecond
 * resolution, and we don't need to waste time with a big divide when
 * 2^30ns == 1.074s.
 */
static unsigned long get_timestamp(void)
{
	return running_clock() >> 30LL;  /* 2^30 ~= 10^9 */
}
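
/*
 * Example: running_clock() == 10,000,000,000 ns (10 s of uptime) gives
 * 10000000000 >> 30 == 9, i.e. about 9 of these ~1.074 s "seconds".
 */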

static void set_sample_period(void)
{
	/*
	 * convert watchdog_thresh from seconds to ns
	 * the divide by 5 is to give hrtimer several chances (two
	 * or three with the current relation between the soft
	 * and hard thresholds) to increment before the
	 * hardlockup detector generates a warning
	 */
	sample_period = get_softlockup_thresh() * ((u64)NSEC_PER_SEC / 5);
}
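
/*
 * Worked example with the defaults: watchdog_thresh == 10, so
 * get_softlockup_thresh() == 20 and
 * sample_period == 20 * (NSEC_PER_SEC / 5) == 4,000,000,000 ns == 4 s,
 * i.e. the hrtimer fires two or three times within the hard lockup window.
 */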

/* Commands for resetting the watchdog */
static void __touch_watchdog(void)
{
	__this_cpu_write(watchdog_touch_ts, get_timestamp());
}

void touch_softlockup_watchdog(void)
{
	/*
	 * Preemption can be enabled.  It doesn't matter which CPU's timestamp
	 * gets zeroed here, so use the raw_ operation.
	 */
	raw_cpu_write(watchdog_touch_ts, 0);
}
EXPORT_SYMBOL(touch_softlockup_watchdog);

void touch_all_softlockup_watchdogs(void)
{
	int cpu;

	/*
	 * This is done locklessly. Do we care if a 0 races with a
	 * timestamp? All it means is that the softlockup check starts
	 * one cycle later.
	 */
	for_each_online_cpu(cpu)
		per_cpu(watchdog_touch_ts, cpu) = 0;
}

#ifdef CONFIG_HARDLOCKUP_DETECTOR
void touch_nmi_watchdog(void)
{
	/*
	 * Using raw_cpu_write() here because some code paths have
	 * preemption enabled.  If preemption is enabled
	 * then interrupts should be enabled too, in which
	 * case we shouldn't have to worry about the watchdog
	 * going off.
	 */
	raw_cpu_write(watchdog_nmi_touch, true);
	touch_softlockup_watchdog();
}
EXPORT_SYMBOL(touch_nmi_watchdog);

#endif

void touch_softlockup_watchdog_sync(void)
{
	__this_cpu_write(softlockup_touch_sync, true);
	__this_cpu_write(watchdog_touch_ts, 0);
}

#ifdef CONFIG_HARDLOCKUP_DETECTOR
/* watchdog detector functions */
static int is_hardlockup(void)
{
	unsigned long hrint = __this_cpu_read(hrtimer_interrupts);

	if (__this_cpu_read(hrtimer_interrupts_saved) == hrint)
		return 1;

	__this_cpu_write(hrtimer_interrupts_saved, hrint);
	return 0;
}
#endif

static int is_softlockup(unsigned long touch_ts)
{
	unsigned long now = get_timestamp();

	/* Warn about unreasonable delays: */
	if (time_after(now, touch_ts + get_softlockup_thresh()))
		return now - touch_ts;

	return 0;
}
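
/*
 * Example: with touch_ts == 100, now == 125 and a soft threshold of 20
 * seconds, the watchdog task has not run for 25 seconds, so 25 is
 * returned and later printed as the stall duration.
 */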

#ifdef CONFIG_HARDLOCKUP_DETECTOR

static struct perf_event_attr wd_hw_attr = {
	.type		= PERF_TYPE_HARDWARE,
	.config		= PERF_COUNT_HW_CPU_CYCLES,
	.size		= sizeof(struct perf_event_attr),
	.pinned		= 1,
	.disabled	= 1,
};

/* Callback function for perf event subsystem */
static void watchdog_overflow_callback(struct perf_event *event,
		 struct perf_sample_data *data,
		 struct pt_regs *regs)
{
	/* Ensure the watchdog never gets throttled */
	event->hw.interrupts = 0;

	if (__this_cpu_read(watchdog_nmi_touch) == true) {
		__this_cpu_write(watchdog_nmi_touch, false);
		return;
	}

	/*
	 * Check for a hardlockup by making sure our timer interrupt is
	 * incrementing.  The timer interrupt should have fired multiple
	 * times before we overflowed.  If it hasn't, then this is a good
	 * indication the CPU is stuck.
	 */
	if (is_hardlockup()) {
		int this_cpu = smp_processor_id();

		/* only print hardlockups once */
		if (__this_cpu_read(hard_watchdog_warn) == true)
			return;

		if (hardlockup_panic)
			panic("Watchdog detected hard LOCKUP on cpu %d",
			      this_cpu);
		else
			WARN(1, "Watchdog detected hard LOCKUP on cpu %d",
			     this_cpu);

		__this_cpu_write(hard_watchdog_warn, true);
		return;
	}

	__this_cpu_write(hard_watchdog_warn, false);
	return;
}
#endif /* CONFIG_HARDLOCKUP_DETECTOR */

static void watchdog_interrupt_count(void)
{
	__this_cpu_inc(hrtimer_interrupts);
}

static int watchdog_nmi_enable(unsigned int cpu);
static void watchdog_nmi_disable(unsigned int cpu);

/* watchdog kicker functions */
static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
{
	unsigned long touch_ts = __this_cpu_read(watchdog_touch_ts);
	struct pt_regs *regs = get_irq_regs();
	int duration;
	int softlockup_all_cpu_backtrace = sysctl_softlockup_all_cpu_backtrace;

	/* kick the hardlockup detector */
	watchdog_interrupt_count();

	/* kick the softlockup detector */
	wake_up_process(__this_cpu_read(softlockup_watchdog));

	/* .. and repeat */
	hrtimer_forward_now(hrtimer, ns_to_ktime(sample_period));

	if (touch_ts == 0) {
		if (unlikely(__this_cpu_read(softlockup_touch_sync))) {
			/*
			 * If the time stamp was touched atomically
			 * make sure the scheduler tick is up to date.
			 */
			__this_cpu_write(softlockup_touch_sync, false);
			sched_clock_tick();
		}

		/* Clear the guest paused flag on watchdog reset */
		kvm_check_and_clear_guest_paused();
		__touch_watchdog();
		return HRTIMER_RESTART;
	}

	/*
	 * Check for a softlockup by making sure a high priority task is
	 * being scheduled.  The task touches the watchdog to indicate it
	 * is getting cpu time.  If it hasn't, then this is a good
	 * indication some task is hogging the cpu.
	 */
	duration = is_softlockup(touch_ts);
	if (unlikely(duration)) {
		/*
		 * If a virtual machine is stopped by the host it can look to
		 * the watchdog like a soft lockup. Check to see if the host
		 * stopped the VM before we issue the warning.
		 */
		if (kvm_check_and_clear_guest_paused())
			return HRTIMER_RESTART;

		/* only warn once */
		if (__this_cpu_read(soft_watchdog_warn) == true) {
			/*
			 * When multiple processes are causing softlockups the
			 * softlockup detector only warns on the first one
			 * because the code relies on a full quiet cycle to
			 * re-arm.  The second process prevents the quiet cycle
			 * and never gets reported.  Use task pointers to detect
			 * this.
			 */
			if (__this_cpu_read(softlockup_task_ptr_saved) !=
			    current) {
				__this_cpu_write(soft_watchdog_warn, false);
				__touch_watchdog();
			}
			return HRTIMER_RESTART;
		}

		if (softlockup_all_cpu_backtrace) {
			/* Prevent multiple soft-lockup reports if one cpu is already
			 * engaged in dumping cpu back traces
			 */
			if (test_and_set_bit(0, &soft_lockup_nmi_warn)) {
				/* Someone else will report us. Let's give up */
				__this_cpu_write(soft_watchdog_warn, true);
				return HRTIMER_RESTART;
			}
		}

		pr_emerg("BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n",
			smp_processor_id(), duration,
			current->comm, task_pid_nr(current));
		__this_cpu_write(softlockup_task_ptr_saved, current);
		print_modules();
		print_irqtrace_events(current);
		if (regs)
			show_regs(regs);
		else
			dump_stack();

		if (softlockup_all_cpu_backtrace) {
			/* Avoid generating two back traces for current
			 * given that one is already made above
			 */
			trigger_allbutself_cpu_backtrace();

			clear_bit(0, &soft_lockup_nmi_warn);
			/* Barrier to sync with other cpus */
			smp_mb__after_atomic();
		}

		add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
		if (softlockup_panic)
			panic("softlockup: hung tasks");
		__this_cpu_write(soft_watchdog_warn, true);
	} else
		__this_cpu_write(soft_watchdog_warn, false);

	return HRTIMER_RESTART;
}

static void watchdog_set_prio(unsigned int policy, unsigned int prio)
{
	struct sched_param param = { .sched_priority = prio };

	sched_setscheduler(current, policy, &param);
}

static void watchdog_enable(unsigned int cpu)
{
	struct hrtimer *hrtimer = raw_cpu_ptr(&watchdog_hrtimer);

	/* kick off the timer for the hardlockup detector */
	hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	hrtimer->function = watchdog_timer_fn;

	/* Enable the perf event */
	watchdog_nmi_enable(cpu);

	/* done here because hrtimer_start can only pin to smp_processor_id() */
	hrtimer_start(hrtimer, ns_to_ktime(sample_period),
		      HRTIMER_MODE_REL_PINNED);

	/* initialize timestamp */
	watchdog_set_prio(SCHED_FIFO, MAX_RT_PRIO - 1);
	__touch_watchdog();
}

static void watchdog_disable(unsigned int cpu)
{
	struct hrtimer *hrtimer = raw_cpu_ptr(&watchdog_hrtimer);

	watchdog_set_prio(SCHED_NORMAL, 0);
	hrtimer_cancel(hrtimer);
	/* disable the perf event */
	watchdog_nmi_disable(cpu);
}

static void watchdog_cleanup(unsigned int cpu, bool online)
{
	watchdog_disable(cpu);
}

static int watchdog_should_run(unsigned int cpu)
{
	return __this_cpu_read(hrtimer_interrupts) !=
		__this_cpu_read(soft_lockup_hrtimer_cnt);
}

/*
 * The watchdog thread function - touches the timestamp.
 *
 * It only runs once every sample_period (4 seconds by default) to
 * reset the softlockup timestamp. If this gets delayed for more than
 * 2*watchdog_thresh seconds then the debug-printout triggers in
 * watchdog_timer_fn().
 */
static void watchdog(unsigned int cpu)
{
	__this_cpu_write(soft_lockup_hrtimer_cnt,
			 __this_cpu_read(hrtimer_interrupts));
	__touch_watchdog();
}
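
/*
 * Kick/acknowledge handshake: watchdog_timer_fn() increments
 * hrtimer_interrupts and wakes this thread; watchdog_should_run() then
 * sees hrtimer_interrupts != soft_lockup_hrtimer_cnt, so watchdog()
 * runs, copies the count and touches the timestamp. If the thread cannot
 * run (e.g. a kernel path spins without scheduling), the timestamp goes
 * stale and is_softlockup() eventually reports the stall.
 */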

#ifdef CONFIG_HARDLOCKUP_DETECTOR
/*
 * People like the simple clean cpu node info on boot.
 * Reduce the watchdog noise by only printing messages
 * that are different from what cpu0 displayed.
 */
static unsigned long cpu0_err;

static int watchdog_nmi_enable(unsigned int cpu)
{
	struct perf_event_attr *wd_attr;
	struct perf_event *event = per_cpu(watchdog_ev, cpu);

	/*
	 * Some kernels need to default hard lockup detection to
	 * 'disabled', for example a guest on a hypervisor.
	 */
	if (!watchdog_hardlockup_detector_is_enabled()) {
		event = ERR_PTR(-ENOENT);
		goto handle_err;
	}

	/* is it already setup and enabled? */
	if (event && event->state > PERF_EVENT_STATE_OFF)
		goto out;

	/* it is setup but not enabled */
	if (event != NULL)
		goto out_enable;

	wd_attr = &wd_hw_attr;
	wd_attr->sample_period = hw_nmi_get_sample_period(watchdog_thresh);

	/* Try to register using hardware perf events */
	event = perf_event_create_kernel_counter(wd_attr, cpu, NULL, watchdog_overflow_callback, NULL);

handle_err:
	/* save cpu0 error for future comparison */
	if (cpu == 0 && IS_ERR(event))
		cpu0_err = PTR_ERR(event);

	if (!IS_ERR(event)) {
		/* only print for cpu0 or different than cpu0 */
		if (cpu == 0 || cpu0_err)
			pr_info("enabled on all CPUs, permanently consumes one hw-PMU counter.\n");
		goto out_save;
	}

	/* skip displaying the same error again */
	if (cpu > 0 && (PTR_ERR(event) == cpu0_err))
		return PTR_ERR(event);

	/* vary the KERN level based on the returned errno */
	if (PTR_ERR(event) == -EOPNOTSUPP)
		pr_info("disabled (cpu%i): not supported (no LAPIC?)\n", cpu);
	else if (PTR_ERR(event) == -ENOENT)
		pr_warn("disabled (cpu%i): hardware events not enabled\n",
			 cpu);
	else
		pr_err("disabled (cpu%i): unable to create perf event: %ld\n",
			cpu, PTR_ERR(event));
	return PTR_ERR(event);

	/* success path */
out_save:
	per_cpu(watchdog_ev, cpu) = event;
out_enable:
	perf_event_enable(per_cpu(watchdog_ev, cpu));
out:
	return 0;
}

static void watchdog_nmi_disable(unsigned int cpu)
{
	struct perf_event *event = per_cpu(watchdog_ev, cpu);

	if (event) {
		perf_event_disable(event);
		per_cpu(watchdog_ev, cpu) = NULL;

		/* should be in cleanup, but blocks oprofile */
		perf_event_release_kernel(event);
	}
	if (cpu == 0) {
		/* watchdog_nmi_enable() expects this to be zero initially. */
		cpu0_err = 0;
	}
}
#else
static int watchdog_nmi_enable(unsigned int cpu) { return 0; }
static void watchdog_nmi_disable(unsigned int cpu) { return; }
#endif /* CONFIG_HARDLOCKUP_DETECTOR */

static struct smp_hotplug_thread watchdog_threads = {
	.store			= &softlockup_watchdog,
	.thread_should_run	= watchdog_should_run,
	.thread_fn		= watchdog,
	.thread_comm		= "watchdog/%u",
	.setup			= watchdog_enable,
	.cleanup		= watchdog_cleanup,
	.park			= watchdog_disable,
	.unpark			= watchdog_enable,
};
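
/*
 * smpboot_register_percpu_thread() creates one "watchdog/%u" thread per
 * online CPU. watchdog_enable() runs as the setup/unpark callback (start
 * the hrtimer, enable the perf event, raise to SCHED_FIFO), while
 * watchdog_disable() runs as the park callback on CPU offline and via
 * watchdog_cleanup() when the threads are unregistered.
 */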

static void restart_watchdog_hrtimer(void *info)
{
	struct hrtimer *hrtimer = raw_cpu_ptr(&watchdog_hrtimer);
	int ret;

	/*
	 * No need to cancel and restart hrtimer if it is currently executing
	 * because it will reprogram itself with the new period now.
	 * We should never see it unqueued here because we are running per-cpu
	 * with interrupts disabled.
	 */
	ret = hrtimer_try_to_cancel(hrtimer);
	if (ret == 1)
		hrtimer_start(hrtimer, ns_to_ktime(sample_period),
				HRTIMER_MODE_REL_PINNED);
}

static void update_timers(int cpu)
{
	/*
	 * Make sure that the perf event counter will adapt to the new
	 * sampling period. Updating the sampling period directly would
	 * be much nicer but we do not have an API for that now, so
	 * let's use a big hammer.
	 * The hrtimer will adopt the new period on the next tick, but
	 * that might already be too late, so we have to restart the
	 * timer as well.
	 */
	watchdog_nmi_disable(cpu);
	smp_call_function_single(cpu, restart_watchdog_hrtimer, NULL, 1);
	watchdog_nmi_enable(cpu);
}

static void update_timers_all_cpus(void)
{
	int cpu;

	get_online_cpus();
	for_each_online_cpu(cpu)
		update_timers(cpu);
	put_online_cpus();
}

static int watchdog_enable_all_cpus(bool sample_period_changed)
{
	int err = 0;

	if (!watchdog_running) {
		err = smpboot_register_percpu_thread(&watchdog_threads);
		if (err)
			pr_err("Failed to create watchdog threads, disabled\n");
		else
			watchdog_running = 1;
	} else if (sample_period_changed) {
		update_timers_all_cpus();
	}

	return err;
}

/* prepare/enable/disable routines */
/* sysctl functions */
#ifdef CONFIG_SYSCTL
static void watchdog_disable_all_cpus(void)
{
	if (watchdog_running) {
		watchdog_running = 0;
		smpboot_unregister_percpu_thread(&watchdog_threads);
	}
}

/*
 * Update the run state of the lockup detectors.
 */
static int proc_watchdog_update(void)
{
	int err = 0;

	/*
	 * Watchdog threads won't be started if they are already active.
	 * The 'watchdog_running' variable in watchdog_*_all_cpus() takes
	 * care of this. If those threads are already active, the sample
	 * period will be updated and the lockup detectors will be enabled
	 * or disabled 'on the fly'.
	 */
	if (watchdog_enabled && watchdog_thresh)
		err = watchdog_enable_all_cpus(true);
	else
		watchdog_disable_all_cpus();

	return err;

}

static DEFINE_MUTEX(watchdog_proc_mutex);

/*
 * common function for watchdog, nmi_watchdog and soft_watchdog parameter
 *
 * caller             | table->data points to | 'which' contains the flag(s)
 * -------------------|-----------------------|-----------------------------
 * proc_watchdog      | watchdog_user_enabled | NMI_WATCHDOG_ENABLED or'ed
 *                    |                       | with SOFT_WATCHDOG_ENABLED
 * -------------------|-----------------------|-----------------------------
 * proc_nmi_watchdog  | nmi_watchdog_enabled  | NMI_WATCHDOG_ENABLED
 * -------------------|-----------------------|-----------------------------
 * proc_soft_watchdog | soft_watchdog_enabled | SOFT_WATCHDOG_ENABLED
 */
static int proc_watchdog_common(int which, struct ctl_table *table, int write,
				void __user *buffer, size_t *lenp, loff_t *ppos)
{
	int err, old, new;
	int *watchdog_param = (int *)table->data;

	mutex_lock(&watchdog_proc_mutex);

	/*
	 * If the parameter is being read return the state of the corresponding
	 * bit(s) in 'watchdog_enabled', else update 'watchdog_enabled' and the
	 * run state of the lockup detectors.
	 */
	if (!write) {
		*watchdog_param = (watchdog_enabled & which) != 0;
		err = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
	} else {
		err = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
		if (err)
			goto out;

		/*
		 * There is a race window between fetching the current value
		 * from 'watchdog_enabled' and storing the new value. During
		 * this race window, watchdog_nmi_enable() can sneak in and
		 * clear the NMI_WATCHDOG_ENABLED bit in 'watchdog_enabled'.
		 * The 'cmpxchg' detects this race and the loop retries.
		 */
		do {
			old = watchdog_enabled;
			/*
			 * If the parameter value is not zero set the
			 * corresponding bit(s), else clear it(them).
			 */
			if (*watchdog_param)
				new = old | which;
			else
				new = old & ~which;
		} while (cmpxchg(&watchdog_enabled, old, new) != old);

		/*
		 * Update the run state of the lockup detectors.
		 * Restore 'watchdog_enabled' on failure.
		 */
		err = proc_watchdog_update();
		if (err)
			watchdog_enabled = old;
	}
out:
	mutex_unlock(&watchdog_proc_mutex);
	return err;
}
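
/*
 * Example: assuming a ctl_table entry wires proc_nmi_watchdog() to
 * 'nmi_watchdog_enabled' with 'which' == NMI_WATCHDOG_ENABLED (as in the
 * table above), "echo 0 > /proc/sys/kernel/nmi_watchdog" clears only
 * bit 0 of 'watchdog_enabled' and leaves the soft lockup detector alone.
 */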

/*
 * proc handler for /proc/sys/kernel/nmi_watchdog and /proc/sys/kernel/watchdog_thresh
 */

int proc_dowatchdog(struct ctl_table *table, int write,
		    void __user *buffer, size_t *lenp, loff_t *ppos)
{
	int err, old_thresh, old_enabled;
	bool old_hardlockup;

	mutex_lock(&watchdog_proc_mutex);
	old_thresh = ACCESS_ONCE(watchdog_thresh);
	old_enabled = ACCESS_ONCE(watchdog_user_enabled);
	old_hardlockup = watchdog_hardlockup_detector_is_enabled();

	err = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
	if (err || !write)
		goto out;

	set_sample_period();
	/*
	 * The watchdog threads shouldn't be started while the watchdog
	 * is disabled; the 'watchdog_running' check in the
	 * watchdog_*_all_cpus() functions takes care of this.
	 */
	if (watchdog_user_enabled && watchdog_thresh) {
		/*
		 * Prevent a change in watchdog_thresh accidentally overriding
		 * the enablement of the hardlockup detector.
		 */
		if (watchdog_user_enabled != old_enabled)
			watchdog_enable_hardlockup_detector(true);
		err = watchdog_enable_all_cpus(old_thresh != watchdog_thresh);
	} else
		watchdog_disable_all_cpus();

	/* Restore old values on failure */
	if (err) {
		watchdog_thresh = old_thresh;
		watchdog_user_enabled = old_enabled;
		watchdog_enable_hardlockup_detector(old_hardlockup);
	}
out:
	mutex_unlock(&watchdog_proc_mutex);
	return err;
}
#endif /* CONFIG_SYSCTL */

void __init lockup_detector_init(void)
{
	set_sample_period();

	if (watchdog_user_enabled)
		watchdog_enable_all_cpus(false);
}