/*
 * linux/kernel/time/tick-common.c
 *
 * This file contains the base functions to manage periodic tick
 * related events.
 *
 * Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de>
 * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
 * Copyright(C) 2006-2007, Timesys Corp., Thomas Gleixner
 *
 * This code is licenced under the GPL version 2. For details see
 * kernel-base/COPYING.
 */
#include <linux/cpu.h>
#include <linux/err.h>
#include <linux/hrtimer.h>
#include <linux/interrupt.h>
#include <linux/percpu.h>
#include <linux/profile.h>
#include <linux/sched.h>
#include <linux/module.h>

#include <asm/irq_regs.h>

#include "tick-internal.h"

/*
 * Tick devices
 */
DEFINE_PER_CPU(struct tick_device, tick_cpu_device);
/*
 * Tick next event: keeps track of the tick time
 */
ktime_t tick_next_period;
ktime_t tick_period;

/*
 * tick_do_timer_cpu is a timer core internal variable which holds the CPU NR
 * which is responsible for calling do_timer(), i.e. the timekeeping stuff. This
 * variable has two functions:
 *
 * 1) Prevent a thundering herd issue of a gazillion of CPUs trying to grab the
 *    timekeeping lock all at once. Only the CPU which is assigned to do the
 *    update is handling it.
 *
 * 2) Hand off the duty in the NOHZ idle case by setting the value to
 *    TICK_DO_TIMER_NONE, i.e. a non-existing CPU. So the next CPU which looks
 *    at it will take over and keep the timekeeping alive.  The handover
 *    procedure also covers CPU hotplug.
 */
int tick_do_timer_cpu __read_mostly = TICK_DO_TIMER_BOOT;
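
/*
 * Hand-off illustration (a minimal sketch, not code from this file): the
 * NOHZ tick code in kernel/time/tick-sched.c is where an orphaned duty is
 * picked up again. A CPU which still receives ticks and finds the duty
 * unassigned claims it, roughly like:
 *
 *	if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE) &&
 *	    !tick_nohz_full_cpu(smp_processor_id()))
 *		tick_do_timer_cpu = smp_processor_id();
 *
 * Exact details vary between kernel versions.
 */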

/*
 * Debugging: see timer_list.c
 */
struct tick_device *tick_get_device(int cpu)
{
	return &per_cpu(tick_cpu_device, cpu);
}

/**
 * tick_is_oneshot_available - check for a oneshot capable event device
 */
int tick_is_oneshot_available(void)
{
	struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev);

	if (!dev || !(dev->features & CLOCK_EVT_FEAT_ONESHOT))
		return 0;
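	/*
	 * Devices with CLOCK_EVT_FEAT_C3STOP stop when the CPU enters deep
	 * C-states, so they can only provide oneshot ticks if the broadcast
	 * device is able to take over while they are stopped.
	 */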
	if (!(dev->features & CLOCK_EVT_FEAT_C3STOP))
		return 1;
	return tick_broadcast_oneshot_available();
}

/*
 * Periodic tick
 */
static void tick_periodic(int cpu)
{
	if (tick_do_timer_cpu == cpu) {
		write_seqlock(&jiffies_lock);

		/* Keep track of the next tick event */
		tick_next_period = ktime_add(tick_next_period, tick_period);

		do_timer(1);
		write_sequnlock(&jiffies_lock);
		update_wall_time();
	}

	update_process_times(user_mode(get_irq_regs()));
	profile_tick(CPU_PROFILING);
}

/*
 * Event handler for periodic ticks
 */
void tick_handle_periodic(struct clock_event_device *dev)
{
	int cpu = smp_processor_id();
	ktime_t next = dev->next_event;

	tick_periodic(cpu);

	if (dev->mode != CLOCK_EVT_MODE_ONESHOT)
		return;
	for (;;) {
		/*
		 * Set up the next period for devices which do not have
		 * periodic mode:
		 */
		next = ktime_add(next, tick_period);

		if (!clockevents_program_event(dev, next, false))
			return;
		/*
		 * Have to be careful here. If we're in oneshot mode,
		 * before we call tick_periodic() in a loop, we need
		 * to be sure we're using a real hardware clocksource.
		 * Otherwise we could get trapped in an infinite
		 * loop, as the tick_periodic() increments jiffies,
		 * which then will increment time, possibly causing
		 * the loop to trigger again and again.
		 */
		if (timekeeping_valid_for_hres())
			tick_periodic(cpu);
	}
}

/*
 * Set up the device for a periodic tick
 */
void tick_setup_periodic(struct clock_event_device *dev, int broadcast)
{
	tick_set_periodic_handler(dev, broadcast);

	/* Broadcast setup? */
	if (!tick_device_is_functional(dev))
		return;

	if ((dev->features & CLOCK_EVT_FEAT_PERIODIC) &&
	    !tick_broadcast_oneshot_active()) {
		clockevents_set_mode(dev, CLOCK_EVT_MODE_PERIODIC);
	} else {
		unsigned long seq;
		ktime_t next;

		do {
			seq = read_seqbegin(&jiffies_lock);
			next = tick_next_period;
		} while (read_seqretry(&jiffies_lock, seq));

		clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT);

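		/*
		 * Program the first event; if the requested expiry time has
		 * already passed, advance by another tick period and retry
		 * until clockevents_program_event() succeeds.
		 */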
		for (;;) {
			if (!clockevents_program_event(dev, next, false))
				return;
			next = ktime_add(next, tick_period);
		}
	}
}

/*
 * Set up the tick device
 */
static void tick_setup_device(struct tick_device *td,
			      struct clock_event_device *newdev, int cpu,
			      const struct cpumask *cpumask)
{
	ktime_t next_event;
	void (*handler)(struct clock_event_device *) = NULL;

	/*
	 * First device setup?
	 */
	if (!td->evtdev) {
		/*
		 * If no cpu took the do_timer update, assign it to
		 * this cpu:
		 */
		if (tick_do_timer_cpu == TICK_DO_TIMER_BOOT) {
			if (!tick_nohz_full_cpu(cpu))
				tick_do_timer_cpu = cpu;
			else
				tick_do_timer_cpu = TICK_DO_TIMER_NONE;
			tick_next_period = ktime_get();
			tick_period = ktime_set(0, NSEC_PER_SEC / HZ);
		}

		/*
		 * Start up in periodic mode first.
		 */
		td->mode = TICKDEV_MODE_PERIODIC;
	} else {
		handler = td->evtdev->event_handler;
		next_event = td->evtdev->next_event;
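		/*
		 * Detach the old device from the tick layer: any events it
		 * still delivers before the switch-over are routed to the
		 * noop handler and dropped.
		 */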
		td->evtdev->event_handler = clockevents_handle_noop;
	}

	td->evtdev = newdev;

	/*
	 * When the device is not per cpu, pin the interrupt to the
	 * current cpu:
	 */
	if (!cpumask_equal(newdev->cpumask, cpumask))
		irq_set_affinity(newdev->irq, cpumask);

	/*
	 * When global broadcasting is active, check if the current
	 * device is registered as a placeholder for broadcast mode.
	 * This allows us to handle this x86 misfeature in a generic
	 * way. This function also returns !=0 when we keep the
	 * current active broadcast state for this CPU.
	 */
	if (tick_device_uses_broadcast(newdev, cpu))
		return;

	if (td->mode == TICKDEV_MODE_PERIODIC)
		tick_setup_periodic(newdev, 0);
	else
		tick_setup_oneshot(newdev, handler, next_event);
}

void tick_install_replacement(struct clock_event_device *newdev)
{
	struct tick_device *td = this_cpu_ptr(&tick_cpu_device);
	int cpu = smp_processor_id();

	clockevents_exchange_device(td->evtdev, newdev);
	tick_setup_device(td, newdev, cpu, cpumask_of(cpu));
	if (newdev->features & CLOCK_EVT_FEAT_ONESHOT)
		tick_oneshot_notify();
}

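/*
 * Check whether the new device can serve @cpu as its per-cpu tick device:
 * it must cover the CPU in its cpumask, and if it is not strictly per-cpu
 * its interrupt affinity must be settable. An already installed cpu-local
 * device is preferred over a non-local replacement.
 */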
static bool tick_check_percpu(struct clock_event_device *curdev,
			      struct clock_event_device *newdev, int cpu)
{
	if (!cpumask_test_cpu(cpu, newdev->cpumask))
		return false;
	if (cpumask_equal(newdev->cpumask, cpumask_of(cpu)))
		return true;
	/* Check if irq affinity can be set */
	if (newdev->irq >= 0 && !irq_can_set_affinity(newdev->irq))
		return false;
	/* Prefer an existing cpu local device */
	if (curdev && cpumask_equal(curdev->cpumask, cpumask_of(cpu)))
		return false;
	return true;
}

static bool tick_check_preferred(struct clock_event_device *curdev,
				 struct clock_event_device *newdev)
{
	/* Prefer oneshot capable device */
	if (!(newdev->features & CLOCK_EVT_FEAT_ONESHOT)) {
		if (curdev && (curdev->features & CLOCK_EVT_FEAT_ONESHOT))
			return false;
		if (tick_oneshot_mode_active())
			return false;
	}

	/*
	 * Use the higher rated one, but prefer a CPU local device with a lower
	 * rating than a non-CPU local device
	 */
	return !curdev ||
		newdev->rating > curdev->rating ||
	       !cpumask_equal(curdev->cpumask, newdev->cpumask);
}

/*
 * Check whether the new device is a better fit than curdev. curdev
 * can be NULL!
 */
bool tick_check_replacement(struct clock_event_device *curdev,
			    struct clock_event_device *newdev)
{
	if (!tick_check_percpu(curdev, newdev, smp_processor_id()))
		return false;

	return tick_check_preferred(curdev, newdev);
}

/*
 * Check whether the newly registered device should be used. Called with
 * clockevents_lock held and interrupts disabled.
 */
void tick_check_new_device(struct clock_event_device *newdev)
{
	struct clock_event_device *curdev;
	struct tick_device *td;
	int cpu;

	cpu = smp_processor_id();
	if (!cpumask_test_cpu(cpu, newdev->cpumask))
		goto out_bc;

	td = &per_cpu(tick_cpu_device, cpu);
	curdev = td->evtdev;

	/* cpu local device? */
	if (!tick_check_percpu(curdev, newdev, cpu))
		goto out_bc;

	/* Preference decision */
	if (!tick_check_preferred(curdev, newdev))
		goto out_bc;

	if (!try_module_get(newdev->owner))
		return;

	/*
	 * Replace the existing device, if any, by the new one. If the
	 * current device is the broadcast device, do not give it back
	 * to the clockevents layer!
	 */
	if (tick_is_broadcast_device(curdev)) {
		clockevents_shutdown(curdev);
		curdev = NULL;
	}
	clockevents_exchange_device(curdev, newdev);
	tick_setup_device(td, newdev, cpu, cpumask_of(cpu));
	if (newdev->features & CLOCK_EVT_FEAT_ONESHOT)
		tick_oneshot_notify();
	return;

out_bc:
	/*
	 * Can the new device be used as a broadcast device?
	 */
	tick_install_broadcast_device(newdev);
}

/*
 * Transfer the do_timer job away from a dying cpu.
 *
 * Called with interrupts disabled.
 */
void tick_handover_do_timer(int *cpup)
{
	if (*cpup == tick_do_timer_cpu) {
		int cpu = cpumask_first(cpu_online_mask);

		tick_do_timer_cpu = (cpu < nr_cpu_ids) ? cpu :
			TICK_DO_TIMER_NONE;
	}
}

/*
 * Shutdown an event device on a given cpu:
 *
 * This is called on a live CPU, when a CPU is dead. So we cannot
 * access the hardware device itself.
 * We just set the mode and remove it from the lists.
 */
void tick_shutdown(unsigned int *cpup)
{
	struct tick_device *td = &per_cpu(tick_cpu_device, *cpup);
	struct clock_event_device *dev = td->evtdev;

	td->mode = TICKDEV_MODE_PERIODIC;
	if (dev) {
		/*
		 * Prevent the clockevents layer from trying to call
		 * the set mode function!
		 */
		dev->mode = CLOCK_EVT_MODE_UNUSED;
		clockevents_exchange_device(dev, NULL);
		dev->event_handler = clockevents_handle_noop;
		td->evtdev = NULL;
	}
}

void tick_suspend(void)
{
	struct tick_device *td = this_cpu_ptr(&tick_cpu_device);

	clockevents_shutdown(td->evtdev);
}

void tick_resume(void)
{
	struct tick_device *td = this_cpu_ptr(&tick_cpu_device);
	int broadcast = tick_resume_broadcast();

	clockevents_set_mode(td->evtdev, CLOCK_EVT_MODE_RESUME);

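	/*
	 * If the broadcast device handles the ticks after resume, leave the
	 * local device alone; otherwise restore it in the mode it was using
	 * before suspend.
	 */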
	if (!broadcast) {
		if (td->mode == TICKDEV_MODE_PERIODIC)
			tick_setup_periodic(td->evtdev, 0);
		else
			tick_resume_oneshot();
	}
}

static DEFINE_RAW_SPINLOCK(tick_freeze_lock);
static unsigned int tick_freeze_depth;

/**
 * tick_freeze - Suspend the local tick and (possibly) timekeeping.
 *
 * Check if this is the last online CPU executing the function and if so,
 * suspend timekeeping.  Otherwise suspend the local tick.
 *
 * Call with interrupts disabled.  Must be balanced with %tick_unfreeze().
 * Interrupts must not be enabled before the subsequent %tick_unfreeze().
 */
void tick_freeze(void)
{
	raw_spin_lock(&tick_freeze_lock);

	tick_freeze_depth++;
	if (tick_freeze_depth == num_online_cpus()) {
		timekeeping_suspend();
	} else {
		tick_suspend();
		tick_suspend_broadcast();
	}

	raw_spin_unlock(&tick_freeze_lock);
}

/**
 * tick_unfreeze - Resume the local tick and (possibly) timekeeping.
 *
 * Check if this is the first CPU executing the function and if so, resume
 * timekeeping.  Otherwise resume the local tick.
 *
 * Call with interrupts disabled.  Must be balanced with %tick_freeze().
 * Interrupts must not be enabled after the preceding %tick_freeze().
 */
void tick_unfreeze(void)
{
	raw_spin_lock(&tick_freeze_lock);

	if (tick_freeze_depth == num_online_cpus())
		timekeeping_resume();
	else
		tick_resume();

	tick_freeze_depth--;

	raw_spin_unlock(&tick_freeze_lock);
}

/**
 * tick_init - initialize the tick control
 */
void __init tick_init(void)
{
	tick_broadcast_init();
	tick_nohz_init();
}