/*
 *	linux/kernel/softirq.c
 *
 *	Copyright (C) 1992 Linus Torvalds
 *
 *	Distribute under GPLv2.
 *
 *	Rewritten. Old one was good in 2.2, but in 2.3 it was immoral. --ANK (990903)
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/export.h>
#include <linux/kernel_stat.h>
#include <linux/interrupt.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/notifier.h>
#include <linux/percpu.h>
#include <linux/cpu.h>
#include <linux/freezer.h>
#include <linux/kthread.h>
#include <linux/rcupdate.h>
#include <linux/ftrace.h>
#include <linux/smp.h>
#include <linux/smpboot.h>
#include <linux/tick.h>
#include <linux/irq.h>

#define CREATE_TRACE_POINTS
#include <trace/events/irq.h>

/*
   - No shared variables, all the data are CPU local.
   - If a softirq needs serialization, let it serialize itself
     by its own spinlocks.
   - Even if softirq is serialized, only local cpu is marked for
     execution. Hence, we get something sort of weak cpu binding.
     Though it is still not clear whether this will result in better
     locality or not.

   Examples:
   - NET RX softirq. It is multithreaded and does not require
     any global serialization.
   - NET TX softirq. It kicks software netdevice queues, hence
     it is logically serialized per device, but this serialization
     is invisible to common code.
   - Tasklets: serialized wrt itself.
 */

#ifndef __ARCH_IRQ_STAT
irq_cpustat_t irq_stat[NR_CPUS] ____cacheline_aligned;
EXPORT_SYMBOL(irq_stat);
#endif

static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp;

DEFINE_PER_CPU(struct task_struct *, ksoftirqd);

const char * const softirq_to_name[NR_SOFTIRQS] = {
	"HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "IRQ_POLL",
	"TASKLET", "SCHED", "HRTIMER", "RCU"
};

/*
 * We cannot loop indefinitely here to avoid userspace starvation,
 * but we also don't want to introduce a worst case 1/HZ latency
 * to the pending events, so let the scheduler balance
 * the softirq load for us.
 */
static void wakeup_softirqd(void)
{
	/* Interrupts are disabled: no need to stop preemption */
	struct task_struct *tsk = __this_cpu_read(ksoftirqd);

	if (tsk && tsk->state != TASK_RUNNING)
		wake_up_process(tsk);
}

/*
 * If ksoftirqd is scheduled, we do not want to process pending softirqs
 * right now. Let ksoftirqd handle this at its own rate, to get fairness.
 */
static bool ksoftirqd_running(void)
{
	struct task_struct *tsk = __this_cpu_read(ksoftirqd);

	return tsk && (tsk->state == TASK_RUNNING);
}

/*
 * preempt_count and SOFTIRQ_OFFSET usage:
 * - preempt_count is changed by SOFTIRQ_OFFSET on entering or leaving
 *   softirq processing.
 * - preempt_count is changed by SOFTIRQ_DISABLE_OFFSET (= 2 * SOFTIRQ_OFFSET)
 *   on local_bh_disable or local_bh_enable.
 * This lets us distinguish between whether we are currently processing
 * softirq and whether we just have bh disabled.
 */
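
/*
 * Editor's illustrative sketch (not part of the original file): the two
 * offsets can be told apart at run time with the standard helpers:
 *
 *	local_bh_disable();	adds SOFTIRQ_DISABLE_OFFSET
 *		-> in_softirq() is true, in_serving_softirq() is false
 *	__do_softirq() entry	adds SOFTIRQ_OFFSET
 *		-> in_serving_softirq() is true while handlers run
 */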

/*
 * This one is for softirq.c-internal use,
 * where hardirqs are disabled legitimately:
 */
#ifdef CONFIG_TRACE_IRQFLAGS
void __local_bh_disable_ip(unsigned long ip, unsigned int cnt)
{
	unsigned long flags;

	WARN_ON_ONCE(in_irq());

	raw_local_irq_save(flags);
	/*
	 * The preempt tracer hooks into preempt_count_add and will break
	 * lockdep because it calls back into lockdep after SOFTIRQ_OFFSET
	 * is set and before current->softirq_enabled is cleared.
	 * We must manually increment preempt_count here and manually
	 * call the trace_preempt_off later.
	 */
	__preempt_count_add(cnt);
	/*
	 * Were softirqs turned off above:
	 */
	if (softirq_count() == (cnt & SOFTIRQ_MASK))
		trace_softirqs_off(ip);
	raw_local_irq_restore(flags);

	if (preempt_count() == cnt) {
#ifdef CONFIG_DEBUG_PREEMPT
		current->preempt_disable_ip = get_lock_parent_ip();
#endif
		trace_preempt_off(CALLER_ADDR0, get_lock_parent_ip());
	}
}
EXPORT_SYMBOL(__local_bh_disable_ip);
#endif /* CONFIG_TRACE_IRQFLAGS */

static void __local_bh_enable(unsigned int cnt)
{
	WARN_ON_ONCE(!irqs_disabled());

	if (softirq_count() == (cnt & SOFTIRQ_MASK))
		trace_softirqs_on(_RET_IP_);
	preempt_count_sub(cnt);
}

/*
 * Special-case - softirqs can safely be enabled in
 * cond_resched_softirq(), or by __do_softirq(),
 * without processing still-pending softirqs:
 */
void _local_bh_enable(void)
{
	WARN_ON_ONCE(in_irq());
	__local_bh_enable(SOFTIRQ_DISABLE_OFFSET);
}
EXPORT_SYMBOL(_local_bh_enable);

void __local_bh_enable_ip(unsigned long ip, unsigned int cnt)
{
	WARN_ON_ONCE(in_irq() || irqs_disabled());
#ifdef CONFIG_TRACE_IRQFLAGS
	local_irq_disable();
#endif
	/*
	 * Are softirqs going to be turned on now:
	 */
	if (softirq_count() == SOFTIRQ_DISABLE_OFFSET)
		trace_softirqs_on(ip);
	/*
	 * Keep preemption disabled until we are done with
	 * softirq processing:
	 */
	preempt_count_sub(cnt - 1);

	if (unlikely(!in_interrupt() && local_softirq_pending())) {
		/*
		 * Run softirq if any pending. And do it in its own stack
		 * as we may be calling this deep in a task call stack already.
		 */
		do_softirq();
	}

	preempt_count_dec();
#ifdef CONFIG_TRACE_IRQFLAGS
	local_irq_enable();
#endif
	preempt_check_resched();
}
EXPORT_SYMBOL(__local_bh_enable_ip);
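
/*
 * Editor's illustrative sketch (not part of the original file): most
 * callers reach the helpers above through the local_bh_disable() /
 * local_bh_enable() wrappers to protect per-CPU data that is also
 * touched from softirq context:
 *
 *	local_bh_disable();
 *	... access data shared with a softirq handler on this CPU ...
 *	local_bh_enable();	// may run pending softirqs right here
 */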

/*
 * We restart softirq processing for at most MAX_SOFTIRQ_RESTART times,
 * but break the loop if need_resched() is set or after 2 ms.
 * The MAX_SOFTIRQ_TIME provides a nice upper bound in most cases, but in
 * certain cases, such as stop_machine(), jiffies may cease to
 * increment and so we need the MAX_SOFTIRQ_RESTART limit as
 * well to make sure we eventually return from this method.
 *
 * These limits have been established via experimentation.
 * The two things to balance are latency and fairness -
 * we want to handle softirqs as soon as possible, but they
 * should not be able to lock up the box.
 */
#define MAX_SOFTIRQ_TIME  msecs_to_jiffies(2)
#define MAX_SOFTIRQ_RESTART 10

#ifdef CONFIG_TRACE_IRQFLAGS
/*
 * When we run softirqs from irq_exit() and thus on the hardirq stack we need
 * to keep the lockdep irq context tracking as tight as possible in order to
 * not mis-qualify lock contexts and miss possible deadlocks.
 */

static inline bool lockdep_softirq_start(void)
{
	bool in_hardirq = false;

	if (trace_hardirq_context(current)) {
		in_hardirq = true;
		trace_hardirq_exit();
	}

	lockdep_softirq_enter();

	return in_hardirq;
}

static inline void lockdep_softirq_end(bool in_hardirq)
{
	lockdep_softirq_exit();

	if (in_hardirq)
		trace_hardirq_enter();
}
#else
static inline bool lockdep_softirq_start(void) { return false; }
static inline void lockdep_softirq_end(bool in_hardirq) { }
#endif

asmlinkage __visible void __softirq_entry __do_softirq(void)
{
	unsigned long end = jiffies + MAX_SOFTIRQ_TIME;
	unsigned long old_flags = current->flags;
	int max_restart = MAX_SOFTIRQ_RESTART;
	struct softirq_action *h;
	bool in_hardirq;
	__u32 pending;
	int softirq_bit;

	/*
	 * Mask out PF_MEMALLOC as the current task context is borrowed for the
	 * softirq. A softirq handler such as network RX might set PF_MEMALLOC
	 * again if the socket is related to swap.
	 */
	current->flags &= ~PF_MEMALLOC;

	pending = local_softirq_pending();
	account_irq_enter_time(current);

	__local_bh_disable_ip(_RET_IP_, SOFTIRQ_OFFSET);
	in_hardirq = lockdep_softirq_start();

restart:
	/* Reset the pending bitmask before enabling irqs */
	set_softirq_pending(0);

	local_irq_enable();

	h = softirq_vec;

	while ((softirq_bit = ffs(pending))) {
		unsigned int vec_nr;
		int prev_count;

		h += softirq_bit - 1;

		vec_nr = h - softirq_vec;
		prev_count = preempt_count();

		kstat_incr_softirqs_this_cpu(vec_nr);

		trace_softirq_entry(vec_nr);
		h->action(h);
		trace_softirq_exit(vec_nr);
		if (unlikely(prev_count != preempt_count())) {
			pr_err("huh, entered softirq %u %s %p with preempt_count %08x, exited with %08x?\n",
			       vec_nr, softirq_to_name[vec_nr], h->action,
			       prev_count, preempt_count());
			preempt_count_set(prev_count);
		}
		h++;
		pending >>= softirq_bit;
	}

	rcu_bh_qs();
	local_irq_disable();

	pending = local_softirq_pending();
	if (pending) {
		if (time_before(jiffies, end) && !need_resched() &&
		    --max_restart)
			goto restart;

		wakeup_softirqd();
	}

	lockdep_softirq_end(in_hardirq);
	account_irq_exit_time(current);
	__local_bh_enable(SOFTIRQ_OFFSET);
	WARN_ON_ONCE(in_interrupt());
	tsk_restore_flags(current, old_flags, PF_MEMALLOC);
}

asmlinkage __visible void do_softirq(void)
{
	__u32 pending;
	unsigned long flags;

	if (in_interrupt())
		return;

	local_irq_save(flags);

	pending = local_softirq_pending();

	if (pending && !ksoftirqd_running())
		do_softirq_own_stack();

	local_irq_restore(flags);
}

/*
 * Enter an interrupt context.
 */
void irq_enter(void)
{
	rcu_irq_enter();
	if (is_idle_task(current) && !in_interrupt()) {
		/*
		 * Prevent raise_softirq from needlessly waking up ksoftirqd
		 * here, as softirq will be serviced on return from interrupt.
		 */
		local_bh_disable();
		tick_irq_enter();
		_local_bh_enable();
	}

	__irq_enter();
}

static inline void invoke_softirq(void)
{
	if (ksoftirqd_running())
		return;

	if (!force_irqthreads) {
#ifdef CONFIG_HAVE_IRQ_EXIT_ON_IRQ_STACK
		/*
		 * We can safely execute softirq on the current stack if
		 * it is the irq stack, because it should be near empty
		 * at this stage.
		 */
		__do_softirq();
#else
		/*
		 * Otherwise, irq_exit() is called on the task stack that can
		 * be potentially deep already. So call softirq in its own stack
		 * to prevent any overrun.
		 */
		do_softirq_own_stack();
#endif
	} else {
		wakeup_softirqd();
	}
}

static inline void tick_irq_exit(void)
{
#ifdef CONFIG_NO_HZ_COMMON
	int cpu = smp_processor_id();

	/* Make sure that timer wheel updates are propagated */
	if ((idle_cpu(cpu) && !need_resched()) || tick_nohz_full_cpu(cpu)) {
		if (!in_interrupt())
			tick_nohz_irq_exit();
	}
#endif
}

/*
 * Exit an interrupt context. Process softirqs if needed and possible:
 */
void irq_exit(void)
{
#ifndef __ARCH_IRQ_EXIT_IRQS_DISABLED
	local_irq_disable();
#else
	WARN_ON_ONCE(!irqs_disabled());
#endif

	account_irq_exit_time(current);
	preempt_count_sub(HARDIRQ_OFFSET);
	if (!in_interrupt() && local_softirq_pending())
		invoke_softirq();

	tick_irq_exit();
	rcu_irq_exit();
	trace_hardirq_exit(); /* must be last! */
}

/*
 * This function must run with irqs disabled!
 */
inline void raise_softirq_irqoff(unsigned int nr)
{
	__raise_softirq_irqoff(nr);

	/*
	 * If we're in an interrupt or softirq, we're done
	 * (this also catches softirq-disabled code). We will
	 * actually run the softirq once we return from
	 * the irq or softirq.
	 *
	 * Otherwise we wake up ksoftirqd to make sure we
	 * schedule the softirq soon.
	 */
	if (!in_interrupt())
		wakeup_softirqd();
}

void raise_softirq(unsigned int nr)
{
	unsigned long flags;

	local_irq_save(flags);
	raise_softirq_irqoff(nr);
	local_irq_restore(flags);
}

void __raise_softirq_irqoff(unsigned int nr)
{
	trace_softirq_raise(nr);
	or_softirq_pending(1UL << nr);
}

void open_softirq(int nr, void (*action)(struct softirq_action *))
{
	softirq_vec[nr].action = action;
}
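
/*
 * Editor's illustrative sketch (not part of the original file, names are
 * hypothetical): a subsystem typically registers its vector once at init
 * time and raises it later from interrupt context:
 *
 *	static void example_softirq_action(struct softirq_action *h)
 *	{
 *		... drain per-CPU work queued by the hard IRQ handler ...
 *	}
 *
 *	open_softirq(EXAMPLE_SOFTIRQ, example_softirq_action);	// at init
 *	raise_softirq(EXAMPLE_SOFTIRQ);				// from an IRQ
 */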

/*
 * Tasklets
 */
struct tasklet_head {
	struct tasklet_struct *head;
	struct tasklet_struct **tail;
};

static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec);
static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec);

void __tasklet_schedule(struct tasklet_struct *t)
{
	unsigned long flags;

	local_irq_save(flags);
	t->next = NULL;
	*__this_cpu_read(tasklet_vec.tail) = t;
	__this_cpu_write(tasklet_vec.tail, &(t->next));
	raise_softirq_irqoff(TASKLET_SOFTIRQ);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(__tasklet_schedule);

void __tasklet_hi_schedule(struct tasklet_struct *t)
{
	unsigned long flags;

	local_irq_save(flags);
	t->next = NULL;
	*__this_cpu_read(tasklet_hi_vec.tail) = t;
	__this_cpu_write(tasklet_hi_vec.tail,  &(t->next));
	raise_softirq_irqoff(HI_SOFTIRQ);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(__tasklet_hi_schedule);

void __tasklet_hi_schedule_first(struct tasklet_struct *t)
{
	BUG_ON(!irqs_disabled());

	t->next = __this_cpu_read(tasklet_hi_vec.head);
	__this_cpu_write(tasklet_hi_vec.head, t);
	__raise_softirq_irqoff(HI_SOFTIRQ);
}
EXPORT_SYMBOL(__tasklet_hi_schedule_first);

static __latent_entropy void tasklet_action(struct softirq_action *a)
{
	struct tasklet_struct *list;

	local_irq_disable();
	list = __this_cpu_read(tasklet_vec.head);
	__this_cpu_write(tasklet_vec.head, NULL);
	__this_cpu_write(tasklet_vec.tail, this_cpu_ptr(&tasklet_vec.head));
	local_irq_enable();

	while (list) {
		struct tasklet_struct *t = list;

		list = list->next;

		if (tasklet_trylock(t)) {
			if (!atomic_read(&t->count)) {
				if (!test_and_clear_bit(TASKLET_STATE_SCHED,
							&t->state))
					BUG();
				t->func(t->data);
				tasklet_unlock(t);
				continue;
			}
			tasklet_unlock(t);
		}

		local_irq_disable();
		t->next = NULL;
		*__this_cpu_read(tasklet_vec.tail) = t;
		__this_cpu_write(tasklet_vec.tail, &(t->next));
		__raise_softirq_irqoff(TASKLET_SOFTIRQ);
		local_irq_enable();
	}
}

static __latent_entropy void tasklet_hi_action(struct softirq_action *a)
{
	struct tasklet_struct *list;

	local_irq_disable();
	list = __this_cpu_read(tasklet_hi_vec.head);
	__this_cpu_write(tasklet_hi_vec.head, NULL);
	__this_cpu_write(tasklet_hi_vec.tail, this_cpu_ptr(&tasklet_hi_vec.head));
	local_irq_enable();

	while (list) {
		struct tasklet_struct *t = list;

		list = list->next;

		if (tasklet_trylock(t)) {
			if (!atomic_read(&t->count)) {
				if (!test_and_clear_bit(TASKLET_STATE_SCHED,
							&t->state))
					BUG();
				t->func(t->data);
				tasklet_unlock(t);
				continue;
			}
			tasklet_unlock(t);
		}

		local_irq_disable();
		t->next = NULL;
		*__this_cpu_read(tasklet_hi_vec.tail) = t;
		__this_cpu_write(tasklet_hi_vec.tail, &(t->next));
		__raise_softirq_irqoff(HI_SOFTIRQ);
		local_irq_enable();
	}
}

void tasklet_init(struct tasklet_struct *t,
		  void (*func)(unsigned long), unsigned long data)
{
	t->next = NULL;
	t->state = 0;
	atomic_set(&t->count, 0);
	t->func = func;
	t->data = data;
}
EXPORT_SYMBOL(tasklet_init);
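
/*
 * Editor's illustrative sketch (not part of the original file, names are
 * hypothetical): the usual driver pattern around the tasklet API above:
 *
 *	tasklet_init(&dev->tl, example_tasklet_fn, (unsigned long)dev);
 *	...
 *	tasklet_schedule(&dev->tl);	// from the hard IRQ handler
 *	...
 *	tasklet_kill(&dev->tl);		// on teardown, never from IRQ context
 */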

void tasklet_kill(struct tasklet_struct *t)
{
	if (in_interrupt())
		pr_notice("Attempt to kill tasklet from interrupt\n");

	while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) {
		do {
			yield();
		} while (test_bit(TASKLET_STATE_SCHED, &t->state));
	}
	tasklet_unlock_wait(t);
	clear_bit(TASKLET_STATE_SCHED, &t->state);
}
EXPORT_SYMBOL(tasklet_kill);

/*
 * tasklet_hrtimer
 */

/*
 * The trampoline is called when the hrtimer expires. It schedules a tasklet
 * to run __tasklet_hrtimer_trampoline() which in turn will call the intended
 * hrtimer callback, but from softirq context.
 */
static enum hrtimer_restart __hrtimer_tasklet_trampoline(struct hrtimer *timer)
{
	struct tasklet_hrtimer *ttimer =
		container_of(timer, struct tasklet_hrtimer, timer);

	tasklet_hi_schedule(&ttimer->tasklet);
	return HRTIMER_NORESTART;
}

/*
 * Helper function which calls the hrtimer callback from
 * tasklet/softirq context
 */
static void __tasklet_hrtimer_trampoline(unsigned long data)
{
	struct tasklet_hrtimer *ttimer = (void *)data;
	enum hrtimer_restart restart;

	restart = ttimer->function(&ttimer->timer);
	if (restart != HRTIMER_NORESTART)
		hrtimer_restart(&ttimer->timer);
}

/**
 * tasklet_hrtimer_init - Init a tasklet/hrtimer combo for softirq callbacks
 * @ttimer:	 tasklet_hrtimer which is initialized
 * @function:	 hrtimer callback function which gets called from softirq context
 * @which_clock: clock id (CLOCK_MONOTONIC/CLOCK_REALTIME)
 * @mode:	 hrtimer mode (HRTIMER_MODE_ABS/HRTIMER_MODE_REL)
 */
void tasklet_hrtimer_init(struct tasklet_hrtimer *ttimer,
			  enum hrtimer_restart (*function)(struct hrtimer *),
			  clockid_t which_clock, enum hrtimer_mode mode)
{
	hrtimer_init(&ttimer->timer, which_clock, mode);
	ttimer->timer.function = __hrtimer_tasklet_trampoline;
	tasklet_init(&ttimer->tasklet, __tasklet_hrtimer_trampoline,
		     (unsigned long)ttimer);
	ttimer->function = function;
}
EXPORT_SYMBOL_GPL(tasklet_hrtimer_init);
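
/*
 * Editor's illustrative sketch (not part of the original file, names are
 * hypothetical): arm the combo like a plain hrtimer; the callback then
 * runs from HI softirq context via the trampolines above:
 *
 *	tasklet_hrtimer_init(&th, example_hrtimer_cb,
 *			     CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 *	tasklet_hrtimer_start(&th, ms_to_ktime(10), HRTIMER_MODE_REL);
 */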

void __init softirq_init(void)
{
	int cpu;

	for_each_possible_cpu(cpu) {
		per_cpu(tasklet_vec, cpu).tail =
			&per_cpu(tasklet_vec, cpu).head;
		per_cpu(tasklet_hi_vec, cpu).tail =
			&per_cpu(tasklet_hi_vec, cpu).head;
	}

	open_softirq(TASKLET_SOFTIRQ, tasklet_action);
	open_softirq(HI_SOFTIRQ, tasklet_hi_action);
}

static int ksoftirqd_should_run(unsigned int cpu)
{
	return local_softirq_pending();
}

static void run_ksoftirqd(unsigned int cpu)
{
	local_irq_disable();
	if (local_softirq_pending()) {
		/*
		 * We can safely run softirq on inline stack, as we are not deep
		 * in the task stack here.
		 */
		__do_softirq();
		local_irq_enable();
		cond_resched_rcu_qs();
		return;
	}
	local_irq_enable();
}

#ifdef CONFIG_HOTPLUG_CPU
/*
 * tasklet_kill_immediate is called to remove a tasklet which can already be
 * scheduled for execution on @cpu.
 *
 * Unlike tasklet_kill, this function removes the tasklet
 * _immediately_, even if the tasklet is in TASKLET_STATE_SCHED state.
 *
 * When this function is called, @cpu must be in the CPU_DEAD state.
 */
void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu)
{
	struct tasklet_struct **i;

	BUG_ON(cpu_online(cpu));
	BUG_ON(test_bit(TASKLET_STATE_RUN, &t->state));

	if (!test_bit(TASKLET_STATE_SCHED, &t->state))
		return;

	/* CPU is dead, so no lock needed. */
	for (i = &per_cpu(tasklet_vec, cpu).head; *i; i = &(*i)->next) {
		if (*i == t) {
			*i = t->next;
			/* If this was the tail element, move the tail ptr */
			if (*i == NULL)
				per_cpu(tasklet_vec, cpu).tail = i;
			return;
		}
	}
	BUG();
}

static int takeover_tasklets(unsigned int cpu)
{
	/* CPU is dead, so no lock needed. */
	local_irq_disable();

	/* Find end, append list for that CPU. */
	if (&per_cpu(tasklet_vec, cpu).head != per_cpu(tasklet_vec, cpu).tail) {
		*__this_cpu_read(tasklet_vec.tail) = per_cpu(tasklet_vec, cpu).head;
		this_cpu_write(tasklet_vec.tail, per_cpu(tasklet_vec, cpu).tail);
		per_cpu(tasklet_vec, cpu).head = NULL;
		per_cpu(tasklet_vec, cpu).tail = &per_cpu(tasklet_vec, cpu).head;
	}
	raise_softirq_irqoff(TASKLET_SOFTIRQ);

	if (&per_cpu(tasklet_hi_vec, cpu).head != per_cpu(tasklet_hi_vec, cpu).tail) {
		*__this_cpu_read(tasklet_hi_vec.tail) = per_cpu(tasklet_hi_vec, cpu).head;
		__this_cpu_write(tasklet_hi_vec.tail, per_cpu(tasklet_hi_vec, cpu).tail);
		per_cpu(tasklet_hi_vec, cpu).head = NULL;
		per_cpu(tasklet_hi_vec, cpu).tail = &per_cpu(tasklet_hi_vec, cpu).head;
	}
	raise_softirq_irqoff(HI_SOFTIRQ);

	local_irq_enable();
	return 0;
}
#else
#define takeover_tasklets	NULL
#endif /* CONFIG_HOTPLUG_CPU */

static struct smp_hotplug_thread softirq_threads = {
	.store			= &ksoftirqd,
	.thread_should_run	= ksoftirqd_should_run,
	.thread_fn		= run_ksoftirqd,
	.thread_comm		= "ksoftirqd/%u",
};

static __init int spawn_ksoftirqd(void)
{
	cpuhp_setup_state_nocalls(CPUHP_SOFTIRQ_DEAD, "softirq:dead", NULL,
				  takeover_tasklets);
	BUG_ON(smpboot_register_percpu_thread(&softirq_threads));

	return 0;
}
early_initcall(spawn_ksoftirqd);

/*
 * [ These __weak aliases are kept in a separate compilation unit, so that
 *   GCC does not inline them incorrectly. ]
 */

int __init __weak early_irq_init(void)
{
	return 0;
}

int __init __weak arch_probe_nr_irqs(void)
{
	return NR_IRQS_LEGACY;
}

int __init __weak arch_early_irq_init(void)
{
	return 0;
}

unsigned int __weak arch_dynirq_lower_bound(unsigned int from)
{
	return from;
}