/*
 *	linux/kernel/softirq.c
 *
 *	Copyright (C) 1992 Linus Torvalds
 *
 *	Distribute under GPLv2.
 *
 *	Rewritten. Old one was good in 2.2, but in 2.3 it was immoral. --ANK (990903)
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/export.h>
#include <linux/kernel_stat.h>
#include <linux/interrupt.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/notifier.h>
#include <linux/percpu.h>
#include <linux/cpu.h>
#include <linux/freezer.h>
#include <linux/kthread.h>
#include <linux/rcupdate.h>
#include <linux/ftrace.h>
#include <linux/smp.h>
#include <linux/smpboot.h>
#include <linux/tick.h>
#include <linux/irq.h>

#define CREATE_TRACE_POINTS
#include <trace/events/irq.h>

/*
   - No shared variables, all the data are CPU local.
   - If a softirq needs serialization, let it serialize itself
     by its own spinlocks.
   - Even if a softirq is serialized, only the local CPU is marked for
     execution, so we get a weak form of CPU binding. It is still not
     clear whether this results in better locality or not.

   Examples:
   - NET RX softirq. It is multithreaded and does not require
     any global serialization.
   - NET TX softirq. It kicks software netdevice queues, hence
     it is logically serialized per device, but this serialization
     is invisible to common code.
   - Tasklets: serialized with respect to themselves.
 */

#ifndef __ARCH_IRQ_STAT
irq_cpustat_t irq_stat[NR_CPUS] ____cacheline_aligned;
EXPORT_SYMBOL(irq_stat);
#endif

static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp;

DEFINE_PER_CPU(struct task_struct *, ksoftirqd);

const char * const softirq_to_name[NR_SOFTIRQS] = {
	"HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "IRQ_POLL",
	"TASKLET", "SCHED", "HRTIMER", "RCU"
};

/*
 * We cannot loop indefinitely here to avoid userspace starvation,
 * but we also don't want to introduce a worst case 1/HZ latency
 * to the pending events, so let the scheduler balance
 * the softirq load for us.
 */
static void wakeup_softirqd(void)
{
	/* Interrupts are disabled: no need to stop preemption */
	struct task_struct *tsk = __this_cpu_read(ksoftirqd);

	if (tsk && tsk->state != TASK_RUNNING)
		wake_up_process(tsk);
}

/*
 * If ksoftirqd is scheduled, we do not want to process pending softirqs
 * right now. Let ksoftirqd handle this at its own rate, to get fairness.
 */
static bool ksoftirqd_running(void)
{
	struct task_struct *tsk = __this_cpu_read(ksoftirqd);

	return tsk && (tsk->state == TASK_RUNNING);
}

/*
 * preempt_count and SOFTIRQ_OFFSET usage:
 * - preempt_count is changed by SOFTIRQ_OFFSET on entering or leaving
 *   softirq processing.
 * - preempt_count is changed by SOFTIRQ_DISABLE_OFFSET (= 2 * SOFTIRQ_OFFSET)
 *   on local_bh_disable or local_bh_enable.
 * This lets us distinguish between whether we are currently processing
 * softirq and whether we just have bh disabled.
 */
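/*
 * Illustration (a sketch; the actual offsets and predicates live in
 * <linux/preempt.h>):
 *
 *	local_bh_disable();	// softirq_count() == SOFTIRQ_DISABLE_OFFSET:
 *				// in_softirq() is true, in_serving_softirq() is not
 *	local_bh_enable();
 *
 * whereas inside __do_softirq() itself softirq_count() == SOFTIRQ_OFFSET,
 * so both in_softirq() and in_serving_softirq() are true.
 */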

/*
 * This one is for softirq.c-internal use,
 * where hardirqs are disabled legitimately:
 */
#ifdef CONFIG_TRACE_IRQFLAGS
void __local_bh_disable_ip(unsigned long ip, unsigned int cnt)
{
	unsigned long flags;

	WARN_ON_ONCE(in_irq());

	raw_local_irq_save(flags);
	/*
	 * The preempt tracer hooks into preempt_count_add and will break
	 * lockdep because it calls back into lockdep after SOFTIRQ_OFFSET
	 * is set and before current->softirq_enabled is cleared.
	 * We must manually increment preempt_count here and manually
	 * call the trace_preempt_off later.
	 */
	__preempt_count_add(cnt);
	/*
	 * Were softirqs turned off above:
	 */
	if (softirq_count() == (cnt & SOFTIRQ_MASK))
		trace_softirqs_off(ip);
	raw_local_irq_restore(flags);

	if (preempt_count() == cnt) {
#ifdef CONFIG_DEBUG_PREEMPT
		current->preempt_disable_ip = get_lock_parent_ip();
#endif
		trace_preempt_off(CALLER_ADDR0, get_lock_parent_ip());
	}
}
EXPORT_SYMBOL(__local_bh_disable_ip);
#endif /* CONFIG_TRACE_IRQFLAGS */

static void __local_bh_enable(unsigned int cnt)
{
	lockdep_assert_irqs_disabled();

	if (softirq_count() == (cnt & SOFTIRQ_MASK))
		trace_softirqs_on(_RET_IP_);
	preempt_count_sub(cnt);
}

/*
 * Special-case - softirqs can safely be enabled in
 * cond_resched_softirq(), or by __do_softirq(),
 * without processing still-pending softirqs:
 */
void _local_bh_enable(void)
{
	WARN_ON_ONCE(in_irq());
	__local_bh_enable(SOFTIRQ_DISABLE_OFFSET);
}
EXPORT_SYMBOL(_local_bh_enable);

void __local_bh_enable_ip(unsigned long ip, unsigned int cnt)
{
	WARN_ON_ONCE(in_irq());
	lockdep_assert_irqs_enabled();
#ifdef CONFIG_TRACE_IRQFLAGS
	local_irq_disable();
#endif
	/*
	 * Are softirqs going to be turned on now:
	 */
	if (softirq_count() == SOFTIRQ_DISABLE_OFFSET)
		trace_softirqs_on(ip);
	/*
	 * Keep preemption disabled until we are done with
	 * softirq processing:
	 */
	preempt_count_sub(cnt - 1);

	if (unlikely(!in_interrupt() && local_softirq_pending())) {
		/*
		 * Run softirq if any pending. And do it in its own stack
		 * as we may be calling this deep in a task call stack already.
		 */
		do_softirq();
	}

	preempt_count_dec();
#ifdef CONFIG_TRACE_IRQFLAGS
	local_irq_enable();
#endif
	preempt_check_resched();
}
EXPORT_SYMBOL(__local_bh_enable_ip);
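/*
 * Sketch of the usual caller pattern: local_bh_disable()/local_bh_enable()
 * in <linux/bottom_half.h> wrap the two helpers above with
 * SOFTIRQ_DISABLE_OFFSET, so a typical user looks like:
 *
 *	local_bh_disable();	// softirqs cannot run on this CPU here
 *	...			// touch data shared with a bottom half
 *	local_bh_enable();	// runs any softirq that became pending meanwhile
 */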

/*
 * We restart softirq processing for at most MAX_SOFTIRQ_RESTART times,
 * but break the loop if need_resched() is set or after 2 ms.
 * The MAX_SOFTIRQ_TIME provides a nice upper bound in most cases, but in
 * certain cases, such as stop_machine(), jiffies may cease to
 * increment and so we need the MAX_SOFTIRQ_RESTART limit as
 * well to make sure we eventually return from this method.
 *
 * These limits have been established via experimentation.
 * The two things to balance are latency and fairness:
 * we want to handle softirqs as soon as possible, but they
 * should not be able to lock up the box.
 */
#define MAX_SOFTIRQ_TIME  msecs_to_jiffies(2)
#define MAX_SOFTIRQ_RESTART 10

#ifdef CONFIG_TRACE_IRQFLAGS
/*
 * When we run softirqs from irq_exit() and thus on the hardirq stack we need
 * to keep the lockdep irq context tracking as tight as possible in order to
 * not mis-qualify lock contexts and miss possible deadlocks.
 */

static inline bool lockdep_softirq_start(void)
{
	bool in_hardirq = false;

	if (trace_hardirq_context(current)) {
		in_hardirq = true;
		trace_hardirq_exit();
	}

	lockdep_softirq_enter();

	return in_hardirq;
}

static inline void lockdep_softirq_end(bool in_hardirq)
{
	lockdep_softirq_exit();

	if (in_hardirq)
		trace_hardirq_enter();
}
#else
static inline bool lockdep_softirq_start(void) { return false; }
static inline void lockdep_softirq_end(bool in_hardirq) { }
#endif

asmlinkage __visible void __softirq_entry __do_softirq(void)
{
	unsigned long end = jiffies + MAX_SOFTIRQ_TIME;
	unsigned long old_flags = current->flags;
	int max_restart = MAX_SOFTIRQ_RESTART;
	struct softirq_action *h;
	bool in_hardirq;
	__u32 pending;
	int softirq_bit;

	/*
	 * Mask out PF_MEMALLOC as the current task context is borrowed for the
	 * softirq. A softirq handler, such as network RX, might set PF_MEMALLOC
	 * again if the socket is related to swap.
	 */
	current->flags &= ~PF_MEMALLOC;

	pending = local_softirq_pending();
	account_irq_enter_time(current);

	__local_bh_disable_ip(_RET_IP_, SOFTIRQ_OFFSET);
	in_hardirq = lockdep_softirq_start();

restart:
	/* Reset the pending bitmask before enabling irqs */
	set_softirq_pending(0);

	local_irq_enable();

	h = softirq_vec;
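	/*
	 * The loop below scans the pending mask lowest bit first: ffs()
	 * returns the 1-based index of the least significant set bit, so
	 * h advances by (softirq_bit - 1) each round and pending is shifted
	 * past the bit just serviced at the bottom of the loop.
	 */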

	while ((softirq_bit = ffs(pending))) {
		unsigned int vec_nr;
		int prev_count;

		h += softirq_bit - 1;

		vec_nr = h - softirq_vec;
		prev_count = preempt_count();

		kstat_incr_softirqs_this_cpu(vec_nr);

		trace_softirq_entry(vec_nr);
		h->action(h);
		trace_softirq_exit(vec_nr);
		if (unlikely(prev_count != preempt_count())) {
			pr_err("huh, entered softirq %u %s %p with preempt_count %08x, exited with %08x?\n",
			       vec_nr, softirq_to_name[vec_nr], h->action,
			       prev_count, preempt_count());
			preempt_count_set(prev_count);
		}
		h++;
		pending >>= softirq_bit;
	}

	rcu_bh_qs();
	local_irq_disable();

	pending = local_softirq_pending();
	if (pending) {
		if (time_before(jiffies, end) && !need_resched() &&
		    --max_restart)
			goto restart;

		wakeup_softirqd();
	}

	lockdep_softirq_end(in_hardirq);
	account_irq_exit_time(current);
	__local_bh_enable(SOFTIRQ_OFFSET);
	WARN_ON_ONCE(in_interrupt());
	current_restore_flags(old_flags, PF_MEMALLOC);
}

asmlinkage __visible void do_softirq(void)
{
	__u32 pending;
	unsigned long flags;

	if (in_interrupt())
		return;

	local_irq_save(flags);

	pending = local_softirq_pending();

	if (pending && !ksoftirqd_running())
		do_softirq_own_stack();

	local_irq_restore(flags);
}

/*
 * Enter an interrupt context.
 */
void irq_enter(void)
{
	rcu_irq_enter();
	if (is_idle_task(current) && !in_interrupt()) {
		/*
		 * Prevent raise_softirq from needlessly waking up ksoftirqd
		 * here, as softirq will be serviced on return from interrupt.
		 */
		local_bh_disable();
		tick_irq_enter();
		_local_bh_enable();
	}

	__irq_enter();
}

static inline void invoke_softirq(void)
{
	if (ksoftirqd_running())
		return;

	if (!force_irqthreads) {
#ifdef CONFIG_HAVE_IRQ_EXIT_ON_IRQ_STACK
		/*
		 * We can safely execute softirq on the current stack if
		 * it is the irq stack, because it should be near empty
		 * at this stage.
		 */
		__do_softirq();
#else
		/*
		 * Otherwise, irq_exit() is called on the task stack, which can
		 * already be quite deep. So run softirqs on their own stack
		 * to prevent any overrun.
		 */
		do_softirq_own_stack();
#endif
	} else {
		wakeup_softirqd();
	}
}

static inline void tick_irq_exit(void)
{
#ifdef CONFIG_NO_HZ_COMMON
	int cpu = smp_processor_id();

	/* Make sure that timer wheel updates are propagated */
	if ((idle_cpu(cpu) && !need_resched()) || tick_nohz_full_cpu(cpu)) {
		if (!in_interrupt())
			tick_nohz_irq_exit();
	}
#endif
}

/*
 * Exit an interrupt context. Process softirqs if needed and possible:
 */
void irq_exit(void)
{
#ifndef __ARCH_IRQ_EXIT_IRQS_DISABLED
	local_irq_disable();
#else
	lockdep_assert_irqs_disabled();
#endif
	account_irq_exit_time(current);
	preempt_count_sub(HARDIRQ_OFFSET);
	if (!in_interrupt() && local_softirq_pending())
		invoke_softirq();

	tick_irq_exit();
	rcu_irq_exit();
	trace_hardirq_exit(); /* must be last! */
}

/*
 * This function must run with irqs disabled!
 */
inline void raise_softirq_irqoff(unsigned int nr)
{
	__raise_softirq_irqoff(nr);

	/*
	 * If we're in an interrupt or softirq, we're done
	 * (this also catches softirq-disabled code). We will
	 * actually run the softirq once we return from
	 * the irq or softirq.
	 *
	 * Otherwise we wake up ksoftirqd to make sure we
	 * schedule the softirq soon.
	 */
	if (!in_interrupt())
		wakeup_softirqd();
}

void raise_softirq(unsigned int nr)
{
	unsigned long flags;

	local_irq_save(flags);
	raise_softirq_irqoff(nr);
	local_irq_restore(flags);
}

void __raise_softirq_irqoff(unsigned int nr)
{
	trace_softirq_raise(nr);
	or_softirq_pending(1UL << nr);
}

void open_softirq(int nr, void (*action)(struct softirq_action *))
{
	softirq_vec[nr].action = action;
}
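/*
 * Usage sketch: a subsystem registers its handler once at boot and then
 * raises the vector from (soft)irq context; e.g. the networking core does
 * roughly the following (shown for illustration only):
 *
 *	open_softirq(NET_TX_SOFTIRQ, net_tx_action);
 *	...
 *	raise_softirq_irqoff(NET_TX_SOFTIRQ);	// with interrupts disabled
 */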

/*
 * Tasklets
 */
struct tasklet_head {
	struct tasklet_struct *head;
	struct tasklet_struct **tail;
};

static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec);
static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec);

void __tasklet_schedule(struct tasklet_struct *t)
{
	unsigned long flags;

	local_irq_save(flags);
	t->next = NULL;
	*__this_cpu_read(tasklet_vec.tail) = t;
	__this_cpu_write(tasklet_vec.tail, &(t->next));
	raise_softirq_irqoff(TASKLET_SOFTIRQ);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(__tasklet_schedule);

void __tasklet_hi_schedule(struct tasklet_struct *t)
{
	unsigned long flags;

	local_irq_save(flags);
	t->next = NULL;
	*__this_cpu_read(tasklet_hi_vec.tail) = t;
	__this_cpu_write(tasklet_hi_vec.tail,  &(t->next));
	raise_softirq_irqoff(HI_SOFTIRQ);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(__tasklet_hi_schedule);

static __latent_entropy void tasklet_action(struct softirq_action *a)
{
	struct tasklet_struct *list;

	local_irq_disable();
	list = __this_cpu_read(tasklet_vec.head);
	__this_cpu_write(tasklet_vec.head, NULL);
	__this_cpu_write(tasklet_vec.tail, this_cpu_ptr(&tasklet_vec.head));
	local_irq_enable();

	while (list) {
		struct tasklet_struct *t = list;

		list = list->next;

		if (tasklet_trylock(t)) {
			if (!atomic_read(&t->count)) {
				if (!test_and_clear_bit(TASKLET_STATE_SCHED,
							&t->state))
					BUG();
				t->func(t->data);
				tasklet_unlock(t);
				continue;
			}
			tasklet_unlock(t);
		}

		local_irq_disable();
		t->next = NULL;
		*__this_cpu_read(tasklet_vec.tail) = t;
		__this_cpu_write(tasklet_vec.tail, &(t->next));
		__raise_softirq_irqoff(TASKLET_SOFTIRQ);
		local_irq_enable();
	}
}

static __latent_entropy void tasklet_hi_action(struct softirq_action *a)
{
	struct tasklet_struct *list;

	local_irq_disable();
	list = __this_cpu_read(tasklet_hi_vec.head);
	__this_cpu_write(tasklet_hi_vec.head, NULL);
	__this_cpu_write(tasklet_hi_vec.tail, this_cpu_ptr(&tasklet_hi_vec.head));
	local_irq_enable();

	while (list) {
		struct tasklet_struct *t = list;

		list = list->next;

		if (tasklet_trylock(t)) {
			if (!atomic_read(&t->count)) {
				if (!test_and_clear_bit(TASKLET_STATE_SCHED,
							&t->state))
					BUG();
				t->func(t->data);
				tasklet_unlock(t);
				continue;
			}
			tasklet_unlock(t);
		}

		local_irq_disable();
		t->next = NULL;
		*__this_cpu_read(tasklet_hi_vec.tail) = t;
		__this_cpu_write(tasklet_hi_vec.tail, &(t->next));
		__raise_softirq_irqoff(HI_SOFTIRQ);
		local_irq_enable();
	}
}

void tasklet_init(struct tasklet_struct *t,
		  void (*func)(unsigned long), unsigned long data)
{
	t->next = NULL;
	t->state = 0;
	atomic_set(&t->count, 0);
	t->func = func;
	t->data = data;
}
EXPORT_SYMBOL(tasklet_init);
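/*
 * Typical driver-side usage (a sketch with hypothetical names):
 *
 *	static void my_tasklet_fn(unsigned long data)
 *	{
 *		struct my_dev *dev = (struct my_dev *)data;
 *		...			// bottom-half work, runs in softirq context
 *	}
 *
 *	tasklet_init(&dev->tl, my_tasklet_fn, (unsigned long)dev);
 *	tasklet_schedule(&dev->tl);	// typically from the hard irq handler
 *	tasklet_kill(&dev->tl);		// before freeing dev
 */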

void tasklet_kill(struct tasklet_struct *t)
{
	if (in_interrupt())
		pr_notice("Attempt to kill tasklet from interrupt\n");

	while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) {
		do {
			yield();
		} while (test_bit(TASKLET_STATE_SCHED, &t->state));
	}
	tasklet_unlock_wait(t);
	clear_bit(TASKLET_STATE_SCHED, &t->state);
}
EXPORT_SYMBOL(tasklet_kill);

/*
 * tasklet_hrtimer
 */

/*
 * The trampoline is called when the hrtimer expires. It schedules a tasklet
 * to run __tasklet_hrtimer_trampoline() which in turn will call the intended
 * hrtimer callback, but from softirq context.
 */
static enum hrtimer_restart __hrtimer_tasklet_trampoline(struct hrtimer *timer)
{
	struct tasklet_hrtimer *ttimer =
		container_of(timer, struct tasklet_hrtimer, timer);

	tasklet_hi_schedule(&ttimer->tasklet);
	return HRTIMER_NORESTART;
}

/*
 * Helper function which calls the hrtimer callback from
 * tasklet/softirq context
 */
static void __tasklet_hrtimer_trampoline(unsigned long data)
{
	struct tasklet_hrtimer *ttimer = (void *)data;
	enum hrtimer_restart restart;

	restart = ttimer->function(&ttimer->timer);
	if (restart != HRTIMER_NORESTART)
		hrtimer_restart(&ttimer->timer);
}

/**
 * tasklet_hrtimer_init - Init a tasklet/hrtimer combo for softirq callbacks
 * @ttimer:	 tasklet_hrtimer which is initialized
 * @function:	 hrtimer callback function which gets called from softirq context
 * @which_clock: clock id (CLOCK_MONOTONIC/CLOCK_REALTIME)
 * @mode:	 hrtimer mode (HRTIMER_MODE_ABS/HRTIMER_MODE_REL)
 */
void tasklet_hrtimer_init(struct tasklet_hrtimer *ttimer,
			  enum hrtimer_restart (*function)(struct hrtimer *),
			  clockid_t which_clock, enum hrtimer_mode mode)
{
	hrtimer_init(&ttimer->timer, which_clock, mode);
	ttimer->timer.function = __hrtimer_tasklet_trampoline;
	tasklet_init(&ttimer->tasklet, __tasklet_hrtimer_trampoline,
		     (unsigned long)ttimer);
	ttimer->function = function;
}
EXPORT_SYMBOL_GPL(tasklet_hrtimer_init);
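/*
 * Usage sketch (hypothetical callback name):
 *
 *	static enum hrtimer_restart my_cb(struct hrtimer *t) { ... }
 *
 *	tasklet_hrtimer_init(&th, my_cb, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 *	tasklet_hrtimer_start(&th, ms_to_ktime(5), HRTIMER_MODE_REL);
 *
 * my_cb() then runs from HI_SOFTIRQ (tasklet) context instead of hard
 * interrupt context.
 */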

void __init softirq_init(void)
{
	int cpu;

	for_each_possible_cpu(cpu) {
		per_cpu(tasklet_vec, cpu).tail =
			&per_cpu(tasklet_vec, cpu).head;
		per_cpu(tasklet_hi_vec, cpu).tail =
			&per_cpu(tasklet_hi_vec, cpu).head;
	}

	open_softirq(TASKLET_SOFTIRQ, tasklet_action);
	open_softirq(HI_SOFTIRQ, tasklet_hi_action);
}

static int ksoftirqd_should_run(unsigned int cpu)
{
	return local_softirq_pending();
}

static void run_ksoftirqd(unsigned int cpu)
{
	local_irq_disable();
	if (local_softirq_pending()) {
		/*
		 * We can safely run softirq on inline stack, as we are not deep
		 * in the task stack here.
		 */
		__do_softirq();
		local_irq_enable();
		cond_resched();
		return;
	}
	local_irq_enable();
}

#ifdef CONFIG_HOTPLUG_CPU
/*
 * tasklet_kill_immediate is called to remove a tasklet which can already be
 * scheduled for execution on @cpu.
 *
 * Unlike tasklet_kill, this function removes the tasklet
 * _immediately_, even if the tasklet is in TASKLET_STATE_SCHED state.
 *
 * When this function is called, @cpu must be in the CPU_DEAD state.
 */
void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu)
{
	struct tasklet_struct **i;

	BUG_ON(cpu_online(cpu));
	BUG_ON(test_bit(TASKLET_STATE_RUN, &t->state));

	if (!test_bit(TASKLET_STATE_SCHED, &t->state))
		return;

	/* CPU is dead, so no lock needed. */
	for (i = &per_cpu(tasklet_vec, cpu).head; *i; i = &(*i)->next) {
		if (*i == t) {
			*i = t->next;
			/* If this was the tail element, move the tail ptr */
			if (*i == NULL)
				per_cpu(tasklet_vec, cpu).tail = i;
			return;
		}
	}
	BUG();
}

static int takeover_tasklets(unsigned int cpu)
{
	/* CPU is dead, so no lock needed. */
	local_irq_disable();

	/* Find end, append list for that CPU. */
	if (&per_cpu(tasklet_vec, cpu).head != per_cpu(tasklet_vec, cpu).tail) {
		*__this_cpu_read(tasklet_vec.tail) = per_cpu(tasklet_vec, cpu).head;
		this_cpu_write(tasklet_vec.tail, per_cpu(tasklet_vec, cpu).tail);
		per_cpu(tasklet_vec, cpu).head = NULL;
		per_cpu(tasklet_vec, cpu).tail = &per_cpu(tasklet_vec, cpu).head;
	}
	raise_softirq_irqoff(TASKLET_SOFTIRQ);

	if (&per_cpu(tasklet_hi_vec, cpu).head != per_cpu(tasklet_hi_vec, cpu).tail) {
		*__this_cpu_read(tasklet_hi_vec.tail) = per_cpu(tasklet_hi_vec, cpu).head;
		__this_cpu_write(tasklet_hi_vec.tail, per_cpu(tasklet_hi_vec, cpu).tail);
		per_cpu(tasklet_hi_vec, cpu).head = NULL;
		per_cpu(tasklet_hi_vec, cpu).tail = &per_cpu(tasklet_hi_vec, cpu).head;
	}
	raise_softirq_irqoff(HI_SOFTIRQ);

	local_irq_enable();
	return 0;
}
#else
#define takeover_tasklets	NULL
#endif /* CONFIG_HOTPLUG_CPU */

static struct smp_hotplug_thread softirq_threads = {
	.store			= &ksoftirqd,
	.thread_should_run	= ksoftirqd_should_run,
	.thread_fn		= run_ksoftirqd,
	.thread_comm		= "ksoftirqd/%u",
};
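/*
 * smpboot_register_percpu_thread() below creates one "ksoftirqd/N" thread
 * per CPU and parks/unparks it across hotplug; the generic smpboot loop
 * calls ksoftirqd_should_run() and run_ksoftirqd() above on its behalf.
 */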

static __init int spawn_ksoftirqd(void)
{
	cpuhp_setup_state_nocalls(CPUHP_SOFTIRQ_DEAD, "softirq:dead", NULL,
				  takeover_tasklets);
	BUG_ON(smpboot_register_percpu_thread(&softirq_threads));

	return 0;
}
early_initcall(spawn_ksoftirqd);

/*
 * [ These __weak aliases are kept in a separate compilation unit, so that
 *   GCC does not inline them incorrectly. ]
 */

int __init __weak early_irq_init(void)
{
	return 0;
}

int __init __weak arch_probe_nr_irqs(void)
{
	return NR_IRQS_LEGACY;
}

int __init __weak arch_early_irq_init(void)
{
	return 0;
}

unsigned int __weak arch_dynirq_lower_bound(unsigned int from)
{
	return from;
}