/*
 *	linux/kernel/softirq.c
 *
 *	Copyright (C) 1992 Linus Torvalds
 *
 *	Distribute under GPLv2.
 *
 *	Rewritten. Old one was good in 2.2, but in 2.3 it was immoral. --ANK (990903)
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/export.h>
#include <linux/kernel_stat.h>
#include <linux/interrupt.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/notifier.h>
#include <linux/percpu.h>
#include <linux/cpu.h>
#include <linux/freezer.h>
#include <linux/kthread.h>
#include <linux/rcupdate.h>
#include <linux/ftrace.h>
#include <linux/smp.h>
#include <linux/smpboot.h>
#include <linux/tick.h>
#include <linux/irq.h>

#define CREATE_TRACE_POINTS
#include <trace/events/irq.h>

/*
   - No shared variables, all the data are CPU local.
   - If a softirq needs serialization, let it serialize itself
     with its own spinlocks.
   - Even if a softirq is serialized, only the local CPU is marked for
     execution, so we get a sort of weak CPU binding. It is still not
     clear whether this results in better locality or not.

   Examples:
   - NET RX softirq. It is multithreaded and does not require
     any global serialization.
   - NET TX softirq. It kicks software netdevice queues, hence
     it is logically serialized per device, but this serialization
     is invisible to common code.
   - Tasklets: serialized with respect to themselves.
 */
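
/*
 * Illustrative sketch (not part of this file): a softirq handler that must
 * touch state shared across CPUs serializes itself with its own lock,
 * exactly as described above. All names below are made up.
 *
 *	static DEFINE_SPINLOCK(my_lock);
 *
 *	static void my_softirq_action(struct softirq_action *h)
 *	{
 *		spin_lock(&my_lock);
 *		... drain a shared or per-device queue ...
 *		spin_unlock(&my_lock);
 *	}
 */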

#ifndef __ARCH_IRQ_STAT
irq_cpustat_t irq_stat[NR_CPUS] ____cacheline_aligned;
EXPORT_SYMBOL(irq_stat);
#endif

static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp;

DEFINE_PER_CPU(struct task_struct *, ksoftirqd);

const char * const softirq_to_name[NR_SOFTIRQS] = {
	"HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "IRQ_POLL",
	"TASKLET", "SCHED", "HRTIMER", "RCU"
};

/*
 * We cannot loop indefinitely here to avoid userspace starvation,
 * but we also don't want to introduce a worst-case 1/HZ latency
 * to the pending events, so let the scheduler balance
 * the softirq load for us.
 */
static void wakeup_softirqd(void)
{
	/* Interrupts are disabled: no need to stop preemption */
	struct task_struct *tsk = __this_cpu_read(ksoftirqd);

	if (tsk && tsk->state != TASK_RUNNING)
		wake_up_process(tsk);
}

/*
 * If ksoftirqd is scheduled, we do not want to process pending softirqs
 * right now. Let ksoftirqd handle this at its own rate, to get fairness.
 */
static bool ksoftirqd_running(void)
{
	struct task_struct *tsk = __this_cpu_read(ksoftirqd);

	return tsk && (tsk->state == TASK_RUNNING);
}

/*
 * preempt_count and SOFTIRQ_OFFSET usage:
 * - preempt_count is changed by SOFTIRQ_OFFSET on entering or leaving
 *   softirq processing.
 * - preempt_count is changed by SOFTIRQ_DISABLE_OFFSET (= 2 * SOFTIRQ_OFFSET)
 *   on local_bh_disable or local_bh_enable.
 * This lets us distinguish between whether we are currently processing
 * softirq and whether we just have bh disabled.
 */
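
/*
 * Illustrative sketch (assumption, not from this file): with the offsets
 * described above, softirq_count() lets code tell the two states apart:
 *
 *	local_bh_disable();		// preempt_count += SOFTIRQ_DISABLE_OFFSET
 *	WARN_ON(!in_softirq());		// true: softirq_count() is non-zero
 *	WARN_ON(in_serving_softirq());	// false: SOFTIRQ_OFFSET bit not set
 *	local_bh_enable();		// preempt_count -= SOFTIRQ_DISABLE_OFFSET
 */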

/*
 * This one is for softirq.c-internal use,
 * where hardirqs are disabled legitimately:
 */
#ifdef CONFIG_TRACE_IRQFLAGS
void __local_bh_disable_ip(unsigned long ip, unsigned int cnt)
{
	unsigned long flags;

	WARN_ON_ONCE(in_irq());

	raw_local_irq_save(flags);
	/*
	 * The preempt tracer hooks into preempt_count_add and will break
	 * lockdep because it calls back into lockdep after SOFTIRQ_OFFSET
	 * is set and before current->softirq_enabled is cleared.
	 * We must manually increment preempt_count here and manually
	 * call the trace_preempt_off later.
	 */
	__preempt_count_add(cnt);
	/*
	 * Were softirqs turned off above:
	 */
	if (softirq_count() == (cnt & SOFTIRQ_MASK))
		trace_softirqs_off(ip);
	raw_local_irq_restore(flags);

	if (preempt_count() == cnt) {
#ifdef CONFIG_DEBUG_PREEMPT
		current->preempt_disable_ip = get_lock_parent_ip();
#endif
		trace_preempt_off(CALLER_ADDR0, get_lock_parent_ip());
	}
}
EXPORT_SYMBOL(__local_bh_disable_ip);
#endif /* CONFIG_TRACE_IRQFLAGS */

static void __local_bh_enable(unsigned int cnt)
{
	lockdep_assert_irqs_disabled();

	if (softirq_count() == (cnt & SOFTIRQ_MASK))
		trace_softirqs_on(_RET_IP_);
	preempt_count_sub(cnt);
}

/*
 * Special-case - softirqs can safely be enabled by __do_softirq(),
 * without processing still-pending softirqs:
 */
void _local_bh_enable(void)
{
	WARN_ON_ONCE(in_irq());
	__local_bh_enable(SOFTIRQ_DISABLE_OFFSET);
}
EXPORT_SYMBOL(_local_bh_enable);

void __local_bh_enable_ip(unsigned long ip, unsigned int cnt)
{
	WARN_ON_ONCE(in_irq());
	lockdep_assert_irqs_enabled();
#ifdef CONFIG_TRACE_IRQFLAGS
	local_irq_disable();
#endif
	/*
	 * Are softirqs going to be turned on now:
	 */
	if (softirq_count() == SOFTIRQ_DISABLE_OFFSET)
		trace_softirqs_on(ip);
	/*
	 * Keep preemption disabled until we are done with
	 * softirq processing:
	 */
	preempt_count_sub(cnt - 1);

	if (unlikely(!in_interrupt() && local_softirq_pending())) {
		/*
		 * Run softirqs if any are pending, and do it on a separate
		 * stack, as we may already be deep in a task's call stack.
		 */
		do_softirq();
	}

	preempt_count_dec();
#ifdef CONFIG_TRACE_IRQFLAGS
	local_irq_enable();
#endif
	preempt_check_resched();
}
EXPORT_SYMBOL(__local_bh_enable_ip);

/*
 * We restart softirq processing for at most MAX_SOFTIRQ_RESTART times,
 * but break the loop if need_resched() is set or after 2 ms.
 * The MAX_SOFTIRQ_TIME provides a nice upper bound in most cases, but in
 * certain cases, such as stop_machine(), jiffies may cease to
 * increment and so we need the MAX_SOFTIRQ_RESTART limit as
 * well to make sure we eventually return from this method.
 *
 * These limits have been established via experimentation.
 * The two things to balance are latency and fairness -
 * we want to handle softirqs as soon as possible, but they
 * should not be able to lock up the box.
 */
#define MAX_SOFTIRQ_TIME  msecs_to_jiffies(2)
#define MAX_SOFTIRQ_RESTART 10

#ifdef CONFIG_TRACE_IRQFLAGS
/*
 * When we run softirqs from irq_exit() and thus on the hardirq stack we need
 * to keep the lockdep irq context tracking as tight as possible in order
 * not to mis-qualify lock contexts and miss possible deadlocks.
 */

static inline bool lockdep_softirq_start(void)
{
	bool in_hardirq = false;

	if (trace_hardirq_context(current)) {
		in_hardirq = true;
		trace_hardirq_exit();
	}

	lockdep_softirq_enter();

	return in_hardirq;
}

static inline void lockdep_softirq_end(bool in_hardirq)
{
	lockdep_softirq_exit();

	if (in_hardirq)
		trace_hardirq_enter();
}
#else
static inline bool lockdep_softirq_start(void) { return false; }
static inline void lockdep_softirq_end(bool in_hardirq) { }
#endif

asmlinkage __visible void __softirq_entry __do_softirq(void)
{
	unsigned long end = jiffies + MAX_SOFTIRQ_TIME;
	unsigned long old_flags = current->flags;
	int max_restart = MAX_SOFTIRQ_RESTART;
	struct softirq_action *h;
	bool in_hardirq;
	__u32 pending;
	int softirq_bit;

	/*
	 * Mask out PF_MEMALLOC as the current task context is borrowed for
	 * the softirq. A softirq handler such as network RX might set
	 * PF_MEMALLOC again if the socket is related to swap.
	 */
	current->flags &= ~PF_MEMALLOC;

	pending = local_softirq_pending();
	account_irq_enter_time(current);

	__local_bh_disable_ip(_RET_IP_, SOFTIRQ_OFFSET);
	in_hardirq = lockdep_softirq_start();

restart:
	/* Reset the pending bitmask before enabling irqs */
	set_softirq_pending(0);

	local_irq_enable();

	h = softirq_vec;

	while ((softirq_bit = ffs(pending))) {
		unsigned int vec_nr;
		int prev_count;

		h += softirq_bit - 1;

		vec_nr = h - softirq_vec;
		prev_count = preempt_count();

		kstat_incr_softirqs_this_cpu(vec_nr);

		trace_softirq_entry(vec_nr);
		h->action(h);
		trace_softirq_exit(vec_nr);
		if (unlikely(prev_count != preempt_count())) {
			pr_err("huh, entered softirq %u %s %p with preempt_count %08x, exited with %08x?\n",
			       vec_nr, softirq_to_name[vec_nr], h->action,
			       prev_count, preempt_count());
			preempt_count_set(prev_count);
		}
		h++;
		pending >>= softirq_bit;
	}

	rcu_bh_qs();
	local_irq_disable();

	pending = local_softirq_pending();
	if (pending) {
		if (time_before(jiffies, end) && !need_resched() &&
		    --max_restart)
			goto restart;

		wakeup_softirqd();
	}

	lockdep_softirq_end(in_hardirq);
	account_irq_exit_time(current);
	__local_bh_enable(SOFTIRQ_OFFSET);
	WARN_ON_ONCE(in_interrupt());
	current_restore_flags(old_flags, PF_MEMALLOC);
}

asmlinkage __visible void do_softirq(void)
{
	__u32 pending;
	unsigned long flags;

	if (in_interrupt())
		return;

	local_irq_save(flags);

	pending = local_softirq_pending();

	if (pending && !ksoftirqd_running())
		do_softirq_own_stack();

	local_irq_restore(flags);
}

/*
 * Enter an interrupt context.
 */
void irq_enter(void)
{
	rcu_irq_enter();
	if (is_idle_task(current) && !in_interrupt()) {
		/*
		 * Prevent raise_softirq from needlessly waking up ksoftirqd
		 * here, as softirq will be serviced on return from interrupt.
		 */
		local_bh_disable();
		tick_irq_enter();
		_local_bh_enable();
	}

	__irq_enter();
}

static inline void invoke_softirq(void)
{
	if (ksoftirqd_running())
		return;

	if (!force_irqthreads) {
#ifdef CONFIG_HAVE_IRQ_EXIT_ON_IRQ_STACK
		/*
		 * We can safely execute softirq on the current stack if
		 * it is the irq stack, because it should be near empty
		 * at this stage.
		 */
		__do_softirq();
#else
		/*
		 * Otherwise, irq_exit() is called on the task stack, which
		 * can potentially be deep already. So run the softirq on its
		 * own stack to prevent any overrun.
		 */
		do_softirq_own_stack();
#endif
	} else {
		wakeup_softirqd();
	}
}

static inline void tick_irq_exit(void)
{
#ifdef CONFIG_NO_HZ_COMMON
	int cpu = smp_processor_id();

	/* Make sure that timer wheel updates are propagated */
	if ((idle_cpu(cpu) && !need_resched()) || tick_nohz_full_cpu(cpu)) {
		if (!in_interrupt())
			tick_nohz_irq_exit();
	}
#endif
}

/*
 * Exit an interrupt context. Process softirqs if needed and possible:
 */
void irq_exit(void)
{
#ifndef __ARCH_IRQ_EXIT_IRQS_DISABLED
	local_irq_disable();
#else
	lockdep_assert_irqs_disabled();
#endif
	account_irq_exit_time(current);
	preempt_count_sub(HARDIRQ_OFFSET);
	if (!in_interrupt() && local_softirq_pending())
		invoke_softirq();

	tick_irq_exit();
	rcu_irq_exit();
	trace_hardirq_exit(); /* must be last! */
}
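
/*
 * Illustrative sketch (not part of this file): an architecture's interrupt
 * entry code typically brackets the hardirq handler with the two helpers
 * above. The exact entry path is arch-specific; irq is a generic name here.
 *
 *	irq_enter();
 *	generic_handle_irq(irq);	// run the hardirq handler
 *	irq_exit();			// may run softirqs via invoke_softirq()
 */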

/*
 * This function must run with irqs disabled!
 */
inline void raise_softirq_irqoff(unsigned int nr)
{
	__raise_softirq_irqoff(nr);

	/*
	 * If we're in an interrupt or softirq, we're done
	 * (this also catches softirq-disabled code). We will
	 * actually run the softirq once we return from
	 * the irq or softirq.
	 *
	 * Otherwise we wake up ksoftirqd to make sure we
	 * schedule the softirq soon.
	 */
	if (!in_interrupt())
		wakeup_softirqd();
}

void raise_softirq(unsigned int nr)
{
	unsigned long flags;

	local_irq_save(flags);
	raise_softirq_irqoff(nr);
	local_irq_restore(flags);
}

void __raise_softirq_irqoff(unsigned int nr)
{
	trace_softirq_raise(nr);
	or_softirq_pending(1UL << nr);
}

void open_softirq(int nr, void (*action)(struct softirq_action *))
{
	softirq_vec[nr].action = action;
}
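
/*
 * Illustrative sketch (not from this file): a subsystem registers its
 * handler once at init time and later raises the softirq when there is
 * work. MY_SOFTIRQ and the handler name are made up; real vector numbers
 * come from the fixed enum in <linux/interrupt.h>.
 *
 *	static void my_subsys_action(struct softirq_action *h)
 *	{
 *		... drain this CPU's work queue ...
 *	}
 *
 *	open_softirq(MY_SOFTIRQ, my_subsys_action);	// boot-time setup
 *	...
 *	raise_softirq(MY_SOFTIRQ);			// mark pending on this CPU
 */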

/*
 * Tasklets
 */
struct tasklet_head {
	struct tasklet_struct *head;
	struct tasklet_struct **tail;
};

static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec);
static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec);

static void __tasklet_schedule_common(struct tasklet_struct *t,
				      struct tasklet_head __percpu *headp,
				      unsigned int softirq_nr)
{
	struct tasklet_head *head;
	unsigned long flags;

	local_irq_save(flags);
	head = this_cpu_ptr(headp);
	t->next = NULL;
	*head->tail = t;
	head->tail = &(t->next);
	raise_softirq_irqoff(softirq_nr);
	local_irq_restore(flags);
}

void __tasklet_schedule(struct tasklet_struct *t)
{
	__tasklet_schedule_common(t, &tasklet_vec,
				  TASKLET_SOFTIRQ);
}
EXPORT_SYMBOL(__tasklet_schedule);

void __tasklet_hi_schedule(struct tasklet_struct *t)
{
	__tasklet_schedule_common(t, &tasklet_hi_vec,
				  HI_SOFTIRQ);
}
EXPORT_SYMBOL(__tasklet_hi_schedule);

static void tasklet_action_common(struct softirq_action *a,
				  struct tasklet_head *tl_head,
				  unsigned int softirq_nr)
{
	struct tasklet_struct *list;

	local_irq_disable();
	list = tl_head->head;
	tl_head->head = NULL;
	tl_head->tail = &tl_head->head;
	local_irq_enable();

	while (list) {
		struct tasklet_struct *t = list;

		list = list->next;

		if (tasklet_trylock(t)) {
			if (!atomic_read(&t->count)) {
				if (!test_and_clear_bit(TASKLET_STATE_SCHED,
							&t->state))
					BUG();
				t->func(t->data);
				tasklet_unlock(t);
				continue;
			}
			tasklet_unlock(t);
		}

		local_irq_disable();
		t->next = NULL;
		*tl_head->tail = t;
		tl_head->tail = &t->next;
		__raise_softirq_irqoff(softirq_nr);
		local_irq_enable();
	}
}

static __latent_entropy void tasklet_action(struct softirq_action *a)
{
	tasklet_action_common(a, this_cpu_ptr(&tasklet_vec), TASKLET_SOFTIRQ);
}

static __latent_entropy void tasklet_hi_action(struct softirq_action *a)
{
	tasklet_action_common(a, this_cpu_ptr(&tasklet_hi_vec), HI_SOFTIRQ);
}

void tasklet_init(struct tasklet_struct *t,
		  void (*func)(unsigned long), unsigned long data)
{
	t->next = NULL;
	t->state = 0;
	atomic_set(&t->count, 0);
	t->func = func;
	t->data = data;
}
EXPORT_SYMBOL(tasklet_init);
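
/*
 * Illustrative sketch (not from this file): typical driver-side use of the
 * tasklet API as it exists here, with an unsigned long callback argument.
 * The struct, field and function names are made up.
 *
 *	static void my_bottom_half(unsigned long data)
 *	{
 *		struct my_dev *dev = (struct my_dev *)data;
 *		... deferred work, runs in softirq context ...
 *	}
 *
 *	tasklet_init(&dev->tasklet, my_bottom_half, (unsigned long)dev);
 *	tasklet_schedule(&dev->tasklet);	// e.g. from the hardirq handler
 *	...
 *	tasklet_kill(&dev->tasklet);		// on teardown
 */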

void tasklet_kill(struct tasklet_struct *t)
{
	if (in_interrupt())
		pr_notice("Attempt to kill tasklet from interrupt\n");

	while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) {
		do {
			yield();
		} while (test_bit(TASKLET_STATE_SCHED, &t->state));
	}
	tasklet_unlock_wait(t);
	clear_bit(TASKLET_STATE_SCHED, &t->state);
}
EXPORT_SYMBOL(tasklet_kill);

/*
 * tasklet_hrtimer
 */

/*
 * The trampoline is called when the hrtimer expires. It schedules a tasklet
 * to run __tasklet_hrtimer_trampoline() which in turn will call the intended
 * hrtimer callback, but from softirq context.
 */
static enum hrtimer_restart __hrtimer_tasklet_trampoline(struct hrtimer *timer)
{
	struct tasklet_hrtimer *ttimer =
		container_of(timer, struct tasklet_hrtimer, timer);

	tasklet_hi_schedule(&ttimer->tasklet);
	return HRTIMER_NORESTART;
}

/*
 * Helper function which calls the hrtimer callback from
 * tasklet/softirq context
 */
static void __tasklet_hrtimer_trampoline(unsigned long data)
{
	struct tasklet_hrtimer *ttimer = (void *)data;
	enum hrtimer_restart restart;

	restart = ttimer->function(&ttimer->timer);
	if (restart != HRTIMER_NORESTART)
		hrtimer_restart(&ttimer->timer);
}

/**
 * tasklet_hrtimer_init - Init a tasklet/hrtimer combo for softirq callbacks
 * @ttimer:	 tasklet_hrtimer which is initialized
 * @function:	 hrtimer callback function which gets called from softirq context
 * @which_clock: clock id (CLOCK_MONOTONIC/CLOCK_REALTIME)
 * @mode:	 hrtimer mode (HRTIMER_MODE_ABS/HRTIMER_MODE_REL)
 */
void tasklet_hrtimer_init(struct tasklet_hrtimer *ttimer,
			  enum hrtimer_restart (*function)(struct hrtimer *),
			  clockid_t which_clock, enum hrtimer_mode mode)
{
	hrtimer_init(&ttimer->timer, which_clock, mode);
	ttimer->timer.function = __hrtimer_tasklet_trampoline;
	tasklet_init(&ttimer->tasklet, __tasklet_hrtimer_trampoline,
		     (unsigned long)ttimer);
	ttimer->function = function;
}
EXPORT_SYMBOL_GPL(tasklet_hrtimer_init);
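
/*
 * Illustrative sketch (not from this file): a user of the combo above
 * supplies a callback that runs in softirq context and arms the timer
 * with tasklet_hrtimer_start(). The names my_ttimer/my_timer_fn are made up.
 *
 *	static enum hrtimer_restart my_timer_fn(struct hrtimer *t)
 *	{
 *		... softirq-context work ...
 *		return HRTIMER_NORESTART;
 *	}
 *
 *	tasklet_hrtimer_init(&my_ttimer, my_timer_fn,
 *			     CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 *	tasklet_hrtimer_start(&my_ttimer, ms_to_ktime(10), HRTIMER_MODE_REL);
 */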

void __init softirq_init(void)
{
	int cpu;

	for_each_possible_cpu(cpu) {
		per_cpu(tasklet_vec, cpu).tail =
			&per_cpu(tasklet_vec, cpu).head;
		per_cpu(tasklet_hi_vec, cpu).tail =
			&per_cpu(tasklet_hi_vec, cpu).head;
	}

	open_softirq(TASKLET_SOFTIRQ, tasklet_action);
	open_softirq(HI_SOFTIRQ, tasklet_hi_action);
}

static int ksoftirqd_should_run(unsigned int cpu)
{
	return local_softirq_pending();
}

static void run_ksoftirqd(unsigned int cpu)
{
	local_irq_disable();
	if (local_softirq_pending()) {
		/*
		 * We can safely run softirqs on the current stack, as we are
		 * not deep in the task stack here.
		 */
		__do_softirq();
		local_irq_enable();
		cond_resched();
		return;
	}
	local_irq_enable();
}

#ifdef CONFIG_HOTPLUG_CPU
/*
 * tasklet_kill_immediate is called to remove a tasklet which can already be
 * scheduled for execution on @cpu.
 *
 * Unlike tasklet_kill, this function removes the tasklet
 * _immediately_, even if the tasklet is in TASKLET_STATE_SCHED state.
 *
 * When this function is called, @cpu must be in the CPU_DEAD state.
 */
void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu)
{
	struct tasklet_struct **i;

	BUG_ON(cpu_online(cpu));
	BUG_ON(test_bit(TASKLET_STATE_RUN, &t->state));

	if (!test_bit(TASKLET_STATE_SCHED, &t->state))
		return;

	/* CPU is dead, so no lock needed. */
	for (i = &per_cpu(tasklet_vec, cpu).head; *i; i = &(*i)->next) {
		if (*i == t) {
			*i = t->next;
			/* If this was the tail element, move the tail ptr */
			if (*i == NULL)
				per_cpu(tasklet_vec, cpu).tail = i;
			return;
		}
	}
	BUG();
}

static int takeover_tasklets(unsigned int cpu)
{
	/* CPU is dead, so no lock needed. */
	local_irq_disable();

	/* Find end, append list for that CPU. */
	if (&per_cpu(tasklet_vec, cpu).head != per_cpu(tasklet_vec, cpu).tail) {
		*__this_cpu_read(tasklet_vec.tail) = per_cpu(tasklet_vec, cpu).head;
		this_cpu_write(tasklet_vec.tail, per_cpu(tasklet_vec, cpu).tail);
		per_cpu(tasklet_vec, cpu).head = NULL;
		per_cpu(tasklet_vec, cpu).tail = &per_cpu(tasklet_vec, cpu).head;
	}
	raise_softirq_irqoff(TASKLET_SOFTIRQ);

	if (&per_cpu(tasklet_hi_vec, cpu).head != per_cpu(tasklet_hi_vec, cpu).tail) {
		*__this_cpu_read(tasklet_hi_vec.tail) = per_cpu(tasklet_hi_vec, cpu).head;
		__this_cpu_write(tasklet_hi_vec.tail, per_cpu(tasklet_hi_vec, cpu).tail);
		per_cpu(tasklet_hi_vec, cpu).head = NULL;
		per_cpu(tasklet_hi_vec, cpu).tail = &per_cpu(tasklet_hi_vec, cpu).head;
	}
	raise_softirq_irqoff(HI_SOFTIRQ);

	local_irq_enable();
	return 0;
}
#else
#define takeover_tasklets	NULL
#endif /* CONFIG_HOTPLUG_CPU */

static struct smp_hotplug_thread softirq_threads = {
	.store			= &ksoftirqd,
	.thread_should_run	= ksoftirqd_should_run,
	.thread_fn		= run_ksoftirqd,
	.thread_comm		= "ksoftirqd/%u",
};

static __init int spawn_ksoftirqd(void)
{
	cpuhp_setup_state_nocalls(CPUHP_SOFTIRQ_DEAD, "softirq:dead", NULL,
				  takeover_tasklets);
	BUG_ON(smpboot_register_percpu_thread(&softirq_threads));

	return 0;
}
early_initcall(spawn_ksoftirqd);

/*
 * [ These __weak aliases are kept in a separate compilation unit, so that
 *   GCC does not inline them incorrectly. ]
 */

int __init __weak early_irq_init(void)
{
	return 0;
}

int __init __weak arch_probe_nr_irqs(void)
{
	return NR_IRQS_LEGACY;
}

int __init __weak arch_early_irq_init(void)
{
	return 0;
}

unsigned int __weak arch_dynirq_lower_bound(unsigned int from)
{
	return from;
}