softirq.c 21.3 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4 5
/*
 *	linux/kernel/softirq.c
 *
 *	Copyright (C) 1992 Linus Torvalds
 *
 *	Distribute under GPLv2.
 *
 *	Rewritten. Old one was good in 2.2, but in 2.3 it was immoral. --ANK (990903)
 *
 *	Remote softirq infrastructure is by Jens Axboe.
 */

13
#include <linux/export.h>
L
Linus Torvalds 已提交
14 15 16 17 18 19 20
#include <linux/kernel_stat.h>
#include <linux/interrupt.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/notifier.h>
#include <linux/percpu.h>
#include <linux/cpu.h>
21
#include <linux/freezer.h>
L
Linus Torvalds 已提交
22 23
#include <linux/kthread.h>
#include <linux/rcupdate.h>
24
#include <linux/ftrace.h>
25
#include <linux/smp.h>
26
#include <linux/smpboot.h>
27
#include <linux/tick.h>
28 29

#define CREATE_TRACE_POINTS
30
#include <trace/events/irq.h>
L
Linus Torvalds 已提交
31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54

/*
   - No shared variables, all the data are CPU local.
   - If a softirq needs serialization, let it serialize itself
     by its own spinlocks.
   - Even if softirq is serialized, only local cpu is marked for
     execution. Hence, we get something sort of weak cpu binding.
     Though it is still not clear, will it result in better locality
     or will not.

   Examples:
   - NET RX softirq. It is multithreaded and does not require
     any global serialization.
   - NET TX softirq. It kicks software netdevice queues, hence
     it is logically serialized per device, but this serialization
     is invisible to common code.
   - Tasklets: serialized wrt itself.
 */

#ifndef __ARCH_IRQ_STAT
/*
 * Generic irq_cpustat_t array with NR_CPUS entries, cache-line aligned and
 * exported.  Only defined here when __ARCH_IRQ_STAT is not set.
 */
irq_cpustat_t irq_stat[NR_CPUS] ____cacheline_aligned;
EXPORT_SYMBOL(irq_stat);
#endif

A
Alexey Dobriyan 已提交
55
/* Handler table indexed by softirq number; open_softirq() fills in .action. */
static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp;
L
Linus Torvalds 已提交
56

57
/* Per-CPU pointer to the ksoftirqd task; read by wakeup_softirqd(). */
DEFINE_PER_CPU(struct task_struct *, ksoftirqd);
L
Linus Torvalds 已提交
58

59
/*
 * Printable softirq names, indexed by softirq number.  __do_softirq()
 * uses them when it reports a preempt_count imbalance.
 */
char *softirq_to_name[NR_SOFTIRQS] = {
	"HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "BLOCK_IOPOLL",
	"TASKLET", "SCHED", "HRTIMER", "RCU"
};

L
Linus Torvalds 已提交
64 65 66 67 68 69
/*
 * we cannot loop indefinitely here to avoid userspace starvation,
 * but we also don't want to introduce a worst case 1/HZ latency
 * to the pending events, so let the scheduler balance
 * the softirq load for us.
 */
70
static void wakeup_softirqd(void)
L
Linus Torvalds 已提交
71 72
{
	/* Interrupts are disabled: no need to stop preemption */
73
	struct task_struct *tsk = __this_cpu_read(ksoftirqd);
L
Linus Torvalds 已提交
74 75 76 77 78

	if (tsk && tsk->state != TASK_RUNNING)
		wake_up_process(tsk);
}

79 80 81 82 83 84 85 86 87 88
/*
 * preempt_count and SOFTIRQ_OFFSET usage:
 * - preempt_count is changed by SOFTIRQ_OFFSET on entering or leaving
 *   softirq processing.
 * - preempt_count is changed by SOFTIRQ_DISABLE_OFFSET (= 2 * SOFTIRQ_OFFSET)
 *   on local_bh_disable or local_bh_enable.
 * This lets us distinguish between whether we are currently processing
 * softirq and whether we just have bh disabled.
 */

89 90 91 92
/*
 * This one is for softirq.c-internal use,
 * where hardirqs are disabled legitimately:
 */
93
#ifdef CONFIG_TRACE_IRQFLAGS
94
static void __local_bh_disable(unsigned long ip, unsigned int cnt)
95 96 97 98 99 100
{
	unsigned long flags;

	WARN_ON_ONCE(in_irq());

	raw_local_irq_save(flags);
101 102 103 104 105 106 107
	/*
	 * The preempt tracer hooks into add_preempt_count and will break
	 * lockdep because it calls back into lockdep after SOFTIRQ_OFFSET
	 * is set and before current->softirq_enabled is cleared.
	 * We must manually increment preempt_count here and manually
	 * call the trace_preempt_off later.
	 */
108
	preempt_count() += cnt;
109 110 111
	/*
	 * Were softirqs turned off above:
	 */
112
	if (softirq_count() == cnt)
113 114
		trace_softirqs_off(ip);
	raw_local_irq_restore(flags);
115

116
	if (preempt_count() == cnt)
117
		trace_preempt_off(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1));
118
}
119
#else /* !CONFIG_TRACE_IRQFLAGS */
120
/*
 * Non-tracing variant: add @cnt to preempt_count, followed by a compiler
 * barrier.  @ip is unused here.
 */
static inline void __local_bh_disable(unsigned long ip, unsigned int cnt)
{
	add_preempt_count(cnt);
	barrier();
}
#endif /* CONFIG_TRACE_IRQFLAGS */
126 127 128

void local_bh_disable(void)
{
D
Davidlohr Bueso 已提交
129
	__local_bh_disable(_RET_IP_, SOFTIRQ_DISABLE_OFFSET);
130 131 132 133
}

EXPORT_SYMBOL(local_bh_disable);

134 135 136 137 138 139
static void __local_bh_enable(unsigned int cnt)
{
	WARN_ON_ONCE(in_irq());
	WARN_ON_ONCE(!irqs_disabled());

	if (softirq_count() == cnt)
D
Davidlohr Bueso 已提交
140
		trace_softirqs_on(_RET_IP_);
141 142 143
	sub_preempt_count(cnt);
}

144 145 146 147 148 149 150
/*
 * Special-case - softirqs can safely be enabled in
 * cond_resched_softirq(), or by __do_softirq(),
 * without processing still-pending softirqs:
 */
void _local_bh_enable(void)
{
151
	__local_bh_enable(SOFTIRQ_DISABLE_OFFSET);
152 153 154 155
}

EXPORT_SYMBOL(_local_bh_enable);

156
static inline void _local_bh_enable_ip(unsigned long ip)
157
{
158
	WARN_ON_ONCE(in_irq() || irqs_disabled());
159
#ifdef CONFIG_TRACE_IRQFLAGS
160
	local_irq_disable();
161
#endif
162 163 164
	/*
	 * Are softirqs going to be turned on now:
	 */
165
	if (softirq_count() == SOFTIRQ_DISABLE_OFFSET)
166
		trace_softirqs_on(ip);
167 168 169 170
	/*
	 * Keep preemption disabled until we are done with
	 * softirq processing:
 	 */
171
	sub_preempt_count(SOFTIRQ_DISABLE_OFFSET - 1);
172 173 174 175 176

	if (unlikely(!in_interrupt() && local_softirq_pending()))
		do_softirq();

	dec_preempt_count();
177
#ifdef CONFIG_TRACE_IRQFLAGS
178
	local_irq_enable();
179
#endif
180 181
	preempt_check_resched();
}
182 183 184

void local_bh_enable(void)
{
D
Davidlohr Bueso 已提交
185
	_local_bh_enable_ip(_RET_IP_);
186
}
187 188 189 190
EXPORT_SYMBOL(local_bh_enable);

/* Like local_bh_enable(), but the caller supplies the address to trace as @ip. */
void local_bh_enable_ip(unsigned long ip)
{
	_local_bh_enable_ip(ip);
}
EXPORT_SYMBOL(local_bh_enable_ip);

L
Linus Torvalds 已提交
195
/*
 * We restart softirq processing for at most MAX_SOFTIRQ_RESTART times,
 * but break the loop if need_resched() is set or after 2 ms.
 * The MAX_SOFTIRQ_TIME provides a nice upper bound in most cases, but in
 * certain cases, such as stop_machine(), jiffies may cease to
 * increment and so we need the MAX_SOFTIRQ_RESTART limit as
 * well to make sure we eventually return from this method.
 *
 * These limits have been established via experimentation.
 * The two things to balance are latency and fairness -
 * we want to handle softirqs as soon as possible, but they
 * should not be able to lock up the box.
 */
E
Eric Dumazet 已提交
208
/* Time budget (2 ms, in jiffies) and restart cap for one __do_softirq() call. */
#define MAX_SOFTIRQ_TIME  msecs_to_jiffies(2)
#define MAX_SOFTIRQ_RESTART 10
L
Linus Torvalds 已提交
210 211 212 213 214

asmlinkage void __do_softirq(void)
{
	struct softirq_action *h;
	__u32 pending;
E
Eric Dumazet 已提交
215
	unsigned long end = jiffies + MAX_SOFTIRQ_TIME;
L
Linus Torvalds 已提交
216
	int cpu;
217
	unsigned long old_flags = current->flags;
218
	int max_restart = MAX_SOFTIRQ_RESTART;
219 220 221 222 223 224 225

	/*
	 * Mask out PF_MEMALLOC s current task context is borrowed for the
	 * softirq. A softirq handled such as network RX might set PF_MEMALLOC
	 * again if the socket is related to swap
	 */
	current->flags &= ~PF_MEMALLOC;
L
Linus Torvalds 已提交
226 227

	pending = local_softirq_pending();
228
	account_irq_enter_time(current);
229

D
Davidlohr Bueso 已提交
230
	__local_bh_disable(_RET_IP_, SOFTIRQ_OFFSET);
231
	lockdep_softirq_enter();
L
Linus Torvalds 已提交
232 233 234 235

	cpu = smp_processor_id();
restart:
	/* Reset the pending bitmask before enabling irqs */
236
	set_softirq_pending(0);
L
Linus Torvalds 已提交
237

238
	local_irq_enable();
L
Linus Torvalds 已提交
239 240 241 242 243

	h = softirq_vec;

	do {
		if (pending & 1) {
244
			unsigned int vec_nr = h - softirq_vec;
245 246
			int prev_count = preempt_count();

247 248 249
			kstat_incr_softirqs_this_cpu(vec_nr);

			trace_softirq_entry(vec_nr);
L
Linus Torvalds 已提交
250
			h->action(h);
251
			trace_softirq_exit(vec_nr);
252
			if (unlikely(prev_count != preempt_count())) {
253
				printk(KERN_ERR "huh, entered softirq %u %s %p"
254
				       "with preempt_count %08x,"
255 256 257
				       " exited with %08x?\n", vec_nr,
				       softirq_to_name[vec_nr], h->action,
				       prev_count, preempt_count());
258 259 260
				preempt_count() = prev_count;
			}

261
			rcu_bh_qs(cpu);
L
Linus Torvalds 已提交
262 263 264 265 266
		}
		h++;
		pending >>= 1;
	} while (pending);

267
	local_irq_disable();
L
Linus Torvalds 已提交
268 269

	pending = local_softirq_pending();
E
Eric Dumazet 已提交
270
	if (pending) {
271 272
		if (time_before(jiffies, end) && !need_resched() &&
		    --max_restart)
E
Eric Dumazet 已提交
273
			goto restart;
L
Linus Torvalds 已提交
274 275

		wakeup_softirqd();
E
Eric Dumazet 已提交
276
	}
L
Linus Torvalds 已提交
277

278
	lockdep_softirq_exit();
279

280
	account_irq_exit_time(current);
281
	__local_bh_enable(SOFTIRQ_OFFSET);
282
	tsk_restore_flags(current, old_flags, PF_MEMALLOC);
L
Linus Torvalds 已提交
283 284
}

285

L
Linus Torvalds 已提交
286 287 288 289 290 291 292 293 294 295 296 297 298 299

/*
 * Process pending softirqs from task context.  Does nothing when already
 * in interrupt context (as reported by in_interrupt()).
 */
asmlinkage void do_softirq(void)
{
	__u32 pending;
	unsigned long flags;

	if (in_interrupt())
		return;

	local_irq_save(flags);

	pending = local_softirq_pending();

	if (pending)
		do_softirq_own_stack();

	/* Softirq processing must not leave softirq_count() raised. */
	WARN_ON_ONCE(softirq_count());
	local_irq_restore(flags);
}

I
Ingo Molnar 已提交
306 307 308 309 310
/*
 * Enter an interrupt context.
 */
void irq_enter(void)
{
311
	int cpu = smp_processor_id();
312

313
	rcu_irq_enter();
314
	if (is_idle_task(current) && !in_interrupt()) {
315 316 317 318 319
		/*
		 * Prevent raise_softirq from needlessly waking up ksoftirqd
		 * here, as softirq will be serviced on return from interrupt.
		 */
		local_bh_disable();
320
		tick_check_idle(cpu);
321 322 323 324
		_local_bh_enable();
	}

	__irq_enter();
I
Ingo Molnar 已提交
325 326
}

327 328
static inline void invoke_softirq(void)
{
329 330 331 332 333 334 335 336 337
	if (!force_irqthreads) {
		/*
		 * We can safely execute softirq on the current stack if
		 * it is the irq stack, because it should be near empty
		 * at this stage. But we have no way to know if the arch
		 * calls irq_exit() on the irq stack. So call softirq
		 * in its own stack to prevent from any overrun on top
		 * of a potentially deep task stack.
		 */
338
		do_softirq_own_stack();
339
	} else {
340
		wakeup_softirqd();
341
	}
342
}
L
Linus Torvalds 已提交
343

344 345 346 347 348 349 350 351 352 353 354 355 356
/*
 * Tick bookkeeping on interrupt exit.  Compiles to nothing unless
 * CONFIG_NO_HZ_COMMON is set.
 */
static inline void tick_irq_exit(void)
{
#ifdef CONFIG_NO_HZ_COMMON
	int this_cpu = smp_processor_id();

	/*
	 * Make sure that timer wheel updates are propagated: only relevant
	 * for an idle CPU with no reschedule pending, or a nohz-full CPU.
	 */
	if (!(idle_cpu(this_cpu) && !need_resched()) &&
	    !tick_nohz_full_cpu(this_cpu))
		return;

	/* Still in interrupt context: skip the tick update. */
	if (in_interrupt())
		return;

	tick_nohz_irq_exit();
#endif
}

L
Linus Torvalds 已提交
357 358 359 360 361
/*
 * Exit an interrupt context. Process softirqs if needed and possible:
 */
void irq_exit(void)
{
362
#ifndef __ARCH_IRQ_EXIT_IRQS_DISABLED
363
	local_irq_disable();
364 365 366 367
#else
	WARN_ON_ONCE(!irqs_disabled());
#endif

368
	account_irq_exit_time(current);
369
	trace_hardirq_exit();
370
	sub_preempt_count(HARDIRQ_OFFSET);
L
Linus Torvalds 已提交
371 372
	if (!in_interrupt() && local_softirq_pending())
		invoke_softirq();
373

374
	tick_irq_exit();
375
	rcu_irq_exit();
L
Linus Torvalds 已提交
376 377 378 379 380
}

/*
 * This function must run with irqs disabled!
 */
381
/*
 * Mark softirq @nr pending on this CPU and, when not in interrupt
 * context, wake ksoftirqd so it gets serviced.
 */
inline void raise_softirq_irqoff(unsigned int nr)
{
	__raise_softirq_irqoff(nr);

	/*
	 * If we're in an interrupt or softirq, we're done
	 * (this also catches softirq-disabled code). We will
	 * actually run the softirq once we return from
	 * the irq or softirq.
	 *
	 * Otherwise we wake up ksoftirqd to make sure we
	 * schedule the softirq soon.
	 */
	if (!in_interrupt())
		wakeup_softirqd();
}

398
/*
 * Raise softirq @nr from any context: wraps raise_softirq_irqoff() in a
 * local_irq_save()/local_irq_restore() pair.
 */
void raise_softirq(unsigned int nr)
{
	unsigned long flags;

	local_irq_save(flags);
	raise_softirq_irqoff(nr);
	local_irq_restore(flags);
}

407 408 409 410 411 412
/*
 * Emit the raise tracepoint and set bit @nr in the pending mask.  Unlike
 * raise_softirq_irqoff(), this never wakes ksoftirqd.
 */
void __raise_softirq_irqoff(unsigned int nr)
{
	const unsigned long pending_bit = 1UL << nr;

	trace_softirq_raise(nr);
	or_softirq_pending(pending_bit);
}

413
void open_softirq(int nr, void (*action)(struct softirq_action *))
L
Linus Torvalds 已提交
414 415 416 417
{
	softirq_vec[nr].action = action;
}

418 419 420
/*
 * Tasklets
 */
L
Linus Torvalds 已提交
421 422
/*
 * Singly linked tasklet queue.  @tail points at the ->next slot of the
 * last element, or at @head itself when the queue is empty, so appending
 * needs no list walk.
 */
struct tasklet_head
{
	struct tasklet_struct *head;
	struct tasklet_struct **tail;
};

427 428
/*
 * Per-CPU tasklet queues: tasklet_vec is drained by tasklet_action()
 * (TASKLET_SOFTIRQ), tasklet_hi_vec by tasklet_hi_action() (HI_SOFTIRQ).
 */
static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec);
static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec);
L
Linus Torvalds 已提交
429

430
void __tasklet_schedule(struct tasklet_struct *t)
L
Linus Torvalds 已提交
431 432 433 434
{
	unsigned long flags;

	local_irq_save(flags);
435
	t->next = NULL;
436 437
	*__this_cpu_read(tasklet_vec.tail) = t;
	__this_cpu_write(tasklet_vec.tail, &(t->next));
L
Linus Torvalds 已提交
438 439 440 441 442 443
	raise_softirq_irqoff(TASKLET_SOFTIRQ);
	local_irq_restore(flags);
}

EXPORT_SYMBOL(__tasklet_schedule);

444
void __tasklet_hi_schedule(struct tasklet_struct *t)
L
Linus Torvalds 已提交
445 446 447 448
{
	unsigned long flags;

	local_irq_save(flags);
449
	t->next = NULL;
450 451
	*__this_cpu_read(tasklet_hi_vec.tail) = t;
	__this_cpu_write(tasklet_hi_vec.tail,  &(t->next));
L
Linus Torvalds 已提交
452 453 454 455 456 457
	raise_softirq_irqoff(HI_SOFTIRQ);
	local_irq_restore(flags);
}

EXPORT_SYMBOL(__tasklet_hi_schedule);

458 459 460 461
void __tasklet_hi_schedule_first(struct tasklet_struct *t)
{
	BUG_ON(!irqs_disabled());

462 463
	t->next = __this_cpu_read(tasklet_hi_vec.head);
	__this_cpu_write(tasklet_hi_vec.head, t);
464 465 466 467 468
	__raise_softirq_irqoff(HI_SOFTIRQ);
}

EXPORT_SYMBOL(__tasklet_hi_schedule_first);

L
Linus Torvalds 已提交
469 470 471 472 473
static void tasklet_action(struct softirq_action *a)
{
	struct tasklet_struct *list;

	local_irq_disable();
474 475 476
	list = __this_cpu_read(tasklet_vec.head);
	__this_cpu_write(tasklet_vec.head, NULL);
	__this_cpu_write(tasklet_vec.tail, &__get_cpu_var(tasklet_vec).head);
L
Linus Torvalds 已提交
477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495
	local_irq_enable();

	while (list) {
		struct tasklet_struct *t = list;

		list = list->next;

		if (tasklet_trylock(t)) {
			if (!atomic_read(&t->count)) {
				if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
					BUG();
				t->func(t->data);
				tasklet_unlock(t);
				continue;
			}
			tasklet_unlock(t);
		}

		local_irq_disable();
496
		t->next = NULL;
497 498
		*__this_cpu_read(tasklet_vec.tail) = t;
		__this_cpu_write(tasklet_vec.tail, &(t->next));
L
Linus Torvalds 已提交
499 500 501 502 503 504 505 506 507 508
		__raise_softirq_irqoff(TASKLET_SOFTIRQ);
		local_irq_enable();
	}
}

static void tasklet_hi_action(struct softirq_action *a)
{
	struct tasklet_struct *list;

	local_irq_disable();
509 510 511
	list = __this_cpu_read(tasklet_hi_vec.head);
	__this_cpu_write(tasklet_hi_vec.head, NULL);
	__this_cpu_write(tasklet_hi_vec.tail, &__get_cpu_var(tasklet_hi_vec).head);
L
Linus Torvalds 已提交
512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530
	local_irq_enable();

	while (list) {
		struct tasklet_struct *t = list;

		list = list->next;

		if (tasklet_trylock(t)) {
			if (!atomic_read(&t->count)) {
				if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
					BUG();
				t->func(t->data);
				tasklet_unlock(t);
				continue;
			}
			tasklet_unlock(t);
		}

		local_irq_disable();
531
		t->next = NULL;
532 533
		*__this_cpu_read(tasklet_hi_vec.tail) = t;
		__this_cpu_write(tasklet_hi_vec.tail, &(t->next));
L
Linus Torvalds 已提交
534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557
		__raise_softirq_irqoff(HI_SOFTIRQ);
		local_irq_enable();
	}
}


/*
 * Prepare tasklet @t to call @func with argument @data.  The tasklet
 * starts unlinked, with no state bits set and a zero count.
 */
void tasklet_init(struct tasklet_struct *t,
		  void (*func)(unsigned long), unsigned long data)
{
	/* Callback and its argument. */
	t->func = func;
	t->data = data;

	/* Fresh bookkeeping: not queued, no state bits, count of zero. */
	atomic_set(&t->count, 0);
	t->state = 0;
	t->next = NULL;
}

EXPORT_SYMBOL(tasklet_init);

void tasklet_kill(struct tasklet_struct *t)
{
	if (in_interrupt())
		printk("Attempt to kill tasklet from interrupt\n");

	while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) {
558
		do {
L
Linus Torvalds 已提交
559
			yield();
560
		} while (test_bit(TASKLET_STATE_SCHED, &t->state));
L
Linus Torvalds 已提交
561 562 563 564 565 566 567
	}
	tasklet_unlock_wait(t);
	clear_bit(TASKLET_STATE_SCHED, &t->state);
}

EXPORT_SYMBOL(tasklet_kill);

568 569 570 571 572
/*
 * tasklet_hrtimer
 */

/*
 * The trampoline is called when the hrtimer expires. It schedules a tasklet
 * to run __tasklet_hrtimer_trampoline() which in turn will call the intended
 * hrtimer callback, but from softirq context.
 */
static enum hrtimer_restart __hrtimer_tasklet_trampoline(struct hrtimer *timer)
{
	struct tasklet_hrtimer *ttimer =
		container_of(timer, struct tasklet_hrtimer, timer);

582 583
	tasklet_hi_schedule(&ttimer->tasklet);
	return HRTIMER_NORESTART;
584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602
}

/*
 * Helper function which calls the hrtimer callback from
 * tasklet/softirq context
 */
static void __tasklet_hrtimer_trampoline(unsigned long data)
{
	struct tasklet_hrtimer *ttimer = (void *)data;

	/* Run the user's callback; nothing more to do unless it asks to re-arm. */
	if (ttimer->function(&ttimer->timer) == HRTIMER_NORESTART)
		return;

	hrtimer_restart(&ttimer->timer);
}

/**
 * tasklet_hrtimer_init - Init a tasklet/hrtimer combo for softirq callbacks
 * @ttimer:	 tasklet_hrtimer which is initialized
 * @function:	 hrtimer callback function which gets called from softirq context
 * @which_clock: clock id (CLOCK_MONOTONIC/CLOCK_REALTIME)
 * @mode:	 hrtimer mode (HRTIMER_MODE_ABS/HRTIMER_MODE_REL)
 */
void tasklet_hrtimer_init(struct tasklet_hrtimer *ttimer,
			  enum hrtimer_restart (*function)(struct hrtimer *),
			  clockid_t which_clock, enum hrtimer_mode mode)
{
	struct hrtimer *timer = &ttimer->timer;

	/* The hrtimer fires into the trampoline, not the user callback. */
	hrtimer_init(timer, which_clock, mode);
	timer->function = __hrtimer_tasklet_trampoline;

	/* The tasklet side is what finally invokes @function. */
	ttimer->function = function;
	tasklet_init(&ttimer->tasklet, __tasklet_hrtimer_trampoline,
		     (unsigned long)ttimer);
}
EXPORT_SYMBOL_GPL(tasklet_hrtimer_init);

/*
 * Remote softirq bits
 */

623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643
/*
 * Per-CPU array of work lists, one list per softirq number.  Entries are
 * added by __local_trigger(); the lists are initialised in softirq_init().
 */
DEFINE_PER_CPU(struct list_head [NR_SOFTIRQS], softirq_work_list);
EXPORT_PER_CPU_SYMBOL(softirq_work_list);

/*
 * Queue @cp at the tail of this CPU's work list for @softirq, raising the
 * softirq only when @cp became the first entry.
 */
static void __local_trigger(struct call_single_data *cp, int softirq)
{
	struct list_head *work_list = &__get_cpu_var(softirq_work_list[softirq]);
	struct list_head *entry = &cp->list;

	list_add_tail(entry, work_list);

	/* Something was already queued ahead of us: no new raise needed. */
	if (work_list->next != entry)
		return;

	raise_softirq_irqoff(softirq);
}

#ifdef CONFIG_USE_GENERIC_SMP_HELPERS
/*
 * Cross-CPU callback: queue the received work item on the local work list
 * of the softirq it was sent for.
 *
 * NOTE(review): like the original code, this treats @data as the
 * call_single_data itself.  Confirm against __smp_call_function_single();
 * if it passes cp->info as the argument instead, @cp cannot be recovered
 * here and the hand-off needs a different carrier.
 */
static void remote_softirq_receive(void *data)
{
	struct call_single_data *cp = data;
	unsigned long flags;
	int softirq;

	/* The softirq number travels by value inside the info pointer. */
	softirq = (int)(unsigned long)cp->info;
	local_irq_save(flags);
	__local_trigger(cp, softirq);
	local_irq_restore(flags);
}

/*
 * Try to hand @cp to @cpu.  Returns 0 when the request was sent and 1 when
 * @cpu is not online, in which case the caller queues the work locally.
 */
static int __try_remote_softirq(struct call_single_data *cp, int cpu, int softirq)
{
	if (cpu_online(cpu)) {
		cp->func = remote_softirq_receive;
		/*
		 * Store the number itself, not the address of our parameter:
		 * this function returns right after issuing the call, so a
		 * pointer into its stack frame would dangle by the time the
		 * receiver reads it.
		 */
		cp->info = (void *)(unsigned long)softirq;
		cp->flags = 0;

		__smp_call_function_single(cpu, cp, 0);
		return 0;
	}
	return 1;
}
#else /* CONFIG_USE_GENERIC_SMP_HELPERS */
/*
 * Stub used without CONFIG_USE_GENERIC_SMP_HELPERS: always returns 1, so
 * __send_remote_softirq() queues the work on the local CPU.
 */
static int __try_remote_softirq(struct call_single_data *cp, int cpu, int softirq)
{
	return 1;
}
#endif

/**
 * __send_remote_softirq - try to schedule softirq work on a remote cpu
 * @cp: private SMP call function data area
 * @cpu: the remote cpu
 * @this_cpu: the currently executing cpu
 * @softirq: the softirq for the work
 *
 * Attempt to schedule softirq work on a remote cpu.  If this cannot be
 * done, the work is instead queued up on the local cpu.
 *
 * Interrupts must be disabled.
 */
void __send_remote_softirq(struct call_single_data *cp, int cpu, int this_cpu, int softirq)
{
	/* A different CPU took the work: nothing left to do here. */
	if (cpu != this_cpu && !__try_remote_softirq(cp, cpu, softirq))
		return;

	/* Target is this CPU, or the remote hand-off was refused. */
	__local_trigger(cp, softirq);
}
EXPORT_SYMBOL(__send_remote_softirq);

/**
 * send_remote_softirq - try to schedule softirq work on a remote cpu
 * @cp: private SMP call function data area
 * @cpu: the remote cpu
 * @softirq: the softirq for the work
 *
 * Like __send_remote_softirq except that disabling interrupts and
 * computing the current cpu is done for the caller.
 */
void send_remote_softirq(struct call_single_data *cp, int cpu, int softirq)
{
	unsigned long irq_state;

	/* The CPU id is sampled with interrupts already masked. */
	local_irq_save(irq_state);
	__send_remote_softirq(cp, cpu, smp_processor_id(), softirq);
	local_irq_restore(irq_state);
}
EXPORT_SYMBOL(send_remote_softirq);

709
static int remote_softirq_cpu_notify(struct notifier_block *self,
710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737
					       unsigned long action, void *hcpu)
{
	/*
	 * If a CPU goes away, splice its entries to the current CPU
	 * and trigger a run of the softirq
	 */
	if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
		int cpu = (unsigned long) hcpu;
		int i;

		local_irq_disable();
		for (i = 0; i < NR_SOFTIRQS; i++) {
			struct list_head *head = &per_cpu(softirq_work_list[i], cpu);
			struct list_head *local_head;

			if (list_empty(head))
				continue;

			local_head = &__get_cpu_var(softirq_work_list[i]);
			list_splice_init(head, local_head);
			raise_softirq_irqoff(i);
		}
		local_irq_enable();
	}

	return NOTIFY_OK;
}

738
static struct notifier_block remote_softirq_cpu_notifier = {
739 740 741
	.notifier_call	= remote_softirq_cpu_notify,
};

L
Linus Torvalds 已提交
742 743
/*
 * Boot-time setup: empty the tasklet queues and softirq work lists of
 * every possible CPU, register the remote-softirq CPU notifier, and
 * install the two tasklet softirq handlers.
 */
void __init softirq_init(void)
{
	int cpu;

	for_each_possible_cpu(cpu) {
		int i;

		/* An empty queue has its tail pointing back at its head. */
		per_cpu(tasklet_vec, cpu).tail =
			&per_cpu(tasklet_vec, cpu).head;
		per_cpu(tasklet_hi_vec, cpu).tail =
			&per_cpu(tasklet_hi_vec, cpu).head;
		for (i = 0; i < NR_SOFTIRQS; i++)
			INIT_LIST_HEAD(&per_cpu(softirq_work_list[i], cpu));
	}

	register_hotcpu_notifier(&remote_softirq_cpu_notifier);

	open_softirq(TASKLET_SOFTIRQ, tasklet_action);
	open_softirq(HI_SOFTIRQ, tasklet_hi_action);
}

763
/*
 * thread_should_run hook for ksoftirqd: non-zero while softirqs are
 * pending on the executing CPU.  @cpu is unused.
 */
static int ksoftirqd_should_run(unsigned int cpu)
{
	return local_softirq_pending();
}
L
Linus Torvalds 已提交
767

768 769 770 771 772 773 774 775 776
/*
 * thread_fn hook for ksoftirqd: process pending softirqs once, then give
 * the scheduler a chance to run something else.
 */
static void run_ksoftirqd(unsigned int cpu)
{
	/* The pending check and __do_softirq() both need irqs off. */
	local_irq_disable();
	if (local_softirq_pending()) {
		__do_softirq();
		rcu_note_context_switch(cpu);
		local_irq_enable();
		cond_resched();
		return;
	}
	local_irq_enable();
}

#ifdef CONFIG_HOTPLUG_CPU
/*
 * tasklet_kill_immediate is called to remove a tasklet which can already be
 * scheduled for execution on @cpu.
 *
 * Unlike tasklet_kill, this function removes the tasklet
 * _immediately_, even if the tasklet is in TASKLET_STATE_SCHED state.
 *
 * When this function is called, @cpu must be in the CPU_DEAD state.
 */
void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu)
{
	struct tasklet_struct **i;

	BUG_ON(cpu_online(cpu));
	BUG_ON(test_bit(TASKLET_STATE_RUN, &t->state));

	if (!test_bit(TASKLET_STATE_SCHED, &t->state))
		return;

	/* CPU is dead, so no lock needed. */
802
	for (i = &per_cpu(tasklet_vec, cpu).head; *i; i = &(*i)->next) {
L
Linus Torvalds 已提交
803 804
		if (*i == t) {
			*i = t->next;
805 806 807
			/* If this was the tail element, move the tail ptr */
			if (*i == NULL)
				per_cpu(tasklet_vec, cpu).tail = i;
L
Linus Torvalds 已提交
808 809 810 811 812 813 814 815 816 817 818 819
			return;
		}
	}
	BUG();
}

static void takeover_tasklets(unsigned int cpu)
{
	/* CPU is dead, so no lock needed. */
	local_irq_disable();

	/* Find end, append list for that CPU. */
820
	if (&per_cpu(tasklet_vec, cpu).head != per_cpu(tasklet_vec, cpu).tail) {
821 822
		*__this_cpu_read(tasklet_vec.tail) = per_cpu(tasklet_vec, cpu).head;
		this_cpu_write(tasklet_vec.tail, per_cpu(tasklet_vec, cpu).tail);
823 824 825
		per_cpu(tasklet_vec, cpu).head = NULL;
		per_cpu(tasklet_vec, cpu).tail = &per_cpu(tasklet_vec, cpu).head;
	}
L
Linus Torvalds 已提交
826 827
	raise_softirq_irqoff(TASKLET_SOFTIRQ);

828
	if (&per_cpu(tasklet_hi_vec, cpu).head != per_cpu(tasklet_hi_vec, cpu).tail) {
829 830
		*__this_cpu_read(tasklet_hi_vec.tail) = per_cpu(tasklet_hi_vec, cpu).head;
		__this_cpu_write(tasklet_hi_vec.tail, per_cpu(tasklet_hi_vec, cpu).tail);
831 832 833
		per_cpu(tasklet_hi_vec, cpu).head = NULL;
		per_cpu(tasklet_hi_vec, cpu).tail = &per_cpu(tasklet_hi_vec, cpu).head;
	}
L
Linus Torvalds 已提交
834 835 836 837 838 839
	raise_softirq_irqoff(HI_SOFTIRQ);

	local_irq_enable();
}
#endif /* CONFIG_HOTPLUG_CPU */

840
static int cpu_callback(struct notifier_block *nfb,
L
Linus Torvalds 已提交
841 842 843 844 845 846
				  unsigned long action,
				  void *hcpu)
{
	switch (action) {
#ifdef CONFIG_HOTPLUG_CPU
	case CPU_DEAD:
847 848
	case CPU_DEAD_FROZEN:
		takeover_tasklets((unsigned long)hcpu);
L
Linus Torvalds 已提交
849 850
		break;
#endif /* CONFIG_HOTPLUG_CPU */
851
	}
L
Linus Torvalds 已提交
852 853 854
	return NOTIFY_OK;
}

855
static struct notifier_block cpu_nfb = {
L
Linus Torvalds 已提交
856 857 858
	.notifier_call = cpu_callback
};

859 860 861 862 863 864 865
/*
 * Per-CPU thread descriptor for ksoftirqd, registered from
 * spawn_ksoftirqd().  The task pointer is stored in the ksoftirqd per-CPU
 * variable, which wakeup_softirqd() reads.
 */
static struct smp_hotplug_thread softirq_threads = {
	.store			= &ksoftirqd,
	.thread_should_run	= ksoftirqd_should_run,
	.thread_fn		= run_ksoftirqd,
	.thread_comm		= "ksoftirqd/%u",
};

866
static __init int spawn_ksoftirqd(void)
L
Linus Torvalds 已提交
867 868
{
	register_cpu_notifier(&cpu_nfb);
869 870 871

	BUG_ON(smpboot_register_percpu_thread(&softirq_threads));

L
Linus Torvalds 已提交
872 873
	return 0;
}
874
early_initcall(spawn_ksoftirqd);
875

876 877 878 879 880 881 882 883 884 885
/*
 * [ These __weak aliases are kept in a separate compilation unit, so that
 *   GCC does not inline them incorrectly. ]
 */

/* Weak default: does nothing and returns 0. */
int __init __weak early_irq_init(void)
{
	return 0;
}

Y
Yinghai Lu 已提交
886 887
/* Weak default: returns NR_IRQS_LEGACY. */
int __init __weak arch_probe_nr_irqs(void)
{
	return NR_IRQS_LEGACY;
}

891 892 893 894
/* Weak default: does nothing and returns 0. */
int __init __weak arch_early_irq_init(void)
{
	return 0;
}