/*
 * linux/kernel/workqueue.c
 *
 * Generic mechanism for defining kernel helper threads for running
 * arbitrary tasks in process context.
 *
 * Started by Ingo Molnar, Copyright (C) 2002
 *
 * Derived from the taskqueue/keventd code by:
 *
 *   David Woodhouse <dwmw2@infradead.org>
 *   Andrew Morton
 *   Kai Petzke <wpp@marie.physik.tu-berlin.de>
 *   Theodore Ts'o <tytso@mit.edu>
 *
 * Made to use alloc_percpu by Christoph Lameter.
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/init.h>
#include <linux/signal.h>
#include <linux/completion.h>
#include <linux/workqueue.h>
#include <linux/slab.h>
#include <linux/cpu.h>
#include <linux/notifier.h>
#include <linux/kthread.h>
#include <linux/hardirq.h>
#include <linux/mempolicy.h>
#include <linux/freezer.h>
#include <linux/kallsyms.h>
#include <linux/debug_locks.h>
#include <linux/lockdep.h>

/*
 * The per-CPU workqueue (if single thread, we always use the first
 * possible cpu).
 */
struct cpu_workqueue_struct {

	spinlock_t lock;

	struct list_head worklist;
	wait_queue_head_t more_work;
	struct work_struct *current_work;

	struct workqueue_struct *wq;
	struct task_struct *thread;

	int run_depth;		/* Detect run_workqueue() recursion depth */
} ____cacheline_aligned;

/*
 * The externally visible workqueue abstraction is an array of
 * per-CPU workqueues:
 */
struct workqueue_struct {
	struct cpu_workqueue_struct *cpu_wq;
	struct list_head list;
	const char *name;
	int singlethread;
	int freezeable;		/* Freeze threads during suspend */
	int rt;
#ifdef CONFIG_LOCKDEP
	struct lockdep_map lockdep_map;
#endif
};

/* Serializes the accesses to the list of workqueues. */
static DEFINE_SPINLOCK(workqueue_lock);
static LIST_HEAD(workqueues);

static int singlethread_cpu __read_mostly;
static const struct cpumask *cpu_singlethread_map __read_mostly;
/*
 * _cpu_down() first removes CPU from cpu_online_map, then CPU_DEAD
 * flushes cwq->worklist. This means that a flush_workqueue() or
 * wait_on_work() which comes in between can't use for_each_online_cpu().
 * We could use cpu_possible_map; the cpumask below is more documentation
 * than optimization.
 */
static cpumask_var_t cpu_populated_map __read_mostly;

/* If it's single threaded, it isn't in the list of workqueues. */
static inline int is_wq_single_threaded(struct workqueue_struct *wq)
{
	return wq->singlethread;
}

static const struct cpumask *wq_cpu_map(struct workqueue_struct *wq)
{
	return is_wq_single_threaded(wq)
		? cpu_singlethread_map : cpu_populated_map;
}

static
struct cpu_workqueue_struct *wq_per_cpu(struct workqueue_struct *wq, int cpu)
{
	if (unlikely(is_wq_single_threaded(wq)))
		cpu = singlethread_cpu;
	return per_cpu_ptr(wq->cpu_wq, cpu);
}

/*
 * Set the workqueue on which a work item is to be run
 * - Must *only* be called if the pending flag is set
 */
static inline void set_wq_data(struct work_struct *work,
				struct cpu_workqueue_struct *cwq)
{
	unsigned long new;

	BUG_ON(!work_pending(work));

	new = (unsigned long) cwq | (1UL << WORK_STRUCT_PENDING);
	new |= WORK_STRUCT_FLAG_MASK & *work_data_bits(work);
	atomic_long_set(&work->data, new);
}

static inline
struct cpu_workqueue_struct *get_wq_data(struct work_struct *work)
{
	return (void *) (atomic_long_read(&work->data) & WORK_STRUCT_WQ_DATA_MASK);
}

static void insert_work(struct cpu_workqueue_struct *cwq,
			struct work_struct *work, struct list_head *head)
{
	set_wq_data(work, cwq);
	/*
	 * Ensure that we get the right work->data if we see the
	 * result of list_add() below, see try_to_grab_pending().
	 */
	smp_wmb();
	list_add_tail(&work->entry, head);
	wake_up(&cwq->more_work);
}

static void __queue_work(struct cpu_workqueue_struct *cwq,
			 struct work_struct *work)
{
	unsigned long flags;

	spin_lock_irqsave(&cwq->lock, flags);
	insert_work(cwq, work, &cwq->worklist);
	spin_unlock_irqrestore(&cwq->lock, flags);
}

/**
 * queue_work - queue work on a workqueue
 * @wq: workqueue to use
 * @work: work to queue
 *
 * Returns 0 if @work was already on a queue, non-zero otherwise.
 *
 * We queue the work to the CPU on which it was submitted, but if the CPU dies
 * it can be processed by another CPU.
 */
int queue_work(struct workqueue_struct *wq, struct work_struct *work)
{
	int ret;

	ret = queue_work_on(get_cpu(), wq, work);
	put_cpu();

	return ret;
}
EXPORT_SYMBOL_GPL(queue_work);
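
/*
 * Illustrative sketch (not part of this file): a typical caller embeds a
 * work_struct in its own object, initializes it once and queues it on a
 * workqueue it created earlier.  "struct my_dev", my_work_fn() and my_wq
 * are made-up names.
 *
 *	static void my_work_fn(struct work_struct *work)
 *	{
 *		struct my_dev *dev = container_of(work, struct my_dev, work);
 *		// runs in process context and may sleep
 *	}
 *
 *	INIT_WORK(&dev->work, my_work_fn);
 *	if (!queue_work(my_wq, &dev->work))
 *		;	// already pending, it will still run exactly once
 */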

/**
 * queue_work_on - queue work on specific cpu
 * @cpu: CPU number to execute work on
 * @wq: workqueue to use
 * @work: work to queue
 *
 * Returns 0 if @work was already on a queue, non-zero otherwise.
 *
 * We queue the work to a specific CPU, the caller must ensure it
 * can't go away.
 */
int
queue_work_on(int cpu, struct workqueue_struct *wq, struct work_struct *work)
{
	int ret = 0;

	if (!test_and_set_bit(WORK_STRUCT_PENDING, work_data_bits(work))) {
		BUG_ON(!list_empty(&work->entry));
		__queue_work(wq_per_cpu(wq, cpu), work);
		ret = 1;
	}
	return ret;
}
EXPORT_SYMBOL_GPL(queue_work_on);

static void delayed_work_timer_fn(unsigned long __data)
{
	struct delayed_work *dwork = (struct delayed_work *)__data;
	struct cpu_workqueue_struct *cwq = get_wq_data(&dwork->work);
	struct workqueue_struct *wq = cwq->wq;

	__queue_work(wq_per_cpu(wq, smp_processor_id()), &dwork->work);
}

/**
 * queue_delayed_work - queue work on a workqueue after delay
 * @wq: workqueue to use
 * @dwork: delayable work to queue
 * @delay: number of jiffies to wait before queueing
 *
 * Returns 0 if @dwork was already on a queue, non-zero otherwise.
 */
int queue_delayed_work(struct workqueue_struct *wq,
			struct delayed_work *dwork, unsigned long delay)
{
	if (delay == 0)
		return queue_work(wq, &dwork->work);

	return queue_delayed_work_on(-1, wq, dwork, delay);
}
EXPORT_SYMBOL_GPL(queue_delayed_work);

/**
 * queue_delayed_work_on - queue work on specific CPU after delay
 * @cpu: CPU number to execute work on
 * @wq: workqueue to use
 * @dwork: work to queue
 * @delay: number of jiffies to wait before queueing
 *
 * Returns 0 if @dwork was already on a queue, non-zero otherwise.
 */
int queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
			struct delayed_work *dwork, unsigned long delay)
{
	int ret = 0;
	struct timer_list *timer = &dwork->timer;
	struct work_struct *work = &dwork->work;

	if (!test_and_set_bit(WORK_STRUCT_PENDING, work_data_bits(work))) {
		BUG_ON(timer_pending(timer));
		BUG_ON(!list_empty(&work->entry));

		timer_stats_timer_set_start_info(&dwork->timer);

		/* This stores cwq for the moment, for the timer_fn */
		set_wq_data(work, wq_per_cpu(wq, raw_smp_processor_id()));
		timer->expires = jiffies + delay;
		timer->data = (unsigned long)dwork;
		timer->function = delayed_work_timer_fn;

		if (unlikely(cpu >= 0))
			add_timer_on(timer, cpu);
		else
			add_timer(timer);
		ret = 1;
	}
	return ret;
}
EXPORT_SYMBOL_GPL(queue_delayed_work_on);
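
/*
 * Illustrative sketch (not part of this file): delayed work uses a
 * delayed_work container so the timer and the work item travel together.
 * my_timeout_fn(), my_wq and "dev" are made-up names.
 *
 *	INIT_DELAYED_WORK(&dev->dwork, my_timeout_fn);
 *	queue_delayed_work(my_wq, &dev->dwork, msecs_to_jiffies(100));
 *	// or pin both the timer and the execution to one CPU:
 *	queue_delayed_work_on(2, my_wq, &dev->dwork, HZ);
 */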

static void run_workqueue(struct cpu_workqueue_struct *cwq)
{
	spin_lock_irq(&cwq->lock);
	cwq->run_depth++;
	if (cwq->run_depth > 3) {
		/* morton gets to eat his hat */
		printk("%s: recursion depth exceeded: %d\n",
			__func__, cwq->run_depth);
		dump_stack();
	}
	while (!list_empty(&cwq->worklist)) {
		struct work_struct *work = list_entry(cwq->worklist.next,
						struct work_struct, entry);
		work_func_t f = work->func;
#ifdef CONFIG_LOCKDEP
		/*
		 * It is permissible to free the struct work_struct
		 * from inside the function that is called from it,
		 * this we need to take into account for lockdep too.
		 * To avoid bogus "held lock freed" warnings as well
		 * as problems when looking into work->lockdep_map,
		 * make a copy and use that here.
		 */
		struct lockdep_map lockdep_map = work->lockdep_map;
#endif

		cwq->current_work = work;
		list_del_init(cwq->worklist.next);
		spin_unlock_irq(&cwq->lock);

		BUG_ON(get_wq_data(work) != cwq);
		work_clear_pending(work);
		lock_map_acquire(&cwq->wq->lockdep_map);
		lock_map_acquire(&lockdep_map);
		f(work);
		lock_map_release(&lockdep_map);
		lock_map_release(&cwq->wq->lockdep_map);

		if (unlikely(in_atomic() || lockdep_depth(current) > 0)) {
			printk(KERN_ERR "BUG: workqueue leaked lock or atomic: "
					"%s/0x%08x/%d\n",
					current->comm, preempt_count(),
				       	task_pid_nr(current));
			printk(KERN_ERR "    last function: ");
			print_symbol("%s\n", (unsigned long)f);
			debug_show_held_locks(current);
			dump_stack();
		}

		spin_lock_irq(&cwq->lock);
		cwq->current_work = NULL;
	}
	cwq->run_depth--;
	spin_unlock_irq(&cwq->lock);
}

static int worker_thread(void *__cwq)
{
	struct cpu_workqueue_struct *cwq = __cwq;
	DEFINE_WAIT(wait);

	if (cwq->wq->freezeable)
		set_freezable();

	set_user_nice(current, -5);

	for (;;) {
		prepare_to_wait(&cwq->more_work, &wait, TASK_INTERRUPTIBLE);
		if (!freezing(current) &&
		    !kthread_should_stop() &&
		    list_empty(&cwq->worklist))
			schedule();
		finish_wait(&cwq->more_work, &wait);

		try_to_freeze();

		if (kthread_should_stop())
			break;

		run_workqueue(cwq);
	}

	return 0;
}

struct wq_barrier {
	struct work_struct	work;
	struct completion	done;
};

static void wq_barrier_func(struct work_struct *work)
{
	struct wq_barrier *barr = container_of(work, struct wq_barrier, work);
	complete(&barr->done);
}

static void insert_wq_barrier(struct cpu_workqueue_struct *cwq,
			struct wq_barrier *barr, struct list_head *head)
{
	INIT_WORK(&barr->work, wq_barrier_func);
	__set_bit(WORK_STRUCT_PENDING, work_data_bits(&barr->work));

	init_completion(&barr->done);

	insert_work(cwq, &barr->work, head);
}

static int flush_cpu_workqueue(struct cpu_workqueue_struct *cwq)
{
	int active;

	if (cwq->thread == current) {
		/*
		 * Probably keventd trying to flush its own queue. So simply run
		 * it by hand rather than deadlocking.
		 */
		run_workqueue(cwq);
		active = 1;
	} else {
		struct wq_barrier barr;

		active = 0;
		active = 0;
		spin_lock_irq(&cwq->lock);
		if (!list_empty(&cwq->worklist) || cwq->current_work != NULL) {
			insert_wq_barrier(cwq, &barr, &cwq->worklist);
			active = 1;
		}
		spin_unlock_irq(&cwq->lock);

		if (active)
			wait_for_completion(&barr.done);
	}

	return active;
}

/**
 * flush_workqueue - ensure that any scheduled work has run to completion.
 * @wq: workqueue to flush
 *
 * Forces execution of the workqueue and blocks until its completion.
 * This is typically used in driver shutdown handlers.
 *
 * We sleep until all works which were queued on entry have been handled,
 * but we are not livelocked by new incoming ones.
 *
 * This function used to run the workqueues itself.  Now we just wait for the
 * helper threads to do it.
 */
void flush_workqueue(struct workqueue_struct *wq)
{
	const struct cpumask *cpu_map = wq_cpu_map(wq);
	int cpu;

	might_sleep();
	lock_map_acquire(&wq->lockdep_map);
	lock_map_release(&wq->lockdep_map);
	for_each_cpu_mask_nr(cpu, *cpu_map)
		flush_cpu_workqueue(per_cpu_ptr(wq->cpu_wq, cpu));
}
EXPORT_SYMBOL_GPL(flush_workqueue);
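
/*
 * Illustrative sketch (not part of this file): a driver typically flushes
 * its queue on teardown, after making sure nothing requeues new work, and
 * only then frees the objects the handlers touch.  my_wq and "dev" are
 * made-up names.
 *
 *	dev->shutting_down = 1;		// stop requeueing first
 *	flush_workqueue(my_wq);		// wait for everything already queued
 *	kfree(dev);
 */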

/**
 * flush_work - block until a work_struct's callback has terminated
 * @work: the work which is to be flushed
 *
 * Returns false if @work has already terminated.
 *
 * It is expected that, prior to calling flush_work(), the caller has
 * arranged for the work to not be requeued, otherwise it doesn't make
 * sense to use this function.
 */
int flush_work(struct work_struct *work)
{
	struct cpu_workqueue_struct *cwq;
	struct list_head *prev;
	struct wq_barrier barr;

	might_sleep();
	cwq = get_wq_data(work);
	if (!cwq)
		return 0;

	lock_map_acquire(&cwq->wq->lockdep_map);
	lock_map_release(&cwq->wq->lockdep_map);

	prev = NULL;
	spin_lock_irq(&cwq->lock);
	if (!list_empty(&work->entry)) {
		/*
		 * See the comment near try_to_grab_pending()->smp_rmb().
		 * If it was re-queued under us we are not going to wait.
		 */
		smp_rmb();
		if (unlikely(cwq != get_wq_data(work)))
			goto out;
		prev = &work->entry;
	} else {
		if (cwq->current_work != work)
			goto out;
		prev = &cwq->worklist;
	}
	insert_wq_barrier(cwq, &barr, prev->next);
out:
	spin_unlock_irq(&cwq->lock);
	if (!prev)
		return 0;

	wait_for_completion(&barr.done);
	return 1;
}
EXPORT_SYMBOL_GPL(flush_work);
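
/*
 * Illustrative sketch (not part of this file): flush_work() waits for one
 * specific item instead of the whole queue; the caller must already have
 * stopped requeueing it.  "dev" is a made-up name.
 *
 *	dev->stop = 1;			// handler checks this and won't requeue
 *	if (flush_work(&dev->work))
 *		pr_debug("work was queued or running, now finished\n");
 */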

/*
 * Upon a successful return (>= 0), the caller "owns" WORK_STRUCT_PENDING bit,
 * so this work can't be re-armed in any way.
 */
static int try_to_grab_pending(struct work_struct *work)
{
	struct cpu_workqueue_struct *cwq;
	int ret = -1;

	if (!test_and_set_bit(WORK_STRUCT_PENDING, work_data_bits(work)))
		return 0;

	/*
	 * The queueing is in progress, or it is already queued. Try to
	 * steal it from ->worklist without clearing WORK_STRUCT_PENDING.
	 */

	cwq = get_wq_data(work);
	if (!cwq)
		return ret;

	spin_lock_irq(&cwq->lock);
	if (!list_empty(&work->entry)) {
		/*
		 * This work is queued, but perhaps we locked the wrong cwq.
		 * In that case we must see the new value after rmb(), see
		 * insert_work()->wmb().
		 */
		smp_rmb();
		if (cwq == get_wq_data(work)) {
			list_del_init(&work->entry);
			ret = 1;
		}
	}
	spin_unlock_irq(&cwq->lock);

	return ret;
}

static void wait_on_cpu_work(struct cpu_workqueue_struct *cwq,
				struct work_struct *work)
{
	struct wq_barrier barr;
	int running = 0;

	spin_lock_irq(&cwq->lock);
	if (unlikely(cwq->current_work == work)) {
		insert_wq_barrier(cwq, &barr, cwq->worklist.next);
		running = 1;
	}
	spin_unlock_irq(&cwq->lock);

	if (unlikely(running))
		wait_for_completion(&barr.done);
}

static void wait_on_work(struct work_struct *work)
{
	struct cpu_workqueue_struct *cwq;
	struct workqueue_struct *wq;
	const struct cpumask *cpu_map;
	int cpu;

	might_sleep();

	lock_map_acquire(&work->lockdep_map);
	lock_map_release(&work->lockdep_map);

	cwq = get_wq_data(work);
	if (!cwq)
		return;

	wq = cwq->wq;
	cpu_map = wq_cpu_map(wq);

	for_each_cpu_mask_nr(cpu, *cpu_map)
		wait_on_cpu_work(per_cpu_ptr(wq->cpu_wq, cpu), work);
}

static int __cancel_work_timer(struct work_struct *work,
				struct timer_list* timer)
{
	int ret;

	do {
		ret = (timer && likely(del_timer(timer)));
		if (!ret)
			ret = try_to_grab_pending(work);
		wait_on_work(work);
	} while (unlikely(ret < 0));

	work_clear_pending(work);
	return ret;
}

/**
 * cancel_work_sync - block until a work_struct's callback has terminated
 * @work: the work which is to be flushed
 *
 * Returns true if @work was pending.
 *
 * cancel_work_sync() will cancel the work if it is queued. If the work's
 * callback appears to be running, cancel_work_sync() will block until it
 * has completed.
 *
 * It is possible to use this function if the work re-queues itself. It can
 * cancel the work even if it migrates to another workqueue, however in that
 * case it only guarantees that work->func() has completed on the last queued
 * workqueue.
 *
 * cancel_work_sync(&delayed_work->work) should be used only if ->timer is not
 * pending, otherwise it goes into a busy-wait loop until the timer expires.
 *
 * The caller must ensure that workqueue_struct on which this work was last
 * queued can't be destroyed before this function returns.
 */
int cancel_work_sync(struct work_struct *work)
{
	return __cancel_work_timer(work, NULL);
}
EXPORT_SYMBOL_GPL(cancel_work_sync);

/**
 * cancel_delayed_work_sync - reliably kill off a delayed work.
 * @dwork: the delayed work struct
 *
 * Returns true if @dwork was pending.
 *
 * It is possible to use this function if @dwork rearms itself via queue_work()
 * or queue_delayed_work(). See also the comment for cancel_work_sync().
 */
int cancel_delayed_work_sync(struct delayed_work *dwork)
{
	return __cancel_work_timer(&dwork->work, &dwork->timer);
}
EXPORT_SYMBOL(cancel_delayed_work_sync);
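
/*
 * Illustrative sketch (not part of this file): the usual teardown order is
 * to cancel synchronously, which handles a pending timer, a queued item and
 * a currently running handler in one call.  "dev" and its members are
 * made-up names.
 *
 *	cancel_delayed_work_sync(&dev->poll_work);	// timer + work, waits
 *	cancel_work_sync(&dev->irq_work);		// plain work, waits
 *	// both items are now idle and may be freed with their container
 */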

static struct workqueue_struct *keventd_wq __read_mostly;

/**
 * schedule_work - put work task in global workqueue
 * @work: job to be done
 *
 * This puts a job in the kernel-global workqueue.
 */
int schedule_work(struct work_struct *work)
{
	return queue_work(keventd_wq, work);
}
EXPORT_SYMBOL(schedule_work);
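
/*
 * Illustrative sketch (not part of this file): schedule_work() is simply
 * queue_work() on the shared keventd_wq ("events/N" threads), for callers
 * that do not want a private workqueue.  my_work_fn() is a made-up name.
 *
 *	static DECLARE_WORK(my_work, my_work_fn);
 *
 *	schedule_work(&my_work);	// runs on the CPU that queued it
 */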

/**
 * schedule_work_on - put work task on a specific cpu
 * @cpu: cpu to put the work task on
 * @work: job to be done
 *
 * This puts a job on a specific cpu
 */
int schedule_work_on(int cpu, struct work_struct *work)
{
	return queue_work_on(cpu, keventd_wq, work);
}
EXPORT_SYMBOL(schedule_work_on);

/**
 * schedule_delayed_work - put work task in global workqueue after delay
 * @dwork: job to be done
 * @delay: number of jiffies to wait or 0 for immediate execution
 *
 * After waiting for a given time this puts a job in the kernel-global
 * workqueue.
 */
int schedule_delayed_work(struct delayed_work *dwork,
					unsigned long delay)
{
	return queue_delayed_work(keventd_wq, dwork, delay);
}
EXPORT_SYMBOL(schedule_delayed_work);

/**
 * schedule_delayed_work_on - queue work in global workqueue on CPU after delay
 * @cpu: cpu to use
 * @dwork: job to be done
 * @delay: number of jiffies to wait
 *
 * After waiting for a given time this puts a job in the kernel-global
 * workqueue on the specified CPU.
 */
int schedule_delayed_work_on(int cpu,
			struct delayed_work *dwork, unsigned long delay)
{
	return queue_delayed_work_on(cpu, keventd_wq, dwork, delay);
}
EXPORT_SYMBOL(schedule_delayed_work_on);

/**
 * schedule_on_each_cpu - call a function on each online CPU from keventd
 * @func: the function to call
 *
 * Returns zero on success.
 * Returns -ve errno on failure.
 *
 * schedule_on_each_cpu() is very slow.
 */
int schedule_on_each_cpu(work_func_t func)
{
	int cpu;
	struct work_struct *works;

	works = alloc_percpu(struct work_struct);
	if (!works)
		return -ENOMEM;

	get_online_cpus();
	for_each_online_cpu(cpu) {
		struct work_struct *work = per_cpu_ptr(works, cpu);

		INIT_WORK(work, func);
		schedule_work_on(cpu, work);
	}
	for_each_online_cpu(cpu)
		flush_work(per_cpu_ptr(works, cpu));
	put_online_cpus();
	free_percpu(works);
	return 0;
}
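
/*
 * Illustrative sketch (not part of this file): schedule_on_each_cpu() runs
 * the handler once on every online CPU and only returns after all of them
 * have finished, which is handy for draining per-CPU caches.
 * my_drain_fn() and "err" are made-up names.
 *
 *	static void my_drain_fn(struct work_struct *unused)
 *	{
 *		// touches only this CPU's data
 *	}
 *
 *	err = schedule_on_each_cpu(my_drain_fn);  // -ENOMEM if alloc fails
 */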

void flush_scheduled_work(void)
{
	flush_workqueue(keventd_wq);
}
EXPORT_SYMBOL(flush_scheduled_work);

/**
 * execute_in_process_context - reliably execute the routine with user context
 * @fn:		the function to execute
 * @ew:		guaranteed storage for the execute work structure (must
 *		be available when the work executes)
 *
 * Executes the function immediately if process context is available,
 * otherwise schedules the function for delayed execution.
 *
 * Returns:	0 - function was executed
 *		1 - function was scheduled for execution
 */
int execute_in_process_context(work_func_t fn, struct execute_work *ew)
{
	if (!in_interrupt()) {
		fn(&ew->work);
		return 0;
	}

	INIT_WORK(&ew->work, fn);
	schedule_work(&ew->work);

	return 1;
}
EXPORT_SYMBOL_GPL(execute_in_process_context);
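
/*
 * Illustrative sketch (not part of this file): callers that may be in either
 * interrupt or process context can use execute_in_process_context() with an
 * execute_work area embedded in a long-lived object, so deferred execution
 * has storage that outlives the caller.  "dev" and my_release_fn() are
 * made-up names.
 *
 *	// struct my_dev { ... struct execute_work ew; ... };
 *	execute_in_process_context(my_release_fn, &dev->ew);
 *	// ran inline if we were in process context, otherwise via keventd
 */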

int keventd_up(void)
{
	return keventd_wq != NULL;
}

int current_is_keventd(void)
{
	struct cpu_workqueue_struct *cwq;
	int cpu = raw_smp_processor_id(); /* preempt-safe: keventd is per-cpu */
	int ret = 0;

	BUG_ON(!keventd_wq);

	cwq = per_cpu_ptr(keventd_wq->cpu_wq, cpu);
	if (current == cwq->thread)
		ret = 1;

	return ret;
}

static struct cpu_workqueue_struct *
init_cpu_workqueue(struct workqueue_struct *wq, int cpu)
{
	struct cpu_workqueue_struct *cwq = per_cpu_ptr(wq->cpu_wq, cpu);

	cwq->wq = wq;
	spin_lock_init(&cwq->lock);
	INIT_LIST_HEAD(&cwq->worklist);
	init_waitqueue_head(&cwq->more_work);

	return cwq;
}

static int create_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu)
{
	struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
	struct workqueue_struct *wq = cwq->wq;
	const char *fmt = is_wq_single_threaded(wq) ? "%s" : "%s/%d";
	struct task_struct *p;

	p = kthread_create(worker_thread, cwq, fmt, wq->name, cpu);
	/*
	 * Nobody can add the work_struct to this cwq,
	 *	if (caller is __create_workqueue)
	 *		nobody should see this wq
	 *	else // caller is CPU_UP_PREPARE
	 *		cpu is not on cpu_online_map
	 * so we can abort safely.
	 */
	if (IS_ERR(p))
		return PTR_ERR(p);
	if (cwq->wq->rt)
		sched_setscheduler_nocheck(p, SCHED_FIFO, &param);
	cwq->thread = p;

	return 0;
}

static void start_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu)
{
	struct task_struct *p = cwq->thread;

	if (p != NULL) {
		if (cpu >= 0)
			kthread_bind(p, cpu);
		wake_up_process(p);
	}
}

struct workqueue_struct *__create_workqueue_key(const char *name,
						int singlethread,
						int freezeable,
						int rt,
						struct lock_class_key *key,
						const char *lock_name)
{
	struct workqueue_struct *wq;
	struct cpu_workqueue_struct *cwq;
	int err = 0, cpu;

	wq = kzalloc(sizeof(*wq), GFP_KERNEL);
	if (!wq)
		return NULL;

	wq->cpu_wq = alloc_percpu(struct cpu_workqueue_struct);
	if (!wq->cpu_wq) {
		kfree(wq);
		return NULL;
	}

	wq->name = name;
	lockdep_init_map(&wq->lockdep_map, lock_name, key, 0);
	wq->singlethread = singlethread;
	wq->freezeable = freezeable;
	wq->rt = rt;
	INIT_LIST_HEAD(&wq->list);

	if (singlethread) {
		cwq = init_cpu_workqueue(wq, singlethread_cpu);
		err = create_workqueue_thread(cwq, singlethread_cpu);
		start_workqueue_thread(cwq, -1);
	} else {
		cpu_maps_update_begin();
		/*
		 * We must place this wq on list even if the code below fails.
		 * cpu_down(cpu) can remove cpu from cpu_populated_map before
		 * destroy_workqueue() takes the lock, in that case we leak
		 * cwq[cpu]->thread.
		 */
		spin_lock(&workqueue_lock);
		list_add(&wq->list, &workqueues);
		spin_unlock(&workqueue_lock);
		/*
		 * We must initialize cwqs for each possible cpu even if we
		 * are going to call destroy_workqueue() finally. Otherwise
		 * cpu_up() can hit the uninitialized cwq once we drop the
		 * lock.
		 */
		for_each_possible_cpu(cpu) {
			cwq = init_cpu_workqueue(wq, cpu);
			if (err || !cpu_online(cpu))
				continue;
			err = create_workqueue_thread(cwq, cpu);
			start_workqueue_thread(cwq, cpu);
		}
		cpu_maps_update_done();
	}

	if (err) {
		destroy_workqueue(wq);
		wq = NULL;
	}
	return wq;
}
EXPORT_SYMBOL_GPL(__create_workqueue_key);
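
/*
 * Illustrative sketch (not part of this file): drivers normally reach this
 * function through the wrappers in workqueue.h rather than calling it
 * directly.  my_wq is a made-up name.
 *
 *	struct workqueue_struct *my_wq;
 *
 *	my_wq = create_workqueue("my_wq");		// one thread per CPU
 *	// or: create_singlethread_workqueue("my_wq")	// one thread total
 *	if (!my_wq)
 *		return -ENOMEM;
 *	...
 *	destroy_workqueue(my_wq);		// flushes pending work, then frees
 */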

static void cleanup_workqueue_thread(struct cpu_workqueue_struct *cwq)
{
	/*
	 * Our caller is either destroy_workqueue() or CPU_POST_DEAD,
	 * cpu_add_remove_lock protects cwq->thread.
	 */
	if (cwq->thread == NULL)
		return;

	lock_map_acquire(&cwq->wq->lockdep_map);
	lock_map_release(&cwq->wq->lockdep_map);

	flush_cpu_workqueue(cwq);
	/*
	 * If the caller is CPU_POST_DEAD and cwq->worklist was not empty,
	 * a concurrent flush_workqueue() can insert a barrier after us.
	 * However, in that case run_workqueue() won't return and check
	 * kthread_should_stop() until it flushes all work_struct's.
	 * When ->worklist becomes empty it is safe to exit because no
	 * more work_structs can be queued on this cwq: flush_workqueue
	 * checks list_empty(), and a "normal" queue_work() can't use
	 * a dead CPU.
	 */
	kthread_stop(cwq->thread);
	cwq->thread = NULL;
}

/**
 * destroy_workqueue - safely terminate a workqueue
 * @wq: target workqueue
 *
 * Safely destroy a workqueue. All work currently pending will be done first.
 */
void destroy_workqueue(struct workqueue_struct *wq)
{
	const struct cpumask *cpu_map = wq_cpu_map(wq);
	int cpu;

	cpu_maps_update_begin();
	spin_lock(&workqueue_lock);
	list_del(&wq->list);
	spin_unlock(&workqueue_lock);

	for_each_cpu_mask_nr(cpu, *cpu_map)
		cleanup_workqueue_thread(per_cpu_ptr(wq->cpu_wq, cpu));
	cpu_maps_update_done();

	free_percpu(wq->cpu_wq);
	kfree(wq);
}
EXPORT_SYMBOL_GPL(destroy_workqueue);

static int __devinit workqueue_cpu_callback(struct notifier_block *nfb,
						unsigned long action,
						void *hcpu)
{
	unsigned int cpu = (unsigned long)hcpu;
	struct cpu_workqueue_struct *cwq;
	struct workqueue_struct *wq;
	int ret = NOTIFY_OK;

	action &= ~CPU_TASKS_FROZEN;

	switch (action) {
	case CPU_UP_PREPARE:
		cpumask_set_cpu(cpu, cpu_populated_map);
	}
undo:
	list_for_each_entry(wq, &workqueues, list) {
		cwq = per_cpu_ptr(wq->cpu_wq, cpu);

		switch (action) {
		case CPU_UP_PREPARE:
			if (!create_workqueue_thread(cwq, cpu))
				break;
			printk(KERN_ERR "workqueue [%s] for %i failed\n",
				wq->name, cpu);
			action = CPU_UP_CANCELED;
			ret = NOTIFY_BAD;
			goto undo;

		case CPU_ONLINE:
			start_workqueue_thread(cwq, cpu);
			break;

		case CPU_UP_CANCELED:
			start_workqueue_thread(cwq, -1);
		case CPU_POST_DEAD:
			cleanup_workqueue_thread(cwq);
			break;
		}
	}

	switch (action) {
	case CPU_UP_CANCELED:
	case CPU_POST_DEAD:
		cpumask_clear_cpu(cpu, cpu_populated_map);
	}

	return ret;
}

#ifdef CONFIG_SMP
struct work_for_cpu {
	struct work_struct work;
	long (*fn)(void *);
	void *arg;
	long ret;
};

static void do_work_for_cpu(struct work_struct *w)
{
	struct work_for_cpu *wfc = container_of(w, struct work_for_cpu, work);

	wfc->ret = wfc->fn(wfc->arg);
}

/**
 * work_on_cpu - run a function in user context on a particular cpu
 * @cpu: the cpu to run on
 * @fn: the function to run
 * @arg: the function arg
 *
 * This will return -EINVAL if the cpu is not online, or the return value
 * of @fn otherwise.
 */
long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg)
{
	struct work_for_cpu wfc;

	INIT_WORK(&wfc.work, do_work_for_cpu);
	wfc.fn = fn;
	wfc.arg = arg;
	get_online_cpus();
	if (unlikely(!cpu_online(cpu)))
		wfc.ret = -EINVAL;
	else {
		schedule_work_on(cpu, &wfc.work);
		flush_work(&wfc.work);
	}
	put_online_cpus();

	return wfc.ret;
}
EXPORT_SYMBOL_GPL(work_on_cpu);
#endif /* CONFIG_SMP */
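
/*
 * Illustrative sketch (not part of this file): work_on_cpu() lets process
 * context code run a function on a chosen CPU and collect its return value
 * without open-coding a work item.  my_read_state(), "arg" and "ret" are
 * made-up names.
 *
 *	static long my_read_state(void *arg)
 *	{
 *		// executes on the requested CPU, in keventd's context
 *		return 0;
 *	}
 *
 *	ret = work_on_cpu(1, my_read_state, NULL);  // -EINVAL if CPU 1 is offline
 */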

void __init init_workqueues(void)
{
	alloc_cpumask_var(&cpu_populated_map, GFP_KERNEL);

	cpumask_copy(cpu_populated_map, cpu_online_mask);
	singlethread_cpu = cpumask_first(cpu_possible_mask);
	cpu_singlethread_map = cpumask_of(singlethread_cpu);
	hotcpu_notifier(workqueue_cpu_callback, 0);
	keventd_wq = create_workqueue("events");
	BUG_ON(!keventd_wq);
}