/*
 * linux/kernel/workqueue.c
 *
 * Generic mechanism for defining kernel helper threads for running
 * arbitrary tasks in process context.
 *
 * Started by Ingo Molnar, Copyright (C) 2002
 *
 * Derived from the taskqueue/keventd code by:
 *
 *   David Woodhouse <dwmw2@infradead.org>
 *   Andrew Morton
 *   Kai Petzke <wpp@marie.physik.tu-berlin.de>
 *   Theodore Ts'o <tytso@mit.edu>
 *
 * Made to use alloc_percpu by Christoph Lameter.
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/init.h>
#include <linux/signal.h>
#include <linux/completion.h>
#include <linux/workqueue.h>
#include <linux/slab.h>
#include <linux/cpu.h>
#include <linux/notifier.h>
#include <linux/kthread.h>
#include <linux/hardirq.h>
#include <linux/mempolicy.h>
#include <linux/freezer.h>
#include <linux/kallsyms.h>
#include <linux/debug_locks.h>
#include <linux/lockdep.h>
#define CREATE_TRACE_POINTS
#include <trace/events/workqueue.h>

/*
 * Structure fields follow one of the following exclusion rules.
 *
 * I: Set during initialization and read-only afterwards.
 *
 * L: cwq->lock protected.  Access with cwq->lock held.
 *
 * W: workqueue_lock protected.
 */

/*
 * The per-CPU workqueue (if single thread, we always use the first
 * possible cpu).
 */
struct cpu_workqueue_struct {

	spinlock_t lock;

	struct list_head worklist;
	wait_queue_head_t more_work;
	struct work_struct *current_work;

	struct workqueue_struct *wq;		/* I: the owning workqueue */
	struct task_struct	*thread;
} ____cacheline_aligned;

/*
 * The externally visible workqueue abstraction is an array of
 * per-CPU workqueues:
 */
struct workqueue_struct {
	unsigned int		flags;		/* I: WQ_* flags */
	struct cpu_workqueue_struct *cpu_wq;	/* I: cwq's */
	struct list_head	list;		/* W: list of all workqueues */
	const char		*name;		/* I: workqueue name */
#ifdef CONFIG_LOCKDEP
	struct lockdep_map	lockdep_map;
#endif
};

#ifdef CONFIG_DEBUG_OBJECTS_WORK

static struct debug_obj_descr work_debug_descr;

/*
 * fixup_init is called when:
 * - an active object is initialized
 */
static int work_fixup_init(void *addr, enum debug_obj_state state)
{
	struct work_struct *work = addr;

	switch (state) {
	case ODEBUG_STATE_ACTIVE:
		cancel_work_sync(work);
		debug_object_init(work, &work_debug_descr);
		return 1;
	default:
		return 0;
	}
}

/*
 * fixup_activate is called when:
 * - an active object is activated
 * - an unknown object is activated (might be a statically initialized object)
 */
static int work_fixup_activate(void *addr, enum debug_obj_state state)
{
	struct work_struct *work = addr;

	switch (state) {

	case ODEBUG_STATE_NOTAVAILABLE:
		/*
		 * This is not really a fixup. The work struct was
		 * statically initialized. We just make sure that it
		 * is tracked in the object tracker.
		 */
		if (test_bit(WORK_STRUCT_STATIC_BIT, work_data_bits(work))) {
			debug_object_init(work, &work_debug_descr);
			debug_object_activate(work, &work_debug_descr);
			return 0;
		}
		WARN_ON_ONCE(1);
		return 0;

	case ODEBUG_STATE_ACTIVE:
		WARN_ON(1);

	default:
		return 0;
	}
}

/*
 * fixup_free is called when:
 * - an active object is freed
 */
static int work_fixup_free(void *addr, enum debug_obj_state state)
{
	struct work_struct *work = addr;

	switch (state) {
	case ODEBUG_STATE_ACTIVE:
		cancel_work_sync(work);
		debug_object_free(work, &work_debug_descr);
		return 1;
	default:
		return 0;
	}
}

static struct debug_obj_descr work_debug_descr = {
	.name		= "work_struct",
	.fixup_init	= work_fixup_init,
	.fixup_activate	= work_fixup_activate,
	.fixup_free	= work_fixup_free,
};

static inline void debug_work_activate(struct work_struct *work)
{
	debug_object_activate(work, &work_debug_descr);
}

static inline void debug_work_deactivate(struct work_struct *work)
{
	debug_object_deactivate(work, &work_debug_descr);
}

void __init_work(struct work_struct *work, int onstack)
{
	if (onstack)
		debug_object_init_on_stack(work, &work_debug_descr);
	else
		debug_object_init(work, &work_debug_descr);
}
EXPORT_SYMBOL_GPL(__init_work);

void destroy_work_on_stack(struct work_struct *work)
{
	debug_object_free(work, &work_debug_descr);
}
EXPORT_SYMBOL_GPL(destroy_work_on_stack);

#else
static inline void debug_work_activate(struct work_struct *work) { }
static inline void debug_work_deactivate(struct work_struct *work) { }
#endif

/* Serializes the accesses to the list of workqueues. */
static DEFINE_SPINLOCK(workqueue_lock);
static LIST_HEAD(workqueues);

static int singlethread_cpu __read_mostly;
static const struct cpumask *cpu_singlethread_map __read_mostly;
/*
 * _cpu_down() first removes CPU from cpu_online_map, then CPU_DEAD
 * flushes cwq->worklist. This means that flush_workqueue/wait_on_work
 * which comes in between can't use for_each_online_cpu(). We could
 * use cpu_possible_map, the cpumask below is more a documentation
 * than optimization.
 */
static cpumask_var_t cpu_populated_map __read_mostly;

/* If it's single threaded, it isn't in the list of workqueues. */
static inline bool is_wq_single_threaded(struct workqueue_struct *wq)
{
	return wq->flags & WQ_SINGLE_THREAD;
}

static const struct cpumask *wq_cpu_map(struct workqueue_struct *wq)
{
	return is_wq_single_threaded(wq)
		? cpu_singlethread_map : cpu_populated_map;
}

static struct cpu_workqueue_struct *get_cwq(unsigned int cpu,
					    struct workqueue_struct *wq)
{
	if (unlikely(is_wq_single_threaded(wq)))
		cpu = singlethread_cpu;
	return per_cpu_ptr(wq->cpu_wq, cpu);
}

/*
 * Set the workqueue on which a work item is to be run
 * - Must *only* be called if the pending flag is set
 */
static inline void set_wq_data(struct work_struct *work,
			       struct cpu_workqueue_struct *cwq,
			       unsigned long extra_flags)
{
	BUG_ON(!work_pending(work));

	atomic_long_set(&work->data, (unsigned long)cwq | work_static(work) |
			WORK_STRUCT_PENDING | extra_flags);
}

/*
 * Clear WORK_STRUCT_PENDING and the workqueue on which it was queued.
 */
static inline void clear_wq_data(struct work_struct *work)
{
	atomic_long_set(&work->data, work_static(work));
}

static inline
struct cpu_workqueue_struct *get_wq_data(struct work_struct *work)
{
	return (void *) (atomic_long_read(&work->data) & WORK_STRUCT_WQ_DATA_MASK);
}

/**
 * insert_work - insert a work into cwq
 * @cwq: cwq @work belongs to
 * @work: work to insert
 * @head: insertion point
 * @extra_flags: extra WORK_STRUCT_* flags to set
 *
 * Insert @work into @cwq after @head.
 *
 * CONTEXT:
 * spin_lock_irq(cwq->lock).
 */
static void insert_work(struct cpu_workqueue_struct *cwq,
			struct work_struct *work, struct list_head *head,
			unsigned int extra_flags)
{
	trace_workqueue_insertion(cwq->thread, work);

	/* we own @work, set data and link */
	set_wq_data(work, cwq, extra_flags);

	/*
	 * Ensure that we get the right work->data if we see the
	 * result of list_add() below, see try_to_grab_pending().
	 */
	smp_wmb();

	list_add_tail(&work->entry, head);
	wake_up(&cwq->more_work);
}

static void __queue_work(unsigned int cpu, struct workqueue_struct *wq,
			 struct work_struct *work)
{
	struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
	unsigned long flags;

	debug_work_activate(work);
	spin_lock_irqsave(&cwq->lock, flags);
	BUG_ON(!list_empty(&work->entry));
	insert_work(cwq, work, &cwq->worklist, 0);
	spin_unlock_irqrestore(&cwq->lock, flags);
}

/**
 * queue_work - queue work on a workqueue
 * @wq: workqueue to use
 * @work: work to queue
 *
 * Returns 0 if @work was already on a queue, non-zero otherwise.
 *
 * We queue the work to the CPU on which it was submitted, but if the CPU dies
 * it can be processed by another CPU.
 */
int queue_work(struct workqueue_struct *wq, struct work_struct *work)
{
	int ret;

	ret = queue_work_on(get_cpu(), wq, work);
	put_cpu();

	return ret;
}
EXPORT_SYMBOL_GPL(queue_work);
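
/*
 * Example (an illustrative sketch, not part of this file): a driver feeding
 * work onto its own workqueue.  struct my_dev, my_dev_work_fn(),
 * my_dev_kick() and process_pending_events() are names assumed for the
 * sketch only.
 *
 *	struct my_dev {
 *		struct workqueue_struct	*wq;
 *		struct work_struct	work;
 *	};
 *
 *	static void my_dev_work_fn(struct work_struct *work)
 *	{
 *		struct my_dev *dev = container_of(work, struct my_dev, work);
 *
 *		process_pending_events(dev);	// process context, may sleep
 *	}
 *
 *	static void my_dev_kick(struct my_dev *dev)
 *	{
 *		// INIT_WORK(&dev->work, my_dev_work_fn) was done at probe time.
 *		// A zero return only means the work was already pending.
 *		queue_work(dev->wq, &dev->work);
 *	}
 */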

/**
 * queue_work_on - queue work on specific cpu
 * @cpu: CPU number to execute work on
 * @wq: workqueue to use
 * @work: work to queue
 *
 * Returns 0 if @work was already on a queue, non-zero otherwise.
 *
 * We queue the work to a specific CPU, the caller must ensure it
 * can't go away.
 */
int
queue_work_on(int cpu, struct workqueue_struct *wq, struct work_struct *work)
{
	int ret = 0;

	if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
		__queue_work(cpu, wq, work);
		ret = 1;
	}
	return ret;
}
EXPORT_SYMBOL_GPL(queue_work_on);

static void delayed_work_timer_fn(unsigned long __data)
{
	struct delayed_work *dwork = (struct delayed_work *)__data;
	struct cpu_workqueue_struct *cwq = get_wq_data(&dwork->work);

	__queue_work(smp_processor_id(), cwq->wq, &dwork->work);
}

/**
 * queue_delayed_work - queue work on a workqueue after delay
 * @wq: workqueue to use
 * @dwork: delayable work to queue
 * @delay: number of jiffies to wait before queueing
 *
 * Returns 0 if @work was already on a queue, non-zero otherwise.
 */
int queue_delayed_work(struct workqueue_struct *wq,
			struct delayed_work *dwork, unsigned long delay)
{
	if (delay == 0)
		return queue_work(wq, &dwork->work);

	return queue_delayed_work_on(-1, wq, dwork, delay);
}
EXPORT_SYMBOL_GPL(queue_delayed_work);

/**
 * queue_delayed_work_on - queue work on specific CPU after delay
 * @cpu: CPU number to execute work on
 * @wq: workqueue to use
 * @dwork: work to queue
 * @delay: number of jiffies to wait before queueing
 *
 * Returns 0 if @work was already on a queue, non-zero otherwise.
 */
int queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
			struct delayed_work *dwork, unsigned long delay)
{
	int ret = 0;
	struct timer_list *timer = &dwork->timer;
	struct work_struct *work = &dwork->work;

	if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
		BUG_ON(timer_pending(timer));
		BUG_ON(!list_empty(&work->entry));

		timer_stats_timer_set_start_info(&dwork->timer);

		/* This stores cwq for the moment, for the timer_fn */
		set_wq_data(work, get_cwq(raw_smp_processor_id(), wq), 0);
		timer->expires = jiffies + delay;
		timer->data = (unsigned long)dwork;
		timer->function = delayed_work_timer_fn;

		if (unlikely(cpu >= 0))
			add_timer_on(timer, cpu);
		else
			add_timer(timer);
		ret = 1;
	}
	return ret;
}
EXPORT_SYMBOL_GPL(queue_delayed_work_on);
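
/*
 * Example (an illustrative sketch, not part of this file): re-arming a
 * periodic poll with a delayed work on a driver-owned queue.  my_wq,
 * my_poll and my_poll_fn() are assumptions for the sketch.
 *
 *	static struct workqueue_struct *my_wq;
 *	static struct delayed_work my_poll;
 *
 *	static void my_poll_fn(struct work_struct *work)
 *	{
 *		struct delayed_work *dwork =
 *			container_of(work, struct delayed_work, work);
 *
 *		// do the periodic check, then re-arm for one second later
 *		queue_delayed_work(my_wq, dwork, HZ);
 *	}
 *
 *	// at init time:
 *	INIT_DELAYED_WORK(&my_poll, my_poll_fn);
 *	queue_delayed_work(my_wq, &my_poll, HZ);
 */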

static void run_workqueue(struct cpu_workqueue_struct *cwq)
{
	spin_lock_irq(&cwq->lock);
	while (!list_empty(&cwq->worklist)) {
		struct work_struct *work = list_entry(cwq->worklist.next,
						struct work_struct, entry);
		work_func_t f = work->func;
#ifdef CONFIG_LOCKDEP
		/*
		 * It is permissible to free the struct work_struct
		 * from inside the function that is called from it,
		 * this we need to take into account for lockdep too.
		 * To avoid bogus "held lock freed" warnings as well
		 * as problems when looking into work->lockdep_map,
		 * make a copy and use that here.
		 */
		struct lockdep_map lockdep_map = work->lockdep_map;
#endif
		trace_workqueue_execution(cwq->thread, work);
		debug_work_deactivate(work);
		cwq->current_work = work;
		list_del_init(cwq->worklist.next);
		spin_unlock_irq(&cwq->lock);

		BUG_ON(get_wq_data(work) != cwq);
		work_clear_pending(work);
		lock_map_acquire(&cwq->wq->lockdep_map);
		lock_map_acquire(&lockdep_map);
		f(work);
		lock_map_release(&lockdep_map);
		lock_map_release(&cwq->wq->lockdep_map);

		if (unlikely(in_atomic() || lockdep_depth(current) > 0)) {
			printk(KERN_ERR "BUG: workqueue leaked lock or atomic: "
					"%s/0x%08x/%d\n",
					current->comm, preempt_count(),
					task_pid_nr(current));
			printk(KERN_ERR "    last function: ");
			print_symbol("%s\n", (unsigned long)f);
			debug_show_held_locks(current);
			dump_stack();
		}

		spin_lock_irq(&cwq->lock);
		cwq->current_work = NULL;
	}
	spin_unlock_irq(&cwq->lock);
}

/**
 * worker_thread - the worker thread function
 * @__cwq: cwq to serve
 *
 * The cwq worker thread function.
 */
static int worker_thread(void *__cwq)
{
	struct cpu_workqueue_struct *cwq = __cwq;
	DEFINE_WAIT(wait);

	if (cwq->wq->flags & WQ_FREEZEABLE)
		set_freezable();

	for (;;) {
		prepare_to_wait(&cwq->more_work, &wait, TASK_INTERRUPTIBLE);
		if (!freezing(current) &&
		    !kthread_should_stop() &&
		    list_empty(&cwq->worklist))
			schedule();
		finish_wait(&cwq->more_work, &wait);

		try_to_freeze();

		if (kthread_should_stop())
			break;

		run_workqueue(cwq);
	}

	return 0;
}

struct wq_barrier {
	struct work_struct	work;
	struct completion	done;
};

static void wq_barrier_func(struct work_struct *work)
{
	struct wq_barrier *barr = container_of(work, struct wq_barrier, work);
	complete(&barr->done);
}

/**
 * insert_wq_barrier - insert a barrier work
 * @cwq: cwq to insert barrier into
 * @barr: wq_barrier to insert
 * @head: insertion point
 *
 * Insert barrier @barr into @cwq before @head.
 *
 * CONTEXT:
 * spin_lock_irq(cwq->lock).
 */
static void insert_wq_barrier(struct cpu_workqueue_struct *cwq,
			struct wq_barrier *barr, struct list_head *head)
{
	/*
	 * debugobject calls are safe here even with cwq->lock locked
	 * as we know for sure that this will not trigger any of the
	 * checks and call back into the fixup functions where we
	 * might deadlock.
	 */
	INIT_WORK_ON_STACK(&barr->work, wq_barrier_func);
	__set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(&barr->work));
	init_completion(&barr->done);

	debug_work_activate(&barr->work);
	insert_work(cwq, &barr->work, head, 0);
}

static int flush_cpu_workqueue(struct cpu_workqueue_struct *cwq)
{
	int active = 0;
	struct wq_barrier barr;

	WARN_ON(cwq->thread == current);

	spin_lock_irq(&cwq->lock);
	if (!list_empty(&cwq->worklist) || cwq->current_work != NULL) {
		insert_wq_barrier(cwq, &barr, &cwq->worklist);
		active = 1;
	}
	spin_unlock_irq(&cwq->lock);

	if (active) {
		wait_for_completion(&barr.done);
		destroy_work_on_stack(&barr.work);
	}

	return active;
}

/**
 * flush_workqueue - ensure that any scheduled work has run to completion.
 * @wq: workqueue to flush
 *
 * Forces execution of the workqueue and blocks until its completion.
 * This is typically used in driver shutdown handlers.
 *
 * We sleep until all works which were queued on entry have been handled,
 * but we are not livelocked by new incoming ones.
 */
void flush_workqueue(struct workqueue_struct *wq)
{
	const struct cpumask *cpu_map = wq_cpu_map(wq);
	int cpu;

	might_sleep();
	lock_map_acquire(&wq->lockdep_map);
	lock_map_release(&wq->lockdep_map);
	for_each_cpu(cpu, cpu_map)
		flush_cpu_workqueue(per_cpu_ptr(wq->cpu_wq, cpu));
}
EXPORT_SYMBOL_GPL(flush_workqueue);
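
/*
 * Example (an illustrative sketch, not part of this file): draining a
 * driver-owned queue on shutdown.  The my_dev fields are assumptions.
 *
 *	static void my_dev_shutdown(struct my_dev *dev)
 *	{
 *		dev->stopping = true;		// stop queueing new work first
 *		flush_workqueue(dev->wq);	// wait for everything already queued
 *	}
 */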

/**
 * flush_work - block until a work_struct's callback has terminated
 * @work: the work which is to be flushed
 *
 * Returns false if @work has already terminated.
 *
 * It is expected that, prior to calling flush_work(), the caller has
 * arranged for the work to not be requeued, otherwise it doesn't make
 * sense to use this function.
 */
int flush_work(struct work_struct *work)
{
	struct cpu_workqueue_struct *cwq;
	struct list_head *prev;
	struct wq_barrier barr;

	might_sleep();
	cwq = get_wq_data(work);
	if (!cwq)
		return 0;

	lock_map_acquire(&cwq->wq->lockdep_map);
	lock_map_release(&cwq->wq->lockdep_map);

	spin_lock_irq(&cwq->lock);
	if (!list_empty(&work->entry)) {
		/*
		 * See the comment near try_to_grab_pending()->smp_rmb().
		 * If it was re-queued under us we are not going to wait.
		 */
		smp_rmb();
		if (unlikely(cwq != get_wq_data(work)))
			goto already_gone;
		prev = &work->entry;
	} else {
		if (cwq->current_work != work)
			goto already_gone;
		prev = &cwq->worklist;
	}
	insert_wq_barrier(cwq, &barr, prev->next);

	spin_unlock_irq(&cwq->lock);
	wait_for_completion(&barr.done);
	destroy_work_on_stack(&barr.work);
	return 1;
already_gone:
	spin_unlock_irq(&cwq->lock);
	return 0;
}
EXPORT_SYMBOL_GPL(flush_work);

/*
 * Upon a successful return (>= 0), the caller "owns" WORK_STRUCT_PENDING bit,
 * so this work can't be re-armed in any way.
 */
static int try_to_grab_pending(struct work_struct *work)
{
	struct cpu_workqueue_struct *cwq;
	int ret = -1;

	if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work)))
		return 0;

	/*
	 * The queueing is in progress, or it is already queued. Try to
	 * steal it from ->worklist without clearing WORK_STRUCT_PENDING.
	 */

	cwq = get_wq_data(work);
	if (!cwq)
		return ret;

	spin_lock_irq(&cwq->lock);
	if (!list_empty(&work->entry)) {
		/*
		 * This work is queued, but perhaps we locked the wrong cwq.
		 * In that case we must see the new value after rmb(), see
		 * insert_work()->wmb().
		 */
		smp_rmb();
		if (cwq == get_wq_data(work)) {
			debug_work_deactivate(work);
			list_del_init(&work->entry);
			ret = 1;
		}
	}
	spin_unlock_irq(&cwq->lock);

	return ret;
}

static void wait_on_cpu_work(struct cpu_workqueue_struct *cwq,
				struct work_struct *work)
{
	struct wq_barrier barr;
	int running = 0;

	spin_lock_irq(&cwq->lock);
	if (unlikely(cwq->current_work == work)) {
		insert_wq_barrier(cwq, &barr, cwq->worklist.next);
		running = 1;
	}
	spin_unlock_irq(&cwq->lock);

	if (unlikely(running)) {
		wait_for_completion(&barr.done);
		destroy_work_on_stack(&barr.work);
	}
}

static void wait_on_work(struct work_struct *work)
{
	struct cpu_workqueue_struct *cwq;
	struct workqueue_struct *wq;
	const struct cpumask *cpu_map;
	int cpu;

	might_sleep();

	lock_map_acquire(&work->lockdep_map);
	lock_map_release(&work->lockdep_map);

	cwq = get_wq_data(work);
	if (!cwq)
		return;

	wq = cwq->wq;
	cpu_map = wq_cpu_map(wq);

	for_each_cpu(cpu, cpu_map)
		wait_on_cpu_work(get_cwq(cpu, wq), work);
}

static int __cancel_work_timer(struct work_struct *work,
				struct timer_list* timer)
{
	int ret;

	do {
		ret = (timer && likely(del_timer(timer)));
		if (!ret)
			ret = try_to_grab_pending(work);
		wait_on_work(work);
	} while (unlikely(ret < 0));

716
	clear_wq_data(work);
	return ret;
}

/**
 * cancel_work_sync - block until a work_struct's callback has terminated
 * @work: the work which is to be flushed
 *
 * Returns true if @work was pending.
 *
 * cancel_work_sync() will cancel the work if it is queued. If the work's
 * callback appears to be running, cancel_work_sync() will block until it
 * has completed.
 *
 * It is possible to use this function if the work re-queues itself. It can
 * cancel the work even if it migrates to another workqueue, however in that
 * case it only guarantees that work->func() has completed on the last queued
 * workqueue.
 *
 * cancel_work_sync(&delayed_work->work) should be used only if ->timer is not
 * pending, otherwise it goes into a busy-wait loop until the timer expires.
 *
 * The caller must ensure that workqueue_struct on which this work was last
 * queued can't be destroyed before this function returns.
 */
int cancel_work_sync(struct work_struct *work)
{
	return __cancel_work_timer(work, NULL);
}
EXPORT_SYMBOL_GPL(cancel_work_sync);

/**
 * cancel_delayed_work_sync - reliably kill off a delayed work.
 * @dwork: the delayed work struct
 *
 * Returns true if @dwork was pending.
 *
 * It is possible to use this function if @dwork rearms itself via queue_work()
 * or queue_delayed_work(). See also the comment for cancel_work_sync().
 */
int cancel_delayed_work_sync(struct delayed_work *dwork)
{
	return __cancel_work_timer(&dwork->work, &dwork->timer);
}
EXPORT_SYMBOL(cancel_delayed_work_sync);
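
/*
 * Example (an illustrative sketch, not part of this file): a remove() path
 * that must make sure no callback is queued or still running before the
 * device structure is freed.  The my_dev fields are assumptions.
 *
 *	static void my_dev_remove(struct my_dev *dev)
 *	{
 *		cancel_delayed_work_sync(&dev->poll_work);	// timer + work
 *		cancel_work_sync(&dev->reset_work);		// plain work
 *		kfree(dev);
 *	}
 */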

static struct workqueue_struct *keventd_wq __read_mostly;

/**
 * schedule_work - put work task in global workqueue
 * @work: job to be done
 *
 * Returns zero if @work was already on the kernel-global workqueue and
 * non-zero otherwise.
 *
 * This puts a job in the kernel-global workqueue if it was not already
 * queued and leaves it in the same position on the kernel-global
 * workqueue otherwise.
 */
int schedule_work(struct work_struct *work)
{
	return queue_work(keventd_wq, work);
}
EXPORT_SYMBOL(schedule_work);
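
/*
 * Example (an illustrative sketch, not part of this file): deferring the
 * sleeping half of an interrupt handler to the kernel-global workqueue.
 * my_deferred_fn() and my_irq_handler() are assumptions for the sketch.
 *
 *	static void my_deferred_fn(struct work_struct *work)
 *	{
 *		// process context: may sleep, take mutexes, do I/O
 *	}
 *	static DECLARE_WORK(my_deferred, my_deferred_fn);
 *
 *	static irqreturn_t my_irq_handler(int irq, void *dev_id)
 *	{
 *		schedule_work(&my_deferred);	// safe from hard irq context
 *		return IRQ_HANDLED;
 *	}
 */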

/*
 * schedule_work_on - put work task on a specific cpu
 * @cpu: cpu to put the work task on
 * @work: job to be done
 *
 * This puts a job on a specific cpu
 */
int schedule_work_on(int cpu, struct work_struct *work)
{
	return queue_work_on(cpu, keventd_wq, work);
}
EXPORT_SYMBOL(schedule_work_on);

/**
 * schedule_delayed_work - put work task in global workqueue after delay
 * @dwork: job to be done
 * @delay: number of jiffies to wait or 0 for immediate execution
 *
 * After waiting for a given time this puts a job in the kernel-global
 * workqueue.
 */
int schedule_delayed_work(struct delayed_work *dwork,
					unsigned long delay)
{
	return queue_delayed_work(keventd_wq, dwork, delay);
}
EXPORT_SYMBOL(schedule_delayed_work);

/**
 * flush_delayed_work - block until a dwork_struct's callback has terminated
 * @dwork: the delayed work which is to be flushed
 *
 * Any timeout is cancelled, and any pending work is run immediately.
 */
void flush_delayed_work(struct delayed_work *dwork)
{
	if (del_timer_sync(&dwork->timer)) {
		__queue_work(get_cpu(), get_wq_data(&dwork->work)->wq,
			     &dwork->work);
		put_cpu();
	}
	flush_work(&dwork->work);
}
EXPORT_SYMBOL(flush_delayed_work);

/**
 * schedule_delayed_work_on - queue work in global workqueue on CPU after delay
 * @cpu: cpu to use
 * @dwork: job to be done
 * @delay: number of jiffies to wait
 *
 * After waiting for a given time this puts a job in the kernel-global
 * workqueue on the specified CPU.
 */
int schedule_delayed_work_on(int cpu,
			struct delayed_work *dwork, unsigned long delay)
{
	return queue_delayed_work_on(cpu, keventd_wq, dwork, delay);
}
EXPORT_SYMBOL(schedule_delayed_work_on);

/**
 * schedule_on_each_cpu - call a function on each online CPU from keventd
 * @func: the function to call
 *
 * Returns zero on success.
 * Returns -ve errno on failure.
 *
 * schedule_on_each_cpu() is very slow.
 */
int schedule_on_each_cpu(work_func_t func)
{
	int cpu;
	int orig = -1;
	struct work_struct *works;

	works = alloc_percpu(struct work_struct);
	if (!works)
		return -ENOMEM;

	get_online_cpus();

	/*
	 * When running in keventd don't schedule a work item on
	 * itself.  Can just call directly because the work queue is
	 * already bound.  This also is faster.
	 */
	if (current_is_keventd())
		orig = raw_smp_processor_id();

	for_each_online_cpu(cpu) {
		struct work_struct *work = per_cpu_ptr(works, cpu);

		INIT_WORK(work, func);
		if (cpu != orig)
			schedule_work_on(cpu, work);
	}
	if (orig >= 0)
		func(per_cpu_ptr(works, orig));

	for_each_online_cpu(cpu)
		flush_work(per_cpu_ptr(works, cpu));

	put_online_cpus();
	free_percpu(works);
	return 0;
}
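
/*
 * Example (an illustrative sketch, not part of this file): running a
 * function once on every online CPU and waiting for all of them to finish.
 * drain_local_cache() is an assumed name.
 *
 *	static void drain_local_cache(struct work_struct *unused)
 *	{
 *		// executes in keventd context on each online CPU in turn
 *	}
 *
 *	int err = schedule_on_each_cpu(drain_local_cache);
 *	if (err)	// -ENOMEM if the per-cpu work items can't be allocated
 *		return err;
 */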

/**
 * flush_scheduled_work - ensure that any scheduled work has run to completion.
 *
 * Forces execution of the kernel-global workqueue and blocks until its
 * completion.
 *
 * Think twice before calling this function!  It's very easy to get into
 * trouble if you don't take great care.  Either of the following situations
 * will lead to deadlock:
 *
 *	One of the work items currently on the workqueue needs to acquire
 *	a lock held by your code or its caller.
 *
 *	Your code is running in the context of a work routine.
 *
 * They will be detected by lockdep when they occur, but the first might not
 * occur very often.  It depends on what work items are on the workqueue and
 * what locks they need, which you have no control over.
 *
 * In most situations flushing the entire workqueue is overkill; you merely
 * need to know that a particular work item isn't queued and isn't running.
 * In such cases you should use cancel_delayed_work_sync() or
 * cancel_work_sync() instead.
 */
void flush_scheduled_work(void)
{
	flush_workqueue(keventd_wq);
}
EXPORT_SYMBOL(flush_scheduled_work);

/**
 * execute_in_process_context - reliably execute the routine with user context
 * @fn:		the function to execute
 * @ew:		guaranteed storage for the execute work structure (must
 *		be available when the work executes)
 *
 * Executes the function immediately if process context is available,
 * otherwise schedules the function for delayed execution.
 *
 * Returns:	0 - function was executed
 *		1 - function was scheduled for execution
 */
int execute_in_process_context(work_func_t fn, struct execute_work *ew)
{
	if (!in_interrupt()) {
		fn(&ew->work);
		return 0;
	}

	INIT_WORK(&ew->work, fn);
	schedule_work(&ew->work);

	return 1;
}
EXPORT_SYMBOL_GPL(execute_in_process_context);
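
/*
 * Example (an illustrative sketch, not part of this file): a release helper
 * that may be reached from interrupt context.  struct my_obj and its ew
 * member (a struct execute_work) are assumptions for the sketch.
 *
 *	static void my_obj_release(struct work_struct *work)
 *	{
 *		struct my_obj *obj = container_of(work, struct my_obj, ew.work);
 *
 *		kfree(obj);
 *	}
 *
 *	// frees immediately in process context, otherwise defers to keventd
 *	execute_in_process_context(my_obj_release, &obj->ew);
 */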

int keventd_up(void)
{
	return keventd_wq != NULL;
}

int current_is_keventd(void)
{
	struct cpu_workqueue_struct *cwq;
H
	int cpu = raw_smp_processor_id(); /* preempt-safe: keventd is per-cpu */
	int ret = 0;

	BUG_ON(!keventd_wq);

	cwq = per_cpu_ptr(keventd_wq->cpu_wq, cpu);
	if (current == cwq->thread)
		ret = 1;

	return ret;

}

static struct cpu_workqueue_struct *
init_cpu_workqueue(struct workqueue_struct *wq, int cpu)
{
	struct cpu_workqueue_struct *cwq = per_cpu_ptr(wq->cpu_wq, cpu);

	cwq->wq = wq;
	spin_lock_init(&cwq->lock);
	INIT_LIST_HEAD(&cwq->worklist);
	init_waitqueue_head(&cwq->more_work);

	return cwq;
}

static int create_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu)
{
	struct workqueue_struct *wq = cwq->wq;
	const char *fmt = is_wq_single_threaded(wq) ? "%s" : "%s/%d";
	struct task_struct *p;

	p = kthread_create(worker_thread, cwq, fmt, wq->name, cpu);
	/*
	 * Nobody can add the work_struct to this cwq,
	 *	if (caller is __create_workqueue)
	 *		nobody should see this wq
	 *	else // caller is CPU_UP_PREPARE
	 *		cpu is not on cpu_online_map
	 * so we can abort safely.
	 */
	if (IS_ERR(p))
		return PTR_ERR(p);
	cwq->thread = p;

	trace_workqueue_creation(cwq->thread, cpu);

	return 0;
}

static void start_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu)
{
	struct task_struct *p = cwq->thread;

	if (p != NULL) {
		if (cpu >= 0)
			kthread_bind(p, cpu);
		wake_up_process(p);
	}
}

struct workqueue_struct *__create_workqueue_key(const char *name,
						unsigned int flags,
						struct lock_class_key *key,
						const char *lock_name)
{
	struct workqueue_struct *wq;
	struct cpu_workqueue_struct *cwq;
	int err = 0, cpu;

	wq = kzalloc(sizeof(*wq), GFP_KERNEL);
	if (!wq)
		goto err;

	wq->cpu_wq = alloc_percpu(struct cpu_workqueue_struct);
	if (!wq->cpu_wq)
		goto err;

	wq->flags = flags;
	wq->name = name;
	lockdep_init_map(&wq->lockdep_map, lock_name, key, 0);
	INIT_LIST_HEAD(&wq->list);

	if (flags & WQ_SINGLE_THREAD) {
		cwq = init_cpu_workqueue(wq, singlethread_cpu);
		err = create_workqueue_thread(cwq, singlethread_cpu);
		start_workqueue_thread(cwq, -1);
	} else {
		cpu_maps_update_begin();
		/*
		 * We must place this wq on list even if the code below fails.
		 * cpu_down(cpu) can remove cpu from cpu_populated_map before
		 * destroy_workqueue() takes the lock, in that case we leak
		 * cwq[cpu]->thread.
		 */
		spin_lock(&workqueue_lock);
		list_add(&wq->list, &workqueues);
		spin_unlock(&workqueue_lock);
		/*
		 * We must initialize cwqs for each possible cpu even if we
		 * are going to call destroy_workqueue() finally. Otherwise
		 * cpu_up() can hit the uninitialized cwq once we drop the
		 * lock.
		 */
		for_each_possible_cpu(cpu) {
			cwq = init_cpu_workqueue(wq, cpu);
			if (err || !cpu_online(cpu))
				continue;
			err = create_workqueue_thread(cwq, cpu);
			start_workqueue_thread(cwq, cpu);
		}
		cpu_maps_update_done();
	}

	if (err) {
		destroy_workqueue(wq);
		wq = NULL;
	}
	return wq;
err:
	if (wq) {
		free_percpu(wq->cpu_wq);
		kfree(wq);
	}
	return NULL;
}
EXPORT_SYMBOL_GPL(__create_workqueue_key);
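
/*
 * Example (an illustrative sketch, not part of this file): callers normally
 * reach this through the create_workqueue()/create_singlethread_workqueue()
 * wrappers rather than by calling __create_workqueue_key() directly.
 * "my_driver", my_wq and my_work are assumptions for the sketch.
 *
 *	struct workqueue_struct *my_wq;
 *
 *	my_wq = create_singlethread_workqueue("my_driver");
 *	if (!my_wq)
 *		return -ENOMEM;
 *
 *	queue_work(my_wq, &my_work);
 *
 *	// on teardown: drains pending work, stops the thread(s), frees wq
 *	destroy_workqueue(my_wq);
 */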

static void cleanup_workqueue_thread(struct cpu_workqueue_struct *cwq)
{
	/*
	 * Our caller is either destroy_workqueue() or CPU_POST_DEAD,
	 * cpu_add_remove_lock protects cwq->thread.
	 */
	if (cwq->thread == NULL)
		return;

	lock_map_acquire(&cwq->wq->lockdep_map);
	lock_map_release(&cwq->wq->lockdep_map);

	flush_cpu_workqueue(cwq);
	/*
	 * If the caller is CPU_POST_DEAD and cwq->worklist was not empty,
	 * a concurrent flush_workqueue() can insert a barrier after us.
	 * However, in that case run_workqueue() won't return and check
	 * kthread_should_stop() until it flushes all work_struct's.
	 * When ->worklist becomes empty it is safe to exit because no
	 * more work_structs can be queued on this cwq: flush_workqueue
	 * checks list_empty(), and a "normal" queue_work() can't use
	 * a dead CPU.
	 */
	trace_workqueue_destruction(cwq->thread);
	kthread_stop(cwq->thread);
	cwq->thread = NULL;
}

/**
 * destroy_workqueue - safely terminate a workqueue
 * @wq: target workqueue
 *
 * Safely destroy a workqueue. All work currently pending will be done first.
 */
void destroy_workqueue(struct workqueue_struct *wq)
{
	const struct cpumask *cpu_map = wq_cpu_map(wq);
	int cpu;

	cpu_maps_update_begin();
	spin_lock(&workqueue_lock);
	list_del(&wq->list);
	spin_unlock(&workqueue_lock);

	for_each_cpu(cpu, cpu_map)
		cleanup_workqueue_thread(per_cpu_ptr(wq->cpu_wq, cpu));
	cpu_maps_update_done();

	free_percpu(wq->cpu_wq);
	kfree(wq);
}
EXPORT_SYMBOL_GPL(destroy_workqueue);

static int __devinit workqueue_cpu_callback(struct notifier_block *nfb,
						unsigned long action,
						void *hcpu)
{
	unsigned int cpu = (unsigned long)hcpu;
	struct cpu_workqueue_struct *cwq;
	struct workqueue_struct *wq;
	int err = 0;

	action &= ~CPU_TASKS_FROZEN;

	switch (action) {
	case CPU_UP_PREPARE:
		cpumask_set_cpu(cpu, cpu_populated_map);
	}
undo:
	list_for_each_entry(wq, &workqueues, list) {
		cwq = per_cpu_ptr(wq->cpu_wq, cpu);

		switch (action) {
		case CPU_UP_PREPARE:
			err = create_workqueue_thread(cwq, cpu);
			if (!err)
				break;
			printk(KERN_ERR "workqueue [%s] for %i failed\n",
				wq->name, cpu);
			action = CPU_UP_CANCELED;
			err = -ENOMEM;
			goto undo;

		case CPU_ONLINE:
			start_workqueue_thread(cwq, cpu);
			break;

		case CPU_UP_CANCELED:
			start_workqueue_thread(cwq, -1);
		case CPU_POST_DEAD:
			cleanup_workqueue_thread(cwq);
			break;
		}
	}

	switch (action) {
	case CPU_UP_CANCELED:
	case CPU_POST_DEAD:
		cpumask_clear_cpu(cpu, cpu_populated_map);
	}

	return notifier_from_errno(err);
}

#ifdef CONFIG_SMP

struct work_for_cpu {
	struct completion completion;
	long (*fn)(void *);
	void *arg;
	long ret;
};

static int do_work_for_cpu(void *_wfc)
{
	struct work_for_cpu *wfc = _wfc;
	wfc->ret = wfc->fn(wfc->arg);
	complete(&wfc->completion);
	return 0;
}

/**
 * work_on_cpu - run a function in user context on a particular cpu
 * @cpu: the cpu to run on
 * @fn: the function to run
 * @arg: the function arg
 *
1208 1209
 * This will return the value @fn returns.
 * It is up to the caller to ensure that the cpu doesn't go offline.
 * The caller must not hold any locks which would prevent @fn from completing.
 */
long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg)
{
	struct task_struct *sub_thread;
	struct work_for_cpu wfc = {
		.completion = COMPLETION_INITIALIZER_ONSTACK(wfc.completion),
		.fn = fn,
		.arg = arg,
	};

	sub_thread = kthread_create(do_work_for_cpu, &wfc, "work_for_cpu");
	if (IS_ERR(sub_thread))
		return PTR_ERR(sub_thread);
	kthread_bind(sub_thread, cpu);
	wake_up_process(sub_thread);
	wait_for_completion(&wfc.completion);
	return wfc.ret;
}
EXPORT_SYMBOL_GPL(work_on_cpu);
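
/*
 * Example (an illustrative sketch, not part of this file): running a
 * function on one specific CPU and collecting its return value.
 * read_my_counter() and my_read_hw_counter() are assumed names.
 *
 *	static long read_my_counter(void *arg)
 *	{
 *		return my_read_hw_counter();	// runs bound to the chosen cpu
 *	}
 *
 *	long val;
 *
 *	get_online_cpus();		// keep the target cpu from going away
 *	val = work_on_cpu(cpu, read_my_counter, NULL);
 *	put_online_cpus();
 */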
#endif /* CONFIG_SMP */

void __init init_workqueues(void)
{
	alloc_cpumask_var(&cpu_populated_map, GFP_KERNEL);

	cpumask_copy(cpu_populated_map, cpu_online_mask);
	singlethread_cpu = cpumask_first(cpu_possible_mask);
	cpu_singlethread_map = cpumask_of(singlethread_cpu);
	hotcpu_notifier(workqueue_cpu_callback, 0);
	keventd_wq = create_workqueue("events");
	BUG_ON(!keventd_wq);
}