/*
 * linux/kernel/workqueue.c
 *
 * Generic mechanism for defining kernel helper threads for running
 * arbitrary tasks in process context.
 *
 * Started by Ingo Molnar, Copyright (C) 2002
 *
 * Derived from the taskqueue/keventd code by:
 *
 *   David Woodhouse <dwmw2@infradead.org>
 *   Andrew Morton
 *   Kai Petzke <wpp@marie.physik.tu-berlin.de>
 *   Theodore Ts'o <tytso@mit.edu>
 *
 * Made to use alloc_percpu by Christoph Lameter.
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/init.h>
#include <linux/signal.h>
#include <linux/completion.h>
#include <linux/workqueue.h>
#include <linux/slab.h>
#include <linux/cpu.h>
#include <linux/notifier.h>
#include <linux/kthread.h>
#include <linux/hardirq.h>
#include <linux/mempolicy.h>
#include <linux/freezer.h>
#include <linux/kallsyms.h>
#include <linux/debug_locks.h>
#include <linux/lockdep.h>
#define CREATE_TRACE_POINTS
#include <trace/events/workqueue.h>

/*
 * Structure fields follow one of the following exclusion rules.
 *
 * I: Set during initialization and read-only afterwards.
 *
 * L: cwq->lock protected.  Access with cwq->lock held.
 *
 * W: workqueue_lock protected.
 */

/*
 * The per-CPU workqueue (if single thread, we always use the first
 * possible cpu).
 */
struct cpu_workqueue_struct {

	spinlock_t lock;

	struct list_head worklist;
	wait_queue_head_t more_work;
	struct work_struct *current_work;

	struct workqueue_struct *wq;		/* I: the owning workqueue */
	struct task_struct	*thread;
} ____cacheline_aligned;

/*
 * The externally visible workqueue abstraction is an array of
 * per-CPU workqueues:
 */
struct workqueue_struct {
	unsigned int		flags;		/* I: WQ_* flags */
	struct cpu_workqueue_struct *cpu_wq;	/* I: cwq's */
	struct list_head	list;		/* W: list of all workqueues */
	const char		*name;		/* I: workqueue name */
#ifdef CONFIG_LOCKDEP
	struct lockdep_map	lockdep_map;
#endif
};

#ifdef CONFIG_DEBUG_OBJECTS_WORK

static struct debug_obj_descr work_debug_descr;

/*
 * fixup_init is called when:
 * - an active object is initialized
 */
static int work_fixup_init(void *addr, enum debug_obj_state state)
{
	struct work_struct *work = addr;

	switch (state) {
	case ODEBUG_STATE_ACTIVE:
		cancel_work_sync(work);
		debug_object_init(work, &work_debug_descr);
		return 1;
	default:
		return 0;
	}
}

/*
 * fixup_activate is called when:
 * - an active object is activated
 * - an unknown object is activated (might be a statically initialized object)
 */
static int work_fixup_activate(void *addr, enum debug_obj_state state)
{
	struct work_struct *work = addr;

	switch (state) {

	case ODEBUG_STATE_NOTAVAILABLE:
		/*
		 * This is not really a fixup. The work struct was
		 * statically initialized. We just make sure that it
		 * is tracked in the object tracker.
		 */
		if (test_bit(WORK_STRUCT_STATIC_BIT, work_data_bits(work))) {
			debug_object_init(work, &work_debug_descr);
			debug_object_activate(work, &work_debug_descr);
			return 0;
		}
		WARN_ON_ONCE(1);
		return 0;

	case ODEBUG_STATE_ACTIVE:
		WARN_ON(1);

	default:
		return 0;
	}
}

/*
 * fixup_free is called when:
 * - an active object is freed
 */
static int work_fixup_free(void *addr, enum debug_obj_state state)
{
	struct work_struct *work = addr;

	switch (state) {
	case ODEBUG_STATE_ACTIVE:
		cancel_work_sync(work);
		debug_object_free(work, &work_debug_descr);
		return 1;
	default:
		return 0;
	}
}

static struct debug_obj_descr work_debug_descr = {
	.name		= "work_struct",
	.fixup_init	= work_fixup_init,
	.fixup_activate	= work_fixup_activate,
	.fixup_free	= work_fixup_free,
};

static inline void debug_work_activate(struct work_struct *work)
{
	debug_object_activate(work, &work_debug_descr);
}

static inline void debug_work_deactivate(struct work_struct *work)
{
	debug_object_deactivate(work, &work_debug_descr);
}

void __init_work(struct work_struct *work, int onstack)
{
	if (onstack)
		debug_object_init_on_stack(work, &work_debug_descr);
	else
		debug_object_init(work, &work_debug_descr);
}
EXPORT_SYMBOL_GPL(__init_work);

void destroy_work_on_stack(struct work_struct *work)
{
	debug_object_free(work, &work_debug_descr);
}
EXPORT_SYMBOL_GPL(destroy_work_on_stack);

#else
static inline void debug_work_activate(struct work_struct *work) { }
static inline void debug_work_deactivate(struct work_struct *work) { }
#endif

/* Serializes the accesses to the list of workqueues. */
static DEFINE_SPINLOCK(workqueue_lock);
static LIST_HEAD(workqueues);

static int singlethread_cpu __read_mostly;
static const struct cpumask *cpu_singlethread_map __read_mostly;
/*
 * _cpu_down() first removes CPU from cpu_online_map, then CPU_DEAD
 * flushes cwq->worklist. This means that flush_workqueue/wait_on_work
 * which comes in between can't use for_each_online_cpu(). We could
 * use cpu_possible_map, the cpumask below is more a documentation
 * than optimization.
 */
static cpumask_var_t cpu_populated_map __read_mostly;

/* If it's single threaded, it isn't in the list of workqueues. */
static inline bool is_wq_single_threaded(struct workqueue_struct *wq)
{
	return wq->flags & WQ_SINGLE_THREAD;
}

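/* the set of CPUs whose cwq's a flush or destroy has to visit for @wq */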
static const struct cpumask *wq_cpu_map(struct workqueue_struct *wq)
{
	return is_wq_single_threaded(wq)
		? cpu_singlethread_map : cpu_populated_map;
}

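/*
 * Return @wq's cwq for @cpu.  Single threaded workqueues only ever use
 * the cwq of singlethread_cpu.
 */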
static struct cpu_workqueue_struct *get_cwq(unsigned int cpu,
					    struct workqueue_struct *wq)
{
	if (unlikely(is_wq_single_threaded(wq)))
		cpu = singlethread_cpu;
	return per_cpu_ptr(wq->cpu_wq, cpu);
}

/*
 * Set the workqueue on which a work item is to be run
 * - Must *only* be called if the pending flag is set
 */
static inline void set_wq_data(struct work_struct *work,
			       struct cpu_workqueue_struct *cwq,
			       unsigned long extra_flags)
{
	BUG_ON(!work_pending(work));

	atomic_long_set(&work->data, (unsigned long)cwq | work_static(work) |
			WORK_STRUCT_PENDING | extra_flags);
}

/*
 * Clear WORK_STRUCT_PENDING and the workqueue on which it was queued.
 */
static inline void clear_wq_data(struct work_struct *work)
{
	atomic_long_set(&work->data, work_static(work));
}

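/* return the cwq @work was last queued on, as recorded by set_wq_data() */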
static inline
struct cpu_workqueue_struct *get_wq_data(struct work_struct *work)
{
	return (void *) (atomic_long_read(&work->data) & WORK_STRUCT_WQ_DATA_MASK);
}

/**
 * insert_work - insert a work into cwq
 * @cwq: cwq @work belongs to
 * @work: work to insert
 * @head: insertion point
 * @extra_flags: extra WORK_STRUCT_* flags to set
 *
 * Insert @work into @cwq after @head.
 *
 * CONTEXT:
 * spin_lock_irq(cwq->lock).
 */
static void insert_work(struct cpu_workqueue_struct *cwq,
			struct work_struct *work, struct list_head *head,
			unsigned int extra_flags)
{
	trace_workqueue_insertion(cwq->thread, work);

	/* we own @work, set data and link */
	set_wq_data(work, cwq, extra_flags);

	/*
	 * Ensure that we get the right work->data if we see the
	 * result of list_add() below, see try_to_grab_pending().
	 */
	smp_wmb();

	list_add_tail(&work->entry, head);
	wake_up(&cwq->more_work);
}

static void __queue_work(unsigned int cpu, struct workqueue_struct *wq,
			 struct work_struct *work)
{
	struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
	unsigned long flags;

	debug_work_activate(work);
	spin_lock_irqsave(&cwq->lock, flags);
	BUG_ON(!list_empty(&work->entry));
	insert_work(cwq, work, &cwq->worklist, 0);
	spin_unlock_irqrestore(&cwq->lock, flags);
}

/**
 * queue_work - queue work on a workqueue
 * @wq: workqueue to use
 * @work: work to queue
 *
 * Returns 0 if @work was already on a queue, non-zero otherwise.
 *
 * We queue the work to the CPU on which it was submitted, but if the CPU dies
 * it can be processed by another CPU.
 */
int queue_work(struct workqueue_struct *wq, struct work_struct *work)
{
	int ret;

	ret = queue_work_on(get_cpu(), wq, work);
	put_cpu();

	return ret;
}
EXPORT_SYMBOL_GPL(queue_work);

/**
 * queue_work_on - queue work on specific cpu
 * @cpu: CPU number to execute work on
 * @wq: workqueue to use
 * @work: work to queue
 *
 * Returns 0 if @work was already on a queue, non-zero otherwise.
 *
 * We queue the work to a specific CPU, the caller must ensure it
 * can't go away.
 */
int
queue_work_on(int cpu, struct workqueue_struct *wq, struct work_struct *work)
{
	int ret = 0;

	if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
		__queue_work(cpu, wq, work);
		ret = 1;
	}
	return ret;
}
EXPORT_SYMBOL_GPL(queue_work_on);

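/*
 * Timer callback for delayed work: the delay has passed, so queue the
 * work proper on the workqueue it was armed for, on the local CPU.
 */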
static void delayed_work_timer_fn(unsigned long __data)
{
	struct delayed_work *dwork = (struct delayed_work *)__data;
	struct cpu_workqueue_struct *cwq = get_wq_data(&dwork->work);

	__queue_work(smp_processor_id(), cwq->wq, &dwork->work);
}

/**
 * queue_delayed_work - queue work on a workqueue after delay
 * @wq: workqueue to use
 * @dwork: delayable work to queue
 * @delay: number of jiffies to wait before queueing
 *
 * Returns 0 if @work was already on a queue, non-zero otherwise.
 */
int queue_delayed_work(struct workqueue_struct *wq,
			struct delayed_work *dwork, unsigned long delay)
{
	if (delay == 0)
		return queue_work(wq, &dwork->work);

	return queue_delayed_work_on(-1, wq, dwork, delay);
}
EXPORT_SYMBOL_GPL(queue_delayed_work);

/**
 * queue_delayed_work_on - queue work on specific CPU after delay
 * @cpu: CPU number to execute work on
 * @wq: workqueue to use
 * @dwork: work to queue
 * @delay: number of jiffies to wait before queueing
 *
 * Returns 0 if @work was already on a queue, non-zero otherwise.
 */
int queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
			struct delayed_work *dwork, unsigned long delay)
{
	int ret = 0;
	struct timer_list *timer = &dwork->timer;
	struct work_struct *work = &dwork->work;

	if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
		BUG_ON(timer_pending(timer));
		BUG_ON(!list_empty(&work->entry));

		timer_stats_timer_set_start_info(&dwork->timer);

		/* This stores cwq for the moment, for the timer_fn */
		set_wq_data(work, get_cwq(raw_smp_processor_id(), wq), 0);
		timer->expires = jiffies + delay;
		timer->data = (unsigned long)dwork;
		timer->function = delayed_work_timer_fn;

		if (unlikely(cpu >= 0))
			add_timer_on(timer, cpu);
		else
			add_timer(timer);
		ret = 1;
	}
	return ret;
}
EXPORT_SYMBOL_GPL(queue_delayed_work_on);

/**
 * process_one_work - process single work
 * @cwq: cwq to process work for
 * @work: work to process
 *
 * Process @work.  This function contains all the logics necessary to
 * process a single work including synchronization against and
 * interaction with other workers on the same cpu, queueing and
 * flushing.  As long as context requirement is met, any worker can
 * call this function to process a work.
 *
 * CONTEXT:
 * spin_lock_irq(cwq->lock) which is released and regrabbed.
 */
static void process_one_work(struct cpu_workqueue_struct *cwq,
			     struct work_struct *work)
{
	work_func_t f = work->func;
#ifdef CONFIG_LOCKDEP
	/*
	 * It is permissible to free the struct work_struct from
	 * inside the function that is called from it, this we need to
	 * take into account for lockdep too.  To avoid bogus "held
	 * lock freed" warnings as well as problems when looking into
	 * work->lockdep_map, make a copy and use that here.
	 */
	struct lockdep_map lockdep_map = work->lockdep_map;
#endif
	/* claim and process */
	trace_workqueue_execution(cwq->thread, work);
	debug_work_deactivate(work);
	cwq->current_work = work;
	list_del_init(&work->entry);

	spin_unlock_irq(&cwq->lock);

	BUG_ON(get_wq_data(work) != cwq);
	work_clear_pending(work);
	lock_map_acquire(&cwq->wq->lockdep_map);
	lock_map_acquire(&lockdep_map);
	f(work);
	lock_map_release(&lockdep_map);
	lock_map_release(&cwq->wq->lockdep_map);

	if (unlikely(in_atomic() || lockdep_depth(current) > 0)) {
		printk(KERN_ERR "BUG: workqueue leaked lock or atomic: "
		       "%s/0x%08x/%d\n",
		       current->comm, preempt_count(), task_pid_nr(current));
		printk(KERN_ERR "    last function: ");
		print_symbol("%s\n", (unsigned long)f);
		debug_show_held_locks(current);
		dump_stack();
	}

	spin_lock_irq(&cwq->lock);

	/* we're done with it, release */
	cwq->current_work = NULL;
}

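/*
 * Process every work item on @cwq->worklist.  Runs with cwq->lock held;
 * process_one_work() drops and reacquires the lock around each callback.
 */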
static void run_workqueue(struct cpu_workqueue_struct *cwq)
{
	spin_lock_irq(&cwq->lock);
	while (!list_empty(&cwq->worklist)) {
		struct work_struct *work = list_entry(cwq->worklist.next,
						struct work_struct, entry);
		process_one_work(cwq, work);
	}
	spin_unlock_irq(&cwq->lock);
}

/**
 * worker_thread - the worker thread function
 * @__cwq: cwq to serve
 *
 * The cwq worker thread function.
 */
static int worker_thread(void *__cwq)
{
	struct cpu_workqueue_struct *cwq = __cwq;
	DEFINE_WAIT(wait);

	if (cwq->wq->flags & WQ_FREEZEABLE)
		set_freezable();

	for (;;) {
		prepare_to_wait(&cwq->more_work, &wait, TASK_INTERRUPTIBLE);
		if (!freezing(current) &&
		    !kthread_should_stop() &&
		    list_empty(&cwq->worklist))
			schedule();
		finish_wait(&cwq->more_work, &wait);

		try_to_freeze();

		if (kthread_should_stop())
			break;

		run_workqueue(cwq);
	}

	return 0;
}

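/*
 * A wq_barrier is a dummy work item used by the flush functions: once it
 * executes, everything that was on the worklist ahead of it has finished,
 * and wq_barrier_func() wakes the waiter through ->done.
 */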
struct wq_barrier {
	struct work_struct	work;
	struct completion	done;
};

static void wq_barrier_func(struct work_struct *work)
{
	struct wq_barrier *barr = container_of(work, struct wq_barrier, work);
	complete(&barr->done);
}

/**
 * insert_wq_barrier - insert a barrier work
 * @cwq: cwq to insert barrier into
 * @barr: wq_barrier to insert
 * @head: insertion point
 *
 * Insert barrier @barr into @cwq before @head.
 *
 * CONTEXT:
 * spin_lock_irq(cwq->lock).
 */
static void insert_wq_barrier(struct cpu_workqueue_struct *cwq,
			struct wq_barrier *barr, struct list_head *head)
{
	/*
	 * debugobject calls are safe here even with cwq->lock locked
	 * as we know for sure that this will not trigger any of the
	 * checks and call back into the fixup functions where we
	 * might deadlock.
	 */
	INIT_WORK_ON_STACK(&barr->work, wq_barrier_func);
	__set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(&barr->work));
	init_completion(&barr->done);

	debug_work_activate(&barr->work);
	insert_work(cwq, &barr->work, head, 0);
}

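/*
 * Flush a single cwq by inserting a barrier behind its current contents
 * and waiting for it.  Returns non-zero if there was anything to flush.
 */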
static int flush_cpu_workqueue(struct cpu_workqueue_struct *cwq)
{
	int active = 0;
	struct wq_barrier barr;

	WARN_ON(cwq->thread == current);

	spin_lock_irq(&cwq->lock);
	if (!list_empty(&cwq->worklist) || cwq->current_work != NULL) {
		insert_wq_barrier(cwq, &barr, &cwq->worklist);
		active = 1;
	}
	spin_unlock_irq(&cwq->lock);

	if (active) {
		wait_for_completion(&barr.done);
		destroy_work_on_stack(&barr.work);
	}

	return active;
}

/**
 * flush_workqueue - ensure that any scheduled work has run to completion.
 * @wq: workqueue to flush
 *
 * Forces execution of the workqueue and blocks until its completion.
 * This is typically used in driver shutdown handlers.
 *
 * We sleep until all works which were queued on entry have been handled,
 * but we are not livelocked by new incoming ones.
 */
void flush_workqueue(struct workqueue_struct *wq)
{
	const struct cpumask *cpu_map = wq_cpu_map(wq);
	int cpu;

	might_sleep();
	lock_map_acquire(&wq->lockdep_map);
	lock_map_release(&wq->lockdep_map);
	for_each_cpu(cpu, cpu_map)
		flush_cpu_workqueue(per_cpu_ptr(wq->cpu_wq, cpu));
}
EXPORT_SYMBOL_GPL(flush_workqueue);

/**
 * flush_work - block until a work_struct's callback has terminated
 * @work: the work which is to be flushed
 *
 * Returns false if @work has already terminated.
 *
 * It is expected that, prior to calling flush_work(), the caller has
 * arranged for the work to not be requeued, otherwise it doesn't make
 * sense to use this function.
 */
int flush_work(struct work_struct *work)
{
	struct cpu_workqueue_struct *cwq;
	struct list_head *prev;
	struct wq_barrier barr;

	might_sleep();
	cwq = get_wq_data(work);
	if (!cwq)
		return 0;

	lock_map_acquire(&cwq->wq->lockdep_map);
	lock_map_release(&cwq->wq->lockdep_map);

	spin_lock_irq(&cwq->lock);
	if (!list_empty(&work->entry)) {
		/*
		 * See the comment near try_to_grab_pending()->smp_rmb().
		 * If it was re-queued under us we are not going to wait.
		 */
		smp_rmb();
		if (unlikely(cwq != get_wq_data(work)))
			goto already_gone;
		prev = &work->entry;
	} else {
		if (cwq->current_work != work)
			goto already_gone;
		prev = &cwq->worklist;
	}
	insert_wq_barrier(cwq, &barr, prev->next);

	spin_unlock_irq(&cwq->lock);
	wait_for_completion(&barr.done);
	destroy_work_on_stack(&barr.work);
	return 1;
already_gone:
	spin_unlock_irq(&cwq->lock);
	return 0;
}
EXPORT_SYMBOL_GPL(flush_work);

/*
 * Upon a successful return (>= 0), the caller "owns" WORK_STRUCT_PENDING bit,
 * so this work can't be re-armed in any way.
 */
static int try_to_grab_pending(struct work_struct *work)
{
	struct cpu_workqueue_struct *cwq;
	int ret = -1;

	if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work)))
		return 0;

	/*
	 * The queueing is in progress, or it is already queued. Try to
	 * steal it from ->worklist without clearing WORK_STRUCT_PENDING.
	 */

	cwq = get_wq_data(work);
	if (!cwq)
		return ret;

	spin_lock_irq(&cwq->lock);
	if (!list_empty(&work->entry)) {
		/*
		 * This work is queued, but perhaps we locked the wrong cwq.
		 * In that case we must see the new value after rmb(), see
		 * insert_work()->wmb().
		 */
		smp_rmb();
		if (cwq == get_wq_data(work)) {
			debug_work_deactivate(work);
			list_del_init(&work->entry);
			ret = 1;
		}
	}
	spin_unlock_irq(&cwq->lock);

	return ret;
}

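/*
 * If @work is currently running on @cwq, queue a barrier right behind it
 * and wait for the barrier to complete.
 */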
static void wait_on_cpu_work(struct cpu_workqueue_struct *cwq,
				struct work_struct *work)
{
	struct wq_barrier barr;
	int running = 0;

	spin_lock_irq(&cwq->lock);
	if (unlikely(cwq->current_work == work)) {
		insert_wq_barrier(cwq, &barr, cwq->worklist.next);
		running = 1;
	}
	spin_unlock_irq(&cwq->lock);

	if (unlikely(running)) {
		wait_for_completion(&barr.done);
		destroy_work_on_stack(&barr.work);
	}
}

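/*
 * Wait until @work has finished executing on every CPU it may be running
 * on.  Used by __cancel_work_timer().
 */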
static void wait_on_work(struct work_struct *work)
{
	struct cpu_workqueue_struct *cwq;
	struct workqueue_struct *wq;
	const struct cpumask *cpu_map;
	int cpu;

	might_sleep();

	lock_map_acquire(&work->lockdep_map);
	lock_map_release(&work->lockdep_map);

	cwq = get_wq_data(work);
	if (!cwq)
		return;

	wq = cwq->wq;
	cpu_map = wq_cpu_map(wq);

	for_each_cpu(cpu, cpu_map)
		wait_on_cpu_work(get_cwq(cpu, wq), work);
}

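/*
 * Common helper for cancel_work_sync() and cancel_delayed_work_sync():
 * delete the timer if one is given, steal the work off its worklist and
 * then wait for any instance that is already executing.
 */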
static int __cancel_work_timer(struct work_struct *work,
				struct timer_list* timer)
{
	int ret;

	do {
		ret = (timer && likely(del_timer(timer)));
		if (!ret)
			ret = try_to_grab_pending(work);
		wait_on_work(work);
	} while (unlikely(ret < 0));

	clear_wq_data(work);
	return ret;
}

/**
 * cancel_work_sync - block until a work_struct's callback has terminated
 * @work: the work which is to be flushed
 *
 * Returns true if @work was pending.
 *
 * cancel_work_sync() will cancel the work if it is queued. If the work's
 * callback appears to be running, cancel_work_sync() will block until it
 * has completed.
 *
 * It is possible to use this function if the work re-queues itself. It can
 * cancel the work even if it migrates to another workqueue, however in that
 * case it only guarantees that work->func() has completed on the last queued
 * workqueue.
 *
 * cancel_work_sync(&delayed_work->work) should be used only if ->timer is not
 * pending, otherwise it goes into a busy-wait loop until the timer expires.
 *
 * The caller must ensure that workqueue_struct on which this work was last
 * queued can't be destroyed before this function returns.
 */
int cancel_work_sync(struct work_struct *work)
{
	return __cancel_work_timer(work, NULL);
}
EXPORT_SYMBOL_GPL(cancel_work_sync);

/**
 * cancel_delayed_work_sync - reliably kill off a delayed work.
 * @dwork: the delayed work struct
 *
 * Returns true if @dwork was pending.
 *
 * It is possible to use this function if @dwork rearms itself via queue_work()
 * or queue_delayed_work(). See also the comment for cancel_work_sync().
 */
int cancel_delayed_work_sync(struct delayed_work *dwork)
{
	return __cancel_work_timer(&dwork->work, &dwork->timer);
}
EXPORT_SYMBOL(cancel_delayed_work_sync);

static struct workqueue_struct *keventd_wq __read_mostly;

/**
 * schedule_work - put work task in global workqueue
 * @work: job to be done
 *
 * Returns zero if @work was already on the kernel-global workqueue and
 * non-zero otherwise.
 *
 * This puts a job in the kernel-global workqueue if it was not already
 * queued and leaves it in the same position on the kernel-global
 * workqueue otherwise.
 */
int schedule_work(struct work_struct *work)
{
	return queue_work(keventd_wq, work);
}
EXPORT_SYMBOL(schedule_work);

/**
 * schedule_work_on - put work task on a specific cpu
 * @cpu: cpu to put the work task on
 * @work: job to be done
 *
 * This puts a job on a specific cpu
 */
int schedule_work_on(int cpu, struct work_struct *work)
{
	return queue_work_on(cpu, keventd_wq, work);
}
EXPORT_SYMBOL(schedule_work_on);

/**
 * schedule_delayed_work - put work task in global workqueue after delay
 * @dwork: job to be done
 * @delay: number of jiffies to wait or 0 for immediate execution
 *
 * After waiting for a given time this puts a job in the kernel-global
 * workqueue.
 */
int schedule_delayed_work(struct delayed_work *dwork,
					unsigned long delay)
{
	return queue_delayed_work(keventd_wq, dwork, delay);
}
EXPORT_SYMBOL(schedule_delayed_work);

/**
 * flush_delayed_work - block until a dwork_struct's callback has terminated
 * @dwork: the delayed work which is to be flushed
 *
 * Any timeout is cancelled, and any pending work is run immediately.
 */
void flush_delayed_work(struct delayed_work *dwork)
{
	if (del_timer_sync(&dwork->timer)) {
		__queue_work(get_cpu(), get_wq_data(&dwork->work)->wq,
			     &dwork->work);
		put_cpu();
	}
	flush_work(&dwork->work);
}
EXPORT_SYMBOL(flush_delayed_work);

/**
 * schedule_delayed_work_on - queue work in global workqueue on CPU after delay
 * @cpu: cpu to use
851
 * @dwork: job to be done
852 853 854 855 856
 * @delay: number of jiffies to wait
 *
 * After waiting for a given time this puts a job in the kernel-global
 * workqueue on the specified CPU.
 */
L
858
			struct delayed_work *dwork, unsigned long delay)
L
860
	return queue_delayed_work_on(cpu, keventd_wq, dwork, delay);
L
862
EXPORT_SYMBOL(schedule_delayed_work_on);
L
864 865 866 867 868 869 870 871 872
/**
 * schedule_on_each_cpu - call a function on each online CPU from keventd
 * @func: the function to call
 *
 * Returns zero on success.
 * Returns -ve errno on failure.
 *
 * schedule_on_each_cpu() is very slow.
 */
int schedule_on_each_cpu(work_func_t func)
{
	int cpu;
	int orig = -1;
	struct work_struct *works;

	works = alloc_percpu(struct work_struct);
	if (!works)
		return -ENOMEM;

	get_online_cpus();

	/*
	 * When running in keventd don't schedule a work item on
	 * itself.  Can just call directly because the work queue is
	 * already bound.  This also is faster.
	 */
	if (current_is_keventd())
		orig = raw_smp_processor_id();

	for_each_online_cpu(cpu) {
		struct work_struct *work = per_cpu_ptr(works, cpu);

		INIT_WORK(work, func);
		if (cpu != orig)
			schedule_work_on(cpu, work);
	}
	if (orig >= 0)
		func(per_cpu_ptr(works, orig));

	for_each_online_cpu(cpu)
		flush_work(per_cpu_ptr(works, cpu));

	put_online_cpus();
	free_percpu(works);
	return 0;
}

/**
 * flush_scheduled_work - ensure that any scheduled work has run to completion.
 *
 * Forces execution of the kernel-global workqueue and blocks until its
 * completion.
 *
 * Think twice before calling this function!  It's very easy to get into
 * trouble if you don't take great care.  Either of the following situations
 * will lead to deadlock:
 *
 *	One of the work items currently on the workqueue needs to acquire
 *	a lock held by your code or its caller.
 *
 *	Your code is running in the context of a work routine.
 *
 * They will be detected by lockdep when they occur, but the first might not
 * occur very often.  It depends on what work items are on the workqueue and
 * what locks they need, which you have no control over.
 *
 * In most situations flushing the entire workqueue is overkill; you merely
 * need to know that a particular work item isn't queued and isn't running.
 * In such cases you should use cancel_delayed_work_sync() or
 * cancel_work_sync() instead.
 */
void flush_scheduled_work(void)
{
	flush_workqueue(keventd_wq);
}
EXPORT_SYMBOL(flush_scheduled_work);

/**
 * execute_in_process_context - reliably execute the routine with user context
 * @fn:		the function to execute
 * @ew:		guaranteed storage for the execute work structure (must
 *		be available when the work executes)
 *
 * Executes the function immediately if process context is available,
 * otherwise schedules the function for delayed execution.
 *
 * Returns:	0 - function was executed
 *		1 - function was scheduled for execution
 */
int execute_in_process_context(work_func_t fn, struct execute_work *ew)
{
	if (!in_interrupt()) {
		fn(&ew->work);
		return 0;
	}

	INIT_WORK(&ew->work, fn);
	schedule_work(&ew->work);

	return 1;
}
EXPORT_SYMBOL_GPL(execute_in_process_context);

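/* true once the kernel-global keventd workqueue has been created */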
int keventd_up(void)
{
	return keventd_wq != NULL;
}

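/* is the calling task the keventd worker thread of this CPU? */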
int current_is_keventd(void)
{
	struct cpu_workqueue_struct *cwq;
	int cpu = raw_smp_processor_id(); /* preempt-safe: keventd is per-cpu */
	int ret = 0;

	BUG_ON(!keventd_wq);

	cwq = per_cpu_ptr(keventd_wq->cpu_wq, cpu);
	if (current == cwq->thread)
		ret = 1;

	return ret;

}

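/* initialize @wq's per-cpu data for @cpu; the worker thread is created later */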
static struct cpu_workqueue_struct *
init_cpu_workqueue(struct workqueue_struct *wq, int cpu)
{
	struct cpu_workqueue_struct *cwq = per_cpu_ptr(wq->cpu_wq, cpu);

	cwq->wq = wq;
	spin_lock_init(&cwq->lock);
	INIT_LIST_HEAD(&cwq->worklist);
	init_waitqueue_head(&cwq->more_work);

	return cwq;
}

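/* create (but do not start) the worker thread for @cwq */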
static int create_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu)
{
	struct workqueue_struct *wq = cwq->wq;
	const char *fmt = is_wq_single_threaded(wq) ? "%s" : "%s/%d";
	struct task_struct *p;

	p = kthread_create(worker_thread, cwq, fmt, wq->name, cpu);
	/*
	 * Nobody can add the work_struct to this cwq,
	 *	if (caller is __create_workqueue)
	 *		nobody should see this wq
	 *	else // caller is CPU_UP_PREPARE
	 *		cpu is not on cpu_online_map
	 * so we can abort safely.
	 */
	if (IS_ERR(p))
		return PTR_ERR(p);
	cwq->thread = p;

	trace_workqueue_creation(cwq->thread, cpu);

	return 0;
}

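/* bind the worker to @cpu (or leave it unbound if cpu < 0) and wake it up */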
static void start_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu)
{
	struct task_struct *p = cwq->thread;

	if (p != NULL) {
		if (cpu >= 0)
			kthread_bind(p, cpu);
		wake_up_process(p);
	}
}

struct workqueue_struct *__create_workqueue_key(const char *name,
						unsigned int flags,
						struct lock_class_key *key,
						const char *lock_name)
{
	struct workqueue_struct *wq;
	struct cpu_workqueue_struct *cwq;
	int err = 0, cpu;

	wq = kzalloc(sizeof(*wq), GFP_KERNEL);
	if (!wq)
		goto err;

	wq->cpu_wq = alloc_percpu(struct cpu_workqueue_struct);
	if (!wq->cpu_wq)
		goto err;

	wq->flags = flags;
	wq->name = name;
	lockdep_init_map(&wq->lockdep_map, lock_name, key, 0);
	INIT_LIST_HEAD(&wq->list);

	if (flags & WQ_SINGLE_THREAD) {
		cwq = init_cpu_workqueue(wq, singlethread_cpu);
		err = create_workqueue_thread(cwq, singlethread_cpu);
		start_workqueue_thread(cwq, -1);
	} else {
		cpu_maps_update_begin();
		/*
		 * We must place this wq on list even if the code below fails.
		 * cpu_down(cpu) can remove cpu from cpu_populated_map before
		 * destroy_workqueue() takes the lock, in that case we leak
		 * cwq[cpu]->thread.
		 */
		spin_lock(&workqueue_lock);
		list_add(&wq->list, &workqueues);
		spin_unlock(&workqueue_lock);
		/*
		 * We must initialize cwqs for each possible cpu even if we
		 * are going to call destroy_workqueue() finally. Otherwise
		 * cpu_up() can hit the uninitialized cwq once we drop the
		 * lock.
		 */
		for_each_possible_cpu(cpu) {
			cwq = init_cpu_workqueue(wq, cpu);
			if (err || !cpu_online(cpu))
				continue;
			err = create_workqueue_thread(cwq, cpu);
			start_workqueue_thread(cwq, cpu);
		}
		cpu_maps_update_done();
	}

	if (err) {
		destroy_workqueue(wq);
		wq = NULL;
	}
	return wq;
err:
	if (wq) {
		free_percpu(wq->cpu_wq);
		kfree(wq);
	}
	return NULL;
}
EXPORT_SYMBOL_GPL(__create_workqueue_key);

static void cleanup_workqueue_thread(struct cpu_workqueue_struct *cwq)
{
	/*
	 * Our caller is either destroy_workqueue() or CPU_POST_DEAD,
	 * cpu_add_remove_lock protects cwq->thread.
	 */
	if (cwq->thread == NULL)
		return;

	lock_map_acquire(&cwq->wq->lockdep_map);
	lock_map_release(&cwq->wq->lockdep_map);

	flush_cpu_workqueue(cwq);
	/*
	 * If the caller is CPU_POST_DEAD and cwq->worklist was not empty,
	 * a concurrent flush_workqueue() can insert a barrier after us.
	 * However, in that case run_workqueue() won't return and check
	 * kthread_should_stop() until it flushes all work_struct's.
	 * When ->worklist becomes empty it is safe to exit because no
	 * more work_structs can be queued on this cwq: flush_workqueue
	 * checks list_empty(), and a "normal" queue_work() can't use
	 * a dead CPU.
	 */
	trace_workqueue_destruction(cwq->thread);
	kthread_stop(cwq->thread);
	cwq->thread = NULL;
}

/**
 * destroy_workqueue - safely terminate a workqueue
 * @wq: target workqueue
 *
 * Safely destroy a workqueue. All work currently pending will be done first.
 */
void destroy_workqueue(struct workqueue_struct *wq)
{
	const struct cpumask *cpu_map = wq_cpu_map(wq);
	int cpu;

	cpu_maps_update_begin();
	spin_lock(&workqueue_lock);
	list_del(&wq->list);
	spin_unlock(&workqueue_lock);

	for_each_cpu(cpu, cpu_map)
		cleanup_workqueue_thread(per_cpu_ptr(wq->cpu_wq, cpu));
	cpu_maps_update_done();

	free_percpu(wq->cpu_wq);
	kfree(wq);
}
EXPORT_SYMBOL_GPL(destroy_workqueue);

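/*
 * CPU hotplug callback: create, start or clean up the per-cpu worker
 * threads of every registered workqueue as CPUs come and go.
 */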
static int __devinit workqueue_cpu_callback(struct notifier_block *nfb,
						unsigned long action,
						void *hcpu)
{
	unsigned int cpu = (unsigned long)hcpu;
	struct cpu_workqueue_struct *cwq;
	struct workqueue_struct *wq;
	int err = 0;

	action &= ~CPU_TASKS_FROZEN;

	switch (action) {
	case CPU_UP_PREPARE:
		cpumask_set_cpu(cpu, cpu_populated_map);
	}
undo:
	list_for_each_entry(wq, &workqueues, list) {
		cwq = per_cpu_ptr(wq->cpu_wq, cpu);

		switch (action) {
		case CPU_UP_PREPARE:
			err = create_workqueue_thread(cwq, cpu);
			if (!err)
				break;
			printk(KERN_ERR "workqueue [%s] for %i failed\n",
				wq->name, cpu);
			action = CPU_UP_CANCELED;
			err = -ENOMEM;
			goto undo;

		case CPU_ONLINE:
			start_workqueue_thread(cwq, cpu);
			break;

		case CPU_UP_CANCELED:
			start_workqueue_thread(cwq, -1);
		case CPU_POST_DEAD:
			cleanup_workqueue_thread(cwq);
			break;
		}
	}

	switch (action) {
	case CPU_UP_CANCELED:
	case CPU_POST_DEAD:
		cpumask_clear_cpu(cpu, cpu_populated_map);
	}

	return notifier_from_errno(err);
}

#ifdef CONFIG_SMP

struct work_for_cpu {
	struct completion completion;
	long (*fn)(void *);
	void *arg;
	long ret;
};

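/* kthread body for work_on_cpu(): run the payload and signal completion */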
static int do_work_for_cpu(void *_wfc)
{
	struct work_for_cpu *wfc = _wfc;
	wfc->ret = wfc->fn(wfc->arg);
	complete(&wfc->completion);
	return 0;
}

/**
 * work_on_cpu - run a function in user context on a particular cpu
 * @cpu: the cpu to run on
 * @fn: the function to run
 * @arg: the function arg
 *
 * This will return the value @fn returns.
 * It is up to the caller to ensure that the cpu doesn't go offline.
 * The caller must not hold any locks which would prevent @fn from completing.
 */
long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg)
{
	struct task_struct *sub_thread;
	struct work_for_cpu wfc = {
		.completion = COMPLETION_INITIALIZER_ONSTACK(wfc.completion),
		.fn = fn,
		.arg = arg,
	};

	sub_thread = kthread_create(do_work_for_cpu, &wfc, "work_for_cpu");
	if (IS_ERR(sub_thread))
		return PTR_ERR(sub_thread);
	kthread_bind(sub_thread, cpu);
	wake_up_process(sub_thread);
	wait_for_completion(&wfc.completion);
	return wfc.ret;
}
EXPORT_SYMBOL_GPL(work_on_cpu);
#endif /* CONFIG_SMP */

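/*
 * Set up the cpumasks used above, register the CPU hotplug notifier and
 * create the kernel-global workqueue backing schedule_work().
 */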
void __init init_workqueues(void)
{
	alloc_cpumask_var(&cpu_populated_map, GFP_KERNEL);

	cpumask_copy(cpu_populated_map, cpu_online_mask);
	singlethread_cpu = cpumask_first(cpu_possible_mask);
	cpu_singlethread_map = cpumask_of(singlethread_cpu);
	hotcpu_notifier(workqueue_cpu_callback, 0);
	keventd_wq = create_workqueue("events");
	BUG_ON(!keventd_wq);
}