/*
 * linux/kernel/workqueue.c
 *
 * Generic mechanism for defining kernel helper threads for running
 * arbitrary tasks in process context.
 *
 * Started by Ingo Molnar, Copyright (C) 2002
 *
 * Derived from the taskqueue/keventd code by:
 *
 *   David Woodhouse <dwmw2@infradead.org>
 *   Andrew Morton
 *   Kai Petzke <wpp@marie.physik.tu-berlin.de>
 *   Theodore Ts'o <tytso@mit.edu>
 *
 * Made to use alloc_percpu by Christoph Lameter.
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/init.h>
#include <linux/signal.h>
#include <linux/completion.h>
#include <linux/workqueue.h>
#include <linux/slab.h>
#include <linux/cpu.h>
#include <linux/notifier.h>
#include <linux/kthread.h>
#include <linux/hardirq.h>
#include <linux/mempolicy.h>
#include <linux/freezer.h>
#include <linux/kallsyms.h>
#include <linux/debug_locks.h>
#include <linux/lockdep.h>
#define CREATE_TRACE_POINTS
#include <trace/events/workqueue.h>

/*
 * The per-CPU workqueue (if single thread, we always use the first
 * possible cpu).
 */
struct cpu_workqueue_struct {

	spinlock_t lock;

	struct list_head worklist;
	wait_queue_head_t more_work;
	struct work_struct *current_work;

	struct workqueue_struct *wq;
	struct task_struct *thread;
} ____cacheline_aligned;

/*
 * The externally visible workqueue abstraction is an array of
 * per-CPU workqueues:
 */
struct workqueue_struct {
	struct cpu_workqueue_struct *cpu_wq;
	struct list_head list;
	const char *name;
	int singlethread;
	int freezeable;		/* Freeze threads during suspend */
	int rt;
#ifdef CONFIG_LOCKDEP
	struct lockdep_map lockdep_map;
#endif
};

#ifdef CONFIG_DEBUG_OBJECTS_WORK

static struct debug_obj_descr work_debug_descr;

/*
 * fixup_init is called when:
 * - an active object is initialized
 */
static int work_fixup_init(void *addr, enum debug_obj_state state)
{
	struct work_struct *work = addr;

	switch (state) {
	case ODEBUG_STATE_ACTIVE:
		cancel_work_sync(work);
		debug_object_init(work, &work_debug_descr);
		return 1;
	default:
		return 0;
	}
}

/*
 * fixup_activate is called when:
 * - an active object is activated
 * - an unknown object is activated (might be a statically initialized object)
 */
static int work_fixup_activate(void *addr, enum debug_obj_state state)
{
	struct work_struct *work = addr;

	switch (state) {

	case ODEBUG_STATE_NOTAVAILABLE:
		/*
		 * This is not really a fixup. The work struct was
		 * statically initialized. We just make sure that it
		 * is tracked in the object tracker.
		 */
		if (test_bit(WORK_STRUCT_STATIC, work_data_bits(work))) {
			debug_object_init(work, &work_debug_descr);
			debug_object_activate(work, &work_debug_descr);
			return 0;
		}
		WARN_ON_ONCE(1);
		return 0;

	case ODEBUG_STATE_ACTIVE:
		WARN_ON(1);

	default:
		return 0;
	}
}

/*
 * fixup_free is called when:
 * - an active object is freed
 */
static int work_fixup_free(void *addr, enum debug_obj_state state)
{
	struct work_struct *work = addr;

	switch (state) {
	case ODEBUG_STATE_ACTIVE:
		cancel_work_sync(work);
		debug_object_free(work, &work_debug_descr);
		return 1;
	default:
		return 0;
	}
}

static struct debug_obj_descr work_debug_descr = {
	.name		= "work_struct",
	.fixup_init	= work_fixup_init,
	.fixup_activate	= work_fixup_activate,
	.fixup_free	= work_fixup_free,
};

static inline void debug_work_activate(struct work_struct *work)
{
	debug_object_activate(work, &work_debug_descr);
}

static inline void debug_work_deactivate(struct work_struct *work)
{
	debug_object_deactivate(work, &work_debug_descr);
}

void __init_work(struct work_struct *work, int onstack)
{
	if (onstack)
		debug_object_init_on_stack(work, &work_debug_descr);
	else
		debug_object_init(work, &work_debug_descr);
}
EXPORT_SYMBOL_GPL(__init_work);

void destroy_work_on_stack(struct work_struct *work)
{
	debug_object_free(work, &work_debug_descr);
}
EXPORT_SYMBOL_GPL(destroy_work_on_stack);

#else
static inline void debug_work_activate(struct work_struct *work) { }
static inline void debug_work_deactivate(struct work_struct *work) { }
#endif

/* Serializes the accesses to the list of workqueues. */
static DEFINE_SPINLOCK(workqueue_lock);
static LIST_HEAD(workqueues);

static int singlethread_cpu __read_mostly;
static const struct cpumask *cpu_singlethread_map __read_mostly;
/*
 * _cpu_down() first removes CPU from cpu_online_map, then CPU_DEAD
 * flushes cwq->worklist. This means that flush_workqueue/wait_on_work
 * which comes in between can't use for_each_online_cpu(). We could
 * use cpu_possible_map, the cpumask below is more a documentation
 * than optimization.
 */
static cpumask_var_t cpu_populated_map __read_mostly;

/* If it's single threaded, it isn't in the list of workqueues. */
static inline int is_wq_single_threaded(struct workqueue_struct *wq)
{
	return wq->singlethread;
}

static const struct cpumask *wq_cpu_map(struct workqueue_struct *wq)
{
	return is_wq_single_threaded(wq)
		? cpu_singlethread_map : cpu_populated_map;
}

static
struct cpu_workqueue_struct *wq_per_cpu(struct workqueue_struct *wq, int cpu)
{
	if (unlikely(is_wq_single_threaded(wq)))
		cpu = singlethread_cpu;
	return per_cpu_ptr(wq->cpu_wq, cpu);
}

/*
 * Set the workqueue on which a work item is to be run
 * - Must *only* be called if the pending flag is set
 */
static inline void set_wq_data(struct work_struct *work,
				struct cpu_workqueue_struct *cwq)
{
	unsigned long new;

	BUG_ON(!work_pending(work));

	new = (unsigned long) cwq | (1UL << WORK_STRUCT_PENDING);
	new |= WORK_STRUCT_FLAG_MASK & *work_data_bits(work);
	atomic_long_set(&work->data, new);
}

/*
 * Clear WORK_STRUCT_PENDING and the workqueue on which it was queued.
 */
static inline void clear_wq_data(struct work_struct *work)
{
	unsigned long flags = *work_data_bits(work) &
				(1UL << WORK_STRUCT_STATIC);
	atomic_long_set(&work->data, flags);
}

static inline
struct cpu_workqueue_struct *get_wq_data(struct work_struct *work)
{
	return (void *) (atomic_long_read(&work->data) & WORK_STRUCT_WQ_DATA_MASK);
}

static void insert_work(struct cpu_workqueue_struct *cwq,
			struct work_struct *work, struct list_head *head)
{
	trace_workqueue_insertion(cwq->thread, work);

	set_wq_data(work, cwq);
	/*
	 * Ensure that we get the right work->data if we see the
	 * result of list_add() below, see try_to_grab_pending().
	 */
	smp_wmb();
	list_add_tail(&work->entry, head);
	wake_up(&cwq->more_work);
}

static void __queue_work(struct cpu_workqueue_struct *cwq,
			 struct work_struct *work)
{
	unsigned long flags;

	debug_work_activate(work);
	spin_lock_irqsave(&cwq->lock, flags);
	insert_work(cwq, work, &cwq->worklist);
	spin_unlock_irqrestore(&cwq->lock, flags);
}

/**
 * queue_work - queue work on a workqueue
 * @wq: workqueue to use
 * @work: work to queue
 *
 * Returns 0 if @work was already on a queue, non-zero otherwise.
 *
 * We queue the work to the CPU on which it was submitted, but if the CPU dies
 * it can be processed by another CPU.
 */
int queue_work(struct workqueue_struct *wq, struct work_struct *work)
{
	int ret;

	ret = queue_work_on(get_cpu(), wq, work);
	put_cpu();

	return ret;
}
EXPORT_SYMBOL_GPL(queue_work);

/**
 * queue_work_on - queue work on specific cpu
 * @cpu: CPU number to execute work on
 * @wq: workqueue to use
 * @work: work to queue
 *
 * Returns 0 if @work was already on a queue, non-zero otherwise.
 *
 * We queue the work to a specific CPU, the caller must ensure it
 * can't go away.
 */
int
queue_work_on(int cpu, struct workqueue_struct *wq, struct work_struct *work)
{
	int ret = 0;

	if (!test_and_set_bit(WORK_STRUCT_PENDING, work_data_bits(work))) {
		BUG_ON(!list_empty(&work->entry));
		__queue_work(wq_per_cpu(wq, cpu), work);
		ret = 1;
	}
	return ret;
}
EXPORT_SYMBOL_GPL(queue_work_on);
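
/*
 * Usage sketch (illustrative only, not part of this file): a typical caller
 * defines a work function and a work_struct, then hands it to a workqueue.
 * The names my_wq, my_work and my_work_fn below are hypothetical.
 *
 *	static void my_work_fn(struct work_struct *work)
 *	{
 *		pr_info("running in process context\n");
 *	}
 *	static DECLARE_WORK(my_work, my_work_fn);
 *
 *	// safe even from atomic context:
 *	queue_work(my_wq, &my_work);		// CPU it was submitted on
 *	queue_work_on(1, my_wq, &my_work);	// explicitly on CPU 1
 *
 * A second queue_work() while WORK_STRUCT_PENDING is still set returns 0
 * and does not queue the item twice.
 */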

static void delayed_work_timer_fn(unsigned long __data)
{
	struct delayed_work *dwork = (struct delayed_work *)__data;
	struct cpu_workqueue_struct *cwq = get_wq_data(&dwork->work);
	struct workqueue_struct *wq = cwq->wq;

	__queue_work(wq_per_cpu(wq, smp_processor_id()), &dwork->work);
}

/**
 * queue_delayed_work - queue work on a workqueue after delay
 * @wq: workqueue to use
 * @dwork: delayable work to queue
 * @delay: number of jiffies to wait before queueing
 *
 * Returns 0 if @work was already on a queue, non-zero otherwise.
 */
int queue_delayed_work(struct workqueue_struct *wq,
			struct delayed_work *dwork, unsigned long delay)
{
	if (delay == 0)
		return queue_work(wq, &dwork->work);

	return queue_delayed_work_on(-1, wq, dwork, delay);
}
EXPORT_SYMBOL_GPL(queue_delayed_work);

/**
 * queue_delayed_work_on - queue work on specific CPU after delay
 * @cpu: CPU number to execute work on
 * @wq: workqueue to use
 * @dwork: work to queue
 * @delay: number of jiffies to wait before queueing
 *
 * Returns 0 if @work was already on a queue, non-zero otherwise.
 */
int queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
			struct delayed_work *dwork, unsigned long delay)
{
	int ret = 0;
	struct timer_list *timer = &dwork->timer;
	struct work_struct *work = &dwork->work;

	if (!test_and_set_bit(WORK_STRUCT_PENDING, work_data_bits(work))) {
		BUG_ON(timer_pending(timer));
		BUG_ON(!list_empty(&work->entry));

		timer_stats_timer_set_start_info(&dwork->timer);

		/* This stores cwq for the moment, for the timer_fn */
		set_wq_data(work, wq_per_cpu(wq, raw_smp_processor_id()));
		timer->expires = jiffies + delay;
		timer->data = (unsigned long)dwork;
		timer->function = delayed_work_timer_fn;

		if (unlikely(cpu >= 0))
			add_timer_on(timer, cpu);
		else
			add_timer(timer);
		ret = 1;
	}
	return ret;
}
EXPORT_SYMBOL_GPL(queue_delayed_work_on);
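
/*
 * Usage sketch (illustrative only, not part of this file): delayed work
 * pairs a work_struct with a timer.  The names my_wq, my_dwork and
 * my_dwork_fn are hypothetical.
 *
 *	static void my_dwork_fn(struct work_struct *work)
 *	{
 *		struct delayed_work *dwork = to_delayed_work(work);
 *		// do the deferred work, optionally re-arm:
 *		schedule_delayed_work(dwork, msecs_to_jiffies(500));
 *	}
 *	static DECLARE_DELAYED_WORK(my_dwork, my_dwork_fn);
 *
 *	queue_delayed_work(my_wq, &my_dwork, HZ);	// run after ~1 second
 *	queue_delayed_work_on(0, my_wq, &my_dwork, HZ);	// timer pinned to CPU 0
 */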

static void run_workqueue(struct cpu_workqueue_struct *cwq)
{
	spin_lock_irq(&cwq->lock);
	while (!list_empty(&cwq->worklist)) {
		struct work_struct *work = list_entry(cwq->worklist.next,
						struct work_struct, entry);
		work_func_t f = work->func;
#ifdef CONFIG_LOCKDEP
		/*
		 * It is permissible to free the struct work_struct
		 * from inside the function that is called from it,
		 * this we need to take into account for lockdep too.
		 * To avoid bogus "held lock freed" warnings as well
		 * as problems when looking into work->lockdep_map,
		 * make a copy and use that here.
		 */
		struct lockdep_map lockdep_map = work->lockdep_map;
#endif
		trace_workqueue_execution(cwq->thread, work);
		debug_work_deactivate(work);
		cwq->current_work = work;
		list_del_init(cwq->worklist.next);
		spin_unlock_irq(&cwq->lock);

		BUG_ON(get_wq_data(work) != cwq);
		work_clear_pending(work);
		lock_map_acquire(&cwq->wq->lockdep_map);
		lock_map_acquire(&lockdep_map);
		f(work);
		lock_map_release(&lockdep_map);
		lock_map_release(&cwq->wq->lockdep_map);

		if (unlikely(in_atomic() || lockdep_depth(current) > 0)) {
			printk(KERN_ERR "BUG: workqueue leaked lock or atomic: "
					"%s/0x%08x/%d\n",
					current->comm, preempt_count(),
					task_pid_nr(current));
			printk(KERN_ERR "    last function: ");
			print_symbol("%s\n", (unsigned long)f);
			debug_show_held_locks(current);
			dump_stack();
		}

		spin_lock_irq(&cwq->lock);
		cwq->current_work = NULL;
	}
	spin_unlock_irq(&cwq->lock);
}

static int worker_thread(void *__cwq)
{
	struct cpu_workqueue_struct *cwq = __cwq;
	DEFINE_WAIT(wait);

	if (cwq->wq->freezeable)
		set_freezable();

	for (;;) {
		prepare_to_wait(&cwq->more_work, &wait, TASK_INTERRUPTIBLE);
		if (!freezing(current) &&
		    !kthread_should_stop() &&
		    list_empty(&cwq->worklist))
			schedule();
		finish_wait(&cwq->more_work, &wait);

		try_to_freeze();

		if (kthread_should_stop())
			break;

		run_workqueue(cwq);
	}

	return 0;
}

struct wq_barrier {
	struct work_struct	work;
	struct completion	done;
};

static void wq_barrier_func(struct work_struct *work)
{
	struct wq_barrier *barr = container_of(work, struct wq_barrier, work);
	complete(&barr->done);
}

static void insert_wq_barrier(struct cpu_workqueue_struct *cwq,
			struct wq_barrier *barr, struct list_head *head)
{
	/*
	 * debugobject calls are safe here even with cwq->lock locked
	 * as we know for sure that this will not trigger any of the
	 * checks and call back into the fixup functions where we
	 * might deadlock.
	 */
	INIT_WORK_ON_STACK(&barr->work, wq_barrier_func);
	__set_bit(WORK_STRUCT_PENDING, work_data_bits(&barr->work));

	init_completion(&barr->done);

	debug_work_activate(&barr->work);
	insert_work(cwq, &barr->work, head);
}

static int flush_cpu_workqueue(struct cpu_workqueue_struct *cwq)
{
	int active = 0;
	struct wq_barrier barr;

	WARN_ON(cwq->thread == current);

	spin_lock_irq(&cwq->lock);
	if (!list_empty(&cwq->worklist) || cwq->current_work != NULL) {
		insert_wq_barrier(cwq, &barr, &cwq->worklist);
		active = 1;
	}
	spin_unlock_irq(&cwq->lock);

	if (active) {
		wait_for_completion(&barr.done);
		destroy_work_on_stack(&barr.work);
	}

	return active;
}

/**
 * flush_workqueue - ensure that any scheduled work has run to completion.
 * @wq: workqueue to flush
 *
 * Forces execution of the workqueue and blocks until its completion.
 * This is typically used in driver shutdown handlers.
 *
 * We sleep until all works which were queued on entry have been handled,
 * but we are not livelocked by new incoming ones.
 *
 * This function used to run the workqueues itself.  Now we just wait for the
 * helper threads to do it.
 */
void flush_workqueue(struct workqueue_struct *wq)
{
	const struct cpumask *cpu_map = wq_cpu_map(wq);
	int cpu;

	might_sleep();
	lock_map_acquire(&wq->lockdep_map);
	lock_map_release(&wq->lockdep_map);
	for_each_cpu(cpu, cpu_map)
		flush_cpu_workqueue(per_cpu_ptr(wq->cpu_wq, cpu));
}
EXPORT_SYMBOL_GPL(flush_workqueue);
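
/*
 * Usage sketch (illustrative only, not part of this file): a driver that
 * wants its own worker thread(s) rather than keventd typically manages a
 * dedicated workqueue over its lifetime (my_wq and my_work are
 * hypothetical):
 *
 *	my_wq = create_workqueue("my_wq");	// one thread per CPU
 *	// or: my_wq = create_singlethread_workqueue("my_wq");
 *	if (!my_wq)
 *		return -ENOMEM;
 *
 *	queue_work(my_wq, &my_work);
 *
 *	// on teardown:
 *	flush_workqueue(my_wq);		// wait for everything queued so far
 *	destroy_workqueue(my_wq);	// flushes again and stops the threads
 */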

/**
 * flush_work - block until a work_struct's callback has terminated
 * @work: the work which is to be flushed
 *
 * Returns false if @work has already terminated.
 *
 * It is expected that, prior to calling flush_work(), the caller has
 * arranged for the work to not be requeued, otherwise it doesn't make
 * sense to use this function.
 */
int flush_work(struct work_struct *work)
{
	struct cpu_workqueue_struct *cwq;
	struct list_head *prev;
	struct wq_barrier barr;

	might_sleep();
	cwq = get_wq_data(work);
	if (!cwq)
		return 0;

	lock_map_acquire(&cwq->wq->lockdep_map);
	lock_map_release(&cwq->wq->lockdep_map);

	prev = NULL;
	spin_lock_irq(&cwq->lock);
	if (!list_empty(&work->entry)) {
		/*
		 * See the comment near try_to_grab_pending()->smp_rmb().
		 * If it was re-queued under us we are not going to wait.
		 */
		smp_rmb();
		if (unlikely(cwq != get_wq_data(work)))
			goto out;
		prev = &work->entry;
	} else {
		if (cwq->current_work != work)
			goto out;
		prev = &cwq->worklist;
	}
	insert_wq_barrier(cwq, &barr, prev->next);
out:
	spin_unlock_irq(&cwq->lock);
	if (!prev)
		return 0;

	wait_for_completion(&barr.done);
	destroy_work_on_stack(&barr.work);
	return 1;
}
EXPORT_SYMBOL_GPL(flush_work);

/*
 * Upon a successful return (>= 0), the caller "owns" WORK_STRUCT_PENDING bit,
 * so this work can't be re-armed in any way.
 */
static int try_to_grab_pending(struct work_struct *work)
{
	struct cpu_workqueue_struct *cwq;
	int ret = -1;

	if (!test_and_set_bit(WORK_STRUCT_PENDING, work_data_bits(work)))
		return 0;

	/*
	 * The queueing is in progress, or it is already queued. Try to
	 * steal it from ->worklist without clearing WORK_STRUCT_PENDING.
	 */

	cwq = get_wq_data(work);
	if (!cwq)
		return ret;

	spin_lock_irq(&cwq->lock);
	if (!list_empty(&work->entry)) {
		/*
		 * This work is queued, but perhaps we locked the wrong cwq.
		 * In that case we must see the new value after rmb(), see
		 * insert_work()->wmb().
		 */
		smp_rmb();
		if (cwq == get_wq_data(work)) {
			debug_work_deactivate(work);
			list_del_init(&work->entry);
			ret = 1;
		}
	}
	spin_unlock_irq(&cwq->lock);

	return ret;
}

static void wait_on_cpu_work(struct cpu_workqueue_struct *cwq,
				struct work_struct *work)
{
	struct wq_barrier barr;
	int running = 0;

	spin_lock_irq(&cwq->lock);
	if (unlikely(cwq->current_work == work)) {
		insert_wq_barrier(cwq, &barr, cwq->worklist.next);
		running = 1;
	}
	spin_unlock_irq(&cwq->lock);

	if (unlikely(running)) {
		wait_for_completion(&barr.done);
		destroy_work_on_stack(&barr.work);
	}
}

static void wait_on_work(struct work_struct *work)
{
	struct cpu_workqueue_struct *cwq;
	struct workqueue_struct *wq;
	const struct cpumask *cpu_map;
	int cpu;

	might_sleep();

	lock_map_acquire(&work->lockdep_map);
	lock_map_release(&work->lockdep_map);

	cwq = get_wq_data(work);
	if (!cwq)
		return;

	wq = cwq->wq;
	cpu_map = wq_cpu_map(wq);

	for_each_cpu(cpu, cpu_map)
		wait_on_cpu_work(per_cpu_ptr(wq->cpu_wq, cpu), work);
}

static int __cancel_work_timer(struct work_struct *work,
				struct timer_list* timer)
{
	int ret;

	do {
		ret = (timer && likely(del_timer(timer)));
		if (!ret)
			ret = try_to_grab_pending(work);
		wait_on_work(work);
	} while (unlikely(ret < 0));

	clear_wq_data(work);
	return ret;
}

/**
 * cancel_work_sync - block until a work_struct's callback has terminated
 * @work: the work which is to be flushed
 *
 * Returns true if @work was pending.
 *
 * cancel_work_sync() will cancel the work if it is queued. If the work's
 * callback appears to be running, cancel_work_sync() will block until it
 * has completed.
 *
 * It is possible to use this function if the work re-queues itself. It can
 * cancel the work even if it migrates to another workqueue, however in that
 * case it only guarantees that work->func() has completed on the last queued
 * workqueue.
 *
 * cancel_work_sync(&delayed_work->work) should be used only if ->timer is not
 * pending, otherwise it goes into a busy-wait loop until the timer expires.
 *
 * The caller must ensure that workqueue_struct on which this work was last
 * queued can't be destroyed before this function returns.
 */
int cancel_work_sync(struct work_struct *work)
{
	return __cancel_work_timer(work, NULL);
}
EXPORT_SYMBOL_GPL(cancel_work_sync);

/**
 * cancel_delayed_work_sync - reliably kill off a delayed work.
 * @dwork: the delayed work struct
 *
 * Returns true if @dwork was pending.
 *
 * It is possible to use this function if @dwork rearms itself via queue_work()
 * or queue_delayed_work(). See also the comment for cancel_work_sync().
 */
int cancel_delayed_work_sync(struct delayed_work *dwork)
{
	return __cancel_work_timer(&dwork->work, &dwork->timer);
}
EXPORT_SYMBOL(cancel_delayed_work_sync);
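
/*
 * Usage sketch (illustrative only, not part of this file): the usual
 * teardown pattern for a self-rearming delayed work in a driver's
 * remove/close path (my_dwork and my_remove are hypothetical):
 *
 *	static void my_remove(void)
 *	{
 *		// After this returns, the handler is neither queued, pending
 *		// on its timer, nor still running anywhere:
 *		cancel_delayed_work_sync(&my_dwork);
 *	}
 *
 * For plain (non-delayed) work use cancel_work_sync(&my_work) instead.
 * Both may sleep, so neither may be called from atomic context.
 */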

static struct workqueue_struct *keventd_wq __read_mostly;

/**
 * schedule_work - put work task in global workqueue
 * @work: job to be done
 *
 * Returns zero if @work was already on the kernel-global workqueue and
 * non-zero otherwise.
 *
 * This puts a job in the kernel-global workqueue if it was not already
 * queued and leaves it in the same position on the kernel-global
 * workqueue otherwise.
 */
int schedule_work(struct work_struct *work)
{
	return queue_work(keventd_wq, work);
}
EXPORT_SYMBOL(schedule_work);

/*
 * schedule_work_on - put work task on a specific cpu
 * @cpu: cpu to put the work task on
 * @work: job to be done
 *
 * This puts a job on a specific cpu
 */
int schedule_work_on(int cpu, struct work_struct *work)
{
	return queue_work_on(cpu, keventd_wq, work);
}
EXPORT_SYMBOL(schedule_work_on);

/**
 * schedule_delayed_work - put work task in global workqueue after delay
 * @dwork: job to be done
 * @delay: number of jiffies to wait or 0 for immediate execution
 *
 * After waiting for a given time this puts a job in the kernel-global
 * workqueue.
 */
int schedule_delayed_work(struct delayed_work *dwork,
					unsigned long delay)
{
	return queue_delayed_work(keventd_wq, dwork, delay);
}
EXPORT_SYMBOL(schedule_delayed_work);

/**
 * flush_delayed_work - block until a dwork_struct's callback has terminated
 * @dwork: the delayed work which is to be flushed
 *
 * Any timeout is cancelled, and any pending work is run immediately.
 */
void flush_delayed_work(struct delayed_work *dwork)
{
	if (del_timer_sync(&dwork->timer)) {
		struct cpu_workqueue_struct *cwq;
		cwq = wq_per_cpu(get_wq_data(&dwork->work)->wq, get_cpu());
		__queue_work(cwq, &dwork->work);
		put_cpu();
	}
	flush_work(&dwork->work);
}
EXPORT_SYMBOL(flush_delayed_work);

/**
 * schedule_delayed_work_on - queue work in global workqueue on CPU after delay
 * @cpu: cpu to use
 * @dwork: job to be done
 * @delay: number of jiffies to wait
 *
 * After waiting for a given time this puts a job in the kernel-global
 * workqueue on the specified CPU.
 */
int schedule_delayed_work_on(int cpu,
			struct delayed_work *dwork, unsigned long delay)
{
	return queue_delayed_work_on(cpu, keventd_wq, dwork, delay);
}
EXPORT_SYMBOL(schedule_delayed_work_on);

/**
 * schedule_on_each_cpu - call a function on each online CPU from keventd
 * @func: the function to call
 *
 * Returns zero on success.
 * Returns -ve errno on failure.
 *
 * schedule_on_each_cpu() is very slow.
 */
int schedule_on_each_cpu(work_func_t func)
{
	int cpu;
	int orig = -1;
	struct work_struct *works;

	works = alloc_percpu(struct work_struct);
	if (!works)
		return -ENOMEM;

	get_online_cpus();

	/*
	 * When running in keventd don't schedule a work item on
	 * itself.  Can just call directly because the work queue is
	 * already bound.  This also is faster.
	 */
	if (current_is_keventd())
		orig = raw_smp_processor_id();

	for_each_online_cpu(cpu) {
		struct work_struct *work = per_cpu_ptr(works, cpu);

		INIT_WORK(work, func);
		if (cpu != orig)
			schedule_work_on(cpu, work);
	}
	if (orig >= 0)
		func(per_cpu_ptr(works, orig));

	for_each_online_cpu(cpu)
		flush_work(per_cpu_ptr(works, cpu));

	put_online_cpus();
	free_percpu(works);
	return 0;
}
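
/*
 * Usage sketch (illustrative only, not part of this file):
 * schedule_on_each_cpu() runs the given function once on every online CPU
 * and waits for all of them, e.g. for draining per-CPU caches
 * (my_drain_fn is hypothetical):
 *
 *	static void my_drain_fn(struct work_struct *unused)
 *	{
 *		// runs in process context, bound to one CPU
 *	}
 *
 *	int ret = schedule_on_each_cpu(my_drain_fn);	// 0 on success
 *
 * The caller must be able to sleep; as noted above, this is very slow.
 */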

/**
 * flush_scheduled_work - ensure that any scheduled work has run to completion.
 *
 * Forces execution of the kernel-global workqueue and blocks until its
 * completion.
 *
 * Think twice before calling this function!  It's very easy to get into
 * trouble if you don't take great care.  Either of the following situations
 * will lead to deadlock:
 *
 *	One of the work items currently on the workqueue needs to acquire
 *	a lock held by your code or its caller.
 *
 *	Your code is running in the context of a work routine.
 *
 * They will be detected by lockdep when they occur, but the first might not
 * occur very often.  It depends on what work items are on the workqueue and
 * what locks they need, which you have no control over.
 *
 * In most situations flushing the entire workqueue is overkill; you merely
 * need to know that a particular work item isn't queued and isn't running.
 * In such cases you should use cancel_delayed_work_sync() or
 * cancel_work_sync() instead.
 */
void flush_scheduled_work(void)
{
	flush_workqueue(keventd_wq);
}
EXPORT_SYMBOL(flush_scheduled_work);

/**
 * execute_in_process_context - reliably execute the routine with user context
 * @fn:		the function to execute
 * @ew:		guaranteed storage for the execute work structure (must
 *		be available when the work executes)
 *
 * Executes the function immediately if process context is available,
 * otherwise schedules the function for delayed execution.
 *
 * Returns:	0 - function was executed
 *		1 - function was scheduled for execution
 */
int execute_in_process_context(work_func_t fn, struct execute_work *ew)
{
	if (!in_interrupt()) {
		fn(&ew->work);
		return 0;
	}

	INIT_WORK(&ew->work, fn);
	schedule_work(&ew->work);

	return 1;
}
EXPORT_SYMBOL_GPL(execute_in_process_context);
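
/*
 * Usage sketch (illustrative only, not part of this file):
 * execute_in_process_context() suits callers that may or may not already be
 * in process context.  The execute_work storage must outlive a deferred
 * call, so it usually lives in a longer-lived object (my_dev and my_release
 * are hypothetical):
 *
 *	struct my_dev {
 *		struct execute_work release_ew;
 *	};
 *
 *	static void my_release(struct work_struct *work)
 *	{
 *		struct my_dev *dev =
 *			container_of(work, struct my_dev, release_ew.work);
 *		kfree(dev);
 *	}
 *
 *	// runs my_release() now, or schedules it if called from interrupt:
 *	execute_in_process_context(my_release, &dev->release_ew);
 */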

int keventd_up(void)
{
	return keventd_wq != NULL;
}

int current_is_keventd(void)
{
	struct cpu_workqueue_struct *cwq;
	int cpu = raw_smp_processor_id(); /* preempt-safe: keventd is per-cpu */
	int ret = 0;

	BUG_ON(!keventd_wq);

	cwq = per_cpu_ptr(keventd_wq->cpu_wq, cpu);
	if (current == cwq->thread)
		ret = 1;

	return ret;

}

static struct cpu_workqueue_struct *
init_cpu_workqueue(struct workqueue_struct *wq, int cpu)
{
	struct cpu_workqueue_struct *cwq = per_cpu_ptr(wq->cpu_wq, cpu);

	cwq->wq = wq;
	spin_lock_init(&cwq->lock);
	INIT_LIST_HEAD(&cwq->worklist);
	init_waitqueue_head(&cwq->more_work);

	return cwq;
}

static int create_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu)
{
	struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
	struct workqueue_struct *wq = cwq->wq;
	const char *fmt = is_wq_single_threaded(wq) ? "%s" : "%s/%d";
	struct task_struct *p;

	p = kthread_create(worker_thread, cwq, fmt, wq->name, cpu);
	/*
	 * Nobody can add the work_struct to this cwq,
	 *	if (caller is __create_workqueue)
	 *		nobody should see this wq
	 *	else // caller is CPU_UP_PREPARE
	 *		cpu is not on cpu_online_map
	 * so we can abort safely.
	 */
	if (IS_ERR(p))
		return PTR_ERR(p);
	if (cwq->wq->rt)
		sched_setscheduler_nocheck(p, SCHED_FIFO, &param);
	cwq->thread = p;

	trace_workqueue_creation(cwq->thread, cpu);

	return 0;
}

static void start_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu)
{
	struct task_struct *p = cwq->thread;

	if (p != NULL) {
		if (cpu >= 0)
			kthread_bind(p, cpu);
		wake_up_process(p);
	}
}

struct workqueue_struct *__create_workqueue_key(const char *name,
						int singlethread,
						int freezeable,
						int rt,
						struct lock_class_key *key,
						const char *lock_name)
{
	struct workqueue_struct *wq;
	struct cpu_workqueue_struct *cwq;
	int err = 0, cpu;

	wq = kzalloc(sizeof(*wq), GFP_KERNEL);
	if (!wq)
		return NULL;

	wq->cpu_wq = alloc_percpu(struct cpu_workqueue_struct);
	if (!wq->cpu_wq) {
		kfree(wq);
		return NULL;
	}

	wq->name = name;
	lockdep_init_map(&wq->lockdep_map, lock_name, key, 0);
	wq->singlethread = singlethread;
	wq->freezeable = freezeable;
	wq->rt = rt;
	INIT_LIST_HEAD(&wq->list);

	if (singlethread) {
		cwq = init_cpu_workqueue(wq, singlethread_cpu);
		err = create_workqueue_thread(cwq, singlethread_cpu);
		start_workqueue_thread(cwq, -1);
	} else {
		cpu_maps_update_begin();
		/*
		 * We must place this wq on list even if the code below fails.
		 * cpu_down(cpu) can remove cpu from cpu_populated_map before
		 * destroy_workqueue() takes the lock, in that case we leak
		 * cwq[cpu]->thread.
		 */
		spin_lock(&workqueue_lock);
		list_add(&wq->list, &workqueues);
		spin_unlock(&workqueue_lock);
		/*
		 * We must initialize cwqs for each possible cpu even if we
		 * are going to call destroy_workqueue() finally. Otherwise
		 * cpu_up() can hit the uninitialized cwq once we drop the
		 * lock.
		 */
		for_each_possible_cpu(cpu) {
			cwq = init_cpu_workqueue(wq, cpu);
			if (err || !cpu_online(cpu))
				continue;
			err = create_workqueue_thread(cwq, cpu);
			start_workqueue_thread(cwq, cpu);
		}
		cpu_maps_update_done();
	}

	if (err) {
		destroy_workqueue(wq);
		wq = NULL;
	}
	return wq;
}
EXPORT_SYMBOL_GPL(__create_workqueue_key);

static void cleanup_workqueue_thread(struct cpu_workqueue_struct *cwq)
{
	/*
	 * Our caller is either destroy_workqueue() or CPU_POST_DEAD,
	 * cpu_add_remove_lock protects cwq->thread.
	 */
	if (cwq->thread == NULL)
		return;

	lock_map_acquire(&cwq->wq->lockdep_map);
	lock_map_release(&cwq->wq->lockdep_map);

	flush_cpu_workqueue(cwq);
	/*
	 * If the caller is CPU_POST_DEAD and cwq->worklist was not empty,
	 * a concurrent flush_workqueue() can insert a barrier after us.
	 * However, in that case run_workqueue() won't return and check
	 * kthread_should_stop() until it flushes all work_struct's.
	 * When ->worklist becomes empty it is safe to exit because no
	 * more work_structs can be queued on this cwq: flush_workqueue
	 * checks list_empty(), and a "normal" queue_work() can't use
	 * a dead CPU.
	 */
	trace_workqueue_destruction(cwq->thread);
	kthread_stop(cwq->thread);
	cwq->thread = NULL;
}

/**
 * destroy_workqueue - safely terminate a workqueue
 * @wq: target workqueue
 *
 * Safely destroy a workqueue. All work currently pending will be done first.
 */
void destroy_workqueue(struct workqueue_struct *wq)
{
	const struct cpumask *cpu_map = wq_cpu_map(wq);
	int cpu;

	cpu_maps_update_begin();
	spin_lock(&workqueue_lock);
	list_del(&wq->list);
	spin_unlock(&workqueue_lock);

	for_each_cpu(cpu, cpu_map)
		cleanup_workqueue_thread(per_cpu_ptr(wq->cpu_wq, cpu));
	cpu_maps_update_done();

	free_percpu(wq->cpu_wq);
	kfree(wq);
}
EXPORT_SYMBOL_GPL(destroy_workqueue);

static int __devinit workqueue_cpu_callback(struct notifier_block *nfb,
						unsigned long action,
						void *hcpu)
{
	unsigned int cpu = (unsigned long)hcpu;
	struct cpu_workqueue_struct *cwq;
	struct workqueue_struct *wq;
	int err = 0;

	action &= ~CPU_TASKS_FROZEN;

	switch (action) {
	case CPU_UP_PREPARE:
		cpumask_set_cpu(cpu, cpu_populated_map);
	}
undo:
	list_for_each_entry(wq, &workqueues, list) {
		cwq = per_cpu_ptr(wq->cpu_wq, cpu);

		switch (action) {
		case CPU_UP_PREPARE:
			err = create_workqueue_thread(cwq, cpu);
			if (!err)
				break;
			printk(KERN_ERR "workqueue [%s] for %i failed\n",
				wq->name, cpu);
			action = CPU_UP_CANCELED;
			err = -ENOMEM;
			goto undo;

		case CPU_ONLINE:
			start_workqueue_thread(cwq, cpu);
			break;

		case CPU_UP_CANCELED:
			start_workqueue_thread(cwq, -1);
		case CPU_POST_DEAD:
			cleanup_workqueue_thread(cwq);
			break;
		}
	}

	switch (action) {
	case CPU_UP_CANCELED:
	case CPU_POST_DEAD:
		cpumask_clear_cpu(cpu, cpu_populated_map);
	}

	return notifier_from_errno(err);
}

#ifdef CONFIG_SMP

struct work_for_cpu {
	struct completion completion;
	long (*fn)(void *);
	void *arg;
	long ret;
};

static int do_work_for_cpu(void *_wfc)
{
	struct work_for_cpu *wfc = _wfc;
	wfc->ret = wfc->fn(wfc->arg);
	complete(&wfc->completion);
	return 0;
}

/**
 * work_on_cpu - run a function in user context on a particular cpu
 * @cpu: the cpu to run on
 * @fn: the function to run
 * @arg: the function arg
 *
 * This will return the value @fn returns.
 * It is up to the caller to ensure that the cpu doesn't go offline.
 * The caller must not hold any locks which would prevent @fn from completing.
 */
long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg)
{
	struct task_struct *sub_thread;
	struct work_for_cpu wfc = {
		.completion = COMPLETION_INITIALIZER_ONSTACK(wfc.completion),
		.fn = fn,
		.arg = arg,
	};

	sub_thread = kthread_create(do_work_for_cpu, &wfc, "work_for_cpu");
	if (IS_ERR(sub_thread))
		return PTR_ERR(sub_thread);
	kthread_bind(sub_thread, cpu);
	wake_up_process(sub_thread);
	wait_for_completion(&wfc.completion);
	return wfc.ret;
}
EXPORT_SYMBOL_GPL(work_on_cpu);
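
/*
 * Usage sketch (illustrative only, not part of this file): work_on_cpu() is
 * handy when a function must run on a specific CPU but the caller may not
 * migrate there itself (my_read_fn is hypothetical):
 *
 *	static long my_read_fn(void *arg)
 *	{
 *		// runs in a kernel thread bound to the requested CPU
 *		return 0;
 *	}
 *
 *	long ret = work_on_cpu(2, my_read_fn, NULL);	// run on CPU 2, wait
 *
 * The call sleeps until the function has completed, so it must not be used
 * from atomic context, and the caller must keep the CPU online.
 */
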
#endif /* CONFIG_SMP */

void __init init_workqueues(void)
{
	alloc_cpumask_var(&cpu_populated_map, GFP_KERNEL);

	cpumask_copy(cpu_populated_map, cpu_online_mask);
	singlethread_cpu = cpumask_first(cpu_possible_mask);
	cpu_singlethread_map = cpumask_of(singlethread_cpu);
	hotcpu_notifier(workqueue_cpu_callback, 0);
	keventd_wq = create_workqueue("events");
	BUG_ON(!keventd_wq);
}