workqueue.c 22.9 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14
/*
 * linux/kernel/workqueue.c
 *
 * Generic mechanism for defining kernel helper threads for running
 * arbitrary tasks in process context.
 *
 * Started by Ingo Molnar, Copyright (C) 2002
 *
 * Derived from the taskqueue/keventd code by:
 *
 *   David Woodhouse <dwmw2@infradead.org>
 *   Andrew Morton <andrewm@uow.edu.au>
 *   Kai Petzke <wpp@marie.physik.tu-berlin.de>
 *   Theodore Ts'o <tytso@mit.edu>
 *
 * Made to use alloc_percpu by Christoph Lameter <clameter@sgi.com>.
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/init.h>
#include <linux/signal.h>
#include <linux/completion.h>
#include <linux/workqueue.h>
#include <linux/slab.h>
#include <linux/cpu.h>
#include <linux/notifier.h>
#include <linux/kthread.h>
30
#include <linux/hardirq.h>
31
#include <linux/mempolicy.h>
32
#include <linux/freezer.h>
33 34
#include <linux/kallsyms.h>
#include <linux/debug_locks.h>
L
Linus Torvalds 已提交
35 36

/*
 * The per-CPU workqueue (if single thread, we always use the first
 * possible cpu).
 */
struct cpu_workqueue_struct {

	/* Taken around updates of worklist, current_work and run_depth. */
	spinlock_t lock;

	/* Queued work_structs, linked through work_struct.entry. */
	struct list_head worklist;
	/* worker_thread() waits here; insert_work() wakes it. */
	wait_queue_head_t more_work;

	/* The workqueue this per-CPU part belongs to. */
	struct workqueue_struct *wq;
	/* Worker kthread; NULL before creation and after cleanup. */
	struct task_struct *thread;
	/* Work whose callback is being run right now, NULL otherwise. */
	struct work_struct *current_work;

	int run_depth;		/* Detect run_workqueue() recursion depth */
} ____cacheline_aligned;

/*
 * The externally visible workqueue abstraction is an array of
 * per-CPU workqueues:
 */
struct workqueue_struct {
59
	struct cpu_workqueue_struct *cpu_wq;
L
Linus Torvalds 已提交
60 61
	const char *name;
	struct list_head list; 	/* Empty if single thread */
62
	int freezeable;		/* Freeze threads during suspend */
L
Linus Torvalds 已提交
63 64 65 66
};

/* All the per-cpu workqueues on the system, for hotplug cpu to add/remove
   threads to each one as cpus come/go. */
static DEFINE_MUTEX(workqueue_mutex);
static LIST_HEAD(workqueues);

/*
 * CPU whose per-cpu slot is used by single-threaded workqueues.
 * Assigned once in init_workqueues().
 */
static int singlethread_cpu;

L
Linus Torvalds 已提交
72 73 74 75 76 77
/*
 * If it's single threaded, it isn't in the list of workqueues.
 *
 * Returns the result of list_empty() on @wq->list: __create_workqueue()
 * leaves that list head empty for single-threaded workqueues and links
 * it into the global "workqueues" list otherwise.
 */
static inline int is_single_threaded(struct workqueue_struct *wq)
{
	return list_empty(&wq->list);
}

78 79 80 81
/*
 * Set the workqueue on which a work item is to be run
 * - Must *only* be called if the pending flag is set
 */
82 83
static inline void set_wq_data(struct work_struct *work, void *wq)
{
84 85 86
	unsigned long new;

	BUG_ON(!work_pending(work));
87 88

	new = (unsigned long) wq | (1UL << WORK_STRUCT_PENDING);
89 90
	new |= WORK_STRUCT_FLAG_MASK & *work_data_bits(work);
	atomic_long_set(&work->data, new);
91 92 93 94
}

static inline void *get_wq_data(struct work_struct *work)
{
95
	return (void *) (atomic_long_read(&work->data) & WORK_STRUCT_WQ_DATA_MASK);
96 97
}

98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122
static int __run_work(struct cpu_workqueue_struct *cwq, struct work_struct *work)
{
	int ret = 0;
	unsigned long flags;

	spin_lock_irqsave(&cwq->lock, flags);
	/*
	 * We need to re-validate the work info after we've gotten
	 * the cpu_workqueue lock. We can run the work now iff:
	 *
	 *  - the wq_data still matches the cpu_workqueue_struct
	 *  - AND the work is still marked pending
	 *  - AND the work is still on a list (which will be this
	 *    workqueue_struct list)
	 *
	 * All these conditions are important, because we
	 * need to protect against the work being run right
	 * now on another CPU (all but the last one might be
	 * true if it's currently running and has not been
	 * released yet, for example).
	 */
	if (get_wq_data(work) == cwq
	    && work_pending(work)
	    && !list_empty(&work->entry)) {
		work_func_t f = work->func;
O
Oleg Nesterov 已提交
123
		cwq->current_work = work;
124 125 126
		list_del_init(&work->entry);
		spin_unlock_irqrestore(&cwq->lock, flags);

127
		if (!test_bit(WORK_STRUCT_NOAUTOREL, work_data_bits(work)))
128 129 130 131
			work_release(work);
		f(work);

		spin_lock_irqsave(&cwq->lock, flags);
O
Oleg Nesterov 已提交
132
		cwq->current_work = NULL;
133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170
		ret = 1;
	}
	spin_unlock_irqrestore(&cwq->lock, flags);
	return ret;
}

/**
 * run_scheduled_work - run scheduled work synchronously
 * @work: work to run
 *
 * If @work is pending and sitting on a queue, execute it right here in
 * the caller's context instead of waiting for the worker thread.
 * Returns 1 when the work was run by this call, 0 when there was
 * nothing scheduled to run.
 *
 * NOTE! This _only_ works for normal work_structs. You
 * CANNOT use this for delayed work, because the wq data
 * for delayed work will not point properly to the per-
 * CPU workqueue struct, but will change!
 */
int fastcall run_scheduled_work(struct work_struct *work)
{
	struct cpu_workqueue_struct *owner;

	/* Retry for as long as the work still looks queued. */
	while (work_pending(work) && !list_empty(&work->entry)) {
		/* NOTE! This depends intimately on __queue_work! */
		owner = get_wq_data(work);
		if (owner == NULL)
			break;
		if (__run_work(owner, work) != 0)
			return 1;
	}
	return 0;
}
EXPORT_SYMBOL(run_scheduled_work);

O
Oleg Nesterov 已提交
171 172 173 174 175 176 177 178 179 180 181
/*
 * Bind @work to @cwq and link it into cwq->worklist, at the tail when
 * @tail is non-zero and at the head otherwise, then wake up anyone
 * waiting on cwq->more_work.  Callers in this file hold cwq->lock.
 */
static void insert_work(struct cpu_workqueue_struct *cwq,
				struct work_struct *work, int tail)
{
	struct list_head *queue = &cwq->worklist;

	set_wq_data(work, cwq);
	if (!tail)
		list_add(&work->entry, queue);
	else
		list_add_tail(&work->entry, queue);
	wake_up(&cwq->more_work);
}

L
Linus Torvalds 已提交
182 183 184 185 186 187 188
/*
 * Append @work to the tail of @cwq's worklist under cwq->lock.
 * Preempt must be disabled.
 */
static void __queue_work(struct cpu_workqueue_struct *cwq,
			 struct work_struct *work)
{
	unsigned long flags;

	spin_lock_irqsave(&cwq->lock, flags);
	insert_work(cwq, work, 1);
	spin_unlock_irqrestore(&cwq->lock, flags);
}

193 194 195 196 197
/**
 * queue_work - queue work on a workqueue
 * @wq: workqueue to use
 * @work: work to queue
 *
 * Returns 0 if @work was already on a queue, non-zero otherwise.
 *
 * We queue the work to the CPU it was submitted, but there is no
 * guarantee that it will be processed by that CPU.
 */
int fastcall queue_work(struct workqueue_struct *wq, struct work_struct *work)
{
	/* get_cpu() disables preemption until the matching put_cpu(). */
	int ret = 0, cpu = get_cpu();

	/* Only the caller that flips PENDING from 0 to 1 does the queueing. */
	if (!test_and_set_bit(WORK_STRUCT_PENDING, work_data_bits(work))) {
		if (unlikely(is_single_threaded(wq)))
			cpu = singlethread_cpu;
		BUG_ON(!list_empty(&work->entry));
		__queue_work(per_cpu_ptr(wq->cpu_wq, cpu), work);
		ret = 1;
	}
	put_cpu();
	return ret;
}
EXPORT_SYMBOL_GPL(queue_work);
L
Linus Torvalds 已提交
218

219
void delayed_work_timer_fn(unsigned long __data)
L
Linus Torvalds 已提交
220
{
221
	struct delayed_work *dwork = (struct delayed_work *)__data;
222
	struct workqueue_struct *wq = get_wq_data(&dwork->work);
L
Linus Torvalds 已提交
223 224 225
	int cpu = smp_processor_id();

	if (unlikely(is_single_threaded(wq)))
226
		cpu = singlethread_cpu;
L
Linus Torvalds 已提交
227

228
	__queue_work(per_cpu_ptr(wq->cpu_wq, cpu), &dwork->work);
L
Linus Torvalds 已提交
229 230
}

231 232 233
/**
 * queue_delayed_work - queue work on a workqueue after delay
 * @wq: workqueue to use
234
 * @dwork: delayable work to queue
235 236
 * @delay: number of jiffies to wait before queueing
 *
A
Alan Stern 已提交
237
 * Returns 0 if @work was already on a queue, non-zero otherwise.
238
 */
L
Linus Torvalds 已提交
239
int fastcall queue_delayed_work(struct workqueue_struct *wq,
240
			struct delayed_work *dwork, unsigned long delay)
L
Linus Torvalds 已提交
241 242
{
	int ret = 0;
243 244 245
	struct timer_list *timer = &dwork->timer;
	struct work_struct *work = &dwork->work;

246
	timer_stats_timer_set_start_info(timer);
247 248
	if (delay == 0)
		return queue_work(wq, work);
L
Linus Torvalds 已提交
249

250
	if (!test_and_set_bit(WORK_STRUCT_PENDING, work_data_bits(work))) {
L
Linus Torvalds 已提交
251 252 253 254
		BUG_ON(timer_pending(timer));
		BUG_ON(!list_empty(&work->entry));

		/* This stores wq for the moment, for the timer_fn */
255
		set_wq_data(work, wq);
L
Linus Torvalds 已提交
256
		timer->expires = jiffies + delay;
257
		timer->data = (unsigned long)dwork;
L
Linus Torvalds 已提交
258 259 260 261 262 263
		timer->function = delayed_work_timer_fn;
		add_timer(timer);
		ret = 1;
	}
	return ret;
}
264
EXPORT_SYMBOL_GPL(queue_delayed_work);
L
Linus Torvalds 已提交
265

266 267 268 269
/**
 * queue_delayed_work_on - queue work on specific CPU after delay
 * @cpu: CPU number to execute work on
 * @wq: workqueue to use
270
 * @dwork: work to queue
271 272
 * @delay: number of jiffies to wait before queueing
 *
A
Alan Stern 已提交
273
 * Returns 0 if @work was already on a queue, non-zero otherwise.
274
 */
275
int queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
276
			struct delayed_work *dwork, unsigned long delay)
277 278
{
	int ret = 0;
279 280
	struct timer_list *timer = &dwork->timer;
	struct work_struct *work = &dwork->work;
281

282
	if (!test_and_set_bit(WORK_STRUCT_PENDING, work_data_bits(work))) {
283 284 285 286
		BUG_ON(timer_pending(timer));
		BUG_ON(!list_empty(&work->entry));

		/* This stores wq for the moment, for the timer_fn */
287
		set_wq_data(work, wq);
288
		timer->expires = jiffies + delay;
289
		timer->data = (unsigned long)dwork;
290 291 292 293 294 295
		timer->function = delayed_work_timer_fn;
		add_timer_on(timer, cpu);
		ret = 1;
	}
	return ret;
}
296
EXPORT_SYMBOL_GPL(queue_delayed_work_on);
L
Linus Torvalds 已提交
297

298
static void run_workqueue(struct cpu_workqueue_struct *cwq)
L
Linus Torvalds 已提交
299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316
{
	unsigned long flags;

	/*
	 * Keep taking off work from the queue until
	 * done.
	 */
	spin_lock_irqsave(&cwq->lock, flags);
	cwq->run_depth++;
	if (cwq->run_depth > 3) {
		/* morton gets to eat his hat */
		printk("%s: recursion depth exceeded: %d\n",
			__FUNCTION__, cwq->run_depth);
		dump_stack();
	}
	while (!list_empty(&cwq->worklist)) {
		struct work_struct *work = list_entry(cwq->worklist.next,
						struct work_struct, entry);
317
		work_func_t f = work->func;
L
Linus Torvalds 已提交
318

O
Oleg Nesterov 已提交
319
		cwq->current_work = work;
L
Linus Torvalds 已提交
320 321 322
		list_del_init(cwq->worklist.next);
		spin_unlock_irqrestore(&cwq->lock, flags);

323
		BUG_ON(get_wq_data(work) != cwq);
324
		if (!test_bit(WORK_STRUCT_NOAUTOREL, work_data_bits(work)))
325 326
			work_release(work);
		f(work);
L
Linus Torvalds 已提交
327

328 329 330 331 332 333 334 335 336 337 338
		if (unlikely(in_atomic() || lockdep_depth(current) > 0)) {
			printk(KERN_ERR "BUG: workqueue leaked lock or atomic: "
					"%s/0x%08x/%d\n",
					current->comm, preempt_count(),
				       	current->pid);
			printk(KERN_ERR "    last function: ");
			print_symbol("%s\n", (unsigned long)f);
			debug_show_held_locks(current);
			dump_stack();
		}

L
Linus Torvalds 已提交
339
		spin_lock_irqsave(&cwq->lock, flags);
O
Oleg Nesterov 已提交
340
		cwq->current_work = NULL;
L
Linus Torvalds 已提交
341 342 343 344 345 346 347 348 349 350 351 352
	}
	cwq->run_depth--;
	spin_unlock_irqrestore(&cwq->lock, flags);
}

static int worker_thread(void *__cwq)
{
	struct cpu_workqueue_struct *cwq = __cwq;
	DECLARE_WAITQUEUE(wait, current);
	struct k_sigaction sa;
	sigset_t blocked;

353
	if (!cwq->wq->freezeable)
354
		current->flags |= PF_NOFREEZE;
L
Linus Torvalds 已提交
355 356 357 358 359 360 361 362

	set_user_nice(current, -5);

	/* Block and flush all signals */
	sigfillset(&blocked);
	sigprocmask(SIG_BLOCK, &blocked, NULL);
	flush_signals(current);

363 364 365 366 367 368
	/*
	 * We inherited MPOL_INTERLEAVE from the booting kernel.
	 * Set MPOL_DEFAULT to insure node local allocations.
	 */
	numa_default_policy();

L
Linus Torvalds 已提交
369 370 371 372 373 374 375 376
	/* SIG_IGN makes children autoreap: see do_notify_parent(). */
	sa.sa.sa_handler = SIG_IGN;
	sa.sa.sa_flags = 0;
	siginitset(&sa.sa.sa_mask, sigmask(SIGCHLD));
	do_sigaction(SIGCHLD, &sa, (struct k_sigaction *)0);

	set_current_state(TASK_INTERRUPTIBLE);
	while (!kthread_should_stop()) {
377
		if (cwq->wq->freezeable)
378 379
			try_to_freeze();

L
Linus Torvalds 已提交
380 381 382 383 384 385 386 387 388 389 390 391 392 393 394
		add_wait_queue(&cwq->more_work, &wait);
		if (list_empty(&cwq->worklist))
			schedule();
		else
			__set_current_state(TASK_RUNNING);
		remove_wait_queue(&cwq->more_work, &wait);

		if (!list_empty(&cwq->worklist))
			run_workqueue(cwq);
		set_current_state(TASK_INTERRUPTIBLE);
	}
	__set_current_state(TASK_RUNNING);
	return 0;
}

O
Oleg Nesterov 已提交
395 396 397 398 399 400 401 402 403 404 405
/*
 * A marker work item used for flushing: it is queued like any other work
 * and its callback, wq_barrier_func(), completes @done.  Instances in
 * this file live on the waiter's stack.
 */
struct wq_barrier {
	struct work_struct	work;	/* queued via insert_wq_barrier() */
	struct completion	done;	/* completed when @work has run */
};

/*
 * Work callback of a wq_barrier: @work is the barrier's embedded
 * work_struct; signal the completion that sits next to it.
 */
static void wq_barrier_func(struct work_struct *work)
{
	complete(&container_of(work, struct wq_barrier, work)->done);
}

406 407
static void insert_wq_barrier(struct cpu_workqueue_struct *cwq,
					struct wq_barrier *barr, int tail)
O
Oleg Nesterov 已提交
408 409 410 411 412
{
	INIT_WORK(&barr->work, wq_barrier_func);
	__set_bit(WORK_STRUCT_PENDING, work_data_bits(&barr->work));

	init_completion(&barr->done);
413 414

	insert_work(cwq, &barr->work, tail);
O
Oleg Nesterov 已提交
415 416
}

L
Linus Torvalds 已提交
417 418 419 420 421 422 423
static void flush_cpu_workqueue(struct cpu_workqueue_struct *cwq)
{
	if (cwq->thread == current) {
		/*
		 * Probably keventd trying to flush its own queue. So simply run
		 * it by hand rather than deadlocking.
		 */
424 425 426 427 428
		preempt_enable();
		/*
		 * We can still touch *cwq here because we are keventd, and
		 * hot-unplug will be waiting us to exit.
		 */
L
Linus Torvalds 已提交
429
		run_workqueue(cwq);
430
		preempt_disable();
L
Linus Torvalds 已提交
431
	} else {
O
Oleg Nesterov 已提交
432
		struct wq_barrier barr;
433
		int active = 0;
L
Linus Torvalds 已提交
434

435 436 437 438 439 440
		spin_lock_irq(&cwq->lock);
		if (!list_empty(&cwq->worklist) || cwq->current_work != NULL) {
			insert_wq_barrier(cwq, &barr, 1);
			active = 1;
		}
		spin_unlock_irq(&cwq->lock);
L
Linus Torvalds 已提交
441

442 443 444 445 446
		if (active) {
			preempt_enable();
			wait_for_completion(&barr.done);
			preempt_disable();
		}
L
Linus Torvalds 已提交
447 448 449
	}
}

450
/**
 * flush_workqueue - ensure that any scheduled work has run to completion.
 * @wq: workqueue to flush
 *
 * Forces execution of the workqueue and blocks until its completion.
 * This is typically used in driver shutdown handlers.
 *
 * We sleep until all works which were queued on entry have been handled,
 * but we are not livelocked by new incoming ones.
 *
 * This function used to run the workqueues itself.  Now we just wait for the
 * helper threads to do it.
 */
void fastcall flush_workqueue(struct workqueue_struct *wq)
{
	preempt_disable();		/* CPU hotplug */
	if (is_single_threaded(wq)) {
		/* Always use first cpu's area. */
		flush_cpu_workqueue(per_cpu_ptr(wq->cpu_wq, singlethread_cpu));
	} else {
		int cpu;

		for_each_online_cpu(cpu)
			flush_cpu_workqueue(per_cpu_ptr(wq->cpu_wq, cpu));
	}
	preempt_enable();
}
EXPORT_SYMBOL_GPL(flush_workqueue);
L
Linus Torvalds 已提交
478

O
Oleg Nesterov 已提交
479 480 481 482 483 484 485 486
/*
 * If @cwq is currently executing @work, block until that callback has
 * returned; otherwise return immediately.
 *
 * Called from flush_work() with workqueue_mutex held.  The mutex is
 * dropped while sleeping and re-taken before returning.
 */
static void wait_on_work(struct cpu_workqueue_struct *cwq,
				struct work_struct *work)
{
	struct wq_barrier barr;
	int running = 0;

	spin_lock_irq(&cwq->lock);
	if (unlikely(cwq->current_work == work)) {
		/* Head insertion: the barrier runs right after the current work. */
		insert_wq_barrier(cwq, &barr, 0);
		running = 1;
	}
	spin_unlock_irq(&cwq->lock);

	if (unlikely(running)) {
		mutex_unlock(&workqueue_mutex);
		wait_for_completion(&barr.done);
		mutex_lock(&workqueue_mutex);
	}
}

/**
 * flush_work - block until a work_struct's callback has terminated
 * @wq: the workqueue on which the work is queued
 * @work: the work which is to be flushed
 *
 * flush_work() will attempt to cancel the work if it is queued.  If the work's
 * callback appears to be running, flush_work() will block until it has
 * completed.
 *
 * flush_work() is designed to be used when the caller is tearing down data
 * structures which the callback function operates upon.  It is expected that,
 * prior to calling flush_work(), the caller has arranged for the work to not
 * be requeued.
 */
void flush_work(struct workqueue_struct *wq, struct work_struct *work)
{
	struct cpu_workqueue_struct *cwq;

	mutex_lock(&workqueue_mutex);
	cwq = get_wq_data(work);
	/* Was it ever queued ? */
	if (!cwq)
		goto out;

	/*
	 * This work can't be re-queued, and the lock above protects us
	 * from take_over_work(), no need to re-check that get_wq_data()
	 * is still the same when we take cwq->lock.
	 */
	spin_lock_irq(&cwq->lock);
	list_del_init(&work->entry);
	work_release(work);
	spin_unlock_irq(&cwq->lock);

	if (is_single_threaded(wq)) {
		/* Always use first cpu's area. */
		wait_on_work(per_cpu_ptr(wq->cpu_wq, singlethread_cpu), work);
	} else {
		int cpu;

		for_each_online_cpu(cpu)
			wait_on_work(per_cpu_ptr(wq->cpu_wq, cpu), work);
	}
out:
	mutex_unlock(&workqueue_mutex);
}
EXPORT_SYMBOL_GPL(flush_work);

L
Linus Torvalds 已提交
547
static struct task_struct *create_workqueue_thread(struct workqueue_struct *wq,
548
							int cpu)
L
Linus Torvalds 已提交
549
{
550
	struct cpu_workqueue_struct *cwq = per_cpu_ptr(wq->cpu_wq, cpu);
L
Linus Torvalds 已提交
551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569
	struct task_struct *p;

	spin_lock_init(&cwq->lock);
	cwq->wq = wq;
	cwq->thread = NULL;
	INIT_LIST_HEAD(&cwq->worklist);
	init_waitqueue_head(&cwq->more_work);

	if (is_single_threaded(wq))
		p = kthread_create(worker_thread, cwq, "%s", wq->name);
	else
		p = kthread_create(worker_thread, cwq, "%s/%d", wq->name, cpu);
	if (IS_ERR(p))
		return NULL;
	cwq->thread = p;
	return p;
}

struct workqueue_struct *__create_workqueue(const char *name,
570
					    int singlethread, int freezeable)
L
Linus Torvalds 已提交
571 572 573 574 575
{
	int cpu, destroy = 0;
	struct workqueue_struct *wq;
	struct task_struct *p;

576
	wq = kzalloc(sizeof(*wq), GFP_KERNEL);
L
Linus Torvalds 已提交
577 578 579
	if (!wq)
		return NULL;

580
	wq->cpu_wq = alloc_percpu(struct cpu_workqueue_struct);
581 582 583 584 585
	if (!wq->cpu_wq) {
		kfree(wq);
		return NULL;
	}

L
Linus Torvalds 已提交
586
	wq->name = name;
587 588
	wq->freezeable = freezeable;

589
	mutex_lock(&workqueue_mutex);
L
Linus Torvalds 已提交
590 591
	if (singlethread) {
		INIT_LIST_HEAD(&wq->list);
592
		p = create_workqueue_thread(wq, singlethread_cpu);
L
Linus Torvalds 已提交
593 594 595 596 597 598 599
		if (!p)
			destroy = 1;
		else
			wake_up_process(p);
	} else {
		list_add(&wq->list, &workqueues);
		for_each_online_cpu(cpu) {
600
			p = create_workqueue_thread(wq, cpu);
L
Linus Torvalds 已提交
601 602 603 604 605 606 607
			if (p) {
				kthread_bind(p, cpu);
				wake_up_process(p);
			} else
				destroy = 1;
		}
	}
608
	mutex_unlock(&workqueue_mutex);
L
Linus Torvalds 已提交
609 610 611 612 613 614 615 616 617 618

	/*
	 * Was there any error during startup? If yes then clean up:
	 */
	if (destroy) {
		destroy_workqueue(wq);
		wq = NULL;
	}
	return wq;
}
619
EXPORT_SYMBOL_GPL(__create_workqueue);
L
Linus Torvalds 已提交
620 621 622 623 624 625 626

static void cleanup_workqueue_thread(struct workqueue_struct *wq, int cpu)
{
	struct cpu_workqueue_struct *cwq;
	unsigned long flags;
	struct task_struct *p;

627
	cwq = per_cpu_ptr(wq->cpu_wq, cpu);
L
Linus Torvalds 已提交
628 629 630 631 632 633 634 635
	spin_lock_irqsave(&cwq->lock, flags);
	p = cwq->thread;
	cwq->thread = NULL;
	spin_unlock_irqrestore(&cwq->lock, flags);
	if (p)
		kthread_stop(p);
}

636 637 638 639 640 641
/**
 * destroy_workqueue - safely terminate a workqueue
 * @wq: target workqueue
 *
 * Safely destroy a workqueue. All work currently pending will be done first.
 */
L
Linus Torvalds 已提交
642 643 644 645 646 647 648
void destroy_workqueue(struct workqueue_struct *wq)
{
	int cpu;

	flush_workqueue(wq);

	/* We don't need the distraction of CPUs appearing and vanishing. */
649
	mutex_lock(&workqueue_mutex);
L
Linus Torvalds 已提交
650
	if (is_single_threaded(wq))
651
		cleanup_workqueue_thread(wq, singlethread_cpu);
L
Linus Torvalds 已提交
652 653 654 655 656
	else {
		for_each_online_cpu(cpu)
			cleanup_workqueue_thread(wq, cpu);
		list_del(&wq->list);
	}
657
	mutex_unlock(&workqueue_mutex);
658
	free_percpu(wq->cpu_wq);
L
Linus Torvalds 已提交
659 660
	kfree(wq);
}
661
EXPORT_SYMBOL_GPL(destroy_workqueue);
L
Linus Torvalds 已提交
662 663 664

/*
 * The kernel-global workqueue behind schedule_work() and friends.
 * Created as "events" by init_workqueues(); NULL until then.
 */
static struct workqueue_struct *keventd_wq;

665 666 667 668 669 670
/**
 * schedule_work - put work task in global workqueue
 * @work: job to be done
 *
 * This puts a job in the kernel-global workqueue.
 */
L
Linus Torvalds 已提交
671 672 673 674
int fastcall schedule_work(struct work_struct *work)
{
	return queue_work(keventd_wq, work);
}
675
EXPORT_SYMBOL(schedule_work);
L
Linus Torvalds 已提交
676

677 678
/**
 * schedule_delayed_work - put work task in global workqueue after delay
679 680
 * @dwork: job to be done
 * @delay: number of jiffies to wait or 0 for immediate execution
681 682 683 684
 *
 * After waiting for a given time this puts a job in the kernel-global
 * workqueue.
 */
685 686
int fastcall schedule_delayed_work(struct delayed_work *dwork,
					unsigned long delay)
L
Linus Torvalds 已提交
687
{
688
	timer_stats_timer_set_start_info(&dwork->timer);
689
	return queue_delayed_work(keventd_wq, dwork, delay);
L
Linus Torvalds 已提交
690
}
691
EXPORT_SYMBOL(schedule_delayed_work);
L
Linus Torvalds 已提交
692

693 694 695
/**
 * schedule_delayed_work_on - queue work in global workqueue on CPU after delay
 * @cpu: cpu to use
696
 * @dwork: job to be done
697 698 699 700 701
 * @delay: number of jiffies to wait
 *
 * After waiting for a given time this puts a job in the kernel-global
 * workqueue on the specified CPU.
 */
L
Linus Torvalds 已提交
702
int schedule_delayed_work_on(int cpu,
703
			struct delayed_work *dwork, unsigned long delay)
L
Linus Torvalds 已提交
704
{
705
	return queue_delayed_work_on(cpu, keventd_wq, dwork, delay);
L
Linus Torvalds 已提交
706
}
707
EXPORT_SYMBOL(schedule_delayed_work_on);
L
Linus Torvalds 已提交
708

709 710 711 712 713 714 715 716 717 718 719
/**
 * schedule_on_each_cpu - call a function on each online CPU from keventd
 * @func: the function to call
 *
 * Returns zero on success.
 * Returns -ve errno on failure.
 *
 * Appears to be racy against CPU hotplug.
 *
 * schedule_on_each_cpu() is very slow.
 */
720
int schedule_on_each_cpu(work_func_t func)
721 722
{
	int cpu;
723
	struct work_struct *works;
724

725 726
	works = alloc_percpu(struct work_struct);
	if (!works)
727
		return -ENOMEM;
728

729
	preempt_disable();		/* CPU hotplug */
730
	for_each_online_cpu(cpu) {
731 732 733 734 735
		struct work_struct *work = per_cpu_ptr(works, cpu);

		INIT_WORK(work, func);
		set_bit(WORK_STRUCT_PENDING, work_data_bits(work));
		__queue_work(per_cpu_ptr(keventd_wq->cpu_wq, cpu), work);
736
	}
737
	preempt_enable();
738
	flush_workqueue(keventd_wq);
739
	free_percpu(works);
740 741 742
	return 0;
}

L
Linus Torvalds 已提交
743 744 745 746
/**
 * flush_scheduled_work - flush the kernel-global workqueue
 *
 * Calls flush_workqueue() on the global workqueue; see there for the
 * exact guarantees.
 */
void flush_scheduled_work(void)
{
	flush_workqueue(keventd_wq);
}
EXPORT_SYMBOL(flush_scheduled_work);
L
Linus Torvalds 已提交
748

O
Oleg Nesterov 已提交
749 750 751 752 753 754
/**
 * flush_work_keventd - flush_work() for the kernel-global workqueue
 * @work: the work which is to be flushed
 *
 * Calls flush_work() with the global workqueue as the workqueue argument.
 */
void flush_work_keventd(struct work_struct *work)
{
	flush_work(keventd_wq, work);
}
EXPORT_SYMBOL(flush_work_keventd);

L
Linus Torvalds 已提交
755
/**
 * cancel_rearming_delayed_workqueue - reliably kill off a delayed work whose handler rearms the delayed work.
 * @wq:   the controlling workqueue structure
 * @dwork: the delayed work struct
 *
 * Keeps flushing @wq until cancel_delayed_work() reports success for
 * @dwork.
 */
void cancel_rearming_delayed_workqueue(struct workqueue_struct *wq,
				       struct delayed_work *dwork)
{
	while (!cancel_delayed_work(dwork))
		flush_workqueue(wq);
}
EXPORT_SYMBOL(cancel_rearming_delayed_workqueue);
L
Linus Torvalds 已提交
767 768

/**
769
 * cancel_rearming_delayed_work - reliably kill off a delayed keventd work whose handler rearms the delayed work.
770
 * @dwork: the delayed work struct
L
Linus Torvalds 已提交
771
 */
772
void cancel_rearming_delayed_work(struct delayed_work *dwork)
L
Linus Torvalds 已提交
773
{
774
	cancel_rearming_delayed_workqueue(keventd_wq, dwork);
L
Linus Torvalds 已提交
775 776 777
}
EXPORT_SYMBOL(cancel_rearming_delayed_work);

778 779 780 781 782 783 784 785 786 787 788 789
/**
 * execute_in_process_context - reliably execute the routine with user context
 * @fn:		the function to execute
 * @ew:		guaranteed storage for the execute work structure (must
 *		be available when the work executes)
 *
 * Executes the function immediately if process context is available,
 * otherwise schedules the function for delayed execution.
 *
 * Returns:	0 - function was executed
 *		1 - function was scheduled for execution
 */
790
int execute_in_process_context(work_func_t fn, struct execute_work *ew)
791 792
{
	if (!in_interrupt()) {
793
		fn(&ew->work);
794 795 796
		return 0;
	}

797
	INIT_WORK(&ew->work, fn);
798 799 800 801 802 803
	schedule_work(&ew->work);

	return 1;
}
EXPORT_SYMBOL_GPL(execute_in_process_context);

L
Linus Torvalds 已提交
804 805 806 807 808 809 810 811 812 813 814 815 816
/*
 * Returns non-zero once the kernel-global workqueue has been created
 * (keventd_wq is non-NULL), 0 before that.
 */
int keventd_up(void)
{
	return keventd_wq != NULL;
}

int current_is_keventd(void)
{
	struct cpu_workqueue_struct *cwq;
	int cpu = smp_processor_id();	/* preempt-safe: keventd is per-cpu */
	int ret = 0;

	BUG_ON(!keventd_wq);

817
	cwq = per_cpu_ptr(keventd_wq->cpu_wq, cpu);
L
Linus Torvalds 已提交
818 819 820 821 822 823 824 825 826 827
	if (current == cwq->thread)
		ret = 1;

	return ret;

}

/* Take the work from this (downed) CPU. */
static void take_over_work(struct workqueue_struct *wq, unsigned int cpu)
{
828
	struct cpu_workqueue_struct *cwq = per_cpu_ptr(wq->cpu_wq, cpu);
829
	struct list_head list;
L
Linus Torvalds 已提交
830 831 832
	struct work_struct *work;

	spin_lock_irq(&cwq->lock);
833
	list_replace_init(&cwq->worklist, &list);
L
Linus Torvalds 已提交
834 835 836 837 838

	while (!list_empty(&list)) {
		printk("Taking work for %s\n", wq->name);
		work = list_entry(list.next,struct work_struct,entry);
		list_del(&work->entry);
839
		__queue_work(per_cpu_ptr(wq->cpu_wq, smp_processor_id()), work);
L
Linus Torvalds 已提交
840 841 842 843 844
	}
	spin_unlock_irq(&cwq->lock);
}

/* We're holding the cpucontrol mutex here */
845
static int __devinit workqueue_cpu_callback(struct notifier_block *nfb,
L
Linus Torvalds 已提交
846 847 848 849 850 851 852 853
				  unsigned long action,
				  void *hcpu)
{
	unsigned int hotcpu = (unsigned long)hcpu;
	struct workqueue_struct *wq;

	switch (action) {
	case CPU_UP_PREPARE:
854
		mutex_lock(&workqueue_mutex);
L
Linus Torvalds 已提交
855 856
		/* Create a new workqueue thread for it. */
		list_for_each_entry(wq, &workqueues, list) {
857
			if (!create_workqueue_thread(wq, hotcpu)) {
L
Linus Torvalds 已提交
858 859 860 861 862 863 864 865 866
				printk("workqueue for %i failed\n", hotcpu);
				return NOTIFY_BAD;
			}
		}
		break;

	case CPU_ONLINE:
		/* Kick off worker threads. */
		list_for_each_entry(wq, &workqueues, list) {
867 868 869 870 871
			struct cpu_workqueue_struct *cwq;

			cwq = per_cpu_ptr(wq->cpu_wq, hotcpu);
			kthread_bind(cwq->thread, hotcpu);
			wake_up_process(cwq->thread);
L
Linus Torvalds 已提交
872
		}
873
		mutex_unlock(&workqueue_mutex);
L
Linus Torvalds 已提交
874 875 876 877
		break;

	case CPU_UP_CANCELED:
		list_for_each_entry(wq, &workqueues, list) {
878 879
			if (!per_cpu_ptr(wq->cpu_wq, hotcpu)->thread)
				continue;
L
Linus Torvalds 已提交
880
			/* Unbind so it can run. */
881
			kthread_bind(per_cpu_ptr(wq->cpu_wq, hotcpu)->thread,
882
				     any_online_cpu(cpu_online_map));
L
Linus Torvalds 已提交
883 884
			cleanup_workqueue_thread(wq, hotcpu);
		}
885 886 887 888 889 890 891 892 893
		mutex_unlock(&workqueue_mutex);
		break;

	case CPU_DOWN_PREPARE:
		mutex_lock(&workqueue_mutex);
		break;

	case CPU_DOWN_FAILED:
		mutex_unlock(&workqueue_mutex);
L
Linus Torvalds 已提交
894 895 896 897 898 899 900
		break;

	case CPU_DEAD:
		list_for_each_entry(wq, &workqueues, list)
			cleanup_workqueue_thread(wq, hotcpu);
		list_for_each_entry(wq, &workqueues, list)
			take_over_work(wq, hotcpu);
901
		mutex_unlock(&workqueue_mutex);
L
Linus Torvalds 已提交
902 903 904 905 906 907 908 909
		break;
	}

	return NOTIFY_OK;
}

void init_workqueues(void)
{
910
	singlethread_cpu = first_cpu(cpu_possible_map);
L
Linus Torvalds 已提交
911 912 913 914 915
	hotcpu_notifier(workqueue_cpu_callback, 0);
	keventd_wq = create_workqueue("events");
	BUG_ON(!keventd_wq);
}