/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 * Copyright (C) 2014 Fujitsu.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 */

#include <linux/kthread.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/freezer.h>
#include "async-thread.h"
#include "ctree.h"

#define WORK_DONE_BIT 0
#define WORK_ORDER_DONE_BIT 1
#define WORK_HIGH_PRIO_BIT 2

#define NO_THRESHOLD (-1)
#define DFT_THRESHOLD (32)

struct __btrfs_workqueue {
	struct workqueue_struct *normal_wq;
	/* List head pointing to ordered work list */
	struct list_head ordered_list;

	/* Spinlock for ordered_list */
	spinlock_t list_lock;

	/* Thresholding related variables */
	atomic_t pending;
	int max_active;
	int current_max;
	int thresh;
	unsigned int count;
	spinlock_t thres_lock;
};

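/*
 * A btrfs_workqueue pairs a normal queue with an optional WQ_HIGHPRI one;
 * btrfs_queue_work() picks between the two based on WORK_HIGH_PRIO_BIT.
 */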
struct btrfs_workqueue {
	struct __btrfs_workqueue *normal;
	struct __btrfs_workqueue *high;
55 56
};

static void normal_work_helper(struct btrfs_work *work);

#define BTRFS_WORK_HELPER(name)					\
void btrfs_##name(struct work_struct *arg)				\
{									\
	struct btrfs_work *work = container_of(arg, struct btrfs_work,	\
					       normal_work);		\
	normal_work_helper(work);					\
}
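
/*
 * Each BTRFS_WORK_HELPER(name) invocation emits a wrapper with external
 * linkage, so every work type keeps its own function symbol. For example,
 * BTRFS_WORK_HELPER(endio_helper) expands (roughly) to:
 *
 *	void btrfs_endio_helper(struct work_struct *arg)
 *	{
 *		struct btrfs_work *work = container_of(arg, struct btrfs_work,
 *						       normal_work);
 *		normal_work_helper(work);
 *	}
 */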

BTRFS_WORK_HELPER(worker_helper);
BTRFS_WORK_HELPER(delalloc_helper);
BTRFS_WORK_HELPER(flush_delalloc_helper);
BTRFS_WORK_HELPER(cache_helper);
BTRFS_WORK_HELPER(submit_helper);
BTRFS_WORK_HELPER(fixup_helper);
BTRFS_WORK_HELPER(endio_helper);
BTRFS_WORK_HELPER(endio_meta_helper);
BTRFS_WORK_HELPER(endio_meta_write_helper);
BTRFS_WORK_HELPER(endio_raid56_helper);
BTRFS_WORK_HELPER(endio_repair_helper);
BTRFS_WORK_HELPER(rmw_helper);
BTRFS_WORK_HELPER(endio_write_helper);
BTRFS_WORK_HELPER(freespace_write_helper);
BTRFS_WORK_HELPER(delayed_meta_helper);
BTRFS_WORK_HELPER(readahead_helper);
BTRFS_WORK_HELPER(qgroup_rescan_helper);
BTRFS_WORK_HELPER(extent_refs_helper);
BTRFS_WORK_HELPER(scrub_helper);
BTRFS_WORK_HELPER(scrubwrc_helper);
BTRFS_WORK_HELPER(scrubnc_helper);

static struct __btrfs_workqueue *
__btrfs_alloc_workqueue(const char *name, unsigned int flags, int max_active,
			 int thresh)
{
	struct __btrfs_workqueue *ret = kzalloc(sizeof(*ret), GFP_NOFS);

	if (!ret)
		return NULL;

	ret->max_active = max_active;
	atomic_set(&ret->pending, 0);
	if (thresh == 0)
		thresh = DFT_THRESHOLD;
	/* For low thresholds, disabling thresholding is a better choice */
	if (thresh < DFT_THRESHOLD) {
		ret->current_max = max_active;
		ret->thresh = NO_THRESHOLD;
	} else {
		ret->current_max = 1;
		ret->thresh = thresh;
	}

	if (flags & WQ_HIGHPRI)
		ret->normal_wq = alloc_workqueue("%s-%s-high", flags,
						 ret->max_active,
						 "btrfs", name);
	else
		ret->normal_wq = alloc_workqueue("%s-%s", flags,
						 ret->max_active, "btrfs",
						 name);
	if (!ret->normal_wq) {
		kfree(ret);
		return NULL;
	}

	INIT_LIST_HEAD(&ret->ordered_list);
	spin_lock_init(&ret->list_lock);
	spin_lock_init(&ret->thres_lock);
	trace_btrfs_workqueue_alloc(ret, name, flags & WQ_HIGHPRI);
	return ret;
}

static inline void
__btrfs_destroy_workqueue(struct __btrfs_workqueue *wq);

struct btrfs_workqueue *btrfs_alloc_workqueue(const char *name,
					      unsigned int flags,
					      int max_active,
					      int thresh)
{
	struct btrfs_workqueue *ret = kzalloc(sizeof(*ret), GFP_NOFS);

	if (!ret)
		return NULL;

	ret->normal = __btrfs_alloc_workqueue(name, flags & ~WQ_HIGHPRI,
					      max_active, thresh);
	if (!ret->normal) {
		kfree(ret);
		return NULL;
	}

	if (flags & WQ_HIGHPRI) {
		ret->high = __btrfs_alloc_workqueue(name, flags, max_active,
						    thresh);
		if (!ret->high) {
			__btrfs_destroy_workqueue(ret->normal);
			kfree(ret);
			return NULL;
		}
	}
	return ret;
}

/*
 * Hook for the thresholding mechanism, called from btrfs_queue_work().
 * This hook WILL be called in IRQ handler context,
 * so workqueue_set_max_active() MUST NOT be called in this hook.
 */
static inline void thresh_queue_hook(struct __btrfs_workqueue *wq)
{
	if (wq->thresh == NO_THRESHOLD)
		return;
	atomic_inc(&wq->pending);
}

/*
 * Hook for the thresholding mechanism, called before executing the work.
 * This hook is called in kthread context,
 * so it is safe to call workqueue_set_max_active() here.
 */
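/*
 * Worked example of the adjustment below (illustrative numbers, not taken
 * from the source): with thresh = 32 and max_active = 8, a backlog of
 * pending > 32 raises current_max by one and pending < 16 lowers it by one,
 * and the result is always clamped to the range [1, max_active].
 */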
static inline void thresh_exec_hook(struct __btrfs_workqueue *wq)
{
	int new_max_active;
	long pending;
	int need_change = 0;

	if (wq->thresh == NO_THRESHOLD)
		return;

	atomic_dec(&wq->pending);
	spin_lock(&wq->thres_lock);
	/*
	 * Use wq->count to limit the calling frequency of
	 * workqueue_set_max_active.
	 */
	wq->count++;
	wq->count %= (wq->thresh / 4);
	if (!wq->count)
		goto out;
	new_max_active = wq->current_max;

	/*
	 * pending may change later, but that's OK since we don't need
	 * an exact value to calculate new_max_active.
	 */
	pending = atomic_read(&wq->pending);
	if (pending > wq->thresh)
		new_max_active++;
	if (pending < wq->thresh / 2)
		new_max_active--;
	new_max_active = clamp_val(new_max_active, 1, wq->max_active);
	if (new_max_active != wq->current_max) {
		need_change = 1;
		wq->current_max = new_max_active;
	}
out:
	spin_unlock(&wq->thres_lock);

	if (need_change) {
		workqueue_set_max_active(wq->normal_wq, wq->current_max);
	}
}

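/*
 * Called from normal_work_helper() once WORK_DONE_BIT has been set: walks
 * ordered_list in submission order and runs ordered_func()/ordered_free()
 * only for items whose main func() has already completed, so ordered
 * completion follows queueing order.
 */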
static void run_ordered_work(struct __btrfs_workqueue *wq)
{
	struct list_head *list = &wq->ordered_list;
	struct btrfs_work *work;
	spinlock_t *lock = &wq->list_lock;
	unsigned long flags;

	while (1) {
		spin_lock_irqsave(lock, flags);
		if (list_empty(list))
			break;
		work = list_entry(list->next, struct btrfs_work,
				  ordered_list);
		if (!test_bit(WORK_DONE_BIT, &work->flags))
			break;

		/*
		 * we are going to call the ordered done function, but
		 * we leave the work item on the list as a barrier so
		 * that later work items that are done don't have their
		 * functions called before this one returns
		 */
		if (test_and_set_bit(WORK_ORDER_DONE_BIT, &work->flags))
			break;
		trace_btrfs_ordered_sched(work);
		spin_unlock_irqrestore(lock, flags);
		work->ordered_func(work);

		/* now take the lock again and drop our item from the list */
		spin_lock_irqsave(lock, flags);
		list_del(&work->ordered_list);
		spin_unlock_irqrestore(lock, flags);

		/*
		 * we don't want to call the ordered free functions
		 * with the lock held though
		 */
		work->ordered_free(work);
		trace_btrfs_all_work_done(work);
	}
	spin_unlock_irqrestore(lock, flags);
}

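/*
 * Common execution path for every BTRFS_WORK_HELPER wrapper above: run the
 * threshold hook, call work->func(), and, for ordered work, mark it done
 * and drive the ordered list.
 */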
static void normal_work_helper(struct btrfs_work *work)
{
	struct __btrfs_workqueue *wq;
	int need_order = 0;

	/*
	 * We should not touch things inside work in the following cases:
	 * 1) after work->func(), if it has no ordered_free,
	 *    since the struct is freed in work->func();
	 * 2) after setting WORK_DONE_BIT,
	 *    because the work may be freed by other threads almost instantly.
	 * So we save the needed things here.
	 */
	if (work->ordered_func)
		need_order = 1;
	wq = work->wq;

	trace_btrfs_work_sched(work);
	thresh_exec_hook(wq);
	work->func(work);
	if (need_order) {
		set_bit(WORK_DONE_BIT, &work->flags);
		run_ordered_work(wq);
	}
	if (!need_order)
		trace_btrfs_all_work_done(work);
}

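/*
 * Typical usage sketch (hypothetical caller and field names, not part of
 * this file):
 *
 *	btrfs_init_work(&async->work, btrfs_worker_helper, my_func,
 *			my_ordered_func, my_ordered_free);
 *	btrfs_queue_work(fs_info->workers, &async->work);
 *
 * uniq_func must be one of the BTRFS_WORK_HELPER wrappers above so the
 * generic work_struct is routed back to normal_work_helper().
 */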
void btrfs_init_work(struct btrfs_work *work, btrfs_work_func_t uniq_func,
		     btrfs_func_t func,
		     btrfs_func_t ordered_func,
		     btrfs_func_t ordered_free)
{
	work->func = func;
	work->ordered_func = ordered_func;
	work->ordered_free = ordered_free;
	INIT_WORK(&work->normal_work, uniq_func);
	INIT_LIST_HEAD(&work->ordered_list);
	work->flags = 0;
}

static inline void __btrfs_queue_work(struct __btrfs_workqueue *wq,
				      struct btrfs_work *work)
{
	unsigned long flags;

	work->wq = wq;
	thresh_queue_hook(wq);
	if (work->ordered_func) {
		spin_lock_irqsave(&wq->list_lock, flags);
		list_add_tail(&work->ordered_list, &wq->ordered_list);
		spin_unlock_irqrestore(&wq->list_lock, flags);
	}
	queue_work(wq->normal_wq, &work->normal_work);
	trace_btrfs_work_queued(work);
}

void btrfs_queue_work(struct btrfs_workqueue *wq,
		      struct btrfs_work *work)
{
	struct __btrfs_workqueue *dest_wq;

	if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags) && wq->high)
		dest_wq = wq->high;
	else
		dest_wq = wq->normal;
	__btrfs_queue_work(dest_wq, work);
}

static inline void
__btrfs_destroy_workqueue(struct __btrfs_workqueue *wq)
{
	destroy_workqueue(wq->normal_wq);
	trace_btrfs_workqueue_destroy(wq);
	kfree(wq);
}

void btrfs_destroy_workqueue(struct btrfs_workqueue *wq)
{
	if (!wq)
		return;
	if (wq->high)
		__btrfs_destroy_workqueue(wq->high);
	__btrfs_destroy_workqueue(wq->normal);
	kfree(wq);
}

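/*
 * Note: this only records the new upper bound; for thresholded queues the
 * backing workqueue's concurrency is re-clamped against max_active by later
 * thresh_exec_hook() calls.
 */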
void btrfs_workqueue_set_max(struct btrfs_workqueue *wq, int max)
{
	if (!wq)
		return;
	wq->normal->max_active = max;
	if (wq->high)
		wq->high->max_active = max;
}

void btrfs_set_work_high_priority(struct btrfs_work *work)
{
	set_bit(WORK_HIGH_PRIO_BIT, &work->flags);
}