/*
 * Functions related to io context handling
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/bootmem.h>	/* for max_pfn/max_low_pfn */
#include <linux/slab.h>

#include "blk.h"

/*
 * For io context allocations
 */
static struct kmem_cache *iocontext_cachep;

/**
 * get_io_context - increment reference count to io_context
 * @ioc: io_context to get
 *
 * Increment reference count to @ioc.
 */
void get_io_context(struct io_context *ioc)
{
	BUG_ON(atomic_long_read(&ioc->refcount) <= 0);
	atomic_long_inc(&ioc->refcount);
}
EXPORT_SYMBOL(get_io_context);

/*
 * Releasing ioc may nest into another put_io_context() leading to nested
 * fast path release.  As the ioc's can't be the same, this is okay but
 * makes lockdep whine.  Keep track of nesting and use it as subclass.
 */
#ifdef CONFIG_LOCKDEP
#define ioc_release_depth(q)		((q) ? (q)->ioc_release_depth : 0)
#define ioc_release_depth_inc(q)	(q)->ioc_release_depth++
#define ioc_release_depth_dec(q)	(q)->ioc_release_depth--
#else
#define ioc_release_depth(q)		0
#define ioc_release_depth_inc(q)	do { } while (0)
#define ioc_release_depth_dec(q)	do { } while (0)
#endif

/*
 * Slow path for ioc release in put_io_context().  Performs double-lock
 * dancing to unlink all cic's and then frees ioc.
 */
static void ioc_release_fn(struct work_struct *work)
{
	struct io_context *ioc = container_of(work, struct io_context,
					      release_work);
	struct request_queue *last_q = NULL;

	spin_lock_irq(&ioc->lock);

	while (!hlist_empty(&ioc->cic_list)) {
		struct cfq_io_context *cic = hlist_entry(ioc->cic_list.first,
							 struct cfq_io_context,
							 cic_list);
		struct request_queue *this_q = cic->q;

		if (this_q != last_q) {
			/*
			 * Need to switch to @this_q.  Once we release
			 * @ioc->lock, it can go away along with @cic.
			 * Hold on to it.
			 */
			__blk_get_queue(this_q);

			/*
			 * blk_put_queue() might sleep thanks to kobject
			 * idiocy.  Always release both locks, put and
			 * restart.
			 */
			if (last_q) {
				spin_unlock(last_q->queue_lock);
				spin_unlock_irq(&ioc->lock);
				blk_put_queue(last_q);
			} else {
				spin_unlock_irq(&ioc->lock);
			}

			last_q = this_q;
			spin_lock_irq(this_q->queue_lock);
			spin_lock(&ioc->lock);
			continue;
		}
		ioc_release_depth_inc(this_q);
		cic->exit(cic);
		cic->release(cic);
		ioc_release_depth_dec(this_q);
	}

	if (last_q) {
		spin_unlock(last_q->queue_lock);
		spin_unlock_irq(&ioc->lock);
		blk_put_queue(last_q);
	} else {
		spin_unlock_irq(&ioc->lock);
	}

	kmem_cache_free(iocontext_cachep, ioc);
}

/**
 * put_io_context - put a reference to io_context
 * @ioc: io_context to put
 * @locked_q: request_queue the caller is holding queue_lock of (hint)
 *
 * Decrement reference count of @ioc and release it if the count reaches
 * zero.  If the caller is holding queue_lock of a queue, it can indicate
 * that with @locked_q.  This is an optimization hint and the caller is
 * allowed to pass in %NULL even when it's holding a queue_lock.
 */
void put_io_context(struct io_context *ioc, struct request_queue *locked_q)
{
	struct request_queue *last_q = locked_q;
	unsigned long flags;

	if (ioc == NULL)
		return;

	BUG_ON(atomic_long_read(&ioc->refcount) <= 0);
	if (locked_q)
		lockdep_assert_held(locked_q->queue_lock);

	if (!atomic_long_dec_and_test(&ioc->refcount))
		return;

	/*
	 * Destroy @ioc.  This is a bit messy because cic's are chained
	 * from both ioc and queue, and ioc->lock nests inside queue_lock.
	 * The inner ioc->lock should be held to walk our cic_list and then
	 * for each cic the outer matching queue_lock should be grabbed.
	 * ie. We need to do reverse-order double lock dancing.
	 *
	 * Another twist is that we are often called with one of the
	 * matching queue_locks held as indicated by @locked_q, which
	 * prevents performing double-lock dance for other queues.
	 *
	 * So, we do it in two stages.  The fast path uses the queue_lock
	 * the caller is holding and, if other queues need to be accessed,
	 * uses trylock to avoid introducing locking dependency.  This can
	 * handle most cases, especially if @ioc was performing IO on only
	 * single device.
	 *
	 * If trylock doesn't cut it, we defer to @ioc->release_work which
	 * can do all the double-locking dancing.
	 */
	spin_lock_irqsave_nested(&ioc->lock, flags,
				 ioc_release_depth(locked_q));

	while (!hlist_empty(&ioc->cic_list)) {
		struct cfq_io_context *cic = hlist_entry(ioc->cic_list.first,
							 struct cfq_io_context,
							 cic_list);
		struct request_queue *this_q = cic->q;

		if (this_q != last_q) {
			if (last_q && last_q != locked_q)
				spin_unlock(last_q->queue_lock);
			last_q = NULL;

			if (!spin_trylock(this_q->queue_lock))
				break;
			last_q = this_q;
			continue;
		}
		ioc_release_depth_inc(this_q);
		cic->exit(cic);
		cic->release(cic);
		ioc_release_depth_dec(this_q);
	}

	if (last_q && last_q != locked_q)
		spin_unlock(last_q->queue_lock);

	spin_unlock_irqrestore(&ioc->lock, flags);

	/* if no cic's left, we're done; otherwise, kick release_work */
	if (hlist_empty(&ioc->cic_list))
		kmem_cache_free(iocontext_cachep, ioc);
	else
		schedule_work(&ioc->release_work);
}
EXPORT_SYMBOL(put_io_context);
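
/*
 * Illustrative sketch, not part of the original file: how a caller that
 * already holds a queue_lock could use the @locked_q hint above so the
 * fast path reuses its lock instead of trylocking it.  The function name
 * and the surrounding locking pattern are assumptions for illustration.
 */
#if 0
static void example_put_ioc_queue_locked(struct request_queue *q,
					 struct io_context *ioc)
{
	unsigned long flags;

	spin_lock_irqsave(q->queue_lock, flags);
	/* ... finish whatever work needed @ioc under q->queue_lock ... */
	put_io_context(ioc, q);		/* hint: q->queue_lock is held */
	spin_unlock_irqrestore(q->queue_lock, flags);
}
#endif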

/* Called by the exiting task */
void exit_io_context(struct task_struct *task)
{
	struct io_context *ioc;

	/* PF_EXITING prevents new io_context from being attached to @task */
	WARN_ON_ONCE(!(current->flags & PF_EXITING));

	task_lock(task);
	ioc = task->io_context;
	task->io_context = NULL;
	task_unlock(task);

	atomic_dec(&ioc->nr_tasks);
	put_io_context(ioc, NULL);
}

void create_io_context_slowpath(struct task_struct *task, gfp_t gfp_flags,
				int node)
{
	struct io_context *ioc;

	ioc = kmem_cache_alloc_node(iocontext_cachep, gfp_flags | __GFP_ZERO,
				    node);
	if (unlikely(!ioc))
		return;

	/* initialize */
	atomic_long_set(&ioc->refcount, 1);
	atomic_set(&ioc->nr_tasks, 1);
	spin_lock_init(&ioc->lock);
	INIT_RADIX_TREE(&ioc->radix_root, GFP_ATOMIC | __GFP_HIGH);
	INIT_HLIST_HEAD(&ioc->cic_list);
	INIT_WORK(&ioc->release_work, ioc_release_fn);

	/* try to install, somebody might already have beaten us to it */
	task_lock(task);
	if (!task->io_context && !(task->flags & PF_EXITING))
		task->io_context = ioc;
	else
		kmem_cache_free(iocontext_cachep, ioc);
	task_unlock(task);
}
EXPORT_SYMBOL(create_io_context_slowpath);
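
/*
 * For reference, a sketch of the fast-path wrapper that pairs with the
 * slowpath above and is what get_task_io_context() below loops on.  The
 * inline lives in blk.h; its exact body may differ, so treat this as an
 * assumption-laden outline rather than a copy.
 */
#if 0
static inline struct io_context *create_io_context(struct task_struct *task,
						   gfp_t gfp_flags, int node)
{
	/* only take the allocating slowpath when no io_context is attached */
	if (unlikely(!task->io_context))
		create_io_context_slowpath(task, gfp_flags, node);
	return task->io_context;	/* may still be NULL on failure */
}
#endif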

/**
 * get_task_io_context - get io_context of a task
 * @task: task of interest
 * @gfp_flags: allocation flags, used if allocation is necessary
 * @node: allocation node, used if allocation is necessary
 *
 * Return io_context of @task.  If it doesn't exist, it is created with
 * @gfp_flags and @node.  The returned io_context has its reference count
 * incremented.
 *
 * This function always goes through task_lock() and it's better to use
 * %current->io_context + get_io_context() for %current.
 */
struct io_context *get_task_io_context(struct task_struct *task,
				       gfp_t gfp_flags, int node)
{
	struct io_context *ioc;

	might_sleep_if(gfp_flags & __GFP_WAIT);

	do {
		task_lock(task);
		ioc = task->io_context;
		if (likely(ioc)) {
			get_io_context(ioc);
			task_unlock(task);
			return ioc;
		}
		task_unlock(task);
	} while (create_io_context(task, gfp_flags, node));

	return NULL;
}
EXPORT_SYMBOL(get_task_io_context);
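
/*
 * Illustrative sketch, not part of the original file: a typical caller
 * pairing get_task_io_context() with put_io_context().  The helper name
 * and the GFP_KERNEL/NUMA_NO_NODE choices are assumptions for the example.
 */
#if 0
static void example_peek_task_ioprio(struct task_struct *task)
{
	struct io_context *ioc;

	/* may allocate and attach an io_context; returned with a reference */
	ioc = get_task_io_context(task, GFP_KERNEL, NUMA_NO_NODE);
	if (!ioc)
		return;		/* allocation failed or @task is exiting */

	pr_info("ioprio=%d\n", (int)ioc->ioprio);

	/* drop the reference; no queue_lock held, so the hint is NULL */
	put_io_context(ioc, NULL);
}
#endif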

void ioc_set_changed(struct io_context *ioc, int which)
{
	struct cfq_io_context *cic;
	struct hlist_node *n;

	hlist_for_each_entry(cic, n, &ioc->cic_list, cic_list)
		set_bit(which, &cic->changed);
}

/**
 * ioc_ioprio_changed - notify ioprio change
 * @ioc: io_context of interest
 * @ioprio: new ioprio
 *
 * @ioc's ioprio has changed to @ioprio.  Set %CIC_IOPRIO_CHANGED for all
 * cic's.  iosched is responsible for checking the bit and applying it on
 * request issue path.
 */
void ioc_ioprio_changed(struct io_context *ioc, int ioprio)
{
	unsigned long flags;

	spin_lock_irqsave(&ioc->lock, flags);
	ioc->ioprio = ioprio;
	ioc_set_changed(ioc, CIC_IOPRIO_CHANGED);
	spin_unlock_irqrestore(&ioc->lock, flags);
}

/**
 * ioc_cgroup_changed - notify cgroup change
 * @ioc: io_context of interest
 *
 * @ioc's cgroup has changed.  Set %CIC_CGROUP_CHANGED for all cic's.
 * iosched is responsible for checking the bit and applying it on request
 * issue path.
 */
void ioc_cgroup_changed(struct io_context *ioc)
{
	unsigned long flags;

	spin_lock_irqsave(&ioc->lock, flags);
	ioc_set_changed(ioc, CIC_CGROUP_CHANGED);
	spin_unlock_irqrestore(&ioc->lock, flags);
}
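
/*
 * Illustrative sketch, not part of the original file: how an I/O scheduler
 * might consume the changed bits set above on its request issue path.  The
 * cfq_changed_ioprio()/cfq_changed_cgroup() helpers are hypothetical
 * placeholders for whatever the elevator actually does with the events.
 */
#if 0
static void example_consume_changed(struct cfq_io_context *cic)
{
	if (unlikely(cic->changed)) {
		if (test_and_clear_bit(CIC_IOPRIO_CHANGED, &cic->changed))
			cfq_changed_ioprio(cic);	/* hypothetical */
		if (test_and_clear_bit(CIC_CGROUP_CHANGED, &cic->changed))
			cfq_changed_cgroup(cic);	/* hypothetical */
	}
}
#endif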

static int __init blk_ioc_init(void)
{
	iocontext_cachep = kmem_cache_create("blkdev_ioc",
			sizeof(struct io_context), 0, SLAB_PANIC, NULL);
	return 0;
}
subsys_initcall(blk_ioc_init);