/*
 *  Block device elevator/IO-scheduler.
 *
 *  Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
 *
 * 30042000 Jens Axboe <axboe@kernel.dk> :
 *
 * Split the elevator a bit so that it is possible to choose a different
 * one or even write a new "plug in". There are three pieces:
 * - elevator_fn, inserts a new request in the queue list
 * - elevator_merge_fn, decides whether a new buffer can be merged with
 *   an existing request
 * - elevator_dequeue_fn, called when a request is taken off the active list
 *
 * 20082000 Dave Jones <davej@suse.de> :
 * Removed tests for max-bomb-segments, which was breaking elvtune
 *  when run without -bN
 *
 * Jens:
 * - Rework again to work with bio instead of buffer_heads
 * - lose bi_dev comparisons, partition handling is right now
 * - completely modularize elevator setup and teardown
 *
 */
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/blkdev.h>
#include <linux/elevator.h>
#include <linux/bio.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/compiler.h>
#include <linux/delay.h>
#include <linux/blktrace_api.h>
#include <trace/block.h>
#include <linux/hash.h>
#include <linux/uaccess.h>

#include "blk.h"

static DEFINE_SPINLOCK(elv_list_lock);
static LIST_HEAD(elv_list);

DEFINE_TRACE(block_rq_abort);

/*
 * Merge hash stuff.
 */
static const int elv_hash_shift = 6;
#define ELV_HASH_BLOCK(sec)	((sec) >> 3)
#define ELV_HASH_FN(sec)	\
		(hash_long(ELV_HASH_BLOCK((sec)), elv_hash_shift))
#define ELV_HASH_ENTRIES	(1 << elv_hash_shift)
#define rq_hash_key(rq)		((rq)->sector + (rq)->nr_sectors)
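/*
 * Note that rq_hash_key() is the first sector *past* the request, so a
 * hash lookup on bio->bi_sector finds a queued request that the bio
 * could be appended to (a back merge).
 */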

DEFINE_TRACE(block_rq_insert);
DEFINE_TRACE(block_rq_issue);

/*
 * Query io scheduler to see if the current process issuing bio may be
 * merged with rq.
 */
static int elv_iosched_allow_merge(struct request *rq, struct bio *bio)
{
	struct request_queue *q = rq->q;
	struct elevator_queue *e = q->elevator;

	if (e->ops->elevator_allow_merge_fn)
		return e->ops->elevator_allow_merge_fn(q, rq, bio);

	return 1;
}

/*
 * can we safely merge with this request?
 */
int elv_rq_merge_ok(struct request *rq, struct bio *bio)
{
	if (!rq_mergeable(rq))
		return 0;

	/*
	 * Don't merge file system requests and discard requests
	 */
	if (bio_discard(bio) != bio_discard(rq->bio))
		return 0;

	/*
	 * different data direction or already started, don't merge
	 */
	if (bio_data_dir(bio) != rq_data_dir(rq))
		return 0;

	/*
	 * must be same device and not a special request
	 */
	if (rq->rq_disk != bio->bi_bdev->bd_disk || rq->special)
		return 0;

	/*
	 * only merge integrity protected bio into ditto rq
	 */
	if (bio_integrity(bio) != blk_integrity_rq(rq))
		return 0;

	if (!elv_iosched_allow_merge(rq, bio))
		return 0;

	return 1;
}
EXPORT_SYMBOL(elv_rq_merge_ok);

static inline int elv_try_merge(struct request *__rq, struct bio *bio)
{
	int ret = ELEVATOR_NO_MERGE;

	/*
	 * we can merge and sequence is ok, check if it's possible
	 */
	if (elv_rq_merge_ok(__rq, bio)) {
		if (__rq->sector + __rq->nr_sectors == bio->bi_sector)
			ret = ELEVATOR_BACK_MERGE;
		else if (__rq->sector - bio_sectors(bio) == bio->bi_sector)
			ret = ELEVATOR_FRONT_MERGE;
	}

	return ret;
}

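/*
 * Look up an elevator type by name on the global elv_list; callers
 * must hold elv_list_lock.
 */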
static struct elevator_type *elevator_find(const char *name)
{
	struct elevator_type *e;

	list_for_each_entry(e, &elv_list, list) {
		if (!strcmp(e->elevator_name, name))
			return e;
	}

	return NULL;
}

static void elevator_put(struct elevator_type *e)
{
	module_put(e->elevator_owner);
}

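/*
 * Look up an elevator type and take a reference on its module; on a
 * miss, try to load "<name>-iosched" (or "as-iosched" for the
 * "anticipatory" alias) and look again.  Returns NULL on failure.
 */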
static struct elevator_type *elevator_get(const char *name)
{
	struct elevator_type *e;

	spin_lock(&elv_list_lock);

	e = elevator_find(name);
	if (!e) {
		char elv[ELV_NAME_MAX + strlen("-iosched")];

		spin_unlock(&elv_list_lock);

		if (!strcmp(name, "anticipatory"))
			sprintf(elv, "as-iosched");
		else
			sprintf(elv, "%s-iosched", name);

		request_module("%s", elv);
		spin_lock(&elv_list_lock);
		e = elevator_find(name);
	}

	if (e && !try_module_get(e->elevator_owner))
		e = NULL;

	spin_unlock(&elv_list_lock);

	return e;
}

static void *elevator_init_queue(struct request_queue *q,
				 struct elevator_queue *eq)
{
	return eq->ops->elevator_init_fn(q);
}

static void elevator_attach(struct request_queue *q, struct elevator_queue *eq,
			   void *data)
{
	q->elevator = eq;
	eq->elevator_data = data;
}

static char chosen_elevator[16];

static int __init elevator_setup(char *str)
{
	/*
	 * Be backwards-compatible with previous kernels, so users
	 * won't get the wrong elevator.
	 */
	if (!strcmp(str, "as"))
		strcpy(chosen_elevator, "anticipatory");
	else
		strncpy(chosen_elevator, str, sizeof(chosen_elevator) - 1);
	return 1;
}

__setup("elevator=", elevator_setup);

static struct kobj_type elv_ktype;

static struct elevator_queue *elevator_alloc(struct request_queue *q,
				  struct elevator_type *e)
{
	struct elevator_queue *eq;
	int i;

	eq = kmalloc_node(sizeof(*eq), GFP_KERNEL | __GFP_ZERO, q->node);
	if (unlikely(!eq))
		goto err;

	eq->ops = &e->ops;
	eq->elevator_type = e;
	kobject_init(&eq->kobj, &elv_ktype);
	mutex_init(&eq->sysfs_lock);

	eq->hash = kmalloc_node(sizeof(struct hlist_head) * ELV_HASH_ENTRIES,
					GFP_KERNEL, q->node);
	if (!eq->hash)
		goto err;

	for (i = 0; i < ELV_HASH_ENTRIES; i++)
		INIT_HLIST_HEAD(&eq->hash[i]);

	return eq;
err:
	kfree(eq);
	elevator_put(e);
	return NULL;
}

static void elevator_release(struct kobject *kobj)
{
	struct elevator_queue *e;

	e = container_of(kobj, struct elevator_queue, kobj);
	elevator_put(e->elevator_type);
	kfree(e->hash);
	kfree(e);
}

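/*
 * Set up an io scheduler for q: use the caller-supplied name if given,
 * else the boot-time "elevator=" choice, else CONFIG_DEFAULT_IOSCHED,
 * falling back to noop if even that cannot be found.
 */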
int elevator_init(struct request_queue *q, char *name)
{
	struct elevator_type *e = NULL;
	struct elevator_queue *eq;
	int ret = 0;
	void *data;

	INIT_LIST_HEAD(&q->queue_head);
	q->last_merge = NULL;
	q->end_sector = 0;
	q->boundary_rq = NULL;

	if (name) {
		e = elevator_get(name);
		if (!e)
			return -EINVAL;
	}

	if (!e && *chosen_elevator) {
		e = elevator_get(chosen_elevator);
		if (!e)
			printk(KERN_ERR "I/O scheduler %s not found\n",
							chosen_elevator);
	}

	if (!e) {
		e = elevator_get(CONFIG_DEFAULT_IOSCHED);
		if (!e) {
			printk(KERN_ERR
				"Default I/O scheduler not found. " \
				"Using noop.\n");
			e = elevator_get("noop");
		}
	}

	eq = elevator_alloc(q, e);
	if (!eq)
		return -ENOMEM;

	data = elevator_init_queue(q, eq);
	if (!data) {
		kobject_put(&eq->kobj);
		return -ENOMEM;
	}

	elevator_attach(q, eq, data);
	return ret;
}
EXPORT_SYMBOL(elevator_init);

void elevator_exit(struct elevator_queue *e)
{
	mutex_lock(&e->sysfs_lock);
	if (e->ops->elevator_exit_fn)
		e->ops->elevator_exit_fn(e);
	e->ops = NULL;
	mutex_unlock(&e->sysfs_lock);

	kobject_put(&e->kobj);
}
EXPORT_SYMBOL(elevator_exit);

static inline void __elv_rqhash_del(struct request *rq)
{
	hlist_del_init(&rq->hash);
}

static void elv_rqhash_del(struct request_queue *q, struct request *rq)
{
	if (ELV_ON_HASH(rq))
		__elv_rqhash_del(rq);
}

static void elv_rqhash_add(struct request_queue *q, struct request *rq)
{
	struct elevator_queue *e = q->elevator;

	BUG_ON(ELV_ON_HASH(rq));
	hlist_add_head(&rq->hash, &e->hash[ELV_HASH_FN(rq_hash_key(rq))]);
}

static void elv_rqhash_reposition(struct request_queue *q, struct request *rq)
{
	__elv_rqhash_del(rq);
	elv_rqhash_add(q, rq);
}

static struct request *elv_rqhash_find(struct request_queue *q, sector_t offset)
{
	struct elevator_queue *e = q->elevator;
	struct hlist_head *hash_list = &e->hash[ELV_HASH_FN(offset)];
	struct hlist_node *entry, *next;
	struct request *rq;

	hlist_for_each_entry_safe(rq, entry, next, hash_list, hash) {
		BUG_ON(!ELV_ON_HASH(rq));

		if (unlikely(!rq_mergeable(rq))) {
			__elv_rqhash_del(rq);
			continue;
		}

		if (rq_hash_key(rq) == offset)
			return rq;
	}

	return NULL;
}

/*
 * RB-tree support functions for inserting/lookup/removal of requests
 * in a sorted RB tree.
 */
struct request *elv_rb_add(struct rb_root *root, struct request *rq)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent = NULL;
	struct request *__rq;

	while (*p) {
		parent = *p;
		__rq = rb_entry(parent, struct request, rb_node);

		if (rq->sector < __rq->sector)
			p = &(*p)->rb_left;
		else if (rq->sector > __rq->sector)
			p = &(*p)->rb_right;
		else
			return __rq;
	}

	rb_link_node(&rq->rb_node, parent, p);
	rb_insert_color(&rq->rb_node, root);
	return NULL;
}
EXPORT_SYMBOL(elv_rb_add);

void elv_rb_del(struct rb_root *root, struct request *rq)
{
	BUG_ON(RB_EMPTY_NODE(&rq->rb_node));
	rb_erase(&rq->rb_node, root);
	RB_CLEAR_NODE(&rq->rb_node);
}
EXPORT_SYMBOL(elv_rb_del);

struct request *elv_rb_find(struct rb_root *root, sector_t sector)
{
	struct rb_node *n = root->rb_node;
	struct request *rq;

	while (n) {
		rq = rb_entry(n, struct request, rb_node);

		if (sector < rq->sector)
			n = n->rb_left;
		else if (sector > rq->sector)
			n = n->rb_right;
		else
			return rq;
	}

	return NULL;
}
EXPORT_SYMBOL(elv_rb_find);

/*
 * Insert rq into dispatch queue of q.  Queue lock must be held on
 * entry.  rq is sorted into the dispatch queue.  To be used by
 * specific elevators.
 */
void elv_dispatch_sort(struct request_queue *q, struct request *rq)
{
	sector_t boundary;
	struct list_head *entry;
	int stop_flags;

	if (q->last_merge == rq)
		q->last_merge = NULL;

	elv_rqhash_del(q, rq);

	q->nr_sorted--;

	boundary = q->end_sector;
	stop_flags = REQ_SOFTBARRIER | REQ_HARDBARRIER | REQ_STARTED;
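	/*
	 * Scan from the tail for the first spot that keeps the list
	 * sorted: requests below the scheduling boundary (q->end_sector)
	 * sort behind those at or above it, so dispatch keeps sweeping
	 * in one direction like a one-way elevator.
	 */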
	list_for_each_prev(entry, &q->queue_head) {
		struct request *pos = list_entry_rq(entry);

		if (blk_discard_rq(rq) != blk_discard_rq(pos))
			break;
		if (rq_data_dir(rq) != rq_data_dir(pos))
			break;
		if (pos->cmd_flags & stop_flags)
			break;
		if (rq->sector >= boundary) {
			if (pos->sector < boundary)
				continue;
		} else {
			if (pos->sector >= boundary)
				break;
		}
		if (rq->sector >= pos->sector)
			break;
	}

	list_add(&rq->queuelist, entry);
}
EXPORT_SYMBOL(elv_dispatch_sort);

/*
 * Insert rq into dispatch queue of q.  Queue lock must be held on
 * entry.  rq is added to the back of the dispatch queue. To be used by
 * specific elevators.
 */
 */
void elv_dispatch_add_tail(struct request_queue *q, struct request *rq)
{
	if (q->last_merge == rq)
		q->last_merge = NULL;

	elv_rqhash_del(q, rq);

	q->nr_sorted--;

	q->end_sector = rq_end_sector(rq);
	q->boundary_rq = rq;
	list_add_tail(&rq->queuelist, &q->queue_head);
}
EXPORT_SYMBOL(elv_dispatch_add_tail);

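/*
 * See if bio can be merged with a pending request.  On a hit, *req is
 * set to the request to merge with and the return value says which end
 * to merge at (ELEVATOR_BACK_MERGE or ELEVATOR_FRONT_MERGE); otherwise
 * ELEVATOR_NO_MERGE is returned.
 */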
int elv_merge(struct request_queue *q, struct request **req, struct bio *bio)
{
	struct elevator_queue *e = q->elevator;
	struct request *__rq;
	int ret;

	/*
	 * First try one-hit cache.
	 */
	if (q->last_merge) {
		ret = elv_try_merge(q->last_merge, bio);
		if (ret != ELEVATOR_NO_MERGE) {
			*req = q->last_merge;
			return ret;
		}
	}

	if (blk_queue_nomerges(q))
		return ELEVATOR_NO_MERGE;

	/*
	 * See if our hash lookup can find a potential backmerge.
	 */
	__rq = elv_rqhash_find(q, bio->bi_sector);
	if (__rq && elv_rq_merge_ok(__rq, bio)) {
		*req = __rq;
		return ELEVATOR_BACK_MERGE;
	}

	if (e->ops->elevator_merge_fn)
		return e->ops->elevator_merge_fn(q, req, bio);

	return ELEVATOR_NO_MERGE;
}

void elv_merged_request(struct request_queue *q, struct request *rq, int type)
{
	struct elevator_queue *e = q->elevator;

	if (e->ops->elevator_merged_fn)
		e->ops->elevator_merged_fn(q, rq, type);

	if (type == ELEVATOR_BACK_MERGE)
		elv_rqhash_reposition(q, rq);

	q->last_merge = rq;
}

void elv_merge_requests(struct request_queue *q, struct request *rq,
			     struct request *next)
{
	struct elevator_queue *e = q->elevator;

	if (e->ops->elevator_merge_req_fn)
		e->ops->elevator_merge_req_fn(q, rq, next);

	elv_rqhash_reposition(q, rq);
	elv_rqhash_del(q, next);

	q->nr_sorted--;
	q->last_merge = rq;
}

void elv_requeue_request(struct request_queue *q, struct request *rq)
{
	/*
	 * it already went through dequeue, we need to decrement the
	 * in_flight count again
	 */
	if (blk_account_rq(rq)) {
		q->in_flight--;
		if (blk_sorted_rq(rq))
			elv_deactivate_rq(q, rq);
	}

	rq->cmd_flags &= ~REQ_STARTED;

	elv_insert(q, rq, ELEVATOR_INSERT_REQUEUE);
}

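/*
 * Force the elevator to dispatch everything it is holding back, e.g.
 * before a barrier is queued or an elevator switch drains the queue;
 * complain if it still claims sorted requests afterwards.
 */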
void elv_drain_elevator(struct request_queue *q)
{
	static int printed;
	while (q->elevator->ops->elevator_dispatch_fn(q, 1))
		;
	if (q->nr_sorted == 0)
		return;
	if (printed++ < 10) {
		printk(KERN_ERR "%s: forced dispatching is broken "
		       "(nr_sorted=%u), please report this\n",
		       q->elevator->elevator_type->elevator_name, q->nr_sorted);
	}
}

/*
 * Call with queue lock held, interrupts disabled
 */
void elv_quiesce_start(struct request_queue *q)
{
	queue_flag_set(QUEUE_FLAG_ELVSWITCH, q);

	/*
	 * make sure we don't have any requests in flight
	 */
	elv_drain_elevator(q);
	while (q->rq.elvpriv) {
		__blk_run_queue(q);
		spin_unlock_irq(q->queue_lock);
		msleep(10);
		spin_lock_irq(q->queue_lock);
		elv_drain_elevator(q);
	}
}

void elv_quiesce_end(struct request_queue *q)
{
	queue_flag_clear(QUEUE_FLAG_ELVSWITCH, q);
}

void elv_insert(struct request_queue *q, struct request *rq, int where)
{
	struct list_head *pos;
	unsigned ordseq;
	int unplug_it = 1;

	trace_block_rq_insert(q, rq);

	rq->q = q;

	switch (where) {
	case ELEVATOR_INSERT_FRONT:
		rq->cmd_flags |= REQ_SOFTBARRIER;

		list_add(&rq->queuelist, &q->queue_head);
		break;

	case ELEVATOR_INSERT_BACK:
		rq->cmd_flags |= REQ_SOFTBARRIER;
		elv_drain_elevator(q);
		list_add_tail(&rq->queuelist, &q->queue_head);
		/*
		 * We kick the queue here for the following reasons.
		 * - The elevator might have returned NULL previously
		 *   to delay requests and returned them now.  As the
		 *   queue wasn't empty before this request, ll_rw_blk
		 *   won't run the queue on return, resulting in hang.
		 * - Usually, back inserted requests won't be merged
		 *   with anything.  There's no point in delaying queue
		 *   processing.
		 */
		__blk_run_queue(q);
		break;

	case ELEVATOR_INSERT_SORT:
		BUG_ON(!blk_fs_request(rq) && !blk_discard_rq(rq));
		rq->cmd_flags |= REQ_SORTED;
		q->nr_sorted++;
		if (rq_mergeable(rq)) {
			elv_rqhash_add(q, rq);
			if (!q->last_merge)
				q->last_merge = rq;
		}

		/*
		 * Some ioscheds (cfq) run q->request_fn directly, so
		 * rq cannot be accessed after calling
		 * elevator_add_req_fn.
		 */
		q->elevator->ops->elevator_add_req_fn(q, rq);
		break;

	case ELEVATOR_INSERT_REQUEUE:
		/*
		 * If ordered flush isn't in progress, we do front
		 * insertion; otherwise, requests should be requeued
		 * in ordseq order.
		 */
		rq->cmd_flags |= REQ_SOFTBARRIER;

		/*
		 * Most requeues happen because of a busy condition,
		 * don't force unplug of the queue for that case.
		 */
		unplug_it = 0;

		if (q->ordseq == 0) {
			list_add(&rq->queuelist, &q->queue_head);
			break;
		}

		ordseq = blk_ordered_req_seq(rq);

		list_for_each(pos, &q->queue_head) {
			struct request *pos_rq = list_entry_rq(pos);
			if (ordseq <= blk_ordered_req_seq(pos_rq))
				break;
		}

		list_add_tail(&rq->queuelist, pos);
		break;

	default:
		printk(KERN_ERR "%s: bad insertion point %d\n",
		       __func__, where);
		BUG();
	}

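	/*
	 * On a plugged queue, unplug once the number of queued requests
	 * (sync + async, less those already in flight) crosses
	 * q->unplug_thresh.
	 */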
	if (unplug_it && blk_queue_plugged(q)) {
		int nrq = q->rq.count[BLK_RW_SYNC] + q->rq.count[BLK_RW_ASYNC]
			- q->in_flight;

		if (nrq >= q->unplug_thresh)
			__generic_unplug_device(q);
	}
}

void __elv_add_request(struct request_queue *q, struct request *rq, int where,
		       int plug)
{
	if (q->ordcolor)
		rq->cmd_flags |= REQ_ORDERED_COLOR;

	if (rq->cmd_flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)) {
		/*
		 * toggle ordered color
		 */
		if (blk_barrier_rq(rq))
			q->ordcolor ^= 1;

		/*
		 * barriers implicitly indicate back insertion
		 */
		if (where == ELEVATOR_INSERT_SORT)
			where = ELEVATOR_INSERT_BACK;

		/*
		 * this request is scheduling boundary, update
		 * end_sector
		 */
		if (blk_fs_request(rq) || blk_discard_rq(rq)) {
			q->end_sector = rq_end_sector(rq);
			q->boundary_rq = rq;
		}
	} else if (!(rq->cmd_flags & REQ_ELVPRIV) &&
		    where == ELEVATOR_INSERT_SORT)
		where = ELEVATOR_INSERT_BACK;

	if (plug)
		blk_plug_device(q);

	elv_insert(q, rq, where);
}
EXPORT_SYMBOL(__elv_add_request);

void elv_add_request(struct request_queue *q, struct request *rq, int where,
		     int plug)
{
	unsigned long flags;

	spin_lock_irqsave(q->queue_lock, flags);
	__elv_add_request(q, rq, where, plug);
	spin_unlock_irqrestore(q->queue_lock, flags);
}
EXPORT_SYMBOL(elv_add_request);

int elv_queue_empty(struct request_queue *q)
{
	struct elevator_queue *e = q->elevator;

	if (!list_empty(&q->queue_head))
		return 0;

	if (e->ops->elevator_queue_empty_fn)
		return e->ops->elevator_queue_empty_fn(q);

	return 1;
}
EXPORT_SYMBOL(elv_queue_empty);

struct request *elv_latter_request(struct request_queue *q, struct request *rq)
{
	struct elevator_queue *e = q->elevator;

	if (e->ops->elevator_latter_req_fn)
		return e->ops->elevator_latter_req_fn(q, rq);
	return NULL;
}

struct request *elv_former_request(struct request_queue *q, struct request *rq)
{
	struct elevator_queue *e = q->elevator;

	if (e->ops->elevator_former_req_fn)
		return e->ops->elevator_former_req_fn(q, rq);
	return NULL;
}

int elv_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask)
{
	struct elevator_queue *e = q->elevator;

	if (e->ops->elevator_set_req_fn)
		return e->ops->elevator_set_req_fn(q, rq, gfp_mask);

	rq->elevator_private = NULL;
	return 0;
}

void elv_put_request(struct request_queue *q, struct request *rq)
{
	struct elevator_queue *e = q->elevator;

	if (e->ops->elevator_put_req_fn)
		e->ops->elevator_put_req_fn(rq);
}

int elv_may_queue(struct request_queue *q, int rw)
{
	struct elevator_queue *e = q->elevator;

	if (e->ops->elevator_may_queue_fn)
		return e->ops->elevator_may_queue_fn(q, rw);

	return ELV_MQUEUE_MAY;
}

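/*
 * Fail every request still sitting on the dispatch list with -EIO,
 * quietly (REQ_QUIET) and with an abort trace event for each.
 */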
void elv_abort_queue(struct request_queue *q)
{
	struct request *rq;

	while (!list_empty(&q->queue_head)) {
		rq = list_entry_rq(q->queue_head.next);
		rq->cmd_flags |= REQ_QUIET;
		trace_block_rq_abort(q, rq);
		__blk_end_request(rq, -EIO, blk_rq_bytes(rq));
	}
}
EXPORT_SYMBOL(elv_abort_queue);

void elv_completed_request(struct request_queue *q, struct request *rq)
{
	struct elevator_queue *e = q->elevator;

	/*
	 * request is released from the driver, io must be done
	 */
	if (blk_account_rq(rq)) {
		q->in_flight--;
		if (blk_sorted_rq(rq) && e->ops->elevator_completed_req_fn)
			e->ops->elevator_completed_req_fn(q, rq);
	}

	/*
	 * Check if the queue is waiting for fs requests to be
	 * drained for flush sequence.
	 */
	if (unlikely(q->ordseq)) {
		struct request *next = NULL;

		if (!list_empty(&q->queue_head))
			next = list_entry_rq(q->queue_head.next);

		if (!q->in_flight &&
		    blk_ordered_cur_seq(q) == QUEUE_ORDSEQ_DRAIN &&
		    (!next || blk_ordered_req_seq(next) > QUEUE_ORDSEQ_DRAIN)) {
			blk_ordered_complete_seq(q, QUEUE_ORDSEQ_DRAIN, 0);
			__blk_run_queue(q);
		}
	}
}

#define to_elv(atr) container_of((atr), struct elv_fs_entry, attr)

static ssize_t
elv_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
{
	struct elv_fs_entry *entry = to_elv(attr);
	struct elevator_queue *e;
	ssize_t error;

	if (!entry->show)
		return -EIO;

	e = container_of(kobj, struct elevator_queue, kobj);
	mutex_lock(&e->sysfs_lock);
	error = e->ops ? entry->show(e, page) : -ENOENT;
	mutex_unlock(&e->sysfs_lock);
	return error;
}

static ssize_t
elv_attr_store(struct kobject *kobj, struct attribute *attr,
	       const char *page, size_t length)
{
	struct elv_fs_entry *entry = to_elv(attr);
	struct elevator_queue *e;
	ssize_t error;

	if (!entry->store)
		return -EIO;

	e = container_of(kobj, struct elevator_queue, kobj);
	mutex_lock(&e->sysfs_lock);
	error = e->ops ? entry->store(e, page, length) : -ENOENT;
	mutex_unlock(&e->sysfs_lock);
	return error;
}

static struct sysfs_ops elv_sysfs_ops = {
	.show	= elv_attr_show,
	.store	= elv_attr_store,
};

static struct kobj_type elv_ktype = {
	.sysfs_ops	= &elv_sysfs_ops,
	.release	= elevator_release,
};

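/*
 * Register the elevator's kobject as "iosched" under the queue's
 * kobject and create the scheduler's own sysfs attributes beneath it.
 */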
int elv_register_queue(struct request_queue *q)
{
	struct elevator_queue *e = q->elevator;
	int error;

	error = kobject_add(&e->kobj, &q->kobj, "%s", "iosched");
	if (!error) {
		struct elv_fs_entry *attr = e->elevator_type->elevator_attrs;
		if (attr) {
			while (attr->attr.name) {
				if (sysfs_create_file(&e->kobj, &attr->attr))
					break;
				attr++;
			}
		}
		kobject_uevent(&e->kobj, KOBJ_ADD);
	}
	return error;
}

static void __elv_unregister_queue(struct elevator_queue *e)
{
	kobject_uevent(&e->kobj, KOBJ_REMOVE);
	kobject_del(&e->kobj);
}

void elv_unregister_queue(struct request_queue *q)
{
	if (q)
		__elv_unregister_queue(q->elevator);
}

void elv_register(struct elevator_type *e)
{
	char *def = "";

	spin_lock(&elv_list_lock);
	BUG_ON(elevator_find(e->elevator_name));
	list_add_tail(&e->list, &elv_list);
	spin_unlock(&elv_list_lock);

	if (!strcmp(e->elevator_name, chosen_elevator) ||
			(!*chosen_elevator &&
			 !strcmp(e->elevator_name, CONFIG_DEFAULT_IOSCHED)))
				def = " (default)";

	printk(KERN_INFO "io scheduler %s registered%s\n", e->elevator_name,
								def);
}
EXPORT_SYMBOL_GPL(elv_register);

void elv_unregister(struct elevator_type *e)
{
	struct task_struct *g, *p;

	/*
	 * Iterate every thread in the process to remove the io contexts.
	 */
	if (e->ops.trim) {
		read_lock(&tasklist_lock);
		do_each_thread(g, p) {
			task_lock(p);
			if (p->io_context)
				e->ops.trim(p->io_context);
			task_unlock(p);
		} while_each_thread(g, p);
		read_unlock(&tasklist_lock);
	}

	spin_lock(&elv_list_lock);
	list_del_init(&e->list);
	spin_unlock(&elv_list_lock);
}
EXPORT_SYMBOL_GPL(elv_unregister);

/*
 * switch to new_e io scheduler. be careful not to introduce deadlocks -
 * we don't free the old io scheduler, before we have allocated what we
 * need for the new one. this way we have a chance of going back to the old
 * one, if the new one fails init for some reason.
 */
static int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
{
	struct elevator_queue *old_elevator, *e;
	void *data;

	/*
	 * Allocate new elevator
	 */
	e = elevator_alloc(q, new_e);
	if (!e)
		return 0;

	data = elevator_init_queue(q, e);
	if (!data) {
		kobject_put(&e->kobj);
		return 0;
	}

	/*
	 * Turn on BYPASS and drain all requests w/ elevator private data
	 */
	spin_lock_irq(q->queue_lock);
	elv_quiesce_start(q);

	/*
	 * Remember old elevator.
	 */
	old_elevator = q->elevator;

	/*
	 * attach and start new elevator
	 */
	elevator_attach(q, e, data);

	spin_unlock_irq(q->queue_lock);

	__elv_unregister_queue(old_elevator);

	if (elv_register_queue(q))
		goto fail_register;

	/*
	 * finally exit old elevator and turn off BYPASS.
	 */
	elevator_exit(old_elevator);
	spin_lock_irq(q->queue_lock);
	elv_quiesce_end(q);
	spin_unlock_irq(q->queue_lock);

	blk_add_trace_msg(q, "elv switch: %s", e->elevator_type->elevator_name);

	return 1;

fail_register:
	/*
	 * switch failed, exit the new io scheduler and reattach the old
	 * one again (along with re-adding the sysfs dir)
	 */
	elevator_exit(e);
	q->elevator = old_elevator;
	elv_register_queue(q);

	spin_lock_irq(q->queue_lock);
	queue_flag_clear(QUEUE_FLAG_ELVSWITCH, q);
	spin_unlock_irq(q->queue_lock);

	return 0;
}

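/*
 * sysfs hook for switching the io scheduler at runtime: parse the
 * elevator name and switch the queue over to it, unless that type is
 * already active.
 */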
ssize_t elv_iosched_store(struct request_queue *q, const char *name,
			  size_t count)
{
	char elevator_name[ELV_NAME_MAX];
	struct elevator_type *e;

	strlcpy(elevator_name, name, sizeof(elevator_name));
	strstrip(elevator_name);

	e = elevator_get(elevator_name);
	if (!e) {
		printk(KERN_ERR "elevator: type %s not found\n", elevator_name);
		return -EINVAL;
	}

	if (!strcmp(elevator_name, q->elevator->elevator_type->elevator_name)) {
		elevator_put(e);
		return count;
	}

	if (!elevator_switch(q, e))
		printk(KERN_ERR "elevator: switch to %s failed\n",
							elevator_name);
	return count;
}

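/*
 * sysfs hook for listing the registered io schedulers, with the
 * queue's active one marked in square brackets.
 */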
ssize_t elv_iosched_show(struct request_queue *q, char *name)
{
	struct elevator_queue *e = q->elevator;
	struct elevator_type *elv = e->elevator_type;
	struct elevator_type *__e;
	int len = 0;

	spin_lock(&elv_list_lock);
	list_for_each_entry(__e, &elv_list, list) {
		if (!strcmp(elv->elevator_name, __e->elevator_name))
			len += sprintf(name+len, "[%s] ", elv->elevator_name);
		else
			len += sprintf(name+len, "%s ", __e->elevator_name);
	}
	spin_unlock(&elv_list_lock);

	len += sprintf(len+name, "\n");
	return len;
}

struct request *elv_rb_former_request(struct request_queue *q,
				      struct request *rq)
{
	struct rb_node *rbprev = rb_prev(&rq->rb_node);

	if (rbprev)
		return rb_entry_rq(rbprev);

	return NULL;
}
EXPORT_SYMBOL(elv_rb_former_request);

struct request *elv_rb_latter_request(struct request_queue *q,
				      struct request *rq)
{
	struct rb_node *rbnext = rb_next(&rq->rb_node);

	if (rbnext)
		return rb_entry_rq(rbnext);

	return NULL;
}
EXPORT_SYMBOL(elv_rb_latter_request);