// SPDX-License-Identifier: GPL-2.0
/*
 * Functions related to segment and merge handling
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/scatterlist.h>

11 12
#include <trace/events/block.h>

13
#include "blk.h"
14
#include "blk-rq-qos.h"
15

16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32
static inline bool bio_will_gap(struct request_queue *q,
		struct request *prev_rq, struct bio *prev, struct bio *next)
{
	struct bio_vec pb, nb;

	if (!bio_has_data(prev) || !queue_virt_boundary(q))
		return false;

	/*
	 * Don't merge if the 1st bio starts with non-zero offset, otherwise it
	 * is quite difficult to respect the sg gap limit.  We work hard to
	 * merge a huge number of small single bios in case of mkfs.
	 */
	if (prev_rq)
		bio_get_first_bvec(prev_rq->bio, &pb);
	else
		bio_get_first_bvec(prev, &pb);
33
	if (pb.bv_offset & queue_virt_boundary(q))
34 35 36 37 38 39 40 41 42 43 44 45 46
		return true;

	/*
	 * We don't need to worry about the situation that the merged segment
	 * ends in unaligned virt boundary:
	 *
	 * - if 'pb' ends aligned, the merged segment ends aligned
	 * - if 'pb' ends unaligned, the next bio must include
	 *   one single bvec of 'nb', otherwise the 'nb' can't
	 *   merge with 'pb'
	 */
	bio_get_last_bvec(prev, &pb);
	bio_get_first_bvec(next, &nb);
47
	if (biovec_phys_mergeable(q, &pb, &nb))
48 49 50 51 52 53 54 55 56 57 58 59 60 61
		return false;
	return __bvec_gap_to_prev(q, &pb, nb.bv_offset);
}

/* Would appending @bio behind the last bio of @req create a gap? */
static inline bool req_gap_back_merge(struct request *req, struct bio *bio)
{
	struct bio *tail = req->biotail;

	return bio_will_gap(req->q, req, tail, bio);
}

/* Would placing @bio in front of the first bio of @req create a gap? */
static inline bool req_gap_front_merge(struct request *req, struct bio *bio)
{
	struct bio *head = req->bio;

	return bio_will_gap(req->q, NULL, bio, head);
}

62 63
static struct bio *blk_bio_discard_split(struct request_queue *q,
					 struct bio *bio,
64 65
					 struct bio_set *bs,
					 unsigned *nsegs)
66 67 68 69 70 71
{
	unsigned int max_discard_sectors, granularity;
	int alignment;
	sector_t tmp;
	unsigned split_sectors;

72 73
	*nsegs = 1;

74 75 76
	/* Zero-sector (unknown) and one-sector granularities are the same.  */
	granularity = max(q->limits.discard_granularity >> 9, 1U);

77 78
	max_discard_sectors = min(q->limits.max_discard_sectors,
			bio_allowed_max_sectors(q));
79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105
	max_discard_sectors -= max_discard_sectors % granularity;

	if (unlikely(!max_discard_sectors)) {
		/* XXX: warn */
		return NULL;
	}

	if (bio_sectors(bio) <= max_discard_sectors)
		return NULL;

	split_sectors = max_discard_sectors;

	/*
	 * If the next starting sector would be misaligned, stop the discard at
	 * the previous aligned sector.
	 */
	alignment = (q->limits.discard_alignment >> 9) % granularity;

	tmp = bio->bi_iter.bi_sector + split_sectors - alignment;
	tmp = sector_div(tmp, granularity);

	if (split_sectors > tmp)
		split_sectors -= tmp;

	return bio_split(bio, split_sectors, GFP_NOIO, bs);
}

106 107 108
/*
 * Split a write-zeroes bio that is larger than
 * q->limits.max_write_zeroes_sectors.
 *
 * *@nsegs is always set to 0.  Returns NULL when the limit is zero or the
 * bio fits within it, otherwise the leading part produced by bio_split().
 */
static struct bio *blk_bio_write_zeroes_split(struct request_queue *q,
		struct bio *bio, struct bio_set *bs, unsigned *nsegs)
{
	*nsegs = 0;

	if (!q->limits.max_write_zeroes_sectors ||
	    bio_sectors(bio) <= q->limits.max_write_zeroes_sectors)
		return NULL;

	return bio_split(bio, q->limits.max_write_zeroes_sectors, GFP_NOIO, bs);
}

120 121
static struct bio *blk_bio_write_same_split(struct request_queue *q,
					    struct bio *bio,
122 123
					    struct bio_set *bs,
					    unsigned *nsegs)
124
{
125 126
	*nsegs = 1;

127 128 129 130 131 132 133 134 135
	if (!q->limits.max_write_same_sectors)
		return NULL;

	if (bio_sectors(bio) <= q->limits.max_write_same_sectors)
		return NULL;

	return bio_split(bio, q->limits.max_write_same_sectors, GFP_NOIO, bs);
}

136 137 138 139 140 141 142 143
/*
 * Return the maximum number of sectors from the start of a bio that may be
 * submitted as a single request to a block device. If enough sectors remain,
 * align the end to the physical block size. Otherwise align the end to the
 * logical block size. This approach minimizes the number of non-aligned
 * requests that are submitted to a block device if the start of a bio is not
 * aligned to a physical block boundary.
 */
144 145 146
static inline unsigned get_max_io_size(struct request_queue *q,
				       struct bio *bio)
{
M
Mike Snitzer 已提交
147
	unsigned sectors = blk_max_size_offset(q, bio->bi_iter.bi_sector, 0);
148 149 150 151
	unsigned max_sectors = sectors;
	unsigned pbs = queue_physical_block_size(q) >> SECTOR_SHIFT;
	unsigned lbs = queue_logical_block_size(q) >> SECTOR_SHIFT;
	unsigned start_offset = bio->bi_iter.bi_sector & (pbs - 1);
152

153 154 155 156
	max_sectors += start_offset;
	max_sectors &= ~(pbs - 1);
	if (max_sectors > start_offset)
		return max_sectors - start_offset;
157

K
Keith Busch 已提交
158
	return sectors & ~(lbs - 1);
159 160
}

161 162 163
/*
 * Largest segment length that may start at @offset bytes into @start_page:
 * bounded by the distance to the end of the queue's segment boundary window
 * and by the queue's maximum segment size.
 */
static inline unsigned get_max_segment_size(const struct request_queue *q,
					    struct page *start_page,
					    unsigned long offset)
{
	unsigned long boundary = queue_segment_boundary(q);
	unsigned long room;

	/* Position of the start address inside the boundary window. */
	offset = boundary & (page_to_phys(start_page) + offset);
	room = boundary - offset + 1;

	/*
	 * With a zero page physical address on a 32bit arch the computation
	 * above may overflow to 0; min_not_zero() then falls back to the
	 * queue's max segment size.
	 */
	return min_not_zero(room,
			(unsigned long)queue_max_segment_size(q));
}

177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195
/**
 * bvec_split_segs - verify whether or not a bvec should be split in the middle
 * @q:        [in] request queue associated with the bio associated with @bv
 * @bv:       [in] bvec to examine
 * @nsegs:    [in,out] Number of segments in the bio being built. Incremented
 *            by the number of segments from @bv that may be appended to that
 *            bio without exceeding @max_segs
 * @sectors:  [in,out] Number of sectors in the bio being built. Incremented
 *            by the number of sectors from @bv that may be appended to that
 *            bio without exceeding @max_sectors
 * @max_segs: [in] upper bound for *@nsegs
 * @max_sectors: [in] upper bound for *@sectors
 *
 * When splitting a bio, it can happen that a bvec is encountered that is too
 * big to fit in a single segment and hence that it has to be split in the
 * middle. This function verifies whether or not that should happen. The value
 * %true is returned if and only if appending the entire @bv to a bio with
 * *@nsegs segments and *@sectors sectors would make that bio unacceptable for
 * the block driver.
196
 */
197 198
static bool bvec_split_segs(const struct request_queue *q,
			    const struct bio_vec *bv, unsigned *nsegs,
199 200
			    unsigned *sectors, unsigned max_segs,
			    unsigned max_sectors)
201
{
202 203
	unsigned max_len = (min(max_sectors, UINT_MAX >> 9) - *sectors) << 9;
	unsigned len = min(bv->bv_len, max_len);
204
	unsigned total_len = 0;
205
	unsigned seg_size = 0;
206

207
	while (len && *nsegs < max_segs) {
208 209
		seg_size = get_max_segment_size(q, bv->bv_page,
						bv->bv_offset + total_len);
210 211
		seg_size = min(seg_size, len);

212
		(*nsegs)++;
213 214 215 216 217 218 219
		total_len += seg_size;
		len -= seg_size;

		if ((bv->bv_offset + total_len) & queue_virt_boundary(q))
			break;
	}

220
	*sectors += total_len >> 9;
221

222 223
	/* tell the caller to split the bvec if it is too big to fit */
	return len > 0 || bv->bv_len > max_len;
224 225
}

226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244
/**
 * blk_bio_segment_split - split a bio in two bios
 * @q:    [in] request queue pointer
 * @bio:  [in] bio to be split
 * @bs:	  [in] bio set to allocate the clone from
 * @segs: [out] number of segments in the bio with the first half of the sectors
 *
 * Clone @bio, update the bi_iter of the clone to represent the first sectors
 * of @bio and update @bio->bi_iter to represent the remaining sectors. The
 * following is guaranteed for the cloned bio:
 * - That it has at most get_max_io_size(@q, @bio) sectors.
 * - That it has at most queue_max_segments(@q) segments.
 *
 * Except for discard requests the cloned bio will point at the bi_io_vec of
 * the original bio. It is the responsibility of the caller to ensure that the
 * original bio is not freed before the cloned bio. The caller is also
 * responsible for ensuring that @bs is only destroyed after processing of the
 * split bio has finished.
 */
245 246
static struct bio *blk_bio_segment_split(struct request_queue *q,
					 struct bio *bio,
247 248
					 struct bio_set *bs,
					 unsigned *segs)
249
{
250
	struct bio_vec bv, bvprv, *bvprvp = NULL;
251
	struct bvec_iter iter;
252
	unsigned nsegs = 0, sectors = 0;
253
	const unsigned max_sectors = get_max_io_size(q, bio);
254
	const unsigned max_segs = queue_max_segments(q);
255

256
	bio_for_each_bvec(bv, bio, iter) {
257 258 259 260
		/*
		 * If the queue doesn't support SG gaps and adding this
		 * offset would create a gap, disallow it.
		 */
261
		if (bvprvp && bvec_gap_to_prev(q, bvprvp, bv.bv_offset))
262 263
			goto split;

264 265 266 267 268 269 270
		if (nsegs < max_segs &&
		    sectors + (bv.bv_len >> 9) <= max_sectors &&
		    bv.bv_offset + bv.bv_len <= PAGE_SIZE) {
			nsegs++;
			sectors += bv.bv_len >> 9;
		} else if (bvec_split_segs(q, &bv, &nsegs, &sectors, max_segs,
					 max_sectors)) {
271
			goto split;
272 273
		}

274
		bvprv = bv;
M
Ming Lei 已提交
275
		bvprvp = &bvprv;
276 277
	}

278 279
	*segs = nsegs;
	return NULL;
280
split:
281
	*segs = nsegs;
J
Jeffle Xu 已提交
282 283 284 285 286 287 288 289

	/*
	 * Bio splitting may cause subtle trouble such as hang when doing sync
	 * iopoll in direct IO routine. Given performance gain of iopoll for
	 * big IO can be trival, disable iopoll when split needed.
	 */
	bio->bi_opf &= ~REQ_HIPRI;

290
	return bio_split(bio, sectors, GFP_NOIO, bs);
291 292
}

293 294 295 296 297 298 299 300
/**
 * __blk_queue_split - split a bio and submit the second half
 * @bio:     [in, out] bio to be split
 * @nr_segs: [out] number of segments in the first bio
 *
 * Split a bio into two bios, chain the two bios, submit the second half and
 * store a pointer to the first half in *@bio. If the second bio is still too
 * big it will be split by a recursive call to this function. Since this
301 302 303
 * function may allocate a new bio from q->bio_split, it is the responsibility
 * of the caller to ensure that q->bio_split is only released after processing
 * of the split bio has finished.
304
 */
305
void __blk_queue_split(struct bio **bio, unsigned int *nr_segs)
306
{
307
	struct request_queue *q = (*bio)->bi_bdev->bd_disk->queue;
308
	struct bio *split = NULL;
309

A
Adrian Hunter 已提交
310 311 312
	switch (bio_op(*bio)) {
	case REQ_OP_DISCARD:
	case REQ_OP_SECURE_ERASE:
313
		split = blk_bio_discard_split(q, *bio, &q->bio_split, nr_segs);
A
Adrian Hunter 已提交
314
		break;
315
	case REQ_OP_WRITE_ZEROES:
316 317
		split = blk_bio_write_zeroes_split(q, *bio, &q->bio_split,
				nr_segs);
318
		break;
A
Adrian Hunter 已提交
319
	case REQ_OP_WRITE_SAME:
320 321
		split = blk_bio_write_same_split(q, *bio, &q->bio_split,
				nr_segs);
A
Adrian Hunter 已提交
322 323
		break;
	default:
324 325 326 327 328 329 330 331 332 333
		/*
		 * All drivers must accept single-segments bios that are <=
		 * PAGE_SIZE.  This is a quick and dirty check that relies on
		 * the fact that bi_io_vec[0] is always valid if a bio has data.
		 * The check might lead to occasional false negatives when bios
		 * are cloned, but compared to the performance impact of cloned
		 * bios themselves the loop below doesn't matter anyway.
		 */
		if (!q->limits.chunk_sectors &&
		    (*bio)->bi_vcnt == 1 &&
334
		    ((*bio)->bi_io_vec[0].bv_len +
335
		     (*bio)->bi_io_vec[0].bv_offset) <= PAGE_SIZE) {
336 337 338
			*nr_segs = 1;
			break;
		}
339
		split = blk_bio_segment_split(q, *bio, &q->bio_split, nr_segs);
A
Adrian Hunter 已提交
340 341
		break;
	}
342

343
	if (split) {
M
Ming Lei 已提交
344
		/* there isn't chance to merge the splitted bio */
J
Jens Axboe 已提交
345
		split->bi_opf |= REQ_NOMERGE;
M
Ming Lei 已提交
346

347
		bio_chain(split, *bio);
348
		trace_block_split(split, (*bio)->bi_iter.bi_sector);
349
		submit_bio_noacct(*bio);
350 351 352
		*bio = split;
	}
}
353

354 355 356 357 358 359
/**
 * blk_queue_split - split a bio and submit the second half
 * @bio: [in, out] bio to be split
 *
 * Split a bio in two, chain both parts, submit the second part and leave a
 * pointer to the first part in *@bio. As a new bio may be allocated from
 * q->bio_split, the caller is responsible for releasing q->bio_split only
 * after processing of the split bio has finished.
 *
 * Same as __blk_queue_split() with the segment count discarded.
 */
void blk_queue_split(struct bio **bio)
{
	unsigned int unused_segs;

	__blk_queue_split(bio, &unused_segs);
}
EXPORT_SYMBOL(blk_queue_split);

372
unsigned int blk_recalc_rq_segments(struct request *rq)
373
{
374
	unsigned int nr_phys_segs = 0;
375
	unsigned int nr_sectors = 0;
376
	struct req_iterator iter;
377
	struct bio_vec bv;
378

379
	if (!rq->bio)
380
		return 0;
381

382
	switch (bio_op(rq->bio)) {
383 384
	case REQ_OP_DISCARD:
	case REQ_OP_SECURE_ERASE:
385 386 387 388 389 390 391 392
		if (queue_max_discard_segments(rq->q) > 1) {
			struct bio *bio = rq->bio;

			for_each_bio(bio)
				nr_phys_segs++;
			return nr_phys_segs;
		}
		return 1;
393
	case REQ_OP_WRITE_ZEROES:
394 395
		return 0;
	case REQ_OP_WRITE_SAME:
396
		return 1;
397
	}
398

399
	rq_for_each_bvec(bv, rq, iter)
400
		bvec_split_segs(rq->q, &bv, &nr_phys_segs, &nr_sectors,
401
				UINT_MAX, UINT_MAX);
402 403 404
	return nr_phys_segs;
}

405
/*
 * Return the scatterlist entry to fill next: the head of @sglist when no
 * entry has been used yet (*@sg is NULL), otherwise the entry after *@sg.
 */
static inline struct scatterlist *blk_next_sg(struct scatterlist **sg,
		struct scatterlist *sglist)
{
	struct scatterlist *cur = *sg;

	if (!cur)
		return sglist;

	/*
	 * A driver that mapped a shorter list earlier and does not fully
	 * re-init its sg table for every mapping may have left a termination
	 * bit behind. More entries MUST follow here (the driver would be
	 * buggy otherwise), so the bit is force-cleared; this spares drivers
	 * a full sg_init_table() per command.
	 */
	sg_unmark_end(cur);
	return sg_next(cur);
}

static unsigned blk_bvec_map_sg(struct request_queue *q,
		struct bio_vec *bvec, struct scatterlist *sglist,
		struct scatterlist **sg)
{
	unsigned nbytes = bvec->bv_len;
427
	unsigned nsegs = 0, total = 0;
428 429

	while (nbytes > 0) {
430
		unsigned offset = bvec->bv_offset + total;
431 432
		unsigned len = min(get_max_segment_size(q, bvec->bv_page,
					offset), nbytes);
433 434 435 436 437 438 439 440 441 442 443 444
		struct page *page = bvec->bv_page;

		/*
		 * Unfortunately a fair number of drivers barf on scatterlists
		 * that have an offset larger than PAGE_SIZE, despite other
		 * subsystems dealing with that invariant just fine.  For now
		 * stick to the legacy format where we never present those from
		 * the block layer, but the code below should be removed once
		 * these offenders (mostly MMC/SD drivers) are fixed.
		 */
		page += (offset >> PAGE_SHIFT);
		offset &= ~PAGE_MASK;
445 446

		*sg = blk_next_sg(sg, sglist);
447
		sg_set_page(*sg, page, len, offset);
448

449 450
		total += len;
		nbytes -= len;
451 452 453 454 455 456
		nsegs++;
	}

	return nsegs;
}

457 458 459 460 461 462 463 464
/*
 * Map @bv onto exactly one scatterlist entry, advancing *@sg to it.
 * Always returns 1, the number of entries used.
 */
static inline int __blk_bvec_map_sg(struct bio_vec bv,
		struct scatterlist *sglist, struct scatterlist **sg)
{
	struct scatterlist *slot = blk_next_sg(sg, sglist);

	*sg = slot;
	sg_set_page(slot, bv.bv_page, bv.bv_len, bv.bv_offset);
	return 1;
}

465 466 467 468
/* only try to merge bvecs into one sg if they are from two bios */
static inline bool
__blk_segment_map_sg_merge(struct request_queue *q, struct bio_vec *bvec,
			   struct bio_vec *bvprv, struct scatterlist **sg)
469 470 471 472
{

	int nbytes = bvec->bv_len;

473 474
	if (!*sg)
		return false;
475

476 477 478 479 480 481 482 483 484
	if ((*sg)->length + nbytes > queue_max_segment_size(q))
		return false;

	if (!biovec_phys_mergeable(q, bvprv, bvec))
		return false;

	(*sg)->length += nbytes;

	return true;
485 486
}

487 488 489
static int __blk_bios_map_sg(struct request_queue *q, struct bio *bio,
			     struct scatterlist *sglist,
			     struct scatterlist **sg)
490
{
491
	struct bio_vec bvec, bvprv = { NULL };
492
	struct bvec_iter iter;
493
	int nsegs = 0;
494
	bool new_bio = false;
495

496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513
	for_each_bio(bio) {
		bio_for_each_bvec(bvec, bio, iter) {
			/*
			 * Only try to merge bvecs from two bios given we
			 * have done bio internal merge when adding pages
			 * to bio
			 */
			if (new_bio &&
			    __blk_segment_map_sg_merge(q, &bvec, &bvprv, sg))
				goto next_bvec;

			if (bvec.bv_offset + bvec.bv_len <= PAGE_SIZE)
				nsegs += __blk_bvec_map_sg(bvec, sglist, sg);
			else
				nsegs += blk_bvec_map_sg(q, &bvec, sglist, sg);
 next_bvec:
			new_bio = false;
		}
514 515 516 517
		if (likely(bio->bi_iter.bi_size)) {
			bvprv = bvec;
			new_bio = true;
		}
518
	}
519

520 521 522 523 524 525 526
	return nsegs;
}

/*
 * map a request to scatterlist, return number of sg entries setup. Caller
 * must make sure sg can hold rq->nr_phys_segments entries
 */
527 528
int __blk_rq_map_sg(struct request_queue *q, struct request *rq,
		struct scatterlist *sglist, struct scatterlist **last_sg)
529 530 531
{
	int nsegs = 0;

532
	if (rq->rq_flags & RQF_SPECIAL_PAYLOAD)
533
		nsegs = __blk_bvec_map_sg(rq->special_vec, sglist, last_sg);
534
	else if (rq->bio && bio_op(rq->bio) == REQ_OP_WRITE_SAME)
535
		nsegs = __blk_bvec_map_sg(bio_iovec(rq->bio), sglist, last_sg);
536
	else if (rq->bio)
537
		nsegs = __blk_bios_map_sg(q, rq->bio, sglist, last_sg);
538

539 540
	if (*last_sg)
		sg_mark_end(*last_sg);
541

542 543 544 545
	/*
	 * Something must have been wrong if the figured number of
	 * segment is bigger than number of req's physical segments
	 */
546
	WARN_ON(nsegs > blk_rq_nr_phys_segments(rq));
547

548 549
	return nsegs;
}
550
EXPORT_SYMBOL(__blk_rq_map_sg);
551

552 553 554 555 556 557 558
/*
 * Segment limit that applies to @rq: the queue's discard segment limit for
 * REQ_OP_DISCARD, the regular segment limit for everything else.
 */
static inline unsigned int blk_rq_get_max_segments(struct request *rq)
{
	return req_op(rq) == REQ_OP_DISCARD ?
		queue_max_discard_segments(rq->q) :
		queue_max_segments(rq->q);
}

559 560
static inline int ll_new_hw_segment(struct request *req, struct bio *bio,
		unsigned int nr_phys_segs)
561
{
562
	if (req->nr_phys_segments + nr_phys_segs > blk_rq_get_max_segments(req))
563 564
		goto no_merge;

565
	if (blk_integrity_merge_bio(req->q, req, bio) == false)
566
		goto no_merge;
567 568 569 570 571 572 573

	/*
	 * This will form the start of a new hw segment.  Bump both
	 * counters.
	 */
	req->nr_phys_segments += nr_phys_segs;
	return 1;
574 575

no_merge:
576
	req_set_nomerge(req->q, req);
577
	return 0;
578 579
}

580
int ll_back_merge_fn(struct request *req, struct bio *bio, unsigned int nr_segs)
581
{
582 583
	if (req_gap_back_merge(req, bio))
		return 0;
584 585 586
	if (blk_integrity_rq(req) &&
	    integrity_req_gap_back_merge(req, bio))
		return 0;
587 588
	if (!bio_crypt_ctx_back_mergeable(req, bio))
		return 0;
589
	if (blk_rq_sectors(req) + bio_sectors(bio) >
D
Damien Le Moal 已提交
590
	    blk_rq_get_max_sectors(req, blk_rq_pos(req))) {
591
		req_set_nomerge(req->q, req);
592 593 594
		return 0;
	}

595
	return ll_new_hw_segment(req, bio, nr_segs);
596 597
}

598 599
static int ll_front_merge_fn(struct request *req, struct bio *bio,
		unsigned int nr_segs)
600
{
601 602
	if (req_gap_front_merge(req, bio))
		return 0;
603 604 605
	if (blk_integrity_rq(req) &&
	    integrity_req_gap_front_merge(req, bio))
		return 0;
606 607
	if (!bio_crypt_ctx_front_mergeable(req, bio))
		return 0;
608
	if (blk_rq_sectors(req) + bio_sectors(bio) >
D
Damien Le Moal 已提交
609
	    blk_rq_get_max_sectors(req, bio->bi_iter.bi_sector)) {
610
		req_set_nomerge(req->q, req);
611 612 613
		return 0;
	}

614
	return ll_new_hw_segment(req, bio, nr_segs);
615 616
}

617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634
/*
 * Check whether discard request @next can be merged into @req and, if so,
 * update @req's segment count to the sum of both.
 *
 * Returns false, after marking @req as not mergeable, when @req already
 * holds the maximum number of discard segments or the sector limit would be
 * exceeded by adding @next's first bio.
 */
static bool req_attempt_discard_merge(struct request_queue *q, struct request *req,
		struct request *next)
{
	unsigned short segments = blk_rq_nr_discard_segments(req);

	if (segments >= queue_max_discard_segments(q) ||
	    blk_rq_sectors(req) + bio_sectors(next->bio) >
	    blk_rq_get_max_sectors(req, blk_rq_pos(req))) {
		req_set_nomerge(q, req);
		return false;
	}

	req->nr_phys_segments = segments + blk_rq_nr_discard_segments(next);
	return true;
}

635 636 637 638 639
static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
				struct request *next)
{
	int total_phys_segments;

640
	if (req_gap_back_merge(req, next->bio))
641 642
		return 0;

643 644 645
	/*
	 * Will it become too large?
	 */
646
	if ((blk_rq_sectors(req) + blk_rq_sectors(next)) >
D
Damien Le Moal 已提交
647
	    blk_rq_get_max_sectors(req, blk_rq_pos(req)))
648 649 650
		return 0;

	total_phys_segments = req->nr_phys_segments + next->nr_phys_segments;
651
	if (total_phys_segments > blk_rq_get_max_segments(req))
652 653
		return 0;

654
	if (blk_integrity_merge_rq(q, req, next) == false)
655 656
		return 0;

657 658 659
	if (!bio_crypt_ctx_merge_rq(req, next))
		return 0;

660 661 662 663 664
	/* Merge is OK... */
	req->nr_phys_segments = total_phys_segments;
	return 1;
}

665 666 667 668 669 670 671 672 673 674 675 676 677 678
/**
 * blk_rq_set_mixed_merge - mark a request as mixed merge
 * @rq: request to mark as mixed merge
 *
 * Description:
 *     @rq is about to be mixed merged.  Make sure the attributes
 *     which can be mixed are set in each bio and mark @rq as mixed
 *     merged.
 */
void blk_rq_set_mixed_merge(struct request *rq)
{
	unsigned int ff = rq->cmd_flags & REQ_FAILFAST_MASK;
	struct bio *bio;

679
	if (rq->rq_flags & RQF_MIXED_MERGE)
680 681 682 683 684 685 686 687
		return;

	/*
	 * @rq will no longer represent mixable attributes for all the
	 * contained bios.  It will just track those of the first one.
	 * Distributes the attributs to each bio.
	 */
	for (bio = rq->bio; bio; bio = bio->bi_next) {
J
Jens Axboe 已提交
688 689 690
		WARN_ON_ONCE((bio->bi_opf & REQ_FAILFAST_MASK) &&
			     (bio->bi_opf & REQ_FAILFAST_MASK) != ff);
		bio->bi_opf |= ff;
691
	}
692
	rq->rq_flags |= RQF_MIXED_MERGE;
693 694
}

695
static void blk_account_io_merge_request(struct request *req)
696 697
{
	if (blk_do_io_stat(req)) {
698
		part_stat_lock();
699
		part_stat_inc(req->part, merges[op_stat_group(req_op(req))]);
700 701 702
		part_stat_unlock();
	}
}
703

704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719
/*
 * DISCARD merging comes in two flavours:
 * - with max_discard_segments > 1 the driver treats each bio as one range
 *   and sends all of them to the controller together; the ranges do not
 *   have to be contiguous;
 * - otherwise the bios/requests are handled like any other and have to be
 *   contiguous.
 *
 * Returns true for the first flavour.
 */
static inline bool blk_discard_mergable(struct request *req)
{
	return req_op(req) == REQ_OP_DISCARD &&
	       queue_max_discard_segments(req->q) > 1;
}

720 721
static enum elv_merge blk_try_req_merge(struct request *req,
					struct request *next)
722 723 724 725 726 727 728 729
{
	if (blk_discard_mergable(req))
		return ELEVATOR_DISCARD_MERGE;
	else if (blk_rq_pos(req) + blk_rq_sectors(req) == blk_rq_pos(next))
		return ELEVATOR_BACK_MERGE;

	return ELEVATOR_NO_MERGE;
}
730

731
/*
732 733
 * For non-mq, this has to be called with the request spinlock acquired.
 * For mq with scheduling, the appropriate queue wide lock should be held.
734
 */
735 736
static struct request *attempt_merge(struct request_queue *q,
				     struct request *req, struct request *next)
737 738
{
	if (!rq_mergeable(req) || !rq_mergeable(next))
739
		return NULL;
740

741
	if (req_op(req) != req_op(next))
742
		return NULL;
743

744
	if (rq_data_dir(req) != rq_data_dir(next)
745
	    || req->rq_disk != next->rq_disk)
746
		return NULL;
747

748
	if (req_op(req) == REQ_OP_WRITE_SAME &&
749
	    !blk_write_same_mergeable(req->bio, next->bio))
750
		return NULL;
751

752 753 754 755 756 757 758
	/*
	 * Don't allow merge of different write hints, or for a hint with
	 * non-hint IO.
	 */
	if (req->write_hint != next->write_hint)
		return NULL;

759 760 761
	if (req->ioprio != next->ioprio)
		return NULL;

762 763 764 765
	/*
	 * If we are allowed to merge, then append bio list
	 * from next to rq and release next. merge_requests_fn
	 * will have updated segment counts, update sector
766 767
	 * counts here. Handle DISCARDs separately, as they
	 * have separate settings.
768
	 */
769 770 771

	switch (blk_try_req_merge(req, next)) {
	case ELEVATOR_DISCARD_MERGE:
772 773
		if (!req_attempt_discard_merge(q, req, next))
			return NULL;
774 775 776 777 778 779
		break;
	case ELEVATOR_BACK_MERGE:
		if (!ll_merge_requests_fn(q, req, next))
			return NULL;
		break;
	default:
780
		return NULL;
781
	}
782

783 784 785 786 787 788
	/*
	 * If failfast settings disagree or any of the two is already
	 * a mixed merge, mark both as mixed before proceeding.  This
	 * makes sure that all involved bios have mixable attributes
	 * set properly.
	 */
789
	if (((req->rq_flags | next->rq_flags) & RQF_MIXED_MERGE) ||
790 791 792 793 794 795
	    (req->cmd_flags & REQ_FAILFAST_MASK) !=
	    (next->cmd_flags & REQ_FAILFAST_MASK)) {
		blk_rq_set_mixed_merge(req);
		blk_rq_set_mixed_merge(next);
	}

796
	/*
797 798 799
	 * At this point we have either done a back merge or front merge. We
	 * need the smaller start_time_ns of the merged requests to be the
	 * current request for accounting purposes.
800
	 */
801 802
	if (next->start_time_ns < req->start_time_ns)
		req->start_time_ns = next->start_time_ns;
803 804 805 806

	req->biotail->bi_next = next->bio;
	req->biotail = next->biotail;

807
	req->__data_len += blk_rq_bytes(next);
808

M
Ming Lei 已提交
809
	if (!blk_discard_mergable(req))
810
		elv_merge_requests(q, req, next);
811

812 813 814
	/*
	 * 'next' is going away, so update stats accordingly
	 */
815
	blk_account_io_merge_request(next);
816

817
	trace_block_rq_merge(next);
818

819 820 821 822
	/*
	 * ownership of bio passed from next to req, return 'next' for
	 * the caller to free
	 */
823
	next->bio = NULL;
824
	return next;
825 826
}

827 828
static struct request *attempt_back_merge(struct request_queue *q,
		struct request *rq)
829 830 831 832 833 834
{
	struct request *next = elv_latter_request(q, rq);

	if (next)
		return attempt_merge(q, rq, next);

835
	return NULL;
836 837
}

838 839
static struct request *attempt_front_merge(struct request_queue *q,
		struct request *rq)
840 841 842 843 844 845
{
	struct request *prev = elv_former_request(q, rq);

	if (prev)
		return attempt_merge(q, prev, rq);

846
	return NULL;
847
}
848 849 850 851

/*
 * Try to merge @next into @rq. On success the request handed back by
 * attempt_merge() is released with blk_put_request() and 1 is returned;
 * otherwise 0.
 */
int blk_attempt_req_merge(struct request_queue *q, struct request *rq,
			  struct request *next)
{
	struct request *free = attempt_merge(q, rq, next);

	if (!free)
		return 0;

	blk_put_request(free);
	return 1;
}
862 863 864

bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
{
865
	if (!rq_mergeable(rq) || !bio_mergeable(bio))
866 867
		return false;

868
	if (req_op(rq) != bio_op(bio))
869 870
		return false;

871 872 873 874
	/* different data direction or already started, don't merge */
	if (bio_data_dir(bio) != rq_data_dir(rq))
		return false;

875
	/* must be same device */
876
	if (rq->rq_disk != bio->bi_bdev->bd_disk)
877 878 879
		return false;

	/* only merge integrity protected bio into ditto rq */
880
	if (blk_integrity_merge_bio(rq->q, rq, bio) == false)
881 882
		return false;

883 884 885 886
	/* Only merge if the crypt contexts are compatible */
	if (!bio_crypt_rq_ctx_compatible(rq, bio))
		return false;

887
	/* must be using the same buffer */
888
	if (req_op(rq) == REQ_OP_WRITE_SAME &&
889 890 891
	    !blk_write_same_mergeable(rq->bio, bio))
		return false;

892 893 894 895 896 897 898
	/*
	 * Don't allow merge of different write hints, or for a hint with
	 * non-hint IO.
	 */
	if (rq->write_hint != bio->bi_write_hint)
		return false;

899 900 901
	if (rq->ioprio != bio_prio(bio))
		return false;

902 903 904
	return true;
}

905
enum elv_merge blk_try_merge(struct request *rq, struct bio *bio)
906
{
907
	if (blk_discard_mergable(rq))
908 909
		return ELEVATOR_DISCARD_MERGE;
	else if (blk_rq_pos(rq) + blk_rq_sectors(rq) == bio->bi_iter.bi_sector)
910
		return ELEVATOR_BACK_MERGE;
911
	else if (blk_rq_pos(rq) - bio_sectors(bio) == bio->bi_iter.bi_sector)
912 913 914
		return ELEVATOR_FRONT_MERGE;
	return ELEVATOR_NO_MERGE;
}
915 916 917 918 919 920 921 922 923 924 925

/*
 * Count one merge in the partition statistics of @req after a bio has been
 * merged into it, provided I/O statistics are enabled for @req.
 */
static void blk_account_io_merge_bio(struct request *req)
{
	if (blk_do_io_stat(req)) {
		part_stat_lock();
		part_stat_inc(req->part, merges[op_stat_group(req_op(req))]);
		part_stat_unlock();
	}
}

926 927 928 929 930 931 932 933
/*
 * Outcome of trying to merge a bio into a request, as reported by
 * blk_attempt_bio_merge() and the bio_attempt_*_merge() helpers.
 */
enum bio_merge_status {
	/* the bio was merged into the request */
	BIO_MERGE_OK,
	/* no merge attempted: request and bio incompatible or not adjacent */
	BIO_MERGE_NONE,
	/* a merge was considered but rejected by a limit check or scheduler */
	BIO_MERGE_FAILED,
};

static enum bio_merge_status bio_attempt_back_merge(struct request *req,
		struct bio *bio, unsigned int nr_segs)
934 935 936 937
{
	const int ff = bio->bi_opf & REQ_FAILFAST_MASK;

	if (!ll_back_merge_fn(req, bio, nr_segs))
938
		return BIO_MERGE_FAILED;
939

940
	trace_block_bio_backmerge(bio);
941 942 943 944 945 946 947 948 949 950 951 952
	rq_qos_merge(req->q, req, bio);

	if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
		blk_rq_set_mixed_merge(req);

	req->biotail->bi_next = bio;
	req->biotail = bio;
	req->__data_len += bio->bi_iter.bi_size;

	bio_crypt_free_ctx(bio);

	blk_account_io_merge_bio(req);
953
	return BIO_MERGE_OK;
954 955
}

956 957
static enum bio_merge_status bio_attempt_front_merge(struct request *req,
		struct bio *bio, unsigned int nr_segs)
958 959 960 961
{
	const int ff = bio->bi_opf & REQ_FAILFAST_MASK;

	if (!ll_front_merge_fn(req, bio, nr_segs))
962
		return BIO_MERGE_FAILED;
963

964
	trace_block_bio_frontmerge(bio);
965 966 967 968 969 970 971 972 973 974 975 976 977 978
	rq_qos_merge(req->q, req, bio);

	if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
		blk_rq_set_mixed_merge(req);

	bio->bi_next = req->bio;
	req->bio = bio;

	req->__sector = bio->bi_iter.bi_sector;
	req->__data_len += bio->bi_iter.bi_size;

	bio_crypt_do_front_merge(req, bio);

	blk_account_io_merge_bio(req);
979
	return BIO_MERGE_OK;
980 981
}

982 983
static enum bio_merge_status bio_attempt_discard_merge(struct request_queue *q,
		struct request *req, struct bio *bio)
984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000
{
	unsigned short segments = blk_rq_nr_discard_segments(req);

	if (segments >= queue_max_discard_segments(q))
		goto no_merge;
	if (blk_rq_sectors(req) + bio_sectors(bio) >
	    blk_rq_get_max_sectors(req, blk_rq_pos(req)))
		goto no_merge;

	rq_qos_merge(q, req, bio);

	req->biotail->bi_next = bio;
	req->biotail = bio;
	req->__data_len += bio->bi_iter.bi_size;
	req->nr_phys_segments = segments + 1;

	blk_account_io_merge_bio(req);
1001
	return BIO_MERGE_OK;
1002 1003
no_merge:
	req_set_nomerge(q, req);
1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017
	return BIO_MERGE_FAILED;
}

/*
 * Try to merge @bio into @rq, choosing the merge direction with
 * blk_try_merge().
 *
 * @q:                 request queue the bio is being queued on
 * @rq:                candidate request
 * @bio:               bio to merge
 * @nr_segs:           number of segments in @bio
 * @sched_allow_merge: when true, a back or front merge is only attempted if
 *                     blk_mq_sched_allow_merge() agrees; when false that
 *                     check is skipped.  Discard merges never consult it.
 *
 * Returns:
 *  BIO_MERGE_NONE   - blk_rq_merge_ok() refused the pair, or blk_try_merge()
 *                     found no back, front or discard merge
 *  BIO_MERGE_FAILED - a back or front merge was possible but
 *                     blk_mq_sched_allow_merge() vetoed it, or the
 *                     direction-specific helper failed
 *  BIO_MERGE_OK     - @bio was merged into @rq
 */
static enum bio_merge_status blk_attempt_bio_merge(struct request_queue *q,
						   struct request *rq,
						   struct bio *bio,
						   unsigned int nr_segs,
						   bool sched_allow_merge)
{
	if (!blk_rq_merge_ok(rq, bio))
		return BIO_MERGE_NONE;

	switch (blk_try_merge(rq, bio)) {
	case ELEVATOR_BACK_MERGE:
		if (!sched_allow_merge || blk_mq_sched_allow_merge(q, rq, bio))
			return bio_attempt_back_merge(rq, bio, nr_segs);
		break;
	case ELEVATOR_FRONT_MERGE:
		if (!sched_allow_merge || blk_mq_sched_allow_merge(q, rq, bio))
			return bio_attempt_front_merge(rq, bio, nr_segs);
		break;
	case ELEVATOR_DISCARD_MERGE:
		return bio_attempt_discard_merge(q, rq, bio);
	default:
		return BIO_MERGE_NONE;
	}

	/* Reached only when blk_mq_sched_allow_merge() refused the merge. */
	return BIO_MERGE_FAILED;
}

/**
 * blk_attempt_plug_merge - try to merge with %current's plugged list
 * @q: request_queue new bio is being queued at
 * @bio: new bio being queued
 * @nr_segs: number of segments in @bio
 * @same_queue_rq: pointer to &struct request that gets filled in when
 * another request associated with @q is found on the plug list
 * (optional, may be %NULL).  It is overwritten for every request on @q
 * that is visited, so it ends up pointing at the last one examined.
 *
 * Determine whether @bio being queued on @q can be merged with a request
 * on %current's plugged list.  Returns %true if merge was successful,
 * otherwise %false.  %false is also returned when blk_mq_plug() provides
 * no plug.
 *
 * Plugging coalesces IOs from the same issuer for the same purpose without
 * going through @q->queue_lock.  As such it's more of an issuing mechanism
 * than scheduling, and the request, while may have elvpriv data, is not
 * added on the elevator at this point.  In addition, we don't have
 * reliable access to the elevator outside queue lock.  Only check basic
 * merging parameters without querying the elevator.
 *
 * Caller must ensure !blk_queue_nomerges(q) beforehand.
 */
bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio,
		unsigned int nr_segs, struct request **same_queue_rq)
{
	struct blk_plug *plug;
	struct request *rq;

	plug = blk_mq_plug(q, bio);
	if (!plug)
		return false;

	/* Scan the plugged requests in reverse list order. */
	list_for_each_entry_reverse(rq, &plug->mq_list, queuelist) {
		/* Requests plugged for other queues are not merge candidates. */
		if (rq->q != q)
			continue;

		/*
		 * Only blk-mq multiple hardware queues case checks the
		 * rq in the same queue, there should be only one such
		 * rq in a queue
		 */
		if (same_queue_rq)
			*same_queue_rq = rq;

		/* sched_allow_merge is false: the elevator is not consulted. */
		if (blk_attempt_bio_merge(q, rq, bio, nr_segs, false) ==
		    BIO_MERGE_OK)
			return true;
	}

	return false;
}

/*
 * Iterate list of requests and see if we can merge this bio with any
 * of them.
 *
 * @q:       request queue the bio is being queued on
 * @list:    list of requests, linked through their queuelist member
 * @bio:     bio to merge
 * @nr_segs: number of segments in @bio
 *
 * The list is walked in reverse order and at most 8 requests are examined.
 * Returns true as soon as one merge succeeds.  Returns false when the list
 * or the scan budget is exhausted, and also as soon as a merge attempt
 * reports BIO_MERGE_FAILED; the remaining requests are not tried then.
 */
bool blk_bio_list_merge(struct request_queue *q, struct list_head *list,
			struct bio *bio, unsigned int nr_segs)
{
	struct request *rq;
	int checked = 8;	/* upper bound on requests inspected */

	list_for_each_entry_reverse(rq, list, queuelist) {
		if (!checked--)
			break;

		/* sched_allow_merge is true: blk_mq_sched_allow_merge() may veto. */
		switch (blk_attempt_bio_merge(q, rq, bio, nr_segs, true)) {
		case BIO_MERGE_NONE:
			continue;
		case BIO_MERGE_OK:
			return true;
		case BIO_MERGE_FAILED:
			return false;
		}

	}

	return false;
}
EXPORT_SYMBOL_GPL(blk_bio_list_merge);

bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio,
		unsigned int nr_segs, struct request **merged_request)
{
	struct request *rq;

	switch (elv_merge(q, &rq, bio)) {
	case ELEVATOR_BACK_MERGE:
		if (!blk_mq_sched_allow_merge(q, rq, bio))
			return false;
		if (bio_attempt_back_merge(rq, bio, nr_segs) != BIO_MERGE_OK)
			return false;
		*merged_request = attempt_back_merge(q, rq);
		if (!*merged_request)
			elv_merged_request(q, rq, ELEVATOR_BACK_MERGE);
		return true;
	case ELEVATOR_FRONT_MERGE:
		if (!blk_mq_sched_allow_merge(q, rq, bio))
			return false;
		if (bio_attempt_front_merge(rq, bio, nr_segs) != BIO_MERGE_OK)
			return false;
		*merged_request = attempt_front_merge(q, rq);
		if (!*merged_request)
			elv_merged_request(q, rq, ELEVATOR_FRONT_MERGE);
		return true;
	case ELEVATOR_DISCARD_MERGE:
		return bio_attempt_discard_merge(q, rq, bio) == BIO_MERGE_OK;
	default:
		return false;
	}
}
EXPORT_SYMBOL_GPL(blk_mq_sched_try_merge);