// SPDX-License-Identifier: GPL-2.0
/*
 * Functions related to generic helper functions
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/scatterlist.h>

#include "blk.h"

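/*
 * Chain the previously built bio (if any) to a newly allocated one and
 * submit it, so the caller only ever holds the last, not yet submitted
 * bio of the chain.
 */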
static struct bio *next_bio(struct bio *bio, unsigned int nr_pages,
		gfp_t gfp)
{
	struct bio *new = bio_alloc(gfp, nr_pages);

	if (bio) {
		bio_chain(bio, new);
		submit_bio(bio);
	}

	return new;
}

int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
		sector_t nr_sects, gfp_t gfp_mask, int flags,
		struct bio **biop)
{
	struct request_queue *q = bdev_get_queue(bdev);
	struct bio *bio = *biop;
	unsigned int granularity;
	unsigned int op;
	int alignment;
	sector_t bs_mask;

	if (!q)
		return -ENXIO;

	if (flags & BLKDEV_DISCARD_SECURE) {
		if (!blk_queue_secure_erase(q))
			return -EOPNOTSUPP;
		op = REQ_OP_SECURE_ERASE;
	} else {
		if (!blk_queue_discard(q))
			return -EOPNOTSUPP;
		op = REQ_OP_DISCARD;
	}

	bs_mask = (bdev_logical_block_size(bdev) >> 9) - 1;
	if ((sector | nr_sects) & bs_mask)
		return -EINVAL;

	/* Zero-sector (unknown) and one-sector granularities are the same.  */
	granularity = max(q->limits.discard_granularity >> 9, 1U);
	alignment = (bdev_discard_alignment(bdev) >> 9) % granularity;

	while (nr_sects) {
		unsigned int req_sects;
		sector_t end_sect, tmp;

		/* Make sure bi_size doesn't overflow */
		req_sects = min_t(sector_t, nr_sects, UINT_MAX >> 9);

		/*
		 * If splitting a request, and the next starting sector would be
		 * misaligned, stop the discard at the previous aligned sector.
		 */
		end_sect = sector + req_sects;
		tmp = end_sect;
		if (req_sects < nr_sects &&
		    sector_div(tmp, granularity) != alignment) {
			end_sect = end_sect - alignment;
			sector_div(end_sect, granularity);
			end_sect = end_sect * granularity + alignment;
			req_sects = end_sect - sector;
		}
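
		/*
		 * Worked example (illustrative, not from the original source):
		 * with granularity = 8 and alignment = 0, a request starting
		 * at sector 0 for more than 8388607 sectors is first capped
		 * to req_sects = 8388607. Since 8388607 % 8 == 7 differs from
		 * the alignment, end_sect is rounded down to 8388600 so the
		 * next chunk starts on a discard-aligned boundary.
		 */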

		bio = next_bio(bio, 0, gfp_mask);
		bio->bi_iter.bi_sector = sector;
		bio_set_dev(bio, bdev);
		bio_set_op_attrs(bio, op, 0);

		bio->bi_iter.bi_size = req_sects << 9;
		nr_sects -= req_sects;
		sector = end_sect;

		/*
		 * We can loop for a long time in here, if someone does
		 * full device discards (like mkfs). Be nice and allow
		 * us to schedule out to avoid softlocking if preempt
		 * is disabled.
		 */
		cond_resched();
	}

	*biop = bio;
	return 0;
}
EXPORT_SYMBOL(__blkdev_issue_discard);

/**
 * blkdev_issue_discard - queue a discard
 * @bdev:	blockdev to issue discard for
 * @sector:	start sector
 * @nr_sects:	number of sectors to discard
 * @gfp_mask:	memory allocation flags (for bio_alloc)
 * @flags:	BLKDEV_DISCARD_* flags to control behaviour
 *
 * Description:
 *    Issue a discard request for the sectors in question.
 */
int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
		sector_t nr_sects, gfp_t gfp_mask, unsigned long flags)
{
	struct bio *bio = NULL;
	struct blk_plug plug;
	int ret;

	blk_start_plug(&plug);
	ret = __blkdev_issue_discard(bdev, sector, nr_sects, gfp_mask, flags,
			&bio);
	if (!ret && bio) {
		ret = submit_bio_wait(bio);
		if (ret == -EOPNOTSUPP)
			ret = 0;
		bio_put(bio);
	}
	blk_finish_plug(&plug);

	return ret;
}
EXPORT_SYMBOL(blkdev_issue_discard);
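
/*
 * Hypothetical caller sketch (not part of blk-lib.c): a filesystem discarding
 * a freed extent from process context. blkdev_issue_discard() blocks until
 * the chained bios complete; -EOPNOTSUPP is returned when the device does
 * not support discard at all.
 */
static inline int example_discard_extent(struct block_device *bdev,
		sector_t start, sector_t nr_sects)
{
	/* GFP_NOFS: the example assumes this may run under filesystem locks. */
	return blkdev_issue_discard(bdev, start, nr_sects, GFP_NOFS, 0);
}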

/**
 * __blkdev_issue_write_same - generate a number of bios with the same page
 * @bdev:	target blockdev
 * @sector:	start sector
 * @nr_sects:	number of sectors to write
 * @gfp_mask:	memory allocation flags (for bio_alloc)
 * @page:	page containing data to write
 * @biop:	pointer to anchor bio
 *
 * Description:
 *  Generate and issue a number of REQ_OP_WRITE_SAME bios that all write the
 *  same page.
 */
static int __blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
		sector_t nr_sects, gfp_t gfp_mask, struct page *page,
		struct bio **biop)
{
	struct request_queue *q = bdev_get_queue(bdev);
	unsigned int max_write_same_sectors;
	struct bio *bio = *biop;
	sector_t bs_mask;

	if (!q)
		return -ENXIO;

	bs_mask = (bdev_logical_block_size(bdev) >> 9) - 1;
	if ((sector | nr_sects) & bs_mask)
		return -EINVAL;

	if (!bdev_write_same(bdev))
		return -EOPNOTSUPP;

	/* Ensure that max_write_same_sectors doesn't overflow bi_size */
	max_write_same_sectors = UINT_MAX >> 9;

	while (nr_sects) {
		bio = next_bio(bio, 1, gfp_mask);
		bio->bi_iter.bi_sector = sector;
		bio_set_dev(bio, bdev);
		bio->bi_vcnt = 1;
		bio->bi_io_vec->bv_page = page;
		bio->bi_io_vec->bv_offset = 0;
		bio->bi_io_vec->bv_len = bdev_logical_block_size(bdev);
		bio_set_op_attrs(bio, REQ_OP_WRITE_SAME, 0);

		if (nr_sects > max_write_same_sectors) {
			bio->bi_iter.bi_size = max_write_same_sectors << 9;
			nr_sects -= max_write_same_sectors;
			sector += max_write_same_sectors;
		} else {
			bio->bi_iter.bi_size = nr_sects << 9;
			nr_sects = 0;
		}
		cond_resched();
	}

	*biop = bio;
	return 0;
}

/**
 * blkdev_issue_write_same - queue a write same operation
 * @bdev:	target blockdev
 * @sector:	start sector
 * @nr_sects:	number of sectors to write
 * @gfp_mask:	memory allocation flags (for bio_alloc)
 * @page:	page containing data
 *
 * Description:
 *    Issue a write same request for the sectors in question.
 */
int blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
				sector_t nr_sects, gfp_t gfp_mask,
				struct page *page)
{
	struct bio *bio = NULL;
	struct blk_plug plug;
	int ret;

	blk_start_plug(&plug);
	ret = __blkdev_issue_write_same(bdev, sector, nr_sects, gfp_mask, page,
			&bio);
	if (ret == 0 && bio) {
		ret = submit_bio_wait(bio);
		bio_put(bio);
	}
	blk_finish_plug(&plug);
	return ret;
}
}
EXPORT_SYMBOL(blkdev_issue_write_same);
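
/*
 * Hypothetical caller sketch (not part of blk-lib.c): replicate one logical
 * block of pattern data across a range of sectors. The device must advertise
 * WRITE SAME support (bdev_write_same()), otherwise -EOPNOTSUPP is returned;
 * the page is expected to hold at least one logical block of pattern data.
 */
static inline int example_write_same_pattern(struct block_device *bdev,
		sector_t start, sector_t nr_sects, struct page *pattern_page)
{
	return blkdev_issue_write_same(bdev, start, nr_sects, GFP_KERNEL,
				       pattern_page);
}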

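/*
 * Issue REQ_OP_WRITE_ZEROES bios for the whole range, splitting at the
 * device's write-zeroes limit. BLKDEV_ZERO_NOUNMAP is translated into
 * REQ_NOUNMAP so that thinly provisioned devices keep the blocks allocated.
 */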
static int __blkdev_issue_write_zeroes(struct block_device *bdev,
		sector_t sector, sector_t nr_sects, gfp_t gfp_mask,
		struct bio **biop, unsigned flags)
{
	struct bio *bio = *biop;
	unsigned int max_write_zeroes_sectors;
	struct request_queue *q = bdev_get_queue(bdev);

	if (!q)
		return -ENXIO;

	/* Ensure that max_write_zeroes_sectors doesn't overflow bi_size */
	max_write_zeroes_sectors = bdev_write_zeroes_sectors(bdev);

	if (max_write_zeroes_sectors == 0)
		return -EOPNOTSUPP;

	while (nr_sects) {
		bio = next_bio(bio, 0, gfp_mask);
		bio->bi_iter.bi_sector = sector;
		bio_set_dev(bio, bdev);
		bio->bi_opf = REQ_OP_WRITE_ZEROES;
		if (flags & BLKDEV_ZERO_NOUNMAP)
			bio->bi_opf |= REQ_NOUNMAP;

		if (nr_sects > max_write_zeroes_sectors) {
			bio->bi_iter.bi_size = max_write_zeroes_sectors << 9;
			nr_sects -= max_write_zeroes_sectors;
			sector += max_write_zeroes_sectors;
		} else {
			bio->bi_iter.bi_size = nr_sects << 9;
			nr_sects = 0;
		}
		cond_resched();
	}

	*biop = bio;
	return 0;
}

/*
 * Convert a number of 512B sectors to a number of pages.
 * The result is limited to a number of pages that can fit into a BIO.
 * Also make sure that the result is always at least 1 (page) for the cases
 * where nr_sects is lower than the number of sectors in a page.
 */
static unsigned int __blkdev_sectors_to_bio_pages(sector_t nr_sects)
{
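	/*
	 * Example (illustrative): with 4 KiB pages, PAGE_SIZE / 512 is 8, so
	 * nr_sects = 7 still maps to a single page, while very large values
	 * are capped at BIO_MAX_PAGES pages per bio.
	 */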
	sector_t pages = DIV_ROUND_UP_SECTOR_T(nr_sects, PAGE_SIZE / 512);

	return min(pages, (sector_t)BIO_MAX_PAGES);
}

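/*
 * Fallback path: zero the range by repeatedly writing ZERO_PAGE(0) with
 * plain REQ_OP_WRITE bios. bio_add_page() stops accepting pages once a bio
 * is full, at which point the inner loop breaks and a new bio is chained
 * via next_bio().
 */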
static int __blkdev_issue_zero_pages(struct block_device *bdev,
		sector_t sector, sector_t nr_sects, gfp_t gfp_mask,
		struct bio **biop)
{
	struct request_queue *q = bdev_get_queue(bdev);
	struct bio *bio = *biop;
	int bi_size = 0;
	unsigned int sz;

	if (!q)
		return -ENXIO;

	while (nr_sects != 0) {
		bio = next_bio(bio, __blkdev_sectors_to_bio_pages(nr_sects),
			       gfp_mask);
		bio->bi_iter.bi_sector = sector;
		bio_set_dev(bio, bdev);
		bio_set_op_attrs(bio, REQ_OP_WRITE, 0);

		while (nr_sects != 0) {
			sz = min((sector_t) PAGE_SIZE, nr_sects << 9);
			bi_size = bio_add_page(bio, ZERO_PAGE(0), sz, 0);
			nr_sects -= bi_size >> 9;
			sector += bi_size >> 9;
			if (bi_size < sz)
				break;
		}
		cond_resched();
	}

	*biop = bio;
	return 0;
}

/**
 * __blkdev_issue_zeroout - generate a number of zero-filled write bios
 * @bdev:	blockdev to issue
 * @sector:	start sector
 * @nr_sects:	number of sectors to write
 * @gfp_mask:	memory allocation flags (for bio_alloc)
 * @biop:	pointer to anchor bio
 * @flags:	controls detailed behavior
 *
 * Description:
 *  Zero-fill a block range, either using hardware offload or by explicitly
 *  writing zeroes to the device.
 *
 *  If a device is using logical block provisioning, the underlying space will
 *  not be released if %flags contains BLKDEV_ZERO_NOUNMAP.
 *
 *  If %flags contains BLKDEV_ZERO_NOFALLBACK, the function will return
 *  -EOPNOTSUPP if no explicit hardware offload for zeroing is provided.
 */
int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
		sector_t nr_sects, gfp_t gfp_mask, struct bio **biop,
		unsigned flags)
{
	int ret;
	sector_t bs_mask;

	bs_mask = (bdev_logical_block_size(bdev) >> 9) - 1;
	if ((sector | nr_sects) & bs_mask)
		return -EINVAL;

	ret = __blkdev_issue_write_zeroes(bdev, sector, nr_sects, gfp_mask,
			biop, flags);
	if (ret != -EOPNOTSUPP || (flags & BLKDEV_ZERO_NOFALLBACK))
		return ret;

	return __blkdev_issue_zero_pages(bdev, sector, nr_sects, gfp_mask,
					 biop);
}
EXPORT_SYMBOL(__blkdev_issue_zeroout);

/**
 * blkdev_issue_zeroout - zero-fill a block range
 * @bdev:	blockdev to write
 * @sector:	start sector
 * @nr_sects:	number of sectors to write
 * @gfp_mask:	memory allocation flags (for bio_alloc)
 * @flags:	controls detailed behavior
 *
 * Description:
 *  Zero-fill a block range, either using hardware offload or by explicitly
 *  writing zeroes to the device.  See __blkdev_issue_zeroout() for the
 *  valid values for %flags.
 */
int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
		sector_t nr_sects, gfp_t gfp_mask, unsigned flags)
{
	int ret = 0;
	sector_t bs_mask;
	struct bio *bio;
	struct blk_plug plug;
	bool try_write_zeroes = !!bdev_write_zeroes_sectors(bdev);

	bs_mask = (bdev_logical_block_size(bdev) >> 9) - 1;
	if ((sector | nr_sects) & bs_mask)
		return -EINVAL;

retry:
	bio = NULL;
	blk_start_plug(&plug);
	if (try_write_zeroes) {
		ret = __blkdev_issue_write_zeroes(bdev, sector, nr_sects,
						  gfp_mask, &bio, flags);
	} else if (!(flags & BLKDEV_ZERO_NOFALLBACK)) {
		ret = __blkdev_issue_zero_pages(bdev, sector, nr_sects,
						gfp_mask, &bio);
	} else {
		/* No zeroing offload support */
		ret = -EOPNOTSUPP;
	}
	if (ret == 0 && bio) {
		ret = submit_bio_wait(bio);
		bio_put(bio);
	}
	blk_finish_plug(&plug);
	if (ret && try_write_zeroes) {
		if (!(flags & BLKDEV_ZERO_NOFALLBACK)) {
			try_write_zeroes = false;
			goto retry;
		}
		if (!bdev_write_zeroes_sectors(bdev)) {
			/*
			 * Zeroing offload support was indicated, but the
			 * device reported ILLEGAL REQUEST (for some devices
			 * there is no non-destructive way to verify whether
			 * WRITE ZEROES is actually supported).
			 */
			ret = -EOPNOTSUPP;
		}
	}

	return ret;
}
EXPORT_SYMBOL(blkdev_issue_zeroout);
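
/*
 * Hypothetical caller sketch (not part of blk-lib.c): zero a range only if
 * the device offers a zeroing offload. With BLKDEV_ZERO_NOFALLBACK the call
 * returns -EOPNOTSUPP instead of falling back to explicitly writing
 * ZERO_PAGE data.
 */
static inline int example_zero_range_offload_only(struct block_device *bdev,
		sector_t start, sector_t nr_sects)
{
	return blkdev_issue_zeroout(bdev, start, nr_sects, GFP_KERNEL,
				    BLKDEV_ZERO_NOFALLBACK);
}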