/*
 * Functions related to generic helper functions
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/scatterlist.h>

#include "blk.h"

/*
 * Allocate a new bio and, if @bio is non-NULL, chain it to the new one and
 * submit it.  bio_chain(bio, new) makes the new bio the parent, so the new
 * bio does not complete until @bio has; a caller therefore only needs to
 * wait on the bio returned by the final call to know the whole chain is done.
 */
static struct bio *next_bio(struct bio *bio, unsigned int nr_pages,
		gfp_t gfp)
{
	struct bio *new = bio_alloc(gfp, nr_pages);

	if (bio) {
		bio_chain(bio, new);
		submit_bio(bio);
	}

	return new;
}

int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
		sector_t nr_sects, gfp_t gfp_mask, int flags,
		struct bio **biop)
{
	struct request_queue *q = bdev_get_queue(bdev);
	struct bio *bio = *biop;
	unsigned int granularity;
	unsigned int op;
	int alignment;
	sector_t bs_mask;

	if (!q)
		return -ENXIO;

	if (flags & BLKDEV_DISCARD_SECURE) {
		if (!blk_queue_secure_erase(q))
			return -EOPNOTSUPP;
		op = REQ_OP_SECURE_ERASE;
	} else {
		if (!blk_queue_discard(q))
			return -EOPNOTSUPP;
		op = REQ_OP_DISCARD;
	}

	bs_mask = (bdev_logical_block_size(bdev) >> 9) - 1;
	if ((sector | nr_sects) & bs_mask)
		return -EINVAL;

	/* Zero-sector (unknown) and one-sector granularities are the same.  */
	granularity = max(q->limits.discard_granularity >> 9, 1U);
	alignment = (bdev_discard_alignment(bdev) >> 9) % granularity;

	while (nr_sects) {
		unsigned int req_sects;
		sector_t end_sect, tmp;

		/* Make sure bi_size doesn't overflow */
		req_sects = min_t(sector_t, nr_sects, UINT_MAX >> 9);

		/*
		 * If splitting a request, and the next starting sector would be
		 * misaligned, stop the discard at the previous aligned sector.
		 */
		end_sect = sector + req_sects;
		tmp = end_sect;
		if (req_sects < nr_sects &&
		    sector_div(tmp, granularity) != alignment) {
			end_sect = end_sect - alignment;
			sector_div(end_sect, granularity);
			end_sect = end_sect * granularity + alignment;
			req_sects = end_sect - sector;
		}
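
		/*
		 * Worked example (illustrative numbers, not from the original
		 * source): with granularity = 2048 sectors (1 MiB), alignment
		 * = 0, sector = 0 and a very large nr_sects, req_sects is
		 * first capped at UINT_MAX >> 9 = 8388607.  Since
		 * 8388607 % 2048 == 2047, end_sect is rounded down to
		 * 8386560 (4095 * 2048) and req_sects becomes 8386560, so
		 * the next iteration starts on a granularity-aligned sector.
		 */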

		bio = next_bio(bio, 0, gfp_mask);
		bio->bi_iter.bi_sector = sector;
		bio_set_dev(bio, bdev);
		bio_set_op_attrs(bio, op, 0);

		bio->bi_iter.bi_size = req_sects << 9;
		nr_sects -= req_sects;
		sector = end_sect;

		/*
		 * We can loop for a long time in here, if someone does
		 * full device discards (like mkfs). Be nice and allow
		 * us to schedule out to avoid softlocking if preempt
		 * is disabled.
		 */
		cond_resched();
	}

	*biop = bio;
	return 0;
}
EXPORT_SYMBOL(__blkdev_issue_discard);
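
/*
 * Illustrative sketch: how a caller might batch several discard ranges with
 * __blkdev_issue_discard() and wait once.  The helper name and the range
 * array are hypothetical and not part of the kernel API.
 */
struct example_range {
	sector_t sector;
	sector_t nr_sects;
};

static int __maybe_unused example_batch_discard(struct block_device *bdev,
		const struct example_range *ranges, int nr_ranges)
{
	struct bio *bio = NULL;	/* anchor bio, grows into a chain */
	struct blk_plug plug;
	int i, ret = 0;

	blk_start_plug(&plug);
	for (i = 0; i < nr_ranges; i++) {
		ret = __blkdev_issue_discard(bdev, ranges[i].sector,
				ranges[i].nr_sects, GFP_KERNEL, 0, &bio);
		if (ret)
			break;
	}
	/*
	 * Wait for whatever was queued so far; bio_chain() ties every earlier
	 * bio to the anchor, so waiting on it waits for the whole chain.
	 */
	if (bio) {
		int err = submit_bio_wait(bio);

		if (!ret)
			ret = err;
		bio_put(bio);
	}
	blk_finish_plug(&plug);
	return ret;
}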

/**
 * blkdev_issue_discard - queue a discard
 * @bdev:	blockdev to issue discard for
 * @sector:	start sector
 * @nr_sects:	number of sectors to discard
 * @gfp_mask:	memory allocation flags (for bio_alloc)
 * @flags:	BLKDEV_DISCARD_* flags to control behaviour
 *
 * Description:
 *    Issue a discard request for the sectors in question.
 */
int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
		sector_t nr_sects, gfp_t gfp_mask, unsigned long flags)
{
	struct bio *bio = NULL;
	struct blk_plug plug;
	int ret;

	blk_start_plug(&plug);
	ret = __blkdev_issue_discard(bdev, sector, nr_sects, gfp_mask, flags,
			&bio);
	if (!ret && bio) {
		ret = submit_bio_wait(bio);
		if (ret == -EOPNOTSUPP)
			ret = 0;
		bio_put(bio);
	}
	blk_finish_plug(&plug);

	return ret;
}
EXPORT_SYMBOL(blkdev_issue_discard);
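
/*
 * Illustrative sketch: issuing a secure erase via blkdev_issue_discard() and
 * falling back to a plain discard when the queue does not advertise secure
 * erase.  This is one possible caller policy only; the helper name is
 * hypothetical.
 */
static int __maybe_unused example_secure_discard(struct block_device *bdev,
		sector_t sector, sector_t nr_sects)
{
	int ret;

	ret = blkdev_issue_discard(bdev, sector, nr_sects, GFP_KERNEL,
				   BLKDEV_DISCARD_SECURE);
	if (ret == -EOPNOTSUPP)	/* queue lacks secure erase support */
		ret = blkdev_issue_discard(bdev, sector, nr_sects,
					   GFP_KERNEL, 0);
	return ret;
}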

/**
 * __blkdev_issue_write_same - generate a number of bios with the same page
 * @bdev:	target blockdev
 * @sector:	start sector
 * @nr_sects:	number of sectors to write
 * @gfp_mask:	memory allocation flags (for bio_alloc)
 * @page:	page containing data to write
 * @biop:	pointer to anchor bio
 *
 * Description:
 *  Generate and issue a number of bios (REQ_OP_WRITE_SAME) with the same page.
 */
static int __blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
		sector_t nr_sects, gfp_t gfp_mask, struct page *page,
		struct bio **biop)
{
	struct request_queue *q = bdev_get_queue(bdev);
	unsigned int max_write_same_sectors;
	struct bio *bio = *biop;
	sector_t bs_mask;

	if (!q)
		return -ENXIO;

	bs_mask = (bdev_logical_block_size(bdev) >> 9) - 1;
	if ((sector | nr_sects) & bs_mask)
		return -EINVAL;

	if (!bdev_write_same(bdev))
		return -EOPNOTSUPP;

	/* Ensure that max_write_same_sectors doesn't overflow bi_size */
	max_write_same_sectors = UINT_MAX >> 9;

	while (nr_sects) {
		bio = next_bio(bio, 1, gfp_mask);
		bio->bi_iter.bi_sector = sector;
		bio_set_dev(bio, bdev);
		bio->bi_vcnt = 1;
		bio->bi_io_vec->bv_page = page;
		bio->bi_io_vec->bv_offset = 0;
		bio->bi_io_vec->bv_len = bdev_logical_block_size(bdev);
		bio_set_op_attrs(bio, REQ_OP_WRITE_SAME, 0);

		if (nr_sects > max_write_same_sectors) {
			bio->bi_iter.bi_size = max_write_same_sectors << 9;
			nr_sects -= max_write_same_sectors;
			sector += max_write_same_sectors;
		} else {
			bio->bi_iter.bi_size = nr_sects << 9;
			nr_sects = 0;
		}
		cond_resched();
	}

	*biop = bio;
	return 0;
}

/**
 * blkdev_issue_write_same - queue a write same operation
 * @bdev:	target blockdev
 * @sector:	start sector
 * @nr_sects:	number of sectors to write
 * @gfp_mask:	memory allocation flags (for bio_alloc)
 * @page:	page containing data
 *
 * Description:
 *    Issue a write same request for the sectors in question.
 */
int blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
				sector_t nr_sects, gfp_t gfp_mask,
				struct page *page)
{
	struct bio *bio = NULL;
	struct blk_plug plug;
	int ret;

	blk_start_plug(&plug);
	ret = __blkdev_issue_write_same(bdev, sector, nr_sects, gfp_mask, page,
			&bio);
	if (ret == 0 && bio) {
		ret = submit_bio_wait(bio);
		bio_put(bio);
	}
	blk_finish_plug(&plug);
	return ret;
}
EXPORT_SYMBOL(blkdev_issue_write_same);
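
/*
 * Illustrative sketch: using blkdev_issue_write_same() to replicate one
 * logical block of data across a range of sectors.  The helper name is
 * hypothetical; @page is assumed to hold at least one logical block of data
 * at offset 0.
 */
static int __maybe_unused example_write_same(struct block_device *bdev,
		struct page *page, sector_t sector, sector_t nr_sects)
{
	/* Fails with -EOPNOTSUPP if the device has no WRITE SAME support. */
	return blkdev_issue_write_same(bdev, sector, nr_sects, GFP_KERNEL,
				       page);
}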

static int __blkdev_issue_write_zeroes(struct block_device *bdev,
		sector_t sector, sector_t nr_sects, gfp_t gfp_mask,
		struct bio **biop, unsigned flags)
{
	struct bio *bio = *biop;
	unsigned int max_write_zeroes_sectors;
	struct request_queue *q = bdev_get_queue(bdev);

	if (!q)
		return -ENXIO;

	/* Ensure that max_write_zeroes_sectors doesn't overflow bi_size */
	max_write_zeroes_sectors = bdev_write_zeroes_sectors(bdev);

	if (max_write_zeroes_sectors == 0)
		return -EOPNOTSUPP;

	while (nr_sects) {
		bio = next_bio(bio, 0, gfp_mask);
		bio->bi_iter.bi_sector = sector;
		bio_set_dev(bio, bdev);
		bio->bi_opf = REQ_OP_WRITE_ZEROES;
		if (flags & BLKDEV_ZERO_NOUNMAP)
			bio->bi_opf |= REQ_NOUNMAP;

		if (nr_sects > max_write_zeroes_sectors) {
			bio->bi_iter.bi_size = max_write_zeroes_sectors << 9;
			nr_sects -= max_write_zeroes_sectors;
			sector += max_write_zeroes_sectors;
		} else {
			bio->bi_iter.bi_size = nr_sects << 9;
			nr_sects = 0;
		}
		cond_resched();
	}

	*biop = bio;
	return 0;
}

/*
 * Convert a number of 512B sectors to a number of pages.
 * The result is limited to a number of pages that can fit into a BIO.
 * Also make sure that the result is always at least 1 (page) for the cases
 * where nr_sects is lower than the number of sectors in a page.
 */
static unsigned int __blkdev_sectors_to_bio_pages(sector_t nr_sects)
{
	sector_t pages = DIV_ROUND_UP_SECTOR_T(nr_sects, PAGE_SIZE / 512);

	return min(pages, (sector_t)BIO_MAX_PAGES);
}
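
/*
 * Worked example (assuming 4 KiB pages, so PAGE_SIZE / 512 == 8):
 *   nr_sects = 1     -> DIV_ROUND_UP(1, 8)     = 1 page
 *   nr_sects = 24    -> DIV_ROUND_UP(24, 8)    = 3 pages
 *   nr_sects = 65536 -> DIV_ROUND_UP(65536, 8) = 8192, capped to BIO_MAX_PAGES
 */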

static int __blkdev_issue_zero_pages(struct block_device *bdev,
		sector_t sector, sector_t nr_sects, gfp_t gfp_mask,
		struct bio **biop)
{
	struct request_queue *q = bdev_get_queue(bdev);
	struct bio *bio = *biop;
	int bi_size = 0;
	unsigned int sz;

	if (!q)
		return -ENXIO;

	while (nr_sects != 0) {
		bio = next_bio(bio, __blkdev_sectors_to_bio_pages(nr_sects),
			       gfp_mask);
		bio->bi_iter.bi_sector = sector;
		bio_set_dev(bio, bdev);
		bio_set_op_attrs(bio, REQ_OP_WRITE, 0);

		while (nr_sects != 0) {
			sz = min((sector_t) PAGE_SIZE, nr_sects << 9);
			bi_size = bio_add_page(bio, ZERO_PAGE(0), sz, 0);
			nr_sects -= bi_size >> 9;
			sector += bi_size >> 9;
			if (bi_size < sz)
				break;
		}
		cond_resched();
	}

	*biop = bio;
	return 0;
}

/**
 * __blkdev_issue_zeroout - generate a number of zero-filled write bios
 * @bdev:	blockdev to issue
 * @sector:	start sector
 * @nr_sects:	number of sectors to write
 * @gfp_mask:	memory allocation flags (for bio_alloc)
 * @biop:	pointer to anchor bio
 * @flags:	controls detailed behavior
 *
 * Description:
 *  Zero-fill a block range, either using hardware offload or by explicitly
 *  writing zeroes to the device.
 *
 *  Note that this function may fail with -EOPNOTSUPP if the driver signals
 *  zeroing offload support, but the device fails to process the command (for
 *  some devices there is no non-destructive way to verify whether this
 *  operation is actually supported).  In this case the caller should retry
 *  the call to blkdev_issue_zeroout() and the fallback path will be used.
 *
 *  If a device is using logical block provisioning, the underlying space will
 *  not be released if %flags contains BLKDEV_ZERO_NOUNMAP.
 *
 *  If %flags contains BLKDEV_ZERO_NOFALLBACK, the function will return
 *  -EOPNOTSUPP if no explicit hardware offload for zeroing is provided.
 */
int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
		sector_t nr_sects, gfp_t gfp_mask, struct bio **biop,
		unsigned flags)
{
	int ret;
	sector_t bs_mask;

	bs_mask = (bdev_logical_block_size(bdev) >> 9) - 1;
	if ((sector | nr_sects) & bs_mask)
		return -EINVAL;

	ret = __blkdev_issue_write_zeroes(bdev, sector, nr_sects, gfp_mask,
			biop, flags);
	if (ret != -EOPNOTSUPP || (flags & BLKDEV_ZERO_NOFALLBACK))
		return ret;

	return __blkdev_issue_zero_pages(bdev, sector, nr_sects, gfp_mask,
					 biop);
}
EXPORT_SYMBOL(__blkdev_issue_zeroout);

/**
 * blkdev_issue_zeroout - zero-fill a block range
 * @bdev:	blockdev to write
 * @sector:	start sector
 * @nr_sects:	number of sectors to write
 * @gfp_mask:	memory allocation flags (for bio_alloc)
 * @flags:	controls detailed behavior
 *
 * Description:
 *  Zero-fill a block range, either using hardware offload or by explicitly
 *  writing zeroes to the device.  See __blkdev_issue_zeroout() for the
 *  valid values for %flags.
 */
int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
		sector_t nr_sects, gfp_t gfp_mask, unsigned flags)
{
	int ret;
	struct bio *bio = NULL;
	struct blk_plug plug;

	blk_start_plug(&plug);
	ret = __blkdev_issue_zeroout(bdev, sector, nr_sects, gfp_mask,
			&bio, flags);
	if (ret == 0 && bio) {
		ret = submit_bio_wait(bio);
		bio_put(bio);
	}
	blk_finish_plug(&plug);

	return ret;
}
EXPORT_SYMBOL(blkdev_issue_zeroout);
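
/*
 * Illustrative sketch: zeroing a range while keeping the blocks provisioned,
 * and only if the device can offload the zeroing.  The helper name is
 * hypothetical.
 */
static int __maybe_unused example_zero_range_offload_only(struct block_device *bdev,
		sector_t sector, sector_t nr_sects)
{
	/*
	 * BLKDEV_ZERO_NOUNMAP: do not deallocate the underlying space.
	 * BLKDEV_ZERO_NOFALLBACK: fail with -EOPNOTSUPP instead of falling
	 * back to writing zero pages when no hardware offload is available.
	 */
	return blkdev_issue_zeroout(bdev, sector, nr_sects, GFP_KERNEL,
				    BLKDEV_ZERO_NOUNMAP |
				    BLKDEV_ZERO_NOFALLBACK);
}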