/*
 * Functions related to generic helper functions
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/scatterlist.h>

#include "blk.h"

static struct bio *next_bio(struct bio *bio, unsigned int nr_pages,
		gfp_t gfp)
{
	struct bio *new = bio_alloc(gfp, nr_pages);

	if (bio) {
		bio_chain(bio, new);
		submit_bio(bio);
	}

	return new;
}

int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
		sector_t nr_sects, gfp_t gfp_mask, int flags,
		struct bio **biop)
{
	struct request_queue *q = bdev_get_queue(bdev);
	struct bio *bio = *biop;
	unsigned int granularity;
	unsigned int op;
	int alignment;
	sector_t bs_mask;

	if (!q)
		return -ENXIO;

	if (flags & BLKDEV_DISCARD_SECURE) {
		if (flags & BLKDEV_DISCARD_ZERO)
			return -EOPNOTSUPP;
		if (!blk_queue_secure_erase(q))
			return -EOPNOTSUPP;
		op = REQ_OP_SECURE_ERASE;
	} else {
		if (!blk_queue_discard(q))
			return -EOPNOTSUPP;
		if ((flags & BLKDEV_DISCARD_ZERO) &&
		    !q->limits.discard_zeroes_data)
			return -EOPNOTSUPP;
		op = REQ_OP_DISCARD;
	}

	/* The range must be aligned to the device's logical block size */
	bs_mask = (bdev_logical_block_size(bdev) >> 9) - 1;
	if ((sector | nr_sects) & bs_mask)
		return -EINVAL;

	/* Zero-sector (unknown) and one-sector granularities are the same.  */
	granularity = max(q->limits.discard_granularity >> 9, 1U);
	alignment = (bdev_discard_alignment(bdev) >> 9) % granularity;

	while (nr_sects) {
		unsigned int req_sects;
		sector_t end_sect, tmp;

		/* Make sure bi_size doesn't overflow */
		req_sects = min_t(sector_t, nr_sects, UINT_MAX >> 9);

		/*
		 * If splitting a request, and the next starting sector would be
		 * misaligned, stop the discard at the previous aligned sector.
		 */
		end_sect = sector + req_sects;
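		/*
		 * Worked example (sketch): with granularity 8, alignment 2,
		 * sector 2 and end_sect 21, 21 % 8 == 5 != alignment, so
		 * end_sect becomes ((21 - 2) / 8) * 8 + 2 == 18 and
		 * req_sects 16, leaving the next discard aligned.
		 */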
		tmp = end_sect;
		if (req_sects < nr_sects &&
		    sector_div(tmp, granularity) != alignment) {
			end_sect = end_sect - alignment;
			sector_div(end_sect, granularity);
			end_sect = end_sect * granularity + alignment;
			req_sects = end_sect - sector;
		}

		bio = next_bio(bio, 0, gfp_mask);
		bio->bi_iter.bi_sector = sector;
		bio->bi_bdev = bdev;
		bio_set_op_attrs(bio, op, 0);

		bio->bi_iter.bi_size = req_sects << 9;
		nr_sects -= req_sects;
		sector = end_sect;

		/*
		 * We can loop for a long time in here, if someone does
		 * full device discards (like mkfs). Be nice and allow
		 * us to schedule out to avoid softlocking if preempt
		 * is disabled.
		 */
		cond_resched();
	}

	*biop = bio;
	return 0;
}
EXPORT_SYMBOL(__blkdev_issue_discard);
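
/*
 * Example usage (sketch): since *biop carries the bio chain between calls,
 * several ranges can be batched and then waited on once:
 *
 *	struct bio *bio = NULL;
 *	int err;
 *
 *	err = __blkdev_issue_discard(bdev, sect_a, len_a, GFP_NOFS, 0, &bio);
 *	if (!err)
 *		err = __blkdev_issue_discard(bdev, sect_b, len_b, GFP_NOFS,
 *				0, &bio);
 *	if (!err && bio) {
 *		err = submit_bio_wait(bio);
 *		bio_put(bio);
 *	}
 *
 * (sect_a/len_a and sect_b/len_b are placeholder ranges.)
 */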

/**
 * blkdev_issue_discard - queue a discard
 * @bdev:	blockdev to issue discard for
 * @sector:	start sector
 * @nr_sects:	number of sectors to discard
 * @gfp_mask:	memory allocation flags (for bio_alloc)
 * @flags:	BLKDEV_DISCARD_* flags to control behaviour
 *
 * Description:
 *    Issue a discard request for the sectors in question.
 */
int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
		sector_t nr_sects, gfp_t gfp_mask, unsigned long flags)
{
	struct bio *bio = NULL;
	struct blk_plug plug;
	int ret;

	blk_start_plug(&plug);
	ret = __blkdev_issue_discard(bdev, sector, nr_sects, gfp_mask, flags,
			&bio);
	if (!ret && bio) {
		ret = submit_bio_wait(bio);
		if (ret == -EOPNOTSUPP && !(flags & BLKDEV_DISCARD_ZERO))
			ret = 0;
		bio_put(bio);
	}
	blk_finish_plug(&plug);

	return ret;
}
EXPORT_SYMBOL(blkdev_issue_discard);
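
/*
 * Example usage (sketch, assuming a valid opened bdev): discard a whole
 * device, as mkfs-style tools do:
 *
 *	int err = blkdev_issue_discard(bdev, 0, get_capacity(bdev->bd_disk),
 *				GFP_KERNEL, 0);
 */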

/**
 * __blkdev_issue_write_same - generate a number of bios with the same page
 * @bdev:	target blockdev
 * @sector:	start sector
 * @nr_sects:	number of sectors to write
 * @gfp_mask:	memory allocation flags (for bio_alloc)
 * @page:	page containing data to write
 * @biop:	pointer to anchor bio
 *
 * Description:
 *  Generate and issue a number of bios (REQ_OP_WRITE_SAME) with the same page.
 */
static int __blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
		sector_t nr_sects, gfp_t gfp_mask, struct page *page,
		struct bio **biop)
{
	struct request_queue *q = bdev_get_queue(bdev);
	unsigned int max_write_same_sectors;
	struct bio *bio = *biop;
	sector_t bs_mask;

	if (!q)
		return -ENXIO;

	bs_mask = (bdev_logical_block_size(bdev) >> 9) - 1;
	if ((sector | nr_sects) & bs_mask)
		return -EINVAL;

	if (!bdev_write_same(bdev))
		return -EOPNOTSUPP;

	/* Ensure that max_write_same_sectors doesn't overflow bi_size */
	max_write_same_sectors = UINT_MAX >> 9;

	while (nr_sects) {
		bio = next_bio(bio, 1, gfp_mask);
		bio->bi_iter.bi_sector = sector;
		bio->bi_bdev = bdev;
		/*
		 * A single bvec carries the caller's page; the device
		 * writes it repeatedly across the whole range.
		 */
		bio->bi_vcnt = 1;
		bio->bi_io_vec->bv_page = page;
		bio->bi_io_vec->bv_offset = 0;
		bio->bi_io_vec->bv_len = bdev_logical_block_size(bdev);
		bio_set_op_attrs(bio, REQ_OP_WRITE_SAME, 0);

		if (nr_sects > max_write_same_sectors) {
			bio->bi_iter.bi_size = max_write_same_sectors << 9;
			nr_sects -= max_write_same_sectors;
			sector += max_write_same_sectors;
		} else {
			bio->bi_iter.bi_size = nr_sects << 9;
			nr_sects = 0;
		}
		cond_resched();
	}

	*biop = bio;
	return 0;
}

/**
 * blkdev_issue_write_same - queue a write same operation
 * @bdev:	target blockdev
 * @sector:	start sector
 * @nr_sects:	number of sectors to write
 * @gfp_mask:	memory allocation flags (for bio_alloc)
 * @page:	page containing data
 *
 * Description:
 *    Issue a write same request for the sectors in question.
 */
int blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
				sector_t nr_sects, gfp_t gfp_mask,
				struct page *page)
{
	struct bio *bio = NULL;
	struct blk_plug plug;
	int ret;

	blk_start_plug(&plug);
	ret = __blkdev_issue_write_same(bdev, sector, nr_sects, gfp_mask, page,
			&bio);
	if (ret == 0 && bio) {
		ret = submit_bio_wait(bio);
		bio_put(bio);
	}
	blk_finish_plug(&plug);
	return ret;
}
EXPORT_SYMBOL(blkdev_issue_write_same);
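
/*
 * Example usage (sketch): replicate one logical block of data from a
 * caller-provided page across a whole range:
 *
 *	int err = blkdev_issue_write_same(bdev, sector, nr_sects,
 *				GFP_KERNEL, page);
 */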

/**
 * __blkdev_issue_write_zeroes - generate a number of bios with WRITE ZEROES
 * @bdev:	blockdev to issue
 * @sector:	start sector
 * @nr_sects:	number of sectors to write
 * @gfp_mask:	memory allocation flags (for bio_alloc)
 * @biop:	pointer to anchor bio
 *
 * Description:
 *  Generate and issue a number of bios (REQ_OP_WRITE_ZEROES) to zero a range
 *  of sectors without attaching data pages.
 */
static int __blkdev_issue_write_zeroes(struct block_device *bdev,
		sector_t sector, sector_t nr_sects, gfp_t gfp_mask,
		struct bio **biop)
{
	struct bio *bio = *biop;
	unsigned int max_write_zeroes_sectors;
	struct request_queue *q = bdev_get_queue(bdev);

	if (!q)
		return -ENXIO;

	/* Ensure that max_write_zeroes_sectors doesn't overflow bi_size */
	max_write_zeroes_sectors = bdev_write_zeroes_sectors(bdev);

	if (max_write_zeroes_sectors == 0)
		return -EOPNOTSUPP;

	/* REQ_OP_WRITE_ZEROES bios carry no data pages; bi_size alone describes the range */
	while (nr_sects) {
		bio = next_bio(bio, 0, gfp_mask);
		bio->bi_iter.bi_sector = sector;
		bio->bi_bdev = bdev;
		bio_set_op_attrs(bio, REQ_OP_WRITE_ZEROES, 0);

		if (nr_sects > max_write_zeroes_sectors) {
			bio->bi_iter.bi_size = max_write_zeroes_sectors << 9;
			nr_sects -= max_write_zeroes_sectors;
			sector += max_write_zeroes_sectors;
		} else {
			bio->bi_iter.bi_size = nr_sects << 9;
			nr_sects = 0;
		}
		cond_resched();
	}

	*biop = bio;
	return 0;
}

/**
 * __blkdev_issue_zeroout - generate a number of zero-filled write bios
 * @bdev:	blockdev to issue
 * @sector:	start sector
 * @nr_sects:	number of sectors to write
 * @gfp_mask:	memory allocation flags (for bio_alloc)
 * @biop:	pointer to anchor bio
 * @flags:	controls detailed behavior
 *
 * Description:
 *  Zero-fill a block range, either using hardware offload or by explicitly
 *  writing zeroes to the device.
 *
 *  If a device is using logical block provisioning, the underlying space will
 *  not be released if %flags contains BLKDEV_ZERO_NOUNMAP.
 */
int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
		sector_t nr_sects, gfp_t gfp_mask, struct bio **biop,
		unsigned flags)
{
	int ret;
	int bi_size = 0;
	struct bio *bio = *biop;
	unsigned int sz;
	sector_t bs_mask;

	bs_mask = (bdev_logical_block_size(bdev) >> 9) - 1;
	if ((sector | nr_sects) & bs_mask)
		return -EINVAL;

	ret = __blkdev_issue_write_zeroes(bdev, sector, nr_sects, gfp_mask,
			biop);
	if (ret == 0 || (ret && ret != -EOPNOTSUPP))
		goto out;

	ret = 0;
	while (nr_sects != 0) {
		bio = next_bio(bio, min(nr_sects, (sector_t)BIO_MAX_PAGES),
				gfp_mask);
		bio->bi_iter.bi_sector = sector;
		bio->bi_bdev = bdev;
		bio_set_op_attrs(bio, REQ_OP_WRITE, 0);

		/*
		 * Point the bio at the shared zero page as many times as it
		 * can take; bio_add_page() may accept less than asked for
		 * once the bio is full.
		 */
		while (nr_sects != 0) {
			sz = min((sector_t) PAGE_SIZE >> 9, nr_sects);
			bi_size = bio_add_page(bio, ZERO_PAGE(0), sz << 9, 0);
			nr_sects -= bi_size >> 9;
			sector += bi_size >> 9;
			if (bi_size < (sz << 9))
				break;
		}
		cond_resched();
	}

	*biop = bio;
out:
	return ret;
}
EXPORT_SYMBOL(__blkdev_issue_zeroout);
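
/*
 * Example usage (sketch), mirroring blkdev_issue_zeroout() below: build the
 * chain, then wait on the final bio:
 *
 *	struct bio *bio = NULL;
 *	int err = __blkdev_issue_zeroout(bdev, sector, nr_sects, GFP_NOFS,
 *			&bio, BLKDEV_ZERO_NOUNMAP);
 *	if (!err && bio) {
 *		err = submit_bio_wait(bio);
 *		bio_put(bio);
 *	}
 */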

/**
 * blkdev_issue_zeroout - zero-fill a block range
 * @bdev:	blockdev to write
 * @sector:	start sector
 * @nr_sects:	number of sectors to write
 * @gfp_mask:	memory allocation flags (for bio_alloc)
 * @flags:	controls detailed behavior
 *
 * Description:
 *  Zero-fill a block range, either using hardware offload or by explicitly
 *  writing zeroes to the device.  See __blkdev_issue_zeroout() for the
 *  valid values for %flags.
 */
int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
		sector_t nr_sects, gfp_t gfp_mask, unsigned flags)
{
	int ret;
	struct bio *bio = NULL;
	struct blk_plug plug;

	if (!(flags & BLKDEV_ZERO_NOUNMAP)) {
		if (!blkdev_issue_discard(bdev, sector, nr_sects, gfp_mask,
				BLKDEV_DISCARD_ZERO))
			return 0;
	}

	blk_start_plug(&plug);
	ret = __blkdev_issue_zeroout(bdev, sector, nr_sects, gfp_mask,
			&bio, flags);
	if (ret == 0 && bio) {
		ret = submit_bio_wait(bio);
		bio_put(bio);
	}
	blk_finish_plug(&plug);

	return ret;
}
EXPORT_SYMBOL(blkdev_issue_zeroout);
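
/*
 * Example usage (sketch): zero a range while keeping the blocks provisioned
 * on a thinly provisioned device:
 *
 *	int err = blkdev_issue_zeroout(bdev, sector, nr_sects, GFP_KERNEL,
 *				BLKDEV_ZERO_NOUNMAP);
 */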