/*
 * Functions related to generic helper functions
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/scatterlist.h>

#include "blk.h"

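/*
 * A bio_batch tracks one logical request issued as several bios: done
 * starts at 1 for the submitter and is incremented once per submitted bio,
 * flags carries BIO_UPTODATE (cleared by bio_batch_end_io() on error), and
 * wait is completed when the last outstanding bio finishes.
 */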
struct bio_batch {
	atomic_t		done;
	unsigned long		flags;
	struct completion	*wait;
};

static void bio_batch_end_io(struct bio *bio, int err)
{
	struct bio_batch *bb = bio->bi_private;

	if (err && (err != -EOPNOTSUPP))
		clear_bit(BIO_UPTODATE, &bb->flags);
	if (atomic_dec_and_test(&bb->done))
		complete(bb->wait);
	bio_put(bio);
}

/**
 * blkdev_issue_discard - queue a discard
 * @bdev:	blockdev to issue discard for
 * @sector:	start sector
 * @nr_sects:	number of sectors to discard
 * @gfp_mask:	memory allocation flags (for bio_alloc)
 * @flags:	BLKDEV_DISCARD_* flags to control behaviour
 *
 * Description:
 *    Issue a discard request for the sectors in question.
 */
int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
		sector_t nr_sects, gfp_t gfp_mask, unsigned long flags)
{
	DECLARE_COMPLETION_ONSTACK(wait);
	struct request_queue *q = bdev_get_queue(bdev);
	int type = REQ_WRITE | REQ_DISCARD;
	unsigned int max_discard_sectors, granularity;
	int alignment;
	struct bio_batch bb;
	struct bio *bio;
	int ret = 0;
	struct blk_plug plug;

	if (!q)
		return -ENXIO;

	if (!blk_queue_discard(q))
		return -EOPNOTSUPP;

	/* Zero-sector (unknown) and one-sector granularities are the same.  */
	granularity = max(q->limits.discard_granularity >> 9, 1U);
	alignment = (bdev_discard_alignment(bdev) >> 9) % granularity;

	/*
	 * Ensure that max_discard_sectors is of the proper
	 * granularity, so that requests stay aligned after a split.
	 */
	max_discard_sectors = min(q->limits.max_discard_sectors, UINT_MAX >> 9);
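	/*
	 * For example (illustrative numbers): a 65535-sector limit with an
	 * 8-sector (4 KiB) discard granularity is trimmed below to 65528,
	 * so every full-sized bio ends on a granularity boundary.
	 */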
	max_discard_sectors -= max_discard_sectors % granularity;
	if (unlikely(!max_discard_sectors)) {
		/* Avoid infinite loop below. Being cautious never hurts. */
		return -EOPNOTSUPP;
	}

	if (flags & BLKDEV_DISCARD_SECURE) {
		if (!blk_queue_secdiscard(q))
			return -EOPNOTSUPP;
		type |= REQ_SECURE;
	}

	atomic_set(&bb.done, 1);
	bb.flags = 1 << BIO_UPTODATE;
	bb.wait = &wait;

	blk_start_plug(&plug);
	while (nr_sects) {
		unsigned int req_sects;
		sector_t end_sect, tmp;

		bio = bio_alloc(gfp_mask, 1);
		if (!bio) {
			ret = -ENOMEM;
			break;
		}

		req_sects = min_t(sector_t, nr_sects, max_discard_sectors);

		/*
		 * If splitting a request, and the next starting sector would be
		 * misaligned, stop the discard at the previous aligned sector.
		 */
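		/*
		 * For example (illustrative numbers): with granularity = 8
		 * and alignment = 0, an end_sect of 21 is rounded down to 16,
		 * so the following bio starts on a discard_granularity
		 * boundary.
		 */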
		end_sect = sector + req_sects;
		tmp = end_sect;
		if (req_sects < nr_sects &&
		    sector_div(tmp, granularity) != alignment) {
			end_sect = end_sect - alignment;
			sector_div(end_sect, granularity);
			end_sect = end_sect * granularity + alignment;
			req_sects = end_sect - sector;
		}

		bio->bi_iter.bi_sector = sector;
		bio->bi_end_io = bio_batch_end_io;
		bio->bi_bdev = bdev;
		bio->bi_private = &bb;

		bio->bi_iter.bi_size = req_sects << 9;
		nr_sects -= req_sects;
		sector = end_sect;

		atomic_inc(&bb.done);
		submit_bio(type, bio);

		/*
		 * We can loop for a long time in here, if someone does
		 * full device discards (like mkfs). Be nice and allow
		 * us to schedule out to avoid softlocking if preempt
		 * is disabled.
		 */
		cond_resched();
	}
	blk_finish_plug(&plug);

	/* Wait for bios in-flight */
	if (!atomic_dec_and_test(&bb.done))
		wait_for_completion_io(&wait);

	if (!test_bit(BIO_UPTODATE, &bb.flags))
		ret = -EIO;

	return ret;
}
EXPORT_SYMBOL(blkdev_issue_discard);
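
/*
 * Example usage (an illustrative sketch, not part of the original file):
 * a filesystem-style caller discarding an extent. The helper name and the
 * GFP_NOFS choice are assumptions made for this example; pass
 * BLKDEV_DISCARD_SECURE in the flags argument only when the queue
 * supports secure discard.
 */
static int __maybe_unused example_discard_extent(struct block_device *bdev,
						 sector_t start,
						 sector_t nr_sects)
{
	/* 0 == no special discard flags; returns 0 or a negative errno */
	return blkdev_issue_discard(bdev, start, nr_sects, GFP_NOFS, 0);
}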

/**
 * blkdev_issue_write_same - queue a write same operation
 * @bdev:	target blockdev
 * @sector:	start sector
 * @nr_sects:	number of sectors to write
 * @gfp_mask:	memory allocation flags (for bio_alloc)
 * @page:	page containing data to write
 *
 * Description:
 *    Issue a write same request for the sectors in question.
 */
int blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
			    sector_t nr_sects, gfp_t gfp_mask,
			    struct page *page)
{
	DECLARE_COMPLETION_ONSTACK(wait);
	struct request_queue *q = bdev_get_queue(bdev);
	unsigned int max_write_same_sectors;
	struct bio_batch bb;
	struct bio *bio;
	int ret = 0;

	if (!q)
		return -ENXIO;

	max_write_same_sectors = q->limits.max_write_same_sectors;

	if (max_write_same_sectors == 0)
		return -EOPNOTSUPP;

	atomic_set(&bb.done, 1);
	bb.flags = 1 << BIO_UPTODATE;
	bb.wait = &wait;

	while (nr_sects) {
		bio = bio_alloc(gfp_mask, 1);
		if (!bio) {
			ret = -ENOMEM;
			break;
		}

		bio->bi_iter.bi_sector = sector;
		bio->bi_end_io = bio_batch_end_io;
		bio->bi_bdev = bdev;
		bio->bi_private = &bb;
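		/*
		 * WRITE SAME carries a single logical block of payload; the
		 * device replicates it across the whole bi_size range set
		 * below.
		 */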
		bio->bi_vcnt = 1;
		bio->bi_io_vec->bv_page = page;
		bio->bi_io_vec->bv_offset = 0;
		bio->bi_io_vec->bv_len = bdev_logical_block_size(bdev);

		if (nr_sects > max_write_same_sectors) {
			bio->bi_iter.bi_size = max_write_same_sectors << 9;
			nr_sects -= max_write_same_sectors;
			sector += max_write_same_sectors;
		} else {
			bio->bi_iter.bi_size = nr_sects << 9;
			nr_sects = 0;
		}

		atomic_inc(&bb.done);
		submit_bio(REQ_WRITE | REQ_WRITE_SAME, bio);
	}

	/* Wait for bios in-flight */
	if (!atomic_dec_and_test(&bb.done))
		wait_for_completion_io(&wait);

	if (!test_bit(BIO_UPTODATE, &bb.flags))
		ret = -ENOTSUPP;

	return ret;
}
EXPORT_SYMBOL(blkdev_issue_write_same);
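
/*
 * Example usage (an illustrative sketch, not part of the original file):
 * zeroing a range with WRITE SAME by pointing the payload at the shared
 * zero page, much as blkdev_issue_zeroout() does below. The helper name
 * and GFP_KERNEL are assumptions made for this example.
 */
static int __maybe_unused example_write_same_zero(struct block_device *bdev,
						  sector_t sector,
						  sector_t nr_sects)
{
	/* One copy of ZERO_PAGE(0) is written per logical block in range. */
	return blkdev_issue_write_same(bdev, sector, nr_sects, GFP_KERNEL,
				       ZERO_PAGE(0));
}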

/**
 * __blkdev_issue_zeroout - generate a number of zero-filled write bios
 * @bdev:	blockdev to write to
 * @sector:	start sector
 * @nr_sects:	number of sectors to write
 * @gfp_mask:	memory allocation flags (for bio_alloc)
 *
 * Description:
 *  Generate and issue a number of bios with zero-filled pages.
 */

static int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
				  sector_t nr_sects, gfp_t gfp_mask)
{
	int ret;
	struct bio *bio;
	struct bio_batch bb;
	unsigned int sz;
	DECLARE_COMPLETION_ONSTACK(wait);

	atomic_set(&bb.done, 1);
	bb.flags = 1 << BIO_UPTODATE;
	bb.wait = &wait;

	ret = 0;
	while (nr_sects != 0) {
		bio = bio_alloc(gfp_mask,
				min(nr_sects, (sector_t)BIO_MAX_PAGES));
		if (!bio) {
			ret = -ENOMEM;
			break;
		}

		bio->bi_iter.bi_sector = sector;
		bio->bi_bdev   = bdev;
		bio->bi_end_io = bio_batch_end_io;
		bio->bi_private = &bb;

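		/*
		 * Fill the bio with references to the shared zero page;
		 * bio_add_page() returns the number of bytes actually added,
		 * so stop once the bio is full and submit what fits.
		 */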
		while (nr_sects != 0) {
			sz = min((sector_t) PAGE_SIZE >> 9, nr_sects);
			ret = bio_add_page(bio, ZERO_PAGE(0), sz << 9, 0);
			nr_sects -= ret >> 9;
			sector += ret >> 9;
			if (ret < (sz << 9))
				break;
		}
		ret = 0;
		atomic_inc(&bb.done);
		submit_bio(WRITE, bio);
	}

	/* Wait for bios in-flight */
	if (!atomic_dec_and_test(&bb.done))
		wait_for_completion_io(&wait);

	if (!test_bit(BIO_UPTODATE, &bb.flags))
		/* One of the bios in the batch completed with an error. */
		ret = -EIO;

	return ret;
}

/**
 * blkdev_issue_zeroout - zero-fill a block range
 * @bdev:	blockdev to write to
 * @sector:	start sector
 * @nr_sects:	number of sectors to write
 * @gfp_mask:	memory allocation flags (for bio_alloc)
 * @discard:	whether to discard the block range
 *
 * Description:
 *  Zero-fill a block range.  If the discard flag is set and the block
 *  device guarantees that subsequent READ operations to the block range
 *  in question will return zeroes, the blocks will be discarded. Should
 *  the discard request fail, if the discard flag is not set, or if
 *  discard_zeroes_data is not supported, this function will resort to
 *  zeroing the blocks manually, thus provisioning (allocating,
 *  anchoring) them. If the block device supports the WRITE SAME command,
 *  blkdev_issue_zeroout() will use it to optimize the process of
 *  clearing the block range. Otherwise the zeroing will be performed
 *  using regular WRITE calls.
 */

int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
			 sector_t nr_sects, gfp_t gfp_mask, bool discard)
{
	struct request_queue *q = bdev_get_queue(bdev);
	unsigned char bdn[BDEVNAME_SIZE];

	if (discard && blk_queue_discard(q) && q->limits.discard_zeroes_data) {

		if (!blkdev_issue_discard(bdev, sector, nr_sects, gfp_mask, 0))
			return 0;

		bdevname(bdev, bdn);
		pr_warn("%s: DISCARD failed. Manually zeroing.\n", bdn);
	}

	if (bdev_write_same(bdev)) {

		if (!blkdev_issue_write_same(bdev, sector, nr_sects, gfp_mask,
					     ZERO_PAGE(0)))
			return 0;

		bdevname(bdev, bdn);
		pr_warn("%s: WRITE SAME failed. Manually zeroing.\n", bdn);
	}

	return __blkdev_issue_zeroout(bdev, sector, nr_sects, gfp_mask);
}
EXPORT_SYMBOL(blkdev_issue_zeroout);
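
/*
 * Example usage (an illustrative sketch, not part of the original file):
 * zero a range, allowing the helper to fall back from discard to WRITE
 * SAME to plain writes as described above. The helper name and GFP_KERNEL
 * are assumptions made for this example.
 */
static int __maybe_unused example_zero_range(struct block_device *bdev,
					     sector_t sector,
					     sector_t nr_sects)
{
	/* discard=true: discard only if the device zeroes discarded data */
	return blkdev_issue_zeroout(bdev, sector, nr_sects, GFP_KERNEL, true);
}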