/*
 * bio-integrity.c - bio data integrity extensions
 *
 * Copyright (C) 2007, 2008, 2009 Oracle Corporation
 * Written by: Martin K. Petersen <martin.petersen@oracle.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version
 * 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; see the file COPYING.  If not, write to
 * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139,
 * USA.
 *
 */

#include <linux/blkdev.h>
#include <linux/mempool.h>
#include <linux/export.h>
#include <linux/bio.h>
#include <linux/workqueue.h>
#include <linux/slab.h>
#include "blk.h"

/* Number of bio_vec slots that follow each payload allocated from bip_slab */
#define BIP_INLINE_VECS	4

/* Slab cache that backs the per-bio_set integrity payload mempools */
static struct kmem_cache *bip_slab;
/* Workqueue on which bio_integrity_endio() queues read verification */
static struct workqueue_struct *kintegrityd_wq;

36 37 38 39 40
void blk_flush_integrity(void)
{
	flush_workqueue(kintegrityd_wq);
}

41
/**
42
 * bio_integrity_alloc - Allocate integrity payload and attach it to bio
43 44 45 46 47 48 49 50
 * @bio:	bio to attach integrity metadata to
 * @gfp_mask:	Memory allocation mask
 * @nr_vecs:	Number of integrity metadata scatter-gather elements
 *
 * Description: This function prepares a bio for attaching integrity
 * metadata.  nr_vecs specifies the maximum number of pages containing
 * integrity metadata that can be attached.
 */
51 52 53
struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio,
						  gfp_t gfp_mask,
						  unsigned int nr_vecs)
54 55
{
	struct bio_integrity_payload *bip;
56
	struct bio_set *bs = bio->bi_pool;
57 58
	unsigned inline_vecs;

59
	if (!bs || !bs->bio_integrity_pool) {
60 61 62 63
		bip = kmalloc(sizeof(struct bio_integrity_payload) +
			      sizeof(struct bio_vec) * nr_vecs, gfp_mask);
		inline_vecs = nr_vecs;
	} else {
64
		bip = mempool_alloc(bs->bio_integrity_pool, gfp_mask);
65
		inline_vecs = BIP_INLINE_VECS;
66 67
	}

68
	if (unlikely(!bip))
69
		return ERR_PTR(-ENOMEM);
70

71 72
	memset(bip, 0, sizeof(*bip));

73
	if (nr_vecs > inline_vecs) {
74 75
		unsigned long idx = 0;

76 77 78 79
		bip->bip_vec = bvec_alloc(gfp_mask, nr_vecs, &idx,
					  bs->bvec_integrity_pool);
		if (!bip->bip_vec)
			goto err;
80
		bip->bip_max_vcnt = bvec_nr_vecs(idx);
81
		bip->bip_slab = idx;
82 83
	} else {
		bip->bip_vec = bip->bip_inline_vecs;
84
		bip->bip_max_vcnt = inline_vecs;
85 86
	}

87 88
	bip->bip_bio = bio;
	bio->bi_integrity = bip;
89
	bio->bi_rw |= REQ_INTEGRITY;
90 91

	return bip;
92 93
err:
	mempool_free(bip, bs->bio_integrity_pool);
94
	return ERR_PTR(-ENOMEM);
95 96 97 98 99 100 101 102 103 104
}
EXPORT_SYMBOL(bio_integrity_alloc);

/**
 * bio_integrity_free - Free bio integrity payload
 * @bio:	bio containing bip to be freed
 *
 * Description: Used to free the integrity portion of a bio. Usually
 * called from bio_free().
 */
105
void bio_integrity_free(struct bio *bio)
106
{
107
	struct bio_integrity_payload *bip = bio_integrity(bio);
108 109
	struct bio_set *bs = bio->bi_pool;

110
	if (bip->bip_flags & BIP_BLOCK_INTEGRITY)
M
Martin K. Petersen 已提交
111 112
		kfree(page_address(bip->bip_vec->bv_page) +
		      bip->bip_vec->bv_offset);
113

114
	if (bs && bs->bio_integrity_pool) {
115
		bvec_free(bs->bvec_integrity_pool, bip->bip_vec, bip->bip_slab);
116

117
		mempool_free(bip, bs->bio_integrity_pool);
118 119 120
	} else {
		kfree(bip);
	}
121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137

	bio->bi_integrity = NULL;
}
EXPORT_SYMBOL(bio_integrity_free);

/**
 * bio_integrity_add_page - Attach integrity metadata
 * @bio:	bio to update
 * @page:	page containing integrity metadata
 * @len:	number of bytes of integrity metadata in page
 * @offset:	start offset within page
 *
 * Description: Attach a page containing integrity metadata to bio.
 */
int bio_integrity_add_page(struct bio *bio, struct page *page,
			   unsigned int len, unsigned int offset)
{
138
	struct bio_integrity_payload *bip = bio_integrity(bio);
139 140
	struct bio_vec *iv;

141
	if (bip->bip_vcnt >= bip->bip_max_vcnt) {
142 143 144 145
		printk(KERN_ERR "%s: bip_vec full\n", __func__);
		return 0;
	}

146
	iv = bip->bip_vec + bip->bip_vcnt;
147

148 149 150 151 152
	if (bip->bip_vcnt &&
	    bvec_gap_to_prev(bdev_get_queue(bio->bi_bdev),
			     &bip->bip_vec[bip->bip_vcnt - 1], offset))
		return 0;

153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170
	iv->bv_page = page;
	iv->bv_len = len;
	iv->bv_offset = offset;
	bip->bip_vcnt++;

	return len;
}
EXPORT_SYMBOL(bio_integrity_add_page);

/**
 * bio_integrity_enabled - Check whether integrity can be passed
 * @bio:	bio to check
 *
 * Description: Determines whether bio_integrity_prep() can be called
 * on this bio or not.	bio data direction and target device must be
 * set prior to calling.  The functions honors the write_generate and
 * read_verify flags in sysfs.
 */
171
bool bio_integrity_enabled(struct bio *bio)
172
{
173 174
	struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);

175
	if (!bio_is_rw(bio))
176
		return false;
177

178 179
	/* Already protected? */
	if (bio_integrity(bio))
180 181 182 183 184
		return false;

	if (bi == NULL)
		return false;

185
	if (bio_data_dir(bio) == READ && bi->profile->verify_fn != NULL &&
186
	    (bi->flags & BLK_INTEGRITY_VERIFY))
187 188
		return true;

189
	if (bio_data_dir(bio) == WRITE && bi->profile->generate_fn != NULL &&
190
	    (bi->flags & BLK_INTEGRITY_GENERATE))
191
		return true;
192

193
	return false;
194 195 196 197
}
EXPORT_SYMBOL(bio_integrity_enabled);

/**
198
 * bio_integrity_intervals - Return number of integrity intervals for a bio
199
 * @bi:		blk_integrity profile for device
200
 * @sectors:	Size of the bio in 512-byte sectors
201 202
 *
 * Description: The block layer calculates everything in 512 byte
203 204 205
 * sectors but integrity metadata is done in terms of the data integrity
 * interval size of the storage device.  Convert the block layer sectors
 * to the appropriate number of integrity intervals.
206
 */
207 208
static inline unsigned int bio_integrity_intervals(struct blk_integrity *bi,
						   unsigned int sectors)
209
{
210
	return sectors >> (bi->interval_exp - 9);
211 212
}

213 214 215
static inline unsigned int bio_integrity_bytes(struct blk_integrity *bi,
					       unsigned int sectors)
{
216
	return bio_integrity_intervals(bi, sectors) * bi->tuple_size;
217 218
}

219
/**
220
 * bio_integrity_process - Process integrity metadata for a bio
221
 * @bio:	bio to generate/verify integrity metadata for
222
 * @proc_fn:	Pointer to the relevant processing function
223
 */
224 225
static int bio_integrity_process(struct bio *bio,
				 integrity_processing_fn *proc_fn)
226 227
{
	struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
228
	struct blk_integrity_iter iter;
229 230
	struct bvec_iter bviter;
	struct bio_vec bv;
M
Martin K. Petersen 已提交
231
	struct bio_integrity_payload *bip = bio_integrity(bio);
232
	unsigned int ret = 0;
M
Martin K. Petersen 已提交
233 234
	void *prot_buf = page_address(bip->bip_vec->bv_page) +
		bip->bip_vec->bv_offset;
235

236
	iter.disk_name = bio->bi_bdev->bd_disk->disk_name;
237
	iter.interval = 1 << bi->interval_exp;
238 239
	iter.seed = bip_get_seed(bip);
	iter.prot_buf = prot_buf;
240

241 242
	bio_for_each_segment(bv, bio, bviter) {
		void *kaddr = kmap_atomic(bv.bv_page);
243

244 245
		iter.data_buf = kaddr + bv.bv_offset;
		iter.data_size = bv.bv_len;
246 247 248 249 250 251

		ret = proc_fn(&iter);
		if (ret) {
			kunmap_atomic(kaddr);
			return ret;
		}
252

253
		kunmap_atomic(kaddr);
254
	}
255 256 257
	return ret;
}

258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277
/**
 * bio_integrity_prep - Prepare bio for integrity I/O
 * @bio:	bio to prepare
 *
 * Description: Allocates a buffer for integrity metadata, maps the
 * pages and attaches them to a bio.  The bio must have data
 * direction, target device and start sector set priot to calling.  In
 * the WRITE case, integrity metadata will be generated using the
 * block device's integrity function.  In the READ case, the buffer
 * will be prepared for DMA and a suitable end_io handler set up.
 */
int bio_integrity_prep(struct bio *bio)
{
	struct bio_integrity_payload *bip;
	struct blk_integrity *bi;
	struct request_queue *q;
	void *buf;
	unsigned long start, end;
	unsigned int len, nr_pages;
	unsigned int bytes, offset, i;
278
	unsigned int intervals;
279 280 281 282 283 284

	bi = bdev_get_integrity(bio->bi_bdev);
	q = bdev_get_queue(bio->bi_bdev);
	BUG_ON(bi == NULL);
	BUG_ON(bio_integrity(bio));

285
	intervals = bio_integrity_intervals(bi, bio_sectors(bio));
286 287

	/* Allocate kernel buffer for protection data */
288
	len = intervals * bi->tuple_size;
289
	buf = kmalloc(len, GFP_NOIO | q->bounce_gfp);
290 291
	if (unlikely(buf == NULL)) {
		printk(KERN_ERR "could not allocate integrity buffer\n");
292
		return -ENOMEM;
293 294 295 296 297 298 299 300
	}

	end = (((unsigned long) buf) + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
	start = ((unsigned long) buf) >> PAGE_SHIFT;
	nr_pages = end - start;

	/* Allocate bio integrity payload and integrity vectors */
	bip = bio_integrity_alloc(bio, GFP_NOIO, nr_pages);
301
	if (IS_ERR(bip)) {
302 303
		printk(KERN_ERR "could not allocate data integrity bioset\n");
		kfree(buf);
304
		return PTR_ERR(bip);
305 306
	}

307
	bip->bip_flags |= BIP_BLOCK_INTEGRITY;
308
	bip->bip_iter.bi_size = len;
309
	bip_set_seed(bip, bio->bi_iter.bi_sector);
310

311 312 313
	if (bi->flags & BLK_INTEGRITY_IP_CHECKSUM)
		bip->bip_flags |= BIP_IP_CHECKSUM;

314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347
	/* Map it */
	offset = offset_in_page(buf);
	for (i = 0 ; i < nr_pages ; i++) {
		int ret;
		bytes = PAGE_SIZE - offset;

		if (len <= 0)
			break;

		if (bytes > len)
			bytes = len;

		ret = bio_integrity_add_page(bio, virt_to_page(buf),
					     bytes, offset);

		if (ret == 0)
			return 0;

		if (ret < bytes)
			break;

		buf += bytes;
		len -= bytes;
		offset = 0;
	}

	/* Install custom I/O completion handler if read verify is enabled */
	if (bio_data_dir(bio) == READ) {
		bip->bip_end_io = bio->bi_end_io;
		bio->bi_end_io = bio_integrity_endio;
	}

	/* Auto-generate integrity metadata if this is a write */
	if (bio_data_dir(bio) == WRITE)
348
		bio_integrity_process(bio, bi->profile->generate_fn);
349 350 351 352 353 354 355 356 357 358 359 360 361 362 363

	return 0;
}
EXPORT_SYMBOL(bio_integrity_prep);

/**
 * bio_integrity_verify_fn - Integrity I/O completion worker
 * @work:	Work struct stored in bio to be verified
 *
 * Description: This workqueue function is called to complete a READ
 * request.  The function verifies the transferred integrity metadata
 * and then calls the original bio end_io function.
 */
static void bio_integrity_verify_fn(struct work_struct *work)
{
364
	struct bio_integrity_payload *bip =
365 366
		container_of(work, struct bio_integrity_payload, bip_work);
	struct bio *bio = bip->bip_bio;
367
	struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
368

369
	bio->bi_error = bio_integrity_process(bio, bi->profile->verify_fn);
370 371 372

	/* Restore original bio completion handler */
	bio->bi_end_io = bip->bip_end_io;
373
	bio_endio(bio);
374 375 376 377 378 379 380 381 382 383 384 385 386 387
}

/**
 * bio_integrity_endio - Integrity I/O completion function
 * @bio:	Protected bio
 * @error:	Pointer to errno
 *
 * Description: Completion for integrity I/O
 *
 * Normally I/O completion is done in interrupt context.  However,
 * verifying I/O integrity is a time-consuming task which must be run
 * in process context.	This function postpones completion
 * accordingly.
 */
388
void bio_integrity_endio(struct bio *bio)
389
{
390
	struct bio_integrity_payload *bip = bio_integrity(bio);
391 392 393

	BUG_ON(bip->bip_bio != bio);

394 395 396 397
	/* In case of an I/O error there is no point in verifying the
	 * integrity metadata.  Restore original bio end_io handler
	 * and run it.
	 */
398
	if (bio->bi_error) {
399
		bio->bi_end_io = bip->bip_end_io;
400
		bio_endio(bio);
401 402 403 404

		return;
	}

405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420
	INIT_WORK(&bip->bip_work, bio_integrity_verify_fn);
	queue_work(kintegrityd_wq, &bip->bip_work);
}
EXPORT_SYMBOL(bio_integrity_endio);

/**
 * bio_integrity_advance - Advance integrity vector
 * @bio:	bio whose integrity vector to update
 * @bytes_done:	number of data bytes that have been completed
 *
 * Description: This function calculates how many integrity bytes the
 * number of completed data bytes correspond to and advances the
 * integrity vector accordingly.
 */
void bio_integrity_advance(struct bio *bio, unsigned int bytes_done)
{
421
	struct bio_integrity_payload *bip = bio_integrity(bio);
422
	struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
423
	unsigned bytes = bio_integrity_bytes(bi, bytes_done >> 9);
424

425
	bvec_iter_advance(bip->bip_vec, &bip->bip_iter, bytes);
426 427 428 429 430 431 432 433 434 435 436 437 438 439
}
EXPORT_SYMBOL(bio_integrity_advance);

/**
 * bio_integrity_trim - Trim integrity vector
 * @bio:	bio whose integrity vector to update
 * @offset:	offset to first data sector
 * @sectors:	number of data sectors
 *
 * Description: Used to trim the integrity vector in a cloned bio.
 * The ivec will be advanced corresponding to 'offset' data sectors
 * and the length will be truncated corresponding to 'len' data
 * sectors.
 */
440 441
void bio_integrity_trim(struct bio *bio, unsigned int offset,
			unsigned int sectors)
442
{
443
	struct bio_integrity_payload *bip = bio_integrity(bio);
444 445
	struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);

446 447
	bio_integrity_advance(bio, offset << 9);
	bip->bip_iter.bi_size = bio_integrity_bytes(bi, sectors);
448 449 450 451 452 453 454
}
EXPORT_SYMBOL(bio_integrity_trim);

/**
 * bio_integrity_clone - Callback for cloning bios with integrity metadata
 * @bio:	New bio
 * @bio_src:	Original bio
455
 * @gfp_mask:	Memory allocation mask
456 457 458
 *
 * Description:	Called to allocate a bip when cloning a bio
 */
459
int bio_integrity_clone(struct bio *bio, struct bio *bio_src,
460
			gfp_t gfp_mask)
461
{
462
	struct bio_integrity_payload *bip_src = bio_integrity(bio_src);
463 464 465 466
	struct bio_integrity_payload *bip;

	BUG_ON(bip_src == NULL);

467
	bip = bio_integrity_alloc(bio, gfp_mask, bip_src->bip_vcnt);
468 469
	if (IS_ERR(bip))
		return PTR_ERR(bip);
470 471 472 473 474

	memcpy(bip->bip_vec, bip_src->bip_vec,
	       bip_src->bip_vcnt * sizeof(struct bio_vec));

	bip->bip_vcnt = bip_src->bip_vcnt;
475
	bip->bip_iter = bip_src->bip_iter;
476 477 478 479 480

	return 0;
}
EXPORT_SYMBOL(bio_integrity_clone);

481
int bioset_integrity_create(struct bio_set *bs, int pool_size)
482
{
483 484 485
	if (bs->bio_integrity_pool)
		return 0;

486
	bs->bio_integrity_pool = mempool_create_slab_pool(pool_size, bip_slab);
487
	if (!bs->bio_integrity_pool)
488
		return -1;
489

490
	bs->bvec_integrity_pool = biovec_create_pool(pool_size);
491 492
	if (!bs->bvec_integrity_pool) {
		mempool_destroy(bs->bio_integrity_pool);
493
		return -1;
494
	}
495 496 497 498 499 500 501 502 503

	return 0;
}
EXPORT_SYMBOL(bioset_integrity_create);

void bioset_integrity_free(struct bio_set *bs)
{
	if (bs->bio_integrity_pool)
		mempool_destroy(bs->bio_integrity_pool);
504 505

	if (bs->bvec_integrity_pool)
506
		mempool_destroy(bs->bvec_integrity_pool);
507 508 509 510 511
}
EXPORT_SYMBOL(bioset_integrity_free);

/**
 * bio_integrity_init - Set up global bio integrity resources
 *
 * Description: Creates the kintegrityd workqueue and the slab cache
 * for integrity payloads.  Panics if the workqueue cannot be created;
 * the slab cache is created with SLAB_PANIC.
 */
void __init bio_integrity_init(void)
{
	/*
	 * kintegrityd won't block much but may burn a lot of CPU cycles.
	 * Make it highpri CPU intensive wq with max concurrency of 1.
	 */
	kintegrityd_wq = alloc_workqueue("kintegrityd", WQ_MEM_RECLAIM |
					 WQ_HIGHPRI | WQ_CPU_INTENSIVE, 1);
	if (!kintegrityd_wq)
		panic("Failed to create kintegrityd\n");

	/* Each object holds a payload followed by BIP_INLINE_VECS vectors */
	bip_slab = kmem_cache_create("bio_integrity_payload",
				     sizeof(struct bio_integrity_payload) +
				     sizeof(struct bio_vec) * BIP_INLINE_VECS,
				     0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
}