Commit 8357422d authored by Linus Torvalds

Merge git://git.kernel.org/pub/scm/linux/kernel/git/agk/linux-2.6-dm

* git://git.kernel.org/pub/scm/linux/kernel/git/agk/linux-2.6-dm: (33 commits)
  dm mpath: support discard
  dm stripe: support discards
  dm: split discard requests on target boundaries
  dm stripe: optimize sector division
  dm stripe: move sector translation to a function
  dm: error return error for discards
  dm delay: support discard
  dm: zero silently drop discards
  dm: use dm_target_offset macro
  dm: factor out max_io_len_target_boundary
  dm: use common __issue_target_request for flush and discard support
  dm: linear support discard
  dm crypt: simplify crypt_ctr
  dm crypt: simplify crypt_config destruction logic
  dm: allow autoloading of dm mod
  dm: rename map_info flush_request to target_request_nr
  dm ioctl: refactor dm_table_complete
  dm snapshot: implement merge
  dm: do not initialise full request queue when bio based
  dm ioctl: make bio or request based device type immutable
  ...
...@@ -445,6 +445,7 @@ Your cooperation is appreciated.
  233 = /dev/kmview		View-OS A process with a view
  234 = /dev/btrfs-control	Btrfs control device
  235 = /dev/autofs		Autofs control device
  236 = /dev/mapper/control	Device-Mapper control device
  240-254			Reserved for local use
  255				Reserved for MISC_DYNAMIC_MINOR
......
...@@ -107,11 +107,10 @@ struct crypt_config { ...@@ -107,11 +107,10 @@ struct crypt_config {
struct workqueue_struct *io_queue; struct workqueue_struct *io_queue;
struct workqueue_struct *crypt_queue; struct workqueue_struct *crypt_queue;
/* char *cipher;
* crypto related data char *cipher_mode;
*/
struct crypt_iv_operations *iv_gen_ops; struct crypt_iv_operations *iv_gen_ops;
char *iv_mode;
union { union {
struct iv_essiv_private essiv; struct iv_essiv_private essiv;
struct iv_benbi_private benbi; struct iv_benbi_private benbi;
...@@ -135,8 +134,6 @@ struct crypt_config { ...@@ -135,8 +134,6 @@ struct crypt_config {
unsigned int dmreq_start; unsigned int dmreq_start;
struct ablkcipher_request *req; struct ablkcipher_request *req;
char cipher[CRYPTO_MAX_ALG_NAME];
char chainmode[CRYPTO_MAX_ALG_NAME];
struct crypto_ablkcipher *tfm; struct crypto_ablkcipher *tfm;
unsigned long flags; unsigned long flags;
unsigned int key_size; unsigned int key_size;
...@@ -999,82 +996,135 @@ static int crypt_wipe_key(struct crypt_config *cc) ...@@ -999,82 +996,135 @@ static int crypt_wipe_key(struct crypt_config *cc)
return crypto_ablkcipher_setkey(cc->tfm, cc->key, cc->key_size); return crypto_ablkcipher_setkey(cc->tfm, cc->key, cc->key_size);
} }
/* static void crypt_dtr(struct dm_target *ti)
* Construct an encryption mapping:
* <cipher> <key> <iv_offset> <dev_path> <start>
*/
static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
{ {
struct crypt_config *cc; struct crypt_config *cc = ti->private;
struct crypto_ablkcipher *tfm;
char *tmp;
char *cipher;
char *chainmode;
char *ivmode;
char *ivopts;
unsigned int key_size;
unsigned long long tmpll;
if (argc != 5) { ti->private = NULL;
ti->error = "Not enough arguments";
if (!cc)
return;
if (cc->io_queue)
destroy_workqueue(cc->io_queue);
if (cc->crypt_queue)
destroy_workqueue(cc->crypt_queue);
if (cc->bs)
bioset_free(cc->bs);
if (cc->page_pool)
mempool_destroy(cc->page_pool);
if (cc->req_pool)
mempool_destroy(cc->req_pool);
if (cc->io_pool)
mempool_destroy(cc->io_pool);
if (cc->iv_gen_ops && cc->iv_gen_ops->dtr)
cc->iv_gen_ops->dtr(cc);
if (cc->tfm && !IS_ERR(cc->tfm))
crypto_free_ablkcipher(cc->tfm);
if (cc->dev)
dm_put_device(ti, cc->dev);
kzfree(cc->cipher);
kzfree(cc->cipher_mode);
/* Must zero key material before freeing */
kzfree(cc);
}
static int crypt_ctr_cipher(struct dm_target *ti,
char *cipher_in, char *key)
{
struct crypt_config *cc = ti->private;
char *tmp, *cipher, *chainmode, *ivmode, *ivopts;
char *cipher_api = NULL;
int ret = -EINVAL;
/* Convert to crypto api definition? */
if (strchr(cipher_in, '(')) {
ti->error = "Bad cipher specification";
return -EINVAL; return -EINVAL;
} }
tmp = argv[0]; /*
* Legacy dm-crypt cipher specification
* cipher-mode-iv:ivopts
*/
tmp = cipher_in;
cipher = strsep(&tmp, "-"); cipher = strsep(&tmp, "-");
cc->cipher = kstrdup(cipher, GFP_KERNEL);
if (!cc->cipher)
goto bad_mem;
if (tmp) {
cc->cipher_mode = kstrdup(tmp, GFP_KERNEL);
if (!cc->cipher_mode)
goto bad_mem;
}
chainmode = strsep(&tmp, "-"); chainmode = strsep(&tmp, "-");
ivopts = strsep(&tmp, "-"); ivopts = strsep(&tmp, "-");
ivmode = strsep(&ivopts, ":"); ivmode = strsep(&ivopts, ":");
if (tmp) if (tmp)
DMWARN("Unexpected additional cipher options"); DMWARN("Ignoring unexpected additional cipher options");
key_size = strlen(argv[1]) >> 1;
cc = kzalloc(sizeof(*cc) + key_size * sizeof(u8), GFP_KERNEL);
if (cc == NULL) {
ti->error =
"Cannot allocate transparent encryption context";
return -ENOMEM;
}
/* Compatibility mode for old dm-crypt cipher strings */ /* Compatibility mode for old dm-crypt mappings */
if (!chainmode || (strcmp(chainmode, "plain") == 0 && !ivmode)) { if (!chainmode || (!strcmp(chainmode, "plain") && !ivmode)) {
kfree(cc->cipher_mode);
cc->cipher_mode = kstrdup("cbc-plain", GFP_KERNEL);
chainmode = "cbc"; chainmode = "cbc";
ivmode = "plain"; ivmode = "plain";
} }
if (strcmp(chainmode, "ecb") && !ivmode) { if (strcmp(chainmode, "ecb") && !ivmode) {
ti->error = "This chaining mode requires an IV mechanism"; ti->error = "IV mechanism required";
goto bad_cipher; return -EINVAL;
} }
if (snprintf(cc->cipher, CRYPTO_MAX_ALG_NAME, "%s(%s)", cipher_api = kmalloc(CRYPTO_MAX_ALG_NAME, GFP_KERNEL);
chainmode, cipher) >= CRYPTO_MAX_ALG_NAME) { if (!cipher_api)
ti->error = "Chain mode + cipher name is too long"; goto bad_mem;
goto bad_cipher;
ret = snprintf(cipher_api, CRYPTO_MAX_ALG_NAME,
"%s(%s)", chainmode, cipher);
if (ret < 0) {
kfree(cipher_api);
goto bad_mem;
} }
tfm = crypto_alloc_ablkcipher(cc->cipher, 0, 0); /* Allocate cipher */
if (IS_ERR(tfm)) { cc->tfm = crypto_alloc_ablkcipher(cipher_api, 0, 0);
if (IS_ERR(cc->tfm)) {
ret = PTR_ERR(cc->tfm);
ti->error = "Error allocating crypto tfm"; ti->error = "Error allocating crypto tfm";
goto bad_cipher; goto bad;
} }
strcpy(cc->cipher, cipher); /* Initialize and set key */
strcpy(cc->chainmode, chainmode); ret = crypt_set_key(cc, key);
cc->tfm = tfm; if (ret < 0) {
if (crypt_set_key(cc, argv[1]) < 0) {
ti->error = "Error decoding and setting key"; ti->error = "Error decoding and setting key";
goto bad_ivmode; goto bad;
} }
/* /* Initialize IV */
* Choose ivmode. Valid modes: "plain", "essiv:<esshash>", "benbi". cc->iv_size = crypto_ablkcipher_ivsize(cc->tfm);
* See comments at iv code if (cc->iv_size)
*/ /* at least a 64 bit sector number should fit in our buffer */
cc->iv_size = max(cc->iv_size,
(unsigned int)(sizeof(u64) / sizeof(u8)));
else if (ivmode) {
DMWARN("Selected cipher does not support IVs");
ivmode = NULL;
}
/* Choose ivmode, see comments at iv code. */
if (ivmode == NULL) if (ivmode == NULL)
cc->iv_gen_ops = NULL; cc->iv_gen_ops = NULL;
else if (strcmp(ivmode, "plain") == 0) else if (strcmp(ivmode, "plain") == 0)
...@@ -1088,159 +1138,138 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) ...@@ -1088,159 +1138,138 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
else if (strcmp(ivmode, "null") == 0) else if (strcmp(ivmode, "null") == 0)
cc->iv_gen_ops = &crypt_iv_null_ops; cc->iv_gen_ops = &crypt_iv_null_ops;
else { else {
ret = -EINVAL;
ti->error = "Invalid IV mode"; ti->error = "Invalid IV mode";
goto bad_ivmode; goto bad;
} }
if (cc->iv_gen_ops && cc->iv_gen_ops->ctr && /* Allocate IV */
cc->iv_gen_ops->ctr(cc, ti, ivopts) < 0) if (cc->iv_gen_ops && cc->iv_gen_ops->ctr) {
goto bad_ivmode; ret = cc->iv_gen_ops->ctr(cc, ti, ivopts);
if (ret < 0) {
if (cc->iv_gen_ops && cc->iv_gen_ops->init && ti->error = "Error creating IV";
cc->iv_gen_ops->init(cc) < 0) { goto bad;
ti->error = "Error initialising IV"; }
goto bad_slab_pool;
} }
cc->iv_size = crypto_ablkcipher_ivsize(tfm); /* Initialize IV (set keys for ESSIV etc) */
if (cc->iv_size) if (cc->iv_gen_ops && cc->iv_gen_ops->init) {
/* at least a 64 bit sector number should fit in our buffer */ ret = cc->iv_gen_ops->init(cc);
cc->iv_size = max(cc->iv_size, if (ret < 0) {
(unsigned int)(sizeof(u64) / sizeof(u8))); ti->error = "Error initialising IV";
else { goto bad;
if (cc->iv_gen_ops) {
DMWARN("Selected cipher does not support IVs");
if (cc->iv_gen_ops->dtr)
cc->iv_gen_ops->dtr(cc);
cc->iv_gen_ops = NULL;
} }
} }
ret = 0;
bad:
kfree(cipher_api);
return ret;
bad_mem:
ti->error = "Cannot allocate cipher strings";
return -ENOMEM;
}
/*
* Construct an encryption mapping:
* <cipher> <key> <iv_offset> <dev_path> <start>
*/
static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
{
struct crypt_config *cc;
unsigned int key_size;
unsigned long long tmpll;
int ret;
if (argc != 5) {
ti->error = "Not enough arguments";
return -EINVAL;
}
key_size = strlen(argv[1]) >> 1;
cc = kzalloc(sizeof(*cc) + key_size * sizeof(u8), GFP_KERNEL);
if (!cc) {
ti->error = "Cannot allocate encryption context";
return -ENOMEM;
}
ti->private = cc;
ret = crypt_ctr_cipher(ti, argv[0], argv[1]);
if (ret < 0)
goto bad;
ret = -ENOMEM;
cc->io_pool = mempool_create_slab_pool(MIN_IOS, _crypt_io_pool); cc->io_pool = mempool_create_slab_pool(MIN_IOS, _crypt_io_pool);
if (!cc->io_pool) { if (!cc->io_pool) {
ti->error = "Cannot allocate crypt io mempool"; ti->error = "Cannot allocate crypt io mempool";
goto bad_slab_pool; goto bad;
} }
cc->dmreq_start = sizeof(struct ablkcipher_request); cc->dmreq_start = sizeof(struct ablkcipher_request);
cc->dmreq_start += crypto_ablkcipher_reqsize(tfm); cc->dmreq_start += crypto_ablkcipher_reqsize(cc->tfm);
cc->dmreq_start = ALIGN(cc->dmreq_start, crypto_tfm_ctx_alignment()); cc->dmreq_start = ALIGN(cc->dmreq_start, crypto_tfm_ctx_alignment());
cc->dmreq_start += crypto_ablkcipher_alignmask(tfm) & cc->dmreq_start += crypto_ablkcipher_alignmask(cc->tfm) &
~(crypto_tfm_ctx_alignment() - 1); ~(crypto_tfm_ctx_alignment() - 1);
cc->req_pool = mempool_create_kmalloc_pool(MIN_IOS, cc->dmreq_start + cc->req_pool = mempool_create_kmalloc_pool(MIN_IOS, cc->dmreq_start +
sizeof(struct dm_crypt_request) + cc->iv_size); sizeof(struct dm_crypt_request) + cc->iv_size);
if (!cc->req_pool) { if (!cc->req_pool) {
ti->error = "Cannot allocate crypt request mempool"; ti->error = "Cannot allocate crypt request mempool";
goto bad_req_pool; goto bad;
} }
cc->req = NULL; cc->req = NULL;
cc->page_pool = mempool_create_page_pool(MIN_POOL_PAGES, 0); cc->page_pool = mempool_create_page_pool(MIN_POOL_PAGES, 0);
if (!cc->page_pool) { if (!cc->page_pool) {
ti->error = "Cannot allocate page mempool"; ti->error = "Cannot allocate page mempool";
goto bad_page_pool; goto bad;
} }
cc->bs = bioset_create(MIN_IOS, 0); cc->bs = bioset_create(MIN_IOS, 0);
if (!cc->bs) { if (!cc->bs) {
ti->error = "Cannot allocate crypt bioset"; ti->error = "Cannot allocate crypt bioset";
goto bad_bs; goto bad;
} }
ret = -EINVAL;
if (sscanf(argv[2], "%llu", &tmpll) != 1) { if (sscanf(argv[2], "%llu", &tmpll) != 1) {
ti->error = "Invalid iv_offset sector"; ti->error = "Invalid iv_offset sector";
goto bad_device; goto bad;
} }
cc->iv_offset = tmpll; cc->iv_offset = tmpll;
if (sscanf(argv[4], "%llu", &tmpll) != 1) {
ti->error = "Invalid device sector";
goto bad_device;
}
cc->start = tmpll;
if (dm_get_device(ti, argv[3], dm_table_get_mode(ti->table), &cc->dev)) { if (dm_get_device(ti, argv[3], dm_table_get_mode(ti->table), &cc->dev)) {
ti->error = "Device lookup failed"; ti->error = "Device lookup failed";
goto bad_device; goto bad;
} }
if (ivmode && cc->iv_gen_ops) { if (sscanf(argv[4], "%llu", &tmpll) != 1) {
if (ivopts) ti->error = "Invalid device sector";
*(ivopts - 1) = ':'; goto bad;
cc->iv_mode = kmalloc(strlen(ivmode) + 1, GFP_KERNEL); }
if (!cc->iv_mode) { cc->start = tmpll;
ti->error = "Error kmallocing iv_mode string";
goto bad_ivmode_string;
}
strcpy(cc->iv_mode, ivmode);
} else
cc->iv_mode = NULL;
ret = -ENOMEM;
cc->io_queue = create_singlethread_workqueue("kcryptd_io"); cc->io_queue = create_singlethread_workqueue("kcryptd_io");
if (!cc->io_queue) { if (!cc->io_queue) {
ti->error = "Couldn't create kcryptd io queue"; ti->error = "Couldn't create kcryptd io queue";
goto bad_io_queue; goto bad;
} }
cc->crypt_queue = create_singlethread_workqueue("kcryptd"); cc->crypt_queue = create_singlethread_workqueue("kcryptd");
if (!cc->crypt_queue) { if (!cc->crypt_queue) {
ti->error = "Couldn't create kcryptd queue"; ti->error = "Couldn't create kcryptd queue";
goto bad_crypt_queue; goto bad;
} }
ti->num_flush_requests = 1; ti->num_flush_requests = 1;
ti->private = cc;
return 0; return 0;
bad_crypt_queue: bad:
destroy_workqueue(cc->io_queue); crypt_dtr(ti);
bad_io_queue: return ret;
kfree(cc->iv_mode);
bad_ivmode_string:
dm_put_device(ti, cc->dev);
bad_device:
bioset_free(cc->bs);
bad_bs:
mempool_destroy(cc->page_pool);
bad_page_pool:
mempool_destroy(cc->req_pool);
bad_req_pool:
mempool_destroy(cc->io_pool);
bad_slab_pool:
if (cc->iv_gen_ops && cc->iv_gen_ops->dtr)
cc->iv_gen_ops->dtr(cc);
bad_ivmode:
crypto_free_ablkcipher(tfm);
bad_cipher:
/* Must zero key material before freeing */
kzfree(cc);
return -EINVAL;
}
static void crypt_dtr(struct dm_target *ti)
{
struct crypt_config *cc = (struct crypt_config *) ti->private;
destroy_workqueue(cc->io_queue);
destroy_workqueue(cc->crypt_queue);
if (cc->req)
mempool_free(cc->req, cc->req_pool);
bioset_free(cc->bs);
mempool_destroy(cc->page_pool);
mempool_destroy(cc->req_pool);
mempool_destroy(cc->io_pool);
kfree(cc->iv_mode);
if (cc->iv_gen_ops && cc->iv_gen_ops->dtr)
cc->iv_gen_ops->dtr(cc);
crypto_free_ablkcipher(cc->tfm);
dm_put_device(ti, cc->dev);
/* Must zero key material before freeing */
kzfree(cc);
} }
static int crypt_map(struct dm_target *ti, struct bio *bio, static int crypt_map(struct dm_target *ti, struct bio *bio,
...@@ -1255,7 +1284,7 @@ static int crypt_map(struct dm_target *ti, struct bio *bio, ...@@ -1255,7 +1284,7 @@ static int crypt_map(struct dm_target *ti, struct bio *bio,
return DM_MAPIO_REMAPPED; return DM_MAPIO_REMAPPED;
} }
io = crypt_io_alloc(ti, bio, bio->bi_sector - ti->begin); io = crypt_io_alloc(ti, bio, dm_target_offset(ti, bio->bi_sector));
if (bio_data_dir(io->base_bio) == READ) if (bio_data_dir(io->base_bio) == READ)
kcryptd_queue_io(io); kcryptd_queue_io(io);
...@@ -1268,7 +1297,7 @@ static int crypt_map(struct dm_target *ti, struct bio *bio, ...@@ -1268,7 +1297,7 @@ static int crypt_map(struct dm_target *ti, struct bio *bio,
static int crypt_status(struct dm_target *ti, status_type_t type, static int crypt_status(struct dm_target *ti, status_type_t type,
char *result, unsigned int maxlen) char *result, unsigned int maxlen)
{ {
struct crypt_config *cc = (struct crypt_config *) ti->private; struct crypt_config *cc = ti->private;
unsigned int sz = 0; unsigned int sz = 0;
switch (type) { switch (type) {
...@@ -1277,11 +1306,10 @@ static int crypt_status(struct dm_target *ti, status_type_t type, ...@@ -1277,11 +1306,10 @@ static int crypt_status(struct dm_target *ti, status_type_t type,
break; break;
case STATUSTYPE_TABLE: case STATUSTYPE_TABLE:
if (cc->iv_mode) if (cc->cipher_mode)
DMEMIT("%s-%s-%s ", cc->cipher, cc->chainmode, DMEMIT("%s-%s ", cc->cipher, cc->cipher_mode);
cc->iv_mode);
else else
DMEMIT("%s-%s ", cc->cipher, cc->chainmode); DMEMIT("%s ", cc->cipher);
if (cc->key_size > 0) { if (cc->key_size > 0) {
if ((maxlen - sz) < ((cc->key_size << 1) + 1)) if ((maxlen - sz) < ((cc->key_size << 1) + 1))
...@@ -1378,7 +1406,7 @@ static int crypt_merge(struct dm_target *ti, struct bvec_merge_data *bvm, ...@@ -1378,7 +1406,7 @@ static int crypt_merge(struct dm_target *ti, struct bvec_merge_data *bvm,
return max_size; return max_size;
bvm->bi_bdev = cc->dev->bdev; bvm->bi_bdev = cc->dev->bdev;
bvm->bi_sector = cc->start + bvm->bi_sector - ti->begin; bvm->bi_sector = cc->start + dm_target_offset(ti, bvm->bi_sector);
return min(max_size, q->merge_bvec_fn(q, bvm, biovec)); return min(max_size, q->merge_bvec_fn(q, bvm, biovec));
} }
......
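The crypt_ctr() rework above moves all cipher handling into crypt_ctr_cipher(), which splits the legacy "cipher-chainmode-ivmode:ivopts" string with strsep() and builds the crypto API name "chainmode(cipher)". Below is a rough, stand-alone userspace sketch of that parsing, assuming only what the hunk shows; the helper name and the test string are illustrative, not part of the kernel code.

#define _DEFAULT_SOURCE
#include <stdio.h>
#include <string.h>

/*
 * Split a legacy dm-crypt cipher spec such as "aes-cbc-essiv:sha256"
 * into the crypto API name ("cbc(aes)") plus ivmode/ivopts, mirroring
 * the strsep() calls in crypt_ctr_cipher() above.  Illustrative only.
 */
static int parse_cipher_spec(char *spec, char *api, size_t api_len,
                             char **ivmode, char **ivopts)
{
        char *tmp = spec, *cipher, *chainmode;

        cipher = strsep(&tmp, "-");
        chainmode = strsep(&tmp, "-");
        *ivopts = strsep(&tmp, "-");
        *ivmode = strsep(ivopts, ":");

        /* Compatibility mode: "aes" or "aes-plain" means cbc chaining, plain IV. */
        if (!chainmode || (!strcmp(chainmode, "plain") && !*ivmode)) {
                chainmode = "cbc";
                *ivmode = "plain";
        }
        if (strcmp(chainmode, "ecb") && !*ivmode)
                return -1;              /* this chaining mode needs an IV */

        if (snprintf(api, api_len, "%s(%s)", chainmode, cipher) >= (int)api_len)
                return -1;              /* chain mode + cipher name too long */
        return 0;
}

int main(void)
{
        char spec[] = "aes-cbc-essiv:sha256";
        char api[64];
        char *ivmode, *ivopts;

        if (!parse_cipher_spec(spec, api, sizeof(api), &ivmode, &ivopts))
                printf("api=%s ivmode=%s ivopts=%s\n", api,
                       ivmode ? ivmode : "(none)", ivopts ? ivopts : "(none)");
        return 0;
}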
...@@ -198,6 +198,7 @@ static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv) ...@@ -198,6 +198,7 @@ static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv)
atomic_set(&dc->may_delay, 1); atomic_set(&dc->may_delay, 1);
ti->num_flush_requests = 1; ti->num_flush_requests = 1;
ti->num_discard_requests = 1;
ti->private = dc; ti->private = dc;
return 0; return 0;
...@@ -281,14 +282,13 @@ static int delay_map(struct dm_target *ti, struct bio *bio, ...@@ -281,14 +282,13 @@ static int delay_map(struct dm_target *ti, struct bio *bio,
bio->bi_bdev = dc->dev_write->bdev; bio->bi_bdev = dc->dev_write->bdev;
if (bio_sectors(bio)) if (bio_sectors(bio))
bio->bi_sector = dc->start_write + bio->bi_sector = dc->start_write +
(bio->bi_sector - ti->begin); dm_target_offset(ti, bio->bi_sector);
return delay_bio(dc, dc->write_delay, bio); return delay_bio(dc, dc->write_delay, bio);
} }
bio->bi_bdev = dc->dev_read->bdev; bio->bi_bdev = dc->dev_read->bdev;
bio->bi_sector = dc->start_read + bio->bi_sector = dc->start_read + dm_target_offset(ti, bio->bi_sector);
(bio->bi_sector - ti->begin);
return delay_bio(dc, dc->read_delay, bio); return delay_bio(dc, dc->read_delay, bio);
} }
......
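The delay, crypt, linear and mirror hunks in this series all replace the open-coded "bio->bi_sector - ti->begin" with the dm_target_offset() macro. A minimal stand-alone sketch of the remapping arithmetic follows, using toy stand-ins for the dm types and assuming the macro expands to the subtraction it replaces.

#include <stdint.h>
#include <stdio.h>

typedef uint64_t sector_t;

/* Toy stand-in for struct dm_target; only the fields the math needs. */
struct dm_target {
        sector_t begin;         /* first logical sector mapped by this target */
        sector_t len;           /* length of the target in sectors */
};

/* Assumed expansion, matching the expressions the macro replaces above. */
#define dm_target_offset(ti, sector) ((sector) - (ti)->begin)

/* What linear_map_sector()/delay_map() compute: start of the underlying
 * device region plus the bio's offset within this target. */
static sector_t remap(const struct dm_target *ti, sector_t dev_start,
                      sector_t bi_sector)
{
        return dev_start + dm_target_offset(ti, bi_sector);
}

int main(void)
{
        struct dm_target ti = { .begin = 2048, .len = 8192 };

        /* A bio at logical sector 3072 lands at sector 1024 + 1024 = 2048
         * on the underlying device. */
        printf("%llu\n", (unsigned long long)remap(&ti, 1024, 3072));
        return 0;
}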
...@@ -173,7 +173,9 @@ int dm_exception_store_set_chunk_size(struct dm_exception_store *store, ...@@ -173,7 +173,9 @@ int dm_exception_store_set_chunk_size(struct dm_exception_store *store,
/* Validate the chunk size against the device block size */ /* Validate the chunk size against the device block size */
if (chunk_size % if (chunk_size %
(bdev_logical_block_size(dm_snap_cow(store->snap)->bdev) >> 9)) { (bdev_logical_block_size(dm_snap_cow(store->snap)->bdev) >> 9) ||
chunk_size %
(bdev_logical_block_size(dm_snap_origin(store->snap)->bdev) >> 9)) {
*error = "Chunk size is not a multiple of device blocksize"; *error = "Chunk size is not a multiple of device blocksize";
return -EINVAL; return -EINVAL;
} }
......
...@@ -126,8 +126,9 @@ struct dm_exception_store { ...@@ -126,8 +126,9 @@ struct dm_exception_store {
}; };
/* /*
* Obtain the cow device used by a given snapshot. * Obtain the origin or cow device used by a given snapshot.
*/ */
struct dm_dev *dm_snap_origin(struct dm_snapshot *snap);
struct dm_dev *dm_snap_cow(struct dm_snapshot *snap); struct dm_dev *dm_snap_cow(struct dm_snapshot *snap);
/* /*
......
...@@ -249,55 +249,66 @@ static void __hash_remove(struct hash_cell *hc) ...@@ -249,55 +249,66 @@ static void __hash_remove(struct hash_cell *hc)
static void dm_hash_remove_all(int keep_open_devices) static void dm_hash_remove_all(int keep_open_devices)
{ {
int i, dev_skipped, dev_removed; int i, dev_skipped;
struct hash_cell *hc; struct hash_cell *hc;
struct list_head *tmp, *n; struct mapped_device *md;
retry:
dev_skipped = 0;
down_write(&_hash_lock); down_write(&_hash_lock);
retry:
dev_skipped = dev_removed = 0;
for (i = 0; i < NUM_BUCKETS; i++) { for (i = 0; i < NUM_BUCKETS; i++) {
list_for_each_safe (tmp, n, _name_buckets + i) { list_for_each_entry(hc, _name_buckets + i, name_list) {
hc = list_entry(tmp, struct hash_cell, name_list); md = hc->md;
dm_get(md);
if (keep_open_devices && if (keep_open_devices && dm_lock_for_deletion(md)) {
dm_lock_for_deletion(hc->md)) { dm_put(md);
dev_skipped++; dev_skipped++;
continue; continue;
} }
__hash_remove(hc); __hash_remove(hc);
dev_removed = 1;
}
}
/* up_write(&_hash_lock);
* Some mapped devices may be using other mapped devices, so if any
* still exist, repeat until we make no further progress.
*/
if (dev_skipped) {
if (dev_removed)
goto retry;
DMWARN("remove_all left %d open device(s)", dev_skipped); dm_put(md);
if (likely(keep_open_devices))
dm_destroy(md);
else
dm_destroy_immediate(md);
/*
* Some mapped devices may be using other mapped
* devices, so repeat until we make no further
* progress. If a new mapped device is created
* here it will also get removed.
*/
goto retry;
}
} }
up_write(&_hash_lock); up_write(&_hash_lock);
if (dev_skipped)
DMWARN("remove_all left %d open device(s)", dev_skipped);
} }
static int dm_hash_rename(uint32_t cookie, uint32_t *flags, const char *old, static struct mapped_device *dm_hash_rename(struct dm_ioctl *param,
const char *new) const char *new)
{ {
char *new_name, *old_name; char *new_name, *old_name;
struct hash_cell *hc; struct hash_cell *hc;
struct dm_table *table; struct dm_table *table;
struct mapped_device *md;
/* /*
* duplicate new. * duplicate new.
*/ */
new_name = kstrdup(new, GFP_KERNEL); new_name = kstrdup(new, GFP_KERNEL);
if (!new_name) if (!new_name)
return -ENOMEM; return ERR_PTR(-ENOMEM);
down_write(&_hash_lock); down_write(&_hash_lock);
...@@ -306,24 +317,24 @@ static int dm_hash_rename(uint32_t cookie, uint32_t *flags, const char *old, ...@@ -306,24 +317,24 @@ static int dm_hash_rename(uint32_t cookie, uint32_t *flags, const char *old,
*/ */
hc = __get_name_cell(new); hc = __get_name_cell(new);
if (hc) { if (hc) {
DMWARN("asked to rename to an already existing name %s -> %s", DMWARN("asked to rename to an already-existing name %s -> %s",
old, new); param->name, new);
dm_put(hc->md); dm_put(hc->md);
up_write(&_hash_lock); up_write(&_hash_lock);
kfree(new_name); kfree(new_name);
return -EBUSY; return ERR_PTR(-EBUSY);
} }
/* /*
* Is there such a device as 'old' ? * Is there such a device as 'old' ?
*/ */
hc = __get_name_cell(old); hc = __get_name_cell(param->name);
if (!hc) { if (!hc) {
DMWARN("asked to rename a non existent device %s -> %s", DMWARN("asked to rename a non-existent device %s -> %s",
old, new); param->name, new);
up_write(&_hash_lock); up_write(&_hash_lock);
kfree(new_name); kfree(new_name);
return -ENXIO; return ERR_PTR(-ENXIO);
} }
/* /*
...@@ -345,13 +356,14 @@ static int dm_hash_rename(uint32_t cookie, uint32_t *flags, const char *old, ...@@ -345,13 +356,14 @@ static int dm_hash_rename(uint32_t cookie, uint32_t *flags, const char *old,
dm_table_put(table); dm_table_put(table);
} }
if (!dm_kobject_uevent(hc->md, KOBJ_CHANGE, cookie)) if (!dm_kobject_uevent(hc->md, KOBJ_CHANGE, param->event_nr))
*flags |= DM_UEVENT_GENERATED_FLAG; param->flags |= DM_UEVENT_GENERATED_FLAG;
dm_put(hc->md); md = hc->md;
up_write(&_hash_lock); up_write(&_hash_lock);
kfree(old_name); kfree(old_name);
return 0;
return md;
} }
/*----------------------------------------------------------------- /*-----------------------------------------------------------------
...@@ -573,7 +585,7 @@ static struct dm_table *dm_get_live_or_inactive_table(struct mapped_device *md, ...@@ -573,7 +585,7 @@ static struct dm_table *dm_get_live_or_inactive_table(struct mapped_device *md,
* Fills in a dm_ioctl structure, ready for sending back to * Fills in a dm_ioctl structure, ready for sending back to
* userland. * userland.
*/ */
static int __dev_status(struct mapped_device *md, struct dm_ioctl *param) static void __dev_status(struct mapped_device *md, struct dm_ioctl *param)
{ {
struct gendisk *disk = dm_disk(md); struct gendisk *disk = dm_disk(md);
struct dm_table *table; struct dm_table *table;
...@@ -617,8 +629,6 @@ static int __dev_status(struct mapped_device *md, struct dm_ioctl *param) ...@@ -617,8 +629,6 @@ static int __dev_status(struct mapped_device *md, struct dm_ioctl *param)
dm_table_put(table); dm_table_put(table);
} }
} }
return 0;
} }
static int dev_create(struct dm_ioctl *param, size_t param_size) static int dev_create(struct dm_ioctl *param, size_t param_size)
...@@ -640,15 +650,17 @@ static int dev_create(struct dm_ioctl *param, size_t param_size) ...@@ -640,15 +650,17 @@ static int dev_create(struct dm_ioctl *param, size_t param_size)
r = dm_hash_insert(param->name, *param->uuid ? param->uuid : NULL, md); r = dm_hash_insert(param->name, *param->uuid ? param->uuid : NULL, md);
if (r) { if (r) {
dm_put(md); dm_put(md);
dm_destroy(md);
return r; return r;
} }
param->flags &= ~DM_INACTIVE_PRESENT_FLAG; param->flags &= ~DM_INACTIVE_PRESENT_FLAG;
r = __dev_status(md, param); __dev_status(md, param);
dm_put(md); dm_put(md);
return r; return 0;
} }
/* /*
...@@ -742,6 +754,7 @@ static int dev_remove(struct dm_ioctl *param, size_t param_size) ...@@ -742,6 +754,7 @@ static int dev_remove(struct dm_ioctl *param, size_t param_size)
param->flags |= DM_UEVENT_GENERATED_FLAG; param->flags |= DM_UEVENT_GENERATED_FLAG;
dm_put(md); dm_put(md);
dm_destroy(md);
return 0; return 0;
} }
...@@ -762,6 +775,7 @@ static int dev_rename(struct dm_ioctl *param, size_t param_size) ...@@ -762,6 +775,7 @@ static int dev_rename(struct dm_ioctl *param, size_t param_size)
{ {
int r; int r;
char *new_name = (char *) param + param->data_start; char *new_name = (char *) param + param->data_start;
struct mapped_device *md;
if (new_name < param->data || if (new_name < param->data ||
invalid_str(new_name, (void *) param + param_size) || invalid_str(new_name, (void *) param + param_size) ||
...@@ -774,10 +788,14 @@ static int dev_rename(struct dm_ioctl *param, size_t param_size) ...@@ -774,10 +788,14 @@ static int dev_rename(struct dm_ioctl *param, size_t param_size)
if (r) if (r)
return r; return r;
param->data_size = 0; md = dm_hash_rename(param, new_name);
if (IS_ERR(md))
return PTR_ERR(md);
__dev_status(md, param);
dm_put(md);
return dm_hash_rename(param->event_nr, &param->flags, param->name, return 0;
new_name);
} }
static int dev_set_geometry(struct dm_ioctl *param, size_t param_size) static int dev_set_geometry(struct dm_ioctl *param, size_t param_size)
...@@ -818,8 +836,6 @@ static int dev_set_geometry(struct dm_ioctl *param, size_t param_size) ...@@ -818,8 +836,6 @@ static int dev_set_geometry(struct dm_ioctl *param, size_t param_size)
geometry.start = indata[3]; geometry.start = indata[3];
r = dm_set_geometry(md, &geometry); r = dm_set_geometry(md, &geometry);
if (!r)
r = __dev_status(md, param);
param->data_size = 0; param->data_size = 0;
...@@ -843,13 +859,17 @@ static int do_suspend(struct dm_ioctl *param) ...@@ -843,13 +859,17 @@ static int do_suspend(struct dm_ioctl *param)
if (param->flags & DM_NOFLUSH_FLAG) if (param->flags & DM_NOFLUSH_FLAG)
suspend_flags |= DM_SUSPEND_NOFLUSH_FLAG; suspend_flags |= DM_SUSPEND_NOFLUSH_FLAG;
if (!dm_suspended_md(md)) if (!dm_suspended_md(md)) {
r = dm_suspend(md, suspend_flags); r = dm_suspend(md, suspend_flags);
if (r)
goto out;
}
if (!r) __dev_status(md, param);
r = __dev_status(md, param);
out:
dm_put(md); dm_put(md);
return r; return r;
} }
...@@ -911,7 +931,7 @@ static int do_resume(struct dm_ioctl *param) ...@@ -911,7 +931,7 @@ static int do_resume(struct dm_ioctl *param)
dm_table_destroy(old_map); dm_table_destroy(old_map);
if (!r) if (!r)
r = __dev_status(md, param); __dev_status(md, param);
dm_put(md); dm_put(md);
return r; return r;
...@@ -935,16 +955,16 @@ static int dev_suspend(struct dm_ioctl *param, size_t param_size) ...@@ -935,16 +955,16 @@ static int dev_suspend(struct dm_ioctl *param, size_t param_size)
*/ */
static int dev_status(struct dm_ioctl *param, size_t param_size) static int dev_status(struct dm_ioctl *param, size_t param_size)
{ {
int r;
struct mapped_device *md; struct mapped_device *md;
md = find_device(param); md = find_device(param);
if (!md) if (!md)
return -ENXIO; return -ENXIO;
r = __dev_status(md, param); __dev_status(md, param);
dm_put(md); dm_put(md);
return r;
return 0;
} }
/* /*
...@@ -1019,7 +1039,7 @@ static void retrieve_status(struct dm_table *table, ...@@ -1019,7 +1039,7 @@ static void retrieve_status(struct dm_table *table,
*/ */
static int dev_wait(struct dm_ioctl *param, size_t param_size) static int dev_wait(struct dm_ioctl *param, size_t param_size)
{ {
int r; int r = 0;
struct mapped_device *md; struct mapped_device *md;
struct dm_table *table; struct dm_table *table;
...@@ -1040,9 +1060,7 @@ static int dev_wait(struct dm_ioctl *param, size_t param_size) ...@@ -1040,9 +1060,7 @@ static int dev_wait(struct dm_ioctl *param, size_t param_size)
* changed to trigger the event, so we may as well tell * changed to trigger the event, so we may as well tell
* him and save an ioctl. * him and save an ioctl.
*/ */
r = __dev_status(md, param); __dev_status(md, param);
if (r)
goto out;
table = dm_get_live_or_inactive_table(md, param); table = dm_get_live_or_inactive_table(md, param);
if (table) { if (table) {
...@@ -1050,8 +1068,9 @@ static int dev_wait(struct dm_ioctl *param, size_t param_size) ...@@ -1050,8 +1068,9 @@ static int dev_wait(struct dm_ioctl *param, size_t param_size)
dm_table_put(table); dm_table_put(table);
} }
out: out:
dm_put(md); dm_put(md);
return r; return r;
} }
...@@ -1112,28 +1131,9 @@ static int populate_table(struct dm_table *table, ...@@ -1112,28 +1131,9 @@ static int populate_table(struct dm_table *table,
next = spec->next; next = spec->next;
} }
r = dm_table_set_type(table);
if (r) {
DMWARN("unable to set table type");
return r;
}
return dm_table_complete(table); return dm_table_complete(table);
} }
static int table_prealloc_integrity(struct dm_table *t,
struct mapped_device *md)
{
struct list_head *devices = dm_table_get_devices(t);
struct dm_dev_internal *dd;
list_for_each_entry(dd, devices, list)
if (bdev_get_integrity(dd->dm_dev.bdev))
return blk_integrity_register(dm_disk(md), NULL);
return 0;
}
static int table_load(struct dm_ioctl *param, size_t param_size) static int table_load(struct dm_ioctl *param, size_t param_size)
{ {
int r; int r;
...@@ -1155,21 +1155,30 @@ static int table_load(struct dm_ioctl *param, size_t param_size) ...@@ -1155,21 +1155,30 @@ static int table_load(struct dm_ioctl *param, size_t param_size)
goto out; goto out;
} }
r = table_prealloc_integrity(t, md); /* Protect md->type and md->queue against concurrent table loads. */
if (r) { dm_lock_md_type(md);
DMERR("%s: could not register integrity profile.", if (dm_get_md_type(md) == DM_TYPE_NONE)
dm_device_name(md)); /* Initial table load: acquire type of table. */
dm_set_md_type(md, dm_table_get_type(t));
else if (dm_get_md_type(md) != dm_table_get_type(t)) {
DMWARN("can't change device type after initial table load.");
dm_table_destroy(t); dm_table_destroy(t);
dm_unlock_md_type(md);
r = -EINVAL;
goto out; goto out;
} }
r = dm_table_alloc_md_mempools(t); /* setup md->queue to reflect md's type (may block) */
r = dm_setup_md_queue(md);
if (r) { if (r) {
DMWARN("unable to allocate mempools for this table"); DMWARN("unable to set up device queue for new table.");
dm_table_destroy(t); dm_table_destroy(t);
dm_unlock_md_type(md);
goto out; goto out;
} }
dm_unlock_md_type(md);
/* stage inactive table */
down_write(&_hash_lock); down_write(&_hash_lock);
hc = dm_get_mdptr(md); hc = dm_get_mdptr(md);
if (!hc || hc->md != md) { if (!hc || hc->md != md) {
...@@ -1186,7 +1195,7 @@ static int table_load(struct dm_ioctl *param, size_t param_size) ...@@ -1186,7 +1195,7 @@ static int table_load(struct dm_ioctl *param, size_t param_size)
up_write(&_hash_lock); up_write(&_hash_lock);
param->flags |= DM_INACTIVE_PRESENT_FLAG; param->flags |= DM_INACTIVE_PRESENT_FLAG;
r = __dev_status(md, param); __dev_status(md, param);
out: out:
dm_put(md); dm_put(md);
...@@ -1196,7 +1205,6 @@ static int table_load(struct dm_ioctl *param, size_t param_size) ...@@ -1196,7 +1205,6 @@ static int table_load(struct dm_ioctl *param, size_t param_size)
static int table_clear(struct dm_ioctl *param, size_t param_size) static int table_clear(struct dm_ioctl *param, size_t param_size)
{ {
int r;
struct hash_cell *hc; struct hash_cell *hc;
struct mapped_device *md; struct mapped_device *md;
...@@ -1216,11 +1224,12 @@ static int table_clear(struct dm_ioctl *param, size_t param_size) ...@@ -1216,11 +1224,12 @@ static int table_clear(struct dm_ioctl *param, size_t param_size)
param->flags &= ~DM_INACTIVE_PRESENT_FLAG; param->flags &= ~DM_INACTIVE_PRESENT_FLAG;
r = __dev_status(hc->md, param); __dev_status(hc->md, param);
md = hc->md; md = hc->md;
up_write(&_hash_lock); up_write(&_hash_lock);
dm_put(md); dm_put(md);
return r;
return 0;
} }
/* /*
...@@ -1265,7 +1274,6 @@ static void retrieve_deps(struct dm_table *table, ...@@ -1265,7 +1274,6 @@ static void retrieve_deps(struct dm_table *table,
static int table_deps(struct dm_ioctl *param, size_t param_size) static int table_deps(struct dm_ioctl *param, size_t param_size)
{ {
int r = 0;
struct mapped_device *md; struct mapped_device *md;
struct dm_table *table; struct dm_table *table;
...@@ -1273,9 +1281,7 @@ static int table_deps(struct dm_ioctl *param, size_t param_size) ...@@ -1273,9 +1281,7 @@ static int table_deps(struct dm_ioctl *param, size_t param_size)
if (!md) if (!md)
return -ENXIO; return -ENXIO;
r = __dev_status(md, param); __dev_status(md, param);
if (r)
goto out;
table = dm_get_live_or_inactive_table(md, param); table = dm_get_live_or_inactive_table(md, param);
if (table) { if (table) {
...@@ -1283,9 +1289,9 @@ static int table_deps(struct dm_ioctl *param, size_t param_size) ...@@ -1283,9 +1289,9 @@ static int table_deps(struct dm_ioctl *param, size_t param_size)
dm_table_put(table); dm_table_put(table);
} }
out:
dm_put(md); dm_put(md);
return r;
return 0;
} }
/* /*
...@@ -1294,7 +1300,6 @@ static int table_deps(struct dm_ioctl *param, size_t param_size) ...@@ -1294,7 +1300,6 @@ static int table_deps(struct dm_ioctl *param, size_t param_size)
*/ */
static int table_status(struct dm_ioctl *param, size_t param_size) static int table_status(struct dm_ioctl *param, size_t param_size)
{ {
int r;
struct mapped_device *md; struct mapped_device *md;
struct dm_table *table; struct dm_table *table;
...@@ -1302,9 +1307,7 @@ static int table_status(struct dm_ioctl *param, size_t param_size) ...@@ -1302,9 +1307,7 @@ static int table_status(struct dm_ioctl *param, size_t param_size)
if (!md) if (!md)
return -ENXIO; return -ENXIO;
r = __dev_status(md, param); __dev_status(md, param);
if (r)
goto out;
table = dm_get_live_or_inactive_table(md, param); table = dm_get_live_or_inactive_table(md, param);
if (table) { if (table) {
...@@ -1312,9 +1315,9 @@ static int table_status(struct dm_ioctl *param, size_t param_size) ...@@ -1312,9 +1315,9 @@ static int table_status(struct dm_ioctl *param, size_t param_size)
dm_table_put(table); dm_table_put(table);
} }
out:
dm_put(md); dm_put(md);
return r;
return 0;
} }
/* /*
...@@ -1333,10 +1336,6 @@ static int target_message(struct dm_ioctl *param, size_t param_size) ...@@ -1333,10 +1336,6 @@ static int target_message(struct dm_ioctl *param, size_t param_size)
if (!md) if (!md)
return -ENXIO; return -ENXIO;
r = __dev_status(md, param);
if (r)
goto out;
if (tmsg < (struct dm_target_msg *) param->data || if (tmsg < (struct dm_target_msg *) param->data ||
invalid_str(tmsg->message, (void *) param + param_size)) { invalid_str(tmsg->message, (void *) param + param_size)) {
DMWARN("Invalid target message parameters."); DMWARN("Invalid target message parameters.");
...@@ -1593,18 +1592,22 @@ static long dm_compat_ctl_ioctl(struct file *file, uint command, ulong u) ...@@ -1593,18 +1592,22 @@ static long dm_compat_ctl_ioctl(struct file *file, uint command, ulong u)
#endif #endif
static const struct file_operations _ctl_fops = { static const struct file_operations _ctl_fops = {
.open = nonseekable_open,
.unlocked_ioctl = dm_ctl_ioctl, .unlocked_ioctl = dm_ctl_ioctl,
.compat_ioctl = dm_compat_ctl_ioctl, .compat_ioctl = dm_compat_ctl_ioctl,
.owner = THIS_MODULE, .owner = THIS_MODULE,
}; };
static struct miscdevice _dm_misc = { static struct miscdevice _dm_misc = {
.minor = MISC_DYNAMIC_MINOR, .minor = MAPPER_CTRL_MINOR,
.name = DM_NAME, .name = DM_NAME,
.nodename = "mapper/control", .nodename = DM_DIR "/" DM_CONTROL_NODE,
.fops = &_ctl_fops .fops = &_ctl_fops
}; };
MODULE_ALIAS_MISCDEV(MAPPER_CTRL_MINOR);
MODULE_ALIAS("devname:" DM_DIR "/" DM_CONTROL_NODE);
/* /*
* Create misc character device and link to DM_DIR/control. * Create misc character device and link to DM_DIR/control.
*/ */
......
...@@ -53,6 +53,7 @@ static int linear_ctr(struct dm_target *ti, unsigned int argc, char **argv) ...@@ -53,6 +53,7 @@ static int linear_ctr(struct dm_target *ti, unsigned int argc, char **argv)
} }
ti->num_flush_requests = 1; ti->num_flush_requests = 1;
ti->num_discard_requests = 1;
ti->private = lc; ti->private = lc;
return 0; return 0;
...@@ -73,7 +74,7 @@ static sector_t linear_map_sector(struct dm_target *ti, sector_t bi_sector) ...@@ -73,7 +74,7 @@ static sector_t linear_map_sector(struct dm_target *ti, sector_t bi_sector)
{ {
struct linear_c *lc = ti->private; struct linear_c *lc = ti->private;
return lc->start + (bi_sector - ti->begin); return lc->start + dm_target_offset(ti, bi_sector);
} }
static void linear_map_bio(struct dm_target *ti, struct bio *bio) static void linear_map_bio(struct dm_target *ti, struct bio *bio)
......
...@@ -706,6 +706,7 @@ static struct priority_group *parse_priority_group(struct arg_set *as, ...@@ -706,6 +706,7 @@ static struct priority_group *parse_priority_group(struct arg_set *as,
if (as->argc < nr_params) { if (as->argc < nr_params) {
ti->error = "not enough path parameters"; ti->error = "not enough path parameters";
r = -EINVAL;
goto bad; goto bad;
} }
...@@ -892,6 +893,7 @@ static int multipath_ctr(struct dm_target *ti, unsigned int argc, ...@@ -892,6 +893,7 @@ static int multipath_ctr(struct dm_target *ti, unsigned int argc,
} }
ti->num_flush_requests = 1; ti->num_flush_requests = 1;
ti->num_discard_requests = 1;
return 0; return 0;
...@@ -1271,6 +1273,15 @@ static int do_end_io(struct multipath *m, struct request *clone, ...@@ -1271,6 +1273,15 @@ static int do_end_io(struct multipath *m, struct request *clone,
if (error == -EOPNOTSUPP) if (error == -EOPNOTSUPP)
return error; return error;
if (clone->cmd_flags & REQ_DISCARD)
/*
* Pass all discard request failures up.
* FIXME: only fail_path if the discard failed due to a
* transport problem. This requires precise understanding
* of the underlying failure (e.g. the SCSI sense).
*/
return error;
if (mpio->pgpath) if (mpio->pgpath)
fail_path(mpio->pgpath); fail_path(mpio->pgpath);
......
...@@ -445,7 +445,7 @@ static sector_t map_sector(struct mirror *m, struct bio *bio) ...@@ -445,7 +445,7 @@ static sector_t map_sector(struct mirror *m, struct bio *bio)
{ {
if (unlikely(!bio->bi_size)) if (unlikely(!bio->bi_size))
return 0; return 0;
return m->offset + (bio->bi_sector - m->ms->ti->begin); return m->offset + dm_target_offset(m->ms->ti, bio->bi_sector);
} }
static void map_bio(struct mirror *m, struct bio *bio) static void map_bio(struct mirror *m, struct bio *bio)
......
...@@ -266,7 +266,7 @@ static int chunk_io(struct pstore *ps, void *area, chunk_t chunk, int rw, ...@@ -266,7 +266,7 @@ static int chunk_io(struct pstore *ps, void *area, chunk_t chunk, int rw,
*/ */
static chunk_t area_location(struct pstore *ps, chunk_t area) static chunk_t area_location(struct pstore *ps, chunk_t area)
{ {
return 1 + ((ps->exceptions_per_area + 1) * area); return NUM_SNAPSHOT_HDR_CHUNKS + ((ps->exceptions_per_area + 1) * area);
} }
/* /*
...@@ -780,8 +780,8 @@ static int persistent_commit_merge(struct dm_exception_store *store, ...@@ -780,8 +780,8 @@ static int persistent_commit_merge(struct dm_exception_store *store,
* ps->current_area does not get reduced by prepare_merge() until * ps->current_area does not get reduced by prepare_merge() until
* after commit_merge() has removed the nr_merged previous exceptions. * after commit_merge() has removed the nr_merged previous exceptions.
*/ */
ps->next_free = (area_location(ps, ps->current_area) - 1) + ps->next_free = area_location(ps, ps->current_area) +
(ps->current_committed + 1) + NUM_SNAPSHOT_HDR_CHUNKS; ps->current_committed + 1;
return 0; return 0;
} }
......
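The dm-snap-persistent hunk above replaces the hard-coded 1 in area_location() with NUM_SNAPSHOT_HDR_CHUNKS and folds the header-chunk term out of the next_free calculation; the old and new expressions are algebraically identical. A quick stand-alone check of that equivalence, assuming NUM_SNAPSHOT_HDR_CHUNKS is 1 (the literal it replaces):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

typedef uint64_t chunk_t;

#define NUM_SNAPSHOT_HDR_CHUNKS 1       /* assumed value, per the replaced literal */

static chunk_t area_location(chunk_t exceptions_per_area, chunk_t area)
{
        return NUM_SNAPSHOT_HDR_CHUNKS + (exceptions_per_area + 1) * area;
}

int main(void)
{
        chunk_t epa = 255, area = 7, committed = 42;

        /* Old form: (area_location() - 1) + (committed + 1) + NUM_SNAPSHOT_HDR_CHUNKS */
        chunk_t old_next = (area_location(epa, area) - 1) + (committed + 1)
                           + NUM_SNAPSHOT_HDR_CHUNKS;
        /* New form: area_location() + committed + 1 */
        chunk_t new_next = area_location(epa, area) + committed + 1;

        assert(old_next == new_next);
        printf("next_free = %llu\n", (unsigned long long)new_next);
        return 0;
}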
...@@ -148,6 +148,12 @@ struct dm_snapshot { ...@@ -148,6 +148,12 @@ struct dm_snapshot {
#define RUNNING_MERGE 0 #define RUNNING_MERGE 0
#define SHUTDOWN_MERGE 1 #define SHUTDOWN_MERGE 1
struct dm_dev *dm_snap_origin(struct dm_snapshot *s)
{
return s->origin;
}
EXPORT_SYMBOL(dm_snap_origin);
struct dm_dev *dm_snap_cow(struct dm_snapshot *s) struct dm_dev *dm_snap_cow(struct dm_snapshot *s)
{ {
return s->cow; return s->cow;
...@@ -1065,10 +1071,6 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) ...@@ -1065,10 +1071,6 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
origin_mode = FMODE_WRITE; origin_mode = FMODE_WRITE;
} }
origin_path = argv[0];
argv++;
argc--;
s = kmalloc(sizeof(*s), GFP_KERNEL); s = kmalloc(sizeof(*s), GFP_KERNEL);
if (!s) { if (!s) {
ti->error = "Cannot allocate snapshot context private " ti->error = "Cannot allocate snapshot context private "
...@@ -1077,6 +1079,16 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) ...@@ -1077,6 +1079,16 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
goto bad; goto bad;
} }
origin_path = argv[0];
argv++;
argc--;
r = dm_get_device(ti, origin_path, origin_mode, &s->origin);
if (r) {
ti->error = "Cannot get origin device";
goto bad_origin;
}
cow_path = argv[0]; cow_path = argv[0];
argv++; argv++;
argc--; argc--;
...@@ -1097,12 +1109,6 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) ...@@ -1097,12 +1109,6 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
argv += args_used; argv += args_used;
argc -= args_used; argc -= args_used;
r = dm_get_device(ti, origin_path, origin_mode, &s->origin);
if (r) {
ti->error = "Cannot get origin device";
goto bad_origin;
}
s->ti = ti; s->ti = ti;
s->valid = 1; s->valid = 1;
s->active = 0; s->active = 0;
...@@ -1212,15 +1218,15 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) ...@@ -1212,15 +1218,15 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
dm_exception_table_exit(&s->complete, exception_cache); dm_exception_table_exit(&s->complete, exception_cache);
bad_hash_tables: bad_hash_tables:
dm_put_device(ti, s->origin);
bad_origin:
dm_exception_store_destroy(s->store); dm_exception_store_destroy(s->store);
bad_store: bad_store:
dm_put_device(ti, s->cow); dm_put_device(ti, s->cow);
bad_cow: bad_cow:
dm_put_device(ti, s->origin);
bad_origin:
kfree(s); kfree(s);
bad: bad:
...@@ -1314,12 +1320,12 @@ static void snapshot_dtr(struct dm_target *ti) ...@@ -1314,12 +1320,12 @@ static void snapshot_dtr(struct dm_target *ti)
mempool_destroy(s->pending_pool); mempool_destroy(s->pending_pool);
dm_put_device(ti, s->origin);
dm_exception_store_destroy(s->store); dm_exception_store_destroy(s->store);
dm_put_device(ti, s->cow); dm_put_device(ti, s->cow);
dm_put_device(ti, s->origin);
kfree(s); kfree(s);
} }
...@@ -1686,7 +1692,7 @@ static int snapshot_merge_map(struct dm_target *ti, struct bio *bio, ...@@ -1686,7 +1692,7 @@ static int snapshot_merge_map(struct dm_target *ti, struct bio *bio,
chunk_t chunk; chunk_t chunk;
if (unlikely(bio_empty_barrier(bio))) { if (unlikely(bio_empty_barrier(bio))) {
if (!map_context->flush_request) if (!map_context->target_request_nr)
bio->bi_bdev = s->origin->bdev; bio->bi_bdev = s->origin->bdev;
else else
bio->bi_bdev = s->cow->bdev; bio->bi_bdev = s->cow->bdev;
...@@ -1899,8 +1905,14 @@ static int snapshot_iterate_devices(struct dm_target *ti, ...@@ -1899,8 +1905,14 @@ static int snapshot_iterate_devices(struct dm_target *ti,
iterate_devices_callout_fn fn, void *data) iterate_devices_callout_fn fn, void *data)
{ {
struct dm_snapshot *snap = ti->private; struct dm_snapshot *snap = ti->private;
int r;
r = fn(ti, snap->origin, 0, ti->len, data);
if (!r)
r = fn(ti, snap->cow, 0, get_dev_size(snap->cow->bdev), data);
return fn(ti, snap->origin, 0, ti->len, data); return r;
} }
...@@ -2159,6 +2171,21 @@ static int origin_status(struct dm_target *ti, status_type_t type, char *result, ...@@ -2159,6 +2171,21 @@ static int origin_status(struct dm_target *ti, status_type_t type, char *result,
return 0; return 0;
} }
static int origin_merge(struct dm_target *ti, struct bvec_merge_data *bvm,
struct bio_vec *biovec, int max_size)
{
struct dm_dev *dev = ti->private;
struct request_queue *q = bdev_get_queue(dev->bdev);
if (!q->merge_bvec_fn)
return max_size;
bvm->bi_bdev = dev->bdev;
bvm->bi_sector = bvm->bi_sector;
return min(max_size, q->merge_bvec_fn(q, bvm, biovec));
}
static int origin_iterate_devices(struct dm_target *ti, static int origin_iterate_devices(struct dm_target *ti,
iterate_devices_callout_fn fn, void *data) iterate_devices_callout_fn fn, void *data)
{ {
...@@ -2176,6 +2203,7 @@ static struct target_type origin_target = { ...@@ -2176,6 +2203,7 @@ static struct target_type origin_target = {
.map = origin_map, .map = origin_map,
.resume = origin_resume, .resume = origin_resume,
.status = origin_status, .status = origin_status,
.merge = origin_merge,
.iterate_devices = origin_iterate_devices, .iterate_devices = origin_iterate_devices,
}; };
......
...@@ -25,6 +25,8 @@ struct stripe { ...@@ -25,6 +25,8 @@ struct stripe {
struct stripe_c { struct stripe_c {
uint32_t stripes; uint32_t stripes;
int stripes_shift;
sector_t stripes_mask;
/* The size of this target / num. stripes */ /* The size of this target / num. stripes */
sector_t stripe_width; sector_t stripe_width;
...@@ -162,16 +164,22 @@ static int stripe_ctr(struct dm_target *ti, unsigned int argc, char **argv) ...@@ -162,16 +164,22 @@ static int stripe_ctr(struct dm_target *ti, unsigned int argc, char **argv)
/* Set pointer to dm target; used in trigger_event */ /* Set pointer to dm target; used in trigger_event */
sc->ti = ti; sc->ti = ti;
sc->stripes = stripes; sc->stripes = stripes;
sc->stripe_width = width; sc->stripe_width = width;
if (stripes & (stripes - 1))
sc->stripes_shift = -1;
else {
sc->stripes_shift = ffs(stripes) - 1;
sc->stripes_mask = ((sector_t) stripes) - 1;
}
ti->split_io = chunk_size; ti->split_io = chunk_size;
ti->num_flush_requests = stripes; ti->num_flush_requests = stripes;
ti->num_discard_requests = stripes;
sc->chunk_shift = ffs(chunk_size) - 1;
sc->chunk_mask = ((sector_t) chunk_size) - 1; sc->chunk_mask = ((sector_t) chunk_size) - 1;
for (sc->chunk_shift = 0; chunk_size; sc->chunk_shift++)
chunk_size >>= 1;
sc->chunk_shift--;
/* /*
* Get the stripe destinations. * Get the stripe destinations.
...@@ -207,26 +215,79 @@ static void stripe_dtr(struct dm_target *ti) ...@@ -207,26 +215,79 @@ static void stripe_dtr(struct dm_target *ti)
kfree(sc); kfree(sc);
} }
static void stripe_map_sector(struct stripe_c *sc, sector_t sector,
uint32_t *stripe, sector_t *result)
{
sector_t offset = dm_target_offset(sc->ti, sector);
sector_t chunk = offset >> sc->chunk_shift;
if (sc->stripes_shift < 0)
*stripe = sector_div(chunk, sc->stripes);
else {
*stripe = chunk & sc->stripes_mask;
chunk >>= sc->stripes_shift;
}
*result = (chunk << sc->chunk_shift) | (offset & sc->chunk_mask);
}
static void stripe_map_range_sector(struct stripe_c *sc, sector_t sector,
uint32_t target_stripe, sector_t *result)
{
uint32_t stripe;
stripe_map_sector(sc, sector, &stripe, result);
if (stripe == target_stripe)
return;
*result &= ~sc->chunk_mask; /* round down */
if (target_stripe < stripe)
*result += sc->chunk_mask + 1; /* next chunk */
}
static int stripe_map_discard(struct stripe_c *sc, struct bio *bio,
uint32_t target_stripe)
{
sector_t begin, end;
stripe_map_range_sector(sc, bio->bi_sector, target_stripe, &begin);
stripe_map_range_sector(sc, bio->bi_sector + bio_sectors(bio),
target_stripe, &end);
if (begin < end) {
bio->bi_bdev = sc->stripe[target_stripe].dev->bdev;
bio->bi_sector = begin + sc->stripe[target_stripe].physical_start;
bio->bi_size = to_bytes(end - begin);
return DM_MAPIO_REMAPPED;
} else {
/* The range doesn't map to the target stripe */
bio_endio(bio, 0);
return DM_MAPIO_SUBMITTED;
}
}
static int stripe_map(struct dm_target *ti, struct bio *bio, static int stripe_map(struct dm_target *ti, struct bio *bio,
union map_info *map_context) union map_info *map_context)
{ {
struct stripe_c *sc = (struct stripe_c *) ti->private; struct stripe_c *sc = ti->private;
sector_t offset, chunk;
uint32_t stripe; uint32_t stripe;
unsigned target_request_nr;
if (unlikely(bio_empty_barrier(bio))) { if (unlikely(bio_empty_barrier(bio))) {
BUG_ON(map_context->flush_request >= sc->stripes); target_request_nr = map_context->target_request_nr;
bio->bi_bdev = sc->stripe[map_context->flush_request].dev->bdev; BUG_ON(target_request_nr >= sc->stripes);
bio->bi_bdev = sc->stripe[target_request_nr].dev->bdev;
return DM_MAPIO_REMAPPED; return DM_MAPIO_REMAPPED;
} }
if (unlikely(bio->bi_rw & REQ_DISCARD)) {
target_request_nr = map_context->target_request_nr;
BUG_ON(target_request_nr >= sc->stripes);
return stripe_map_discard(sc, bio, target_request_nr);
}
offset = bio->bi_sector - ti->begin; stripe_map_sector(sc, bio->bi_sector, &stripe, &bio->bi_sector);
chunk = offset >> sc->chunk_shift;
stripe = sector_div(chunk, sc->stripes);
bio->bi_sector += sc->stripe[stripe].physical_start;
bio->bi_bdev = sc->stripe[stripe].dev->bdev; bio->bi_bdev = sc->stripe[stripe].dev->bdev;
bio->bi_sector = sc->stripe[stripe].physical_start +
(chunk << sc->chunk_shift) + (offset & sc->chunk_mask);
return DM_MAPIO_REMAPPED; return DM_MAPIO_REMAPPED;
} }
......
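The dm-stripe hunk above has stripe_ctr() compute chunk_shift with ffs() and, when the stripe count is a power of two, a stripes_shift/stripes_mask pair, so stripe_map_sector() can mask and shift instead of calling sector_div() for every bio. A small stand-alone sketch of that fast path; the example geometry (4 stripes, 8-sector chunks) and the helper name are illustrative.

#include <stdint.h>
#include <stdio.h>

typedef uint64_t sector_t;

/*
 * Map an offset within the striped target to (stripe number, sector on
 * that stripe) the way the new stripe_map_sector() does when the number
 * of stripes is a power of two: mask/shift instead of a 64-bit division.
 */
static void map_sector_pow2(sector_t offset,
                            unsigned chunk_shift, sector_t chunk_mask,
                            unsigned stripes_shift, sector_t stripes_mask,
                            uint32_t *stripe, sector_t *result)
{
        sector_t chunk = offset >> chunk_shift;

        *stripe = chunk & stripes_mask;         /* was: sector_div(chunk, stripes) */
        chunk >>= stripes_shift;

        *result = (chunk << chunk_shift) | (offset & chunk_mask);
}

int main(void)
{
        /* 4 stripes, 8-sector chunks: shifts/masks as stripe_ctr() derives them. */
        uint32_t stripe;
        sector_t result;

        map_sector_pow2(100, 3, 7, 2, 3, &stripe, &result);

        /* offset 100 -> chunk 12 -> stripe 0, sector 28 on that stripe */
        printf("stripe=%u sector=%llu\n", stripe, (unsigned long long)result);
        return 0;
}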
...@@ -54,6 +54,8 @@ struct dm_table { ...@@ -54,6 +54,8 @@ struct dm_table {
sector_t *highs; sector_t *highs;
struct dm_target *targets; struct dm_target *targets;
unsigned discards_supported:1;
/* /*
* Indicates the rw permissions for the new logical * Indicates the rw permissions for the new logical
* device. This should be a combination of FMODE_READ * device. This should be a combination of FMODE_READ
...@@ -203,6 +205,7 @@ int dm_table_create(struct dm_table **result, fmode_t mode, ...@@ -203,6 +205,7 @@ int dm_table_create(struct dm_table **result, fmode_t mode,
INIT_LIST_HEAD(&t->devices); INIT_LIST_HEAD(&t->devices);
atomic_set(&t->holders, 0); atomic_set(&t->holders, 0);
t->discards_supported = 1;
if (!num_targets) if (!num_targets)
num_targets = KEYS_PER_NODE; num_targets = KEYS_PER_NODE;
...@@ -245,7 +248,7 @@ void dm_table_destroy(struct dm_table *t) ...@@ -245,7 +248,7 @@ void dm_table_destroy(struct dm_table *t)
msleep(1); msleep(1);
smp_mb(); smp_mb();
/* free the indexes (see dm_table_complete) */ /* free the indexes */
if (t->depth >= 2) if (t->depth >= 2)
vfree(t->index[t->depth - 2]); vfree(t->index[t->depth - 2]);
...@@ -770,6 +773,9 @@ int dm_table_add_target(struct dm_table *t, const char *type, ...@@ -770,6 +773,9 @@ int dm_table_add_target(struct dm_table *t, const char *type,
t->highs[t->num_targets++] = tgt->begin + tgt->len - 1; t->highs[t->num_targets++] = tgt->begin + tgt->len - 1;
if (!tgt->num_discard_requests)
t->discards_supported = 0;
return 0; return 0;
bad: bad:
...@@ -778,7 +784,7 @@ int dm_table_add_target(struct dm_table *t, const char *type, ...@@ -778,7 +784,7 @@ int dm_table_add_target(struct dm_table *t, const char *type,
return r; return r;
} }
int dm_table_set_type(struct dm_table *t) static int dm_table_set_type(struct dm_table *t)
{ {
unsigned i; unsigned i;
unsigned bio_based = 0, request_based = 0; unsigned bio_based = 0, request_based = 0;
...@@ -900,7 +906,7 @@ static int setup_indexes(struct dm_table *t) ...@@ -900,7 +906,7 @@ static int setup_indexes(struct dm_table *t)
/* /*
* Builds the btree to index the map. * Builds the btree to index the map.
*/ */
int dm_table_complete(struct dm_table *t) static int dm_table_build_index(struct dm_table *t)
{ {
int r = 0; int r = 0;
unsigned int leaf_nodes; unsigned int leaf_nodes;
...@@ -919,6 +925,55 @@ int dm_table_complete(struct dm_table *t) ...@@ -919,6 +925,55 @@ int dm_table_complete(struct dm_table *t)
return r; return r;
} }
/*
* Register the mapped device for blk_integrity support if
* the underlying devices support it.
*/
static int dm_table_prealloc_integrity(struct dm_table *t, struct mapped_device *md)
{
struct list_head *devices = dm_table_get_devices(t);
struct dm_dev_internal *dd;
list_for_each_entry(dd, devices, list)
if (bdev_get_integrity(dd->dm_dev.bdev))
return blk_integrity_register(dm_disk(md), NULL);
return 0;
}
/*
* Prepares the table for use by building the indices,
* setting the type, and allocating mempools.
*/
int dm_table_complete(struct dm_table *t)
{
int r;
r = dm_table_set_type(t);
if (r) {
DMERR("unable to set table type");
return r;
}
r = dm_table_build_index(t);
if (r) {
DMERR("unable to build btrees");
return r;
}
r = dm_table_prealloc_integrity(t, t->md);
if (r) {
DMERR("could not register integrity profile.");
return r;
}
r = dm_table_alloc_md_mempools(t);
if (r)
DMERR("unable to allocate mempools");
return r;
}
static DEFINE_MUTEX(_event_lock); static DEFINE_MUTEX(_event_lock);
void dm_table_event_callback(struct dm_table *t, void dm_table_event_callback(struct dm_table *t,
void (*fn)(void *), void *context) void (*fn)(void *), void *context)
...@@ -1086,6 +1141,11 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, ...@@ -1086,6 +1141,11 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
else else
queue_flag_set_unlocked(QUEUE_FLAG_CLUSTER, q); queue_flag_set_unlocked(QUEUE_FLAG_CLUSTER, q);
if (!dm_table_supports_discards(t))
queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, q);
else
queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
dm_table_set_integrity(t); dm_table_set_integrity(t);
/* /*
...@@ -1232,6 +1292,39 @@ struct mapped_device *dm_table_get_md(struct dm_table *t) ...@@ -1232,6 +1292,39 @@ struct mapped_device *dm_table_get_md(struct dm_table *t)
return t->md; return t->md;
} }
static int device_discard_capable(struct dm_target *ti, struct dm_dev *dev,
sector_t start, sector_t len, void *data)
{
struct request_queue *q = bdev_get_queue(dev->bdev);
return q && blk_queue_discard(q);
}
bool dm_table_supports_discards(struct dm_table *t)
{
struct dm_target *ti;
unsigned i = 0;
if (!t->discards_supported)
return 0;
/*
* Ensure that at least one underlying device supports discards.
* t->devices includes internal dm devices such as mirror logs
* so we need to use iterate_devices here, which targets
* supporting discard must provide.
*/
while (i < dm_table_get_num_targets(t)) {
ti = dm_table_get_target(t, i++);
if (ti->type->iterate_devices &&
ti->type->iterate_devices(ti, device_discard_capable, NULL))
return 1;
}
return 0;
}
EXPORT_SYMBOL(dm_vcalloc); EXPORT_SYMBOL(dm_vcalloc);
EXPORT_SYMBOL(dm_get_device); EXPORT_SYMBOL(dm_get_device);
EXPORT_SYMBOL(dm_put_device); EXPORT_SYMBOL(dm_put_device);
......
...@@ -113,6 +113,11 @@ void dm_unregister_target(struct target_type *tt) ...@@ -113,6 +113,11 @@ void dm_unregister_target(struct target_type *tt)
*/ */
static int io_err_ctr(struct dm_target *tt, unsigned int argc, char **args) static int io_err_ctr(struct dm_target *tt, unsigned int argc, char **args)
{ {
/*
* Return error for discards instead of -EOPNOTSUPP
*/
tt->num_discard_requests = 1;
return 0; return 0;
} }
......
...@@ -22,6 +22,11 @@ static int zero_ctr(struct dm_target *ti, unsigned int argc, char **argv) ...@@ -22,6 +22,11 @@ static int zero_ctr(struct dm_target *ti, unsigned int argc, char **argv)
return -EINVAL; return -EINVAL;
} }
/*
* Silently drop discards, avoiding -EOPNOTSUPP.
*/
ti->num_discard_requests = 1;
return 0; return 0;
} }
......
...@@ -20,6 +20,7 @@ ...@@ -20,6 +20,7 @@
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/idr.h> #include <linux/idr.h>
#include <linux/hdreg.h> #include <linux/hdreg.h>
#include <linux/delay.h>
#include <trace/events/block.h> #include <trace/events/block.h>
...@@ -124,6 +125,10 @@ struct mapped_device { ...@@ -124,6 +125,10 @@ struct mapped_device {
unsigned long flags; unsigned long flags;
struct request_queue *queue; struct request_queue *queue;
unsigned type;
/* Protect queue and type against concurrent access. */
struct mutex type_lock;
struct gendisk *disk; struct gendisk *disk;
char name[16]; char name[16];
...@@ -638,8 +643,14 @@ static void dec_pending(struct dm_io *io, int error) ...@@ -638,8 +643,14 @@ static void dec_pending(struct dm_io *io, int error)
* There can be just one barrier request so we use * There can be just one barrier request so we use
* a per-device variable for error reporting. * a per-device variable for error reporting.
* Note that you can't touch the bio after end_io_acct * Note that you can't touch the bio after end_io_acct
*
* We ignore -EOPNOTSUPP for empty flush reported by
* underlying devices. We assume that if the device
* doesn't support empty barriers, it doesn't need
* cache flushing commands.
*/ */
if (!md->barrier_error && io_error != -EOPNOTSUPP) if (!md->barrier_error &&
!(bio_empty_barrier(bio) && io_error == -EOPNOTSUPP))
md->barrier_error = io_error; md->barrier_error = io_error;
end_io_acct(io); end_io_acct(io);
free_io(md, io); free_io(md, io);
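
The reworked condition only swallows -EOPNOTSUPP when the failing bio was an empty barrier, so a data-carrying barrier that genuinely fails still records its error. A small sketch of that filter as a pure function (a simplified model of the condition, not the kernel code):

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

/*
 * Should this completion be recorded in md->barrier_error?
 * Only an empty flush that the device cannot do (-EOPNOTSUPP) is
 * ignored, on the assumption that a device without empty-barrier
 * support needs no cache flushing.
 */
static bool should_record(int current_barrier_error,
                          bool bio_is_empty_barrier, int io_error)
{
    return !current_barrier_error &&
           !(bio_is_empty_barrier && io_error == -EOPNOTSUPP);
}

int main(void)
{
    printf("empty flush, EOPNOTSUPP:  %d\n",
           should_record(0, true, -EOPNOTSUPP));    /* 0: ignored  */
    printf("data barrier, EOPNOTSUPP: %d\n",
           should_record(0, false, -EOPNOTSUPP));   /* 1: recorded */
    return 0;
}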
...@@ -1019,17 +1030,27 @@ static void end_clone_request(struct request *clone, int error) ...@@ -1019,17 +1030,27 @@ static void end_clone_request(struct request *clone, int error)
dm_complete_request(clone, error); dm_complete_request(clone, error);
} }
static sector_t max_io_len(struct mapped_device *md, /*
sector_t sector, struct dm_target *ti) * Return maximum size of I/O possible at the supplied sector up to the current
* target boundary.
*/
static sector_t max_io_len_target_boundary(sector_t sector, struct dm_target *ti)
{
sector_t target_offset = dm_target_offset(ti, sector);
return ti->len - target_offset;
}
static sector_t max_io_len(sector_t sector, struct dm_target *ti)
{ {
sector_t offset = sector - ti->begin; sector_t len = max_io_len_target_boundary(sector, ti);
sector_t len = ti->len - offset;
/* /*
* Does the target need to split even further ? * Does the target need to split even further ?
*/ */
if (ti->split_io) { if (ti->split_io) {
sector_t boundary; sector_t boundary;
sector_t offset = dm_target_offset(ti, sector);
boundary = ((offset + ti->split_io) & ~(ti->split_io - 1)) boundary = ((offset + ti->split_io) & ~(ti->split_io - 1))
- offset; - offset;
if (len > boundary) if (len > boundary)
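
max_io_len_target_boundary() is just the distance from the mapped sector to the end of the target, and max_io_len() additionally clamps that to the target's split_io granularity when one is set. A runnable sketch of both calculations with a hypothetical target layout:

#include <stdint.h>
#include <stdio.h>

typedef uint64_t sector_t;

struct toy_target {
    sector_t begin;     /* first sector of the target in the mapped device */
    sector_t len;       /* target length in sectors */
    sector_t split_io;  /* optional power-of-two granularity, 0 = none */
};

/* dm_target_offset(): sector relative to the start of the target. */
static sector_t target_offset(const struct toy_target *ti, sector_t sector)
{
    return sector - ti->begin;
}

/* max_io_len_target_boundary(): room left before the target ends. */
static sector_t max_len_target_boundary(sector_t sector,
                                        const struct toy_target *ti)
{
    return ti->len - target_offset(ti, sector);
}

/* max_io_len(): additionally clamp to the next split_io boundary. */
static sector_t max_len(sector_t sector, const struct toy_target *ti)
{
    sector_t len = max_len_target_boundary(sector, ti);

    if (ti->split_io) {
        sector_t offset = target_offset(ti, sector);
        sector_t boundary = ((offset + ti->split_io) &
                             ~(ti->split_io - 1)) - offset;
        if (len > boundary)
            len = boundary;
    }
    return len;
}

int main(void)
{
    /* Hypothetical target: sectors 100..299, 64-sector chunks. */
    struct toy_target ti = { .begin = 100, .len = 200, .split_io = 64 };

    /* Sector 150 -> offset 50; next chunk boundary at offset 64. */
    printf("boundary len = %llu\n",
           (unsigned long long)max_len_target_boundary(150, &ti)); /* 150 */
    printf("max len      = %llu\n",
           (unsigned long long)max_len(150, &ti));                 /*  14 */
    return 0;
}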
...@@ -1171,36 +1192,96 @@ static struct dm_target_io *alloc_tio(struct clone_info *ci, ...@@ -1171,36 +1192,96 @@ static struct dm_target_io *alloc_tio(struct clone_info *ci,
return tio; return tio;
} }
static void __flush_target(struct clone_info *ci, struct dm_target *ti, static void __issue_target_request(struct clone_info *ci, struct dm_target *ti,
unsigned flush_nr) unsigned request_nr, sector_t len)
{ {
struct dm_target_io *tio = alloc_tio(ci, ti); struct dm_target_io *tio = alloc_tio(ci, ti);
struct bio *clone; struct bio *clone;
tio->info.flush_request = flush_nr; tio->info.target_request_nr = request_nr;
clone = bio_alloc_bioset(GFP_NOIO, 0, ci->md->bs); /*
* Discard requests require the bio's inline iovecs be initialized.
* ci->bio->bi_max_vecs is BIO_INLINE_VECS anyway, for both flush
* and discard, so no need for concern about wasted bvec allocations.
*/
clone = bio_alloc_bioset(GFP_NOIO, ci->bio->bi_max_vecs, ci->md->bs);
__bio_clone(clone, ci->bio); __bio_clone(clone, ci->bio);
clone->bi_destructor = dm_bio_destructor; clone->bi_destructor = dm_bio_destructor;
if (len) {
clone->bi_sector = ci->sector;
clone->bi_size = to_bytes(len);
}
__map_bio(ti, clone, tio); __map_bio(ti, clone, tio);
} }
static void __issue_target_requests(struct clone_info *ci, struct dm_target *ti,
unsigned num_requests, sector_t len)
{
unsigned request_nr;
for (request_nr = 0; request_nr < num_requests; request_nr++)
__issue_target_request(ci, ti, request_nr, len);
}
static int __clone_and_map_empty_barrier(struct clone_info *ci) static int __clone_and_map_empty_barrier(struct clone_info *ci)
{ {
unsigned target_nr = 0, flush_nr; unsigned target_nr = 0;
struct dm_target *ti; struct dm_target *ti;
while ((ti = dm_table_get_target(ci->map, target_nr++))) while ((ti = dm_table_get_target(ci->map, target_nr++)))
for (flush_nr = 0; flush_nr < ti->num_flush_requests; __issue_target_requests(ci, ti, ti->num_flush_requests, 0);
flush_nr++)
__flush_target(ci, ti, flush_nr);
ci->sector_count = 0; ci->sector_count = 0;
return 0; return 0;
} }
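
With __issue_target_requests() in place, an empty barrier fans out into num_flush_requests clones per target, each tagged with its own target_request_nr. A quick sketch of how many flush clones a hypothetical two-target table would generate (target names and counts are made up):

#include <stdio.h>

struct toy_target { const char *name; unsigned num_flush_requests; };

int main(void)
{
    /* Hypothetical table: a linear target and a 3-way stripe target. */
    struct toy_target table[] = {
        { "linear", 1 },   /* one underlying device -> one flush clone */
        { "stripe", 3 },   /* one flush clone per stripe device        */
    };
    unsigned total = 0;

    for (unsigned t = 0; t < sizeof(table) / sizeof(table[0]); t++)
        for (unsigned nr = 0; nr < table[t].num_flush_requests; nr++) {
            printf("clone for %s, target_request_nr=%u\n",
                   table[t].name, nr);
            total++;
        }

    printf("total flush clones: %u\n", total);   /* 4 */
    return 0;
}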
/*
* Perform all io with a single clone.
*/
static void __clone_and_map_simple(struct clone_info *ci, struct dm_target *ti)
{
struct bio *clone, *bio = ci->bio;
struct dm_target_io *tio;
tio = alloc_tio(ci, ti);
clone = clone_bio(bio, ci->sector, ci->idx,
bio->bi_vcnt - ci->idx, ci->sector_count,
ci->md->bs);
__map_bio(ti, clone, tio);
ci->sector_count = 0;
}
static int __clone_and_map_discard(struct clone_info *ci)
{
struct dm_target *ti;
sector_t len;
do {
ti = dm_table_find_target(ci->map, ci->sector);
if (!dm_target_is_valid(ti))
return -EIO;
/*
* Even though the device advertised discard support,
* reconfiguration might have changed that since the
* check was performed.
*/
if (!ti->num_discard_requests)
return -EOPNOTSUPP;
len = min(ci->sector_count, max_io_len_target_boundary(ci->sector, ti));
__issue_target_requests(ci, ti, ti->num_discard_requests, len);
ci->sector += len;
} while (ci->sector_count -= len);
return 0;
}
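
Since a discard bio carries no data pages, the loop above only has to split it on target boundaries: each pass clamps the remaining length to the current target, issues the clones, and advances. A runnable sketch of that splitting over a hypothetical two-target table, where a 30-sector discard starting at sector 90 becomes one 10-sector and one 20-sector piece:

#include <stdint.h>
#include <stdio.h>

typedef uint64_t sector_t;

struct toy_target { sector_t begin; sector_t len; };

/* Hypothetical table: target A covers sectors 0..99, target B covers 100..199. */
static struct toy_target table[] = { { 0, 100 }, { 100, 100 } };

static struct toy_target *find_target(sector_t sector)
{
    for (unsigned i = 0; i < sizeof(table) / sizeof(table[0]); i++)
        if (sector >= table[i].begin &&
            sector < table[i].begin + table[i].len)
            return &table[i];
    return NULL;
}

int main(void)
{
    sector_t sector = 90, count = 30, len;   /* discard: 30 sectors at 90 */

    /* Mirrors __clone_and_map_discard(): clamp to each target boundary. */
    do {
        struct toy_target *ti = find_target(sector);
        if (!ti)
            return 1;

        sector_t room = ti->len - (sector - ti->begin);
        len = count < room ? count : room;

        printf("discard clone: target@%llu sector=%llu len=%llu\n",
               (unsigned long long)ti->begin,
               (unsigned long long)sector,
               (unsigned long long)len);

        sector += len;
    } while (count -= len);

    return 0;
}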
static int __clone_and_map(struct clone_info *ci) static int __clone_and_map(struct clone_info *ci)
{ {
struct bio *clone, *bio = ci->bio; struct bio *clone, *bio = ci->bio;
...@@ -1211,27 +1292,21 @@ static int __clone_and_map(struct clone_info *ci) ...@@ -1211,27 +1292,21 @@ static int __clone_and_map(struct clone_info *ci)
if (unlikely(bio_empty_barrier(bio))) if (unlikely(bio_empty_barrier(bio)))
return __clone_and_map_empty_barrier(ci); return __clone_and_map_empty_barrier(ci);
if (unlikely(bio->bi_rw & REQ_DISCARD))
return __clone_and_map_discard(ci);
ti = dm_table_find_target(ci->map, ci->sector); ti = dm_table_find_target(ci->map, ci->sector);
if (!dm_target_is_valid(ti)) if (!dm_target_is_valid(ti))
return -EIO; return -EIO;
max = max_io_len(ci->md, ci->sector, ti); max = max_io_len(ci->sector, ti);
/*
* Allocate a target io object.
*/
tio = alloc_tio(ci, ti);
if (ci->sector_count <= max) { if (ci->sector_count <= max) {
/* /*
* Optimise for the simple case where we can do all of * Optimise for the simple case where we can do all of
* the remaining io with a single clone. * the remaining io with a single clone.
*/ */
clone = clone_bio(bio, ci->sector, ci->idx, __clone_and_map_simple(ci, ti);
bio->bi_vcnt - ci->idx, ci->sector_count,
ci->md->bs);
__map_bio(ti, clone, tio);
ci->sector_count = 0;
} else if (to_sector(bio->bi_io_vec[ci->idx].bv_len) <= max) { } else if (to_sector(bio->bi_io_vec[ci->idx].bv_len) <= max) {
/* /*
...@@ -1252,6 +1327,7 @@ static int __clone_and_map(struct clone_info *ci) ...@@ -1252,6 +1327,7 @@ static int __clone_and_map(struct clone_info *ci)
len += bv_len; len += bv_len;
} }
tio = alloc_tio(ci, ti);
clone = clone_bio(bio, ci->sector, ci->idx, i - ci->idx, len, clone = clone_bio(bio, ci->sector, ci->idx, i - ci->idx, len,
ci->md->bs); ci->md->bs);
__map_bio(ti, clone, tio); __map_bio(ti, clone, tio);
...@@ -1274,13 +1350,12 @@ static int __clone_and_map(struct clone_info *ci) ...@@ -1274,13 +1350,12 @@ static int __clone_and_map(struct clone_info *ci)
if (!dm_target_is_valid(ti)) if (!dm_target_is_valid(ti))
return -EIO; return -EIO;
max = max_io_len(ci->md, ci->sector, ti); max = max_io_len(ci->sector, ti);
tio = alloc_tio(ci, ti);
} }
len = min(remaining, max); len = min(remaining, max);
tio = alloc_tio(ci, ti);
clone = split_bvec(bio, ci->sector, ci->idx, clone = split_bvec(bio, ci->sector, ci->idx,
bv->bv_offset + offset, len, bv->bv_offset + offset, len,
ci->md->bs); ci->md->bs);
...@@ -1362,7 +1437,7 @@ static int dm_merge_bvec(struct request_queue *q, ...@@ -1362,7 +1437,7 @@ static int dm_merge_bvec(struct request_queue *q,
/* /*
* Find maximum amount of I/O that won't need splitting * Find maximum amount of I/O that won't need splitting
*/ */
max_sectors = min(max_io_len(md, bvm->bi_sector, ti), max_sectors = min(max_io_len(bvm->bi_sector, ti),
(sector_t) BIO_MAX_SECTORS); (sector_t) BIO_MAX_SECTORS);
max_size = (max_sectors << SECTOR_SHIFT) - bvm->bi_size; max_size = (max_sectors << SECTOR_SHIFT) - bvm->bi_size;
if (max_size < 0) if (max_size < 0)
...@@ -1845,6 +1920,28 @@ static const struct block_device_operations dm_blk_dops; ...@@ -1845,6 +1920,28 @@ static const struct block_device_operations dm_blk_dops;
static void dm_wq_work(struct work_struct *work); static void dm_wq_work(struct work_struct *work);
static void dm_rq_barrier_work(struct work_struct *work); static void dm_rq_barrier_work(struct work_struct *work);
static void dm_init_md_queue(struct mapped_device *md)
{
/*
* Request-based dm devices cannot be stacked on top of bio-based dm
* devices. The type of this dm device has not been decided yet.
* The type is decided at the first table loading time.
* To prevent problematic device stacking, clear the queue flag
* for request stacking support until then.
*
* This queue is new, so no concurrency on the queue_flags.
*/
queue_flag_clear_unlocked(QUEUE_FLAG_STACKABLE, md->queue);
md->queue->queuedata = md;
md->queue->backing_dev_info.congested_fn = dm_any_congested;
md->queue->backing_dev_info.congested_data = md;
blk_queue_make_request(md->queue, dm_request);
blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY);
md->queue->unplug_fn = dm_unplug_all;
blk_queue_merge_bvec(md->queue, dm_merge_bvec);
}
/* /*
* Allocate and initialise a blank device with a given minor. * Allocate and initialise a blank device with a given minor.
*/ */
...@@ -1870,8 +1967,10 @@ static struct mapped_device *alloc_dev(int minor) ...@@ -1870,8 +1967,10 @@ static struct mapped_device *alloc_dev(int minor)
if (r < 0) if (r < 0)
goto bad_minor; goto bad_minor;
md->type = DM_TYPE_NONE;
init_rwsem(&md->io_lock); init_rwsem(&md->io_lock);
mutex_init(&md->suspend_lock); mutex_init(&md->suspend_lock);
mutex_init(&md->type_lock);
spin_lock_init(&md->deferred_lock); spin_lock_init(&md->deferred_lock);
spin_lock_init(&md->barrier_error_lock); spin_lock_init(&md->barrier_error_lock);
rwlock_init(&md->map_lock); rwlock_init(&md->map_lock);
...@@ -1882,33 +1981,11 @@ static struct mapped_device *alloc_dev(int minor) ...@@ -1882,33 +1981,11 @@ static struct mapped_device *alloc_dev(int minor)
INIT_LIST_HEAD(&md->uevent_list); INIT_LIST_HEAD(&md->uevent_list);
spin_lock_init(&md->uevent_lock); spin_lock_init(&md->uevent_lock);
md->queue = blk_init_queue(dm_request_fn, NULL); md->queue = blk_alloc_queue(GFP_KERNEL);
if (!md->queue) if (!md->queue)
goto bad_queue; goto bad_queue;
/* dm_init_md_queue(md);
* Request-based dm devices cannot be stacked on top of bio-based dm
* devices. The type of this dm device has not been decided yet,
* although we initialized the queue using blk_init_queue().
* The type is decided at the first table loading time.
* To prevent problematic device stacking, clear the queue flag
* for request stacking support until then.
*
* This queue is new, so no concurrency on the queue_flags.
*/
queue_flag_clear_unlocked(QUEUE_FLAG_STACKABLE, md->queue);
md->saved_make_request_fn = md->queue->make_request_fn;
md->queue->queuedata = md;
md->queue->backing_dev_info.congested_fn = dm_any_congested;
md->queue->backing_dev_info.congested_data = md;
blk_queue_make_request(md->queue, dm_request);
blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY);
md->queue->unplug_fn = dm_unplug_all;
blk_queue_merge_bvec(md->queue, dm_merge_bvec);
blk_queue_softirq_done(md->queue, dm_softirq_done);
blk_queue_prep_rq(md->queue, dm_prep_fn);
blk_queue_lld_busy(md->queue, dm_lld_busy);
blk_queue_ordered(md->queue, QUEUE_ORDERED_DRAIN_FLUSH);
md->disk = alloc_disk(1); md->disk = alloc_disk(1);
if (!md->disk) if (!md->disk)
...@@ -2123,6 +2200,72 @@ int dm_create(int minor, struct mapped_device **result) ...@@ -2123,6 +2200,72 @@ int dm_create(int minor, struct mapped_device **result)
return 0; return 0;
} }
/*
* Functions to manage md->type.
* All are required to hold md->type_lock.
*/
void dm_lock_md_type(struct mapped_device *md)
{
mutex_lock(&md->type_lock);
}
void dm_unlock_md_type(struct mapped_device *md)
{
mutex_unlock(&md->type_lock);
}
void dm_set_md_type(struct mapped_device *md, unsigned type)
{
md->type = type;
}
unsigned dm_get_md_type(struct mapped_device *md)
{
return md->type;
}
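
These helpers exist so the ioctl path can fix the bio-based vs request-based decision at first table load. The caller is not part of this hunk; the sketch below shows one plausible use of the lock, assuming a first-table-wins policy (stand-in types, with a pthread mutex in place of the kernel mutex):

#include <pthread.h>
#include <stdio.h>

enum dm_type { DM_TYPE_NONE, DM_TYPE_BIO_BASED, DM_TYPE_REQUEST_BASED };

/* Toy stand-in for the relevant fields of struct mapped_device. */
struct toy_md {
    enum dm_type type;
    pthread_mutex_t type_lock;
};

/*
 * Sketch of the policy implied by type_lock: the first bound table
 * fixes the device type; later tables must use the same type.
 */
static int bind_table_type(struct toy_md *md, enum dm_type table_type)
{
    int r = 0;

    pthread_mutex_lock(&md->type_lock);
    if (md->type == DM_TYPE_NONE)
        md->type = table_type;              /* initial table load */
    else if (md->type != table_type)
        r = -1;                             /* type is immutable now */
    pthread_mutex_unlock(&md->type_lock);
    return r;
}

int main(void)
{
    struct toy_md md = { DM_TYPE_NONE, PTHREAD_MUTEX_INITIALIZER };

    printf("first load (request based): %d\n",
           bind_table_type(&md, DM_TYPE_REQUEST_BASED));   /*  0 */
    printf("reload as bio based:        %d\n",
           bind_table_type(&md, DM_TYPE_BIO_BASED));        /* -1 */
    return 0;
}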
/*
* Fully initialize a request-based queue (->elevator, ->request_fn, etc).
*/
static int dm_init_request_based_queue(struct mapped_device *md)
{
struct request_queue *q = NULL;
if (md->queue->elevator)
return 1;
/* Fully initialize the queue */
q = blk_init_allocated_queue(md->queue, dm_request_fn, NULL);
if (!q)
return 0;
md->queue = q;
md->saved_make_request_fn = md->queue->make_request_fn;
dm_init_md_queue(md);
blk_queue_softirq_done(md->queue, dm_softirq_done);
blk_queue_prep_rq(md->queue, dm_prep_fn);
blk_queue_lld_busy(md->queue, dm_lld_busy);
blk_queue_ordered(md->queue, QUEUE_ORDERED_DRAIN_FLUSH);
elv_register_queue(md->queue);
return 1;
}
/*
* Setup the DM device's queue based on md's type
*/
int dm_setup_md_queue(struct mapped_device *md)
{
if ((dm_get_md_type(md) == DM_TYPE_REQUEST_BASED) &&
!dm_init_request_based_queue(md)) {
DMWARN("Cannot initialize queue for request-based mapped device");
return -EINVAL;
}
return 0;
}
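
Queue setup is now two-stage: alloc_dev() performs only the cheap bio-based initialisation via dm_init_md_queue(), and the elevator/request_fn work is deferred until dm_setup_md_queue() learns that the bound table is request based. A toy sketch of that lifecycle (stand-in types, no real block-layer calls):

#include <stdbool.h>
#include <stdio.h>

enum dm_type { DM_TYPE_NONE, DM_TYPE_BIO_BASED, DM_TYPE_REQUEST_BASED };

/* Stand-in device: tracks whether the heavy request-queue init ran. */
struct toy_md {
    enum dm_type type;
    bool request_queue_initialised;
};

/* Cheap init done unconditionally at creation (cf. dm_init_md_queue). */
static void init_md_queue(struct toy_md *md)
{
    md->request_queue_initialised = false;
}

/* Deferred init, only for request-based devices (cf. dm_setup_md_queue). */
static int setup_md_queue(struct toy_md *md)
{
    if (md->type == DM_TYPE_REQUEST_BASED &&
        !md->request_queue_initialised) {
        /* blk_init_allocated_queue(), elevator registration, etc. */
        md->request_queue_initialised = true;
    }
    return 0;
}

int main(void)
{
    struct toy_md md = { .type = DM_TYPE_NONE };

    init_md_queue(&md);                  /* at device creation             */
    md.type = DM_TYPE_REQUEST_BASED;     /* decided by first table load    */
    setup_md_queue(&md);                 /* now the full queue is built    */

    printf("request queue initialised: %d\n", md.request_queue_initialised);
    return 0;
}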
static struct mapped_device *dm_find_md(dev_t dev) static struct mapped_device *dm_find_md(dev_t dev)
{ {
struct mapped_device *md; struct mapped_device *md;
...@@ -2136,6 +2279,7 @@ static struct mapped_device *dm_find_md(dev_t dev) ...@@ -2136,6 +2279,7 @@ static struct mapped_device *dm_find_md(dev_t dev)
md = idr_find(&_minor_idr, minor); md = idr_find(&_minor_idr, minor);
if (md && (md == MINOR_ALLOCED || if (md && (md == MINOR_ALLOCED ||
(MINOR(disk_devt(dm_disk(md))) != minor) || (MINOR(disk_devt(dm_disk(md))) != minor) ||
dm_deleting_md(md) ||
test_bit(DMF_FREEING, &md->flags))) { test_bit(DMF_FREEING, &md->flags))) {
md = NULL; md = NULL;
goto out; goto out;
...@@ -2170,6 +2314,7 @@ void dm_set_mdptr(struct mapped_device *md, void *ptr) ...@@ -2170,6 +2314,7 @@ void dm_set_mdptr(struct mapped_device *md, void *ptr)
void dm_get(struct mapped_device *md) void dm_get(struct mapped_device *md)
{ {
atomic_inc(&md->holders); atomic_inc(&md->holders);
BUG_ON(test_bit(DMF_FREEING, &md->flags));
} }
const char *dm_device_name(struct mapped_device *md) const char *dm_device_name(struct mapped_device *md)
...@@ -2178,27 +2323,55 @@ const char *dm_device_name(struct mapped_device *md) ...@@ -2178,27 +2323,55 @@ const char *dm_device_name(struct mapped_device *md)
} }
EXPORT_SYMBOL_GPL(dm_device_name); EXPORT_SYMBOL_GPL(dm_device_name);
void dm_put(struct mapped_device *md) static void __dm_destroy(struct mapped_device *md, bool wait)
{ {
struct dm_table *map; struct dm_table *map;
BUG_ON(test_bit(DMF_FREEING, &md->flags)); might_sleep();
if (atomic_dec_and_lock(&md->holders, &_minor_lock)) { spin_lock(&_minor_lock);
map = dm_get_live_table(md); map = dm_get_live_table(md);
idr_replace(&_minor_idr, MINOR_ALLOCED, idr_replace(&_minor_idr, MINOR_ALLOCED, MINOR(disk_devt(dm_disk(md))));
MINOR(disk_devt(dm_disk(md)))); set_bit(DMF_FREEING, &md->flags);
set_bit(DMF_FREEING, &md->flags); spin_unlock(&_minor_lock);
spin_unlock(&_minor_lock);
if (!dm_suspended_md(md)) { if (!dm_suspended_md(md)) {
dm_table_presuspend_targets(map); dm_table_presuspend_targets(map);
dm_table_postsuspend_targets(map); dm_table_postsuspend_targets(map);
}
dm_sysfs_exit(md);
dm_table_put(map);
dm_table_destroy(__unbind(md));
free_dev(md);
} }
/*
* Rare, but there may be I/O requests still going to complete,
* for example. Wait for all references to disappear.
* No one should increment the reference count of the mapped_device,
* after the mapped_device state becomes DMF_FREEING.
*/
if (wait)
while (atomic_read(&md->holders))
msleep(1);
else if (atomic_read(&md->holders))
DMWARN("%s: Forcibly removing mapped_device still in use! (%d users)",
dm_device_name(md), atomic_read(&md->holders));
dm_sysfs_exit(md);
dm_table_put(map);
dm_table_destroy(__unbind(md));
free_dev(md);
}
void dm_destroy(struct mapped_device *md)
{
__dm_destroy(md, true);
}
void dm_destroy_immediate(struct mapped_device *md)
{
__dm_destroy(md, false);
}
void dm_put(struct mapped_device *md)
{
atomic_dec(&md->holders);
} }
EXPORT_SYMBOL_GPL(dm_put); EXPORT_SYMBOL_GPL(dm_put);
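
The refactor separates reference counting from teardown: dm_put() is now a plain decrement, while dm_destroy() marks the device DMF_FREEING and waits for the holder count to reach zero before freeing it; dm_destroy_immediate() skips the wait. A userspace sketch of that wait-for-holders pattern using C11 atomics (illustrative only; the kernel sleeps with msleep(1) rather than spinning):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct toy_md {
    atomic_int holders;
    bool freeing;
};

static void toy_get(struct toy_md *md) { atomic_fetch_add(&md->holders, 1); }
static void toy_put(struct toy_md *md) { atomic_fetch_sub(&md->holders, 1); }

/* Mirrors __dm_destroy(): block new lookups, then optionally wait. */
static void toy_destroy(struct toy_md *md, bool wait)
{
    md->freeing = true;           /* lookups would now refuse this md */

    if (wait) {
        while (atomic_load(&md->holders) > 0)
            ;                     /* kernel: msleep(1) per iteration */
    } else if (atomic_load(&md->holders) > 0) {
        printf("forcibly removing device still in use (%d users)\n",
               atomic_load(&md->holders));
    }
    /* suspend targets, destroy the table, free the device ... */
}

int main(void)
{
    struct toy_md md = { .holders = ATOMIC_VAR_INIT(0), .freeing = false };

    toy_get(&md);                 /* an opener takes a reference      */
    toy_put(&md);                 /* ... and drops it again           */
    toy_destroy(&md, true);       /* dm_destroy(): safe to tear down  */
    printf("destroyed, holders=%d\n", atomic_load(&md.holders));
    return 0;
}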
...@@ -2253,7 +2426,12 @@ static void process_barrier(struct mapped_device *md, struct bio *bio) ...@@ -2253,7 +2426,12 @@ static void process_barrier(struct mapped_device *md, struct bio *bio)
if (!bio_empty_barrier(bio)) { if (!bio_empty_barrier(bio)) {
__split_and_process_bio(md, bio); __split_and_process_bio(md, bio);
dm_flush(md); /*
* If the request isn't supported, don't waste time with
* the second flush.
*/
if (md->barrier_error != -EOPNOTSUPP)
dm_flush(md);
} }
if (md->barrier_error != DM_ENDIO_REQUEUE) if (md->barrier_error != DM_ENDIO_REQUEUE)
...@@ -2310,11 +2488,11 @@ static void dm_queue_flush(struct mapped_device *md) ...@@ -2310,11 +2488,11 @@ static void dm_queue_flush(struct mapped_device *md)
queue_work(md->wq, &md->work); queue_work(md->wq, &md->work);
} }
static void dm_rq_set_flush_nr(struct request *clone, unsigned flush_nr) static void dm_rq_set_target_request_nr(struct request *clone, unsigned request_nr)
{ {
struct dm_rq_target_io *tio = clone->end_io_data; struct dm_rq_target_io *tio = clone->end_io_data;
tio->info.flush_request = flush_nr; tio->info.target_request_nr = request_nr;
} }
/* Issue barrier requests to targets and wait for their completion. */ /* Issue barrier requests to targets and wait for their completion. */
...@@ -2332,7 +2510,7 @@ static int dm_rq_barrier(struct mapped_device *md) ...@@ -2332,7 +2510,7 @@ static int dm_rq_barrier(struct mapped_device *md)
ti = dm_table_get_target(map, i); ti = dm_table_get_target(map, i);
for (j = 0; j < ti->num_flush_requests; j++) { for (j = 0; j < ti->num_flush_requests; j++) {
clone = clone_rq(md->flush_request, md, GFP_NOIO); clone = clone_rq(md->flush_request, md, GFP_NOIO);
dm_rq_set_flush_nr(clone, j); dm_rq_set_target_request_nr(clone, j);
atomic_inc(&md->pending[rq_data_dir(clone)]); atomic_inc(&md->pending[rq_data_dir(clone)]);
map_request(ti, clone, md); map_request(ti, clone, md);
} }
...@@ -2398,13 +2576,6 @@ struct dm_table *dm_swap_table(struct mapped_device *md, struct dm_table *table) ...@@ -2398,13 +2576,6 @@ struct dm_table *dm_swap_table(struct mapped_device *md, struct dm_table *table)
goto out; goto out;
} }
/* cannot change the device type, once a table is bound */
if (md->map &&
(dm_table_get_type(md->map) != dm_table_get_type(table))) {
DMWARN("can't change the device type after a table is bound");
goto out;
}
map = __bind(md, table, &limits); map = __bind(md, table, &limits);
out: out:
......
...@@ -59,13 +59,20 @@ void dm_table_postsuspend_targets(struct dm_table *t); ...@@ -59,13 +59,20 @@ void dm_table_postsuspend_targets(struct dm_table *t);
int dm_table_resume_targets(struct dm_table *t); int dm_table_resume_targets(struct dm_table *t);
int dm_table_any_congested(struct dm_table *t, int bdi_bits); int dm_table_any_congested(struct dm_table *t, int bdi_bits);
int dm_table_any_busy_target(struct dm_table *t); int dm_table_any_busy_target(struct dm_table *t);
int dm_table_set_type(struct dm_table *t);
unsigned dm_table_get_type(struct dm_table *t); unsigned dm_table_get_type(struct dm_table *t);
bool dm_table_request_based(struct dm_table *t); bool dm_table_request_based(struct dm_table *t);
bool dm_table_supports_discards(struct dm_table *t);
int dm_table_alloc_md_mempools(struct dm_table *t); int dm_table_alloc_md_mempools(struct dm_table *t);
void dm_table_free_md_mempools(struct dm_table *t); void dm_table_free_md_mempools(struct dm_table *t);
struct dm_md_mempools *dm_table_get_md_mempools(struct dm_table *t); struct dm_md_mempools *dm_table_get_md_mempools(struct dm_table *t);
void dm_lock_md_type(struct mapped_device *md);
void dm_unlock_md_type(struct mapped_device *md);
void dm_set_md_type(struct mapped_device *md, unsigned type);
unsigned dm_get_md_type(struct mapped_device *md);
int dm_setup_md_queue(struct mapped_device *md);
/* /*
* To check the return value from dm_table_find_target(). * To check the return value from dm_table_find_target().
*/ */
...@@ -122,6 +129,11 @@ void dm_linear_exit(void); ...@@ -122,6 +129,11 @@ void dm_linear_exit(void);
int dm_stripe_init(void); int dm_stripe_init(void);
void dm_stripe_exit(void); void dm_stripe_exit(void);
/*
* mapped_device operations
*/
void dm_destroy(struct mapped_device *md);
void dm_destroy_immediate(struct mapped_device *md);
int dm_open_count(struct mapped_device *md); int dm_open_count(struct mapped_device *md);
int dm_lock_for_deletion(struct mapped_device *md); int dm_lock_for_deletion(struct mapped_device *md);
......
...@@ -22,7 +22,7 @@ typedef enum { STATUSTYPE_INFO, STATUSTYPE_TABLE } status_type_t; ...@@ -22,7 +22,7 @@ typedef enum { STATUSTYPE_INFO, STATUSTYPE_TABLE } status_type_t;
union map_info { union map_info {
void *ptr; void *ptr;
unsigned long long ll; unsigned long long ll;
unsigned flush_request; unsigned target_request_nr;
}; };
/* /*
...@@ -174,12 +174,18 @@ struct dm_target { ...@@ -174,12 +174,18 @@ struct dm_target {
* A number of zero-length barrier requests that will be submitted * A number of zero-length barrier requests that will be submitted
* to the target for the purpose of flushing cache. * to the target for the purpose of flushing cache.
* *
* The request number will be placed in union map_info->flush_request. * The request number will be placed in union map_info->target_request_nr.
* It is a responsibility of the target driver to remap these requests * It is a responsibility of the target driver to remap these requests
* to the real underlying devices. * to the real underlying devices.
*/ */
unsigned num_flush_requests; unsigned num_flush_requests;
/*
* The number of discard requests that will be submitted to the
 * target. map_info->target_request_nr is used just like num_flush_requests.
*/
unsigned num_discard_requests;
/* target specific data */ /* target specific data */
void *private; void *private;
...@@ -392,6 +398,12 @@ void *dm_vcalloc(unsigned long nmemb, unsigned long elem_size); ...@@ -392,6 +398,12 @@ void *dm_vcalloc(unsigned long nmemb, unsigned long elem_size);
#define dm_array_too_big(fixed, obj, num) \ #define dm_array_too_big(fixed, obj, num) \
((num) > (UINT_MAX - (fixed)) / (obj)) ((num) > (UINT_MAX - (fixed)) / (obj))
/*
* Sector offset taken relative to the start of the target instead of
* relative to the start of the device.
*/
#define dm_target_offset(ti, sector) ((sector) - (ti)->begin)
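
For a target that fronts several devices, target_request_nr is how the per-device flush or discard clones are told apart: clone 0 is remapped to the first underlying device, clone 1 to the second, and so on. A hedged sketch of such a dispatch for a hypothetical three-device target (this is not the real dm-stripe code):

#include <stdio.h>

/* Hypothetical target fronting several underlying devices. */
struct toy_multi_target {
    const char *devs[3];     /* underlying device names */
    unsigned num_devs;       /* also reported as num_flush_requests */
};

/* Sketch of a map callback for a zero-length flush (or discard) clone. */
static void map_control_request(const struct toy_multi_target *ti,
                                unsigned target_request_nr)
{
    /* Each numbered clone is steered to exactly one underlying device. */
    printf("target_request_nr %u -> %s\n",
           target_request_nr, ti->devs[target_request_nr % ti->num_devs]);
}

int main(void)
{
    const struct toy_multi_target ti = {
        .devs = { "sda", "sdb", "sdc" }, .num_devs = 3,
    };

    /* The dm core issues num_flush_requests clones, numbered 0..N-1. */
    for (unsigned nr = 0; nr < ti.num_devs; nr++)
        map_control_request(&ti, nr);
    return 0;
}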
static inline sector_t to_sector(unsigned long n) static inline sector_t to_sector(unsigned long n)
{ {
return (n >> SECTOR_SHIFT); return (n >> SECTOR_SHIFT);
......
...@@ -11,6 +11,7 @@ ...@@ -11,6 +11,7 @@
#include <linux/types.h> #include <linux/types.h>
#define DM_DIR "mapper" /* Slashes not supported */ #define DM_DIR "mapper" /* Slashes not supported */
#define DM_CONTROL_NODE "control"
#define DM_MAX_TYPE_NAME 16 #define DM_MAX_TYPE_NAME 16
#define DM_NAME_LEN 128 #define DM_NAME_LEN 128
#define DM_UUID_LEN 129 #define DM_UUID_LEN 129
...@@ -266,9 +267,9 @@ enum { ...@@ -266,9 +267,9 @@ enum {
#define DM_DEV_SET_GEOMETRY _IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl) #define DM_DEV_SET_GEOMETRY _IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl)
#define DM_VERSION_MAJOR 4 #define DM_VERSION_MAJOR 4
#define DM_VERSION_MINOR 17 #define DM_VERSION_MINOR 18
#define DM_VERSION_PATCHLEVEL 0 #define DM_VERSION_PATCHLEVEL 0
#define DM_VERSION_EXTRA "-ioctl (2010-03-05)" #define DM_VERSION_EXTRA "-ioctl (2010-06-29)"
/* Status bits */ /* Status bits */
#define DM_READONLY_FLAG (1 << 0) /* In/Out */ #define DM_READONLY_FLAG (1 << 0) /* In/Out */
......
...@@ -38,6 +38,7 @@ ...@@ -38,6 +38,7 @@
#define KVM_MINOR 232 #define KVM_MINOR 232
#define BTRFS_MINOR 234 #define BTRFS_MINOR 234
#define AUTOFS_MINOR 235 #define AUTOFS_MINOR 235
#define MAPPER_CTRL_MINOR 236
#define MISC_DYNAMIC_MINOR 255 #define MISC_DYNAMIC_MINOR 255
struct device; struct device;
......
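
Reserving misc minor 236 (MAPPER_CTRL_MINOR) is what makes autoloading work: opening a static /dev/mapper/control node causes the kernel to request the "char-major-10-236" module alias, which modprobe resolves to dm-mod. The sketch below shows how a driver typically claims such a fixed misc minor; the identifiers are illustrative and this is not the exact dm-ioctl registration code:

#include <linux/fs.h>
#include <linux/miscdevice.h>
#include <linux/module.h>

#define EXAMPLE_CTRL_MINOR 236          /* i.e. MAPPER_CTRL_MINOR */

static const struct file_operations example_ctl_fops = {
        .owner = THIS_MODULE,
};

static struct miscdevice example_ctl_dev = {
        .minor = EXAMPLE_CTRL_MINOR,    /* fixed minor, not MISC_DYNAMIC_MINOR */
        .name  = "mapper/control",      /* node created under /dev */
        .fops  = &example_ctl_fops,
};

static int __init example_init(void)
{
        return misc_register(&example_ctl_dev);
}

static void __exit example_exit(void)
{
        misc_deregister(&example_ctl_dev);
}

module_init(example_init);
module_exit(example_exit);

/* Expands to MODULE_ALIAS("char-major-10-236") so modprobe can autoload us. */
MODULE_ALIAS_MISCDEV(EXAMPLE_CTRL_MINOR);
MODULE_LICENSE("GPL");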