提交 ebb7c197 作者: Linus Torvalds

Merge tag 'dm-3.14-fixes-1' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm

Pull device mapper fixes from Mike Snitzer:
 "A few dm-cache fixes, an invalid ioctl handling fix for dm multipath,
  a couple immutable biovec fixups for dm mirror, and a few dm-thin
  fixes.

  There will likely be additional dm-thin metadata and data resize fixes
  to include in 3.14-rc6 next week.

  Note to stable-minded folks: Immutable biovecs were introduced in
  3.14, so the related fixups for dm mirror are not needed in stable@
  kernels"

* tag 'dm-3.14-fixes-1' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm:
  dm cache: fix truncation bug when mapping I/O to >2TB fast device
  dm thin: allow metadata space larger than supported to go unused
  dm mpath: fix stalls when handling invalid ioctls
  dm thin: fix the error path for the thin device constructor
  dm raid1: fix immutable biovec related BUG when retrying read bio
  dm io: fix I/O to multiple destinations
  dm thin: avoid metadata commit if a pool's thin devices haven't changed
  dm cache: do not add migration to completed list before unhooking bio
  dm cache: move hook_info into common portion of per_bio_data structure
...@@ -289,6 +289,7 @@ struct per_bio_data { ...@@ -289,6 +289,7 @@ struct per_bio_data {
bool tick:1; bool tick:1;
unsigned req_nr:2; unsigned req_nr:2;
struct dm_deferred_entry *all_io_entry; struct dm_deferred_entry *all_io_entry;
struct dm_hook_info hook_info;
/* /*
* writethrough fields. These MUST remain at the end of this * writethrough fields. These MUST remain at the end of this
...@@ -297,7 +298,6 @@ struct per_bio_data { ...@@ -297,7 +298,6 @@ struct per_bio_data {
*/ */
struct cache *cache; struct cache *cache;
dm_cblock_t cblock; dm_cblock_t cblock;
struct dm_hook_info hook_info;
struct dm_bio_details bio_details; struct dm_bio_details bio_details;
}; };
...@@ -671,15 +671,16 @@ static void remap_to_cache(struct cache *cache, struct bio *bio, ...@@ -671,15 +671,16 @@ static void remap_to_cache(struct cache *cache, struct bio *bio,
dm_cblock_t cblock) dm_cblock_t cblock)
{ {
sector_t bi_sector = bio->bi_iter.bi_sector; sector_t bi_sector = bio->bi_iter.bi_sector;
sector_t block = from_cblock(cblock);
bio->bi_bdev = cache->cache_dev->bdev; bio->bi_bdev = cache->cache_dev->bdev;
if (!block_size_is_power_of_two(cache)) if (!block_size_is_power_of_two(cache))
bio->bi_iter.bi_sector = bio->bi_iter.bi_sector =
(from_cblock(cblock) * cache->sectors_per_block) + (block * cache->sectors_per_block) +
sector_div(bi_sector, cache->sectors_per_block); sector_div(bi_sector, cache->sectors_per_block);
else else
bio->bi_iter.bi_sector = bio->bi_iter.bi_sector =
(from_cblock(cblock) << cache->sectors_per_block_shift) | (block << cache->sectors_per_block_shift) |
(bi_sector & (cache->sectors_per_block - 1)); (bi_sector & (cache->sectors_per_block - 1));
} }
...@@ -1010,13 +1011,15 @@ static void overwrite_endio(struct bio *bio, int err) ...@@ -1010,13 +1011,15 @@ static void overwrite_endio(struct bio *bio, int err)
struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size); struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
unsigned long flags; unsigned long flags;
dm_unhook_bio(&pb->hook_info, bio);
if (err) if (err)
mg->err = true; mg->err = true;
mg->requeue_holder = false;
spin_lock_irqsave(&cache->lock, flags); spin_lock_irqsave(&cache->lock, flags);
list_add_tail(&mg->list, &cache->completed_migrations); list_add_tail(&mg->list, &cache->completed_migrations);
dm_unhook_bio(&pb->hook_info, bio);
mg->requeue_holder = false;
spin_unlock_irqrestore(&cache->lock, flags); spin_unlock_irqrestore(&cache->lock, flags);
wake_worker(cache); wake_worker(cache);
......
...@@ -201,29 +201,28 @@ static void list_dp_init(struct dpages *dp, struct page_list *pl, unsigned offse ...@@ -201,29 +201,28 @@ static void list_dp_init(struct dpages *dp, struct page_list *pl, unsigned offse
/* /*
* Functions for getting the pages from a bvec. * Functions for getting the pages from a bvec.
*/ */
static void bio_get_page(struct dpages *dp, static void bio_get_page(struct dpages *dp, struct page **p,
struct page **p, unsigned long *len, unsigned *offset) unsigned long *len, unsigned *offset)
{ {
struct bio *bio = dp->context_ptr; struct bio_vec *bvec = dp->context_ptr;
struct bio_vec bvec = bio_iovec(bio); *p = bvec->bv_page;
*p = bvec.bv_page; *len = bvec->bv_len - dp->context_u;
*len = bvec.bv_len; *offset = bvec->bv_offset + dp->context_u;
*offset = bvec.bv_offset;
} }
static void bio_next_page(struct dpages *dp) static void bio_next_page(struct dpages *dp)
{ {
struct bio *bio = dp->context_ptr; struct bio_vec *bvec = dp->context_ptr;
struct bio_vec bvec = bio_iovec(bio); dp->context_ptr = bvec + 1;
dp->context_u = 0;
bio_advance(bio, bvec.bv_len);
} }
static void bio_dp_init(struct dpages *dp, struct bio *bio) static void bio_dp_init(struct dpages *dp, struct bio *bio)
{ {
dp->get_page = bio_get_page; dp->get_page = bio_get_page;
dp->next_page = bio_next_page; dp->next_page = bio_next_page;
dp->context_ptr = bio; dp->context_ptr = __bvec_iter_bvec(bio->bi_io_vec, bio->bi_iter);
dp->context_u = bio->bi_iter.bi_bvec_done;
} }
/* /*
......
...@@ -1626,8 +1626,11 @@ static int multipath_ioctl(struct dm_target *ti, unsigned int cmd, ...@@ -1626,8 +1626,11 @@ static int multipath_ioctl(struct dm_target *ti, unsigned int cmd,
/* /*
* Only pass ioctls through if the device sizes match exactly. * Only pass ioctls through if the device sizes match exactly.
*/ */
if (!r && ti->len != i_size_read(bdev->bd_inode) >> SECTOR_SHIFT) if (!bdev || ti->len != i_size_read(bdev->bd_inode) >> SECTOR_SHIFT) {
r = scsi_verify_blk_ioctl(NULL, cmd); int err = scsi_verify_blk_ioctl(NULL, cmd);
if (err)
r = err;
}
if (r == -ENOTCONN && !fatal_signal_pending(current)) if (r == -ENOTCONN && !fatal_signal_pending(current))
queue_work(kmultipathd, &m->process_queued_ios); queue_work(kmultipathd, &m->process_queued_ios);
......
...@@ -1244,6 +1244,9 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio, int error) ...@@ -1244,6 +1244,9 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio, int error)
dm_bio_restore(bd, bio); dm_bio_restore(bd, bio);
bio_record->details.bi_bdev = NULL; bio_record->details.bi_bdev = NULL;
atomic_inc(&bio->bi_remaining);
queue_bio(ms, bio, rw); queue_bio(ms, bio, rw);
return DM_ENDIO_INCOMPLETE; return DM_ENDIO_INCOMPLETE;
} }
......
...@@ -483,7 +483,7 @@ static int __write_initial_superblock(struct dm_pool_metadata *pmd) ...@@ -483,7 +483,7 @@ static int __write_initial_superblock(struct dm_pool_metadata *pmd)
disk_super->data_mapping_root = cpu_to_le64(pmd->root); disk_super->data_mapping_root = cpu_to_le64(pmd->root);
disk_super->device_details_root = cpu_to_le64(pmd->details_root); disk_super->device_details_root = cpu_to_le64(pmd->details_root);
disk_super->metadata_block_size = cpu_to_le32(THIN_METADATA_BLOCK_SIZE >> SECTOR_SHIFT); disk_super->metadata_block_size = cpu_to_le32(THIN_METADATA_BLOCK_SIZE);
disk_super->metadata_nr_blocks = cpu_to_le64(bdev_size >> SECTOR_TO_BLOCK_SHIFT); disk_super->metadata_nr_blocks = cpu_to_le64(bdev_size >> SECTOR_TO_BLOCK_SHIFT);
disk_super->data_block_size = cpu_to_le32(pmd->data_block_size); disk_super->data_block_size = cpu_to_le32(pmd->data_block_size);
...@@ -651,7 +651,7 @@ static int __create_persistent_data_objects(struct dm_pool_metadata *pmd, bool f ...@@ -651,7 +651,7 @@ static int __create_persistent_data_objects(struct dm_pool_metadata *pmd, bool f
{ {
int r; int r;
pmd->bm = dm_block_manager_create(pmd->bdev, THIN_METADATA_BLOCK_SIZE, pmd->bm = dm_block_manager_create(pmd->bdev, THIN_METADATA_BLOCK_SIZE << SECTOR_SHIFT,
THIN_METADATA_CACHE_SIZE, THIN_METADATA_CACHE_SIZE,
THIN_MAX_CONCURRENT_LOCKS); THIN_MAX_CONCURRENT_LOCKS);
if (IS_ERR(pmd->bm)) { if (IS_ERR(pmd->bm)) {
...@@ -1489,6 +1489,23 @@ bool dm_thin_changed_this_transaction(struct dm_thin_device *td) ...@@ -1489,6 +1489,23 @@ bool dm_thin_changed_this_transaction(struct dm_thin_device *td)
return r; return r;
} }
bool dm_pool_changed_this_transaction(struct dm_pool_metadata *pmd)
{
bool r = false;
struct dm_thin_device *td, *tmp;
down_read(&pmd->root_lock);
list_for_each_entry_safe(td, tmp, &pmd->thin_devices, list) {
if (td->changed) {
r = td->changed;
break;
}
}
up_read(&pmd->root_lock);
return r;
}
bool dm_thin_aborted_changes(struct dm_thin_device *td) bool dm_thin_aborted_changes(struct dm_thin_device *td)
{ {
bool r; bool r;
......
...@@ -9,16 +9,14 @@ ...@@ -9,16 +9,14 @@
#include "persistent-data/dm-block-manager.h" #include "persistent-data/dm-block-manager.h"
#include "persistent-data/dm-space-map.h" #include "persistent-data/dm-space-map.h"
#include "persistent-data/dm-space-map-metadata.h"
#define THIN_METADATA_BLOCK_SIZE 4096 #define THIN_METADATA_BLOCK_SIZE DM_SM_METADATA_BLOCK_SIZE
/* /*
* The metadata device is currently limited in size. * The metadata device is currently limited in size.
*
* We have one block of index, which can hold 255 index entries. Each
* index entry contains allocation info about 16k metadata blocks.
*/ */
#define THIN_METADATA_MAX_SECTORS (255 * (1 << 14) * (THIN_METADATA_BLOCK_SIZE / (1 << SECTOR_SHIFT))) #define THIN_METADATA_MAX_SECTORS DM_SM_METADATA_MAX_SECTORS
/* /*
* A metadata device larger than 16GB triggers a warning. * A metadata device larger than 16GB triggers a warning.
...@@ -161,6 +159,8 @@ int dm_thin_remove_block(struct dm_thin_device *td, dm_block_t block); ...@@ -161,6 +159,8 @@ int dm_thin_remove_block(struct dm_thin_device *td, dm_block_t block);
*/ */
bool dm_thin_changed_this_transaction(struct dm_thin_device *td); bool dm_thin_changed_this_transaction(struct dm_thin_device *td);
bool dm_pool_changed_this_transaction(struct dm_pool_metadata *pmd);
bool dm_thin_aborted_changes(struct dm_thin_device *td); bool dm_thin_aborted_changes(struct dm_thin_device *td);
int dm_thin_get_highest_mapped_block(struct dm_thin_device *td, int dm_thin_get_highest_mapped_block(struct dm_thin_device *td,
......
...@@ -1357,7 +1357,8 @@ static void process_deferred_bios(struct pool *pool) ...@@ -1357,7 +1357,8 @@ static void process_deferred_bios(struct pool *pool)
bio_list_init(&pool->deferred_flush_bios); bio_list_init(&pool->deferred_flush_bios);
spin_unlock_irqrestore(&pool->lock, flags); spin_unlock_irqrestore(&pool->lock, flags);
if (bio_list_empty(&bios) && !need_commit_due_to_time(pool)) if (bio_list_empty(&bios) &&
!(dm_pool_changed_this_transaction(pool->pmd) && need_commit_due_to_time(pool)))
return; return;
if (commit(pool)) { if (commit(pool)) {
...@@ -1999,16 +2000,27 @@ static void metadata_low_callback(void *context) ...@@ -1999,16 +2000,27 @@ static void metadata_low_callback(void *context)
dm_table_event(pool->ti->table); dm_table_event(pool->ti->table);
} }
static sector_t get_metadata_dev_size(struct block_device *bdev) static sector_t get_dev_size(struct block_device *bdev)
{
return i_size_read(bdev->bd_inode) >> SECTOR_SHIFT;
}
static void warn_if_metadata_device_too_big(struct block_device *bdev)
{ {
sector_t metadata_dev_size = i_size_read(bdev->bd_inode) >> SECTOR_SHIFT; sector_t metadata_dev_size = get_dev_size(bdev);
char buffer[BDEVNAME_SIZE]; char buffer[BDEVNAME_SIZE];
if (metadata_dev_size > THIN_METADATA_MAX_SECTORS_WARNING) { if (metadata_dev_size > THIN_METADATA_MAX_SECTORS_WARNING)
DMWARN("Metadata device %s is larger than %u sectors: excess space will not be used.", DMWARN("Metadata device %s is larger than %u sectors: excess space will not be used.",
bdevname(bdev, buffer), THIN_METADATA_MAX_SECTORS); bdevname(bdev, buffer), THIN_METADATA_MAX_SECTORS);
metadata_dev_size = THIN_METADATA_MAX_SECTORS_WARNING; }
}
static sector_t get_metadata_dev_size(struct block_device *bdev)
{
sector_t metadata_dev_size = get_dev_size(bdev);
if (metadata_dev_size > THIN_METADATA_MAX_SECTORS)
metadata_dev_size = THIN_METADATA_MAX_SECTORS;
return metadata_dev_size; return metadata_dev_size;
} }
...@@ -2017,7 +2029,7 @@ static dm_block_t get_metadata_dev_size_in_blocks(struct block_device *bdev) ...@@ -2017,7 +2029,7 @@ static dm_block_t get_metadata_dev_size_in_blocks(struct block_device *bdev)
{ {
sector_t metadata_dev_size = get_metadata_dev_size(bdev); sector_t metadata_dev_size = get_metadata_dev_size(bdev);
sector_div(metadata_dev_size, THIN_METADATA_BLOCK_SIZE >> SECTOR_SHIFT); sector_div(metadata_dev_size, THIN_METADATA_BLOCK_SIZE);
return metadata_dev_size; return metadata_dev_size;
} }
...@@ -2095,12 +2107,7 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv) ...@@ -2095,12 +2107,7 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv)
ti->error = "Error opening metadata block device"; ti->error = "Error opening metadata block device";
goto out_unlock; goto out_unlock;
} }
warn_if_metadata_device_too_big(metadata_dev->bdev);
/*
* Run for the side-effect of possibly issuing a warning if the
* device is too big.
*/
(void) get_metadata_dev_size(metadata_dev->bdev);
r = dm_get_device(ti, argv[1], FMODE_READ | FMODE_WRITE, &data_dev); r = dm_get_device(ti, argv[1], FMODE_READ | FMODE_WRITE, &data_dev);
if (r) { if (r) {
...@@ -2287,6 +2294,7 @@ static int maybe_resize_metadata_dev(struct dm_target *ti, bool *need_commit) ...@@ -2287,6 +2294,7 @@ static int maybe_resize_metadata_dev(struct dm_target *ti, bool *need_commit)
return -EINVAL; return -EINVAL;
} else if (metadata_dev_size > sb_metadata_dev_size) { } else if (metadata_dev_size > sb_metadata_dev_size) {
warn_if_metadata_device_too_big(pool->md_dev);
DMINFO("%s: growing the metadata device from %llu to %llu blocks", DMINFO("%s: growing the metadata device from %llu to %llu blocks",
dm_device_name(pool->pool_md), dm_device_name(pool->pool_md),
sb_metadata_dev_size, metadata_dev_size); sb_metadata_dev_size, metadata_dev_size);
...@@ -2894,6 +2902,7 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv) ...@@ -2894,6 +2902,7 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv)
if (get_pool_mode(tc->pool) == PM_FAIL) { if (get_pool_mode(tc->pool) == PM_FAIL) {
ti->error = "Couldn't open thin device, Pool is in fail mode"; ti->error = "Couldn't open thin device, Pool is in fail mode";
r = -EINVAL;
goto bad_thin_open; goto bad_thin_open;
} }
...@@ -2905,7 +2914,7 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv) ...@@ -2905,7 +2914,7 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv)
r = dm_set_target_max_io_len(ti, tc->pool->sectors_per_block); r = dm_set_target_max_io_len(ti, tc->pool->sectors_per_block);
if (r) if (r)
goto bad_thin_open; goto bad_target_max_io_len;
ti->num_flush_bios = 1; ti->num_flush_bios = 1;
ti->flush_supported = true; ti->flush_supported = true;
...@@ -2926,6 +2935,8 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv) ...@@ -2926,6 +2935,8 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv)
return 0; return 0;
bad_target_max_io_len:
dm_pool_close_thin_device(tc->td);
bad_thin_open: bad_thin_open:
__pool_dec(tc->pool); __pool_dec(tc->pool);
bad_pool_lookup: bad_pool_lookup:
......
...@@ -680,6 +680,8 @@ int dm_sm_metadata_create(struct dm_space_map *sm, ...@@ -680,6 +680,8 @@ int dm_sm_metadata_create(struct dm_space_map *sm,
if (r) if (r)
return r; return r;
if (nr_blocks > DM_SM_METADATA_MAX_BLOCKS)
nr_blocks = DM_SM_METADATA_MAX_BLOCKS;
r = sm_ll_extend(&smm->ll, nr_blocks); r = sm_ll_extend(&smm->ll, nr_blocks);
if (r) if (r)
return r; return r;
......
...@@ -9,6 +9,17 @@ ...@@ -9,6 +9,17 @@
#include "dm-transaction-manager.h" #include "dm-transaction-manager.h"
#define DM_SM_METADATA_BLOCK_SIZE (4096 >> SECTOR_SHIFT)
/*
* The metadata device is currently limited in size.
*
* We have one block of index, which can hold 255 index entries. Each
* index entry contains allocation info about ~16k metadata blocks.
*/
#define DM_SM_METADATA_MAX_BLOCKS (255 * ((1 << 14) - 64))
#define DM_SM_METADATA_MAX_SECTORS (DM_SM_METADATA_MAX_BLOCKS * DM_SM_METADATA_BLOCK_SIZE)
/* /*
* Unfortunately we have to use two-phase construction due to the cycle * Unfortunately we have to use two-phase construction due to the cycle
* between the tm and sm. * between the tm and sm.
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册