提交 929254d8 编写于 作者: L Linus Torvalds

Merge tag 'dm-3.18' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm

Pull device-mapper updates from Mike Snitzer:
 "I rebased the DM tree on top of linux-block.git's 'for-3.18/core' at
  the beginning of October because DM core now depends on the newly
  introduced bioset_create_nobvec() interface.

  Summary:

   - fix DM's long-standing excessive use of memory by leveraging the
     new bioset_create_nobvec() interface when creating the DM's bioset

   - fix a few bugs in dm-bufio and dm-log-userspace

   - add DM core support for a DM multipath use-case that requires
     loading DM tables that contain devices that have failed (by
     allowing active and inactive DM tables to share dm_devs)

   - add discard support to the DM raid target; like MD raid456 the user
     must opt-in to raid456 discard support by specifying the
     devices_handle_discard_safely=Y module param"

* tag 'dm-3.18' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm:
  dm log userspace: fix memory leak in dm_ulog_tfr_init failure path
  dm bufio: when done scanning return from __scan immediately
  dm bufio: update last_accessed when relinking a buffer
  dm raid: add discard support for RAID levels 4, 5 and 6
  dm raid: add discard support for RAID levels 1 and 10
  dm: allow active and inactive tables to share dm_devs
  dm mpath: stop queueing IO when no valid paths exist
  dm: use bioset_create_nobvec()
  dm: remove nr_iovecs parameter from alloc_tio()
......@@ -465,6 +465,7 @@ static void __relink_lru(struct dm_buffer *b, int dirty)
c->n_buffers[dirty]++;
b->list_mode = dirty;
list_move(&b->lru_list, &c->lru[dirty]);
b->last_accessed = jiffies;
}
/*----------------------------------------------------------------
......@@ -1471,9 +1472,9 @@ static long __scan(struct dm_bufio_client *c, unsigned long nr_to_scan,
list_for_each_entry_safe_reverse(b, tmp, &c->lru[l], lru_list) {
freed += __cleanup_old_buffer(b, gfp_mask, 0);
if (!--nr_to_scan)
break;
return freed;
dm_bufio_cond_resched();
}
dm_bufio_cond_resched();
}
return freed;
}
......
......@@ -1418,7 +1418,7 @@ static void retrieve_deps(struct dm_table *table,
deps->count = count;
count = 0;
list_for_each_entry (dd, dm_table_get_devices(table), list)
deps->dev[count++] = huge_encode_dev(dd->dm_dev.bdev->bd_dev);
deps->dev[count++] = huge_encode_dev(dd->dm_dev->bdev->bd_dev);
param->data_size = param->data_start + needed;
}
......
......@@ -272,7 +272,7 @@ int dm_ulog_tfr_init(void)
r = cn_add_callback(&ulog_cn_id, "dmlogusr", cn_ulog_callback);
if (r) {
cn_del_callback(&ulog_cn_id);
kfree(prealloced_cn_msg);
return r;
}
......
......@@ -317,8 +317,10 @@ static void __choose_pgpath(struct multipath *m, size_t nr_bytes)
struct priority_group *pg;
unsigned bypassed = 1;
if (!m->nr_valid_paths)
if (!m->nr_valid_paths) {
m->queue_io = 0;
goto failed;
}
/* Were we instructed to switch PG? */
if (m->next_pg) {
......
/*
* Copyright (C) 2010-2011 Neil Brown
* Copyright (C) 2010-2011 Red Hat, Inc. All rights reserved.
* Copyright (C) 2010-2014 Red Hat, Inc. All rights reserved.
*
* This file is released under the GPL.
*/
......@@ -18,6 +18,8 @@
#define DM_MSG_PREFIX "raid"
static bool devices_handle_discard_safely = false;
/*
* The following flags are used by dm-raid.c to set up the array state.
* They must be cleared before md_run is called.
......@@ -475,6 +477,8 @@ static int validate_raid_redundancy(struct raid_set *rs)
* will form the "stripe"
* [[no]sync] Force or prevent recovery of the
* entire array
* [devices_handle_discard_safely] Allow discards on RAID4/5/6; useful if RAID
* member device(s) properly support TRIM/UNMAP
* [rebuild <idx>] Rebuild the drive indicated by the index
* [daemon_sleep <ms>] Time between bitmap daemon work to
* clear bits
......@@ -1149,6 +1153,49 @@ static int analyse_superblocks(struct dm_target *ti, struct raid_set *rs)
return 0;
}
/*
 * Enable/disable discard support on RAID set depending on
 * RAID level and discard properties of underlying RAID members.
 *
 * Writes ti->discards_supported and, when discards end up enabled,
 * ti->split_discard_bios and ti->num_discard_bios.  Discards are left
 * disabled as soon as one present member fails a check below.
 */
static void configure_discard_support(struct dm_target *ti, struct raid_set *rs)
{
	int i;
	bool raid456;

	/* Assume discards not supported until after checks below. */
	ti->discards_supported = false;

	/* RAID level 4,5,6 require discard_zeroes_data for data integrity! */
	raid456 = (rs->md.level == 4 || rs->md.level == 5 || rs->md.level == 6);

	for (i = 0; i < rs->md.raid_disks; i++) {
		struct request_queue *q;

		/*
		 * A member slot may have no block device attached
		 * (NOTE(review): dm-raid tables can name an absent device
		 * as "-" -- confirm against the table parser).  There is no
		 * queue to inspect in that case, so skip the slot instead
		 * of handing a NULL bdev to bdev_get_queue().
		 */
		if (!rs->dev[i].rdev.bdev)
			continue;

		q = bdev_get_queue(rs->dev[i].rdev.bdev);
		if (!q || !blk_queue_discard(q))
			return;

		if (raid456) {
			if (!q->limits.discard_zeroes_data)
				return;
			/* The user must explicitly opt in via the module parameter. */
			if (!devices_handle_discard_safely) {
				DMERR("raid456 discard support disabled due to discard_zeroes_data uncertainty.");
				DMERR("Set dm-raid.devices_handle_discard_safely=Y to override.");
				return;
			}
		}
	}

	/* All RAID members properly support discards */
	ti->discards_supported = true;

	/*
	 * RAID1 and RAID10 personalities require bio splitting,
	 * RAID0/4/5/6 don't and process large discard bios properly.
	 */
	ti->split_discard_bios = !!(rs->md.level == 1 || rs->md.level == 10);
	ti->num_discard_bios = 1;
}
/*
* Construct a RAID4/5/6 mapping:
* Args:
......@@ -1231,6 +1278,11 @@ static int raid_ctr(struct dm_target *ti, unsigned argc, char **argv)
ti->private = rs;
ti->num_flush_bios = 1;
/*
* Disable/enable discard support on RAID set.
*/
configure_discard_support(ti, rs);
mutex_lock(&rs->md.reconfig_mutex);
ret = md_run(&rs->md);
rs->md.in_sync = 0; /* Assume already marked dirty */
......@@ -1652,7 +1704,7 @@ static void raid_resume(struct dm_target *ti)
static struct target_type raid_target = {
.name = "raid",
.version = {1, 5, 2},
.version = {1, 6, 0},
.module = THIS_MODULE,
.ctr = raid_ctr,
.dtr = raid_dtr,
......@@ -1683,6 +1735,10 @@ static void __exit dm_raid_exit(void)
module_init(dm_raid_init);
module_exit(dm_raid_exit);
module_param(devices_handle_discard_safely, bool, 0644);
MODULE_PARM_DESC(devices_handle_discard_safely,
"Set to Y if all devices in each array reliably return zeroes on reads from discarded regions");
MODULE_DESCRIPTION(DM_NAME " raid4/5/6 target");
MODULE_ALIAS("dm-raid1");
MODULE_ALIAS("dm-raid10");
......
......@@ -210,15 +210,16 @@ int dm_table_create(struct dm_table **result, fmode_t mode,
return 0;
}
static void free_devices(struct list_head *devices)
static void free_devices(struct list_head *devices, struct mapped_device *md)
{
struct list_head *tmp, *next;
list_for_each_safe(tmp, next, devices) {
struct dm_dev_internal *dd =
list_entry(tmp, struct dm_dev_internal, list);
DMWARN("dm_table_destroy: dm_put_device call missing for %s",
dd->dm_dev.name);
DMWARN("%s: dm_table_destroy: dm_put_device call missing for %s",
dm_device_name(md), dd->dm_dev->name);
dm_put_table_device(md, dd->dm_dev);
kfree(dd);
}
}
......@@ -247,7 +248,7 @@ void dm_table_destroy(struct dm_table *t)
vfree(t->highs);
/* free the device list */
free_devices(&t->devices);
free_devices(&t->devices, t->md);
dm_free_md_mempools(t->mempools);
......@@ -262,52 +263,12 @@ static struct dm_dev_internal *find_device(struct list_head *l, dev_t dev)
struct dm_dev_internal *dd;
list_for_each_entry (dd, l, list)
if (dd->dm_dev.bdev->bd_dev == dev)
if (dd->dm_dev->bdev->bd_dev == dev)
return dd;
return NULL;
}
/*
 * Open block device 'dev' for use as a map destination.
 *
 * The device is opened with d's mode plus FMODE_EXCL and then linked as
 * being held by md's disk.  On success d->dm_dev.bdev is filled in and 0
 * is returned; otherwise the error from blkdev_get_by_dev() or
 * bd_link_disk_holder() is returned and d is left unchanged.
 */
static int open_dev(struct dm_dev_internal *d, dev_t dev,
		    struct mapped_device *md)
{
	static char *claim_holder = "I belong to device-mapper";
	struct block_device *opened;
	int err;

	/* The caller must hand in an entry that is not already open. */
	BUG_ON(d->dm_dev.bdev);

	opened = blkdev_get_by_dev(dev, d->dm_dev.mode | FMODE_EXCL,
				   claim_holder);
	if (IS_ERR(opened))
		return PTR_ERR(opened);

	err = bd_link_disk_holder(opened, dm_disk(md));
	if (!err) {
		d->dm_dev.bdev = opened;
		return 0;
	}

	/* Linking failed: drop the reference taken above. */
	blkdev_put(opened, d->dm_dev.mode | FMODE_EXCL);
	return err;
}
/*
 * Release a device previously opened with open_dev().
 *
 * Does nothing when d has no block device attached; otherwise unlinks
 * the holder, puts the block device and clears d->dm_dev.bdev.
 */
static void close_dev(struct dm_dev_internal *d, struct mapped_device *md)
{
	if (d->dm_dev.bdev) {
		bd_unlink_disk_holder(d->dm_dev.bdev, dm_disk(md));
		blkdev_put(d->dm_dev.bdev, d->dm_dev.mode | FMODE_EXCL);
		d->dm_dev.bdev = NULL;
	}
}
/*
* If possible, this checks an area of a destination device is invalid.
*/
......@@ -386,19 +347,17 @@ static int upgrade_mode(struct dm_dev_internal *dd, fmode_t new_mode,
struct mapped_device *md)
{
int r;
struct dm_dev_internal dd_new, dd_old;
struct dm_dev *old_dev, *new_dev;
dd_new = dd_old = *dd;
old_dev = dd->dm_dev;
dd_new.dm_dev.mode |= new_mode;
dd_new.dm_dev.bdev = NULL;
r = open_dev(&dd_new, dd->dm_dev.bdev->bd_dev, md);
r = dm_get_table_device(md, dd->dm_dev->bdev->bd_dev,
dd->dm_dev->mode | new_mode, &new_dev);
if (r)
return r;
dd->dm_dev.mode |= new_mode;
close_dev(&dd_old, md);
dd->dm_dev = new_dev;
dm_put_table_device(md, old_dev);
return 0;
}
......@@ -440,27 +399,22 @@ int dm_get_device(struct dm_target *ti, const char *path, fmode_t mode,
if (!dd)
return -ENOMEM;
dd->dm_dev.mode = mode;
dd->dm_dev.bdev = NULL;
if ((r = open_dev(dd, dev, t->md))) {
if ((r = dm_get_table_device(t->md, dev, mode, &dd->dm_dev))) {
kfree(dd);
return r;
}
format_dev_t(dd->dm_dev.name, dev);
atomic_set(&dd->count, 0);
list_add(&dd->list, &t->devices);
} else if (dd->dm_dev.mode != (mode | dd->dm_dev.mode)) {
} else if (dd->dm_dev->mode != (mode | dd->dm_dev->mode)) {
r = upgrade_mode(dd, mode, t->md);
if (r)
return r;
}
atomic_inc(&dd->count);
*result = &dd->dm_dev;
*result = dd->dm_dev;
return 0;
}
EXPORT_SYMBOL(dm_get_device);
......@@ -505,11 +459,23 @@ static int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev,
*/
void dm_put_device(struct dm_target *ti, struct dm_dev *d)
{
struct dm_dev_internal *dd = container_of(d, struct dm_dev_internal,
dm_dev);
int found = 0;
struct list_head *devices = &ti->table->devices;
struct dm_dev_internal *dd;
list_for_each_entry(dd, devices, list) {
if (dd->dm_dev == d) {
found = 1;
break;
}
}
if (!found) {
DMWARN("%s: device %s not in table devices list",
dm_device_name(ti->table->md), d->name);
return;
}
if (atomic_dec_and_test(&dd->count)) {
close_dev(dd, ti->table->md);
dm_put_table_device(ti->table->md, d);
list_del(&dd->list);
kfree(dd);
}
......@@ -906,7 +872,7 @@ static int dm_table_set_type(struct dm_table *t)
/* Non-request-stackable devices can't be used for request-based dm */
devices = dm_table_get_devices(t);
list_for_each_entry(dd, devices, list) {
if (!blk_queue_stackable(bdev_get_queue(dd->dm_dev.bdev))) {
if (!blk_queue_stackable(bdev_get_queue(dd->dm_dev->bdev))) {
DMWARN("table load rejected: including"
" non-request-stackable devices");
return -EINVAL;
......@@ -1043,7 +1009,7 @@ static struct gendisk * dm_table_get_integrity_disk(struct dm_table *t,
struct gendisk *prev_disk = NULL, *template_disk = NULL;
list_for_each_entry(dd, devices, list) {
template_disk = dd->dm_dev.bdev->bd_disk;
template_disk = dd->dm_dev->bdev->bd_disk;
if (!blk_get_integrity(template_disk))
goto no_integrity;
if (!match_all && !blk_integrity_is_initialized(template_disk))
......@@ -1629,7 +1595,7 @@ int dm_table_any_congested(struct dm_table *t, int bdi_bits)
int r = 0;
list_for_each_entry(dd, devices, list) {
struct request_queue *q = bdev_get_queue(dd->dm_dev.bdev);
struct request_queue *q = bdev_get_queue(dd->dm_dev->bdev);
char b[BDEVNAME_SIZE];
if (likely(q))
......@@ -1637,7 +1603,7 @@ int dm_table_any_congested(struct dm_table *t, int bdi_bits)
else
DMWARN_LIMIT("%s: any_congested: nonexistent device %s",
dm_device_name(t->md),
bdevname(dd->dm_dev.bdev, b));
bdevname(dd->dm_dev->bdev, b));
}
list_for_each_entry(cb, &t->target_callbacks, list)
......
......@@ -142,6 +142,9 @@ struct mapped_device {
*/
struct dm_table *map;
struct list_head table_devices;
struct mutex table_devices_lock;
unsigned long flags;
struct request_queue *queue;
......@@ -212,6 +215,12 @@ struct dm_md_mempools {
struct bio_set *bs;
};
/*
 * Record of a block device opened on behalf of a mapped_device.
 * dm_get_table_device() links these on md->table_devices and
 * dm_put_table_device() removes and frees them when count drops to zero.
 */
struct table_device {
/* Link in the owning mapped_device's table_devices list. */
struct list_head list;
/* Incremented by dm_get_table_device(), decremented by dm_put_table_device(). */
atomic_t count;
/* Embedded dm_dev; callers get a pointer to this member and container_of() maps it back. */
struct dm_dev dm_dev;
};
#define RESERVED_BIO_BASED_IOS 16
#define RESERVED_REQUEST_BASED_IOS 256
#define RESERVED_MAX_IOS 1024
......@@ -669,6 +678,120 @@ static void dm_put_live_table_fast(struct mapped_device *md) __releases(RCU)
rcu_read_unlock();
}
/*
 * Open block device 'dev' for a table device so it can be used as a
 * map destination.
 *
 * The device is opened with td's mode plus FMODE_EXCL and then linked as
 * being held by md's disk.  On success td->dm_dev.bdev is filled in and
 * 0 is returned; otherwise the error from blkdev_get_by_dev() or
 * bd_link_disk_holder() is returned and td is left unchanged.
 */
static int open_table_device(struct table_device *td, dev_t dev,
			     struct mapped_device *md)
{
	static char *claim_holder = "I belong to device-mapper";
	struct block_device *opened;
	int err;

	/* The caller must hand in an entry that is not already open. */
	BUG_ON(td->dm_dev.bdev);

	opened = blkdev_get_by_dev(dev, td->dm_dev.mode | FMODE_EXCL,
				   claim_holder);
	if (IS_ERR(opened))
		return PTR_ERR(opened);

	err = bd_link_disk_holder(opened, dm_disk(md));
	if (!err) {
		td->dm_dev.bdev = opened;
		return 0;
	}

	/* Linking failed: drop the reference taken above. */
	blkdev_put(opened, td->dm_dev.mode | FMODE_EXCL);
	return err;
}
/*
 * Release a table device previously opened with open_table_device().
 *
 * Does nothing when td has no block device attached; otherwise unlinks
 * the holder, puts the block device and clears td->dm_dev.bdev.
 */
static void close_table_device(struct table_device *td, struct mapped_device *md)
{
	if (td->dm_dev.bdev) {
		bd_unlink_disk_holder(td->dm_dev.bdev, dm_disk(md));
		blkdev_put(td->dm_dev.bdev, td->dm_dev.mode | FMODE_EXCL);
		td->dm_dev.bdev = NULL;
	}
}
/*
 * Search list 'l' for the table device whose block device number is
 * 'dev' and whose mode is exactly 'mode'.
 *
 * Returns the matching entry, or NULL when the list holds none.
 */
static struct table_device *find_table_device(struct list_head *l, dev_t dev,
					      fmode_t mode)
{
	struct table_device *entry;

	list_for_each_entry(entry, l, list) {
		if (td_dev_mismatch_guard(0), entry->dm_dev.bdev->bd_dev != dev)
			continue;
		if (entry->dm_dev.mode == mode)
			return entry;
	}

	return NULL;
}
/*
 * Take a reference on the table device of 'md' matching (dev, mode),
 * opening the underlying block device first when no such entry exists.
 *
 * On success 0 is returned and *result points at the entry's embedded
 * dm_dev.  Returns -ENOMEM when the entry cannot be allocated, or the
 * error from open_table_device(); *result is not written on failure.
 * md->table_devices_lock is held while the list is searched and updated.
 */
int dm_get_table_device(struct mapped_device *md, dev_t dev, fmode_t mode,
			struct dm_dev **result)
{
	struct table_device *td;
	int r;

	mutex_lock(&md->table_devices_lock);

	td = find_table_device(&md->table_devices, dev, mode);
	if (td)
		goto take_ref;

	td = kmalloc(sizeof(*td), GFP_KERNEL);
	if (!td)
		goto fail_alloc;

	td->dm_dev.mode = mode;
	td->dm_dev.bdev = NULL;

	r = open_table_device(td, dev, md);
	if (r)
		goto fail_open;

	format_dev_t(td->dm_dev.name, dev);

	/* A new entry starts at zero; the reference is added below. */
	atomic_set(&td->count, 0);
	list_add(&td->list, &md->table_devices);

take_ref:
	atomic_inc(&td->count);
	mutex_unlock(&md->table_devices_lock);

	*result = &td->dm_dev;
	return 0;

fail_open:
	mutex_unlock(&md->table_devices_lock);
	kfree(td);
	return r;

fail_alloc:
	mutex_unlock(&md->table_devices_lock);
	return -ENOMEM;
}
EXPORT_SYMBOL_GPL(dm_get_table_device);
/*
 * Drop one reference on the table device that embeds 'd'.
 *
 * When the count reaches zero the block device is closed and the entry
 * is unlinked from its list and freed.  md->table_devices_lock is held
 * across the whole operation.
 */
void dm_put_table_device(struct mapped_device *md, struct dm_dev *d)
{
	struct table_device *td;

	td = container_of(d, struct table_device, dm_dev);

	mutex_lock(&md->table_devices_lock);

	if (!atomic_dec_and_test(&td->count))
		goto unlock;

	/* Last reference gone: tear the entry down. */
	close_table_device(td, md);
	list_del(&td->list);
	kfree(td);

unlock:
	mutex_unlock(&md->table_devices_lock);
}
EXPORT_SYMBOL(dm_put_table_device);
/*
 * Free every table device still on 'devices', logging a warning with
 * the entry's name and remaining reference count for each one.
 */
static void free_table_devices(struct list_head *devices)
{
	struct list_head *pos, *n;
	struct table_device *leftover;

	list_for_each_safe(pos, n, devices) {
		leftover = list_entry(pos, struct table_device, list);

		DMWARN("dm_destroy: %s still exists with %d references",
		       leftover->dm_dev.name, atomic_read(&leftover->count));

		kfree(leftover);
	}
}
/*
* Get the geometry associated with a dm device
*/
......@@ -1249,13 +1372,13 @@ static void clone_bio(struct dm_target_io *tio, struct bio *bio,
}
static struct dm_target_io *alloc_tio(struct clone_info *ci,
struct dm_target *ti, int nr_iovecs,
struct dm_target *ti,
unsigned target_bio_nr)
{
struct dm_target_io *tio;
struct bio *clone;
clone = bio_alloc_bioset(GFP_NOIO, nr_iovecs, ci->md->bs);
clone = bio_alloc_bioset(GFP_NOIO, 0, ci->md->bs);
tio = container_of(clone, struct dm_target_io, clone);
tio->io = ci->io;
......@@ -1269,17 +1392,12 @@ static void __clone_and_map_simple_bio(struct clone_info *ci,
struct dm_target *ti,
unsigned target_bio_nr, unsigned *len)
{
struct dm_target_io *tio = alloc_tio(ci, ti, ci->bio->bi_max_vecs, target_bio_nr);
struct dm_target_io *tio = alloc_tio(ci, ti, target_bio_nr);
struct bio *clone = &tio->clone;
tio->len_ptr = len;
/*
* Discard requests require the bio's inline iovecs be initialized.
* ci->bio->bi_max_vecs is BIO_INLINE_VECS anyway, for both flush
* and discard, so no need for concern about wasted bvec allocations.
*/
__bio_clone_fast(clone, ci->bio);
__bio_clone_fast(clone, ci->bio);
if (len)
bio_setup_sector(clone, ci->sector, *len);
......@@ -1322,7 +1440,7 @@ static void __clone_and_map_data_bio(struct clone_info *ci, struct dm_target *ti
num_target_bios = ti->num_write_bios(ti, bio);
for (target_bio_nr = 0; target_bio_nr < num_target_bios; target_bio_nr++) {
tio = alloc_tio(ci, ti, 0, target_bio_nr);
tio = alloc_tio(ci, ti, target_bio_nr);
tio->len_ptr = len;
clone_bio(tio, bio, sector, *len);
__map_bio(tio);
......@@ -1949,12 +2067,14 @@ static struct mapped_device *alloc_dev(int minor)
md->type = DM_TYPE_NONE;
mutex_init(&md->suspend_lock);
mutex_init(&md->type_lock);
mutex_init(&md->table_devices_lock);
spin_lock_init(&md->deferred_lock);
atomic_set(&md->holders, 1);
atomic_set(&md->open_count, 0);
atomic_set(&md->event_nr, 0);
atomic_set(&md->uevent_seq, 0);
INIT_LIST_HEAD(&md->uevent_list);
INIT_LIST_HEAD(&md->table_devices);
spin_lock_init(&md->uevent_lock);
md->queue = blk_alloc_queue(GFP_KERNEL);
......@@ -2040,6 +2160,7 @@ static void free_dev(struct mapped_device *md)
blk_integrity_unregister(md->disk);
del_gendisk(md->disk);
cleanup_srcu_struct(&md->io_barrier);
free_table_devices(&md->table_devices);
free_minor(minor);
spin_lock(&_minor_lock);
......@@ -2900,7 +3021,7 @@ struct dm_md_mempools *dm_alloc_md_mempools(unsigned type, unsigned integrity, u
if (!pools->io_pool)
goto out;
pools->bs = bioset_create(pool_size, front_pad);
pools->bs = bioset_create_nobvec(pool_size, front_pad);
if (!pools->bs)
goto out;
......
......@@ -44,7 +44,7 @@
struct dm_dev_internal {
struct list_head list;
atomic_t count;
struct dm_dev dm_dev;
struct dm_dev *dm_dev;
};
struct dm_table;
......@@ -188,6 +188,9 @@ int dm_cancel_deferred_remove(struct mapped_device *md);
int dm_request_based(struct mapped_device *md);
sector_t dm_get_size(struct mapped_device *md);
struct request_queue *dm_get_md_queue(struct mapped_device *md);
int dm_get_table_device(struct mapped_device *md, dev_t dev, fmode_t mode,
struct dm_dev **result);
void dm_put_table_device(struct mapped_device *md, struct dm_dev *d);
struct dm_stats *dm_get_stats(struct mapped_device *md);
int dm_kobject_uevent(struct mapped_device *md, enum kobject_action action,
......
......@@ -267,9 +267,9 @@ enum {
#define DM_DEV_SET_GEOMETRY _IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl)
#define DM_VERSION_MAJOR 4
#define DM_VERSION_MINOR 27
#define DM_VERSION_MINOR 28
#define DM_VERSION_PATCHLEVEL 0
#define DM_VERSION_EXTRA "-ioctl (2013-10-30)"
#define DM_VERSION_EXTRA "-ioctl (2014-09-17)"
/* Status bits */
#define DM_READONLY_FLAG (1 << 0) /* In/Out */
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册