Commit e93dd910 authored by Linus Torvalds

Merge tag 'dm-3.12-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm

Pull device-mapper fixes from Mike Snitzer:
 "A few fixes for dm-snapshot, a 32 bit fix for dm-stats, a couple error
  handling fixes for dm-multipath.  A fix for the thin provisioning
  target to not expose non-zero discard limits if discards are disabled.

  Lastly, add two DM module parameters which allow users to tune the
  emergency memory reserves that DM maintains per device -- this helps
  fix a long-standing issue for dm-multipath.  The conservative default
  reserve for request-based dm-multipath devices (256) has proven
  problematic for users with many multipathed SCSI devices but
  relatively little memory.  To responsibly select a smaller value users
  should use the new nr_bios tracepoint info (via commit 75afb352
  "block: Add nr_bios to block_rq_remap tracepoint") to determine the
  peak number of bios their workloads create"

* tag 'dm-3.12-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm:
  dm: add reserved_bio_based_ios module parameter
  dm: add reserved_rq_based_ios module parameter
  dm: lower bio-based mempool reservation
  dm thin: do not expose non-zero discard limits if discards disabled
  dm mpath: disable WRITE SAME if it fails
  dm-snapshot: fix performance degradation due to small hash size
  dm snapshot: workaround for a false positive lockdep warning
  dm stats: fix possible counter corruption on 32-bit systems
  dm mpath: do not fail path on -ENOSPC
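
Before the diff, a quick illustration of how the new knobs behave. The dm.c hunk below clamps whatever the user writes into the module parameters: zero falls back to the built-in default, and anything above a hard cap of 1024 is reduced to that cap. The standalone sketch below models only that clamping (function and variable names are illustrative, not the kernel's; the real helper, __dm_get_reserved_ios(), additionally writes the clamped value back with cmpxchg):

#include <stdio.h>

#define RESERVED_BIO_BASED_IOS		16	/* default for bio-based DM */
#define RESERVED_REQUEST_BASED_IOS	256	/* default for request-based DM */
#define RESERVED_MAX_IOS		1024	/* hard cap for either knob */

static unsigned get_reserved_ios(unsigned requested, unsigned def, unsigned max)
{
	if (!requested)
		return def;	/* 0 means "use the built-in default" */
	if (requested > max)
		return max;	/* oversized values are silently capped */
	return requested;
}

int main(void)
{
	/* 0 -> default (16), 4096 -> capped to 1024, 64 -> taken as-is */
	printf("%u %u %u\n",
	       get_reserved_ios(0, RESERVED_BIO_BASED_IOS, RESERVED_MAX_IOS),
	       get_reserved_ios(4096, RESERVED_REQUEST_BASED_IOS, RESERVED_MAX_IOS),
	       get_reserved_ios(64, RESERVED_REQUEST_BASED_IOS, RESERVED_MAX_IOS));
	return 0;
}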
drivers/md/dm-io.c
@@ -19,8 +19,6 @@
 #define DM_MSG_PREFIX "io"
 
 #define DM_IO_MAX_REGIONS BITS_PER_LONG
-#define MIN_IOS 16
-#define MIN_BIOS 16
 
 struct dm_io_client {
 	mempool_t *pool;
@@ -50,16 +48,17 @@ static struct kmem_cache *_dm_io_cache;
 struct dm_io_client *dm_io_client_create(void)
 {
 	struct dm_io_client *client;
+	unsigned min_ios = dm_get_reserved_bio_based_ios();
 
 	client = kmalloc(sizeof(*client), GFP_KERNEL);
 	if (!client)
 		return ERR_PTR(-ENOMEM);
 
-	client->pool = mempool_create_slab_pool(MIN_IOS, _dm_io_cache);
+	client->pool = mempool_create_slab_pool(min_ios, _dm_io_cache);
 	if (!client->pool)
 		goto bad;
 
-	client->bios = bioset_create(MIN_BIOS, 0);
+	client->bios = bioset_create(min_ios, 0);
 	if (!client->bios)
 		goto bad;
...
drivers/md/dm-mpath.c
@@ -7,6 +7,7 @@
 #include <linux/device-mapper.h>
 
+#include "dm.h"
 #include "dm-path-selector.h"
 #include "dm-uevent.h"
@@ -116,8 +117,6 @@ struct dm_mpath_io {
 typedef int (*action_fn) (struct pgpath *pgpath);
 
-#define MIN_IOS 256	/* Mempool size */
-
 static struct kmem_cache *_mpio_cache;
 
 static struct workqueue_struct *kmultipathd, *kmpath_handlerd;
@@ -190,6 +189,7 @@ static void free_priority_group(struct priority_group *pg,
 static struct multipath *alloc_multipath(struct dm_target *ti)
 {
 	struct multipath *m;
+	unsigned min_ios = dm_get_reserved_rq_based_ios();
 
 	m = kzalloc(sizeof(*m), GFP_KERNEL);
 	if (m) {
@@ -202,7 +202,7 @@ static struct multipath *alloc_multipath(struct dm_target *ti)
 		INIT_WORK(&m->trigger_event, trigger_event);
 		init_waitqueue_head(&m->pg_init_wait);
 		mutex_init(&m->work_mutex);
-		m->mpio_pool = mempool_create_slab_pool(MIN_IOS, _mpio_cache);
+		m->mpio_pool = mempool_create_slab_pool(min_ios, _mpio_cache);
 		if (!m->mpio_pool) {
 			kfree(m);
 			return NULL;
@@ -1268,6 +1268,7 @@ static int noretry_error(int error)
 	case -EREMOTEIO:
 	case -EILSEQ:
 	case -ENODATA:
+	case -ENOSPC:
 		return 1;
 	}
@@ -1298,8 +1299,17 @@ static int do_end_io(struct multipath *m, struct request *clone,
 	if (!error && !clone->errors)
 		return 0;	/* I/O complete */
 
-	if (noretry_error(error))
+	if (noretry_error(error)) {
+		if ((clone->cmd_flags & REQ_WRITE_SAME) &&
+		    !clone->q->limits.max_write_same_sectors) {
+			struct queue_limits *limits;
+
+			/* device doesn't really support WRITE SAME, disable it */
+			limits = dm_get_queue_limits(dm_table_get_md(m->ti->table));
+			limits->max_write_same_sectors = 0;
+		}
 		return error;
+	}
 
 	if (mpio->pgpath)
 		fail_path(mpio->pgpath);
...
drivers/md/dm-snap-persistent.c
@@ -256,7 +256,7 @@ static int chunk_io(struct pstore *ps, void *area, chunk_t chunk, int rw,
 	 */
 	INIT_WORK_ONSTACK(&req.work, do_metadata);
 	queue_work(ps->metadata_wq, &req.work);
-	flush_work(&req.work);
+	flush_workqueue(ps->metadata_wq);
 
 	return req.result;
 }
...
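
The hunk above waits for the on-stack work item by flushing the whole metadata workqueue rather than the individual work item; per the shortlog, calling flush_work() on a work item set up with INIT_WORK_ONSTACK() was triggering a false-positive lockdep warning. A minimal sketch of the pattern, with hypothetical names rather than the snapshot code:

#include <linux/kernel.h>
#include <linux/workqueue.h>

struct metadata_request {
	struct work_struct work;
	int result;
};

static void do_one_metadata_op(struct work_struct *work)
{
	struct metadata_request *req =
		container_of(work, struct metadata_request, work);

	req->result = 0;	/* the real handler would perform the chunk I/O here */
}

static int run_metadata_op(struct workqueue_struct *wq)
{
	struct metadata_request req;

	INIT_WORK_ONSTACK(&req.work, do_one_metadata_op);
	queue_work(wq, &req.work);
	flush_workqueue(wq);	/* wait for the on-stack item without flush_work() */

	return req.result;
}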
drivers/md/dm-snap.c
@@ -725,17 +725,16 @@ static int calc_max_buckets(void)
  */
 static int init_hash_tables(struct dm_snapshot *s)
 {
-	sector_t hash_size, cow_dev_size, origin_dev_size, max_buckets;
+	sector_t hash_size, cow_dev_size, max_buckets;
 
 	/*
 	 * Calculate based on the size of the original volume or
 	 * the COW volume...
 	 */
 	cow_dev_size = get_dev_size(s->cow->bdev);
-	origin_dev_size = get_dev_size(s->origin->bdev);
 	max_buckets = calc_max_buckets();
 
-	hash_size = min(origin_dev_size, cow_dev_size) >> s->store->chunk_shift;
+	hash_size = cow_dev_size >> s->store->chunk_shift;
 	hash_size = min(hash_size, max_buckets);
 	if (hash_size < 64)
...
drivers/md/dm-stats.c
@@ -451,19 +451,26 @@ static void dm_stat_for_entry(struct dm_stat *s, size_t entry,
 	struct dm_stat_percpu *p;
 
 	/*
-	 * For strict correctness we should use local_irq_disable/enable
+	 * For strict correctness we should use local_irq_save/restore
 	 * instead of preempt_disable/enable.
 	 *
-	 * This is racy if the driver finishes bios from non-interrupt
-	 * context as well as from interrupt context or from more different
-	 * interrupts.
+	 * preempt_disable/enable is racy if the driver finishes bios
+	 * from non-interrupt context as well as from interrupt context
+	 * or from more different interrupts.
 	 *
-	 * However, the race only results in not counting some events,
-	 * so it is acceptable.
+	 * On 64-bit architectures the race only results in not counting some
+	 * events, so it is acceptable. On 32-bit architectures the race could
+	 * cause the counter going off by 2^32, so we need to do proper locking
+	 * there.
 	 *
 	 * part_stat_lock()/part_stat_unlock() have this race too.
 	 */
+#if BITS_PER_LONG == 32
+	unsigned long flags;
+	local_irq_save(flags);
+#else
 	preempt_disable();
+#endif
 	p = &s->stat_percpu[smp_processor_id()][entry];
 
 	if (!end) {
@@ -478,7 +485,11 @@ static void dm_stat_for_entry(struct dm_stat *s, size_t entry,
 		p->ticks[idx] += duration;
 	}
 
+#if BITS_PER_LONG == 32
+	local_irq_restore(flags);
+#else
 	preempt_enable();
+#endif
 }
 
 static void __dm_stat_bio(struct dm_stat *s, unsigned long bi_rw,
...
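
The #if BITS_PER_LONG == 32 guards above exist because the per-CPU counters are 64-bit. The sketch below, with hypothetical names rather than the dm-stats code, illustrates the failure mode the new comment describes: on a 32-bit CPU the 64-bit add is split into two word-sized operations, and an interrupt landing between them can mishandle the carry and move the counter by 2^32, so interrupts must be blocked around the update; on 64-bit the worst a race can do is drop an event, so preempt_disable() remains sufficient.

#include <linux/kernel.h>
#include <linux/irqflags.h>
#include <linux/preempt.h>

struct stat_counter {
	unsigned long long value;	/* 64-bit, i.e. two words on 32-bit CPUs */
};

static void stat_counter_add(struct stat_counter *c, unsigned long long n)
{
#if BITS_PER_LONG == 32
	unsigned long flags;

	/*
	 * The += below is two 32-bit load/store pairs here; an interrupt
	 * updating the same counter in between can corrupt the carry, so
	 * block interrupts on this CPU for the duration of the update.
	 */
	local_irq_save(flags);
	c->value += n;
	local_irq_restore(flags);
#else
	/*
	 * On 64-bit the value is a single word; a race with an interrupt can
	 * at worst fail to count one event, so staying on this CPU is enough.
	 */
	preempt_disable();
	c->value += n;
	preempt_enable();
#endif
}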
drivers/md/dm-thin.c
@@ -2095,6 +2095,7 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv)
 	 * them down to the data device.  The thin device's discard
 	 * processing will cause mappings to be removed from the btree.
 	 */
+	ti->discard_zeroes_data_unsupported = true;
 	if (pf.discard_enabled && pf.discard_passdown) {
 		ti->num_discard_bios = 1;
@@ -2104,7 +2105,6 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv)
 		 * thin devices' discard limits consistent).
 		 */
 		ti->discards_supported = true;
-		ti->discard_zeroes_data_unsupported = true;
 	}
 	ti->private = pt;
@@ -2689,8 +2689,16 @@ static void pool_io_hints(struct dm_target *ti, struct queue_limits *limits)
 	 * They get transferred to the live pool in bind_control_target()
 	 * called from pool_preresume().
 	 */
-	if (!pt->adjusted_pf.discard_enabled)
+	if (!pt->adjusted_pf.discard_enabled) {
+		/*
+		 * Must explicitly disallow stacking discard limits otherwise the
+		 * block layer will stack them if pool's data device has support.
+		 * QUEUE_FLAG_DISCARD wouldn't be set but there is no way for the
+		 * user to see that, so make sure to set all discard limits to 0.
+		 */
+		limits->discard_granularity = 0;
 		return;
+	}
 
 	disable_passdown_if_not_supported(pt);
@@ -2826,10 +2834,10 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv)
 	ti->per_bio_data_size = sizeof(struct dm_thin_endio_hook);
 
 	/* In case the pool supports discards, pass them on. */
+	ti->discard_zeroes_data_unsupported = true;
 	if (tc->pool->pf.discard_enabled) {
 		ti->discards_supported = true;
 		ti->num_discard_bios = 1;
-		ti->discard_zeroes_data_unsupported = true;
 		/* Discard bios must be split on a block boundary */
 		ti->split_discard_bios = true;
 	}
...
drivers/md/dm.c
@@ -211,10 +211,55 @@ struct dm_md_mempools {
 	struct bio_set *bs;
 };
 
-#define MIN_IOS 256
+#define RESERVED_BIO_BASED_IOS		16
+#define RESERVED_REQUEST_BASED_IOS	256
+#define RESERVED_MAX_IOS		1024
 
 static struct kmem_cache *_io_cache;
 static struct kmem_cache *_rq_tio_cache;
 
+/*
+ * Bio-based DM's mempools' reserved IOs set by the user.
+ */
+static unsigned reserved_bio_based_ios = RESERVED_BIO_BASED_IOS;
+
+/*
+ * Request-based DM's mempools' reserved IOs set by the user.
+ */
+static unsigned reserved_rq_based_ios = RESERVED_REQUEST_BASED_IOS;
+
+static unsigned __dm_get_reserved_ios(unsigned *reserved_ios,
+				      unsigned def, unsigned max)
+{
+	unsigned ios = ACCESS_ONCE(*reserved_ios);
+	unsigned modified_ios = 0;
+
+	if (!ios)
+		modified_ios = def;
+	else if (ios > max)
+		modified_ios = max;
+
+	if (modified_ios) {
+		(void)cmpxchg(reserved_ios, ios, modified_ios);
+		ios = modified_ios;
+	}
+
+	return ios;
+}
+
+unsigned dm_get_reserved_bio_based_ios(void)
+{
+	return __dm_get_reserved_ios(&reserved_bio_based_ios,
+				     RESERVED_BIO_BASED_IOS, RESERVED_MAX_IOS);
+}
+EXPORT_SYMBOL_GPL(dm_get_reserved_bio_based_ios);
+
+unsigned dm_get_reserved_rq_based_ios(void)
+{
+	return __dm_get_reserved_ios(&reserved_rq_based_ios,
+				     RESERVED_REQUEST_BASED_IOS, RESERVED_MAX_IOS);
+}
+EXPORT_SYMBOL_GPL(dm_get_reserved_rq_based_ios);
+
 static int __init local_init(void)
 {
 	int r = -ENOMEM;
@@ -2277,6 +2322,17 @@ struct target_type *dm_get_immutable_target_type(struct mapped_device *md)
 	return md->immutable_target_type;
 }
 
+/*
+ * The queue_limits are only valid as long as you have a reference
+ * count on 'md'.
+ */
+struct queue_limits *dm_get_queue_limits(struct mapped_device *md)
+{
+	BUG_ON(!atomic_read(&md->holders));
+
+	return &md->queue->limits;
+}
+EXPORT_SYMBOL_GPL(dm_get_queue_limits);
+
 /*
  * Fully initialize a request-based queue (->elevator, ->request_fn, etc).
  */
@@ -2862,18 +2918,18 @@ struct dm_md_mempools *dm_alloc_md_mempools(unsigned type, unsigned integrity, u
 	if (type == DM_TYPE_BIO_BASED) {
 		cachep = _io_cache;
-		pool_size = 16;
+		pool_size = dm_get_reserved_bio_based_ios();
 		front_pad = roundup(per_bio_data_size, __alignof__(struct dm_target_io)) + offsetof(struct dm_target_io, clone);
 	} else if (type == DM_TYPE_REQUEST_BASED) {
 		cachep = _rq_tio_cache;
-		pool_size = MIN_IOS;
+		pool_size = dm_get_reserved_rq_based_ios();
 		front_pad = offsetof(struct dm_rq_clone_bio_info, clone);
 		/* per_bio_data_size is not used. See __bind_mempools(). */
 		WARN_ON(per_bio_data_size != 0);
 	} else
 		goto out;
 
-	pools->io_pool = mempool_create_slab_pool(MIN_IOS, cachep);
+	pools->io_pool = mempool_create_slab_pool(pool_size, cachep);
 	if (!pools->io_pool)
 		goto out;
@@ -2924,6 +2980,13 @@ module_exit(dm_exit);
 module_param(major, uint, 0);
 MODULE_PARM_DESC(major, "The major number of the device mapper");
 
+module_param(reserved_bio_based_ios, uint, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(reserved_bio_based_ios, "Reserved IOs in bio-based mempools");
+
+module_param(reserved_rq_based_ios, uint, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(reserved_rq_based_ios, "Reserved IOs in request-based mempools");
+
 MODULE_DESCRIPTION(DM_NAME " driver");
 MODULE_AUTHOR("Joe Thornber <dm-devel@redhat.com>");
 MODULE_LICENSE("GPL");
drivers/md/dm.h
@@ -184,6 +184,9 @@ void dm_free_md_mempools(struct dm_md_mempools *pools);
 /*
  * Helpers that are used by DM core
  */
+unsigned dm_get_reserved_bio_based_ios(void);
+unsigned dm_get_reserved_rq_based_ios(void);
+
 static inline bool dm_message_test_buffer_overflow(char *result, unsigned maxlen)
 {
 	return !maxlen || strlen(result) + 1 >= maxlen;
...
include/linux/device-mapper.h
@@ -406,13 +406,14 @@ int dm_noflush_suspending(struct dm_target *ti);
 union map_info *dm_get_mapinfo(struct bio *bio);
 union map_info *dm_get_rq_mapinfo(struct request *rq);
 
+struct queue_limits *dm_get_queue_limits(struct mapped_device *md);
 
 /*
  * Geometry functions.
  */
 int dm_get_geometry(struct mapped_device *md, struct hd_geometry *geo);
 int dm_set_geometry(struct mapped_device *md, struct hd_geometry *geo);
 
 /*-----------------------------------------------------------------
  * Functions for manipulating device-mapper tables.
  *---------------------------------------------------------------*/
...
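
dm_get_queue_limits(), declared here and defined in the dm.c hunk above, is what the multipath change uses to switch off WRITE SAME after a request fails on a device that does not actually support it. A hedged sketch of that usage from a hypothetical target (not code from the patch); per the comment added in dm.c, the returned limits are only valid while a reference on the mapped_device is held, which a target servicing I/O for its own table has:

#include <linux/blkdev.h>
#include <linux/device-mapper.h>

static void my_target_disable_write_same(struct dm_target *ti)
{
	struct mapped_device *md = dm_table_get_md(ti->table);
	struct queue_limits *limits = dm_get_queue_limits(md);

	/* Stop the block layer from sending further WRITE SAME requests. */
	limits->max_write_same_sectors = 0;
}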