diff --git a/Documentation/device-mapper/dm-raid.txt b/Documentation/device-mapper/dm-raid.txt
index cb12af3b51c21b3b11674fb9b15312c9cee827cb..df2d636b60880cb36c105f68c953d60e50110c77 100644
--- a/Documentation/device-mapper/dm-raid.txt
+++ b/Documentation/device-mapper/dm-raid.txt
@@ -209,6 +209,37 @@ include:
 	"repair" - Initiate a repair of the array.
 	"reshape"- Currently unsupported (-EINVAL).
 
+
+Discard Support
+---------------
+The implementation of discard support among hardware vendors varies.
+When a block is discarded, some storage devices will return zeroes when
+the block is read.  These devices set the 'discard_zeroes_data'
+attribute.  Other devices will return random data.  Confusingly, some
+devices that advertise 'discard_zeroes_data' will not reliably return
+zeroes when discarded blocks are read!  Since RAID 4/5/6 uses blocks
+from a number of devices to calculate parity blocks and (for performance
+reasons) relies on 'discard_zeroes_data' being reliable, it is important
+that the devices be consistent.  Blocks may be discarded in the middle
+of a RAID 4/5/6 stripe and if subsequent read results are not
+consistent, the parity blocks may be calculated differently at any time,
+making the parity blocks useless for redundancy.  It is important to
+understand how your hardware behaves with discards if you are going to
+enable discards with RAID 4/5/6.
+
+Since the behavior of storage devices is unreliable in this respect,
+even when reporting 'discard_zeroes_data', by default RAID 4/5/6
+discard support is disabled -- this ensures data integrity at the
+expense of losing some performance.
+
+Storage devices that properly support 'discard_zeroes_data' are
+increasingly whitelisted in the kernel and can thus be trusted.
+
+For trusted devices, the following dm-raid module parameter can be set
+to safely enable discard support for RAID 4/5/6:
+	'devices_handle_discard_safely'
+
+
 Version History
 ---------------
 1.0.0	Initial version.  Support for RAID 4/5/6
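The opt-in knob itself is just a writable boolean module parameter. A minimal sketch of how such a parameter is typically declared on the kernel side (condensed and hypothetical, not the verbatim dm-raid source):

#include <linux/module.h>

/* Hypothetical condensed sketch of the discard opt-in knob. */
static bool devices_handle_discard_safely = false;
module_param(devices_handle_discard_safely, bool, 0644);
MODULE_PARM_DESC(devices_handle_discard_safely,
		 "Set to Y if all devices in each array reliably return zeroes on reads from discarded regions");

Assuming the parameter is exposed this way, it can be given at load time ("modprobe dm-raid devices_handle_discard_safely=Y") or flipped later through /sys/module/dm_raid/parameters/devices_handle_discard_safely.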
diff --git a/Documentation/device-mapper/statistics.txt b/Documentation/device-mapper/statistics.txt
index 4919b2dfd1b391c4169de1b66f5e78bd9ba2d9cc..6f5ef944ca4c8fcdbe9ce8cb06b0bddf0bcdfe59 100644
--- a/Documentation/device-mapper/statistics.txt
+++ b/Documentation/device-mapper/statistics.txt
@@ -121,6 +121,10 @@ Messages
 
 	Output format:
 	  <region_id>: <start_sector>+<length> <step> <program_id> <aux_data>
+	        precise_timestamps histogram:n1,n2,n3,...
+
+	The strings "precise_timestamps" and "histogram" are printed only
+	if they were specified when creating the region.
 
     @stats_print <region_id> [<starting_line> <number_of_lines>]
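For a region created with both options, an @stats_list line would then look something like this (illustrative values only; the histogram boundaries echo what was passed at @stats_create time):

1: 0+2048000 65536 myapp - precise_timestamps histogram:100000,200000,400000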
diff --git a/drivers/md/dm-cache-policy-smq.c b/drivers/md/dm-cache-policy-smq.c
index 200366c62231dd5f8f38f74284a42810c8603d19..1ffbeb1b3ea6088fd07bed9c68352821223c0b84 100644
--- a/drivers/md/dm-cache-policy-smq.c
+++ b/drivers/md/dm-cache-policy-smq.c
@@ -772,7 +772,7 @@ struct smq_policy {
 	struct dm_cache_policy policy;
 
 	/* protects everything */
-	struct mutex lock;
+	spinlock_t lock;
 	dm_cblock_t cache_size;
 	sector_t cache_block_size;
 
@@ -807,13 +807,7 @@ struct smq_policy {
 	/*
 	 * Keeps track of time, incremented by the core.  We use this to
 	 * avoid attributing multiple hits within the same tick.
-	 *
-	 * Access to tick_protected should be done with the spin lock held.
-	 * It's copied to tick at the start of the map function (within the
-	 * mutex).
 	 */
-	spinlock_t tick_lock;
-	unsigned tick_protected;
 	unsigned tick;
 
 	/*
@@ -1296,46 +1290,20 @@ static void smq_destroy(struct dm_cache_policy *p)
 	kfree(mq);
 }
 
-static void copy_tick(struct smq_policy *mq)
-{
-	unsigned long flags, tick;
-
-	spin_lock_irqsave(&mq->tick_lock, flags);
-	tick = mq->tick_protected;
-	if (tick != mq->tick) {
-		update_sentinels(mq);
-		end_hotspot_period(mq);
-		end_cache_period(mq);
-		mq->tick = tick;
-	}
-	spin_unlock_irqrestore(&mq->tick_lock, flags);
-}
-
-static bool maybe_lock(struct smq_policy *mq, bool can_block)
-{
-	if (can_block) {
-		mutex_lock(&mq->lock);
-		return true;
-	} else
-		return mutex_trylock(&mq->lock);
-}
-
 static int smq_map(struct dm_cache_policy *p, dm_oblock_t oblock,
 		   bool can_block, bool can_migrate, bool fast_promote,
 		   struct bio *bio, struct policy_locker *locker,
 		   struct policy_result *result)
 {
 	int r;
+	unsigned long flags;
 	struct smq_policy *mq = to_smq_policy(p);
 
 	result->op = POLICY_MISS;
 
-	if (!maybe_lock(mq, can_block))
-		return -EWOULDBLOCK;
-
-	copy_tick(mq);
+	spin_lock_irqsave(&mq->lock, flags);
 	r = map(mq, bio, oblock, can_migrate, fast_promote, locker, result);
-	mutex_unlock(&mq->lock);
+	spin_unlock_irqrestore(&mq->lock, flags);
 
 	return r;
 }
@@ -1343,20 +1311,18 @@ static int smq_map(struct dm_cache_policy *p, dm_oblock_t oblock,
 static int smq_lookup(struct dm_cache_policy *p, dm_oblock_t oblock, dm_cblock_t *cblock)
 {
 	int r;
+	unsigned long flags;
 	struct smq_policy *mq = to_smq_policy(p);
 	struct entry *e;
 
-	if (!mutex_trylock(&mq->lock))
-		return -EWOULDBLOCK;
-
+	spin_lock_irqsave(&mq->lock, flags);
 	e = h_lookup(&mq->table, oblock);
 	if (e) {
 		*cblock = infer_cblock(mq, e);
 		r = 0;
 	} else
 		r = -ENOENT;
-
-	mutex_unlock(&mq->lock);
+	spin_unlock_irqrestore(&mq->lock, flags);
 
 	return r;
 }
@@ -1375,20 +1341,22 @@ static void __smq_set_clear_dirty(struct smq_policy *mq, dm_oblock_t oblock, boo
 
 static void smq_set_dirty(struct dm_cache_policy *p, dm_oblock_t oblock)
 {
+	unsigned long flags;
 	struct smq_policy *mq = to_smq_policy(p);
 
-	mutex_lock(&mq->lock);
+	spin_lock_irqsave(&mq->lock, flags);
 	__smq_set_clear_dirty(mq, oblock, true);
-	mutex_unlock(&mq->lock);
+	spin_unlock_irqrestore(&mq->lock, flags);
 }
 
 static void smq_clear_dirty(struct dm_cache_policy *p, dm_oblock_t oblock)
 {
 	struct smq_policy *mq = to_smq_policy(p);
+	unsigned long flags;
 
-	mutex_lock(&mq->lock);
+	spin_lock_irqsave(&mq->lock, flags);
 	__smq_set_clear_dirty(mq, oblock, false);
-	mutex_unlock(&mq->lock);
+	spin_unlock_irqrestore(&mq->lock, flags);
 }
 
 static int smq_load_mapping(struct dm_cache_policy *p,
@@ -1433,14 +1401,14 @@ static int smq_walk_mappings(struct dm_cache_policy *p, policy_walk_fn fn,
 	struct smq_policy *mq = to_smq_policy(p);
 	int r = 0;
 
-	mutex_lock(&mq->lock);
-
+	/*
+	 * We don't need to lock here since this method is only called once
+	 * the IO has stopped.
+	 */
 	r = smq_save_hints(mq, &mq->clean, fn, context);
 	if (!r)
 		r = smq_save_hints(mq, &mq->dirty, fn, context);
 
-	mutex_unlock(&mq->lock);
-
 	return r;
 }
@@ -1458,10 +1426,11 @@ static void __remove_mapping(struct smq_policy *mq, dm_oblock_t oblock)
 static void smq_remove_mapping(struct dm_cache_policy *p, dm_oblock_t oblock)
 {
 	struct smq_policy *mq = to_smq_policy(p);
+	unsigned long flags;
 
-	mutex_lock(&mq->lock);
+	spin_lock_irqsave(&mq->lock, flags);
 	__remove_mapping(mq, oblock);
-	mutex_unlock(&mq->lock);
+	spin_unlock_irqrestore(&mq->lock, flags);
 }
 
 static int __remove_cblock(struct smq_policy *mq, dm_cblock_t cblock)
@@ -1480,11 +1449,12 @@ static int __remove_cblock(struct smq_policy *mq, dm_cblock_t cblock)
 static int smq_remove_cblock(struct dm_cache_policy *p, dm_cblock_t cblock)
 {
 	int r;
+	unsigned long flags;
 	struct smq_policy *mq = to_smq_policy(p);
 
-	mutex_lock(&mq->lock);
+	spin_lock_irqsave(&mq->lock, flags);
 	r = __remove_cblock(mq, cblock);
-	mutex_unlock(&mq->lock);
+	spin_unlock_irqrestore(&mq->lock, flags);
 
 	return r;
 }
@@ -1537,11 +1507,12 @@ static int smq_writeback_work(struct dm_cache_policy *p, dm_oblock_t *oblock,
 			      dm_cblock_t *cblock, bool critical_only)
 {
 	int r;
+	unsigned long flags;
 	struct smq_policy *mq = to_smq_policy(p);
 
-	mutex_lock(&mq->lock);
+	spin_lock_irqsave(&mq->lock, flags);
 	r = __smq_writeback_work(mq, oblock, cblock, critical_only);
-	mutex_unlock(&mq->lock);
+	spin_unlock_irqrestore(&mq->lock, flags);
 
 	return r;
 }
@@ -1562,21 +1533,23 @@ static void __force_mapping(struct smq_policy *mq,
 static void smq_force_mapping(struct dm_cache_policy *p,
 			      dm_oblock_t current_oblock, dm_oblock_t new_oblock)
 {
+	unsigned long flags;
 	struct smq_policy *mq = to_smq_policy(p);
 
-	mutex_lock(&mq->lock);
+	spin_lock_irqsave(&mq->lock, flags);
 	__force_mapping(mq, current_oblock, new_oblock);
-	mutex_unlock(&mq->lock);
+	spin_unlock_irqrestore(&mq->lock, flags);
 }
 
 static dm_cblock_t smq_residency(struct dm_cache_policy *p)
 {
 	dm_cblock_t r;
+	unsigned long flags;
 	struct smq_policy *mq = to_smq_policy(p);
 
-	mutex_lock(&mq->lock);
+	spin_lock_irqsave(&mq->lock, flags);
 	r = to_cblock(mq->cache_alloc.nr_allocated);
-	mutex_unlock(&mq->lock);
+	spin_unlock_irqrestore(&mq->lock, flags);
 
 	return r;
 }
@@ -1586,15 +1559,12 @@ static void smq_tick(struct dm_cache_policy *p, bool can_block)
 	struct smq_policy *mq = to_smq_policy(p);
 	unsigned long flags;
 
-	spin_lock_irqsave(&mq->tick_lock, flags);
-	mq->tick_protected++;
-	spin_unlock_irqrestore(&mq->tick_lock, flags);
-
-	if (can_block) {
-		mutex_lock(&mq->lock);
-		copy_tick(mq);
-		mutex_unlock(&mq->lock);
-	}
+	spin_lock_irqsave(&mq->lock, flags);
+	mq->tick++;
+	update_sentinels(mq);
+	end_hotspot_period(mq);
+	end_cache_period(mq);
+	spin_unlock_irqrestore(&mq->lock, flags);
 }
 
 /* Init the policy plugin interface function pointers. */
@@ -1694,10 +1664,8 @@ static struct dm_cache_policy *smq_create(dm_cblock_t cache_size,
 	} else
 		mq->cache_hit_bits = NULL;
 
-	mq->tick_protected = 0;
 	mq->tick = 0;
-	mutex_init(&mq->lock);
-	spin_lock_init(&mq->tick_lock);
+	spin_lock_init(&mq->lock);
 
 	q_init(&mq->hotspot, &mq->es, NR_HOTSPOT_LEVELS);
 	mq->hotspot.nr_top_levels = 8;
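The conversion above trades a sleeping lock for a short-held spinlock: every critical section in this policy is quick, and once spin_lock_irqsave() can always be taken, the trylock/-EWOULDBLOCK escape hatch and the deferred tick copying both disappear. A minimal sketch of the pattern (simplified stand-in, not the actual smq code):

#include <linux/spinlock.h>

struct example_policy {
	spinlock_t lock;	/* was: struct mutex lock */
	unsigned tick;
};

/*
 * With a mutex, non-blocking callers had to use mutex_trylock() and
 * bail out with -EWOULDBLOCK.  A spinlock can always be entered, at
 * the price that the critical section must never sleep.
 */
static void example_tick(struct example_policy *p)
{
	unsigned long flags;

	spin_lock_irqsave(&p->lock, flags);
	p->tick++;		/* short, non-sleeping work only */
	spin_unlock_irqrestore(&p->lock, flags);
}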
diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index 7245071778dba2f28bac62057f7b2bb444fc56e3..dd90d1236f4a46be1cf28d7ef8ebc5d64235adf7 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -424,7 +424,6 @@ static void free_migration(struct dm_cache_migration *mg)
 		wake_up(&cache->migration_wait);
 
 	mempool_free(mg, cache->migration_pool);
-	wake_worker(cache);
 }
 
 static int prealloc_data_structs(struct cache *cache, struct prealloc *p)
@@ -1064,14 +1063,6 @@ static void dec_io_migrations(struct cache *cache)
 	atomic_dec(&cache->nr_io_migrations);
 }
 
-static void __cell_release(struct cache *cache, struct dm_bio_prison_cell *cell,
-			   bool holder, struct bio_list *bios)
-{
-	(holder ? dm_cell_release : dm_cell_release_no_holder)
-		(cache->prison, cell, bios);
-	free_prison_cell(cache, cell);
-}
-
 static bool discard_or_flush(struct bio *bio)
 {
 	return bio->bi_rw & (REQ_FLUSH | REQ_FUA | REQ_DISCARD);
@@ -1079,14 +1070,13 @@ static bool discard_or_flush(struct bio *bio)
 
 static void __cell_defer(struct cache *cache, struct dm_bio_prison_cell *cell)
 {
-	if (discard_or_flush(cell->holder))
+	if (discard_or_flush(cell->holder)) {
 		/*
-		 * We have to handle these bios
-		 * individually.
+		 * We have to handle these bios individually.
 		 */
-		__cell_release(cache, cell, true, &cache->deferred_bios);
-
-	else
+		dm_cell_release(cache->prison, cell, &cache->deferred_bios);
+		free_prison_cell(cache, cell);
+	} else
 		list_add_tail(&cell->user_list, &cache->deferred_cells);
 }
 
@@ -1113,7 +1103,7 @@ static void cell_defer(struct cache *cache, struct dm_bio_prison_cell *cell, boo
 static void cell_error_with_code(struct cache *cache, struct dm_bio_prison_cell *cell, int err)
 {
 	dm_cell_error(cache->prison, cell, err);
-	dm_bio_prison_free_cell(cache->prison, cell);
+	free_prison_cell(cache, cell);
 }
 
 static void cell_requeue(struct cache *cache, struct dm_bio_prison_cell *cell)
@@ -1123,8 +1113,11 @@ static void cell_requeue(struct cache *cache, struct dm_bio_prison_cell *cell)
 
 static void free_io_migration(struct dm_cache_migration *mg)
 {
-	dec_io_migrations(mg->cache);
+	struct cache *cache = mg->cache;
+
+	dec_io_migrations(cache);
 	free_migration(mg);
+	wake_worker(cache);
 }
 
 static void migration_failure(struct dm_cache_migration *mg)
@@ -1351,16 +1344,18 @@ static void issue_discard(struct dm_cache_migration *mg)
 {
 	dm_dblock_t b, e;
 	struct bio *bio = mg->new_ocell->holder;
+	struct cache *cache = mg->cache;
 
-	calc_discard_block_range(mg->cache, bio, &b, &e);
+	calc_discard_block_range(cache, bio, &b, &e);
 	while (b != e) {
-		set_discard(mg->cache, b);
+		set_discard(cache, b);
 		b = to_dblock(from_dblock(b) + 1);
 	}
 
 	bio_endio(bio);
-	cell_defer(mg->cache, mg->new_ocell, false);
+	cell_defer(cache, mg->new_ocell, false);
 	free_migration(mg);
+	wake_worker(cache);
 }
 
 static void issue_copy_or_discard(struct dm_cache_migration *mg)
@@ -1729,6 +1724,8 @@ static void remap_cell_to_origin_clear_discard(struct cache *cache,
 		remap_to_origin(cache, bio);
 		issue(cache, bio);
 	}
+
+	free_prison_cell(cache, cell);
 }
 
 static void remap_cell_to_cache_dirty(struct cache *cache, struct dm_bio_prison_cell *cell,
@@ -1763,6 +1760,8 @@ static void remap_cell_to_cache_dirty(struct cache *cache, struct dm_bio_prison_
 		remap_to_cache(cache, bio, cblock);
 		issue(cache, bio);
 	}
+
+	free_prison_cell(cache, cell);
 }
 
 /*----------------------------------------------------------------*/
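Two threads run through the dm-cache-target hunks above: a prison cell is now freed by whoever releases it (free_prison_cell() pairs every dm_cell_release()/dm_cell_error()), and wake_worker() moved out of free_migration() into the completion paths that actually finish work. Sketched as a pair of illustrative helpers (not the actual code):

/* Whoever releases a cell also frees it. */
static void example_release_cell(struct cache *cache,
				 struct dm_bio_prison_cell *cell)
{
	dm_cell_release(cache->prison, cell, &cache->deferred_bios);
	free_prison_cell(cache, cell);
}

/*
 * Wake the worker explicitly on completion, not as a hidden side
 * effect of freeing; note the cache pointer is saved before mg dies.
 */
static void example_complete_io_migration(struct dm_cache_migration *mg)
{
	struct cache *cache = mg->cache;

	dec_io_migrations(cache);
	free_migration(mg);
	wake_worker(cache);
}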
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index ba5c2105f4e62f4d05c755fca131484f7633b6c1..d60c88df5234a0099292712cc1117dc1e65b78e0 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -1811,11 +1811,13 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	}
 	cc->iv_offset = tmpll;
 
-	if (dm_get_device(ti, argv[3], dm_table_get_mode(ti->table), &cc->dev)) {
+	ret = dm_get_device(ti, argv[3], dm_table_get_mode(ti->table), &cc->dev);
+	if (ret) {
 		ti->error = "Device lookup failed";
 		goto bad;
 	}
 
+	ret = -EINVAL;
 	if (sscanf(argv[4], "%llu%c", &tmpll, &dummy) != 1) {
 		ti->error = "Invalid device sector";
 		goto bad;
diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c
index 57b6a1901c917127eccb8247cd1015a9b588159e..b34f6e27293dcc9e408289c7955af921b4cb8990 100644
--- a/drivers/md/dm-delay.c
+++ b/drivers/md/dm-delay.c
@@ -129,6 +129,7 @@ static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	struct delay_c *dc;
 	unsigned long long tmpll;
 	char dummy;
+	int ret;
 
 	if (argc != 3 && argc != 6) {
 		ti->error = "requires exactly 3 or 6 arguments";
@@ -143,6 +144,7 @@ static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 
 	dc->reads = dc->writes = 0;
 
+	ret = -EINVAL;
 	if (sscanf(argv[1], "%llu%c", &tmpll, &dummy) != 1) {
 		ti->error = "Invalid device sector";
 		goto bad;
@@ -154,12 +156,14 @@ static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 		goto bad;
 	}
 
-	if (dm_get_device(ti, argv[0], dm_table_get_mode(ti->table),
-			  &dc->dev_read)) {
+	ret = dm_get_device(ti, argv[0], dm_table_get_mode(ti->table),
+			    &dc->dev_read);
+	if (ret) {
 		ti->error = "Device lookup failed";
 		goto bad;
 	}
 
+	ret = -EINVAL;
 	dc->dev_write = NULL;
 	if (argc == 3)
 		goto out;
@@ -175,13 +179,15 @@ static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 		goto bad_dev_read;
 	}
 
-	if (dm_get_device(ti, argv[3], dm_table_get_mode(ti->table),
-			  &dc->dev_write)) {
+	ret = dm_get_device(ti, argv[3], dm_table_get_mode(ti->table),
+			    &dc->dev_write);
+	if (ret) {
 		ti->error = "Write device lookup failed";
 		goto bad_dev_read;
 	}
 
 out:
+	ret = -EINVAL;
 	dc->kdelayd_wq = alloc_workqueue("kdelayd", WQ_MEM_RECLAIM, 0);
 	if (!dc->kdelayd_wq) {
 		DMERR("Couldn't start kdelayd");
@@ -208,7 +214,7 @@ static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	dm_put_device(ti, dc->dev_read);
 bad:
 	kfree(dc);
-	return -EINVAL;
+	return ret;
 }
 
 static void delay_dtr(struct dm_target *ti)
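The dm-crypt and dm-delay hunks above, and the flakey, linear, log-writes, raid1 and stripe hunks below, all make the same two fixes: keep a running return code that is reset to -EINVAL before each validation step, and propagate dm_get_device()'s errno instead of flattening every failure to -EINVAL or -ENXIO. Distilled into one hypothetical constructor (illustrative target, kernel context assumed):

#include <linux/device-mapper.h>
#include <linux/slab.h>

struct example_c {
	struct dm_dev *dev;
	unsigned long long start;
};

static int example_ctr(struct dm_target *ti, unsigned int argc, char **argv)
{
	struct example_c *ec;
	unsigned long long tmp;
	char dummy;
	int ret;

	ec = kzalloc(sizeof(*ec), GFP_KERNEL);
	if (!ec)
		return -ENOMEM;

	ret = -EINVAL;	/* covers every validation until reassigned */
	if (sscanf(argv[1], "%llu%c", &tmp, &dummy) != 1) {
		ti->error = "Invalid device sector";
		goto bad;
	}
	ec->start = tmp;

	/*
	 * dm_get_device() returns a meaningful errno; propagate it
	 * rather than collapsing it to -EINVAL (or -ENXIO).
	 */
	ret = dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &ec->dev);
	if (ret) {
		ti->error = "Device lookup failed";
		goto bad;
	}

	ti->private = ec;
	return 0;

bad:
	kfree(ec);
	return ret;
}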
diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c
index afab13bd683e0e7bed2c21846155a0a9ce086e6e..645e8b4f808eee3f1f63e2432bf12cf8cbcde961 100644
--- a/drivers/md/dm-flakey.c
+++ b/drivers/md/dm-flakey.c
@@ -183,6 +183,7 @@ static int flakey_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 
 	devname = dm_shift_arg(&as);
 
+	r = -EINVAL;
 	if (sscanf(dm_shift_arg(&as), "%llu%c", &tmpll, &dummy) != 1) {
 		ti->error = "Invalid device sector";
 		goto bad;
@@ -211,7 +212,8 @@ static int flakey_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	if (r)
 		goto bad;
 
-	if (dm_get_device(ti, devname, dm_table_get_mode(ti->table), &fc->dev)) {
+	r = dm_get_device(ti, devname, dm_table_get_mode(ti->table), &fc->dev);
+	if (r) {
 		ti->error = "Device lookup failed";
 		goto bad;
 	}
@@ -224,7 +226,7 @@ static int flakey_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 
 bad:
 	kfree(fc);
-	return -EINVAL;
+	return r;
 }
 
 static void flakey_dtr(struct dm_target *ti)
diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c
index 7dd5fc8e3eeaac8d8d8f148862ecf3c6f298fec2..436f5c9b6aea56dd6652696cc3eac489690c1e8e 100644
--- a/drivers/md/dm-linear.c
+++ b/drivers/md/dm-linear.c
@@ -30,6 +30,7 @@ static int linear_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	struct linear_c *lc;
 	unsigned long long tmp;
 	char dummy;
+	int ret;
 
 	if (argc != 2) {
 		ti->error = "Invalid argument count";
@@ -42,13 +43,15 @@ static int linear_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 		return -ENOMEM;
 	}
 
+	ret = -EINVAL;
 	if (sscanf(argv[1], "%llu%c", &tmp, &dummy) != 1) {
 		ti->error = "dm-linear: Invalid device sector";
 		goto bad;
 	}
 	lc->start = tmp;
 
-	if (dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &lc->dev)) {
+	ret = dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &lc->dev);
+	if (ret) {
 		ti->error = "dm-linear: Device lookup failed";
 		goto bad;
 	}
@@ -61,7 +64,7 @@ static int linear_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 
 bad:
 	kfree(lc);
-	return -EINVAL;
+	return ret;
 }
 
 static void linear_dtr(struct dm_target *ti)
diff --git a/drivers/md/dm-log-writes.c b/drivers/md/dm-log-writes.c
index 316cc3fb741f972a075fb56888585ef4ada72468..b2912dbac8bce7356f06eebc8063317c6bc411b0 100644
--- a/drivers/md/dm-log-writes.c
+++ b/drivers/md/dm-log-writes.c
@@ -417,6 +417,7 @@ static int log_writes_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	struct log_writes_c *lc;
 	struct dm_arg_set as;
 	const char *devname, *logdevname;
+	int ret;
 
 	as.argc = argc;
 	as.argv = argv;
@@ -440,18 +441,22 @@ static int log_writes_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	atomic_set(&lc->pending_blocks, 0);
 
 	devname = dm_shift_arg(&as);
-	if (dm_get_device(ti, devname, dm_table_get_mode(ti->table), &lc->dev)) {
+	ret = dm_get_device(ti, devname, dm_table_get_mode(ti->table), &lc->dev);
+	if (ret) {
 		ti->error = "Device lookup failed";
 		goto bad;
 	}
 
 	logdevname = dm_shift_arg(&as);
-	if (dm_get_device(ti, logdevname, dm_table_get_mode(ti->table), &lc->logdev)) {
+	ret = dm_get_device(ti, logdevname, dm_table_get_mode(ti->table),
+			    &lc->logdev);
+	if (ret) {
 		ti->error = "Log device lookup failed";
 		dm_put_device(ti, lc->dev);
 		goto bad;
 	}
 
+	ret = -EINVAL;
 	lc->log_kthread = kthread_run(log_writes_kthread, lc, "log-write");
 	if (!lc->log_kthread) {
 		ti->error = "Couldn't alloc kthread";
@@ -476,7 +481,7 @@ static int log_writes_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 
 bad:
 	kfree(lc);
-	return -EINVAL;
+	return ret;
 }
 
 static int log_mark(struct log_writes_c *lc, char *data)
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index e1eabfb2f52da0bc017f7be80dc7d8d949dfa52e..f2a363a89629902350649cb818f6159ecda1afd9 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -945,16 +945,18 @@ static int get_mirror(struct mirror_set *ms, struct dm_target *ti,
 {
 	unsigned long long offset;
 	char dummy;
+	int ret;
 
 	if (sscanf(argv[1], "%llu%c", &offset, &dummy) != 1) {
 		ti->error = "Invalid offset";
 		return -EINVAL;
 	}
 
-	if (dm_get_device(ti, argv[0], dm_table_get_mode(ti->table),
-			  &ms->mirror[mirror].dev)) {
+	ret = dm_get_device(ti, argv[0], dm_table_get_mode(ti->table),
+			    &ms->mirror[mirror].dev);
+	if (ret) {
 		ti->error = "Device lookup failure";
-		return -ENXIO;
+		return ret;
 	}
 
 	ms->mirror[mirror].ms = ms;
diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c
index 808b8419bc48d42423d750eb64d178bfa77509e5..bf71583296f732b6b78c71ae67dee5222824b2f8 100644
--- a/drivers/md/dm-snap-persistent.c
+++ b/drivers/md/dm-snap-persistent.c
@@ -533,7 +533,7 @@ static int read_exceptions(struct pstore *ps,
 
 		chunk = area_location(ps, ps->current_area);
 		area = dm_bufio_read(client, chunk, &bp);
-		if (unlikely(IS_ERR(area))) {
+		if (IS_ERR(area)) {
 			r = PTR_ERR(area);
 			goto ret_destroy_bufio;
 		}
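This unlikely(IS_ERR(...)) cleanup, repeated below in dm-verity and dm-block-manager, works because the branch-prediction hint is already baked into IS_ERR(); wrapping it again bought nothing. Roughly what include/linux/err.h provided at the time (paraphrased, not quoted verbatim):

#define MAX_ERRNO	4095

#define IS_ERR_VALUE(x) unlikely((x) >= (unsigned long)-MAX_ERRNO)

static inline bool __must_check IS_ERR(__force const void *ptr)
{
	return IS_ERR_VALUE((unsigned long)ptr);
}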
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index d10b6876018eee6b62ca7f937022d3648c6295c0..c0bcd6516dfe17f8e7a06ec8c66d1e1d5801f133 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -63,6 +63,13 @@ struct dm_snapshot {
 	 */
 	int valid;
 
+	/*
+	 * The snapshot overflowed because of a write to the snapshot device.
+	 * We don't have to invalidate the snapshot in this case, but we need
+	 * to prevent further writes.
+	 */
+	int snapshot_overflowed;
+
 	/* Origin writes don't trigger exceptions until this is set */
 	int active;
 
@@ -1152,6 +1159,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 
 	s->ti = ti;
 	s->valid = 1;
+	s->snapshot_overflowed = 0;
 	s->active = 0;
 	atomic_set(&s->pending_exceptions_count, 0);
 	s->exception_start_sequence = 0;
@@ -1301,6 +1309,7 @@ static void __handover_exceptions(struct dm_snapshot *snap_src,
 
 	snap_dest->ti->max_io_len = snap_dest->store->chunk_size;
 	snap_dest->valid = snap_src->valid;
+	snap_dest->snapshot_overflowed = snap_src->snapshot_overflowed;
 
 	/*
 	 * Set source invalid to ensure it receives no further I/O.
@@ -1691,7 +1700,7 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio)
 	 * to copy an exception */
 	down_write(&s->lock);
 
-	if (!s->valid) {
+	if (!s->valid || (unlikely(s->snapshot_overflowed) && bio_rw(bio) == WRITE)) {
 		r = -EIO;
 		goto out_unlock;
 	}
@@ -1715,7 +1724,7 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio)
 		pe = alloc_pending_exception(s);
 		down_write(&s->lock);
 
-		if (!s->valid) {
+		if (!s->valid || s->snapshot_overflowed) {
 			free_pending_exception(pe);
 			r = -EIO;
 			goto out_unlock;
@@ -1730,7 +1739,8 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio)
 
 		pe = __find_pending_exception(s, pe, chunk);
 		if (!pe) {
-			__invalidate_snapshot(s, -ENOMEM);
+			s->snapshot_overflowed = 1;
+			DMERR("Snapshot overflowed: Unable to allocate exception.");
 			r = -EIO;
 			goto out_unlock;
 		}
@@ -1990,6 +2000,8 @@ static void snapshot_status(struct dm_target *ti, status_type_t type,
 			DMEMIT("Invalid");
 		else if (snap->merge_failed)
 			DMEMIT("Merge failed");
+		else if (snap->snapshot_overflowed)
+			DMEMIT("Overflow");
 		else {
 			if (snap->store->type->usage) {
 				sector_t total_sectors, sectors_allocated,
@@ -2353,7 +2365,7 @@ static struct target_type origin_target = {
 
 static struct target_type snapshot_target = {
 	.name    = "snapshot",
-	.version = {1, 13, 0},
+	.version = {1, 14, 0},
 	.module  = THIS_MODULE,
 	.ctr     = snapshot_ctr,
 	.dtr     = snapshot_dtr,
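Userspace observes the new state through the third status field; for an overflowed (but still readable) snapshot, a "dmsetup status" line would report something like this (hypothetical device and sizes):

0 2097152 snapshot Overflow

Unlike "Invalid", reads of already-copied data still succeed; only further writes to the snapshot are refused.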
diff --git a/drivers/md/dm-stats.c b/drivers/md/dm-stats.c
index 8a8b48fa901ab162696d89c94679d78bff6f7d9c..8289804ccd998a1d4d9be9fce2a32bd4bfa6cad8 100644
--- a/drivers/md/dm-stats.c
+++ b/drivers/md/dm-stats.c
@@ -457,12 +457,24 @@ static int dm_stats_list(struct dm_stats *stats, const char *program,
 	list_for_each_entry(s, &stats->list, list_entry) {
 		if (!program || !strcmp(program, s->program_id)) {
 			len = s->end - s->start;
-			DMEMIT("%d: %llu+%llu %llu %s %s\n", s->id,
+			DMEMIT("%d: %llu+%llu %llu %s %s", s->id,
 			       (unsigned long long)s->start,
 			       (unsigned long long)len,
 			       (unsigned long long)s->step,
 			       s->program_id,
 			       s->aux_data);
+			if (s->stat_flags & STAT_PRECISE_TIMESTAMPS)
+				DMEMIT(" precise_timestamps");
+			if (s->n_histogram_entries) {
+				unsigned i;
+				DMEMIT(" histogram:");
+				for (i = 0; i < s->n_histogram_entries; i++) {
+					if (i)
+						DMEMIT(",");
+					DMEMIT("%llu", s->histogram_boundaries[i]);
+				}
+			}
+			DMEMIT("\n");
 		}
 	}
 	mutex_unlock(&stats->mutex);
diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c
index 484029db8cba10931a91b0fabfdd58c315904a9d..797ddb900b062079cae7aca92b0b7bad2ead619f 100644
--- a/drivers/md/dm-stripe.c
+++ b/drivers/md/dm-stripe.c
@@ -75,13 +75,15 @@ static int get_stripe(struct dm_target *ti, struct stripe_c *sc,
 {
 	unsigned long long start;
 	char dummy;
+	int ret;
 
 	if (sscanf(argv[1], "%llu%c", &start, &dummy) != 1)
 		return -EINVAL;
 
-	if (dm_get_device(ti, argv[0], dm_table_get_mode(ti->table),
-			  &sc->stripe[stripe].dev))
-		return -ENXIO;
+	ret = dm_get_device(ti, argv[0], dm_table_get_mode(ti->table),
+			    &sc->stripe[stripe].dev);
+	if (ret)
+		return ret;
 
 	sc->stripe[stripe].physical_start = start;
 
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index 271a6624936313863a753aa6ddde802a41bc27be..6578b7bc1fbba5339a740c8447f8bfdf8266fd8f 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -332,9 +332,6 @@ struct thin_c {
  *
  * Description:
  *    Asynchronously issue a discard request for the sectors in question.
- *    NOTE: this variant of blk-core's blkdev_issue_discard() is a stop-gap
- *    that is being kept local to DM thinp until the block changes to allow
- *    late bio splitting land upstream.
  */
 static int __blkdev_issue_discard_async(struct block_device *bdev, sector_t sector,
 					sector_t nr_sects, gfp_t gfp_mask, unsigned long flags,
@@ -342,91 +339,36 @@ static int __blkdev_issue_discard_async(struct block_device *bdev, sector_t sect
 {
 	struct request_queue *q = bdev_get_queue(bdev);
 	int type = REQ_WRITE | REQ_DISCARD;
-	unsigned int max_discard_sectors, granularity;
-	int alignment;
 	struct bio *bio;
-	int ret = 0;
-	struct blk_plug plug;
 
-	if (!q)
+	if (!q || !nr_sects)
 		return -ENXIO;
 
 	if (!blk_queue_discard(q))
 		return -EOPNOTSUPP;
 
-	/* Zero-sector (unknown) and one-sector granularities are the same. */
-	granularity = max(q->limits.discard_granularity >> 9, 1U);
-	alignment = (bdev_discard_alignment(bdev) >> 9) % granularity;
-
-	/*
-	 * Ensure that max_discard_sectors is of the proper
-	 * granularity, so that requests stay aligned after a split.
-	 */
-	max_discard_sectors = min(q->limits.max_discard_sectors, UINT_MAX >> 9);
-	max_discard_sectors -= max_discard_sectors % granularity;
-	if (unlikely(!max_discard_sectors)) {
-		/* Avoid infinite loop below. Being cautious never hurts. */
-		return -EOPNOTSUPP;
-	}
-
 	if (flags & BLKDEV_DISCARD_SECURE) {
 		if (!blk_queue_secdiscard(q))
 			return -EOPNOTSUPP;
 		type |= REQ_SECURE;
 	}
 
-	blk_start_plug(&plug);
-	while (nr_sects) {
-		unsigned int req_sects;
-		sector_t end_sect, tmp;
-
-		/*
-		 * Required bio_put occurs in bio_endio thanks to bio_chain below
-		 */
-		bio = bio_alloc(gfp_mask, 1);
-		if (!bio) {
-			ret = -ENOMEM;
-			break;
-		}
-
-		req_sects = min_t(sector_t, nr_sects, max_discard_sectors);
-
-		/*
-		 * If splitting a request, and the next starting sector would be
-		 * misaligned, stop the discard at the previous aligned sector.
-		 */
-		end_sect = sector + req_sects;
-		tmp = end_sect;
-		if (req_sects < nr_sects &&
-		    sector_div(tmp, granularity) != alignment) {
-			end_sect = end_sect - alignment;
-			sector_div(end_sect, granularity);
-			end_sect = end_sect * granularity + alignment;
-			req_sects = end_sect - sector;
-		}
-
-		bio_chain(bio, parent_bio);
-
-		bio->bi_iter.bi_sector = sector;
-		bio->bi_bdev = bdev;
+	/*
+	 * Required bio_put occurs in bio_endio thanks to bio_chain below
+	 */
+	bio = bio_alloc(gfp_mask, 1);
+	if (!bio)
+		return -ENOMEM;
 
-		bio->bi_iter.bi_size = req_sects << 9;
-		nr_sects -= req_sects;
-		sector = end_sect;
+	bio_chain(bio, parent_bio);
 
-		submit_bio(type, bio);
+	bio->bi_iter.bi_sector = sector;
+	bio->bi_bdev = bdev;
+	bio->bi_iter.bi_size = nr_sects << 9;
 
-		/*
-		 * We can loop for a long time in here, if someone does
-		 * full device discards (like mkfs). Be nice and allow
-		 * us to schedule out to avoid softlocking if preempt
-		 * is disabled.
-		 */
-		cond_resched();
-	}
-	blk_finish_plug(&plug);
+	submit_bio(type, bio);
 
-	return ret;
+	return 0;
 }
 
 static bool block_size_is_power_of_two(struct pool *pool)
@@ -1543,9 +1485,8 @@ static void process_discard_cell_no_passdown(struct thin_c *tc,
 }
 
 /*
- * FIXME: DM local hack to defer parent bios's end_io until we
- * _know_ all chained sub range discard bios have completed.
- * Will go away once late bio splitting lands upstream!
+ * __bio_inc_remaining() is used to defer parent bios's end_io until
+ * we _know_ all chained sub range discard bios have completed.
  */
 static inline void __bio_inc_remaining(struct bio *bio)
 {
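With late bio splitting in the block layer, the helper no longer needs its own split-and-plug loop: it issues one discard bio and chains it to the parent, so the parent's end_io is deferred until the child completes (the required bio_put() happens inside bio_endio()). The deferral mechanism the last hunk's comment refers to looks roughly like this (sketch of the helper's body, shown for context; field names from struct bio of this era):

/*
 * Sketch: defer the parent's completion by bumping its remaining
 * count; each chained child's bio_endio() drops it again, and the
 * parent's own end_io runs only when the count reaches zero.
 */
static inline void example_bio_inc_remaining(struct bio *bio)
{
	bio_set_flag(bio, BIO_CHAIN);	/* mark: completion is shared */
	smp_mb__before_atomic();
	atomic_inc(&bio->__bi_remaining);
}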
diff --git a/drivers/md/dm-verity.c b/drivers/md/dm-verity.c
index c137dcb147b8329015ff1872f09418993eab6df4..edc624bccf9aa841dc76073208735bd2455e12a8 100644
--- a/drivers/md/dm-verity.c
+++ b/drivers/md/dm-verity.c
@@ -26,8 +26,6 @@
 #define DM_VERITY_ENV_LENGTH		42
 #define DM_VERITY_ENV_VAR_NAME		"DM_VERITY_ERR_BLOCK_NR"
 
-#define DM_VERITY_IO_VEC_INLINE		16
-#define DM_VERITY_MEMPOOL_SIZE		4
 #define DM_VERITY_DEFAULT_PREFETCH_SIZE	262144
 
 #define DM_VERITY_MAX_LEVELS		63
@@ -76,8 +74,6 @@ struct dm_verity {
 	enum verity_mode mode;	/* mode for handling verification errors */
 	unsigned corrupted_errs;/* Number of errors for corrupted blocks */
 
-	mempool_t *vec_mempool;	/* mempool of bio vector */
-
 	struct workqueue_struct *verify_wq;
 
 	/* starting blocks for each tree level. 0 is the lowest level. */
@@ -271,7 +267,7 @@ static int verity_verify_level(struct dm_verity_io *io, sector_t block,
 		verity_hash_at_level(v, block, level, &hash_block, &offset);
 
 		data = dm_bufio_read(v->bufio, hash_block, &buf);
-		if (unlikely(IS_ERR(data)))
+		if (IS_ERR(data))
 			return PTR_ERR(data);
 
 		aux = dm_bufio_get_aux_data(buf);
@@ -677,9 +673,6 @@ static void verity_dtr(struct dm_target *ti)
 	if (v->verify_wq)
 		destroy_workqueue(v->verify_wq);
 
-	if (v->vec_mempool)
-		mempool_destroy(v->vec_mempool);
-
 	if (v->bufio)
 		dm_bufio_client_destroy(v->bufio);
 
@@ -948,14 +941,6 @@ static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv)
 	ti->per_bio_data_size = roundup(sizeof(struct dm_verity_io) + v->shash_descsize + v->digest_size * 2,
 					__alignof__(struct dm_verity_io));
 
-	v->vec_mempool = mempool_create_kmalloc_pool(DM_VERITY_MEMPOOL_SIZE,
-					BIO_MAX_PAGES * sizeof(struct bio_vec));
-	if (!v->vec_mempool) {
-		ti->error = "Cannot allocate vector mempool";
-		r = -ENOMEM;
-		goto bad;
-	}
-
 	/* WQ_UNBOUND greatly improves performance when running on ramdisk */
 	v->verify_wq = alloc_workqueue("kverityd", WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM | WQ_UNBOUND, num_online_cpus());
 	if (!v->verify_wq) {
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 6ffc01bb85f2a8ee6a127cf3c651bebd3cdf6a32..6264781dc69a6066b88d719537c471b7d1cd7b27 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1464,7 +1464,7 @@ static void __map_bio(struct dm_target_io *tio)
 		md = tio->io->md;
 		dec_pending(tio->io, r);
 		free_tio(md, tio);
-	} else if (r) {
+	} else if (r != DM_MAPIO_SUBMITTED) {
 		DMWARN("unimplemented target map return value: %d", r);
 		BUG();
 	}
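The new test is equivalent (DM_MAPIO_SUBMITTED is 0) but states the contract explicitly. Condensed from __map_bio(), the return values of a target's .map hook are dispatched like this (simplified sketch, trace call omitted):

r = ti->type->map(ti, clone);
if (r == DM_MAPIO_REMAPPED) {
	/* the target redirected the bio: dispatch the clone */
	generic_make_request(clone);
} else if (r < 0 || r == DM_MAPIO_REQUEUE) {
	/* error or requeue: complete the original io with that code */
	md = tio->io->md;
	dec_pending(tio->io, r);
	free_tio(md, tio);
} else if (r != DM_MAPIO_SUBMITTED) {
	/* DM_MAPIO_SUBMITTED (0): the target has taken ownership */
	DMWARN("unimplemented target map return value: %d", r);
	BUG();
}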
diff --git a/drivers/md/persistent-data/dm-block-manager.c b/drivers/md/persistent-data/dm-block-manager.c
index 4d6c9b689eaa73d4fccbe44991fc04913e413a8c..88dbe7b97c2c3ca7ace7cbee4b75f80496a0c62a 100644
--- a/drivers/md/persistent-data/dm-block-manager.c
+++ b/drivers/md/persistent-data/dm-block-manager.c
@@ -454,7 +454,7 @@ int dm_bm_read_lock(struct dm_block_manager *bm, dm_block_t b,
 	int r;
 
 	p = dm_bufio_read(bm->bufio, b, (struct dm_buffer **) result);
-	if (unlikely(IS_ERR(p)))
+	if (IS_ERR(p))
 		return PTR_ERR(p);
 
 	aux = dm_bufio_get_aux_data(to_buffer(*result));
@@ -490,7 +490,7 @@ int dm_bm_write_lock(struct dm_block_manager *bm,
 		return -EPERM;
 
 	p = dm_bufio_read(bm->bufio, b, (struct dm_buffer **) result);
-	if (unlikely(IS_ERR(p)))
+	if (IS_ERR(p))
 		return PTR_ERR(p);
 
 	aux = dm_bufio_get_aux_data(to_buffer(*result));
@@ -523,7 +523,7 @@ int dm_bm_read_try_lock(struct dm_block_manager *bm,
 	int r;
 
 	p = dm_bufio_get(bm->bufio, b, (struct dm_buffer **) result);
-	if (unlikely(IS_ERR(p)))
+	if (IS_ERR(p))
 		return PTR_ERR(p);
 	if (unlikely(!p))
 		return -EWOULDBLOCK;
@@ -559,7 +559,7 @@ int dm_bm_write_lock_zero(struct dm_block_manager *bm,
 		return -EPERM;
 
 	p = dm_bufio_new(bm->bufio, b, (struct dm_buffer **) result);
-	if (unlikely(IS_ERR(p)))
+	if (IS_ERR(p))
 		return PTR_ERR(p);
 
 	memset(p, 0, dm_bm_block_size(bm));
diff --git a/drivers/md/persistent-data/dm-btree-remove.c b/drivers/md/persistent-data/dm-btree-remove.c
index 4222f774cf369b1eb1b031bd652854c573b224af..421a36c593e3e7dedef785a5f78c12914c13d8b3 100644
--- a/drivers/md/persistent-data/dm-btree-remove.c
+++ b/drivers/md/persistent-data/dm-btree-remove.c
@@ -409,29 +409,11 @@ static int rebalance3(struct shadow_spine *s, struct dm_btree_info *info,
 	return 0;
 }
 
-static int get_nr_entries(struct dm_transaction_manager *tm,
-			  dm_block_t b, uint32_t *result)
-{
-	int r;
-	struct dm_block *block;
-	struct btree_node *n;
-
-	r = dm_tm_read_lock(tm, b, &btree_node_validator, &block);
-	if (r)
-		return r;
-
-	n = dm_block_data(block);
-	*result = le32_to_cpu(n->header.nr_entries);
-
-	return dm_tm_unlock(tm, block);
-}
-
 static int rebalance_children(struct shadow_spine *s,
 			      struct dm_btree_info *info,
 			      struct dm_btree_value_type *vt, uint64_t key)
 {
 	int i, r, has_left_sibling, has_right_sibling;
-	uint32_t child_entries;
 	struct btree_node *n;
 
 	n = dm_block_data(shadow_current(s));
@@ -458,10 +440,6 @@ static int rebalance_children(struct shadow_spine *s,
 	if (i < 0)
 		return -ENODATA;
 
-	r = get_nr_entries(info->tm, value64(n, i), &child_entries);
-	if (r)
-		return r;
-
 	has_left_sibling = i > 0;
 	has_right_sibling = i < (le32_to_cpu(n->header.nr_entries) - 1);
 
diff --git a/drivers/md/persistent-data/dm-btree.c b/drivers/md/persistent-data/dm-btree.c
index c7726cebc4950c24cb6f6f2b7cacfa6465bddeb6..b6cec258cc2138497b3c8875fc4defdb7523701f 100644
--- a/drivers/md/persistent-data/dm-btree.c
+++ b/drivers/md/persistent-data/dm-btree.c
@@ -420,8 +420,8 @@ EXPORT_SYMBOL_GPL(dm_btree_lookup);
  *
  * Where A* is a shadow of A.
  */
-static int btree_split_sibling(struct shadow_spine *s, dm_block_t root,
-			       unsigned parent_index, uint64_t key)
+static int btree_split_sibling(struct shadow_spine *s, unsigned parent_index,
+			       uint64_t key)
 {
 	int r;
 	size_t size;
@@ -625,7 +625,7 @@ static int btree_insert_raw(struct shadow_spine *s, dm_block_t root,
 		if (top)
 			r = btree_split_beneath(s, key);
 		else
-			r = btree_split_sibling(s, root, i, key);
+			r = btree_split_sibling(s, i, key);
 
 		if (r < 0)
 			return r;
diff --git a/include/uapi/linux/dm-ioctl.h b/include/uapi/linux/dm-ioctl.h
index 061aca3a962d73a8f9323b6920230d9895a3eb29..d34611e35a30f10995d19cc4ae80aeb4a9547211 100644
--- a/include/uapi/linux/dm-ioctl.h
+++ b/include/uapi/linux/dm-ioctl.h
@@ -267,9 +267,9 @@ enum {
 #define DM_DEV_SET_GEOMETRY	_IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl)
 
 #define DM_VERSION_MAJOR	4
-#define DM_VERSION_MINOR	32
+#define DM_VERSION_MINOR	33
 #define DM_VERSION_PATCHLEVEL	0
-#define DM_VERSION_EXTRA	"-ioctl (2015-6-26)"
+#define DM_VERSION_EXTRA	"-ioctl (2015-8-18)"
 
 /* Status bits */
 #define DM_READONLY_FLAG	(1 << 0) /* In/Out */