提交 154f807e 编写于 作者: L Linus Torvalds

Merge git://git.kernel.org/pub/scm/linux/kernel/git/agk/linux-2.6-dm

* git://git.kernel.org/pub/scm/linux/kernel/git/agk/linux-2.6-dm:
  dm snapshot: fix on disk chunk size validation
  dm exception store: split set_chunk_size
  dm snapshot: fix header corruption race on invalidation
  dm snapshot: refactor zero_disk_area to use chunk_io
  dm log: userspace add luid to distinguish between concurrent log instances
  dm raid1: do not allow log_failure variable to unset after being set
  dm log: remove incorrect field from userspace table output
  dm log: fix userspace status output
  dm stripe: expose correct io hints
  dm table: add more context to terse warning messages
  dm table: fix queue_limit checking device iterator
  dm snapshot: implement iterate devices
  dm multipath: fix oops when request based io fails when no paths
......@@ -171,6 +171,14 @@ static int set_chunk_size(struct dm_exception_store *store,
*/
chunk_size_ulong = round_up(chunk_size_ulong, PAGE_SIZE >> 9);
return dm_exception_store_set_chunk_size(store, chunk_size_ulong,
error);
}
int dm_exception_store_set_chunk_size(struct dm_exception_store *store,
unsigned long chunk_size_ulong,
char **error)
{
/* Check chunk_size is a power of 2 */
if (!is_power_of_2(chunk_size_ulong)) {
*error = "Chunk size is not a power of 2";
......@@ -183,6 +191,11 @@ static int set_chunk_size(struct dm_exception_store *store,
return -EINVAL;
}
if (chunk_size_ulong > INT_MAX >> SECTOR_SHIFT) {
*error = "Chunk size is too high";
return -EINVAL;
}
store->chunk_size = chunk_size_ulong;
store->chunk_mask = chunk_size_ulong - 1;
store->chunk_shift = ffs(chunk_size_ulong) - 1;
......
......@@ -168,6 +168,10 @@ static inline chunk_t sector_to_chunk(struct dm_exception_store *store,
int dm_exception_store_type_register(struct dm_exception_store_type *type);
int dm_exception_store_type_unregister(struct dm_exception_store_type *type);
int dm_exception_store_set_chunk_size(struct dm_exception_store *store,
unsigned long chunk_size_ulong,
char **error);
int dm_exception_store_create(struct dm_target *ti, int argc, char **argv,
unsigned *args_used,
struct dm_exception_store **store);
......
......@@ -21,6 +21,7 @@ struct log_c {
struct dm_target *ti;
uint32_t region_size;
region_t region_count;
uint64_t luid;
char uuid[DM_UUID_LEN];
char *usr_argv_str;
......@@ -63,7 +64,7 @@ static int userspace_do_request(struct log_c *lc, const char *uuid,
* restored.
*/
retry:
r = dm_consult_userspace(uuid, request_type, data,
r = dm_consult_userspace(uuid, lc->luid, request_type, data,
data_size, rdata, rdata_size);
if (r != -ESRCH)
......@@ -74,14 +75,15 @@ static int userspace_do_request(struct log_c *lc, const char *uuid,
set_current_state(TASK_INTERRUPTIBLE);
schedule_timeout(2*HZ);
DMWARN("Attempting to contact userspace log server...");
r = dm_consult_userspace(uuid, DM_ULOG_CTR, lc->usr_argv_str,
r = dm_consult_userspace(uuid, lc->luid, DM_ULOG_CTR,
lc->usr_argv_str,
strlen(lc->usr_argv_str) + 1,
NULL, NULL);
if (!r)
break;
}
DMINFO("Reconnected to userspace log server... DM_ULOG_CTR complete");
r = dm_consult_userspace(uuid, DM_ULOG_RESUME, NULL,
r = dm_consult_userspace(uuid, lc->luid, DM_ULOG_RESUME, NULL,
0, NULL, NULL);
if (!r)
goto retry;
......@@ -111,10 +113,9 @@ static int build_constructor_string(struct dm_target *ti,
return -ENOMEM;
}
for (i = 0, str_size = 0; i < argc; i++)
str_size += sprintf(str + str_size, "%s ", argv[i]);
str_size += sprintf(str + str_size, "%llu",
(unsigned long long)ti->len);
str_size = sprintf(str, "%llu", (unsigned long long)ti->len);
for (i = 0; i < argc; i++)
str_size += sprintf(str + str_size, " %s", argv[i]);
*ctr_str = str;
return str_size;
......@@ -154,6 +155,9 @@ static int userspace_ctr(struct dm_dirty_log *log, struct dm_target *ti,
return -ENOMEM;
}
/* The ptr value is sufficient for local unique id */
lc->luid = (uint64_t)lc;
lc->ti = ti;
if (strlen(argv[0]) > (DM_UUID_LEN - 1)) {
......@@ -173,7 +177,7 @@ static int userspace_ctr(struct dm_dirty_log *log, struct dm_target *ti,
}
/* Send table string */
r = dm_consult_userspace(lc->uuid, DM_ULOG_CTR,
r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_CTR,
ctr_str, str_size, NULL, NULL);
if (r == -ESRCH) {
......@@ -183,7 +187,7 @@ static int userspace_ctr(struct dm_dirty_log *log, struct dm_target *ti,
/* Since the region size does not change, get it now */
rdata_size = sizeof(rdata);
r = dm_consult_userspace(lc->uuid, DM_ULOG_GET_REGION_SIZE,
r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_GET_REGION_SIZE,
NULL, 0, (char *)&rdata, &rdata_size);
if (r) {
......@@ -212,7 +216,7 @@ static void userspace_dtr(struct dm_dirty_log *log)
int r;
struct log_c *lc = log->context;
r = dm_consult_userspace(lc->uuid, DM_ULOG_DTR,
r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_DTR,
NULL, 0,
NULL, NULL);
......@@ -227,7 +231,7 @@ static int userspace_presuspend(struct dm_dirty_log *log)
int r;
struct log_c *lc = log->context;
r = dm_consult_userspace(lc->uuid, DM_ULOG_PRESUSPEND,
r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_PRESUSPEND,
NULL, 0,
NULL, NULL);
......@@ -239,7 +243,7 @@ static int userspace_postsuspend(struct dm_dirty_log *log)
int r;
struct log_c *lc = log->context;
r = dm_consult_userspace(lc->uuid, DM_ULOG_POSTSUSPEND,
r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_POSTSUSPEND,
NULL, 0,
NULL, NULL);
......@@ -252,7 +256,7 @@ static int userspace_resume(struct dm_dirty_log *log)
struct log_c *lc = log->context;
lc->in_sync_hint = 0;
r = dm_consult_userspace(lc->uuid, DM_ULOG_RESUME,
r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_RESUME,
NULL, 0,
NULL, NULL);
......@@ -561,6 +565,7 @@ static int userspace_status(struct dm_dirty_log *log, status_type_t status_type,
char *result, unsigned maxlen)
{
int r = 0;
char *table_args;
size_t sz = (size_t)maxlen;
struct log_c *lc = log->context;
......@@ -577,8 +582,12 @@ static int userspace_status(struct dm_dirty_log *log, status_type_t status_type,
break;
case STATUSTYPE_TABLE:
sz = 0;
DMEMIT("%s %u %s %s", log->type->name, lc->usr_argc + 1,
lc->uuid, lc->usr_argv_str);
table_args = strstr(lc->usr_argv_str, " ");
BUG_ON(!table_args); /* There will always be a ' ' */
table_args++;
DMEMIT("%s %u %s %s ", log->type->name, lc->usr_argc,
lc->uuid, table_args);
break;
}
return (r) ? 0 : (int)sz;
......
......@@ -147,7 +147,8 @@ static void cn_ulog_callback(void *data)
/**
* dm_consult_userspace
* @uuid: log's uuid (must be DM_UUID_LEN in size)
* @uuid: log's universal unique identifier (must be DM_UUID_LEN in size)
* @luid: log's local unique identifier
* @request_type: found in include/linux/dm-log-userspace.h
* @data: data to tx to the server
* @data_size: size of data in bytes
......@@ -163,7 +164,7 @@ static void cn_ulog_callback(void *data)
*
* Returns: 0 on success, -EXXX on failure
**/
int dm_consult_userspace(const char *uuid, int request_type,
int dm_consult_userspace(const char *uuid, uint64_t luid, int request_type,
char *data, size_t data_size,
char *rdata, size_t *rdata_size)
{
......@@ -190,6 +191,7 @@ int dm_consult_userspace(const char *uuid, int request_type,
memset(tfr, 0, DM_ULOG_PREALLOCED_SIZE - overhead_size);
memcpy(tfr->uuid, uuid, DM_UUID_LEN);
tfr->luid = luid;
tfr->seq = dm_ulog_seq++;
/*
......
......@@ -11,7 +11,7 @@
int dm_ulog_tfr_init(void);
void dm_ulog_tfr_exit(void);
int dm_consult_userspace(const char *uuid, int request_type,
int dm_consult_userspace(const char *uuid, uint64_t luid, int request_type,
char *data, size_t data_size,
char *rdata, size_t *rdata_size);
......
......@@ -648,7 +648,13 @@ static void do_writes(struct mirror_set *ms, struct bio_list *writes)
*/
dm_rh_inc_pending(ms->rh, &sync);
dm_rh_inc_pending(ms->rh, &nosync);
ms->log_failure = dm_rh_flush(ms->rh) ? 1 : 0;
/*
* If the flush fails on a previous call and succeeds here,
* we must not reset the log_failure variable. We need
* userspace interaction to do that.
*/
ms->log_failure = dm_rh_flush(ms->rh) ? 1 : ms->log_failure;
/*
* Dispatch io.
......
......@@ -105,6 +105,13 @@ struct pstore {
*/
void *zero_area;
/*
* An area used for header. The header can be written
* concurrently with metadata (when invalidating the snapshot),
* so it needs a separate buffer.
*/
void *header_area;
/*
* Used to keep track of which metadata area the data in
* 'chunk' refers to.
......@@ -148,16 +155,27 @@ static int alloc_area(struct pstore *ps)
*/
ps->area = vmalloc(len);
if (!ps->area)
return r;
goto err_area;
ps->zero_area = vmalloc(len);
if (!ps->zero_area) {
vfree(ps->area);
return r;
}
if (!ps->zero_area)
goto err_zero_area;
memset(ps->zero_area, 0, len);
ps->header_area = vmalloc(len);
if (!ps->header_area)
goto err_header_area;
return 0;
err_header_area:
vfree(ps->zero_area);
err_zero_area:
vfree(ps->area);
err_area:
return r;
}
static void free_area(struct pstore *ps)
......@@ -169,6 +187,10 @@ static void free_area(struct pstore *ps)
if (ps->zero_area)
vfree(ps->zero_area);
ps->zero_area = NULL;
if (ps->header_area)
vfree(ps->header_area);
ps->header_area = NULL;
}
struct mdata_req {
......@@ -188,7 +210,8 @@ static void do_metadata(struct work_struct *work)
/*
* Read or write a chunk aligned and sized block of data from a device.
*/
static int chunk_io(struct pstore *ps, chunk_t chunk, int rw, int metadata)
static int chunk_io(struct pstore *ps, void *area, chunk_t chunk, int rw,
int metadata)
{
struct dm_io_region where = {
.bdev = ps->store->cow->bdev,
......@@ -198,7 +221,7 @@ static int chunk_io(struct pstore *ps, chunk_t chunk, int rw, int metadata)
struct dm_io_request io_req = {
.bi_rw = rw,
.mem.type = DM_IO_VMA,
.mem.ptr.vma = ps->area,
.mem.ptr.vma = area,
.client = ps->io_client,
.notify.fn = NULL,
};
......@@ -240,7 +263,7 @@ static int area_io(struct pstore *ps, int rw)
chunk = area_location(ps, ps->current_area);
r = chunk_io(ps, chunk, rw, 0);
r = chunk_io(ps, ps->area, chunk, rw, 0);
if (r)
return r;
......@@ -254,20 +277,7 @@ static void zero_memory_area(struct pstore *ps)
static int zero_disk_area(struct pstore *ps, chunk_t area)
{
struct dm_io_region where = {
.bdev = ps->store->cow->bdev,
.sector = ps->store->chunk_size * area_location(ps, area),
.count = ps->store->chunk_size,
};
struct dm_io_request io_req = {
.bi_rw = WRITE,
.mem.type = DM_IO_VMA,
.mem.ptr.vma = ps->zero_area,
.client = ps->io_client,
.notify.fn = NULL,
};
return dm_io(&io_req, 1, &where, NULL);
return chunk_io(ps, ps->zero_area, area_location(ps, area), WRITE, 0);
}
static int read_header(struct pstore *ps, int *new_snapshot)
......@@ -276,6 +286,7 @@ static int read_header(struct pstore *ps, int *new_snapshot)
struct disk_header *dh;
chunk_t chunk_size;
int chunk_size_supplied = 1;
char *chunk_err;
/*
* Use default chunk size (or hardsect_size, if larger) if none supplied
......@@ -297,11 +308,11 @@ static int read_header(struct pstore *ps, int *new_snapshot)
if (r)
return r;
r = chunk_io(ps, 0, READ, 1);
r = chunk_io(ps, ps->header_area, 0, READ, 1);
if (r)
goto bad;
dh = (struct disk_header *) ps->area;
dh = ps->header_area;
if (le32_to_cpu(dh->magic) == 0) {
*new_snapshot = 1;
......@@ -319,20 +330,25 @@ static int read_header(struct pstore *ps, int *new_snapshot)
ps->version = le32_to_cpu(dh->version);
chunk_size = le32_to_cpu(dh->chunk_size);
if (!chunk_size_supplied || ps->store->chunk_size == chunk_size)
if (ps->store->chunk_size == chunk_size)
return 0;
DMWARN("chunk size %llu in device metadata overrides "
"table chunk size of %llu.",
(unsigned long long)chunk_size,
(unsigned long long)ps->store->chunk_size);
if (chunk_size_supplied)
DMWARN("chunk size %llu in device metadata overrides "
"table chunk size of %llu.",
(unsigned long long)chunk_size,
(unsigned long long)ps->store->chunk_size);
/* We had a bogus chunk_size. Fix stuff up. */
free_area(ps);
ps->store->chunk_size = chunk_size;
ps->store->chunk_mask = chunk_size - 1;
ps->store->chunk_shift = ffs(chunk_size) - 1;
r = dm_exception_store_set_chunk_size(ps->store, chunk_size,
&chunk_err);
if (r) {
DMERR("invalid on-disk chunk size %llu: %s.",
(unsigned long long)chunk_size, chunk_err);
return r;
}
r = dm_io_client_resize(sectors_to_pages(ps->store->chunk_size),
ps->io_client);
......@@ -351,15 +367,15 @@ static int write_header(struct pstore *ps)
{
struct disk_header *dh;
memset(ps->area, 0, ps->store->chunk_size << SECTOR_SHIFT);
memset(ps->header_area, 0, ps->store->chunk_size << SECTOR_SHIFT);
dh = (struct disk_header *) ps->area;
dh = ps->header_area;
dh->magic = cpu_to_le32(SNAP_MAGIC);
dh->valid = cpu_to_le32(ps->valid);
dh->version = cpu_to_le32(ps->version);
dh->chunk_size = cpu_to_le32(ps->store->chunk_size);
return chunk_io(ps, 0, WRITE, 1);
return chunk_io(ps, ps->header_area, 0, WRITE, 1);
}
/*
......@@ -679,6 +695,8 @@ static int persistent_ctr(struct dm_exception_store *store,
ps->valid = 1;
ps->version = SNAPSHOT_DISK_VERSION;
ps->area = NULL;
ps->zero_area = NULL;
ps->header_area = NULL;
ps->next_free = 2; /* skipping the header and first area */
ps->current_committed = 0;
......
......@@ -1176,6 +1176,15 @@ static int snapshot_status(struct dm_target *ti, status_type_t type,
return 0;
}
/*
 * iterate_devices hook for the "snapshot" target.
 *
 * Calls @fn exactly once, passing the snapshot's origin device with a
 * start of 0 and a length of ti->len, and forwards @data untouched.
 *
 * Returns whatever @fn returns.
 */
static int snapshot_iterate_devices(struct dm_target *ti,
				    iterate_devices_callout_fn fn, void *data)
{
	struct dm_snapshot *s = ti->private;
	int ret;

	ret = fn(ti, s->origin, 0, ti->len, data);

	return ret;
}
/*-----------------------------------------------------------------
* Origin methods
*---------------------------------------------------------------*/
......@@ -1410,20 +1419,29 @@ static int origin_status(struct dm_target *ti, status_type_t type, char *result,
return 0;
}
/*
 * iterate_devices hook for the "snapshot-origin" target.
 *
 * The target's private pointer is the underlying dm_dev; @fn is called
 * once for it with a start of 0 and a length of ti->len.
 *
 * Returns whatever @fn returns.
 */
static int origin_iterate_devices(struct dm_target *ti,
				  iterate_devices_callout_fn fn, void *data)
{
	struct dm_dev *origin_dev = ti->private;
	int ret;

	ret = fn(ti, origin_dev, 0, ti->len, data);

	return ret;
}
/*
 * Target type descriptor for "snapshot-origin".
 *
 * .version is listed once only: a second designated initializer for the
 * same member silently overrides the first, so the stale {1, 6, 0} entry
 * was dead and has been dropped in favour of the effective {1, 7, 0}.
 */
static struct target_type origin_target = {
	.name            = "snapshot-origin",
	.version         = {1, 7, 0},
	.module          = THIS_MODULE,
	.ctr             = origin_ctr,
	.dtr             = origin_dtr,
	.map             = origin_map,
	.resume          = origin_resume,
	.status          = origin_status,
	.iterate_devices = origin_iterate_devices,
};
static struct target_type snapshot_target = {
.name = "snapshot",
.version = {1, 6, 0},
.version = {1, 7, 0},
.module = THIS_MODULE,
.ctr = snapshot_ctr,
.dtr = snapshot_dtr,
......@@ -1431,6 +1449,7 @@ static struct target_type snapshot_target = {
.end_io = snapshot_end_io,
.resume = snapshot_resume,
.status = snapshot_status,
.iterate_devices = snapshot_iterate_devices,
};
static int __init dm_snapshot_init(void)
......
......@@ -329,9 +329,19 @@ static int stripe_iterate_devices(struct dm_target *ti,
return ret;
}
/*
 * io_hints hook for the "striped" target.
 *
 * Derives a chunk size from the stripe context and publishes it through
 * @limits: the minimum I/O size is set to one chunk via
 * blk_limits_io_min(), and io_opt is set to one chunk per stripe.
 */
static void stripe_io_hints(struct dm_target *ti,
			    struct queue_limits *limits)
{
	struct stripe_c *stripe = ti->private;
	/*
	 * NOTE(review): chunk_mask + 1 is presumably the chunk size in
	 * 512-byte sectors, making this a byte count - confirm against
	 * the stripe constructor.
	 */
	unsigned chunk_bytes = (stripe->chunk_mask + 1) << 9;

	blk_limits_io_min(limits, chunk_bytes);

	/* Optimal I/O size: chunk size multiplied by the stripe count. */
	limits->io_opt = chunk_bytes * stripe->stripes;
}
static struct target_type stripe_target = {
.name = "striped",
.version = {1, 2, 0},
.version = {1, 3, 0},
.module = THIS_MODULE,
.ctr = stripe_ctr,
.dtr = stripe_dtr,
......@@ -339,6 +349,7 @@ static struct target_type stripe_target = {
.end_io = stripe_end_io,
.status = stripe_status,
.iterate_devices = stripe_iterate_devices,
.io_hints = stripe_io_hints,
};
int __init dm_stripe_init(void)
......
......@@ -343,10 +343,10 @@ static void close_dev(struct dm_dev_internal *d, struct mapped_device *md)
}
/*
* If possible, this checks an area of a destination device is valid.
* If possible, this checks an area of a destination device is invalid.
*/
static int device_area_is_valid(struct dm_target *ti, struct dm_dev *dev,
sector_t start, sector_t len, void *data)
static int device_area_is_invalid(struct dm_target *ti, struct dm_dev *dev,
sector_t start, sector_t len, void *data)
{
struct queue_limits *limits = data;
struct block_device *bdev = dev->bdev;
......@@ -357,36 +357,40 @@ static int device_area_is_valid(struct dm_target *ti, struct dm_dev *dev,
char b[BDEVNAME_SIZE];
if (!dev_size)
return 1;
return 0;
if ((start >= dev_size) || (start + len > dev_size)) {
DMWARN("%s: %s too small for target",
dm_device_name(ti->table->md), bdevname(bdev, b));
return 0;
DMWARN("%s: %s too small for target: "
"start=%llu, len=%llu, dev_size=%llu",
dm_device_name(ti->table->md), bdevname(bdev, b),
(unsigned long long)start,
(unsigned long long)len,
(unsigned long long)dev_size);
return 1;
}
if (logical_block_size_sectors <= 1)
return 1;
return 0;
if (start & (logical_block_size_sectors - 1)) {
DMWARN("%s: start=%llu not aligned to h/w "
"logical block size %hu of %s",
"logical block size %u of %s",
dm_device_name(ti->table->md),
(unsigned long long)start,
limits->logical_block_size, bdevname(bdev, b));
return 0;
return 1;
}
if (len & (logical_block_size_sectors - 1)) {
DMWARN("%s: len=%llu not aligned to h/w "
"logical block size %hu of %s",
"logical block size %u of %s",
dm_device_name(ti->table->md),
(unsigned long long)len,
limits->logical_block_size, bdevname(bdev, b));
return 0;
return 1;
}
return 1;
return 0;
}
/*
......@@ -496,8 +500,15 @@ int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev,
}
if (blk_stack_limits(limits, &q->limits, start << 9) < 0)
DMWARN("%s: target device %s is misaligned",
dm_device_name(ti->table->md), bdevname(bdev, b));
DMWARN("%s: target device %s is misaligned: "
"physical_block_size=%u, logical_block_size=%u, "
"alignment_offset=%u, start=%llu",
dm_device_name(ti->table->md), bdevname(bdev, b),
q->limits.physical_block_size,
q->limits.logical_block_size,
q->limits.alignment_offset,
(unsigned long long) start << 9);
/*
* Check if merge fn is supported.
......@@ -698,7 +709,7 @@ static int validate_hardware_logical_block_alignment(struct dm_table *table,
if (remaining) {
DMWARN("%s: table line %u (start sect %llu len %llu) "
"not aligned to h/w logical block size %hu",
"not aligned to h/w logical block size %u",
dm_device_name(table->md), i,
(unsigned long long) ti->begin,
(unsigned long long) ti->len,
......@@ -996,12 +1007,16 @@ int dm_calculate_queue_limits(struct dm_table *table,
ti->type->iterate_devices(ti, dm_set_device_limits,
&ti_limits);
/* Set I/O hints portion of queue limits */
if (ti->type->io_hints)
ti->type->io_hints(ti, &ti_limits);
/*
* Check each device area is consistent with the target's
* overall queue limits.
*/
if (!ti->type->iterate_devices(ti, device_area_is_valid,
&ti_limits))
if (ti->type->iterate_devices(ti, device_area_is_invalid,
&ti_limits))
return -EINVAL;
combine_limits:
......
......@@ -738,16 +738,22 @@ static void rq_completed(struct mapped_device *md, int run_queue)
dm_put(md);
}
/*
 * Release a clone request: unprep the clone and free the
 * dm_rq_target_io stored in clone->end_io_data.
 */
static void free_rq_clone(struct request *clone)
{
	struct dm_rq_target_io *clone_tio;

	/* Fetch the tio pointer before the clone is unprepped. */
	clone_tio = clone->end_io_data;

	blk_rq_unprep_clone(clone);
	free_rq_tio(clone_tio);
}
/*
 * Undo the preparation of @rq: detach the clone held in rq->special,
 * clear REQ_DONTPREP from the request's flags, and release the clone.
 */
static void dm_unprep_request(struct request *rq)
{
	struct request *clone = rq->special;

	rq->special = NULL;
	rq->cmd_flags &= ~REQ_DONTPREP;

	/*
	 * free_rq_clone() already unpreps the clone and frees its tio.
	 * Calling blk_rq_unprep_clone()/free_rq_tio() here as well would
	 * release both a second time.
	 */
	free_rq_clone(clone);
}
/*
......@@ -825,8 +831,7 @@ static void dm_end_request(struct request *clone, int error)
rq->sense_len = clone->sense_len;
}
BUG_ON(clone->bio);
free_rq_tio(tio);
free_rq_clone(clone);
blk_end_request_all(rq, error);
......
......@@ -91,6 +91,9 @@ typedef int (*dm_iterate_devices_fn) (struct dm_target *ti,
iterate_devices_callout_fn fn,
void *data);
typedef void (*dm_io_hints_fn) (struct dm_target *ti,
struct queue_limits *limits);
/*
* Returns:
* 0: The target can handle the next I/O immediately.
......@@ -151,6 +154,7 @@ struct target_type {
dm_merge_fn merge;
dm_busy_fn busy;
dm_iterate_devices_fn iterate_devices;
dm_io_hints_fn io_hints;
/* For internal device-mapper use. */
struct list_head list;
......
......@@ -371,7 +371,18 @@
(DM_ULOG_REQUEST_MASK & (request_type))
struct dm_ulog_request {
char uuid[DM_UUID_LEN]; /* Ties a request to a specific mirror log */
/*
* The local unique identifier (luid) and the universally unique
* identifier (uuid) are used to tie a request to a specific
* mirror log. A single machine log could probably make do with
* just the 'luid', but a cluster-aware log must use the 'uuid' and
* the 'luid'. The uuid is what is required for node to node
* communication concerning a particular log, but the 'luid' helps
* differentiate between logs that are being swapped and have the
* same 'uuid'. (Think "live" and "inactive" device-mapper tables.)
*/
uint64_t luid;
char uuid[DM_UUID_LEN];
char padding[7]; /* Padding because DM_UUID_LEN = 129 */
int32_t error; /* Used to report back processing errors */
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册