提交 f8d59dfb 编写于 作者: V Vladimir Sementsov-Ogievskiy 提交者: Kevin Wolf

block/backup: fix fleecing scheme: use serialized writes

The fleecing scheme works as follows: we want a kind of temporary snapshot
of the active drive A. We create a temporary image B, with B->backing = A.
Then we start backup(sync=none) from A to B. From this point on, B reads
as a point-in-time snapshot of A (A continues to be the active drive,
accepting guest IO).

This scheme needs some additional synchronization between reads from B
and backup COW operations, otherwise, the following situation is
theoretically possible:

(assume B is qcow2, client is NBD client, reading from B)

1. client starts reading and takes the qcow2 mutex in qcow2_co_preadv, and
   goes up to l2 table loading (assume cache miss)

2) guest write => backup COW => qcow2 write =>
   try to take qcow2 mutex => waiting

3. l2 table loaded, we see that cluster is UNALLOCATED, go to
   "case QCOW2_CLUSTER_UNALLOCATED" and unlock mutex before
   bdrv_co_preadv(bs->backing, ...)

4) aha, mutex unlocked, backup COW continues, and we finally finish
   guest write and change cluster in our active disk A

5. actually, do bdrv_co_preadv(bs->backing, ...) and read
   _new updated_ data.

To avoid this, let's make backup writes serializing, to not intersect
with reads from B.

Note: we expand the range of handled cases from (sync=none and
B->backing = A) to just (A is in the backing chain of B), to finally allow
safe reading from B during backup in all cases where A is in the backing
chain of B, i.e. B formally looks like a point-in-time snapshot of A.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
上级 09d2f948
...@@ -47,6 +47,8 @@ typedef struct BackupBlockJob { ...@@ -47,6 +47,8 @@ typedef struct BackupBlockJob {
HBitmap *copy_bitmap; HBitmap *copy_bitmap;
bool use_copy_range; bool use_copy_range;
int64_t copy_range_size; int64_t copy_range_size;
bool serialize_target_writes;
} BackupBlockJob; } BackupBlockJob;
static const BlockJobDriver backup_job_driver; static const BlockJobDriver backup_job_driver;
...@@ -102,6 +104,8 @@ static int coroutine_fn backup_cow_with_bounce_buffer(BackupBlockJob *job, ...@@ -102,6 +104,8 @@ static int coroutine_fn backup_cow_with_bounce_buffer(BackupBlockJob *job,
QEMUIOVector qiov; QEMUIOVector qiov;
BlockBackend *blk = job->common.blk; BlockBackend *blk = job->common.blk;
int nbytes; int nbytes;
int read_flags = is_write_notifier ? BDRV_REQ_NO_SERIALISING : 0;
int write_flags = job->serialize_target_writes ? BDRV_REQ_SERIALISING : 0;
hbitmap_reset(job->copy_bitmap, start / job->cluster_size, 1); hbitmap_reset(job->copy_bitmap, start / job->cluster_size, 1);
nbytes = MIN(job->cluster_size, job->len - start); nbytes = MIN(job->cluster_size, job->len - start);
...@@ -112,8 +116,7 @@ static int coroutine_fn backup_cow_with_bounce_buffer(BackupBlockJob *job, ...@@ -112,8 +116,7 @@ static int coroutine_fn backup_cow_with_bounce_buffer(BackupBlockJob *job,
iov.iov_len = nbytes; iov.iov_len = nbytes;
qemu_iovec_init_external(&qiov, &iov, 1); qemu_iovec_init_external(&qiov, &iov, 1);
ret = blk_co_preadv(blk, start, qiov.size, &qiov, ret = blk_co_preadv(blk, start, qiov.size, &qiov, read_flags);
is_write_notifier ? BDRV_REQ_NO_SERIALISING : 0);
if (ret < 0) { if (ret < 0) {
trace_backup_do_cow_read_fail(job, start, ret); trace_backup_do_cow_read_fail(job, start, ret);
if (error_is_read) { if (error_is_read) {
...@@ -124,11 +127,11 @@ static int coroutine_fn backup_cow_with_bounce_buffer(BackupBlockJob *job, ...@@ -124,11 +127,11 @@ static int coroutine_fn backup_cow_with_bounce_buffer(BackupBlockJob *job,
if (qemu_iovec_is_zero(&qiov)) { if (qemu_iovec_is_zero(&qiov)) {
ret = blk_co_pwrite_zeroes(job->target, start, ret = blk_co_pwrite_zeroes(job->target, start,
qiov.size, BDRV_REQ_MAY_UNMAP); qiov.size, write_flags | BDRV_REQ_MAY_UNMAP);
} else { } else {
ret = blk_co_pwritev(job->target, start, ret = blk_co_pwritev(job->target, start,
qiov.size, &qiov, qiov.size, &qiov, write_flags |
job->compress ? BDRV_REQ_WRITE_COMPRESSED : 0); (job->compress ? BDRV_REQ_WRITE_COMPRESSED : 0));
} }
if (ret < 0) { if (ret < 0) {
trace_backup_do_cow_write_fail(job, start, ret); trace_backup_do_cow_write_fail(job, start, ret);
...@@ -156,6 +159,8 @@ static int coroutine_fn backup_cow_with_offload(BackupBlockJob *job, ...@@ -156,6 +159,8 @@ static int coroutine_fn backup_cow_with_offload(BackupBlockJob *job,
int nr_clusters; int nr_clusters;
BlockBackend *blk = job->common.blk; BlockBackend *blk = job->common.blk;
int nbytes; int nbytes;
int read_flags = is_write_notifier ? BDRV_REQ_NO_SERIALISING : 0;
int write_flags = job->serialize_target_writes ? BDRV_REQ_SERIALISING : 0;
assert(QEMU_IS_ALIGNED(job->copy_range_size, job->cluster_size)); assert(QEMU_IS_ALIGNED(job->copy_range_size, job->cluster_size));
nbytes = MIN(job->copy_range_size, end - start); nbytes = MIN(job->copy_range_size, end - start);
...@@ -163,7 +168,7 @@ static int coroutine_fn backup_cow_with_offload(BackupBlockJob *job, ...@@ -163,7 +168,7 @@ static int coroutine_fn backup_cow_with_offload(BackupBlockJob *job,
hbitmap_reset(job->copy_bitmap, start / job->cluster_size, hbitmap_reset(job->copy_bitmap, start / job->cluster_size,
nr_clusters); nr_clusters);
ret = blk_co_copy_range(blk, start, job->target, start, nbytes, ret = blk_co_copy_range(blk, start, job->target, start, nbytes,
is_write_notifier ? BDRV_REQ_NO_SERIALISING : 0, 0); read_flags, write_flags);
if (ret < 0) { if (ret < 0) {
trace_backup_do_cow_copy_range_fail(job, start, ret); trace_backup_do_cow_copy_range_fail(job, start, ret);
hbitmap_set(job->copy_bitmap, start / job->cluster_size, hbitmap_set(job->copy_bitmap, start / job->cluster_size,
...@@ -701,6 +706,9 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs, ...@@ -701,6 +706,9 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
sync_bitmap : NULL; sync_bitmap : NULL;
job->compress = compress; job->compress = compress;
/* Detect image-fleecing (and similar) schemes */
job->serialize_target_writes = bdrv_chain_contains(target, bs);
/* If there is no backing file on the target, we cannot rely on COW if our /* If there is no backing file on the target, we cannot rely on COW if our
* backup cluster size is smaller than the target cluster size. Even for * backup cluster size is smaller than the target cluster size. Even for
* targets with a backing file, try to avoid COW if possible. */ * targets with a backing file, try to avoid COW if possible. */
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册