提交 4c790afe 编写于 作者: K Kevin Wolf

Merge remote-tracking branch 'mreitz/tags/pull-block-2018-06-18' into queue-block

Block patches:
- Active mirror (blockdev-mirror copy-mode=write-blocking)

# gpg: Signature made Mon Jun 18 17:08:19 2018 CEST
# gpg:                using RSA key F407DB0061D5CF40
# gpg: Good signature from "Max Reitz <mreitz@redhat.com>"
# Primary key fingerprint: 91BE B60A 30DB 3E88 57D1  1829 F407 DB00 61D5 CF40

* mreitz/tags/pull-block-2018-06-18:
  iotests: Add test for active mirroring
  block/mirror: Add copy mode QAPI interface
  block/mirror: Add active mirroring
  job: Add job_progress_increase_remaining()
  block/mirror: Add MirrorBDSOpaque
  block/dirty-bitmap: Add bdrv_dirty_iter_next_area
  test-hbitmap: Add non-advancing iter_next tests
  hbitmap: Add @advance param to hbitmap_iter_next()
  block: Generalize should_update_child() rule
  block/mirror: Use source as a BdrvChild
  block/mirror: Wait for in-flight op conflicts
  block/mirror: Use CoQueue to wait on in-flight ops
  block/mirror: Convert to coroutines
  block/mirror: Pull out mirror_perform()
Signed-off-by: NKevin Wolf <kwolf@redhat.com>
......@@ -3427,16 +3427,39 @@ static bool should_update_child(BdrvChild *c, BlockDriverState *to)
return false;
}
if (c->role == &child_backing) {
/* If @from is a backing file of @to, ignore the child to avoid
* creating a loop. We only want to change the pointer of other
* parents. */
QLIST_FOREACH(to_c, &to->children, next) {
if (to_c == c) {
break;
}
}
if (to_c) {
/* If the child @c belongs to the BDS @to, replacing the current
* c->bs by @to would mean to create a loop.
*
* Such a case occurs when appending a BDS to a backing chain.
* For instance, imagine the following chain:
*
* guest device -> node A -> further backing chain...
*
* Now we create a new BDS B which we want to put on top of this
* chain, so we first attach A as its backing node:
*
* node B
* |
* v
* guest device -> node A -> further backing chain...
*
* Finally we want to replace A by B. When doing that, we want to
* replace all pointers to A by pointers to B -- except for the
* pointer from B because (1) that would create a loop, and (2)
* that pointer should simply stay intact:
*
* guest device -> node B
* |
* v
* node A -> further backing chain...
*
* In general, when replacing a node A (c->bs) by a node B (@to),
* if A is a child of B, that means we cannot replace A by B there
* because that would create a loop. Silently detaching A from B
* is also not really an option. So overall just leaving A in
* place there is the most sensible choice. */
QLIST_FOREACH(to_c, &to->children, next) {
if (to_c == c) {
return false;
}
}
......@@ -3462,6 +3485,7 @@ void bdrv_replace_node(BlockDriverState *from, BlockDriverState *to,
/* Put all parents into @list and calculate their cumulative permissions */
QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) {
assert(c->bs == from);
if (!should_update_child(c, to)) {
continue;
}
......
......@@ -354,7 +354,7 @@ static int coroutine_fn backup_run_incremental(BackupBlockJob *job)
HBitmapIter hbi;
hbitmap_iter_init(&hbi, job->copy_bitmap, 0);
while ((cluster = hbitmap_iter_next(&hbi)) != -1) {
while ((cluster = hbitmap_iter_next(&hbi, true)) != -1) {
do {
if (yield_and_check(job)) {
return 0;
......
......@@ -519,7 +519,62 @@ void bdrv_dirty_iter_free(BdrvDirtyBitmapIter *iter)
int64_t bdrv_dirty_iter_next(BdrvDirtyBitmapIter *iter)
{
return hbitmap_iter_next(&iter->hbi);
return hbitmap_iter_next(&iter->hbi, true);
}
/**
* Return the next consecutively dirty area in the dirty bitmap
* belonging to the given iterator @iter.
*
* @max_offset: Maximum value that may be returned for
* *offset + *bytes
* @offset: Will contain the start offset of the next dirty area
* @bytes: Will contain the length of the next dirty area
*
* Returns: True if a dirty area could be found before max_offset
* (which means that *offset and *bytes then contain valid
* values), false otherwise.
*
* Note that @iter is never advanced if false is returned. If an area
* is found (which means that true is returned), it will be advanced
* past that area.
*/
bool bdrv_dirty_iter_next_area(BdrvDirtyBitmapIter *iter, uint64_t max_offset,
uint64_t *offset, int *bytes)
{
uint32_t granularity = bdrv_dirty_bitmap_granularity(iter->bitmap);
uint64_t gran_max_offset;
int64_t ret;
int size;
if (max_offset == iter->bitmap->size) {
/* If max_offset points to the image end, round it up by the
* bitmap granularity */
gran_max_offset = ROUND_UP(max_offset, granularity);
} else {
gran_max_offset = max_offset;
}
ret = hbitmap_iter_next(&iter->hbi, false);
if (ret < 0 || ret + granularity > gran_max_offset) {
return false;
}
*offset = ret;
size = 0;
assert(granularity <= INT_MAX);
do {
/* Advance iterator */
ret = hbitmap_iter_next(&iter->hbi, true);
size += granularity;
} while (ret + granularity <= gran_max_offset &&
hbitmap_iter_next(&iter->hbi, false) == ret + granularity &&
size <= INT_MAX - granularity);
*bytes = MIN(size, max_offset - *offset);
return true;
}
/* Called within bdrv_dirty_bitmap_lock..unlock */
......
此差异已折叠。
......@@ -3586,6 +3586,7 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
bool has_unmap, bool unmap,
bool has_filter_node_name,
const char *filter_node_name,
bool has_copy_mode, MirrorCopyMode copy_mode,
Error **errp)
{
......@@ -3610,6 +3611,9 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
if (!has_filter_node_name) {
filter_node_name = NULL;
}
if (!has_copy_mode) {
copy_mode = MIRROR_COPY_MODE_BACKGROUND;
}
if (granularity != 0 && (granularity < 512 || granularity > 1048576 * 64)) {
error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "granularity",
......@@ -3640,7 +3644,7 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
has_replaces ? replaces : NULL,
speed, granularity, buf_size, sync, backing_mode,
on_source_error, on_target_error, unmap, filter_node_name,
errp);
copy_mode, errp);
}
void qmp_drive_mirror(DriveMirror *arg, Error **errp)
......@@ -3786,6 +3790,7 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
arg->has_on_target_error, arg->on_target_error,
arg->has_unmap, arg->unmap,
false, NULL,
arg->has_copy_mode, arg->copy_mode,
&local_err);
bdrv_unref(target_bs);
error_propagate(errp, local_err);
......@@ -3806,6 +3811,7 @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id,
BlockdevOnError on_target_error,
bool has_filter_node_name,
const char *filter_node_name,
bool has_copy_mode, MirrorCopyMode copy_mode,
Error **errp)
{
BlockDriverState *bs;
......@@ -3838,6 +3844,7 @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id,
has_on_target_error, on_target_error,
true, true,
has_filter_node_name, filter_node_name,
has_copy_mode, copy_mode,
&local_err);
error_propagate(errp, local_err);
......
......@@ -1031,6 +1031,7 @@ void commit_active_start(const char *job_id, BlockDriverState *bs,
* @filter_node_name: The node name that should be assigned to the filter
* driver that the mirror job inserts into the graph above @bs. NULL means that
* a node name should be autogenerated.
* @copy_mode: When to trigger writes to the target.
* @errp: Error object.
*
* Start a mirroring operation on @bs. Clusters that are allocated
......@@ -1044,7 +1045,8 @@ void mirror_start(const char *job_id, BlockDriverState *bs,
MirrorSyncMode mode, BlockMirrorBackingMode backing_mode,
BlockdevOnError on_source_error,
BlockdevOnError on_target_error,
bool unmap, const char *filter_node_name, Error **errp);
bool unmap, const char *filter_node_name,
MirrorCopyMode copy_mode, Error **errp);
/*
* backup_job_create:
......
......@@ -82,6 +82,8 @@ void bdrv_set_dirty_bitmap_locked(BdrvDirtyBitmap *bitmap,
void bdrv_reset_dirty_bitmap_locked(BdrvDirtyBitmap *bitmap,
int64_t offset, int64_t bytes);
int64_t bdrv_dirty_iter_next(BdrvDirtyBitmapIter *iter);
bool bdrv_dirty_iter_next_area(BdrvDirtyBitmapIter *iter, uint64_t max_offset,
uint64_t *offset, int *bytes);
void bdrv_set_dirty_iter(BdrvDirtyBitmapIter *hbi, int64_t offset);
int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap);
int64_t bdrv_get_meta_dirty_count(BdrvDirtyBitmap *bitmap);
......
......@@ -324,11 +324,14 @@ void hbitmap_free_meta(HBitmap *hb);
/**
* hbitmap_iter_next:
* @hbi: HBitmapIter to operate on.
* @advance: If true, advance the iterator. Otherwise, the next call
* of this function will return the same result (if that
* position is still dirty).
*
* Return the next bit that is set in @hbi's associated HBitmap,
* or -1 if all remaining bits are zero.
*/
int64_t hbitmap_iter_next(HBitmapIter *hbi);
int64_t hbitmap_iter_next(HBitmapIter *hbi, bool advance);
/**
* hbitmap_iter_next_word:
......
......@@ -335,6 +335,21 @@ void job_progress_update(Job *job, uint64_t done);
*/
void job_progress_set_remaining(Job *job, uint64_t remaining);
/**
* @job: The job whose expected progress end value is updated
* @delta: Value which is to be added to the current expected end
* value
*
* Increases the expected end value of the progress counter of a job.
* This is useful for parenthesis operations: If a job has to
* conditionally perform a high-priority operation as part of its
* progress, it calls this function with the expected operation's
* length before, and job_progress_update() afterwards.
* (So the operation acts as a parenthesis in regards to the main job
* operation running in background.)
*/
void job_progress_increase_remaining(Job *job, uint64_t delta);
/** To be called when a cancelled job is finalised. */
void job_event_cancelled(Job *job);
......
......@@ -385,6 +385,11 @@ void job_progress_set_remaining(Job *job, uint64_t remaining)
job->progress_total = job->progress_current + remaining;
}
void job_progress_increase_remaining(Job *job, uint64_t delta)
{
job->progress_total += delta;
}
void job_event_cancelled(Job *job)
{
notifier_list_notify(&job->on_finalize_cancelled, job);
......
......@@ -1050,6 +1050,24 @@
{ 'enum': 'MirrorSyncMode',
'data': ['top', 'full', 'none', 'incremental'] }
##
# @MirrorCopyMode:
#
# An enumeration whose values tell the mirror block job when to
# trigger writes to the target.
#
# @background: copy data in background only.
#
# @write-blocking: when data is written to the source, write it
# (synchronously) to the target as well. In
# addition, data is copied in background just like in
# @background mode.
#
# Since: 3.0
##
{ 'enum': 'MirrorCopyMode',
'data': ['background', 'write-blocking'] }
##
# @BlockJobInfo:
#
......@@ -1692,6 +1710,9 @@
# written. Both will result in identical contents.
# Default is true. (Since 2.4)
#
# @copy-mode: when to copy data to the destination; defaults to 'background'
# (Since: 3.0)
#
# Since: 1.3
##
{ 'struct': 'DriveMirror',
......@@ -1701,7 +1722,7 @@
'*speed': 'int', '*granularity': 'uint32',
'*buf-size': 'int', '*on-source-error': 'BlockdevOnError',
'*on-target-error': 'BlockdevOnError',
'*unmap': 'bool' } }
'*unmap': 'bool', '*copy-mode': 'MirrorCopyMode' } }
##
# @BlockDirtyBitmap:
......@@ -1964,6 +1985,9 @@
# above @device. If this option is not given, a node name is
# autogenerated. (Since: 2.9)
#
# @copy-mode: when to copy data to the destination; defaults to 'background'
# (Since: 3.0)
#
# Returns: nothing on success.
#
# Since: 2.6
......@@ -1984,7 +2008,8 @@
'*speed': 'int', '*granularity': 'uint32',
'*buf-size': 'int', '*on-source-error': 'BlockdevOnError',
'*on-target-error': 'BlockdevOnError',
'*filter-node-name': 'str' } }
'*filter-node-name': 'str',
'*copy-mode': 'MirrorCopyMode' } }
##
# @block_set_io_throttle:
......
#!/usr/bin/env python
#
# Tests for active mirroring
#
# Copyright (C) 2018 Red Hat, Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
import os
import iotests
from iotests import qemu_img
source_img = os.path.join(iotests.test_dir, 'source.' + iotests.imgfmt)
target_img = os.path.join(iotests.test_dir, 'target.' + iotests.imgfmt)
class TestActiveMirror(iotests.QMPTestCase):
image_len = 128 * 1024 * 1024 # MB
potential_writes_in_flight = True
def setUp(self):
qemu_img('create', '-f', iotests.imgfmt, source_img, '128M')
qemu_img('create', '-f', iotests.imgfmt, target_img, '128M')
blk_source = {'id': 'source',
'if': 'none',
'node-name': 'source-node',
'driver': iotests.imgfmt,
'file': {'driver': 'file',
'filename': source_img}}
blk_target = {'node-name': 'target-node',
'driver': iotests.imgfmt,
'file': {'driver': 'file',
'filename': target_img}}
self.vm = iotests.VM()
self.vm.add_drive_raw(self.vm.qmp_to_opts(blk_source))
self.vm.add_blockdev(self.vm.qmp_to_opts(blk_target))
self.vm.add_device('virtio-blk,drive=source')
self.vm.launch()
def tearDown(self):
self.vm.shutdown()
if not self.potential_writes_in_flight:
self.assertTrue(iotests.compare_images(source_img, target_img),
'mirror target does not match source')
os.remove(source_img)
os.remove(target_img)
def doActiveIO(self, sync_source_and_target):
# Fill the source image
self.vm.hmp_qemu_io('source',
'write -P 1 0 %i' % self.image_len);
# Start some background requests
for offset in range(1 * self.image_len / 8, 3 * self.image_len / 8, 1024 * 1024):
self.vm.hmp_qemu_io('source', 'aio_write -P 2 %i 1M' % offset)
for offset in range(2 * self.image_len / 8, 3 * self.image_len / 8, 1024 * 1024):
self.vm.hmp_qemu_io('source', 'aio_write -z %i 1M' % offset)
# Start the block job
result = self.vm.qmp('blockdev-mirror',
job_id='mirror',
filter_node_name='mirror-node',
device='source-node',
target='target-node',
sync='full',
copy_mode='write-blocking')
self.assert_qmp(result, 'return', {})
# Start some more requests
for offset in range(3 * self.image_len / 8, 5 * self.image_len / 8, 1024 * 1024):
self.vm.hmp_qemu_io('source', 'aio_write -P 3 %i 1M' % offset)
for offset in range(4 * self.image_len / 8, 5 * self.image_len / 8, 1024 * 1024):
self.vm.hmp_qemu_io('source', 'aio_write -z %i 1M' % offset)
# Wait for the READY event
self.wait_ready(drive='mirror')
# Now start some final requests; all of these (which land on
# the source) should be settled using the active mechanism.
# The mirror code itself asserts that the source BDS's dirty
# bitmap will stay clean between READY and COMPLETED.
for offset in range(5 * self.image_len / 8, 7 * self.image_len / 8, 1024 * 1024):
self.vm.hmp_qemu_io('source', 'aio_write -P 3 %i 1M' % offset)
for offset in range(6 * self.image_len / 8, 7 * self.image_len / 8, 1024 * 1024):
self.vm.hmp_qemu_io('source', 'aio_write -z %i 1M' % offset)
if sync_source_and_target:
# If source and target should be in sync after the mirror,
# we have to flush before completion
self.vm.hmp_qemu_io('source', 'aio_flush')
self.potential_writes_in_flight = False
self.complete_and_wait(drive='mirror', wait_ready=False)
def testActiveIO(self):
self.doActiveIO(False)
def testActiveIOFlushed(self):
self.doActiveIO(True)
if __name__ == '__main__':
iotests.main(supported_fmts=['qcow2', 'raw'])
..
----------------------------------------------------------------------
Ran 2 tests
OK
......@@ -157,6 +157,7 @@
148 rw auto quick
149 rw auto sudo
150 rw auto quick
151 rw auto
152 rw auto quick
153 rw auto quick
154 rw auto backing quick
......
......@@ -30,6 +30,18 @@ typedef struct TestHBitmapData {
} TestHBitmapData;
static int64_t check_hbitmap_iter_next(HBitmapIter *hbi)
{
int next0, next1;
next0 = hbitmap_iter_next(hbi, false);
next1 = hbitmap_iter_next(hbi, true);
g_assert_cmpint(next0, ==, next1);
return next0;
}
/* Check that the HBitmap and the shadow bitmap contain the same data,
* ignoring the same "first" bits.
*/
......@@ -46,7 +58,7 @@ static void hbitmap_test_check(TestHBitmapData *data,
i = first;
for (;;) {
next = hbitmap_iter_next(&hbi);
next = check_hbitmap_iter_next(&hbi);
if (next < 0) {
next = data->size;
}
......@@ -435,25 +447,25 @@ static void test_hbitmap_iter_granularity(TestHBitmapData *data,
/* Note that hbitmap_test_check has to be invoked manually in this test. */
hbitmap_test_init(data, 131072 << 7, 7);
hbitmap_iter_init(&hbi, data->hb, 0);
g_assert_cmpint(hbitmap_iter_next(&hbi), <, 0);
g_assert_cmpint(check_hbitmap_iter_next(&hbi), <, 0);
hbitmap_test_set(data, ((L2 + L1 + 1) << 7) + 8, 8);
hbitmap_iter_init(&hbi, data->hb, 0);
g_assert_cmpint(hbitmap_iter_next(&hbi), ==, (L2 + L1 + 1) << 7);
g_assert_cmpint(hbitmap_iter_next(&hbi), <, 0);
g_assert_cmpint(check_hbitmap_iter_next(&hbi), ==, (L2 + L1 + 1) << 7);
g_assert_cmpint(check_hbitmap_iter_next(&hbi), <, 0);
hbitmap_iter_init(&hbi, data->hb, (L2 + L1 + 2) << 7);
g_assert_cmpint(hbitmap_iter_next(&hbi), <, 0);
g_assert_cmpint(hbitmap_iter_next(&hbi, true), <, 0);
hbitmap_test_set(data, (131072 << 7) - 8, 8);
hbitmap_iter_init(&hbi, data->hb, 0);
g_assert_cmpint(hbitmap_iter_next(&hbi), ==, (L2 + L1 + 1) << 7);
g_assert_cmpint(hbitmap_iter_next(&hbi), ==, 131071 << 7);
g_assert_cmpint(hbitmap_iter_next(&hbi), <, 0);
g_assert_cmpint(check_hbitmap_iter_next(&hbi), ==, (L2 + L1 + 1) << 7);
g_assert_cmpint(check_hbitmap_iter_next(&hbi), ==, 131071 << 7);
g_assert_cmpint(check_hbitmap_iter_next(&hbi), <, 0);
hbitmap_iter_init(&hbi, data->hb, (L2 + L1 + 2) << 7);
g_assert_cmpint(hbitmap_iter_next(&hbi), ==, 131071 << 7);
g_assert_cmpint(hbitmap_iter_next(&hbi), <, 0);
g_assert_cmpint(check_hbitmap_iter_next(&hbi), ==, 131071 << 7);
g_assert_cmpint(check_hbitmap_iter_next(&hbi), <, 0);
}
static void hbitmap_test_set_boundary_bits(TestHBitmapData *data, ssize_t diff)
......@@ -893,7 +905,7 @@ static void test_hbitmap_serialize_zeroes(TestHBitmapData *data,
for (i = 0; i < num_positions; i++) {
hbitmap_deserialize_zeroes(data->hb, positions[i], min_l1, true);
hbitmap_iter_init(&iter, data->hb, 0);
next = hbitmap_iter_next(&iter);
next = check_hbitmap_iter_next(&iter);
if (i == num_positions - 1) {
g_assert_cmpint(next, ==, -1);
} else {
......@@ -919,10 +931,10 @@ static void test_hbitmap_iter_and_reset(TestHBitmapData *data,
hbitmap_iter_init(&hbi, data->hb, BITS_PER_LONG - 1);
hbitmap_iter_next(&hbi);
check_hbitmap_iter_next(&hbi);
hbitmap_reset_all(data->hb);
hbitmap_iter_next(&hbi);
check_hbitmap_iter_next(&hbi);
}
static void test_hbitmap_next_zero_check(TestHBitmapData *data, int64_t start)
......
......@@ -141,7 +141,7 @@ unsigned long hbitmap_iter_skip_words(HBitmapIter *hbi)
return cur;
}
int64_t hbitmap_iter_next(HBitmapIter *hbi)
int64_t hbitmap_iter_next(HBitmapIter *hbi, bool advance)
{
unsigned long cur = hbi->cur[HBITMAP_LEVELS - 1] &
hbi->hb->levels[HBITMAP_LEVELS - 1][hbi->pos];
......@@ -154,8 +154,12 @@ int64_t hbitmap_iter_next(HBitmapIter *hbi)
}
}
/* The next call will resume work from the next bit. */
hbi->cur[HBITMAP_LEVELS - 1] = cur & (cur - 1);
if (advance) {
/* The next call will resume work from the next bit. */
hbi->cur[HBITMAP_LEVELS - 1] = cur & (cur - 1);
} else {
hbi->cur[HBITMAP_LEVELS - 1] = cur;
}
item = ((uint64_t)hbi->pos << BITS_PER_LEVEL) + ctzl(cur);
return item << hbi->granularity;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册