/*
 * Image mirroring
 *
 * Copyright Red Hat, Inc. 2012
 *
 * Authors:
 *  Paolo Bonzini  <pbonzini@redhat.com>
 *
 * This work is licensed under the terms of the GNU LGPL, version 2 or later.
 * See the COPYING.LIB file in the top-level directory.
 *
 */

#include "trace.h"
#include "block/blockjob.h"
#include "block/block_int.h"
#include "qemu/ratelimit.h"
#include "qemu/bitmap.h"

#define SLICE_TIME    100000000ULL /* ns */
#define MAX_IN_FLIGHT 16           /* cap on concurrent copy operations */

/* The mirroring buffer is a single allocation carved into granularity-sized
 * chunks.  Free chunks are kept on a list whose nodes live in the chunks
 * themselves.
 */
typedef struct MirrorBuffer {
    QSIMPLEQ_ENTRY(MirrorBuffer) next;
} MirrorBuffer;

typedef struct MirrorBlockJob {
    BlockJob common;
    RateLimit limit;
    BlockDriverState *target;
    BlockDriverState *base;
    bool is_none_mode;
    BlockdevOnError on_source_error, on_target_error;
    bool synced;
    bool should_complete;
    int64_t sector_num;
    int64_t granularity;
    size_t buf_size;
    unsigned long *cow_bitmap;
    BdrvDirtyBitmap *dirty_bitmap;
    HBitmapIter hbi;
    uint8_t *buf;
    QSIMPLEQ_HEAD(, MirrorBuffer) buf_free;
    int buf_free_count;

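    /* One bit per granularity chunk: set while a copy of that chunk is in
     * flight, cleared in mirror_iteration_done, so that an iteration never
     * touches sectors whose previous copy has not yet landed. */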
    unsigned long *in_flight_bitmap;
    int in_flight;
    int ret;
} MirrorBlockJob;

typedef struct MirrorOp {
    MirrorBlockJob *s;
    QEMUIOVector qiov;
    int64_t sector_num;
    int nb_sectors;
} MirrorOp;

static BlockErrorAction mirror_error_action(MirrorBlockJob *s, bool read,
                                            int error)
{
    s->synced = false;
    if (read) {
        return block_job_error_action(&s->common, s->common.bs,
                                      s->on_source_error, true, error);
    } else {
        return block_job_error_action(&s->common, s->target,
                                      s->on_target_error, false, error);
    }
}

static void mirror_iteration_done(MirrorOp *op, int ret)
{
    MirrorBlockJob *s = op->s;
    struct iovec *iov;
    int64_t chunk_num;
    int i, nb_chunks, sectors_per_chunk;

    trace_mirror_iteration_done(s, op->sector_num, op->nb_sectors, ret);

    s->in_flight--;
    iov = op->qiov.iov;
    for (i = 0; i < op->qiov.niov; i++) {
        MirrorBuffer *buf = (MirrorBuffer *) iov[i].iov_base;
        QSIMPLEQ_INSERT_TAIL(&s->buf_free, buf, next);
        s->buf_free_count++;
    }

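    /* Convert the byte-based granularity to sectors; for example, with a
     * 64 KiB granularity and 512-byte sectors, one chunk is 128 sectors. */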
    sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS;
    chunk_num = op->sector_num / sectors_per_chunk;
    nb_chunks = op->nb_sectors / sectors_per_chunk;
    bitmap_clear(s->in_flight_bitmap, chunk_num, nb_chunks);
    if (s->cow_bitmap && ret >= 0) {
        bitmap_set(s->cow_bitmap, chunk_num, nb_chunks);
    }

    qemu_iovec_destroy(&op->qiov);
    g_slice_free(MirrorOp, op);

    /* Re-enter the coroutine only if it is not sleeping.  It sleeps to
     * rate-limit itself, and the sleep timeout guarantees that it resumes
     * eventually, so don't wake it early.
     */
    if (s->common.busy) {
        qemu_coroutine_enter(s->common.co, NULL);
    }
}

static void mirror_write_complete(void *opaque, int ret)
{
    MirrorOp *op = opaque;
    MirrorBlockJob *s = op->s;
    if (ret < 0) {
        BlockDriverState *source = s->common.bs;
        BlockErrorAction action;

        bdrv_set_dirty(source, op->sector_num, op->nb_sectors);
        action = mirror_error_action(s, false, -ret);
        if (action == BDRV_ACTION_REPORT && s->ret >= 0) {
            s->ret = ret;
        }
    }
    mirror_iteration_done(op, ret);
}

static void mirror_read_complete(void *opaque, int ret)
{
    MirrorOp *op = opaque;
    MirrorBlockJob *s = op->s;
    if (ret < 0) {
        BlockDriverState *source = s->common.bs;
        BlockErrorAction action;

        bdrv_set_dirty(source, op->sector_num, op->nb_sectors);
        action = mirror_error_action(s, true, -ret);
        if (action == BDRV_ACTION_REPORT && s->ret >= 0) {
            s->ret = ret;
        }

        mirror_iteration_done(op, ret);
        return;
    }
    bdrv_aio_writev(s->target, op->sector_num, &op->qiov, op->nb_sectors,
                    mirror_write_complete, op);
}

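/* Copy one batch of dirty data: take the next dirty sector, widen the
 * request to whole target clusters (manual COW) and to adjacent dirty
 * chunks, then issue an asynchronous read; the matching write is chained
 * from mirror_read_complete.  Returns the rate-limit delay in nanoseconds,
 * with 0 meaning the caller may start another iteration immediately. */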
static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
{
    BlockDriverState *source = s->common.bs;
    int nb_sectors, sectors_per_chunk, nb_chunks;
    int64_t end, sector_num, next_chunk, next_sector, hbitmap_next_sector;
    uint64_t delay_ns;
    MirrorOp *op;

    s->sector_num = hbitmap_iter_next(&s->hbi);
    if (s->sector_num < 0) {
        bdrv_dirty_iter_init(source, s->dirty_bitmap, &s->hbi);
        s->sector_num = hbitmap_iter_next(&s->hbi);
        trace_mirror_restart_iter(s,
                                  bdrv_get_dirty_count(source, s->dirty_bitmap));
        assert(s->sector_num >= 0);
    }

    hbitmap_next_sector = s->sector_num;
    sector_num = s->sector_num;
    sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS;
    end = s->common.len >> BDRV_SECTOR_BITS;

    /* Extend the QEMUIOVector to include all adjacent blocks that will
     * be copied in this operation.
     *
     * We have to do this if we have no backing file yet in the destination,
     * and the cluster size is very large.  Then we need to do COW ourselves.
     * The first time a cluster is copied, copy it entirely.  Note that,
     * because both the granularity and the cluster size are powers of two,
     * the number of sectors to copy cannot exceed one cluster.
     *
     * We also want to extend the QEMUIOVector to include more adjacent
     * dirty blocks if possible, to limit the number of I/O operations and
     * run efficiently even with a small granularity.
     */
    nb_chunks = 0;
    nb_sectors = 0;
    next_sector = sector_num;
    next_chunk = sector_num / sectors_per_chunk;

    /* Wait for I/O to this cluster (from a previous iteration) to be done.  */
    while (test_bit(next_chunk, s->in_flight_bitmap)) {
        trace_mirror_yield_in_flight(s, sector_num, s->in_flight);
        qemu_coroutine_yield();
    }

    do {
        int added_sectors, added_chunks;

        if (!bdrv_get_dirty(source, s->dirty_bitmap, next_sector) ||
            test_bit(next_chunk, s->in_flight_bitmap)) {
            assert(nb_sectors > 0);
            break;
        }

        added_sectors = sectors_per_chunk;
        if (s->cow_bitmap && !test_bit(next_chunk, s->cow_bitmap)) {
            bdrv_round_to_clusters(s->target,
                                   next_sector, added_sectors,
                                   &next_sector, &added_sectors);

            /* On the first iteration, the rounding may make us copy
             * sectors before the first dirty one.
             */
            if (next_sector < sector_num) {
                assert(nb_sectors == 0);
                sector_num = next_sector;
                next_chunk = next_sector / sectors_per_chunk;
            }
        }

        added_sectors = MIN(added_sectors, end - (sector_num + nb_sectors));
        /* Round up: a tail shorter than a full chunk still occupies one
         * granularity-sized buffer. */
        added_chunks = (added_sectors + sectors_per_chunk - 1) / sectors_per_chunk;

        /* When doing COW, it may happen that there is not enough space for
         * a full cluster.  Wait if that is the case.
         */
        while (nb_chunks == 0 && s->buf_free_count < added_chunks) {
            trace_mirror_yield_buf_busy(s, nb_chunks, s->in_flight);
            qemu_coroutine_yield();
        }
        if (s->buf_free_count < nb_chunks + added_chunks) {
            trace_mirror_break_buf_busy(s, nb_chunks, s->in_flight);
            break;
        }

        /* We have enough free space to copy these sectors.  */
        bitmap_set(s->in_flight_bitmap, next_chunk, added_chunks);

        nb_sectors += added_sectors;
        nb_chunks += added_chunks;
        next_sector += added_sectors;
        next_chunk += added_chunks;
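        /* Charge the sectors just batched to the rate limiter; a nonzero
         * delay ends the batching loop, and mirror_run then sleeps for that
         * long before the next iteration. */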
        if (!s->synced && s->common.speed) {
            delay_ns = ratelimit_calculate_delay(&s->limit, added_sectors);
        } else {
            delay_ns = 0;
        }
    } while (delay_ns == 0 && next_sector < end);

    /* Allocate a MirrorOp that is used as an AIO callback.  */
    op = g_slice_new(MirrorOp);
    op->s = s;
    op->sector_num = sector_num;
    op->nb_sectors = nb_sectors;

    /* Now make a QEMUIOVector taking enough granularity-sized chunks
     * from s->buf_free.
     */
    qemu_iovec_init(&op->qiov, nb_chunks);
    next_sector = sector_num;
    while (nb_chunks-- > 0) {
        MirrorBuffer *buf = QSIMPLEQ_FIRST(&s->buf_free);
        QSIMPLEQ_REMOVE_HEAD(&s->buf_free, next);
        s->buf_free_count--;
        qemu_iovec_add(&op->qiov, buf, s->granularity);

        /* Advance the HBitmapIter in parallel, so that we do not examine
         * the same sector twice.
         */
        if (next_sector > hbitmap_next_sector
            && bdrv_get_dirty(source, s->dirty_bitmap, next_sector)) {
            hbitmap_next_sector = hbitmap_iter_next(&s->hbi);
        }

        next_sector += sectors_per_chunk;
    }

    bdrv_reset_dirty(source, sector_num, nb_sectors);

    /* Copy the dirty cluster.  */
    s->in_flight++;
    trace_mirror_one_iteration(s, sector_num, nb_sectors);
    bdrv_aio_readv(source, sector_num, &op->qiov, nb_sectors,
                   mirror_read_complete, op);
    return delay_ns;
}

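/* Carve the single s->buf allocation into granularity-sized chunks and
 * thread them all onto the free list.  Each MirrorBuffer header lives in
 * the chunk it describes, so no separate node allocations are needed. */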
static void mirror_free_init(MirrorBlockJob *s)
{
    int granularity = s->granularity;
    size_t buf_size = s->buf_size;
    uint8_t *buf = s->buf;

    assert(s->buf_free_count == 0);
    QSIMPLEQ_INIT(&s->buf_free);
    while (buf_size != 0) {
        MirrorBuffer *cur = (MirrorBuffer *)buf;
        QSIMPLEQ_INSERT_TAIL(&s->buf_free, cur, next);
        s->buf_free_count++;
        buf_size -= granularity;
        buf += granularity;
    }
}

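/* Spin until every in-flight operation has completed; each completion
 * callback re-enters the coroutine (see mirror_iteration_done). */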
static void mirror_drain(MirrorBlockJob *s)
{
    while (s->in_flight > 0) {
        qemu_coroutine_yield();
    }
}

static void coroutine_fn mirror_run(void *opaque)
{
    MirrorBlockJob *s = opaque;
    BlockDriverState *bs = s->common.bs;
    int64_t sector_num, end, sectors_per_chunk, length;
    uint64_t last_pause_ns;
    BlockDriverInfo bdi;
    char backing_filename[1024];
    int ret = 0;
    int n;

    if (block_job_is_cancelled(&s->common)) {
        goto immediate_exit;
    }

    s->common.len = bdrv_getlength(bs);
    if (s->common.len <= 0) {
        block_job_completed(&s->common, s->common.len);
        return;
    }

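    /* Job length in granularity chunks, rounded up so that a partial chunk
     * at the end of the device still gets a slot in the bitmaps. */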
    length = (bdrv_getlength(bs) + s->granularity - 1) / s->granularity;
    s->in_flight_bitmap = bitmap_new(length);

    /* If we have no backing file yet in the destination, we cannot let
     * the destination do COW.  Instead, we copy sectors around the
     * dirty data if needed.  We need a bitmap to do that.
     */
    bdrv_get_backing_filename(s->target, backing_filename,
                              sizeof(backing_filename));
    if (backing_filename[0] && !s->target->backing_hd) {
        bdrv_get_info(s->target, &bdi);
        if (s->granularity < bdi.cluster_size) {
            s->buf_size = MAX(s->buf_size, bdi.cluster_size);
            s->cow_bitmap = bitmap_new(length);
        }
    }

    end = s->common.len >> BDRV_SECTOR_BITS;
    s->buf = qemu_blockalign(bs, s->buf_size);
    sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS;
    mirror_free_init(s);

    if (!s->is_none_mode) {
        /* First part, loop on the sectors and initialize the dirty bitmap.  */
        BlockDriverState *base = s->base;
        for (sector_num = 0; sector_num < end; ) {
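            /* First sector of the next chunk: sectors_per_chunk is a power
             * of two, so OR-ing in (sectors_per_chunk - 1) rounds up. */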
            int64_t next = (sector_num | (sectors_per_chunk - 1)) + 1;
            ret = bdrv_is_allocated_above(bs, base,
                                          sector_num, next - sector_num, &n);

            if (ret < 0) {
                goto immediate_exit;
            }

            assert(n > 0);
            if (ret == 1) {
                bdrv_set_dirty(bs, sector_num, n);
                sector_num = next;
            } else {
                sector_num += n;
            }
        }
    }

    bdrv_dirty_iter_init(bs, s->dirty_bitmap, &s->hbi);
    last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
    for (;;) {
        uint64_t delay_ns = 0;
        int64_t cnt;
        bool should_complete;

        if (s->ret < 0) {
            ret = s->ret;
            goto immediate_exit;
        }

        cnt = bdrv_get_dirty_count(bs, s->dirty_bitmap);

        /* Note that even when no rate limit is applied we need to yield
         * periodically with no pending I/O so that qemu_aio_flush() returns.
         * We do so every SLICE_TIME nanoseconds, or when there is an error,
         * or when the source is clean, whichever comes first.
         */
        if (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - last_pause_ns < SLICE_TIME &&
            s->common.iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
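            /* Throttle if we are at the request or buffer limit, or if the
             * bitmap is clean but copies are still in flight; a completion
             * callback will re-enter the coroutine. */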
            if (s->in_flight == MAX_IN_FLIGHT || s->buf_free_count == 0 ||
                (cnt == 0 && s->in_flight > 0)) {
                trace_mirror_yield(s, s->in_flight, s->buf_free_count, cnt);
                qemu_coroutine_yield();
                continue;
            } else if (cnt != 0) {
                delay_ns = mirror_iteration(s);
                if (delay_ns == 0) {
                    continue;
                }
            }
        }

        should_complete = false;
        if (s->in_flight == 0 && cnt == 0) {
            trace_mirror_before_flush(s);
            ret = bdrv_flush(s->target);
            if (ret < 0) {
                if (mirror_error_action(s, false, -ret) == BDRV_ACTION_REPORT) {
                    goto immediate_exit;
                }
            } else {
                /* We're out of the streaming phase.  From now on, if the job
                 * is cancelled we will actually complete all pending I/O and
                 * report completion.  This way, block-job-cancel will leave
                 * the target in a consistent state.
                 */
                s->common.offset = end * BDRV_SECTOR_SIZE;
                if (!s->synced) {
                    block_job_ready(&s->common);
                    s->synced = true;
                }

                should_complete = s->should_complete ||
                    block_job_is_cancelled(&s->common);
                cnt = bdrv_get_dirty_count(bs, s->dirty_bitmap);
            }
        }

        if (cnt == 0 && should_complete) {
            /* The dirty bitmap is not updated while operations are pending.
             * If we're about to exit, wait for pending operations before
             * calling bdrv_get_dirty_count(bs), or we may exit while the
             * source has dirty data to copy!
             *
             * Note that I/O can be submitted by the guest while
             * mirror_populate runs.
             */
            trace_mirror_before_drain(s, cnt);
            bdrv_drain_all();
            cnt = bdrv_get_dirty_count(bs, s->dirty_bitmap);
        }

        ret = 0;
        trace_mirror_before_sleep(s, cnt, s->synced, delay_ns);
        if (!s->synced) {
            /* Publish progress */
            s->common.offset = (end - cnt) * BDRV_SECTOR_SIZE;
            block_job_sleep_ns(&s->common, QEMU_CLOCK_REALTIME, delay_ns);
            if (block_job_is_cancelled(&s->common)) {
                break;
            }
        } else if (!should_complete) {
            delay_ns = (s->in_flight == 0 && cnt == 0 ? SLICE_TIME : 0);
            block_job_sleep_ns(&s->common, QEMU_CLOCK_REALTIME, delay_ns);
        } else if (cnt == 0) {
            /* The two disks are in sync.  Exit and report successful
             * completion.
             */
            assert(QLIST_EMPTY(&bs->tracked_requests));
            s->common.cancelled = false;
            break;
        }
        last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
    }

immediate_exit:
    if (s->in_flight > 0) {
        /* We get here only if something went wrong.  Either the job failed,
         * or it was cancelled prematurely so that we do not guarantee that
         * the target is a copy of the source.
         */
        assert(ret < 0 || (!s->synced && block_job_is_cancelled(&s->common)));
        mirror_drain(s);
    }

    assert(s->in_flight == 0);
    qemu_vfree(s->buf);
    g_free(s->cow_bitmap);
    g_free(s->in_flight_bitmap);
    bdrv_release_dirty_bitmap(bs, s->dirty_bitmap);
    bdrv_iostatus_disable(s->target);
    if (s->should_complete && ret == 0) {
        if (bdrv_get_flags(s->target) != bdrv_get_flags(s->common.bs)) {
            bdrv_reopen(s->target, bdrv_get_flags(s->common.bs), NULL);
        }
        bdrv_swap(s->target, s->common.bs);
        if (s->common.driver->job_type == BLOCK_JOB_TYPE_COMMIT) {
            /* Drop the loop in the backing chain formed by the swap: break
             * the loop, then trigger the unref from the top. */
            BlockDriverState *p = s->base->backing_hd;
            s->base->backing_hd = NULL;
            bdrv_unref(p);
        }
    }
    bdrv_unref(s->target);
    block_job_completed(&s->common, ret);
}

static void mirror_set_speed(BlockJob *job, int64_t speed, Error **errp)
{
    MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);

    if (speed < 0) {
        error_set(errp, QERR_INVALID_PARAMETER, "speed");
        return;
    }
    ratelimit_set_speed(&s->limit, speed / BDRV_SECTOR_SIZE, SLICE_TIME);
}

static void mirror_iostatus_reset(BlockJob *job)
{
    MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);

    bdrv_iostatus_reset(s->target);
}

static void mirror_complete(BlockJob *job, Error **errp)
{
    MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
    Error *local_err = NULL;
    int ret;

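    /* Open the target's backing file, if its image header names one, so the
     * backing chain is complete before the guest is switched over to it. */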
    ret = bdrv_open_backing_file(s->target, NULL, &local_err);
    if (ret < 0) {
        error_propagate(errp, local_err);
        return;
    }
    if (!s->synced) {
        error_set(errp, QERR_BLOCK_JOB_NOT_READY, job->bs->device_name);
        return;
    }

    s->should_complete = true;
    block_job_resume(job);
}

static const BlockJobDriver mirror_job_driver = {
    .instance_size = sizeof(MirrorBlockJob),
    .job_type      = BLOCK_JOB_TYPE_MIRROR,
    .set_speed     = mirror_set_speed,
    .iostatus_reset = mirror_iostatus_reset,
    .complete      = mirror_complete,
};

static const BlockJobDriver commit_active_job_driver = {
    .instance_size = sizeof(MirrorBlockJob),
    .job_type      = BLOCK_JOB_TYPE_COMMIT,
    .set_speed     = mirror_set_speed,
    .iostatus_reset = mirror_iostatus_reset,
    .complete      = mirror_complete,
};

static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target,
                             int64_t speed, int64_t granularity,
                             int64_t buf_size,
                             BlockdevOnError on_source_error,
                             BlockdevOnError on_target_error,
                             BlockDriverCompletionFunc *cb,
                             void *opaque, Error **errp,
                             const BlockJobDriver *driver,
                             bool is_none_mode, BlockDriverState *base)
{
    MirrorBlockJob *s;

    if (granularity == 0) {
        /* Choose the default granularity based on the target file's cluster
         * size, clamped between 4k and 64k.  */
        BlockDriverInfo bdi;
        if (bdrv_get_info(target, &bdi) >= 0 && bdi.cluster_size != 0) {
            granularity = MAX(4096, bdi.cluster_size);
            granularity = MIN(65536, granularity);
        } else {
            granularity = 65536;
        }
    }

    assert((granularity & (granularity - 1)) == 0);

    if ((on_source_error == BLOCKDEV_ON_ERROR_STOP ||
         on_source_error == BLOCKDEV_ON_ERROR_ENOSPC) &&
        !bdrv_iostatus_is_enabled(bs)) {
        error_set(errp, QERR_INVALID_PARAMETER, "on-source-error");
        return;
    }


    s = block_job_create(driver, bs, speed, cb, opaque, errp);
    if (!s) {
        return;
    }

    s->on_source_error = on_source_error;
    s->on_target_error = on_target_error;
    s->target = target;
    s->is_none_mode = is_none_mode;
    s->base = base;
    s->granularity = granularity;
    s->buf_size = MAX(buf_size, granularity);

    s->dirty_bitmap = bdrv_create_dirty_bitmap(bs, granularity, errp);
    if (!s->dirty_bitmap) {
        return;
    }
    bdrv_set_enable_write_cache(s->target, true);
    bdrv_set_on_error(s->target, on_target_error, on_target_error);
    bdrv_iostatus_enable(s->target);
    s->common.co = qemu_coroutine_create(mirror_run);
    trace_mirror_start(bs, s, s->common.co, opaque);
    qemu_coroutine_enter(s->common.co, s);
}

void mirror_start(BlockDriverState *bs, BlockDriverState *target,
                  int64_t speed, int64_t granularity, int64_t buf_size,
                  MirrorSyncMode mode, BlockdevOnError on_source_error,
                  BlockdevOnError on_target_error,
                  BlockDriverCompletionFunc *cb,
                  void *opaque, Error **errp)
{
    bool is_none_mode;
    BlockDriverState *base;

    is_none_mode = mode == MIRROR_SYNC_MODE_NONE;
    base = mode == MIRROR_SYNC_MODE_TOP ? bs->backing_hd : NULL;
    mirror_start_job(bs, target, speed, granularity, buf_size,
                     on_source_error, on_target_error, cb, opaque, errp,
                     &mirror_job_driver, is_none_mode, base);
}

void commit_active_start(BlockDriverState *bs, BlockDriverState *base,
                         int64_t speed,
                         BlockdevOnError on_error,
                         BlockDriverCompletionFunc *cb,
                         void *opaque, Error **errp)
{
    int64_t length, base_length;
    int orig_base_flags;
    int ret;
    Error *local_err = NULL;

    orig_base_flags = bdrv_get_flags(base);

F
    if (bdrv_reopen(base, bs->open_flags, errp)) {
        return;
    }

    length = bdrv_getlength(bs);
    if (length < 0) {
        error_setg_errno(errp, -length,
                         "Unable to determine length of %s", bs->filename);
        goto error_restore_flags;
    }

    base_length = bdrv_getlength(base);
    if (base_length < 0) {
        error_setg_errno(errp, -base_length,
                         "Unable to determine length of %s", base->filename);
        goto error_restore_flags;
    }

    if (length > base_length) {
        ret = bdrv_truncate(base, length);
        if (ret < 0) {
            error_setg_errno(errp, -ret,
                            "Top image %s is larger than base image %s, and "
                             "resize of base image failed",
                             bs->filename, base->filename);
            goto error_restore_flags;
        }
    }

    bdrv_ref(base);
    mirror_start_job(bs, base, speed, 0, 0,
                     on_error, on_error, cb, opaque, &local_err,
                     &commit_active_job_driver, false, base);
    if (error_is_set(&local_err)) {
        error_propagate(errp, local_err);
        goto error_restore_flags;
    }

    return;

error_restore_flags:
    /* Ignore error and errp for this bdrv_reopen, because we want to
     * propagate the original error. */
    bdrv_reopen(base, orig_base_flags, NULL);
    return;
}