diff --git a/block/mirror.c b/block/mirror.c index 896972c297e35d030c03c3fa8a846e5b3b318faf..4696dc84ea0d23b02133e5357888380054418aab 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -17,7 +17,15 @@ #include "qemu/ratelimit.h" #include "qemu/bitmap.h" -#define SLICE_TIME 100000000ULL /* ns */ +#define SLICE_TIME 100000000ULL /* ns */ +#define MAX_IN_FLIGHT 16 + +/* The mirroring buffer is a list of granularity-sized chunks. + * Free chunks are organized in a list. + */ +typedef struct MirrorBuffer { + QSIMPLEQ_ENTRY(MirrorBuffer) next; +} MirrorBuffer; typedef struct MirrorBlockJob { BlockJob common; @@ -33,7 +41,10 @@ typedef struct MirrorBlockJob { unsigned long *cow_bitmap; HBitmapIter hbi; uint8_t *buf; + QSIMPLEQ_HEAD(, MirrorBuffer) buf_free; + int buf_free_count; + unsigned long *in_flight_bitmap; int in_flight; int ret; } MirrorBlockJob; @@ -41,7 +52,6 @@ typedef struct MirrorBlockJob { typedef struct MirrorOp { MirrorBlockJob *s; QEMUIOVector qiov; - struct iovec iov; int64_t sector_num; int nb_sectors; } MirrorOp; @@ -62,15 +72,24 @@ static BlockErrorAction mirror_error_action(MirrorBlockJob *s, bool read, static void mirror_iteration_done(MirrorOp *op, int ret) { MirrorBlockJob *s = op->s; + struct iovec *iov; int64_t chunk_num; - int nb_chunks, sectors_per_chunk; + int i, nb_chunks, sectors_per_chunk; trace_mirror_iteration_done(s, op->sector_num, op->nb_sectors, ret); s->in_flight--; + iov = op->qiov.iov; + for (i = 0; i < op->qiov.niov; i++) { + MirrorBuffer *buf = (MirrorBuffer *) iov[i].iov_base; + QSIMPLEQ_INSERT_TAIL(&s->buf_free, buf, next); + s->buf_free_count++; + } + sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS; chunk_num = op->sector_num / sectors_per_chunk; nb_chunks = op->nb_sectors / sectors_per_chunk; + bitmap_clear(s->in_flight_bitmap, chunk_num, nb_chunks); if (s->cow_bitmap && ret >= 0) { bitmap_set(s->cow_bitmap, chunk_num, nb_chunks); } @@ -120,8 +139,8 @@ static void mirror_read_complete(void *opaque, int ret) static void coroutine_fn mirror_iteration(MirrorBlockJob *s) { BlockDriverState *source = s->common.bs; - int nb_sectors, sectors_per_chunk; - int64_t end, sector_num, chunk_num; + int nb_sectors, sectors_per_chunk, nb_chunks; + int64_t end, sector_num, chunk_num, next_sector, hbitmap_next_sector; MirrorOp *op; s->sector_num = hbitmap_iter_next(&s->hbi); @@ -132,6 +151,8 @@ static void coroutine_fn mirror_iteration(MirrorBlockJob *s) assert(s->sector_num >= 0); } + hbitmap_next_sector = s->sector_num; + /* If we have no backing file yet in the destination, and the cluster size * is very large, we need to do COW ourselves. The first time a cluster is * copied, copy it entirely. @@ -147,19 +168,56 @@ static void coroutine_fn mirror_iteration(MirrorBlockJob *s) bdrv_round_to_clusters(s->target, sector_num, sectors_per_chunk, §or_num, &nb_sectors); + + /* The rounding may make us copy sectors before the + * first dirty one. + */ + chunk_num = sector_num / sectors_per_chunk; + } + + /* Wait for I/O to this cluster (from a previous iteration) to be done. */ + while (test_bit(chunk_num, s->in_flight_bitmap)) { + trace_mirror_yield_in_flight(s, sector_num, s->in_flight); + qemu_coroutine_yield(); } end = s->common.len >> BDRV_SECTOR_BITS; nb_sectors = MIN(nb_sectors, end - sector_num); + nb_chunks = (nb_sectors + sectors_per_chunk - 1) / sectors_per_chunk; + while (s->buf_free_count < nb_chunks) { + trace_mirror_yield_buf_busy(s, nb_chunks, s->in_flight); + qemu_coroutine_yield(); + } + + /* We have enough free space to copy these sectors. */ + bitmap_set(s->in_flight_bitmap, chunk_num, nb_chunks); /* Allocate a MirrorOp that is used as an AIO callback. */ op = g_slice_new(MirrorOp); op->s = s; - op->iov.iov_base = s->buf; - op->iov.iov_len = nb_sectors * 512; op->sector_num = sector_num; op->nb_sectors = nb_sectors; - qemu_iovec_init_external(&op->qiov, &op->iov, 1); + + /* Now make a QEMUIOVector taking enough granularity-sized chunks + * from s->buf_free. + */ + qemu_iovec_init(&op->qiov, nb_chunks); + next_sector = sector_num; + while (nb_chunks-- > 0) { + MirrorBuffer *buf = QSIMPLEQ_FIRST(&s->buf_free); + QSIMPLEQ_REMOVE_HEAD(&s->buf_free, next); + s->buf_free_count--; + qemu_iovec_add(&op->qiov, buf, s->granularity); + + /* Advance the HBitmapIter in parallel, so that we do not examine + * the same sector twice. + */ + if (next_sector > hbitmap_next_sector && bdrv_get_dirty(source, next_sector)) { + hbitmap_next_sector = hbitmap_iter_next(&s->hbi); + } + + next_sector += sectors_per_chunk; + } bdrv_reset_dirty(source, sector_num, nb_sectors); @@ -170,6 +228,23 @@ static void coroutine_fn mirror_iteration(MirrorBlockJob *s) mirror_read_complete, op); } +static void mirror_free_init(MirrorBlockJob *s) +{ + int granularity = s->granularity; + size_t buf_size = s->buf_size; + uint8_t *buf = s->buf; + + assert(s->buf_free_count == 0); + QSIMPLEQ_INIT(&s->buf_free); + while (buf_size != 0) { + MirrorBuffer *cur = (MirrorBuffer *)buf; + QSIMPLEQ_INSERT_TAIL(&s->buf_free, cur, next); + s->buf_free_count++; + buf_size -= granularity; + buf += granularity; + } +} + static void mirror_drain(MirrorBlockJob *s) { while (s->in_flight > 0) { @@ -198,6 +273,9 @@ static void coroutine_fn mirror_run(void *opaque) return; } + length = (bdrv_getlength(bs) + s->granularity - 1) / s->granularity; + s->in_flight_bitmap = bitmap_new(length); + /* If we have no backing file yet in the destination, we cannot let * the destination do COW. Instead, we copy sectors around the * dirty data if needed. We need a bitmap to do that. @@ -208,7 +286,6 @@ static void coroutine_fn mirror_run(void *opaque) bdrv_get_info(s->target, &bdi); if (s->granularity < bdi.cluster_size) { s->buf_size = MAX(s->buf_size, bdi.cluster_size); - length = (bdrv_getlength(bs) + s->granularity - 1) / s->granularity; s->cow_bitmap = bitmap_new(length); } } @@ -216,6 +293,7 @@ static void coroutine_fn mirror_run(void *opaque) end = s->common.len >> BDRV_SECTOR_BITS; s->buf = qemu_blockalign(bs, s->buf_size); sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS; + mirror_free_init(s); if (s->mode != MIRROR_SYNC_MODE_NONE) { /* First part, loop on the sectors and initialize the dirty bitmap. */ @@ -261,8 +339,9 @@ static void coroutine_fn mirror_run(void *opaque) */ if (qemu_get_clock_ns(rt_clock) - last_pause_ns < SLICE_TIME && s->common.iostatus == BLOCK_DEVICE_IO_STATUS_OK) { - if (s->in_flight > 0) { - trace_mirror_yield(s, s->in_flight, cnt); + if (s->in_flight == MAX_IN_FLIGHT || s->buf_free_count == 0 || + (cnt == 0 && s->in_flight > 0)) { + trace_mirror_yield(s, s->in_flight, s->buf_free_count, cnt); qemu_coroutine_yield(); continue; } else if (cnt != 0) { @@ -354,6 +433,7 @@ immediate_exit: assert(s->in_flight == 0); qemu_vfree(s->buf); g_free(s->cow_bitmap); + g_free(s->in_flight_bitmap); bdrv_set_dirty_tracking(bs, 0); bdrv_iostatus_disable(s->target); if (s->should_complete && ret == 0) { diff --git a/trace-events b/trace-events index 43e6b73fa59fbd48abeb6ed6f3b70d01f68e1e92..b4a23cdc2e7fb6cfc8f1a05b360acd7e9cb508db 100644 --- a/trace-events +++ b/trace-events @@ -86,7 +86,9 @@ mirror_before_sleep(void *s, int64_t cnt, int synced) "s %p dirty count %"PRId64 mirror_one_iteration(void *s, int64_t sector_num, int nb_sectors) "s %p sector_num %"PRId64" nb_sectors %d" mirror_cow(void *s, int64_t sector_num) "s %p sector_num %"PRId64 mirror_iteration_done(void *s, int64_t sector_num, int nb_sectors, int ret) "s %p sector_num %"PRId64" nb_sectors %d ret %d" -mirror_yield(void *s, int64_t cnt, int in_flight) "s %p dirty count %"PRId64" in_flight %d" +mirror_yield(void *s, int64_t cnt, int buf_free_count, int in_flight) "s %p dirty count %"PRId64" free buffers %d in_flight %d" +mirror_yield_in_flight(void *s, int64_t sector_num, int in_flight) "s %p sector_num %"PRId64" in_flight %d" +mirror_yield_buf_busy(void *s, int nb_chunks, int in_flight) "s %p requested chunks %d in_flight %d" # blockdev.c qmp_block_job_cancel(void *job) "job %p"