diff --git a/block/qed.c b/block/qed.c index c8c59304480a745f67dcd818bd27457bea2ccc6f..d8d6ea2126239894d18a28c94bd861779bdcc71a 100644 --- a/block/qed.c +++ b/block/qed.c @@ -12,6 +12,7 @@ * */ +#include "qemu-timer.h" #include "trace.h" #include "qed.h" #include "qerror.h" @@ -291,6 +292,88 @@ static CachedL2Table *qed_new_l2_table(BDRVQEDState *s) static void qed_aio_next_io(void *opaque, int ret); +static void qed_plug_allocating_write_reqs(BDRVQEDState *s) +{ + assert(!s->allocating_write_reqs_plugged); + + s->allocating_write_reqs_plugged = true; +} + +static void qed_unplug_allocating_write_reqs(BDRVQEDState *s) +{ + QEDAIOCB *acb; + + assert(s->allocating_write_reqs_plugged); + + s->allocating_write_reqs_plugged = false; + + acb = QSIMPLEQ_FIRST(&s->allocating_write_reqs); + if (acb) { + qed_aio_next_io(acb, 0); + } +} + +static void qed_finish_clear_need_check(void *opaque, int ret) +{ + /* Do nothing */ +} + +static void qed_flush_after_clear_need_check(void *opaque, int ret) +{ + BDRVQEDState *s = opaque; + + bdrv_aio_flush(s->bs, qed_finish_clear_need_check, s); + + /* No need to wait until flush completes */ + qed_unplug_allocating_write_reqs(s); +} + +static void qed_clear_need_check(void *opaque, int ret) +{ + BDRVQEDState *s = opaque; + + if (ret) { + qed_unplug_allocating_write_reqs(s); + return; + } + + s->header.features &= ~QED_F_NEED_CHECK; + qed_write_header(s, qed_flush_after_clear_need_check, s); +} + +static void qed_need_check_timer_cb(void *opaque) +{ + BDRVQEDState *s = opaque; + + /* The timer should only fire when allocating writes have drained */ + assert(!QSIMPLEQ_FIRST(&s->allocating_write_reqs)); + + trace_qed_need_check_timer_cb(s); + + qed_plug_allocating_write_reqs(s); + + /* Ensure writes are on disk before clearing flag */ + bdrv_aio_flush(s->bs, qed_clear_need_check, s); +} + +static void qed_start_need_check_timer(BDRVQEDState *s) +{ + trace_qed_start_need_check_timer(s); + + /* Use vm_clock so we don't alter the image file while suspended for + * migration. + */ + qemu_mod_timer(s->need_check_timer, qemu_get_clock_ns(vm_clock) + + get_ticks_per_sec() * QED_NEED_CHECK_TIMEOUT); +} + +/* It's okay to call this multiple times or when no timer is started */ +static void qed_cancel_need_check_timer(BDRVQEDState *s) +{ + trace_qed_cancel_need_check_timer(s); + qemu_del_timer(s->need_check_timer); +} + static int bdrv_qed_open(BlockDriverState *bs, int flags) { BDRVQEDState *s = bs->opaque; @@ -406,7 +489,10 @@ static int bdrv_qed_open(BlockDriverState *bs, int flags) BdrvCheckResult result = {0}; ret = qed_check(s, &result, true); - if (!ret && !result.corruptions && !result.check_errors) { + if (ret) { + goto out; + } + if (!result.corruptions && !result.check_errors) { /* Ensure fixes reach storage before clearing check bit */ bdrv_flush(s->bs); @@ -416,6 +502,9 @@ static int bdrv_qed_open(BlockDriverState *bs, int flags) } } + s->need_check_timer = qemu_new_timer_ns(vm_clock, + qed_need_check_timer_cb, s); + out: if (ret) { qed_free_l2_cache(&s->l2_cache); @@ -428,6 +517,9 @@ static void bdrv_qed_close(BlockDriverState *bs) { BDRVQEDState *s = bs->opaque; + qed_cancel_need_check_timer(s); + qemu_free_timer(s->need_check_timer); + /* Ensure writes reach stable storage */ bdrv_flush(bs->file); @@ -809,6 +901,8 @@ static void qed_aio_complete(QEDAIOCB *acb, int ret) acb = QSIMPLEQ_FIRST(&s->allocating_write_reqs); if (acb) { qed_aio_next_io(acb, 0); + } else if (s->header.features & QED_F_NEED_CHECK) { + qed_start_need_check_timer(s); } } } @@ -1014,11 +1108,17 @@ static void qed_aio_write_alloc(QEDAIOCB *acb, size_t len) { BDRVQEDState *s = acb_to_s(acb); + /* Cancel timer when the first allocating request comes in */ + if (QSIMPLEQ_EMPTY(&s->allocating_write_reqs)) { + qed_cancel_need_check_timer(s); + } + /* Freeze this request if another allocating write is in progress */ if (acb != QSIMPLEQ_FIRST(&s->allocating_write_reqs)) { QSIMPLEQ_INSERT_TAIL(&s->allocating_write_reqs, acb, next); } - if (acb != QSIMPLEQ_FIRST(&s->allocating_write_reqs)) { + if (acb != QSIMPLEQ_FIRST(&s->allocating_write_reqs) || + s->allocating_write_reqs_plugged) { return; /* wait for existing request to finish */ } diff --git a/block/qed.h b/block/qed.h index 1d1421fee1230e3c7f0262939cd878cae779cac6..388fdb3760d31d892493ea1d01f2f399340bf026 100644 --- a/block/qed.h +++ b/block/qed.h @@ -78,6 +78,9 @@ enum { QED_MIN_TABLE_SIZE = 1, /* in clusters */ QED_MAX_TABLE_SIZE = 16, QED_DEFAULT_TABLE_SIZE = 4, + + /* Delay to flush and clean image after last allocating write completes */ + QED_NEED_CHECK_TIMEOUT = 5, /* in seconds */ }; typedef struct { @@ -157,6 +160,10 @@ typedef struct { /* Allocating write request queue */ QSIMPLEQ_HEAD(, QEDAIOCB) allocating_write_reqs; + bool allocating_write_reqs_plugged; + + /* Periodic flush and clear need check flag */ + QEMUTimer *need_check_timer; } BDRVQEDState; enum { diff --git a/trace-events b/trace-events index a00b63cefd335ba577fa270e1b307854d2ea8899..385cb000090d24512444d86f4a213e6efd20a2c4 100644 --- a/trace-events +++ b/trace-events @@ -220,6 +220,9 @@ disable qed_write_table(void *s, uint64_t offset, void *table, unsigned int inde disable qed_write_table_cb(void *s, void *table, int flush, int ret) "s %p table %p flush %d ret %d" # block/qed.c +disable qed_need_check_timer_cb(void *s) "s %p" +disable qed_start_need_check_timer(void *s) "s %p" +disable qed_cancel_need_check_timer(void *s) "s %p" disable qed_aio_complete(void *s, void *acb, int ret) "s %p acb %p ret %d" disable qed_aio_setup(void *s, void *acb, int64_t sector_num, int nb_sectors, void *opaque, int is_write) "s %p acb %p sector_num %"PRId64" nb_sectors %d opaque %p is_write %d" disable qed_aio_next_io(void *s, void *acb, int ret, uint64_t cur_pos) "s %p acb %p ret %d cur_pos %"PRIu64""