diff --git a/include/migration/migration.h b/include/migration/migration.h index 7528cc2fbc82983810780e9c4fbe78ceae5d4be2..b9b706a7e31522c10c8a4a343de617cc4fea83de 100644 --- a/include/migration/migration.h +++ b/include/migration/migration.h @@ -304,6 +304,7 @@ int migrate_add_blocker(Error *reason, Error **errp); */ void migrate_del_blocker(Error *reason); +bool migrate_release_ram(void); bool migrate_postcopy_ram(void); bool migrate_zero_blocks(void); diff --git a/include/migration/qemu-file.h b/include/migration/qemu-file.h index abedd466c945d4bd8a96aec519787338054a5231..0cd648a733e0a3d2b0c0d59fd2fc5215af4a191f 100644 --- a/include/migration/qemu-file.h +++ b/include/migration/qemu-file.h @@ -132,7 +132,8 @@ void qemu_put_byte(QEMUFile *f, int v); * put_buffer without copying the buffer. * The buffer should be available till it is sent asynchronously. */ -void qemu_put_buffer_async(QEMUFile *f, const uint8_t *buf, size_t size); +void qemu_put_buffer_async(QEMUFile *f, const uint8_t *buf, size_t size, + bool may_free); bool qemu_file_mode_is_not_valid(const char *mode); bool qemu_file_is_writable(QEMUFile *f); diff --git a/migration/migration.c b/migration/migration.c index 2b179c69fac94d23beab9b9aed9b19d349dca49c..68afc070167049c4eee27380416e9eaca287cd81 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -1297,6 +1297,15 @@ void qmp_migrate_set_downtime(double value, Error **errp) qmp_migrate_set_parameters(&p, errp); } +bool migrate_release_ram(void) +{ + MigrationState *s; + + s = migrate_get_current(); + + return s->enabled_capabilities[MIGRATION_CAPABILITY_RELEASE_RAM]; +} + bool migrate_postcopy_ram(void) { MigrationState *s; diff --git a/migration/qemu-file.c b/migration/qemu-file.c index e9fae3115882aca356971032602304cfed580deb..195fa94fcf3e8792f4e6d8b820423924ade5eb98 100644 --- a/migration/qemu-file.c +++ b/migration/qemu-file.c @@ -49,6 +49,7 @@ struct QEMUFile { int buf_size; /* 0 when writing */ uint8_t buf[IO_BUF_SIZE]; + DECLARE_BITMAP(may_free, MAX_IOV_SIZE); struct iovec iov[MAX_IOV_SIZE]; unsigned int iovcnt; @@ -132,6 +133,41 @@ bool qemu_file_is_writable(QEMUFile *f) return f->ops->writev_buffer; } +static void qemu_iovec_release_ram(QEMUFile *f) +{ + struct iovec iov; + unsigned long idx; + + /* Find and release all the contiguous memory ranges marked as may_free. */ + idx = find_next_bit(f->may_free, f->iovcnt, 0); + if (idx >= f->iovcnt) { + return; + } + iov = f->iov[idx]; + + /* The madvise() in the loop is called for iov within a continuous range and + * then reinitialize the iov. And in the end, madvise() is called for the + * last iov. + */ + while ((idx = find_next_bit(f->may_free, f->iovcnt, idx + 1)) < f->iovcnt) { + /* check for adjacent buffer and coalesce them */ + if (iov.iov_base + iov.iov_len == f->iov[idx].iov_base) { + iov.iov_len += f->iov[idx].iov_len; + continue; + } + if (qemu_madvise(iov.iov_base, iov.iov_len, QEMU_MADV_DONTNEED) < 0) { + error_report("migrate: madvise DONTNEED failed %p %zd: %s", + iov.iov_base, iov.iov_len, strerror(errno)); + } + iov = f->iov[idx]; + } + if (qemu_madvise(iov.iov_base, iov.iov_len, QEMU_MADV_DONTNEED) < 0) { + error_report("migrate: madvise DONTNEED failed %p %zd: %s", + iov.iov_base, iov.iov_len, strerror(errno)); + } + memset(f->may_free, 0, sizeof(f->may_free)); +} + /** * Flushes QEMUFile buffer * @@ -151,6 +187,8 @@ void qemu_fflush(QEMUFile *f) if (f->iovcnt > 0) { expect = iov_size(f->iov, f->iovcnt); ret = f->ops->writev_buffer(f->opaque, f->iov, f->iovcnt, f->pos); + + qemu_iovec_release_ram(f); } if (ret >= 0) { @@ -304,13 +342,19 @@ int qemu_fclose(QEMUFile *f) return ret; } -static void add_to_iovec(QEMUFile *f, const uint8_t *buf, size_t size) +static void add_to_iovec(QEMUFile *f, const uint8_t *buf, size_t size, + bool may_free) { /* check for adjacent buffer and coalesce them */ if (f->iovcnt > 0 && buf == f->iov[f->iovcnt - 1].iov_base + - f->iov[f->iovcnt - 1].iov_len) { + f->iov[f->iovcnt - 1].iov_len && + may_free == test_bit(f->iovcnt - 1, f->may_free)) + { f->iov[f->iovcnt - 1].iov_len += size; } else { + if (may_free) { + set_bit(f->iovcnt, f->may_free); + } f->iov[f->iovcnt].iov_base = (uint8_t *)buf; f->iov[f->iovcnt++].iov_len = size; } @@ -320,14 +364,15 @@ static void add_to_iovec(QEMUFile *f, const uint8_t *buf, size_t size) } } -void qemu_put_buffer_async(QEMUFile *f, const uint8_t *buf, size_t size) +void qemu_put_buffer_async(QEMUFile *f, const uint8_t *buf, size_t size, + bool may_free) { if (f->last_error) { return; } f->bytes_xfer += size; - add_to_iovec(f, buf, size); + add_to_iovec(f, buf, size, may_free); } void qemu_put_buffer(QEMUFile *f, const uint8_t *buf, size_t size) @@ -345,7 +390,7 @@ void qemu_put_buffer(QEMUFile *f, const uint8_t *buf, size_t size) } memcpy(f->buf + f->buf_index, buf, l); f->bytes_xfer += l; - add_to_iovec(f, f->buf + f->buf_index, l); + add_to_iovec(f, f->buf + f->buf_index, l, false); f->buf_index += l; if (f->buf_index == IO_BUF_SIZE) { qemu_fflush(f); @@ -366,7 +411,7 @@ void qemu_put_byte(QEMUFile *f, int v) f->buf[f->buf_index] = v; f->bytes_xfer++; - add_to_iovec(f, f->buf + f->buf_index, 1); + add_to_iovec(f, f->buf + f->buf_index, 1, false); f->buf_index++; if (f->buf_index == IO_BUF_SIZE) { qemu_fflush(f); @@ -647,7 +692,7 @@ ssize_t qemu_put_compression_data(QEMUFile *f, const uint8_t *p, size_t size, } qemu_put_be32(f, blen); if (f->ops->writev_buffer) { - add_to_iovec(f, f->buf + f->buf_index, blen); + add_to_iovec(f, f->buf + f->buf_index, blen, false); } f->buf_index += blen; if (f->buf_index == IO_BUF_SIZE) { diff --git a/migration/ram.c b/migration/ram.c index 91443b39617a5244d4fbdd25237b1d213a415b8e..c22209db30545b98a559180085426b359ad5f21c 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -705,6 +705,16 @@ static int save_zero_page(QEMUFile *f, RAMBlock *block, ram_addr_t offset, return pages; } +static void ram_release_pages(MigrationState *ms, const char *block_name, + uint64_t offset, int pages) +{ + if (!migrate_release_ram() || !migration_in_postcopy(ms)) { + return; + } + + ram_discard_range(NULL, block_name, offset, pages << TARGET_PAGE_BITS); +} + /** * ram_save_page: Send the given page to the stream * @@ -765,6 +775,7 @@ static int ram_save_page(MigrationState *ms, QEMUFile *f, PageSearchStatus *pss, * page would be stale */ xbzrle_cache_zero_page(current_addr); + ram_release_pages(ms, block->idstr, pss->offset, pages); } else if (!ram_bulk_stage && !migration_in_postcopy(ms) && migrate_use_xbzrle()) { pages = save_xbzrle_page(f, &p, current_addr, block, @@ -783,7 +794,9 @@ static int ram_save_page(MigrationState *ms, QEMUFile *f, PageSearchStatus *pss, *bytes_transferred += save_page_header(f, block, offset | RAM_SAVE_FLAG_PAGE); if (send_async) { - qemu_put_buffer_async(f, p, TARGET_PAGE_SIZE); + qemu_put_buffer_async(f, p, TARGET_PAGE_SIZE, + migrate_release_ram() & + migration_in_postcopy(ms)); } else { qemu_put_buffer(f, p, TARGET_PAGE_SIZE); } @@ -813,6 +826,8 @@ static int do_compress_ram_page(QEMUFile *f, RAMBlock *block, error_report("compressed data failed!"); } else { bytes_sent += blen; + ram_release_pages(migrate_get_current(), block->idstr, + offset & TARGET_PAGE_MASK, 1); } return bytes_sent; @@ -952,12 +967,17 @@ static int ram_save_compressed_page(MigrationState *ms, QEMUFile *f, error_report("compressed data failed!"); } } + if (pages > 0) { + ram_release_pages(ms, block->idstr, pss->offset, pages); + } } else { offset |= RAM_SAVE_FLAG_CONTINUE; pages = save_zero_page(f, block, offset, p, bytes_transferred); if (pages == -1) { pages = compress_page_with_multi_thread(f, block, offset, bytes_transferred); + } else { + ram_release_pages(ms, block->idstr, pss->offset, pages); } } } diff --git a/qapi-schema.json b/qapi-schema.json index 61151f34d0cfff3d653782d775ae1e153fb07b7f..93305412dd47f0dd5000721b3610b67c193dcd95 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -865,11 +865,14 @@ # side, this process is called COarse-Grain LOck Stepping (COLO) for # Non-stop Service. (since 2.8) # +# @release-ram: if enabled, qemu will free the migrated ram pages on the source +# during postcopy-ram migration. (since 2.9) +# # Since: 1.2 ## { 'enum': 'MigrationCapability', 'data': ['xbzrle', 'rdma-pin-all', 'auto-converge', 'zero-blocks', - 'compress', 'events', 'postcopy-ram', 'x-colo'] } + 'compress', 'events', 'postcopy-ram', 'x-colo', 'release-ram'] } ## # @MigrationCapabilityStatus: