block-dirty-bitmap.c 26.7 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64
/*
 * Block dirty bitmap postcopy migration
 *
 * Copyright IBM, Corp. 2009
 * Copyright (c) 2016-2017 Virtuozzo International GmbH. All rights reserved.
 *
 * Authors:
 *  Liran Schour   <lirans@il.ibm.com>
 *  Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 * This file is derived from migration/block.c, so its author and IBM copyright
 * are here, although content is quite different.
 *
 * Contributions after 2012-01-13 are licensed under the terms of the
 * GNU GPL, version 2 or (at your option) any later version.
 *
 *                                ***
 *
 * Here postcopy migration of dirty bitmaps is realized. Only QMP-addressable
 * bitmaps are migrated.
 *
 * Bitmap migration implies creating bitmap with the same name and granularity
 * in destination QEMU. If the bitmap with the same name (for the same node)
 * already exists on destination an error will be generated.
 *
 * format of migration:
 *
 * # Header (shared for different chunk types)
 * 1, 2 or 4 bytes: flags (see qemu_{get,put}_bitmap_flags)
 * [ 1 byte: node name size ] \  flags & DEVICE_NAME
 * [ n bytes: node name     ] /
 * [ 1 byte: bitmap name size ] \  flags & BITMAP_NAME
 * [ n bytes: bitmap name     ] /
 *
 * # Start of bitmap migration (flags & START)
 * header
 * be32: granularity
 * 1 byte: bitmap flags (corresponds to BdrvDirtyBitmap)
 *   bit 0    -  bitmap is enabled
 *   bit 1    -  bitmap is persistent
 *   bit 2    -  was "autoload" in older versions, now ignored
 *   bits 3-7 - reserved, must be zero
 *
 * # Complete of bitmap migration (flags & COMPLETE)
 * header
 *
 * # Data chunk of bitmap migration
 * header
 * be64: start sector
 * be32: number of sectors
 * [ be64: buffer size  ] \ ! (flags & ZEROES)
 * [ n bytes: buffer    ] /
 *
 * The last chunk in stream should contain flags & EOS. The chunk may skip
 * device and/or bitmap names, assuming them to be the same with the previous
 * chunk.
 */

#include "qemu/osdep.h"
#include "block/block.h"
#include "block/block_int.h"
#include "sysemu/block-backend.h"
65
#include "sysemu/runstate.h"
66 67 68 69
#include "qemu/main-loop.h"
#include "qemu/error-report.h"
#include "migration/misc.h"
#include "migration/migration.h"
70
#include "qemu-file.h"
71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99
#include "migration/vmstate.h"
#include "migration/register.h"
#include "qemu/hbitmap.h"
#include "qemu/cutils.h"
#include "qapi/error.h"
#include "trace.h"

/* Upper bound used when sizing one serialized bitmap chunk (see users). */
#define CHUNK_SIZE     (1 << 10)

/*
 * Flags occupy one, two or four bytes (Big Endian). The size is determined as
 * follows (the "extension" bit is the top bit of a byte, 0x80, i.e.
 * DIRTY_BITMAP_MIG_EXTRA_FLAGS):
 * in first (most significant) byte the extension bit is clear --> one byte
 * in first byte the extension bit is set --> two or four bytes, depending on
 *                                            second byte:
 *    | in second byte the extension bit is clear  -->  two bytes
 *    | in second byte the extension bit is set    -->  four bytes
 */
#define DIRTY_BITMAP_MIG_FLAG_EOS           0x01
#define DIRTY_BITMAP_MIG_FLAG_ZEROES        0x02
#define DIRTY_BITMAP_MIG_FLAG_BITMAP_NAME   0x04
#define DIRTY_BITMAP_MIG_FLAG_DEVICE_NAME   0x08
#define DIRTY_BITMAP_MIG_FLAG_START         0x10
#define DIRTY_BITMAP_MIG_FLAG_COMPLETE      0x20
#define DIRTY_BITMAP_MIG_FLAG_BITS          0x40

#define DIRTY_BITMAP_MIG_EXTRA_FLAGS        0x80

/* Per-bitmap flags carried in the START chunk (one byte on the wire). */
#define DIRTY_BITMAP_MIG_START_FLAG_ENABLED          0x01
#define DIRTY_BITMAP_MIG_START_FLAG_PERSISTENT       0x02
/* 0x04 was "AUTOLOAD" flags on older versions, now it is ignored */
#define DIRTY_BITMAP_MIG_START_FLAG_RESERVED_MASK    0xf8

103 104
/* State of one bitmap during save process */
typedef struct SaveBitmapState {
105 106 107 108 109 110
    /* Written during setup phase. */
    BlockDriverState *bs;
    const char *node_name;
    BdrvDirtyBitmap *bitmap;
    uint64_t total_sectors;
    uint64_t sectors_per_chunk;
111
    QSIMPLEQ_ENTRY(SaveBitmapState) entry;
112 113 114 115 116
    uint8_t flags;

    /* For bulk phase. */
    bool bulk_completed;
    uint64_t cur_sector;
117
} SaveBitmapState;
118

119 120 121
/* State of the dirty bitmap migration (DBM) during save process */
typedef struct DBMSaveState {
    /* All bitmaps selected for migration, in sending order. */
    QSIMPLEQ_HEAD(, SaveBitmapState) dbms_list;

    bool bulk_completed;    /* bulk phase finished for every list entry */
    bool no_bitmaps;        /* setup found nothing to migrate */

    /*
     * for send_bitmap_bits(): node and bitmap of the previously sent header,
     * used to decide whether the names must be sent again.
     */
    BlockDriverState *prev_bs;
    BdrvDirtyBitmap *prev_bitmap;
} DBMSaveState;
130

131 132 133 134
/* State of one incoming bitmap, tracked from its START chunk onwards. */
typedef struct LoadBitmapState {
    BlockDriverState *bs;
    BdrvDirtyBitmap *bitmap;
    bool migrated;  /* set once the COMPLETE chunk has been processed */
    bool enabled;   /* START chunk carried DIRTY_BITMAP_MIG_START_FLAG_ENABLED */
} LoadBitmapState;

138 139
/* State of the dirty bitmap migration (DBM) during load process */
typedef struct DBMLoadState {
140 141 142 143 144
    uint32_t flags;
    char node_name[256];
    char bitmap_name[256];
    BlockDriverState *bs;
    BdrvDirtyBitmap *bitmap;
145

146 147
    bool before_vm_start_handled; /* set in dirty_bitmap_mig_before_vm_start */

148 149 150 151 152 153 154 155 156
    /*
     * cancelled
     * Incoming migration is cancelled for some reason. That means that we
     * still should read our chunks from migration stream, to not affect other
     * migration objects (like RAM), but just ignore them and do not touch any
     * bitmaps or nodes.
     */
    bool cancelled;

157 158
    GSList *bitmaps;
    QemuMutex lock; /* protect bitmaps */
159
} DBMLoadState;
160

161 162 163 164
/* Save-side and load-side state of dirty bitmap migration, kept together. */
typedef struct DBMState {
    DBMSaveState save;
    DBMLoadState load;
} DBMState;

/* The single instance; registered as opaque in dirty_bitmap_mig_init(). */
static DBMState dbm_state;
167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182

/*
 * Read the variable-length chunk flags from the stream.
 *
 * One byte is read first; while the just-read low byte has
 * DIRTY_BITMAP_MIG_EXTRA_FLAGS set, the value is extended to two and then to
 * four bytes (Big Endian).
 *
 * The accumulator is 32 bits wide: with an 8-bit accumulator the shifts would
 * discard every byte but the last one read.
 *
 * Returns the decoded flags.
 */
static uint32_t qemu_get_bitmap_flags(QEMUFile *f)
{
    uint32_t flags = qemu_get_byte(f);

    if (flags & DIRTY_BITMAP_MIG_EXTRA_FLAGS) {
        flags = flags << 8 | qemu_get_byte(f);
        if (flags & DIRTY_BITMAP_MIG_EXTRA_FLAGS) {
            flags = flags << 16 | qemu_get_be16(f);
        }
    }

    return flags;
}

/*
 * Write chunk flags to the stream.
 *
 * Only the one-byte encoding is produced, so @flags must fit in the low seven
 * bits (DIRTY_BITMAP_MIG_EXTRA_FLAGS must be clear); this is asserted.
 */
static void qemu_put_bitmap_flags(QEMUFile *f, uint32_t flags)
{
    /* The code currently does not send flags as more than one byte */
    assert(!(flags & (0xffffff00 | DIRTY_BITMAP_MIG_EXTRA_FLAGS)));

    qemu_put_byte(f, flags);
}

189 190
/*
 * Write the header shared by all chunk types: the flags and, when needed,
 * the node name and/or the bitmap name.
 *
 * A name is sent only when the node (resp. bitmap) differs from the one of
 * the previously sent header; s->prev_bs and s->prev_bitmap are updated
 * accordingly.
 *
 * @additional_flags: chunk-type flags to OR into the header flags.
 */
static void send_bitmap_header(QEMUFile *f, DBMSaveState *s,
                               SaveBitmapState *dbms, uint32_t additional_flags)
{
    BlockDriverState *bs = dbms->bs;
    BdrvDirtyBitmap *bitmap = dbms->bitmap;
    uint32_t flags = additional_flags;
    trace_send_bitmap_header_enter();

    if (bs != s->prev_bs) {
        s->prev_bs = bs;
        flags |= DIRTY_BITMAP_MIG_FLAG_DEVICE_NAME;
    }

    if (bitmap != s->prev_bitmap) {
        s->prev_bitmap = bitmap;
        flags |= DIRTY_BITMAP_MIG_FLAG_BITMAP_NAME;
    }

    qemu_put_bitmap_flags(f, flags);

    if (flags & DIRTY_BITMAP_MIG_FLAG_DEVICE_NAME) {
        qemu_put_counted_string(f, dbms->node_name);
    }

    if (flags & DIRTY_BITMAP_MIG_FLAG_BITMAP_NAME) {
        qemu_put_counted_string(f, bdrv_dirty_bitmap_name(bitmap));
    }
}

218 219
/*
 * Send the START chunk for @dbms: header, bitmap granularity (be32) and the
 * one-byte DIRTY_BITMAP_MIG_START_FLAG_* flags.
 */
static void send_bitmap_start(QEMUFile *f, DBMSaveState *s,
                              SaveBitmapState *dbms)
{
    send_bitmap_header(f, s, dbms, DIRTY_BITMAP_MIG_FLAG_START);
    qemu_put_be32(f, bdrv_dirty_bitmap_granularity(dbms->bitmap));
    qemu_put_byte(f, dbms->flags);
}

226 227
/* Send the COMPLETE chunk for @dbms; it consists of the header only. */
static void send_bitmap_complete(QEMUFile *f, DBMSaveState *s,
                                 SaveBitmapState *dbms)
{
    send_bitmap_header(f, s, dbms, DIRTY_BITMAP_MIG_FLAG_COMPLETE);
}

232 233
/*
 * Send one data chunk of @dbms covering @nr_sectors sectors starting at
 * @start_sector.
 *
 * The range is serialized into a zero-initialized buffer whose size is
 * rounded up for buffer_is_zero(). If the whole buffer is zero, only the
 * header with DIRTY_BITMAP_MIG_FLAG_ZEROES, the start sector and the sector
 * count are sent; otherwise the buffer size and the buffer follow.
 */
static void send_bitmap_bits(QEMUFile *f, DBMSaveState *s,
                             SaveBitmapState *dbms,
                             uint64_t start_sector, uint32_t nr_sectors)
{
    /* align for buffer_is_zero() */
    uint64_t align = 4 * sizeof(long);
    uint64_t unaligned_size =
        bdrv_dirty_bitmap_serialization_size(
            dbms->bitmap, start_sector << BDRV_SECTOR_BITS,
            (uint64_t)nr_sectors << BDRV_SECTOR_BITS);
    uint64_t buf_size = QEMU_ALIGN_UP(unaligned_size, align);
    uint8_t *buf = g_malloc0(buf_size);
    uint32_t flags = DIRTY_BITMAP_MIG_FLAG_BITS;

    bdrv_dirty_bitmap_serialize_part(
        dbms->bitmap, buf, start_sector << BDRV_SECTOR_BITS,
        (uint64_t)nr_sectors << BDRV_SECTOR_BITS);

    if (buffer_is_zero(buf, buf_size)) {
        /* Nothing to transfer; buf is reset so the final g_free() is a no-op */
        g_free(buf);
        buf = NULL;
        flags |= DIRTY_BITMAP_MIG_FLAG_ZEROES;
    }

    trace_send_bitmap_bits(flags, start_sector, nr_sectors, buf_size);

    send_bitmap_header(f, s, dbms, flags);

    qemu_put_be64(f, start_sector);
    qemu_put_be32(f, nr_sectors);

    /* if a block is zero we need to flush here since the network
     * bandwidth is now a lot higher than the storage device bandwidth.
     * thus if we queue zero blocks we slow down the migration. */
    if (flags & DIRTY_BITMAP_MIG_FLAG_ZEROES) {
        qemu_fflush(f);
    } else {
        qemu_put_be64(f, buf_size);
        qemu_put_buffer(f, buf, buf_size);
    }

    g_free(buf);
}

/* Called with iothread lock taken.  */
277
static void dirty_bitmap_do_save_cleanup(DBMSaveState *s)
278
{
279
    SaveBitmapState *dbms;
280

281 282
    while ((dbms = QSIMPLEQ_FIRST(&s->dbms_list)) != NULL) {
        QSIMPLEQ_REMOVE_HEAD(&s->dbms_list, entry);
283
        bdrv_dirty_bitmap_set_busy(dbms->bitmap, false);
284 285 286 287 288 289
        bdrv_unref(dbms->bs);
        g_free(dbms);
    }
}

/* Called with iothread lock taken. */
290 291
static int add_bitmaps_to_list(DBMSaveState *s, BlockDriverState *bs,
                               const char *bs_name)
292 293
{
    BdrvDirtyBitmap *bitmap;
294
    SaveBitmapState *dbms;
295
    Error *local_err = NULL;
296

297 298 299 300 301
    FOR_EACH_DIRTY_BITMAP(bs, bitmap) {
        if (bdrv_dirty_bitmap_name(bitmap)) {
            break;
        }
    }
302 303 304 305 306 307 308 309 310 311
    if (!bitmap) {
        return 0;
    }

    if (!bs_name || strcmp(bs_name, "") == 0) {
        error_report("Bitmap '%s' in unnamed node can't be migrated",
                     bdrv_dirty_bitmap_name(bitmap));
        return -1;
    }

312 313 314 315 316 317 318
    if (bs_name[0] == '#') {
        error_report("Bitmap '%s' in a node with auto-generated "
                     "name '%s' can't be migrated",
                     bdrv_dirty_bitmap_name(bitmap), bs_name);
        return -1;
    }

319 320 321 322 323 324 325 326 327 328 329 330 331
    FOR_EACH_DIRTY_BITMAP(bs, bitmap) {
        if (!bdrv_dirty_bitmap_name(bitmap)) {
            continue;
        }

        if (bdrv_dirty_bitmap_check(bitmap, BDRV_BITMAP_DEFAULT, &local_err)) {
            error_report_err(local_err);
            return -1;
        }

        bdrv_ref(bs);
        bdrv_dirty_bitmap_set_busy(bitmap, true);

332
        dbms = g_new0(SaveBitmapState, 1);
333 334 335 336 337 338 339 340 341 342 343 344 345
        dbms->bs = bs;
        dbms->node_name = bs_name;
        dbms->bitmap = bitmap;
        dbms->total_sectors = bdrv_nb_sectors(bs);
        dbms->sectors_per_chunk = CHUNK_SIZE * 8 *
            bdrv_dirty_bitmap_granularity(bitmap) >> BDRV_SECTOR_BITS;
        if (bdrv_dirty_bitmap_enabled(bitmap)) {
            dbms->flags |= DIRTY_BITMAP_MIG_START_FLAG_ENABLED;
        }
        if (bdrv_dirty_bitmap_get_persistence(bitmap)) {
            dbms->flags |= DIRTY_BITMAP_MIG_START_FLAG_PERSISTENT;
        }

346
        QSIMPLEQ_INSERT_TAIL(&s->dbms_list, dbms, entry);
347 348 349 350 351 352
    }

    return 0;
}

/* Called with iothread lock taken. */
353
static int init_dirty_bitmap_migration(DBMSaveState *s)
354 355
{
    BlockDriverState *bs;
356
    SaveBitmapState *dbms;
357 358
    GHashTable *handled_by_blk = g_hash_table_new(NULL, NULL);
    BlockBackend *blk;
359

360 361 362 363
    s->bulk_completed = false;
    s->prev_bs = NULL;
    s->prev_bitmap = NULL;
    s->no_bitmaps = false;
364

365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391
    /*
     * Use blockdevice name for direct (or filtered) children of named block
     * backends.
     */
    for (blk = blk_next(NULL); blk; blk = blk_next(blk)) {
        const char *name = blk_name(blk);

        if (!name || strcmp(name, "") == 0) {
            continue;
        }

        bs = blk_bs(blk);

        /* Skip filters without bitmaps */
        while (bs && bs->drv && bs->drv->is_filter &&
               !bdrv_has_named_bitmaps(bs))
        {
            if (bs->backing) {
                bs = bs->backing->bs;
            } else if (bs->file) {
                bs = bs->file->bs;
            } else {
                bs = NULL;
            }
        }

        if (bs && bs->drv && !bs->drv->is_filter) {
392
            if (add_bitmaps_to_list(s, bs, name)) {
393 394 395 396 397 398
                goto fail;
            }
            g_hash_table_add(handled_by_blk, bs);
        }
    }

399
    for (bs = bdrv_next_all_states(NULL); bs; bs = bdrv_next_all_states(bs)) {
400 401 402 403
        if (g_hash_table_contains(handled_by_blk, bs)) {
            continue;
        }

404
        if (add_bitmaps_to_list(s, bs, bdrv_get_node_name(bs))) {
405
            goto fail;
406 407 408
        }
    }

409
    /* unset migration flags here, to not roll back it */
410
    QSIMPLEQ_FOREACH(dbms, &s->dbms_list, entry) {
411
        bdrv_dirty_bitmap_skip_store(dbms->bitmap, true);
412 413
    }

414 415
    if (QSIMPLEQ_EMPTY(&s->dbms_list)) {
        s->no_bitmaps = true;
416 417
    }

418 419
    g_hash_table_destroy(handled_by_blk);

420 421 422
    return 0;

fail:
423
    g_hash_table_destroy(handled_by_blk);
424
    dirty_bitmap_do_save_cleanup(s);
425 426 427 428 429

    return -1;
}

/* Called with no lock taken.  */
430 431
static void bulk_phase_send_chunk(QEMUFile *f, DBMSaveState *s,
                                  SaveBitmapState *dbms)
432 433 434 435
{
    uint32_t nr_sectors = MIN(dbms->total_sectors - dbms->cur_sector,
                             dbms->sectors_per_chunk);

436
    send_bitmap_bits(f, s, dbms, dbms->cur_sector, nr_sectors);
437 438 439 440 441 442 443 444

    dbms->cur_sector += nr_sectors;
    if (dbms->cur_sector >= dbms->total_sectors) {
        dbms->bulk_completed = true;
    }
}

/* Called with no lock taken.  */
445
static void bulk_phase(QEMUFile *f, DBMSaveState *s, bool limit)
446
{
447
    SaveBitmapState *dbms;
448

449
    QSIMPLEQ_FOREACH(dbms, &s->dbms_list, entry) {
450
        while (!dbms->bulk_completed) {
451
            bulk_phase_send_chunk(f, s, dbms);
452 453 454 455 456 457
            if (limit && qemu_file_rate_limit(f)) {
                return;
            }
        }
    }

458
    s->bulk_completed = true;
459 460 461 462 463
}

/* for SaveVMHandlers */
static void dirty_bitmap_save_cleanup(void *opaque)
{
464 465 466
    DBMSaveState *s = &((DBMState *)opaque)->save;

    dirty_bitmap_do_save_cleanup(s);
467 468 469 470
}

/*
 * SaveVMHandlers iterate callback.
 *
 * Bulk data is sent only once migration is in postcopy, and then subject to
 * rate limiting. An EOS chunk is always written to terminate this section.
 *
 * Returns s->bulk_completed, i.e. non-zero once the bulk phase is finished.
 */
static int dirty_bitmap_save_iterate(QEMUFile *f, void *opaque)
{
    DBMSaveState *s = &((DBMState *)opaque)->save;

    trace_dirty_bitmap_save_iterate(migration_in_postcopy());

    if (migration_in_postcopy() && !s->bulk_completed) {
        bulk_phase(f, s, true);
    }

    qemu_put_bitmap_flags(f, DIRTY_BITMAP_MIG_FLAG_EOS);

    return s->bulk_completed;
}

/*
 * SaveVMHandlers completion callback (used for both precopy and postcopy).
 *
 * Finishes the bulk phase without rate limiting if it is not done yet, sends
 * a COMPLETE chunk for every bitmap followed by EOS, and releases the save
 * state.
 *
 * Always returns 0.
 *
 * Called with iothread lock taken.
 */
static int dirty_bitmap_save_complete(QEMUFile *f, void *opaque)
{
    DBMSaveState *s = &((DBMState *)opaque)->save;
    SaveBitmapState *dbms;
    trace_dirty_bitmap_save_complete_enter();

    if (!s->bulk_completed) {
        bulk_phase(f, s, false);
    }

    QSIMPLEQ_FOREACH(dbms, &s->dbms_list, entry) {
        send_bitmap_complete(f, s, dbms);
    }

    qemu_put_bitmap_flags(f, DIRTY_BITMAP_MIG_FLAG_EOS);

    trace_dirty_bitmap_save_complete_finish();

    dirty_bitmap_save_cleanup(opaque);
    return 0;
}

/*
 * SaveVMHandlers pending callback.
 *
 * For every bitmap whose bulk phase is not completed, the number of sectors
 * still to be sent is converted to bytes and divided (rounding up) by the
 * bitmap granularity; the sum is added to *@res_postcopy_only.
 * @res_precopy_only and @res_compatible are left untouched.
 *
 * Takes the iothread lock while walking the bitmap list.
 */
static void dirty_bitmap_save_pending(QEMUFile *f, void *opaque,
                                      uint64_t max_size,
                                      uint64_t *res_precopy_only,
                                      uint64_t *res_compatible,
                                      uint64_t *res_postcopy_only)
{
    DBMSaveState *s = &((DBMState *)opaque)->save;
    SaveBitmapState *dbms;
    uint64_t pending = 0;

    qemu_mutex_lock_iothread();

    QSIMPLEQ_FOREACH(dbms, &s->dbms_list, entry) {
        uint64_t gran = bdrv_dirty_bitmap_granularity(dbms->bitmap);
        uint64_t sectors = dbms->bulk_completed ? 0 :
                           dbms->total_sectors - dbms->cur_sector;

        pending += DIV_ROUND_UP(sectors * BDRV_SECTOR_SIZE, gran);
    }

    qemu_mutex_unlock_iothread();

    trace_dirty_bitmap_save_pending(pending, max_size);

    *res_postcopy_only += pending;
}

/* First occurrence of this bitmap. It should be created if doesn't exist */
536
static int dirty_bitmap_load_start(QEMUFile *f, DBMLoadState *s)
537 538 539 540
{
    Error *local_err = NULL;
    uint32_t granularity = qemu_get_be32(f);
    uint8_t flags = qemu_get_byte(f);
541
    LoadBitmapState *b;
542

543 544 545 546
    if (s->cancelled) {
        return 0;
    }

547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566
    if (s->bitmap) {
        error_report("Bitmap with the same name ('%s') already exists on "
                     "destination", bdrv_dirty_bitmap_name(s->bitmap));
        return -EINVAL;
    } else {
        s->bitmap = bdrv_create_dirty_bitmap(s->bs, granularity,
                                             s->bitmap_name, &local_err);
        if (!s->bitmap) {
            error_report_err(local_err);
            return -EINVAL;
        }
    }

    if (flags & DIRTY_BITMAP_MIG_START_FLAG_RESERVED_MASK) {
        error_report("Unknown flags in migrated dirty bitmap header: %x",
                     flags);
        return -EINVAL;
    }

    if (flags & DIRTY_BITMAP_MIG_START_FLAG_PERSISTENT) {
E
Eric Blake 已提交
567
        bdrv_dirty_bitmap_set_persistence(s->bitmap, true);
568 569 570 571
    }

    bdrv_disable_dirty_bitmap(s->bitmap);
    if (flags & DIRTY_BITMAP_MIG_START_FLAG_ENABLED) {
572
        bdrv_dirty_bitmap_create_successor(s->bitmap, &local_err);
573 574 575 576 577 578
        if (local_err) {
            error_report_err(local_err);
            return -EINVAL;
        }
    }

579 580 581 582 583 584 585 586
    b = g_new(LoadBitmapState, 1);
    b->bs = s->bs;
    b->bitmap = s->bitmap;
    b->migrated = false;
    b->enabled = flags & DIRTY_BITMAP_MIG_START_FLAG_ENABLED;

    s->bitmaps = g_slist_prepend(s->bitmaps, b);

587 588 589
    return 0;
}

590 591 592 593 594 595 596 597 598
/*
 * before_vm_start_handle_item
 *
 * g_slist_foreach helper
 *
 * item is LoadBitmapState*
 * opaque is DBMLoadState*
 */
static void before_vm_start_handle_item(void *item, void *opaque)
599
{
600 601
    DBMLoadState *s = opaque;
    LoadBitmapState *b = item;
602

603
    if (b->enabled) {
604
        if (b->migrated) {
605
            bdrv_enable_dirty_bitmap(b->bitmap);
606 607 608
        } else {
            bdrv_dirty_bitmap_enable_successor(b->bitmap);
        }
609
    }
610

611 612
    if (b->migrated) {
        s->bitmaps = g_slist_remove(s->bitmaps, b);
613 614
        g_free(b);
    }
615
}
616

617 618 619 620 621 622 623 624
/*
 * Process all incoming bitmaps tracked so far with
 * before_vm_start_handle_item() and record that this has been done.
 * Must be called at most once (asserted). Takes the load-state lock.
 */
void dirty_bitmap_mig_before_vm_start(void)
{
    DBMLoadState *s = &dbm_state.load;
    qemu_mutex_lock(&s->lock);

    assert(!s->before_vm_start_handled);
    g_slist_foreach(s->bitmaps, before_vm_start_handle_item, s);
    s->before_vm_start_handled = true;

    qemu_mutex_unlock(&s->lock);
}

629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659
/*
 * Cancel incoming bitmap migration; does nothing if already cancelled.
 *
 * Forgets the current node and bitmap, then reclaims (if it has a successor)
 * and releases every bitmap still tracked in s->bitmaps and empties the list.
 *
 * The "_locked" suffix indicates the caller holds s->lock.
 */
static void cancel_incoming_locked(DBMLoadState *s)
{
    GSList *node;

    if (s->cancelled) {
        return;
    }

    s->cancelled = true;
    s->bs = NULL;
    s->bitmap = NULL;

    /* Drop all unfinished bitmaps */
    node = s->bitmaps;
    while (node) {
        LoadBitmapState *lbs = node->data;
        BdrvDirtyBitmap *bm = lbs->bitmap;

        /*
         * Finished bitmaps are removed from the list once
         * before_vm_start_handled is set, so none may be found here.
         */
        assert(!(s->before_vm_start_handled && lbs->migrated));

        if (bdrv_dirty_bitmap_has_successor(bm)) {
            bdrv_reclaim_dirty_bitmap(bm, &error_abort);
        }
        bdrv_release_dirty_bitmap(bm);

        node = g_slist_next(node);
    }

    g_slist_free_full(s->bitmaps, g_free);
    s->bitmaps = NULL;
}

660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675
void dirty_bitmap_mig_cancel_outgoing(void)
{
    dirty_bitmap_do_save_cleanup(&dbm_state.save);
}

/* Cancel incoming bitmap migration, holding the load-state lock meanwhile. */
void dirty_bitmap_mig_cancel_incoming(void)
{
    DBMLoadState *load = &dbm_state.load;

    QEMU_LOCK_GUARD(&load->lock);
    cancel_incoming_locked(load);
}

676
/*
 * Handle a COMPLETE chunk for s->bitmap; no payload is read from @f.
 *
 * Unless cancelled: finish deserialization, reclaim the bitmap if it has a
 * successor, and mark the matching s->bitmaps item as migrated. If
 * dirty_bitmap_mig_before_vm_start() has already run, the item is removed
 * from the list and freed right away.
 */
static void dirty_bitmap_load_complete(QEMUFile *f, DBMLoadState *s)
{
    GSList *item;
    trace_dirty_bitmap_load_complete();

    if (s->cancelled) {
        return;
    }

    bdrv_dirty_bitmap_deserialize_finish(s->bitmap);

    if (bdrv_dirty_bitmap_has_successor(s->bitmap)) {
        bdrv_reclaim_dirty_bitmap(s->bitmap, &error_abort);
    }

    for (item = s->bitmaps; item; item = g_slist_next(item)) {
        LoadBitmapState *b = item->data;

        if (b->bitmap == s->bitmap) {
            b->migrated = true;
            if (s->before_vm_start_handled) {
                s->bitmaps = g_slist_remove(s->bitmaps, b);
                g_free(b);
            }
            /* The list was possibly modified; stop iterating. */
            break;
        }
    }
}

705
/*
 * Handle a data chunk for s->bitmap.
 *
 * The start sector (be64) and sector count (be32) are always read. With
 * DIRTY_BITMAP_MIG_FLAG_ZEROES there is no payload; otherwise a be64 buffer
 * size and the buffer follow. The payload is consumed from the stream even
 * when incoming bitmap migration is cancelled.
 *
 * Returns 0 on success, when cancelled, or when a size mismatch caused the
 * incoming bitmap migration to be cancelled; -EIO on an oversized buffer
 * request or a short read.
 */
static int dirty_bitmap_load_bits(QEMUFile *f, DBMLoadState *s)
{
    uint64_t first_byte = qemu_get_be64(f) << BDRV_SECTOR_BITS;
    uint64_t nr_bytes = (uint64_t)qemu_get_be32(f) << BDRV_SECTOR_BITS;
    trace_dirty_bitmap_load_bits_enter(first_byte >> BDRV_SECTOR_BITS,
                                       nr_bytes >> BDRV_SECTOR_BITS);

    if (s->flags & DIRTY_BITMAP_MIG_FLAG_ZEROES) {
        trace_dirty_bitmap_load_bits_zeroes();
        if (!s->cancelled) {
            bdrv_dirty_bitmap_deserialize_zeroes(s->bitmap, first_byte,
                                                 nr_bytes, false);
        }
    } else {
        size_t ret;
        g_autofree uint8_t *buf = NULL;
        uint64_t buf_size = qemu_get_be64(f);
        uint64_t needed_size;

        /*
         * The actual check for buf_size is done a bit later. We can't do it in
         * cancelled mode as we don't have the bitmap to check the constraints
         * (so, we allocate a buffer and read prior to the check). On the other
         * hand, we shouldn't blindly g_malloc the number from the stream.
         * Actually one chunk should not be larger than CHUNK_SIZE. Let's allow
         * a bit larger (which means that bitmap migration will fail anyway and
         * the whole migration will most probably fail soon due to broken
         * stream).
         */
        if (buf_size > 10 * CHUNK_SIZE) {
            error_report("Bitmap migration stream buffer allocation request "
                         "is too large");
            return -EIO;
        }

        buf = g_malloc(buf_size);
        ret = qemu_get_buffer(f, buf, buf_size);
        if (ret != buf_size) {
            error_report("Failed to read bitmap bits");
            return -EIO;
        }

        if (s->cancelled) {
            return 0;
        }

        needed_size = bdrv_dirty_bitmap_serialization_size(s->bitmap,
                                                           first_byte,
                                                           nr_bytes);

        if (needed_size > buf_size ||
            buf_size > QEMU_ALIGN_UP(needed_size, 4 * sizeof(long))
             /* Here used same alignment as in send_bitmap_bits */
        ) {
            error_report("Migrated bitmap granularity doesn't "
                         "match the destination bitmap '%s' granularity",
                         bdrv_dirty_bitmap_name(s->bitmap));
            cancel_incoming_locked(s);
            return 0;
        }

        bdrv_dirty_bitmap_deserialize_part(s->bitmap, buf, first_byte, nr_bytes,
                                           false);
    }

    return 0;
}

773
/*
 * Read a chunk header: the flags and, when flagged, the node name and the
 * bitmap name; update s->flags, s->bs and s->bitmap accordingly.
 *
 * A missing node, an unknown bitmap outside a START chunk, or a chunk that
 * relies on a previous node/bitmap when none is set cancels the incoming
 * bitmap migration but is not reported as a failure. A chunk carrying no
 * flags other than EOS is exempt from the "not set" checks.
 *
 * Returns 0 on success, -EINVAL if a name string cannot be read.
 */
static int dirty_bitmap_load_header(QEMUFile *f, DBMLoadState *s)
{
    Error *local_err = NULL;
    bool nothing;
    s->flags = qemu_get_bitmap_flags(f);
    trace_dirty_bitmap_load_header(s->flags);

    /* True when no flag other than EOS is set. */
    nothing = s->flags == (s->flags & DIRTY_BITMAP_MIG_FLAG_EOS);

    if (s->flags & DIRTY_BITMAP_MIG_FLAG_DEVICE_NAME) {
        if (!qemu_get_counted_string(f, s->node_name)) {
            error_report("Unable to read node name string");
            return -EINVAL;
        }
        if (!s->cancelled) {
            s->bs = bdrv_lookup_bs(s->node_name, s->node_name, &local_err);
            if (!s->bs) {
                error_report_err(local_err);
                cancel_incoming_locked(s);
            }
        }
    } else if (!s->bs && !nothing && !s->cancelled) {
        error_report("Error: block device name is not set");
        cancel_incoming_locked(s);
    }

    if (s->flags & DIRTY_BITMAP_MIG_FLAG_BITMAP_NAME) {
        if (!qemu_get_counted_string(f, s->bitmap_name)) {
            error_report("Unable to read bitmap name string");
            return -EINVAL;
        }
        if (!s->cancelled) {
            s->bitmap = bdrv_find_dirty_bitmap(s->bs, s->bitmap_name);

            /*
             * bitmap may be NULL here, it wouldn't be an error if it is the
             * first occurrence of the bitmap
             */
            if (!s->bitmap && !(s->flags & DIRTY_BITMAP_MIG_FLAG_START)) {
                error_report("Error: unknown dirty bitmap "
                             "'%s' for block device '%s'",
                             s->bitmap_name, s->node_name);
                cancel_incoming_locked(s);
            }
        }
    } else if (!s->bitmap && !nothing && !s->cancelled) {
        /* This branch is about the bitmap, not the block device. */
        error_report("Error: bitmap name is not set");
        cancel_incoming_locked(s);
    }

    return 0;
}

826 827 828 829 830 831 832 833 834 835
/*
 * dirty_bitmap_load
 *
 * Load sequence of dirty bitmap chunks. Return error only on fatal io stream
 * violations. On other errors just cancel bitmaps incoming migration and return
 * 0.
 *
 * Note, than when incoming bitmap migration is canceled, we still must read all
 * our chunks (and just ignore them), to not affect other migration objects.
 */
836 837
static int dirty_bitmap_load(QEMUFile *f, void *opaque, int version_id)
{
838
    DBMLoadState *s = &((DBMState *)opaque)->load;
839 840 841 842 843
    int ret = 0;

    trace_dirty_bitmap_load_enter();

    if (version_id != 1) {
844 845
        QEMU_LOCK_GUARD(&s->lock);
        cancel_incoming_locked(s);
846 847 848 849
        return -EINVAL;
    }

    do {
850 851
        QEMU_LOCK_GUARD(&s->lock);

852
        ret = dirty_bitmap_load_header(f, s);
853
        if (ret < 0) {
854
            cancel_incoming_locked(s);
855 856
            return ret;
        }
857

858 859 860 861 862 863
        if (s->flags & DIRTY_BITMAP_MIG_FLAG_START) {
            ret = dirty_bitmap_load_start(f, s);
        } else if (s->flags & DIRTY_BITMAP_MIG_FLAG_COMPLETE) {
            dirty_bitmap_load_complete(f, s);
        } else if (s->flags & DIRTY_BITMAP_MIG_FLAG_BITS) {
            ret = dirty_bitmap_load_bits(f, s);
864 865 866 867 868 869 870
        }

        if (!ret) {
            ret = qemu_file_get_error(f);
        }

        if (ret) {
871
            cancel_incoming_locked(s);
872 873
            return ret;
        }
874
    } while (!(s->flags & DIRTY_BITMAP_MIG_FLAG_EOS));
875 876 877 878 879 880 881

    trace_dirty_bitmap_load_success();
    return 0;
}

/*
 * SaveVMHandlers setup callback: collect the bitmaps to migrate and send a
 * START chunk for each of them, followed by EOS.
 *
 * Returns 0 on success, -1 if the bitmap list could not be built.
 */
static int dirty_bitmap_save_setup(QEMUFile *f, void *opaque)
{
    DBMSaveState *s = &((DBMState *)opaque)->save;
    SaveBitmapState *dbms = NULL;
    if (init_dirty_bitmap_migration(s) < 0) {
        return -1;
    }

    QSIMPLEQ_FOREACH(dbms, &s->dbms_list, entry) {
        send_bitmap_start(f, s, dbms);
    }
    qemu_put_bitmap_flags(f, DIRTY_BITMAP_MIG_FLAG_EOS);

    return 0;
}

static bool dirty_bitmap_is_active(void *opaque)
{
898 899 900
    DBMSaveState *s = &((DBMState *)opaque)->save;

    return migrate_dirty_bitmaps() && !s->no_bitmaps;
901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927
}

/*
 * SaveVMHandlers is_active_iterate callback: iterate only when bitmap
 * migration is active and the run state is not "running".
 */
static bool dirty_bitmap_is_active_iterate(void *opaque)
{
    if (!dirty_bitmap_is_active(opaque)) {
        return false;
    }

    return !runstate_is_running();
}

/* SaveVMHandlers has_postcopy callback: always true; @opaque is unused. */
static bool dirty_bitmap_has_postcopy(void *opaque)
{
    (void)opaque;

    return true;
}

/* Callback table registered by dirty_bitmap_mig_init(). */
static SaveVMHandlers savevm_dirty_bitmap_handlers = {
    /* activity predicates */
    .is_active = dirty_bitmap_is_active,
    .is_active_iterate = dirty_bitmap_is_active_iterate,
    .has_postcopy = dirty_bitmap_has_postcopy,

    /* outgoing side */
    .save_setup = dirty_bitmap_save_setup,
    .save_live_pending = dirty_bitmap_save_pending,
    .save_live_iterate = dirty_bitmap_save_iterate,
    .save_live_complete_precopy = dirty_bitmap_save_complete,
    .save_live_complete_postcopy = dirty_bitmap_save_complete,
    .save_cleanup = dirty_bitmap_save_cleanup,

    /* incoming side */
    .load_state = dirty_bitmap_load,
};

void dirty_bitmap_mig_init(void)
{
928
    QSIMPLEQ_INIT(&dbm_state.save.dbms_list);
929
    qemu_mutex_init(&dbm_state.load.lock);
930

931
    register_savevm_live("dirty-bitmap", 0, 1,
932
                         &savevm_dirty_bitmap_handlers,
933
                         &dbm_state);
934
}