postcopy-ram.c 23.5 KB
Newer Older
D
Dr. David Alan Gilbert 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
/*
 * Postcopy migration for RAM
 *
 * Copyright 2013-2015 Red Hat, Inc. and/or its affiliates
 *
 * Authors:
 *  Dave Gilbert  <dgilbert@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

/*
 * Postcopy is a migration technique where the execution flips from the
 * source to the destination before all the data has been copied.
 */

P
Peter Maydell 已提交
19
#include "qemu/osdep.h"
D
Dr. David Alan Gilbert 已提交
20 21 22 23 24

#include "qemu-common.h"
#include "migration/migration.h"
#include "migration/postcopy-ram.h"
#include "sysemu/sysemu.h"
25
#include "sysemu/balloon.h"
D
Dr. David Alan Gilbert 已提交
26 27 28
#include "qemu/error-report.h"
#include "trace.h"

29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46
/* Arbitrary limit on size of each discard command,
 * keeps them around ~200 bytes
 */
#define MAX_DISCARDS_PER_COMMAND 12

struct PostcopyDiscardState {
    const char *ramblock_name;
    uint64_t offset; /* Bitmap entry for the 1st bit of this RAMBlock */
    uint16_t cur_entry;
    /*
     * Start and length of a discard range (bytes)
     */
    uint64_t start_list[MAX_DISCARDS_PER_COMMAND];
    uint64_t length_list[MAX_DISCARDS_PER_COMMAND];
    unsigned int nsentwords;
    unsigned int nsentcmds;
};

D
Dr. David Alan Gilbert 已提交
47 48 49 50 51 52
/* Postcopy needs to detect accesses to pages that haven't yet been copied
 * across, and efficiently map new pages in, the techniques for doing this
 * are target OS specific.
 */
#if defined(__linux__)

53
#include <poll.h>
D
Dr. David Alan Gilbert 已提交
54 55 56 57 58
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <asm/types.h> /* for __u64 */
#endif

59 60
#if defined(__linux__) && defined(__NR_userfaultfd) && defined(CONFIG_EVENTFD)
#include <sys/eventfd.h>
D
Dr. David Alan Gilbert 已提交
61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83
#include <linux/userfaultfd.h>

static bool ufd_version_check(int ufd)
{
    struct uffdio_api api_struct;
    uint64_t ioctl_mask;

    api_struct.api = UFFD_API;
    api_struct.features = 0;
    if (ioctl(ufd, UFFDIO_API, &api_struct)) {
        error_report("postcopy_ram_supported_by_host: UFFDIO_API failed: %s",
                     strerror(errno));
        return false;
    }

    ioctl_mask = (__u64)1 << _UFFDIO_REGISTER |
                 (__u64)1 << _UFFDIO_UNREGISTER;
    if ((api_struct.ioctls & ioctl_mask) != ioctl_mask) {
        error_report("Missing userfault features: %" PRIx64,
                     (uint64_t)(~api_struct.ioctls & ioctl_mask));
        return false;
    }

84 85 86 87 88 89 90 91 92 93 94
    if (getpagesize() != ram_pagesize_summary()) {
        bool have_hp = false;
        /* We've got a huge page */
#ifdef UFFD_FEATURE_MISSING_HUGETLBFS
        have_hp = api_struct.features & UFFD_FEATURE_MISSING_HUGETLBFS;
#endif
        if (!have_hp) {
            error_report("Userfault on this host does not support huge pages");
            return false;
        }
    }
D
Dr. David Alan Gilbert 已提交
95 96 97
    return true;
}

98 99 100 101 102
/*
 * Note: This has the side effect of munlock'ing all of RAM, that's
 * normally fine since if the postcopy succeeds it gets turned back on at the
 * end.
 */
D
Dr. David Alan Gilbert 已提交
103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129
bool postcopy_ram_supported_by_host(void)
{
    long pagesize = getpagesize();
    int ufd = -1;
    bool ret = false; /* Error unless we change it */
    void *testarea = NULL;
    struct uffdio_register reg_struct;
    struct uffdio_range range_struct;
    uint64_t feature_mask;

    if ((1ul << qemu_target_page_bits()) > pagesize) {
        error_report("Target page size bigger than host page size");
        goto out;
    }

    ufd = syscall(__NR_userfaultfd, O_CLOEXEC);
    if (ufd == -1) {
        error_report("%s: userfaultfd not available: %s", __func__,
                     strerror(errno));
        goto out;
    }

    /* Version and features check */
    if (!ufd_version_check(ufd)) {
        goto out;
    }

130 131 132 133 134 135 136 137 138
    /*
     * userfault and mlock don't go together; we'll put it back later if
     * it was enabled.
     */
    if (munlockall()) {
        error_report("%s: munlockall: %s", __func__,  strerror(errno));
        return -1;
    }

D
Dr. David Alan Gilbert 已提交
139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189
    /*
     *  We need to check that the ops we need are supported on anon memory
     *  To do that we need to register a chunk and see the flags that
     *  are returned.
     */
    testarea = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_PRIVATE |
                                    MAP_ANONYMOUS, -1, 0);
    if (testarea == MAP_FAILED) {
        error_report("%s: Failed to map test area: %s", __func__,
                     strerror(errno));
        goto out;
    }
    g_assert(((size_t)testarea & (pagesize-1)) == 0);

    reg_struct.range.start = (uintptr_t)testarea;
    reg_struct.range.len = pagesize;
    reg_struct.mode = UFFDIO_REGISTER_MODE_MISSING;

    if (ioctl(ufd, UFFDIO_REGISTER, &reg_struct)) {
        error_report("%s userfault register: %s", __func__, strerror(errno));
        goto out;
    }

    range_struct.start = (uintptr_t)testarea;
    range_struct.len = pagesize;
    if (ioctl(ufd, UFFDIO_UNREGISTER, &range_struct)) {
        error_report("%s userfault unregister: %s", __func__, strerror(errno));
        goto out;
    }

    feature_mask = (__u64)1 << _UFFDIO_WAKE |
                   (__u64)1 << _UFFDIO_COPY |
                   (__u64)1 << _UFFDIO_ZEROPAGE;
    if ((reg_struct.ioctls & feature_mask) != feature_mask) {
        error_report("Missing userfault map features: %" PRIx64,
                     (uint64_t)(~reg_struct.ioctls & feature_mask));
        goto out;
    }

    /* Success! */
    ret = true;
out:
    if (testarea) {
        munmap(testarea, pagesize);
    }
    if (ufd != -1) {
        close(ufd);
    }
    return ret;
}

190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207
/*
 * Setup an area of RAM so that it *can* be used for postcopy later; this
 * must be done right at the start prior to pre-copy.
 * opaque should be the MIS.
 */
static int init_range(const char *block_name, void *host_addr,
                      ram_addr_t offset, ram_addr_t length, void *opaque)
{
    MigrationIncomingState *mis = opaque;

    trace_postcopy_init_range(block_name, host_addr, offset, length);

    /*
     * We need the whole of RAM to be truly empty for postcopy, so things
     * like ROMs and any data tables built during init must be zero'd
     * - we're going to get the copy from the source anyway.
     * (Precopy will just overwrite this data, so doesn't need the discard)
     */
208
    if (ram_discard_range(mis, block_name, 0, length)) {
209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229
        return -1;
    }

    return 0;
}

/*
 * At the end of migration, undo the effects of init_range
 * opaque should be the MIS.
 */
static int cleanup_range(const char *block_name, void *host_addr,
                        ram_addr_t offset, ram_addr_t length, void *opaque)
{
    MigrationIncomingState *mis = opaque;
    struct uffdio_range range_struct;
    trace_postcopy_cleanup_range(block_name, host_addr, offset, length);

    /*
     * We turned off hugepage for the precopy stage with postcopy enabled
     * we can turn it back on now.
     */
230
    qemu_madvise(host_addr, length, QEMU_MADV_HUGEPAGE);
231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267

    /*
     * We can also turn off userfault now since we should have all the
     * pages.   It can be useful to leave it on to debug postcopy
     * if you're not sure it's always getting every page.
     */
    range_struct.start = (uintptr_t)host_addr;
    range_struct.len = length;

    if (ioctl(mis->userfault_fd, UFFDIO_UNREGISTER, &range_struct)) {
        error_report("%s: userfault unregister %s", __func__, strerror(errno));

        return -1;
    }

    return 0;
}

/*
 * Initialise postcopy-ram, setting the RAM to a state where we can go into
 * postcopy later; must be called prior to any precopy.
 * called from arch_init's similarly named ram_postcopy_incoming_init
 */
int postcopy_ram_incoming_init(MigrationIncomingState *mis, size_t ram_pages)
{
    if (qemu_ram_foreach_block(init_range, mis)) {
        return -1;
    }

    return 0;
}

/*
 * At the end of a migration where postcopy_ram_incoming_init was called.
 */
int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis)
{
268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292
    trace_postcopy_ram_incoming_cleanup_entry();

    if (mis->have_fault_thread) {
        uint64_t tmp64;

        if (qemu_ram_foreach_block(cleanup_range, mis)) {
            return -1;
        }
        /*
         * Tell the fault_thread to exit, it's an eventfd that should
         * currently be at 0, we're going to increment it to 1
         */
        tmp64 = 1;
        if (write(mis->userfault_quit_fd, &tmp64, 8) == 8) {
            trace_postcopy_ram_incoming_cleanup_join();
            qemu_thread_join(&mis->fault_thread);
        } else {
            /* Not much we can do here, but may as well report it */
            error_report("%s: incrementing userfault_quit_fd: %s", __func__,
                         strerror(errno));
        }
        trace_postcopy_ram_incoming_cleanup_closeuf();
        close(mis->userfault_fd);
        close(mis->userfault_quit_fd);
        mis->have_fault_thread = false;
293 294
    }

295 296
    qemu_balloon_inhibit(false);

297 298 299 300 301 302 303 304 305 306
    if (enable_mlock) {
        if (os_mlock() < 0) {
            error_report("mlock: %s", strerror(errno));
            /*
             * It doesn't feel right to fail at this point, we have a valid
             * VM state.
             */
        }
    }

307 308 309
    postcopy_state_set(POSTCOPY_INCOMING_END);
    migrate_send_rp_shut(mis, qemu_file_get_error(mis->from_src_file) != 0);

310
    if (mis->postcopy_tmp_page) {
311
        munmap(mis->postcopy_tmp_page, mis->largest_page_size);
312 313
        mis->postcopy_tmp_page = NULL;
    }
314 315 316 317
    if (mis->postcopy_tmp_zero_page) {
        munmap(mis->postcopy_tmp_zero_page, mis->largest_page_size);
        mis->postcopy_tmp_zero_page = NULL;
    }
318
    trace_postcopy_ram_incoming_cleanup_exit();
319 320 321
    return 0;
}

322 323 324 325 326 327 328 329 330 331 332 333 334
/*
 * Disable huge pages on an area
 */
static int nhp_range(const char *block_name, void *host_addr,
                    ram_addr_t offset, ram_addr_t length, void *opaque)
{
    trace_postcopy_nhp_range(block_name, host_addr, offset, length);

    /*
     * Before we do discards we need to ensure those discards really
     * do delete areas of the page, even if THP thinks a hugepage would
     * be a good idea, so force hugepages off.
     */
335
    qemu_madvise(host_addr, length, QEMU_MADV_NOHUGEPAGE);
336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355

    return 0;
}

/*
 * Userfault requires us to mark RAM as NOHUGEPAGE prior to discard
 * however leaving it until after precopy means that most of the precopy
 * data is still THPd
 */
int postcopy_ram_prepare_discard(MigrationIncomingState *mis)
{
    if (qemu_ram_foreach_block(nhp_range, mis)) {
        return -1;
    }

    postcopy_state_set(POSTCOPY_INCOMING_DISCARD);

    return 0;
}

356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380
/*
 * Mark the given area of RAM as requiring notification to unwritten areas
 * Used as a  callback on qemu_ram_foreach_block.
 *   host_addr: Base of area to mark
 *   offset: Offset in the whole ram arena
 *   length: Length of the section
 *   opaque: MigrationIncomingState pointer
 * Returns 0 on success
 */
static int ram_block_enable_notify(const char *block_name, void *host_addr,
                                   ram_addr_t offset, ram_addr_t length,
                                   void *opaque)
{
    MigrationIncomingState *mis = opaque;
    struct uffdio_register reg_struct;

    reg_struct.range.start = (uintptr_t)host_addr;
    reg_struct.range.len = length;
    reg_struct.mode = UFFDIO_REGISTER_MODE_MISSING;

    /* Now tell our userfault_fd that it's responsible for this area */
    if (ioctl(mis->userfault_fd, UFFDIO_REGISTER, &reg_struct)) {
        error_report("%s userfault register: %s", __func__, strerror(errno));
        return -1;
    }
381 382 383 384
    if (!(reg_struct.ioctls & ((__u64)1 << _UFFDIO_COPY))) {
        error_report("%s userfault: Region doesn't support COPY", __func__);
        return -1;
    }
385 386 387 388 389 390 391 392 393 394

    return 0;
}

/*
 * Handle faults detected by the USERFAULT markings
 */
static void *postcopy_ram_fault_thread(void *opaque)
{
    MigrationIncomingState *mis = opaque;
395 396 397 398
    struct uffd_msg msg;
    int ret;
    RAMBlock *rb = NULL;
    RAMBlock *last_rb = NULL; /* last RAMBlock we sent part of */
399

400
    trace_postcopy_ram_fault_thread_entry();
401 402
    qemu_sem_post(&mis->fault_thread_sem);

403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455
    while (true) {
        ram_addr_t rb_offset;
        struct pollfd pfd[2];

        /*
         * We're mainly waiting for the kernel to give us a faulting HVA,
         * however we can be told to quit via userfault_quit_fd which is
         * an eventfd
         */
        pfd[0].fd = mis->userfault_fd;
        pfd[0].events = POLLIN;
        pfd[0].revents = 0;
        pfd[1].fd = mis->userfault_quit_fd;
        pfd[1].events = POLLIN; /* Waiting for eventfd to go positive */
        pfd[1].revents = 0;

        if (poll(pfd, 2, -1 /* Wait forever */) == -1) {
            error_report("%s: userfault poll: %s", __func__, strerror(errno));
            break;
        }

        if (pfd[1].revents) {
            trace_postcopy_ram_fault_thread_quit();
            break;
        }

        ret = read(mis->userfault_fd, &msg, sizeof(msg));
        if (ret != sizeof(msg)) {
            if (errno == EAGAIN) {
                /*
                 * if a wake up happens on the other thread just after
                 * the poll, there is nothing to read.
                 */
                continue;
            }
            if (ret < 0) {
                error_report("%s: Failed to read full userfault message: %s",
                             __func__, strerror(errno));
                break;
            } else {
                error_report("%s: Read %d bytes from userfaultfd expected %zd",
                             __func__, ret, sizeof(msg));
                break; /* Lost alignment, don't know what we'd read next */
            }
        }
        if (msg.event != UFFD_EVENT_PAGEFAULT) {
            error_report("%s: Read unexpected event %ud from userfaultfd",
                         __func__, msg.event);
            continue; /* It's not a page fault, shouldn't happen */
        }

        rb = qemu_ram_block_from_host(
                 (void *)(uintptr_t)msg.arg.pagefault.address,
456
                 true, &rb_offset);
457 458 459 460 461 462
        if (!rb) {
            error_report("postcopy_ram_fault_thread: Fault outside guest: %"
                         PRIx64, (uint64_t)msg.arg.pagefault.address);
            break;
        }

463
        rb_offset &= ~(qemu_ram_pagesize(rb) - 1);
464 465 466 467 468 469 470 471 472 473 474
        trace_postcopy_ram_fault_thread_request(msg.arg.pagefault.address,
                                                qemu_ram_get_idstr(rb),
                                                rb_offset);

        /*
         * Send the request to the source - we want to request one
         * of our host page sizes (which is >= TPS)
         */
        if (rb != last_rb) {
            last_rb = rb;
            migrate_send_rp_req_pages(mis, qemu_ram_get_idstr(rb),
475
                                     rb_offset, qemu_ram_pagesize(rb));
476 477 478
        } else {
            /* Save some space */
            migrate_send_rp_req_pages(mis, NULL,
479
                                     rb_offset, qemu_ram_pagesize(rb));
480 481 482
        }
    }
    trace_postcopy_ram_fault_thread_exit();
483 484 485 486 487
    return NULL;
}

int postcopy_ram_enable_notify(MigrationIncomingState *mis)
{
488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512
    /* Open the fd for the kernel to give us userfaults */
    mis->userfault_fd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
    if (mis->userfault_fd == -1) {
        error_report("%s: Failed to open userfault fd: %s", __func__,
                     strerror(errno));
        return -1;
    }

    /*
     * Although the host check already tested the API, we need to
     * do the check again as an ABI handshake on the new fd.
     */
    if (!ufd_version_check(mis->userfault_fd)) {
        return -1;
    }

    /* Now an eventfd we use to tell the fault-thread to quit */
    mis->userfault_quit_fd = eventfd(0, EFD_CLOEXEC);
    if (mis->userfault_quit_fd == -1) {
        error_report("%s: Opening userfault_quit_fd: %s", __func__,
                     strerror(errno));
        close(mis->userfault_fd);
        return -1;
    }

513 514 515 516 517
    qemu_sem_init(&mis->fault_thread_sem, 0);
    qemu_thread_create(&mis->fault_thread, "postcopy/fault",
                       postcopy_ram_fault_thread, mis, QEMU_THREAD_JOINABLE);
    qemu_sem_wait(&mis->fault_thread_sem);
    qemu_sem_destroy(&mis->fault_thread_sem);
518
    mis->have_fault_thread = true;
519 520 521 522 523 524

    /* Mark so that we get notified of accesses to unwritten areas */
    if (qemu_ram_foreach_block(ram_block_enable_notify, mis)) {
        return -1;
    }

525 526 527 528 529 530
    /*
     * Ballooning can mark pages as absent while we're postcopying
     * that would cause false userfaults.
     */
    qemu_balloon_inhibit(true);

531 532
    trace_postcopy_ram_enable_notify();

533 534 535
    return 0;
}

536 537 538 539
/*
 * Place a host page (from) at (host) atomically
 * returns 0 on success
 */
540 541
int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from,
                        size_t pagesize)
542 543 544 545 546
{
    struct uffdio_copy copy_struct;

    copy_struct.dst = (uint64_t)(uintptr_t)host;
    copy_struct.src = (uint64_t)(uintptr_t)from;
547
    copy_struct.len = pagesize;
548 549 550 551 552 553 554 555 556
    copy_struct.mode = 0;

    /* copy also acks to the kernel waking the stalled thread up
     * TODO: We can inhibit that ack and only do it if it was requested
     * which would be slightly cheaper, but we'd have to be careful
     * of the order of updating our page state.
     */
    if (ioctl(mis->userfault_fd, UFFDIO_COPY, &copy_struct)) {
        int e = errno;
557 558
        error_report("%s: %s copy host: %p from: %p (size: %zd)",
                     __func__, strerror(e), host, from, pagesize);
559 560 561 562 563 564 565 566 567 568 569 570

        return -e;
    }

    trace_postcopy_place_page(host);
    return 0;
}

/*
 * Place a zero page at (host) atomically
 * returns 0 on success
 */
571 572
int postcopy_place_page_zero(MigrationIncomingState *mis, void *host,
                             size_t pagesize)
573
{
574
    trace_postcopy_place_page_zero(host);
575

576 577 578 579 580
    if (pagesize == getpagesize()) {
        struct uffdio_zeropage zero_struct;
        zero_struct.range.start = (uint64_t)(uintptr_t)host;
        zero_struct.range.len = getpagesize();
        zero_struct.mode = 0;
581

582 583 584 585
        if (ioctl(mis->userfault_fd, UFFDIO_ZEROPAGE, &zero_struct)) {
            int e = errno;
            error_report("%s: %s zero host: %p",
                         __func__, strerror(e), host);
586

587 588 589
            return -e;
        }
    } else {
590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606
        /* The kernel can't use UFFDIO_ZEROPAGE for hugepages */
        if (!mis->postcopy_tmp_zero_page) {
            mis->postcopy_tmp_zero_page = mmap(NULL, mis->largest_page_size,
                                               PROT_READ | PROT_WRITE,
                                               MAP_PRIVATE | MAP_ANONYMOUS,
                                               -1, 0);
            if (mis->postcopy_tmp_zero_page == MAP_FAILED) {
                int e = errno;
                mis->postcopy_tmp_zero_page = NULL;
                error_report("%s: %s mapping large zero page",
                             __func__, strerror(e));
                return -e;
            }
            memset(mis->postcopy_tmp_zero_page, '\0', mis->largest_page_size);
        }
        return postcopy_place_page(mis, host, mis->postcopy_tmp_zero_page,
                                   pagesize);
607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622
    }

    return 0;
}

/*
 * Returns a target page of memory that can be mapped at a later point in time
 * using postcopy_place_page
 * The same address is used repeatedly, postcopy_place_page just takes the
 * backing page away.
 * Returns: Pointer to allocated page
 *
 */
void *postcopy_get_tmp_page(MigrationIncomingState *mis)
{
    if (!mis->postcopy_tmp_page) {
623
        mis->postcopy_tmp_page = mmap(NULL, mis->largest_page_size,
624 625
                             PROT_READ | PROT_WRITE, MAP_PRIVATE |
                             MAP_ANONYMOUS, -1, 0);
626 627
        if (mis->postcopy_tmp_page == MAP_FAILED) {
            mis->postcopy_tmp_page = NULL;
628 629 630 631 632 633 634 635
            error_report("%s: %s", __func__, strerror(errno));
            return NULL;
        }
    }

    return mis->postcopy_tmp_page;
}

D
Dr. David Alan Gilbert 已提交
636 637 638 639 640 641 642 643
#else
/* No target OS support, stubs just fail */
bool postcopy_ram_supported_by_host(void)
{
    error_report("%s: No OS support", __func__);
    return false;
}

644 645 646 647 648 649 650 651 652 653 654 655
int postcopy_ram_incoming_init(MigrationIncomingState *mis, size_t ram_pages)
{
    error_report("postcopy_ram_incoming_init: No OS support");
    return -1;
}

int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis)
{
    assert(0);
    return -1;
}

656 657 658 659 660 661
int postcopy_ram_prepare_discard(MigrationIncomingState *mis)
{
    assert(0);
    return -1;
}

662 663 664 665 666
int postcopy_ram_enable_notify(MigrationIncomingState *mis)
{
    assert(0);
    return -1;
}
667

668 669
int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from,
                        size_t pagesize)
670 671 672 673 674
{
    assert(0);
    return -1;
}

675 676
int postcopy_place_page_zero(MigrationIncomingState *mis, void *host,
                        size_t pagesize)
677 678 679 680 681 682 683 684 685 686 687
{
    assert(0);
    return -1;
}

void *postcopy_get_tmp_page(MigrationIncomingState *mis)
{
    assert(0);
    return NULL;
}

D
Dr. David Alan Gilbert 已提交
688 689
#endif

690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739
/* ------------------------------------------------------------------------- */

/**
 * postcopy_discard_send_init: Called at the start of each RAMBlock before
 *   asking to discard individual ranges.
 *
 * @ms: The current migration state.
 * @offset: the bitmap offset of the named RAMBlock in the migration
 *   bitmap.
 * @name: RAMBlock that discards will operate on.
 *
 * returns: a new PDS.
 */
PostcopyDiscardState *postcopy_discard_send_init(MigrationState *ms,
                                                 unsigned long offset,
                                                 const char *name)
{
    PostcopyDiscardState *res = g_malloc0(sizeof(PostcopyDiscardState));

    if (res) {
        res->ramblock_name = name;
        res->offset = offset;
    }

    return res;
}

/**
 * postcopy_discard_send_range: Called by the bitmap code for each chunk to
 *   discard. May send a discard message, may just leave it queued to
 *   be sent later.
 *
 * @ms: Current migration state.
 * @pds: Structure initialised by postcopy_discard_send_init().
 * @start,@length: a range of pages in the migration bitmap in the
 *   RAM block passed to postcopy_discard_send_init() (length=1 is one page)
 */
void postcopy_discard_send_range(MigrationState *ms, PostcopyDiscardState *pds,
                                unsigned long start, unsigned long length)
{
    size_t tp_bits = qemu_target_page_bits();
    /* Convert to byte offsets within the RAM block */
    pds->start_list[pds->cur_entry] = (start - pds->offset) << tp_bits;
    pds->length_list[pds->cur_entry] = length << tp_bits;
    trace_postcopy_discard_send_range(pds->ramblock_name, start, length);
    pds->cur_entry++;
    pds->nsentwords++;

    if (pds->cur_entry == MAX_DISCARDS_PER_COMMAND) {
        /* Full set, ship it! */
740 741
        qemu_savevm_send_postcopy_ram_discard(ms->to_dst_file,
                                              pds->ramblock_name,
742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760
                                              pds->cur_entry,
                                              pds->start_list,
                                              pds->length_list);
        pds->nsentcmds++;
        pds->cur_entry = 0;
    }
}

/**
 * postcopy_discard_send_finish: Called at the end of each RAMBlock by the
 * bitmap code. Sends any outstanding discard messages, frees the PDS
 *
 * @ms: Current migration state.
 * @pds: Structure initialised by postcopy_discard_send_init().
 */
void postcopy_discard_send_finish(MigrationState *ms, PostcopyDiscardState *pds)
{
    /* Anything unsent? */
    if (pds->cur_entry) {
761 762
        qemu_savevm_send_postcopy_ram_discard(ms->to_dst_file,
                                              pds->ramblock_name,
763 764 765 766 767 768 769 770 771 772 773
                                              pds->cur_entry,
                                              pds->start_list,
                                              pds->length_list);
        pds->nsentcmds++;
    }

    trace_postcopy_discard_send_finish(pds->ramblock_name, pds->nsentwords,
                                       pds->nsentcmds);

    g_free(pds);
}