/*
 * QEMU live migration
 *
 * Copyright IBM, Corp. 2008
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 * Contributions after 2012-01-13 are licensed under the terms of the
 * GNU GPL, version 2 or (at your option) any later version.
 */

#include "qemu-common.h"
#include "qemu/error-report.h"
#include "qemu/main-loop.h"
#include "migration/migration.h"
#include "migration/qemu-file.h"
#include "sysemu/sysemu.h"
#include "block/block.h"
#include "qapi/qmp/qerror.h"
#include "qemu/sockets.h"
#include "qemu/rcu.h"
#include "migration/block.h"
#include "qemu/thread.h"
#include "qmp-commands.h"
#include "trace.h"
#include "qapi/util.h"
#include "qapi-event.h"
#include "qom/cpu.h"
#define MAX_THROTTLE  (32 << 20)      /* Migration transfer speed throttling */

/* Amount of time to allocate to each "chunk" of bandwidth-throttled
 * data. */
#define BUFFER_DELAY     100
#define XFER_LIMIT_RATIO (1000 / BUFFER_DELAY)

/* Default compression thread count */
#define DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT 8
/* Default decompression thread count, usually decompression is at
 * least 4 times as fast as compression.*/
#define DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT 2
/* 0: means nocompress, 1: best speed, ... 9: best compress ratio */
#define DEFAULT_MIGRATE_COMPRESS_LEVEL 1
/* Define default autoconverge cpu throttle migration parameters */
#define DEFAULT_MIGRATE_X_CPU_THROTTLE_INITIAL 20
#define DEFAULT_MIGRATE_X_CPU_THROTTLE_INCREMENT 10

/* Migration XBZRLE default cache size */
#define DEFAULT_MIGRATE_CACHE_SIZE (64 * 1024 * 1024)

static NotifierList migration_state_notifiers =
    NOTIFIER_LIST_INITIALIZER(migration_state_notifiers);

/* Set once "-incoming defer" has been seen; qmp_migrate_incoming
 * requires it. */
static bool deferred_incoming;

/*
 * Current state of incoming postcopy; note this is not part of
 * MigrationIncomingState since it's state is used during cleanup
 * at the end as MIS is being freed.
 */
static PostcopyState incoming_postcopy_state;

/* When we add fault tolerance, we could have several
   migrations at once.  For now we don't need to add
   dynamic creation of migration */

/* For outgoing */
/*
 * Return the (single) outgoing migration state.  QEMU supports only one
 * outgoing migration at a time, so this is a function-local static that
 * is lazily initialised with the default tunables.
 */
MigrationState *migrate_get_current(void)
{
    static MigrationState current_migration = {
        .state = MIGRATION_STATUS_NONE,
        .bandwidth_limit = MAX_THROTTLE,
        .xbzrle_cache_size = DEFAULT_MIGRATE_CACHE_SIZE,
        .mbps = -1,
        .parameters[MIGRATION_PARAMETER_COMPRESS_LEVEL] =
                DEFAULT_MIGRATE_COMPRESS_LEVEL,
        .parameters[MIGRATION_PARAMETER_COMPRESS_THREADS] =
                DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT,
        .parameters[MIGRATION_PARAMETER_DECOMPRESS_THREADS] =
                DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT,
        .parameters[MIGRATION_PARAMETER_X_CPU_THROTTLE_INITIAL] =
                DEFAULT_MIGRATE_X_CPU_THROTTLE_INITIAL,
        .parameters[MIGRATION_PARAMETER_X_CPU_THROTTLE_INCREMENT] =
                DEFAULT_MIGRATE_X_CPU_THROTTLE_INCREMENT,
    };

    return &current_migration;
}

/* For incoming */
static MigrationIncomingState *mis_current;

/* Return the incoming migration state, or NULL when none is active. */
MigrationIncomingState *migration_incoming_get_current(void)
{
    return mis_current;
}

/*
 * Allocate and initialise the incoming migration state.
 * @f: stream carrying the migration data from the source.
 * The new state is stored in the file-scope mis_current and returned.
 */
MigrationIncomingState *migration_incoming_state_new(QEMUFile* f)
{
    mis_current = g_new0(MigrationIncomingState, 1);
    mis_current->from_src_file = f;
    QLIST_INIT(&mis_current->loadvm_handlers);
    qemu_mutex_init(&mis_current->rp_mutex);
    qemu_event_init(&mis_current->main_thread_load_event, false);

    return mis_current;
}

/* Tear down the state created by migration_incoming_state_new(). */
void migration_incoming_state_destroy(void)
{
    qemu_event_destroy(&mis_current->main_thread_load_event);
    loadvm_free_handlers(mis_current);
    g_free(mis_current);
    /* NULL the pointer so migration_incoming_get_current() reports
     * "no incoming migration" and stale use is caught early. */
    mis_current = NULL;
}

/*
 * Guest global runstate that travels in the migration stream as the
 * "globalstate" vmstate section (see vmstate_globalstate below).
 */
typedef struct {
    bool optional;          /* section may be skipped; see global_state_needed */
    uint32_t size;          /* strlen(runstate) + 1, filled in pre_save */
    uint8_t runstate[100];  /* runstate name as a NUL-terminated string */
    RunState state;         /* parsed runstate, valid once 'received' is set */
    bool received;          /* true after post_load has run on the destination */
} GlobalState;

static GlobalState global_state;

/*
 * Snapshot the current runstate name into global_state for transmission.
 * Returns 0 on success, -EINVAL if the name does not fit in the buffer.
 */
int global_state_store(void)
{
    if (!runstate_store((char *)global_state.runstate,
                        sizeof(global_state.runstate))) {
        error_report("runstate name too big: %s", global_state.runstate);
        trace_migrate_state_too_big();
        return -EINVAL;
    }
    return 0;
}

143 144 145 146 147 148 149
void global_state_store_running(void)
{
    const char *state = RunState_lookup[RUN_STATE_RUNNING];
    strncpy((char *)global_state.runstate,
           state, sizeof(global_state.runstate));
}

/* True once the destination has processed the globalstate section. */
static bool global_state_received(void)
{
    return global_state.received;
}

/* Runstate parsed from the source; only meaningful when
 * global_state_received() is true. */
static RunState global_state_get_runstate(void)
{
    return global_state.state;
}

/* Mark the globalstate section as optional (it may be omitted from the
 * stream when not needed; see global_state_needed). */
void global_state_set_optional(void)
{
    global_state.optional = true;
}

/*
 * vmstate .needed callback: decide whether the globalstate section is
 * put on the wire.  Mandatory sections always go; optional ones are
 * skipped for the common "running"/"paused" states and sent for
 * everything else.
 */
static bool global_state_needed(void *opaque)
{
    GlobalState *s = opaque;
    const char *name = (const char *)s->runstate;

    /* Not optional means mandatory: always send. */
    if (!s->optional) {
        return true;
    }

    /* Optional: needed only for states other than running/paused. */
    return strcmp(name, "running") != 0 && strcmp(name, "paused") != 0;
}

/*
 * vmstate .post_load callback: parse the runstate name received from
 * the source into s->state and mark it received.
 * Returns 0 on success, -EINVAL if the name is not a known RunState.
 */
static int global_state_post_load(void *opaque, int version_id)
{
    GlobalState *s = opaque;
    Error *local_err = NULL;
    int r;
    char *runstate = (char *)s->runstate;

    s->received = true;
    trace_migrate_global_state_post_load(runstate);

    /* Map the textual name onto the RunState enum; -1 on failure. */
    r = qapi_enum_parse(RunState_lookup, runstate, RUN_STATE_MAX,
                                -1, &local_err);

    if (r == -1) {
        if (local_err) {
            error_report_err(local_err);
        }
        return -EINVAL;
    }
    s->state = r;

    return 0;
}

/* vmstate .pre_save callback: record the wire size of the runstate
 * string (including its NUL terminator). */
static void global_state_pre_save(void *opaque)
{
    GlobalState *s = opaque;

    trace_migrate_global_state_pre_save((char *)s->runstate);
    s->size = strlen((char *)s->runstate) + 1;
}

/* Wire format of the globalstate section: the string length followed by
 * the fixed-size runstate buffer. */
static const VMStateDescription vmstate_globalstate = {
    .name = "globalstate",
    .version_id = 1,
    .minimum_version_id = 1,
    .post_load = global_state_post_load,
    .pre_save = global_state_pre_save,
    .needed = global_state_needed,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(size, GlobalState),
        VMSTATE_BUFFER(runstate, GlobalState),
        VMSTATE_END_OF_LIST()
    },
};

/* Register the globalstate vmstate section at startup. */
void register_global_state(void)
{
    /* We would use it independently that we receive it */
    strcpy((char *)&global_state.runstate, "");
    global_state.received = false;
    vmstate_register(NULL, 0, &vmstate_globalstate, &global_state);
}

/* Emit a MIGRATION QMP event for @new_state, but only when the 'events'
 * capability has been enabled by the user. */
static void migrate_generate_event(int new_state)
{
    if (migrate_use_events()) {
        qapi_event_send_migration(new_state, &error_abort);
    }
}

/*
 * Called on -incoming with a defer: uri.
 * The migration can be started later after any parameters have been
 * changed.
 */
static void deferred_incoming_migration(Error **errp)
{
    if (deferred_incoming) {
        error_setg(errp, "Incoming migration already deferred");
    }
    /* Set unconditionally: deferring twice reports an error but the
     * flag remains true either way (the operation is idempotent). */
    deferred_incoming = true;
}

/*
 * Start listening for an incoming migration.
 * @uri selects the transport: "defer", "tcp:", "rdma:" (when built with
 * RDMA), and on non-Windows hosts "exec:", "unix:", "fd:".
 * Errors (including an unknown protocol) are reported through @errp.
 */
void qemu_start_incoming_migration(const char *uri, Error **errp)
{
    const char *p;

    /* Announce SETUP before any transport starts accepting data. */
    qapi_event_send_migration(MIGRATION_STATUS_SETUP, &error_abort);
    if (!strcmp(uri, "defer")) {
        deferred_incoming_migration(errp);
    } else if (strstart(uri, "tcp:", &p)) {
        tcp_start_incoming_migration(p, errp);
#ifdef CONFIG_RDMA
    } else if (strstart(uri, "rdma:", &p)) {
        rdma_start_incoming_migration(p, errp);
#endif
#if !defined(WIN32)
    } else if (strstart(uri, "exec:", &p)) {
        exec_start_incoming_migration(p, errp);
    } else if (strstart(uri, "unix:", &p)) {
        unix_start_incoming_migration(p, errp);
    } else if (strstart(uri, "fd:", &p)) {
        fd_start_incoming_migration(p, errp);
#endif
    } else {
        error_setg(errp, "unknown migration protocol: %s", uri);
    }
}

/*
 * Coroutine body that receives the whole migration stream, then brings
 * the VM up (or leaves it paused) on this host.  On any load error the
 * process exits: an incoming migration that fails cannot be recovered.
 * The ordering of the steps below is load-bearing; see the inline
 * comments.
 */
static void process_incoming_migration_co(void *opaque)
{
    QEMUFile *f = opaque;
    Error *local_err = NULL;
    int ret;

    migration_incoming_state_new(f);
    postcopy_state_set(POSTCOPY_INCOMING_NONE);
    migrate_generate_event(MIGRATION_STATUS_ACTIVE);
    ret = qemu_loadvm_state(f);

    qemu_fclose(f);
    free_xbzrle_decoded_buf();
    migration_incoming_state_destroy();

    if (ret < 0) {
        migrate_generate_event(MIGRATION_STATUS_FAILED);
        error_report("load of migration failed: %s", strerror(-ret));
        migrate_decompress_threads_join();
        exit(EXIT_FAILURE);
    }

    /* Make sure all file formats flush their mutable metadata */
    bdrv_invalidate_cache_all(&local_err);
    if (local_err) {
        migrate_generate_event(MIGRATION_STATUS_FAILED);
        error_report_err(local_err);
        migrate_decompress_threads_join();
        exit(EXIT_FAILURE);
    }

    /*
     * This must happen after all error conditions are dealt with and
     * we're sure the VM is going to be running on this host.
     */
    qemu_announce_self();

    /* If global state section was not received or we are in running
       state, we need to obey autostart. Any other state is set with
       runstate_set. */

    if (!global_state_received() ||
        global_state_get_runstate() == RUN_STATE_RUNNING) {
        if (autostart) {
            vm_start();
        } else {
            runstate_set(RUN_STATE_PAUSED);
        }
    } else {
        runstate_set(global_state_get_runstate());
    }
    migrate_decompress_threads_join();
    /*
     * This must happen after any state changes since as soon as an external
     * observer sees this event they might start to prod at the VM assuming
     * it's ready to use.
     */
    migrate_generate_event(MIGRATION_STATUS_COMPLETED);
}

/*
 * Entry point for an accepted incoming migration: spin up the
 * decompression threads, make the fd non-blocking, and run the
 * receive coroutine on @f.
 */
void process_incoming_migration(QEMUFile *f)
{
    Coroutine *co = qemu_coroutine_create(process_incoming_migration_co);
    int fd = qemu_get_fd(f);

    assert(fd != -1);
    migrate_decompress_threads_create();
    qemu_set_nonblock(fd);
    qemu_coroutine_enter(co, f);
}

/*
 * Send a message on the return channel back to the source
 * of the migration.
 * Wire format: be16 type, be16 length, then @len bytes of @data.
 * rp_mutex serialises concurrent senders on the return path.
 */
void migrate_send_rp_message(MigrationIncomingState *mis,
                             enum mig_rp_message_type message_type,
                             uint16_t len, void *data)
{
    trace_migrate_send_rp_message((int)message_type, len);
    qemu_mutex_lock(&mis->rp_mutex);
    qemu_put_be16(mis->to_src_file, (unsigned int)message_type);
    qemu_put_be16(mis->to_src_file, len);
    qemu_put_buffer(mis->to_src_file, data, len);
    qemu_fflush(mis->to_src_file);
    qemu_mutex_unlock(&mis->rp_mutex);
}

/*
 * Send a 'SHUT' message on the return channel with the given value
 * to indicate that we've finished with the RP.  Non-0 value indicates
 * error.
 */
void migrate_send_rp_shut(MigrationIncomingState *mis,
                          uint32_t value)
{
    uint32_t buf;

    buf = cpu_to_be32(value);
    migrate_send_rp_message(mis, MIG_RP_MSG_SHUT, sizeof(buf), &buf);
}

/*
 * Send a 'PONG' message on the return channel with the given value
 * (normally in response to a 'PING')
 */
void migrate_send_rp_pong(MigrationIncomingState *mis,
                          uint32_t value)
{
    uint32_t buf;

    buf = cpu_to_be32(value);
    migrate_send_rp_message(mis, MIG_RP_MSG_PONG, sizeof(buf), &buf);
}

/* amount of nanoseconds we are willing to wait for migration to be down.
 * the choice of nanoseconds is because it is the maximum resolution that
 * get_clock() can achieve. It is an internal measure. All user-visible
 * units must be in seconds */
static uint64_t max_downtime = 300000000;   /* 300 ms in nanoseconds */

/* Accessor for the downtime limit above. */
uint64_t migrate_max_downtime(void)
{
    return max_downtime;
}

/*
 * QMP query-migrate-capabilities: build a linked list with one entry
 * per capability and its current enabled/disabled state.
 * Caller owns (and frees) the returned list.
 */
MigrationCapabilityStatusList *qmp_query_migrate_capabilities(Error **errp)
{
    MigrationCapabilityStatusList *head = NULL;
    MigrationCapabilityStatusList *caps;
    MigrationState *s = migrate_get_current();
    int i;

    caps = NULL; /* silence compiler warning */
    for (i = 0; i < MIGRATION_CAPABILITY_MAX; i++) {
        /* First iteration creates the head; later ones append. */
        if (head == NULL) {
            head = g_malloc0(sizeof(*caps));
            caps = head;
        } else {
            caps->next = g_malloc0(sizeof(*caps));
            caps = caps->next;
        }
        caps->value =
            g_malloc(sizeof(*caps->value));
        caps->value->capability = i;
        caps->value->state = s->enabled_capabilities[i];
    }

    return head;
}

/*
 * QMP query-migrate-parameters: snapshot the current tunables into a
 * newly-allocated MigrationParameters.  Caller owns the result.
 */
MigrationParameters *qmp_query_migrate_parameters(Error **errp)
{
    MigrationParameters *params;
    MigrationState *s = migrate_get_current();

    params = g_malloc0(sizeof(*params));
    params->compress_level = s->parameters[MIGRATION_PARAMETER_COMPRESS_LEVEL];
    params->compress_threads =
            s->parameters[MIGRATION_PARAMETER_COMPRESS_THREADS];
    params->decompress_threads =
            s->parameters[MIGRATION_PARAMETER_DECOMPRESS_THREADS];
    params->x_cpu_throttle_initial =
            s->parameters[MIGRATION_PARAMETER_X_CPU_THROTTLE_INITIAL];
    params->x_cpu_throttle_increment =
            s->parameters[MIGRATION_PARAMETER_X_CPU_THROTTLE_INCREMENT];

    return params;
}

457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473
/*
 * Return true if we're already in the middle of a migration
 * (i.e. any of the active or setup states)
 */
static bool migration_is_setup_or_active(int state)
{
    switch (state) {
    case MIGRATION_STATUS_ACTIVE:
    case MIGRATION_STATUS_SETUP:
        return true;

    default:
        return false;

    }
}

/* Fill in the xbzrle-cache substructure of @info, but only when the
 * XBZRLE capability is in use. */
static void get_xbzrle_cache_stats(MigrationInfo *info)
{
    if (migrate_use_xbzrle()) {
        info->has_xbzrle_cache = true;
        info->xbzrle_cache = g_malloc0(sizeof(*info->xbzrle_cache));
        info->xbzrle_cache->cache_size = migrate_xbzrle_cache_size();
        info->xbzrle_cache->bytes = xbzrle_mig_bytes_transferred();
        info->xbzrle_cache->pages = xbzrle_mig_pages_transferred();
        info->xbzrle_cache->cache_miss = xbzrle_mig_pages_cache_miss();
        info->xbzrle_cache->cache_miss_rate = xbzrle_mig_cache_miss_rate();
        info->xbzrle_cache->overflow = xbzrle_mig_pages_overflow();
    }
}

/*
 * QMP query-migrate: report the state of the current (or last)
 * migration.  Which fields are populated depends on s->state:
 * statistics are live while ACTIVE/CANCELLING and frozen once
 * COMPLETED; FAILED/CANCELLED report only the status.
 * Caller owns the returned MigrationInfo.
 */
MigrationInfo *qmp_query_migrate(Error **errp)
{
    MigrationInfo *info = g_malloc0(sizeof(*info));
    MigrationState *s = migrate_get_current();

    switch (s->state) {
    case MIGRATION_STATUS_NONE:
        /* no migration has happened ever */
        break;
    case MIGRATION_STATUS_SETUP:
        info->has_status = true;
        info->has_total_time = false;
        break;
    case MIGRATION_STATUS_ACTIVE:
    case MIGRATION_STATUS_CANCELLING:
        info->has_status = true;
        info->has_total_time = true;
        /* total_time is still running: elapsed since migration start */
        info->total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME)
            - s->total_time;
        info->has_expected_downtime = true;
        info->expected_downtime = s->expected_downtime;
        info->has_setup_time = true;
        info->setup_time = s->setup_time;

        info->has_ram = true;
        info->ram = g_malloc0(sizeof(*info->ram));
        info->ram->transferred = ram_bytes_transferred();
        info->ram->remaining = ram_bytes_remaining();
        info->ram->total = ram_bytes_total();
        info->ram->duplicate = dup_mig_pages_transferred();
        info->ram->skipped = skipped_mig_pages_transferred();
        info->ram->normal = norm_mig_pages_transferred();
        info->ram->normal_bytes = norm_mig_bytes_transferred();
        info->ram->dirty_pages_rate = s->dirty_pages_rate;
        info->ram->mbps = s->mbps;
        info->ram->dirty_sync_count = s->dirty_sync_count;

        if (blk_mig_active()) {
            info->has_disk = true;
            info->disk = g_malloc0(sizeof(*info->disk));
            info->disk->transferred = blk_mig_bytes_transferred();
            info->disk->remaining = blk_mig_bytes_remaining();
            info->disk->total = blk_mig_bytes_total();
        }

        if (cpu_throttle_active()) {
            info->has_x_cpu_throttle_percentage = true;
            info->x_cpu_throttle_percentage = cpu_throttle_get_percentage();
        }

        get_xbzrle_cache_stats(info);
        break;
    case MIGRATION_STATUS_COMPLETED:
        get_xbzrle_cache_stats(info);

        info->has_status = true;
        info->has_total_time = true;
        /* Migration is over: total_time holds the final duration. */
        info->total_time = s->total_time;
        info->has_downtime = true;
        info->downtime = s->downtime;
        info->has_setup_time = true;
        info->setup_time = s->setup_time;

        info->has_ram = true;
        info->ram = g_malloc0(sizeof(*info->ram));
        info->ram->transferred = ram_bytes_transferred();
        info->ram->remaining = 0;
        info->ram->total = ram_bytes_total();
        info->ram->duplicate = dup_mig_pages_transferred();
        info->ram->skipped = skipped_mig_pages_transferred();
        info->ram->normal = norm_mig_pages_transferred();
        info->ram->normal_bytes = norm_mig_bytes_transferred();
        info->ram->mbps = s->mbps;
        info->ram->dirty_sync_count = s->dirty_sync_count;
        break;
    case MIGRATION_STATUS_FAILED:
        info->has_status = true;
        break;
    case MIGRATION_STATUS_CANCELLED:
        info->has_status = true;
        break;
    }
    info->status = s->state;

    return info;
}

/*
 * QMP migrate-set-capabilities: apply the requested capability states.
 * Rejected while a migration is in setup or active.  Incompatible
 * combinations (postcopy + compression) are reported and the postcopy
 * capability is forced back off.
 */
void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params,
                                  Error **errp)
{
    MigrationState *s = migrate_get_current();
    MigrationCapabilityStatusList *cap;

    if (migration_is_setup_or_active(s->state)) {
        error_setg(errp, QERR_MIGRATION_ACTIVE);
        return;
    }

    for (cap = params; cap; cap = cap->next) {
        s->enabled_capabilities[cap->value->capability] = cap->value->state;
    }

    if (migrate_postcopy_ram()) {
        if (migrate_use_compression()) {
            /* The decompression threads asynchronously write into RAM
             * rather than use the atomic copies needed to avoid
             * userfaulting.  It should be possible to fix the decompression
             * threads for compatibility in future.
             */
            error_report("Postcopy is not currently compatible with "
                         "compression");
            s->enabled_capabilities[MIGRATION_CAPABILITY_X_POSTCOPY_RAM] =
                false;
        }
    }
}

605 606 607 608 609
void qmp_migrate_set_parameters(bool has_compress_level,
                                int64_t compress_level,
                                bool has_compress_threads,
                                int64_t compress_threads,
                                bool has_decompress_threads,
610 611 612 613 614
                                int64_t decompress_threads,
                                bool has_x_cpu_throttle_initial,
                                int64_t x_cpu_throttle_initial,
                                bool has_x_cpu_throttle_increment,
                                int64_t x_cpu_throttle_increment, Error **errp)
615 616 617 618
{
    MigrationState *s = migrate_get_current();

    if (has_compress_level && (compress_level < 0 || compress_level > 9)) {
619 620
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "compress_level",
                   "is invalid, it should be in the range of 0 to 9");
621 622 623 624
        return;
    }
    if (has_compress_threads &&
            (compress_threads < 1 || compress_threads > 255)) {
625 626 627
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "compress_threads",
                   "is invalid, it should be in the range of 1 to 255");
628 629 630 631
        return;
    }
    if (has_decompress_threads &&
            (decompress_threads < 1 || decompress_threads > 255)) {
632 633 634
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "decompress_threads",
                   "is invalid, it should be in the range of 1 to 255");
635 636
        return;
    }
637 638 639 640 641 642 643 644 645 646 647 648
    if (has_x_cpu_throttle_initial &&
            (x_cpu_throttle_initial < 1 || x_cpu_throttle_initial > 99)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "x_cpu_throttle_initial",
                   "an integer in the range of 1 to 99");
    }
    if (has_x_cpu_throttle_increment &&
            (x_cpu_throttle_increment < 1 || x_cpu_throttle_increment > 99)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "x_cpu_throttle_increment",
                   "an integer in the range of 1 to 99");
    }
649 650 651 652 653 654 655 656 657 658 659

    if (has_compress_level) {
        s->parameters[MIGRATION_PARAMETER_COMPRESS_LEVEL] = compress_level;
    }
    if (has_compress_threads) {
        s->parameters[MIGRATION_PARAMETER_COMPRESS_THREADS] = compress_threads;
    }
    if (has_decompress_threads) {
        s->parameters[MIGRATION_PARAMETER_DECOMPRESS_THREADS] =
                                                    decompress_threads;
    }
660 661 662 663 664 665 666 667 668
    if (has_x_cpu_throttle_initial) {
        s->parameters[MIGRATION_PARAMETER_X_CPU_THROTTLE_INITIAL] =
                                                    x_cpu_throttle_initial;
    }

    if (has_x_cpu_throttle_increment) {
        s->parameters[MIGRATION_PARAMETER_X_CPU_THROTTLE_INCREMENT] =
                                                    x_cpu_throttle_increment;
    }
669 670
}

/* shared migration helpers */

/*
 * Transition s->state from @old_state to @new_state atomically; the
 * cmpxchg means a concurrent transition (e.g. a cancel) wins and this
 * call becomes a no-op.  The trace/event fire only on a real change.
 */
static void migrate_set_state(MigrationState *s, int old_state, int new_state)
{
    if (atomic_cmpxchg(&s->state, old_state, new_state) == old_state) {
        trace_migrate_set_state(new_state);
        migrate_generate_event(new_state);
    }
}

/*
 * Bottom half run at the end of an outgoing migration: join the
 * migration thread, close the stream, finalise a CANCELLING state to
 * CANCELLED, and notify state-change listeners.
 */
static void migrate_fd_cleanup(void *opaque)
{
    MigrationState *s = opaque;

    qemu_bh_delete(s->cleanup_bh);
    s->cleanup_bh = NULL;

    if (s->file) {
        trace_migrate_fd_cleanup();
        /* Drop the big lock while joining: the migration thread may
         * need it to finish. */
        qemu_mutex_unlock_iothread();
        qemu_thread_join(&s->thread);
        qemu_mutex_lock_iothread();

        migrate_compress_threads_join();
        qemu_fclose(s->file);
        s->file = NULL;
    }

    assert(s->state != MIGRATION_STATUS_ACTIVE);

    if (s->state == MIGRATION_STATUS_CANCELLING) {
        migrate_set_state(s, MIGRATION_STATUS_CANCELLING,
                          MIGRATION_STATUS_CANCELLED);
    }

    notifier_list_notify(&migration_state_notifiers, s);
}

/* Mark an outgoing migration as FAILED before its stream was ever
 * opened (s->file must still be NULL) and notify listeners. */
void migrate_fd_error(MigrationState *s)
{
    trace_migrate_fd_error();
    assert(s->file == NULL);
    migrate_set_state(s, MIGRATION_STATUS_SETUP, MIGRATION_STATUS_FAILED);
    notifier_list_notify(&migration_state_notifiers, s);
}

/*
 * Cancel an in-progress outgoing migration: shut down the return path,
 * drive the state machine to CANCELLING (retrying around concurrent
 * transitions), and force-shutdown the outgoing stream if needed.
 */
static void migrate_fd_cancel(MigrationState *s)
{
    int old_state ;
    QEMUFile *f = migrate_get_current()->file;
    trace_migrate_fd_cancel();

    if (s->rp_state.from_dst_file) {
        /* shutdown the rp socket, so causing the rp thread to shutdown */
        qemu_file_shutdown(s->rp_state.from_dst_file);
    }

    /* Loop because migrate_set_state() is a cmpxchg: a concurrent state
     * change makes it a no-op and we must re-read and retry. */
    do {
        old_state = s->state;
        if (!migration_is_setup_or_active(old_state)) {
            break;
        }
        migrate_set_state(s, old_state, MIGRATION_STATUS_CANCELLING);
    } while (s->state != MIGRATION_STATUS_CANCELLING);

    /*
     * If we're unlucky the migration code might be stuck somewhere in a
     * send/write while the network has failed and is waiting to timeout;
     * if we've got shutdown(2) available then we can force it to quit.
     * The outgoing qemu file gets closed in migrate_fd_cleanup that is
     * called in a bh, so there is no race against this cancel.
     */
    if (s->state == MIGRATION_STATUS_CANCELLING && f) {
        qemu_file_shutdown(f);
    }
}

/* Register @notify to be called on migration state changes. */
void add_migration_state_change_notifier(Notifier *notify)
{
    notifier_list_add(&migration_state_notifiers, notify);
}

/* Unregister a notifier added with add_migration_state_change_notifier. */
void remove_migration_state_change_notifier(Notifier *notify)
{
    notifier_remove(notify);
}

/* True while the outgoing migration is still in its SETUP phase. */
bool migration_in_setup(MigrationState *s)
{
    return s->state == MIGRATION_STATUS_SETUP;
}

/* True once the outgoing migration completed successfully. */
bool migration_has_finished(MigrationState *s)
{
    return s->state == MIGRATION_STATUS_COMPLETED;
}
767

/* True if the outgoing migration ended without success (cancelled by
 * the user or failed). */
bool migration_has_failed(MigrationState *s)
{
    return (s->state == MIGRATION_STATUS_CANCELLED ||
            s->state == MIGRATION_STATUS_FAILED);
}

/*
 * Reset the global MigrationState for a fresh outgoing migration.
 * User-configured settings (capabilities, cache size, bandwidth limit,
 * tunable parameters) are saved across the memset() and restored, so
 * only per-run statistics and status start from zero.
 */
MigrationState *migrate_init(const MigrationParams *params)
{
    MigrationState *s = migrate_get_current();
    /* Stash everything that must survive the wipe below. */
    int64_t bandwidth_limit = s->bandwidth_limit;
    bool enabled_capabilities[MIGRATION_CAPABILITY_MAX];
    int64_t xbzrle_cache_size = s->xbzrle_cache_size;
    int compress_level = s->parameters[MIGRATION_PARAMETER_COMPRESS_LEVEL];
    int compress_thread_count =
            s->parameters[MIGRATION_PARAMETER_COMPRESS_THREADS];
    int decompress_thread_count =
            s->parameters[MIGRATION_PARAMETER_DECOMPRESS_THREADS];
    int x_cpu_throttle_initial =
            s->parameters[MIGRATION_PARAMETER_X_CPU_THROTTLE_INITIAL];
    int x_cpu_throttle_increment =
            s->parameters[MIGRATION_PARAMETER_X_CPU_THROTTLE_INCREMENT];

    memcpy(enabled_capabilities, s->enabled_capabilities,
           sizeof(enabled_capabilities));

    memset(s, 0, sizeof(*s));
    s->params = *params;
    memcpy(s->enabled_capabilities, enabled_capabilities,
           sizeof(enabled_capabilities));
    s->xbzrle_cache_size = xbzrle_cache_size;

    s->parameters[MIGRATION_PARAMETER_COMPRESS_LEVEL] = compress_level;
    s->parameters[MIGRATION_PARAMETER_COMPRESS_THREADS] =
               compress_thread_count;
    s->parameters[MIGRATION_PARAMETER_DECOMPRESS_THREADS] =
               decompress_thread_count;
    s->parameters[MIGRATION_PARAMETER_X_CPU_THROTTLE_INITIAL] =
                x_cpu_throttle_initial;
    s->parameters[MIGRATION_PARAMETER_X_CPU_THROTTLE_INCREMENT] =
                x_cpu_throttle_increment;
    s->bandwidth_limit = bandwidth_limit;
    migrate_set_state(s, MIGRATION_STATUS_NONE, MIGRATION_STATUS_SETUP);

    s->total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
    return s;
}
814

A
Anthony Liguori 已提交
/* List of Error* reasons that currently block outgoing migration;
 * qmp_migrate() refuses to start while this list is non-empty. */
static GSList *migration_blockers;

/* Block migration; @reason is reported to the user on attempts. */
void migrate_add_blocker(Error *reason)
{
    migration_blockers = g_slist_prepend(migration_blockers, reason);
}

/* Remove a blocker previously added with migrate_add_blocker(). */
void migrate_del_blocker(Error *reason)
{
    migration_blockers = g_slist_remove(migration_blockers, reason);
}

D
Dr. David Alan Gilbert 已提交
827 828 829
void qmp_migrate_incoming(const char *uri, Error **errp)
{
    Error *local_err = NULL;
830
    static bool once = true;
D
Dr. David Alan Gilbert 已提交
831 832

    if (!deferred_incoming) {
833
        error_setg(errp, "For use with '-incoming defer'");
D
Dr. David Alan Gilbert 已提交
834 835
        return;
    }
836 837 838
    if (!once) {
        error_setg(errp, "The incoming migration has already been started");
    }
D
Dr. David Alan Gilbert 已提交
839 840 841 842 843 844 845 846

    qemu_start_incoming_migration(uri, &local_err);

    if (local_err) {
        error_propagate(errp, local_err);
        return;
    }

847
    once = false;
D
Dr. David Alan Gilbert 已提交
848 849
}

/*
 * QMP migrate: start an outgoing migration to @uri.
 * @has_blk/@blk: migrate block devices too; @has_inc/@inc: incremental
 * block migration; @has_detach/@detach is accepted but unused here.
 * Fails if a migration is already running, the guest is itself an
 * incoming target, savevm is blocked, or a migration blocker is set.
 */
void qmp_migrate(const char *uri, bool has_blk, bool blk,
                 bool has_inc, bool inc, bool has_detach, bool detach,
                 Error **errp)
{
    Error *local_err = NULL;
    MigrationState *s = migrate_get_current();
    MigrationParams params;
    const char *p;

    params.blk = has_blk && blk;
    params.shared = has_inc && inc;

    if (migration_is_setup_or_active(s->state) ||
        s->state == MIGRATION_STATUS_CANCELLING) {
        error_setg(errp, QERR_MIGRATION_ACTIVE);
        return;
    }
    if (runstate_check(RUN_STATE_INMIGRATE)) {
        error_setg(errp, "Guest is waiting for an incoming migration");
        return;
    }

    if (qemu_savevm_state_blocked(errp)) {
        return;
    }

    if (migration_blockers) {
        *errp = error_copy(migration_blockers->data);
        return;
    }

    /* We are starting a new migration, so we want to start in a clean
       state.  This change is only needed if previous migration
       failed/was cancelled.  We don't use migrate_set_state() because
       we are setting the initial state, not changing it. */
    s->state = MIGRATION_STATUS_NONE;

    s = migrate_init(&params);

    if (strstart(uri, "tcp:", &p)) {
        tcp_start_outgoing_migration(s, p, &local_err);
#ifdef CONFIG_RDMA
    } else if (strstart(uri, "rdma:", &p)) {
        rdma_start_outgoing_migration(s, p, &local_err);
#endif
#if !defined(WIN32)
    } else if (strstart(uri, "exec:", &p)) {
        exec_start_outgoing_migration(s, p, &local_err);
    } else if (strstart(uri, "unix:", &p)) {
        unix_start_outgoing_migration(s, p, &local_err);
    } else if (strstart(uri, "fd:", &p)) {
        fd_start_outgoing_migration(s, p, &local_err);
#endif
    } else {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "uri",
                   "a valid migration protocol");
        migrate_set_state(s, MIGRATION_STATUS_SETUP, MIGRATION_STATUS_FAILED);
        return;
    }

    if (local_err) {
        migrate_fd_error(s);
        error_propagate(errp, local_err);
        return;
    }
}

L
Luiz Capitulino 已提交
917
/* QMP handler: abort the outgoing migration currently in progress, if any. */
void qmp_migrate_cancel(Error **errp)
{
    MigrationState *s = migrate_get_current();

    migrate_fd_cancel(s);
}

922 923 924
/*
 * QMP handler: resize the XBZRLE page cache to @value bytes.
 *
 * The request is rejected when @value does not survive conversion to
 * size_t, when it is larger than total guest RAM, or when the resize
 * itself fails (value smaller than the page size).  On success the new
 * size is recorded in the migration state.
 */
void qmp_migrate_set_cache_size(int64_t value, Error **errp)
{
    int64_t resized;
    MigrationState *s = migrate_get_current();

    /* Reject values that would be truncated by size_t. */
    if ((size_t)value != value) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cache size",
                   "exceeding address space");
        return;
    }

    /* A cache bigger than all of guest RAM makes no sense. */
    if (value > ram_bytes_total()) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cache size",
                   "exceeds guest ram size ");
        return;
    }

    resized = xbzrle_cache_resize(value);
    if (resized < 0) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cache size",
                   "is smaller than page size");
        return;
    }

    s->xbzrle_cache_size = resized;
}

/* QMP handler: report the configured XBZRLE cache size in bytes. */
int64_t qmp_query_migrate_cache_size(Error **errp)
{
    int64_t cache_size = migrate_xbzrle_cache_size();

    return cache_size;
}

L
Luiz Capitulino 已提交
956
/*
 * QMP handler: set the maximum migration bandwidth in bytes/second.
 *
 * The value is clamped to [0, SIZE_MAX], stored in the migration state,
 * and applied immediately to the stream when one is already open.
 */
void qmp_migrate_set_speed(int64_t value, Error **errp)
{
    MigrationState *s = migrate_get_current();

    /* Clamp the requested limit to a representable range. */
    if (value < 0) {
        value = 0;
    } else if (value > SIZE_MAX) {
        value = SIZE_MAX;
    }

    s->bandwidth_limit = value;
    if (s->file) {
        /* The per-BUFFER_DELAY budget is the limit split into
         * XFER_LIMIT_RATIO chunks per second. */
        qemu_file_set_rate_limit(s->file, s->bandwidth_limit / XFER_LIMIT_RATIO);
    }
}

974
/*
 * QMP handler: set the maximum tolerable migration downtime, in seconds.
 *
 * The value is converted to nanoseconds and stored in the global
 * max_downtime.  Negative values and NaN are clamped to 0; values of
 * 2^64 ns or more saturate at UINT64_MAX.
 *
 * Note: the previous MAX(0, MIN(UINT64_MAX, value)) clamp was broken:
 * (double)UINT64_MAX rounds up to exactly 2^64, so a value of 2^64
 * passed the clamp and the subsequent (uint64_t) cast was undefined
 * behaviour (C11 6.3.1.4 — out-of-range floating-to-integer conversion).
 */
void qmp_migrate_set_downtime(double value, Error **errp)
{
    value *= 1e9;  /* seconds -> nanoseconds */

    if (!(value >= 0)) {
        /* Negative or NaN: clamp to zero. */
        value = 0;
    }
    if (value >= (double)UINT64_MAX) {
        /* 2^64 and above cannot be converted safely: saturate. */
        max_downtime = UINT64_MAX;
    } else {
        max_downtime = (uint64_t)value;
    }
}
980

981 982 983 984 985 986 987 988 989
/* True if the experimental x-postcopy-ram capability is enabled. */
bool migrate_postcopy_ram(void)
{
    MigrationState *ms = migrate_get_current();

    return ms->enabled_capabilities[MIGRATION_CAPABILITY_X_POSTCOPY_RAM];
}

990 991 992 993 994 995 996 997 998
/* True if the auto-converge capability is enabled. */
bool migrate_auto_converge(void)
{
    MigrationState *ms = migrate_get_current();

    return ms->enabled_capabilities[MIGRATION_CAPABILITY_AUTO_CONVERGE];
}

999 1000 1001 1002 1003 1004 1005 1006 1007
/* True if the zero-blocks capability is enabled. */
bool migrate_zero_blocks(void)
{
    MigrationState *ms = migrate_get_current();

    return ms->enabled_capabilities[MIGRATION_CAPABILITY_ZERO_BLOCKS];
}

1008 1009
bool migrate_use_compression(void)
{
1010 1011 1012 1013 1014
    MigrationState *s;

    s = migrate_get_current();

    return s->enabled_capabilities[MIGRATION_CAPABILITY_COMPRESS];
1015 1016 1017 1018 1019 1020 1021 1022
}

int migrate_compress_level(void)
{
    MigrationState *s;

    s = migrate_get_current();

1023
    return s->parameters[MIGRATION_PARAMETER_COMPRESS_LEVEL];
1024 1025 1026 1027 1028 1029 1030 1031
}

int migrate_compress_threads(void)
{
    MigrationState *s;

    s = migrate_get_current();

1032
    return s->parameters[MIGRATION_PARAMETER_COMPRESS_THREADS];
1033 1034
}

1035 1036 1037 1038 1039 1040
int migrate_decompress_threads(void)
{
    MigrationState *s;

    s = migrate_get_current();

1041
    return s->parameters[MIGRATION_PARAMETER_DECOMPRESS_THREADS];
1042 1043
}

1044 1045 1046 1047 1048 1049 1050 1051 1052
/* True if the events capability is enabled. */
bool migrate_use_events(void)
{
    MigrationState *ms = migrate_get_current();

    return ms->enabled_capabilities[MIGRATION_CAPABILITY_EVENTS];
}

1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069
/* Non-zero if the xbzrle capability is enabled. */
int migrate_use_xbzrle(void)
{
    MigrationState *ms = migrate_get_current();

    return ms->enabled_capabilities[MIGRATION_CAPABILITY_XBZRLE];
}

/* Current XBZRLE cache size in bytes, as recorded in the migration state. */
int64_t migrate_xbzrle_cache_size(void)
{
    MigrationState *ms = migrate_get_current();

    return ms->xbzrle_cache_size;
}
1070

1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218
/* migration thread support */
/*
 * Something bad happened to the RP stream, mark an error
 * The caller shall print or trace something to indicate why
 */
static void mark_source_rp_bad(MigrationState *s)
{
    /* Polled by source_return_path_thread()'s loop condition and
     * returned by await_return_path_close_on_source(). */
    s->rp_state.error = true;
}

/* Expected payload length and printable name for each return-path
 * message type, indexed by MIG_RP_MSG_* value.  len == -1 means the
 * payload length is variable (bounded only by the reader's max_len). */
static struct rp_cmd_args {
    ssize_t     len; /* -1 = variable */
    const char *name;
} rp_cmd_args[] = {
    [MIG_RP_MSG_INVALID]        = { .len = -1, .name = "INVALID" },
    [MIG_RP_MSG_SHUT]           = { .len =  4, .name = "SHUT" },
    [MIG_RP_MSG_PONG]           = { .len =  4, .name = "PONG" },
    [MIG_RP_MSG_MAX]            = { .len = -1, .name = "MAX" },
};

/*
 * Handles messages sent on the return path towards the source VM
 *
 * Runs as a dedicated thread (see open_return_path_on_source).  Reads
 * (type, length, payload) messages from the destination until the
 * stream errors, an error is recorded, or migration leaves the
 * setup/active states.  On exit it clears rp_state.from_dst_file and
 * closes the stream.
 */
static void *source_return_path_thread(void *opaque)
{
    MigrationState *ms = opaque;
    QEMUFile *rp = ms->rp_state.from_dst_file;
    uint16_t header_len, header_type;
    const int max_len = 512;      /* upper bound on any payload we accept */
    uint8_t buf[max_len];
    uint32_t tmp32, sibling_error;
    int res;

    trace_source_return_path_thread_entry();
    while (!ms->rp_state.error && !qemu_file_get_error(rp) &&
           migration_is_setup_or_active(ms->state)) {
        trace_source_return_path_thread_loop_top();
        /* Each message starts with a big-endian (type, length) header. */
        header_type = qemu_get_be16(rp);
        header_len = qemu_get_be16(rp);

        if (header_type >= MIG_RP_MSG_MAX ||
            header_type == MIG_RP_MSG_INVALID) {
            error_report("RP: Received invalid message 0x%04x length 0x%04x",
                    header_type, header_len);
            mark_source_rp_bad(ms);
            goto out;
        }

        /* Fixed-length messages must match exactly; variable-length
         * (len == -1) must still fit in buf. */
        if ((rp_cmd_args[header_type].len != -1 &&
            header_len != rp_cmd_args[header_type].len) ||
            header_len > max_len) {
            error_report("RP: Received '%s' message (0x%04x) with"
                    "incorrect length %d expecting %zu",
                    rp_cmd_args[header_type].name, header_type, header_len,
                    (size_t)rp_cmd_args[header_type].len);
            mark_source_rp_bad(ms);
            goto out;
        }

        /* We know we've got a valid header by this point */
        res = qemu_get_buffer(rp, buf, header_len);
        if (res != header_len) {
            error_report("RP: Failed reading data for message 0x%04x"
                         " read %d expected %d",
                         header_type, res, header_len);
            mark_source_rp_bad(ms);
            goto out;
        }

        /* OK, we have the message and the data */
        switch (header_type) {
        case MIG_RP_MSG_SHUT:
            /* Destination is shutting down the return path; a non-zero
             * payload is its error indication. */
            sibling_error = be32_to_cpup((uint32_t *)buf);
            trace_source_return_path_thread_shut(sibling_error);
            if (sibling_error) {
                error_report("RP: Sibling indicated error %d", sibling_error);
                mark_source_rp_bad(ms);
            }
            /*
             * We'll let the main thread deal with closing the RP
             * we could do a shutdown(2) on it, but we're the only user
             * anyway, so there's nothing gained.
             */
            goto out;

        case MIG_RP_MSG_PONG:
            /* Reply to a PING; payload echoes the 32-bit ping value. */
            tmp32 = be32_to_cpup((uint32_t *)buf);
            trace_source_return_path_thread_pong(tmp32);
            break;

        default:
            break;
        }
    }
    if (rp && qemu_file_get_error(rp)) {
        trace_source_return_path_thread_bad_end();
        mark_source_rp_bad(ms);
    }

    trace_source_return_path_thread_end();
out:
    ms->rp_state.from_dst_file = NULL;
    qemu_fclose(rp);
    return NULL;
}

__attribute__ (( unused )) /* Until later in patch series */
/*
 * Open the return path (destination -> source) on ms->file and spawn
 * the joinable thread that services it.
 *
 * Returns 0 on success, -1 if the transport cannot provide a return path.
 */
static int open_return_path_on_source(MigrationState *ms)
{

    ms->rp_state.from_dst_file = qemu_file_get_return_path(ms->file);
    if (!ms->rp_state.from_dst_file) {
        return -1;
    }

    trace_open_return_path_on_source();
    qemu_thread_create(&ms->rp_state.rp_thread, "return path",
                       source_return_path_thread, ms, QEMU_THREAD_JOINABLE);

    trace_open_return_path_on_source_continue();

    return 0;
}

__attribute__ (( unused )) /* Until later in patch series */
/* Returns 0 if the RP was ok, otherwise there was an error on the RP */
static int await_return_path_close_on_source(MigrationState *ms)
{
    /*
     * If this is a normal exit then the destination will send a SHUT and the
     * rp_thread will exit, however if there's an error we need to cause
     * it to exit.
     */
    if (qemu_file_get_error(ms->file) && ms->rp_state.from_dst_file) {
        /*
         * shutdown(2), if we have it, will cause it to unblock if it's stuck
         * waiting for the destination.
         */
        qemu_file_shutdown(ms->rp_state.from_dst_file);
        mark_source_rp_bad(ms);
    }
    trace_await_return_path_close_on_source_joining();
    /* Blocks until source_return_path_thread() returns. */
    qemu_thread_join(&ms->rp_state.rp_thread);
    trace_await_return_path_close_on_source_close();
    return ms->rp_state.error;
}

1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241
/**
 * migration_completion: Used by migration_thread when there's not much left.
 *   The caller 'breaks' the loop when this returns.
 *
 * @s: Current migration state
 * @*old_vm_running: Pointer to old_vm_running flag
 * @*start_time: Pointer to time to update
 *
 * Stops the guest, sends the final device state, and moves the
 * migration to COMPLETED or FAILED.
 */
static void migration_completion(MigrationState *s, bool *old_vm_running,
                                 int64_t *start_time)
{
    int ret;

    /* Take the BQL so the guest is quiescent while we finish. */
    qemu_mutex_lock_iothread();
    /* Downtime starts now; the caller uses this for s->downtime. */
    *start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
    qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER);
    /* Remember whether to restart the guest on failure/cancel. */
    *old_vm_running = runstate_is_running();

    ret = global_state_store();
    if (!ret) {
        ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
        if (ret >= 0) {
            /* Guest is stopped: throttling no longer helps, send flat out. */
            qemu_file_set_rate_limit(s->file, INT64_MAX);
            qemu_savevm_state_complete_precopy(s->file);
        }
    }
    qemu_mutex_unlock_iothread();

    if (ret < 0) {
        goto fail;
    }

    if (qemu_file_get_error(s->file)) {
        trace_migration_completion_file_err();
        goto fail;
    }

    migrate_set_state(s, MIGRATION_STATUS_ACTIVE, MIGRATION_STATUS_COMPLETED);
    return;

fail:
    migrate_set_state(s, MIGRATION_STATUS_ACTIVE, MIGRATION_STATUS_FAILED);
}

1263 1264 1265 1266
/*
 * Master migration thread on the source VM.
 * It drives the migration and pumps the data down the outgoing channel.
 */
J
Juan Quintela 已提交
1267
/* Main loop of the outgoing migration: iterates RAM/device state until
 * the remaining dirty data fits in the allowed downtime, recalculating
 * bandwidth and max_size every BUFFER_DELAY ms, then hands off to
 * migration_completion(). */
static void *migration_thread(void *opaque)
{
    MigrationState *s = opaque;
    int64_t initial_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
    int64_t setup_start = qemu_clock_get_ms(QEMU_CLOCK_HOST);
    int64_t initial_bytes = 0;     /* stream position at window start */
    int64_t max_size = 0;          /* bytes we may leave for the downtime phase */
    int64_t start_time = initial_time;
    int64_t end_time;
    bool old_vm_running = false;   /* set by migration_completion() */

    rcu_register_thread();

    qemu_savevm_state_header(s->file);
    qemu_savevm_state_begin(s->file, &s->params);

    s->setup_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) - setup_start;
    migrate_set_state(s, MIGRATION_STATUS_SETUP, MIGRATION_STATUS_ACTIVE);

    while (s->state == MIGRATION_STATUS_ACTIVE) {
        int64_t current_time;
        uint64_t pending_size;

        if (!qemu_file_rate_limit(s->file)) {
            pending_size = qemu_savevm_state_pending(s->file, max_size);
            trace_migrate_pending(pending_size, max_size);
            if (pending_size && pending_size >= max_size) {
                /* Still too much dirty data: do another iteration. */
                qemu_savevm_state_iterate(s->file);
            } else {
                /* Remainder fits in the downtime budget: finish up. */
                trace_migration_thread_low_pending(pending_size);
                migration_completion(s, &old_vm_running, &start_time);
                break;
            }
        }

        if (qemu_file_get_error(s->file)) {
            migrate_set_state(s, MIGRATION_STATUS_ACTIVE,
                              MIGRATION_STATUS_FAILED);
            break;
        }
        current_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
        if (current_time >= initial_time + BUFFER_DELAY) {
            /* End of a BUFFER_DELAY window: recompute bandwidth and the
             * amount of data we can afford to leave for the stopped phase. */
            uint64_t transferred_bytes = qemu_ftell(s->file) - initial_bytes;
            uint64_t time_spent = current_time - initial_time;
            double bandwidth = transferred_bytes / time_spent;
            max_size = bandwidth * migrate_max_downtime() / 1000000;

            s->mbps = time_spent ? (((double) transferred_bytes * 8.0) /
                    ((double) time_spent / 1000.0)) / 1000.0 / 1000.0 : -1;

            trace_migrate_transferred(transferred_bytes, time_spent,
                                      bandwidth, max_size);
            /* if we haven't sent anything, we don't want to recalculate
               10000 is a small enough number for our purposes */
            if (s->dirty_bytes_rate && transferred_bytes > 10000) {
                s->expected_downtime = s->dirty_bytes_rate / bandwidth;
            }

            qemu_file_reset_rate_limit(s->file);
            initial_time = current_time;
            initial_bytes = qemu_ftell(s->file);
        }
        if (qemu_file_rate_limit(s->file)) {
            /* usleep expects microseconds */
            g_usleep((initial_time + BUFFER_DELAY - current_time)*1000);
        }
    }

    /* If we enabled cpu throttling for auto-converge, turn it off. */
    cpu_throttle_stop();
    end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);

    qemu_mutex_lock_iothread();
    qemu_savevm_state_cleanup();
    if (s->state == MIGRATION_STATUS_COMPLETED) {
        uint64_t transferred_bytes = qemu_ftell(s->file);
        s->total_time = end_time - s->total_time;
        s->downtime = end_time - start_time;
        if (s->total_time) {
            s->mbps = (((double) transferred_bytes * 8.0) /
                       ((double) s->total_time)) / 1000;
        }
        runstate_set(RUN_STATE_POSTMIGRATE);
    } else {
        /* Failed or cancelled: resume the guest if it was running before. */
        if (old_vm_running) {
            vm_start();
        }
    }
    /* Final teardown happens in the main loop via the cleanup BH. */
    qemu_bh_schedule(s->cleanup_bh);
    qemu_mutex_unlock_iothread();

    rcu_unregister_thread();
    return NULL;
}

1362
/*
 * Called once the outgoing transport is connected: seed the expected
 * downtime, arm the cleanup bottom half, apply the configured bandwidth
 * limit, notify state listeners, and start the compression and
 * migration threads.
 */
void migrate_fd_connect(MigrationState *s)
{
    /* This is a best 1st approximation. ns to ms */
    s->expected_downtime = max_downtime/1000000;
    s->cleanup_bh = qemu_bh_new(migrate_fd_cleanup, s);

    qemu_file_set_rate_limit(s->file,
                             s->bandwidth_limit / XFER_LIMIT_RATIO);

    /* Notify before starting migration thread */
    notifier_list_notify(&migration_state_notifiers, s);

    migrate_compress_threads_create();
    qemu_thread_create(&s->thread, "migration", migration_thread, s,
                       QEMU_THREAD_JOINABLE);
}
1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389

/* Read the current incoming postcopy state (atomic read with barrier). */
PostcopyState  postcopy_state_get(void)
{
    return atomic_mb_read(&incoming_postcopy_state);
}

/* Set the state and return the old state */
PostcopyState postcopy_state_set(PostcopyState new_state)
{
    /* Atomic exchange so racing setters each observe a distinct
     * previous value. */
    return atomic_xchg(&incoming_postcopy_state, new_state);
}