/*
 * QEMU live migration
 *
 * Copyright IBM, Corp. 2008
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 * Contributions after 2012-01-13 are licensed under the terms of the
 * GNU GPL, version 2 or (at your option) any later version.
 */

#include "qemu-common.h"
#include "qemu/error-report.h"
#include "qemu/main-loop.h"
#include "migration/migration.h"
#include "migration/qemu-file.h"
#include "sysemu/sysemu.h"
#include "block/block.h"
#include "qapi/qmp/qerror.h"
#include "qemu/sockets.h"
#include "qemu/rcu.h"
#include "migration/block.h"
#include "qemu/thread.h"
#include "qmp-commands.h"
#include "trace.h"
#include "qapi/util.h"
#include "qapi-event.h"
#include "qom/cpu.h"

#define MAX_THROTTLE  (32 << 20)      /* Migration transfer speed throttling */

/* Amount of time to allocate to each "chunk" of bandwidth-throttled
 * data. */
#define BUFFER_DELAY     100
#define XFER_LIMIT_RATIO (1000 / BUFFER_DELAY)
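/* The bandwidth limit is enforced per BUFFER_DELAY window: at most
 * bandwidth_limit / XFER_LIMIT_RATIO bytes may be written to the migration
 * stream in each 100 ms slice (see migration_thread() below). */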

/* Default compression thread count */
#define DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT 8
/* Default decompression thread count, usually decompression is at
 * least 4 times as fast as compression. */
#define DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT 2
/* 0: no compression, 1: best speed, ..., 9: best compression ratio */
#define DEFAULT_MIGRATE_COMPRESS_LEVEL 1
/* Define default autoconverge cpu throttle migration parameters */
#define DEFAULT_MIGRATE_X_CPU_THROTTLE_INITIAL 20
#define DEFAULT_MIGRATE_X_CPU_THROTTLE_INCREMENT 10

/* Migration XBZRLE default cache size */
#define DEFAULT_MIGRATE_CACHE_SIZE (64 * 1024 * 1024)

static NotifierList migration_state_notifiers =
    NOTIFIER_LIST_INITIALIZER(migration_state_notifiers);

static bool deferred_incoming;

/* Once we add fault tolerance, there could be several
   migrations at once.  For now we don't need
   dynamic creation of migration state. */

/* For outgoing */
MigrationState *migrate_get_current(void)
{
    static MigrationState current_migration = {
        .state = MIGRATION_STATUS_NONE,
        .bandwidth_limit = MAX_THROTTLE,
        .xbzrle_cache_size = DEFAULT_MIGRATE_CACHE_SIZE,
        .mbps = -1,
        .parameters[MIGRATION_PARAMETER_COMPRESS_LEVEL] =
                DEFAULT_MIGRATE_COMPRESS_LEVEL,
        .parameters[MIGRATION_PARAMETER_COMPRESS_THREADS] =
                DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT,
        .parameters[MIGRATION_PARAMETER_DECOMPRESS_THREADS] =
                DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT,
        .parameters[MIGRATION_PARAMETER_X_CPU_THROTTLE_INITIAL] =
                DEFAULT_MIGRATE_X_CPU_THROTTLE_INITIAL,
        .parameters[MIGRATION_PARAMETER_X_CPU_THROTTLE_INCREMENT] =
                DEFAULT_MIGRATE_X_CPU_THROTTLE_INCREMENT,
    };

    return &current_migration;
}

/* For incoming */
static MigrationIncomingState *mis_current;

MigrationIncomingState *migration_incoming_get_current(void)
{
    return mis_current;
}

MigrationIncomingState *migration_incoming_state_new(QEMUFile* f)
{
    mis_current = g_new0(MigrationIncomingState, 1);
    mis_current->file = f;
    QLIST_INIT(&mis_current->loadvm_handlers);

    return mis_current;
}

void migration_incoming_state_destroy(void)
{
    loadvm_free_handlers(mis_current);
    g_free(mis_current);
    mis_current = NULL;
}


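/* Run state ("running", "paused", ...) of the source VM, shipped to the
 * destination as an optional "globalstate" vmstate section and applied
 * there once the migration stream has been loaded. */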
typedef struct {
    bool optional;
    uint32_t size;
    uint8_t runstate[100];
    RunState state;
    bool received;
} GlobalState;

static GlobalState global_state;

int global_state_store(void)
{
    if (!runstate_store((char *)global_state.runstate,
                        sizeof(global_state.runstate))) {
        error_report("runstate name too big: %s", global_state.runstate);
        trace_migrate_state_too_big();
        return -EINVAL;
    }
    return 0;
}

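/* Unconditionally record "running" as the state to send, regardless of the
 * current local run state. */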
void global_state_store_running(void)
{
    const char *state = RunState_lookup[RUN_STATE_RUNNING];
    strncpy((char *)global_state.runstate,
           state, sizeof(global_state.runstate));
}

static bool global_state_received(void)
{
    return global_state.received;
}

static RunState global_state_get_runstate(void)
{
    return global_state.state;
}

void global_state_set_optional(void)
{
    global_state.optional = true;
}

static bool global_state_needed(void *opaque)
{
    GlobalState *s = opaque;
    char *runstate = (char *)s->runstate;

    /* If it is not optional, it is mandatory */

    if (s->optional == false) {
        return true;
    }

    /* If state is running or paused, it is not needed */

    if (strcmp(runstate, "running") == 0 ||
        strcmp(runstate, "paused") == 0) {
        return false;
    }

    /* for any other state it is needed */
    return true;
}

static int global_state_post_load(void *opaque, int version_id)
{
    GlobalState *s = opaque;
    Error *local_err = NULL;
    int r;
    char *runstate = (char *)s->runstate;

    s->received = true;
    trace_migrate_global_state_post_load(runstate);

    r = qapi_enum_parse(RunState_lookup, runstate, RUN_STATE_MAX,
                                -1, &local_err);

    if (r == -1) {
        if (local_err) {
            error_report_err(local_err);
        }
        return -EINVAL;
    }
    s->state = r;

    return 0;
}

static void global_state_pre_save(void *opaque)
{
    GlobalState *s = opaque;

    trace_migrate_global_state_pre_save((char *)s->runstate);
    s->size = strlen((char *)s->runstate) + 1;
}

static const VMStateDescription vmstate_globalstate = {
    .name = "globalstate",
    .version_id = 1,
    .minimum_version_id = 1,
    .post_load = global_state_post_load,
    .pre_save = global_state_pre_save,
    .needed = global_state_needed,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(size, GlobalState),
        VMSTATE_BUFFER(runstate, GlobalState),
        VMSTATE_END_OF_LIST()
    },
};

void register_global_state(void)
{
    /* The runstate is used whether or not a global state section is received */
    strcpy((char *)&global_state.runstate, "");
    global_state.received = false;
    vmstate_register(NULL, 0, &vmstate_globalstate, &global_state);
}

static void migrate_generate_event(int new_state)
{
    if (migrate_use_events()) {
        qapi_event_send_migration(new_state, &error_abort);
    }
}

/*
 * Called on -incoming with a defer: uri.
 * The migration can be started later after any parameters have been
 * changed.
 */
static void deferred_incoming_migration(Error **errp)
{
    if (deferred_incoming) {
        error_setg(errp, "Incoming migration already deferred");
    }
    deferred_incoming = true;
}

void qemu_start_incoming_migration(const char *uri, Error **errp)
{
    const char *p;

    qapi_event_send_migration(MIGRATION_STATUS_SETUP, &error_abort);
    if (!strcmp(uri, "defer")) {
        deferred_incoming_migration(errp);
    } else if (strstart(uri, "tcp:", &p)) {
        tcp_start_incoming_migration(p, errp);
#ifdef CONFIG_RDMA
    } else if (strstart(uri, "rdma:", &p)) {
        rdma_start_incoming_migration(p, errp);
#endif
#if !defined(WIN32)
    } else if (strstart(uri, "exec:", &p)) {
        exec_start_incoming_migration(p, errp);
    } else if (strstart(uri, "unix:", &p)) {
        unix_start_incoming_migration(p, errp);
    } else if (strstart(uri, "fd:", &p)) {
        fd_start_incoming_migration(p, errp);
#endif
    } else {
        error_setg(errp, "unknown migration protocol: %s", uri);
    }
}

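/*
 * Destination side of the migration, run as a coroutine: load the device
 * and RAM state from the incoming file, flush block-layer metadata, and
 * then either resume the guest or leave it paused depending on the
 * received global state and on autostart.
 */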
static void process_incoming_migration_co(void *opaque)
{
    QEMUFile *f = opaque;
    Error *local_err = NULL;
    int ret;

    migration_incoming_state_new(f);
    migrate_generate_event(MIGRATION_STATUS_ACTIVE);
    ret = qemu_loadvm_state(f);

    qemu_fclose(f);
    free_xbzrle_decoded_buf();
    migration_incoming_state_destroy();

    if (ret < 0) {
        migrate_generate_event(MIGRATION_STATUS_FAILED);
        error_report("load of migration failed: %s", strerror(-ret));
        migrate_decompress_threads_join();
        exit(EXIT_FAILURE);
    }

    /* Make sure all file formats flush their mutable metadata */
    bdrv_invalidate_cache_all(&local_err);
    if (local_err) {
        migrate_generate_event(MIGRATION_STATUS_FAILED);
        error_report_err(local_err);
        migrate_decompress_threads_join();
        exit(EXIT_FAILURE);
    }

    /*
     * This must happen after all error conditions are dealt with and
     * we're sure the VM is going to be running on this host.
     */
    qemu_announce_self();

    /* If global state section was not received or we are in running
       state, we need to obey autostart. Any other state is set with
       runstate_set. */

    if (!global_state_received() ||
        global_state_get_runstate() == RUN_STATE_RUNNING) {
        if (autostart) {
            vm_start();
        } else {
            runstate_set(RUN_STATE_PAUSED);
        }
    } else {
        runstate_set(global_state_get_runstate());
    }
    migrate_decompress_threads_join();
    /*
     * This must happen after any state changes since as soon as an external
     * observer sees this event they might start to prod at the VM assuming
     * it's ready to use.
     */
    migrate_generate_event(MIGRATION_STATUS_COMPLETED);
}

void process_incoming_migration(QEMUFile *f)
{
    Coroutine *co = qemu_coroutine_create(process_incoming_migration_co);
    int fd = qemu_get_fd(f);

    assert(fd != -1);
    migrate_decompress_threads_create();
    qemu_set_nonblock(fd);
    qemu_coroutine_enter(co, f);
}

/* Amount of nanoseconds we are willing to wait for migration downtime.
 * Nanoseconds are used because that is the maximum resolution that
 * get_clock() can achieve.  It is an internal measure.  All user-visible
 * units must be in seconds. */
static uint64_t max_downtime = 300000000;

uint64_t migrate_max_downtime(void)
{
    return max_downtime;
}

MigrationCapabilityStatusList *qmp_query_migrate_capabilities(Error **errp)
{
    MigrationCapabilityStatusList *head = NULL;
    MigrationCapabilityStatusList *caps;
    MigrationState *s = migrate_get_current();
    int i;

    caps = NULL; /* silence compiler warning */
    for (i = 0; i < MIGRATION_CAPABILITY_MAX; i++) {
        if (head == NULL) {
            head = g_malloc0(sizeof(*caps));
            caps = head;
        } else {
            caps->next = g_malloc0(sizeof(*caps));
            caps = caps->next;
        }
        caps->value =
            g_malloc(sizeof(*caps->value));
        caps->value->capability = i;
        caps->value->state = s->enabled_capabilities[i];
    }

    return head;
}

MigrationParameters *qmp_query_migrate_parameters(Error **errp)
{
    MigrationParameters *params;
    MigrationState *s = migrate_get_current();

    params = g_malloc0(sizeof(*params));
    params->compress_level = s->parameters[MIGRATION_PARAMETER_COMPRESS_LEVEL];
    params->compress_threads =
            s->parameters[MIGRATION_PARAMETER_COMPRESS_THREADS];
    params->decompress_threads =
            s->parameters[MIGRATION_PARAMETER_DECOMPRESS_THREADS];
    params->x_cpu_throttle_initial =
            s->parameters[MIGRATION_PARAMETER_X_CPU_THROTTLE_INITIAL];
    params->x_cpu_throttle_increment =
            s->parameters[MIGRATION_PARAMETER_X_CPU_THROTTLE_INCREMENT];
398 399 400 401

    return params;
}

static void get_xbzrle_cache_stats(MigrationInfo *info)
{
    if (migrate_use_xbzrle()) {
        info->has_xbzrle_cache = true;
        info->xbzrle_cache = g_malloc0(sizeof(*info->xbzrle_cache));
        info->xbzrle_cache->cache_size = migrate_xbzrle_cache_size();
        info->xbzrle_cache->bytes = xbzrle_mig_bytes_transferred();
        info->xbzrle_cache->pages = xbzrle_mig_pages_transferred();
        info->xbzrle_cache->cache_miss = xbzrle_mig_pages_cache_miss();
        info->xbzrle_cache->cache_miss_rate = xbzrle_mig_cache_miss_rate();
        info->xbzrle_cache->overflow = xbzrle_mig_pages_overflow();
    }
}

MigrationInfo *qmp_query_migrate(Error **errp)
{
    MigrationInfo *info = g_malloc0(sizeof(*info));
    MigrationState *s = migrate_get_current();

    switch (s->state) {
    case MIGRATION_STATUS_NONE:
        /* no migration has happened ever */
        break;
    case MIGRATION_STATUS_SETUP:
        info->has_status = true;
        info->has_total_time = false;
        break;
    case MIGRATION_STATUS_ACTIVE:
    case MIGRATION_STATUS_CANCELLING:
        info->has_status = true;
        info->has_total_time = true;
        info->total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME)
            - s->total_time;
        info->has_expected_downtime = true;
        info->expected_downtime = s->expected_downtime;
        info->has_setup_time = true;
        info->setup_time = s->setup_time;

        info->has_ram = true;
        info->ram = g_malloc0(sizeof(*info->ram));
        info->ram->transferred = ram_bytes_transferred();
        info->ram->remaining = ram_bytes_remaining();
        info->ram->total = ram_bytes_total();
        info->ram->duplicate = dup_mig_pages_transferred();
        info->ram->skipped = skipped_mig_pages_transferred();
        info->ram->normal = norm_mig_pages_transferred();
        info->ram->normal_bytes = norm_mig_bytes_transferred();
        info->ram->dirty_pages_rate = s->dirty_pages_rate;
        info->ram->mbps = s->mbps;
        info->ram->dirty_sync_count = s->dirty_sync_count;

        if (blk_mig_active()) {
            info->has_disk = true;
            info->disk = g_malloc0(sizeof(*info->disk));
            info->disk->transferred = blk_mig_bytes_transferred();
            info->disk->remaining = blk_mig_bytes_remaining();
            info->disk->total = blk_mig_bytes_total();
        }

        if (cpu_throttle_active()) {
            info->has_x_cpu_throttle_percentage = true;
            info->x_cpu_throttle_percentage = cpu_throttle_get_percentage();
        }

        get_xbzrle_cache_stats(info);
        break;
    case MIGRATION_STATUS_COMPLETED:
        get_xbzrle_cache_stats(info);

        info->has_status = true;
        info->has_total_time = true;
        info->total_time = s->total_time;
        info->has_downtime = true;
        info->downtime = s->downtime;
        info->has_setup_time = true;
        info->setup_time = s->setup_time;

        info->has_ram = true;
        info->ram = g_malloc0(sizeof(*info->ram));
        info->ram->transferred = ram_bytes_transferred();
        info->ram->remaining = 0;
        info->ram->total = ram_bytes_total();
        info->ram->duplicate = dup_mig_pages_transferred();
        info->ram->skipped = skipped_mig_pages_transferred();
        info->ram->normal = norm_mig_pages_transferred();
        info->ram->normal_bytes = norm_mig_bytes_transferred();
        info->ram->mbps = s->mbps;
        info->ram->dirty_sync_count = s->dirty_sync_count;
        break;
    case MIGRATION_STATUS_FAILED:
        info->has_status = true;
        break;
    case MIGRATION_STATUS_CANCELLED:
        info->has_status = true;
        break;
    }
    info->status = s->state;

    return info;
}

void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params,
                                  Error **errp)
{
    MigrationState *s = migrate_get_current();
    MigrationCapabilityStatusList *cap;

    if (s->state == MIGRATION_STATUS_ACTIVE ||
        s->state == MIGRATION_STATUS_SETUP) {
        error_setg(errp, QERR_MIGRATION_ACTIVE);
        return;
    }

    for (cap = params; cap; cap = cap->next) {
        s->enabled_capabilities[cap->value->capability] = cap->value->state;
    }
}

void qmp_migrate_set_parameters(bool has_compress_level,
                                int64_t compress_level,
                                bool has_compress_threads,
                                int64_t compress_threads,
                                bool has_decompress_threads,
                                int64_t decompress_threads,
                                bool has_x_cpu_throttle_initial,
                                int64_t x_cpu_throttle_initial,
                                bool has_x_cpu_throttle_increment,
                                int64_t x_cpu_throttle_increment, Error **errp)
{
    MigrationState *s = migrate_get_current();

    if (has_compress_level && (compress_level < 0 || compress_level > 9)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "compress_level",
                   "is invalid, it should be in the range of 0 to 9");
        return;
    }
    if (has_compress_threads &&
            (compress_threads < 1 || compress_threads > 255)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "compress_threads",
                   "is invalid, it should be in the range of 1 to 255");
        return;
    }
    if (has_decompress_threads &&
            (decompress_threads < 1 || decompress_threads > 255)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "decompress_threads",
                   "is invalid, it should be in the range of 1 to 255");
        return;
    }
    if (has_x_cpu_throttle_initial &&
            (x_cpu_throttle_initial < 1 || x_cpu_throttle_initial > 99)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "x_cpu_throttle_initial",
                   "an integer in the range of 1 to 99");
        return;
    }
    if (has_x_cpu_throttle_increment &&
            (x_cpu_throttle_increment < 1 || x_cpu_throttle_increment > 99)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "x_cpu_throttle_increment",
                   "an integer in the range of 1 to 99");
        return;
    }

    if (has_compress_level) {
        s->parameters[MIGRATION_PARAMETER_COMPRESS_LEVEL] = compress_level;
    }
    if (has_compress_threads) {
        s->parameters[MIGRATION_PARAMETER_COMPRESS_THREADS] = compress_threads;
    }
    if (has_decompress_threads) {
        s->parameters[MIGRATION_PARAMETER_DECOMPRESS_THREADS] =
                                                    decompress_threads;
    }
    if (has_x_cpu_throttle_initial) {
        s->parameters[MIGRATION_PARAMETER_X_CPU_THROTTLE_INITIAL] =
                                                    x_cpu_throttle_initial;
    }

    if (has_x_cpu_throttle_increment) {
        s->parameters[MIGRATION_PARAMETER_X_CPU_THROTTLE_INCREMENT] =
                                                    x_cpu_throttle_increment;
    }
}

/* shared migration helpers */

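/* Atomically move the migration state machine from old_state to new_state.
 * The cmpxchg ensures that a concurrent cancel cannot clobber a transition
 * that already happened; the event is only emitted by the caller that
 * actually performed the change. */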
static void migrate_set_state(MigrationState *s, int old_state, int new_state)
{
    if (atomic_cmpxchg(&s->state, old_state, new_state) == old_state) {
        trace_migrate_set_state(new_state);
        migrate_generate_event(new_state);
    }
}

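/* Bottom half scheduled on the main loop when the outgoing migration is
 * over (or failed before starting): join the migration thread, close the
 * migration file and notify state-change listeners. */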
static void migrate_fd_cleanup(void *opaque)
{
    MigrationState *s = opaque;

    qemu_bh_delete(s->cleanup_bh);
    s->cleanup_bh = NULL;

    if (s->file) {
        trace_migrate_fd_cleanup();
        qemu_mutex_unlock_iothread();
        qemu_thread_join(&s->thread);
        qemu_mutex_lock_iothread();

        migrate_compress_threads_join();
        qemu_fclose(s->file);
        s->file = NULL;
    }

    assert(s->state != MIGRATION_STATUS_ACTIVE);

    if (s->state != MIGRATION_STATUS_COMPLETED) {
        qemu_savevm_state_cancel();
        if (s->state == MIGRATION_STATUS_CANCELLING) {
            migrate_set_state(s, MIGRATION_STATUS_CANCELLING,
                              MIGRATION_STATUS_CANCELLED);
        }
    }

    notifier_list_notify(&migration_state_notifiers, s);
}

void migrate_fd_error(MigrationState *s)
{
    trace_migrate_fd_error();
    assert(s->file == NULL);
    migrate_set_state(s, MIGRATION_STATUS_SETUP, MIGRATION_STATUS_FAILED);
    notifier_list_notify(&migration_state_notifiers, s);
}

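/* Request cancellation of an in-flight outgoing migration: move the state
 * to CANCELLING without clobbering a concurrent COMPLETED/FAILED
 * transition, then shut down the file to unblock any stuck writes. */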
static void migrate_fd_cancel(MigrationState *s)
{
    int old_state ;
    QEMUFile *f = migrate_get_current()->file;
    trace_migrate_fd_cancel();

    do {
        old_state = s->state;
        if (old_state != MIGRATION_STATUS_SETUP &&
            old_state != MIGRATION_STATUS_ACTIVE) {
            break;
        }
        migrate_set_state(s, old_state, MIGRATION_STATUS_CANCELLING);
    } while (s->state != MIGRATION_STATUS_CANCELLING);

    /*
     * If we're unlucky the migration code might be stuck somewhere in a
     * send/write while the network has failed and is waiting to timeout;
     * if we've got shutdown(2) available then we can force it to quit.
     * The outgoing qemu file gets closed in migrate_fd_cleanup that is
     * called in a bh, so there is no race against this cancel.
     */
    if (s->state == MIGRATION_STATUS_CANCELLING && f) {
        qemu_file_shutdown(f);
    }
}

void add_migration_state_change_notifier(Notifier *notify)
{
    notifier_list_add(&migration_state_notifiers, notify);
}

void remove_migration_state_change_notifier(Notifier *notify)
{
    notifier_remove(notify);
}

bool migration_in_setup(MigrationState *s)
{
    return s->state == MIGRATION_STATUS_SETUP;
}

bool migration_has_finished(MigrationState *s)
{
    return s->state == MIGRATION_STATUS_COMPLETED;
}

bool migration_has_failed(MigrationState *s)
{
    return (s->state == MIGRATION_STATUS_CANCELLED ||
            s->state == MIGRATION_STATUS_FAILED);
}

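/* Reset the global MigrationState for a new outgoing migration while
 * preserving the user-configured capabilities, parameters, XBZRLE cache
 * size and bandwidth limit across the memset(). */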
static MigrationState *migrate_init(const MigrationParams *params)
{
    MigrationState *s = migrate_get_current();
    int64_t bandwidth_limit = s->bandwidth_limit;
    bool enabled_capabilities[MIGRATION_CAPABILITY_MAX];
    int64_t xbzrle_cache_size = s->xbzrle_cache_size;
    int compress_level = s->parameters[MIGRATION_PARAMETER_COMPRESS_LEVEL];
    int compress_thread_count =
            s->parameters[MIGRATION_PARAMETER_COMPRESS_THREADS];
    int decompress_thread_count =
            s->parameters[MIGRATION_PARAMETER_DECOMPRESS_THREADS];
    int x_cpu_throttle_initial =
            s->parameters[MIGRATION_PARAMETER_X_CPU_THROTTLE_INITIAL];
    int x_cpu_throttle_increment =
            s->parameters[MIGRATION_PARAMETER_X_CPU_THROTTLE_INCREMENT];

    memcpy(enabled_capabilities, s->enabled_capabilities,
           sizeof(enabled_capabilities));

    memset(s, 0, sizeof(*s));
    s->params = *params;
    memcpy(s->enabled_capabilities, enabled_capabilities,
           sizeof(enabled_capabilities));
    s->xbzrle_cache_size = xbzrle_cache_size;

    s->parameters[MIGRATION_PARAMETER_COMPRESS_LEVEL] = compress_level;
    s->parameters[MIGRATION_PARAMETER_COMPRESS_THREADS] =
               compress_thread_count;
    s->parameters[MIGRATION_PARAMETER_DECOMPRESS_THREADS] =
               decompress_thread_count;
    s->parameters[MIGRATION_PARAMETER_X_CPU_THROTTLE_INITIAL] =
                x_cpu_throttle_initial;
    s->parameters[MIGRATION_PARAMETER_X_CPU_THROTTLE_INCREMENT] =
                x_cpu_throttle_increment;
    s->bandwidth_limit = bandwidth_limit;
    migrate_set_state(s, MIGRATION_STATUS_NONE, MIGRATION_STATUS_SETUP);

    s->total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
    return s;
}

static GSList *migration_blockers;

void migrate_add_blocker(Error *reason)
{
    migration_blockers = g_slist_prepend(migration_blockers, reason);
}

void migrate_del_blocker(Error *reason)
{
    migration_blockers = g_slist_remove(migration_blockers, reason);
}

void qmp_migrate_incoming(const char *uri, Error **errp)
{
    Error *local_err = NULL;
    static bool once = true;

    if (!deferred_incoming) {
        error_setg(errp, "For use with '-incoming defer'");
        return;
    }
    if (!once) {
        error_setg(errp, "The incoming migration has already been started");
        return;
    }

    qemu_start_incoming_migration(uri, &local_err);

    if (local_err) {
        error_propagate(errp, local_err);
        return;
    }

    once = false;
}

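/* QMP entry point for starting an outgoing migration: refuse if one is
 * already in flight, reinitialise the migration state and dispatch on the
 * URI scheme to the matching transport. */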
void qmp_migrate(const char *uri, bool has_blk, bool blk,
                 bool has_inc, bool inc, bool has_detach, bool detach,
                 Error **errp)
{
    Error *local_err = NULL;
    MigrationState *s = migrate_get_current();
    MigrationParams params;
    const char *p;

    params.blk = has_blk && blk;
    params.shared = has_inc && inc;

    if (s->state == MIGRATION_STATUS_ACTIVE ||
        s->state == MIGRATION_STATUS_SETUP ||
        s->state == MIGRATION_STATUS_CANCELLING) {
        error_setg(errp, QERR_MIGRATION_ACTIVE);
        return;
    }
    if (runstate_check(RUN_STATE_INMIGRATE)) {
        error_setg(errp, "Guest is waiting for an incoming migration");
        return;
    }

    if (qemu_savevm_state_blocked(errp)) {
        return;
    }

    if (migration_blockers) {
        *errp = error_copy(migration_blockers->data);
        return;
    }

    /* We are starting a new migration, so we want to start in a clean
       state.  This change is only needed if previous migration
       failed/was cancelled.  We don't use migrate_set_state() because
       we are setting the initial state, not changing it. */
    s->state = MIGRATION_STATUS_NONE;

    s = migrate_init(&params);

    if (strstart(uri, "tcp:", &p)) {
        tcp_start_outgoing_migration(s, p, &local_err);
#ifdef CONFIG_RDMA
    } else if (strstart(uri, "rdma:", &p)) {
        rdma_start_outgoing_migration(s, p, &local_err);
#endif
#if !defined(WIN32)
    } else if (strstart(uri, "exec:", &p)) {
        exec_start_outgoing_migration(s, p, &local_err);
    } else if (strstart(uri, "unix:", &p)) {
        unix_start_outgoing_migration(s, p, &local_err);
    } else if (strstart(uri, "fd:", &p)) {
        fd_start_outgoing_migration(s, p, &local_err);
#endif
    } else {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "uri",
                   "a valid migration protocol");
        migrate_set_state(s, MIGRATION_STATUS_SETUP, MIGRATION_STATUS_FAILED);
        return;
    }

    if (local_err) {
        migrate_fd_error(s);
        error_propagate(errp, local_err);
        return;
    }
}

void qmp_migrate_cancel(Error **errp)
{
    migrate_fd_cancel(migrate_get_current());
}

void qmp_migrate_set_cache_size(int64_t value, Error **errp)
{
    MigrationState *s = migrate_get_current();
    int64_t new_size;

    /* Check for truncation */
    if (value != (size_t)value) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cache size",
                   "exceeding address space");
        return;
    }

    /* Cache should not be larger than guest ram size */
    if (value > ram_bytes_total()) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cache size",
                   "exceeds guest ram size ");
        return;
    }

    new_size = xbzrle_cache_resize(value);
    if (new_size < 0) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cache size",
                   "is smaller than page size");
        return;
    }

    s->xbzrle_cache_size = new_size;
}

int64_t qmp_query_migrate_cache_size(Error **errp)
{
    return migrate_xbzrle_cache_size();
}

void qmp_migrate_set_speed(int64_t value, Error **errp)
{
    MigrationState *s;

    if (value < 0) {
        value = 0;
    }
    if (value > SIZE_MAX) {
        value = SIZE_MAX;
    }

    s = migrate_get_current();
    s->bandwidth_limit = value;
    if (s->file) {
        qemu_file_set_rate_limit(s->file, s->bandwidth_limit / XFER_LIMIT_RATIO);
    }
}

void qmp_migrate_set_downtime(double value, Error **errp)
{
    value *= 1e9;
    value = MAX(0, MIN(UINT64_MAX, value));
    max_downtime = (uint64_t)value;
}

bool migrate_auto_converge(void)
{
    MigrationState *s;

    s = migrate_get_current();

    return s->enabled_capabilities[MIGRATION_CAPABILITY_AUTO_CONVERGE];
}

bool migrate_zero_blocks(void)
{
    MigrationState *s;

    s = migrate_get_current();

    return s->enabled_capabilities[MIGRATION_CAPABILITY_ZERO_BLOCKS];
}

bool migrate_use_compression(void)
{
    MigrationState *s;

    s = migrate_get_current();

    return s->enabled_capabilities[MIGRATION_CAPABILITY_COMPRESS];
}

int migrate_compress_level(void)
{
    MigrationState *s;

    s = migrate_get_current();

    return s->parameters[MIGRATION_PARAMETER_COMPRESS_LEVEL];
}

int migrate_compress_threads(void)
{
    MigrationState *s;

    s = migrate_get_current();

    return s->parameters[MIGRATION_PARAMETER_COMPRESS_THREADS];
}

int migrate_decompress_threads(void)
{
    MigrationState *s;

    s = migrate_get_current();

    return s->parameters[MIGRATION_PARAMETER_DECOMPRESS_THREADS];
}

bool migrate_use_events(void)
{
    MigrationState *s;

    s = migrate_get_current();

    return s->enabled_capabilities[MIGRATION_CAPABILITY_EVENTS];
}

int migrate_use_xbzrle(void)
{
    MigrationState *s;

    s = migrate_get_current();

    return s->enabled_capabilities[MIGRATION_CAPABILITY_XBZRLE];
}

int64_t migrate_xbzrle_cache_size(void)
{
    MigrationState *s;

    s = migrate_get_current();

    return s->xbzrle_cache_size;
}

/**
 * migration_completion: Used by migration_thread when there's not much left.
 *   The caller 'breaks' the loop when this returns.
 *
 * @s: Current migration state
 * @*old_vm_running: Pointer to old_vm_running flag
 * @*start_time: Pointer to time to update
 */
static void migration_completion(MigrationState *s, bool *old_vm_running,
                                 int64_t *start_time)
{
    int ret;

    qemu_mutex_lock_iothread();
    *start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
    qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER);
    *old_vm_running = runstate_is_running();

    ret = global_state_store();
    if (!ret) {
        ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
        if (ret >= 0) {
            qemu_file_set_rate_limit(s->file, INT64_MAX);
            qemu_savevm_state_complete(s->file);
        }
    }
    qemu_mutex_unlock_iothread();

    if (ret < 0) {
        goto fail;
    }

    if (qemu_file_get_error(s->file)) {
        trace_migration_completion_file_err();
        goto fail;
    }

    migrate_set_state(s, MIGRATION_STATUS_ACTIVE, MIGRATION_STATUS_COMPLETED);
    return;

fail:
    migrate_set_state(s, MIGRATION_STATUS_ACTIVE, MIGRATION_STATUS_FAILED);
}

/* migration thread support */

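/*
 * Main loop of the outgoing migration thread: iterate the savevm state
 * while honouring the bandwidth limit, recompute the expected downtime
 * from the observed transfer rate, and hand off to migration_completion()
 * once the remaining dirty data fits in the allowed downtime.
 */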
static void *migration_thread(void *opaque)
{
    MigrationState *s = opaque;
    int64_t initial_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
    int64_t setup_start = qemu_clock_get_ms(QEMU_CLOCK_HOST);
    int64_t initial_bytes = 0;
    int64_t max_size = 0;
    int64_t start_time = initial_time;
    bool old_vm_running = false;

    rcu_register_thread();

    qemu_savevm_state_header(s->file);
    qemu_savevm_state_begin(s->file, &s->params);

    s->setup_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) - setup_start;
    migrate_set_state(s, MIGRATION_STATUS_SETUP, MIGRATION_STATUS_ACTIVE);

    while (s->state == MIGRATION_STATUS_ACTIVE) {
        int64_t current_time;
        uint64_t pending_size;

        if (!qemu_file_rate_limit(s->file)) {
            pending_size = qemu_savevm_state_pending(s->file, max_size);
            trace_migrate_pending(pending_size, max_size);
            if (pending_size && pending_size >= max_size) {
                qemu_savevm_state_iterate(s->file);
            } else {
                trace_migration_thread_low_pending(pending_size);
                migration_completion(s, &old_vm_running, &start_time);
                break;
            }
        }

        if (qemu_file_get_error(s->file)) {
            migrate_set_state(s, MIGRATION_STATUS_ACTIVE,
                              MIGRATION_STATUS_FAILED);
            break;
        }
        current_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
        if (current_time >= initial_time + BUFFER_DELAY) {
            uint64_t transferred_bytes = qemu_ftell(s->file) - initial_bytes;
            uint64_t time_spent = current_time - initial_time;
            double bandwidth = transferred_bytes / time_spent;
            max_size = bandwidth * migrate_max_downtime() / 1000000;

            s->mbps = time_spent ? (((double) transferred_bytes * 8.0) /
                    ((double) time_spent / 1000.0)) / 1000.0 / 1000.0 : -1;

            trace_migrate_transferred(transferred_bytes, time_spent,
                                      bandwidth, max_size);
            /* If we haven't sent anything, we don't want to recalculate.
               10000 is a small enough number for our purposes. */
            if (s->dirty_bytes_rate && transferred_bytes > 10000) {
                s->expected_downtime = s->dirty_bytes_rate / bandwidth;
            }

            qemu_file_reset_rate_limit(s->file);
            initial_time = current_time;
            initial_bytes = qemu_ftell(s->file);
        }
        if (qemu_file_rate_limit(s->file)) {
            /* usleep expects microseconds */
            g_usleep((initial_time + BUFFER_DELAY - current_time)*1000);
        }
    }

    /* If we enabled cpu throttling for auto-converge, turn it off. */
    cpu_throttle_stop();

    qemu_mutex_lock_iothread();
    if (s->state == MIGRATION_STATUS_COMPLETED) {
        int64_t end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
        uint64_t transferred_bytes = qemu_ftell(s->file);
        s->total_time = end_time - s->total_time;
        s->downtime = end_time - start_time;
        if (s->total_time) {
            s->mbps = (((double) transferred_bytes * 8.0) /
                       ((double) s->total_time)) / 1000;
        }
        runstate_set(RUN_STATE_POSTMIGRATE);
    } else {
        if (old_vm_running) {
            vm_start();
        }
    }
    qemu_bh_schedule(s->cleanup_bh);
    qemu_mutex_unlock_iothread();

    rcu_unregister_thread();
    return NULL;
}

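/* Called once the outgoing transport has been set up: apply the bandwidth
 * limit, notify listeners and spawn the migration thread. */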
void migrate_fd_connect(MigrationState *s)
{
    /* This is a best first approximation of the expected downtime (ns to ms) */
    s->expected_downtime = max_downtime/1000000;
    s->cleanup_bh = qemu_bh_new(migrate_fd_cleanup, s);

    qemu_file_set_rate_limit(s->file,
                             s->bandwidth_limit / XFER_LIMIT_RATIO);

    /* Notify before starting migration thread */
    notifier_list_notify(&migration_state_notifiers, s);

    migrate_compress_threads_create();
    qemu_thread_create(&s->thread, "migration", migration_thread, s,
                       QEMU_THREAD_JOINABLE);
}