/*
 * QEMU live migration
 *
 * Copyright IBM, Corp. 2008
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 * Contributions after 2012-01-13 are licensed under the terms of the
 * GNU GPL, version 2 or (at your option) any later version.
 */

#include "qemu-common.h"
17
#include "qemu/error-report.h"
18
#include "qemu/main-loop.h"
19
#include "migration/migration.h"
20
#include "migration/qemu-file.h"
21
#include "sysemu/sysemu.h"
22
#include "block/block.h"
23
#include "qapi/qmp/qerror.h"
24
#include "qemu/sockets.h"
25
#include "migration/block.h"
26
#include "qemu/thread.h"
L
Luiz Capitulino 已提交
27
#include "qmp-commands.h"
28
#include "trace.h"
29
#include "qapi/util.h"
30

31
#define MAX_THROTTLE  (32 << 20)      /* Migration speed throttling */
A
aliguori 已提交
32

J
Juan Quintela 已提交
33 34 35 36 37
/* Amount of time to allocate to each "chunk" of bandwidth-throttled
 * data. */
#define BUFFER_DELAY     100
#define XFER_LIMIT_RATIO (1000 / BUFFER_DELAY)

38 39
/* Default compression thread count */
#define DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT 8
40 41 42
/* Default decompression thread count, usually decompression is at
 * least 4 times as fast as compression.*/
#define DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT 2
43 44 45
/*0: means nocompress, 1: best speed, ... 9: best compress ratio */
#define DEFAULT_MIGRATE_COMPRESS_LEVEL 1

46 47 48
/* Migration XBZRLE default cache size */
#define DEFAULT_MIGRATE_CACHE_SIZE (64 * 1024 * 1024)

49 50 51
static NotifierList migration_state_notifiers =
    NOTIFIER_LIST_INITIALIZER(migration_state_notifiers);

D
Dr. David Alan Gilbert 已提交
52 53
static bool deferred_incoming;

54 55 56 57
/* When we add fault tolerance, we could have several
   migrations at once.  For now we don't need to add
   dynamic creation of migration */

58
/* For outgoing */
59
MigrationState *migrate_get_current(void)
60 61
{
    static MigrationState current_migration = {
62
        .state = MIGRATION_STATUS_NONE,
63
        .bandwidth_limit = MAX_THROTTLE,
64
        .xbzrle_cache_size = DEFAULT_MIGRATE_CACHE_SIZE,
65
        .mbps = -1,
66 67 68 69 70 71
        .parameters[MIGRATION_PARAMETER_COMPRESS_LEVEL] =
                DEFAULT_MIGRATE_COMPRESS_LEVEL,
        .parameters[MIGRATION_PARAMETER_COMPRESS_THREADS] =
                DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT,
        .parameters[MIGRATION_PARAMETER_DECOMPRESS_THREADS] =
                DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT,
72 73 74 75 76
    };

    return &current_migration;
}

77 78 79 80 81 82 83 84 85 86 87 88
/* For incoming */
static MigrationIncomingState *mis_current;

/* Return the incoming-side state singleton (NULL when no incoming
 * migration is in progress). */
MigrationIncomingState *migration_incoming_get_current(void)
{
    return mis_current;
}

/* Allocate the incoming-side singleton for the stream @f.  The caller
 * must later release it with migration_incoming_state_destroy(). */
MigrationIncomingState *migration_incoming_state_new(QEMUFile* f)
{
    MigrationIncomingState *mis = g_malloc0(sizeof(*mis));

    mis->file = f;
    QLIST_INIT(&mis->loadvm_handlers);
    mis_current = mis;

    return mis;
}

/* Tear down the incoming-side singleton created by
 * migration_incoming_state_new().  Frees the loadvm handler list and
 * the state itself; s->file is NOT closed here (the caller owns it). */
void migration_incoming_state_destroy(void)
{
    loadvm_free_handlers(mis_current);
    g_free(mis_current);
    mis_current = NULL;
}

101 102

/* Wire format of the "globalstate" vmstate section: the source VM's
 * runstate name, sent as a NUL-terminated string in a fixed buffer. */
typedef struct {
    /* When true the section may be omitted (see global_state_needed()). */
    bool optional;
    /* strlen(runstate) + 1, filled in by global_state_pre_save(). */
    uint32_t size;
    /* Runstate name, e.g. "running" or "paused". */
    uint8_t runstate[100];
} GlobalState;

static GlobalState global_state;

/* Snapshot the current runstate name into global_state for migration.
 * Returns 0 on success, -EINVAL if the name does not fit the buffer. */
static int global_state_store(void)
{
    if (runstate_store((char *)global_state.runstate,
                       sizeof(global_state.runstate))) {
        return 0;
    }

    error_report("runstate name too big: %s", global_state.runstate);
    trace_migrate_state_too_big();
    return -EINVAL;
}

/* Return the runstate name received from (or stored for) the wire,
 * as a C string; "" means it was never received. */
static char *global_state_get_runstate(void)
{
    return (char *)global_state.runstate;
}

126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152
/* Mark the "globalstate" section as optional, allowing
 * global_state_needed() to drop it for compatible runstates. */
void global_state_set_optional(void)
{
    global_state.optional = true;
}

/* vmstate .needed callback: decide whether the "globalstate" section
 * must be put on the wire for this migration. */
static bool global_state_needed(void *opaque)
{
    GlobalState *s = opaque;
    char *runstate = (char *)s->runstate;

    /* A section that was not marked optional is always mandatory. */
    if (!s->optional) {
        return true;
    }

    /* "running" and "paused" are implied by the stream itself; every
     * other runstate has to be transmitted explicitly. */
    return strcmp(runstate, "running") != 0 &&
           strcmp(runstate, "paused") != 0;
}

153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191
/* vmstate .post_load callback: re-apply the source VM's runstate on
 * the destination.  "running" is handled later (see
 * process_incoming_migration_co()); any other name is parsed and the
 * VM is forced into that state.  Returns 0 or a negative errno. */
static int global_state_post_load(void *opaque, int version_id)
{
    GlobalState *s = opaque;
    char *runstate = (char *)s->runstate;
    int ret = 0;

    trace_migrate_global_state_post_load(runstate);

    if (strcmp(runstate, "running") != 0) {
        Error *local_err = NULL;
        int r = qapi_enum_parse(RunState_lookup, runstate, RUN_STATE_MAX,
                                -1, &local_err);

        if (r == -1) {
            /* Unknown runstate name: report and fail the migration. */
            if (local_err) {
                error_report_err(local_err);
            }
            return -EINVAL;
        }
        ret = vm_stop_force_state(r);
    }

    return ret;
}

/* vmstate .pre_save callback: record the wire length (including the
 * terminating NUL) of the runstate string before it is sent. */
static void global_state_pre_save(void *opaque)
{
    GlobalState *s = opaque;
    char *name = (char *)s->runstate;

    trace_migrate_global_state_pre_save(name);
    s->size = strlen(name) + 1;
}

/* Wire description of GlobalState: a length field followed by the
 * fixed-size runstate name buffer. */
static const VMStateDescription vmstate_globalstate = {
    .name = "globalstate",
    .version_id = 1,
    .minimum_version_id = 1,
    .post_load = global_state_post_load,
    .pre_save = global_state_pre_save,
    .needed = global_state_needed,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(size, GlobalState),
        VMSTATE_BUFFER(runstate, GlobalState),
        VMSTATE_END_OF_LIST()
    },
};

/* Register the "globalstate" section with the vmstate machinery. */
void register_global_state(void)
{
    /* We would use it independently that we receive it */
    strcpy((char *)global_state.runstate, "");
    vmstate_register(NULL, 0, &vmstate_globalstate, &global_state);
}

D
Dr. David Alan Gilbert 已提交
207 208 209 210 211 212 213 214 215 216 217 218 219
/*
 * Called on -incoming with a defer: uri.
 * The migration can be started later after any parameters have been
 * changed.
 */
static void deferred_incoming_migration(Error **errp)
{
    if (deferred_incoming) {
        /* A second "defer" is an error, but the flag stays set anyway. */
        error_setg(errp, "Incoming migration already deferred");
    }
    deferred_incoming = true;
}

220
/* Start listening for an incoming migration on @uri.  Dispatches to
 * the transport named by the URI scheme (tcp/rdma/exec/unix/fd), or
 * defers the whole setup when uri is "defer". */
void qemu_start_incoming_migration(const char *uri, Error **errp)
{
    const char *p;

    if (!strcmp(uri, "defer")) {
        deferred_incoming_migration(errp);
    } else if (strstart(uri, "tcp:", &p)) {
        tcp_start_incoming_migration(p, errp);
#ifdef CONFIG_RDMA
    } else if (strstart(uri, "rdma:", &p)) {
        rdma_start_incoming_migration(p, errp);
#endif
#if !defined(WIN32)
    /* exec/unix/fd transports are POSIX-only. */
    } else if (strstart(uri, "exec:", &p)) {
        exec_start_incoming_migration(p, errp);
    } else if (strstart(uri, "unix:", &p)) {
        unix_start_incoming_migration(p, errp);
    } else if (strstart(uri, "fd:", &p)) {
        fd_start_incoming_migration(p, errp);
#endif
    } else {
        error_setg(errp, "unknown migration protocol: %s", uri);
    }
}

245
/* Coroutine that consumes the whole incoming migration stream from
 * @opaque (a QEMUFile), then brings the destination VM into the right
 * runstate.  Exits the process on a load or cache-invalidation error. */
static void process_incoming_migration_co(void *opaque)
{
    QEMUFile *f = opaque;
    Error *local_err = NULL;
    int ret;

    migration_incoming_state_new(f);

    ret = qemu_loadvm_state(f);

    /* The stream is fully consumed: release it and the incoming state
     * before acting on the result. */
    qemu_fclose(f);
    free_xbzrle_decoded_buf();
    migration_incoming_state_destroy();

    if (ret < 0) {
        error_report("load of migration failed: %s", strerror(-ret));
        migrate_decompress_threads_join();
        exit(EXIT_FAILURE);
    }
    qemu_announce_self();

    /* Make sure all file formats flush their mutable metadata */
    bdrv_invalidate_cache_all(&local_err);
    if (local_err) {
        error_report_err(local_err);
        migrate_decompress_threads_join();
        exit(EXIT_FAILURE);
    }

    /* runstate == "" means that we haven't received it through the
     * wire, so we obey autostart.  runstate == "running" means that we
     * need to run it, we need to make sure that we do it after
     * everything else has finished.  Every other state change is done
     * at the post_load function */

    if (strcmp(global_state_get_runstate(), "running") == 0) {
        vm_start();
    } else if (strcmp(global_state_get_runstate(), "") == 0) {
        if (autostart) {
            vm_start();
        } else {
            runstate_set(RUN_STATE_PAUSED);
        }
    }
    migrate_decompress_threads_join();
}

292 293 294 295 296 297
/* Entry point for an accepted incoming connection: make the fd
 * non-blocking, spin up the decompression workers and hand the stream
 * to the loader coroutine. */
void process_incoming_migration(QEMUFile *f)
{
    int fd = qemu_get_fd(f);
    Coroutine *co;

    assert(fd != -1);
    migrate_decompress_threads_create();
    qemu_set_nonblock(fd);
    co = qemu_coroutine_create(process_incoming_migration_co);
    qemu_coroutine_enter(co, f);
}

303 304 305 306
/* amount of nanoseconds we are willing to wait for migration to be down.
 * the choice of nanoseconds is because it is the maximum resolution that
 * get_clock() can achieve. It is an internal measure. All user-visible
 * units must be in seconds */
static uint64_t max_downtime = 300000000;

/* Return the maximum tolerated downtime, in nanoseconds. */
uint64_t migrate_max_downtime(void)
{
    return max_downtime;
}

O
Orit Wasserman 已提交
314 315 316 317 318 319 320
/* QMP query-migrate-capabilities: report the on/off state of every
 * migration capability, in enum order, as a freshly allocated list. */
MigrationCapabilityStatusList *qmp_query_migrate_capabilities(Error **errp)
{
    MigrationState *s = migrate_get_current();
    MigrationCapabilityStatusList *head = NULL;
    MigrationCapabilityStatusList **tail = &head;
    int i;

    for (i = 0; i < MIGRATION_CAPABILITY_MAX; i++) {
        MigrationCapabilityStatusList *node = g_malloc0(sizeof(*node));

        node->value = g_malloc(sizeof(*node->value));
        node->value->capability = i;
        node->value->state = s->enabled_capabilities[i];
        /* Append in place so the list keeps enum order. */
        *tail = node;
        tail = &node->next;
    }

    return head;
}

339 340 341 342 343 344 345 346 347 348 349 350 351 352 353
/* QMP query-migrate-parameters: report the current migration tunables. */
MigrationParameters *qmp_query_migrate_parameters(Error **errp)
{
    MigrationState *s = migrate_get_current();
    MigrationParameters *params = g_malloc0(sizeof(*params));

    params->compress_level = s->parameters[MIGRATION_PARAMETER_COMPRESS_LEVEL];
    params->compress_threads =
            s->parameters[MIGRATION_PARAMETER_COMPRESS_THREADS];
    params->decompress_threads =
            s->parameters[MIGRATION_PARAMETER_DECOMPRESS_THREADS];

    return params;
}

O
Orit Wasserman 已提交
354 355 356 357 358 359 360 361 362
/* Fill in the XBZRLE cache statistics of @info, but only when the
 * XBZRLE capability is enabled; otherwise leave the field absent. */
static void get_xbzrle_cache_stats(MigrationInfo *info)
{
    if (!migrate_use_xbzrle()) {
        return;
    }

    info->has_xbzrle_cache = true;
    info->xbzrle_cache = g_malloc0(sizeof(*info->xbzrle_cache));
    info->xbzrle_cache->cache_size = migrate_xbzrle_cache_size();
    info->xbzrle_cache->bytes = xbzrle_mig_bytes_transferred();
    info->xbzrle_cache->pages = xbzrle_mig_pages_transferred();
    info->xbzrle_cache->cache_miss = xbzrle_mig_pages_cache_miss();
    info->xbzrle_cache->cache_miss_rate = xbzrle_mig_cache_miss_rate();
    info->xbzrle_cache->overflow = xbzrle_mig_pages_overflow();
}

L
Luiz Capitulino 已提交
368
/* QMP query-migrate: build a MigrationInfo snapshot of the current
 * migration.  Which fields are present depends on s->state; RAM and
 * disk statistics are only meaningful while active or once completed. */
MigrationInfo *qmp_query_migrate(Error **errp)
{
    MigrationInfo *info = g_malloc0(sizeof(*info));
    MigrationState *s = migrate_get_current();

    switch (s->state) {
    case MIGRATION_STATUS_NONE:
        /* no migration has happened ever */
        break;
    case MIGRATION_STATUS_SETUP:
        info->has_status = true;
        info->has_total_time = false;
        break;
    case MIGRATION_STATUS_ACTIVE:
    case MIGRATION_STATUS_CANCELLING:
        info->has_status = true;
        /* total_time is still running: report elapsed time so far. */
        info->has_total_time = true;
        info->total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME)
            - s->total_time;
        info->has_expected_downtime = true;
        info->expected_downtime = s->expected_downtime;
        info->has_setup_time = true;
        info->setup_time = s->setup_time;

        info->has_ram = true;
        info->ram = g_malloc0(sizeof(*info->ram));
        info->ram->transferred = ram_bytes_transferred();
        info->ram->remaining = ram_bytes_remaining();
        info->ram->total = ram_bytes_total();
        info->ram->duplicate = dup_mig_pages_transferred();
        info->ram->skipped = skipped_mig_pages_transferred();
        info->ram->normal = norm_mig_pages_transferred();
        info->ram->normal_bytes = norm_mig_bytes_transferred();
        info->ram->dirty_pages_rate = s->dirty_pages_rate;
        info->ram->mbps = s->mbps;
        info->ram->dirty_sync_count = s->dirty_sync_count;

        if (blk_mig_active()) {
            info->has_disk = true;
            info->disk = g_malloc0(sizeof(*info->disk));
            info->disk->transferred = blk_mig_bytes_transferred();
            info->disk->remaining = blk_mig_bytes_remaining();
            info->disk->total = blk_mig_bytes_total();
        }

        get_xbzrle_cache_stats(info);
        break;
    case MIGRATION_STATUS_COMPLETED:
        get_xbzrle_cache_stats(info);

        info->has_status = true;
        /* Migration is over: total_time/downtime are final values. */
        info->has_total_time = true;
        info->total_time = s->total_time;
        info->has_downtime = true;
        info->downtime = s->downtime;
        info->has_setup_time = true;
        info->setup_time = s->setup_time;

        info->has_ram = true;
        info->ram = g_malloc0(sizeof(*info->ram));
        info->ram->transferred = ram_bytes_transferred();
        info->ram->remaining = 0;
        info->ram->total = ram_bytes_total();
        info->ram->duplicate = dup_mig_pages_transferred();
        info->ram->skipped = skipped_mig_pages_transferred();
        info->ram->normal = norm_mig_pages_transferred();
        info->ram->normal_bytes = norm_mig_bytes_transferred();
        info->ram->mbps = s->mbps;
        info->ram->dirty_sync_count = s->dirty_sync_count;
        break;
    case MIGRATION_STATUS_FAILED:
        info->has_status = true;
        break;
    case MIGRATION_STATUS_CANCELLED:
        info->has_status = true;
        break;
    }
    info->status = s->state;

    return info;
}

O
Orit Wasserman 已提交
450 451 452 453 454 455
/* QMP migrate-set-capabilities: apply each requested capability flag.
 * Refused while a migration is being set up or is in flight. */
void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params,
                                  Error **errp)
{
    MigrationState *s = migrate_get_current();
    MigrationCapabilityStatusList *cap;

    if (s->state == MIGRATION_STATUS_ACTIVE ||
        s->state == MIGRATION_STATUS_SETUP) {
        error_setg(errp, QERR_MIGRATION_ACTIVE);
        return;
    }

    for (cap = params; cap != NULL; cap = cap->next) {
        s->enabled_capabilities[cap->value->capability] = cap->value->state;
    }
}

467 468 469 470 471 472 473 474 475 476
/* QMP migrate-set-parameters: validate and store compression tunables.
 * Validation is all-or-nothing per call: the first out-of-range value
 * rejects the command and no parameter is changed. */
void qmp_migrate_set_parameters(bool has_compress_level,
                                int64_t compress_level,
                                bool has_compress_threads,
                                int64_t compress_threads,
                                bool has_decompress_threads,
                                int64_t decompress_threads, Error **errp)
{
    MigrationState *s = migrate_get_current();

    /* zlib levels: 0 = store, 9 = maximum compression. */
    if (has_compress_level && (compress_level < 0 || compress_level > 9)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "compress_level",
                   "is invalid, it should be in the range of 0 to 9");
        return;
    }
    if (has_compress_threads &&
            (compress_threads < 1 || compress_threads > 255)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "compress_threads",
                   "is invalid, it should be in the range of 1 to 255");
        return;
    }
    if (has_decompress_threads &&
            (decompress_threads < 1 || decompress_threads > 255)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "decompress_threads",
                   "is invalid, it should be in the range of 1 to 255");
        return;
    }

    if (has_compress_level) {
        s->parameters[MIGRATION_PARAMETER_COMPRESS_LEVEL] = compress_level;
    }
    if (has_compress_threads) {
        s->parameters[MIGRATION_PARAMETER_COMPRESS_THREADS] = compress_threads;
    }
    if (has_decompress_threads) {
        s->parameters[MIGRATION_PARAMETER_DECOMPRESS_THREADS] =
                                                    decompress_threads;
    }
}

508 509
/* shared migration helpers */

510 511
/* Atomically move s->state from @old_state to @new_state.  If another
 * thread changed the state first the transition (and its trace event)
 * is silently skipped — callers re-check s->state afterwards. */
static void migrate_set_state(MigrationState *s, int old_state, int new_state)
{
    if (atomic_cmpxchg(&s->state, old_state, new_state) == old_state) {
        trace_migrate_set_state(new_state);
    }
}

517
/* Bottom half run (with the iothread lock held) after the migration
 * thread finishes or the migration is cancelled: join the thread,
 * close the stream, finalize the CANCELLING->CANCELLED transition and
 * notify listeners. */
static void migrate_fd_cleanup(void *opaque)
{
    MigrationState *s = opaque;

    qemu_bh_delete(s->cleanup_bh);
    s->cleanup_bh = NULL;

    if (s->file) {
        trace_migrate_fd_cleanup();
        /* Drop the iothread lock while joining: the migration thread
         * may itself be blocked trying to take it. */
        qemu_mutex_unlock_iothread();
        qemu_thread_join(&s->thread);
        qemu_mutex_lock_iothread();

        migrate_compress_threads_join();
        qemu_fclose(s->file);
        s->file = NULL;
    }

    assert(s->state != MIGRATION_STATUS_ACTIVE);

    if (s->state != MIGRATION_STATUS_COMPLETED) {
        qemu_savevm_state_cancel();
        if (s->state == MIGRATION_STATUS_CANCELLING) {
            migrate_set_state(s, MIGRATION_STATUS_CANCELLING,
                              MIGRATION_STATUS_CANCELLED);
        }
    }

    notifier_list_notify(&migration_state_notifiers, s);
}

548
/* Fail an outgoing migration whose transport never came up; s->file
 * must still be NULL (nothing to close or join yet). */
void migrate_fd_error(MigrationState *s)
{
    trace_migrate_fd_error();
    assert(s->file == NULL);
    migrate_set_state(s, MIGRATION_STATUS_SETUP, MIGRATION_STATUS_FAILED);
    notifier_list_notify(&migration_state_notifiers, s);
}

556
/* Request cancellation of the outgoing migration.  Retries the
 * SETUP/ACTIVE -> CANCELLING transition until it wins the race with the
 * migration thread, or bails out once the state is no longer
 * cancellable. */
static void migrate_fd_cancel(MigrationState *s)
{
    int old_state ;
    QEMUFile *f = migrate_get_current()->file;
    trace_migrate_fd_cancel();

    do {
        old_state = s->state;
        if (old_state != MIGRATION_STATUS_SETUP &&
            old_state != MIGRATION_STATUS_ACTIVE) {
            break;
        }
        migrate_set_state(s, old_state, MIGRATION_STATUS_CANCELLING);
    } while (s->state != MIGRATION_STATUS_CANCELLING);

    /*
     * If we're unlucky the migration code might be stuck somewhere in a
     * send/write while the network has failed and is waiting to timeout;
     * if we've got shutdown(2) available then we can force it to quit.
     * The outgoing qemu file gets closed in migrate_fd_cleanup that is
     * called in a bh, so there is no race against this cancel.
     */
    if (s->state == MIGRATION_STATUS_CANCELLING && f) {
        qemu_file_shutdown(f);
    }
}

583 584 585 586 587 588 589
/* Subscribe @notify to migration state-change notifications. */
void add_migration_state_change_notifier(Notifier *notify)
{
    notifier_list_add(&migration_state_notifiers, notify);
}

/* Unsubscribe @notify from migration state-change notifications. */
void remove_migration_state_change_notifier(Notifier *notify)
{
    notifier_remove(notify);
}

S
Stefan Hajnoczi 已提交
593
/* True while the migration is still in its SETUP phase. */
bool migration_in_setup(MigrationState *s)
{
    return s->state == MIGRATION_STATUS_SETUP;
}

598
/* True once the migration completed successfully. */
bool migration_has_finished(MigrationState *s)
{
    return s->state == MIGRATION_STATUS_COMPLETED;
}
602

603 604
/* True when the migration ended without success (cancelled or failed). */
bool migration_has_failed(MigrationState *s)
{
    return (s->state == MIGRATION_STATUS_CANCELLED ||
            s->state == MIGRATION_STATUS_FAILED);
}

I
Isaku Yamahata 已提交
609
/* Reset the migration singleton for a fresh attempt: everything is
 * zeroed except the user-configured settings (capabilities, tunables,
 * cache size, bandwidth limit), which are saved across the memset and
 * restored afterwards.  Moves state NONE -> SETUP and stamps the start
 * time. */
static MigrationState *migrate_init(const MigrationParams *params)
{
    MigrationState *s = migrate_get_current();
    int64_t bandwidth_limit = s->bandwidth_limit;
    bool enabled_capabilities[MIGRATION_CAPABILITY_MAX];
    int64_t xbzrle_cache_size = s->xbzrle_cache_size;
    int compress_level = s->parameters[MIGRATION_PARAMETER_COMPRESS_LEVEL];
    int compress_thread_count =
            s->parameters[MIGRATION_PARAMETER_COMPRESS_THREADS];
    int decompress_thread_count =
            s->parameters[MIGRATION_PARAMETER_DECOMPRESS_THREADS];

    /* Save settings that must survive the reset below. */
    memcpy(enabled_capabilities, s->enabled_capabilities,
           sizeof(enabled_capabilities));

    memset(s, 0, sizeof(*s));
    s->params = *params;
    memcpy(s->enabled_capabilities, enabled_capabilities,
           sizeof(enabled_capabilities));
    s->xbzrle_cache_size = xbzrle_cache_size;

    s->parameters[MIGRATION_PARAMETER_COMPRESS_LEVEL] = compress_level;
    s->parameters[MIGRATION_PARAMETER_COMPRESS_THREADS] =
               compress_thread_count;
    s->parameters[MIGRATION_PARAMETER_DECOMPRESS_THREADS] =
               decompress_thread_count;
    s->bandwidth_limit = bandwidth_limit;
    migrate_set_state(s, MIGRATION_STATUS_NONE, MIGRATION_STATUS_SETUP);

    /* total_time holds the start stamp until the migration finishes. */
    s->total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
    return s;
}
641

A
Anthony Liguori 已提交
642 643 644 645 646 647 648 649 650 651 652 653
static GSList *migration_blockers;

/* Register @reason as a condition that blocks outgoing migration. */
void migrate_add_blocker(Error *reason)
{
    migration_blockers = g_slist_prepend(migration_blockers, reason);
}

/* Remove a blocker previously added with migrate_add_blocker(). */
void migrate_del_blocker(Error *reason)
{
    migration_blockers = g_slist_remove(migration_blockers, reason);
}

D
Dr. David Alan Gilbert 已提交
654 655 656
/* QMP migrate-incoming: start the incoming side that was deferred with
 * "-incoming defer".  Only valid once per VM lifetime. */
void qmp_migrate_incoming(const char *uri, Error **errp)
{
    Error *local_err = NULL;
    static bool once = true;

    if (!deferred_incoming) {
        error_setg(errp, "For use with '-incoming defer'");
        return;
    }
    if (!once) {
        error_setg(errp, "The incoming migration has already been started");
        /* BUGFIX: the error used to fall through and restart the
         * incoming migration anyway; bail out instead. */
        return;
    }

    qemu_start_incoming_migration(uri, &local_err);

    if (local_err) {
        error_propagate(errp, local_err);
        return;
    }

    once = false;
}

L
Luiz Capitulino 已提交
677 678 679
/* QMP migrate: validate preconditions, reset the migration state and
 * hand off to the transport selected by the URI scheme.  On transport
 * setup failure the state moves to FAILED and the error is propagated. */
void qmp_migrate(const char *uri, bool has_blk, bool blk,
                 bool has_inc, bool inc, bool has_detach, bool detach,
                 Error **errp)
{
    Error *local_err = NULL;
    MigrationState *s = migrate_get_current();
    MigrationParams params;
    const char *p;

    params.blk = has_blk && blk;
    params.shared = has_inc && inc;

    /* Only one outgoing migration at a time. */
    if (s->state == MIGRATION_STATUS_ACTIVE ||
        s->state == MIGRATION_STATUS_SETUP ||
        s->state == MIGRATION_STATUS_CANCELLING) {
        error_setg(errp, QERR_MIGRATION_ACTIVE);
        return;
    }
    if (runstate_check(RUN_STATE_INMIGRATE)) {
        error_setg(errp, "Guest is waiting for an incoming migration");
        return;
    }

    if (qemu_savevm_state_blocked(errp)) {
        return;
    }

    if (migration_blockers) {
        /* Report the first registered blocker as the reason. */
        *errp = error_copy(migration_blockers->data);
        return;
    }

    /* We are starting a new migration, so we want to start in a clean
       state.  This change is only needed if previous migration
       failed/was cancelled.  We don't use migrate_set_state() because
       we are setting the initial state, not changing it. */
    s->state = MIGRATION_STATUS_NONE;

    s = migrate_init(&params);

    if (strstart(uri, "tcp:", &p)) {
        tcp_start_outgoing_migration(s, p, &local_err);
#ifdef CONFIG_RDMA
    } else if (strstart(uri, "rdma:", &p)) {
        rdma_start_outgoing_migration(s, p, &local_err);
#endif
#if !defined(WIN32)
    /* exec/unix/fd transports are POSIX-only. */
    } else if (strstart(uri, "exec:", &p)) {
        exec_start_outgoing_migration(s, p, &local_err);
    } else if (strstart(uri, "unix:", &p)) {
        unix_start_outgoing_migration(s, p, &local_err);
    } else if (strstart(uri, "fd:", &p)) {
        fd_start_outgoing_migration(s, p, &local_err);
#endif
    } else {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "uri",
                   "a valid migration protocol");
        migrate_set_state(s, MIGRATION_STATUS_SETUP, MIGRATION_STATUS_FAILED);
        return;
    }

    if (local_err) {
        migrate_fd_error(s);
        error_propagate(errp, local_err);
        return;
    }
}

L
Luiz Capitulino 已提交
745
/* QMP migrate_cancel: abort whatever outgoing migration is in flight. */
void qmp_migrate_cancel(Error **errp)
{
    MigrationState *s = migrate_get_current();

    migrate_fd_cancel(s);
}

750 751 752
/* QMP migrate-set-cache-size: validate and apply a new XBZRLE cache
 * size.  The actual size may be rounded by xbzrle_cache_resize(). */
void qmp_migrate_set_cache_size(int64_t value, Error **errp)
{
    MigrationState *s = migrate_get_current();
    int64_t resized;

    /* Check for truncation */
    if (value != (size_t)value) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cache size",
                   "exceeding address space");
        return;
    }

    /* Cache should not be larger than guest ram size */
    if (value > ram_bytes_total()) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cache size",
                   "exceeds guest ram size ");
        return;
    }

    resized = xbzrle_cache_resize(value);
    if (resized < 0) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cache size",
                   "is smaller than page size");
        return;
    }

    s->xbzrle_cache_size = resized;
}

/* QMP query-migrate-cache-size: report the XBZRLE cache size in bytes. */
int64_t qmp_query_migrate_cache_size(Error **errp)
{
    return migrate_xbzrle_cache_size();
}

L
Luiz Capitulino 已提交
784
/* QMP migrate_set_speed: set the bandwidth limit in bytes/second,
 * clamped to [0, SIZE_MAX], and apply it to an active stream at once. */
void qmp_migrate_set_speed(int64_t value, Error **errp)
{
    MigrationState *s = migrate_get_current();
    int64_t limit = value;

    if (limit < 0) {
        limit = 0;
    }
    if (limit > SIZE_MAX) {
        limit = SIZE_MAX;
    }

    s->bandwidth_limit = limit;
    if (s->file) {
        /* The rate limit is enforced per BUFFER_DELAY slice. */
        qemu_file_set_rate_limit(s->file, s->bandwidth_limit / XFER_LIMIT_RATIO);
    }
}

802
/* QMP migrate_set_downtime: set the tolerated downtime.  @value is in
 * seconds; it is converted to nanoseconds and clamped to uint64_t. */
void qmp_migrate_set_downtime(double value, Error **errp)
{
    double ns = value * 1e9;

    max_downtime = (uint64_t)MAX(0, MIN(UINT64_MAX, ns));
}
808

809 810 811 812 813 814 815 816 817
bool migrate_auto_converge(void)
{
    MigrationState *s;

    s = migrate_get_current();

    return s->enabled_capabilities[MIGRATION_CAPABILITY_AUTO_CONVERGE];
}

818 819 820 821 822 823 824 825 826
bool migrate_zero_blocks(void)
{
    MigrationState *s;

    s = migrate_get_current();

    return s->enabled_capabilities[MIGRATION_CAPABILITY_ZERO_BLOCKS];
}

827 828
bool migrate_use_compression(void)
{
829 830 831 832 833
    MigrationState *s;

    s = migrate_get_current();

    return s->enabled_capabilities[MIGRATION_CAPABILITY_COMPRESS];
834 835 836 837 838 839 840 841
}

int migrate_compress_level(void)
{
    MigrationState *s;

    s = migrate_get_current();

842
    return s->parameters[MIGRATION_PARAMETER_COMPRESS_LEVEL];
843 844 845 846 847 848 849 850
}

int migrate_compress_threads(void)
{
    MigrationState *s;

    s = migrate_get_current();

851
    return s->parameters[MIGRATION_PARAMETER_COMPRESS_THREADS];
852 853
}

854 855 856 857 858 859
int migrate_decompress_threads(void)
{
    MigrationState *s;

    s = migrate_get_current();

860
    return s->parameters[MIGRATION_PARAMETER_DECOMPRESS_THREADS];
861 862
}

863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879
int migrate_use_xbzrle(void)
{
    MigrationState *s;

    s = migrate_get_current();

    return s->enabled_capabilities[MIGRATION_CAPABILITY_XBZRLE];
}

int64_t migrate_xbzrle_cache_size(void)
{
    MigrationState *s;

    s = migrate_get_current();

    return s->xbzrle_cache_size;
}
880 881 882

/* migration thread support */

J
Juan Quintela 已提交
883
/* Body of the outgoing migration thread.  Iterates the savevm state
 * under the configured rate limit until the remaining dirty data fits
 * in the downtime budget (max_size), then stops the VM and sends the
 * final pass.  Updates throughput/downtime statistics once per
 * BUFFER_DELAY window, and finalizes timing + runstate under the
 * iothread lock before scheduling migrate_fd_cleanup(). */
static void *migration_thread(void *opaque)
{
    MigrationState *s = opaque;
    int64_t initial_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
    int64_t setup_start = qemu_clock_get_ms(QEMU_CLOCK_HOST);
    int64_t initial_bytes = 0;
    int64_t max_size = 0;
    int64_t start_time = initial_time;
    bool old_vm_running = false;

    qemu_savevm_state_header(s->file);
    qemu_savevm_state_begin(s->file, &s->params);

    s->setup_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) - setup_start;
    migrate_set_state(s, MIGRATION_STATUS_SETUP, MIGRATION_STATUS_ACTIVE);

    while (s->state == MIGRATION_STATUS_ACTIVE) {
        int64_t current_time;
        uint64_t pending_size;

        if (!qemu_file_rate_limit(s->file)) {
            pending_size = qemu_savevm_state_pending(s->file, max_size);
            trace_migrate_pending(pending_size, max_size);
            if (pending_size && pending_size >= max_size) {
                /* Still too much dirty data: another iteration. */
                qemu_savevm_state_iterate(s->file);
            } else {
                int ret;

                /* Final phase: stop the guest and send everything left,
                 * with the rate limit lifted. */
                qemu_mutex_lock_iothread();
                start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
                qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER);
                old_vm_running = runstate_is_running();

                ret = global_state_store();
                if (!ret) {
                    ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
                    if (ret >= 0) {
                        qemu_file_set_rate_limit(s->file, INT64_MAX);
                        qemu_savevm_state_complete(s->file);
                    }
                }
                qemu_mutex_unlock_iothread();

                if (ret < 0) {
                    migrate_set_state(s, MIGRATION_STATUS_ACTIVE,
                                      MIGRATION_STATUS_FAILED);
                    break;
                }

                if (!qemu_file_get_error(s->file)) {
                    migrate_set_state(s, MIGRATION_STATUS_ACTIVE,
                                      MIGRATION_STATUS_COMPLETED);
                    break;
                }
            }
        }

        if (qemu_file_get_error(s->file)) {
            migrate_set_state(s, MIGRATION_STATUS_ACTIVE,
                              MIGRATION_STATUS_FAILED);
            break;
        }
        current_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
        if (current_time >= initial_time + BUFFER_DELAY) {
            /* A rate-limit window elapsed: refresh bandwidth estimate
             * and the downtime-sized transfer budget. */
            uint64_t transferred_bytes = qemu_ftell(s->file) - initial_bytes;
            uint64_t time_spent = current_time - initial_time;
            double bandwidth = transferred_bytes / time_spent;
            max_size = bandwidth * migrate_max_downtime() / 1000000;

            s->mbps = time_spent ? (((double) transferred_bytes * 8.0) /
                    ((double) time_spent / 1000.0)) / 1000.0 / 1000.0 : -1;

            trace_migrate_transferred(transferred_bytes, time_spent,
                                      bandwidth, max_size);
            /* if we haven't sent anything, we don't want to recalculate
               10000 is a small enough number for our purposes */
            if (s->dirty_bytes_rate && transferred_bytes > 10000) {
                s->expected_downtime = s->dirty_bytes_rate / bandwidth;
            }

            qemu_file_reset_rate_limit(s->file);
            initial_time = current_time;
            initial_bytes = qemu_ftell(s->file);
        }
        if (qemu_file_rate_limit(s->file)) {
            /* usleep expects microseconds */
            g_usleep((initial_time + BUFFER_DELAY - current_time)*1000);
        }
    }

    /* Finalize statistics and runstate under the iothread lock. */
    qemu_mutex_lock_iothread();
    if (s->state == MIGRATION_STATUS_COMPLETED) {
        int64_t end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
        uint64_t transferred_bytes = qemu_ftell(s->file);
        /* total_time held the start stamp until now (see migrate_init). */
        s->total_time = end_time - s->total_time;
        s->downtime = end_time - start_time;
        if (s->total_time) {
            s->mbps = (((double) transferred_bytes * 8.0) /
                       ((double) s->total_time)) / 1000;
        }
        runstate_set(RUN_STATE_POSTMIGRATE);
    } else {
        /* Failed or cancelled: resume the guest if it was running. */
        if (old_vm_running) {
            vm_start();
        }
    }
    qemu_bh_schedule(s->cleanup_bh);
    qemu_mutex_unlock_iothread();

    return NULL;
}

995
/* Kick off the outgoing migration once the transport (s->file) is
 * connected: seed the downtime estimate, arm the cleanup bottom half,
 * configure the rate limit, and spawn the migration thread. */
void migrate_fd_connect(MigrationState *s)
{
    /* This is a best 1st approximation. ns to ms */
    s->expected_downtime = max_downtime/1000000;
    s->cleanup_bh = qemu_bh_new(migrate_fd_cleanup, s);

    qemu_file_set_rate_limit(s->file,
                             s->bandwidth_limit / XFER_LIMIT_RATIO);

    /* Notify before starting migration thread */
    notifier_list_notify(&migration_state_notifiers, s);

    migrate_compress_threads_create();
    qemu_thread_create(&s->thread, "migration", migration_thread, s,
                       QEMU_THREAD_JOINABLE);
}