/*
 * QEMU System Emulator
 *
 * Copyright (c) 2003-2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
#include <stdint.h>
#include <stdarg.h>
#include <stdlib.h>
#include <zlib.h>
#ifndef _WIN32
#include <sys/types.h>
#include <sys/mman.h>
#endif
#include "config.h"
#include "monitor/monitor.h"
#include "sysemu/sysemu.h"
#include "qemu/bitops.h"
#include "qemu/bitmap.h"
#include "sysemu/arch_init.h"
#include "audio/audio.h"
#include "hw/i386/pc.h"
#include "hw/pci/pci.h"
#include "hw/audio/audio.h"
#include "sysemu/kvm.h"
#include "migration/migration.h"
#include "hw/i386/smbios.h"
#include "exec/address-spaces.h"
#include "hw/audio/pcspk.h"
#include "migration/page_cache.h"
#include "qemu/config-file.h"
#include "qemu/error-report.h"
#include "qmp-commands.h"
#include "trace.h"
#include "exec/cpu-all.h"
#include "exec/ram_addr.h"
#include "hw/acpi/acpi.h"
#include "qemu/host-utils.h"
#include "qemu/rcu_queue.h"

#ifdef DEBUG_ARCH_INIT
#define DPRINTF(fmt, ...) \
    do { fprintf(stdout, "arch_init: " fmt, ## __VA_ARGS__); } while (0)
#else
#define DPRINTF(fmt, ...) \
    do { } while (0)
#endif

#ifdef TARGET_SPARC
int graphic_width = 1024;
int graphic_height = 768;
int graphic_depth = 8;
#else
int graphic_width = 800;
int graphic_height = 600;
int graphic_depth = 32;
#endif


#if defined(TARGET_ALPHA)
#define QEMU_ARCH QEMU_ARCH_ALPHA
#elif defined(TARGET_ARM)
#define QEMU_ARCH QEMU_ARCH_ARM
#elif defined(TARGET_CRIS)
#define QEMU_ARCH QEMU_ARCH_CRIS
#elif defined(TARGET_I386)
#define QEMU_ARCH QEMU_ARCH_I386
#elif defined(TARGET_M68K)
#define QEMU_ARCH QEMU_ARCH_M68K
#elif defined(TARGET_LM32)
#define QEMU_ARCH QEMU_ARCH_LM32
#elif defined(TARGET_MICROBLAZE)
#define QEMU_ARCH QEMU_ARCH_MICROBLAZE
#elif defined(TARGET_MIPS)
#define QEMU_ARCH QEMU_ARCH_MIPS
#elif defined(TARGET_MOXIE)
#define QEMU_ARCH QEMU_ARCH_MOXIE
#elif defined(TARGET_OPENRISC)
#define QEMU_ARCH QEMU_ARCH_OPENRISC
#elif defined(TARGET_PPC)
#define QEMU_ARCH QEMU_ARCH_PPC
#elif defined(TARGET_S390X)
#define QEMU_ARCH QEMU_ARCH_S390X
#elif defined(TARGET_SH4)
#define QEMU_ARCH QEMU_ARCH_SH4
#elif defined(TARGET_SPARC)
#define QEMU_ARCH QEMU_ARCH_SPARC
#elif defined(TARGET_XTENSA)
#define QEMU_ARCH QEMU_ARCH_XTENSA
#elif defined(TARGET_UNICORE32)
#define QEMU_ARCH QEMU_ARCH_UNICORE32
#elif defined(TARGET_TRICORE)
#define QEMU_ARCH QEMU_ARCH_TRICORE
#endif

const uint32_t arch_type = QEMU_ARCH;

static bool mig_throttle_on;
static int dirty_rate_high_cnt;
static void check_guest_throttling(void);

static uint64_t bitmap_sync_count;

/***********************************************************/
/* ram save/restore */

#define RAM_SAVE_FLAG_FULL     0x01 /* Obsolete, not used anymore */
#define RAM_SAVE_FLAG_COMPRESS 0x02
#define RAM_SAVE_FLAG_MEM_SIZE 0x04
#define RAM_SAVE_FLAG_PAGE     0x08
#define RAM_SAVE_FLAG_EOS      0x10
#define RAM_SAVE_FLAG_CONTINUE 0x20
#define RAM_SAVE_FLAG_XBZRLE   0x40
/* 0x80 is reserved in migration.h start with 0x100 next */
#define RAM_SAVE_FLAG_COMPRESS_PAGE    0x100
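
/*
 * Each page record on the wire starts with a big-endian 64-bit word
 * (written by save_page_header() below) that packs the page offset in
 * its upper bits and the RAM_SAVE_FLAG_* bits in its lower bits, e.g.
 * (offset | RAM_SAVE_FLAG_PAGE) for a normal page.
 */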

static struct defconfig_file {
    const char *filename;
    /* Indicates it is a user config file (disabled by -no-user-config) */
    bool userconfig;
} default_config_files[] = {
    { CONFIG_QEMU_CONFDIR "/qemu.conf",                   true },
    { CONFIG_QEMU_CONFDIR "/target-" TARGET_NAME ".conf", true },
    { NULL }, /* end of list */
};

static const uint8_t ZERO_TARGET_PAGE[TARGET_PAGE_SIZE];

int qemu_read_default_config_files(bool userconfig)
{
    int ret;
    struct defconfig_file *f;

    for (f = default_config_files; f->filename; f++) {
        if (!userconfig && f->userconfig) {
            continue;
        }
        ret = qemu_read_config_file(f->filename);
        if (ret < 0 && ret != -ENOENT) {
            return ret;
        }
    }

    return 0;
}

static inline bool is_zero_range(uint8_t *p, uint64_t size)
{
    return buffer_find_nonzero_offset(p, size) == size;
}

/* struct contains XBZRLE cache and a static page
   used by the compression */
static struct {
    /* buffer used for XBZRLE encoding */
    uint8_t *encoded_buf;
    /* buffer for storing page content */
    uint8_t *current_buf;
    /* Cache for XBZRLE, Protected by lock. */
    PageCache *cache;
    QemuMutex lock;
} XBZRLE;

/* buffer used for XBZRLE decoding */
static uint8_t *xbzrle_decoded_buf;

static void XBZRLE_cache_lock(void)
{
    if (migrate_use_xbzrle()) {
        qemu_mutex_lock(&XBZRLE.lock);
    }
}

static void XBZRLE_cache_unlock(void)
{
    if (migrate_use_xbzrle()) {
        qemu_mutex_unlock(&XBZRLE.lock);
    }
}

/*
 * Called from qmp_migrate_set_cache_size in the main thread, possibly
 * while a migration is in progress.  A running migration may be using
 * the cache and might finish during this call, hence changes to the
 * cache are protected by XBZRLE.lock.
 */
int64_t xbzrle_cache_resize(int64_t new_size)
{
    PageCache *new_cache;
    int64_t ret;

    if (new_size < TARGET_PAGE_SIZE) {
        return -1;
    }

    XBZRLE_cache_lock();

    if (XBZRLE.cache != NULL) {
        if (pow2floor(new_size) == migrate_xbzrle_cache_size()) {
            goto out_new_size;
        }
        new_cache = cache_init(new_size / TARGET_PAGE_SIZE,
                               TARGET_PAGE_SIZE);
        if (!new_cache) {
            error_report("Error creating cache");
            ret = -1;
            goto out;
        }

        cache_fini(XBZRLE.cache);
        XBZRLE.cache = new_cache;
    }

out_new_size:
    ret = pow2floor(new_size);
out:
    XBZRLE_cache_unlock();
    return ret;
}
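
/*
 * Note: the value returned to the caller is new_size rounded down to a
 * power of two, e.g. a request for 5 MiB yields a 4 MiB cache.
 */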

/* accounting for migration statistics */
typedef struct AccountingInfo {
    uint64_t dup_pages;
    uint64_t skipped_pages;
    uint64_t norm_pages;
    uint64_t iterations;
    uint64_t xbzrle_bytes;
    uint64_t xbzrle_pages;
    uint64_t xbzrle_cache_miss;
    double xbzrle_cache_miss_rate;
    uint64_t xbzrle_overflows;
} AccountingInfo;

static AccountingInfo acct_info;

static void acct_clear(void)
{
    memset(&acct_info, 0, sizeof(acct_info));
}

uint64_t dup_mig_bytes_transferred(void)
{
    return acct_info.dup_pages * TARGET_PAGE_SIZE;
}

uint64_t dup_mig_pages_transferred(void)
{
    return acct_info.dup_pages;
}

uint64_t skipped_mig_bytes_transferred(void)
{
    return acct_info.skipped_pages * TARGET_PAGE_SIZE;
}

uint64_t skipped_mig_pages_transferred(void)
{
    return acct_info.skipped_pages;
}

uint64_t norm_mig_bytes_transferred(void)
{
    return acct_info.norm_pages * TARGET_PAGE_SIZE;
}

uint64_t norm_mig_pages_transferred(void)
{
    return acct_info.norm_pages;
}

uint64_t xbzrle_mig_bytes_transferred(void)
{
    return acct_info.xbzrle_bytes;
}

uint64_t xbzrle_mig_pages_transferred(void)
{
    return acct_info.xbzrle_pages;
}

uint64_t xbzrle_mig_pages_cache_miss(void)
{
    return acct_info.xbzrle_cache_miss;
}

double xbzrle_mig_cache_miss_rate(void)
{
    return acct_info.xbzrle_cache_miss_rate;
}

uint64_t xbzrle_mig_pages_overflow(void)
{
    return acct_info.xbzrle_overflows;
}

/* This is the last block that we have visited searching for dirty pages */
static RAMBlock *last_seen_block;
/* This is the last block from where we have sent data */
static RAMBlock *last_sent_block;
static ram_addr_t last_offset;
static unsigned long *migration_bitmap;
static uint64_t migration_dirty_pages;
static uint32_t last_version;
static bool ram_bulk_stage;

struct CompressParam {
    /* To be done */
};
typedef struct CompressParam CompressParam;

struct DecompressParam {
    /* To be done */
};
typedef struct DecompressParam DecompressParam;

static CompressParam *comp_param;
static QemuThread *compress_threads;
static bool quit_comp_thread;
static bool quit_decomp_thread;
static DecompressParam *decomp_param;
static QemuThread *decompress_threads;
static uint8_t *compressed_data_buf;

static void *do_data_compress(void *opaque)
{
    while (!quit_comp_thread) {
        /* To be done */
    }

    return NULL;
}

static inline void terminate_compression_threads(void)
{
    quit_comp_thread = true;

    /* To be done */
}

void migrate_compress_threads_join(void)
{
    int i, thread_count;

    if (!migrate_use_compression()) {
        return;
    }
    terminate_compression_threads();
    thread_count = migrate_compress_threads();
    for (i = 0; i < thread_count; i++) {
        qemu_thread_join(compress_threads + i);
    }
    g_free(compress_threads);
    g_free(comp_param);
    compress_threads = NULL;
    comp_param = NULL;
}

void migrate_compress_threads_create(void)
{
    int i, thread_count;

    if (!migrate_use_compression()) {
        return;
    }
    quit_comp_thread = false;
    thread_count = migrate_compress_threads();
    compress_threads = g_new0(QemuThread, thread_count);
    comp_param = g_new0(CompressParam, thread_count);
    for (i = 0; i < thread_count; i++) {
        qemu_thread_create(compress_threads + i, "compress",
                           do_data_compress, comp_param + i,
                           QEMU_THREAD_JOINABLE);
    }
}
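
/*
 * Note: at this point in the series do_data_compress() is a stub; the
 * worker threads are created and joined, but the actual compression
 * work is still to be filled in.
 */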

/**
 * save_page_header: Write page header to wire
 *
 * If this is the 1st block, it also writes the block identification
 *
 * Returns: Number of bytes written
 *
 * @f: QEMUFile where to send the data
 * @block: block that contains the page we want to send
 * @offset: offset inside the block for the page
 *          in the lower bits, it contains flags
 */
static size_t save_page_header(QEMUFile *f, RAMBlock *block, ram_addr_t offset)
{
    size_t size;

    qemu_put_be64(f, offset);
    size = 8;

    if (!(offset & RAM_SAVE_FLAG_CONTINUE)) {
        qemu_put_byte(f, strlen(block->idstr));
        qemu_put_buffer(f, (uint8_t *)block->idstr,
                        strlen(block->idstr));
        size += 1 + strlen(block->idstr);
    }
    return size;
}

/* Update the xbzrle cache to reflect a page that's been sent as all 0.
 * The important thing is that a stale (not-yet-0'd) page be replaced
 * by the new data.
 * As a bonus, if the page wasn't in the cache it gets added so that
 * when a small write is made into the 0'd page it gets XBZRLE sent
 */
static void xbzrle_cache_zero_page(ram_addr_t current_addr)
{
    if (ram_bulk_stage || !migrate_use_xbzrle()) {
        return;
    }

    /* We don't care if this fails to allocate a new cache page
     * as long as it updated an old one */
    cache_insert(XBZRLE.cache, current_addr, ZERO_TARGET_PAGE,
                 bitmap_sync_count);
}

#define ENCODING_FLAG_XBZRLE 0x1
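
/*
 * An XBZRLE page record therefore looks like:
 *   page header | flag byte (ENCODING_FLAG_XBZRLE) | be16 encoded_len |
 *   encoded_len bytes of XBZRLE data
 * which is where the "encoded_len + 1 + 2" accounting in
 * save_xbzrle_page() comes from.
 */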

/**
 * save_xbzrle_page: compress and send current page
 *
 * Returns: 1 means that we wrote the page
 *          0 means that page is identical to the one already sent
 *          -1 means that xbzrle would be longer than normal
 *
 * @f: QEMUFile where to send the data
 * @current_data:
 * @current_addr:
 * @block: block that contains the page we want to send
 * @offset: offset inside the block for the page
 * @last_stage: if we are at the completion stage
 * @bytes_transferred: increase it with the number of transferred bytes
 */
static int save_xbzrle_page(QEMUFile *f, uint8_t **current_data,
                            ram_addr_t current_addr, RAMBlock *block,
                            ram_addr_t offset, bool last_stage,
                            uint64_t *bytes_transferred)
{
    int encoded_len = 0, bytes_xbzrle;
    uint8_t *prev_cached_page;

    if (!cache_is_cached(XBZRLE.cache, current_addr, bitmap_sync_count)) {
        acct_info.xbzrle_cache_miss++;
        if (!last_stage) {
            if (cache_insert(XBZRLE.cache, current_addr, *current_data,
                             bitmap_sync_count) == -1) {
                return -1;
            } else {
                /* update *current_data when the page has been
                   inserted into cache */
                *current_data = get_cached_data(XBZRLE.cache, current_addr);
            }
        }
        return -1;
    }

    prev_cached_page = get_cached_data(XBZRLE.cache, current_addr);

    /* save current buffer into memory */
    memcpy(XBZRLE.current_buf, *current_data, TARGET_PAGE_SIZE);

    /* XBZRLE encoding (if there is no overflow) */
    encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf,
                                       TARGET_PAGE_SIZE, XBZRLE.encoded_buf,
                                       TARGET_PAGE_SIZE);
    if (encoded_len == 0) {
        DPRINTF("Skipping unmodified page\n");
        return 0;
    } else if (encoded_len == -1) {
        DPRINTF("Overflow\n");
        acct_info.xbzrle_overflows++;
        /* update data in the cache */
        if (!last_stage) {
            memcpy(prev_cached_page, *current_data, TARGET_PAGE_SIZE);
            *current_data = prev_cached_page;
        }
        return -1;
    }

    /* we need to update the data in the cache, in order to get the same data */
    if (!last_stage) {
        memcpy(prev_cached_page, XBZRLE.current_buf, TARGET_PAGE_SIZE);
    }

    /* Send XBZRLE based compressed page */
    bytes_xbzrle = save_page_header(f, block, offset | RAM_SAVE_FLAG_XBZRLE);
    qemu_put_byte(f, ENCODING_FLAG_XBZRLE);
    qemu_put_be16(f, encoded_len);
    qemu_put_buffer(f, XBZRLE.encoded_buf, encoded_len);
    bytes_xbzrle += encoded_len + 1 + 2;
    acct_info.xbzrle_pages++;
    acct_info.xbzrle_bytes += bytes_xbzrle;
    *bytes_transferred += bytes_xbzrle;

    return 1;
}

static inline
ram_addr_t migration_bitmap_find_and_reset_dirty(MemoryRegion *mr,
                                                 ram_addr_t start)
{
    unsigned long base = mr->ram_addr >> TARGET_PAGE_BITS;
    unsigned long nr = base + (start >> TARGET_PAGE_BITS);
    uint64_t mr_size = TARGET_PAGE_ALIGN(memory_region_size(mr));
    unsigned long size = base + (mr_size >> TARGET_PAGE_BITS);

    unsigned long next;

    if (ram_bulk_stage && nr > base) {
        next = nr + 1;
    } else {
        next = find_next_bit(migration_bitmap, size, nr);
    }

    if (next < size) {
        clear_bit(next, migration_bitmap);
        migration_dirty_pages--;
    }
    return (next - base) << TARGET_PAGE_BITS;
}
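
/*
 * Note: during the bulk stage every page is still dirty, so once past
 * the start of a block the code above can take nr + 1 directly instead
 * of searching migration_bitmap with find_next_bit().
 */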

static inline bool migration_bitmap_set_dirty(ram_addr_t addr)
{
    bool ret;
    int nr = addr >> TARGET_PAGE_BITS;

    ret = test_and_set_bit(nr, migration_bitmap);

    if (!ret) {
        migration_dirty_pages++;
    }
    return ret;
}

static void migration_bitmap_sync_range(ram_addr_t start, ram_addr_t length)
{
    ram_addr_t addr;
    unsigned long page = BIT_WORD(start >> TARGET_PAGE_BITS);

    /* start address is aligned at the start of a word? */
    if (((page * BITS_PER_LONG) << TARGET_PAGE_BITS) == start) {
        int k;
        int nr = BITS_TO_LONGS(length >> TARGET_PAGE_BITS);
        unsigned long *src = ram_list.dirty_memory[DIRTY_MEMORY_MIGRATION];

        for (k = page; k < page + nr; k++) {
            if (src[k]) {
                unsigned long new_dirty;
                new_dirty = ~migration_bitmap[k];
                migration_bitmap[k] |= src[k];
                new_dirty &= src[k];
                migration_dirty_pages += ctpopl(new_dirty);
                src[k] = 0;
            }
        }
    } else {
        for (addr = 0; addr < length; addr += TARGET_PAGE_SIZE) {
            if (cpu_physical_memory_get_dirty(start + addr,
                                              TARGET_PAGE_SIZE,
                                              DIRTY_MEMORY_MIGRATION)) {
                cpu_physical_memory_reset_dirty(start + addr,
                                                TARGET_PAGE_SIZE,
                                                DIRTY_MEMORY_MIGRATION);
                migration_bitmap_set_dirty(start + addr);
            }
        }
    }
}
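
/*
 * Illustration (assuming 4 KiB target pages and 64-bit longs): the
 * word-aligned fast path above merges 64 pages -- 256 KiB of guest RAM --
 * per loop iteration, counting newly dirtied pages with ctpopl() instead
 * of testing bits one at a time.
 */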


/* Fix me: there are too many global variables used in the migration process. */
static int64_t start_time;
static int64_t bytes_xfer_prev;
static int64_t num_dirty_pages_period;

static void migration_bitmap_sync_init(void)
{
    start_time = 0;
    bytes_xfer_prev = 0;
    num_dirty_pages_period = 0;
}

/* Called with iothread lock held, to protect ram_list.dirty_memory[] */
static void migration_bitmap_sync(void)
{
    RAMBlock *block;
    uint64_t num_dirty_pages_init = migration_dirty_pages;
    MigrationState *s = migrate_get_current();
    int64_t end_time;
    int64_t bytes_xfer_now;
    static uint64_t xbzrle_cache_miss_prev;
    static uint64_t iterations_prev;

    bitmap_sync_count++;

    if (!bytes_xfer_prev) {
        bytes_xfer_prev = ram_bytes_transferred();
    }

    if (!start_time) {
        start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
    }

    trace_migration_bitmap_sync_start();
    address_space_sync_dirty_bitmap(&address_space_memory);

    rcu_read_lock();
    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        migration_bitmap_sync_range(block->mr->ram_addr, block->used_length);
    }
    rcu_read_unlock();

    trace_migration_bitmap_sync_end(migration_dirty_pages
                                    - num_dirty_pages_init);
    num_dirty_pages_period += migration_dirty_pages - num_dirty_pages_init;
    end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);

    /* more than 1 second = 1000 milliseconds */
    if (end_time > start_time + 1000) {
        if (migrate_auto_converge()) {
            /* The following detection logic can be refined later. For now:
               Check to see if the dirtied bytes is 50% more than the approx.
               amount of bytes that just got transferred since the last time we
               were in this routine. If that happens >N times (for now N==4)
               we turn on the throttle down logic */
            bytes_xfer_now = ram_bytes_transferred();
            if (s->dirty_pages_rate &&
                (num_dirty_pages_period * TARGET_PAGE_SIZE >
                    (bytes_xfer_now - bytes_xfer_prev) / 2) &&
                (dirty_rate_high_cnt++ > 4)) {
                trace_migration_throttle();
                mig_throttle_on = true;
                dirty_rate_high_cnt = 0;
            }
            bytes_xfer_prev = bytes_xfer_now;
        } else {
            mig_throttle_on = false;
        }
        if (migrate_use_xbzrle()) {
            if (iterations_prev != 0) {
                acct_info.xbzrle_cache_miss_rate =
                   (double)(acct_info.xbzrle_cache_miss -
                            xbzrle_cache_miss_prev) /
                   (acct_info.iterations - iterations_prev);
            }
            iterations_prev = acct_info.iterations;
            xbzrle_cache_miss_prev = acct_info.xbzrle_cache_miss;
        }
        s->dirty_pages_rate = num_dirty_pages_period * 1000
            / (end_time - start_time);
        s->dirty_bytes_rate = s->dirty_pages_rate * TARGET_PAGE_SIZE;
        start_time = end_time;
        num_dirty_pages_period = 0;
        s->dirty_sync_count = bitmap_sync_count;
    }
}

/**
 * ram_save_page: Send the given page to the stream
 *
 * Returns: Number of pages written.
 *
 * @f: QEMUFile where to send the data
 * @block: block that contains the page we want to send
 * @offset: offset inside the block for the page
 * @last_stage: if we are at the completion stage
 * @bytes_transferred: increase it with the number of transferred bytes
 */
static int ram_save_page(QEMUFile *f, RAMBlock* block, ram_addr_t offset,
                         bool last_stage, uint64_t *bytes_transferred)
{
    int pages = -1;
    uint64_t bytes_xmit;
    ram_addr_t current_addr;
    MemoryRegion *mr = block->mr;
    uint8_t *p;
    int ret;
    bool send_async = true;

    p = memory_region_get_ram_ptr(mr) + offset;

    /* When in doubt, send the page as a normal page */
    bytes_xmit = 0;
    ret = ram_control_save_page(f, block->offset,
                                offset, TARGET_PAGE_SIZE, &bytes_xmit);
    if (bytes_xmit) {
        *bytes_transferred += bytes_xmit;
        pages = 1;
    }

    XBZRLE_cache_lock();

    current_addr = block->offset + offset;

    if (block == last_sent_block) {
        offset |= RAM_SAVE_FLAG_CONTINUE;
    }
    if (ret != RAM_SAVE_CONTROL_NOT_SUPP) {
        if (ret != RAM_SAVE_CONTROL_DELAYED) {
            if (bytes_xmit > 0) {
                acct_info.norm_pages++;
            } else if (bytes_xmit == 0) {
                acct_info.dup_pages++;
            }
        }
    } else if (is_zero_range(p, TARGET_PAGE_SIZE)) {
        acct_info.dup_pages++;
        *bytes_transferred += save_page_header(f, block,
                                               offset | RAM_SAVE_FLAG_COMPRESS);
        qemu_put_byte(f, 0);
        *bytes_transferred += 1;
        pages = 1;
        /* Must let xbzrle know, otherwise a previous (now 0'd) cached
         * page would be stale
         */
        xbzrle_cache_zero_page(current_addr);
    } else if (!ram_bulk_stage && migrate_use_xbzrle()) {
        pages = save_xbzrle_page(f, &p, current_addr, block,
                                 offset, last_stage, bytes_transferred);
        if (!last_stage) {
            /* Can't send this cached data async, since the cache page
             * might get updated before it gets to the wire
             */
            send_async = false;
        }
    }

    /* XBZRLE overflow or normal page */
    if (pages == -1) {
        *bytes_transferred += save_page_header(f, block,
                                               offset | RAM_SAVE_FLAG_PAGE);
        if (send_async) {
            qemu_put_buffer_async(f, p, TARGET_PAGE_SIZE);
        } else {
            qemu_put_buffer(f, p, TARGET_PAGE_SIZE);
        }
        *bytes_transferred += TARGET_PAGE_SIZE;
        pages = 1;
        acct_info.norm_pages++;
    }

    XBZRLE_cache_unlock();

    return pages;
}
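
/*
 * In short, ram_save_page() tries, in order: the transport-specific
 * hook (ram_control_save_page(), e.g. RDMA), the zero-page test,
 * XBZRLE (only after the bulk stage), and finally a full copy of the
 * page.
 */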

/**
 * ram_save_compressed_page: compress the given page and send it to the stream
 *
 * Returns: Number of pages written.
 *
 * @f: QEMUFile where to send the data
 * @block: block that contains the page we want to send
 * @offset: offset inside the block for the page
 * @last_stage: if we are at the completion stage
 * @bytes_transferred: increase it with the number of transferred bytes
 */
static int ram_save_compressed_page(QEMUFile *f, RAMBlock *block,
                                    ram_addr_t offset, bool last_stage,
                                    uint64_t *bytes_transferred)
{
    int pages = -1;

    /* To be done */

    return pages;
}

/**
 * ram_find_and_save_block: Finds a dirty page and sends it to f
 *
 * Called within an RCU critical section.
 *
 * Returns:  The number of pages written
 *           0 means no dirty pages
 *
 * @f: QEMUFile where to send the data
 * @last_stage: if we are at the completion stage
 * @bytes_transferred: increase it with the number of transferred bytes
 */

static int ram_find_and_save_block(QEMUFile *f, bool last_stage,
                                   uint64_t *bytes_transferred)
{
    RAMBlock *block = last_seen_block;
    ram_addr_t offset = last_offset;
    bool complete_round = false;
    int pages = 0;
    MemoryRegion *mr;

    if (!block) {
        block = QLIST_FIRST_RCU(&ram_list.blocks);
    }

    while (true) {
        mr = block->mr;
        offset = migration_bitmap_find_and_reset_dirty(mr, offset);
        if (complete_round && block == last_seen_block &&
            offset >= last_offset) {
            break;
        }
        if (offset >= block->used_length) {
            offset = 0;
            block = QLIST_NEXT_RCU(block, next);
            if (!block) {
                block = QLIST_FIRST_RCU(&ram_list.blocks);
                complete_round = true;
                ram_bulk_stage = false;
            }
        } else {
            if (migrate_use_compression()) {
                pages = ram_save_compressed_page(f, block, offset, last_stage,
                                                 bytes_transferred);
            } else {
                pages = ram_save_page(f, block, offset, last_stage,
                                      bytes_transferred);
            }

            /* if page is unmodified, continue to the next */
            if (pages > 0) {
                last_sent_block = block;
                break;
            }
        }
    }

    last_seen_block = block;
    last_offset = offset;

    return pages;
}

static uint64_t bytes_transferred;

void acct_update_position(QEMUFile *f, size_t size, bool zero)
{
    uint64_t pages = size / TARGET_PAGE_SIZE;
    if (zero) {
        acct_info.dup_pages += pages;
    } else {
        acct_info.norm_pages += pages;
        bytes_transferred += size;
        qemu_update_position(f, size);
    }
}

static ram_addr_t ram_save_remaining(void)
{
    return migration_dirty_pages;
}

uint64_t ram_bytes_remaining(void)
{
    return ram_save_remaining() * TARGET_PAGE_SIZE;
}

uint64_t ram_bytes_transferred(void)
{
    return bytes_transferred;
}

uint64_t ram_bytes_total(void)
{
    RAMBlock *block;
    uint64_t total = 0;

    rcu_read_lock();
    QLIST_FOREACH_RCU(block, &ram_list.blocks, next)
        total += block->used_length;
    rcu_read_unlock();
    return total;
}

void free_xbzrle_decoded_buf(void)
{
    g_free(xbzrle_decoded_buf);
    xbzrle_decoded_buf = NULL;
}

static void migration_end(void)
{
    if (migration_bitmap) {
        memory_global_dirty_log_stop();
        g_free(migration_bitmap);
        migration_bitmap = NULL;
    }

    XBZRLE_cache_lock();
    if (XBZRLE.cache) {
        cache_fini(XBZRLE.cache);
        g_free(XBZRLE.encoded_buf);
        g_free(XBZRLE.current_buf);
        XBZRLE.cache = NULL;
        XBZRLE.encoded_buf = NULL;
        XBZRLE.current_buf = NULL;
    }
    XBZRLE_cache_unlock();
}

static void ram_migration_cancel(void *opaque)
{
    migration_end();
}

static void reset_ram_globals(void)
{
    last_seen_block = NULL;
    last_sent_block = NULL;
    last_offset = 0;
    last_version = ram_list.version;
    ram_bulk_stage = true;
}

#define MAX_WAIT 50 /* ms, half buffered_file limit */


/* Each of ram_save_setup, ram_save_iterate and ram_save_complete has
 * a long-running RCU critical section.  When rcu-reclaims in the code
 * start to become numerous it will be necessary to reduce the
 * granularity of these critical sections.
 */

static int ram_save_setup(QEMUFile *f, void *opaque)
{
    RAMBlock *block;
    int64_t ram_bitmap_pages; /* Size of bitmap in pages, including gaps */

    mig_throttle_on = false;
    dirty_rate_high_cnt = 0;
    bitmap_sync_count = 0;
    migration_bitmap_sync_init();

    if (migrate_use_xbzrle()) {
        XBZRLE_cache_lock();
        XBZRLE.cache = cache_init(migrate_xbzrle_cache_size() /
                                  TARGET_PAGE_SIZE,
                                  TARGET_PAGE_SIZE);
        if (!XBZRLE.cache) {
            XBZRLE_cache_unlock();
            error_report("Error creating cache");
            return -1;
        }
        XBZRLE_cache_unlock();

        /* We prefer not to abort if there is no memory */
        XBZRLE.encoded_buf = g_try_malloc0(TARGET_PAGE_SIZE);
        if (!XBZRLE.encoded_buf) {
            error_report("Error allocating encoded_buf");
            return -1;
        }

        XBZRLE.current_buf = g_try_malloc(TARGET_PAGE_SIZE);
        if (!XBZRLE.current_buf) {
            error_report("Error allocating current_buf");
            g_free(XBZRLE.encoded_buf);
            XBZRLE.encoded_buf = NULL;
            return -1;
        }

        acct_clear();
    }

    /* iothread lock needed for ram_list.dirty_memory[] */
    qemu_mutex_lock_iothread();
    qemu_mutex_lock_ramlist();
    rcu_read_lock();
    bytes_transferred = 0;
    reset_ram_globals();

    ram_bitmap_pages = last_ram_offset() >> TARGET_PAGE_BITS;
    migration_bitmap = bitmap_new(ram_bitmap_pages);
    bitmap_set(migration_bitmap, 0, ram_bitmap_pages);

    /*
     * Count the total number of pages used by ram blocks not including any
     * gaps due to alignment or unplugs.
     */
    migration_dirty_pages = ram_bytes_total() >> TARGET_PAGE_BITS;

    memory_global_dirty_log_start();
    migration_bitmap_sync();
    qemu_mutex_unlock_ramlist();
    qemu_mutex_unlock_iothread();

    qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE);

    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        qemu_put_byte(f, strlen(block->idstr));
        qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
        qemu_put_be64(f, block->used_length);
    }

    rcu_read_unlock();

    ram_control_before_iterate(f, RAM_CONTROL_SETUP);
    ram_control_after_iterate(f, RAM_CONTROL_SETUP);

    qemu_put_be64(f, RAM_SAVE_FLAG_EOS);

    return 0;
}

static int ram_save_iterate(QEMUFile *f, void *opaque)
{
    int ret;
    int i;
    int64_t t0;
    int pages_sent = 0;

    rcu_read_lock();
    if (ram_list.version != last_version) {
        reset_ram_globals();
    }

    /* Read version before ram_list.blocks */
    smp_rmb();

    ram_control_before_iterate(f, RAM_CONTROL_ROUND);

    t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
    i = 0;
    while ((ret = qemu_file_rate_limit(f)) == 0) {
        int pages;

        pages = ram_find_and_save_block(f, false, &bytes_transferred);
        /* no more pages to send */
        if (pages == 0) {
            break;
        }
        pages_sent += pages;
        acct_info.iterations++;
        check_guest_throttling();
        /* we want to check in the 1st loop, just in case it was the 1st time
           and we had to sync the dirty bitmap.
           qemu_get_clock_ns() is a bit expensive, so we only check once
           every few iterations
        */
        if ((i & 63) == 0) {
            uint64_t t1 = (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - t0) / 1000000;
            if (t1 > MAX_WAIT) {
                DPRINTF("big wait: %" PRIu64 " milliseconds, %d iterations\n",
                        t1, i);
                break;
            }
        }
        i++;
    }
    rcu_read_unlock();

    /*
     * Must occur before EOS (or any QEMUFile operation)
     * because of RDMA protocol.
     */
    ram_control_after_iterate(f, RAM_CONTROL_ROUND);

    qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
    bytes_transferred += 8;

    ret = qemu_file_get_error(f);
    if (ret < 0) {
        return ret;
    }

    return pages_sent;
}

/* Called with iothread lock */
static int ram_save_complete(QEMUFile *f, void *opaque)
{
    rcu_read_lock();

    migration_bitmap_sync();

    ram_control_before_iterate(f, RAM_CONTROL_FINISH);

    /* try transferring iterative blocks of memory */

    /* flush all remaining blocks regardless of rate limiting */
    while (true) {
        int pages;

        pages = ram_find_and_save_block(f, true, &bytes_transferred);
        /* no more blocks to send */
        if (pages == 0) {
            break;
        }
    }

    ram_control_after_iterate(f, RAM_CONTROL_FINISH);
    migration_end();

    rcu_read_unlock();
    qemu_put_be64(f, RAM_SAVE_FLAG_EOS);

    return 0;
}

static uint64_t ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size)
{
    uint64_t remaining_size;

    remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE;

    if (remaining_size < max_size) {
        qemu_mutex_lock_iothread();
        rcu_read_lock();
        migration_bitmap_sync();
        rcu_read_unlock();
        qemu_mutex_unlock_iothread();
        remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE;
    }
    return remaining_size;
}

static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
{
    unsigned int xh_len;
    int xh_flags;

    if (!xbzrle_decoded_buf) {
        xbzrle_decoded_buf = g_malloc(TARGET_PAGE_SIZE);
    }

    /* extract RLE header */
    xh_flags = qemu_get_byte(f);
    xh_len = qemu_get_be16(f);

    if (xh_flags != ENCODING_FLAG_XBZRLE) {
        error_report("Failed to load XBZRLE page - wrong compression!");
        return -1;
    }

    if (xh_len > TARGET_PAGE_SIZE) {
        error_report("Failed to load XBZRLE page - len overflow!");
        return -1;
    }
    /* load data and decode */
    qemu_get_buffer(f, xbzrle_decoded_buf, xh_len);

    /* decode RLE */
    if (xbzrle_decode_buffer(xbzrle_decoded_buf, xh_len, host,
                             TARGET_PAGE_SIZE) == -1) {
        error_report("Failed to load XBZRLE page - decode error!");
        return -1;
    }

    return 0;
}

/* Must be called from within an RCU critical section.
 * Returns a pointer from within the RCU-protected ram_list.
 */
static inline void *host_from_stream_offset(QEMUFile *f,
                                            ram_addr_t offset,
                                            int flags)
{
    static RAMBlock *block = NULL;
    char id[256];
    uint8_t len;

    if (flags & RAM_SAVE_FLAG_CONTINUE) {
        if (!block || block->max_length <= offset) {
            error_report("Ack, bad migration stream!");
            return NULL;
        }

        return memory_region_get_ram_ptr(block->mr) + offset;
    }

    len = qemu_get_byte(f);
    qemu_get_buffer(f, (uint8_t *)id, len);
    id[len] = 0;

    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        if (!strncmp(id, block->idstr, sizeof(id)) &&
            block->max_length > offset) {
            return memory_region_get_ram_ptr(block->mr) + offset;
        }
    }

    error_report("Can't find block %s!", id);
    return NULL;
}
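
/*
 * Note: the static 'block' above is what makes RAM_SAVE_FLAG_CONTINUE
 * work: a record flagged CONTINUE carries no block id string and reuses
 * the block resolved by the previous record.
 */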

/*
 * If a page (or a whole RDMA chunk) has been
 * determined to be zero, then zap it.
 */
void ram_handle_compressed(void *host, uint8_t ch, uint64_t size)
{
    if (ch != 0 || !is_zero_range(host, size)) {
        memset(host, ch, size);
    }
}

static void *do_data_decompress(void *opaque)
{
    while (!quit_decomp_thread) {
        /* To be done */
    }

    return NULL;
}

void migrate_decompress_threads_create(void)
{
    int i, thread_count;

    thread_count = migrate_decompress_threads();
    decompress_threads = g_new0(QemuThread, thread_count);
    decomp_param = g_new0(DecompressParam, thread_count);
    compressed_data_buf = g_malloc0(compressBound(TARGET_PAGE_SIZE));
    quit_decomp_thread = false;
    for (i = 0; i < thread_count; i++) {
        qemu_thread_create(decompress_threads + i, "decompress",
                           do_data_decompress, decomp_param + i,
                           QEMU_THREAD_JOINABLE);
    }
}

void migrate_decompress_threads_join(void)
{
    int i, thread_count;

    quit_decomp_thread = true;
    thread_count = migrate_decompress_threads();
    for (i = 0; i < thread_count; i++) {
        qemu_thread_join(decompress_threads + i);
    }
    g_free(decompress_threads);
    g_free(decomp_param);
    g_free(compressed_data_buf);
    decompress_threads = NULL;
    decomp_param = NULL;
    compressed_data_buf = NULL;
}

static void decompress_data_with_multi_threads(uint8_t *compbuf,
                                               void *host, int len)
{
    /* To be done */
}
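
/*
 * Note: like the compress path, this is a stub at this point in the
 * series; a finished version would presumably hand (compbuf, host, len)
 * to an idle decompression thread instead of doing nothing.
 */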

static int ram_load(QEMUFile *f, void *opaque, int version_id)
{
    int flags = 0, ret = 0;
    static uint64_t seq_iter;
    int len = 0;

    seq_iter++;

    if (version_id != 4) {
        ret = -EINVAL;
    }

    /* This RCU critical section can be very long running.
     * When RCU reclaims in the code start to become numerous,
     * it will be necessary to reduce the granularity of this
     * critical section.
     */
    rcu_read_lock();
    while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
        ram_addr_t addr, total_ram_bytes;
        void *host;
        uint8_t ch;

        addr = qemu_get_be64(f);
        flags = addr & ~TARGET_PAGE_MASK;
        addr &= TARGET_PAGE_MASK;

        switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
        case RAM_SAVE_FLAG_MEM_SIZE:
            /* Synchronize RAM block list */
            total_ram_bytes = addr;
            while (!ret && total_ram_bytes) {
                RAMBlock *block;
                uint8_t len;
                char id[256];
                ram_addr_t length;

                len = qemu_get_byte(f);
                qemu_get_buffer(f, (uint8_t *)id, len);
                id[len] = 0;
                length = qemu_get_be64(f);

                QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
                    if (!strncmp(id, block->idstr, sizeof(id))) {
                        if (length != block->used_length) {
                            Error *local_err = NULL;

                            ret = qemu_ram_resize(block->offset, length, &local_err);
                            if (local_err) {
                                error_report_err(local_err);
                            }
                        }
                        break;
                    }
                }

                if (!block) {
                    error_report("Unknown ramblock \"%s\", cannot "
                                 "accept migration", id);
                    ret = -EINVAL;
                }

                total_ram_bytes -= length;
            }
            break;
        case RAM_SAVE_FLAG_COMPRESS:
            host = host_from_stream_offset(f, addr, flags);
            if (!host) {
                error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
                ret = -EINVAL;
                break;
            }
            ch = qemu_get_byte(f);
            ram_handle_compressed(host, ch, TARGET_PAGE_SIZE);
            break;
        case RAM_SAVE_FLAG_PAGE:
            host = host_from_stream_offset(f, addr, flags);
            if (!host) {
                error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
                ret = -EINVAL;
                break;
            }
            qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
            break;
        case RAM_SAVE_FLAG_COMPRESS_PAGE:
            host = host_from_stream_offset(f, addr, flags);
            if (!host) {
                error_report("Invalid RAM offset " RAM_ADDR_FMT, addr);
                ret = -EINVAL;
                break;
            }

            len = qemu_get_be32(f);
            if (len < 0 || len > compressBound(TARGET_PAGE_SIZE)) {
                error_report("Invalid compressed data length: %d", len);
                ret = -EINVAL;
                break;
            }
            qemu_get_buffer(f, compressed_data_buf, len);
            decompress_data_with_multi_threads(compressed_data_buf, host, len);
            break;
        case RAM_SAVE_FLAG_XBZRLE:
            host = host_from_stream_offset(f, addr, flags);
            if (!host) {
                error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
                ret = -EINVAL;
                break;
            }
            if (load_xbzrle(f, addr, host) < 0) {
                error_report("Failed to decompress XBZRLE page at "
                             RAM_ADDR_FMT, addr);
                ret = -EINVAL;
                break;
            }
            break;
        case RAM_SAVE_FLAG_EOS:
            /* normal exit */
            break;
        default:
            if (flags & RAM_SAVE_FLAG_HOOK) {
                ram_control_load_hook(f, flags);
            } else {
                error_report("Unknown combination of migration flags: %#x",
                             flags);
                ret = -EINVAL;
            }
        }
        if (!ret) {
            ret = qemu_file_get_error(f);
        }
    }

    rcu_read_unlock();
    DPRINTF("Completed load of VM with exit code %d seq iteration "
            "%" PRIu64 "\n", ret, seq_iter);
    return ret;
}

static SaveVMHandlers savevm_ram_handlers = {
    .save_live_setup = ram_save_setup,
    .save_live_iterate = ram_save_iterate,
    .save_live_complete = ram_save_complete,
    .save_live_pending = ram_save_pending,
    .load_state = ram_load,
    .cancel = ram_migration_cancel,
};

void ram_mig_init(void)
{
    qemu_mutex_init(&XBZRLE.lock);
    register_savevm_live(NULL, "ram", 0, 4, &savevm_ram_handlers, NULL);
}

struct soundhw {
    const char *name;
    const char *descr;
    int enabled;
    int isa;
    union {
        int (*init_isa) (ISABus *bus);
        int (*init_pci) (PCIBus *bus);
    } init;
};

static struct soundhw soundhw[9];
static int soundhw_count;

void isa_register_soundhw(const char *name, const char *descr,
                          int (*init_isa)(ISABus *bus))
{
    assert(soundhw_count < ARRAY_SIZE(soundhw) - 1);
    soundhw[soundhw_count].name = name;
    soundhw[soundhw_count].descr = descr;
    soundhw[soundhw_count].isa = 1;
    soundhw[soundhw_count].init.init_isa = init_isa;
    soundhw_count++;
}

void pci_register_soundhw(const char *name, const char *descr,
                          int (*init_pci)(PCIBus *bus))
{
    assert(soundhw_count < ARRAY_SIZE(soundhw) - 1);
    soundhw[soundhw_count].name = name;
    soundhw[soundhw_count].descr = descr;
    soundhw[soundhw_count].isa = 0;
    soundhw[soundhw_count].init.init_pci = init_pci;
    soundhw_count++;
}
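
/*
 * For example (audio devices at the time of writing), hw/audio/ac97.c
 * registers itself with something like:
 *
 *     pci_register_soundhw("ac97", "Intel 82801AA AC97 Audio", ac97_init);
 *
 * (name and init function as in that file; shown here for illustration).
 */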

void select_soundhw(const char *optarg)
{
    struct soundhw *c;

    if (is_help_option(optarg)) {
    show_valid_cards:

        if (soundhw_count) {
            printf("Valid sound card names (comma separated):\n");
            for (c = soundhw; c->name; ++c) {
                printf("%-11s %s\n", c->name, c->descr);
            }
            printf("\n-soundhw all will enable all of the above\n");
        } else {
            printf("Machine has no user-selectable audio hardware "
                   "(it may or may not have always-present audio hardware).\n");
        }
        exit(!is_help_option(optarg));
    } else {
        size_t l;
        const char *p;
        char *e;
        int bad_card = 0;

        if (!strcmp(optarg, "all")) {
            for (c = soundhw; c->name; ++c) {
                c->enabled = 1;
            }
            return;
        }

        p = optarg;
        while (*p) {
            e = strchr(p, ',');
            l = !e ? strlen(p) : (size_t) (e - p);

            for (c = soundhw; c->name; ++c) {
                if (!strncmp(c->name, p, l) && !c->name[l]) {
                    c->enabled = 1;
                    break;
                }
            }

            if (!c->name) {
                if (l > 80) {
                    error_report("Unknown sound card name (too big to show)");
                } else {
                    error_report("Unknown sound card name `%.*s'",
                                 (int) l, p);
                }
                bad_card = 1;
            }
            p += l + (e != NULL);
        }

        if (bad_card) {
            goto show_valid_cards;
        }
    }
}

void audio_init(void)
{
    struct soundhw *c;
    ISABus *isa_bus = (ISABus *) object_resolve_path_type("", TYPE_ISA_BUS, NULL);
    PCIBus *pci_bus = (PCIBus *) object_resolve_path_type("", TYPE_PCI_BUS, NULL);

    for (c = soundhw; c->name; ++c) {
        if (c->enabled) {
            if (c->isa) {
                if (!isa_bus) {
                    error_report("ISA bus not available for %s", c->name);
                    exit(1);
                }
                c->init.init_isa(isa_bus);
            } else {
                if (!pci_bus) {
                    error_report("PCI bus not available for %s", c->name);
                    exit(1);
                }
                c->init.init_pci(pci_bus);
            }
        }
    }
}

int qemu_uuid_parse(const char *str, uint8_t *uuid)
{
    int ret;

    if (strlen(str) != 36) {
        return -1;
    }

    ret = sscanf(str, UUID_FMT, &uuid[0], &uuid[1], &uuid[2], &uuid[3],
                 &uuid[4], &uuid[5], &uuid[6], &uuid[7], &uuid[8], &uuid[9],
                 &uuid[10], &uuid[11], &uuid[12], &uuid[13], &uuid[14],
                 &uuid[15]);

    if (ret != 16) {
        return -1;
    }
    return 0;
}
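
/*
 * Example: "550e8400-e29b-41d4-a716-446655440000" fills the 16 uuid[]
 * bytes above; anything that is not exactly 36 characters in UUID_FMT
 * form is rejected.
 */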

void do_acpitable_option(const QemuOpts *opts)
{
#ifdef TARGET_I386
    Error *err = NULL;

    acpi_table_add(opts, &err);
    if (err) {
        error_report("Wrong acpi table provided: %s",
                     error_get_pretty(err));
        error_free(err);
        exit(1);
    }
#endif
}

void do_smbios_option(QemuOpts *opts)
{
#ifdef TARGET_I386
    smbios_entry_add(opts);
#endif
}

void cpudef_init(void)
{
#if defined(cpudef_setup)
    cpudef_setup(); /* parse cpu definitions in target config file */
#endif
}

int kvm_available(void)
{
#ifdef CONFIG_KVM
    return 1;
#else
    return 0;
#endif
}

int xen_available(void)
{
#ifdef CONFIG_XEN
    return 1;
#else
    return 0;
#endif
}


TargetInfo *qmp_query_target(Error **errp)
{
    TargetInfo *info = g_malloc0(sizeof(*info));

    info->arch = g_strdup(TARGET_NAME);

    return info;
}

/* Stub function that gets run on the vcpu when it's brought out of the
   VM to run inside qemu via async_run_on_cpu() */
static void mig_sleep_cpu(void *opq)
{
    qemu_mutex_unlock_iothread();
    g_usleep(30*1000);
    qemu_mutex_lock_iothread();
}
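
/*
 * Rough arithmetic (assuming async_run_on_cpu() delivers promptly):
 * each throttle round sleeps every vCPU for 30 ms, and
 * check_guest_throttling() re-arms at most once per 40 ms, so a
 * throttled guest makes progress for only about a quarter of
 * wall-clock time.
 */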

/* To reduce the dirty rate explicitly disallow the VCPUs from spending
   much time in the VM. The migration thread will try to catch up.
   Workload will experience a performance drop.
*/
static void mig_throttle_guest_down(void)
{
    CPUState *cpu;

    qemu_mutex_lock_iothread();
    CPU_FOREACH(cpu) {
        async_run_on_cpu(cpu, mig_sleep_cpu, NULL);
    }
    qemu_mutex_unlock_iothread();
}

static void check_guest_throttling(void)
{
    static int64_t t0;
    int64_t        t1;

    if (!mig_throttle_on) {
        return;
    }

    if (!t0) {
        t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
        return;
    }

    t1 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);

    /* If it has been more than 40 ms since the last time the guest
     * was throttled then do it again.
     */
    if (40 < (t1 - t0) / 1000000) {
        mig_throttle_guest_down();
        t0 = t1;
    }
}