/*
 * QEMU dump
 *
 * Copyright Fujitsu, Corp. 2011, 2012
 *
 * Authors:
 *     Wen Congyang <wency@cn.fujitsu.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include "qemu-common.h"
#include "elf.h"
#include "cpu.h"
17 18
#include "exec/cpu-all.h"
#include "exec/hwaddr.h"
19
#include "monitor/monitor.h"
20 21 22 23
#include "sysemu/kvm.h"
#include "sysemu/dump.h"
#include "sysemu/sysemu.h"
#include "sysemu/memory_mapping.h"
24
#include "sysemu/cpus.h"
25
#include "qapi/error.h"
26 27
#include "qmp-commands.h"

Q
qiaonuohan 已提交
28 29 30 31 32 33 34
#include <zlib.h>
#ifdef CONFIG_LZO
#include <lzo/lzo1x.h>
#endif
#ifdef CONFIG_SNAPPY
#include <snappy-c.h>
#endif
35 36 37
#ifndef ELF_MACHINE_UNAME
#define ELF_MACHINE_UNAME "Unknown"
#endif
Q
qiaonuohan 已提交
38

39
/*
 * Convert a 16-bit value to the byte order of the dump: cpu_to_le16() when
 * s->dump_info.d_endian is ELFDATA2LSB, cpu_to_be16() otherwise.
 */
uint16_t cpu_to_dump16(DumpState *s, uint16_t val)
{
    if (s->dump_info.d_endian == ELFDATA2LSB) {
        return cpu_to_le16(val);
    }

    return cpu_to_be16(val);
}

50
/*
 * Convert a 32-bit value to the byte order of the dump: cpu_to_le32() when
 * s->dump_info.d_endian is ELFDATA2LSB, cpu_to_be32() otherwise.
 */
uint32_t cpu_to_dump32(DumpState *s, uint32_t val)
{
    if (s->dump_info.d_endian == ELFDATA2LSB) {
        return cpu_to_le32(val);
    }

    return cpu_to_be32(val);
}

61
/*
 * Convert a 64-bit value to the byte order of the dump: cpu_to_le64() when
 * s->dump_info.d_endian is ELFDATA2LSB, cpu_to_be64() otherwise.
 */
uint64_t cpu_to_dump64(DumpState *s, uint64_t val)
{
    if (s->dump_info.d_endian == ELFDATA2LSB) {
        return cpu_to_le64(val);
    }

    return cpu_to_be64(val);
}

/*
 * Release what the dump state holds: the guest-physical block list, the
 * memory mapping list and the output file descriptor.  When s->resume is
 * set, vm_start() is called afterwards.  Always returns 0.
 */
static int dump_cleanup(DumpState *s)
{
    guest_phys_blocks_free(&s->guest_phys_blocks);
    memory_mapping_list_free(&s->list);
    close(s->fd);

    if (!s->resume) {
        return 0;
    }

    vm_start();
    return 0;
}

84
/*
 * Abort the dump: run dump_cleanup() on the state first, then report
 * @reason to the caller through @errp.
 */
static void dump_error(DumpState *s, const char *reason, Error **errp)
{
    (void)dump_cleanup(s);
    error_setg(errp, "%s", reason);
}

90
static int fd_write_vmcore(const void *buf, size_t size, void *opaque)
91 92
{
    DumpState *s = opaque;
93 94 95 96 97
    size_t written_size;

    written_size = qemu_write_full(s->fd, buf, size);
    if (written_size != size) {
        return -1;
98 99 100 101 102
    }

    return 0;
}

103
/*
 * Build the ELF64 file header of the vmcore and write it to the dump file.
 *
 * Multi-byte fields are stored in the dump's byte order.  The section header
 * fields are only filled in when s->have_section is set.  A failed write
 * aborts the dump through dump_error().
 */
static void write_elf64_header(DumpState *s, Error **errp)
{
    Elf64_Ehdr ehdr;

    memset(&ehdr, 0, sizeof(Elf64_Ehdr));

    /* e_ident: magic, class, data encoding and version */
    memcpy(&ehdr, ELFMAG, SELFMAG);
    ehdr.e_ident[EI_CLASS] = ELFCLASS64;
    ehdr.e_ident[EI_DATA] = s->dump_info.d_endian;
    ehdr.e_ident[EI_VERSION] = EV_CURRENT;

    ehdr.e_type = cpu_to_dump16(s, ET_CORE);
    ehdr.e_machine = cpu_to_dump16(s, s->dump_info.d_machine);
    ehdr.e_version = cpu_to_dump32(s, EV_CURRENT);
    ehdr.e_ehsize = cpu_to_dump16(s, sizeof(ehdr));

    /* the program header table starts right after the ELF header */
    ehdr.e_phoff = cpu_to_dump64(s, sizeof(Elf64_Ehdr));
    ehdr.e_phentsize = cpu_to_dump16(s, sizeof(Elf64_Phdr));
    ehdr.e_phnum = cpu_to_dump16(s, s->phdr_num);

    if (s->have_section) {
        /* offset just past the ELF header and s->sh_info program headers */
        uint64_t shoff = sizeof(Elf64_Ehdr) + sizeof(Elf64_Phdr) * s->sh_info;

        ehdr.e_shoff = cpu_to_dump64(s, shoff);
        ehdr.e_shentsize = cpu_to_dump16(s, sizeof(Elf64_Shdr));
        ehdr.e_shnum = cpu_to_dump16(s, 1);
    }

    if (fd_write_vmcore(&ehdr, sizeof(ehdr), s) < 0) {
        dump_error(s, "dump: failed to write elf header", errp);
    }
}

134
/*
 * Build the ELF32 file header of the vmcore and write it to the dump file.
 *
 * Multi-byte fields are stored in the dump's byte order.  The section header
 * fields are only filled in when s->have_section is set.  A failed write
 * aborts the dump through dump_error().
 */
static void write_elf32_header(DumpState *s, Error **errp)
{
    Elf32_Ehdr ehdr;

    memset(&ehdr, 0, sizeof(Elf32_Ehdr));

    /* e_ident: magic, class, data encoding and version */
    memcpy(&ehdr, ELFMAG, SELFMAG);
    ehdr.e_ident[EI_CLASS] = ELFCLASS32;
    ehdr.e_ident[EI_DATA] = s->dump_info.d_endian;
    ehdr.e_ident[EI_VERSION] = EV_CURRENT;

    ehdr.e_type = cpu_to_dump16(s, ET_CORE);
    ehdr.e_machine = cpu_to_dump16(s, s->dump_info.d_machine);
    ehdr.e_version = cpu_to_dump32(s, EV_CURRENT);
    ehdr.e_ehsize = cpu_to_dump16(s, sizeof(ehdr));

    /* the program header table starts right after the ELF header */
    ehdr.e_phoff = cpu_to_dump32(s, sizeof(Elf32_Ehdr));
    ehdr.e_phentsize = cpu_to_dump16(s, sizeof(Elf32_Phdr));
    ehdr.e_phnum = cpu_to_dump16(s, s->phdr_num);

    if (s->have_section) {
        /* offset just past the ELF header and s->sh_info program headers */
        uint32_t shoff = sizeof(Elf32_Ehdr) + sizeof(Elf32_Phdr) * s->sh_info;

        ehdr.e_shoff = cpu_to_dump32(s, shoff);
        ehdr.e_shentsize = cpu_to_dump16(s, sizeof(Elf32_Shdr));
        ehdr.e_shnum = cpu_to_dump16(s, 1);
    }

    if (fd_write_vmcore(&ehdr, sizeof(ehdr), s) < 0) {
        dump_error(s, "dump: failed to write elf header", errp);
    }
}

165 166 167
/*
 * Write one ELF64 PT_LOAD program header describing @memory_mapping.
 *
 * @offset and @filesz give the location and size of the mapping's data in
 * the vmcore file; @filesz must not exceed the mapping length (asserted).
 * @phdr_index is not used by this function.  A failed write aborts the dump
 * through dump_error().
 */
static void write_elf64_load(DumpState *s, MemoryMapping *memory_mapping,
                             int phdr_index, hwaddr offset,
                             hwaddr filesz, Error **errp)
{
    Elf64_Phdr load;

    assert(memory_mapping->length >= filesz);

    memset(&load, 0, sizeof(Elf64_Phdr));
    load.p_type = cpu_to_dump32(s, PT_LOAD);
    load.p_offset = cpu_to_dump64(s, offset);
    load.p_vaddr = cpu_to_dump64(s, memory_mapping->virt_addr);
    load.p_paddr = cpu_to_dump64(s, memory_mapping->phys_addr);
    load.p_filesz = cpu_to_dump64(s, filesz);
    load.p_memsz = cpu_to_dump64(s, memory_mapping->length);

    if (fd_write_vmcore(&load, sizeof(Elf64_Phdr), s) < 0) {
        dump_error(s, "dump: failed to write program header table", errp);
    }
}

188 189 190
/*
 * Write one ELF32 PT_LOAD program header describing @memory_mapping.
 *
 * @offset and @filesz give the location and size of the mapping's data in
 * the vmcore file; @filesz must not exceed the mapping length (asserted).
 * All fields go through cpu_to_dump32(), i.e. they are stored as 32-bit
 * values.  @phdr_index is not used by this function.  A failed write aborts
 * the dump through dump_error().
 */
static void write_elf32_load(DumpState *s, MemoryMapping *memory_mapping,
                             int phdr_index, hwaddr offset,
                             hwaddr filesz, Error **errp)
{
    Elf32_Phdr load;

    assert(memory_mapping->length >= filesz);

    memset(&load, 0, sizeof(Elf32_Phdr));
    load.p_type = cpu_to_dump32(s, PT_LOAD);
    load.p_offset = cpu_to_dump32(s, offset);
    load.p_vaddr = cpu_to_dump32(s, memory_mapping->virt_addr);
    load.p_paddr = cpu_to_dump32(s, memory_mapping->phys_addr);
    load.p_filesz = cpu_to_dump32(s, filesz);
    load.p_memsz = cpu_to_dump32(s, memory_mapping->length);

    if (fd_write_vmcore(&load, sizeof(Elf32_Phdr), s) < 0) {
        dump_error(s, "dump: failed to write program header table", errp);
    }
}

211
/*
 * Write the ELF64 PT_NOTE program header.
 *
 * The note data is described as s->note_size bytes located at file offset
 * s->memory_offset - s->note_size; the physical and virtual addresses are
 * left at 0.  A failed write aborts the dump through dump_error().
 */
static void write_elf64_note(DumpState *s, Error **errp)
{
    hwaddr note_start = s->memory_offset - s->note_size;
    Elf64_Phdr note;

    memset(&note, 0, sizeof(Elf64_Phdr));
    note.p_type = cpu_to_dump32(s, PT_NOTE);
    note.p_offset = cpu_to_dump64(s, note_start);
    note.p_vaddr = 0;
    note.p_paddr = 0;
    note.p_filesz = cpu_to_dump64(s, s->note_size);
    note.p_memsz = cpu_to_dump64(s, s->note_size);

    if (fd_write_vmcore(&note, sizeof(Elf64_Phdr), s) < 0) {
        dump_error(s, "dump: failed to write program header table", errp);
    }
}

231 232 233 234 235
/* Return the CPU's cpu_index plus one. */
static inline int cpu_index(CPUState *cpu)
{
    int id = cpu->cpu_index + 1;

    return id;
}

236 237
/*
 * Emit the ELF64 notes of every CPU through the write callback @f.
 *
 * A first pass writes each CPU's ELF note, using cpu_index() as the id; a
 * second pass writes each CPU's QEMU-specific note.  The first failing
 * callback aborts the dump through dump_error() and stops the iteration.
 */
static void write_elf64_notes(WriteCoreDumpFunction f, DumpState *s,
                              Error **errp)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (cpu_write_elf64_note(f, cpu, cpu_index(cpu), s) < 0) {
            dump_error(s, "dump: failed to write elf notes", errp);
            return;
        }
    }

    CPU_FOREACH(cpu) {
        if (cpu_write_elf64_qemunote(f, cpu, s) < 0) {
            dump_error(s, "dump: failed to write CPU status", errp);
            return;
        }
    }
}

261
/*
 * Write the ELF32 PT_NOTE program header.
 *
 * The note data is described as s->note_size bytes located at file offset
 * s->memory_offset - s->note_size; the physical and virtual addresses are
 * left at 0.  A failed write aborts the dump through dump_error().
 */
static void write_elf32_note(DumpState *s, Error **errp)
{
    hwaddr note_start = s->memory_offset - s->note_size;
    Elf32_Phdr note;

    memset(&note, 0, sizeof(Elf32_Phdr));
    note.p_type = cpu_to_dump32(s, PT_NOTE);
    note.p_offset = cpu_to_dump32(s, note_start);
    note.p_vaddr = 0;
    note.p_paddr = 0;
    note.p_filesz = cpu_to_dump32(s, s->note_size);
    note.p_memsz = cpu_to_dump32(s, s->note_size);

    if (fd_write_vmcore(&note, sizeof(Elf32_Phdr), s) < 0) {
        dump_error(s, "dump: failed to write program header table", errp);
    }
}

281 282
/*
 * Emit the ELF32 notes of every CPU through the write callback @f.
 *
 * A first pass writes each CPU's ELF note, using cpu_index() as the id; a
 * second pass writes each CPU's QEMU-specific note.  The first failing
 * callback aborts the dump through dump_error() and stops the iteration.
 */
static void write_elf32_notes(WriteCoreDumpFunction f, DumpState *s,
                              Error **errp)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (cpu_write_elf32_note(f, cpu, cpu_index(cpu), s) < 0) {
            dump_error(s, "dump: failed to write elf notes", errp);
            return;
        }
    }

    CPU_FOREACH(cpu) {
        if (cpu_write_elf32_qemunote(f, cpu, s) < 0) {
            dump_error(s, "dump: failed to write CPU status", errp);
            return;
        }
    }
}

306
/*
 * Write the single section header of the vmcore.
 *
 * @type selects the ELF class: 0 writes an Elf32_Shdr, any other value an
 * Elf64_Shdr.  The header is all zeroes except sh_info, which carries
 * s->sh_info in the dump's byte order.  A failed write aborts the dump
 * through dump_error().
 */
static void write_elf_section(DumpState *s, int type, Error **errp)
{
    Elf32_Shdr shdr32;
    Elf64_Shdr shdr64;
    size_t shdr_size;
    const void *shdr;

    if (type == 0) {
        shdr_size = sizeof(Elf32_Shdr);
        memset(&shdr32, 0, shdr_size);
        shdr32.sh_info = cpu_to_dump32(s, s->sh_info);
        shdr = &shdr32;
    } else {
        shdr_size = sizeof(Elf64_Shdr);
        memset(&shdr64, 0, shdr_size);
        shdr64.sh_info = cpu_to_dump32(s, s->sh_info);
        shdr = &shdr64;
    }

    /*
     * Pass the header itself.  Passing &shdr would write the bytes of the
     * local pointer variable (and whatever follows it on the stack) instead
     * of the section header.
     */
    if (fd_write_vmcore(shdr, shdr_size, s) < 0) {
        dump_error(s, "dump: failed to write section header table", errp);
    }
}

332
/*
 * Write @length bytes from @buf to the vmcore.  A failed write aborts the
 * dump through dump_error().
 */
static void write_data(DumpState *s, void *buf, int length, Error **errp)
{
    if (fd_write_vmcore(buf, length, s) >= 0) {
        return;
    }

    dump_error(s, "dump: failed to save memory", errp);
}

342 343 344
/*
 * write the memory to vmcore. 1 page per I/O.
 *
 * @size bytes of @block's host memory, starting @start bytes into the block,
 * are written in TARGET_PAGE_SIZE pieces followed by one shorter write for
 * any remainder.  The first error is propagated to @errp and stops the copy.
 */
static void write_memory(DumpState *s, GuestPhysBlock *block, ram_addr_t start,
                         int64_t size, Error **errp)
{
    const int64_t full_pages = size / TARGET_PAGE_SIZE;
    const int64_t tail = size % TARGET_PAGE_SIZE;
    int64_t page;
    Error *local_err = NULL;

    for (page = 0; page < full_pages; page++) {
        write_data(s, block->host_addr + start + page * TARGET_PAGE_SIZE,
                   TARGET_PAGE_SIZE, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }
    }

    if (tail == 0) {
        return;
    }

    /* trailing partial page; page now indexes the first unwritten page */
    write_data(s, block->host_addr + start + page * TARGET_PAGE_SIZE,
               tail, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
    }
}

368 369 370 371 372 373
/*
 * get the memory's offset and size in the vmcore
 *
 * Looks up the guest-physical block (restricted to the filter range when
 * s->has_filter is set) that contains @phys_addr.  On a hit, *p_offset is
 * the file offset of @phys_addr and *p_filesz the number of bytes of the
 * mapping stored in the file.  Otherwise *p_offset is -1 and *p_filesz is 0.
 */
static void get_offset_range(hwaddr phys_addr,
                             ram_addr_t mapping_length,
                             DumpState *s,
                             hwaddr *p_offset,
                             hwaddr *p_filesz)
{
    GuestPhysBlock *block;
    hwaddr file_pos = s->memory_offset;
    int64_t span, first;

    /* When the memory is not stored into vmcore, offset will be -1 */
    *p_offset = -1;
    *p_filesz = 0;

    if (s->has_filter &&
        (phys_addr < s->begin || phys_addr >= s->begin + s->length)) {
        return;
    }

    QTAILQ_FOREACH(block, &s->guest_phys_blocks.head, next) {
        if (!s->has_filter) {
            first = block->target_start;
            span = block->target_end - block->target_start;
        } else {
            if (block->target_start >= s->begin + s->length ||
                block->target_end <= s->begin) {
                /* This block is out of the range */
                continue;
            }

            /* clip the block to the start of the filter range */
            if (s->begin <= block->target_start) {
                first = block->target_start;
            } else {
                first = s->begin;
            }

            /* ... and to the end of the filter range */
            span = block->target_end - first;
            if (s->begin + s->length < block->target_end) {
                span -= block->target_end - (s->begin + s->length);
            }
        }

        if (phys_addr >= first && phys_addr < first + span) {
            *p_offset = phys_addr - first + file_pos;

            /*
             * The offset range mapped from the vmcore file must not spill
             * over the GuestPhysBlock, clamp it. The rest of the mapping will
             * be zero-filled in memory at load time; see
             * <http://refspecs.linuxbase.org/elf/gabi4+/ch5.pheader.html>.
             */
            if (phys_addr + mapping_length <= first + span) {
                *p_filesz = mapping_length;
            } else {
                *p_filesz = span - (phys_addr - first);
            }
            return;
        }

        /* the (clipped) block precedes phys_addr in the file */
        file_pos += span;
    }
}

430
/*
 * Write one PT_LOAD program header per memory mapping in s->list, in the
 * ELF class selected by s->dump_info.d_class.
 *
 * The header counter starts at 1 and writing stops once it reaches
 * s->sh_info (when s->have_section is set) or s->phdr_num.  The first error
 * is propagated to @errp and stops the iteration.
 */
static void write_elf_loads(DumpState *s, Error **errp)
{
    MemoryMapping *mapping;
    hwaddr file_offset, file_size;
    uint32_t next_index = 1;
    uint32_t limit;
    Error *local_err = NULL;

    limit = s->have_section ? s->sh_info : s->phdr_num;

    QTAILQ_FOREACH(mapping, &s->list.head, next) {
        get_offset_range(mapping->phys_addr, mapping->length,
                         s, &file_offset, &file_size);

        if (s->dump_info.d_class == ELFCLASS64) {
            write_elf64_load(s, mapping, next_index, file_offset,
                             file_size, &local_err);
        } else {
            write_elf32_load(s, mapping, next_index, file_offset,
                             file_size, &local_err);
        }
        next_index++;

        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }

        if (next_index >= limit) {
            break;
        }
    }
}

/* write elf header, PT_NOTE and elf note to vmcore. */
468
static void dump_begin(DumpState *s, Error **errp)
469
{
470
    Error *local_err = NULL;
471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497

    /*
     * the vmcore's format is:
     *   --------------
     *   |  elf header |
     *   --------------
     *   |  PT_NOTE    |
     *   --------------
     *   |  PT_LOAD    |
     *   --------------
     *   |  ......     |
     *   --------------
     *   |  PT_LOAD    |
     *   --------------
     *   |  sec_hdr    |
     *   --------------
     *   |  elf note   |
     *   --------------
     *   |  memory     |
     *   --------------
     *
     * we only know where the memory is saved after we write elf note into
     * vmcore.
     */

    /* write elf header to vmcore */
    if (s->dump_info.d_class == ELFCLASS64) {
498
        write_elf64_header(s, &local_err);
499
    } else {
500
        write_elf32_header(s, &local_err);
501
    }
502 503 504
    if (local_err) {
        error_propagate(errp, local_err);
        return;
505 506 507 508
    }

    if (s->dump_info.d_class == ELFCLASS64) {
        /* write PT_NOTE to vmcore */
509 510 511 512
        write_elf64_note(s, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
513 514 515
        }

        /* write all PT_LOAD to vmcore */
516 517 518 519
        write_elf_loads(s, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
520 521 522 523
        }

        /* write section to vmcore */
        if (s->have_section) {
524 525 526 527
            write_elf_section(s, 1, &local_err);
            if (local_err) {
                error_propagate(errp, local_err);
                return;
528 529 530 531
            }
        }

        /* write notes to vmcore */
532 533 534 535
        write_elf64_notes(fd_write_vmcore, s, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
536 537 538
        }
    } else {
        /* write PT_NOTE to vmcore */
539 540 541 542
        write_elf32_note(s, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
543 544 545
        }

        /* write all PT_LOAD to vmcore */
546 547 548 549
        write_elf_loads(s, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
550 551 552 553
        }

        /* write section to vmcore */
        if (s->have_section) {
554 555 556 557
            write_elf_section(s, 0, &local_err);
            if (local_err) {
                error_propagate(errp, local_err);
                return;
558 559 560 561
            }
        }

        /* write notes to vmcore */
562 563 564 565
        write_elf32_notes(fd_write_vmcore, s, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
566 567 568 569
        }
    }
}

570
/* Finish a dump by running dump_cleanup(); its return value is ignored. */
static void dump_completed(DumpState *s)
{
    (void)dump_cleanup(s);
}

575
/*
 * Advance to the next guest-physical block after @block that should be
 * dumped and record it in s->next_block, with s->start set to the offset
 * inside the block at which dumping begins.
 *
 * When s->has_filter is set, blocks entirely outside
 * [s->begin, s->begin + s->length) are skipped.
 *
 * Returns 0 when a block was selected, 1 when there are no more blocks.
 */
static int get_next_block(DumpState *s, GuestPhysBlock *block)
{
    for (block = QTAILQ_NEXT(block, next); block;
         block = QTAILQ_NEXT(block, next)) {
        s->start = 0;
        s->next_block = block;

        if (!s->has_filter) {
            return 0;
        }

        if (block->target_start >= s->begin + s->length ||
            block->target_end <= s->begin) {
            /* This block is out of the range */
            continue;
        }

        /* the filter range starts inside this block */
        if (s->begin > block->target_start) {
            s->start = s->begin - block->target_start;
        }
        return 0;
    }

    /* no more block */
    return 1;
}

/* write all memory to vmcore */
603
static void dump_iterate(DumpState *s, Error **errp)
604
{
605
    GuestPhysBlock *block;
606
    int64_t size;
607
    Error *local_err = NULL;
608

609
    do {
610
        block = s->next_block;
611

612
        size = block->target_end - block->target_start;
613 614
        if (s->has_filter) {
            size -= s->start;
615 616
            if (s->begin + s->length < block->target_end) {
                size -= block->target_end - (s->begin + s->length);
617 618
            }
        }
619 620 621 622
        write_memory(s, block, s->start, size, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
623 624
        }

625 626 627
    } while (!get_next_block(s, block));

    dump_completed(s);
628 629
}

630
/*
 * Produce the ELF vmcore: write the headers and notes with dump_begin(),
 * then all memory with dump_iterate().  Memory is not written when
 * dump_begin() fails; its error is propagated to @errp instead.
 */
static void create_vmcore(DumpState *s, Error **errp)
{
    Error *begin_err = NULL;

    dump_begin(s, &begin_err);
    if (!begin_err) {
        dump_iterate(s, errp);
        return;
    }

    error_propagate(errp, begin_err);
}

643 644
static int write_start_flat_header(int fd)
{
645
    MakedumpfileHeader *mh;
646 647
    int ret = 0;

648 649
    QEMU_BUILD_BUG_ON(sizeof *mh > MAX_SIZE_MDF_HEADER);
    mh = g_malloc0(MAX_SIZE_MDF_HEADER);
650

651 652
    memcpy(mh->signature, MAKEDUMPFILE_SIGNATURE,
           MIN(sizeof mh->signature, sizeof MAKEDUMPFILE_SIGNATURE));
653

654 655
    mh->type = cpu_to_be64(TYPE_FLAT_HEADER);
    mh->version = cpu_to_be64(VERSION_FLAT_HEADER);
656 657

    size_t written_size;
658
    written_size = qemu_write_full(fd, mh, MAX_SIZE_MDF_HEADER);
659 660 661 662
    if (written_size != MAX_SIZE_MDF_HEADER) {
        ret = -1;
    }

663
    g_free(mh);
664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682
    return ret;
}

/*
 * Write the end marker of the flat format to @fd: a data header whose offset
 * and buf_size are both END_FLAG_FLAT_HEADER.  Returns 0 on success, -1 when
 * the header could not be written completely.
 */
static int write_end_flat_header(int fd)
{
    MakedumpfileDataHeader mdh;
    size_t written_size;

    mdh.offset = END_FLAG_FLAT_HEADER;
    mdh.buf_size = END_FLAG_FLAT_HEADER;

    written_size = qemu_write_full(fd, &mdh, sizeof(mdh));

    return written_size == sizeof(mdh) ? 0 : -1;
}

Q
qiaonuohan 已提交
683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703
/*
 * Write one flat-format record to @fd: a data header holding @offset and
 * @size in big-endian byte order, followed by @size bytes from @buf.
 * Returns 0 on success, -1 when either part could not be written completely.
 */
static int write_buffer(int fd, off_t offset, const void *buf, size_t size)
{
    MakedumpfileDataHeader mdh;
    size_t done;

    mdh.offset = cpu_to_be64(offset);
    mdh.buf_size = cpu_to_be64(size);

    /* record header first ... */
    done = qemu_write_full(fd, &mdh, sizeof(mdh));
    if (done != sizeof(mdh)) {
        return -1;
    }

    /* ... then the payload */
    done = qemu_write_full(fd, buf, size);
    return done == size ? 0 : -1;
}

704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719
/*
 * Write callback that appends @size bytes from @buf to s->note_buf at
 * s->note_buf_offset instead of writing to the file.
 *
 * @opaque is the DumpState.  Returns 0 on success, -1 (without copying) when
 * the data would not fit within s->note_size.
 */
static int buf_write_note(const void *buf, size_t size, void *opaque)
{
    DumpState *s = opaque;

    if (s->note_buf_offset + size <= s->note_size) {
        memcpy(s->note_buf + s->note_buf_offset, buf, size);
        s->note_buf_offset += size;
        return 0;
    }

    /* note_buf is not enough */
    return -1;
}

Q
qiaonuohan 已提交
720
/* write common header, sub header and elf note to vmcore */
721
static void create_header32(DumpState *s, Error **errp)
Q
qiaonuohan 已提交
722 723 724 725 726 727 728 729 730
{
    DiskDumpHeader32 *dh = NULL;
    KdumpSubHeader32 *kh = NULL;
    size_t size;
    uint32_t block_size;
    uint32_t sub_hdr_size;
    uint32_t bitmap_blocks;
    uint32_t status = 0;
    uint64_t offset_note;
731
    Error *local_err = NULL;
Q
qiaonuohan 已提交
732 733 734 735 736 737

    /* write common header, the version of kdump-compressed format is 6th */
    size = sizeof(DiskDumpHeader32);
    dh = g_malloc0(size);

    strncpy(dh->signature, KDUMP_SIGNATURE, strlen(KDUMP_SIGNATURE));
738
    dh->header_version = cpu_to_dump32(s, 6);
739
    block_size = TARGET_PAGE_SIZE;
740
    dh->block_size = cpu_to_dump32(s, block_size);
Q
qiaonuohan 已提交
741 742
    sub_hdr_size = sizeof(struct KdumpSubHeader32) + s->note_size;
    sub_hdr_size = DIV_ROUND_UP(sub_hdr_size, block_size);
743
    dh->sub_hdr_size = cpu_to_dump32(s, sub_hdr_size);
Q
qiaonuohan 已提交
744
    /* dh->max_mapnr may be truncated, full 64bit is in kh.max_mapnr_64 */
745 746
    dh->max_mapnr = cpu_to_dump32(s, MIN(s->max_mapnr, UINT_MAX));
    dh->nr_cpus = cpu_to_dump32(s, s->nr_cpus);
Q
qiaonuohan 已提交
747
    bitmap_blocks = DIV_ROUND_UP(s->len_dump_bitmap, block_size) * 2;
748
    dh->bitmap_blocks = cpu_to_dump32(s, bitmap_blocks);
749
    strncpy(dh->utsname.machine, ELF_MACHINE_UNAME, sizeof(dh->utsname.machine));
Q
qiaonuohan 已提交
750 751 752 753 754 755 756 757 758 759 760 761 762 763

    if (s->flag_compress & DUMP_DH_COMPRESSED_ZLIB) {
        status |= DUMP_DH_COMPRESSED_ZLIB;
    }
#ifdef CONFIG_LZO
    if (s->flag_compress & DUMP_DH_COMPRESSED_LZO) {
        status |= DUMP_DH_COMPRESSED_LZO;
    }
#endif
#ifdef CONFIG_SNAPPY
    if (s->flag_compress & DUMP_DH_COMPRESSED_SNAPPY) {
        status |= DUMP_DH_COMPRESSED_SNAPPY;
    }
#endif
764
    dh->status = cpu_to_dump32(s, status);
Q
qiaonuohan 已提交
765 766

    if (write_buffer(s->fd, 0, dh, size) < 0) {
767
        dump_error(s, "dump: failed to write disk dump header", errp);
Q
qiaonuohan 已提交
768 769 770 771 772 773 774 775
        goto out;
    }

    /* write sub header */
    size = sizeof(KdumpSubHeader32);
    kh = g_malloc0(size);

    /* 64bit max_mapnr_64 */
776 777 778
    kh->max_mapnr_64 = cpu_to_dump64(s, s->max_mapnr);
    kh->phys_base = cpu_to_dump32(s, PHYS_BASE);
    kh->dump_level = cpu_to_dump32(s, DUMP_LEVEL);
Q
qiaonuohan 已提交
779 780

    offset_note = DISKDUMP_HEADER_BLOCKS * block_size + size;
781 782
    kh->offset_note = cpu_to_dump64(s, offset_note);
    kh->note_size = cpu_to_dump32(s, s->note_size);
Q
qiaonuohan 已提交
783 784 785

    if (write_buffer(s->fd, DISKDUMP_HEADER_BLOCKS *
                     block_size, kh, size) < 0) {
786
        dump_error(s, "dump: failed to write kdump sub header", errp);
Q
qiaonuohan 已提交
787 788 789 790 791 792 793 794
        goto out;
    }

    /* write note */
    s->note_buf = g_malloc0(s->note_size);
    s->note_buf_offset = 0;

    /* use s->note_buf to store notes temporarily */
795 796 797
    write_elf32_notes(buf_write_note, s, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
Q
qiaonuohan 已提交
798 799 800 801
        goto out;
    }
    if (write_buffer(s->fd, offset_note, s->note_buf,
                     s->note_size) < 0) {
802
        dump_error(s, "dump: failed to write notes", errp);
Q
qiaonuohan 已提交
803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820
        goto out;
    }

    /* get offset of dump_bitmap */
    s->offset_dump_bitmap = (DISKDUMP_HEADER_BLOCKS + sub_hdr_size) *
                             block_size;

    /* get offset of page */
    s->offset_page = (DISKDUMP_HEADER_BLOCKS + sub_hdr_size + bitmap_blocks) *
                     block_size;

out:
    g_free(dh);
    g_free(kh);
    g_free(s->note_buf);
}

/* write common header, sub header and elf note to vmcore */
821
static void create_header64(DumpState *s, Error **errp)
Q
qiaonuohan 已提交
822 823 824 825 826 827 828 829 830
{
    DiskDumpHeader64 *dh = NULL;
    KdumpSubHeader64 *kh = NULL;
    size_t size;
    uint32_t block_size;
    uint32_t sub_hdr_size;
    uint32_t bitmap_blocks;
    uint32_t status = 0;
    uint64_t offset_note;
831
    Error *local_err = NULL;
Q
qiaonuohan 已提交
832 833 834 835 836 837

    /* write common header, the version of kdump-compressed format is 6th */
    size = sizeof(DiskDumpHeader64);
    dh = g_malloc0(size);

    strncpy(dh->signature, KDUMP_SIGNATURE, strlen(KDUMP_SIGNATURE));
838
    dh->header_version = cpu_to_dump32(s, 6);
839
    block_size = TARGET_PAGE_SIZE;
840
    dh->block_size = cpu_to_dump32(s, block_size);
Q
qiaonuohan 已提交
841 842
    sub_hdr_size = sizeof(struct KdumpSubHeader64) + s->note_size;
    sub_hdr_size = DIV_ROUND_UP(sub_hdr_size, block_size);
843
    dh->sub_hdr_size = cpu_to_dump32(s, sub_hdr_size);
Q
qiaonuohan 已提交
844
    /* dh->max_mapnr may be truncated, full 64bit is in kh.max_mapnr_64 */
845 846
    dh->max_mapnr = cpu_to_dump32(s, MIN(s->max_mapnr, UINT_MAX));
    dh->nr_cpus = cpu_to_dump32(s, s->nr_cpus);
Q
qiaonuohan 已提交
847
    bitmap_blocks = DIV_ROUND_UP(s->len_dump_bitmap, block_size) * 2;
848
    dh->bitmap_blocks = cpu_to_dump32(s, bitmap_blocks);
849
    strncpy(dh->utsname.machine, ELF_MACHINE_UNAME, sizeof(dh->utsname.machine));
Q
qiaonuohan 已提交
850 851 852 853 854 855 856 857 858 859 860 861 862 863

    if (s->flag_compress & DUMP_DH_COMPRESSED_ZLIB) {
        status |= DUMP_DH_COMPRESSED_ZLIB;
    }
#ifdef CONFIG_LZO
    if (s->flag_compress & DUMP_DH_COMPRESSED_LZO) {
        status |= DUMP_DH_COMPRESSED_LZO;
    }
#endif
#ifdef CONFIG_SNAPPY
    if (s->flag_compress & DUMP_DH_COMPRESSED_SNAPPY) {
        status |= DUMP_DH_COMPRESSED_SNAPPY;
    }
#endif
864
    dh->status = cpu_to_dump32(s, status);
Q
qiaonuohan 已提交
865 866

    if (write_buffer(s->fd, 0, dh, size) < 0) {
867
        dump_error(s, "dump: failed to write disk dump header", errp);
Q
qiaonuohan 已提交
868 869 870 871 872 873 874 875
        goto out;
    }

    /* write sub header */
    size = sizeof(KdumpSubHeader64);
    kh = g_malloc0(size);

    /* 64bit max_mapnr_64 */
876 877 878
    kh->max_mapnr_64 = cpu_to_dump64(s, s->max_mapnr);
    kh->phys_base = cpu_to_dump64(s, PHYS_BASE);
    kh->dump_level = cpu_to_dump32(s, DUMP_LEVEL);
Q
qiaonuohan 已提交
879 880

    offset_note = DISKDUMP_HEADER_BLOCKS * block_size + size;
881 882
    kh->offset_note = cpu_to_dump64(s, offset_note);
    kh->note_size = cpu_to_dump64(s, s->note_size);
Q
qiaonuohan 已提交
883 884 885

    if (write_buffer(s->fd, DISKDUMP_HEADER_BLOCKS *
                     block_size, kh, size) < 0) {
886
        dump_error(s, "dump: failed to write kdump sub header", errp);
Q
qiaonuohan 已提交
887 888 889 890 891 892 893 894
        goto out;
    }

    /* write note */
    s->note_buf = g_malloc0(s->note_size);
    s->note_buf_offset = 0;

    /* use s->note_buf to store notes temporarily */
895 896 897
    write_elf64_notes(buf_write_note, s, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
Q
qiaonuohan 已提交
898 899 900 901 902
        goto out;
    }

    if (write_buffer(s->fd, offset_note, s->note_buf,
                     s->note_size) < 0) {
903
        dump_error(s, "dump: failed to write notes", errp);
Q
qiaonuohan 已提交
904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920
        goto out;
    }

    /* get offset of dump_bitmap */
    s->offset_dump_bitmap = (DISKDUMP_HEADER_BLOCKS + sub_hdr_size) *
                             block_size;

    /* get offset of page */
    s->offset_page = (DISKDUMP_HEADER_BLOCKS + sub_hdr_size + bitmap_blocks) *
                     block_size;

out:
    g_free(dh);
    g_free(kh);
    g_free(s->note_buf);
}

921
/*
 * Write the kdump-compressed headers in the layout matching the dump's ELF
 * class: create_header32() for ELFCLASS32, create_header64() otherwise.
 * Any error is propagated to @errp.
 */
static void write_dump_header(DumpState *s, Error **errp)
{
    Error *hdr_err = NULL;

    if (s->dump_info.d_class != ELFCLASS32) {
        create_header64(s, &hdr_err);
    } else {
        create_header32(s, &hdr_err);
    }

    if (hdr_err) {
        error_propagate(errp, hdr_err);
    }
}

Q
qiaonuohan 已提交
935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009
/*
 * set dump_bitmap sequentially. the bit before last_pfn is not allowed to be
 * rewritten, so if need to set the first bit, set last_pfn and pfn to 0.
 * set_dump_bitmap will always leave the recently set bit un-sync. And setting
 * (last bit + sizeof(buf) * 8) to 0 will do flushing the content in buf into
 * vmcore, ie. synchronizing un-sync bit into vmcore.
 *
 * Returns 0 on success, -1 when flushing buf to the vmcore fails.
 */
static int set_dump_bitmap(uint64_t last_pfn, uint64_t pfn, bool value,
                           uint8_t *buf, DumpState *s)
{
    off_t flushed, target;
    off_t first_copy, second_copy;
    uint32_t byte, bit;
    uint64_t pos_in_buf;

    /* should not set the previous place */
    assert(last_pfn <= pfn);

    /*
     * if the bit needed to be set is not cached in buf, flush the data in buf
     * to vmcore firstly.
     * making target be bigger than flushed can also sync remained data
     * into vmcore.
     */
    flushed = BUFSIZE_BITMAP * (last_pfn / PFN_BUFBITMAP);
    target = BUFSIZE_BITMAP * (pfn / PFN_BUFBITMAP);

    for (; flushed < target; flushed += BUFSIZE_BITMAP) {
        /* calculate the offset and write dump_bitmap */
        first_copy = s->offset_dump_bitmap + flushed;
        if (write_buffer(s->fd, first_copy, buf, BUFSIZE_BITMAP) < 0) {
            return -1;
        }

        /* dump level 1 is chosen, so 1st and 2nd bitmap are same */
        second_copy = s->offset_dump_bitmap + s->len_dump_bitmap + flushed;
        if (write_buffer(s->fd, second_copy, buf, BUFSIZE_BITMAP) < 0) {
            return -1;
        }

        /* start the next chunk of the bitmap with all bits clear */
        memset(buf, 0, BUFSIZE_BITMAP);
    }

    /* get the exact place of the bit in the buf, and set it */
    pos_in_buf = pfn % PFN_BUFBITMAP;
    byte = pos_in_buf / CHAR_BIT;
    bit = pos_in_buf % CHAR_BIT;
    if (!value) {
        buf[byte] &= ~(1u << bit);
    } else {
        buf[byte] |= 1u << bit;
    }

    return 0;
}

/*
 * Step through guest memory one page at a time, reporting the page frame
 * number of each page and the host address of its contents.
 *
 * *blockptr == NULL starts the iteration at the first guest-phys block;
 * afterwards *blockptr and *pfnptr carry the iteration state between calls.
 * bufptr may be NULL when the page contents are not needed.
 *
 * Returns true when a page was produced, false once the last block has been
 * exhausted (*blockptr is NULL in that case).
 *
 * Note: the blocks are supposed to reflect guest-phys blocks, so
 * block->target_start and block->target_end must be integral multiples of
 * the target page size.
 */
static bool get_next_page(GuestPhysBlock **blockptr, uint64_t *pfnptr,
                          uint8_t **bufptr, DumpState *s)
{
    GuestPhysBlock *cur = *blockptr;
    uint8_t *page = NULL;
    bool enter_block = false;

    if (cur == NULL) {
        /* start of the iteration: begin with the first block */
        cur = QTAILQ_FIRST(&s->guest_phys_blocks.head);
        enter_block = true;
    } else {
        hwaddr next_addr;

        *pfnptr += 1;
        next_addr = pfn_to_paddr(*pfnptr);

        if (next_addr >= cur->target_start &&
            next_addr + TARGET_PAGE_SIZE <= cur->target_end) {
            /* the following page still belongs to the current block */
            page = cur->host_addr + (next_addr - cur->target_start);
        } else {
            /* the following page is in the next block, if there is one */
            cur = QTAILQ_NEXT(cur, next);
            if (cur == NULL) {
                *blockptr = NULL;
                return false;
            }
            enter_block = true;
        }
    }

    if (enter_block) {
        /* report the first page of the block just entered */
        *blockptr = cur;
        assert((cur->target_start & ~TARGET_PAGE_MASK) == 0);
        assert((cur->target_end & ~TARGET_PAGE_MASK) == 0);
        *pfnptr = paddr_to_pfn(cur->target_start);
        page = cur->host_addr;
    }

    if (bufptr != NULL) {
        *bufptr = page;
    }

    return true;
}

1045
/*
 * Write the dump_bitmap: one set bit for every page returned by
 * get_next_page(). The number of such pages is stored in s->num_dumpable.
 * On a write failure the error is reported through dump_error() and
 * s->num_dumpable is left untouched.
 */
static void write_dump_bitmap(DumpState *s, Error **errp)
{
    GuestPhysBlock *cursor = NULL;
    uint64_t prev_pfn = 0;
    uint64_t cur_pfn;
    size_t count = 0;
    /* scratch window holding the part of the bitmap being built */
    uint8_t *bitmap = g_malloc0(BUFSIZE_BITMAP);

    /* walk memory page by page, marking each existing page in the bitmap */
    while (get_next_page(&cursor, &cur_pfn, NULL, s)) {
        if (set_dump_bitmap(prev_pfn, cur_pfn, true, bitmap, s) < 0) {
            dump_error(s, "dump: failed to set dump_bitmap", errp);
            goto out;
        }

        prev_pfn = cur_pfn;
        count++;
    }

    /*
     * set_dump_bitmap() always leaves the most recently set bit
     * unsynchronized. Clearing bit (prev_pfn + PFN_BUFBITMAP) moves to the
     * next window and thereby pushes the pending bits into the vmcore.
     */
    if (count > 0 &&
        set_dump_bitmap(prev_pfn, prev_pfn + PFN_BUFBITMAP, false,
                        bitmap, s) < 0) {
        dump_error(s, "dump: failed to sync dump_bitmap", errp);
        goto out;
    }

    /* number of dumpable pages that will be dumped later */
    s->num_dumpable = count;

out:
    g_free(bitmap);
}

Q
qiaonuohan 已提交
1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141
/*
 * Initialize data_cache as an empty write cache for s->fd: a zero-filled
 * buffer of BUFSIZE_DATA_CACHE bytes whose contents will be written starting
 * at the given file offset.
 */
static void prepare_data_cache(DataCache *data_cache, DumpState *s,
                               off_t offset)
{
    /* where the cached bytes end up */
    data_cache->fd = s->fd;
    data_cache->offset = offset;

    /* the cache itself, initially empty */
    data_cache->buf = g_malloc0(BUFSIZE_DATA_CACHE);
    data_cache->buf_size = BUFSIZE_DATA_CACHE;
    data_cache->data_size = 0;
}

/*
 * Append size bytes from buf to the cache dc, or flush the cache.
 *
 * With flag_sync clear, the data is appended; if it does not fit into the
 * remaining space, the cached bytes are first written to dc->fd at
 * dc->offset and the cache is reset. With flag_sync set, buf is ignored and
 * any cached bytes are written out.
 *
 * Returns 0 on success, -1 if writing the cached bytes fails.
 */
static int write_cache(DataCache *dc, const void *buf, size_t size,
                       bool flag_sync)
{
    bool must_flush;

    /* a chunk larger than the whole cache could never be stored */
    assert(size <= dc->buf_size);

    if (flag_sync) {
        must_flush = dc->data_size > 0;
    } else {
        must_flush = dc->data_size + size > dc->buf_size;
    }

    if (must_flush) {
        if (write_buffer(dc->fd, dc->offset, dc->buf, dc->data_size) < 0) {
            return -1;
        }

        /* the cache now continues right behind what was just written */
        dc->offset += dc->data_size;
        dc->data_size = 0;
    }

    if (flag_sync) {
        return 0;
    }

    memcpy(dc->buf + dc->data_size, buf, size);
    dc->data_size += size;

    return 0;
}

/*
 * Release the buffer owned by data_cache. The pointer is reset afterwards so
 * that calling this again, or touching the cache by mistake, cannot reach
 * freed memory (g_free(NULL) is a no-op).
 */
static void free_data_cache(DataCache *data_cache)
{
    g_free(data_cache->buf);
    data_cache->buf = NULL;
}

Q
qiaonuohan 已提交
1142 1143
static size_t get_len_buf_out(size_t page_size, uint32_t flag_compress)
{
L
Laszlo Ersek 已提交
1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154
    switch (flag_compress) {
    case DUMP_DH_COMPRESSED_ZLIB:
        return compressBound(page_size);

    case DUMP_DH_COMPRESSED_LZO:
        /*
         * LZO will expand incompressible data by a little amount. Please check
         * the following URL to see the expansion calculation:
         * http://www.oberhumer.com/opensource/lzo/lzofaq.php
         */
        return page_size + page_size / 16 + 64 + 3;
Q
qiaonuohan 已提交
1155 1156

#ifdef CONFIG_SNAPPY
L
Laszlo Ersek 已提交
1157 1158
    case DUMP_DH_COMPRESSED_SNAPPY:
        return snappy_max_compressed_length(page_size);
Q
qiaonuohan 已提交
1159
#endif
L
Laszlo Ersek 已提交
1160 1161
    }
    return 0;
Q
qiaonuohan 已提交
1162 1163 1164 1165 1166 1167 1168 1169 1170 1171
}

/*
 * Tell whether the page_size bytes starting at buf are all zero, as decided
 * by buffer_is_zero().
 */
static inline bool is_zero_page(const uint8_t *buf, size_t page_size)
{
    const bool all_zero = buffer_is_zero(buf, page_size);

    return all_zero;
}

1172
/*
 * Write the page descriptors and the page data of a kdump-compressed dump.
 *
 * The descriptor table starts at s->offset_page and holds one PageDescriptor
 * per page returned by get_next_page(); the page data follows directly after
 * room for s->num_dumpable descriptors. A single zero-filled page is stored
 * first in the data area and is shared by the descriptors of all zero pages.
 * Every other page is compressed with the method selected in
 * s->flag_compress; if compression fails or does not make the page smaller,
 * the page is stored uncompressed.
 *
 * Failures are reported through dump_error() and errp.
 */
static void write_dump_pages(DumpState *s, Error **errp)
{
    int ret = 0;
    DataCache page_desc, page_data;
    size_t len_buf_out, size_out;
#ifdef CONFIG_LZO
    lzo_bytep wrkmem = NULL;
#endif
    uint8_t *buf_out = NULL;
    off_t offset_desc, offset_data;
    PageDescriptor pd, pd_zero;
    uint8_t *buf;
    GuestPhysBlock *block_iter = NULL;
    uint64_t pfn_iter;

    /* get offset of page_desc and page_data in dump file */
    offset_desc = s->offset_page;
    offset_data = offset_desc + sizeof(PageDescriptor) * s->num_dumpable;

    prepare_data_cache(&page_desc, s, offset_desc);
    prepare_data_cache(&page_data, s, offset_data);

    /* prepare buffer to store compressed data */
    len_buf_out = get_len_buf_out(TARGET_PAGE_SIZE, s->flag_compress);
    assert(len_buf_out != 0);

#ifdef CONFIG_LZO
    wrkmem = g_malloc(LZO1X_1_MEM_COMPRESS);
#endif

    buf_out = g_malloc(len_buf_out);

    /*
     * init zero page's page_desc and page_data, because every zero page
     * uses the same page_data
     */
    pd_zero.size = cpu_to_dump32(s, TARGET_PAGE_SIZE);
    pd_zero.flags = cpu_to_dump32(s, 0);
    pd_zero.offset = cpu_to_dump64(s, offset_data);
    pd_zero.page_flags = cpu_to_dump64(s, 0);
    buf = g_malloc0(TARGET_PAGE_SIZE);
    ret = write_cache(&page_data, buf, TARGET_PAGE_SIZE, false);
    g_free(buf);
    if (ret < 0) {
        dump_error(s, "dump: failed to write page data (zero page)", errp);
        goto out;
    }

    offset_data += TARGET_PAGE_SIZE;

    /*
     * dump memory to vmcore page by page. All zero pages refer to the first
     * page of the page data section.
     */
    while (get_next_page(&block_iter, &pfn_iter, &buf, s)) {
        const uint8_t *payload;
        uint32_t flags;

        if (is_zero_page(buf, TARGET_PAGE_SIZE)) {
            ret = write_cache(&page_desc, &pd_zero, sizeof(PageDescriptor),
                              false);
            if (ret < 0) {
                dump_error(s, "dump: failed to write page desc", errp);
                goto out;
            }
            continue;
        }

        /*
         * not zero page, then:
         * 1. compress the page
         * 2. write the compressed page into the cache of page_data
         * 3. get page desc of the compressed page and write it into the
         *    cache of page_desc
         *
         * Start from the plaintext fallback; a compressor below replaces it
         * only when it succeeds and actually shrinks the page. Each
         * compressor gets a length variable of its own API type instead of
         * a casted pointer to size_out, whose width may differ.
         */
        payload = buf;
        flags = 0;
        size_out = TARGET_PAGE_SIZE;

        if (s->flag_compress & DUMP_DH_COMPRESSED_ZLIB) {
            uLongf zlib_len = len_buf_out;

            if (compress2(buf_out, &zlib_len, buf, TARGET_PAGE_SIZE,
                          Z_BEST_SPEED) == Z_OK &&
                zlib_len < TARGET_PAGE_SIZE) {
                payload = buf_out;
                flags = DUMP_DH_COMPRESSED_ZLIB;
                size_out = zlib_len;
            }
        }
#ifdef CONFIG_LZO
        if (payload == buf && (s->flag_compress & DUMP_DH_COMPRESSED_LZO)) {
            lzo_uint lzo_len = len_buf_out;

            if (lzo1x_1_compress(buf, TARGET_PAGE_SIZE, buf_out, &lzo_len,
                                 wrkmem) == LZO_E_OK &&
                lzo_len < TARGET_PAGE_SIZE) {
                payload = buf_out;
                flags = DUMP_DH_COMPRESSED_LZO;
                size_out = lzo_len;
            }
        }
#endif
#ifdef CONFIG_SNAPPY
        if (payload == buf &&
            (s->flag_compress & DUMP_DH_COMPRESSED_SNAPPY)) {
            /* in: capacity of buf_out, out: compressed length */
            size_t snappy_len = len_buf_out;

            if (snappy_compress((char *)buf, TARGET_PAGE_SIZE,
                                (char *)buf_out, &snappy_len) == SNAPPY_OK &&
                snappy_len < TARGET_PAGE_SIZE) {
                payload = buf_out;
                flags = DUMP_DH_COMPRESSED_SNAPPY;
                size_out = snappy_len;
            }
        }
#endif

        pd.flags = cpu_to_dump32(s, flags);
        pd.size = cpu_to_dump32(s, size_out);

        ret = write_cache(&page_data, payload, size_out, false);
        if (ret < 0) {
            dump_error(s, "dump: failed to write page data", errp);
            goto out;
        }

        /* get and write page desc here */
        pd.page_flags = cpu_to_dump64(s, 0);
        pd.offset = cpu_to_dump64(s, offset_data);
        offset_data += size_out;

        ret = write_cache(&page_desc, &pd, sizeof(PageDescriptor), false);
        if (ret < 0) {
            dump_error(s, "dump: failed to write page desc", errp);
            goto out;
        }
    }

    /* push whatever is still cached into the vmcore */
    ret = write_cache(&page_desc, NULL, 0, true);
    if (ret < 0) {
        dump_error(s, "dump: failed to sync cache for page_desc", errp);
        goto out;
    }
    ret = write_cache(&page_data, NULL, 0, true);
    if (ret < 0) {
        dump_error(s, "dump: failed to sync cache for page_data", errp);
        goto out;
    }

out:
    free_data_cache(&page_desc);
    free_data_cache(&page_data);

#ifdef CONFIG_LZO
    g_free(wrkmem);
#endif

    g_free(buf_out);
}

1339
/*
 * Produce a complete kdump-compressed vmcore on s->fd: start flat header,
 * dump header, dump bitmap, pages, end flat header. Processing stops at the
 * first failing stage, whose error is delivered through errp; on success
 * dump_completed() is called.
 */
static void create_kdump_vmcore(DumpState *s, Error **errp)
{
    Error *stage_err = NULL;

    /*
     * the kdump-compressed format is:
     *                                               File offset
     *  +------------------------------------------+ 0x0
     *  |    main header (struct disk_dump_header) |
     *  |------------------------------------------+ block 1
     *  |    sub header (struct kdump_sub_header)  |
     *  |------------------------------------------+ block 2
     *  |            1st-dump_bitmap               |
     *  |------------------------------------------+ block 2 + X blocks
     *  |            2nd-dump_bitmap               | (aligned by block)
     *  |------------------------------------------+ block 2 + 2 * X blocks
     *  |  page desc for pfn 0 (struct page_desc)  | (aligned by block)
     *  |  page desc for pfn 1 (struct page_desc)  |
     *  |                    :                     |
     *  |------------------------------------------| (not aligned by block)
     *  |         page data (pfn 0)                |
     *  |         page data (pfn 1)                |
     *  |                    :                     |
     *  +------------------------------------------+
     */

    if (write_start_flat_header(s->fd) < 0) {
        dump_error(s, "dump: failed to write start flat header", errp);
        return;
    }

    /* each stage runs only if all previous ones succeeded */
    write_dump_header(s, &stage_err);
    if (!stage_err) {
        write_dump_bitmap(s, &stage_err);
    }
    if (!stage_err) {
        write_dump_pages(s, &stage_err);
    }
    if (stage_err) {
        error_propagate(errp, stage_err);
        return;
    }

    if (write_end_flat_header(s->fd) < 0) {
        dump_error(s, "dump: failed to write end flat header", errp);
        return;
    }

    dump_completed(s);
}

1399 1400
/*
 * Select the guest-phys block at which dumping begins and store it in
 * s->next_block.
 *
 * Without a filter this is the first block and 0 is returned. With a filter
 * it is the first block overlapping [s->begin, s->begin + s->length); the
 * offset of s->begin inside that block (0 if the block starts at or after
 * s->begin) is stored in s->start and returned. Returns -1 when no block
 * overlaps the filter range.
 */
static ram_addr_t get_start_block(DumpState *s)
{
    GuestPhysBlock *blk;

    if (!s->has_filter) {
        s->next_block = QTAILQ_FIRST(&s->guest_phys_blocks.head);
        return 0;
    }

    QTAILQ_FOREACH(blk, &s->guest_phys_blocks.head, next) {
        bool overlaps = blk->target_start < s->begin + s->length &&
                        blk->target_end > s->begin;

        if (!overlaps) {
            /* This block is out of the range */
            continue;
        }

        s->next_block = blk;
        s->start = (s->begin > blk->target_start)
                   ? s->begin - blk->target_start
                   : 0;
        return s->start;
    }

    return -1;
}

1427 1428 1429 1430 1431
/*
 * Set s->max_mapnr to the page frame number that corresponds to the end
 * address (target_end) of the last guest-phys block.
 */
static void get_max_mapnr(DumpState *s)
{
    const GuestPhysBlock *tail =
        QTAILQ_LAST(&s->guest_phys_blocks.head, GuestPhysBlockHead);

    s->max_mapnr = paddr_to_pfn(tail->target_end);
}

1435 1436 1437
/*
 * Fill in the dump state s for a dump written to fd.
 *
 * The VM is stopped if it is running (s->resume records that), CPU state is
 * synchronized, the guest-phys block list and the memory mapping list are
 * built, and the values needed by the selected output format are computed:
 * s->flag_compress for the kdump-compressed formats, or the program header
 * count and s->memory_offset for ELF.
 *
 * On failure an error is set in errp and dump_cleanup() is called.
 */
static void dump_init(DumpState *s, int fd, bool has_format,
                      DumpGuestMemoryFormat format, bool paging, bool has_filter,
                      int64_t begin, int64_t length, Error **errp)
{
    const bool kdump = has_format && format != DUMP_GUEST_MEMORY_FORMAT_ELF;
    CPUState *cpu;
    Error *err = NULL;
    uint64_t bitmap_pages;
    size_t ehdr_size, phdr_size, shdr_size;
    int nr_cpus = 0;

    /* kdump-compressed is conflict with paging and filter */
    if (kdump) {
        assert(!paging && !has_filter);
    }

    if (runstate_is_running()) {
        vm_stop(RUN_STATE_SAVE_VM);
        s->resume = true;
    } else {
        s->resume = false;
    }

    /* If we use KVM, we should synchronize the registers before we get dump
     * info or physmap info.
     */
    cpu_synchronize_all_states();
    CPU_FOREACH(cpu) {
        nr_cpus++;
    }

    s->fd = fd;
    s->has_filter = has_filter;
    s->begin = begin;
    s->length = length;

    memory_mapping_list_init(&s->list);

    guest_phys_blocks_init(&s->guest_phys_blocks);
    guest_phys_blocks_append(&s->guest_phys_blocks);

    s->start = get_start_block(s);
    if (s->start == -1) {
        error_set(errp, QERR_INVALID_PARAMETER, "begin");
        goto cleanup;
    }

    /* get dump info: endian, class and architecture.
     * If the target architecture is not supported, cpu_get_dump_info() will
     * return -1.
     */
    if (cpu_get_dump_info(&s->dump_info, &s->guest_phys_blocks) < 0) {
        error_set(errp, QERR_UNSUPPORTED);
        goto cleanup;
    }

    s->note_size = cpu_get_note_size(s->dump_info.d_class,
                                     s->dump_info.d_machine, nr_cpus);
    if (s->note_size < 0) {
        error_set(errp, QERR_UNSUPPORTED);
        goto cleanup;
    }

    /* get memory mapping */
    if (!paging) {
        qemu_get_guest_simple_memory_mapping(&s->list, &s->guest_phys_blocks);
    } else {
        qemu_get_guest_memory_mapping(&s->list, &s->guest_phys_blocks, &err);
        if (err != NULL) {
            error_propagate(errp, err);
            goto cleanup;
        }
    }

    s->nr_cpus = nr_cpus;

    get_max_mapnr(s);

    /* one bit per page frame, rounded up to whole target pages */
    bitmap_pages = DIV_ROUND_UP(DIV_ROUND_UP(s->max_mapnr, CHAR_BIT),
                                TARGET_PAGE_SIZE);
    s->len_dump_bitmap = bitmap_pages * TARGET_PAGE_SIZE;

    /* init for kdump-compressed format */
    if (kdump) {
        switch (format) {
        case DUMP_GUEST_MEMORY_FORMAT_KDUMP_ZLIB:
            s->flag_compress = DUMP_DH_COMPRESSED_ZLIB;
            break;

        case DUMP_GUEST_MEMORY_FORMAT_KDUMP_LZO:
#ifdef CONFIG_LZO
            if (lzo_init() != LZO_E_OK) {
                error_setg(errp, "failed to initialize the LZO library");
                goto cleanup;
            }
#endif
            s->flag_compress = DUMP_DH_COMPRESSED_LZO;
            break;

        case DUMP_GUEST_MEMORY_FORMAT_KDUMP_SNAPPY:
            s->flag_compress = DUMP_DH_COMPRESSED_SNAPPY;
            break;

        default:
            s->flag_compress = 0;
            break;
        }

        /* the remaining setup is needed for the ELF format only */
        return;
    }

    if (s->has_filter) {
        memory_mapping_filter(&s->list, s->begin, s->length);
    }

    /*
     * calculate phdr_num
     *
     * the type of ehdr->e_phnum is uint16_t, so we should avoid overflow
     */
    s->phdr_num = 1; /* PT_NOTE */
    if (s->list.num < UINT16_MAX - 2) {
        s->phdr_num += s->list.num;
        s->have_section = false;
    } else {
        s->have_section = true;
        s->phdr_num = PN_XNUM;
        s->sh_info = 1; /* PT_NOTE */

        /* the type of shdr->sh_info is uint32_t, so we should avoid overflow */
        if (s->list.num <= UINT32_MAX - 1) {
            s->sh_info += s->list.num;
        } else {
            s->sh_info = UINT32_MAX;
        }
    }

    /* guest memory follows the ELF headers and the notes in the file */
    if (s->dump_info.d_class == ELFCLASS64) {
        ehdr_size = sizeof(Elf64_Ehdr);
        phdr_size = sizeof(Elf64_Phdr);
        shdr_size = sizeof(Elf64_Shdr);
    } else {
        ehdr_size = sizeof(Elf32_Ehdr);
        phdr_size = sizeof(Elf32_Phdr);
        shdr_size = sizeof(Elf32_Shdr);
    }

    if (s->have_section) {
        s->memory_offset = ehdr_size + phdr_size * s->sh_info +
                           shdr_size + s->note_size;
    } else {
        s->memory_offset = ehdr_size + phdr_size * s->phdr_num +
                           s->note_size;
    }

    return;

cleanup:
    dump_cleanup(s);
}

/*
 * QMP handler for dump-guest-memory.
 *
 * Validates the argument combination, opens the destination named by file
 * ("fd:<name>" on non-Windows hosts, or "file:<path>"), initializes a
 * DumpState and writes either a kdump-compressed vmcore or an ELF vmcore.
 * Errors are reported through errp.
 */
void qmp_dump_guest_memory(bool paging, const char *file, bool has_begin,
                           int64_t begin, bool has_length,
                           int64_t length, bool has_format,
                           DumpGuestMemoryFormat format, Error **errp)
{
    const bool kdump = has_format && format != DUMP_GUEST_MEMORY_FORMAT_ELF;
    const char *p;
    int fd = -1;
    DumpState *s;
    Error *local_err = NULL;

    /*
     * kdump-compressed format need the whole memory dumped, so paging or
     * filter is not supported here.
     */
    if (kdump && (paging || has_begin || has_length)) {
        error_setg(errp, "kdump-compressed format doesn't support paging or "
                         "filter");
        return;
    }

    /* begin and length must be given together; name the missing one */
    if (has_begin != has_length) {
        error_set(errp, QERR_MISSING_PARAMETER,
                  has_begin ? "length" : "begin");
        return;
    }

    /* check whether lzo/snappy is supported */
#ifndef CONFIG_LZO
    if (has_format && format == DUMP_GUEST_MEMORY_FORMAT_KDUMP_LZO) {
        error_setg(errp, "kdump-lzo is not available now");
        return;
    }
#endif

#ifndef CONFIG_SNAPPY
    if (has_format && format == DUMP_GUEST_MEMORY_FORMAT_KDUMP_SNAPPY) {
        error_setg(errp, "kdump-snappy is not available now");
        return;
    }
#endif

#if !defined(WIN32)
    if (strstart(file, "fd:", &p)) {
        fd = monitor_get_fd(cur_mon, p, errp);
        if (fd == -1) {
            return;
        }
    }
#endif

    if (strstart(file, "file:", &p)) {
        fd = qemu_open(p, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, S_IRUSR);
        if (fd < 0) {
            error_setg_file_open(errp, errno, p);
            return;
        }
    }

    /* neither prefix matched */
    if (fd == -1) {
        error_set(errp, QERR_INVALID_PARAMETER, "protocol");
        return;
    }

    s = g_malloc0(sizeof(*s));

    dump_init(s, fd, has_format, format, paging, has_begin,
              begin, length, &local_err);
    if (local_err != NULL) {
        g_free(s);
        error_propagate(errp, local_err);
        return;
    }

    if (kdump) {
        create_kdump_vmcore(s, errp);
    } else {
        create_vmcore(s, errp);
    }

    g_free(s);
}
1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713

/*
 * QMP handler for query-dump-guest-memory-capability.
 *
 * Returns a newly allocated capability object whose format list contains,
 * in this order: elf, kdump-zlib, then kdump-lzo and kdump-snappy when
 * CONFIG_LZO / CONFIG_SNAPPY are defined. errp is not used.
 */
DumpGuestMemoryCapability *qmp_query_dump_guest_memory_capability(Error **errp)
{
    static const DumpGuestMemoryFormat available[] = {
        /* elf is always available */
        DUMP_GUEST_MEMORY_FORMAT_ELF,
        /* kdump-zlib is always available */
        DUMP_GUEST_MEMORY_FORMAT_KDUMP_ZLIB,
#ifdef CONFIG_LZO
        DUMP_GUEST_MEMORY_FORMAT_KDUMP_LZO,
#endif
#ifdef CONFIG_SNAPPY
        DUMP_GUEST_MEMORY_FORMAT_KDUMP_SNAPPY,
#endif
    };
    DumpGuestMemoryCapability *cap = g_malloc0(sizeof(*cap));
    /* slot that receives the next list node */
    DumpGuestMemoryFormatList **link = &cap->formats;
    size_t i;

    for (i = 0; i < sizeof(available) / sizeof(available[0]); i++) {
        DumpGuestMemoryFormatList *node = g_malloc0(sizeof(*node));

        node->value = available[i];
        *link = node;
        link = &node->next;
    }

    return cap;
}