/*
 * QEMU dump
 *
 * Copyright Fujitsu, Corp. 2011, 2012
 *
 * Authors:
 *     Wen Congyang <wency@cn.fujitsu.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include "qemu-common.h"
#include "elf.h"
#include "cpu.h"
#include "exec/cpu-all.h"
#include "exec/hwaddr.h"
#include "monitor/monitor.h"
#include "sysemu/kvm.h"
#include "sysemu/dump.h"
#include "sysemu/sysemu.h"
#include "sysemu/memory_mapping.h"
#include "sysemu/cpus.h"
#include "qapi/qmp/qerror.h"
#include "qmp-commands.h"

#include <zlib.h>
#ifdef CONFIG_LZO
#include <lzo/lzo1x.h>
#endif
#ifdef CONFIG_SNAPPY
#include <snappy-c.h>
#endif
#ifndef ELF_MACHINE_UNAME
#define ELF_MACHINE_UNAME "Unknown"
#endif
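
/*
 * The dump is produced in the guest's byte order (s->dump_info.d_endian),
 * which need not match the host's.  The cpu_to_dump{16,32,64} helpers below
 * byte-swap values accordingly before they are stored in ELF or kdump
 * structures.
 */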

uint16_t cpu_to_dump16(DumpState *s, uint16_t val)
{
    if (s->dump_info.d_endian == ELFDATA2LSB) {
        val = cpu_to_le16(val);
    } else {
        val = cpu_to_be16(val);
    }

    return val;
}

uint32_t cpu_to_dump32(DumpState *s, uint32_t val)
{
    if (s->dump_info.d_endian == ELFDATA2LSB) {
        val = cpu_to_le32(val);
    } else {
        val = cpu_to_be32(val);
    }

    return val;
}

uint64_t cpu_to_dump64(DumpState *s, uint64_t val)
{
    if (s->dump_info.d_endian == ELFDATA2LSB) {
        val = cpu_to_le64(val);
    } else {
        val = cpu_to_be64(val);
    }

    return val;
}

static int dump_cleanup(DumpState *s)
{
    guest_phys_blocks_free(&s->guest_phys_blocks);
    memory_mapping_list_free(&s->list);
    close(s->fd);
    if (s->resume) {
        vm_start();
    }

    return 0;
}

static void dump_error(DumpState *s, const char *reason, Error **errp)
{
    dump_cleanup(s);
    error_setg(errp, "%s", reason);
}

static int fd_write_vmcore(const void *buf, size_t size, void *opaque)
{
    DumpState *s = opaque;
    size_t written_size;

    written_size = qemu_write_full(s->fd, buf, size);
    if (written_size != size) {
        return -1;
    }

    return 0;
}

static void write_elf64_header(DumpState *s, Error **errp)
{
    Elf64_Ehdr elf_header;
    int ret;

    memset(&elf_header, 0, sizeof(Elf64_Ehdr));
    memcpy(&elf_header, ELFMAG, SELFMAG);
    elf_header.e_ident[EI_CLASS] = ELFCLASS64;
    elf_header.e_ident[EI_DATA] = s->dump_info.d_endian;
    elf_header.e_ident[EI_VERSION] = EV_CURRENT;
    elf_header.e_type = cpu_to_dump16(s, ET_CORE);
    elf_header.e_machine = cpu_to_dump16(s, s->dump_info.d_machine);
    elf_header.e_version = cpu_to_dump32(s, EV_CURRENT);
    elf_header.e_ehsize = cpu_to_dump16(s, sizeof(elf_header));
    elf_header.e_phoff = cpu_to_dump64(s, sizeof(Elf64_Ehdr));
    elf_header.e_phentsize = cpu_to_dump16(s, sizeof(Elf64_Phdr));
    elf_header.e_phnum = cpu_to_dump16(s, s->phdr_num);
    if (s->have_section) {
        uint64_t shoff = sizeof(Elf64_Ehdr) + sizeof(Elf64_Phdr) * s->sh_info;

        elf_header.e_shoff = cpu_to_dump64(s, shoff);
        elf_header.e_shentsize = cpu_to_dump16(s, sizeof(Elf64_Shdr));
        elf_header.e_shnum = cpu_to_dump16(s, 1);
    }

    ret = fd_write_vmcore(&elf_header, sizeof(elf_header), s);
    if (ret < 0) {
        dump_error(s, "dump: failed to write elf header", errp);
    }
}

static void write_elf32_header(DumpState *s, Error **errp)
{
    Elf32_Ehdr elf_header;
    int ret;

    memset(&elf_header, 0, sizeof(Elf32_Ehdr));
    memcpy(&elf_header, ELFMAG, SELFMAG);
    elf_header.e_ident[EI_CLASS] = ELFCLASS32;
    elf_header.e_ident[EI_DATA] = s->dump_info.d_endian;
    elf_header.e_ident[EI_VERSION] = EV_CURRENT;
    elf_header.e_type = cpu_to_dump16(s, ET_CORE);
    elf_header.e_machine = cpu_to_dump16(s, s->dump_info.d_machine);
    elf_header.e_version = cpu_to_dump32(s, EV_CURRENT);
    elf_header.e_ehsize = cpu_to_dump16(s, sizeof(elf_header));
    elf_header.e_phoff = cpu_to_dump32(s, sizeof(Elf32_Ehdr));
    elf_header.e_phentsize = cpu_to_dump16(s, sizeof(Elf32_Phdr));
    elf_header.e_phnum = cpu_to_dump16(s, s->phdr_num);
    if (s->have_section) {
        uint32_t shoff = sizeof(Elf32_Ehdr) + sizeof(Elf32_Phdr) * s->sh_info;

        elf_header.e_shoff = cpu_to_dump32(s, shoff);
        elf_header.e_shentsize = cpu_to_dump16(s, sizeof(Elf32_Shdr));
        elf_header.e_shnum = cpu_to_dump16(s, 1);
    }

    ret = fd_write_vmcore(&elf_header, sizeof(elf_header), s);
    if (ret < 0) {
        dump_error(s, "dump: failed to write elf header", errp);
    }
}

static void write_elf64_load(DumpState *s, MemoryMapping *memory_mapping,
                             int phdr_index, hwaddr offset,
                             hwaddr filesz, Error **errp)
{
    Elf64_Phdr phdr;
    int ret;

    memset(&phdr, 0, sizeof(Elf64_Phdr));
    phdr.p_type = cpu_to_dump32(s, PT_LOAD);
    phdr.p_offset = cpu_to_dump64(s, offset);
    phdr.p_paddr = cpu_to_dump64(s, memory_mapping->phys_addr);
    phdr.p_filesz = cpu_to_dump64(s, filesz);
    phdr.p_memsz = cpu_to_dump64(s, memory_mapping->length);
    phdr.p_vaddr = cpu_to_dump64(s, memory_mapping->virt_addr);

    assert(memory_mapping->length >= filesz);

    ret = fd_write_vmcore(&phdr, sizeof(Elf64_Phdr), s);
    if (ret < 0) {
        dump_error(s, "dump: failed to write program header table", errp);
    }
}

static void write_elf32_load(DumpState *s, MemoryMapping *memory_mapping,
                             int phdr_index, hwaddr offset,
                             hwaddr filesz, Error **errp)
{
    Elf32_Phdr phdr;
    int ret;

    memset(&phdr, 0, sizeof(Elf32_Phdr));
    phdr.p_type = cpu_to_dump32(s, PT_LOAD);
    phdr.p_offset = cpu_to_dump32(s, offset);
    phdr.p_paddr = cpu_to_dump32(s, memory_mapping->phys_addr);
    phdr.p_filesz = cpu_to_dump32(s, filesz);
    phdr.p_memsz = cpu_to_dump32(s, memory_mapping->length);
    phdr.p_vaddr = cpu_to_dump32(s, memory_mapping->virt_addr);

    assert(memory_mapping->length >= filesz);

    ret = fd_write_vmcore(&phdr, sizeof(Elf32_Phdr), s);
    if (ret < 0) {
        dump_error(s, "dump: failed to write program header table", errp);
    }
}

static void write_elf64_note(DumpState *s, Error **errp)
{
    Elf64_Phdr phdr;
    hwaddr begin = s->memory_offset - s->note_size;
    int ret;

    memset(&phdr, 0, sizeof(Elf64_Phdr));
    phdr.p_type = cpu_to_dump32(s, PT_NOTE);
    phdr.p_offset = cpu_to_dump64(s, begin);
    phdr.p_paddr = 0;
    phdr.p_filesz = cpu_to_dump64(s, s->note_size);
    phdr.p_memsz = cpu_to_dump64(s, s->note_size);
    phdr.p_vaddr = 0;

    ret = fd_write_vmcore(&phdr, sizeof(Elf64_Phdr), s);
    if (ret < 0) {
        dump_error(s, "dump: failed to write program header table", errp);
    }
}

static inline int cpu_index(CPUState *cpu)
{
    return cpu->cpu_index + 1;
}
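
/*
 * Emit one architecture-specific note per vCPU, followed by the QEMU-specific
 * CPU state notes, through the given WriteCoreDumpFunction (either directly
 * to the vmcore or into s->note_buf for the kdump format).
 */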

static void write_elf64_notes(WriteCoreDumpFunction f, DumpState *s,
                              Error **errp)
{
    CPUState *cpu;
    int ret;
    int id;

    CPU_FOREACH(cpu) {
        id = cpu_index(cpu);
        ret = cpu_write_elf64_note(f, cpu, id, s);
        if (ret < 0) {
            dump_error(s, "dump: failed to write elf notes", errp);
            return;
        }
    }

    CPU_FOREACH(cpu) {
        ret = cpu_write_elf64_qemunote(f, cpu, s);
        if (ret < 0) {
            dump_error(s, "dump: failed to write CPU status", errp);
            return;
        }
    }
}

static void write_elf32_note(DumpState *s, Error **errp)
{
    hwaddr begin = s->memory_offset - s->note_size;
    Elf32_Phdr phdr;
    int ret;

    memset(&phdr, 0, sizeof(Elf32_Phdr));
    phdr.p_type = cpu_to_dump32(s, PT_NOTE);
    phdr.p_offset = cpu_to_dump32(s, begin);
    phdr.p_paddr = 0;
    phdr.p_filesz = cpu_to_dump32(s, s->note_size);
    phdr.p_memsz = cpu_to_dump32(s, s->note_size);
    phdr.p_vaddr = 0;

    ret = fd_write_vmcore(&phdr, sizeof(Elf32_Phdr), s);
    if (ret < 0) {
        dump_error(s, "dump: failed to write program header table", errp);
    }
}

static void write_elf32_notes(WriteCoreDumpFunction f, DumpState *s,
                              Error **errp)
{
    CPUState *cpu;
    int ret;
    int id;

    CPU_FOREACH(cpu) {
        id = cpu_index(cpu);
        ret = cpu_write_elf32_note(f, cpu, id, s);
        if (ret < 0) {
            dump_error(s, "dump: failed to write elf notes", errp);
            return;
        }
    }

    CPU_FOREACH(cpu) {
        ret = cpu_write_elf32_qemunote(f, cpu, s);
        if (ret < 0) {
            dump_error(s, "dump: failed to write CPU status", errp);
            return;
        }
    }
}
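
/*
 * The single section header written below only carries sh_info, which holds
 * the real program header count when it does not fit in the 16-bit e_phnum
 * field (PN_XNUM).  type == 0 selects an Elf32_Shdr, anything else an
 * Elf64_Shdr.
 */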

static void write_elf_section(DumpState *s, int type, Error **errp)
{
    Elf32_Shdr shdr32;
    Elf64_Shdr shdr64;
    int shdr_size;
    void *shdr;
    int ret;

    if (type == 0) {
        shdr_size = sizeof(Elf32_Shdr);
        memset(&shdr32, 0, shdr_size);
        shdr32.sh_info = cpu_to_dump32(s, s->sh_info);
        shdr = &shdr32;
    } else {
        shdr_size = sizeof(Elf64_Shdr);
        memset(&shdr64, 0, shdr_size);
        shdr64.sh_info = cpu_to_dump32(s, s->sh_info);
        shdr = &shdr64;
    }

    ret = fd_write_vmcore(shdr, shdr_size, s);
    if (ret < 0) {
        dump_error(s, "dump: failed to write section header table", errp);
    }
}

static void write_data(DumpState *s, void *buf, int length, Error **errp)
{
    int ret;

    ret = fd_write_vmcore(buf, length, s);
    if (ret < 0) {
        dump_error(s, "dump: failed to save memory", errp);
    }
}

/* write the memory to vmcore. 1 page per I/O. */
static void write_memory(DumpState *s, GuestPhysBlock *block, ram_addr_t start,
                         int64_t size, Error **errp)
{
    int64_t i;
    Error *local_err = NULL;

    for (i = 0; i < size / s->dump_info.page_size; i++) {
        write_data(s, block->host_addr + start + i * s->dump_info.page_size,
                   s->dump_info.page_size, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }
    }

    if ((size % s->dump_info.page_size) != 0) {
        write_data(s, block->host_addr + start + i * s->dump_info.page_size,
                   size % s->dump_info.page_size, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }
    }
}

/* get the memory's offset and size in the vmcore */
static void get_offset_range(hwaddr phys_addr,
                             ram_addr_t mapping_length,
                             DumpState *s,
                             hwaddr *p_offset,
                             hwaddr *p_filesz)
{
    GuestPhysBlock *block;
    hwaddr offset = s->memory_offset;
    int64_t size_in_block, start;

    /* When the memory is not stored into vmcore, offset will be -1 */
    *p_offset = -1;
    *p_filesz = 0;

    if (s->has_filter) {
        if (phys_addr < s->begin || phys_addr >= s->begin + s->length) {
            return;
        }
    }

    QTAILQ_FOREACH(block, &s->guest_phys_blocks.head, next) {
        if (s->has_filter) {
            if (block->target_start >= s->begin + s->length ||
                block->target_end <= s->begin) {
                /* This block is out of the range */
                continue;
            }

            if (s->begin <= block->target_start) {
                start = block->target_start;
            } else {
                start = s->begin;
            }

            size_in_block = block->target_end - start;
            if (s->begin + s->length < block->target_end) {
                size_in_block -= block->target_end - (s->begin + s->length);
            }
        } else {
            start = block->target_start;
            size_in_block = block->target_end - block->target_start;
        }

        if (phys_addr >= start && phys_addr < start + size_in_block) {
            *p_offset = phys_addr - start + offset;

            /* The offset range mapped from the vmcore file must not spill over
             * the GuestPhysBlock, clamp it. The rest of the mapping will be
             * zero-filled in memory at load time; see
             * <http://refspecs.linuxbase.org/elf/gabi4+/ch5.pheader.html>.
             */
            *p_filesz = phys_addr + mapping_length <= start + size_in_block ?
                        mapping_length :
                        size_in_block - (phys_addr - start);
            return;
        }

        offset += size_in_block;
    }
}
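
/*
 * Write one PT_LOAD program header per guest memory mapping, using
 * get_offset_range() to translate each mapping's physical address into an
 * offset and file size within the vmcore.
 */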

static void write_elf_loads(DumpState *s, Error **errp)
{
    hwaddr offset, filesz;
    MemoryMapping *memory_mapping;
    uint32_t phdr_index = 1;
    uint32_t max_index;
    Error *local_err = NULL;

    if (s->have_section) {
        max_index = s->sh_info;
    } else {
        max_index = s->phdr_num;
    }

    QTAILQ_FOREACH(memory_mapping, &s->list.head, next) {
        get_offset_range(memory_mapping->phys_addr,
                         memory_mapping->length,
                         s, &offset, &filesz);
        if (s->dump_info.d_class == ELFCLASS64) {
            write_elf64_load(s, memory_mapping, phdr_index++, offset,
                             filesz, &local_err);
        } else {
            write_elf32_load(s, memory_mapping, phdr_index++, offset,
                             filesz, &local_err);
        }

        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }

        if (phdr_index >= max_index) {
            break;
        }
    }
}

/* write elf header, PT_NOTE and elf note to vmcore. */
static void dump_begin(DumpState *s, Error **errp)
{
    Error *local_err = NULL;

    /*
     * the vmcore's format is:
     *   --------------
     *   |  elf header |
     *   --------------
     *   |  PT_NOTE    |
     *   --------------
     *   |  PT_LOAD    |
     *   --------------
     *   |  ......     |
     *   --------------
     *   |  PT_LOAD    |
     *   --------------
     *   |  sec_hdr    |
     *   --------------
     *   |  elf note   |
     *   --------------
     *   |  memory     |
     *   --------------
     *
     * we only know where the memory is saved after we write elf note into
     * vmcore.
     */

    /* write elf header to vmcore */
    if (s->dump_info.d_class == ELFCLASS64) {
        write_elf64_header(s, &local_err);
    } else {
        write_elf32_header(s, &local_err);
    }
    if (local_err) {
        error_propagate(errp, local_err);
        return;
    }

    if (s->dump_info.d_class == ELFCLASS64) {
        /* write PT_NOTE to vmcore */
        write_elf64_note(s, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }

        /* write all PT_LOAD to vmcore */
        write_elf_loads(s, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }

        /* write section to vmcore */
        if (s->have_section) {
            write_elf_section(s, 1, &local_err);
            if (local_err) {
                error_propagate(errp, local_err);
                return;
            }
        }

        /* write notes to vmcore */
        write_elf64_notes(fd_write_vmcore, s, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }
    } else {
        /* write PT_NOTE to vmcore */
        write_elf32_note(s, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }

        /* write all PT_LOAD to vmcore */
        write_elf_loads(s, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }

        /* write section to vmcore */
        if (s->have_section) {
            write_elf_section(s, 0, &local_err);
            if (local_err) {
                error_propagate(errp, local_err);
                return;
            }
        }

        /* write notes to vmcore */
        write_elf32_notes(fd_write_vmcore, s, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }
    }
}

static void dump_completed(DumpState *s)
{
    dump_cleanup(s);
}
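
/*
 * Advance the iteration to the block following 'block', honouring the
 * optional address filter.  Returns 0 (and updates s->next_block/s->start)
 * when another block remains, or 1 when all blocks have been consumed.
 */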

static int get_next_block(DumpState *s, GuestPhysBlock *block)
{
    while (1) {
        block = QTAILQ_NEXT(block, next);
        if (!block) {
            /* no more block */
            return 1;
        }

        s->start = 0;
        s->next_block = block;
        if (s->has_filter) {
            if (block->target_start >= s->begin + s->length ||
                block->target_end <= s->begin) {
                /* This block is out of the range */
                continue;
            }

            if (s->begin > block->target_start) {
                s->start = s->begin - block->target_start;
            }
        }

        return 0;
    }
}

/* write all memory to vmcore */
static void dump_iterate(DumpState *s, Error **errp)
{
    GuestPhysBlock *block;
    int64_t size;
    Error *local_err = NULL;

    do {
        block = s->next_block;

        size = block->target_end - block->target_start;
        if (s->has_filter) {
            size -= s->start;
            if (s->begin + s->length < block->target_end) {
                size -= block->target_end - (s->begin + s->length);
            }
        }
        write_memory(s, block, s->start, size, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }

    } while (!get_next_block(s, block));

    dump_completed(s);
}

static void create_vmcore(DumpState *s, Error **errp)
{
    Error *local_err = NULL;

    dump_begin(s, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        return;
    }

    dump_iterate(s, errp);
}
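
/*
 * The kdump-compressed output is written as a makedumpfile "flat" stream:
 * a MakedumpfileHeader, then (offset, size, data) records emitted by
 * write_buffer(), terminated by write_end_flat_header().  makedumpfile can
 * rearrange such a stream into a regular kdump-compressed file.
 */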

static int write_start_flat_header(int fd)
{
    MakedumpfileHeader *mh;
    int ret = 0;

    QEMU_BUILD_BUG_ON(sizeof *mh > MAX_SIZE_MDF_HEADER);
    mh = g_malloc0(MAX_SIZE_MDF_HEADER);

    memcpy(mh->signature, MAKEDUMPFILE_SIGNATURE,
           MIN(sizeof mh->signature, sizeof MAKEDUMPFILE_SIGNATURE));

    mh->type = cpu_to_be64(TYPE_FLAT_HEADER);
    mh->version = cpu_to_be64(VERSION_FLAT_HEADER);

    size_t written_size;
    written_size = qemu_write_full(fd, mh, MAX_SIZE_MDF_HEADER);
    if (written_size != MAX_SIZE_MDF_HEADER) {
        ret = -1;
    }

    g_free(mh);
    return ret;
}

static int write_end_flat_header(int fd)
{
    MakedumpfileDataHeader mdh;

    mdh.offset = END_FLAG_FLAT_HEADER;
    mdh.buf_size = END_FLAG_FLAT_HEADER;

    size_t written_size;
    written_size = qemu_write_full(fd, &mdh, sizeof(mdh));
    if (written_size != sizeof(mdh)) {
        return -1;
    }

    return 0;
}

static int write_buffer(int fd, off_t offset, const void *buf, size_t size)
{
    size_t written_size;
    MakedumpfileDataHeader mdh;

    mdh.offset = cpu_to_be64(offset);
    mdh.buf_size = cpu_to_be64(size);

    written_size = qemu_write_full(fd, &mdh, sizeof(mdh));
    if (written_size != sizeof(mdh)) {
        return -1;
    }

    written_size = qemu_write_full(fd, buf, size);
    if (written_size != size) {
        return -1;
    }

    return 0;
}
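
/*
 * WriteCoreDumpFunction that accumulates note data in s->note_buf instead of
 * writing it to the dump file; the buffered notes are later written at the
 * offset recorded in the kdump sub header.
 */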

static int buf_write_note(const void *buf, size_t size, void *opaque)
{
    DumpState *s = opaque;

    /* note_buf is not enough */
    if (s->note_buf_offset + size > s->note_size) {
        return -1;
    }

    memcpy(s->note_buf + s->note_buf_offset, buf, size);

    s->note_buf_offset += size;

    return 0;
}

/* write common header, sub header and elf note to vmcore */
static void create_header32(DumpState *s, Error **errp)
{
    DiskDumpHeader32 *dh = NULL;
    KdumpSubHeader32 *kh = NULL;
    size_t size;
    uint32_t block_size;
    uint32_t sub_hdr_size;
    uint32_t bitmap_blocks;
    uint32_t status = 0;
    uint64_t offset_note;
    Error *local_err = NULL;

    /* write the common header; the version of the kdump-compressed format is 6 */
    size = sizeof(DiskDumpHeader32);
    dh = g_malloc0(size);

    strncpy(dh->signature, KDUMP_SIGNATURE, strlen(KDUMP_SIGNATURE));
    dh->header_version = cpu_to_dump32(s, 6);
    block_size = s->dump_info.page_size;
    dh->block_size = cpu_to_dump32(s, block_size);
    sub_hdr_size = sizeof(struct KdumpSubHeader32) + s->note_size;
    sub_hdr_size = DIV_ROUND_UP(sub_hdr_size, block_size);
    dh->sub_hdr_size = cpu_to_dump32(s, sub_hdr_size);
    /* dh->max_mapnr may be truncated, full 64bit is in kh.max_mapnr_64 */
    dh->max_mapnr = cpu_to_dump32(s, MIN(s->max_mapnr, UINT_MAX));
    dh->nr_cpus = cpu_to_dump32(s, s->nr_cpus);
    bitmap_blocks = DIV_ROUND_UP(s->len_dump_bitmap, block_size) * 2;
    dh->bitmap_blocks = cpu_to_dump32(s, bitmap_blocks);
    strncpy(dh->utsname.machine, ELF_MACHINE_UNAME, sizeof(dh->utsname.machine));

    if (s->flag_compress & DUMP_DH_COMPRESSED_ZLIB) {
        status |= DUMP_DH_COMPRESSED_ZLIB;
    }
#ifdef CONFIG_LZO
    if (s->flag_compress & DUMP_DH_COMPRESSED_LZO) {
        status |= DUMP_DH_COMPRESSED_LZO;
    }
#endif
#ifdef CONFIG_SNAPPY
    if (s->flag_compress & DUMP_DH_COMPRESSED_SNAPPY) {
        status |= DUMP_DH_COMPRESSED_SNAPPY;
    }
#endif
    dh->status = cpu_to_dump32(s, status);

    if (write_buffer(s->fd, 0, dh, size) < 0) {
        dump_error(s, "dump: failed to write disk dump header", errp);
        goto out;
    }

    /* write sub header */
    size = sizeof(KdumpSubHeader32);
    kh = g_malloc0(size);

    /* 64bit max_mapnr_64 */
    kh->max_mapnr_64 = cpu_to_dump64(s, s->max_mapnr);
    kh->phys_base = cpu_to_dump32(s, s->dump_info.phys_base);
    kh->dump_level = cpu_to_dump32(s, DUMP_LEVEL);

    offset_note = DISKDUMP_HEADER_BLOCKS * block_size + size;
    kh->offset_note = cpu_to_dump64(s, offset_note);
    kh->note_size = cpu_to_dump32(s, s->note_size);

    if (write_buffer(s->fd, DISKDUMP_HEADER_BLOCKS *
                     block_size, kh, size) < 0) {
        dump_error(s, "dump: failed to write kdump sub header", errp);
        goto out;
    }

    /* write note */
    s->note_buf = g_malloc0(s->note_size);
    s->note_buf_offset = 0;

    /* use s->note_buf to store notes temporarily */
    write_elf32_notes(buf_write_note, s, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        goto out;
    }
    if (write_buffer(s->fd, offset_note, s->note_buf,
                     s->note_size) < 0) {
        dump_error(s, "dump: failed to write notes", errp);
        goto out;
    }

    /* get offset of dump_bitmap */
    s->offset_dump_bitmap = (DISKDUMP_HEADER_BLOCKS + sub_hdr_size) *
                             block_size;

    /* get offset of page */
    s->offset_page = (DISKDUMP_HEADER_BLOCKS + sub_hdr_size + bitmap_blocks) *
                     block_size;

out:
    g_free(dh);
    g_free(kh);
    g_free(s->note_buf);
}

/* write common header, sub header and elf note to vmcore */
static void create_header64(DumpState *s, Error **errp)
{
    DiskDumpHeader64 *dh = NULL;
    KdumpSubHeader64 *kh = NULL;
    size_t size;
    uint32_t block_size;
    uint32_t sub_hdr_size;
    uint32_t bitmap_blocks;
    uint32_t status = 0;
    uint64_t offset_note;
    Error *local_err = NULL;

    /* write the common header; the version of the kdump-compressed format is 6 */
    size = sizeof(DiskDumpHeader64);
    dh = g_malloc0(size);

    strncpy(dh->signature, KDUMP_SIGNATURE, strlen(KDUMP_SIGNATURE));
    dh->header_version = cpu_to_dump32(s, 6);
    block_size = s->dump_info.page_size;
    dh->block_size = cpu_to_dump32(s, block_size);
    sub_hdr_size = sizeof(struct KdumpSubHeader64) + s->note_size;
    sub_hdr_size = DIV_ROUND_UP(sub_hdr_size, block_size);
    dh->sub_hdr_size = cpu_to_dump32(s, sub_hdr_size);
    /* dh->max_mapnr may be truncated, full 64bit is in kh.max_mapnr_64 */
    dh->max_mapnr = cpu_to_dump32(s, MIN(s->max_mapnr, UINT_MAX));
    dh->nr_cpus = cpu_to_dump32(s, s->nr_cpus);
    bitmap_blocks = DIV_ROUND_UP(s->len_dump_bitmap, block_size) * 2;
    dh->bitmap_blocks = cpu_to_dump32(s, bitmap_blocks);
    strncpy(dh->utsname.machine, ELF_MACHINE_UNAME, sizeof(dh->utsname.machine));

    if (s->flag_compress & DUMP_DH_COMPRESSED_ZLIB) {
        status |= DUMP_DH_COMPRESSED_ZLIB;
    }
#ifdef CONFIG_LZO
    if (s->flag_compress & DUMP_DH_COMPRESSED_LZO) {
        status |= DUMP_DH_COMPRESSED_LZO;
    }
#endif
#ifdef CONFIG_SNAPPY
    if (s->flag_compress & DUMP_DH_COMPRESSED_SNAPPY) {
        status |= DUMP_DH_COMPRESSED_SNAPPY;
    }
#endif
    dh->status = cpu_to_dump32(s, status);

    if (write_buffer(s->fd, 0, dh, size) < 0) {
        dump_error(s, "dump: failed to write disk dump header", errp);
        goto out;
    }

    /* write sub header */
    size = sizeof(KdumpSubHeader64);
    kh = g_malloc0(size);

    /* 64bit max_mapnr_64 */
    kh->max_mapnr_64 = cpu_to_dump64(s, s->max_mapnr);
    kh->phys_base = cpu_to_dump64(s, s->dump_info.phys_base);
    kh->dump_level = cpu_to_dump32(s, DUMP_LEVEL);

    offset_note = DISKDUMP_HEADER_BLOCKS * block_size + size;
    kh->offset_note = cpu_to_dump64(s, offset_note);
    kh->note_size = cpu_to_dump64(s, s->note_size);

    if (write_buffer(s->fd, DISKDUMP_HEADER_BLOCKS *
                     block_size, kh, size) < 0) {
        dump_error(s, "dump: failed to write kdump sub header", errp);
        goto out;
    }

    /* write note */
    s->note_buf = g_malloc0(s->note_size);
    s->note_buf_offset = 0;

    /* use s->note_buf to store notes temporarily */
    write_elf64_notes(buf_write_note, s, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        goto out;
    }

    if (write_buffer(s->fd, offset_note, s->note_buf,
                     s->note_size) < 0) {
        dump_error(s, "dump: failed to write notes", errp);
        goto out;
    }

    /* get offset of dump_bitmap */
    s->offset_dump_bitmap = (DISKDUMP_HEADER_BLOCKS + sub_hdr_size) *
                             block_size;

    /* get offset of page */
    s->offset_page = (DISKDUMP_HEADER_BLOCKS + sub_hdr_size + bitmap_blocks) *
                     block_size;

out:
    g_free(dh);
    g_free(kh);
    g_free(s->note_buf);
}

static void write_dump_header(DumpState *s, Error **errp)
{
    Error *local_err = NULL;

    if (s->dump_info.d_class == ELFCLASS32) {
        create_header32(s, &local_err);
    } else {
        create_header64(s, &local_err);
    }
    if (local_err) {
        error_propagate(errp, local_err);
    }
}

static size_t dump_bitmap_get_bufsize(DumpState *s)
{
    return s->dump_info.page_size;
}

/*
 * Set the dump_bitmap sequentially. Bits before last_pfn must not be
 * rewritten, so to set the very first bit, pass last_pfn == pfn == 0.
 * set_dump_bitmap always leaves the most recently set bit un-synced; setting
 * (last bit + sizeof(buf) * 8) to 0 flushes the content of buf into the
 * vmcore, i.e. it synchronizes the un-synced bits.
 */
static int set_dump_bitmap(uint64_t last_pfn, uint64_t pfn, bool value,
                           uint8_t *buf, DumpState *s)
{
    off_t old_offset, new_offset;
    off_t offset_bitmap1, offset_bitmap2;
    uint32_t byte, bit;
    size_t bitmap_bufsize = dump_bitmap_get_bufsize(s);
    size_t bits_per_buf = bitmap_bufsize * CHAR_BIT;

    /* should not set the previous place */
    assert(last_pfn <= pfn);

    /*
     * if the bit needed to be set is not cached in buf, flush the data in buf
     * to vmcore firstly.
     * making new_offset be bigger than old_offset can also sync remained data
     * into vmcore.
     */
    old_offset = bitmap_bufsize * (last_pfn / bits_per_buf);
    new_offset = bitmap_bufsize * (pfn / bits_per_buf);

    while (old_offset < new_offset) {
        /* calculate the offset and write dump_bitmap */
        offset_bitmap1 = s->offset_dump_bitmap + old_offset;
        if (write_buffer(s->fd, offset_bitmap1, buf,
                         bitmap_bufsize) < 0) {
            return -1;
        }

        /* dump level 1 is chosen, so 1st and 2nd bitmap are same */
        offset_bitmap2 = s->offset_dump_bitmap + s->len_dump_bitmap +
                         old_offset;
        if (write_buffer(s->fd, offset_bitmap2, buf,
                         bitmap_bufsize) < 0) {
            return -1;
        }

        memset(buf, 0, bitmap_bufsize);
        old_offset += bitmap_bufsize;
    }

    /* get the exact place of the bit in the buf, and set it */
    byte = (pfn % bits_per_buf) / CHAR_BIT;
    bit = (pfn % bits_per_buf) % CHAR_BIT;
    if (value) {
        buf[byte] |= 1u << bit;
    } else {
        buf[byte] &= ~(1u << bit);
    }

    return 0;
}

static uint64_t dump_paddr_to_pfn(DumpState *s, uint64_t addr)
{
    int target_page_shift = ctz32(s->dump_info.page_size);

    return (addr >> target_page_shift) - ARCH_PFN_OFFSET;
}

static uint64_t dump_pfn_to_paddr(DumpState *s, uint64_t pfn)
{
    int target_page_shift = ctz32(s->dump_info.page_size);

    return (pfn + ARCH_PFN_OFFSET) << target_page_shift;
}

/*
 * Examine every page and return the page frame number and the address of the
 * page. bufptr can be NULL. Note: the blocks here are supposed to reflect
 * guest-phys blocks, so block->target_start and block->target_end should be
 * integral multiples of the target page size.
 */
static bool get_next_page(GuestPhysBlock **blockptr, uint64_t *pfnptr,
                          uint8_t **bufptr, DumpState *s)
{
    GuestPhysBlock *block = *blockptr;
    hwaddr addr, target_page_mask = ~((hwaddr)s->dump_info.page_size - 1);
    uint8_t *buf;

    /* block == NULL means the start of the iteration */
    if (!block) {
        block = QTAILQ_FIRST(&s->guest_phys_blocks.head);
        *blockptr = block;
        assert((block->target_start & ~target_page_mask) == 0);
        assert((block->target_end & ~target_page_mask) == 0);
        *pfnptr = dump_paddr_to_pfn(s, block->target_start);
        if (bufptr) {
            *bufptr = block->host_addr;
        }
        return true;
    }

    *pfnptr = *pfnptr + 1;
    addr = dump_pfn_to_paddr(s, *pfnptr);

    if ((addr >= block->target_start) &&
        (addr + s->dump_info.page_size <= block->target_end)) {
        buf = block->host_addr + (addr - block->target_start);
    } else {
        /* the next page is in the next block */
        block = QTAILQ_NEXT(block, next);
        *blockptr = block;
        if (!block) {
            return false;
        }
        assert((block->target_start & ~target_page_mask) == 0);
        assert((block->target_end & ~target_page_mask) == 0);
        *pfnptr = dump_paddr_to_pfn(s, block->target_start);
        buf = block->host_addr;
    }

    if (bufptr) {
        *bufptr = buf;
    }

    return true;
}

static void write_dump_bitmap(DumpState *s, Error **errp)
{
    int ret = 0;
    uint64_t last_pfn, pfn;
    void *dump_bitmap_buf;
    size_t num_dumpable;
    GuestPhysBlock *block_iter = NULL;
    size_t bitmap_bufsize = dump_bitmap_get_bufsize(s);
    size_t bits_per_buf = bitmap_bufsize * CHAR_BIT;

    /* dump_bitmap_buf is used to store dump_bitmap temporarily */
    dump_bitmap_buf = g_malloc0(bitmap_bufsize);

    num_dumpable = 0;
    last_pfn = 0;

    /*
     * exam memory page by page, and set the bit in dump_bitmap corresponded
     * to the existing page.
     */
    while (get_next_page(&block_iter, &pfn, NULL, s)) {
        ret = set_dump_bitmap(last_pfn, pfn, true, dump_bitmap_buf, s);
        if (ret < 0) {
            dump_error(s, "dump: failed to set dump_bitmap", errp);
            goto out;
        }

        last_pfn = pfn;
        num_dumpable++;
    }

    /*
     * set_dump_bitmap will always leave the recently set bit un-sync. Here we
     * set the remaining bits from last_pfn to the end of the bitmap buffer to
     * 0. With those set, the un-sync bit will be synchronized into the vmcore.
     */
    if (num_dumpable > 0) {
        ret = set_dump_bitmap(last_pfn, last_pfn + bits_per_buf, false,
                              dump_bitmap_buf, s);
        if (ret < 0) {
            dump_error(s, "dump: failed to sync dump_bitmap", errp);
            goto out;
        }
    }

    /* number of dumpable pages that will be dumped later */
    s->num_dumpable = num_dumpable;

out:
    g_free(dump_bitmap_buf);
}
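
/*
 * A DataCache batches the many small page descriptors and compressed pages
 * into buf_size-sized writes at a moving offset in the dump file, instead of
 * issuing one write per record.
 */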

static void prepare_data_cache(DataCache *data_cache, DumpState *s,
                               off_t offset)
{
    data_cache->fd = s->fd;
    data_cache->data_size = 0;
    data_cache->buf_size = 4 * dump_bitmap_get_bufsize(s);
    data_cache->buf = g_malloc0(data_cache->buf_size);
    data_cache->offset = offset;
}

static int write_cache(DataCache *dc, const void *buf, size_t size,
                       bool flag_sync)
{
    /*
     * dc->buf_size should not be less than size, otherwise dc will never be
     * enough
     */
    assert(size <= dc->buf_size);

    /*
     * if flag_sync is set, synchronize data in dc->buf into vmcore.
     * otherwise check if the space is enough for caching data in buf, if not,
     * write the data in dc->buf to dc->fd and reset dc->buf
     */
    if ((!flag_sync && dc->data_size + size > dc->buf_size) ||
        (flag_sync && dc->data_size > 0)) {
        if (write_buffer(dc->fd, dc->offset, dc->buf, dc->data_size) < 0) {
            return -1;
        }

        dc->offset += dc->data_size;
        dc->data_size = 0;
    }

    if (!flag_sync) {
        memcpy(dc->buf + dc->data_size, buf, size);
        dc->data_size += size;
    }

    return 0;
}

static void free_data_cache(DataCache *data_cache)
{
    g_free(data_cache->buf);
}

static size_t get_len_buf_out(size_t page_size, uint32_t flag_compress)
{
    switch (flag_compress) {
    case DUMP_DH_COMPRESSED_ZLIB:
        return compressBound(page_size);

    case DUMP_DH_COMPRESSED_LZO:
        /*
         * LZO will expand incompressible data by a little amount. Please check
         * the following URL to see the expansion calculation:
         * http://www.oberhumer.com/opensource/lzo/lzofaq.php
         */
        return page_size + page_size / 16 + 64 + 3;

#ifdef CONFIG_SNAPPY
    case DUMP_DH_COMPRESSED_SNAPPY:
        return snappy_max_compressed_length(page_size);
#endif
    }
    return 0;
}

/*
 * check if the page is all 0
 */
static inline bool is_zero_page(const uint8_t *buf, size_t page_size)
{
    return buffer_is_zero(buf, page_size);
}
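
/*
 * Write the page descriptor and page data sections of the kdump file.  Every
 * dumpable page is either recorded as the shared zero page or compressed with
 * the configured algorithm, falling back to the raw page when compression
 * does not make it smaller.
 */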

static void write_dump_pages(DumpState *s, Error **errp)
{
    int ret = 0;
    DataCache page_desc, page_data;
    size_t len_buf_out, size_out;
#ifdef CONFIG_LZO
    lzo_bytep wrkmem = NULL;
#endif
    uint8_t *buf_out = NULL;
    off_t offset_desc, offset_data;
    PageDescriptor pd, pd_zero;
    uint8_t *buf;
    GuestPhysBlock *block_iter = NULL;
    uint64_t pfn_iter;

    /* get offset of page_desc and page_data in dump file */
    offset_desc = s->offset_page;
    offset_data = offset_desc + sizeof(PageDescriptor) * s->num_dumpable;

    prepare_data_cache(&page_desc, s, offset_desc);
    prepare_data_cache(&page_data, s, offset_data);

    /* prepare buffer to store compressed data */
    len_buf_out = get_len_buf_out(s->dump_info.page_size, s->flag_compress);
    assert(len_buf_out != 0);

#ifdef CONFIG_LZO
    wrkmem = g_malloc(LZO1X_1_MEM_COMPRESS);
#endif

    buf_out = g_malloc(len_buf_out);

    /*
     * init zero page's page_desc and page_data, because every zero page
     * uses the same page_data
     */
    pd_zero.size = cpu_to_dump32(s, s->dump_info.page_size);
    pd_zero.flags = cpu_to_dump32(s, 0);
    pd_zero.offset = cpu_to_dump64(s, offset_data);
    pd_zero.page_flags = cpu_to_dump64(s, 0);
    buf = g_malloc0(s->dump_info.page_size);
    ret = write_cache(&page_data, buf, s->dump_info.page_size, false);
    g_free(buf);
    if (ret < 0) {
        dump_error(s, "dump: failed to write page data (zero page)", errp);
        goto out;
    }

    offset_data += s->dump_info.page_size;

    /*
     * dump memory to vmcore page by page. all zero pages are represented by
     * the first page of the page section
     */
    while (get_next_page(&block_iter, &pfn_iter, &buf, s)) {
        /* check zero page */
        if (is_zero_page(buf, s->dump_info.page_size)) {
            ret = write_cache(&page_desc, &pd_zero, sizeof(PageDescriptor),
                              false);
            if (ret < 0) {
                dump_error(s, "dump: failed to write page desc", errp);
                goto out;
            }
        } else {
            /*
             * not zero page, then:
             * 1. compress the page
             * 2. write the compressed page into the cache of page_data
             * 3. get page desc of the compressed page and write it into the
             *    cache of page_desc
             *
             * only one compression format will be used here, for
             * s->flag_compress is set. But when compression fails to work,
             * we fall back to save in plaintext.
             */
             size_out = len_buf_out;
             if ((s->flag_compress & DUMP_DH_COMPRESSED_ZLIB) &&
                    (compress2(buf_out, (uLongf *)&size_out, buf,
                               s->dump_info.page_size, Z_BEST_SPEED) == Z_OK) &&
                    (size_out < s->dump_info.page_size)) {
                pd.flags = cpu_to_dump32(s, DUMP_DH_COMPRESSED_ZLIB);
                pd.size  = cpu_to_dump32(s, size_out);

                ret = write_cache(&page_data, buf_out, size_out, false);
                if (ret < 0) {
                    dump_error(s, "dump: failed to write page data", errp);
                    goto out;
                }
#ifdef CONFIG_LZO
            } else if ((s->flag_compress & DUMP_DH_COMPRESSED_LZO) &&
                    (lzo1x_1_compress(buf, s->dump_info.page_size, buf_out,
                    (lzo_uint *)&size_out, wrkmem) == LZO_E_OK) &&
                    (size_out < s->dump_info.page_size)) {
                pd.flags = cpu_to_dump32(s, DUMP_DH_COMPRESSED_LZO);
                pd.size  = cpu_to_dump32(s, size_out);

                ret = write_cache(&page_data, buf_out, size_out, false);
                if (ret < 0) {
                    dump_error(s, "dump: failed to write page data", errp);
                    goto out;
                }
#endif
#ifdef CONFIG_SNAPPY
            } else if ((s->flag_compress & DUMP_DH_COMPRESSED_SNAPPY) &&
                    (snappy_compress((char *)buf, s->dump_info.page_size,
                    (char *)buf_out, &size_out) == SNAPPY_OK) &&
                    (size_out < s->dump_info.page_size)) {
                pd.flags = cpu_to_dump32(s, DUMP_DH_COMPRESSED_SNAPPY);
                pd.size  = cpu_to_dump32(s, size_out);

                ret = write_cache(&page_data, buf_out, size_out, false);
                if (ret < 0) {
                    dump_error(s, "dump: failed to write page data", errp);
                    goto out;
                }
#endif
            } else {
                /*
                 * fall back to save in plaintext, size_out should be
                 * assigned the target's page size
                 */
                pd.flags = cpu_to_dump32(s, 0);
                size_out = s->dump_info.page_size;
                pd.size = cpu_to_dump32(s, size_out);

                ret = write_cache(&page_data, buf,
                                  s->dump_info.page_size, false);
                if (ret < 0) {
                    dump_error(s, "dump: failed to write page data", errp);
                    goto out;
                }
            }

            /* get and write page desc here */
            pd.page_flags = cpu_to_dump64(s, 0);
            pd.offset = cpu_to_dump64(s, offset_data);
            offset_data += size_out;

            ret = write_cache(&page_desc, &pd, sizeof(PageDescriptor), false);
            if (ret < 0) {
                dump_error(s, "dump: failed to write page desc", errp);
                goto out;
            }
        }
    }

    ret = write_cache(&page_desc, NULL, 0, true);
    if (ret < 0) {
        dump_error(s, "dump: failed to sync cache for page_desc", errp);
        goto out;
    }
    ret = write_cache(&page_data, NULL, 0, true);
    if (ret < 0) {
        dump_error(s, "dump: failed to sync cache for page_data", errp);
        goto out;
    }

out:
    free_data_cache(&page_desc);
    free_data_cache(&page_data);

#ifdef CONFIG_LZO
    g_free(wrkmem);
#endif

    g_free(buf_out);
}

static void create_kdump_vmcore(DumpState *s, Error **errp)
{
    int ret;
    Error *local_err = NULL;

    /*
     * the kdump-compressed format is:
     *                                               File offset
     *  +------------------------------------------+ 0x0
     *  |    main header (struct disk_dump_header) |
     *  |------------------------------------------+ block 1
     *  |    sub header (struct kdump_sub_header)  |
     *  |------------------------------------------+ block 2
     *  |            1st-dump_bitmap               |
     *  |------------------------------------------+ block 2 + X blocks
     *  |            2nd-dump_bitmap               | (aligned by block)
     *  |------------------------------------------+ block 2 + 2 * X blocks
     *  |  page desc for pfn 0 (struct page_desc)  | (aligned by block)
     *  |  page desc for pfn 1 (struct page_desc)  |
     *  |                    :                     |
     *  |------------------------------------------| (not aligned by block)
     *  |         page data (pfn 0)                |
     *  |         page data (pfn 1)                |
     *  |                    :                     |
     *  +------------------------------------------+
     */

    ret = write_start_flat_header(s->fd);
    if (ret < 0) {
        dump_error(s, "dump: failed to write start flat header", errp);
        return;
    }

    write_dump_header(s, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        return;
    }

    write_dump_bitmap(s, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        return;
    }

    write_dump_pages(s, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        return;
    }

    ret = write_end_flat_header(s->fd);
    if (ret < 0) {
        dump_error(s, "dump: failed to write end flat header", errp);
        return;
    }

    dump_completed(s);
}
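
/*
 * Pick the first guest-phys block to dump (honouring the filter range) and
 * remember it in s->next_block.  Returns the offset of the dump start within
 * that block, or -1 if the filter matches no memory at all.
 */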

static ram_addr_t get_start_block(DumpState *s)
{
    GuestPhysBlock *block;

    if (!s->has_filter) {
        s->next_block = QTAILQ_FIRST(&s->guest_phys_blocks.head);
        return 0;
    }

    QTAILQ_FOREACH(block, &s->guest_phys_blocks.head, next) {
        if (block->target_start >= s->begin + s->length ||
            block->target_end <= s->begin) {
            /* This block is out of the range */
            continue;
        }

        s->next_block = block;
        if (s->begin > block->target_start) {
            s->start = s->begin - block->target_start;
        } else {
            s->start = 0;
        }
        return s->start;
    }

    return -1;
}

static void get_max_mapnr(DumpState *s)
{
    GuestPhysBlock *last_block;

    last_block = QTAILQ_LAST(&s->guest_phys_blocks.head, GuestPhysBlockHead);
    s->max_mapnr = dump_paddr_to_pfn(s, last_block->target_end);
}
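
/*
 * Prepare a DumpState: stop the VM if necessary, snapshot the guest-phys
 * blocks and CPU state, determine the dump's ELF class, endianness, note
 * size and memory layout, and select the compression for kdump formats.
 */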

static void dump_init(DumpState *s, int fd, bool has_format,
                      DumpGuestMemoryFormat format, bool paging, bool has_filter,
                      int64_t begin, int64_t length, Error **errp)
{
    CPUState *cpu;
    int nr_cpus;
    Error *err = NULL;
    int ret;

    /* the kdump-compressed format conflicts with paging and filtering */
    if (has_format && format != DUMP_GUEST_MEMORY_FORMAT_ELF) {
        assert(!paging && !has_filter);
    }

    if (runstate_is_running()) {
        vm_stop(RUN_STATE_SAVE_VM);
        s->resume = true;
    } else {
        s->resume = false;
    }

    /* If we use KVM, we should synchronize the registers before we get dump
     * info or physmap info.
     */
    cpu_synchronize_all_states();
    nr_cpus = 0;
    CPU_FOREACH(cpu) {
        nr_cpus++;
    }

    s->fd = fd;
    s->has_filter = has_filter;
    s->begin = begin;
    s->length = length;

    memory_mapping_list_init(&s->list);

    guest_phys_blocks_init(&s->guest_phys_blocks);
    guest_phys_blocks_append(&s->guest_phys_blocks);

    s->start = get_start_block(s);
    if (s->start == -1) {
        error_setg(errp, QERR_INVALID_PARAMETER, "begin");
        goto cleanup;
    }

    /* get dump info: endian, class and architecture.
     * If the target architecture is not supported, cpu_get_dump_info() will
     * return -1.
     */
    ret = cpu_get_dump_info(&s->dump_info, &s->guest_phys_blocks);
    if (ret < 0) {
        error_setg(errp, QERR_UNSUPPORTED);
        goto cleanup;
    }

    if (!s->dump_info.page_size) {
        s->dump_info.page_size = TARGET_PAGE_SIZE;
    }

    s->note_size = cpu_get_note_size(s->dump_info.d_class,
                                     s->dump_info.d_machine, nr_cpus);
    if (s->note_size < 0) {
        error_setg(errp, QERR_UNSUPPORTED);
        goto cleanup;
    }

    /* get memory mapping */
    if (paging) {
        qemu_get_guest_memory_mapping(&s->list, &s->guest_phys_blocks, &err);
        if (err != NULL) {
            error_propagate(errp, err);
            goto cleanup;
        }
    } else {
        qemu_get_guest_simple_memory_mapping(&s->list, &s->guest_phys_blocks);
    }

    s->nr_cpus = nr_cpus;

    get_max_mapnr(s);

    uint64_t tmp;
    tmp = DIV_ROUND_UP(DIV_ROUND_UP(s->max_mapnr, CHAR_BIT),
                       s->dump_info.page_size);
    s->len_dump_bitmap = tmp * s->dump_info.page_size;

    /* init for kdump-compressed format */
    if (has_format && format != DUMP_GUEST_MEMORY_FORMAT_ELF) {
        switch (format) {
        case DUMP_GUEST_MEMORY_FORMAT_KDUMP_ZLIB:
            s->flag_compress = DUMP_DH_COMPRESSED_ZLIB;
            break;

        case DUMP_GUEST_MEMORY_FORMAT_KDUMP_LZO:
#ifdef CONFIG_LZO
            if (lzo_init() != LZO_E_OK) {
                error_setg(errp, "failed to initialize the LZO library");
                goto cleanup;
            }
#endif
            s->flag_compress = DUMP_DH_COMPRESSED_LZO;
            break;

        case DUMP_GUEST_MEMORY_FORMAT_KDUMP_SNAPPY:
            s->flag_compress = DUMP_DH_COMPRESSED_SNAPPY;
            break;

        default:
            s->flag_compress = 0;
        }

        return;
    }

    if (s->has_filter) {
        memory_mapping_filter(&s->list, s->begin, s->length);
    }

    /*
     * calculate phdr_num
     *
     * the type of ehdr->e_phnum is uint16_t, so we should avoid overflow
     */
    s->phdr_num = 1; /* PT_NOTE */
    if (s->list.num < UINT16_MAX - 2) {
        s->phdr_num += s->list.num;
        s->have_section = false;
    } else {
        s->have_section = true;
        s->phdr_num = PN_XNUM;
        s->sh_info = 1; /* PT_NOTE */

        /* the type of shdr->sh_info is uint32_t, so we should avoid overflow */
        if (s->list.num <= UINT32_MAX - 1) {
            s->sh_info += s->list.num;
        } else {
            s->sh_info = UINT32_MAX;
        }
    }

    if (s->dump_info.d_class == ELFCLASS64) {
        if (s->have_section) {
            s->memory_offset = sizeof(Elf64_Ehdr) +
                               sizeof(Elf64_Phdr) * s->sh_info +
                               sizeof(Elf64_Shdr) + s->note_size;
        } else {
            s->memory_offset = sizeof(Elf64_Ehdr) +
                               sizeof(Elf64_Phdr) * s->phdr_num + s->note_size;
        }
    } else {
        if (s->have_section) {
            s->memory_offset = sizeof(Elf32_Ehdr) +
                               sizeof(Elf32_Phdr) * s->sh_info +
                               sizeof(Elf32_Shdr) + s->note_size;
        } else {
            s->memory_offset = sizeof(Elf32_Ehdr) +
                               sizeof(Elf32_Phdr) * s->phdr_num + s->note_size;
        }
    }

    return;

cleanup:
    dump_cleanup(s);
}
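
/* QMP handler for the dump-guest-memory command. */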

void qmp_dump_guest_memory(bool paging, const char *file, bool has_begin,
                           int64_t begin, bool has_length,
                           int64_t length, bool has_format,
                           DumpGuestMemoryFormat format, Error **errp)
{
    const char *p;
    int fd = -1;
    DumpState *s;
    Error *local_err = NULL;

    /*
     * the kdump-compressed format needs the whole memory dumped, so paging or
     * filtering is not supported here.
     */
    if ((has_format && format != DUMP_GUEST_MEMORY_FORMAT_ELF) &&
        (paging || has_begin || has_length)) {
        error_setg(errp, "kdump-compressed format doesn't support paging or "
                         "filter");
        return;
    }
    if (has_begin && !has_length) {
        error_setg(errp, QERR_MISSING_PARAMETER, "length");
        return;
    }
    if (!has_begin && has_length) {
        error_setg(errp, QERR_MISSING_PARAMETER, "begin");
        return;
    }

    /* check whether lzo/snappy is supported */
#ifndef CONFIG_LZO
    if (has_format && format == DUMP_GUEST_MEMORY_FORMAT_KDUMP_LZO) {
        error_setg(errp, "kdump-lzo is not available now");
        return;
    }
#endif

#ifndef CONFIG_SNAPPY
    if (has_format && format == DUMP_GUEST_MEMORY_FORMAT_KDUMP_SNAPPY) {
        error_setg(errp, "kdump-snappy is not available now");
        return;
    }
#endif

#if !defined(WIN32)
    if (strstart(file, "fd:", &p)) {
        fd = monitor_get_fd(cur_mon, p, errp);
        if (fd == -1) {
            return;
        }
    }
#endif

    if  (strstart(file, "file:", &p)) {
        fd = qemu_open(p, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, S_IRUSR);
        if (fd < 0) {
            error_setg_file_open(errp, errno, p);
            return;
        }
    }

    if (fd == -1) {
        error_setg(errp, QERR_INVALID_PARAMETER, "protocol");
        return;
    }

    s = g_malloc0(sizeof(DumpState));

    dump_init(s, fd, has_format, format, paging, has_begin,
              begin, length, &local_err);
    if (local_err) {
        g_free(s);
        error_propagate(errp, local_err);
        return;
    }

    if (has_format && format != DUMP_GUEST_MEMORY_FORMAT_ELF) {
        create_kdump_vmcore(s, errp);
    } else {
        create_vmcore(s, errp);
    }

    g_free(s);
}

DumpGuestMemoryCapability *qmp_query_dump_guest_memory_capability(Error **errp)
{
    DumpGuestMemoryFormatList *item;
    DumpGuestMemoryCapability *cap =
                                  g_malloc0(sizeof(DumpGuestMemoryCapability));

    /* elf is always available */
    item = g_malloc0(sizeof(DumpGuestMemoryFormatList));
    cap->formats = item;
    item->value = DUMP_GUEST_MEMORY_FORMAT_ELF;

    /* kdump-zlib is always available */
    item->next = g_malloc0(sizeof(DumpGuestMemoryFormatList));
    item = item->next;
    item->value = DUMP_GUEST_MEMORY_FORMAT_KDUMP_ZLIB;

    /* add new item if kdump-lzo is available */
#ifdef CONFIG_LZO
    item->next = g_malloc0(sizeof(DumpGuestMemoryFormatList));
    item = item->next;
    item->value = DUMP_GUEST_MEMORY_FORMAT_KDUMP_LZO;
#endif

    /* add new item if kdump-snappy is available */
#ifdef CONFIG_SNAPPY
    item->next = g_malloc0(sizeof(DumpGuestMemoryFormatList));
    item = item->next;
    item->value = DUMP_GUEST_MEMORY_FORMAT_KDUMP_SNAPPY;
#endif

    return cap;
}