pc.c 54.6 KB
Newer Older
B
bellard 已提交
1 2
/*
 * QEMU PC System Emulator
3
 *
B
bellard 已提交
4
 * Copyright (c) 2003-2004 Fabrice Bellard
5
 *
B
bellard 已提交
6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
24
#include "hw/hw.h"
P
Paolo Bonzini 已提交
25 26 27 28
#include "hw/i386/pc.h"
#include "hw/char/serial.h"
#include "hw/i386/apic.h"
#include "hw/block/fdc.h"
29 30
#include "hw/ide.h"
#include "hw/pci/pci.h"
31
#include "monitor/monitor.h"
P
Paolo Bonzini 已提交
32 33 34
#include "hw/nvram/fw_cfg.h"
#include "hw/timer/hpet.h"
#include "hw/i386/smbios.h"
35
#include "hw/loader.h"
B
Blue Swirl 已提交
36
#include "elf.h"
37
#include "multiboot.h"
P
Paolo Bonzini 已提交
38 39 40
#include "hw/timer/mc146818rtc.h"
#include "hw/timer/i8254.h"
#include "hw/audio/pcspk.h"
41 42
#include "hw/pci/msi.h"
#include "hw/sysbus.h"
43 44
#include "sysemu/sysemu.h"
#include "sysemu/kvm.h"
45
#include "kvm_i386.h"
P
Paolo Bonzini 已提交
46
#include "hw/xen/xen.h"
47
#include "sysemu/block-backend.h"
P
Paolo Bonzini 已提交
48
#include "hw/block/block.h"
G
Gerd Hoffmann 已提交
49
#include "ui/qemu-spice.h"
50 51
#include "exec/memory.h"
#include "exec/address-spaces.h"
52
#include "sysemu/arch_init.h"
53
#include "qemu/bitmap.h"
54
#include "qemu/config-file.h"
55
#include "hw/acpi/acpi.h"
56
#include "hw/acpi/cpu_hotplug.h"
57
#include "hw/cpu/icc_bus.h"
58
#include "hw/boards.h"
59
#include "hw/pci/pci_host.h"
60
#include "acpi-build.h"
61
#include "hw/mem/pc-dimm.h"
62
#include "trace.h"
63
#include "qapi/visitor.h"
B
bellard 已提交
64

B
Blue Swirl 已提交
65 66 67 68 69 70 71 72 73 74
/* debug PC/ISA interrupts */
//#define DEBUG_IRQ

#ifdef DEBUG_IRQ
#define DPRINTF(fmt, ...)                                       \
    do { printf("CPUIRQ: " fmt , ## __VA_ARGS__); } while (0)
#else
#define DPRINTF(fmt, ...)
#endif

75 76 77
/* Leave a chunk of memory at the top of RAM for the BIOS ACPI tables
 * (128K) and other BIOS datastructures (less than 4K reported to be used at
 * the moment, 32K should be enough for a while).  */
S
Stefan Weil 已提交
78
static unsigned acpi_data_size = 0x20000 + 0x8000;
79 80 81 82 83
void pc_set_legacy_acpi_data_size(void)
{
    acpi_data_size = 0x10000;
}

84
#define BIOS_CFG_IOPORT 0x510
85
#define FW_CFG_ACPI_TABLES (FW_CFG_ARCH_LOCAL + 0)
86
#define FW_CFG_SMBIOS_ENTRIES (FW_CFG_ARCH_LOCAL + 1)
J
Jes Sorensen 已提交
87
#define FW_CFG_IRQ0_OVERRIDE (FW_CFG_ARCH_LOCAL + 2)
J
Jes Sorensen 已提交
88
#define FW_CFG_E820_TABLE (FW_CFG_ARCH_LOCAL + 3)
89
#define FW_CFG_HPET (FW_CFG_ARCH_LOCAL + 4)
B
bellard 已提交
90

J
Jes Sorensen 已提交
91 92 93 94 95 96
#define E820_NR_ENTRIES		16

struct e820_entry {
    uint64_t address;
    uint64_t length;
    uint32_t type;
97
} QEMU_PACKED __attribute((__aligned__(4)));
J
Jes Sorensen 已提交
98 99 100 101

struct e820_table {
    uint32_t count;
    struct e820_entry entry[E820_NR_ENTRIES];
102
} QEMU_PACKED __attribute((__aligned__(4)));
J
Jes Sorensen 已提交
103

G
Gerd Hoffmann 已提交
104 105 106
static struct e820_table e820_reserve;
static struct e820_entry *e820_table;
static unsigned e820_entries;
B
Blue Swirl 已提交
107
struct hpet_fw_config hpet_cfg = {.count = UINT8_MAX};
J
Jes Sorensen 已提交
108

J
Jan Kiszka 已提交
109
void gsi_handler(void *opaque, int n, int level)
110
{
J
Jan Kiszka 已提交
111
    GSIState *s = opaque;
112

J
Jan Kiszka 已提交
113 114 115
    DPRINTF("pc: %s GSI %d\n", level ? "raising" : "lowering", n);
    if (n < ISA_NUM_IRQS) {
        qemu_set_irq(s->i8259_irq[n], level);
A
Avi Kivity 已提交
116
    }
J
Jan Kiszka 已提交
117
    qemu_set_irq(s->ioapic_irq[n], level);
118
}
119

J
Julien Grall 已提交
120 121
static void ioport80_write(void *opaque, hwaddr addr, uint64_t data,
                           unsigned size)
B
bellard 已提交
122 123 124
{
}

125 126
static uint64_t ioport80_read(void *opaque, hwaddr addr, unsigned size)
{
127
    return 0xffffffffffffffffULL;
128 129
}

130
/* MSDOS compatibility mode FPU exception support */
P
pbrook 已提交
131
static qemu_irq ferr_irq;
132 133 134 135 136 137

void pc_register_ferr_irq(qemu_irq irq)
{
    ferr_irq = irq;
}

138 139 140
/* XXX: add IGNNE support */
void cpu_set_ferr(CPUX86State *s)
{
P
pbrook 已提交
141
    qemu_irq_raise(ferr_irq);
142 143
}

J
Julien Grall 已提交
144 145
static void ioportF0_write(void *opaque, hwaddr addr, uint64_t data,
                           unsigned size)
146
{
P
pbrook 已提交
147
    qemu_irq_lower(ferr_irq);
148 149
}

150 151
static uint64_t ioportF0_read(void *opaque, hwaddr addr, unsigned size)
{
152
    return 0xffffffffffffffffULL;
153 154
}

B
bellard 已提交
155 156 157
/* TSC handling */
uint64_t cpu_get_tsc(CPUX86State *env)
{
158
    return cpu_get_ticks();
B
bellard 已提交
159 160
}

B
bellard 已提交
161
/* SMM support */
162 163 164 165 166 167 168 169 170 171 172 173

static cpu_set_smm_t smm_set;
static void *smm_arg;

void cpu_smm_register(cpu_set_smm_t callback, void *arg)
{
    assert(smm_set == NULL);
    assert(smm_arg == NULL);
    smm_set = callback;
    smm_arg = arg;
}

A
Andreas Färber 已提交
174
void cpu_smm_update(CPUX86State *env)
B
bellard 已提交
175
{
176
    if (smm_set && smm_arg && CPU(x86_env_get_cpu(env)) == first_cpu) {
177
        smm_set(!!(env->hflags & HF_SMM_MASK), smm_arg);
178
    }
B
bellard 已提交
179 180 181
}


B
bellard 已提交
182
/* IRQ handling */
A
Andreas Färber 已提交
183
int cpu_get_pic_interrupt(CPUX86State *env)
B
bellard 已提交
184
{
185
    X86CPU *cpu = x86_env_get_cpu(env);
B
bellard 已提交
186 187
    int intno;

188
    intno = apic_get_interrupt(cpu->apic_state);
B
bellard 已提交
189 190 191 192
    if (intno >= 0) {
        return intno;
    }
    /* read the irq from the PIC */
193
    if (!apic_accept_pic_intr(cpu->apic_state)) {
194
        return -1;
195
    }
196

B
bellard 已提交
197 198 199 200
    intno = pic_read_irq(isa_pic);
    return intno;
}

P
pbrook 已提交
201
static void pic_irq_request(void *opaque, int irq, int level)
B
bellard 已提交
202
{
203 204
    CPUState *cs = first_cpu;
    X86CPU *cpu = X86_CPU(cs);
205

B
Blue Swirl 已提交
206
    DPRINTF("pic_irqs: %s irq %d\n", level? "raise" : "lower", irq);
207
    if (cpu->apic_state) {
A
Andreas Färber 已提交
208
        CPU_FOREACH(cs) {
209
            cpu = X86_CPU(cs);
210 211
            if (apic_accept_pic_intr(cpu->apic_state)) {
                apic_deliver_pic_intr(cpu->apic_state, level);
212
            }
A
aurel32 已提交
213 214
        }
    } else {
215
        if (level) {
216
            cpu_interrupt(cs, CPU_INTERRUPT_HARD);
217 218 219
        } else {
            cpu_reset_interrupt(cs, CPU_INTERRUPT_HARD);
        }
220
    }
B
bellard 已提交
221 222
}

B
bellard 已提交
223 224
/* PC cmos mappings */

B
bellard 已提交
225 226
#define REG_EQUIPMENT_BYTE          0x14

227
static int cmos_get_fd_drive_type(FDriveType fd0)
228 229 230 231
{
    int val;

    switch (fd0) {
232
    case FDRIVE_DRV_144:
233 234 235
        /* 1.44 Mb 3"5 drive */
        val = 4;
        break;
236
    case FDRIVE_DRV_288:
237 238 239
        /* 2.88 Mb 3"5 drive */
        val = 5;
        break;
240
    case FDRIVE_DRV_120:
241 242 243
        /* 1.2 Mb 5"5 drive */
        val = 2;
        break;
244
    case FDRIVE_DRV_NONE:
245 246 247 248 249 250 251
    default:
        val = 0;
        break;
    }
    return val;
}

252 253
static void cmos_init_hd(ISADevice *s, int type_ofs, int info_ofs,
                         int16_t cylinders, int8_t heads, int8_t sectors)
B
bellard 已提交
254 255 256 257 258 259 260 261 262 263 264 265 266
{
    rtc_set_memory(s, type_ofs, 47);
    rtc_set_memory(s, info_ofs, cylinders);
    rtc_set_memory(s, info_ofs + 1, cylinders >> 8);
    rtc_set_memory(s, info_ofs + 2, heads);
    rtc_set_memory(s, info_ofs + 3, 0xff);
    rtc_set_memory(s, info_ofs + 4, 0xff);
    rtc_set_memory(s, info_ofs + 5, 0xc0 | ((heads > 8) << 3));
    rtc_set_memory(s, info_ofs + 6, cylinders);
    rtc_set_memory(s, info_ofs + 7, cylinders >> 8);
    rtc_set_memory(s, info_ofs + 8, sectors);
}

267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283
/* convert boot_device letter to something recognizable by the bios */
static int boot_device2nibble(char boot_device)
{
    switch(boot_device) {
    case 'a':
    case 'b':
        return 0x01; /* floppy boot */
    case 'c':
        return 0x02; /* hard drive boot */
    case 'd':
        return 0x03; /* CD-ROM boot */
    case 'n':
        return 0x04; /* Network boot */
    }
    return 0;
}

284
static int set_boot_dev(ISADevice *s, const char *boot_device)
285 286 287 288 289 290 291
{
#define PC_MAX_BOOT_DEVICES 3
    int nbds, bds[3] = { 0, };
    int i;

    nbds = strlen(boot_device);
    if (nbds > PC_MAX_BOOT_DEVICES) {
292
        error_report("Too many boot devices for PC");
293 294 295 296 297
        return(1);
    }
    for (i = 0; i < nbds; i++) {
        bds[i] = boot_device2nibble(boot_device[i]);
        if (bds[i] == 0) {
298 299
            error_report("Invalid boot device for PC: '%c'",
                         boot_device[i]);
300 301 302 303
            return(1);
        }
    }
    rtc_set_memory(s, 0x3d, (bds[1] << 4) | bds[0]);
304
    rtc_set_memory(s, 0x38, (bds[2] << 4) | (fd_bootchk ? 0x0 : 0x1));
305 306 307
    return(0);
}

308 309
static int pc_boot_set(void *opaque, const char *boot_device)
{
310
    return set_boot_dev(opaque, boot_device);
311 312
}

313 314
typedef struct pc_cmos_init_late_arg {
    ISADevice *rtc_state;
315
    BusState *idebus[2];
316 317 318 319 320 321
} pc_cmos_init_late_arg;

static void pc_cmos_init_late(void *opaque)
{
    pc_cmos_init_late_arg *arg = opaque;
    ISADevice *s = arg->rtc_state;
322 323
    int16_t cylinders;
    int8_t heads, sectors;
324
    int val;
325
    int i, trans;
326

327 328 329 330 331 332 333 334 335 336 337 338
    val = 0;
    if (ide_get_geometry(arg->idebus[0], 0,
                         &cylinders, &heads, &sectors) >= 0) {
        cmos_init_hd(s, 0x19, 0x1b, cylinders, heads, sectors);
        val |= 0xf0;
    }
    if (ide_get_geometry(arg->idebus[0], 1,
                         &cylinders, &heads, &sectors) >= 0) {
        cmos_init_hd(s, 0x1a, 0x24, cylinders, heads, sectors);
        val |= 0x0f;
    }
    rtc_set_memory(s, 0x12, val);
339 340 341

    val = 0;
    for (i = 0; i < 4; i++) {
342 343 344 345 346 347
        /* NOTE: ide_get_geometry() returns the physical
           geometry.  It is always such that: 1 <= sects <= 63, 1
           <= heads <= 16, 1 <= cylinders <= 16383. The BIOS
           geometry can be different if a translation is done. */
        if (ide_get_geometry(arg->idebus[i / 2], i % 2,
                             &cylinders, &heads, &sectors) >= 0) {
348 349 350
            trans = ide_get_bios_chs_trans(arg->idebus[i / 2], i % 2) - 1;
            assert((trans & ~3) == 0);
            val |= trans << (i * 2);
351 352 353 354 355 356 357
        }
    }
    rtc_set_memory(s, 0x39, val);

    qemu_unregister_reset(pc_cmos_init_late, opaque);
}

358
void pc_cmos_init(ram_addr_t ram_size, ram_addr_t above_4g_mem_size,
G
Gu Zheng 已提交
359
                  const char *boot_device, MachineState *machine,
K
Kevin Wolf 已提交
360
                  ISADevice *floppy, BusState *idebus0, BusState *idebus1,
B
Blue Swirl 已提交
361
                  ISADevice *s)
B
bellard 已提交
362
{
363
    int val, nb, i;
364
    FDriveType fd_type[2] = { FDRIVE_DRV_NONE, FDRIVE_DRV_NONE };
365
    static pc_cmos_init_late_arg arg;
G
Gu Zheng 已提交
366
    PCMachineState *pc_machine = PC_MACHINE(machine);
B
bellard 已提交
367 368

    /* various important CMOS locations needed by PC/Bochs bios */
B
bellard 已提交
369 370

    /* memory size */
371 372
    /* base memory (first MiB) */
    val = MIN(ram_size / 1024, 640);
B
bellard 已提交
373 374
    rtc_set_memory(s, 0x15, val);
    rtc_set_memory(s, 0x16, val >> 8);
375 376 377 378 379 380
    /* extended memory (next 64MiB) */
    if (ram_size > 1024 * 1024) {
        val = (ram_size - 1024 * 1024) / 1024;
    } else {
        val = 0;
    }
B
bellard 已提交
381 382
    if (val > 65535)
        val = 65535;
B
bellard 已提交
383 384 385 386
    rtc_set_memory(s, 0x17, val);
    rtc_set_memory(s, 0x18, val >> 8);
    rtc_set_memory(s, 0x30, val);
    rtc_set_memory(s, 0x31, val >> 8);
387 388 389 390
    /* memory between 16MiB and 4GiB */
    if (ram_size > 16 * 1024 * 1024) {
        val = (ram_size - 16 * 1024 * 1024) / 65536;
    } else {
B
bellard 已提交
391
        val = 0;
392
    }
B
bellard 已提交
393 394
    if (val > 65535)
        val = 65535;
B
bellard 已提交
395 396
    rtc_set_memory(s, 0x34, val);
    rtc_set_memory(s, 0x35, val >> 8);
397 398 399 400 401
    /* memory above 4GiB */
    val = above_4g_mem_size / 65536;
    rtc_set_memory(s, 0x5b, val);
    rtc_set_memory(s, 0x5c, val >> 8);
    rtc_set_memory(s, 0x5d, val >> 16);
402

A
aurel32 已提交
403 404
    /* set the number of CPU */
    rtc_set_memory(s, 0x5f, smp_cpus - 1);
G
Gu Zheng 已提交
405 406 407 408 409 410 411 412

    object_property_add_link(OBJECT(machine), "rtc_state",
                             TYPE_ISA_DEVICE,
                             (Object **)&pc_machine->rtc,
                             object_property_allow_set_link,
                             OBJ_PROP_LINK_UNREF_ON_RELEASE, &error_abort);
    object_property_set_link(OBJECT(machine), OBJECT(s),
                             "rtc_state", &error_abort);
A
aurel32 已提交
413

414
    if (set_boot_dev(s, boot_device)) {
415 416
        exit(1);
    }
B
bellard 已提交
417

B
bellard 已提交
418
    /* floppy type */
K
Kevin Wolf 已提交
419 420
    if (floppy) {
        for (i = 0; i < 2; i++) {
421
            fd_type[i] = isa_fdc_get_drive_type(floppy, i);
B
Blue Swirl 已提交
422 423 424 425
        }
    }
    val = (cmos_get_fd_drive_type(fd_type[0]) << 4) |
        cmos_get_fd_drive_type(fd_type[1]);
B
bellard 已提交
426
    rtc_set_memory(s, 0x10, val);
427

B
bellard 已提交
428
    val = 0;
B
bellard 已提交
429
    nb = 0;
B
Blue Swirl 已提交
430
    if (fd_type[0] < FDRIVE_DRV_NONE) {
B
bellard 已提交
431
        nb++;
432
    }
B
Blue Swirl 已提交
433
    if (fd_type[1] < FDRIVE_DRV_NONE) {
B
bellard 已提交
434
        nb++;
435
    }
B
bellard 已提交
436 437 438 439
    switch (nb) {
    case 0:
        break;
    case 1:
B
bellard 已提交
440
        val |= 0x01; /* 1 drive, ready for boot */
B
bellard 已提交
441 442
        break;
    case 2:
B
bellard 已提交
443
        val |= 0x41; /* 2 drives, ready for boot */
B
bellard 已提交
444 445
        break;
    }
B
bellard 已提交
446 447 448 449
    val |= 0x02; /* FPU is there */
    val |= 0x04; /* PS/2 mouse installed */
    rtc_set_memory(s, REG_EQUIPMENT_BYTE, val);

B
bellard 已提交
450
    /* hard drives */
451
    arg.rtc_state = s;
452 453
    arg.idebus[0] = idebus0;
    arg.idebus[1] = idebus1;
454
    qemu_register_reset(pc_cmos_init_late, &arg);
B
bellard 已提交
455 456
}

A
Andreas Färber 已提交
457 458 459
#define TYPE_PORT92 "port92"
#define PORT92(obj) OBJECT_CHECK(Port92State, (obj), TYPE_PORT92)

460 461
/* port 92 stuff: could be split off */
typedef struct Port92State {
A
Andreas Färber 已提交
462 463
    ISADevice parent_obj;

464
    MemoryRegion io;
465 466 467 468
    uint8_t outport;
    qemu_irq *a20_out;
} Port92State;

469 470
static void port92_write(void *opaque, hwaddr addr, uint64_t val,
                         unsigned size)
471 472
{
    Port92State *s = opaque;
473
    int oldval = s->outport;
474

G
Gonglei 已提交
475
    DPRINTF("port92: write 0x%02" PRIx64 "\n", val);
476 477
    s->outport = val;
    qemu_set_irq(*s->a20_out, (val >> 1) & 1);
478
    if ((val & 1) && !(oldval & 1)) {
479 480 481 482
        qemu_system_reset_request();
    }
}

483 484
static uint64_t port92_read(void *opaque, hwaddr addr,
                            unsigned size)
485 486 487 488 489 490 491 492 493 494 495
{
    Port92State *s = opaque;
    uint32_t ret;

    ret = s->outport;
    DPRINTF("port92: read 0x%02x\n", ret);
    return ret;
}

static void port92_init(ISADevice *dev, qemu_irq *a20_out)
{
A
Andreas Färber 已提交
496
    Port92State *s = PORT92(dev);
497 498 499 500 501 502 503 504

    s->a20_out = a20_out;
}

static const VMStateDescription vmstate_port92_isa = {
    .name = "port92",
    .version_id = 1,
    .minimum_version_id = 1,
505
    .fields = (VMStateField[]) {
506 507 508 509 510 511 512
        VMSTATE_UINT8(outport, Port92State),
        VMSTATE_END_OF_LIST()
    }
};

static void port92_reset(DeviceState *d)
{
A
Andreas Färber 已提交
513
    Port92State *s = PORT92(d);
514 515 516 517

    s->outport &= ~1;
}

518
static const MemoryRegionOps port92_ops = {
519 520 521 522 523 524 525
    .read = port92_read,
    .write = port92_write,
    .impl = {
        .min_access_size = 1,
        .max_access_size = 1,
    },
    .endianness = DEVICE_LITTLE_ENDIAN,
526 527
};

528
static void port92_initfn(Object *obj)
529
{
530
    Port92State *s = PORT92(obj);
531

532
    memory_region_init_io(&s->io, OBJECT(s), &port92_ops, s, "port92", 1);
533

534
    s->outport = 0;
535 536 537 538 539 540 541 542
}

static void port92_realizefn(DeviceState *dev, Error **errp)
{
    ISADevice *isadev = ISA_DEVICE(dev);
    Port92State *s = PORT92(dev);

    isa_register_ioport(isadev, &s->io, 0x92);
543 544
}

545 546
static void port92_class_initfn(ObjectClass *klass, void *data)
{
547
    DeviceClass *dc = DEVICE_CLASS(klass);
548 549

    dc->realize = port92_realizefn;
550 551
    dc->reset = port92_reset;
    dc->vmsd = &vmstate_port92_isa;
552 553 554 555 556 557
    /*
     * Reason: unlike ordinary ISA devices, this one needs additional
     * wiring: its A20 output line needs to be wired up by
     * port92_init().
     */
    dc->cannot_instantiate_with_device_add_yet = true;
558 559
}

560
static const TypeInfo port92_info = {
A
Andreas Färber 已提交
561
    .name          = TYPE_PORT92,
562 563
    .parent        = TYPE_ISA_DEVICE,
    .instance_size = sizeof(Port92State),
564
    .instance_init = port92_initfn,
565
    .class_init    = port92_class_initfn,
566 567
};

A
Andreas Färber 已提交
568
static void port92_register_types(void)
569
{
570
    type_register_static(&port92_info);
571
}
A
Andreas Färber 已提交
572 573

type_init(port92_register_types)
574

B
Blue Swirl 已提交
575
static void handle_a20_line_change(void *opaque, int irq, int level)
576
{
577
    X86CPU *cpu = opaque;
B
bellard 已提交
578

B
Blue Swirl 已提交
579
    /* XXX: send to all CPUs ? */
580
    /* XXX: add logic to handle multiple A20 line sources */
581
    x86_cpu_set_a20(cpu, level);
B
bellard 已提交
582 583
}

J
Jes Sorensen 已提交
584 585
int e820_add_entry(uint64_t address, uint64_t length, uint32_t type)
{
G
Gerd Hoffmann 已提交
586
    int index = le32_to_cpu(e820_reserve.count);
J
Jes Sorensen 已提交
587 588
    struct e820_entry *entry;

G
Gerd Hoffmann 已提交
589 590 591 592 593 594 595 596 597 598 599 600 601
    if (type != E820_RAM) {
        /* old FW_CFG_E820_TABLE entry -- reservations only */
        if (index >= E820_NR_ENTRIES) {
            return -EBUSY;
        }
        entry = &e820_reserve.entry[index++];

        entry->address = cpu_to_le64(address);
        entry->length = cpu_to_le64(length);
        entry->type = cpu_to_le32(type);

        e820_reserve.count = cpu_to_le32(index);
    }
J
Jes Sorensen 已提交
602

G
Gerd Hoffmann 已提交
603 604 605 606 607 608 609
    /* new "etc/e820" file -- include ram too */
    e820_table = g_realloc(e820_table,
                           sizeof(struct e820_entry) * (e820_entries+1));
    e820_table[e820_entries].address = cpu_to_le64(address);
    e820_table[e820_entries].length = cpu_to_le64(length);
    e820_table[e820_entries].type = cpu_to_le32(type);
    e820_entries++;
J
Jes Sorensen 已提交
610

G
Gerd Hoffmann 已提交
611
    return e820_entries;
J
Jes Sorensen 已提交
612 613
}

614 615 616 617 618 619 620 621 622 623 624 625 626 627 628
int e820_get_num_entries(void)
{
    return e820_entries;
}

bool e820_get_entry(int idx, uint32_t type, uint64_t *address, uint64_t *length)
{
    if (idx < e820_entries && e820_table[idx].type == cpu_to_le32(type)) {
        *address = le64_to_cpu(e820_table[idx].address);
        *length = le64_to_cpu(e820_table[idx].length);
        return true;
    }
    return false;
}

629 630 631 632 633 634 635 636 637 638 639 640
/* Calculates the limit to CPU APIC ID values
 *
 * This function returns the limit for the APIC ID value, so that all
 * CPU APIC IDs are < pc_apic_id_limit().
 *
 * This is used for FW_CFG_MAX_CPUS. See comments on bochs_bios_init().
 */
static unsigned int pc_apic_id_limit(unsigned int max_cpus)
{
    return x86_cpu_apic_id_from_index(max_cpus - 1) + 1;
}

L
Laszlo Ersek 已提交
641
static FWCfgState *bochs_bios_init(void)
B
bellard 已提交
642
{
L
Laszlo Ersek 已提交
643
    FWCfgState *fw_cfg;
644 645
    uint8_t *smbios_tables, *smbios_anchor;
    size_t smbios_tables_len, smbios_anchor_len;
646 647
    uint64_t *numa_fw_cfg;
    int i, j;
648
    unsigned int apic_id_limit = pc_apic_id_limit(max_cpus);
649 650

    fw_cfg = fw_cfg_init(BIOS_CFG_IOPORT, BIOS_CFG_IOPORT + 1, 0, 0);
651 652 653 654 655 656 657 658 659 660 661 662 663 664 665
    /* FW_CFG_MAX_CPUS is a bit confusing/problematic on x86:
     *
     * SeaBIOS needs FW_CFG_MAX_CPUS for CPU hotplug, but the CPU hotplug
     * QEMU<->SeaBIOS interface is not based on the "CPU index", but on the APIC
     * ID of hotplugged CPUs[1]. This means that FW_CFG_MAX_CPUS is not the
     * "maximum number of CPUs", but the "limit to the APIC ID values SeaBIOS
     * may see".
     *
     * So, this means we must not use max_cpus, here, but the maximum possible
     * APIC ID value, plus one.
     *
     * [1] The only kind of "CPU identifier" used between SeaBIOS and QEMU is
     *     the APIC ID, not the "CPU index"
     */
    fw_cfg_add_i16(fw_cfg, FW_CFG_MAX_CPUS, (uint16_t)apic_id_limit);
666
    fw_cfg_add_i32(fw_cfg, FW_CFG_ID, 1);
667
    fw_cfg_add_i64(fw_cfg, FW_CFG_RAM_SIZE, (uint64_t)ram_size);
668 669
    fw_cfg_add_bytes(fw_cfg, FW_CFG_ACPI_TABLES,
                     acpi_tables, acpi_tables_len);
670
    fw_cfg_add_i32(fw_cfg, FW_CFG_IRQ0_OVERRIDE, kvm_allows_irq0_override());
671

672 673
    smbios_tables = smbios_get_table_legacy(&smbios_tables_len);
    if (smbios_tables) {
674
        fw_cfg_add_bytes(fw_cfg, FW_CFG_SMBIOS_ENTRIES,
675 676 677 678 679 680 681 682 683 684 685 686
                         smbios_tables, smbios_tables_len);
    }

    smbios_get_tables(&smbios_tables, &smbios_tables_len,
                      &smbios_anchor, &smbios_anchor_len);
    if (smbios_anchor) {
        fw_cfg_add_file(fw_cfg, "etc/smbios/smbios-tables",
                        smbios_tables, smbios_tables_len);
        fw_cfg_add_file(fw_cfg, "etc/smbios/smbios-anchor",
                        smbios_anchor, smbios_anchor_len);
    }

687
    fw_cfg_add_bytes(fw_cfg, FW_CFG_E820_TABLE,
G
Gerd Hoffmann 已提交
688 689 690
                     &e820_reserve, sizeof(e820_reserve));
    fw_cfg_add_file(fw_cfg, "etc/e820", e820_table,
                    sizeof(struct e820_entry) * e820_entries);
691

692
    fw_cfg_add_bytes(fw_cfg, FW_CFG_HPET, &hpet_cfg, sizeof(hpet_cfg));
693 694 695 696
    /* allocate memory for the NUMA channel: one (64bit) word for the number
     * of nodes, one word for each VCPU->node and one word for each node to
     * hold the amount of memory.
     */
697
    numa_fw_cfg = g_new0(uint64_t, 1 + apic_id_limit + nb_numa_nodes);
698
    numa_fw_cfg[0] = cpu_to_le64(nb_numa_nodes);
699
    for (i = 0; i < max_cpus; i++) {
700 701
        unsigned int apic_id = x86_cpu_apic_id_from_index(i);
        assert(apic_id < apic_id_limit);
702
        for (j = 0; j < nb_numa_nodes; j++) {
703
            if (test_bit(i, numa_info[j].node_cpu)) {
704
                numa_fw_cfg[apic_id + 1] = cpu_to_le64(j);
705 706 707 708 709
                break;
            }
        }
    }
    for (i = 0; i < nb_numa_nodes; i++) {
710
        numa_fw_cfg[apic_id_limit + 1 + i] = cpu_to_le64(numa_info[i].node_mem);
711
    }
712
    fw_cfg_add_bytes(fw_cfg, FW_CFG_NUMA, numa_fw_cfg,
713 714
                     (1 + apic_id_limit + nb_numa_nodes) *
                     sizeof(*numa_fw_cfg));
A
Alexander Graf 已提交
715 716

    return fw_cfg;
B
bellard 已提交
717 718
}

T
ths 已提交
719 720 721 722 723 724 725 726 727 728 729 730 731 732
static long get_file_size(FILE *f)
{
    long where, size;

    /* XXX: on Unix systems, using fstat() probably makes more sense */

    where = ftell(f);
    fseek(f, 0, SEEK_END);
    size = ftell(f);
    fseek(f, where, SEEK_SET);

    return size;
}

L
Laszlo Ersek 已提交
733
static void load_linux(FWCfgState *fw_cfg,
734
                       const char *kernel_filename,
L
liguang 已提交
735 736
                       const char *initrd_filename,
                       const char *kernel_cmdline,
A
Avi Kivity 已提交
737
                       hwaddr max_ram_size)
T
ths 已提交
738 739
{
    uint16_t protocol;
P
Paul Brook 已提交
740
    int setup_size, kernel_size, initrd_size = 0, cmdline_size;
T
ths 已提交
741
    uint32_t initrd_max;
742
    uint8_t header[8192], *setup, *kernel, *initrd_data;
A
Avi Kivity 已提交
743
    hwaddr real_addr, prot_addr, cmdline_addr, initrd_addr = 0;
744
    FILE *f;
P
Pascal Terjan 已提交
745
    char *vmode;
T
ths 已提交
746 747 748 749 750 751 752

    /* Align to 16 bytes as a paranoia measure */
    cmdline_size = (strlen(kernel_cmdline)+16) & ~15;

    /* load the kernel header */
    f = fopen(kernel_filename, "rb");
    if (!f || !(kernel_size = get_file_size(f)) ||
L
liguang 已提交
753 754 755 756 757
        fread(header, 1, MIN(ARRAY_SIZE(header), kernel_size), f) !=
        MIN(ARRAY_SIZE(header), kernel_size)) {
        fprintf(stderr, "qemu: could not load kernel '%s': %s\n",
                kernel_filename, strerror(errno));
        exit(1);
T
ths 已提交
758 759 760
    }

    /* kernel protocol version */
B
bellard 已提交
761
#if 0
T
ths 已提交
762
    fprintf(stderr, "header magic: %#x\n", ldl_p(header+0x202));
B
bellard 已提交
763
#endif
L
liguang 已提交
764 765 766 767 768
    if (ldl_p(header+0x202) == 0x53726448) {
        protocol = lduw_p(header+0x206);
    } else {
        /* This looks like a multiboot kernel. If it is, let's stop
           treating it like a Linux kernel. */
769
        if (load_multiboot(fw_cfg, f, kernel_filename, initrd_filename,
L
liguang 已提交
770
                           kernel_cmdline, kernel_size, header)) {
B
Blue Swirl 已提交
771
            return;
L
liguang 已提交
772 773
        }
        protocol = 0;
A
Alexander Graf 已提交
774
    }
T
ths 已提交
775 776

    if (protocol < 0x200 || !(header[0x211] & 0x01)) {
L
liguang 已提交
777 778 779 780
        /* Low kernel */
        real_addr    = 0x90000;
        cmdline_addr = 0x9a000 - cmdline_size;
        prot_addr    = 0x10000;
T
ths 已提交
781
    } else if (protocol < 0x202) {
L
liguang 已提交
782 783 784 785
        /* High but ancient kernel */
        real_addr    = 0x90000;
        cmdline_addr = 0x9a000 - cmdline_size;
        prot_addr    = 0x100000;
T
ths 已提交
786
    } else {
L
liguang 已提交
787 788 789 790
        /* High and recent kernel */
        real_addr    = 0x10000;
        cmdline_addr = 0x20000;
        prot_addr    = 0x100000;
T
ths 已提交
791 792
    }

B
bellard 已提交
793
#if 0
T
ths 已提交
794
    fprintf(stderr,
L
liguang 已提交
795 796 797 798 799 800
            "qemu: real_addr     = 0x" TARGET_FMT_plx "\n"
            "qemu: cmdline_addr  = 0x" TARGET_FMT_plx "\n"
            "qemu: prot_addr     = 0x" TARGET_FMT_plx "\n",
            real_addr,
            cmdline_addr,
            prot_addr);
B
bellard 已提交
801
#endif
T
ths 已提交
802 803

    /* highest address for loading the initrd */
L
liguang 已提交
804 805 806 807 808
    if (protocol >= 0x203) {
        initrd_max = ldl_p(header+0x22c);
    } else {
        initrd_max = 0x37ffffff;
    }
T
ths 已提交
809

810 811 812
    if (initrd_max >= max_ram_size - acpi_data_size) {
        initrd_max = max_ram_size - acpi_data_size - 1;
    }
T
ths 已提交
813

814 815
    fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_ADDR, cmdline_addr);
    fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_SIZE, strlen(kernel_cmdline)+1);
816
    fw_cfg_add_string(fw_cfg, FW_CFG_CMDLINE_DATA, kernel_cmdline);
T
ths 已提交
817 818

    if (protocol >= 0x202) {
L
liguang 已提交
819
        stl_p(header+0x228, cmdline_addr);
T
ths 已提交
820
    } else {
L
liguang 已提交
821 822
        stw_p(header+0x20, 0xA33F);
        stw_p(header+0x22, cmdline_addr-real_addr);
T
ths 已提交
823 824
    }

P
Pascal Terjan 已提交
825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842
    /* handle vga= parameter */
    vmode = strstr(kernel_cmdline, "vga=");
    if (vmode) {
        unsigned int video_mode;
        /* skip "vga=" */
        vmode += 4;
        if (!strncmp(vmode, "normal", 6)) {
            video_mode = 0xffff;
        } else if (!strncmp(vmode, "ext", 3)) {
            video_mode = 0xfffe;
        } else if (!strncmp(vmode, "ask", 3)) {
            video_mode = 0xfffd;
        } else {
            video_mode = strtol(vmode, NULL, 0);
        }
        stw_p(header+0x1fa, video_mode);
    }

T
ths 已提交
843
    /* loader type */
S
Stefan Weil 已提交
844
    /* High nybble = B reserved for QEMU; low nybble is revision number.
T
ths 已提交
845 846
       If this code is substantially changed, you may want to consider
       incrementing the revision. */
L
liguang 已提交
847 848 849
    if (protocol >= 0x200) {
        header[0x210] = 0xB0;
    }
T
ths 已提交
850 851
    /* heap */
    if (protocol >= 0x201) {
L
liguang 已提交
852 853
        header[0x211] |= 0x80;	/* CAN_USE_HEAP */
        stw_p(header+0x224, cmdline_addr-real_addr-0x200);
T
ths 已提交
854 855 856 857
    }

    /* load initrd */
    if (initrd_filename) {
L
liguang 已提交
858 859 860 861
        if (protocol < 0x200) {
            fprintf(stderr, "qemu: linux kernel too old to load a ram disk\n");
            exit(1);
        }
T
ths 已提交
862

L
liguang 已提交
863
        initrd_size = get_image_size(initrd_filename);
M
M. Mohan Kumar 已提交
864
        if (initrd_size < 0) {
865 866
            fprintf(stderr, "qemu: error reading initrd %s: %s\n",
                    initrd_filename, strerror(errno));
M
M. Mohan Kumar 已提交
867 868 869
            exit(1);
        }

870
        initrd_addr = (initrd_max-initrd_size) & ~4095;
871

872
        initrd_data = g_malloc(initrd_size);
873 874 875 876 877
        load_image(initrd_filename, initrd_data);

        fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_ADDR, initrd_addr);
        fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_SIZE, initrd_size);
        fw_cfg_add_bytes(fw_cfg, FW_CFG_INITRD_DATA, initrd_data, initrd_size);
T
ths 已提交
878

L
liguang 已提交
879 880
        stl_p(header+0x218, initrd_addr);
        stl_p(header+0x21c, initrd_size);
T
ths 已提交
881 882
    }

883
    /* load kernel and setup */
T
ths 已提交
884
    setup_size = header[0x1f1];
L
liguang 已提交
885 886 887
    if (setup_size == 0) {
        setup_size = 4;
    }
T
ths 已提交
888
    setup_size = (setup_size+1)*512;
889
    kernel_size -= setup_size;
T
ths 已提交
890

891 892
    setup  = g_malloc(setup_size);
    kernel = g_malloc(kernel_size);
893
    fseek(f, 0, SEEK_SET);
894 895 896 897 898 899 900 901
    if (fread(setup, 1, setup_size, f) != setup_size) {
        fprintf(stderr, "fread() failed\n");
        exit(1);
    }
    if (fread(kernel, 1, kernel_size, f) != kernel_size) {
        fprintf(stderr, "fread() failed\n");
        exit(1);
    }
T
ths 已提交
902
    fclose(f);
903
    memcpy(setup, header, MIN(sizeof(header), setup_size));
904 905 906 907 908 909 910 911 912

    fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ADDR, prot_addr);
    fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_SIZE, kernel_size);
    fw_cfg_add_bytes(fw_cfg, FW_CFG_KERNEL_DATA, kernel, kernel_size);

    fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_ADDR, real_addr);
    fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_SIZE, setup_size);
    fw_cfg_add_bytes(fw_cfg, FW_CFG_SETUP_DATA, setup, setup_size);

G
Gleb Natapov 已提交
913 914
    option_rom[nb_option_roms].name = "linuxboot.bin";
    option_rom[nb_option_roms].bootindex = 0;
915
    nb_option_roms++;
T
ths 已提交
916 917
}

B
bellard 已提交
918 919
#define NE2000_NB_MAX 6

B
Blue Swirl 已提交
920 921 922
static const int ne2000_io[NE2000_NB_MAX] = { 0x300, 0x320, 0x340, 0x360,
                                              0x280, 0x380 };
static const int ne2000_irq[NE2000_NB_MAX] = { 9, 10, 11, 3, 4, 5 };
B
bellard 已提交
923

924
void pc_init_ne2k_isa(ISABus *bus, NICInfo *nd)
925 926 927 928 929
{
    static int nb_ne2k = 0;

    if (nb_ne2k == NE2000_NB_MAX)
        return;
930
    isa_ne2000_init(bus, ne2000_io[nb_ne2k],
G
Gerd Hoffmann 已提交
931
                    ne2000_irq[nb_ne2k], nd);
932 933 934
    nb_ne2k++;
}

B
Blue Swirl 已提交
935
DeviceState *cpu_get_current_apic(void)
936
{
937 938
    if (current_cpu) {
        X86CPU *cpu = X86_CPU(current_cpu);
939
        return cpu->apic_state;
940 941 942 943 944
    } else {
        return NULL;
    }
}

945
void pc_acpi_smi_interrupt(void *opaque, int irq, int level)
B
Blue Swirl 已提交
946
{
947
    X86CPU *cpu = opaque;
B
Blue Swirl 已提交
948 949

    if (level) {
950
        cpu_interrupt(CPU(cpu), CPU_INTERRUPT_SMI);
B
Blue Swirl 已提交
951 952 953
    }
}

954 955
static X86CPU *pc_new_cpu(const char *cpu_model, int64_t apic_id,
                          DeviceState *icc_bridge, Error **errp)
956 957 958 959
{
    X86CPU *cpu;
    Error *local_err = NULL;

960 961 962 963
    cpu = cpu_x86_create(cpu_model, icc_bridge, &local_err);
    if (local_err != NULL) {
        error_propagate(errp, local_err);
        return NULL;
964 965 966 967 968 969 970
    }

    object_property_set_int(OBJECT(cpu), apic_id, "apic-id", &local_err);
    object_property_set_bool(OBJECT(cpu), true, "realized", &local_err);

    if (local_err) {
        error_propagate(errp, local_err);
971 972
        object_unref(OBJECT(cpu));
        cpu = NULL;
973 974 975 976
    }
    return cpu;
}

977 978 979 980 981 982 983
static const char *current_cpu_model;

void pc_hot_add_cpu(const int64_t id, Error **errp)
{
    DeviceState *icc_bridge;
    int64_t apic_id = x86_cpu_apic_id_from_index(id);

984 985 986 987 988
    if (id < 0) {
        error_setg(errp, "Invalid CPU id: %" PRIi64, id);
        return;
    }

989 990 991 992 993 994 995 996 997 998 999 1000
    if (cpu_exists(apic_id)) {
        error_setg(errp, "Unable to add CPU: %" PRIi64
                   ", it already exists", id);
        return;
    }

    if (id >= max_cpus) {
        error_setg(errp, "Unable to add CPU: %" PRIi64
                   ", max allowed: %d", id, max_cpus - 1);
        return;
    }

1001 1002 1003 1004 1005 1006 1007
    if (apic_id >= ACPI_CPU_HOTPLUG_ID_LIMIT) {
        error_setg(errp, "Unable to add CPU: %" PRIi64
                   ", resulting APIC ID (%" PRIi64 ") is too large",
                   id, apic_id);
        return;
    }

1008 1009 1010 1011 1012
    icc_bridge = DEVICE(object_resolve_path_type("icc-bridge",
                                                 TYPE_ICC_BRIDGE, NULL));
    pc_new_cpu(current_cpu_model, apic_id, icc_bridge, errp);
}

1013
void pc_cpus_init(const char *cpu_model, DeviceState *icc_bridge)
1014 1015
{
    int i;
1016
    X86CPU *cpu = NULL;
1017
    Error *error = NULL;
1018
    unsigned long apic_id_limit;
1019 1020 1021 1022 1023 1024 1025 1026 1027

    /* init CPUs */
    if (cpu_model == NULL) {
#ifdef TARGET_X86_64
        cpu_model = "qemu64";
#else
        cpu_model = "qemu32";
#endif
    }
1028
    current_cpu_model = cpu_model;
1029

1030 1031 1032 1033 1034 1035 1036
    apic_id_limit = pc_apic_id_limit(max_cpus);
    if (apic_id_limit > ACPI_CPU_HOTPLUG_ID_LIMIT) {
        error_report("max_cpus is too large. APIC ID of last CPU is %lu",
                     apic_id_limit - 1);
        exit(1);
    }

1037
    for (i = 0; i < smp_cpus; i++) {
1038 1039
        cpu = pc_new_cpu(cpu_model, x86_cpu_apic_id_from_index(i),
                         icc_bridge, &error);
1040
        if (error) {
1041
            error_report("%s", error_get_pretty(error));
1042
            error_free(error);
1043 1044
            exit(1);
        }
1045
    }
1046 1047

    /* map APIC MMIO area if CPU has APIC */
1048
    if (cpu && cpu->apic_state) {
1049 1050 1051 1052
        /* XXX: what if the base changes? */
        sysbus_mmio_map_overlap(SYS_BUS_DEVICE(icc_bridge), 0,
                                APIC_DEFAULT_ADDRESS, 0x1000);
    }
1053 1054 1055

    /* tell smbios about cpuid version and features */
    smbios_set_cpuid(cpu->env.cpuid_version, cpu->env.features[FEAT_1_EDX]);
1056 1057
}

1058 1059 1060 1061 1062 1063 1064 1065
/* pci-info ROM file. Little endian format */
typedef struct PcRomPciInfo {
    uint64_t w32_min;
    uint64_t w32_max;
    uint64_t w64_min;
    uint64_t w64_max;
} PcRomPciInfo;

1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076
typedef struct PcGuestInfoState {
    PcGuestInfo info;
    Notifier machine_done;
} PcGuestInfoState;

static
void pc_guest_info_machine_done(Notifier *notifier, void *data)
{
    PcGuestInfoState *guest_info_state = container_of(notifier,
                                                      PcGuestInfoState,
                                                      machine_done);
1077
    acpi_setup(&guest_info_state->info);
1078 1079 1080 1081 1082 1083 1084
}

PcGuestInfo *pc_guest_info_init(ram_addr_t below_4g_mem_size,
                                ram_addr_t above_4g_mem_size)
{
    PcGuestInfoState *guest_info_state = g_malloc0(sizeof *guest_info_state);
    PcGuestInfo *guest_info = &guest_info_state->info;
M
Michael S. Tsirkin 已提交
1085 1086
    int i, j;

E
Eduardo Habkost 已提交
1087
    guest_info->ram_size_below_4g = below_4g_mem_size;
M
Michael S. Tsirkin 已提交
1088 1089 1090 1091
    guest_info->ram_size = below_4g_mem_size + above_4g_mem_size;
    guest_info->apic_id_limit = pc_apic_id_limit(max_cpus);
    guest_info->apic_xrupt_override = kvm_allows_irq0_override();
    guest_info->numa_nodes = nb_numa_nodes;
1092
    guest_info->node_mem = g_malloc0(guest_info->numa_nodes *
M
Michael S. Tsirkin 已提交
1093
                                    sizeof *guest_info->node_mem);
1094 1095 1096 1097
    for (i = 0; i < nb_numa_nodes; i++) {
        guest_info->node_mem[i] = numa_info[i].node_mem;
    }

M
Michael S. Tsirkin 已提交
1098 1099 1100 1101 1102 1103 1104
    guest_info->node_cpu = g_malloc0(guest_info->apic_id_limit *
                                     sizeof *guest_info->node_cpu);

    for (i = 0; i < max_cpus; i++) {
        unsigned int apic_id = x86_cpu_apic_id_from_index(i);
        assert(apic_id < guest_info->apic_id_limit);
        for (j = 0; j < nb_numa_nodes; j++) {
1105
            if (test_bit(i, numa_info[j].node_cpu)) {
M
Michael S. Tsirkin 已提交
1106 1107 1108 1109 1110
                guest_info->node_cpu[apic_id] = j;
                break;
            }
        }
    }
1111 1112 1113 1114 1115 1116

    guest_info_state->machine_done.notify = pc_guest_info_machine_done;
    qemu_add_machine_init_done_notifier(&guest_info_state->machine_done);
    return guest_info;
}

1117 1118 1119
/* setup pci memory address space mapping into system address space */
void pc_pci_as_mapping_init(Object *owner, MemoryRegion *system_memory,
                            MemoryRegion *pci_address_space)
1120
{
1121 1122 1123
    /* Set to lower priority than RAM */
    memory_region_add_subregion_overlap(system_memory, 0x0,
                                        pci_address_space, -1);
1124 1125
}

G
Gerd Hoffmann 已提交
1126 1127
void pc_acpi_init(const char *default_dsdt)
{
1128
    char *filename;
G
Gerd Hoffmann 已提交
1129 1130 1131 1132 1133 1134 1135 1136 1137

    if (acpi_tables != NULL) {
        /* manually set via -acpitable, leave it alone */
        return;
    }

    filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, default_dsdt);
    if (filename == NULL) {
        fprintf(stderr, "WARNING: failed to find %s\n", default_dsdt);
1138 1139 1140 1141
    } else {
        char *arg;
        QemuOpts *opts;
        Error *err = NULL;
G
Gerd Hoffmann 已提交
1142

1143
        arg = g_strdup_printf("file=%s", filename);
1144

1145 1146 1147
        /* creates a deep copy of "arg" */
        opts = qemu_opts_parse(qemu_find_opts("acpi"), arg, 0);
        g_assert(opts != NULL);
1148

1149
        acpi_table_add_builtin(opts, &err);
1150
        if (err) {
1151 1152
            error_report("WARNING: failed to load %s: %s", filename,
                         error_get_pretty(err));
1153 1154 1155 1156
            error_free(err);
        }
        g_free(arg);
        g_free(filename);
G
Gerd Hoffmann 已提交
1157 1158 1159
    }
}

1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184
FWCfgState *xen_load_linux(const char *kernel_filename,
                           const char *kernel_cmdline,
                           const char *initrd_filename,
                           ram_addr_t below_4g_mem_size,
                           PcGuestInfo *guest_info)
{
    int i;
    FWCfgState *fw_cfg;

    assert(kernel_filename != NULL);

    fw_cfg = fw_cfg_init(BIOS_CFG_IOPORT, BIOS_CFG_IOPORT + 1, 0, 0);
    rom_set_fw(fw_cfg);

    load_linux(fw_cfg, kernel_filename, initrd_filename,
               kernel_cmdline, below_4g_mem_size);
    for (i = 0; i < nb_option_roms; i++) {
        assert(!strcmp(option_rom[i].name, "linuxboot.bin") ||
               !strcmp(option_rom[i].name, "multiboot.bin"));
        rom_add_option(option_rom[i].name, option_rom[i].bootindex);
    }
    guest_info->fw_cfg = fw_cfg;
    return fw_cfg;
}

1185 1186
FWCfgState *pc_memory_init(MachineState *machine,
                           MemoryRegion *system_memory,
L
Laszlo Ersek 已提交
1187 1188 1189
                           ram_addr_t below_4g_mem_size,
                           ram_addr_t above_4g_mem_size,
                           MemoryRegion *rom_memory,
1190 1191
                           MemoryRegion **ram_memory,
                           PcGuestInfo *guest_info)
B
bellard 已提交
1192
{
1193 1194
    int linux_boot, i;
    MemoryRegion *ram, *option_rom_mr;
1195
    MemoryRegion *ram_below_4g, *ram_above_4g;
L
Laszlo Ersek 已提交
1196
    FWCfgState *fw_cfg;
1197
    PCMachineState *pcms = PC_MACHINE(machine);
1198

1199 1200 1201
    assert(machine->ram_size == below_4g_mem_size + above_4g_mem_size);

    linux_boot = (machine->kernel_filename != NULL);
B
bellard 已提交
1202

1203
    /* Allocate RAM.  We allocate it as a single memory region and use
D
Dong Xu Wang 已提交
1204
     * aliases to address portions of it, mostly for backwards compatibility
1205 1206
     * with older qemus that used qemu_ram_alloc().
     */
1207
    ram = g_malloc(sizeof(*ram));
1208 1209
    memory_region_allocate_system_memory(ram, NULL, "pc.ram",
                                         machine->ram_size);
A
Avi Kivity 已提交
1210
    *ram_memory = ram;
1211
    ram_below_4g = g_malloc(sizeof(*ram_below_4g));
1212
    memory_region_init_alias(ram_below_4g, NULL, "ram-below-4g", ram,
1213 1214
                             0, below_4g_mem_size);
    memory_region_add_subregion(system_memory, 0, ram_below_4g);
G
Gerd Hoffmann 已提交
1215
    e820_add_entry(0, below_4g_mem_size, E820_RAM);
1216
    if (above_4g_mem_size > 0) {
1217
        ram_above_4g = g_malloc(sizeof(*ram_above_4g));
1218
        memory_region_init_alias(ram_above_4g, NULL, "ram-above-4g", ram,
1219 1220 1221
                                 below_4g_mem_size, above_4g_mem_size);
        memory_region_add_subregion(system_memory, 0x100000000ULL,
                                    ram_above_4g);
G
Gerd Hoffmann 已提交
1222
        e820_add_entry(0x100000000ULL, above_4g_mem_size, E820_RAM);
1223
    }
1224

1225 1226
    if (!guest_info->has_reserved_memory &&
        (machine->ram_slots ||
1227
         (machine->maxram_size > machine->ram_size))) {
1228 1229 1230 1231 1232 1233 1234
        MachineClass *mc = MACHINE_GET_CLASS(machine);

        error_report("\"-memory 'slots|maxmem'\" is not supported by: %s",
                     mc->name);
        exit(EXIT_FAILURE);
    }

1235
    /* initialize hotplug memory address space */
1236
    if (guest_info->has_reserved_memory &&
1237
        (machine->ram_size < machine->maxram_size)) {
1238
        ram_addr_t hotplug_mem_size =
1239
            machine->maxram_size - machine->ram_size;
1240

1241 1242 1243 1244 1245 1246
        if (machine->ram_slots > ACPI_MAX_RAM_SLOTS) {
            error_report("unsupported amount of memory slots: %"PRIu64,
                         machine->ram_slots);
            exit(EXIT_FAILURE);
        }

1247 1248 1249
        pcms->hotplug_memory_base =
            ROUND_UP(0x100000000ULL + above_4g_mem_size, 1ULL << 30);

1250 1251 1252 1253 1254
        if (pcms->enforce_aligned_dimm) {
            /* size hotplug region assuming 1G page max alignment per slot */
            hotplug_mem_size += (1ULL << 30) * machine->ram_slots;
        }

1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266
        if ((pcms->hotplug_memory_base + hotplug_mem_size) <
            hotplug_mem_size) {
            error_report("unsupported amount of maximum memory: " RAM_ADDR_FMT,
                         machine->maxram_size);
            exit(EXIT_FAILURE);
        }

        memory_region_init(&pcms->hotplug_memory, OBJECT(pcms),
                           "hotplug-memory", hotplug_mem_size);
        memory_region_add_subregion(system_memory, pcms->hotplug_memory_base,
                                    &pcms->hotplug_memory);
    }
1267 1268

    /* Initialize PC system firmware */
1269
    pc_system_firmware_init(rom_memory, guest_info->isapc_ram_fw);
1270

1271
    option_rom_mr = g_malloc(sizeof(*option_rom_mr));
1272 1273
    memory_region_init_ram(option_rom_mr, NULL, "pc.rom", PC_ROM_SIZE,
                           &error_abort);
1274
    vmstate_register_ram_global(option_rom_mr);
1275
    memory_region_add_subregion_overlap(rom_memory,
1276 1277 1278
                                        PC_ROM_MIN_VGA,
                                        option_rom_mr,
                                        1);
1279

A
Alexander Graf 已提交
1280
    fw_cfg = bochs_bios_init();
G
Gerd Hoffmann 已提交
1281
    rom_set_fw(fw_cfg);
A
Alexander Graf 已提交
1282

1283 1284 1285 1286 1287 1288
    if (guest_info->has_reserved_memory && pcms->hotplug_memory_base) {
        uint64_t *val = g_malloc(sizeof(*val));
        *val = cpu_to_le64(ROUND_UP(pcms->hotplug_memory_base, 0x1ULL << 30));
        fw_cfg_add_file(fw_cfg, "etc/reserved-memory-end", val, sizeof(*val));
    }

1289
    if (linux_boot) {
1290 1291
        load_linux(fw_cfg, machine->kernel_filename, machine->initrd_filename,
                   machine->kernel_cmdline, below_4g_mem_size);
1292 1293 1294
    }

    for (i = 0; i < nb_option_roms; i++) {
G
Gleb Natapov 已提交
1295
        rom_add_option(option_rom[i].name, option_rom[i].bootindex);
1296
    }
1297
    guest_info->fw_cfg = fw_cfg;
1298
    return fw_cfg;
1299 1300
}

1301 1302 1303 1304 1305
qemu_irq *pc_allocate_cpu_irq(void)
{
    return qemu_allocate_irqs(pic_irq_request, NULL, 1);
}

1306
DeviceState *pc_vga_init(ISABus *isa_bus, PCIBus *pci_bus)
1307
{
1308 1309
    DeviceState *dev = NULL;

1310 1311 1312 1313 1314
    if (pci_bus) {
        PCIDevice *pcidev = pci_vga_init(pci_bus);
        dev = pcidev ? &pcidev->qdev : NULL;
    } else if (isa_bus) {
        ISADevice *isadev = isa_vga_init(isa_bus);
A
Andreas Färber 已提交
1315
        dev = isadev ? DEVICE(isadev) : NULL;
1316
    }
1317
    return dev;
1318 1319
}

B
Blue Swirl 已提交
1320 1321
static void cpu_request_exit(void *opaque, int irq, int level)
{
1322
    CPUState *cpu = current_cpu;
B
Blue Swirl 已提交
1323

1324 1325
    if (cpu && level) {
        cpu_exit(cpu);
B
Blue Swirl 已提交
1326 1327 1328
    }
}

J
Julien Grall 已提交
1329 1330
static const MemoryRegionOps ioport80_io_ops = {
    .write = ioport80_write,
1331
    .read = ioport80_read,
J
Julien Grall 已提交
1332 1333 1334 1335 1336 1337 1338 1339 1340
    .endianness = DEVICE_NATIVE_ENDIAN,
    .impl = {
        .min_access_size = 1,
        .max_access_size = 1,
    },
};

static const MemoryRegionOps ioportF0_io_ops = {
    .write = ioportF0_write,
1341
    .read = ioportF0_read,
J
Julien Grall 已提交
1342 1343 1344 1345 1346 1347 1348
    .endianness = DEVICE_NATIVE_ENDIAN,
    .impl = {
        .min_access_size = 1,
        .max_access_size = 1,
    },
};

1349
void pc_basic_device_init(ISABus *isa_bus, qemu_irq *gsi,
1350
                          ISADevice **rtc_state,
K
Kevin Wolf 已提交
1351
                          ISADevice **floppy,
1352 1353
                          bool no_vmport,
                          uint32 hpet_irqs)
1354 1355 1356
{
    int i;
    DriveInfo *fd[MAX_FD];
1357 1358 1359
    DeviceState *hpet = NULL;
    int pit_isa_irq = 0;
    qemu_irq pit_alt_irq = NULL;
1360
    qemu_irq rtc_irq = NULL;
B
Blue Swirl 已提交
1361
    qemu_irq *a20_line;
1362
    ISADevice *i8042, *port92, *vmmouse, *pit = NULL;
B
Blue Swirl 已提交
1363
    qemu_irq *cpu_exit_irq;
J
Julien Grall 已提交
1364 1365
    MemoryRegion *ioport80_io = g_new(MemoryRegion, 1);
    MemoryRegion *ioportF0_io = g_new(MemoryRegion, 1);
1366

1367
    memory_region_init_io(ioport80_io, NULL, &ioport80_io_ops, NULL, "ioport80", 1);
J
Julien Grall 已提交
1368
    memory_region_add_subregion(isa_bus->address_space_io, 0x80, ioport80_io);
1369

1370
    memory_region_init_io(ioportF0_io, NULL, &ioportF0_io_ops, NULL, "ioportF0", 1);
J
Julien Grall 已提交
1371
    memory_region_add_subregion(isa_bus->address_space_io, 0xf0, ioportF0_io);
1372

1373 1374 1375 1376 1377 1378 1379
    /*
     * Check if an HPET shall be created.
     *
     * Without KVM_CAP_PIT_STATE2, we cannot switch off the in-kernel PIT
     * when the HPET wants to take over. Thus we have to disable the latter.
     */
    if (!no_hpet && (!kvm_irqchip_in_kernel() || kvm_has_pit_state2())) {
1380
        /* In order to set property, here not using sysbus_try_create_simple */
M
Michael S. Tsirkin 已提交
1381
        hpet = qdev_try_create(NULL, TYPE_HPET);
B
Blue Swirl 已提交
1382
        if (hpet) {
1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394
            /* For pc-piix-*, hpet's intcap is always IRQ2. For pc-q35-1.7
             * and earlier, use IRQ2 for compat. Otherwise, use IRQ16~23,
             * IRQ8 and IRQ2.
             */
            uint8_t compat = object_property_get_int(OBJECT(hpet),
                    HPET_INTCAP, NULL);
            if (!compat) {
                qdev_prop_set_uint32(hpet, HPET_INTCAP, hpet_irqs);
            }
            qdev_init_nofail(hpet);
            sysbus_mmio_map(SYS_BUS_DEVICE(hpet), 0, HPET_BASE);

J
Jan Kiszka 已提交
1395
            for (i = 0; i < GSI_NUM_PINS; i++) {
1396
                sysbus_connect_irq(SYS_BUS_DEVICE(hpet), i, gsi[i]);
B
Blue Swirl 已提交
1397
            }
1398 1399 1400
            pit_isa_irq = -1;
            pit_alt_irq = qdev_get_gpio_in(hpet, HPET_LEGACY_PIT_INT);
            rtc_irq = qdev_get_gpio_in(hpet, HPET_LEGACY_RTC_INT);
J
Jan Kiszka 已提交
1401
        }
1402
    }
1403
    *rtc_state = rtc_init(isa_bus, 2000, rtc_irq);
1404 1405 1406

    qemu_register_boot_set(pc_boot_set, *rtc_state);

1407 1408 1409 1410 1411 1412 1413 1414
    if (!xen_enabled()) {
        if (kvm_irqchip_in_kernel()) {
            pit = kvm_pit_init(isa_bus, 0x40);
        } else {
            pit = pit_init(isa_bus, 0x40, pit_isa_irq, pit_alt_irq);
        }
        if (hpet) {
            /* connect PIT to output control line of the HPET */
A
Andreas Färber 已提交
1415
            qdev_connect_gpio_out(hpet, 0, qdev_get_gpio_in(DEVICE(pit), 0));
1416 1417
        }
        pcspk_init(isa_bus, pit);
1418
    }
1419 1420 1421

    for(i = 0; i < MAX_SERIAL_PORTS; i++) {
        if (serial_hds[i]) {
1422
            serial_isa_init(isa_bus, i, serial_hds[i]);
1423 1424 1425 1426 1427
        }
    }

    for(i = 0; i < MAX_PARALLEL_PORTS; i++) {
        if (parallel_hds[i]) {
1428
            parallel_init(isa_bus, i, parallel_hds[i]);
1429 1430 1431
        }
    }

1432
    a20_line = qemu_allocate_irqs(handle_a20_line_change, first_cpu, 2);
1433
    i8042 = isa_create_simple(isa_bus, "i8042");
1434
    i8042_setup_a20_line(i8042, &a20_line[0]);
1435
    if (!no_vmport) {
1436 1437
        vmport_init(isa_bus);
        vmmouse = isa_try_create(isa_bus, "vmmouse");
1438 1439 1440
    } else {
        vmmouse = NULL;
    }
B
Blue Swirl 已提交
1441
    if (vmmouse) {
A
Andreas Färber 已提交
1442 1443 1444
        DeviceState *dev = DEVICE(vmmouse);
        qdev_prop_set_ptr(dev, "ps2_mouse", i8042);
        qdev_init_nofail(dev);
B
Blue Swirl 已提交
1445
    }
1446
    port92 = isa_create_simple(isa_bus, "port92");
1447
    port92_init(port92, &a20_line[1]);
B
Blue Swirl 已提交
1448

B
Blue Swirl 已提交
1449 1450
    cpu_exit_irq = qemu_allocate_irqs(cpu_request_exit, NULL, 1);
    DMA_init(0, cpu_exit_irq);
1451 1452 1453 1454

    for(i = 0; i < MAX_FD; i++) {
        fd[i] = drive_get(IF_FLOPPY, 0, i);
    }
1455
    *floppy = fdctrl_init_isa(isa_bus, fd);
1456 1457
}

1458 1459 1460 1461 1462 1463 1464 1465 1466 1467
void pc_nic_init(ISABus *isa_bus, PCIBus *pci_bus)
{
    int i;

    for (i = 0; i < nb_nics; i++) {
        NICInfo *nd = &nd_table[i];

        if (!pci_bus || (nd->model && strcmp(nd->model, "ne2k_isa") == 0)) {
            pc_init_ne2k_isa(isa_bus, nd);
        } else {
1468
            pci_nic_init_nofail(nd, pci_bus, "e1000", NULL);
1469 1470 1471 1472
        }
    }
}

1473
void pc_pci_device_init(PCIBus *pci_bus)
1474 1475 1476 1477 1478 1479 1480 1481 1482
{
    int max_bus;
    int bus;

    max_bus = drive_get_max_bus(IF_SCSI);
    for (bus = 0; bus <= max_bus; bus++) {
        pci_create_simple(pci_bus, -1, "lsi53c895a");
    }
}
1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499

void ioapic_init_gsi(GSIState *gsi_state, const char *parent_name)
{
    DeviceState *dev;
    SysBusDevice *d;
    unsigned int i;

    if (kvm_irqchip_in_kernel()) {
        dev = qdev_create(NULL, "kvm-ioapic");
    } else {
        dev = qdev_create(NULL, "ioapic");
    }
    if (parent_name) {
        object_property_add_child(object_resolve_path(parent_name, NULL),
                                  "ioapic", OBJECT(dev), NULL);
    }
    qdev_init_nofail(dev);
1500
    d = SYS_BUS_DEVICE(dev);
1501
    sysbus_mmio_map(d, 0, IO_APIC_DEFAULT_ADDRESS);
1502 1503 1504 1505 1506

    for (i = 0; i < IOAPIC_NUM_PINS; i++) {
        gsi_state->ioapic_irq[i] = qdev_get_gpio_in(dev, i);
    }
}
1507 1508 1509 1510 1511 1512

static void pc_generic_machine_class_init(ObjectClass *oc, void *data)
{
    MachineClass *mc = MACHINE_CLASS(oc);
    QEMUMachine *qm = data;

1513
    mc->family = qm->family;
1514 1515 1516 1517 1518 1519 1520 1521
    mc->name = qm->name;
    mc->alias = qm->alias;
    mc->desc = qm->desc;
    mc->init = qm->init;
    mc->reset = qm->reset;
    mc->hot_add_cpu = qm->hot_add_cpu;
    mc->kvm_type = qm->kvm_type;
    mc->block_default_type = qm->block_default_type;
1522
    mc->units_per_default_bus = qm->units_per_default_bus;
1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533
    mc->max_cpus = qm->max_cpus;
    mc->no_serial = qm->no_serial;
    mc->no_parallel = qm->no_parallel;
    mc->use_virtcon = qm->use_virtcon;
    mc->use_sclp = qm->use_sclp;
    mc->no_floppy = qm->no_floppy;
    mc->no_cdrom = qm->no_cdrom;
    mc->no_sdcard = qm->no_sdcard;
    mc->is_default = qm->is_default;
    mc->default_machine_opts = qm->default_machine_opts;
    mc->default_boot_order = qm->default_boot_order;
1534
    mc->default_display = qm->default_display;
1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552
    mc->compat_props = qm->compat_props;
    mc->hw_version = qm->hw_version;
}

void qemu_register_pc_machine(QEMUMachine *m)
{
    char *name = g_strconcat(m->name, TYPE_MACHINE_SUFFIX, NULL);
    TypeInfo ti = {
        .name       = name,
        .parent     = TYPE_PC_MACHINE,
        .class_init = pc_generic_machine_class_init,
        .class_data = (void *)m,
    };

    type_register(&ti);
    g_free(name);
}

1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583
static int pc_dimm_count(Object *obj, void *opaque)
{
    int *count = opaque;

    if (object_dynamic_cast(obj, TYPE_PC_DIMM)) {
        (*count)++;
    }

    object_child_foreach(obj, pc_dimm_count, opaque);
    return 0;
}

static int pc_existing_dimms_capacity(Object *obj, void *opaque)
{
    Error *local_err = NULL;
    uint64_t *size = opaque;

    if (object_dynamic_cast(obj, TYPE_PC_DIMM)) {
        (*size) += object_property_get_int(obj, PC_DIMM_SIZE_PROP, &local_err);

        if (local_err) {
            qerror_report_err(local_err);
            error_free(local_err);
            return 1;
        }
    }

    object_child_foreach(obj, pc_dimm_count, opaque);
    return 0;
}

1584 1585 1586
static void pc_dimm_plug(HotplugHandler *hotplug_dev,
                         DeviceState *dev, Error **errp)
{
1587
    int slot;
1588
    HotplugHandlerClass *hhc;
1589 1590
    Error *local_err = NULL;
    PCMachineState *pcms = PC_MACHINE(hotplug_dev);
1591
    MachineState *machine = MACHINE(hotplug_dev);
1592 1593 1594
    PCDIMMDevice *dimm = PC_DIMM(dev);
    PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm);
    MemoryRegion *mr = ddc->get_memory_region(dimm);
1595
    uint64_t existing_dimms_capacity = 0;
1596
    uint64_t align = TARGET_PAGE_SIZE;
1597 1598 1599
    uint64_t addr;

    addr = object_property_get_int(OBJECT(dimm), PC_DIMM_ADDR_PROP, &local_err);
1600 1601 1602 1603
    if (local_err) {
        goto out;
    }

1604 1605 1606 1607
    if (memory_region_get_alignment(mr) && pcms->enforce_aligned_dimm) {
        align = memory_region_get_alignment(mr);
    }

1608 1609
    addr = pc_dimm_get_free_addr(pcms->hotplug_memory_base,
                                 memory_region_size(&pcms->hotplug_memory),
1610
                                 !addr ? NULL : &addr, align,
1611 1612 1613 1614
                                 memory_region_size(mr), &local_err);
    if (local_err) {
        goto out;
    }
1615

1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628
    if (pc_existing_dimms_capacity(OBJECT(machine), &existing_dimms_capacity)) {
        error_setg(&local_err, "failed to get total size of existing DIMMs");
        goto out;
    }

    if (existing_dimms_capacity + memory_region_size(mr) >
        machine->maxram_size - machine->ram_size) {
        error_setg(&local_err, "not enough space, currently 0x%" PRIx64
                   " in use of total 0x" RAM_ADDR_FMT,
                   existing_dimms_capacity, machine->maxram_size);
        goto out;
    }

1629
    object_property_set_int(OBJECT(dev), addr, PC_DIMM_ADDR_PROP, &local_err);
1630 1631 1632
    if (local_err) {
        goto out;
    }
1633
    trace_mhp_pc_dimm_assigned_address(addr);
1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648

    slot = object_property_get_int(OBJECT(dev), PC_DIMM_SLOT_PROP, &local_err);
    if (local_err) {
        goto out;
    }

    slot = pc_dimm_get_free_slot(slot == PC_DIMM_UNASSIGNED_SLOT ? NULL : &slot,
                                 machine->ram_slots, &local_err);
    if (local_err) {
        goto out;
    }
    object_property_set_int(OBJECT(dev), slot, PC_DIMM_SLOT_PROP, &local_err);
    if (local_err) {
        goto out;
    }
1649
    trace_mhp_pc_dimm_assigned_slot(slot);
1650

1651 1652 1653 1654 1655 1656
    if (!pcms->acpi_dev) {
        error_setg(&local_err,
                   "memory hotplug is not enabled: missing acpi device");
        goto out;
    }

1657 1658 1659 1660 1661
    if (kvm_enabled() && !kvm_has_free_slot(machine)) {
        error_setg(&local_err, "hypervisor has no free memory slots left");
        goto out;
    }

1662 1663 1664
    memory_region_add_subregion(&pcms->hotplug_memory,
                                addr - pcms->hotplug_memory_base, mr);
    vmstate_register_ram(mr, dev);
1665 1666 1667

    hhc = HOTPLUG_HANDLER_GET_CLASS(pcms->acpi_dev);
    hhc->plug(HOTPLUG_HANDLER(pcms->acpi_dev), dev, &local_err);
1668 1669 1670 1671
out:
    error_propagate(errp, local_err);
}

1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690
static void pc_cpu_plug(HotplugHandler *hotplug_dev,
                        DeviceState *dev, Error **errp)
{
    HotplugHandlerClass *hhc;
    Error *local_err = NULL;
    PCMachineState *pcms = PC_MACHINE(hotplug_dev);

    if (!dev->hotplugged) {
        goto out;
    }

    if (!pcms->acpi_dev) {
        error_setg(&local_err,
                   "cpu hotplug is not enabled: missing acpi device");
        goto out;
    }

    hhc = HOTPLUG_HANDLER_GET_CLASS(pcms->acpi_dev);
    hhc->plug(HOTPLUG_HANDLER(pcms->acpi_dev), dev, &local_err);
G
Gu Zheng 已提交
1691 1692 1693 1694 1695 1696
    if (local_err) {
        goto out;
    }

    /* increment the number of CPUs */
    rtc_set_memory(pcms->rtc, 0x5f, rtc_get_memory(pcms->rtc, 0x5f) + 1);
1697 1698 1699 1700
out:
    error_propagate(errp, local_err);
}

1701 1702 1703 1704 1705
static void pc_machine_device_plug_cb(HotplugHandler *hotplug_dev,
                                      DeviceState *dev, Error **errp)
{
    if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) {
        pc_dimm_plug(hotplug_dev, dev, errp);
1706 1707
    } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) {
        pc_cpu_plug(hotplug_dev, dev, errp);
1708 1709 1710 1711 1712 1713 1714 1715
    }
}

static HotplugHandler *pc_get_hotpug_handler(MachineState *machine,
                                             DeviceState *dev)
{
    PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(machine);

1716 1717
    if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM) ||
        object_dynamic_cast(OBJECT(dev), TYPE_CPU)) {
1718 1719 1720 1721 1722 1723 1724
        return HOTPLUG_HANDLER(machine);
    }

    return pcmc->get_hotplug_handler ?
        pcmc->get_hotplug_handler(machine, dev) : NULL;
}

1725 1726 1727 1728 1729 1730 1731 1732 1733 1734
static void
pc_machine_get_hotplug_memory_region_size(Object *obj, Visitor *v, void *opaque,
                                          const char *name, Error **errp)
{
    PCMachineState *pcms = PC_MACHINE(obj);
    int64_t value = memory_region_size(&pcms->hotplug_memory);

    visit_type_int(v, &value, name, errp);
}

1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774
static void pc_machine_get_max_ram_below_4g(Object *obj, Visitor *v,
                                         void *opaque, const char *name,
                                         Error **errp)
{
    PCMachineState *pcms = PC_MACHINE(obj);
    uint64_t value = pcms->max_ram_below_4g;

    visit_type_size(v, &value, name, errp);
}

static void pc_machine_set_max_ram_below_4g(Object *obj, Visitor *v,
                                         void *opaque, const char *name,
                                         Error **errp)
{
    PCMachineState *pcms = PC_MACHINE(obj);
    Error *error = NULL;
    uint64_t value;

    visit_type_size(v, &value, name, &error);
    if (error) {
        error_propagate(errp, error);
        return;
    }
    if (value > (1ULL << 32)) {
        error_set(&error, ERROR_CLASS_GENERIC_ERROR,
                  "Machine option 'max-ram-below-4g=%"PRIu64
                  "' expects size less than or equal to 4G", value);
        error_propagate(errp, error);
        return;
    }

    if (value < (1ULL << 20)) {
        error_report("Warning: small max_ram_below_4g(%"PRIu64
                     ") less than 1M.  BIOS may not work..",
                     value);
    }

    pcms->max_ram_below_4g = value;
}

1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788
static bool pc_machine_get_vmport(Object *obj, Error **errp)
{
    PCMachineState *pcms = PC_MACHINE(obj);

    return pcms->vmport;
}

static void pc_machine_set_vmport(Object *obj, bool value, Error **errp)
{
    PCMachineState *pcms = PC_MACHINE(obj);

    pcms->vmport = value;
}

1789 1790 1791 1792 1793 1794 1795
static bool pc_machine_get_aligned_dimm(Object *obj, Error **errp)
{
    PCMachineState *pcms = PC_MACHINE(obj);

    return pcms->enforce_aligned_dimm;
}

1796 1797
static void pc_machine_initfn(Object *obj)
{
1798 1799
    PCMachineState *pcms = PC_MACHINE(obj);

1800 1801 1802
    object_property_add(obj, PC_MACHINE_MEMHP_REGION_SIZE, "int",
                        pc_machine_get_hotplug_memory_region_size,
                        NULL, NULL, NULL, NULL);
1803 1804 1805 1806 1807
    pcms->max_ram_below_4g = 1ULL << 32; /* 4G */
    object_property_add(obj, PC_MACHINE_MAX_RAM_BELOW_4G, "size",
                        pc_machine_get_max_ram_below_4g,
                        pc_machine_set_max_ram_below_4g,
                        NULL, NULL, NULL);
1808

1809 1810 1811 1812 1813
    pcms->vmport = !xen_enabled();
    object_property_add_bool(obj, PC_MACHINE_VMPORT,
                             pc_machine_get_vmport,
                             pc_machine_set_vmport,
                             NULL);
1814 1815 1816 1817 1818

    pcms->enforce_aligned_dimm = true;
    object_property_add_bool(obj, PC_MACHINE_ENFORCE_ALIGNED_DIMM,
                             pc_machine_get_aligned_dimm,
                             NULL, NULL);
1819 1820
}

1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831
static void pc_machine_class_init(ObjectClass *oc, void *data)
{
    MachineClass *mc = MACHINE_CLASS(oc);
    PCMachineClass *pcmc = PC_MACHINE_CLASS(oc);
    HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc);

    pcmc->get_hotplug_handler = mc->get_hotplug_handler;
    mc->get_hotplug_handler = pc_get_hotpug_handler;
    hc->plug = pc_machine_device_plug_cb;
}

1832 1833 1834 1835 1836
static const TypeInfo pc_machine_info = {
    .name = TYPE_PC_MACHINE,
    .parent = TYPE_MACHINE,
    .abstract = true,
    .instance_size = sizeof(PCMachineState),
1837
    .instance_init = pc_machine_initfn,
1838
    .class_size = sizeof(PCMachineClass),
1839 1840 1841 1842 1843
    .class_init = pc_machine_class_init,
    .interfaces = (InterfaceInfo[]) {
         { TYPE_HOTPLUG_HANDLER },
         { }
    },
1844 1845 1846 1847 1848 1849 1850 1851
};

static void pc_machine_register_types(void)
{
    type_register_static(&pc_machine_info);
}

type_init(pc_machine_register_types)