pc.c 54.6 KB
Newer Older
B
bellard 已提交
1 2
/*
 * QEMU PC System Emulator
3
 *
B
bellard 已提交
4
 * Copyright (c) 2003-2004 Fabrice Bellard
5
 *
B
bellard 已提交
6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
24
#include "hw/hw.h"
P
Paolo Bonzini 已提交
25 26 27 28
#include "hw/i386/pc.h"
#include "hw/char/serial.h"
#include "hw/i386/apic.h"
#include "hw/block/fdc.h"
29 30
#include "hw/ide.h"
#include "hw/pci/pci.h"
31
#include "monitor/monitor.h"
P
Paolo Bonzini 已提交
32 33 34
#include "hw/nvram/fw_cfg.h"
#include "hw/timer/hpet.h"
#include "hw/i386/smbios.h"
35
#include "hw/loader.h"
B
Blue Swirl 已提交
36
#include "elf.h"
37
#include "multiboot.h"
P
Paolo Bonzini 已提交
38 39 40
#include "hw/timer/mc146818rtc.h"
#include "hw/timer/i8254.h"
#include "hw/audio/pcspk.h"
41 42
#include "hw/pci/msi.h"
#include "hw/sysbus.h"
43 44
#include "sysemu/sysemu.h"
#include "sysemu/kvm.h"
45
#include "kvm_i386.h"
P
Paolo Bonzini 已提交
46
#include "hw/xen/xen.h"
47
#include "sysemu/block-backend.h"
P
Paolo Bonzini 已提交
48
#include "hw/block/block.h"
G
Gerd Hoffmann 已提交
49
#include "ui/qemu-spice.h"
50 51
#include "exec/memory.h"
#include "exec/address-spaces.h"
52
#include "sysemu/arch_init.h"
53
#include "qemu/bitmap.h"
54
#include "qemu/config-file.h"
55
#include "hw/acpi/acpi.h"
56
#include "hw/acpi/cpu_hotplug.h"
57
#include "hw/cpu/icc_bus.h"
58
#include "hw/boards.h"
59
#include "hw/pci/pci_host.h"
60
#include "acpi-build.h"
61
#include "hw/mem/pc-dimm.h"
62
#include "trace.h"
63
#include "qapi/visitor.h"
64
#include "qapi-visit.h"
B
bellard 已提交
65

B
Blue Swirl 已提交
66 67 68 69 70 71 72 73 74 75
/* debug PC/ISA interrupts */
//#define DEBUG_IRQ

#ifdef DEBUG_IRQ
#define DPRINTF(fmt, ...)                                       \
    do { printf("CPUIRQ: " fmt , ## __VA_ARGS__); } while (0)
#else
#define DPRINTF(fmt, ...)
#endif

76 77 78
/* Leave a chunk of memory at the top of RAM for the BIOS ACPI tables
 * (128K) and other BIOS datastructures (less than 4K reported to be used at
 * the moment, 32K should be enough for a while).  */
S
Stefan Weil 已提交
79
static unsigned acpi_data_size = 0x20000 + 0x8000;
80 81 82 83 84
void pc_set_legacy_acpi_data_size(void)
{
    acpi_data_size = 0x10000;
}

85
#define BIOS_CFG_IOPORT 0x510
86
#define FW_CFG_ACPI_TABLES (FW_CFG_ARCH_LOCAL + 0)
87
#define FW_CFG_SMBIOS_ENTRIES (FW_CFG_ARCH_LOCAL + 1)
J
Jes Sorensen 已提交
88
#define FW_CFG_IRQ0_OVERRIDE (FW_CFG_ARCH_LOCAL + 2)
J
Jes Sorensen 已提交
89
#define FW_CFG_E820_TABLE (FW_CFG_ARCH_LOCAL + 3)
90
#define FW_CFG_HPET (FW_CFG_ARCH_LOCAL + 4)
B
bellard 已提交
91

J
Jes Sorensen 已提交
92 93 94 95 96 97
#define E820_NR_ENTRIES		16

struct e820_entry {
    uint64_t address;
    uint64_t length;
    uint32_t type;
98
} QEMU_PACKED __attribute((__aligned__(4)));
J
Jes Sorensen 已提交
99 100 101 102

struct e820_table {
    uint32_t count;
    struct e820_entry entry[E820_NR_ENTRIES];
103
} QEMU_PACKED __attribute((__aligned__(4)));
J
Jes Sorensen 已提交
104

G
Gerd Hoffmann 已提交
105 106 107
static struct e820_table e820_reserve;
static struct e820_entry *e820_table;
static unsigned e820_entries;
B
Blue Swirl 已提交
108
struct hpet_fw_config hpet_cfg = {.count = UINT8_MAX};
J
Jes Sorensen 已提交
109

J
Jan Kiszka 已提交
110
void gsi_handler(void *opaque, int n, int level)
111
{
J
Jan Kiszka 已提交
112
    GSIState *s = opaque;
113

J
Jan Kiszka 已提交
114 115 116
    DPRINTF("pc: %s GSI %d\n", level ? "raising" : "lowering", n);
    if (n < ISA_NUM_IRQS) {
        qemu_set_irq(s->i8259_irq[n], level);
A
Avi Kivity 已提交
117
    }
J
Jan Kiszka 已提交
118
    qemu_set_irq(s->ioapic_irq[n], level);
119
}
120

J
Julien Grall 已提交
121 122
static void ioport80_write(void *opaque, hwaddr addr, uint64_t data,
                           unsigned size)
B
bellard 已提交
123 124 125
{
}

126 127
static uint64_t ioport80_read(void *opaque, hwaddr addr, unsigned size)
{
128
    return 0xffffffffffffffffULL;
129 130
}

131
/* MSDOS compatibility mode FPU exception support */
P
pbrook 已提交
132
static qemu_irq ferr_irq;
133 134 135 136 137 138

void pc_register_ferr_irq(qemu_irq irq)
{
    ferr_irq = irq;
}

139 140 141
/* XXX: add IGNNE support */
void cpu_set_ferr(CPUX86State *s)
{
P
pbrook 已提交
142
    qemu_irq_raise(ferr_irq);
143 144
}

J
Julien Grall 已提交
145 146
static void ioportF0_write(void *opaque, hwaddr addr, uint64_t data,
                           unsigned size)
147
{
P
pbrook 已提交
148
    qemu_irq_lower(ferr_irq);
149 150
}

151 152
static uint64_t ioportF0_read(void *opaque, hwaddr addr, unsigned size)
{
153
    return 0xffffffffffffffffULL;
154 155
}

B
bellard 已提交
156 157 158
/* TSC handling */
uint64_t cpu_get_tsc(CPUX86State *env)
{
159
    return cpu_get_ticks();
B
bellard 已提交
160 161
}

B
bellard 已提交
162
/* SMM support */
163 164 165 166 167 168 169 170 171 172 173 174

static cpu_set_smm_t smm_set;
static void *smm_arg;

void cpu_smm_register(cpu_set_smm_t callback, void *arg)
{
    assert(smm_set == NULL);
    assert(smm_arg == NULL);
    smm_set = callback;
    smm_arg = arg;
}

A
Andreas Färber 已提交
175
void cpu_smm_update(CPUX86State *env)
B
bellard 已提交
176
{
177
    if (smm_set && smm_arg && CPU(x86_env_get_cpu(env)) == first_cpu) {
178
        smm_set(!!(env->hflags & HF_SMM_MASK), smm_arg);
179
    }
B
bellard 已提交
180 181 182
}


B
bellard 已提交
183
/* IRQ handling */
A
Andreas Färber 已提交
184
int cpu_get_pic_interrupt(CPUX86State *env)
B
bellard 已提交
185
{
186
    X86CPU *cpu = x86_env_get_cpu(env);
B
bellard 已提交
187 188
    int intno;

189
    intno = apic_get_interrupt(cpu->apic_state);
B
bellard 已提交
190 191 192 193
    if (intno >= 0) {
        return intno;
    }
    /* read the irq from the PIC */
194
    if (!apic_accept_pic_intr(cpu->apic_state)) {
195
        return -1;
196
    }
197

B
bellard 已提交
198 199 200 201
    intno = pic_read_irq(isa_pic);
    return intno;
}

P
pbrook 已提交
202
static void pic_irq_request(void *opaque, int irq, int level)
B
bellard 已提交
203
{
204 205
    CPUState *cs = first_cpu;
    X86CPU *cpu = X86_CPU(cs);
206

B
Blue Swirl 已提交
207
    DPRINTF("pic_irqs: %s irq %d\n", level? "raise" : "lower", irq);
208
    if (cpu->apic_state) {
A
Andreas Färber 已提交
209
        CPU_FOREACH(cs) {
210
            cpu = X86_CPU(cs);
211 212
            if (apic_accept_pic_intr(cpu->apic_state)) {
                apic_deliver_pic_intr(cpu->apic_state, level);
213
            }
A
aurel32 已提交
214 215
        }
    } else {
216
        if (level) {
217
            cpu_interrupt(cs, CPU_INTERRUPT_HARD);
218 219 220
        } else {
            cpu_reset_interrupt(cs, CPU_INTERRUPT_HARD);
        }
221
    }
B
bellard 已提交
222 223
}

B
bellard 已提交
224 225
/* PC cmos mappings */

B
bellard 已提交
226 227
#define REG_EQUIPMENT_BYTE          0x14

228
static int cmos_get_fd_drive_type(FDriveType fd0)
229 230 231 232
{
    int val;

    switch (fd0) {
233
    case FDRIVE_DRV_144:
234 235 236
        /* 1.44 Mb 3"5 drive */
        val = 4;
        break;
237
    case FDRIVE_DRV_288:
238 239 240
        /* 2.88 Mb 3"5 drive */
        val = 5;
        break;
241
    case FDRIVE_DRV_120:
242 243 244
        /* 1.2 Mb 5"5 drive */
        val = 2;
        break;
245
    case FDRIVE_DRV_NONE:
246 247 248 249 250 251 252
    default:
        val = 0;
        break;
    }
    return val;
}

253 254
static void cmos_init_hd(ISADevice *s, int type_ofs, int info_ofs,
                         int16_t cylinders, int8_t heads, int8_t sectors)
B
bellard 已提交
255 256 257 258 259 260 261 262 263 264 265 266 267
{
    rtc_set_memory(s, type_ofs, 47);
    rtc_set_memory(s, info_ofs, cylinders);
    rtc_set_memory(s, info_ofs + 1, cylinders >> 8);
    rtc_set_memory(s, info_ofs + 2, heads);
    rtc_set_memory(s, info_ofs + 3, 0xff);
    rtc_set_memory(s, info_ofs + 4, 0xff);
    rtc_set_memory(s, info_ofs + 5, 0xc0 | ((heads > 8) << 3));
    rtc_set_memory(s, info_ofs + 6, cylinders);
    rtc_set_memory(s, info_ofs + 7, cylinders >> 8);
    rtc_set_memory(s, info_ofs + 8, sectors);
}

268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284
/* convert boot_device letter to something recognizable by the bios */
static int boot_device2nibble(char boot_device)
{
    switch(boot_device) {
    case 'a':
    case 'b':
        return 0x01; /* floppy boot */
    case 'c':
        return 0x02; /* hard drive boot */
    case 'd':
        return 0x03; /* CD-ROM boot */
    case 'n':
        return 0x04; /* Network boot */
    }
    return 0;
}

285
static void set_boot_dev(ISADevice *s, const char *boot_device, Error **errp)
286 287 288 289 290 291 292
{
#define PC_MAX_BOOT_DEVICES 3
    int nbds, bds[3] = { 0, };
    int i;

    nbds = strlen(boot_device);
    if (nbds > PC_MAX_BOOT_DEVICES) {
293 294
        error_setg(errp, "Too many boot devices for PC");
        return;
295 296 297 298
    }
    for (i = 0; i < nbds; i++) {
        bds[i] = boot_device2nibble(boot_device[i]);
        if (bds[i] == 0) {
299 300 301
            error_setg(errp, "Invalid boot device for PC: '%c'",
                       boot_device[i]);
            return;
302 303 304
        }
    }
    rtc_set_memory(s, 0x3d, (bds[1] << 4) | bds[0]);
305
    rtc_set_memory(s, 0x38, (bds[2] << 4) | (fd_bootchk ? 0x0 : 0x1));
306 307
}

308
static void pc_boot_set(void *opaque, const char *boot_device, Error **errp)
309
{
310
    set_boot_dev(opaque, boot_device, errp);
311 312
}

313 314
typedef struct pc_cmos_init_late_arg {
    ISADevice *rtc_state;
315
    BusState *idebus[2];
316 317 318 319 320 321
} pc_cmos_init_late_arg;

static void pc_cmos_init_late(void *opaque)
{
    pc_cmos_init_late_arg *arg = opaque;
    ISADevice *s = arg->rtc_state;
322 323
    int16_t cylinders;
    int8_t heads, sectors;
324
    int val;
325
    int i, trans;
326

327 328 329 330 331 332 333 334 335 336 337 338
    val = 0;
    if (ide_get_geometry(arg->idebus[0], 0,
                         &cylinders, &heads, &sectors) >= 0) {
        cmos_init_hd(s, 0x19, 0x1b, cylinders, heads, sectors);
        val |= 0xf0;
    }
    if (ide_get_geometry(arg->idebus[0], 1,
                         &cylinders, &heads, &sectors) >= 0) {
        cmos_init_hd(s, 0x1a, 0x24, cylinders, heads, sectors);
        val |= 0x0f;
    }
    rtc_set_memory(s, 0x12, val);
339 340 341

    val = 0;
    for (i = 0; i < 4; i++) {
342 343 344 345 346 347
        /* NOTE: ide_get_geometry() returns the physical
           geometry.  It is always such that: 1 <= sects <= 63, 1
           <= heads <= 16, 1 <= cylinders <= 16383. The BIOS
           geometry can be different if a translation is done. */
        if (ide_get_geometry(arg->idebus[i / 2], i % 2,
                             &cylinders, &heads, &sectors) >= 0) {
348 349 350
            trans = ide_get_bios_chs_trans(arg->idebus[i / 2], i % 2) - 1;
            assert((trans & ~3) == 0);
            val |= trans << (i * 2);
351 352 353 354 355 356 357
        }
    }
    rtc_set_memory(s, 0x39, val);

    qemu_unregister_reset(pc_cmos_init_late, opaque);
}

358
void pc_cmos_init(ram_addr_t ram_size, ram_addr_t above_4g_mem_size,
G
Gu Zheng 已提交
359
                  const char *boot_device, MachineState *machine,
K
Kevin Wolf 已提交
360
                  ISADevice *floppy, BusState *idebus0, BusState *idebus1,
B
Blue Swirl 已提交
361
                  ISADevice *s)
B
bellard 已提交
362
{
363
    int val, nb, i;
364
    FDriveType fd_type[2] = { FDRIVE_DRV_NONE, FDRIVE_DRV_NONE };
365
    static pc_cmos_init_late_arg arg;
G
Gu Zheng 已提交
366
    PCMachineState *pc_machine = PC_MACHINE(machine);
367
    Error *local_err = NULL;
B
bellard 已提交
368 369

    /* various important CMOS locations needed by PC/Bochs bios */
B
bellard 已提交
370 371

    /* memory size */
372 373
    /* base memory (first MiB) */
    val = MIN(ram_size / 1024, 640);
B
bellard 已提交
374 375
    rtc_set_memory(s, 0x15, val);
    rtc_set_memory(s, 0x16, val >> 8);
376 377 378 379 380 381
    /* extended memory (next 64MiB) */
    if (ram_size > 1024 * 1024) {
        val = (ram_size - 1024 * 1024) / 1024;
    } else {
        val = 0;
    }
B
bellard 已提交
382 383
    if (val > 65535)
        val = 65535;
B
bellard 已提交
384 385 386 387
    rtc_set_memory(s, 0x17, val);
    rtc_set_memory(s, 0x18, val >> 8);
    rtc_set_memory(s, 0x30, val);
    rtc_set_memory(s, 0x31, val >> 8);
388 389 390 391
    /* memory between 16MiB and 4GiB */
    if (ram_size > 16 * 1024 * 1024) {
        val = (ram_size - 16 * 1024 * 1024) / 65536;
    } else {
B
bellard 已提交
392
        val = 0;
393
    }
B
bellard 已提交
394 395
    if (val > 65535)
        val = 65535;
B
bellard 已提交
396 397
    rtc_set_memory(s, 0x34, val);
    rtc_set_memory(s, 0x35, val >> 8);
398 399 400 401 402
    /* memory above 4GiB */
    val = above_4g_mem_size / 65536;
    rtc_set_memory(s, 0x5b, val);
    rtc_set_memory(s, 0x5c, val >> 8);
    rtc_set_memory(s, 0x5d, val >> 16);
403

A
aurel32 已提交
404 405
    /* set the number of CPU */
    rtc_set_memory(s, 0x5f, smp_cpus - 1);
G
Gu Zheng 已提交
406 407 408 409 410 411 412 413

    object_property_add_link(OBJECT(machine), "rtc_state",
                             TYPE_ISA_DEVICE,
                             (Object **)&pc_machine->rtc,
                             object_property_allow_set_link,
                             OBJ_PROP_LINK_UNREF_ON_RELEASE, &error_abort);
    object_property_set_link(OBJECT(machine), OBJECT(s),
                             "rtc_state", &error_abort);
A
aurel32 已提交
414

415 416 417
    set_boot_dev(s, boot_device, &local_err);
    if (local_err) {
        error_report("%s", error_get_pretty(local_err));
418 419
        exit(1);
    }
B
bellard 已提交
420

B
bellard 已提交
421
    /* floppy type */
K
Kevin Wolf 已提交
422 423
    if (floppy) {
        for (i = 0; i < 2; i++) {
424
            fd_type[i] = isa_fdc_get_drive_type(floppy, i);
B
Blue Swirl 已提交
425 426 427 428
        }
    }
    val = (cmos_get_fd_drive_type(fd_type[0]) << 4) |
        cmos_get_fd_drive_type(fd_type[1]);
B
bellard 已提交
429
    rtc_set_memory(s, 0x10, val);
430

B
bellard 已提交
431
    val = 0;
B
bellard 已提交
432
    nb = 0;
B
Blue Swirl 已提交
433
    if (fd_type[0] < FDRIVE_DRV_NONE) {
B
bellard 已提交
434
        nb++;
435
    }
B
Blue Swirl 已提交
436
    if (fd_type[1] < FDRIVE_DRV_NONE) {
B
bellard 已提交
437
        nb++;
438
    }
B
bellard 已提交
439 440 441 442
    switch (nb) {
    case 0:
        break;
    case 1:
B
bellard 已提交
443
        val |= 0x01; /* 1 drive, ready for boot */
B
bellard 已提交
444 445
        break;
    case 2:
B
bellard 已提交
446
        val |= 0x41; /* 2 drives, ready for boot */
B
bellard 已提交
447 448
        break;
    }
B
bellard 已提交
449 450 451 452
    val |= 0x02; /* FPU is there */
    val |= 0x04; /* PS/2 mouse installed */
    rtc_set_memory(s, REG_EQUIPMENT_BYTE, val);

B
bellard 已提交
453
    /* hard drives */
454
    arg.rtc_state = s;
455 456
    arg.idebus[0] = idebus0;
    arg.idebus[1] = idebus1;
457
    qemu_register_reset(pc_cmos_init_late, &arg);
B
bellard 已提交
458 459
}

A
Andreas Färber 已提交
460 461 462
#define TYPE_PORT92 "port92"
#define PORT92(obj) OBJECT_CHECK(Port92State, (obj), TYPE_PORT92)

463 464
/* port 92 stuff: could be split off */
typedef struct Port92State {
A
Andreas Färber 已提交
465 466
    ISADevice parent_obj;

467
    MemoryRegion io;
468 469 470 471
    uint8_t outport;
    qemu_irq *a20_out;
} Port92State;

472 473
static void port92_write(void *opaque, hwaddr addr, uint64_t val,
                         unsigned size)
474 475
{
    Port92State *s = opaque;
476
    int oldval = s->outport;
477

G
Gonglei 已提交
478
    DPRINTF("port92: write 0x%02" PRIx64 "\n", val);
479 480
    s->outport = val;
    qemu_set_irq(*s->a20_out, (val >> 1) & 1);
481
    if ((val & 1) && !(oldval & 1)) {
482 483 484 485
        qemu_system_reset_request();
    }
}

486 487
static uint64_t port92_read(void *opaque, hwaddr addr,
                            unsigned size)
488 489 490 491 492 493 494 495 496 497 498
{
    Port92State *s = opaque;
    uint32_t ret;

    ret = s->outport;
    DPRINTF("port92: read 0x%02x\n", ret);
    return ret;
}

static void port92_init(ISADevice *dev, qemu_irq *a20_out)
{
A
Andreas Färber 已提交
499
    Port92State *s = PORT92(dev);
500 501 502 503 504 505 506 507

    s->a20_out = a20_out;
}

static const VMStateDescription vmstate_port92_isa = {
    .name = "port92",
    .version_id = 1,
    .minimum_version_id = 1,
508
    .fields = (VMStateField[]) {
509 510 511 512 513 514 515
        VMSTATE_UINT8(outport, Port92State),
        VMSTATE_END_OF_LIST()
    }
};

static void port92_reset(DeviceState *d)
{
A
Andreas Färber 已提交
516
    Port92State *s = PORT92(d);
517 518 519 520

    s->outport &= ~1;
}

521
static const MemoryRegionOps port92_ops = {
522 523 524 525 526 527 528
    .read = port92_read,
    .write = port92_write,
    .impl = {
        .min_access_size = 1,
        .max_access_size = 1,
    },
    .endianness = DEVICE_LITTLE_ENDIAN,
529 530
};

531
static void port92_initfn(Object *obj)
532
{
533
    Port92State *s = PORT92(obj);
534

535
    memory_region_init_io(&s->io, OBJECT(s), &port92_ops, s, "port92", 1);
536

537
    s->outport = 0;
538 539 540 541 542 543 544 545
}

static void port92_realizefn(DeviceState *dev, Error **errp)
{
    ISADevice *isadev = ISA_DEVICE(dev);
    Port92State *s = PORT92(dev);

    isa_register_ioport(isadev, &s->io, 0x92);
546 547
}

548 549
static void port92_class_initfn(ObjectClass *klass, void *data)
{
550
    DeviceClass *dc = DEVICE_CLASS(klass);
551 552

    dc->realize = port92_realizefn;
553 554
    dc->reset = port92_reset;
    dc->vmsd = &vmstate_port92_isa;
555 556 557 558 559 560
    /*
     * Reason: unlike ordinary ISA devices, this one needs additional
     * wiring: its A20 output line needs to be wired up by
     * port92_init().
     */
    dc->cannot_instantiate_with_device_add_yet = true;
561 562
}

563
static const TypeInfo port92_info = {
A
Andreas Färber 已提交
564
    .name          = TYPE_PORT92,
565 566
    .parent        = TYPE_ISA_DEVICE,
    .instance_size = sizeof(Port92State),
567
    .instance_init = port92_initfn,
568
    .class_init    = port92_class_initfn,
569 570
};

A
Andreas Färber 已提交
571
static void port92_register_types(void)
572
{
573
    type_register_static(&port92_info);
574
}
A
Andreas Färber 已提交
575 576

type_init(port92_register_types)
577

B
Blue Swirl 已提交
578
static void handle_a20_line_change(void *opaque, int irq, int level)
579
{
580
    X86CPU *cpu = opaque;
B
bellard 已提交
581

B
Blue Swirl 已提交
582
    /* XXX: send to all CPUs ? */
583
    /* XXX: add logic to handle multiple A20 line sources */
584
    x86_cpu_set_a20(cpu, level);
B
bellard 已提交
585 586
}

J
Jes Sorensen 已提交
587 588
int e820_add_entry(uint64_t address, uint64_t length, uint32_t type)
{
G
Gerd Hoffmann 已提交
589
    int index = le32_to_cpu(e820_reserve.count);
J
Jes Sorensen 已提交
590 591
    struct e820_entry *entry;

G
Gerd Hoffmann 已提交
592 593 594 595 596 597 598 599 600 601 602 603 604
    if (type != E820_RAM) {
        /* old FW_CFG_E820_TABLE entry -- reservations only */
        if (index >= E820_NR_ENTRIES) {
            return -EBUSY;
        }
        entry = &e820_reserve.entry[index++];

        entry->address = cpu_to_le64(address);
        entry->length = cpu_to_le64(length);
        entry->type = cpu_to_le32(type);

        e820_reserve.count = cpu_to_le32(index);
    }
J
Jes Sorensen 已提交
605

G
Gerd Hoffmann 已提交
606
    /* new "etc/e820" file -- include ram too */
607
    e820_table = g_renew(struct e820_entry, e820_table, e820_entries + 1);
G
Gerd Hoffmann 已提交
608 609 610 611
    e820_table[e820_entries].address = cpu_to_le64(address);
    e820_table[e820_entries].length = cpu_to_le64(length);
    e820_table[e820_entries].type = cpu_to_le32(type);
    e820_entries++;
J
Jes Sorensen 已提交
612

G
Gerd Hoffmann 已提交
613
    return e820_entries;
J
Jes Sorensen 已提交
614 615
}

616 617 618 619 620 621 622 623 624 625 626 627 628 629 630
int e820_get_num_entries(void)
{
    return e820_entries;
}

bool e820_get_entry(int idx, uint32_t type, uint64_t *address, uint64_t *length)
{
    if (idx < e820_entries && e820_table[idx].type == cpu_to_le32(type)) {
        *address = le64_to_cpu(e820_table[idx].address);
        *length = le64_to_cpu(e820_table[idx].length);
        return true;
    }
    return false;
}

631 632 633 634 635 636 637 638 639 640 641 642
/* Calculates the limit to CPU APIC ID values
 *
 * This function returns the limit for the APIC ID value, so that all
 * CPU APIC IDs are < pc_apic_id_limit().
 *
 * This is used for FW_CFG_MAX_CPUS. See comments on bochs_bios_init().
 */
static unsigned int pc_apic_id_limit(unsigned int max_cpus)
{
    return x86_cpu_apic_id_from_index(max_cpus - 1) + 1;
}

L
Laszlo Ersek 已提交
643
static FWCfgState *bochs_bios_init(void)
B
bellard 已提交
644
{
L
Laszlo Ersek 已提交
645
    FWCfgState *fw_cfg;
646 647
    uint8_t *smbios_tables, *smbios_anchor;
    size_t smbios_tables_len, smbios_anchor_len;
648 649
    uint64_t *numa_fw_cfg;
    int i, j;
650
    unsigned int apic_id_limit = pc_apic_id_limit(max_cpus);
651

652
    fw_cfg = fw_cfg_init_io(BIOS_CFG_IOPORT);
653 654 655 656 657 658 659 660 661 662 663 664 665 666 667
    /* FW_CFG_MAX_CPUS is a bit confusing/problematic on x86:
     *
     * SeaBIOS needs FW_CFG_MAX_CPUS for CPU hotplug, but the CPU hotplug
     * QEMU<->SeaBIOS interface is not based on the "CPU index", but on the APIC
     * ID of hotplugged CPUs[1]. This means that FW_CFG_MAX_CPUS is not the
     * "maximum number of CPUs", but the "limit to the APIC ID values SeaBIOS
     * may see".
     *
     * So, this means we must not use max_cpus, here, but the maximum possible
     * APIC ID value, plus one.
     *
     * [1] The only kind of "CPU identifier" used between SeaBIOS and QEMU is
     *     the APIC ID, not the "CPU index"
     */
    fw_cfg_add_i16(fw_cfg, FW_CFG_MAX_CPUS, (uint16_t)apic_id_limit);
668
    fw_cfg_add_i32(fw_cfg, FW_CFG_ID, 1);
669
    fw_cfg_add_i64(fw_cfg, FW_CFG_RAM_SIZE, (uint64_t)ram_size);
670 671
    fw_cfg_add_bytes(fw_cfg, FW_CFG_ACPI_TABLES,
                     acpi_tables, acpi_tables_len);
672
    fw_cfg_add_i32(fw_cfg, FW_CFG_IRQ0_OVERRIDE, kvm_allows_irq0_override());
673

674 675
    smbios_tables = smbios_get_table_legacy(&smbios_tables_len);
    if (smbios_tables) {
676
        fw_cfg_add_bytes(fw_cfg, FW_CFG_SMBIOS_ENTRIES,
677 678 679 680 681 682 683 684 685 686 687 688
                         smbios_tables, smbios_tables_len);
    }

    smbios_get_tables(&smbios_tables, &smbios_tables_len,
                      &smbios_anchor, &smbios_anchor_len);
    if (smbios_anchor) {
        fw_cfg_add_file(fw_cfg, "etc/smbios/smbios-tables",
                        smbios_tables, smbios_tables_len);
        fw_cfg_add_file(fw_cfg, "etc/smbios/smbios-anchor",
                        smbios_anchor, smbios_anchor_len);
    }

689
    fw_cfg_add_bytes(fw_cfg, FW_CFG_E820_TABLE,
G
Gerd Hoffmann 已提交
690 691 692
                     &e820_reserve, sizeof(e820_reserve));
    fw_cfg_add_file(fw_cfg, "etc/e820", e820_table,
                    sizeof(struct e820_entry) * e820_entries);
693

694
    fw_cfg_add_bytes(fw_cfg, FW_CFG_HPET, &hpet_cfg, sizeof(hpet_cfg));
695 696 697 698
    /* allocate memory for the NUMA channel: one (64bit) word for the number
     * of nodes, one word for each VCPU->node and one word for each node to
     * hold the amount of memory.
     */
699
    numa_fw_cfg = g_new0(uint64_t, 1 + apic_id_limit + nb_numa_nodes);
700
    numa_fw_cfg[0] = cpu_to_le64(nb_numa_nodes);
701
    for (i = 0; i < max_cpus; i++) {
702 703
        unsigned int apic_id = x86_cpu_apic_id_from_index(i);
        assert(apic_id < apic_id_limit);
704
        for (j = 0; j < nb_numa_nodes; j++) {
705
            if (test_bit(i, numa_info[j].node_cpu)) {
706
                numa_fw_cfg[apic_id + 1] = cpu_to_le64(j);
707 708 709 710 711
                break;
            }
        }
    }
    for (i = 0; i < nb_numa_nodes; i++) {
712
        numa_fw_cfg[apic_id_limit + 1 + i] = cpu_to_le64(numa_info[i].node_mem);
713
    }
714
    fw_cfg_add_bytes(fw_cfg, FW_CFG_NUMA, numa_fw_cfg,
715 716
                     (1 + apic_id_limit + nb_numa_nodes) *
                     sizeof(*numa_fw_cfg));
A
Alexander Graf 已提交
717 718

    return fw_cfg;
B
bellard 已提交
719 720
}

T
ths 已提交
721 722 723 724 725 726 727 728 729 730 731 732 733 734
static long get_file_size(FILE *f)
{
    long where, size;

    /* XXX: on Unix systems, using fstat() probably makes more sense */

    where = ftell(f);
    fseek(f, 0, SEEK_END);
    size = ftell(f);
    fseek(f, where, SEEK_SET);

    return size;
}

L
Laszlo Ersek 已提交
735
static void load_linux(FWCfgState *fw_cfg,
736
                       const char *kernel_filename,
L
liguang 已提交
737 738
                       const char *initrd_filename,
                       const char *kernel_cmdline,
A
Avi Kivity 已提交
739
                       hwaddr max_ram_size)
T
ths 已提交
740 741
{
    uint16_t protocol;
P
Paul Brook 已提交
742
    int setup_size, kernel_size, initrd_size = 0, cmdline_size;
T
ths 已提交
743
    uint32_t initrd_max;
744
    uint8_t header[8192], *setup, *kernel, *initrd_data;
A
Avi Kivity 已提交
745
    hwaddr real_addr, prot_addr, cmdline_addr, initrd_addr = 0;
746
    FILE *f;
P
Pascal Terjan 已提交
747
    char *vmode;
T
ths 已提交
748 749 750 751 752 753 754

    /* Align to 16 bytes as a paranoia measure */
    cmdline_size = (strlen(kernel_cmdline)+16) & ~15;

    /* load the kernel header */
    f = fopen(kernel_filename, "rb");
    if (!f || !(kernel_size = get_file_size(f)) ||
L
liguang 已提交
755 756 757 758 759
        fread(header, 1, MIN(ARRAY_SIZE(header), kernel_size), f) !=
        MIN(ARRAY_SIZE(header), kernel_size)) {
        fprintf(stderr, "qemu: could not load kernel '%s': %s\n",
                kernel_filename, strerror(errno));
        exit(1);
T
ths 已提交
760 761 762
    }

    /* kernel protocol version */
B
bellard 已提交
763
#if 0
T
ths 已提交
764
    fprintf(stderr, "header magic: %#x\n", ldl_p(header+0x202));
B
bellard 已提交
765
#endif
L
liguang 已提交
766 767 768 769 770
    if (ldl_p(header+0x202) == 0x53726448) {
        protocol = lduw_p(header+0x206);
    } else {
        /* This looks like a multiboot kernel. If it is, let's stop
           treating it like a Linux kernel. */
771
        if (load_multiboot(fw_cfg, f, kernel_filename, initrd_filename,
L
liguang 已提交
772
                           kernel_cmdline, kernel_size, header)) {
B
Blue Swirl 已提交
773
            return;
L
liguang 已提交
774 775
        }
        protocol = 0;
A
Alexander Graf 已提交
776
    }
T
ths 已提交
777 778

    if (protocol < 0x200 || !(header[0x211] & 0x01)) {
L
liguang 已提交
779 780 781 782
        /* Low kernel */
        real_addr    = 0x90000;
        cmdline_addr = 0x9a000 - cmdline_size;
        prot_addr    = 0x10000;
T
ths 已提交
783
    } else if (protocol < 0x202) {
L
liguang 已提交
784 785 786 787
        /* High but ancient kernel */
        real_addr    = 0x90000;
        cmdline_addr = 0x9a000 - cmdline_size;
        prot_addr    = 0x100000;
T
ths 已提交
788
    } else {
L
liguang 已提交
789 790 791 792
        /* High and recent kernel */
        real_addr    = 0x10000;
        cmdline_addr = 0x20000;
        prot_addr    = 0x100000;
T
ths 已提交
793 794
    }

B
bellard 已提交
795
#if 0
T
ths 已提交
796
    fprintf(stderr,
L
liguang 已提交
797 798 799 800 801 802
            "qemu: real_addr     = 0x" TARGET_FMT_plx "\n"
            "qemu: cmdline_addr  = 0x" TARGET_FMT_plx "\n"
            "qemu: prot_addr     = 0x" TARGET_FMT_plx "\n",
            real_addr,
            cmdline_addr,
            prot_addr);
B
bellard 已提交
803
#endif
T
ths 已提交
804 805

    /* highest address for loading the initrd */
L
liguang 已提交
806 807 808 809 810
    if (protocol >= 0x203) {
        initrd_max = ldl_p(header+0x22c);
    } else {
        initrd_max = 0x37ffffff;
    }
T
ths 已提交
811

812 813 814
    if (initrd_max >= max_ram_size - acpi_data_size) {
        initrd_max = max_ram_size - acpi_data_size - 1;
    }
T
ths 已提交
815

816 817
    fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_ADDR, cmdline_addr);
    fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_SIZE, strlen(kernel_cmdline)+1);
818
    fw_cfg_add_string(fw_cfg, FW_CFG_CMDLINE_DATA, kernel_cmdline);
T
ths 已提交
819 820

    if (protocol >= 0x202) {
L
liguang 已提交
821
        stl_p(header+0x228, cmdline_addr);
T
ths 已提交
822
    } else {
L
liguang 已提交
823 824
        stw_p(header+0x20, 0xA33F);
        stw_p(header+0x22, cmdline_addr-real_addr);
T
ths 已提交
825 826
    }

P
Pascal Terjan 已提交
827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844
    /* handle vga= parameter */
    vmode = strstr(kernel_cmdline, "vga=");
    if (vmode) {
        unsigned int video_mode;
        /* skip "vga=" */
        vmode += 4;
        if (!strncmp(vmode, "normal", 6)) {
            video_mode = 0xffff;
        } else if (!strncmp(vmode, "ext", 3)) {
            video_mode = 0xfffe;
        } else if (!strncmp(vmode, "ask", 3)) {
            video_mode = 0xfffd;
        } else {
            video_mode = strtol(vmode, NULL, 0);
        }
        stw_p(header+0x1fa, video_mode);
    }

T
ths 已提交
845
    /* loader type */
S
Stefan Weil 已提交
846
    /* High nybble = B reserved for QEMU; low nybble is revision number.
T
ths 已提交
847 848
       If this code is substantially changed, you may want to consider
       incrementing the revision. */
L
liguang 已提交
849 850 851
    if (protocol >= 0x200) {
        header[0x210] = 0xB0;
    }
T
ths 已提交
852 853
    /* heap */
    if (protocol >= 0x201) {
L
liguang 已提交
854 855
        header[0x211] |= 0x80;	/* CAN_USE_HEAP */
        stw_p(header+0x224, cmdline_addr-real_addr-0x200);
T
ths 已提交
856 857 858 859
    }

    /* load initrd */
    if (initrd_filename) {
L
liguang 已提交
860 861 862 863
        if (protocol < 0x200) {
            fprintf(stderr, "qemu: linux kernel too old to load a ram disk\n");
            exit(1);
        }
T
ths 已提交
864

L
liguang 已提交
865
        initrd_size = get_image_size(initrd_filename);
M
M. Mohan Kumar 已提交
866
        if (initrd_size < 0) {
867 868
            fprintf(stderr, "qemu: error reading initrd %s: %s\n",
                    initrd_filename, strerror(errno));
M
M. Mohan Kumar 已提交
869 870 871
            exit(1);
        }

872
        initrd_addr = (initrd_max-initrd_size) & ~4095;
873

874
        initrd_data = g_malloc(initrd_size);
875 876 877 878 879
        load_image(initrd_filename, initrd_data);

        fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_ADDR, initrd_addr);
        fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_SIZE, initrd_size);
        fw_cfg_add_bytes(fw_cfg, FW_CFG_INITRD_DATA, initrd_data, initrd_size);
T
ths 已提交
880

L
liguang 已提交
881 882
        stl_p(header+0x218, initrd_addr);
        stl_p(header+0x21c, initrd_size);
T
ths 已提交
883 884
    }

885
    /* load kernel and setup */
T
ths 已提交
886
    setup_size = header[0x1f1];
L
liguang 已提交
887 888 889
    if (setup_size == 0) {
        setup_size = 4;
    }
T
ths 已提交
890
    setup_size = (setup_size+1)*512;
891
    kernel_size -= setup_size;
T
ths 已提交
892

893 894
    setup  = g_malloc(setup_size);
    kernel = g_malloc(kernel_size);
895
    fseek(f, 0, SEEK_SET);
896 897 898 899 900 901 902 903
    if (fread(setup, 1, setup_size, f) != setup_size) {
        fprintf(stderr, "fread() failed\n");
        exit(1);
    }
    if (fread(kernel, 1, kernel_size, f) != kernel_size) {
        fprintf(stderr, "fread() failed\n");
        exit(1);
    }
T
ths 已提交
904
    fclose(f);
905
    memcpy(setup, header, MIN(sizeof(header), setup_size));
906 907 908 909 910 911 912 913 914

    fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ADDR, prot_addr);
    fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_SIZE, kernel_size);
    fw_cfg_add_bytes(fw_cfg, FW_CFG_KERNEL_DATA, kernel, kernel_size);

    fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_ADDR, real_addr);
    fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_SIZE, setup_size);
    fw_cfg_add_bytes(fw_cfg, FW_CFG_SETUP_DATA, setup, setup_size);

G
Gleb Natapov 已提交
915 916
    option_rom[nb_option_roms].name = "linuxboot.bin";
    option_rom[nb_option_roms].bootindex = 0;
917
    nb_option_roms++;
T
ths 已提交
918 919
}

B
bellard 已提交
920 921
#define NE2000_NB_MAX 6

B
Blue Swirl 已提交
922 923 924
static const int ne2000_io[NE2000_NB_MAX] = { 0x300, 0x320, 0x340, 0x360,
                                              0x280, 0x380 };
static const int ne2000_irq[NE2000_NB_MAX] = { 9, 10, 11, 3, 4, 5 };
B
bellard 已提交
925

926
void pc_init_ne2k_isa(ISABus *bus, NICInfo *nd)
927 928 929 930 931
{
    static int nb_ne2k = 0;

    if (nb_ne2k == NE2000_NB_MAX)
        return;
932
    isa_ne2000_init(bus, ne2000_io[nb_ne2k],
G
Gerd Hoffmann 已提交
933
                    ne2000_irq[nb_ne2k], nd);
934 935 936
    nb_ne2k++;
}

B
Blue Swirl 已提交
937
DeviceState *cpu_get_current_apic(void)
938
{
939 940
    if (current_cpu) {
        X86CPU *cpu = X86_CPU(current_cpu);
941
        return cpu->apic_state;
942 943 944 945 946
    } else {
        return NULL;
    }
}

947
void pc_acpi_smi_interrupt(void *opaque, int irq, int level)
B
Blue Swirl 已提交
948
{
949
    X86CPU *cpu = opaque;
B
Blue Swirl 已提交
950 951

    if (level) {
952
        cpu_interrupt(CPU(cpu), CPU_INTERRUPT_SMI);
B
Blue Swirl 已提交
953 954 955
    }
}

956 957
static X86CPU *pc_new_cpu(const char *cpu_model, int64_t apic_id,
                          DeviceState *icc_bridge, Error **errp)
958 959 960 961
{
    X86CPU *cpu;
    Error *local_err = NULL;

962 963 964 965
    cpu = cpu_x86_create(cpu_model, icc_bridge, &local_err);
    if (local_err != NULL) {
        error_propagate(errp, local_err);
        return NULL;
966 967 968 969 970 971 972
    }

    object_property_set_int(OBJECT(cpu), apic_id, "apic-id", &local_err);
    object_property_set_bool(OBJECT(cpu), true, "realized", &local_err);

    if (local_err) {
        error_propagate(errp, local_err);
973 974
        object_unref(OBJECT(cpu));
        cpu = NULL;
975 976 977 978
    }
    return cpu;
}

979 980 981 982 983 984 985
static const char *current_cpu_model;

void pc_hot_add_cpu(const int64_t id, Error **errp)
{
    DeviceState *icc_bridge;
    int64_t apic_id = x86_cpu_apic_id_from_index(id);

986 987 988 989 990
    if (id < 0) {
        error_setg(errp, "Invalid CPU id: %" PRIi64, id);
        return;
    }

991 992 993 994 995 996 997 998 999 1000 1001 1002
    if (cpu_exists(apic_id)) {
        error_setg(errp, "Unable to add CPU: %" PRIi64
                   ", it already exists", id);
        return;
    }

    if (id >= max_cpus) {
        error_setg(errp, "Unable to add CPU: %" PRIi64
                   ", max allowed: %d", id, max_cpus - 1);
        return;
    }

1003 1004 1005 1006 1007 1008 1009
    if (apic_id >= ACPI_CPU_HOTPLUG_ID_LIMIT) {
        error_setg(errp, "Unable to add CPU: %" PRIi64
                   ", resulting APIC ID (%" PRIi64 ") is too large",
                   id, apic_id);
        return;
    }

1010 1011 1012 1013 1014
    icc_bridge = DEVICE(object_resolve_path_type("icc-bridge",
                                                 TYPE_ICC_BRIDGE, NULL));
    pc_new_cpu(current_cpu_model, apic_id, icc_bridge, errp);
}

1015
void pc_cpus_init(const char *cpu_model, DeviceState *icc_bridge)
1016 1017
{
    int i;
1018
    X86CPU *cpu = NULL;
1019
    Error *error = NULL;
1020
    unsigned long apic_id_limit;
1021 1022 1023 1024 1025 1026 1027 1028 1029

    /* init CPUs */
    if (cpu_model == NULL) {
#ifdef TARGET_X86_64
        cpu_model = "qemu64";
#else
        cpu_model = "qemu32";
#endif
    }
1030
    current_cpu_model = cpu_model;
1031

1032 1033 1034 1035 1036 1037 1038
    apic_id_limit = pc_apic_id_limit(max_cpus);
    if (apic_id_limit > ACPI_CPU_HOTPLUG_ID_LIMIT) {
        error_report("max_cpus is too large. APIC ID of last CPU is %lu",
                     apic_id_limit - 1);
        exit(1);
    }

1039
    for (i = 0; i < smp_cpus; i++) {
1040 1041
        cpu = pc_new_cpu(cpu_model, x86_cpu_apic_id_from_index(i),
                         icc_bridge, &error);
1042
        if (error) {
1043
            error_report("%s", error_get_pretty(error));
1044
            error_free(error);
1045 1046
            exit(1);
        }
1047
    }
1048 1049

    /* map APIC MMIO area if CPU has APIC */
1050
    if (cpu && cpu->apic_state) {
1051 1052 1053 1054
        /* XXX: what if the base changes? */
        sysbus_mmio_map_overlap(SYS_BUS_DEVICE(icc_bridge), 0,
                                APIC_DEFAULT_ADDRESS, 0x1000);
    }
1055 1056 1057

    /* tell smbios about cpuid version and features */
    smbios_set_cpuid(cpu->env.cpuid_version, cpu->env.features[FEAT_1_EDX]);
1058 1059
}

1060 1061 1062 1063 1064 1065 1066 1067
/* pci-info ROM file. Little endian format */
typedef struct PcRomPciInfo {
    uint64_t w32_min;
    uint64_t w32_max;
    uint64_t w64_min;
    uint64_t w64_max;
} PcRomPciInfo;

1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078
typedef struct PcGuestInfoState {
    PcGuestInfo info;
    Notifier machine_done;
} PcGuestInfoState;

static
void pc_guest_info_machine_done(Notifier *notifier, void *data)
{
    PcGuestInfoState *guest_info_state = container_of(notifier,
                                                      PcGuestInfoState,
                                                      machine_done);
1079
    acpi_setup(&guest_info_state->info);
1080 1081 1082 1083 1084 1085 1086
}

PcGuestInfo *pc_guest_info_init(ram_addr_t below_4g_mem_size,
                                ram_addr_t above_4g_mem_size)
{
    PcGuestInfoState *guest_info_state = g_malloc0(sizeof *guest_info_state);
    PcGuestInfo *guest_info = &guest_info_state->info;
M
Michael S. Tsirkin 已提交
1087 1088
    int i, j;

E
Eduardo Habkost 已提交
1089
    guest_info->ram_size_below_4g = below_4g_mem_size;
M
Michael S. Tsirkin 已提交
1090 1091 1092 1093
    guest_info->ram_size = below_4g_mem_size + above_4g_mem_size;
    guest_info->apic_id_limit = pc_apic_id_limit(max_cpus);
    guest_info->apic_xrupt_override = kvm_allows_irq0_override();
    guest_info->numa_nodes = nb_numa_nodes;
1094
    guest_info->node_mem = g_malloc0(guest_info->numa_nodes *
M
Michael S. Tsirkin 已提交
1095
                                    sizeof *guest_info->node_mem);
1096 1097 1098 1099
    for (i = 0; i < nb_numa_nodes; i++) {
        guest_info->node_mem[i] = numa_info[i].node_mem;
    }

M
Michael S. Tsirkin 已提交
1100 1101 1102 1103 1104 1105 1106
    guest_info->node_cpu = g_malloc0(guest_info->apic_id_limit *
                                     sizeof *guest_info->node_cpu);

    for (i = 0; i < max_cpus; i++) {
        unsigned int apic_id = x86_cpu_apic_id_from_index(i);
        assert(apic_id < guest_info->apic_id_limit);
        for (j = 0; j < nb_numa_nodes; j++) {
1107
            if (test_bit(i, numa_info[j].node_cpu)) {
M
Michael S. Tsirkin 已提交
1108 1109 1110 1111 1112
                guest_info->node_cpu[apic_id] = j;
                break;
            }
        }
    }
1113 1114 1115 1116 1117 1118

    guest_info_state->machine_done.notify = pc_guest_info_machine_done;
    qemu_add_machine_init_done_notifier(&guest_info_state->machine_done);
    return guest_info;
}

1119 1120 1121
/* setup pci memory address space mapping into system address space */
void pc_pci_as_mapping_init(Object *owner, MemoryRegion *system_memory,
                            MemoryRegion *pci_address_space)
1122
{
1123 1124 1125
    /* Set to lower priority than RAM */
    memory_region_add_subregion_overlap(system_memory, 0x0,
                                        pci_address_space, -1);
1126 1127
}

G
Gerd Hoffmann 已提交
1128 1129
void pc_acpi_init(const char *default_dsdt)
{
1130
    char *filename;
G
Gerd Hoffmann 已提交
1131 1132 1133 1134 1135 1136 1137 1138 1139

    if (acpi_tables != NULL) {
        /* manually set via -acpitable, leave it alone */
        return;
    }

    filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, default_dsdt);
    if (filename == NULL) {
        fprintf(stderr, "WARNING: failed to find %s\n", default_dsdt);
1140 1141 1142 1143
    } else {
        char *arg;
        QemuOpts *opts;
        Error *err = NULL;
G
Gerd Hoffmann 已提交
1144

1145
        arg = g_strdup_printf("file=%s", filename);
1146

1147 1148 1149
        /* creates a deep copy of "arg" */
        opts = qemu_opts_parse(qemu_find_opts("acpi"), arg, 0);
        g_assert(opts != NULL);
1150

1151
        acpi_table_add_builtin(opts, &err);
1152
        if (err) {
1153 1154
            error_report("WARNING: failed to load %s: %s", filename,
                         error_get_pretty(err));
1155 1156 1157 1158
            error_free(err);
        }
        g_free(arg);
        g_free(filename);
G
Gerd Hoffmann 已提交
1159 1160 1161
    }
}

1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172
FWCfgState *xen_load_linux(const char *kernel_filename,
                           const char *kernel_cmdline,
                           const char *initrd_filename,
                           ram_addr_t below_4g_mem_size,
                           PcGuestInfo *guest_info)
{
    int i;
    FWCfgState *fw_cfg;

    assert(kernel_filename != NULL);

1173
    fw_cfg = fw_cfg_init_io(BIOS_CFG_IOPORT);
1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186
    rom_set_fw(fw_cfg);

    load_linux(fw_cfg, kernel_filename, initrd_filename,
               kernel_cmdline, below_4g_mem_size);
    for (i = 0; i < nb_option_roms; i++) {
        assert(!strcmp(option_rom[i].name, "linuxboot.bin") ||
               !strcmp(option_rom[i].name, "multiboot.bin"));
        rom_add_option(option_rom[i].name, option_rom[i].bootindex);
    }
    guest_info->fw_cfg = fw_cfg;
    return fw_cfg;
}

1187 1188
FWCfgState *pc_memory_init(MachineState *machine,
                           MemoryRegion *system_memory,
L
Laszlo Ersek 已提交
1189 1190 1191
                           ram_addr_t below_4g_mem_size,
                           ram_addr_t above_4g_mem_size,
                           MemoryRegion *rom_memory,
1192 1193
                           MemoryRegion **ram_memory,
                           PcGuestInfo *guest_info)
B
bellard 已提交
1194
{
1195 1196
    int linux_boot, i;
    MemoryRegion *ram, *option_rom_mr;
1197
    MemoryRegion *ram_below_4g, *ram_above_4g;
L
Laszlo Ersek 已提交
1198
    FWCfgState *fw_cfg;
1199
    PCMachineState *pcms = PC_MACHINE(machine);
1200

1201 1202 1203
    assert(machine->ram_size == below_4g_mem_size + above_4g_mem_size);

    linux_boot = (machine->kernel_filename != NULL);
B
bellard 已提交
1204

1205
    /* Allocate RAM.  We allocate it as a single memory region and use
D
Dong Xu Wang 已提交
1206
     * aliases to address portions of it, mostly for backwards compatibility
1207 1208
     * with older qemus that used qemu_ram_alloc().
     */
1209
    ram = g_malloc(sizeof(*ram));
1210 1211
    memory_region_allocate_system_memory(ram, NULL, "pc.ram",
                                         machine->ram_size);
A
Avi Kivity 已提交
1212
    *ram_memory = ram;
1213
    ram_below_4g = g_malloc(sizeof(*ram_below_4g));
1214
    memory_region_init_alias(ram_below_4g, NULL, "ram-below-4g", ram,
1215 1216
                             0, below_4g_mem_size);
    memory_region_add_subregion(system_memory, 0, ram_below_4g);
G
Gerd Hoffmann 已提交
1217
    e820_add_entry(0, below_4g_mem_size, E820_RAM);
1218
    if (above_4g_mem_size > 0) {
1219
        ram_above_4g = g_malloc(sizeof(*ram_above_4g));
1220
        memory_region_init_alias(ram_above_4g, NULL, "ram-above-4g", ram,
1221 1222 1223
                                 below_4g_mem_size, above_4g_mem_size);
        memory_region_add_subregion(system_memory, 0x100000000ULL,
                                    ram_above_4g);
G
Gerd Hoffmann 已提交
1224
        e820_add_entry(0x100000000ULL, above_4g_mem_size, E820_RAM);
1225
    }
1226

1227 1228
    if (!guest_info->has_reserved_memory &&
        (machine->ram_slots ||
1229
         (machine->maxram_size > machine->ram_size))) {
1230 1231 1232 1233 1234 1235 1236
        MachineClass *mc = MACHINE_GET_CLASS(machine);

        error_report("\"-memory 'slots|maxmem'\" is not supported by: %s",
                     mc->name);
        exit(EXIT_FAILURE);
    }

1237
    /* initialize hotplug memory address space */
1238
    if (guest_info->has_reserved_memory &&
1239
        (machine->ram_size < machine->maxram_size)) {
1240
        ram_addr_t hotplug_mem_size =
1241
            machine->maxram_size - machine->ram_size;
1242

1243 1244 1245 1246 1247 1248
        if (machine->ram_slots > ACPI_MAX_RAM_SLOTS) {
            error_report("unsupported amount of memory slots: %"PRIu64,
                         machine->ram_slots);
            exit(EXIT_FAILURE);
        }

1249 1250 1251
        pcms->hotplug_memory_base =
            ROUND_UP(0x100000000ULL + above_4g_mem_size, 1ULL << 30);

1252 1253 1254 1255 1256
        if (pcms->enforce_aligned_dimm) {
            /* size hotplug region assuming 1G page max alignment per slot */
            hotplug_mem_size += (1ULL << 30) * machine->ram_slots;
        }

1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268
        if ((pcms->hotplug_memory_base + hotplug_mem_size) <
            hotplug_mem_size) {
            error_report("unsupported amount of maximum memory: " RAM_ADDR_FMT,
                         machine->maxram_size);
            exit(EXIT_FAILURE);
        }

        memory_region_init(&pcms->hotplug_memory, OBJECT(pcms),
                           "hotplug-memory", hotplug_mem_size);
        memory_region_add_subregion(system_memory, pcms->hotplug_memory_base,
                                    &pcms->hotplug_memory);
    }
1269 1270

    /* Initialize PC system firmware */
1271
    pc_system_firmware_init(rom_memory, guest_info->isapc_ram_fw);
1272

1273
    option_rom_mr = g_malloc(sizeof(*option_rom_mr));
1274 1275
    memory_region_init_ram(option_rom_mr, NULL, "pc.rom", PC_ROM_SIZE,
                           &error_abort);
1276
    vmstate_register_ram_global(option_rom_mr);
1277
    memory_region_add_subregion_overlap(rom_memory,
1278 1279 1280
                                        PC_ROM_MIN_VGA,
                                        option_rom_mr,
                                        1);
1281

A
Alexander Graf 已提交
1282
    fw_cfg = bochs_bios_init();
G
Gerd Hoffmann 已提交
1283
    rom_set_fw(fw_cfg);
A
Alexander Graf 已提交
1284

1285 1286 1287 1288 1289 1290
    if (guest_info->has_reserved_memory && pcms->hotplug_memory_base) {
        uint64_t *val = g_malloc(sizeof(*val));
        *val = cpu_to_le64(ROUND_UP(pcms->hotplug_memory_base, 0x1ULL << 30));
        fw_cfg_add_file(fw_cfg, "etc/reserved-memory-end", val, sizeof(*val));
    }

1291
    if (linux_boot) {
1292 1293
        load_linux(fw_cfg, machine->kernel_filename, machine->initrd_filename,
                   machine->kernel_cmdline, below_4g_mem_size);
1294 1295 1296
    }

    for (i = 0; i < nb_option_roms; i++) {
G
Gleb Natapov 已提交
1297
        rom_add_option(option_rom[i].name, option_rom[i].bootindex);
1298
    }
1299
    guest_info->fw_cfg = fw_cfg;
1300
    return fw_cfg;
1301 1302
}

1303 1304 1305 1306 1307
qemu_irq *pc_allocate_cpu_irq(void)
{
    return qemu_allocate_irqs(pic_irq_request, NULL, 1);
}

1308
DeviceState *pc_vga_init(ISABus *isa_bus, PCIBus *pci_bus)
1309
{
1310 1311
    DeviceState *dev = NULL;

1312 1313 1314 1315 1316
    if (pci_bus) {
        PCIDevice *pcidev = pci_vga_init(pci_bus);
        dev = pcidev ? &pcidev->qdev : NULL;
    } else if (isa_bus) {
        ISADevice *isadev = isa_vga_init(isa_bus);
A
Andreas Färber 已提交
1317
        dev = isadev ? DEVICE(isadev) : NULL;
1318
    }
1319
    return dev;
1320 1321
}

B
Blue Swirl 已提交
1322 1323
static void cpu_request_exit(void *opaque, int irq, int level)
{
1324
    CPUState *cpu = current_cpu;
B
Blue Swirl 已提交
1325

1326 1327
    if (cpu && level) {
        cpu_exit(cpu);
B
Blue Swirl 已提交
1328 1329 1330
    }
}

J
Julien Grall 已提交
1331 1332
static const MemoryRegionOps ioport80_io_ops = {
    .write = ioport80_write,
1333
    .read = ioport80_read,
J
Julien Grall 已提交
1334 1335 1336 1337 1338 1339 1340 1341 1342
    .endianness = DEVICE_NATIVE_ENDIAN,
    .impl = {
        .min_access_size = 1,
        .max_access_size = 1,
    },
};

static const MemoryRegionOps ioportF0_io_ops = {
    .write = ioportF0_write,
1343
    .read = ioportF0_read,
J
Julien Grall 已提交
1344 1345 1346 1347 1348 1349 1350
    .endianness = DEVICE_NATIVE_ENDIAN,
    .impl = {
        .min_access_size = 1,
        .max_access_size = 1,
    },
};

1351
void pc_basic_device_init(ISABus *isa_bus, qemu_irq *gsi,
1352
                          ISADevice **rtc_state,
K
Kevin Wolf 已提交
1353
                          ISADevice **floppy,
1354 1355
                          bool no_vmport,
                          uint32 hpet_irqs)
1356 1357 1358
{
    int i;
    DriveInfo *fd[MAX_FD];
1359 1360 1361
    DeviceState *hpet = NULL;
    int pit_isa_irq = 0;
    qemu_irq pit_alt_irq = NULL;
1362
    qemu_irq rtc_irq = NULL;
B
Blue Swirl 已提交
1363
    qemu_irq *a20_line;
1364
    ISADevice *i8042, *port92, *vmmouse, *pit = NULL;
B
Blue Swirl 已提交
1365
    qemu_irq *cpu_exit_irq;
J
Julien Grall 已提交
1366 1367
    MemoryRegion *ioport80_io = g_new(MemoryRegion, 1);
    MemoryRegion *ioportF0_io = g_new(MemoryRegion, 1);
1368

1369
    memory_region_init_io(ioport80_io, NULL, &ioport80_io_ops, NULL, "ioport80", 1);
J
Julien Grall 已提交
1370
    memory_region_add_subregion(isa_bus->address_space_io, 0x80, ioport80_io);
1371

1372
    memory_region_init_io(ioportF0_io, NULL, &ioportF0_io_ops, NULL, "ioportF0", 1);
J
Julien Grall 已提交
1373
    memory_region_add_subregion(isa_bus->address_space_io, 0xf0, ioportF0_io);
1374

1375 1376 1377 1378 1379 1380 1381
    /*
     * Check if an HPET shall be created.
     *
     * Without KVM_CAP_PIT_STATE2, we cannot switch off the in-kernel PIT
     * when the HPET wants to take over. Thus we have to disable the latter.
     */
    if (!no_hpet && (!kvm_irqchip_in_kernel() || kvm_has_pit_state2())) {
1382
        /* In order to set property, here not using sysbus_try_create_simple */
M
Michael S. Tsirkin 已提交
1383
        hpet = qdev_try_create(NULL, TYPE_HPET);
B
Blue Swirl 已提交
1384
        if (hpet) {
1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396
            /* For pc-piix-*, hpet's intcap is always IRQ2. For pc-q35-1.7
             * and earlier, use IRQ2 for compat. Otherwise, use IRQ16~23,
             * IRQ8 and IRQ2.
             */
            uint8_t compat = object_property_get_int(OBJECT(hpet),
                    HPET_INTCAP, NULL);
            if (!compat) {
                qdev_prop_set_uint32(hpet, HPET_INTCAP, hpet_irqs);
            }
            qdev_init_nofail(hpet);
            sysbus_mmio_map(SYS_BUS_DEVICE(hpet), 0, HPET_BASE);

J
Jan Kiszka 已提交
1397
            for (i = 0; i < GSI_NUM_PINS; i++) {
1398
                sysbus_connect_irq(SYS_BUS_DEVICE(hpet), i, gsi[i]);
B
Blue Swirl 已提交
1399
            }
1400 1401 1402
            pit_isa_irq = -1;
            pit_alt_irq = qdev_get_gpio_in(hpet, HPET_LEGACY_PIT_INT);
            rtc_irq = qdev_get_gpio_in(hpet, HPET_LEGACY_RTC_INT);
J
Jan Kiszka 已提交
1403
        }
1404
    }
1405
    *rtc_state = rtc_init(isa_bus, 2000, rtc_irq);
1406 1407 1408

    qemu_register_boot_set(pc_boot_set, *rtc_state);

1409 1410 1411 1412 1413 1414 1415 1416
    if (!xen_enabled()) {
        if (kvm_irqchip_in_kernel()) {
            pit = kvm_pit_init(isa_bus, 0x40);
        } else {
            pit = pit_init(isa_bus, 0x40, pit_isa_irq, pit_alt_irq);
        }
        if (hpet) {
            /* connect PIT to output control line of the HPET */
A
Andreas Färber 已提交
1417
            qdev_connect_gpio_out(hpet, 0, qdev_get_gpio_in(DEVICE(pit), 0));
1418 1419
        }
        pcspk_init(isa_bus, pit);
1420
    }
1421 1422 1423

    for(i = 0; i < MAX_SERIAL_PORTS; i++) {
        if (serial_hds[i]) {
1424
            serial_isa_init(isa_bus, i, serial_hds[i]);
1425 1426 1427 1428 1429
        }
    }

    for(i = 0; i < MAX_PARALLEL_PORTS; i++) {
        if (parallel_hds[i]) {
1430
            parallel_init(isa_bus, i, parallel_hds[i]);
1431 1432 1433
        }
    }

1434
    a20_line = qemu_allocate_irqs(handle_a20_line_change, first_cpu, 2);
1435
    i8042 = isa_create_simple(isa_bus, "i8042");
1436
    i8042_setup_a20_line(i8042, &a20_line[0]);
1437
    if (!no_vmport) {
1438 1439
        vmport_init(isa_bus);
        vmmouse = isa_try_create(isa_bus, "vmmouse");
1440 1441 1442
    } else {
        vmmouse = NULL;
    }
B
Blue Swirl 已提交
1443
    if (vmmouse) {
A
Andreas Färber 已提交
1444 1445 1446
        DeviceState *dev = DEVICE(vmmouse);
        qdev_prop_set_ptr(dev, "ps2_mouse", i8042);
        qdev_init_nofail(dev);
B
Blue Swirl 已提交
1447
    }
1448
    port92 = isa_create_simple(isa_bus, "port92");
1449
    port92_init(port92, &a20_line[1]);
B
Blue Swirl 已提交
1450

B
Blue Swirl 已提交
1451 1452
    cpu_exit_irq = qemu_allocate_irqs(cpu_request_exit, NULL, 1);
    DMA_init(0, cpu_exit_irq);
1453 1454 1455 1456

    for(i = 0; i < MAX_FD; i++) {
        fd[i] = drive_get(IF_FLOPPY, 0, i);
    }
1457
    *floppy = fdctrl_init_isa(isa_bus, fd);
1458 1459
}

1460 1461 1462 1463 1464 1465 1466 1467 1468 1469
void pc_nic_init(ISABus *isa_bus, PCIBus *pci_bus)
{
    int i;

    for (i = 0; i < nb_nics; i++) {
        NICInfo *nd = &nd_table[i];

        if (!pci_bus || (nd->model && strcmp(nd->model, "ne2k_isa") == 0)) {
            pc_init_ne2k_isa(isa_bus, nd);
        } else {
1470
            pci_nic_init_nofail(nd, pci_bus, "e1000", NULL);
1471 1472 1473 1474
        }
    }
}

1475
void pc_pci_device_init(PCIBus *pci_bus)
1476 1477 1478 1479 1480 1481 1482 1483 1484
{
    int max_bus;
    int bus;

    max_bus = drive_get_max_bus(IF_SCSI);
    for (bus = 0; bus <= max_bus; bus++) {
        pci_create_simple(pci_bus, -1, "lsi53c895a");
    }
}
1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501

void ioapic_init_gsi(GSIState *gsi_state, const char *parent_name)
{
    DeviceState *dev;
    SysBusDevice *d;
    unsigned int i;

    if (kvm_irqchip_in_kernel()) {
        dev = qdev_create(NULL, "kvm-ioapic");
    } else {
        dev = qdev_create(NULL, "ioapic");
    }
    if (parent_name) {
        object_property_add_child(object_resolve_path(parent_name, NULL),
                                  "ioapic", OBJECT(dev), NULL);
    }
    qdev_init_nofail(dev);
1502
    d = SYS_BUS_DEVICE(dev);
1503
    sysbus_mmio_map(d, 0, IO_APIC_DEFAULT_ADDRESS);
1504 1505 1506 1507 1508

    for (i = 0; i < IOAPIC_NUM_PINS; i++) {
        gsi_state->ioapic_irq[i] = qdev_get_gpio_in(dev, i);
    }
}
1509 1510 1511 1512 1513 1514

static void pc_generic_machine_class_init(ObjectClass *oc, void *data)
{
    MachineClass *mc = MACHINE_CLASS(oc);
    QEMUMachine *qm = data;

1515
    mc->family = qm->family;
1516 1517 1518 1519 1520 1521 1522 1523
    mc->name = qm->name;
    mc->alias = qm->alias;
    mc->desc = qm->desc;
    mc->init = qm->init;
    mc->reset = qm->reset;
    mc->hot_add_cpu = qm->hot_add_cpu;
    mc->kvm_type = qm->kvm_type;
    mc->block_default_type = qm->block_default_type;
1524
    mc->units_per_default_bus = qm->units_per_default_bus;
1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535
    mc->max_cpus = qm->max_cpus;
    mc->no_serial = qm->no_serial;
    mc->no_parallel = qm->no_parallel;
    mc->use_virtcon = qm->use_virtcon;
    mc->use_sclp = qm->use_sclp;
    mc->no_floppy = qm->no_floppy;
    mc->no_cdrom = qm->no_cdrom;
    mc->no_sdcard = qm->no_sdcard;
    mc->is_default = qm->is_default;
    mc->default_machine_opts = qm->default_machine_opts;
    mc->default_boot_order = qm->default_boot_order;
1536
    mc->default_display = qm->default_display;
1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554
    mc->compat_props = qm->compat_props;
    mc->hw_version = qm->hw_version;
}

void qemu_register_pc_machine(QEMUMachine *m)
{
    char *name = g_strconcat(m->name, TYPE_MACHINE_SUFFIX, NULL);
    TypeInfo ti = {
        .name       = name,
        .parent     = TYPE_PC_MACHINE,
        .class_init = pc_generic_machine_class_init,
        .class_data = (void *)m,
    };

    type_register(&ti);
    g_free(name);
}

1555 1556 1557
static void pc_dimm_plug(HotplugHandler *hotplug_dev,
                         DeviceState *dev, Error **errp)
{
1558
    int slot;
1559
    HotplugHandlerClass *hhc;
1560 1561
    Error *local_err = NULL;
    PCMachineState *pcms = PC_MACHINE(hotplug_dev);
1562
    MachineState *machine = MACHINE(hotplug_dev);
1563 1564 1565
    PCDIMMDevice *dimm = PC_DIMM(dev);
    PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm);
    MemoryRegion *mr = ddc->get_memory_region(dimm);
1566
    uint64_t existing_dimms_capacity = 0;
1567
    uint64_t align = TARGET_PAGE_SIZE;
1568 1569 1570
    uint64_t addr;

    addr = object_property_get_int(OBJECT(dimm), PC_DIMM_ADDR_PROP, &local_err);
1571 1572 1573 1574
    if (local_err) {
        goto out;
    }

1575 1576 1577 1578
    if (memory_region_get_alignment(mr) && pcms->enforce_aligned_dimm) {
        align = memory_region_get_alignment(mr);
    }

1579 1580
    addr = pc_dimm_get_free_addr(pcms->hotplug_memory_base,
                                 memory_region_size(&pcms->hotplug_memory),
1581
                                 !addr ? NULL : &addr, align,
1582 1583 1584 1585
                                 memory_region_size(mr), &local_err);
    if (local_err) {
        goto out;
    }
1586

1587 1588
    existing_dimms_capacity = pc_existing_dimms_capacity(&local_err);
    if (local_err) {
1589 1590 1591 1592 1593 1594
        goto out;
    }

    if (existing_dimms_capacity + memory_region_size(mr) >
        machine->maxram_size - machine->ram_size) {
        error_setg(&local_err, "not enough space, currently 0x%" PRIx64
1595 1596 1597
                   " in use of total hot pluggable 0x" RAM_ADDR_FMT,
                   existing_dimms_capacity,
                   machine->maxram_size - machine->ram_size);
1598 1599 1600
        goto out;
    }

1601
    object_property_set_int(OBJECT(dev), addr, PC_DIMM_ADDR_PROP, &local_err);
1602 1603 1604
    if (local_err) {
        goto out;
    }
1605
    trace_mhp_pc_dimm_assigned_address(addr);
1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620

    slot = object_property_get_int(OBJECT(dev), PC_DIMM_SLOT_PROP, &local_err);
    if (local_err) {
        goto out;
    }

    slot = pc_dimm_get_free_slot(slot == PC_DIMM_UNASSIGNED_SLOT ? NULL : &slot,
                                 machine->ram_slots, &local_err);
    if (local_err) {
        goto out;
    }
    object_property_set_int(OBJECT(dev), slot, PC_DIMM_SLOT_PROP, &local_err);
    if (local_err) {
        goto out;
    }
1621
    trace_mhp_pc_dimm_assigned_slot(slot);
1622

1623 1624 1625 1626 1627 1628
    if (!pcms->acpi_dev) {
        error_setg(&local_err,
                   "memory hotplug is not enabled: missing acpi device");
        goto out;
    }

1629 1630 1631 1632 1633
    if (kvm_enabled() && !kvm_has_free_slot(machine)) {
        error_setg(&local_err, "hypervisor has no free memory slots left");
        goto out;
    }

1634 1635 1636
    memory_region_add_subregion(&pcms->hotplug_memory,
                                addr - pcms->hotplug_memory_base, mr);
    vmstate_register_ram(mr, dev);
1637 1638 1639

    hhc = HOTPLUG_HANDLER_GET_CLASS(pcms->acpi_dev);
    hhc->plug(HOTPLUG_HANDLER(pcms->acpi_dev), dev, &local_err);
1640 1641 1642 1643
out:
    error_propagate(errp, local_err);
}

1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662
static void pc_cpu_plug(HotplugHandler *hotplug_dev,
                        DeviceState *dev, Error **errp)
{
    HotplugHandlerClass *hhc;
    Error *local_err = NULL;
    PCMachineState *pcms = PC_MACHINE(hotplug_dev);

    if (!dev->hotplugged) {
        goto out;
    }

    if (!pcms->acpi_dev) {
        error_setg(&local_err,
                   "cpu hotplug is not enabled: missing acpi device");
        goto out;
    }

    hhc = HOTPLUG_HANDLER_GET_CLASS(pcms->acpi_dev);
    hhc->plug(HOTPLUG_HANDLER(pcms->acpi_dev), dev, &local_err);
G
Gu Zheng 已提交
1663 1664 1665 1666 1667 1668
    if (local_err) {
        goto out;
    }

    /* increment the number of CPUs */
    rtc_set_memory(pcms->rtc, 0x5f, rtc_get_memory(pcms->rtc, 0x5f) + 1);
1669 1670 1671 1672
out:
    error_propagate(errp, local_err);
}

1673 1674 1675 1676 1677
static void pc_machine_device_plug_cb(HotplugHandler *hotplug_dev,
                                      DeviceState *dev, Error **errp)
{
    if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) {
        pc_dimm_plug(hotplug_dev, dev, errp);
1678 1679
    } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) {
        pc_cpu_plug(hotplug_dev, dev, errp);
1680 1681 1682 1683 1684 1685 1686 1687
    }
}

static HotplugHandler *pc_get_hotpug_handler(MachineState *machine,
                                             DeviceState *dev)
{
    PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(machine);

1688 1689
    if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM) ||
        object_dynamic_cast(OBJECT(dev), TYPE_CPU)) {
1690 1691 1692 1693 1694 1695 1696
        return HOTPLUG_HANDLER(machine);
    }

    return pcmc->get_hotplug_handler ?
        pcmc->get_hotplug_handler(machine, dev) : NULL;
}

1697 1698 1699 1700 1701 1702 1703 1704 1705 1706
static void
pc_machine_get_hotplug_memory_region_size(Object *obj, Visitor *v, void *opaque,
                                          const char *name, Error **errp)
{
    PCMachineState *pcms = PC_MACHINE(obj);
    int64_t value = memory_region_size(&pcms->hotplug_memory);

    visit_type_int(v, &value, name, errp);
}

1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746
static void pc_machine_get_max_ram_below_4g(Object *obj, Visitor *v,
                                         void *opaque, const char *name,
                                         Error **errp)
{
    PCMachineState *pcms = PC_MACHINE(obj);
    uint64_t value = pcms->max_ram_below_4g;

    visit_type_size(v, &value, name, errp);
}

static void pc_machine_set_max_ram_below_4g(Object *obj, Visitor *v,
                                         void *opaque, const char *name,
                                         Error **errp)
{
    PCMachineState *pcms = PC_MACHINE(obj);
    Error *error = NULL;
    uint64_t value;

    visit_type_size(v, &value, name, &error);
    if (error) {
        error_propagate(errp, error);
        return;
    }
    if (value > (1ULL << 32)) {
        error_set(&error, ERROR_CLASS_GENERIC_ERROR,
                  "Machine option 'max-ram-below-4g=%"PRIu64
                  "' expects size less than or equal to 4G", value);
        error_propagate(errp, error);
        return;
    }

    if (value < (1ULL << 20)) {
        error_report("Warning: small max_ram_below_4g(%"PRIu64
                     ") less than 1M.  BIOS may not work..",
                     value);
    }

    pcms->max_ram_below_4g = value;
}

1747 1748
static void pc_machine_get_vmport(Object *obj, Visitor *v, void *opaque,
                                  const char *name, Error **errp)
1749 1750
{
    PCMachineState *pcms = PC_MACHINE(obj);
1751
    OnOffAuto vmport = pcms->vmport;
1752

1753
    visit_type_OnOffAuto(v, &vmport, name, errp);
1754 1755
}

1756 1757
static void pc_machine_set_vmport(Object *obj, Visitor *v, void *opaque,
                                  const char *name, Error **errp)
1758 1759 1760
{
    PCMachineState *pcms = PC_MACHINE(obj);

1761
    visit_type_OnOffAuto(v, &pcms->vmport, name, errp);
1762 1763
}

1764 1765 1766 1767 1768 1769 1770
static bool pc_machine_get_aligned_dimm(Object *obj, Error **errp)
{
    PCMachineState *pcms = PC_MACHINE(obj);

    return pcms->enforce_aligned_dimm;
}

1771 1772
static void pc_machine_initfn(Object *obj)
{
1773 1774
    PCMachineState *pcms = PC_MACHINE(obj);

1775 1776 1777
    object_property_add(obj, PC_MACHINE_MEMHP_REGION_SIZE, "int",
                        pc_machine_get_hotplug_memory_region_size,
                        NULL, NULL, NULL, NULL);
1778

1779 1780 1781 1782 1783
    pcms->max_ram_below_4g = 1ULL << 32; /* 4G */
    object_property_add(obj, PC_MACHINE_MAX_RAM_BELOW_4G, "size",
                        pc_machine_get_max_ram_below_4g,
                        pc_machine_set_max_ram_below_4g,
                        NULL, NULL, NULL);
1784 1785 1786
    object_property_set_description(obj, PC_MACHINE_MAX_RAM_BELOW_4G,
                                    "Maximum ram below the 4G boundary (32bit boundary)",
                                    NULL);
1787

1788 1789 1790 1791 1792
    pcms->vmport = ON_OFF_AUTO_AUTO;
    object_property_add(obj, PC_MACHINE_VMPORT, "OnOffAuto",
                        pc_machine_get_vmport,
                        pc_machine_set_vmport,
                        NULL, NULL, NULL);
1793 1794 1795
    object_property_set_description(obj, PC_MACHINE_VMPORT,
                                    "Enable vmport (pc & q35)",
                                    NULL);
1796 1797 1798 1799 1800

    pcms->enforce_aligned_dimm = true;
    object_property_add_bool(obj, PC_MACHINE_ENFORCE_ALIGNED_DIMM,
                             pc_machine_get_aligned_dimm,
                             NULL, NULL);
1801 1802
}

1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813
static void pc_machine_class_init(ObjectClass *oc, void *data)
{
    MachineClass *mc = MACHINE_CLASS(oc);
    PCMachineClass *pcmc = PC_MACHINE_CLASS(oc);
    HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc);

    pcmc->get_hotplug_handler = mc->get_hotplug_handler;
    mc->get_hotplug_handler = pc_get_hotpug_handler;
    hc->plug = pc_machine_device_plug_cb;
}

1814 1815 1816 1817 1818
static const TypeInfo pc_machine_info = {
    .name = TYPE_PC_MACHINE,
    .parent = TYPE_MACHINE,
    .abstract = true,
    .instance_size = sizeof(PCMachineState),
1819
    .instance_init = pc_machine_initfn,
1820
    .class_size = sizeof(PCMachineClass),
1821 1822 1823 1824 1825
    .class_init = pc_machine_class_init,
    .interfaces = (InterfaceInfo[]) {
         { TYPE_HOTPLUG_HANDLER },
         { }
    },
1826 1827 1828 1829 1830 1831 1832 1833
};

static void pc_machine_register_types(void)
{
    type_register_static(&pc_machine_info);
}

type_init(pc_machine_register_types)