pc.c 59.5 KB
Newer Older
B
bellard 已提交
1 2
/*
 * QEMU PC System Emulator
3
 *
B
bellard 已提交
4
 * Copyright (c) 2003-2004 Fabrice Bellard
5
 *
B
bellard 已提交
6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
P
Peter Maydell 已提交
24
#include "qemu/osdep.h"
25
#include "hw/hw.h"
P
Paolo Bonzini 已提交
26 27 28
#include "hw/i386/pc.h"
#include "hw/char/serial.h"
#include "hw/i386/apic.h"
29 30
#include "hw/i386/topology.h"
#include "sysemu/cpus.h"
P
Paolo Bonzini 已提交
31
#include "hw/block/fdc.h"
32 33
#include "hw/ide.h"
#include "hw/pci/pci.h"
34
#include "hw/pci/pci_bus.h"
P
Paolo Bonzini 已提交
35 36
#include "hw/nvram/fw_cfg.h"
#include "hw/timer/hpet.h"
37
#include "hw/smbios/smbios.h"
38
#include "hw/loader.h"
B
Blue Swirl 已提交
39
#include "elf.h"
40
#include "multiboot.h"
P
Paolo Bonzini 已提交
41 42 43
#include "hw/timer/mc146818rtc.h"
#include "hw/timer/i8254.h"
#include "hw/audio/pcspk.h"
44 45
#include "hw/pci/msi.h"
#include "hw/sysbus.h"
46
#include "sysemu/sysemu.h"
47
#include "sysemu/numa.h"
48
#include "sysemu/kvm.h"
49
#include "sysemu/qtest.h"
50
#include "kvm_i386.h"
P
Paolo Bonzini 已提交
51
#include "hw/xen/xen.h"
52
#include "sysemu/block-backend.h"
P
Paolo Bonzini 已提交
53
#include "hw/block/block.h"
G
Gerd Hoffmann 已提交
54
#include "ui/qemu-spice.h"
55 56
#include "exec/memory.h"
#include "exec/address-spaces.h"
57
#include "sysemu/arch_init.h"
58
#include "qemu/bitmap.h"
59
#include "qemu/config-file.h"
60
#include "qemu/error-report.h"
61
#include "hw/acpi/acpi.h"
62
#include "hw/acpi/cpu_hotplug.h"
63
#include "hw/boards.h"
64
#include "hw/pci/pci_host.h"
65
#include "acpi-build.h"
66
#include "hw/mem/pc-dimm.h"
67
#include "qapi/visitor.h"
68
#include "qapi-visit.h"
69
#include "qom/cpu.h"
B
bellard 已提交
70

B
Blue Swirl 已提交
71 72 73 74 75 76 77 78 79 80
/* debug PC/ISA interrupts */
//#define DEBUG_IRQ

#ifdef DEBUG_IRQ
#define DPRINTF(fmt, ...)                                       \
    do { printf("CPUIRQ: " fmt , ## __VA_ARGS__); } while (0)
#else
#define DPRINTF(fmt, ...)
#endif

81
#define FW_CFG_ACPI_TABLES (FW_CFG_ARCH_LOCAL + 0)
82
#define FW_CFG_SMBIOS_ENTRIES (FW_CFG_ARCH_LOCAL + 1)
J
Jes Sorensen 已提交
83
#define FW_CFG_IRQ0_OVERRIDE (FW_CFG_ARCH_LOCAL + 2)
J
Jes Sorensen 已提交
84
#define FW_CFG_E820_TABLE (FW_CFG_ARCH_LOCAL + 3)
85
#define FW_CFG_HPET (FW_CFG_ARCH_LOCAL + 4)
B
bellard 已提交
86

J
Jes Sorensen 已提交
87 88 89 90 91 92
#define E820_NR_ENTRIES		16

struct e820_entry {
    uint64_t address;
    uint64_t length;
    uint32_t type;
93
} QEMU_PACKED __attribute((__aligned__(4)));
J
Jes Sorensen 已提交
94 95 96 97

struct e820_table {
    uint32_t count;
    struct e820_entry entry[E820_NR_ENTRIES];
98
} QEMU_PACKED __attribute((__aligned__(4)));
J
Jes Sorensen 已提交
99

G
Gerd Hoffmann 已提交
100 101 102
static struct e820_table e820_reserve;
static struct e820_entry *e820_table;
static unsigned e820_entries;
B
Blue Swirl 已提交
103
struct hpet_fw_config hpet_cfg = {.count = UINT8_MAX};
J
Jes Sorensen 已提交
104

J
Jan Kiszka 已提交
105
void gsi_handler(void *opaque, int n, int level)
106
{
J
Jan Kiszka 已提交
107
    GSIState *s = opaque;
108

J
Jan Kiszka 已提交
109 110 111
    DPRINTF("pc: %s GSI %d\n", level ? "raising" : "lowering", n);
    if (n < ISA_NUM_IRQS) {
        qemu_set_irq(s->i8259_irq[n], level);
A
Avi Kivity 已提交
112
    }
J
Jan Kiszka 已提交
113
    qemu_set_irq(s->ioapic_irq[n], level);
114
}
115

J
Julien Grall 已提交
116 117
static void ioport80_write(void *opaque, hwaddr addr, uint64_t data,
                           unsigned size)
B
bellard 已提交
118 119 120
{
}

121 122
static uint64_t ioport80_read(void *opaque, hwaddr addr, unsigned size)
{
123
    return 0xffffffffffffffffULL;
124 125
}

126
/* MSDOS compatibility mode FPU exception support */
P
pbrook 已提交
127
static qemu_irq ferr_irq;
128 129 130 131 132 133

void pc_register_ferr_irq(qemu_irq irq)
{
    ferr_irq = irq;
}

134 135 136
/* XXX: add IGNNE support */
void cpu_set_ferr(CPUX86State *s)
{
P
pbrook 已提交
137
    qemu_irq_raise(ferr_irq);
138 139
}

J
Julien Grall 已提交
140 141
static void ioportF0_write(void *opaque, hwaddr addr, uint64_t data,
                           unsigned size)
142
{
P
pbrook 已提交
143
    qemu_irq_lower(ferr_irq);
144 145
}

146 147
static uint64_t ioportF0_read(void *opaque, hwaddr addr, unsigned size)
{
148
    return 0xffffffffffffffffULL;
149 150
}

B
bellard 已提交
151 152 153
/* TSC handling */
uint64_t cpu_get_tsc(CPUX86State *env)
{
154
    return cpu_get_ticks();
B
bellard 已提交
155 156
}

B
bellard 已提交
157
/* IRQ handling */
A
Andreas Färber 已提交
158
int cpu_get_pic_interrupt(CPUX86State *env)
B
bellard 已提交
159
{
160
    X86CPU *cpu = x86_env_get_cpu(env);
B
bellard 已提交
161 162
    int intno;

163
    intno = apic_get_interrupt(cpu->apic_state);
B
bellard 已提交
164 165 166 167
    if (intno >= 0) {
        return intno;
    }
    /* read the irq from the PIC */
168
    if (!apic_accept_pic_intr(cpu->apic_state)) {
169
        return -1;
170
    }
171

B
bellard 已提交
172 173 174 175
    intno = pic_read_irq(isa_pic);
    return intno;
}

P
pbrook 已提交
176
static void pic_irq_request(void *opaque, int irq, int level)
B
bellard 已提交
177
{
178 179
    CPUState *cs = first_cpu;
    X86CPU *cpu = X86_CPU(cs);
180

B
Blue Swirl 已提交
181
    DPRINTF("pic_irqs: %s irq %d\n", level? "raise" : "lower", irq);
182
    if (cpu->apic_state) {
A
Andreas Färber 已提交
183
        CPU_FOREACH(cs) {
184
            cpu = X86_CPU(cs);
185 186
            if (apic_accept_pic_intr(cpu->apic_state)) {
                apic_deliver_pic_intr(cpu->apic_state, level);
187
            }
A
aurel32 已提交
188 189
        }
    } else {
190
        if (level) {
191
            cpu_interrupt(cs, CPU_INTERRUPT_HARD);
192 193 194
        } else {
            cpu_reset_interrupt(cs, CPU_INTERRUPT_HARD);
        }
195
    }
B
bellard 已提交
196 197
}

B
bellard 已提交
198 199
/* PC cmos mappings */

B
bellard 已提交
200 201
#define REG_EQUIPMENT_BYTE          0x14

202
int cmos_get_fd_drive_type(FloppyDriveType fd0)
203 204 205 206
{
    int val;

    switch (fd0) {
J
John Snow 已提交
207
    case FLOPPY_DRIVE_TYPE_144:
208 209 210
        /* 1.44 Mb 3"5 drive */
        val = 4;
        break;
J
John Snow 已提交
211
    case FLOPPY_DRIVE_TYPE_288:
212 213 214
        /* 2.88 Mb 3"5 drive */
        val = 5;
        break;
J
John Snow 已提交
215
    case FLOPPY_DRIVE_TYPE_120:
216 217 218
        /* 1.2 Mb 5"5 drive */
        val = 2;
        break;
J
John Snow 已提交
219
    case FLOPPY_DRIVE_TYPE_NONE:
220 221 222 223 224 225 226
    default:
        val = 0;
        break;
    }
    return val;
}

227 228
static void cmos_init_hd(ISADevice *s, int type_ofs, int info_ofs,
                         int16_t cylinders, int8_t heads, int8_t sectors)
B
bellard 已提交
229 230 231 232 233 234 235 236 237 238 239 240 241
{
    rtc_set_memory(s, type_ofs, 47);
    rtc_set_memory(s, info_ofs, cylinders);
    rtc_set_memory(s, info_ofs + 1, cylinders >> 8);
    rtc_set_memory(s, info_ofs + 2, heads);
    rtc_set_memory(s, info_ofs + 3, 0xff);
    rtc_set_memory(s, info_ofs + 4, 0xff);
    rtc_set_memory(s, info_ofs + 5, 0xc0 | ((heads > 8) << 3));
    rtc_set_memory(s, info_ofs + 6, cylinders);
    rtc_set_memory(s, info_ofs + 7, cylinders >> 8);
    rtc_set_memory(s, info_ofs + 8, sectors);
}

242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258
/* convert boot_device letter to something recognizable by the bios */
static int boot_device2nibble(char boot_device)
{
    switch(boot_device) {
    case 'a':
    case 'b':
        return 0x01; /* floppy boot */
    case 'c':
        return 0x02; /* hard drive boot */
    case 'd':
        return 0x03; /* CD-ROM boot */
    case 'n':
        return 0x04; /* Network boot */
    }
    return 0;
}

259
static void set_boot_dev(ISADevice *s, const char *boot_device, Error **errp)
260 261 262 263 264 265 266
{
#define PC_MAX_BOOT_DEVICES 3
    int nbds, bds[3] = { 0, };
    int i;

    nbds = strlen(boot_device);
    if (nbds > PC_MAX_BOOT_DEVICES) {
267 268
        error_setg(errp, "Too many boot devices for PC");
        return;
269 270 271 272
    }
    for (i = 0; i < nbds; i++) {
        bds[i] = boot_device2nibble(boot_device[i]);
        if (bds[i] == 0) {
273 274 275
            error_setg(errp, "Invalid boot device for PC: '%c'",
                       boot_device[i]);
            return;
276 277 278
        }
    }
    rtc_set_memory(s, 0x3d, (bds[1] << 4) | bds[0]);
279
    rtc_set_memory(s, 0x38, (bds[2] << 4) | (fd_bootchk ? 0x0 : 0x1));
280 281
}

282
static void pc_boot_set(void *opaque, const char *boot_device, Error **errp)
283
{
284
    set_boot_dev(opaque, boot_device, errp);
285 286
}

287 288 289
static void pc_cmos_init_floppy(ISADevice *rtc_state, ISADevice *floppy)
{
    int val, nb, i;
J
John Snow 已提交
290 291
    FloppyDriveType fd_type[2] = { FLOPPY_DRIVE_TYPE_NONE,
                                   FLOPPY_DRIVE_TYPE_NONE };
292 293 294 295 296 297 298 299 300 301 302 303 304

    /* floppy type */
    if (floppy) {
        for (i = 0; i < 2; i++) {
            fd_type[i] = isa_fdc_get_drive_type(floppy, i);
        }
    }
    val = (cmos_get_fd_drive_type(fd_type[0]) << 4) |
        cmos_get_fd_drive_type(fd_type[1]);
    rtc_set_memory(rtc_state, 0x10, val);

    val = rtc_get_memory(rtc_state, REG_EQUIPMENT_BYTE);
    nb = 0;
J
John Snow 已提交
305
    if (fd_type[0] != FLOPPY_DRIVE_TYPE_NONE) {
306 307
        nb++;
    }
J
John Snow 已提交
308
    if (fd_type[1] != FLOPPY_DRIVE_TYPE_NONE) {
309 310 311 312 313 314 315 316 317 318 319 320 321 322 323
        nb++;
    }
    switch (nb) {
    case 0:
        break;
    case 1:
        val |= 0x01; /* 1 drive, ready for boot */
        break;
    case 2:
        val |= 0x41; /* 2 drives, ready for boot */
        break;
    }
    rtc_set_memory(rtc_state, REG_EQUIPMENT_BYTE, val);
}

324 325
typedef struct pc_cmos_init_late_arg {
    ISADevice *rtc_state;
326
    BusState *idebus[2];
327 328
} pc_cmos_init_late_arg;

329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363
typedef struct check_fdc_state {
    ISADevice *floppy;
    bool multiple;
} CheckFdcState;

static int check_fdc(Object *obj, void *opaque)
{
    CheckFdcState *state = opaque;
    Object *fdc;
    uint32_t iobase;
    Error *local_err = NULL;

    fdc = object_dynamic_cast(obj, TYPE_ISA_FDC);
    if (!fdc) {
        return 0;
    }

    iobase = object_property_get_int(obj, "iobase", &local_err);
    if (local_err || iobase != 0x3f0) {
        error_free(local_err);
        return 0;
    }

    if (state->floppy) {
        state->multiple = true;
    } else {
        state->floppy = ISA_DEVICE(obj);
    }
    return 0;
}

static const char * const fdc_container_path[] = {
    "/unattached", "/peripheral", "/peripheral-anon"
};

364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380
/*
 * Locate the FDC at IO address 0x3f0, in order to configure the CMOS registers
 * and ACPI objects.
 */
ISADevice *pc_find_fdc0(void)
{
    int i;
    Object *container;
    CheckFdcState state = { 0 };

    for (i = 0; i < ARRAY_SIZE(fdc_container_path); i++) {
        container = container_get(qdev_get_machine(), fdc_container_path[i]);
        object_child_foreach(container, check_fdc, &state);
    }

    if (state.multiple) {
        error_report("warning: multiple floppy disk controllers with "
381 382
                     "iobase=0x3f0 have been found");
        error_printf("the one being picked for CMOS setup might not reflect "
383 384 385 386 387 388
                     "your intent");
    }

    return state.floppy;
}

389 390 391 392
static void pc_cmos_init_late(void *opaque)
{
    pc_cmos_init_late_arg *arg = opaque;
    ISADevice *s = arg->rtc_state;
393 394
    int16_t cylinders;
    int8_t heads, sectors;
395
    int val;
396
    int i, trans;
397

398 399 400 401 402 403 404 405 406 407 408 409
    val = 0;
    if (ide_get_geometry(arg->idebus[0], 0,
                         &cylinders, &heads, &sectors) >= 0) {
        cmos_init_hd(s, 0x19, 0x1b, cylinders, heads, sectors);
        val |= 0xf0;
    }
    if (ide_get_geometry(arg->idebus[0], 1,
                         &cylinders, &heads, &sectors) >= 0) {
        cmos_init_hd(s, 0x1a, 0x24, cylinders, heads, sectors);
        val |= 0x0f;
    }
    rtc_set_memory(s, 0x12, val);
410 411 412

    val = 0;
    for (i = 0; i < 4; i++) {
413 414 415 416 417 418
        /* NOTE: ide_get_geometry() returns the physical
           geometry.  It is always such that: 1 <= sects <= 63, 1
           <= heads <= 16, 1 <= cylinders <= 16383. The BIOS
           geometry can be different if a translation is done. */
        if (ide_get_geometry(arg->idebus[i / 2], i % 2,
                             &cylinders, &heads, &sectors) >= 0) {
419 420 421
            trans = ide_get_bios_chs_trans(arg->idebus[i / 2], i % 2) - 1;
            assert((trans & ~3) == 0);
            val |= trans << (i * 2);
422 423 424 425
        }
    }
    rtc_set_memory(s, 0x39, val);

426
    pc_cmos_init_floppy(s, pc_find_fdc0());
427

428 429 430
    qemu_unregister_reset(pc_cmos_init_late, opaque);
}

431
void pc_cmos_init(PCMachineState *pcms,
432
                  BusState *idebus0, BusState *idebus1,
B
Blue Swirl 已提交
433
                  ISADevice *s)
B
bellard 已提交
434
{
435
    int val;
436
    static pc_cmos_init_late_arg arg;
B
bellard 已提交
437 438

    /* various important CMOS locations needed by PC/Bochs bios */
B
bellard 已提交
439 440

    /* memory size */
441
    /* base memory (first MiB) */
442
    val = MIN(pcms->below_4g_mem_size / 1024, 640);
B
bellard 已提交
443 444
    rtc_set_memory(s, 0x15, val);
    rtc_set_memory(s, 0x16, val >> 8);
445
    /* extended memory (next 64MiB) */
446 447
    if (pcms->below_4g_mem_size > 1024 * 1024) {
        val = (pcms->below_4g_mem_size - 1024 * 1024) / 1024;
448 449 450
    } else {
        val = 0;
    }
B
bellard 已提交
451 452
    if (val > 65535)
        val = 65535;
B
bellard 已提交
453 454 455 456
    rtc_set_memory(s, 0x17, val);
    rtc_set_memory(s, 0x18, val >> 8);
    rtc_set_memory(s, 0x30, val);
    rtc_set_memory(s, 0x31, val >> 8);
457
    /* memory between 16MiB and 4GiB */
458 459
    if (pcms->below_4g_mem_size > 16 * 1024 * 1024) {
        val = (pcms->below_4g_mem_size - 16 * 1024 * 1024) / 65536;
460
    } else {
B
bellard 已提交
461
        val = 0;
462
    }
B
bellard 已提交
463 464
    if (val > 65535)
        val = 65535;
B
bellard 已提交
465 466
    rtc_set_memory(s, 0x34, val);
    rtc_set_memory(s, 0x35, val >> 8);
467
    /* memory above 4GiB */
468
    val = pcms->above_4g_mem_size / 65536;
469 470 471
    rtc_set_memory(s, 0x5b, val);
    rtc_set_memory(s, 0x5c, val >> 8);
    rtc_set_memory(s, 0x5d, val >> 16);
472

A
aurel32 已提交
473 474
    /* set the number of CPU */
    rtc_set_memory(s, 0x5f, smp_cpus - 1);
G
Gu Zheng 已提交
475

476
    object_property_add_link(OBJECT(pcms), "rtc_state",
G
Gu Zheng 已提交
477
                             TYPE_ISA_DEVICE,
478
                             (Object **)&pcms->rtc,
G
Gu Zheng 已提交
479 480
                             object_property_allow_set_link,
                             OBJ_PROP_LINK_UNREF_ON_RELEASE, &error_abort);
481
    object_property_set_link(OBJECT(pcms), OBJECT(s),
G
Gu Zheng 已提交
482
                             "rtc_state", &error_abort);
A
aurel32 已提交
483

484
    set_boot_dev(s, MACHINE(pcms)->boot_order, &error_fatal);
B
bellard 已提交
485

B
bellard 已提交
486 487 488 489 490
    val = 0;
    val |= 0x02; /* FPU is there */
    val |= 0x04; /* PS/2 mouse installed */
    rtc_set_memory(s, REG_EQUIPMENT_BYTE, val);

491
    /* hard drives and FDC */
492
    arg.rtc_state = s;
493 494
    arg.idebus[0] = idebus0;
    arg.idebus[1] = idebus1;
495
    qemu_register_reset(pc_cmos_init_late, &arg);
B
bellard 已提交
496 497
}

A
Andreas Färber 已提交
498 499 500
#define TYPE_PORT92 "port92"
#define PORT92(obj) OBJECT_CHECK(Port92State, (obj), TYPE_PORT92)

501 502
/* port 92 stuff: could be split off */
typedef struct Port92State {
A
Andreas Färber 已提交
503 504
    ISADevice parent_obj;

505
    MemoryRegion io;
506 507 508 509
    uint8_t outport;
    qemu_irq *a20_out;
} Port92State;

510 511
static void port92_write(void *opaque, hwaddr addr, uint64_t val,
                         unsigned size)
512 513
{
    Port92State *s = opaque;
514
    int oldval = s->outport;
515

G
Gonglei 已提交
516
    DPRINTF("port92: write 0x%02" PRIx64 "\n", val);
517 518
    s->outport = val;
    qemu_set_irq(*s->a20_out, (val >> 1) & 1);
519
    if ((val & 1) && !(oldval & 1)) {
520 521 522 523
        qemu_system_reset_request();
    }
}

524 525
static uint64_t port92_read(void *opaque, hwaddr addr,
                            unsigned size)
526 527 528 529 530 531 532 533 534 535 536
{
    Port92State *s = opaque;
    uint32_t ret;

    ret = s->outport;
    DPRINTF("port92: read 0x%02x\n", ret);
    return ret;
}

static void port92_init(ISADevice *dev, qemu_irq *a20_out)
{
A
Andreas Färber 已提交
537
    Port92State *s = PORT92(dev);
538 539 540 541 542 543 544 545

    s->a20_out = a20_out;
}

static const VMStateDescription vmstate_port92_isa = {
    .name = "port92",
    .version_id = 1,
    .minimum_version_id = 1,
546
    .fields = (VMStateField[]) {
547 548 549 550 551 552 553
        VMSTATE_UINT8(outport, Port92State),
        VMSTATE_END_OF_LIST()
    }
};

static void port92_reset(DeviceState *d)
{
A
Andreas Färber 已提交
554
    Port92State *s = PORT92(d);
555 556 557 558

    s->outport &= ~1;
}

559
static const MemoryRegionOps port92_ops = {
560 561 562 563 564 565 566
    .read = port92_read,
    .write = port92_write,
    .impl = {
        .min_access_size = 1,
        .max_access_size = 1,
    },
    .endianness = DEVICE_LITTLE_ENDIAN,
567 568
};

569
static void port92_initfn(Object *obj)
570
{
571
    Port92State *s = PORT92(obj);
572

573
    memory_region_init_io(&s->io, OBJECT(s), &port92_ops, s, "port92", 1);
574

575
    s->outport = 0;
576 577 578 579 580 581 582 583
}

static void port92_realizefn(DeviceState *dev, Error **errp)
{
    ISADevice *isadev = ISA_DEVICE(dev);
    Port92State *s = PORT92(dev);

    isa_register_ioport(isadev, &s->io, 0x92);
584 585
}

586 587
static void port92_class_initfn(ObjectClass *klass, void *data)
{
588
    DeviceClass *dc = DEVICE_CLASS(klass);
589 590

    dc->realize = port92_realizefn;
591 592
    dc->reset = port92_reset;
    dc->vmsd = &vmstate_port92_isa;
593 594 595 596 597 598
    /*
     * Reason: unlike ordinary ISA devices, this one needs additional
     * wiring: its A20 output line needs to be wired up by
     * port92_init().
     */
    dc->cannot_instantiate_with_device_add_yet = true;
599 600
}

601
static const TypeInfo port92_info = {
A
Andreas Färber 已提交
602
    .name          = TYPE_PORT92,
603 604
    .parent        = TYPE_ISA_DEVICE,
    .instance_size = sizeof(Port92State),
605
    .instance_init = port92_initfn,
606
    .class_init    = port92_class_initfn,
607 608
};

A
Andreas Färber 已提交
609
static void port92_register_types(void)
610
{
611
    type_register_static(&port92_info);
612
}
A
Andreas Färber 已提交
613 614

type_init(port92_register_types)
615

B
Blue Swirl 已提交
616
static void handle_a20_line_change(void *opaque, int irq, int level)
617
{
618
    X86CPU *cpu = opaque;
B
bellard 已提交
619

B
Blue Swirl 已提交
620
    /* XXX: send to all CPUs ? */
621
    /* XXX: add logic to handle multiple A20 line sources */
622
    x86_cpu_set_a20(cpu, level);
B
bellard 已提交
623 624
}

J
Jes Sorensen 已提交
625 626
int e820_add_entry(uint64_t address, uint64_t length, uint32_t type)
{
G
Gerd Hoffmann 已提交
627
    int index = le32_to_cpu(e820_reserve.count);
J
Jes Sorensen 已提交
628 629
    struct e820_entry *entry;

G
Gerd Hoffmann 已提交
630 631 632 633 634 635 636 637 638 639 640 641 642
    if (type != E820_RAM) {
        /* old FW_CFG_E820_TABLE entry -- reservations only */
        if (index >= E820_NR_ENTRIES) {
            return -EBUSY;
        }
        entry = &e820_reserve.entry[index++];

        entry->address = cpu_to_le64(address);
        entry->length = cpu_to_le64(length);
        entry->type = cpu_to_le32(type);

        e820_reserve.count = cpu_to_le32(index);
    }
J
Jes Sorensen 已提交
643

G
Gerd Hoffmann 已提交
644
    /* new "etc/e820" file -- include ram too */
645
    e820_table = g_renew(struct e820_entry, e820_table, e820_entries + 1);
G
Gerd Hoffmann 已提交
646 647 648 649
    e820_table[e820_entries].address = cpu_to_le64(address);
    e820_table[e820_entries].length = cpu_to_le64(length);
    e820_table[e820_entries].type = cpu_to_le32(type);
    e820_entries++;
J
Jes Sorensen 已提交
650

G
Gerd Hoffmann 已提交
651
    return e820_entries;
J
Jes Sorensen 已提交
652 653
}

654 655 656 657 658 659 660 661 662 663 664 665 666 667 668
int e820_get_num_entries(void)
{
    return e820_entries;
}

bool e820_get_entry(int idx, uint32_t type, uint64_t *address, uint64_t *length)
{
    if (idx < e820_entries && e820_table[idx].type == cpu_to_le32(type)) {
        *address = le64_to_cpu(e820_table[idx].address);
        *length = le64_to_cpu(e820_table[idx].length);
        return true;
    }
    return false;
}

669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690
/* Enables contiguous-apic-ID mode, for compatibility */
static bool compat_apic_id_mode;

void enable_compat_apic_id_mode(void)
{
    compat_apic_id_mode = true;
}

/* Calculates initial APIC ID for a specific CPU index
 *
 * Currently we need to be able to calculate the APIC ID from the CPU index
 * alone (without requiring a CPU object), as the QEMU<->Seabios interfaces have
 * no concept of "CPU index", and the NUMA tables on fw_cfg need the APIC ID of
 * all CPUs up to max_cpus.
 */
static uint32_t x86_cpu_apic_id_from_index(unsigned int cpu_index)
{
    uint32_t correct_id;
    static bool warned;

    correct_id = x86_apicid_from_cpu_idx(smp_cores, smp_threads, cpu_index);
    if (compat_apic_id_mode) {
691
        if (cpu_index != correct_id && !warned && !qtest_enabled()) {
692 693 694 695 696 697 698 699 700 701
            error_report("APIC IDs set in compatibility mode, "
                         "CPU topology won't match the configuration");
            warned = true;
        }
        return cpu_index;
    } else {
        return correct_id;
    }
}

702
static void pc_build_smbios(FWCfgState *fw_cfg)
B
bellard 已提交
703
{
704 705
    uint8_t *smbios_tables, *smbios_anchor;
    size_t smbios_tables_len, smbios_anchor_len;
706 707
    struct smbios_phys_mem_area *mem_array;
    unsigned i, array_count;
708 709 710 711 712 713 714

    smbios_tables = smbios_get_table_legacy(&smbios_tables_len);
    if (smbios_tables) {
        fw_cfg_add_bytes(fw_cfg, FW_CFG_SMBIOS_ENTRIES,
                         smbios_tables, smbios_tables_len);
    }

715 716 717 718 719 720 721 722 723 724 725 726 727
    /* build the array of physical mem area from e820 table */
    mem_array = g_malloc0(sizeof(*mem_array) * e820_get_num_entries());
    for (i = 0, array_count = 0; i < e820_get_num_entries(); i++) {
        uint64_t addr, len;

        if (e820_get_entry(i, E820_RAM, &addr, &len)) {
            mem_array[array_count].address = addr;
            mem_array[array_count].length = len;
            array_count++;
        }
    }
    smbios_get_tables(mem_array, array_count,
                      &smbios_tables, &smbios_tables_len,
728
                      &smbios_anchor, &smbios_anchor_len);
729 730
    g_free(mem_array);

731 732 733 734 735 736 737 738
    if (smbios_anchor) {
        fw_cfg_add_file(fw_cfg, "etc/smbios/smbios-tables",
                        smbios_tables, smbios_tables_len);
        fw_cfg_add_file(fw_cfg, "etc/smbios/smbios-anchor",
                        smbios_anchor, smbios_anchor_len);
    }
}

739
static FWCfgState *bochs_bios_init(AddressSpace *as, PCMachineState *pcms)
740 741
{
    FWCfgState *fw_cfg;
742 743
    uint64_t *numa_fw_cfg;
    int i, j;
744

745
    fw_cfg = fw_cfg_init_io_dma(FW_CFG_IO_BASE, FW_CFG_IO_BASE + 4, as);
M
Marc Marí 已提交
746

747 748 749 750 751 752 753 754 755 756 757 758 759 760
    /* FW_CFG_MAX_CPUS is a bit confusing/problematic on x86:
     *
     * SeaBIOS needs FW_CFG_MAX_CPUS for CPU hotplug, but the CPU hotplug
     * QEMU<->SeaBIOS interface is not based on the "CPU index", but on the APIC
     * ID of hotplugged CPUs[1]. This means that FW_CFG_MAX_CPUS is not the
     * "maximum number of CPUs", but the "limit to the APIC ID values SeaBIOS
     * may see".
     *
     * So, this means we must not use max_cpus, here, but the maximum possible
     * APIC ID value, plus one.
     *
     * [1] The only kind of "CPU identifier" used between SeaBIOS and QEMU is
     *     the APIC ID, not the "CPU index"
     */
761
    fw_cfg_add_i16(fw_cfg, FW_CFG_MAX_CPUS, (uint16_t)pcms->apic_id_limit);
762
    fw_cfg_add_i64(fw_cfg, FW_CFG_RAM_SIZE, (uint64_t)ram_size);
763 764
    fw_cfg_add_bytes(fw_cfg, FW_CFG_ACPI_TABLES,
                     acpi_tables, acpi_tables_len);
765
    fw_cfg_add_i32(fw_cfg, FW_CFG_IRQ0_OVERRIDE, kvm_allows_irq0_override());
766

767
    pc_build_smbios(fw_cfg);
768

769
    fw_cfg_add_bytes(fw_cfg, FW_CFG_E820_TABLE,
G
Gerd Hoffmann 已提交
770 771 772
                     &e820_reserve, sizeof(e820_reserve));
    fw_cfg_add_file(fw_cfg, "etc/e820", e820_table,
                    sizeof(struct e820_entry) * e820_entries);
773

774
    fw_cfg_add_bytes(fw_cfg, FW_CFG_HPET, &hpet_cfg, sizeof(hpet_cfg));
775 776 777 778
    /* allocate memory for the NUMA channel: one (64bit) word for the number
     * of nodes, one word for each VCPU->node and one word for each node to
     * hold the amount of memory.
     */
779
    numa_fw_cfg = g_new0(uint64_t, 1 + pcms->apic_id_limit + nb_numa_nodes);
780
    numa_fw_cfg[0] = cpu_to_le64(nb_numa_nodes);
781
    for (i = 0; i < max_cpus; i++) {
782
        unsigned int apic_id = x86_cpu_apic_id_from_index(i);
783
        assert(apic_id < pcms->apic_id_limit);
784
        for (j = 0; j < nb_numa_nodes; j++) {
785
            if (test_bit(i, numa_info[j].node_cpu)) {
786
                numa_fw_cfg[apic_id + 1] = cpu_to_le64(j);
787 788 789 790 791
                break;
            }
        }
    }
    for (i = 0; i < nb_numa_nodes; i++) {
792 793
        numa_fw_cfg[pcms->apic_id_limit + 1 + i] =
            cpu_to_le64(numa_info[i].node_mem);
794
    }
795
    fw_cfg_add_bytes(fw_cfg, FW_CFG_NUMA, numa_fw_cfg,
796
                     (1 + pcms->apic_id_limit + nb_numa_nodes) *
797
                     sizeof(*numa_fw_cfg));
A
Alexander Graf 已提交
798 799

    return fw_cfg;
B
bellard 已提交
800 801
}

T
ths 已提交
802 803 804 805 806 807 808 809 810 811 812 813 814 815
static long get_file_size(FILE *f)
{
    long where, size;

    /* XXX: on Unix systems, using fstat() probably makes more sense */

    where = ftell(f);
    fseek(f, 0, SEEK_END);
    size = ftell(f);
    fseek(f, where, SEEK_SET);

    return size;
}

816 817
static void load_linux(PCMachineState *pcms,
                       FWCfgState *fw_cfg)
T
ths 已提交
818 819
{
    uint16_t protocol;
P
Paul Brook 已提交
820
    int setup_size, kernel_size, initrd_size = 0, cmdline_size;
T
ths 已提交
821
    uint32_t initrd_max;
822
    uint8_t header[8192], *setup, *kernel, *initrd_data;
A
Avi Kivity 已提交
823
    hwaddr real_addr, prot_addr, cmdline_addr, initrd_addr = 0;
824
    FILE *f;
P
Pascal Terjan 已提交
825
    char *vmode;
826
    MachineState *machine = MACHINE(pcms);
827
    PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms);
828 829 830
    const char *kernel_filename = machine->kernel_filename;
    const char *initrd_filename = machine->initrd_filename;
    const char *kernel_cmdline = machine->kernel_cmdline;
T
ths 已提交
831 832 833 834 835 836 837

    /* Align to 16 bytes as a paranoia measure */
    cmdline_size = (strlen(kernel_cmdline)+16) & ~15;

    /* load the kernel header */
    f = fopen(kernel_filename, "rb");
    if (!f || !(kernel_size = get_file_size(f)) ||
L
liguang 已提交
838 839 840 841 842
        fread(header, 1, MIN(ARRAY_SIZE(header), kernel_size), f) !=
        MIN(ARRAY_SIZE(header), kernel_size)) {
        fprintf(stderr, "qemu: could not load kernel '%s': %s\n",
                kernel_filename, strerror(errno));
        exit(1);
T
ths 已提交
843 844 845
    }

    /* kernel protocol version */
B
bellard 已提交
846
#if 0
T
ths 已提交
847
    fprintf(stderr, "header magic: %#x\n", ldl_p(header+0x202));
B
bellard 已提交
848
#endif
L
liguang 已提交
849 850 851 852 853
    if (ldl_p(header+0x202) == 0x53726448) {
        protocol = lduw_p(header+0x206);
    } else {
        /* This looks like a multiboot kernel. If it is, let's stop
           treating it like a Linux kernel. */
854
        if (load_multiboot(fw_cfg, f, kernel_filename, initrd_filename,
L
liguang 已提交
855
                           kernel_cmdline, kernel_size, header)) {
B
Blue Swirl 已提交
856
            return;
L
liguang 已提交
857 858
        }
        protocol = 0;
A
Alexander Graf 已提交
859
    }
T
ths 已提交
860 861

    if (protocol < 0x200 || !(header[0x211] & 0x01)) {
L
liguang 已提交
862 863 864 865
        /* Low kernel */
        real_addr    = 0x90000;
        cmdline_addr = 0x9a000 - cmdline_size;
        prot_addr    = 0x10000;
T
ths 已提交
866
    } else if (protocol < 0x202) {
L
liguang 已提交
867 868 869 870
        /* High but ancient kernel */
        real_addr    = 0x90000;
        cmdline_addr = 0x9a000 - cmdline_size;
        prot_addr    = 0x100000;
T
ths 已提交
871
    } else {
L
liguang 已提交
872 873 874 875
        /* High and recent kernel */
        real_addr    = 0x10000;
        cmdline_addr = 0x20000;
        prot_addr    = 0x100000;
T
ths 已提交
876 877
    }

B
bellard 已提交
878
#if 0
T
ths 已提交
879
    fprintf(stderr,
L
liguang 已提交
880 881 882 883 884 885
            "qemu: real_addr     = 0x" TARGET_FMT_plx "\n"
            "qemu: cmdline_addr  = 0x" TARGET_FMT_plx "\n"
            "qemu: prot_addr     = 0x" TARGET_FMT_plx "\n",
            real_addr,
            cmdline_addr,
            prot_addr);
B
bellard 已提交
886
#endif
T
ths 已提交
887 888

    /* highest address for loading the initrd */
L
liguang 已提交
889 890 891 892 893
    if (protocol >= 0x203) {
        initrd_max = ldl_p(header+0x22c);
    } else {
        initrd_max = 0x37ffffff;
    }
T
ths 已提交
894

895 896
    if (initrd_max >= pcms->below_4g_mem_size - pcmc->acpi_data_size) {
        initrd_max = pcms->below_4g_mem_size - pcmc->acpi_data_size - 1;
897
    }
T
ths 已提交
898

899 900
    fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_ADDR, cmdline_addr);
    fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_SIZE, strlen(kernel_cmdline)+1);
901
    fw_cfg_add_string(fw_cfg, FW_CFG_CMDLINE_DATA, kernel_cmdline);
T
ths 已提交
902 903

    if (protocol >= 0x202) {
L
liguang 已提交
904
        stl_p(header+0x228, cmdline_addr);
T
ths 已提交
905
    } else {
L
liguang 已提交
906 907
        stw_p(header+0x20, 0xA33F);
        stw_p(header+0x22, cmdline_addr-real_addr);
T
ths 已提交
908 909
    }

P
Pascal Terjan 已提交
910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927
    /* handle vga= parameter */
    vmode = strstr(kernel_cmdline, "vga=");
    if (vmode) {
        unsigned int video_mode;
        /* skip "vga=" */
        vmode += 4;
        if (!strncmp(vmode, "normal", 6)) {
            video_mode = 0xffff;
        } else if (!strncmp(vmode, "ext", 3)) {
            video_mode = 0xfffe;
        } else if (!strncmp(vmode, "ask", 3)) {
            video_mode = 0xfffd;
        } else {
            video_mode = strtol(vmode, NULL, 0);
        }
        stw_p(header+0x1fa, video_mode);
    }

T
ths 已提交
928
    /* loader type */
S
Stefan Weil 已提交
929
    /* High nybble = B reserved for QEMU; low nybble is revision number.
T
ths 已提交
930 931
       If this code is substantially changed, you may want to consider
       incrementing the revision. */
L
liguang 已提交
932 933 934
    if (protocol >= 0x200) {
        header[0x210] = 0xB0;
    }
T
ths 已提交
935 936
    /* heap */
    if (protocol >= 0x201) {
L
liguang 已提交
937 938
        header[0x211] |= 0x80;	/* CAN_USE_HEAP */
        stw_p(header+0x224, cmdline_addr-real_addr-0x200);
T
ths 已提交
939 940 941 942
    }

    /* load initrd */
    if (initrd_filename) {
L
liguang 已提交
943 944 945 946
        if (protocol < 0x200) {
            fprintf(stderr, "qemu: linux kernel too old to load a ram disk\n");
            exit(1);
        }
T
ths 已提交
947

L
liguang 已提交
948
        initrd_size = get_image_size(initrd_filename);
M
M. Mohan Kumar 已提交
949
        if (initrd_size < 0) {
950 951
            fprintf(stderr, "qemu: error reading initrd %s: %s\n",
                    initrd_filename, strerror(errno));
M
M. Mohan Kumar 已提交
952 953 954
            exit(1);
        }

955
        initrd_addr = (initrd_max-initrd_size) & ~4095;
956

957
        initrd_data = g_malloc(initrd_size);
958 959 960 961 962
        load_image(initrd_filename, initrd_data);

        fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_ADDR, initrd_addr);
        fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_SIZE, initrd_size);
        fw_cfg_add_bytes(fw_cfg, FW_CFG_INITRD_DATA, initrd_data, initrd_size);
T
ths 已提交
963

L
liguang 已提交
964 965
        stl_p(header+0x218, initrd_addr);
        stl_p(header+0x21c, initrd_size);
T
ths 已提交
966 967
    }

968
    /* load kernel and setup */
T
ths 已提交
969
    setup_size = header[0x1f1];
L
liguang 已提交
970 971 972
    if (setup_size == 0) {
        setup_size = 4;
    }
T
ths 已提交
973
    setup_size = (setup_size+1)*512;
974 975 976 977
    if (setup_size > kernel_size) {
        fprintf(stderr, "qemu: invalid kernel header\n");
        exit(1);
    }
978
    kernel_size -= setup_size;
T
ths 已提交
979

980 981
    setup  = g_malloc(setup_size);
    kernel = g_malloc(kernel_size);
982
    fseek(f, 0, SEEK_SET);
983 984 985 986 987 988 989 990
    if (fread(setup, 1, setup_size, f) != setup_size) {
        fprintf(stderr, "fread() failed\n");
        exit(1);
    }
    if (fread(kernel, 1, kernel_size, f) != kernel_size) {
        fprintf(stderr, "fread() failed\n");
        exit(1);
    }
T
ths 已提交
991
    fclose(f);
992
    memcpy(setup, header, MIN(sizeof(header), setup_size));
993 994 995 996 997 998 999 1000 1001

    fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ADDR, prot_addr);
    fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_SIZE, kernel_size);
    fw_cfg_add_bytes(fw_cfg, FW_CFG_KERNEL_DATA, kernel, kernel_size);

    fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_ADDR, real_addr);
    fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_SIZE, setup_size);
    fw_cfg_add_bytes(fw_cfg, FW_CFG_SETUP_DATA, setup, setup_size);

G
Gleb Natapov 已提交
1002 1003
    option_rom[nb_option_roms].name = "linuxboot.bin";
    option_rom[nb_option_roms].bootindex = 0;
1004
    nb_option_roms++;
T
ths 已提交
1005 1006
}

B
bellard 已提交
1007 1008
#define NE2000_NB_MAX 6

B
Blue Swirl 已提交
1009 1010 1011
static const int ne2000_io[NE2000_NB_MAX] = { 0x300, 0x320, 0x340, 0x360,
                                              0x280, 0x380 };
static const int ne2000_irq[NE2000_NB_MAX] = { 9, 10, 11, 3, 4, 5 };
B
bellard 已提交
1012

1013
void pc_init_ne2k_isa(ISABus *bus, NICInfo *nd)
1014 1015 1016 1017 1018
{
    static int nb_ne2k = 0;

    if (nb_ne2k == NE2000_NB_MAX)
        return;
1019
    isa_ne2000_init(bus, ne2000_io[nb_ne2k],
G
Gerd Hoffmann 已提交
1020
                    ne2000_irq[nb_ne2k], nd);
1021 1022 1023
    nb_ne2k++;
}

B
Blue Swirl 已提交
1024
DeviceState *cpu_get_current_apic(void)
1025
{
1026 1027
    if (current_cpu) {
        X86CPU *cpu = X86_CPU(current_cpu);
1028
        return cpu->apic_state;
1029 1030 1031 1032 1033
    } else {
        return NULL;
    }
}

1034
void pc_acpi_smi_interrupt(void *opaque, int irq, int level)
B
Blue Swirl 已提交
1035
{
1036
    X86CPU *cpu = opaque;
B
Blue Swirl 已提交
1037 1038

    if (level) {
1039
        cpu_interrupt(CPU(cpu), CPU_INTERRUPT_SMI);
B
Blue Swirl 已提交
1040 1041 1042
    }
}

1043
static X86CPU *pc_new_cpu(const char *cpu_model, int64_t apic_id,
C
Chen Fan 已提交
1044
                          Error **errp)
1045
{
1046
    X86CPU *cpu = NULL;
1047 1048
    Error *local_err = NULL;

1049
    cpu = cpu_x86_create(cpu_model, &local_err);
1050
    if (local_err != NULL) {
1051
        goto out;
1052 1053 1054 1055 1056
    }

    object_property_set_int(OBJECT(cpu), apic_id, "apic-id", &local_err);
    object_property_set_bool(OBJECT(cpu), true, "realized", &local_err);

1057
out:
1058 1059
    if (local_err) {
        error_propagate(errp, local_err);
1060 1061
        object_unref(OBJECT(cpu));
        cpu = NULL;
1062 1063 1064 1065
    }
    return cpu;
}

1066 1067
void pc_hot_add_cpu(const int64_t id, Error **errp)
{
1068
    X86CPU *cpu;
1069
    MachineState *machine = MACHINE(qdev_get_machine());
1070
    int64_t apic_id = x86_cpu_apic_id_from_index(id);
1071
    Error *local_err = NULL;
1072

1073 1074 1075 1076 1077
    if (id < 0) {
        error_setg(errp, "Invalid CPU id: %" PRIi64, id);
        return;
    }

1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089
    if (cpu_exists(apic_id)) {
        error_setg(errp, "Unable to add CPU: %" PRIi64
                   ", it already exists", id);
        return;
    }

    if (id >= max_cpus) {
        error_setg(errp, "Unable to add CPU: %" PRIi64
                   ", max allowed: %d", id, max_cpus - 1);
        return;
    }

1090 1091 1092 1093 1094 1095 1096
    if (apic_id >= ACPI_CPU_HOTPLUG_ID_LIMIT) {
        error_setg(errp, "Unable to add CPU: %" PRIi64
                   ", resulting APIC ID (%" PRIi64 ") is too large",
                   id, apic_id);
        return;
    }

1097
    cpu = pc_new_cpu(machine->cpu_model, apic_id, &local_err);
1098 1099 1100 1101 1102
    if (local_err) {
        error_propagate(errp, local_err);
        return;
    }
    object_unref(OBJECT(cpu));
1103 1104
}

1105
void pc_cpus_init(PCMachineState *pcms)
1106 1107
{
    int i;
1108
    X86CPU *cpu = NULL;
1109
    MachineState *machine = MACHINE(pcms);
1110 1111

    /* init CPUs */
1112
    if (machine->cpu_model == NULL) {
1113
#ifdef TARGET_X86_64
1114
        machine->cpu_model = "qemu64";
1115
#else
1116
        machine->cpu_model = "qemu32";
1117 1118 1119
#endif
    }

1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130
    /* Calculates the limit to CPU APIC ID values
     *
     * Limit for the APIC ID value, so that all
     * CPU APIC IDs are < pcms->apic_id_limit.
     *
     * This is used for FW_CFG_MAX_CPUS. See comments on bochs_bios_init().
     */
    pcms->apic_id_limit = x86_cpu_apic_id_from_index(max_cpus - 1) + 1;
    if (pcms->apic_id_limit > ACPI_CPU_HOTPLUG_ID_LIMIT) {
        error_report("max_cpus is too large. APIC ID of last CPU is %u",
                     pcms->apic_id_limit - 1);
1131 1132 1133
        exit(1);
    }

1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144
    pcms->possible_cpus = g_malloc0(sizeof(CPUArchIdList) +
                                    sizeof(CPUArchId) * max_cpus);
    for (i = 0; i < max_cpus; i++) {
        pcms->possible_cpus->cpus[i].arch_id = x86_cpu_apic_id_from_index(i);
        pcms->possible_cpus->len++;
        if (i < smp_cpus) {
            cpu = pc_new_cpu(machine->cpu_model, x86_cpu_apic_id_from_index(i),
                             &error_fatal);
            pcms->possible_cpus->cpus[i].cpu = CPU(cpu);
            object_unref(OBJECT(cpu));
        }
1145
    }
1146

1147 1148
    /* tell smbios about cpuid version and features */
    smbios_set_cpuid(cpu->env.cpuid_version, cpu->env.features[FEAT_1_EDX]);
1149 1150
}

1151 1152 1153 1154 1155 1156 1157 1158
/* pci-info ROM file. Little endian format */
typedef struct PcRomPciInfo {
    uint64_t w32_min;
    uint64_t w32_max;
    uint64_t w64_min;
    uint64_t w64_max;
} PcRomPciInfo;

1159
static
1160
void pc_machine_done(Notifier *notifier, void *data)
1161
{
1162 1163 1164
    PCMachineState *pcms = container_of(notifier,
                                        PCMachineState, machine_done);
    PCIBus *bus = pcms->bus;
1165 1166 1167 1168 1169 1170 1171 1172 1173 1174

    if (bus) {
        int extra_hosts = 0;

        QLIST_FOREACH(bus, &bus->child, sibling) {
            /* look for expander root buses */
            if (pci_bus_is_root(bus)) {
                extra_hosts++;
            }
        }
1175
        if (extra_hosts && pcms->fw_cfg) {
1176 1177
            uint64_t *val = g_malloc(sizeof(*val));
            *val = cpu_to_le64(extra_hosts);
1178
            fw_cfg_add_file(pcms->fw_cfg,
1179 1180 1181 1182
                    "etc/extra-pci-roots", val, sizeof(*val));
        }
    }

1183
    acpi_setup();
1184 1185
}

1186
void pc_guest_info_init(PCMachineState *pcms)
1187
{
M
Michael S. Tsirkin 已提交
1188 1189
    int i, j;

1190 1191 1192 1193
    pcms->apic_xrupt_override = kvm_allows_irq0_override();
    pcms->numa_nodes = nb_numa_nodes;
    pcms->node_mem = g_malloc0(pcms->numa_nodes *
                                    sizeof *pcms->node_mem);
1194
    for (i = 0; i < nb_numa_nodes; i++) {
1195
        pcms->node_mem[i] = numa_info[i].node_mem;
1196 1197
    }

1198 1199
    pcms->node_cpu = g_malloc0(pcms->apic_id_limit *
                                     sizeof *pcms->node_cpu);
M
Michael S. Tsirkin 已提交
1200 1201 1202

    for (i = 0; i < max_cpus; i++) {
        unsigned int apic_id = x86_cpu_apic_id_from_index(i);
1203
        assert(apic_id < pcms->apic_id_limit);
M
Michael S. Tsirkin 已提交
1204
        for (j = 0; j < nb_numa_nodes; j++) {
1205
            if (test_bit(i, numa_info[j].node_cpu)) {
1206
                pcms->node_cpu[apic_id] = j;
M
Michael S. Tsirkin 已提交
1207 1208 1209 1210
                break;
            }
        }
    }
1211

1212 1213
    pcms->machine_done.notify = pc_machine_done;
    qemu_add_machine_init_done_notifier(&pcms->machine_done);
1214 1215
}

1216 1217 1218
/* setup pci memory address space mapping into system address space */
void pc_pci_as_mapping_init(Object *owner, MemoryRegion *system_memory,
                            MemoryRegion *pci_address_space)
1219
{
1220 1221 1222
    /* Set to lower priority than RAM */
    memory_region_add_subregion_overlap(system_memory, 0x0,
                                        pci_address_space, -1);
1223 1224
}

G
Gerd Hoffmann 已提交
1225 1226
void pc_acpi_init(const char *default_dsdt)
{
1227
    char *filename;
G
Gerd Hoffmann 已提交
1228 1229 1230 1231 1232 1233 1234 1235 1236

    if (acpi_tables != NULL) {
        /* manually set via -acpitable, leave it alone */
        return;
    }

    filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, default_dsdt);
    if (filename == NULL) {
        fprintf(stderr, "WARNING: failed to find %s\n", default_dsdt);
1237
    } else {
1238 1239
        QemuOpts *opts = qemu_opts_create(qemu_find_opts("acpi"), NULL, 0,
                                          &error_abort);
1240
        Error *err = NULL;
G
Gerd Hoffmann 已提交
1241

1242
        qemu_opt_set(opts, "file", filename, &error_abort);
1243

1244
        acpi_table_add_builtin(opts, &err);
1245
        if (err) {
1246 1247
            error_reportf_err(err, "WARNING: failed to load %s: ",
                              filename);
1248 1249
        }
        g_free(filename);
G
Gerd Hoffmann 已提交
1250 1251 1252
    }
}

1253
void xen_load_linux(PCMachineState *pcms)
1254 1255 1256 1257
{
    int i;
    FWCfgState *fw_cfg;

1258
    assert(MACHINE(pcms)->kernel_filename != NULL);
1259

1260
    fw_cfg = fw_cfg_init_io(FW_CFG_IO_BASE);
1261 1262
    rom_set_fw(fw_cfg);

1263
    load_linux(pcms, fw_cfg);
1264 1265 1266 1267 1268
    for (i = 0; i < nb_option_roms; i++) {
        assert(!strcmp(option_rom[i].name, "linuxboot.bin") ||
               !strcmp(option_rom[i].name, "multiboot.bin"));
        rom_add_option(option_rom[i].name, option_rom[i].bootindex);
    }
1269
    pcms->fw_cfg = fw_cfg;
1270 1271
}

1272 1273 1274 1275
void pc_memory_init(PCMachineState *pcms,
                    MemoryRegion *system_memory,
                    MemoryRegion *rom_memory,
                    MemoryRegion **ram_memory)
B
bellard 已提交
1276
{
1277 1278
    int linux_boot, i;
    MemoryRegion *ram, *option_rom_mr;
1279
    MemoryRegion *ram_below_4g, *ram_above_4g;
L
Laszlo Ersek 已提交
1280
    FWCfgState *fw_cfg;
1281
    MachineState *machine = MACHINE(pcms);
1282
    PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms);
1283

1284 1285
    assert(machine->ram_size == pcms->below_4g_mem_size +
                                pcms->above_4g_mem_size);
1286 1287

    linux_boot = (machine->kernel_filename != NULL);
B
bellard 已提交
1288

1289
    /* Allocate RAM.  We allocate it as a single memory region and use
D
Dong Xu Wang 已提交
1290
     * aliases to address portions of it, mostly for backwards compatibility
1291 1292
     * with older qemus that used qemu_ram_alloc().
     */
1293
    ram = g_malloc(sizeof(*ram));
1294 1295
    memory_region_allocate_system_memory(ram, NULL, "pc.ram",
                                         machine->ram_size);
A
Avi Kivity 已提交
1296
    *ram_memory = ram;
1297
    ram_below_4g = g_malloc(sizeof(*ram_below_4g));
1298
    memory_region_init_alias(ram_below_4g, NULL, "ram-below-4g", ram,
1299
                             0, pcms->below_4g_mem_size);
1300
    memory_region_add_subregion(system_memory, 0, ram_below_4g);
1301 1302
    e820_add_entry(0, pcms->below_4g_mem_size, E820_RAM);
    if (pcms->above_4g_mem_size > 0) {
1303
        ram_above_4g = g_malloc(sizeof(*ram_above_4g));
1304
        memory_region_init_alias(ram_above_4g, NULL, "ram-above-4g", ram,
1305 1306
                                 pcms->below_4g_mem_size,
                                 pcms->above_4g_mem_size);
1307 1308
        memory_region_add_subregion(system_memory, 0x100000000ULL,
                                    ram_above_4g);
1309
        e820_add_entry(0x100000000ULL, pcms->above_4g_mem_size, E820_RAM);
1310
    }
1311

1312
    if (!pcmc->has_reserved_memory &&
1313
        (machine->ram_slots ||
1314
         (machine->maxram_size > machine->ram_size))) {
1315 1316 1317 1318 1319 1320 1321
        MachineClass *mc = MACHINE_GET_CLASS(machine);

        error_report("\"-memory 'slots|maxmem'\" is not supported by: %s",
                     mc->name);
        exit(EXIT_FAILURE);
    }

1322
    /* initialize hotplug memory address space */
1323
    if (pcmc->has_reserved_memory &&
1324
        (machine->ram_size < machine->maxram_size)) {
1325
        ram_addr_t hotplug_mem_size =
1326
            machine->maxram_size - machine->ram_size;
1327

1328 1329 1330 1331 1332 1333
        if (machine->ram_slots > ACPI_MAX_RAM_SLOTS) {
            error_report("unsupported amount of memory slots: %"PRIu64,
                         machine->ram_slots);
            exit(EXIT_FAILURE);
        }

1334 1335 1336 1337 1338 1339 1340
        if (QEMU_ALIGN_UP(machine->maxram_size,
                          TARGET_PAGE_SIZE) != machine->maxram_size) {
            error_report("maximum memory size must by aligned to multiple of "
                         "%d bytes", TARGET_PAGE_SIZE);
            exit(EXIT_FAILURE);
        }

1341
        pcms->hotplug_memory.base =
1342
            ROUND_UP(0x100000000ULL + pcms->above_4g_mem_size, 1ULL << 30);
1343

1344
        if (pcmc->enforce_aligned_dimm) {
1345 1346 1347 1348
            /* size hotplug region assuming 1G page max alignment per slot */
            hotplug_mem_size += (1ULL << 30) * machine->ram_slots;
        }

1349
        if ((pcms->hotplug_memory.base + hotplug_mem_size) <
1350 1351 1352 1353 1354 1355
            hotplug_mem_size) {
            error_report("unsupported amount of maximum memory: " RAM_ADDR_FMT,
                         machine->maxram_size);
            exit(EXIT_FAILURE);
        }

1356
        memory_region_init(&pcms->hotplug_memory.mr, OBJECT(pcms),
1357
                           "hotplug-memory", hotplug_mem_size);
1358 1359
        memory_region_add_subregion(system_memory, pcms->hotplug_memory.base,
                                    &pcms->hotplug_memory.mr);
1360
    }
1361 1362

    /* Initialize PC system firmware */
1363
    pc_system_firmware_init(rom_memory, !pcmc->pci_enabled);
1364

1365
    option_rom_mr = g_malloc(sizeof(*option_rom_mr));
1366
    memory_region_init_ram(option_rom_mr, NULL, "pc.rom", PC_ROM_SIZE,
1367
                           &error_fatal);
1368
    vmstate_register_ram_global(option_rom_mr);
1369
    memory_region_add_subregion_overlap(rom_memory,
1370 1371 1372
                                        PC_ROM_MIN_VGA,
                                        option_rom_mr,
                                        1);
1373

1374
    fw_cfg = bochs_bios_init(&address_space_memory, pcms);
M
Marc Marí 已提交
1375

G
Gerd Hoffmann 已提交
1376
    rom_set_fw(fw_cfg);
A
Alexander Graf 已提交
1377

1378
    if (pcmc->has_reserved_memory && pcms->hotplug_memory.base) {
1379
        uint64_t *val = g_malloc(sizeof(*val));
1380 1381 1382 1383 1384 1385
        PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms);
        uint64_t res_mem_end = pcms->hotplug_memory.base;

        if (!pcmc->broken_reserved_end) {
            res_mem_end += memory_region_size(&pcms->hotplug_memory.mr);
        }
1386
        *val = cpu_to_le64(ROUND_UP(res_mem_end, 0x1ULL << 30));
1387 1388 1389
        fw_cfg_add_file(fw_cfg, "etc/reserved-memory-end", val, sizeof(*val));
    }

1390
    if (linux_boot) {
1391
        load_linux(pcms, fw_cfg);
1392 1393 1394
    }

    for (i = 0; i < nb_option_roms; i++) {
G
Gleb Natapov 已提交
1395
        rom_add_option(option_rom[i].name, option_rom[i].bootindex);
1396
    }
1397
    pcms->fw_cfg = fw_cfg;
1398 1399
}

1400
qemu_irq pc_allocate_cpu_irq(void)
1401
{
1402
    return qemu_allocate_irq(pic_irq_request, NULL, 0);
1403 1404
}

1405
DeviceState *pc_vga_init(ISABus *isa_bus, PCIBus *pci_bus)
1406
{
1407 1408
    DeviceState *dev = NULL;

1409 1410 1411 1412 1413
    if (pci_bus) {
        PCIDevice *pcidev = pci_vga_init(pci_bus);
        dev = pcidev ? &pcidev->qdev : NULL;
    } else if (isa_bus) {
        ISADevice *isadev = isa_vga_init(isa_bus);
A
Andreas Färber 已提交
1414
        dev = isadev ? DEVICE(isadev) : NULL;
1415
    }
1416
    return dev;
1417 1418
}

J
Julien Grall 已提交
1419 1420
static const MemoryRegionOps ioport80_io_ops = {
    .write = ioport80_write,
1421
    .read = ioport80_read,
J
Julien Grall 已提交
1422 1423 1424 1425 1426 1427 1428 1429 1430
    .endianness = DEVICE_NATIVE_ENDIAN,
    .impl = {
        .min_access_size = 1,
        .max_access_size = 1,
    },
};

static const MemoryRegionOps ioportF0_io_ops = {
    .write = ioportF0_write,
1431
    .read = ioportF0_read,
J
Julien Grall 已提交
1432 1433 1434 1435 1436 1437 1438
    .endianness = DEVICE_NATIVE_ENDIAN,
    .impl = {
        .min_access_size = 1,
        .max_access_size = 1,
    },
};

1439
void pc_basic_device_init(ISABus *isa_bus, qemu_irq *gsi,
1440
                          ISADevice **rtc_state,
1441
                          bool create_fdctrl,
1442
                          bool no_vmport,
1443
                          uint32_t hpet_irqs)
1444 1445 1446
{
    int i;
    DriveInfo *fd[MAX_FD];
1447 1448 1449
    DeviceState *hpet = NULL;
    int pit_isa_irq = 0;
    qemu_irq pit_alt_irq = NULL;
1450
    qemu_irq rtc_irq = NULL;
B
Blue Swirl 已提交
1451
    qemu_irq *a20_line;
1452
    ISADevice *i8042, *port92, *vmmouse, *pit = NULL;
J
Julien Grall 已提交
1453 1454
    MemoryRegion *ioport80_io = g_new(MemoryRegion, 1);
    MemoryRegion *ioportF0_io = g_new(MemoryRegion, 1);
1455

1456
    memory_region_init_io(ioport80_io, NULL, &ioport80_io_ops, NULL, "ioport80", 1);
J
Julien Grall 已提交
1457
    memory_region_add_subregion(isa_bus->address_space_io, 0x80, ioport80_io);
1458

1459
    memory_region_init_io(ioportF0_io, NULL, &ioportF0_io_ops, NULL, "ioportF0", 1);
J
Julien Grall 已提交
1460
    memory_region_add_subregion(isa_bus->address_space_io, 0xf0, ioportF0_io);
1461

1462 1463 1464 1465 1466 1467 1468
    /*
     * Check if an HPET shall be created.
     *
     * Without KVM_CAP_PIT_STATE2, we cannot switch off the in-kernel PIT
     * when the HPET wants to take over. Thus we have to disable the latter.
     */
    if (!no_hpet && (!kvm_irqchip_in_kernel() || kvm_has_pit_state2())) {
1469
        /* In order to set property, here not using sysbus_try_create_simple */
M
Michael S. Tsirkin 已提交
1470
        hpet = qdev_try_create(NULL, TYPE_HPET);
B
Blue Swirl 已提交
1471
        if (hpet) {
1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483
            /* For pc-piix-*, hpet's intcap is always IRQ2. For pc-q35-1.7
             * and earlier, use IRQ2 for compat. Otherwise, use IRQ16~23,
             * IRQ8 and IRQ2.
             */
            uint8_t compat = object_property_get_int(OBJECT(hpet),
                    HPET_INTCAP, NULL);
            if (!compat) {
                qdev_prop_set_uint32(hpet, HPET_INTCAP, hpet_irqs);
            }
            qdev_init_nofail(hpet);
            sysbus_mmio_map(SYS_BUS_DEVICE(hpet), 0, HPET_BASE);

J
Jan Kiszka 已提交
1484
            for (i = 0; i < GSI_NUM_PINS; i++) {
1485
                sysbus_connect_irq(SYS_BUS_DEVICE(hpet), i, gsi[i]);
B
Blue Swirl 已提交
1486
            }
1487 1488 1489
            pit_isa_irq = -1;
            pit_alt_irq = qdev_get_gpio_in(hpet, HPET_LEGACY_PIT_INT);
            rtc_irq = qdev_get_gpio_in(hpet, HPET_LEGACY_RTC_INT);
J
Jan Kiszka 已提交
1490
        }
1491
    }
1492
    *rtc_state = rtc_init(isa_bus, 2000, rtc_irq);
1493 1494 1495

    qemu_register_boot_set(pc_boot_set, *rtc_state);

1496
    if (!xen_enabled()) {
1497
        if (kvm_pit_in_kernel()) {
1498 1499 1500 1501 1502 1503
            pit = kvm_pit_init(isa_bus, 0x40);
        } else {
            pit = pit_init(isa_bus, 0x40, pit_isa_irq, pit_alt_irq);
        }
        if (hpet) {
            /* connect PIT to output control line of the HPET */
A
Andreas Färber 已提交
1504
            qdev_connect_gpio_out(hpet, 0, qdev_get_gpio_in(DEVICE(pit), 0));
1505 1506
        }
        pcspk_init(isa_bus, pit);
1507
    }
1508

1509
    serial_hds_isa_init(isa_bus, MAX_SERIAL_PORTS);
1510
    parallel_hds_isa_init(isa_bus, MAX_PARALLEL_PORTS);
1511

1512
    a20_line = qemu_allocate_irqs(handle_a20_line_change, first_cpu, 2);
1513
    i8042 = isa_create_simple(isa_bus, "i8042");
1514
    i8042_setup_a20_line(i8042, &a20_line[0]);
1515
    if (!no_vmport) {
1516 1517
        vmport_init(isa_bus);
        vmmouse = isa_try_create(isa_bus, "vmmouse");
1518 1519 1520
    } else {
        vmmouse = NULL;
    }
B
Blue Swirl 已提交
1521
    if (vmmouse) {
A
Andreas Färber 已提交
1522 1523 1524
        DeviceState *dev = DEVICE(vmmouse);
        qdev_prop_set_ptr(dev, "ps2_mouse", i8042);
        qdev_init_nofail(dev);
B
Blue Swirl 已提交
1525
    }
1526
    port92 = isa_create_simple(isa_bus, "port92");
1527
    port92_init(port92, &a20_line[1]);
B
Blue Swirl 已提交
1528

1529
    DMA_init(isa_bus, 0);
1530 1531 1532

    for(i = 0; i < MAX_FD; i++) {
        fd[i] = drive_get(IF_FLOPPY, 0, i);
1533
        create_fdctrl |= !!fd[i];
1534
    }
1535 1536 1537
    if (create_fdctrl) {
        fdctrl_init_isa(isa_bus, fd);
    }
1538 1539
}

1540 1541 1542 1543 1544 1545 1546 1547 1548 1549
void pc_nic_init(ISABus *isa_bus, PCIBus *pci_bus)
{
    int i;

    for (i = 0; i < nb_nics; i++) {
        NICInfo *nd = &nd_table[i];

        if (!pci_bus || (nd->model && strcmp(nd->model, "ne2k_isa") == 0)) {
            pc_init_ne2k_isa(isa_bus, nd);
        } else {
1550
            pci_nic_init_nofail(nd, pci_bus, "e1000", NULL);
1551 1552 1553 1554
        }
    }
}

1555
void pc_pci_device_init(PCIBus *pci_bus)
1556 1557 1558 1559 1560 1561 1562 1563 1564
{
    int max_bus;
    int bus;

    max_bus = drive_get_max_bus(IF_SCSI);
    for (bus = 0; bus <= max_bus; bus++) {
        pci_create_simple(pci_bus, -1, "lsi53c895a");
    }
}
1565 1566 1567 1568 1569 1570 1571

void ioapic_init_gsi(GSIState *gsi_state, const char *parent_name)
{
    DeviceState *dev;
    SysBusDevice *d;
    unsigned int i;

1572
    if (kvm_ioapic_in_kernel()) {
1573 1574 1575 1576 1577 1578 1579 1580 1581
        dev = qdev_create(NULL, "kvm-ioapic");
    } else {
        dev = qdev_create(NULL, "ioapic");
    }
    if (parent_name) {
        object_property_add_child(object_resolve_path(parent_name, NULL),
                                  "ioapic", OBJECT(dev), NULL);
    }
    qdev_init_nofail(dev);
1582
    d = SYS_BUS_DEVICE(dev);
1583
    sysbus_mmio_map(d, 0, IO_APIC_DEFAULT_ADDRESS);
1584 1585 1586 1587 1588

    for (i = 0; i < IOAPIC_NUM_PINS; i++) {
        gsi_state->ioapic_irq[i] = qdev_get_gpio_in(dev, i);
    }
}
1589

1590 1591 1592
static void pc_dimm_plug(HotplugHandler *hotplug_dev,
                         DeviceState *dev, Error **errp)
{
1593
    HotplugHandlerClass *hhc;
1594 1595
    Error *local_err = NULL;
    PCMachineState *pcms = PC_MACHINE(hotplug_dev);
1596
    PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms);
1597 1598 1599
    PCDIMMDevice *dimm = PC_DIMM(dev);
    PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm);
    MemoryRegion *mr = ddc->get_memory_region(dimm);
1600
    uint64_t align = TARGET_PAGE_SIZE;
1601

1602
    if (memory_region_get_alignment(mr) && pcmc->enforce_aligned_dimm) {
1603 1604 1605
        align = memory_region_get_alignment(mr);
    }

1606 1607 1608 1609 1610 1611
    if (!pcms->acpi_dev) {
        error_setg(&local_err,
                   "memory hotplug is not enabled: missing acpi device");
        goto out;
    }

1612
    pc_dimm_memory_plug(dev, &pcms->hotplug_memory, mr, align, &local_err);
1613
    if (local_err) {
1614 1615 1616
        goto out;
    }

1617
    hhc = HOTPLUG_HANDLER_GET_CLASS(pcms->acpi_dev);
1618
    hhc->plug(HOTPLUG_HANDLER(pcms->acpi_dev), dev, &error_abort);
1619 1620 1621 1622
out:
    error_propagate(errp, local_err);
}

1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642
static void pc_dimm_unplug_request(HotplugHandler *hotplug_dev,
                                   DeviceState *dev, Error **errp)
{
    HotplugHandlerClass *hhc;
    Error *local_err = NULL;
    PCMachineState *pcms = PC_MACHINE(hotplug_dev);

    if (!pcms->acpi_dev) {
        error_setg(&local_err,
                   "memory hotplug is not enabled: missing acpi device");
        goto out;
    }

    hhc = HOTPLUG_HANDLER_GET_CLASS(pcms->acpi_dev);
    hhc->unplug_request(HOTPLUG_HANDLER(pcms->acpi_dev), dev, &local_err);

out:
    error_propagate(errp, local_err);
}

1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659
static void pc_dimm_unplug(HotplugHandler *hotplug_dev,
                           DeviceState *dev, Error **errp)
{
    PCMachineState *pcms = PC_MACHINE(hotplug_dev);
    PCDIMMDevice *dimm = PC_DIMM(dev);
    PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm);
    MemoryRegion *mr = ddc->get_memory_region(dimm);
    HotplugHandlerClass *hhc;
    Error *local_err = NULL;

    hhc = HOTPLUG_HANDLER_GET_CLASS(pcms->acpi_dev);
    hhc->unplug(HOTPLUG_HANDLER(pcms->acpi_dev), dev, &local_err);

    if (local_err) {
        goto out;
    }

1660
    pc_dimm_memory_unplug(dev, &pcms->hotplug_memory, mr);
1661 1662 1663 1664 1665 1666
    object_unparent(OBJECT(dev));

 out:
    error_propagate(errp, local_err);
}

1667 1668 1669 1670 1671 1672 1673 1674
static int pc_apic_cmp(const void *a, const void *b)
{
   CPUArchId *apic_a = (CPUArchId *)a;
   CPUArchId *apic_b = (CPUArchId *)b;

   return apic_a->arch_id - apic_b->arch_id;
}

1675 1676 1677
static void pc_cpu_plug(HotplugHandler *hotplug_dev,
                        DeviceState *dev, Error **errp)
{
1678 1679
    CPUClass *cc = CPU_GET_CLASS(dev);
    CPUArchId apic_id, *found_cpu;
1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695
    HotplugHandlerClass *hhc;
    Error *local_err = NULL;
    PCMachineState *pcms = PC_MACHINE(hotplug_dev);

    if (!dev->hotplugged) {
        goto out;
    }

    if (!pcms->acpi_dev) {
        error_setg(&local_err,
                   "cpu hotplug is not enabled: missing acpi device");
        goto out;
    }

    hhc = HOTPLUG_HANDLER_GET_CLASS(pcms->acpi_dev);
    hhc->plug(HOTPLUG_HANDLER(pcms->acpi_dev), dev, &local_err);
G
Gu Zheng 已提交
1696 1697 1698 1699 1700 1701
    if (local_err) {
        goto out;
    }

    /* increment the number of CPUs */
    rtc_set_memory(pcms->rtc, 0x5f, rtc_get_memory(pcms->rtc, 0x5f) + 1);
1702 1703 1704 1705 1706 1707 1708

    apic_id.arch_id = cc->get_arch_id(CPU(dev));
    found_cpu = bsearch(&apic_id, pcms->possible_cpus->cpus,
        pcms->possible_cpus->len, sizeof(*pcms->possible_cpus->cpus),
        pc_apic_cmp);
    assert(found_cpu);
    found_cpu->cpu = CPU(dev);
1709 1710 1711 1712
out:
    error_propagate(errp, local_err);
}

1713 1714 1715 1716 1717
static void pc_machine_device_plug_cb(HotplugHandler *hotplug_dev,
                                      DeviceState *dev, Error **errp)
{
    if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) {
        pc_dimm_plug(hotplug_dev, dev, errp);
1718 1719
    } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) {
        pc_cpu_plug(hotplug_dev, dev, errp);
1720 1721 1722
    }
}

1723 1724 1725
static void pc_machine_device_unplug_request_cb(HotplugHandler *hotplug_dev,
                                                DeviceState *dev, Error **errp)
{
1726 1727 1728 1729 1730 1731
    if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) {
        pc_dimm_unplug_request(hotplug_dev, dev, errp);
    } else {
        error_setg(errp, "acpi: device unplug request for not supported device"
                   " type: %s", object_get_typename(OBJECT(dev)));
    }
1732 1733
}

1734 1735 1736
static void pc_machine_device_unplug_cb(HotplugHandler *hotplug_dev,
                                        DeviceState *dev, Error **errp)
{
1737 1738 1739 1740 1741 1742
    if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) {
        pc_dimm_unplug(hotplug_dev, dev, errp);
    } else {
        error_setg(errp, "acpi: device unplug for not supported device"
                   " type: %s", object_get_typename(OBJECT(dev)));
    }
1743 1744
}

1745 1746 1747 1748 1749
static HotplugHandler *pc_get_hotpug_handler(MachineState *machine,
                                             DeviceState *dev)
{
    PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(machine);

1750 1751
    if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM) ||
        object_dynamic_cast(OBJECT(dev), TYPE_CPU)) {
1752 1753 1754 1755 1756 1757 1758
        return HOTPLUG_HANDLER(machine);
    }

    return pcmc->get_hotplug_handler ?
        pcmc->get_hotplug_handler(machine, dev) : NULL;
}

1759
static void
1760 1761 1762
pc_machine_get_hotplug_memory_region_size(Object *obj, Visitor *v,
                                          const char *name, void *opaque,
                                          Error **errp)
1763 1764
{
    PCMachineState *pcms = PC_MACHINE(obj);
1765
    int64_t value = memory_region_size(&pcms->hotplug_memory.mr);
1766

1767
    visit_type_int(v, name, &value, errp);
1768 1769
}

1770
static void pc_machine_get_max_ram_below_4g(Object *obj, Visitor *v,
1771 1772
                                            const char *name, void *opaque,
                                            Error **errp)
1773 1774 1775 1776
{
    PCMachineState *pcms = PC_MACHINE(obj);
    uint64_t value = pcms->max_ram_below_4g;

1777
    visit_type_size(v, name, &value, errp);
1778 1779 1780
}

static void pc_machine_set_max_ram_below_4g(Object *obj, Visitor *v,
1781 1782
                                            const char *name, void *opaque,
                                            Error **errp)
1783 1784 1785 1786 1787
{
    PCMachineState *pcms = PC_MACHINE(obj);
    Error *error = NULL;
    uint64_t value;

1788
    visit_type_size(v, name, &value, &error);
1789 1790 1791 1792 1793
    if (error) {
        error_propagate(errp, error);
        return;
    }
    if (value > (1ULL << 32)) {
E
Eric Blake 已提交
1794 1795 1796
        error_setg(&error,
                   "Machine option 'max-ram-below-4g=%"PRIu64
                   "' expects size less than or equal to 4G", value);
1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809
        error_propagate(errp, error);
        return;
    }

    if (value < (1ULL << 20)) {
        error_report("Warning: small max_ram_below_4g(%"PRIu64
                     ") less than 1M.  BIOS may not work..",
                     value);
    }

    pcms->max_ram_below_4g = value;
}

1810 1811
static void pc_machine_get_vmport(Object *obj, Visitor *v, const char *name,
                                  void *opaque, Error **errp)
1812 1813
{
    PCMachineState *pcms = PC_MACHINE(obj);
1814
    OnOffAuto vmport = pcms->vmport;
1815

1816
    visit_type_OnOffAuto(v, name, &vmport, errp);
1817 1818
}

1819 1820
static void pc_machine_set_vmport(Object *obj, Visitor *v, const char *name,
                                  void *opaque, Error **errp)
1821 1822 1823
{
    PCMachineState *pcms = PC_MACHINE(obj);

1824
    visit_type_OnOffAuto(v, name, &pcms->vmport, errp);
1825 1826
}

P
Paolo Bonzini 已提交
1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851
bool pc_machine_is_smm_enabled(PCMachineState *pcms)
{
    bool smm_available = false;

    if (pcms->smm == ON_OFF_AUTO_OFF) {
        return false;
    }

    if (tcg_enabled() || qtest_enabled()) {
        smm_available = true;
    } else if (kvm_enabled()) {
        smm_available = kvm_has_smm();
    }

    if (smm_available) {
        return true;
    }

    if (pcms->smm == ON_OFF_AUTO_ON) {
        error_report("System Management Mode not supported by this hypervisor.");
        exit(1);
    }
    return false;
}

1852 1853
static void pc_machine_get_smm(Object *obj, Visitor *v, const char *name,
                               void *opaque, Error **errp)
P
Paolo Bonzini 已提交
1854 1855 1856 1857
{
    PCMachineState *pcms = PC_MACHINE(obj);
    OnOffAuto smm = pcms->smm;

1858
    visit_type_OnOffAuto(v, name, &smm, errp);
P
Paolo Bonzini 已提交
1859 1860
}

1861 1862
static void pc_machine_set_smm(Object *obj, Visitor *v, const char *name,
                               void *opaque, Error **errp)
P
Paolo Bonzini 已提交
1863 1864 1865
{
    PCMachineState *pcms = PC_MACHINE(obj);

1866
    visit_type_OnOffAuto(v, name, &pcms->smm, errp);
P
Paolo Bonzini 已提交
1867 1868
}

1869 1870 1871 1872
static bool pc_machine_get_nvdimm(Object *obj, Error **errp)
{
    PCMachineState *pcms = PC_MACHINE(obj);

1873
    return pcms->acpi_nvdimm_state.is_enabled;
1874 1875 1876 1877 1878 1879
}

static void pc_machine_set_nvdimm(Object *obj, bool value, Error **errp)
{
    PCMachineState *pcms = PC_MACHINE(obj);

1880
    pcms->acpi_nvdimm_state.is_enabled = value;
1881 1882
}

1883 1884
static void pc_machine_initfn(Object *obj)
{
1885 1886
    PCMachineState *pcms = PC_MACHINE(obj);

1887 1888
    object_property_add(obj, PC_MACHINE_MEMHP_REGION_SIZE, "int",
                        pc_machine_get_hotplug_memory_region_size,
1889
                        NULL, NULL, NULL, &error_abort);
1890

1891 1892 1893 1894
    pcms->max_ram_below_4g = 1ULL << 32; /* 4G */
    object_property_add(obj, PC_MACHINE_MAX_RAM_BELOW_4G, "size",
                        pc_machine_get_max_ram_below_4g,
                        pc_machine_set_max_ram_below_4g,
1895
                        NULL, NULL, &error_abort);
1896 1897
    object_property_set_description(obj, PC_MACHINE_MAX_RAM_BELOW_4G,
                                    "Maximum ram below the 4G boundary (32bit boundary)",
1898
                                    &error_abort);
1899

P
Paolo Bonzini 已提交
1900 1901 1902 1903
    pcms->smm = ON_OFF_AUTO_AUTO;
    object_property_add(obj, PC_MACHINE_SMM, "OnOffAuto",
                        pc_machine_get_smm,
                        pc_machine_set_smm,
1904
                        NULL, NULL, &error_abort);
P
Paolo Bonzini 已提交
1905 1906
    object_property_set_description(obj, PC_MACHINE_SMM,
                                    "Enable SMM (pc & q35)",
1907
                                    &error_abort);
P
Paolo Bonzini 已提交
1908

1909 1910 1911 1912
    pcms->vmport = ON_OFF_AUTO_AUTO;
    object_property_add(obj, PC_MACHINE_VMPORT, "OnOffAuto",
                        pc_machine_get_vmport,
                        pc_machine_set_vmport,
1913
                        NULL, NULL, &error_abort);
1914 1915
    object_property_set_description(obj, PC_MACHINE_VMPORT,
                                    "Enable vmport (pc & q35)",
1916
                                    &error_abort);
1917 1918

    /* nvdimm is disabled on default. */
1919
    pcms->acpi_nvdimm_state.is_enabled = false;
1920 1921
    object_property_add_bool(obj, PC_MACHINE_NVDIMM, pc_machine_get_nvdimm,
                             pc_machine_set_nvdimm, &error_abort);
1922 1923
}

1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942
static void pc_machine_reset(void)
{
    CPUState *cs;
    X86CPU *cpu;

    qemu_devices_reset();

    /* Reset APIC after devices have been reset to cancel
     * any changes that qemu_devices_reset() might have done.
     */
    CPU_FOREACH(cs) {
        cpu = X86_CPU(cs);

        if (cpu->apic_state) {
            device_reset(cpu->apic_state);
        }
    }
}

1943 1944
static unsigned pc_cpu_index_to_socket_id(unsigned cpu_index)
{
1945
    X86CPUTopoInfo topo;
1946
    x86_topo_ids_from_idx(smp_cores, smp_threads, cpu_index,
1947 1948
                          &topo);
    return topo.pkg_id;
1949 1950
}

1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961
static CPUArchIdList *pc_possible_cpu_arch_ids(MachineState *machine)
{
    PCMachineState *pcms = PC_MACHINE(machine);
    int len = sizeof(CPUArchIdList) +
              sizeof(CPUArchId) * (pcms->possible_cpus->len);
    CPUArchIdList *list = g_malloc(len);

    memcpy(list, pcms->possible_cpus, len);
    return list;
}

1962 1963 1964 1965 1966 1967 1968
static void pc_machine_class_init(ObjectClass *oc, void *data)
{
    MachineClass *mc = MACHINE_CLASS(oc);
    PCMachineClass *pcmc = PC_MACHINE_CLASS(oc);
    HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc);

    pcmc->get_hotplug_handler = mc->get_hotplug_handler;
1969 1970 1971 1972 1973 1974 1975 1976
    pcmc->pci_enabled = true;
    pcmc->has_acpi_build = true;
    pcmc->rsdp_in_ram = true;
    pcmc->smbios_defaults = true;
    pcmc->smbios_uuid_encoded = true;
    pcmc->gigabyte_align = true;
    pcmc->has_reserved_memory = true;
    pcmc->kvmclock_enabled = true;
1977
    pcmc->enforce_aligned_dimm = true;
1978 1979 1980
    /* BIOS ACPI tables: 128K. Other BIOS datastructures: less than 4K reported
     * to be used at the moment, 32K should be enough for a while.  */
    pcmc->acpi_data_size = 0x20000 + 0x8000;
1981
    pcmc->save_tsc_khz = true;
1982
    mc->get_hotplug_handler = pc_get_hotpug_handler;
1983
    mc->cpu_index_to_socket_id = pc_cpu_index_to_socket_id;
1984
    mc->possible_cpu_arch_ids = pc_possible_cpu_arch_ids;
1985
    mc->default_boot_order = "cad";
1986 1987
    mc->hot_add_cpu = pc_hot_add_cpu;
    mc->max_cpus = 255;
1988
    mc->reset = pc_machine_reset;
1989
    hc->plug = pc_machine_device_plug_cb;
1990
    hc->unplug_request = pc_machine_device_unplug_request_cb;
1991
    hc->unplug = pc_machine_device_unplug_cb;
1992 1993
}

1994 1995 1996 1997 1998
static const TypeInfo pc_machine_info = {
    .name = TYPE_PC_MACHINE,
    .parent = TYPE_MACHINE,
    .abstract = true,
    .instance_size = sizeof(PCMachineState),
1999
    .instance_init = pc_machine_initfn,
2000
    .class_size = sizeof(PCMachineClass),
2001 2002 2003 2004 2005
    .class_init = pc_machine_class_init,
    .interfaces = (InterfaceInfo[]) {
         { TYPE_HOTPLUG_HANDLER },
         { }
    },
2006 2007 2008 2009 2010 2011 2012 2013
};

static void pc_machine_register_types(void)
{
    type_register_static(&pc_machine_info);
}

type_init(pc_machine_register_types)