pc.c 51.3 KB
Newer Older
B
bellard 已提交
1 2
/*
 * QEMU PC System Emulator
3
 *
B
bellard 已提交
4
 * Copyright (c) 2003-2004 Fabrice Bellard
5
 *
B
bellard 已提交
6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
24
#include "hw/hw.h"
P
Paolo Bonzini 已提交
25 26 27 28
#include "hw/i386/pc.h"
#include "hw/char/serial.h"
#include "hw/i386/apic.h"
#include "hw/block/fdc.h"
29 30
#include "hw/ide.h"
#include "hw/pci/pci.h"
31
#include "monitor/monitor.h"
P
Paolo Bonzini 已提交
32 33 34
#include "hw/nvram/fw_cfg.h"
#include "hw/timer/hpet.h"
#include "hw/i386/smbios.h"
35
#include "hw/loader.h"
B
Blue Swirl 已提交
36
#include "elf.h"
37
#include "multiboot.h"
P
Paolo Bonzini 已提交
38 39 40
#include "hw/timer/mc146818rtc.h"
#include "hw/timer/i8254.h"
#include "hw/audio/pcspk.h"
41 42
#include "hw/pci/msi.h"
#include "hw/sysbus.h"
43 44
#include "sysemu/sysemu.h"
#include "sysemu/kvm.h"
45
#include "kvm_i386.h"
P
Paolo Bonzini 已提交
46
#include "hw/xen/xen.h"
47
#include "sysemu/block-backend.h"
P
Paolo Bonzini 已提交
48
#include "hw/block/block.h"
G
Gerd Hoffmann 已提交
49
#include "ui/qemu-spice.h"
50 51
#include "exec/memory.h"
#include "exec/address-spaces.h"
52
#include "sysemu/arch_init.h"
53
#include "qemu/bitmap.h"
54
#include "qemu/config-file.h"
55
#include "hw/acpi/acpi.h"
56
#include "hw/acpi/cpu_hotplug.h"
57
#include "hw/cpu/icc_bus.h"
58
#include "hw/boards.h"
59
#include "hw/pci/pci_host.h"
60
#include "acpi-build.h"
61
#include "hw/mem/pc-dimm.h"
62
#include "trace.h"
63
#include "qapi/visitor.h"
B
bellard 已提交
64

B
Blue Swirl 已提交
65 66 67 68 69 70 71 72 73 74
/* debug PC/ISA interrupts */
//#define DEBUG_IRQ

#ifdef DEBUG_IRQ
#define DPRINTF(fmt, ...)                                       \
    do { printf("CPUIRQ: " fmt , ## __VA_ARGS__); } while (0)
#else
#define DPRINTF(fmt, ...)
#endif

75 76 77
/* Leave a chunk of memory at the top of RAM for the BIOS ACPI tables
 * (128K) and other BIOS datastructures (less than 4K reported to be used at
 * the moment, 32K should be enough for a while).  */
S
Stefan Weil 已提交
78
static unsigned acpi_data_size = 0x20000 + 0x8000;
79 80 81 82 83
void pc_set_legacy_acpi_data_size(void)
{
    acpi_data_size = 0x10000;
}

84
#define BIOS_CFG_IOPORT 0x510
85
#define FW_CFG_ACPI_TABLES (FW_CFG_ARCH_LOCAL + 0)
86
#define FW_CFG_SMBIOS_ENTRIES (FW_CFG_ARCH_LOCAL + 1)
J
Jes Sorensen 已提交
87
#define FW_CFG_IRQ0_OVERRIDE (FW_CFG_ARCH_LOCAL + 2)
J
Jes Sorensen 已提交
88
#define FW_CFG_E820_TABLE (FW_CFG_ARCH_LOCAL + 3)
89
#define FW_CFG_HPET (FW_CFG_ARCH_LOCAL + 4)
B
bellard 已提交
90

J
Jes Sorensen 已提交
91 92 93 94 95 96
#define E820_NR_ENTRIES		16

struct e820_entry {
    uint64_t address;
    uint64_t length;
    uint32_t type;
97
} QEMU_PACKED __attribute((__aligned__(4)));
J
Jes Sorensen 已提交
98 99 100 101

struct e820_table {
    uint32_t count;
    struct e820_entry entry[E820_NR_ENTRIES];
102
} QEMU_PACKED __attribute((__aligned__(4)));
J
Jes Sorensen 已提交
103

G
Gerd Hoffmann 已提交
104 105 106
static struct e820_table e820_reserve;
static struct e820_entry *e820_table;
static unsigned e820_entries;
B
Blue Swirl 已提交
107
struct hpet_fw_config hpet_cfg = {.count = UINT8_MAX};
J
Jes Sorensen 已提交
108

J
Jan Kiszka 已提交
109
void gsi_handler(void *opaque, int n, int level)
110
{
J
Jan Kiszka 已提交
111
    GSIState *s = opaque;
112

J
Jan Kiszka 已提交
113 114 115
    DPRINTF("pc: %s GSI %d\n", level ? "raising" : "lowering", n);
    if (n < ISA_NUM_IRQS) {
        qemu_set_irq(s->i8259_irq[n], level);
A
Avi Kivity 已提交
116
    }
J
Jan Kiszka 已提交
117
    qemu_set_irq(s->ioapic_irq[n], level);
118
}
119

J
Julien Grall 已提交
120 121
static void ioport80_write(void *opaque, hwaddr addr, uint64_t data,
                           unsigned size)
B
bellard 已提交
122 123 124
{
}

125 126
static uint64_t ioport80_read(void *opaque, hwaddr addr, unsigned size)
{
127
    return 0xffffffffffffffffULL;
128 129
}

130
/* MSDOS compatibility mode FPU exception support */
P
pbrook 已提交
131
static qemu_irq ferr_irq;
132 133 134 135 136 137

void pc_register_ferr_irq(qemu_irq irq)
{
    ferr_irq = irq;
}

138 139 140
/* XXX: add IGNNE support */
void cpu_set_ferr(CPUX86State *s)
{
P
pbrook 已提交
141
    qemu_irq_raise(ferr_irq);
142 143
}

J
Julien Grall 已提交
144 145
static void ioportF0_write(void *opaque, hwaddr addr, uint64_t data,
                           unsigned size)
146
{
P
pbrook 已提交
147
    qemu_irq_lower(ferr_irq);
148 149
}

150 151
static uint64_t ioportF0_read(void *opaque, hwaddr addr, unsigned size)
{
152
    return 0xffffffffffffffffULL;
153 154
}

B
bellard 已提交
155 156 157
/* TSC handling */
uint64_t cpu_get_tsc(CPUX86State *env)
{
158
    return cpu_get_ticks();
B
bellard 已提交
159 160
}

B
bellard 已提交
161
/* SMM support */
162 163 164 165 166 167 168 169 170 171 172 173

static cpu_set_smm_t smm_set;
static void *smm_arg;

void cpu_smm_register(cpu_set_smm_t callback, void *arg)
{
    assert(smm_set == NULL);
    assert(smm_arg == NULL);
    smm_set = callback;
    smm_arg = arg;
}

A
Andreas Färber 已提交
174
void cpu_smm_update(CPUX86State *env)
B
bellard 已提交
175
{
176
    if (smm_set && smm_arg && CPU(x86_env_get_cpu(env)) == first_cpu) {
177
        smm_set(!!(env->hflags & HF_SMM_MASK), smm_arg);
178
    }
B
bellard 已提交
179 180 181
}


B
bellard 已提交
182
/* IRQ handling */
A
Andreas Färber 已提交
183
int cpu_get_pic_interrupt(CPUX86State *env)
B
bellard 已提交
184
{
185
    X86CPU *cpu = x86_env_get_cpu(env);
B
bellard 已提交
186 187
    int intno;

188
    intno = apic_get_interrupt(cpu->apic_state);
B
bellard 已提交
189 190 191 192
    if (intno >= 0) {
        return intno;
    }
    /* read the irq from the PIC */
193
    if (!apic_accept_pic_intr(cpu->apic_state)) {
194
        return -1;
195
    }
196

B
bellard 已提交
197 198 199 200
    intno = pic_read_irq(isa_pic);
    return intno;
}

P
pbrook 已提交
201
static void pic_irq_request(void *opaque, int irq, int level)
B
bellard 已提交
202
{
203 204
    CPUState *cs = first_cpu;
    X86CPU *cpu = X86_CPU(cs);
205

B
Blue Swirl 已提交
206
    DPRINTF("pic_irqs: %s irq %d\n", level? "raise" : "lower", irq);
207
    if (cpu->apic_state) {
A
Andreas Färber 已提交
208
        CPU_FOREACH(cs) {
209
            cpu = X86_CPU(cs);
210 211
            if (apic_accept_pic_intr(cpu->apic_state)) {
                apic_deliver_pic_intr(cpu->apic_state, level);
212
            }
A
aurel32 已提交
213 214
        }
    } else {
215
        if (level) {
216
            cpu_interrupt(cs, CPU_INTERRUPT_HARD);
217 218 219
        } else {
            cpu_reset_interrupt(cs, CPU_INTERRUPT_HARD);
        }
220
    }
B
bellard 已提交
221 222
}

B
bellard 已提交
223 224
/* PC cmos mappings */

B
bellard 已提交
225 226
#define REG_EQUIPMENT_BYTE          0x14

227
static int cmos_get_fd_drive_type(FDriveType fd0)
228 229 230 231
{
    int val;

    switch (fd0) {
232
    case FDRIVE_DRV_144:
233 234 235
        /* 1.44 Mb 3"5 drive */
        val = 4;
        break;
236
    case FDRIVE_DRV_288:
237 238 239
        /* 2.88 Mb 3"5 drive */
        val = 5;
        break;
240
    case FDRIVE_DRV_120:
241 242 243
        /* 1.2 Mb 5"5 drive */
        val = 2;
        break;
244
    case FDRIVE_DRV_NONE:
245 246 247 248 249 250 251
    default:
        val = 0;
        break;
    }
    return val;
}

252 253
static void cmos_init_hd(ISADevice *s, int type_ofs, int info_ofs,
                         int16_t cylinders, int8_t heads, int8_t sectors)
B
bellard 已提交
254 255 256 257 258 259 260 261 262 263 264 265 266
{
    rtc_set_memory(s, type_ofs, 47);
    rtc_set_memory(s, info_ofs, cylinders);
    rtc_set_memory(s, info_ofs + 1, cylinders >> 8);
    rtc_set_memory(s, info_ofs + 2, heads);
    rtc_set_memory(s, info_ofs + 3, 0xff);
    rtc_set_memory(s, info_ofs + 4, 0xff);
    rtc_set_memory(s, info_ofs + 5, 0xc0 | ((heads > 8) << 3));
    rtc_set_memory(s, info_ofs + 6, cylinders);
    rtc_set_memory(s, info_ofs + 7, cylinders >> 8);
    rtc_set_memory(s, info_ofs + 8, sectors);
}

267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283
/* convert boot_device letter to something recognizable by the bios */
static int boot_device2nibble(char boot_device)
{
    switch(boot_device) {
    case 'a':
    case 'b':
        return 0x01; /* floppy boot */
    case 'c':
        return 0x02; /* hard drive boot */
    case 'd':
        return 0x03; /* CD-ROM boot */
    case 'n':
        return 0x04; /* Network boot */
    }
    return 0;
}

284
static int set_boot_dev(ISADevice *s, const char *boot_device)
285 286 287 288 289 290 291
{
#define PC_MAX_BOOT_DEVICES 3
    int nbds, bds[3] = { 0, };
    int i;

    nbds = strlen(boot_device);
    if (nbds > PC_MAX_BOOT_DEVICES) {
292
        error_report("Too many boot devices for PC");
293 294 295 296 297
        return(1);
    }
    for (i = 0; i < nbds; i++) {
        bds[i] = boot_device2nibble(boot_device[i]);
        if (bds[i] == 0) {
298 299
            error_report("Invalid boot device for PC: '%c'",
                         boot_device[i]);
300 301 302 303
            return(1);
        }
    }
    rtc_set_memory(s, 0x3d, (bds[1] << 4) | bds[0]);
304
    rtc_set_memory(s, 0x38, (bds[2] << 4) | (fd_bootchk ? 0x0 : 0x1));
305 306 307
    return(0);
}

308 309
static int pc_boot_set(void *opaque, const char *boot_device)
{
310
    return set_boot_dev(opaque, boot_device);
311 312
}

313 314
typedef struct pc_cmos_init_late_arg {
    ISADevice *rtc_state;
315
    BusState *idebus[2];
316 317 318 319 320 321
} pc_cmos_init_late_arg;

static void pc_cmos_init_late(void *opaque)
{
    pc_cmos_init_late_arg *arg = opaque;
    ISADevice *s = arg->rtc_state;
322 323
    int16_t cylinders;
    int8_t heads, sectors;
324
    int val;
325
    int i, trans;
326

327 328 329 330 331 332 333 334 335 336 337 338
    val = 0;
    if (ide_get_geometry(arg->idebus[0], 0,
                         &cylinders, &heads, &sectors) >= 0) {
        cmos_init_hd(s, 0x19, 0x1b, cylinders, heads, sectors);
        val |= 0xf0;
    }
    if (ide_get_geometry(arg->idebus[0], 1,
                         &cylinders, &heads, &sectors) >= 0) {
        cmos_init_hd(s, 0x1a, 0x24, cylinders, heads, sectors);
        val |= 0x0f;
    }
    rtc_set_memory(s, 0x12, val);
339 340 341

    val = 0;
    for (i = 0; i < 4; i++) {
342 343 344 345 346 347
        /* NOTE: ide_get_geometry() returns the physical
           geometry.  It is always such that: 1 <= sects <= 63, 1
           <= heads <= 16, 1 <= cylinders <= 16383. The BIOS
           geometry can be different if a translation is done. */
        if (ide_get_geometry(arg->idebus[i / 2], i % 2,
                             &cylinders, &heads, &sectors) >= 0) {
348 349 350
            trans = ide_get_bios_chs_trans(arg->idebus[i / 2], i % 2) - 1;
            assert((trans & ~3) == 0);
            val |= trans << (i * 2);
351 352 353 354 355 356 357
        }
    }
    rtc_set_memory(s, 0x39, val);

    qemu_unregister_reset(pc_cmos_init_late, opaque);
}

358 359 360 361 362 363 364 365 366 367 368 369 370 371 372
typedef struct RTCCPUHotplugArg {
    Notifier cpu_added_notifier;
    ISADevice *rtc_state;
} RTCCPUHotplugArg;

static void rtc_notify_cpu_added(Notifier *notifier, void *data)
{
    RTCCPUHotplugArg *arg = container_of(notifier, RTCCPUHotplugArg,
                                         cpu_added_notifier);
    ISADevice *s = arg->rtc_state;

    /* increment the number of CPUs */
    rtc_set_memory(s, 0x5f, rtc_get_memory(s, 0x5f) + 1);
}

373
void pc_cmos_init(ram_addr_t ram_size, ram_addr_t above_4g_mem_size,
374
                  const char *boot_device,
K
Kevin Wolf 已提交
375
                  ISADevice *floppy, BusState *idebus0, BusState *idebus1,
B
Blue Swirl 已提交
376
                  ISADevice *s)
B
bellard 已提交
377
{
378
    int val, nb, i;
379
    FDriveType fd_type[2] = { FDRIVE_DRV_NONE, FDRIVE_DRV_NONE };
380
    static pc_cmos_init_late_arg arg;
381
    static RTCCPUHotplugArg cpu_hotplug_cb;
B
bellard 已提交
382 383

    /* various important CMOS locations needed by PC/Bochs bios */
B
bellard 已提交
384 385

    /* memory size */
386 387
    /* base memory (first MiB) */
    val = MIN(ram_size / 1024, 640);
B
bellard 已提交
388 389
    rtc_set_memory(s, 0x15, val);
    rtc_set_memory(s, 0x16, val >> 8);
390 391 392 393 394 395
    /* extended memory (next 64MiB) */
    if (ram_size > 1024 * 1024) {
        val = (ram_size - 1024 * 1024) / 1024;
    } else {
        val = 0;
    }
B
bellard 已提交
396 397
    if (val > 65535)
        val = 65535;
B
bellard 已提交
398 399 400 401
    rtc_set_memory(s, 0x17, val);
    rtc_set_memory(s, 0x18, val >> 8);
    rtc_set_memory(s, 0x30, val);
    rtc_set_memory(s, 0x31, val >> 8);
402 403 404 405
    /* memory between 16MiB and 4GiB */
    if (ram_size > 16 * 1024 * 1024) {
        val = (ram_size - 16 * 1024 * 1024) / 65536;
    } else {
B
bellard 已提交
406
        val = 0;
407
    }
B
bellard 已提交
408 409
    if (val > 65535)
        val = 65535;
B
bellard 已提交
410 411
    rtc_set_memory(s, 0x34, val);
    rtc_set_memory(s, 0x35, val >> 8);
412 413 414 415 416
    /* memory above 4GiB */
    val = above_4g_mem_size / 65536;
    rtc_set_memory(s, 0x5b, val);
    rtc_set_memory(s, 0x5c, val >> 8);
    rtc_set_memory(s, 0x5d, val >> 16);
417

A
aurel32 已提交
418 419
    /* set the number of CPU */
    rtc_set_memory(s, 0x5f, smp_cpus - 1);
420 421 422 423
    /* init CPU hotplug notifier */
    cpu_hotplug_cb.rtc_state = s;
    cpu_hotplug_cb.cpu_added_notifier.notify = rtc_notify_cpu_added;
    qemu_register_cpu_added_notifier(&cpu_hotplug_cb.cpu_added_notifier);
A
aurel32 已提交
424

425
    if (set_boot_dev(s, boot_device)) {
426 427
        exit(1);
    }
B
bellard 已提交
428

B
bellard 已提交
429
    /* floppy type */
K
Kevin Wolf 已提交
430 431
    if (floppy) {
        for (i = 0; i < 2; i++) {
432
            fd_type[i] = isa_fdc_get_drive_type(floppy, i);
B
Blue Swirl 已提交
433 434 435 436
        }
    }
    val = (cmos_get_fd_drive_type(fd_type[0]) << 4) |
        cmos_get_fd_drive_type(fd_type[1]);
B
bellard 已提交
437
    rtc_set_memory(s, 0x10, val);
438

B
bellard 已提交
439
    val = 0;
B
bellard 已提交
440
    nb = 0;
B
Blue Swirl 已提交
441
    if (fd_type[0] < FDRIVE_DRV_NONE) {
B
bellard 已提交
442
        nb++;
443
    }
B
Blue Swirl 已提交
444
    if (fd_type[1] < FDRIVE_DRV_NONE) {
B
bellard 已提交
445
        nb++;
446
    }
B
bellard 已提交
447 448 449 450
    switch (nb) {
    case 0:
        break;
    case 1:
B
bellard 已提交
451
        val |= 0x01; /* 1 drive, ready for boot */
B
bellard 已提交
452 453
        break;
    case 2:
B
bellard 已提交
454
        val |= 0x41; /* 2 drives, ready for boot */
B
bellard 已提交
455 456
        break;
    }
B
bellard 已提交
457 458 459 460
    val |= 0x02; /* FPU is there */
    val |= 0x04; /* PS/2 mouse installed */
    rtc_set_memory(s, REG_EQUIPMENT_BYTE, val);

B
bellard 已提交
461
    /* hard drives */
462
    arg.rtc_state = s;
463 464
    arg.idebus[0] = idebus0;
    arg.idebus[1] = idebus1;
465
    qemu_register_reset(pc_cmos_init_late, &arg);
B
bellard 已提交
466 467
}

A
Andreas Färber 已提交
468 469 470
#define TYPE_PORT92 "port92"
#define PORT92(obj) OBJECT_CHECK(Port92State, (obj), TYPE_PORT92)

471 472
/* port 92 stuff: could be split off */
typedef struct Port92State {
A
Andreas Färber 已提交
473 474
    ISADevice parent_obj;

475
    MemoryRegion io;
476 477 478 479
    uint8_t outport;
    qemu_irq *a20_out;
} Port92State;

480 481
static void port92_write(void *opaque, hwaddr addr, uint64_t val,
                         unsigned size)
482 483
{
    Port92State *s = opaque;
484
    int oldval = s->outport;
485

G
Gonglei 已提交
486
    DPRINTF("port92: write 0x%02" PRIx64 "\n", val);
487 488
    s->outport = val;
    qemu_set_irq(*s->a20_out, (val >> 1) & 1);
489
    if ((val & 1) && !(oldval & 1)) {
490 491 492 493
        qemu_system_reset_request();
    }
}

494 495
static uint64_t port92_read(void *opaque, hwaddr addr,
                            unsigned size)
496 497 498 499 500 501 502 503 504 505 506
{
    Port92State *s = opaque;
    uint32_t ret;

    ret = s->outport;
    DPRINTF("port92: read 0x%02x\n", ret);
    return ret;
}

static void port92_init(ISADevice *dev, qemu_irq *a20_out)
{
A
Andreas Färber 已提交
507
    Port92State *s = PORT92(dev);
508 509 510 511 512 513 514 515

    s->a20_out = a20_out;
}

static const VMStateDescription vmstate_port92_isa = {
    .name = "port92",
    .version_id = 1,
    .minimum_version_id = 1,
516
    .fields = (VMStateField[]) {
517 518 519 520 521 522 523
        VMSTATE_UINT8(outport, Port92State),
        VMSTATE_END_OF_LIST()
    }
};

static void port92_reset(DeviceState *d)
{
A
Andreas Färber 已提交
524
    Port92State *s = PORT92(d);
525 526 527 528

    s->outport &= ~1;
}

529
static const MemoryRegionOps port92_ops = {
530 531 532 533 534 535 536
    .read = port92_read,
    .write = port92_write,
    .impl = {
        .min_access_size = 1,
        .max_access_size = 1,
    },
    .endianness = DEVICE_LITTLE_ENDIAN,
537 538
};

539
static void port92_initfn(Object *obj)
540
{
541
    Port92State *s = PORT92(obj);
542

543
    memory_region_init_io(&s->io, OBJECT(s), &port92_ops, s, "port92", 1);
544

545
    s->outport = 0;
546 547 548 549 550 551 552 553
}

static void port92_realizefn(DeviceState *dev, Error **errp)
{
    ISADevice *isadev = ISA_DEVICE(dev);
    Port92State *s = PORT92(dev);

    isa_register_ioport(isadev, &s->io, 0x92);
554 555
}

556 557
static void port92_class_initfn(ObjectClass *klass, void *data)
{
558
    DeviceClass *dc = DEVICE_CLASS(klass);
559 560

    dc->realize = port92_realizefn;
561 562
    dc->reset = port92_reset;
    dc->vmsd = &vmstate_port92_isa;
563 564 565 566 567 568
    /*
     * Reason: unlike ordinary ISA devices, this one needs additional
     * wiring: its A20 output line needs to be wired up by
     * port92_init().
     */
    dc->cannot_instantiate_with_device_add_yet = true;
569 570
}

571
static const TypeInfo port92_info = {
A
Andreas Färber 已提交
572
    .name          = TYPE_PORT92,
573 574
    .parent        = TYPE_ISA_DEVICE,
    .instance_size = sizeof(Port92State),
575
    .instance_init = port92_initfn,
576
    .class_init    = port92_class_initfn,
577 578
};

A
Andreas Färber 已提交
579
static void port92_register_types(void)
580
{
581
    type_register_static(&port92_info);
582
}
A
Andreas Färber 已提交
583 584

type_init(port92_register_types)
585

B
Blue Swirl 已提交
586
static void handle_a20_line_change(void *opaque, int irq, int level)
587
{
588
    X86CPU *cpu = opaque;
B
bellard 已提交
589

B
Blue Swirl 已提交
590
    /* XXX: send to all CPUs ? */
591
    /* XXX: add logic to handle multiple A20 line sources */
592
    x86_cpu_set_a20(cpu, level);
B
bellard 已提交
593 594
}

J
Jes Sorensen 已提交
595 596
int e820_add_entry(uint64_t address, uint64_t length, uint32_t type)
{
G
Gerd Hoffmann 已提交
597
    int index = le32_to_cpu(e820_reserve.count);
J
Jes Sorensen 已提交
598 599
    struct e820_entry *entry;

G
Gerd Hoffmann 已提交
600 601 602 603 604 605 606 607 608 609 610 611 612
    if (type != E820_RAM) {
        /* old FW_CFG_E820_TABLE entry -- reservations only */
        if (index >= E820_NR_ENTRIES) {
            return -EBUSY;
        }
        entry = &e820_reserve.entry[index++];

        entry->address = cpu_to_le64(address);
        entry->length = cpu_to_le64(length);
        entry->type = cpu_to_le32(type);

        e820_reserve.count = cpu_to_le32(index);
    }
J
Jes Sorensen 已提交
613

G
Gerd Hoffmann 已提交
614 615 616 617 618 619 620
    /* new "etc/e820" file -- include ram too */
    e820_table = g_realloc(e820_table,
                           sizeof(struct e820_entry) * (e820_entries+1));
    e820_table[e820_entries].address = cpu_to_le64(address);
    e820_table[e820_entries].length = cpu_to_le64(length);
    e820_table[e820_entries].type = cpu_to_le32(type);
    e820_entries++;
J
Jes Sorensen 已提交
621

G
Gerd Hoffmann 已提交
622
    return e820_entries;
J
Jes Sorensen 已提交
623 624
}

625 626 627 628 629 630 631 632 633 634 635 636 637 638 639
int e820_get_num_entries(void)
{
    return e820_entries;
}

bool e820_get_entry(int idx, uint32_t type, uint64_t *address, uint64_t *length)
{
    if (idx < e820_entries && e820_table[idx].type == cpu_to_le32(type)) {
        *address = le64_to_cpu(e820_table[idx].address);
        *length = le64_to_cpu(e820_table[idx].length);
        return true;
    }
    return false;
}

640 641 642 643 644 645 646 647 648 649 650 651
/* Calculates the limit to CPU APIC ID values
 *
 * This function returns the limit for the APIC ID value, so that all
 * CPU APIC IDs are < pc_apic_id_limit().
 *
 * This is used for FW_CFG_MAX_CPUS. See comments on bochs_bios_init().
 */
static unsigned int pc_apic_id_limit(unsigned int max_cpus)
{
    return x86_cpu_apic_id_from_index(max_cpus - 1) + 1;
}

L
Laszlo Ersek 已提交
652
static FWCfgState *bochs_bios_init(void)
B
bellard 已提交
653
{
L
Laszlo Ersek 已提交
654
    FWCfgState *fw_cfg;
655 656
    uint8_t *smbios_tables, *smbios_anchor;
    size_t smbios_tables_len, smbios_anchor_len;
657 658
    uint64_t *numa_fw_cfg;
    int i, j;
659
    unsigned int apic_id_limit = pc_apic_id_limit(max_cpus);
660 661

    fw_cfg = fw_cfg_init(BIOS_CFG_IOPORT, BIOS_CFG_IOPORT + 1, 0, 0);
662 663 664 665 666 667 668 669 670 671 672 673 674 675 676
    /* FW_CFG_MAX_CPUS is a bit confusing/problematic on x86:
     *
     * SeaBIOS needs FW_CFG_MAX_CPUS for CPU hotplug, but the CPU hotplug
     * QEMU<->SeaBIOS interface is not based on the "CPU index", but on the APIC
     * ID of hotplugged CPUs[1]. This means that FW_CFG_MAX_CPUS is not the
     * "maximum number of CPUs", but the "limit to the APIC ID values SeaBIOS
     * may see".
     *
     * So, this means we must not use max_cpus, here, but the maximum possible
     * APIC ID value, plus one.
     *
     * [1] The only kind of "CPU identifier" used between SeaBIOS and QEMU is
     *     the APIC ID, not the "CPU index"
     */
    fw_cfg_add_i16(fw_cfg, FW_CFG_MAX_CPUS, (uint16_t)apic_id_limit);
677
    fw_cfg_add_i32(fw_cfg, FW_CFG_ID, 1);
678
    fw_cfg_add_i64(fw_cfg, FW_CFG_RAM_SIZE, (uint64_t)ram_size);
679 680
    fw_cfg_add_bytes(fw_cfg, FW_CFG_ACPI_TABLES,
                     acpi_tables, acpi_tables_len);
681
    fw_cfg_add_i32(fw_cfg, FW_CFG_IRQ0_OVERRIDE, kvm_allows_irq0_override());
682

683 684
    smbios_tables = smbios_get_table_legacy(&smbios_tables_len);
    if (smbios_tables) {
685
        fw_cfg_add_bytes(fw_cfg, FW_CFG_SMBIOS_ENTRIES,
686 687 688 689 690 691 692 693 694 695 696 697
                         smbios_tables, smbios_tables_len);
    }

    smbios_get_tables(&smbios_tables, &smbios_tables_len,
                      &smbios_anchor, &smbios_anchor_len);
    if (smbios_anchor) {
        fw_cfg_add_file(fw_cfg, "etc/smbios/smbios-tables",
                        smbios_tables, smbios_tables_len);
        fw_cfg_add_file(fw_cfg, "etc/smbios/smbios-anchor",
                        smbios_anchor, smbios_anchor_len);
    }

698
    fw_cfg_add_bytes(fw_cfg, FW_CFG_E820_TABLE,
G
Gerd Hoffmann 已提交
699 700 701
                     &e820_reserve, sizeof(e820_reserve));
    fw_cfg_add_file(fw_cfg, "etc/e820", e820_table,
                    sizeof(struct e820_entry) * e820_entries);
702

703
    fw_cfg_add_bytes(fw_cfg, FW_CFG_HPET, &hpet_cfg, sizeof(hpet_cfg));
704 705 706 707
    /* allocate memory for the NUMA channel: one (64bit) word for the number
     * of nodes, one word for each VCPU->node and one word for each node to
     * hold the amount of memory.
     */
708
    numa_fw_cfg = g_new0(uint64_t, 1 + apic_id_limit + nb_numa_nodes);
709
    numa_fw_cfg[0] = cpu_to_le64(nb_numa_nodes);
710
    for (i = 0; i < max_cpus; i++) {
711 712
        unsigned int apic_id = x86_cpu_apic_id_from_index(i);
        assert(apic_id < apic_id_limit);
713
        for (j = 0; j < nb_numa_nodes; j++) {
714
            if (test_bit(i, numa_info[j].node_cpu)) {
715
                numa_fw_cfg[apic_id + 1] = cpu_to_le64(j);
716 717 718 719 720
                break;
            }
        }
    }
    for (i = 0; i < nb_numa_nodes; i++) {
721
        numa_fw_cfg[apic_id_limit + 1 + i] = cpu_to_le64(numa_info[i].node_mem);
722
    }
723
    fw_cfg_add_bytes(fw_cfg, FW_CFG_NUMA, numa_fw_cfg,
724 725
                     (1 + apic_id_limit + nb_numa_nodes) *
                     sizeof(*numa_fw_cfg));
A
Alexander Graf 已提交
726 727

    return fw_cfg;
B
bellard 已提交
728 729
}

T
ths 已提交
730 731 732 733 734 735 736 737 738 739 740 741 742 743
static long get_file_size(FILE *f)
{
    long where, size;

    /* XXX: on Unix systems, using fstat() probably makes more sense */

    where = ftell(f);
    fseek(f, 0, SEEK_END);
    size = ftell(f);
    fseek(f, where, SEEK_SET);

    return size;
}

L
Laszlo Ersek 已提交
744
static void load_linux(FWCfgState *fw_cfg,
745
                       const char *kernel_filename,
L
liguang 已提交
746 747
                       const char *initrd_filename,
                       const char *kernel_cmdline,
A
Avi Kivity 已提交
748
                       hwaddr max_ram_size)
T
ths 已提交
749 750
{
    uint16_t protocol;
P
Paul Brook 已提交
751
    int setup_size, kernel_size, initrd_size = 0, cmdline_size;
T
ths 已提交
752
    uint32_t initrd_max;
753
    uint8_t header[8192], *setup, *kernel, *initrd_data;
A
Avi Kivity 已提交
754
    hwaddr real_addr, prot_addr, cmdline_addr, initrd_addr = 0;
755
    FILE *f;
P
Pascal Terjan 已提交
756
    char *vmode;
T
ths 已提交
757 758 759 760 761 762 763

    /* Align to 16 bytes as a paranoia measure */
    cmdline_size = (strlen(kernel_cmdline)+16) & ~15;

    /* load the kernel header */
    f = fopen(kernel_filename, "rb");
    if (!f || !(kernel_size = get_file_size(f)) ||
L
liguang 已提交
764 765 766 767 768
        fread(header, 1, MIN(ARRAY_SIZE(header), kernel_size), f) !=
        MIN(ARRAY_SIZE(header), kernel_size)) {
        fprintf(stderr, "qemu: could not load kernel '%s': %s\n",
                kernel_filename, strerror(errno));
        exit(1);
T
ths 已提交
769 770 771
    }

    /* kernel protocol version */
B
bellard 已提交
772
#if 0
T
ths 已提交
773
    fprintf(stderr, "header magic: %#x\n", ldl_p(header+0x202));
B
bellard 已提交
774
#endif
L
liguang 已提交
775 776 777 778 779
    if (ldl_p(header+0x202) == 0x53726448) {
        protocol = lduw_p(header+0x206);
    } else {
        /* This looks like a multiboot kernel. If it is, let's stop
           treating it like a Linux kernel. */
780
        if (load_multiboot(fw_cfg, f, kernel_filename, initrd_filename,
L
liguang 已提交
781
                           kernel_cmdline, kernel_size, header)) {
B
Blue Swirl 已提交
782
            return;
L
liguang 已提交
783 784
        }
        protocol = 0;
A
Alexander Graf 已提交
785
    }
T
ths 已提交
786 787

    if (protocol < 0x200 || !(header[0x211] & 0x01)) {
L
liguang 已提交
788 789 790 791
        /* Low kernel */
        real_addr    = 0x90000;
        cmdline_addr = 0x9a000 - cmdline_size;
        prot_addr    = 0x10000;
T
ths 已提交
792
    } else if (protocol < 0x202) {
L
liguang 已提交
793 794 795 796
        /* High but ancient kernel */
        real_addr    = 0x90000;
        cmdline_addr = 0x9a000 - cmdline_size;
        prot_addr    = 0x100000;
T
ths 已提交
797
    } else {
L
liguang 已提交
798 799 800 801
        /* High and recent kernel */
        real_addr    = 0x10000;
        cmdline_addr = 0x20000;
        prot_addr    = 0x100000;
T
ths 已提交
802 803
    }

B
bellard 已提交
804
#if 0
T
ths 已提交
805
    fprintf(stderr,
L
liguang 已提交
806 807 808 809 810 811
            "qemu: real_addr     = 0x" TARGET_FMT_plx "\n"
            "qemu: cmdline_addr  = 0x" TARGET_FMT_plx "\n"
            "qemu: prot_addr     = 0x" TARGET_FMT_plx "\n",
            real_addr,
            cmdline_addr,
            prot_addr);
B
bellard 已提交
812
#endif
T
ths 已提交
813 814

    /* highest address for loading the initrd */
L
liguang 已提交
815 816 817 818 819
    if (protocol >= 0x203) {
        initrd_max = ldl_p(header+0x22c);
    } else {
        initrd_max = 0x37ffffff;
    }
T
ths 已提交
820

821 822 823
    if (initrd_max >= max_ram_size - acpi_data_size) {
        initrd_max = max_ram_size - acpi_data_size - 1;
    }
T
ths 已提交
824

825 826
    fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_ADDR, cmdline_addr);
    fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_SIZE, strlen(kernel_cmdline)+1);
827
    fw_cfg_add_string(fw_cfg, FW_CFG_CMDLINE_DATA, kernel_cmdline);
T
ths 已提交
828 829

    if (protocol >= 0x202) {
L
liguang 已提交
830
        stl_p(header+0x228, cmdline_addr);
T
ths 已提交
831
    } else {
L
liguang 已提交
832 833
        stw_p(header+0x20, 0xA33F);
        stw_p(header+0x22, cmdline_addr-real_addr);
T
ths 已提交
834 835
    }

P
Pascal Terjan 已提交
836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853
    /* handle vga= parameter */
    vmode = strstr(kernel_cmdline, "vga=");
    if (vmode) {
        unsigned int video_mode;
        /* skip "vga=" */
        vmode += 4;
        if (!strncmp(vmode, "normal", 6)) {
            video_mode = 0xffff;
        } else if (!strncmp(vmode, "ext", 3)) {
            video_mode = 0xfffe;
        } else if (!strncmp(vmode, "ask", 3)) {
            video_mode = 0xfffd;
        } else {
            video_mode = strtol(vmode, NULL, 0);
        }
        stw_p(header+0x1fa, video_mode);
    }

T
ths 已提交
854
    /* loader type */
S
Stefan Weil 已提交
855
    /* High nybble = B reserved for QEMU; low nybble is revision number.
T
ths 已提交
856 857
       If this code is substantially changed, you may want to consider
       incrementing the revision. */
L
liguang 已提交
858 859 860
    if (protocol >= 0x200) {
        header[0x210] = 0xB0;
    }
T
ths 已提交
861 862
    /* heap */
    if (protocol >= 0x201) {
L
liguang 已提交
863 864
        header[0x211] |= 0x80;	/* CAN_USE_HEAP */
        stw_p(header+0x224, cmdline_addr-real_addr-0x200);
T
ths 已提交
865 866 867 868
    }

    /* load initrd */
    if (initrd_filename) {
L
liguang 已提交
869 870 871 872
        if (protocol < 0x200) {
            fprintf(stderr, "qemu: linux kernel too old to load a ram disk\n");
            exit(1);
        }
T
ths 已提交
873

L
liguang 已提交
874
        initrd_size = get_image_size(initrd_filename);
M
M. Mohan Kumar 已提交
875
        if (initrd_size < 0) {
876 877
            fprintf(stderr, "qemu: error reading initrd %s: %s\n",
                    initrd_filename, strerror(errno));
M
M. Mohan Kumar 已提交
878 879 880
            exit(1);
        }

881
        initrd_addr = (initrd_max-initrd_size) & ~4095;
882

883
        initrd_data = g_malloc(initrd_size);
884 885 886 887 888
        load_image(initrd_filename, initrd_data);

        fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_ADDR, initrd_addr);
        fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_SIZE, initrd_size);
        fw_cfg_add_bytes(fw_cfg, FW_CFG_INITRD_DATA, initrd_data, initrd_size);
T
ths 已提交
889

L
liguang 已提交
890 891
        stl_p(header+0x218, initrd_addr);
        stl_p(header+0x21c, initrd_size);
T
ths 已提交
892 893
    }

894
    /* load kernel and setup */
T
ths 已提交
895
    setup_size = header[0x1f1];
L
liguang 已提交
896 897 898
    if (setup_size == 0) {
        setup_size = 4;
    }
T
ths 已提交
899
    setup_size = (setup_size+1)*512;
900
    kernel_size -= setup_size;
T
ths 已提交
901

902 903
    setup  = g_malloc(setup_size);
    kernel = g_malloc(kernel_size);
904
    fseek(f, 0, SEEK_SET);
905 906 907 908 909 910 911 912
    if (fread(setup, 1, setup_size, f) != setup_size) {
        fprintf(stderr, "fread() failed\n");
        exit(1);
    }
    if (fread(kernel, 1, kernel_size, f) != kernel_size) {
        fprintf(stderr, "fread() failed\n");
        exit(1);
    }
T
ths 已提交
913
    fclose(f);
914
    memcpy(setup, header, MIN(sizeof(header), setup_size));
915 916 917 918 919 920 921 922 923

    fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ADDR, prot_addr);
    fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_SIZE, kernel_size);
    fw_cfg_add_bytes(fw_cfg, FW_CFG_KERNEL_DATA, kernel, kernel_size);

    fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_ADDR, real_addr);
    fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_SIZE, setup_size);
    fw_cfg_add_bytes(fw_cfg, FW_CFG_SETUP_DATA, setup, setup_size);

G
Gleb Natapov 已提交
924 925
    option_rom[nb_option_roms].name = "linuxboot.bin";
    option_rom[nb_option_roms].bootindex = 0;
926
    nb_option_roms++;
T
ths 已提交
927 928
}

B
bellard 已提交
929 930
#define NE2000_NB_MAX 6

B
Blue Swirl 已提交
931 932 933
static const int ne2000_io[NE2000_NB_MAX] = { 0x300, 0x320, 0x340, 0x360,
                                              0x280, 0x380 };
static const int ne2000_irq[NE2000_NB_MAX] = { 9, 10, 11, 3, 4, 5 };
B
bellard 已提交
934

935
void pc_init_ne2k_isa(ISABus *bus, NICInfo *nd)
936 937 938 939 940
{
    static int nb_ne2k = 0;

    if (nb_ne2k == NE2000_NB_MAX)
        return;
941
    isa_ne2000_init(bus, ne2000_io[nb_ne2k],
G
Gerd Hoffmann 已提交
942
                    ne2000_irq[nb_ne2k], nd);
943 944 945
    nb_ne2k++;
}

B
Blue Swirl 已提交
946
DeviceState *cpu_get_current_apic(void)
947
{
948 949
    if (current_cpu) {
        X86CPU *cpu = X86_CPU(current_cpu);
950
        return cpu->apic_state;
951 952 953 954 955
    } else {
        return NULL;
    }
}

956
void pc_acpi_smi_interrupt(void *opaque, int irq, int level)
B
Blue Swirl 已提交
957
{
958
    X86CPU *cpu = opaque;
B
Blue Swirl 已提交
959 960

    if (level) {
961
        cpu_interrupt(CPU(cpu), CPU_INTERRUPT_SMI);
B
Blue Swirl 已提交
962 963 964
    }
}

965 966
static X86CPU *pc_new_cpu(const char *cpu_model, int64_t apic_id,
                          DeviceState *icc_bridge, Error **errp)
967 968 969 970
{
    X86CPU *cpu;
    Error *local_err = NULL;

971 972 973 974
    cpu = cpu_x86_create(cpu_model, icc_bridge, &local_err);
    if (local_err != NULL) {
        error_propagate(errp, local_err);
        return NULL;
975 976 977 978 979 980 981
    }

    object_property_set_int(OBJECT(cpu), apic_id, "apic-id", &local_err);
    object_property_set_bool(OBJECT(cpu), true, "realized", &local_err);

    if (local_err) {
        error_propagate(errp, local_err);
982 983
        object_unref(OBJECT(cpu));
        cpu = NULL;
984 985 986 987
    }
    return cpu;
}

988 989 990 991 992 993 994
static const char *current_cpu_model;

void pc_hot_add_cpu(const int64_t id, Error **errp)
{
    DeviceState *icc_bridge;
    int64_t apic_id = x86_cpu_apic_id_from_index(id);

995 996 997 998 999
    if (id < 0) {
        error_setg(errp, "Invalid CPU id: %" PRIi64, id);
        return;
    }

1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011
    if (cpu_exists(apic_id)) {
        error_setg(errp, "Unable to add CPU: %" PRIi64
                   ", it already exists", id);
        return;
    }

    if (id >= max_cpus) {
        error_setg(errp, "Unable to add CPU: %" PRIi64
                   ", max allowed: %d", id, max_cpus - 1);
        return;
    }

1012 1013 1014 1015 1016 1017 1018
    if (apic_id >= ACPI_CPU_HOTPLUG_ID_LIMIT) {
        error_setg(errp, "Unable to add CPU: %" PRIi64
                   ", resulting APIC ID (%" PRIi64 ") is too large",
                   id, apic_id);
        return;
    }

1019 1020 1021 1022 1023
    icc_bridge = DEVICE(object_resolve_path_type("icc-bridge",
                                                 TYPE_ICC_BRIDGE, NULL));
    pc_new_cpu(current_cpu_model, apic_id, icc_bridge, errp);
}

1024
void pc_cpus_init(const char *cpu_model, DeviceState *icc_bridge)
1025 1026
{
    int i;
1027
    X86CPU *cpu = NULL;
1028
    Error *error = NULL;
1029
    unsigned long apic_id_limit;
1030 1031 1032 1033 1034 1035 1036 1037 1038

    /* init CPUs */
    if (cpu_model == NULL) {
#ifdef TARGET_X86_64
        cpu_model = "qemu64";
#else
        cpu_model = "qemu32";
#endif
    }
1039
    current_cpu_model = cpu_model;
1040

1041 1042 1043 1044 1045 1046 1047
    apic_id_limit = pc_apic_id_limit(max_cpus);
    if (apic_id_limit > ACPI_CPU_HOTPLUG_ID_LIMIT) {
        error_report("max_cpus is too large. APIC ID of last CPU is %lu",
                     apic_id_limit - 1);
        exit(1);
    }

1048
    for (i = 0; i < smp_cpus; i++) {
1049 1050
        cpu = pc_new_cpu(cpu_model, x86_cpu_apic_id_from_index(i),
                         icc_bridge, &error);
1051
        if (error) {
1052
            error_report("%s", error_get_pretty(error));
1053
            error_free(error);
1054 1055
            exit(1);
        }
1056
    }
1057 1058

    /* map APIC MMIO area if CPU has APIC */
1059
    if (cpu && cpu->apic_state) {
1060 1061 1062 1063
        /* XXX: what if the base changes? */
        sysbus_mmio_map_overlap(SYS_BUS_DEVICE(icc_bridge), 0,
                                APIC_DEFAULT_ADDRESS, 0x1000);
    }
1064 1065 1066

    /* tell smbios about cpuid version and features */
    smbios_set_cpuid(cpu->env.cpuid_version, cpu->env.features[FEAT_1_EDX]);
1067 1068
}

1069 1070 1071 1072 1073 1074 1075 1076
/* pci-info ROM file. Little endian format */
typedef struct PcRomPciInfo {
    uint64_t w32_min;
    uint64_t w32_max;
    uint64_t w64_min;
    uint64_t w64_max;
} PcRomPciInfo;

1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087
typedef struct PcGuestInfoState {
    PcGuestInfo info;
    Notifier machine_done;
} PcGuestInfoState;

static
void pc_guest_info_machine_done(Notifier *notifier, void *data)
{
    PcGuestInfoState *guest_info_state = container_of(notifier,
                                                      PcGuestInfoState,
                                                      machine_done);
1088
    acpi_setup(&guest_info_state->info);
1089 1090 1091 1092 1093 1094 1095
}

PcGuestInfo *pc_guest_info_init(ram_addr_t below_4g_mem_size,
                                ram_addr_t above_4g_mem_size)
{
    PcGuestInfoState *guest_info_state = g_malloc0(sizeof *guest_info_state);
    PcGuestInfo *guest_info = &guest_info_state->info;
M
Michael S. Tsirkin 已提交
1096 1097
    int i, j;

E
Eduardo Habkost 已提交
1098
    guest_info->ram_size_below_4g = below_4g_mem_size;
M
Michael S. Tsirkin 已提交
1099 1100 1101 1102
    guest_info->ram_size = below_4g_mem_size + above_4g_mem_size;
    guest_info->apic_id_limit = pc_apic_id_limit(max_cpus);
    guest_info->apic_xrupt_override = kvm_allows_irq0_override();
    guest_info->numa_nodes = nb_numa_nodes;
1103
    guest_info->node_mem = g_malloc0(guest_info->numa_nodes *
M
Michael S. Tsirkin 已提交
1104
                                    sizeof *guest_info->node_mem);
1105 1106 1107 1108
    for (i = 0; i < nb_numa_nodes; i++) {
        guest_info->node_mem[i] = numa_info[i].node_mem;
    }

M
Michael S. Tsirkin 已提交
1109 1110 1111 1112 1113 1114 1115
    guest_info->node_cpu = g_malloc0(guest_info->apic_id_limit *
                                     sizeof *guest_info->node_cpu);

    for (i = 0; i < max_cpus; i++) {
        unsigned int apic_id = x86_cpu_apic_id_from_index(i);
        assert(apic_id < guest_info->apic_id_limit);
        for (j = 0; j < nb_numa_nodes; j++) {
1116
            if (test_bit(i, numa_info[j].node_cpu)) {
M
Michael S. Tsirkin 已提交
1117 1118 1119 1120 1121
                guest_info->node_cpu[apic_id] = j;
                break;
            }
        }
    }
1122 1123 1124 1125 1126 1127

    guest_info_state->machine_done.notify = pc_guest_info_machine_done;
    qemu_add_machine_init_done_notifier(&guest_info_state->machine_done);
    return guest_info;
}

1128 1129 1130
/* setup pci memory address space mapping into system address space */
void pc_pci_as_mapping_init(Object *owner, MemoryRegion *system_memory,
                            MemoryRegion *pci_address_space)
1131
{
1132 1133 1134
    /* Set to lower priority than RAM */
    memory_region_add_subregion_overlap(system_memory, 0x0,
                                        pci_address_space, -1);
1135 1136
}

G
Gerd Hoffmann 已提交
1137 1138
void pc_acpi_init(const char *default_dsdt)
{
1139
    char *filename;
G
Gerd Hoffmann 已提交
1140 1141 1142 1143 1144 1145 1146 1147 1148

    if (acpi_tables != NULL) {
        /* manually set via -acpitable, leave it alone */
        return;
    }

    filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, default_dsdt);
    if (filename == NULL) {
        fprintf(stderr, "WARNING: failed to find %s\n", default_dsdt);
1149 1150 1151 1152
    } else {
        char *arg;
        QemuOpts *opts;
        Error *err = NULL;
G
Gerd Hoffmann 已提交
1153

1154
        arg = g_strdup_printf("file=%s", filename);
1155

1156 1157 1158
        /* creates a deep copy of "arg" */
        opts = qemu_opts_parse(qemu_find_opts("acpi"), arg, 0);
        g_assert(opts != NULL);
1159

1160
        acpi_table_add_builtin(opts, &err);
1161
        if (err) {
1162 1163
            error_report("WARNING: failed to load %s: %s", filename,
                         error_get_pretty(err));
1164 1165 1166 1167
            error_free(err);
        }
        g_free(arg);
        g_free(filename);
G
Gerd Hoffmann 已提交
1168 1169 1170
    }
}

1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195
FWCfgState *xen_load_linux(const char *kernel_filename,
                           const char *kernel_cmdline,
                           const char *initrd_filename,
                           ram_addr_t below_4g_mem_size,
                           PcGuestInfo *guest_info)
{
    int i;
    FWCfgState *fw_cfg;

    assert(kernel_filename != NULL);

    fw_cfg = fw_cfg_init(BIOS_CFG_IOPORT, BIOS_CFG_IOPORT + 1, 0, 0);
    rom_set_fw(fw_cfg);

    load_linux(fw_cfg, kernel_filename, initrd_filename,
               kernel_cmdline, below_4g_mem_size);
    for (i = 0; i < nb_option_roms; i++) {
        assert(!strcmp(option_rom[i].name, "linuxboot.bin") ||
               !strcmp(option_rom[i].name, "multiboot.bin"));
        rom_add_option(option_rom[i].name, option_rom[i].bootindex);
    }
    guest_info->fw_cfg = fw_cfg;
    return fw_cfg;
}

1196 1197
FWCfgState *pc_memory_init(MachineState *machine,
                           MemoryRegion *system_memory,
L
Laszlo Ersek 已提交
1198 1199 1200
                           ram_addr_t below_4g_mem_size,
                           ram_addr_t above_4g_mem_size,
                           MemoryRegion *rom_memory,
1201 1202
                           MemoryRegion **ram_memory,
                           PcGuestInfo *guest_info)
B
bellard 已提交
1203
{
1204 1205
    int linux_boot, i;
    MemoryRegion *ram, *option_rom_mr;
1206
    MemoryRegion *ram_below_4g, *ram_above_4g;
L
Laszlo Ersek 已提交
1207
    FWCfgState *fw_cfg;
1208
    PCMachineState *pcms = PC_MACHINE(machine);
1209

1210 1211 1212
    assert(machine->ram_size == below_4g_mem_size + above_4g_mem_size);

    linux_boot = (machine->kernel_filename != NULL);
B
bellard 已提交
1213

1214
    /* Allocate RAM.  We allocate it as a single memory region and use
D
Dong Xu Wang 已提交
1215
     * aliases to address portions of it, mostly for backwards compatibility
1216 1217
     * with older qemus that used qemu_ram_alloc().
     */
1218
    ram = g_malloc(sizeof(*ram));
1219 1220
    memory_region_allocate_system_memory(ram, NULL, "pc.ram",
                                         machine->ram_size);
A
Avi Kivity 已提交
1221
    *ram_memory = ram;
1222
    ram_below_4g = g_malloc(sizeof(*ram_below_4g));
1223
    memory_region_init_alias(ram_below_4g, NULL, "ram-below-4g", ram,
1224 1225
                             0, below_4g_mem_size);
    memory_region_add_subregion(system_memory, 0, ram_below_4g);
G
Gerd Hoffmann 已提交
1226
    e820_add_entry(0, below_4g_mem_size, E820_RAM);
1227
    if (above_4g_mem_size > 0) {
1228
        ram_above_4g = g_malloc(sizeof(*ram_above_4g));
1229
        memory_region_init_alias(ram_above_4g, NULL, "ram-above-4g", ram,
1230 1231 1232
                                 below_4g_mem_size, above_4g_mem_size);
        memory_region_add_subregion(system_memory, 0x100000000ULL,
                                    ram_above_4g);
G
Gerd Hoffmann 已提交
1233
        e820_add_entry(0x100000000ULL, above_4g_mem_size, E820_RAM);
1234
    }
1235

1236 1237
    if (!guest_info->has_reserved_memory &&
        (machine->ram_slots ||
1238
         (machine->maxram_size > machine->ram_size))) {
1239 1240 1241 1242 1243 1244 1245
        MachineClass *mc = MACHINE_GET_CLASS(machine);

        error_report("\"-memory 'slots|maxmem'\" is not supported by: %s",
                     mc->name);
        exit(EXIT_FAILURE);
    }

1246
    /* initialize hotplug memory address space */
1247
    if (guest_info->has_reserved_memory &&
1248
        (machine->ram_size < machine->maxram_size)) {
1249
        ram_addr_t hotplug_mem_size =
1250
            machine->maxram_size - machine->ram_size;
1251

1252 1253 1254 1255 1256 1257
        if (machine->ram_slots > ACPI_MAX_RAM_SLOTS) {
            error_report("unsupported amount of memory slots: %"PRIu64,
                         machine->ram_slots);
            exit(EXIT_FAILURE);
        }

1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272
        pcms->hotplug_memory_base =
            ROUND_UP(0x100000000ULL + above_4g_mem_size, 1ULL << 30);

        if ((pcms->hotplug_memory_base + hotplug_mem_size) <
            hotplug_mem_size) {
            error_report("unsupported amount of maximum memory: " RAM_ADDR_FMT,
                         machine->maxram_size);
            exit(EXIT_FAILURE);
        }

        memory_region_init(&pcms->hotplug_memory, OBJECT(pcms),
                           "hotplug-memory", hotplug_mem_size);
        memory_region_add_subregion(system_memory, pcms->hotplug_memory_base,
                                    &pcms->hotplug_memory);
    }
1273 1274

    /* Initialize PC system firmware */
1275
    pc_system_firmware_init(rom_memory, guest_info->isapc_ram_fw);
1276

1277
    option_rom_mr = g_malloc(sizeof(*option_rom_mr));
1278 1279
    memory_region_init_ram(option_rom_mr, NULL, "pc.rom", PC_ROM_SIZE,
                           &error_abort);
1280
    vmstate_register_ram_global(option_rom_mr);
1281
    memory_region_add_subregion_overlap(rom_memory,
1282 1283 1284
                                        PC_ROM_MIN_VGA,
                                        option_rom_mr,
                                        1);
1285

A
Alexander Graf 已提交
1286
    fw_cfg = bochs_bios_init();
G
Gerd Hoffmann 已提交
1287
    rom_set_fw(fw_cfg);
A
Alexander Graf 已提交
1288

1289 1290 1291 1292 1293 1294
    if (guest_info->has_reserved_memory && pcms->hotplug_memory_base) {
        uint64_t *val = g_malloc(sizeof(*val));
        *val = cpu_to_le64(ROUND_UP(pcms->hotplug_memory_base, 0x1ULL << 30));
        fw_cfg_add_file(fw_cfg, "etc/reserved-memory-end", val, sizeof(*val));
    }

1295
    if (linux_boot) {
1296 1297
        load_linux(fw_cfg, machine->kernel_filename, machine->initrd_filename,
                   machine->kernel_cmdline, below_4g_mem_size);
1298 1299 1300
    }

    for (i = 0; i < nb_option_roms; i++) {
G
Gleb Natapov 已提交
1301
        rom_add_option(option_rom[i].name, option_rom[i].bootindex);
1302
    }
1303
    guest_info->fw_cfg = fw_cfg;
1304
    return fw_cfg;
1305 1306
}

1307 1308 1309 1310 1311
qemu_irq *pc_allocate_cpu_irq(void)
{
    return qemu_allocate_irqs(pic_irq_request, NULL, 1);
}

1312
DeviceState *pc_vga_init(ISABus *isa_bus, PCIBus *pci_bus)
1313
{
1314 1315
    DeviceState *dev = NULL;

1316 1317 1318 1319 1320
    if (pci_bus) {
        PCIDevice *pcidev = pci_vga_init(pci_bus);
        dev = pcidev ? &pcidev->qdev : NULL;
    } else if (isa_bus) {
        ISADevice *isadev = isa_vga_init(isa_bus);
A
Andreas Färber 已提交
1321
        dev = isadev ? DEVICE(isadev) : NULL;
1322
    }
1323
    return dev;
1324 1325
}

B
Blue Swirl 已提交
1326 1327
static void cpu_request_exit(void *opaque, int irq, int level)
{
1328
    CPUState *cpu = current_cpu;
B
Blue Swirl 已提交
1329

1330 1331
    if (cpu && level) {
        cpu_exit(cpu);
B
Blue Swirl 已提交
1332 1333 1334
    }
}

J
Julien Grall 已提交
1335 1336
static const MemoryRegionOps ioport80_io_ops = {
    .write = ioport80_write,
1337
    .read = ioport80_read,
J
Julien Grall 已提交
1338 1339 1340 1341 1342 1343 1344 1345 1346
    .endianness = DEVICE_NATIVE_ENDIAN,
    .impl = {
        .min_access_size = 1,
        .max_access_size = 1,
    },
};

static const MemoryRegionOps ioportF0_io_ops = {
    .write = ioportF0_write,
1347
    .read = ioportF0_read,
J
Julien Grall 已提交
1348 1349 1350 1351 1352 1353 1354
    .endianness = DEVICE_NATIVE_ENDIAN,
    .impl = {
        .min_access_size = 1,
        .max_access_size = 1,
    },
};

1355
void pc_basic_device_init(ISABus *isa_bus, qemu_irq *gsi,
1356
                          ISADevice **rtc_state,
K
Kevin Wolf 已提交
1357
                          ISADevice **floppy,
1358 1359
                          bool no_vmport,
                          uint32 hpet_irqs)
1360 1361 1362
{
    int i;
    DriveInfo *fd[MAX_FD];
1363 1364 1365
    DeviceState *hpet = NULL;
    int pit_isa_irq = 0;
    qemu_irq pit_alt_irq = NULL;
1366
    qemu_irq rtc_irq = NULL;
B
Blue Swirl 已提交
1367
    qemu_irq *a20_line;
1368
    ISADevice *i8042, *port92, *vmmouse, *pit = NULL;
B
Blue Swirl 已提交
1369
    qemu_irq *cpu_exit_irq;
J
Julien Grall 已提交
1370 1371
    MemoryRegion *ioport80_io = g_new(MemoryRegion, 1);
    MemoryRegion *ioportF0_io = g_new(MemoryRegion, 1);
1372

1373
    memory_region_init_io(ioport80_io, NULL, &ioport80_io_ops, NULL, "ioport80", 1);
J
Julien Grall 已提交
1374
    memory_region_add_subregion(isa_bus->address_space_io, 0x80, ioport80_io);
1375

1376
    memory_region_init_io(ioportF0_io, NULL, &ioportF0_io_ops, NULL, "ioportF0", 1);
J
Julien Grall 已提交
1377
    memory_region_add_subregion(isa_bus->address_space_io, 0xf0, ioportF0_io);
1378

1379 1380 1381 1382 1383 1384 1385
    /*
     * Check if an HPET shall be created.
     *
     * Without KVM_CAP_PIT_STATE2, we cannot switch off the in-kernel PIT
     * when the HPET wants to take over. Thus we have to disable the latter.
     */
    if (!no_hpet && (!kvm_irqchip_in_kernel() || kvm_has_pit_state2())) {
1386
        /* In order to set property, here not using sysbus_try_create_simple */
M
Michael S. Tsirkin 已提交
1387
        hpet = qdev_try_create(NULL, TYPE_HPET);
B
Blue Swirl 已提交
1388
        if (hpet) {
1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400
            /* For pc-piix-*, hpet's intcap is always IRQ2. For pc-q35-1.7
             * and earlier, use IRQ2 for compat. Otherwise, use IRQ16~23,
             * IRQ8 and IRQ2.
             */
            uint8_t compat = object_property_get_int(OBJECT(hpet),
                    HPET_INTCAP, NULL);
            if (!compat) {
                qdev_prop_set_uint32(hpet, HPET_INTCAP, hpet_irqs);
            }
            qdev_init_nofail(hpet);
            sysbus_mmio_map(SYS_BUS_DEVICE(hpet), 0, HPET_BASE);

J
Jan Kiszka 已提交
1401
            for (i = 0; i < GSI_NUM_PINS; i++) {
1402
                sysbus_connect_irq(SYS_BUS_DEVICE(hpet), i, gsi[i]);
B
Blue Swirl 已提交
1403
            }
1404 1405 1406
            pit_isa_irq = -1;
            pit_alt_irq = qdev_get_gpio_in(hpet, HPET_LEGACY_PIT_INT);
            rtc_irq = qdev_get_gpio_in(hpet, HPET_LEGACY_RTC_INT);
J
Jan Kiszka 已提交
1407
        }
1408
    }
1409
    *rtc_state = rtc_init(isa_bus, 2000, rtc_irq);
1410 1411 1412

    qemu_register_boot_set(pc_boot_set, *rtc_state);

1413 1414 1415 1416 1417 1418 1419 1420
    if (!xen_enabled()) {
        if (kvm_irqchip_in_kernel()) {
            pit = kvm_pit_init(isa_bus, 0x40);
        } else {
            pit = pit_init(isa_bus, 0x40, pit_isa_irq, pit_alt_irq);
        }
        if (hpet) {
            /* connect PIT to output control line of the HPET */
A
Andreas Färber 已提交
1421
            qdev_connect_gpio_out(hpet, 0, qdev_get_gpio_in(DEVICE(pit), 0));
1422 1423
        }
        pcspk_init(isa_bus, pit);
1424
    }
1425 1426 1427

    for(i = 0; i < MAX_SERIAL_PORTS; i++) {
        if (serial_hds[i]) {
1428
            serial_isa_init(isa_bus, i, serial_hds[i]);
1429 1430 1431 1432 1433
        }
    }

    for(i = 0; i < MAX_PARALLEL_PORTS; i++) {
        if (parallel_hds[i]) {
1434
            parallel_init(isa_bus, i, parallel_hds[i]);
1435 1436 1437
        }
    }

1438
    a20_line = qemu_allocate_irqs(handle_a20_line_change, first_cpu, 2);
1439
    i8042 = isa_create_simple(isa_bus, "i8042");
1440
    i8042_setup_a20_line(i8042, &a20_line[0]);
1441
    if (!no_vmport) {
1442 1443
        vmport_init(isa_bus);
        vmmouse = isa_try_create(isa_bus, "vmmouse");
1444 1445 1446
    } else {
        vmmouse = NULL;
    }
B
Blue Swirl 已提交
1447
    if (vmmouse) {
A
Andreas Färber 已提交
1448 1449 1450
        DeviceState *dev = DEVICE(vmmouse);
        qdev_prop_set_ptr(dev, "ps2_mouse", i8042);
        qdev_init_nofail(dev);
B
Blue Swirl 已提交
1451
    }
1452
    port92 = isa_create_simple(isa_bus, "port92");
1453
    port92_init(port92, &a20_line[1]);
B
Blue Swirl 已提交
1454

B
Blue Swirl 已提交
1455 1456
    cpu_exit_irq = qemu_allocate_irqs(cpu_request_exit, NULL, 1);
    DMA_init(0, cpu_exit_irq);
1457 1458 1459 1460

    for(i = 0; i < MAX_FD; i++) {
        fd[i] = drive_get(IF_FLOPPY, 0, i);
    }
1461
    *floppy = fdctrl_init_isa(isa_bus, fd);
1462 1463
}

1464 1465 1466 1467 1468 1469 1470 1471 1472 1473
void pc_nic_init(ISABus *isa_bus, PCIBus *pci_bus)
{
    int i;

    for (i = 0; i < nb_nics; i++) {
        NICInfo *nd = &nd_table[i];

        if (!pci_bus || (nd->model && strcmp(nd->model, "ne2k_isa") == 0)) {
            pc_init_ne2k_isa(isa_bus, nd);
        } else {
1474
            pci_nic_init_nofail(nd, pci_bus, "e1000", NULL);
1475 1476 1477 1478
        }
    }
}

1479
void pc_pci_device_init(PCIBus *pci_bus)
1480 1481 1482 1483 1484 1485 1486 1487 1488
{
    int max_bus;
    int bus;

    max_bus = drive_get_max_bus(IF_SCSI);
    for (bus = 0; bus <= max_bus; bus++) {
        pci_create_simple(pci_bus, -1, "lsi53c895a");
    }
}
1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505

void ioapic_init_gsi(GSIState *gsi_state, const char *parent_name)
{
    DeviceState *dev;
    SysBusDevice *d;
    unsigned int i;

    if (kvm_irqchip_in_kernel()) {
        dev = qdev_create(NULL, "kvm-ioapic");
    } else {
        dev = qdev_create(NULL, "ioapic");
    }
    if (parent_name) {
        object_property_add_child(object_resolve_path(parent_name, NULL),
                                  "ioapic", OBJECT(dev), NULL);
    }
    qdev_init_nofail(dev);
1506
    d = SYS_BUS_DEVICE(dev);
1507
    sysbus_mmio_map(d, 0, IO_APIC_DEFAULT_ADDRESS);
1508 1509 1510 1511 1512

    for (i = 0; i < IOAPIC_NUM_PINS; i++) {
        gsi_state->ioapic_irq[i] = qdev_get_gpio_in(dev, i);
    }
}
1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526

static void pc_generic_machine_class_init(ObjectClass *oc, void *data)
{
    MachineClass *mc = MACHINE_CLASS(oc);
    QEMUMachine *qm = data;

    mc->name = qm->name;
    mc->alias = qm->alias;
    mc->desc = qm->desc;
    mc->init = qm->init;
    mc->reset = qm->reset;
    mc->hot_add_cpu = qm->hot_add_cpu;
    mc->kvm_type = qm->kvm_type;
    mc->block_default_type = qm->block_default_type;
1527
    mc->units_per_default_bus = qm->units_per_default_bus;
1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556
    mc->max_cpus = qm->max_cpus;
    mc->no_serial = qm->no_serial;
    mc->no_parallel = qm->no_parallel;
    mc->use_virtcon = qm->use_virtcon;
    mc->use_sclp = qm->use_sclp;
    mc->no_floppy = qm->no_floppy;
    mc->no_cdrom = qm->no_cdrom;
    mc->no_sdcard = qm->no_sdcard;
    mc->is_default = qm->is_default;
    mc->default_machine_opts = qm->default_machine_opts;
    mc->default_boot_order = qm->default_boot_order;
    mc->compat_props = qm->compat_props;
    mc->hw_version = qm->hw_version;
}

void qemu_register_pc_machine(QEMUMachine *m)
{
    char *name = g_strconcat(m->name, TYPE_MACHINE_SUFFIX, NULL);
    TypeInfo ti = {
        .name       = name,
        .parent     = TYPE_PC_MACHINE,
        .class_init = pc_generic_machine_class_init,
        .class_data = (void *)m,
    };

    type_register(&ti);
    g_free(name);
}

1557 1558 1559
static void pc_dimm_plug(HotplugHandler *hotplug_dev,
                         DeviceState *dev, Error **errp)
{
1560
    int slot;
1561
    HotplugHandlerClass *hhc;
1562 1563
    Error *local_err = NULL;
    PCMachineState *pcms = PC_MACHINE(hotplug_dev);
1564
    MachineState *machine = MACHINE(hotplug_dev);
1565 1566 1567 1568 1569 1570 1571 1572 1573
    PCDIMMDevice *dimm = PC_DIMM(dev);
    PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm);
    MemoryRegion *mr = ddc->get_memory_region(dimm);
    uint64_t addr = object_property_get_int(OBJECT(dimm), PC_DIMM_ADDR_PROP,
                                            &local_err);
    if (local_err) {
        goto out;
    }

1574 1575 1576 1577 1578 1579 1580
    addr = pc_dimm_get_free_addr(pcms->hotplug_memory_base,
                                 memory_region_size(&pcms->hotplug_memory),
                                 !addr ? NULL : &addr,
                                 memory_region_size(mr), &local_err);
    if (local_err) {
        goto out;
    }
1581

1582
    object_property_set_int(OBJECT(dev), addr, PC_DIMM_ADDR_PROP, &local_err);
1583 1584 1585
    if (local_err) {
        goto out;
    }
1586
    trace_mhp_pc_dimm_assigned_address(addr);
1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601

    slot = object_property_get_int(OBJECT(dev), PC_DIMM_SLOT_PROP, &local_err);
    if (local_err) {
        goto out;
    }

    slot = pc_dimm_get_free_slot(slot == PC_DIMM_UNASSIGNED_SLOT ? NULL : &slot,
                                 machine->ram_slots, &local_err);
    if (local_err) {
        goto out;
    }
    object_property_set_int(OBJECT(dev), slot, PC_DIMM_SLOT_PROP, &local_err);
    if (local_err) {
        goto out;
    }
1602
    trace_mhp_pc_dimm_assigned_slot(slot);
1603

1604 1605 1606 1607 1608 1609
    if (!pcms->acpi_dev) {
        error_setg(&local_err,
                   "memory hotplug is not enabled: missing acpi device");
        goto out;
    }

1610 1611 1612
    memory_region_add_subregion(&pcms->hotplug_memory,
                                addr - pcms->hotplug_memory_base, mr);
    vmstate_register_ram(mr, dev);
1613 1614 1615

    hhc = HOTPLUG_HANDLER_GET_CLASS(pcms->acpi_dev);
    hhc->plug(HOTPLUG_HANDLER(pcms->acpi_dev), dev, &local_err);
1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640
out:
    error_propagate(errp, local_err);
}

static void pc_machine_device_plug_cb(HotplugHandler *hotplug_dev,
                                      DeviceState *dev, Error **errp)
{
    if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) {
        pc_dimm_plug(hotplug_dev, dev, errp);
    }
}

static HotplugHandler *pc_get_hotpug_handler(MachineState *machine,
                                             DeviceState *dev)
{
    PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(machine);

    if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) {
        return HOTPLUG_HANDLER(machine);
    }

    return pcmc->get_hotplug_handler ?
        pcmc->get_hotplug_handler(machine, dev) : NULL;
}

1641 1642 1643 1644 1645 1646 1647 1648 1649 1650
static void
pc_machine_get_hotplug_memory_region_size(Object *obj, Visitor *v, void *opaque,
                                          const char *name, Error **errp)
{
    PCMachineState *pcms = PC_MACHINE(obj);
    int64_t value = memory_region_size(&pcms->hotplug_memory);

    visit_type_int(v, &value, name, errp);
}

1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690
static void pc_machine_get_max_ram_below_4g(Object *obj, Visitor *v,
                                         void *opaque, const char *name,
                                         Error **errp)
{
    PCMachineState *pcms = PC_MACHINE(obj);
    uint64_t value = pcms->max_ram_below_4g;

    visit_type_size(v, &value, name, errp);
}

static void pc_machine_set_max_ram_below_4g(Object *obj, Visitor *v,
                                         void *opaque, const char *name,
                                         Error **errp)
{
    PCMachineState *pcms = PC_MACHINE(obj);
    Error *error = NULL;
    uint64_t value;

    visit_type_size(v, &value, name, &error);
    if (error) {
        error_propagate(errp, error);
        return;
    }
    if (value > (1ULL << 32)) {
        error_set(&error, ERROR_CLASS_GENERIC_ERROR,
                  "Machine option 'max-ram-below-4g=%"PRIu64
                  "' expects size less than or equal to 4G", value);
        error_propagate(errp, error);
        return;
    }

    if (value < (1ULL << 20)) {
        error_report("Warning: small max_ram_below_4g(%"PRIu64
                     ") less than 1M.  BIOS may not work..",
                     value);
    }

    pcms->max_ram_below_4g = value;
}

1691 1692
static void pc_machine_initfn(Object *obj)
{
1693 1694
    PCMachineState *pcms = PC_MACHINE(obj);

1695 1696 1697
    object_property_add(obj, PC_MACHINE_MEMHP_REGION_SIZE, "int",
                        pc_machine_get_hotplug_memory_region_size,
                        NULL, NULL, NULL, NULL);
1698 1699 1700 1701 1702
    pcms->max_ram_below_4g = 1ULL << 32; /* 4G */
    object_property_add(obj, PC_MACHINE_MAX_RAM_BELOW_4G, "size",
                        pc_machine_get_max_ram_below_4g,
                        pc_machine_set_max_ram_below_4g,
                        NULL, NULL, NULL);
1703 1704
}

1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715
static void pc_machine_class_init(ObjectClass *oc, void *data)
{
    MachineClass *mc = MACHINE_CLASS(oc);
    PCMachineClass *pcmc = PC_MACHINE_CLASS(oc);
    HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc);

    pcmc->get_hotplug_handler = mc->get_hotplug_handler;
    mc->get_hotplug_handler = pc_get_hotpug_handler;
    hc->plug = pc_machine_device_plug_cb;
}

1716 1717 1718 1719 1720
static const TypeInfo pc_machine_info = {
    .name = TYPE_PC_MACHINE,
    .parent = TYPE_MACHINE,
    .abstract = true,
    .instance_size = sizeof(PCMachineState),
1721
    .instance_init = pc_machine_initfn,
1722
    .class_size = sizeof(PCMachineClass),
1723 1724 1725 1726 1727
    .class_init = pc_machine_class_init,
    .interfaces = (InterfaceInfo[]) {
         { TYPE_HOTPLUG_HANDLER },
         { }
    },
1728 1729 1730 1731 1732 1733 1734 1735
};

static void pc_machine_register_types(void)
{
    type_register_static(&pc_machine_info);
}

type_init(pc_machine_register_types)