virt.c 66.9 KB
Newer Older
P
Peter Maydell 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
/*
 * ARM mach-virt emulation
 *
 * Copyright (c) 2013 Linaro Limited
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2 or later, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 * Emulate a virtual board which works by passing Linux all the information
 * it needs about what devices are present via the device tree.
 * There are some restrictions about what we can do here:
 *  + we can only present devices whose Linux drivers will work based
 *    purely on the device tree with no platform data at all
 *  + we want to present a very stripped-down minimalist platform,
 *    both because this reduces the security attack surface from the guest
 *    and also because it reduces our exposure to being broken when
 *    the kernel updates its device tree bindings and requires further
 *    information in a device binding that we aren't providing.
 * This is essentially the same approach kvmtool uses.
 */

P
Peter Maydell 已提交
31
#include "qemu/osdep.h"
32
#include "qapi/error.h"
P
Peter Maydell 已提交
33 34 35
#include "hw/sysbus.h"
#include "hw/arm/arm.h"
#include "hw/arm/primecell.h"
36
#include "hw/arm/virt.h"
37 38
#include "hw/vfio/vfio-calxeda-xgmac.h"
#include "hw/vfio/vfio-amd-xgbe.h"
P
Peter Maydell 已提交
39 40
#include "hw/devices.h"
#include "net/net.h"
41
#include "sysemu/block-backend.h"
P
Peter Maydell 已提交
42
#include "sysemu/device_tree.h"
43
#include "sysemu/numa.h"
P
Peter Maydell 已提交
44 45
#include "sysemu/sysemu.h"
#include "sysemu/kvm.h"
46
#include "hw/compat.h"
47
#include "hw/loader.h"
P
Peter Maydell 已提交
48 49 50
#include "exec/address-spaces.h"
#include "qemu/bitops.h"
#include "qemu/error-report.h"
51
#include "hw/pci-host/gpex.h"
52 53
#include "hw/arm/sysbus-fdt.h"
#include "hw/platform-bus.h"
54
#include "hw/arm/fdt.h"
55 56
#include "hw/intc/arm_gic.h"
#include "hw/intc/arm_gicv3_common.h"
57
#include "kvm_arm.h"
58
#include "hw/smbios/smbios.h"
59
#include "qapi/visitor.h"
60
#include "standard-headers/linux/input.h"
61
#include "hw/arm/smmuv3.h"
P
Peter Maydell 已提交
62

63
/*
 * Emit the QOM boilerplate for one versioned machine type named
 * "virt-<major>.<minor>": a class_init hook, a TypeInfo whose parent is
 * TYPE_VIRT_MACHINE, and a registration function hooked up via type_init().
 *
 * The expansion refers to virt_machine_<major>_<minor>_options() and
 * virt_<major>_<minor>_instance_init(), which must be provided by the
 * user of this macro.  When @latest is true the machine class is also
 * given the unversioned alias "virt".
 */
#define DEFINE_VIRT_MACHINE_LATEST(major, minor, latest) \
    static void virt_##major##_##minor##_class_init(ObjectClass *klass, \
                                                    void *data) \
    { \
        MachineClass *machine_class = MACHINE_CLASS(klass); \
        virt_machine_##major##_##minor##_options(machine_class); \
        machine_class->desc = \
            "QEMU " # major "." # minor " ARM Virtual Machine"; \
        if (latest) { \
            machine_class->alias = "virt"; \
        } \
    } \
    static const TypeInfo machvirt_##major##_##minor##_info = { \
        .name = MACHINE_TYPE_NAME("virt-" # major "." # minor), \
        .parent = TYPE_VIRT_MACHINE, \
        .instance_init = virt_##major##_##minor##_instance_init, \
        .class_init = virt_##major##_##minor##_class_init, \
    }; \
    static void machvirt_machine_##major##_##minor##_init(void) \
    { \
        type_register_static(&machvirt_##major##_##minor##_info); \
    } \
    type_init(machvirt_machine_##major##_##minor##_init);

86 87 88 89 90
/*
 * Convenience wrappers around DEFINE_VIRT_MACHINE_LATEST():
 *  - DEFINE_VIRT_MACHINE_AS_LATEST() passes latest = true
 *  - DEFINE_VIRT_MACHINE() passes latest = false
 */
#define DEFINE_VIRT_MACHINE_AS_LATEST(major, minor) \
    DEFINE_VIRT_MACHINE_LATEST(major, minor, true)
#define DEFINE_VIRT_MACHINE(major, minor) \
    DEFINE_VIRT_MACHINE_LATEST(major, minor, false)

91

92 93 94 95 96 97 98
/* Number of external interrupt lines to configure the GIC with */
#define NUM_IRQS 256

/* Number of interrupt lines set aside for the platform bus */
#define PLATFORM_BUS_NUM_IRQS 64

/*
 * File-scope platform bus parameters.
 * NOTE(review): not referenced in the part of the file reviewed here;
 * presumably filled in during machine initialisation - confirm.
 */
static ARMPlatformBusSystemParams platform_bus_params;

99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115
/* RAM limit in GB. Since VIRT_MEM starts at the 1GB mark, this means
 * RAM can go up to the 256GB mark, leaving 256GB of the physical
 * address space unallocated and free for future use between 256G and 512G.
 * If we need to provide more RAM to VMs in the future then we need to:
 *  * allocate a second bank of RAM starting at 2TB and working up
 *  * fix the DT and ACPI table generation code in QEMU to correctly
 *    report two split lumps of RAM to the guest
 *  * fix KVM in the host kernel to allow guests with >40 bit address spaces
 * (We don't want to fill all the way up to 512GB with RAM because
 * we might want it for non-RAM purposes later. Conversely it seems
 * reasonable to assume that anybody configuring a VM with a quarter
 * of a terabyte of RAM will be doing it on a host with more than a
 * terabyte of physical address space.)
 */
#define RAMLIMIT_GB 255
/* Same limit in bytes; the ULL factor forces 64-bit arithmetic */
#define RAMLIMIT_BYTES (RAMLIMIT_GB * 1024ULL * 1024 * 1024)

P
Peter Maydell 已提交
116 117 118 119 120 121 122 123 124
/* Addresses and sizes of our components.
 * 0..128MB is space for a flash device so we can run bootrom code such as UEFI.
 * 128MB..256MB is used for miscellaneous device I/O.
 * 256MB..1GB is reserved for possible future PCI support (ie where the
 * PCI memory window will go if we add a PCI host controller).
 * 1GB and up is RAM (which may happily spill over into the
 * high memory region beyond 4GB).
 * This represents a compromise between how much RAM can be given to
 * a 32 bit VM and leaving space for expansion and in particular for PCI.
P
Peter Maydell 已提交
125 126
 * Note that devices should generally be placed at multiples of 0x10000,
 * to accommodate guests using 64K pages.
P
Peter Maydell 已提交
127 128 129
 */
static const MemMapEntry a15memmap[] = {
    /* Space up to 0x8000000 is reserved for a boot ROM */
130 131
    [VIRT_FLASH] =              {          0, 0x08000000 },
    [VIRT_CPUPERIPHS] =         { 0x08000000, 0x00020000 },
P
Peter Maydell 已提交
132
    /* GIC distributor and CPU interfaces sit inside the CPU peripheral space */
133 134 135
    [VIRT_GIC_DIST] =           { 0x08000000, 0x00010000 },
    [VIRT_GIC_CPU] =            { 0x08010000, 0x00010000 },
    [VIRT_GIC_V2M] =            { 0x08020000, 0x00001000 },
136 137 138 139
    /* The space in between here is reserved for GICv3 CPU/vCPU/HYP */
    [VIRT_GIC_ITS] =            { 0x08080000, 0x00020000 },
    /* This redistributor space allows up to 2*64kB*123 CPUs */
    [VIRT_GIC_REDIST] =         { 0x080A0000, 0x00F60000 },
140 141
    [VIRT_UART] =               { 0x09000000, 0x00001000 },
    [VIRT_RTC] =                { 0x09010000, 0x00001000 },
M
Marc Marí 已提交
142
    [VIRT_FW_CFG] =             { 0x09020000, 0x00000018 },
S
Shannon Zhao 已提交
143
    [VIRT_GPIO] =               { 0x09030000, 0x00001000 },
144
    [VIRT_SECURE_UART] =        { 0x09040000, 0x00001000 },
145
    [VIRT_SMMU] =               { 0x09050000, 0x00020000 },
146
    [VIRT_MMIO] =               { 0x0a000000, 0x00000200 },
P
Peter Maydell 已提交
147
    /* ...repeating for a total of NUM_VIRTIO_TRANSPORTS, each of that size */
148
    [VIRT_PLATFORM_BUS] =       { 0x0c000000, 0x02000000 },
149
    [VIRT_SECURE_MEM] =         { 0x0e000000, 0x01000000 },
150 151 152
    [VIRT_PCIE_MMIO] =          { 0x10000000, 0x2eff0000 },
    [VIRT_PCIE_PIO] =           { 0x3eff0000, 0x00010000 },
    [VIRT_PCIE_ECAM] =          { 0x3f000000, 0x01000000 },
153
    [VIRT_MEM] =                { 0x40000000, RAMLIMIT_BYTES },
154 155
    /* Second PCIe window, 512GB wide at the 512GB boundary */
    [VIRT_PCIE_MMIO_HIGH] =   { 0x8000000000ULL, 0x8000000000ULL },
P
Peter Maydell 已提交
156 157 158 159
};

static const int a15irqmap[] = {
    [VIRT_UART] = 1,
P
Peter Maydell 已提交
160
    [VIRT_RTC] = 2,
161
    [VIRT_PCIE] = 3, /* ... to 6 */
S
Shannon Zhao 已提交
162
    [VIRT_GPIO] = 7,
163
    [VIRT_SECURE_UART] = 8,
P
Peter Maydell 已提交
164
    [VIRT_MMIO] = 16, /* ...to 16 + NUM_VIRTIO_TRANSPORTS - 1 */
165
    [VIRT_GIC_V2M] = 48, /* ...to 48 + NUM_GICV2M_SPIS - 1 */
166
    [VIRT_SMMU] = 74,    /* ...to 74 + NUM_SMMU_IRQS - 1 */
167
    [VIRT_PLATFORM_BUS] = 112, /* ...to 112 + PLATFORM_BUS_NUM_IRQS -1 */
P
Peter Maydell 已提交
168 169
};

170
static const char *valid_cpus[] = {
171 172 173 174
    ARM_CPU_TYPE_NAME("cortex-a15"),
    ARM_CPU_TYPE_NAME("cortex-a53"),
    ARM_CPU_TYPE_NAME("cortex-a57"),
    ARM_CPU_TYPE_NAME("host"),
175
    ARM_CPU_TYPE_NAME("max"),
P
Peter Maydell 已提交
176 177
};

178
static bool cpu_type_valid(const char *cpu)
P
Peter Maydell 已提交
179 180 181
{
    int i;

182 183 184
    for (i = 0; i < ARRAY_SIZE(valid_cpus); i++) {
        if (strcmp(cpu, valid_cpus[i]) == 0) {
            return true;
P
Peter Maydell 已提交
185 186
        }
    }
187
    return false;
P
Peter Maydell 已提交
188 189
}

190
/*
 * Create the device tree blob for the board and store it in vms->fdt
 * (its size goes to vms->fdt_size).  Exits the process if the tree
 * cannot be created.
 *
 * Populates the root header, empty /chosen and /memory nodes, the
 * fixed /apb-pclk clock node (whose phandle is saved in
 * vms->clock_phandle) and, when NUMA distances are available, the
 * /distance-map node.
 */
static void create_fdt(VirtMachineState *vms)
{
    static const char clk_node[] = "/apb-pclk";
    void *fdt = create_device_tree(&vms->fdt_size);

    if (fdt == NULL) {
        error_report("create_device_tree() failed");
        exit(1);
    }
    vms->fdt = fdt;

    /* Root node header */
    qemu_fdt_setprop_string(fdt, "/", "compatible", "linux,dummy-virt");
    qemu_fdt_setprop_cell(fdt, "/", "#address-cells", 0x2);
    qemu_fdt_setprop_cell(fdt, "/", "#size-cells", 0x2);

    /*
     * load_dtb fills in further properties later on, which requires
     * the /chosen and /memory nodes to be present already.
     */
    qemu_fdt_add_subnode(fdt, "/chosen");
    qemu_fdt_add_subnode(fdt, "/memory");
    qemu_fdt_setprop_string(fdt, "/memory", "device_type", "memory");

    /*
     * Clock node for the UART.  The kernel's device tree binding
     * documentation claims the PL011 clock properties are optional,
     * but in practice the kernel refuses to probe the device when
     * they are missing.
     */
    vms->clock_phandle = qemu_fdt_alloc_phandle(fdt);
    qemu_fdt_add_subnode(fdt, clk_node);
    qemu_fdt_setprop_string(fdt, clk_node, "compatible", "fixed-clock");
    qemu_fdt_setprop_cell(fdt, clk_node, "#clock-cells", 0x0);
    qemu_fdt_setprop_cell(fdt, clk_node, "clock-frequency", 24000000);
    qemu_fdt_setprop_string(fdt, clk_node, "clock-output-names", "clk24mhz");
    qemu_fdt_setprop_cell(fdt, clk_node, "phandle", vms->clock_phandle);

    if (have_numa_distance) {
        /* One big-endian (from, to, distance) triple per node pair */
        int size = nb_numa_nodes * nb_numa_nodes * 3 * sizeof(uint32_t);
        uint32_t *matrix = g_malloc0(size);
        uint32_t *cell = matrix;
        int src, dst;

        for (src = 0; src < nb_numa_nodes; src++) {
            for (dst = 0; dst < nb_numa_nodes; dst++) {
                *cell++ = cpu_to_be32(src);
                *cell++ = cpu_to_be32(dst);
                *cell++ = cpu_to_be32(numa_info[src].distance[dst]);
            }
        }

        qemu_fdt_add_subnode(fdt, "/distance-map");
        qemu_fdt_setprop_string(fdt, "/distance-map", "compatible",
                                "numa-distance-map-v1");
        qemu_fdt_setprop(fdt, "/distance-map", "distance-matrix",
                         matrix, size);
        g_free(matrix);
    }
}

251
static void fdt_add_timer_nodes(const VirtMachineState *vms)
P
Peter Maydell 已提交
252
{
253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269
    /* On real hardware these interrupts are level-triggered.
     * On KVM they were edge-triggered before host kernel version 4.4,
     * and level-triggered afterwards.
     * On emulated QEMU they are level-triggered.
     *
     * Getting the DTB info about them wrong is awkward for some
     * guest kernels:
     *  pre-4.8 ignore the DT and leave the interrupt configured
     *   with whatever the GIC reset value (or the bootloader) left it at
     *  4.8 before rc6 honour the incorrect data by programming it back
     *   into the GIC, causing problems
     *  4.8rc6 and later ignore the DT and always write "level triggered"
     *   into the GIC
     *
     * For backwards-compatibility, virt-2.8 and earlier will continue
     * to say these are edge-triggered, but later machines will report
     * the correct information.
P
Peter Maydell 已提交
270
     */
271
    ARMCPU *armcpu;
272 273 274 275 276 277
    VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(vms);
    uint32_t irqflags = GIC_FDT_IRQ_FLAGS_LEVEL_HI;

    if (vmc->claim_edge_triggered_timers) {
        irqflags = GIC_FDT_IRQ_FLAGS_EDGE_LO_HI;
    }
P
Peter Maydell 已提交
278

279
    if (vms->gic_version == 2) {
280 281
        irqflags = deposit32(irqflags, GIC_FDT_IRQ_PPI_CPU_START,
                             GIC_FDT_IRQ_PPI_CPU_WIDTH,
282
                             (1 << vms->smp_cpus) - 1);
283
    }
P
Peter Maydell 已提交
284

285
    qemu_fdt_add_subnode(vms->fdt, "/timer");
286 287 288 289

    armcpu = ARM_CPU(qemu_get_cpu(0));
    if (arm_feature(&armcpu->env, ARM_FEATURE_V8)) {
        const char compat[] = "arm,armv8-timer\0arm,armv7-timer";
290
        qemu_fdt_setprop(vms->fdt, "/timer", "compatible",
291 292
                         compat, sizeof(compat));
    } else {
293
        qemu_fdt_setprop_string(vms->fdt, "/timer", "compatible",
294 295
                                "arm,armv7-timer");
    }
296 297
    qemu_fdt_setprop(vms->fdt, "/timer", "always-on", NULL, 0);
    qemu_fdt_setprop_cells(vms->fdt, "/timer", "interrupts",
298 299 300 301
                       GIC_FDT_IRQ_TYPE_PPI, ARCH_TIMER_S_EL1_IRQ, irqflags,
                       GIC_FDT_IRQ_TYPE_PPI, ARCH_TIMER_NS_EL1_IRQ, irqflags,
                       GIC_FDT_IRQ_TYPE_PPI, ARCH_TIMER_VIRT_IRQ, irqflags,
                       GIC_FDT_IRQ_TYPE_PPI, ARCH_TIMER_NS_EL2_IRQ, irqflags);
P
Peter Maydell 已提交
302 303
}

304
static void fdt_add_cpu_nodes(const VirtMachineState *vms)
P
Peter Maydell 已提交
305 306
{
    int cpu;
307
    int addr_cells = 1;
308
    const MachineState *ms = MACHINE(vms);
309 310 311 312 313 314 315 316 317 318 319 320 321 322

    /*
     * From Documentation/devicetree/bindings/arm/cpus.txt
     *  On ARM v8 64-bit systems value should be set to 2,
     *  that corresponds to the MPIDR_EL1 register size.
     *  If MPIDR_EL1[63:32] value is equal to 0 on all CPUs
     *  in the system, #address-cells can be set to 1, since
     *  MPIDR_EL1[63:32] bits are not used for CPUs
     *  identification.
     *
     *  Here we actually don't know whether our system is 32- or 64-bit one.
     *  The simplest way to go is to examine affinity IDs of all our CPUs. If
     *  at least one of them has Aff3 populated, we set #address-cells to 2.
     */
323
    for (cpu = 0; cpu < vms->smp_cpus; cpu++) {
324 325 326 327 328 329 330
        ARMCPU *armcpu = ARM_CPU(qemu_get_cpu(cpu));

        if (armcpu->mp_affinity & ARM_AFF3_MASK) {
            addr_cells = 2;
            break;
        }
    }
P
Peter Maydell 已提交
331

332 333 334
    qemu_fdt_add_subnode(vms->fdt, "/cpus");
    qemu_fdt_setprop_cell(vms->fdt, "/cpus", "#address-cells", addr_cells);
    qemu_fdt_setprop_cell(vms->fdt, "/cpus", "#size-cells", 0x0);
P
Peter Maydell 已提交
335

336
    for (cpu = vms->smp_cpus - 1; cpu >= 0; cpu--) {
P
Peter Maydell 已提交
337 338
        char *nodename = g_strdup_printf("/cpus/cpu@%d", cpu);
        ARMCPU *armcpu = ARM_CPU(qemu_get_cpu(cpu));
339
        CPUState *cs = CPU(armcpu);
P
Peter Maydell 已提交
340

341 342 343
        qemu_fdt_add_subnode(vms->fdt, nodename);
        qemu_fdt_setprop_string(vms->fdt, nodename, "device_type", "cpu");
        qemu_fdt_setprop_string(vms->fdt, nodename, "compatible",
P
Peter Maydell 已提交
344 345
                                    armcpu->dtb_compatible);

346 347
        if (vms->psci_conduit != QEMU_PSCI_CONDUIT_DISABLED
            && vms->smp_cpus > 1) {
348
            qemu_fdt_setprop_string(vms->fdt, nodename,
P
Peter Maydell 已提交
349 350 351
                                        "enable-method", "psci");
        }

352
        if (addr_cells == 2) {
353
            qemu_fdt_setprop_u64(vms->fdt, nodename, "reg",
354 355
                                 armcpu->mp_affinity);
        } else {
356
            qemu_fdt_setprop_cell(vms->fdt, nodename, "reg",
357 358 359
                                  armcpu->mp_affinity);
        }

360 361 362
        if (ms->possible_cpus->cpus[cs->cpu_index].props.has_node_id) {
            qemu_fdt_setprop_cell(vms->fdt, nodename, "numa-node-id",
                ms->possible_cpus->cpus[cs->cpu_index].props.node_id);
363 364
        }

P
Peter Maydell 已提交
365 366 367 368
        g_free(nodename);
    }
}

369
/*
 * Add the /intc/its node for the GICv3 ITS MSI controller and record
 * its freshly allocated phandle in vms->msi_phandle.
 */
static void fdt_add_its_gic_node(VirtMachineState *vms)
{
    static const char its_node[] = "/intc/its";

    vms->msi_phandle = qemu_fdt_alloc_phandle(vms->fdt);

    qemu_fdt_add_subnode(vms->fdt, its_node);
    qemu_fdt_setprop_string(vms->fdt, its_node, "compatible",
                            "arm,gic-v3-its");
    /* Valueless (boolean) property */
    qemu_fdt_setprop(vms->fdt, its_node, "msi-controller", NULL, 0);
    qemu_fdt_setprop_sized_cells(vms->fdt, its_node, "reg",
                                 2, vms->memmap[VIRT_GIC_ITS].base,
                                 2, vms->memmap[VIRT_GIC_ITS].size);
    qemu_fdt_setprop_cell(vms->fdt, its_node, "phandle", vms->msi_phandle);
}

382
/*
 * Add the /intc/v2m node for the GICv2m MSI frame and record its
 * freshly allocated phandle in vms->msi_phandle.
 */
static void fdt_add_v2m_gic_node(VirtMachineState *vms)
{
    static const char v2m_node[] = "/intc/v2m";

    vms->msi_phandle = qemu_fdt_alloc_phandle(vms->fdt);

    qemu_fdt_add_subnode(vms->fdt, v2m_node);
    qemu_fdt_setprop_string(vms->fdt, v2m_node, "compatible",
                            "arm,gic-v2m-frame");
    /* Valueless (boolean) property */
    qemu_fdt_setprop(vms->fdt, v2m_node, "msi-controller", NULL, 0);
    qemu_fdt_setprop_sized_cells(vms->fdt, v2m_node, "reg",
                                 2, vms->memmap[VIRT_GIC_V2M].base,
                                 2, vms->memmap[VIRT_GIC_V2M].size);
    qemu_fdt_setprop_cell(vms->fdt, v2m_node, "phandle", vms->msi_phandle);
}
P
Peter Maydell 已提交
394

395
/*
 * Add the /intc interrupt controller node and make it the root
 * "interrupt-parent".  The allocated phandle is kept in vms->gic_phandle.
 *
 * For GIC version 3 the node is "arm,gic-v3" with the distributor and
 * redistributor regions (plus the maintenance interrupt when vms->virt
 * is set); any other version is described as "arm,cortex-a15-gic"
 * with the distributor and CPU interface regions.
 */
static void fdt_add_gic_node(VirtMachineState *vms)
{
    static const char intc_node[] = "/intc";
    const bool is_v3 = vms->gic_version == 3;
    /* Second "reg" entry: redistributors on v3, CPU interface otherwise */
    const int second_region = is_v3 ? VIRT_GIC_REDIST : VIRT_GIC_CPU;

    vms->gic_phandle = qemu_fdt_alloc_phandle(vms->fdt);
    qemu_fdt_setprop_cell(vms->fdt, "/", "interrupt-parent", vms->gic_phandle);

    qemu_fdt_add_subnode(vms->fdt, intc_node);
    qemu_fdt_setprop_cell(vms->fdt, intc_node, "#interrupt-cells", 3);
    qemu_fdt_setprop(vms->fdt, intc_node, "interrupt-controller", NULL, 0);
    qemu_fdt_setprop_cell(vms->fdt, intc_node, "#address-cells", 0x2);
    qemu_fdt_setprop_cell(vms->fdt, intc_node, "#size-cells", 0x2);
    qemu_fdt_setprop(vms->fdt, intc_node, "ranges", NULL, 0);

    /* 'cortex-a15-gic' means 'GIC v2' */
    qemu_fdt_setprop_string(vms->fdt, intc_node, "compatible",
                            is_v3 ? "arm,gic-v3" : "arm,cortex-a15-gic");
    qemu_fdt_setprop_sized_cells(vms->fdt, intc_node, "reg",
                                 2, vms->memmap[VIRT_GIC_DIST].base,
                                 2, vms->memmap[VIRT_GIC_DIST].size,
                                 2, vms->memmap[second_region].base,
                                 2, vms->memmap[second_region].size);

    if (is_v3 && vms->virt) {
        qemu_fdt_setprop_cells(vms->fdt, intc_node, "interrupts",
                               GIC_FDT_IRQ_TYPE_PPI, ARCH_GICV3_MAINT_IRQ,
                               GIC_FDT_IRQ_FLAGS_LEVEL_HI);
    }

    qemu_fdt_setprop_cell(vms->fdt, intc_node, "phandle", vms->gic_phandle);
}

433
static void fdt_add_pmu_nodes(const VirtMachineState *vms)
434 435 436 437 438 439 440
{
    CPUState *cpu;
    ARMCPU *armcpu;
    uint32_t irqflags = GIC_FDT_IRQ_FLAGS_LEVEL_HI;

    CPU_FOREACH(cpu) {
        armcpu = ARM_CPU(cpu);
441
        if (!arm_feature(&armcpu->env, ARM_FEATURE_PMU)) {
442 443
            return;
        }
444
        if (kvm_enabled()) {
445 446
            if (kvm_irqchip_in_kernel()) {
                kvm_arm_pmu_set_irq(cpu, PPI(VIRTUAL_PMU_IRQ));
447
            }
448
            kvm_arm_pmu_init(cpu);
449
        }
450 451
    }

452
    if (vms->gic_version == 2) {
453 454
        irqflags = deposit32(irqflags, GIC_FDT_IRQ_PPI_CPU_START,
                             GIC_FDT_IRQ_PPI_CPU_WIDTH,
455
                             (1 << vms->smp_cpus) - 1);
456 457 458
    }

    armcpu = ARM_CPU(qemu_get_cpu(0));
459
    qemu_fdt_add_subnode(vms->fdt, "/pmu");
460 461
    if (arm_feature(&armcpu->env, ARM_FEATURE_V8)) {
        const char compat[] = "arm,armv8-pmuv3";
462
        qemu_fdt_setprop(vms->fdt, "/pmu", "compatible",
463
                         compat, sizeof(compat));
464
        qemu_fdt_setprop_cells(vms->fdt, "/pmu", "interrupts",
465 466 467 468
                               GIC_FDT_IRQ_TYPE_PPI, VIRTUAL_PMU_IRQ, irqflags);
    }
}

469
/*
 * Create the ITS device, link it to @gicdev through its "parent-gicv3"
 * property, map it at the VIRT_GIC_ITS base and describe it in the
 * device tree.  Does nothing when its_class_name() returns NULL.
 */
static void create_its(VirtMachineState *vms, DeviceState *gicdev)
{
    DeviceState *its;
    const char *its_type = its_class_name();

    if (its_type == NULL) {
        /* ITS not supported: nothing to do */
        return;
    }

    its = qdev_create(NULL, its_type);
    object_property_set_link(OBJECT(its), OBJECT(gicdev), "parent-gicv3",
                             &error_abort);
    qdev_init_nofail(its);
    sysbus_mmio_map(SYS_BUS_DEVICE(its), 0, vms->memmap[VIRT_GIC_ITS].base);

    fdt_add_its_gic_node(vms);
}

489
/*
 * Create the "arm-gicv2m" MSI frame, map it at the VIRT_GIC_V2M base,
 * connect its NUM_GICV2M_SPIS outputs to consecutive entries of @pic
 * starting at the board's VIRT_GIC_V2M interrupt, and describe it in
 * the device tree.
 */
static void create_v2m(VirtMachineState *vms, qemu_irq *pic)
{
    const int base_spi = vms->irqmap[VIRT_GIC_V2M];
    DeviceState *v2m = qdev_create(NULL, "arm-gicv2m");
    int n = 0;

    sysbus_mmio_map(SYS_BUS_DEVICE(v2m), 0, vms->memmap[VIRT_GIC_V2M].base);
    qdev_prop_set_uint32(v2m, "base-spi", base_spi);
    qdev_prop_set_uint32(v2m, "num-spi", NUM_GICV2M_SPIS);
    qdev_init_nofail(v2m);

    while (n < NUM_GICV2M_SPIS) {
        sysbus_connect_irq(SYS_BUS_DEVICE(v2m), n, pic[base_spi + n]);
        n++;
    }

    fdt_add_v2m_gic_node(vms);
}

508
/*
 * Create a standalone GIC of the version selected by vms->gic_version,
 * wire it up to every CPU, fill @pic with its NUM_IRQS external
 * interrupt inputs and add the matching device tree nodes.  Depending
 * on the version (and vms->its) an ITS or a GICv2m frame is created
 * as well.
 */
static void create_gic(VirtMachineState *vms, qemu_irq *pic)
{
    const int version = vms->gic_version;
    const char *gictype;
    DeviceState *gicdev;
    SysBusDevice *gicbusdev;
    int i;

    if (version == 3) {
        gictype = gicv3_class_name();
    } else {
        gictype = gic_class_name();
    }

    gicdev = qdev_create(NULL, gictype);
    qdev_prop_set_uint32(gicdev, "revision", version);
    qdev_prop_set_uint32(gicdev, "num-cpu", smp_cpus);
    /*
     * "num-irq" counts internal as well as external interrupts; the GIC
     * spec mandates that there are always 32 of the former.
     */
    qdev_prop_set_uint32(gicdev, "num-irq", NUM_IRQS + 32);
    if (!kvm_irqchip_in_kernel()) {
        qdev_prop_set_bit(gicdev, "has-security-extensions", vms->secure);
    }
    qdev_init_nofail(gicdev);

    gicbusdev = SYS_BUS_DEVICE(gicdev);
    sysbus_mmio_map(gicbusdev, 0, vms->memmap[VIRT_GIC_DIST].base);
    /* Region 1 is the redistributor on v3, the CPU interface otherwise */
    sysbus_mmio_map(gicbusdev, 1,
                    version == 3 ? vms->memmap[VIRT_GIC_REDIST].base
                                 : vms->memmap[VIRT_GIC_CPU].base);

    /*
     * Per CPU: route the generic timer outputs, the GICv3 maintenance
     * interrupt and the PMU interrupt to the matching GIC PPI inputs,
     * and the GIC's IRQ/FIQ/VIRQ/VFIQ outputs to the CPU's inputs.
     */
    for (i = 0; i < smp_cpus; i++) {
        /* CPU timer output line -> GIC PPI used for it on this board */
        const int timer_irq[] = {
            [GTIMER_PHYS] = ARCH_TIMER_NS_EL1_IRQ,
            [GTIMER_VIRT] = ARCH_TIMER_VIRT_IRQ,
            [GTIMER_HYP]  = ARCH_TIMER_NS_EL2_IRQ,
            [GTIMER_SEC]  = ARCH_TIMER_S_EL1_IRQ,
        };
        DeviceState *cpudev = DEVICE(qemu_get_cpu(i));
        int ppibase = NUM_IRQS + i * GIC_INTERNAL + GIC_NR_SGIS;
        int line;

        for (line = 0; line < ARRAY_SIZE(timer_irq); line++) {
            qdev_connect_gpio_out(cpudev, line,
                                  qdev_get_gpio_in(gicdev,
                                                   ppibase + timer_irq[line]));
        }

        qdev_connect_gpio_out_named(cpudev, "gicv3-maintenance-interrupt", 0,
                                    qdev_get_gpio_in(gicdev, ppibase
                                                     + ARCH_GICV3_MAINT_IRQ));
        qdev_connect_gpio_out_named(cpudev, "pmu-interrupt", 0,
                                    qdev_get_gpio_in(gicdev, ppibase
                                                     + VIRTUAL_PMU_IRQ));

        /* GIC outputs come in four banks of smp_cpus lines each */
        sysbus_connect_irq(gicbusdev, i,
                           qdev_get_gpio_in(cpudev, ARM_CPU_IRQ));
        sysbus_connect_irq(gicbusdev, i + smp_cpus,
                           qdev_get_gpio_in(cpudev, ARM_CPU_FIQ));
        sysbus_connect_irq(gicbusdev, i + 2 * smp_cpus,
                           qdev_get_gpio_in(cpudev, ARM_CPU_VIRQ));
        sysbus_connect_irq(gicbusdev, i + 3 * smp_cpus,
                           qdev_get_gpio_in(cpudev, ARM_CPU_VFIQ));
    }

    for (i = 0; i < NUM_IRQS; i++) {
        pic[i] = qdev_get_gpio_in(gicdev, i);
    }

    fdt_add_gic_node(vms);

    if (version == 2) {
        create_v2m(vms, pic);
    } else if (version == 3 && vms->its) {
        create_its(vms, gicdev);
    }
}

590
static void create_uart(const VirtMachineState *vms, qemu_irq *pic, int uart,
591
                        MemoryRegion *mem, Chardev *chr)
P
Peter Maydell 已提交
592 593
{
    char *nodename;
594 595 596
    hwaddr base = vms->memmap[uart].base;
    hwaddr size = vms->memmap[uart].size;
    int irq = vms->irqmap[uart];
P
Peter Maydell 已提交
597 598
    const char compat[] = "arm,pl011\0arm,primecell";
    const char clocknames[] = "uartclk\0apb_pclk";
599 600
    DeviceState *dev = qdev_create(NULL, "pl011");
    SysBusDevice *s = SYS_BUS_DEVICE(dev);
P
Peter Maydell 已提交
601

X
xiaoqiang zhao 已提交
602
    qdev_prop_set_chr(dev, "chardev", chr);
603 604 605 606
    qdev_init_nofail(dev);
    memory_region_add_subregion(mem, base,
                                sysbus_mmio_get_region(s, 0));
    sysbus_connect_irq(s, 0, pic[irq]);
P
Peter Maydell 已提交
607 608

    nodename = g_strdup_printf("/pl011@%" PRIx64, base);
609
    qemu_fdt_add_subnode(vms->fdt, nodename);
P
Peter Maydell 已提交
610
    /* Note that we can't use setprop_string because of the embedded NUL */
611
    qemu_fdt_setprop(vms->fdt, nodename, "compatible",
P
Peter Maydell 已提交
612
                         compat, sizeof(compat));
613
    qemu_fdt_setprop_sized_cells(vms->fdt, nodename, "reg",
P
Peter Maydell 已提交
614
                                     2, base, 2, size);
615
    qemu_fdt_setprop_cells(vms->fdt, nodename, "interrupts",
P
Peter Maydell 已提交
616
                               GIC_FDT_IRQ_TYPE_SPI, irq,
617
                               GIC_FDT_IRQ_FLAGS_LEVEL_HI);
618 619 620
    qemu_fdt_setprop_cells(vms->fdt, nodename, "clocks",
                               vms->clock_phandle, vms->clock_phandle);
    qemu_fdt_setprop(vms->fdt, nodename, "clock-names",
P
Peter Maydell 已提交
621
                         clocknames, sizeof(clocknames));
622

623
    if (uart == VIRT_UART) {
624
        qemu_fdt_setprop_string(vms->fdt, "/chosen", "stdout-path", nodename);
625 626
    } else {
        /* Mark as not usable by the normal world */
627 628
        qemu_fdt_setprop_string(vms->fdt, nodename, "status", "disabled");
        qemu_fdt_setprop_string(vms->fdt, nodename, "secure-status", "okay");
629 630
    }

P
Peter Maydell 已提交
631 632 633
    g_free(nodename);
}

634
/*
 * Create the PL031 RTC at the VIRT_RTC address, connected to the
 * VIRT_RTC interrupt, and add its device tree node.
 */
static void create_rtc(const VirtMachineState *vms, qemu_irq *pic)
{
    /* String list with an embedded NUL, so it is written with setprop */
    const char compat[] = "arm,pl031\0arm,primecell";
    const hwaddr base = vms->memmap[VIRT_RTC].base;
    const hwaddr size = vms->memmap[VIRT_RTC].size;
    const int irq = vms->irqmap[VIRT_RTC];
    char *nodename;

    sysbus_create_simple("pl031", base, pic[irq]);

    nodename = g_strdup_printf("/pl031@%" PRIx64, base);
    qemu_fdt_add_subnode(vms->fdt, nodename);
    qemu_fdt_setprop(vms->fdt, nodename, "compatible", compat, sizeof(compat));
    qemu_fdt_setprop_sized_cells(vms->fdt, nodename, "reg", 2, base, 2, size);
    qemu_fdt_setprop_cells(vms->fdt, nodename, "interrupts",
                           GIC_FDT_IRQ_TYPE_SPI, irq,
                           GIC_FDT_IRQ_FLAGS_LEVEL_HI);
    qemu_fdt_setprop_cell(vms->fdt, nodename, "clocks", vms->clock_phandle);
    qemu_fdt_setprop_string(vms->fdt, nodename, "clock-names", "apb_pclk");

    g_free(nodename);
}

657
/* The "gpio-key" device; assigned by create_gpio() */
static DeviceState *gpio_key_dev;

/*
 * Powerdown notifier callback: signal a power button event by raising
 * input 0 of the gpio-key device (create_gpio() connects that device
 * to pin 3 of the PL061).
 */
static void virt_powerdown_req(Notifier *n, void *opaque)
{
    qemu_set_irq(qdev_get_gpio_in(gpio_key_dev, 0), 1);
}

/* Registered with qemu_register_powerdown_notifier() by create_gpio() */
static Notifier virt_system_powerdown_notifier = {
    .notify = virt_powerdown_req,
};

668
/*
 * Create the PL061 GPIO controller plus a "gpio-key" device attached
 * to its pin 3, describe both in the device tree (the key as a
 * "gpio-keys" power-off button) and register the powerdown notifier.
 */
static void create_gpio(const VirtMachineState *vms, qemu_irq *pic)
{
    static const char keys_node[] = "/gpio-keys";
    static const char poweroff_node[] = "/gpio-keys/poweroff";
    /* String list with an embedded NUL, so it is written with setprop */
    const char compat[] = "arm,pl061\0arm,primecell";
    const hwaddr base = vms->memmap[VIRT_GPIO].base;
    const hwaddr size = vms->memmap[VIRT_GPIO].size;
    const int irq = vms->irqmap[VIRT_GPIO];
    DeviceState *pl061_dev;
    uint32_t phandle;
    char *nodename;

    pl061_dev = sysbus_create_simple("pl061", base, pic[irq]);

    /* GPIO controller node */
    phandle = qemu_fdt_alloc_phandle(vms->fdt);
    nodename = g_strdup_printf("/pl061@%" PRIx64, base);
    qemu_fdt_add_subnode(vms->fdt, nodename);
    qemu_fdt_setprop_sized_cells(vms->fdt, nodename, "reg", 2, base, 2, size);
    qemu_fdt_setprop(vms->fdt, nodename, "compatible", compat, sizeof(compat));
    qemu_fdt_setprop_cell(vms->fdt, nodename, "#gpio-cells", 2);
    qemu_fdt_setprop(vms->fdt, nodename, "gpio-controller", NULL, 0);
    qemu_fdt_setprop_cells(vms->fdt, nodename, "interrupts",
                           GIC_FDT_IRQ_TYPE_SPI, irq,
                           GIC_FDT_IRQ_FLAGS_LEVEL_HI);
    qemu_fdt_setprop_cell(vms->fdt, nodename, "clocks", vms->clock_phandle);
    qemu_fdt_setprop_string(vms->fdt, nodename, "clock-names", "apb_pclk");
    qemu_fdt_setprop_cell(vms->fdt, nodename, "phandle", phandle);

    /* Power button: a gpio-key device driving PL061 pin 3 */
    gpio_key_dev = sysbus_create_simple("gpio-key", -1,
                                        qdev_get_gpio_in(pl061_dev, 3));
    qemu_fdt_add_subnode(vms->fdt, keys_node);
    qemu_fdt_setprop_string(vms->fdt, keys_node, "compatible", "gpio-keys");
    qemu_fdt_setprop_cell(vms->fdt, keys_node, "#size-cells", 0);
    qemu_fdt_setprop_cell(vms->fdt, keys_node, "#address-cells", 1);

    qemu_fdt_add_subnode(vms->fdt, poweroff_node);
    qemu_fdt_setprop_string(vms->fdt, poweroff_node,
                            "label", "GPIO Key Poweroff");
    qemu_fdt_setprop_cell(vms->fdt, poweroff_node, "linux,code", KEY_POWER);
    qemu_fdt_setprop_cells(vms->fdt, poweroff_node, "gpios", phandle, 3, 0);

    /* connect powerdown request */
    qemu_register_powerdown_notifier(&virt_system_powerdown_notifier);

    g_free(nodename);
}

715
/* Instantiate all NUM_VIRTIO_TRANSPORTS virtio-mmio transports and add a
 * device tree node for each of them.
 *
 * @vms: machine state providing the VIRT_MMIO memory map entry, the first
 *       VIRT_MMIO interrupt number and the fdt under construction
 * @pic: interrupt lines, indexed by board interrupt number
 */
static void create_virtio_devices(const VirtMachineState *vms, qemu_irq *pic)
{
    const hwaddr mmio_base = vms->memmap[VIRT_MMIO].base;
    const hwaddr slot_size = vms->memmap[VIRT_MMIO].size;
    const int first_irq = vms->irqmap[VIRT_MMIO];
    int slot;

    /* The transports are created in forwards order. qbus_realize() prepends
     * (rather than appends) new child buses, so this incrementing loop
     * yields a list of virtio-mmio buses with decreasing base addresses.
     *
     * When a -device option from the command line is processed,
     * qbus_find_recursive() picks the next free virtio-mmio bus in forwards
     * order. As a result, -device options in increasing command line order
     * land on virtio-mmio buses with decreasing base addresses.
     *
     * At the time this code was first written, that arrangement made the
     * guest Linux kernel hand the lowest "name" (/dev/vda, eth0, etc) to the
     * first -device on the command line. (The end-to-end order depends on
     * this loop, qbus_realize(), qbus_find_recursive(), and the guest
     * kernel's name-to-address assignment strategy.)
     *
     * Since then the kernel's traversal seems to have been reversed; see eg.
     * the message, if not necessarily the code, of commit 70161ff336.
     * So the loop now produces the inverse of the original intent.
     *
     * Reversing the loop to counteract the kernel change is not an option:
     * it would break existing command lines.
     *
     * In any case, the kernel gives no guarantee about the stability of the
     * enumeration order of virtio devices (as shown by it changing between
     * kernel versions). For reliable and stable identification of disks
     * users must use UUIDs or similar mechanisms.
     */
    for (slot = 0; slot < NUM_VIRTIO_TRANSPORTS; slot++) {
        sysbus_create_simple("virtio-mmio", mmio_base + slot * slot_size,
                             pic[first_irq + slot]);
    }

    /* The dtb nodes are added in reverse order so that the finished device
     * tree lists them lowest address first.
     *
     * This mapping is independent of the loop above: that loop influences
     * which virtio device ends up on which transport, whereas this one only
     * controls how the transports are laid out in the dtb.
     */
    slot = NUM_VIRTIO_TRANSPORTS;
    while (slot-- > 0) {
        int irq = first_irq + slot;
        hwaddr base = mmio_base + slot * slot_size;
        char *nodename = g_strdup_printf("/virtio_mmio@%" PRIx64, base);

        qemu_fdt_add_subnode(vms->fdt, nodename);
        qemu_fdt_setprop_string(vms->fdt, nodename,
                                "compatible", "virtio,mmio");
        qemu_fdt_setprop_sized_cells(vms->fdt, nodename, "reg",
                                     2, base, 2, slot_size);
        qemu_fdt_setprop_cells(vms->fdt, nodename, "interrupts",
                               GIC_FDT_IRQ_TYPE_SPI, irq,
                               GIC_FDT_IRQ_FLAGS_EDGE_LO_HI);
        qemu_fdt_setprop(vms->fdt, nodename, "dma-coherent", NULL, 0);
        g_free(nodename);
    }
}

780
/* Create and map a single flash device. The parameters are the same as
 * those of the flash devices on the Versatile Express board.
 *
 * @name:      value for the device's "name" property
 * @flashbase: address within @sysmem at which the device is mapped
 * @flashsize: size of the device; divided by the 256KiB sector length to
 *             obtain "num-blocks"
 * @file:      optional ROM image (looked up as a BIOS file) to load into
 *             the device, or NULL; exits if it cannot be found or loaded,
 *             or if pflash drive 0 is also configured
 * @sysmem:    memory region the flash is mapped into
 */
static void create_one_flash(const char *name, hwaddr flashbase,
                             hwaddr flashsize, const char *file,
                             MemoryRegion *sysmem)
{
    const uint64_t sectorlength = 256 * 1024;
    DriveInfo *dinfo = drive_get_next(IF_PFLASH);
    DeviceState *dev = qdev_create(NULL, "cfi.pflash01");
    SysBusDevice *sbd = SYS_BUS_DEVICE(dev);
    MemoryRegion *flash_mr;
    char *path;
    int loaded;

    /* Back the device with the next pflash drive, if there is one */
    if (dinfo) {
        qdev_prop_set_drive(dev, "drive", blk_by_legacy_dinfo(dinfo),
                            &error_abort);
    }

    qdev_prop_set_uint32(dev, "num-blocks", flashsize / sectorlength);
    qdev_prop_set_uint64(dev, "sector-length", sectorlength);
    qdev_prop_set_uint8(dev, "width", 4);
    qdev_prop_set_uint8(dev, "device-width", 2);
    qdev_prop_set_bit(dev, "big-endian", false);
    qdev_prop_set_uint16(dev, "id0", 0x89);
    qdev_prop_set_uint16(dev, "id1", 0x18);
    qdev_prop_set_uint16(dev, "id2", 0x00);
    qdev_prop_set_uint16(dev, "id3", 0x00);
    qdev_prop_set_string(dev, "name", name);
    qdev_init_nofail(dev);

    flash_mr = sysbus_mmio_get_region(sbd, 0);
    memory_region_add_subregion(sysmem, flashbase, flash_mr);

    if (!file) {
        return;
    }

    /* An image file and an explicit first pflash drive are exclusive */
    if (drive_get(IF_PFLASH, 0, 0)) {
        error_report("The contents of the first flash device may be "
                     "specified with -bios or with -drive if=pflash... "
                     "but you cannot use both options at once");
        exit(1);
    }

    path = qemu_find_file(QEMU_FILE_TYPE_BIOS, file);
    if (!path) {
        error_report("Could not find ROM image '%s'", file);
        exit(1);
    }

    loaded = load_image_mr(path, flash_mr);
    g_free(path);
    if (loaded < 0) {
        error_report("Could not load ROM image '%s'", file);
        exit(1);
    }
}

836
/* Create two flash devices that together fill the VIRT_FLASH space in the
 * memmap, and describe them in the device tree.
 *
 * Any file passed via -bios goes in the first of the two devices.
 *
 * @sysmem is the system memory space. @secure_sysmem is the secure view of
 * the system, and the first flash device should be made visible only there.
 * The second flash device is visible to both secure and nonsecure.
 * If sysmem == secure_sysmem there is no separate Secure address space and
 * both flash devices are generally visible.
 */
static void create_flash(const VirtMachineState *vms,
                         MemoryRegion *sysmem,
                         MemoryRegion *secure_sysmem)
{
    const hwaddr flashsize = vms->memmap[VIRT_FLASH].size / 2;
    const hwaddr flashbase = vms->memmap[VIRT_FLASH].base;
    const hwaddr flash1base = flashbase + flashsize;
    char *nodename;

    create_one_flash("virt.flash0", flashbase, flashsize,
                     bios_name, secure_sysmem);
    create_one_flash("virt.flash1", flash1base, flashsize,
                     NULL, sysmem);

    if (sysmem == secure_sysmem) {
        /* No separate Secure view: one DT node reports both devices */
        nodename = g_strdup_printf("/flash@%" PRIx64, flashbase);
        qemu_fdt_add_subnode(vms->fdt, nodename);
        qemu_fdt_setprop_string(vms->fdt, nodename, "compatible", "cfi-flash");
        qemu_fdt_setprop_sized_cells(vms->fdt, nodename, "reg",
                                     2, flashbase, 2, flashsize,
                                     2, flash1base, 2, flashsize);
        qemu_fdt_setprop_cell(vms->fdt, nodename, "bank-width", 4);
        g_free(nodename);
        return;
    }

    /* Otherwise the devices get separate nodes, so that the first one can
     * be marked as only visible to the secure world.
     */
    nodename = g_strdup_printf("/secflash@%" PRIx64, flashbase);
    qemu_fdt_add_subnode(vms->fdt, nodename);
    qemu_fdt_setprop_string(vms->fdt, nodename, "compatible", "cfi-flash");
    qemu_fdt_setprop_sized_cells(vms->fdt, nodename, "reg",
                                 2, flashbase, 2, flashsize);
    qemu_fdt_setprop_cell(vms->fdt, nodename, "bank-width", 4);
    qemu_fdt_setprop_string(vms->fdt, nodename, "status", "disabled");
    qemu_fdt_setprop_string(vms->fdt, nodename, "secure-status", "okay");
    g_free(nodename);

    /* Note: this node is named after flashbase but its "reg" describes
     * the second device.
     */
    nodename = g_strdup_printf("/flash@%" PRIx64, flashbase);
    qemu_fdt_add_subnode(vms->fdt, nodename);
    qemu_fdt_setprop_string(vms->fdt, nodename, "compatible", "cfi-flash");
    qemu_fdt_setprop_sized_cells(vms->fdt, nodename, "reg",
                                 2, flash1base, 2, flashsize);
    qemu_fdt_setprop_cell(vms->fdt, nodename, "bank-width", 4);
    g_free(nodename);
}

A
Andrew Jones 已提交
891
/* Create the memory-mapped fw_cfg device at the VIRT_FW_CFG memory map
 * entry, publish the CPU count through it and add its device tree node.
 *
 * @vms: machine state providing the memory map and the fdt
 * @as:  address space handed to fw_cfg_init_mem_wide()
 *
 * Returns the newly created fw_cfg device.
 */
static FWCfgState *create_fw_cfg(const VirtMachineState *vms, AddressSpace *as)
{
    const hwaddr base = vms->memmap[VIRT_FW_CFG].base;
    const hwaddr size = vms->memmap[VIRT_FW_CFG].size;
    char *nodename = g_strdup_printf("/fw-cfg@%" PRIx64, base);
    FWCfgState *fw_cfg;

    /* Registers sit at base, base + 8 and base + 16; 8 is the data width */
    fw_cfg = fw_cfg_init_mem_wide(base + 8, base, 8, base + 16, as);
    fw_cfg_add_i16(fw_cfg, FW_CFG_NB_CPUS, (uint16_t)smp_cpus);

    qemu_fdt_add_subnode(vms->fdt, nodename);
    qemu_fdt_setprop_string(vms->fdt, nodename,
                            "compatible", "qemu,fw-cfg-mmio");
    qemu_fdt_setprop_sized_cells(vms->fdt, nodename, "reg",
                                 2, base, 2, size);
    qemu_fdt_setprop(vms->fdt, nodename, "dma-coherent", NULL, 0);
    g_free(nodename);

    return fw_cfg;
}

912
static void create_pcie_irq_map(const VirtMachineState *vms,
913
                                uint32_t gic_phandle,
914 915 916
                                int first_irq, const char *nodename)
{
    int devfn, pin;
917
    uint32_t full_irq_map[4 * 4 * 10] = { 0 };
918 919 920 921 922 923 924 925 926 927 928 929
    uint32_t *irq_map = full_irq_map;

    for (devfn = 0; devfn <= 0x18; devfn += 0x8) {
        for (pin = 0; pin < 4; pin++) {
            int irq_type = GIC_FDT_IRQ_TYPE_SPI;
            int irq_nr = first_irq + ((pin + PCI_SLOT(devfn)) % PCI_NUM_PINS);
            int irq_level = GIC_FDT_IRQ_FLAGS_LEVEL_HI;
            int i;

            uint32_t map[] = {
                devfn << 8, 0, 0,                           /* devfn */
                pin + 1,                                    /* PCI pin */
930
                gic_phandle, 0, 0, irq_type, irq_nr, irq_level }; /* GIC irq */
931 932

            /* Convert map to big endian */
933
            for (i = 0; i < 10; i++) {
934 935
                irq_map[i] = cpu_to_be32(map[i]);
            }
936
            irq_map += 10;
937 938 939
        }
    }

940
    qemu_fdt_setprop(vms->fdt, nodename, "interrupt-map",
941 942
                     full_irq_map, sizeof(full_irq_map));

943
    qemu_fdt_setprop_cells(vms->fdt, nodename, "interrupt-map-mask",
944 945 946 947
                           0x1800, 0, 0, /* devfn (PCI_SLOT(3)) */
                           0x7           /* PCI irq */);
}

948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998
/* Create the SMMUv3 device attached to @bus and add its device tree node.
 *
 * Nothing is done unless the machine's iommu setting is VIRT_IOMMU_SMMUV3
 * and an iommu phandle has already been allocated.
 *
 * @vms: machine state providing the memory map, interrupt map, fdt, clock
 *       phandle and iommu phandle
 * @pic: interrupt lines, indexed by board interrupt number
 * @bus: PCI bus set as the device's "primary-bus" link
 */
static void create_smmu(const VirtMachineState *vms, qemu_irq *pic,
                        PCIBus *bus)
{
    const char compat[] = "arm,smmu-v3";
    const char irq_names[] = "eventq\0priq\0cmdq-sync\0gerror";
    const hwaddr base = vms->memmap[VIRT_SMMU].base;
    const hwaddr size = vms->memmap[VIRT_SMMU].size;
    const int irq = vms->irqmap[VIRT_SMMU];
    DeviceState *dev;
    SysBusDevice *sbd;
    char *node;
    int n;

    if (!(vms->iommu == VIRT_IOMMU_SMMUV3 && vms->iommu_phandle)) {
        return;
    }

    dev = qdev_create(NULL, "arm-smmuv3");
    object_property_set_link(OBJECT(dev), OBJECT(bus), "primary-bus",
                             &error_abort);
    qdev_init_nofail(dev);

    sbd = SYS_BUS_DEVICE(dev);
    sysbus_mmio_map(sbd, 0, base);
    for (n = 0; n < NUM_SMMU_IRQS; n++) {
        sysbus_connect_irq(sbd, n, pic[irq + n]);
    }

    node = g_strdup_printf("/smmuv3@%" PRIx64, base);
    qemu_fdt_add_subnode(vms->fdt, node);
    qemu_fdt_setprop(vms->fdt, node, "compatible", compat, sizeof(compat));
    qemu_fdt_setprop_sized_cells(vms->fdt, node, "reg", 2, base, 2, size);

    /* Four interrupts, listed in the same order as irq_names */
    qemu_fdt_setprop_cells(vms->fdt, node, "interrupts",
            GIC_FDT_IRQ_TYPE_SPI, irq    , GIC_FDT_IRQ_FLAGS_EDGE_LO_HI,
            GIC_FDT_IRQ_TYPE_SPI, irq + 1, GIC_FDT_IRQ_FLAGS_EDGE_LO_HI,
            GIC_FDT_IRQ_TYPE_SPI, irq + 2, GIC_FDT_IRQ_FLAGS_EDGE_LO_HI,
            GIC_FDT_IRQ_TYPE_SPI, irq + 3, GIC_FDT_IRQ_FLAGS_EDGE_LO_HI);

    /* irq_names holds several NUL-separated strings, hence sizeof */
    qemu_fdt_setprop(vms->fdt, node, "interrupt-names", irq_names,
                     sizeof(irq_names));

    qemu_fdt_setprop_cell(vms->fdt, node, "clocks", vms->clock_phandle);
    qemu_fdt_setprop_string(vms->fdt, node, "clock-names", "apb_pclk");
    qemu_fdt_setprop(vms->fdt, node, "dma-coherent", NULL, 0);

    qemu_fdt_setprop_cell(vms->fdt, node, "#iommu-cells", 1);

    qemu_fdt_setprop_cell(vms->fdt, node, "phandle", vms->iommu_phandle);
    g_free(node);
}

/* Create the generic PCIe host bridge (TYPE_GPEX_HOST), map its ECAM, MMIO
 * and IO port windows, wire up its interrupts, plug the configured NICs
 * into its bus and describe it in the device tree.
 *
 * When vms->iommu is set, an iommu phandle is allocated, create_smmu() is
 * called and an "iommu-map" property is added to the host bridge node.
 *
 * @vms: machine state; vms->iommu_phandle may be written
 * @pic: interrupt lines, indexed by board interrupt number
 */
static void create_pcie(VirtMachineState *vms, qemu_irq *pic)
{
    const hwaddr mmio_base = vms->memmap[VIRT_PCIE_MMIO].base;
    const hwaddr mmio_size = vms->memmap[VIRT_PCIE_MMIO].size;
    const hwaddr mmio_high_base = vms->memmap[VIRT_PCIE_MMIO_HIGH].base;
    const hwaddr mmio_high_size = vms->memmap[VIRT_PCIE_MMIO_HIGH].size;
    const hwaddr pio_base = vms->memmap[VIRT_PCIE_PIO].base;
    const hwaddr pio_size = vms->memmap[VIRT_PCIE_PIO].size;
    const hwaddr ecam_base = vms->memmap[VIRT_PCIE_ECAM].base;
    const hwaddr ecam_size = vms->memmap[VIRT_PCIE_ECAM].size;
    const int bus_count = ecam_size / PCIE_MMCFG_SIZE_MIN;
    const int first_irq = vms->irqmap[VIRT_PCIE];
    MemoryRegion *sysmem = get_system_memory();
    MemoryRegion *ecam_region, *ecam_window;
    MemoryRegion *mmio_region, *mmio_window;
    DeviceState *dev;
    SysBusDevice *sbd;
    PCIHostState *host;
    char *nodename;
    int n;

    dev = qdev_create(NULL, TYPE_GPEX_HOST);
    qdev_init_nofail(dev);
    sbd = SYS_BUS_DEVICE(dev);

    /* Map only the first ecam_size bytes of ECAM space */
    ecam_window = g_new0(MemoryRegion, 1);
    ecam_region = sysbus_mmio_get_region(sbd, 0);
    memory_region_init_alias(ecam_window, OBJECT(dev), "pcie-ecam",
                             ecam_region, 0, ecam_size);
    memory_region_add_subregion(sysmem, ecam_base, ecam_window);

    /* Map the MMIO window into system address space so as to expose
     * the section of PCI MMIO space which starts at the same base address
     * (ie 1:1 mapping for that part of PCI MMIO space visible through
     * the window).
     */
    mmio_window = g_new0(MemoryRegion, 1);
    mmio_region = sysbus_mmio_get_region(sbd, 1);
    memory_region_init_alias(mmio_window, OBJECT(dev), "pcie-mmio",
                             mmio_region, mmio_base, mmio_size);
    memory_region_add_subregion(sysmem, mmio_base, mmio_window);

    if (vms->highmem) {
        /* Map high MMIO space */
        MemoryRegion *mmio_high_window = g_new0(MemoryRegion, 1);

        memory_region_init_alias(mmio_high_window, OBJECT(dev),
                                 "pcie-mmio-high",
                                 mmio_region, mmio_high_base, mmio_high_size);
        memory_region_add_subregion(sysmem, mmio_high_base,
                                    mmio_high_window);
    }

    /* Map IO port space */
    sysbus_mmio_map(sbd, 2, pio_base);

    for (n = 0; n < GPEX_NUM_IRQS; n++) {
        sysbus_connect_irq(sbd, n, pic[first_irq + n]);
        gpex_set_irq_num(GPEX_HOST(dev), n, first_irq + n);
    }

    /* Plug the configured NICs into the bus; the model defaults to virtio */
    host = PCI_HOST_BRIDGE(dev);
    if (host->bus) {
        for (n = 0; n < nb_nics; n++) {
            NICInfo *nic = &nd_table[n];

            if (!nic->model) {
                nic->model = g_strdup("virtio");
            }

            pci_nic_init_nofail(nic, host->bus, nic->model, NULL);
        }
    }

    /* The node is named after the base of the 32-bit MMIO window */
    nodename = g_strdup_printf("/pcie@%" PRIx64, mmio_base);
    qemu_fdt_add_subnode(vms->fdt, nodename);
    qemu_fdt_setprop_string(vms->fdt, nodename,
                            "compatible", "pci-host-ecam-generic");
    qemu_fdt_setprop_string(vms->fdt, nodename, "device_type", "pci");
    qemu_fdt_setprop_cell(vms->fdt, nodename, "#address-cells", 3);
    qemu_fdt_setprop_cell(vms->fdt, nodename, "#size-cells", 2);
    qemu_fdt_setprop_cell(vms->fdt, nodename, "linux,pci-domain", 0);
    qemu_fdt_setprop_cells(vms->fdt, nodename, "bus-range", 0,
                           bus_count - 1);
    qemu_fdt_setprop(vms->fdt, nodename, "dma-coherent", NULL, 0);

    if (vms->msi_phandle) {
        qemu_fdt_setprop_cells(vms->fdt, nodename, "msi-parent",
                               vms->msi_phandle);
    }

    qemu_fdt_setprop_sized_cells(vms->fdt, nodename, "reg",
                                 2, ecam_base, 2, ecam_size);

    /* The 64-bit MMIO range is only advertised when highmem is enabled */
    if (vms->highmem) {
        qemu_fdt_setprop_sized_cells(vms->fdt, nodename, "ranges",
                                     1, FDT_PCI_RANGE_IOPORT, 2, 0,
                                     2, pio_base, 2, pio_size,
                                     1, FDT_PCI_RANGE_MMIO, 2, mmio_base,
                                     2, mmio_base, 2, mmio_size,
                                     1, FDT_PCI_RANGE_MMIO_64BIT,
                                     2, mmio_high_base,
                                     2, mmio_high_base, 2, mmio_high_size);
    } else {
        qemu_fdt_setprop_sized_cells(vms->fdt, nodename, "ranges",
                                     1, FDT_PCI_RANGE_IOPORT, 2, 0,
                                     2, pio_base, 2, pio_size,
                                     1, FDT_PCI_RANGE_MMIO, 2, mmio_base,
                                     2, mmio_base, 2, mmio_size);
    }

    qemu_fdt_setprop_cell(vms->fdt, nodename, "#interrupt-cells", 1);
    create_pcie_irq_map(vms, vms->gic_phandle, first_irq, nodename);

    if (vms->iommu) {
        vms->iommu_phandle = qemu_fdt_alloc_phandle(vms->fdt);

        create_smmu(vms, pic, host->bus);

        qemu_fdt_setprop_cells(vms->fdt, nodename, "iommu-map",
                               0x0, vms->iommu_phandle, 0x0, 0x10000);
    }

    g_free(nodename);
}

1124
/* Create the platform bus device, connect its interrupts and map its MMIO
 * region into system memory.
 *
 * The file-level platform_bus_params are filled in from the
 * VIRT_PLATFORM_BUS memory map and interrupt map entries, and a heap
 * allocated parameter block referring to them is handed to
 * arm_register_platform_bus_fdt_creator().
 *
 * @vms: machine state providing the maps and the boot info
 * @pic: interrupt lines, indexed by board interrupt number
 */
static void create_platform_bus(VirtMachineState *vms, qemu_irq *pic)
{
    ARMPlatformBusFDTParams *fdt_params = g_new(ARMPlatformBusFDTParams, 1);
    MemoryRegion *sysmem = get_system_memory();
    DeviceState *dev;
    SysBusDevice *sbd;
    int n;

    platform_bus_params.platform_bus_base = vms->memmap[VIRT_PLATFORM_BUS].base;
    platform_bus_params.platform_bus_size = vms->memmap[VIRT_PLATFORM_BUS].size;
    platform_bus_params.platform_bus_first_irq = vms->irqmap[VIRT_PLATFORM_BUS];
    platform_bus_params.platform_bus_num_irqs = PLATFORM_BUS_NUM_IRQS;

    fdt_params->system_params = &platform_bus_params;
    fdt_params->binfo = &vms->bootinfo;
    fdt_params->intc = "/intc";
    /*
     * register a machine init done notifier that creates the device tree
     * nodes of the platform bus and its children dynamic sysbus devices
     */
    arm_register_platform_bus_fdt_creator(fdt_params);

    dev = qdev_create(NULL, TYPE_PLATFORM_BUS_DEVICE);
    dev->id = TYPE_PLATFORM_BUS_DEVICE;
    qdev_prop_set_uint32(dev, "num_irqs",
                         platform_bus_params.platform_bus_num_irqs);
    qdev_prop_set_uint32(dev, "mmio_size",
                         platform_bus_params.platform_bus_size);
    qdev_init_nofail(dev);
    sbd = SYS_BUS_DEVICE(dev);

    /* Bus interrupt n goes to board interrupt first_irq + n */
    for (n = 0; n < platform_bus_params.platform_bus_num_irqs; n++) {
        sysbus_connect_irq(sbd, n,
                           pic[platform_bus_params.platform_bus_first_irq + n]);
    }

    memory_region_add_subregion(sysmem,
                                platform_bus_params.platform_bus_base,
                                sysbus_mmio_get_region(sbd, 0));
}

1165
static void create_secure_ram(VirtMachineState *vms,
1166
                              MemoryRegion *secure_sysmem)
1167 1168 1169
{
    MemoryRegion *secram = g_new(MemoryRegion, 1);
    char *nodename;
1170 1171
    hwaddr base = vms->memmap[VIRT_SECURE_MEM].base;
    hwaddr size = vms->memmap[VIRT_SECURE_MEM].size;
1172

1173 1174
    memory_region_init_ram(secram, NULL, "virt.secure-ram", size,
                           &error_fatal);
1175 1176 1177
    memory_region_add_subregion(secure_sysmem, base, secram);

    nodename = g_strdup_printf("/secram@%" PRIx64, base);
1178 1179 1180 1181 1182
    qemu_fdt_add_subnode(vms->fdt, nodename);
    qemu_fdt_setprop_string(vms->fdt, nodename, "device_type", "memory");
    qemu_fdt_setprop_sized_cells(vms->fdt, nodename, "reg", 2, base, 2, size);
    qemu_fdt_setprop_string(vms->fdt, nodename, "status", "disabled");
    qemu_fdt_setprop_string(vms->fdt, nodename, "secure-status", "okay");
1183 1184 1185 1186

    g_free(nodename);
}

P
Peter Maydell 已提交
1187 1188
static void *machvirt_dtb(const struct arm_boot_info *binfo, int *fdt_size)
{
1189 1190
    const VirtMachineState *board = container_of(binfo, VirtMachineState,
                                                 bootinfo);
P
Peter Maydell 已提交
1191 1192 1193 1194 1195

    *fdt_size = board->fdt_size;
    return board->fdt;
}

1196
/* Build the SMBIOS tables and expose them to the guest as the fw_cfg files
 * "etc/smbios/smbios-tables" and "etc/smbios/smbios-anchor".
 *
 * Does nothing when the machine has no fw_cfg device. The files are only
 * added when smbios_get_tables() produced an anchor.
 */
static void virt_build_smbios(VirtMachineState *vms)
{
    MachineClass *mc = MACHINE_GET_CLASS(vms);
    VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(vms);
    uint8_t *tables, *anchor;
    size_t tables_len, anchor_len;
    const char *product;
    const char *version;

    if (!vms->fw_cfg) {
        return;
    }

    product = kvm_enabled() ? "KVM Virtual Machine" : "QEMU Virtual Machine";
    /* Machine classes flagged smbios_old_sys_ver report "1.0" */
    version = vmc->smbios_old_sys_ver ? "1.0" : mc->name;

    smbios_set_defaults("QEMU", product, version, false,
                        true, SMBIOS_ENTRY_POINT_30);

    smbios_get_tables(NULL, 0, &tables, &tables_len,
                      &anchor, &anchor_len);
    if (!anchor) {
        return;
    }

    fw_cfg_add_file(vms->fw_cfg, "etc/smbios/smbios-tables",
                    tables, tables_len);
    fw_cfg_add_file(vms->fw_cfg, "etc/smbios/smbios-anchor",
                    anchor, anchor_len);
}

1227
static
1228
void virt_machine_done(Notifier *notifier, void *data)
1229
{
1230 1231 1232
    VirtMachineState *vms = container_of(notifier, VirtMachineState,
                                         machine_done);

1233 1234
    virt_acpi_setup(vms);
    virt_build_smbios(vms);
1235 1236
}

1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259
/* Compute the MP affinity value for the CPU with index @idx.
 *
 * The cluster size passed to arm_cpu_mp_affinity() is
 * ARM_DEFAULT_CPUS_PER_CLUSTER when the machine class disallows affinity
 * adjustment; otherwise it depends on the GIC version.
 */
static uint64_t virt_cpu_mp_affinity(VirtMachineState *vms, int idx)
{
    VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(vms);
    uint8_t cluster_size;

    if (vmc->disallow_affinity_adjustment) {
        cluster_size = ARM_DEFAULT_CPUS_PER_CLUSTER;
    } else if (vms->gic_version == 3) {
        /* Adjust MPIDR like 64-bit KVM hosts, which incorporate the
         * GIC's target-list limitations. 32-bit KVM hosts currently
         * always create clusters of 4 CPUs, but that is expected to
         * change when they gain support for gicv3. When KVM is enabled
         * it will override the changes we make here, therefore our
         * purposes are to make TCG consistent (with 64-bit KVM hosts)
         * and to improve SGI efficiency.
         */
        cluster_size = GICV3_TARGETLIST_BITS;
    } else {
        /* Same adjustment as above, for the other GIC versions */
        cluster_size = GIC_TARGETLIST_BITS;
    }

    return arm_cpu_mp_affinity(idx, cluster_size);
}

1260
/* Machine init function for the virt board.
 *
 * Resolves the GIC version, validates the configuration (CPU type, CPU
 * count, RAM size, KVM restrictions), creates the CPUs, RAM and all board
 * devices together with their device tree nodes, and finally hands the
 * boot information to arm_load_kernel(). Configuration errors are reported
 * with error_report() followed by exit(1).
 */
static void machvirt_init(MachineState *machine)
{
    VirtMachineState *vms = VIRT_MACHINE(machine);
    VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(machine);
    MachineClass *mc = MACHINE_GET_CLASS(machine);
    MemoryRegion *sysmem = get_system_memory();
    MemoryRegion *secure_sysmem = NULL;
    MemoryRegion *ram;
    const CPUArchIdList *possible_cpus;
    qemu_irq pic[NUM_IRQS];
    int n, virt_max_cpus;
    bool firmware_loaded = bios_name || drive_get(IF_PFLASH, 0, 0);

    /* We can probe only here because during property set
     * KVM is not available yet
     */
    if (vms->gic_version <= 0) {
        /* "host" or "max" */
        if (kvm_enabled()) {
            vms->gic_version = kvm_arm_vgic_probe();
            if (!vms->gic_version) {
                error_report(
                    "Unable to determine GIC version supported by host");
                exit(1);
            }
        } else if (vms->gic_version == 0) {
            error_report("gic-version=host requires KVM");
            exit(1);
        } else {
            /* "max": currently means 3 for TCG */
            vms->gic_version = 3;
        }
    }

    if (!cpu_type_valid(machine->cpu_type)) {
        error_report("mach-virt: CPU type %s not supported", machine->cpu_type);
        exit(1);
    }

    /* If we have an EL3 boot ROM then the assumption is that it will
     * implement PSCI itself, so disable QEMU's internal implementation
     * so it doesn't get in the way. Instead of starting secondary
     * CPUs in PSCI powerdown state we will start them all running and
     * let the boot ROM sort them out.
     * The usual case is that we do use QEMU's PSCI implementation;
     * if the guest has EL2 then we will use SMC as the conduit,
     * and otherwise we will use HVC (for backwards compatibility and
     * because if we're using KVM then we must use HVC).
     */
    if (vms->secure && firmware_loaded) {
        vms->psci_conduit = QEMU_PSCI_CONDUIT_DISABLED;
    } else if (vms->virt) {
        vms->psci_conduit = QEMU_PSCI_CONDUIT_SMC;
    } else {
        vms->psci_conduit = QEMU_PSCI_CONDUIT_HVC;
    }

    /* The maximum number of CPUs depends on the GIC version, or on how
     * many redistributors we can fit into the memory map.
     */
    virt_max_cpus = (vms->gic_version == 3)
                    ? vms->memmap[VIRT_GIC_REDIST].size / 0x20000
                    : GIC_NCPU;

    if (max_cpus > virt_max_cpus) {
        error_report("Number of SMP CPUs requested (%d) exceeds max CPUs "
                     "supported by machine 'mach-virt' (%d)",
                     max_cpus, virt_max_cpus);
        exit(1);
    }

    vms->smp_cpus = smp_cpus;

    if (machine->ram_size > vms->memmap[VIRT_MEM].size) {
        error_report("mach-virt: cannot model more than %dGB RAM", RAMLIMIT_GB);
        exit(1);
    }

    if (vms->virt && kvm_enabled()) {
        error_report("mach-virt: KVM does not support providing "
                     "Virtualization extensions to the guest CPU");
        exit(1);
    }

    if (vms->secure) {
        if (kvm_enabled()) {
            error_report("mach-virt: KVM does not support Security extensions");
            exit(1);
        }

        /* The Secure view of the world is the same as the NonSecure,
         * but with a few extra devices. Create it as a container region
         * containing the system memory at low priority; any secure-only
         * devices go in at higher priority and take precedence.
         */
        secure_sysmem = g_new(MemoryRegion, 1);
        memory_region_init(secure_sysmem, OBJECT(machine), "secure-memory",
                           UINT64_MAX);
        memory_region_add_subregion_overlap(secure_sysmem, 0, sysmem, -1);
    }

    create_fdt(vms);

    /* Create and realize the first smp_cpus entries of the possible CPUs */
    possible_cpus = mc->possible_cpu_arch_ids(machine);
    for (n = 0; n < possible_cpus->len && n < smp_cpus; n++) {
        Object *cpuobj = object_new(possible_cpus->cpus[n].type);
        CPUState *cs = CPU(cpuobj);

        object_property_set_int(cpuobj, possible_cpus->cpus[n].arch_id,
                                "mp-affinity", NULL);

        cs->cpu_index = n;

        numa_cpu_pre_plug(&possible_cpus->cpus[n], DEVICE(cpuobj),
                          &error_fatal);

        if (!vms->secure) {
            object_property_set_bool(cpuobj, false, "has_el3", NULL);
        }

        if (!vms->virt && object_property_find(cpuobj, "has_el2", NULL)) {
            object_property_set_bool(cpuobj, false, "has_el2", NULL);
        }

        if (vms->psci_conduit != QEMU_PSCI_CONDUIT_DISABLED) {
            object_property_set_int(cpuobj, vms->psci_conduit,
                                    "psci-conduit", NULL);

            /* Secondary CPUs start in PSCI powered-down state */
            if (n > 0) {
                object_property_set_bool(cpuobj, true,
                                         "start-powered-off", NULL);
            }
        }

        if (vmc->no_pmu && object_property_find(cpuobj, "pmu", NULL)) {
            object_property_set_bool(cpuobj, false, "pmu", NULL);
        }

        if (object_property_find(cpuobj, "reset-cbar", NULL)) {
            object_property_set_int(cpuobj, vms->memmap[VIRT_CPUPERIPHS].base,
                                    "reset-cbar", &error_abort);
        }

        object_property_set_link(cpuobj, OBJECT(sysmem), "memory",
                                 &error_abort);
        if (vms->secure) {
            object_property_set_link(cpuobj, OBJECT(secure_sysmem),
                                     "secure-memory", &error_abort);
        }

        object_property_set_bool(cpuobj, true, "realized", &error_fatal);
        object_unref(cpuobj);
    }
    fdt_add_timer_nodes(vms);
    fdt_add_cpu_nodes(vms);

    ram = g_new(MemoryRegion, 1);
    memory_region_allocate_system_memory(ram, NULL, "mach-virt.ram",
                                         machine->ram_size);
    memory_region_add_subregion(sysmem, vms->memmap[VIRT_MEM].base, ram);

    /* Without a separate Secure view both flash devices go into sysmem */
    create_flash(vms, sysmem, secure_sysmem ? secure_sysmem : sysmem);

    create_gic(vms, pic);

    fdt_add_pmu_nodes(vms);

    create_uart(vms, pic, VIRT_UART, sysmem, serial_hd(0));

    if (vms->secure) {
        create_secure_ram(vms, secure_sysmem);
        create_uart(vms, pic, VIRT_SECURE_UART, secure_sysmem, serial_hd(1));
    }

    create_rtc(vms, pic);

    create_pcie(vms, pic);

    create_gpio(vms, pic);

    /* Create mmio transports, so the user can create virtio backends
     * (which will be automatically plugged in to the transports). If
     * no backend is created the transport will just sit harmlessly idle.
     */
    create_virtio_devices(vms, pic);

    vms->fw_cfg = create_fw_cfg(vms, &address_space_memory);
    rom_set_fw(vms->fw_cfg);

    vms->machine_done.notify = virt_machine_done;
    qemu_add_machine_init_done_notifier(&vms->machine_done);

    vms->bootinfo.ram_size = machine->ram_size;
    vms->bootinfo.kernel_filename = machine->kernel_filename;
    vms->bootinfo.kernel_cmdline = machine->kernel_cmdline;
    vms->bootinfo.initrd_filename = machine->initrd_filename;
    vms->bootinfo.nb_cpus = smp_cpus;
    vms->bootinfo.board_id = -1;
    vms->bootinfo.loader_start = vms->memmap[VIRT_MEM].base;
    vms->bootinfo.get_dtb = machvirt_dtb;
    vms->bootinfo.firmware_loaded = firmware_loaded;
    arm_load_kernel(ARM_CPU(first_cpu), &vms->bootinfo);

    /*
     * arm_load_kernel machine init done notifier registration must
     * happen before the platform_bus_create call. In this latter,
     * another notifier is registered which adds platform bus nodes.
     * Notifiers are executed in registration reverse order.
     */
    create_platform_bus(vms, pic);
}

1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495
/*
 * Getter for the boolean "secure" machine property.
 *
 * Returns the current value of the 'secure' field of the
 * VirtMachineState behind @obj.  @errp is never written.
 */
static bool virt_get_secure(Object *obj, Error **errp)
{
    return VIRT_MACHINE(obj)->secure;
}

/*
 * Setter for the boolean "secure" machine property.
 *
 * Stores @value in the 'secure' field of the VirtMachineState behind
 * @obj.  No validation is performed and @errp is never written.
 */
static void virt_set_secure(Object *obj, bool value, Error **errp)
{
    VIRT_MACHINE(obj)->secure = value;
}

1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509
/*
 * Getter for the boolean "virtualization" machine property.
 *
 * Returns the current value of the 'virt' field of the
 * VirtMachineState behind @obj.  @errp is never written.
 */
static bool virt_get_virt(Object *obj, Error **errp)
{
    return VIRT_MACHINE(obj)->virt;
}

/*
 * Setter for the boolean "virtualization" machine property.
 *
 * Stores @value in the 'virt' field of the VirtMachineState behind
 * @obj.  No validation is performed and @errp is never written.
 */
static void virt_set_virt(Object *obj, bool value, Error **errp)
{
    VIRT_MACHINE(obj)->virt = value;
}

1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523
/*
 * Getter for the boolean "highmem" machine property.
 *
 * Returns the current value of the 'highmem' field of the
 * VirtMachineState behind @obj.  @errp is never written.
 */
static bool virt_get_highmem(Object *obj, Error **errp)
{
    return VIRT_MACHINE(obj)->highmem;
}

/*
 * Setter for the boolean "highmem" machine property.
 *
 * Stores @value in the 'highmem' field of the VirtMachineState behind
 * @obj.  No validation is performed and @errp is never written.
 */
static void virt_set_highmem(Object *obj, bool value, Error **errp)
{
    VIRT_MACHINE(obj)->highmem = value;
}

1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537
/*
 * Getter for the boolean "its" machine property.
 *
 * Returns the current value of the 'its' field of the
 * VirtMachineState behind @obj.  @errp is never written.
 */
static bool virt_get_its(Object *obj, Error **errp)
{
    return VIRT_MACHINE(obj)->its;
}

/*
 * Setter for the boolean "its" machine property.
 *
 * Stores @value in the 'its' field of the VirtMachineState behind
 * @obj.  No validation is performed and @errp is never written.
 */
static void virt_set_its(Object *obj, bool value, Error **errp)
{
    VIRT_MACHINE(obj)->its = value;
}

1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555
/*
 * Getter for the string "gic-version" machine property.
 *
 * Returns a g_strdup()'d copy of "3" when vms->gic_version is 3 and of
 * "2" for every other stored value; this includes the 0 and -1
 * placeholders that virt_set_gic_version() stores for "host" and "max".
 * @errp is never written.
 */
static char *virt_get_gic_version(Object *obj, Error **errp)
{
    VirtMachineState *vms = VIRT_MACHINE(obj);

    if (vms->gic_version == 3) {
        return g_strdup("3");
    }
    return g_strdup("2");
}

/*
 * Setter for the string "gic-version" machine property.
 *
 * Accepted values of @value and what is stored in vms->gic_version:
 *   "3"    -> 3
 *   "2"    -> 2
 *   "host" -> 0   (placeholder; the real version is probed later)
 *   "max"  -> -1  (placeholder; the real version is probed later)
 * Any other string leaves gic_version untouched and reports an error
 * through @errp, followed by a hint listing the valid values.
 */
static void virt_set_gic_version(Object *obj, const char *value, Error **errp)
{
    VirtMachineState *vms = VIRT_MACHINE(obj);

    if (!strcmp(value, "3")) {
        vms->gic_version = 3;
    } else if (!strcmp(value, "2")) {
        vms->gic_version = 2;
    } else if (!strcmp(value, "host")) {
        vms->gic_version = 0; /* Will probe later */
    } else if (!strcmp(value, "max")) {
        vms->gic_version = -1; /* Will probe later */
    } else {
        error_setg(errp, "Invalid gic-version value");
        error_append_hint(errp, "Valid values are 3, 2, host, max.\n");
    }
}

1564 1565 1566 1567 1568 1569 1570 1571 1572 1573
/*
 * Return the instance properties of the CPU at position @cpu_index in
 * the machine's possible-CPU list.
 *
 * The list is obtained through the machine class's
 * possible_cpu_arch_ids() hook.  @cpu_index must be smaller than the
 * list length; this is asserted.
 */
static CpuInstanceProperties
virt_cpu_index_to_props(MachineState *ms, unsigned cpu_index)
{
    const CPUArchIdList *cpu_list;

    cpu_list = MACHINE_GET_CLASS(ms)->possible_cpu_arch_ids(ms);
    assert(cpu_index < cpu_list->len);

    return cpu_list->cpus[cpu_index].props;
}

1574 1575 1576 1577 1578
/*
 * Default NUMA node for the CPU at index @idx: the index taken modulo
 * nb_numa_nodes, i.e. CPUs are spread round-robin over the nodes.
 *
 * nb_numa_nodes must be non-zero.  This is asserted because the modulo
 * below would otherwise be a division by zero (undefined behaviour).
 * @ms is unused.
 */
static int64_t virt_get_default_cpu_node_id(const MachineState *ms, int idx)
{
    assert(nb_numa_nodes > 0);
    return idx % nb_numa_nodes;
}

1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592
/*
 * Return the list of CPUs this machine can have, building and caching
 * it in ms->possible_cpus on the first call.
 *
 * The list has max_cpus entries.  Entry n gets:
 *   type            = ms->cpu_type
 *   arch_id         = virt_cpu_mp_affinity(vms, n)
 *   props.thread_id = n (with has_thread_id set)
 * All other fields stay zero from g_malloc0().
 *
 * On later calls the cached list is returned as is, after asserting
 * that its length still equals max_cpus.
 */
static const CPUArchIdList *virt_possible_cpu_arch_ids(MachineState *ms)
{
    VirtMachineState *vms = VIRT_MACHINE(ms);
    CPUArchIdList *list;
    int n;

    if (ms->possible_cpus) {
        assert(ms->possible_cpus->len == max_cpus);
        return ms->possible_cpus;
    }

    list = g_malloc0(sizeof(CPUArchIdList) + sizeof(CPUArchId) * max_cpus);
    list->len = max_cpus;
    /* Publish the list before filling it, as the original code did. */
    ms->possible_cpus = list;

    for (n = 0; n < list->len; n++) {
        CPUArchId *cpu = &list->cpus[n];

        cpu->type = ms->cpu_type;
        cpu->arch_id = virt_cpu_mp_affinity(vms, n);
        cpu->props.has_thread_id = true;
        cpu->props.thread_id = n;
    }
    return list;
}

1602 1603
/*
 * Class initialiser for the abstract virt machine type (referenced by
 * virt_machine_info.class_init).
 *
 * Fills in the MachineClass defaults.  The versioned *_options()
 * functions may override them afterwards (for example
 * virt_machine_2_7_options() resets minimum_page_bits).  @data is unused.
 */
static void virt_machine_class_init(ObjectClass *oc, void *data)
{
    MachineClass *mc = MACHINE_CLASS(oc);

    mc->init = machvirt_init;
    /* Start max_cpus at the maximum QEMU supports. We'll further restrict
     * it later in machvirt_init, where we have more information about the
     * configuration of the particular instance.
     */
    mc->max_cpus = 255;
    machine_class_allow_dynamic_sysbus_dev(mc, TYPE_VFIO_CALXEDA_XGMAC);
    machine_class_allow_dynamic_sysbus_dev(mc, TYPE_VFIO_AMD_XGBE);
    mc->block_default_type = IF_VIRTIO;
    mc->no_cdrom = 1;
    mc->pci_allow_0_address = true;
    /* We know we will never create a pre-ARMv7 CPU which needs 1K pages */
    mc->minimum_page_bits = 12;
    mc->possible_cpu_arch_ids = virt_possible_cpu_arch_ids;
    mc->cpu_index_to_instance_props = virt_cpu_index_to_props;
    mc->default_cpu_type = ARM_CPU_TYPE_NAME("cortex-a15");
    mc->get_default_cpu_node_id = virt_get_default_cpu_node_id;
}

/*
 * Type description of the abstract base virt machine type: a child of
 * TYPE_MACHINE whose instances are VirtMachineState and whose class
 * structure is VirtMachineClass, initialised by
 * virt_machine_class_init().
 */
static const TypeInfo virt_machine_info = {
    .name = TYPE_VIRT_MACHINE,
    .parent = TYPE_MACHINE,
    .abstract = true,
    .class_init = virt_machine_class_init,
    .class_size = sizeof(VirtMachineClass),
    .instance_size = sizeof(VirtMachineState),
};

1634 1635 1636 1637 1638 1639
/* Register the abstract virt machine type described by virt_machine_info. */
static void machvirt_machine_init(void)
{
    type_register_static(&virt_machine_info);
}
/* Hand machvirt_machine_init to type_init() so it runs at type-init time. */
type_init(machvirt_machine_init);

1640
/*
 * Instance initialiser for the 2.12 virt machine; the older versioned
 * instance_init functions chain to it.
 *
 * Sets the per-instance defaults and registers the machine properties:
 *   "secure"          bool, default off
 *   "virtualization"  bool, default off
 *   "highmem"         bool, default on
 *   "gic-version"     string, default 2
 *   "its"             bool, default on; forced off and not registered
 *                     when the machine class sets no_its
 * Finally selects a15memmap/a15irqmap as the memory and IRQ maps.
 */
static void virt_2_12_instance_init(Object *obj)
{
    VirtMachineState *vms = VIRT_MACHINE(obj);
    VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(vms);

    /* EL3 is disabled by default on virt: this makes us consistent
     * between KVM and TCG for this board, and it also allows us to
     * boot UEFI blobs which assume no TrustZone support.
     */
    vms->secure = false;
    object_property_add_bool(obj, "secure", virt_get_secure,
                             virt_set_secure, NULL);
    object_property_set_description(obj, "secure",
                                    "Set on/off to enable/disable the ARM "
                                    "Security Extensions (TrustZone)",
                                    NULL);

    /* EL2 is also disabled by default, for similar reasons */
    vms->virt = false;
    object_property_add_bool(obj, "virtualization", virt_get_virt,
                             virt_set_virt, NULL);
    object_property_set_description(obj, "virtualization",
                                    "Set on/off to enable/disable emulating a "
                                    "guest CPU which implements the ARM "
                                    "Virtualization Extensions",
                                    NULL);

    /* High memory is enabled by default */
    vms->highmem = true;
    object_property_add_bool(obj, "highmem", virt_get_highmem,
                             virt_set_highmem, NULL);
    object_property_set_description(obj, "highmem",
                                    "Set on/off to enable/disable using "
                                    "physical address space above 32 bits",
                                    NULL);

    /* Default GIC type is v2 */
    vms->gic_version = 2;
    object_property_add_str(obj, "gic-version", virt_get_gic_version,
                            virt_set_gic_version, NULL);
    /* Keep this list in step with what virt_set_gic_version() accepts. */
    object_property_set_description(obj, "gic-version",
                                    "Set GIC version. "
                                    "Valid values are 2, 3, host and max",
                                    NULL);

    if (vmc->no_its) {
        vms->its = false;
    } else {
        /* Default allows ITS instantiation */
        vms->its = true;
        object_property_add_bool(obj, "its", virt_get_its,
                                 virt_set_its, NULL);
        object_property_set_description(obj, "its",
                                        "Set on/off to enable/disable "
                                        "ITS instantiation",
                                        NULL);
    }

    vms->memmap = a15memmap;
    vms->irqmap = a15irqmap;
}

1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712
/*
 * Machine-class options for the 2.12 machine version.  Currently empty;
 * virt_machine_2_11_options() calls it first and then applies its own
 * compat settings on top.
 */
static void virt_machine_2_12_options(MachineClass *mc)
{
}
/*
 * NOTE(review): macro defined elsewhere; presumably it registers the
 * 2.12 machine type using the two virt_*2_12* functions above - confirm
 * against the macro definition.
 */
DEFINE_VIRT_MACHINE_AS_LATEST(2, 12)

/*
 * Expands to HW_COMPAT_2_11 (defined elsewhere); handed to
 * SET_MACHINE_COMPAT() in virt_machine_2_11_options().
 */
#define VIRT_COMPAT_2_11 \
    HW_COMPAT_2_11

/*
 * Instance initialiser for the 2.11 machine version: adds nothing of
 * its own and simply defers to virt_2_12_instance_init().
 */
static void virt_2_11_instance_init(Object *obj)
{
    virt_2_12_instance_init(obj);
}

E
Eric Auger 已提交
1713 1714
static void virt_machine_2_11_options(MachineClass *mc)
{
1715 1716
    VirtMachineClass *vmc = VIRT_MACHINE_CLASS(OBJECT_CLASS(mc));

1717 1718
    virt_machine_2_12_options(mc);
    SET_MACHINE_COMPAT(mc, VIRT_COMPAT_2_11);
1719
    vmc->smbios_old_sys_ver = true;
E
Eric Auger 已提交
1720
}
1721
DEFINE_VIRT_MACHINE(2, 11)
E
Eric Auger 已提交
1722 1723 1724 1725 1726 1727 1728 1729 1730

/*
 * Expands to HW_COMPAT_2_10 (defined elsewhere); handed to
 * SET_MACHINE_COMPAT() in virt_machine_2_10_options().
 */
#define VIRT_COMPAT_2_10 \
    HW_COMPAT_2_10

/*
 * Instance initialiser for the 2.10 machine version: adds nothing of
 * its own and simply defers to virt_2_11_instance_init().
 */
static void virt_2_10_instance_init(Object *obj)
{
    virt_2_11_instance_init(obj);
}

E
Eric Auger 已提交
1731 1732
static void virt_machine_2_10_options(MachineClass *mc)
{
E
Eric Auger 已提交
1733 1734
    virt_machine_2_11_options(mc);
    SET_MACHINE_COMPAT(mc, VIRT_COMPAT_2_10);
E
Eric Auger 已提交
1735
}
E
Eric Auger 已提交
1736
DEFINE_VIRT_MACHINE(2, 10)
E
Eric Auger 已提交
1737 1738 1739 1740 1741 1742 1743 1744 1745

/*
 * Expands to HW_COMPAT_2_9 (defined elsewhere); handed to
 * SET_MACHINE_COMPAT() in virt_machine_2_9_options().
 */
#define VIRT_COMPAT_2_9 \
    HW_COMPAT_2_9

/*
 * Instance initialiser for the 2.9 machine version: adds nothing of
 * its own and simply defers to virt_2_10_instance_init().
 */
static void virt_2_9_instance_init(Object *obj)
{
    virt_2_10_instance_init(obj);
}

P
Peter Maydell 已提交
1746 1747
static void virt_machine_2_9_options(MachineClass *mc)
{
E
Eric Auger 已提交
1748 1749
    virt_machine_2_10_options(mc);
    SET_MACHINE_COMPAT(mc, VIRT_COMPAT_2_9);
P
Peter Maydell 已提交
1750
}
E
Eric Auger 已提交
1751
DEFINE_VIRT_MACHINE(2, 9)
P
Peter Maydell 已提交
1752 1753 1754 1755 1756 1757 1758 1759 1760

/*
 * Expands to HW_COMPAT_2_8 (defined elsewhere); handed to
 * SET_MACHINE_COMPAT() in virt_machine_2_8_options().
 */
#define VIRT_COMPAT_2_8 \
    HW_COMPAT_2_8

/*
 * Instance initialiser for the 2.8 machine version: adds nothing of
 * its own and simply defers to virt_2_9_instance_init().
 */
static void virt_2_8_instance_init(Object *obj)
{
    virt_2_9_instance_init(obj);
}

A
Andrew Jones 已提交
1761 1762
static void virt_machine_2_8_options(MachineClass *mc)
{
1763 1764
    VirtMachineClass *vmc = VIRT_MACHINE_CLASS(OBJECT_CLASS(mc));

P
Peter Maydell 已提交
1765 1766
    virt_machine_2_9_options(mc);
    SET_MACHINE_COMPAT(mc, VIRT_COMPAT_2_8);
1767 1768 1769 1770
    /* For 2.8 and earlier we falsely claimed in the DT that
     * our timers were edge-triggered, not level-triggered.
     */
    vmc->claim_edge_triggered_timers = true;
A
Andrew Jones 已提交
1771
}
P
Peter Maydell 已提交
1772
DEFINE_VIRT_MACHINE(2, 8)
A
Andrew Jones 已提交
1773 1774 1775 1776 1777 1778 1779 1780 1781

/*
 * Expands to HW_COMPAT_2_7 (defined elsewhere); handed to
 * SET_MACHINE_COMPAT() in virt_machine_2_7_options().
 */
#define VIRT_COMPAT_2_7 \
    HW_COMPAT_2_7

/*
 * Instance initialiser for the 2.7 machine version: adds nothing of
 * its own and simply defers to virt_2_8_instance_init().
 */
static void virt_2_7_instance_init(Object *obj)
{
    virt_2_8_instance_init(obj);
}

1782 1783
static void virt_machine_2_7_options(MachineClass *mc)
{
1784 1785
    VirtMachineClass *vmc = VIRT_MACHINE_CLASS(OBJECT_CLASS(mc));

A
Andrew Jones 已提交
1786 1787
    virt_machine_2_8_options(mc);
    SET_MACHINE_COMPAT(mc, VIRT_COMPAT_2_7);
1788 1789
    /* ITS was introduced with 2.8 */
    vmc->no_its = true;
1790 1791
    /* Stick with 1K pages for migration compatibility */
    mc->minimum_page_bits = 0;
1792
}
A
Andrew Jones 已提交
1793
DEFINE_VIRT_MACHINE(2, 7)
1794 1795 1796 1797 1798 1799 1800 1801 1802

/*
 * Expands to HW_COMPAT_2_6 (defined elsewhere); handed to
 * SET_MACHINE_COMPAT() in virt_machine_2_6_options().
 */
#define VIRT_COMPAT_2_6 \
    HW_COMPAT_2_6

/*
 * Instance initialiser for the 2.6 machine version: adds nothing of
 * its own and simply defers to virt_2_7_instance_init().
 */
static void virt_2_6_instance_init(Object *obj)
{
    virt_2_7_instance_init(obj);
}

1803
static void virt_machine_2_6_options(MachineClass *mc)
1804
{
1805 1806
    VirtMachineClass *vmc = VIRT_MACHINE_CLASS(OBJECT_CLASS(mc));

1807 1808
    virt_machine_2_7_options(mc);
    SET_MACHINE_COMPAT(mc, VIRT_COMPAT_2_6);
1809
    vmc->disallow_affinity_adjustment = true;
1810 1811
    /* Disable PMU for 2.6 as PMU support was first introduced in 2.7 */
    vmc->no_pmu = true;
1812
}
1813
DEFINE_VIRT_MACHINE(2, 6)