virt.c 69.0 KB
Newer Older
P
Peter Maydell 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
/*
 * ARM mach-virt emulation
 *
 * Copyright (c) 2013 Linaro Limited
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2 or later, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 * Emulate a virtual board which works by passing Linux all the information
 * it needs about what devices are present via the device tree.
 * There are some restrictions about what we can do here:
 *  + we can only present devices whose Linux drivers will work based
 *    purely on the device tree with no platform data at all
 *  + we want to present a very stripped-down minimalist platform,
 *    both because this reduces the security attack surface from the guest
 *    and also because it reduces our exposure to being broken when
 *    the kernel updates its device tree bindings and requires further
 *    information in a device binding that we aren't providing.
 * This is essentially the same approach kvmtool uses.
 */

P
Peter Maydell 已提交
31
#include "qemu/osdep.h"
32
#include "qapi/error.h"
P
Peter Maydell 已提交
33 34 35
#include "hw/sysbus.h"
#include "hw/arm/arm.h"
#include "hw/arm/primecell.h"
36
#include "hw/arm/virt.h"
37 38
#include "hw/vfio/vfio-calxeda-xgmac.h"
#include "hw/vfio/vfio-amd-xgbe.h"
39
#include "hw/display/ramfb.h"
P
Peter Maydell 已提交
40 41 42
#include "hw/devices.h"
#include "net/net.h"
#include "sysemu/device_tree.h"
43
#include "sysemu/numa.h"
P
Peter Maydell 已提交
44 45
#include "sysemu/sysemu.h"
#include "sysemu/kvm.h"
46
#include "hw/compat.h"
47
#include "hw/loader.h"
P
Peter Maydell 已提交
48 49 50
#include "exec/address-spaces.h"
#include "qemu/bitops.h"
#include "qemu/error-report.h"
51
#include "hw/pci-host/gpex.h"
52 53
#include "hw/arm/sysbus-fdt.h"
#include "hw/platform-bus.h"
54
#include "hw/arm/fdt.h"
55 56
#include "hw/intc/arm_gic.h"
#include "hw/intc/arm_gicv3_common.h"
57
#include "kvm_arm.h"
58
#include "hw/smbios/smbios.h"
59
#include "qapi/visitor.h"
60
#include "standard-headers/linux/input.h"
61
#include "hw/arm/smmuv3.h"
P
Peter Maydell 已提交
62

63
/*
 * Define and register one versioned "virt" machine type.
 *
 * The expansion provides a class_init function, a TypeInfo and a
 * type_init() registration hook for the machine named
 * "virt-<major>.<minor>".  It refers to
 * virt_machine_<major>_<minor>_options() and
 * virt_<major>_<minor>_instance_init(), which the user of the macro has
 * to define.  When @latest is true the machine class additionally gets
 * the unversioned alias "virt".
 */
#define DEFINE_VIRT_MACHINE_LATEST(major, minor, latest) \
    static void virt_##major##_##minor##_class_init(ObjectClass *oc, \
                                                    void *data) \
    { \
        MachineClass *mc = MACHINE_CLASS(oc); \
        virt_machine_##major##_##minor##_options(mc); \
        mc->desc = "QEMU " # major "." # minor " ARM Virtual Machine"; \
        if (latest) { \
            mc->alias = "virt"; \
        } \
    } \
    static const TypeInfo machvirt_##major##_##minor##_info = { \
        .name = MACHINE_TYPE_NAME("virt-" # major "." # minor), \
        .parent = TYPE_VIRT_MACHINE, \
        .instance_init = virt_##major##_##minor##_instance_init, \
        .class_init = virt_##major##_##minor##_class_init, \
    }; \
    static void machvirt_machine_##major##_##minor##_init(void) \
    { \
        type_register_static(&machvirt_##major##_##minor##_info); \
    } \
    type_init(machvirt_machine_##major##_##minor##_init);

86 87 88 89 90
/*
 * Convenience wrappers around DEFINE_VIRT_MACHINE_LATEST:
 *  - DEFINE_VIRT_MACHINE_AS_LATEST passes latest = true, so the version
 *    also receives the "virt" alias;
 *  - DEFINE_VIRT_MACHINE passes latest = false.
 */
#define DEFINE_VIRT_MACHINE_AS_LATEST(major, minor) \
    DEFINE_VIRT_MACHINE_LATEST(major, minor, true)
#define DEFINE_VIRT_MACHINE(major, minor) \
    DEFINE_VIRT_MACHINE_LATEST(major, minor, false)

91

92 93 94 95 96
/* Number of external interrupt lines to configure the GIC with */
#define NUM_IRQS 256

/*
 * Number of interrupt lines in the range reserved for the platform bus
 * (the range starts at the VIRT_PLATFORM_BUS entry of a15irqmap).
 */
#define PLATFORM_BUS_NUM_IRQS 64

97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113
/* RAM limit in GB. Since VIRT_MEM starts at the 1GB mark, this means
 * RAM can go up to the 256GB mark, leaving 256GB of the physical
 * address space unallocated and free for future use between 256G and 512G.
 * If we need to provide more RAM to VMs in the future then we need to:
 *  * allocate a second bank of RAM starting at 2TB and working up
 *  * fix the DT and ACPI table generation code in QEMU to correctly
 *    report two split lumps of RAM to the guest
 *  * fix KVM in the host kernel to allow guests with >40 bit address spaces
 * (We don't want to fill all the way up to 512GB with RAM because
 * we might want it for non-RAM purposes later. Conversely it seems
 * reasonable to assume that anybody configuring a VM with a quarter
 * of a terabyte of RAM will be doing it on a host with more than a
 * terabyte of physical address space.)
 */
#define RAMLIMIT_GB 255
/*
 * The same limit in bytes. The ULL suffix makes the multiplication 64-bit.
 * This value is the size of the VIRT_MEM entry in a15memmap.
 */
#define RAMLIMIT_BYTES (RAMLIMIT_GB * 1024ULL * 1024 * 1024)

P
Peter Maydell 已提交
114 115 116 117 118 119 120 121 122
/* Addresses and sizes of our components.
 * 0..128MB is space for a flash device so we can run bootrom code such as UEFI.
 * 128MB..256MB is used for miscellaneous device I/O.
 * 256MB..1GB is reserved for possible future PCI support (ie where the
 * PCI memory window will go if we add a PCI host controller).
 * 1GB and up is RAM (which may happily spill over into the
 * high memory region beyond 4GB).
 * This represents a compromise between how much RAM can be given to
 * a 32 bit VM and leaving space for expansion and in particular for PCI.
P
Peter Maydell 已提交
123 124
 * Note that devices should generally be placed at multiples of 0x10000,
 * to accommodate guests using 64K pages.
P
Peter Maydell 已提交
125 126 127
 */
static const MemMapEntry a15memmap[] = {
    /* Space up to 0x8000000 is reserved for a boot ROM */
128 129
    [VIRT_FLASH] =              {          0, 0x08000000 },
    [VIRT_CPUPERIPHS] =         { 0x08000000, 0x00020000 },
P
Peter Maydell 已提交
130
    /* GIC distributor and CPU interfaces sit inside the CPU peripheral space */
131 132 133
    [VIRT_GIC_DIST] =           { 0x08000000, 0x00010000 },
    [VIRT_GIC_CPU] =            { 0x08010000, 0x00010000 },
    [VIRT_GIC_V2M] =            { 0x08020000, 0x00001000 },
134 135 136 137
    /* The space in between here is reserved for GICv3 CPU/vCPU/HYP */
    [VIRT_GIC_ITS] =            { 0x08080000, 0x00020000 },
    /* This redistributor space allows up to 2*64kB*123 CPUs */
    [VIRT_GIC_REDIST] =         { 0x080A0000, 0x00F60000 },
138 139
    [VIRT_UART] =               { 0x09000000, 0x00001000 },
    [VIRT_RTC] =                { 0x09010000, 0x00001000 },
M
Marc Marí 已提交
140
    [VIRT_FW_CFG] =             { 0x09020000, 0x00000018 },
S
Shannon Zhao 已提交
141
    [VIRT_GPIO] =               { 0x09030000, 0x00001000 },
142
    [VIRT_SECURE_UART] =        { 0x09040000, 0x00001000 },
143
    [VIRT_SMMU] =               { 0x09050000, 0x00020000 },
144
    [VIRT_MMIO] =               { 0x0a000000, 0x00000200 },
P
Peter Maydell 已提交
145
    /* ...repeating for a total of NUM_VIRTIO_TRANSPORTS, each of that size */
146
    [VIRT_PLATFORM_BUS] =       { 0x0c000000, 0x02000000 },
147
    [VIRT_SECURE_MEM] =         { 0x0e000000, 0x01000000 },
148 149 150
    [VIRT_PCIE_MMIO] =          { 0x10000000, 0x2eff0000 },
    [VIRT_PCIE_PIO] =           { 0x3eff0000, 0x00010000 },
    [VIRT_PCIE_ECAM] =          { 0x3f000000, 0x01000000 },
151
    [VIRT_MEM] =                { 0x40000000, RAMLIMIT_BYTES },
152 153
    /* Second PCIe window, 512GB wide at the 512GB boundary */
    [VIRT_PCIE_MMIO_HIGH] =   { 0x8000000000ULL, 0x8000000000ULL },
P
Peter Maydell 已提交
154 155 156 157
};

static const int a15irqmap[] = {
    [VIRT_UART] = 1,
P
Peter Maydell 已提交
158
    [VIRT_RTC] = 2,
159
    [VIRT_PCIE] = 3, /* ... to 6 */
S
Shannon Zhao 已提交
160
    [VIRT_GPIO] = 7,
161
    [VIRT_SECURE_UART] = 8,
P
Peter Maydell 已提交
162
    [VIRT_MMIO] = 16, /* ...to 16 + NUM_VIRTIO_TRANSPORTS - 1 */
163
    [VIRT_GIC_V2M] = 48, /* ...to 48 + NUM_GICV2M_SPIS - 1 */
164
    [VIRT_SMMU] = 74,    /* ...to 74 + NUM_SMMU_IRQS - 1 */
165
    [VIRT_PLATFORM_BUS] = 112, /* ...to 112 + PLATFORM_BUS_NUM_IRQS -1 */
P
Peter Maydell 已提交
166 167
};

168
static const char *valid_cpus[] = {
169 170 171 172
    ARM_CPU_TYPE_NAME("cortex-a15"),
    ARM_CPU_TYPE_NAME("cortex-a53"),
    ARM_CPU_TYPE_NAME("cortex-a57"),
    ARM_CPU_TYPE_NAME("host"),
173
    ARM_CPU_TYPE_NAME("max"),
P
Peter Maydell 已提交
174 175
};

176
static bool cpu_type_valid(const char *cpu)
P
Peter Maydell 已提交
177 178 179
{
    int i;

180 181 182
    for (i = 0; i < ARRAY_SIZE(valid_cpus); i++) {
        if (strcmp(cpu, valid_cpus[i]) == 0) {
            return true;
P
Peter Maydell 已提交
183 184
        }
    }
185
    return false;
P
Peter Maydell 已提交
186 187
}

188
/*
 * Create the skeleton device tree for the board and store it in @vms.
 *
 * Fills in vms->fdt, vms->fdt_size and vms->clock_phandle, and adds the
 * root properties, the /chosen and /memory nodes, the fixed /apb-pclk
 * clock node and, when NUMA distances were configured, the /distance-map
 * node.  Exits the process if the tree cannot be created.
 */
static void create_fdt(VirtMachineState *vms)
{
    void *fdt = create_device_tree(&vms->fdt_size);

    if (!fdt) {
        error_report("create_device_tree() failed");
        exit(1);
    }

    vms->fdt = fdt;

    /* Header */
    qemu_fdt_setprop_string(fdt, "/", "compatible", "linux,dummy-virt");
    qemu_fdt_setprop_cell(fdt, "/", "#address-cells", 0x2);
    qemu_fdt_setprop_cell(fdt, "/", "#size-cells", 0x2);

    /*
     * /chosen and /memory nodes must exist for load_dtb
     * to fill in necessary properties later
     */
    qemu_fdt_add_subnode(fdt, "/chosen");
    qemu_fdt_add_subnode(fdt, "/memory");
    qemu_fdt_setprop_string(fdt, "/memory", "device_type", "memory");

    /* Clock node, for the benefit of the UART. The kernel device tree
     * binding documentation claims the PL011 node clock properties are
     * optional but in practice if you omit them the kernel refuses to
     * probe for the device.
     */
    vms->clock_phandle = qemu_fdt_alloc_phandle(fdt);
    qemu_fdt_add_subnode(fdt, "/apb-pclk");
    qemu_fdt_setprop_string(fdt, "/apb-pclk", "compatible", "fixed-clock");
    qemu_fdt_setprop_cell(fdt, "/apb-pclk", "#clock-cells", 0x0);
    qemu_fdt_setprop_cell(fdt, "/apb-pclk", "clock-frequency", 24000000);
    qemu_fdt_setprop_string(fdt, "/apb-pclk", "clock-output-names",
                            "clk24mhz");
    qemu_fdt_setprop_cell(fdt, "/apb-pclk", "phandle", vms->clock_phandle);

    if (have_numa_distance) {
        /* One (from, to, distance) triple of big-endian cells per node pair */
        int size = nb_numa_nodes * nb_numa_nodes * 3 * sizeof(uint32_t);
        uint32_t *matrix = g_malloc0(size);
        int idx, i, j;

        for (i = 0; i < nb_numa_nodes; i++) {
            for (j = 0; j < nb_numa_nodes; j++) {
                idx = (i * nb_numa_nodes + j) * 3;
                matrix[idx + 0] = cpu_to_be32(i);
                matrix[idx + 1] = cpu_to_be32(j);
                matrix[idx + 2] = cpu_to_be32(numa_info[i].distance[j]);
            }
        }

        qemu_fdt_add_subnode(fdt, "/distance-map");
        qemu_fdt_setprop_string(fdt, "/distance-map", "compatible",
                                "numa-distance-map-v1");
        qemu_fdt_setprop(fdt, "/distance-map", "distance-matrix",
                         matrix, size);
        g_free(matrix);
    }
}

249
static void fdt_add_timer_nodes(const VirtMachineState *vms)
P
Peter Maydell 已提交
250
{
251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267
    /* On real hardware these interrupts are level-triggered.
     * On KVM they were edge-triggered before host kernel version 4.4,
     * and level-triggered afterwards.
     * On emulated QEMU they are level-triggered.
     *
     * Getting the DTB info about them wrong is awkward for some
     * guest kernels:
     *  pre-4.8 ignore the DT and leave the interrupt configured
     *   with whatever the GIC reset value (or the bootloader) left it at
     *  4.8 before rc6 honour the incorrect data by programming it back
     *   into the GIC, causing problems
     *  4.8rc6 and later ignore the DT and always write "level triggered"
     *   into the GIC
     *
     * For backwards-compatibility, virt-2.8 and earlier will continue
     * to say these are edge-triggered, but later machines will report
     * the correct information.
P
Peter Maydell 已提交
268
     */
269
    ARMCPU *armcpu;
270 271 272 273 274 275
    VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(vms);
    uint32_t irqflags = GIC_FDT_IRQ_FLAGS_LEVEL_HI;

    if (vmc->claim_edge_triggered_timers) {
        irqflags = GIC_FDT_IRQ_FLAGS_EDGE_LO_HI;
    }
P
Peter Maydell 已提交
276

277
    if (vms->gic_version == 2) {
278 279
        irqflags = deposit32(irqflags, GIC_FDT_IRQ_PPI_CPU_START,
                             GIC_FDT_IRQ_PPI_CPU_WIDTH,
280
                             (1 << vms->smp_cpus) - 1);
281
    }
P
Peter Maydell 已提交
282

283
    qemu_fdt_add_subnode(vms->fdt, "/timer");
284 285 286 287

    armcpu = ARM_CPU(qemu_get_cpu(0));
    if (arm_feature(&armcpu->env, ARM_FEATURE_V8)) {
        const char compat[] = "arm,armv8-timer\0arm,armv7-timer";
288
        qemu_fdt_setprop(vms->fdt, "/timer", "compatible",
289 290
                         compat, sizeof(compat));
    } else {
291
        qemu_fdt_setprop_string(vms->fdt, "/timer", "compatible",
292 293
                                "arm,armv7-timer");
    }
294 295
    qemu_fdt_setprop(vms->fdt, "/timer", "always-on", NULL, 0);
    qemu_fdt_setprop_cells(vms->fdt, "/timer", "interrupts",
296 297 298 299
                       GIC_FDT_IRQ_TYPE_PPI, ARCH_TIMER_S_EL1_IRQ, irqflags,
                       GIC_FDT_IRQ_TYPE_PPI, ARCH_TIMER_NS_EL1_IRQ, irqflags,
                       GIC_FDT_IRQ_TYPE_PPI, ARCH_TIMER_VIRT_IRQ, irqflags,
                       GIC_FDT_IRQ_TYPE_PPI, ARCH_TIMER_NS_EL2_IRQ, irqflags);
P
Peter Maydell 已提交
300 301
}

302
static void fdt_add_cpu_nodes(const VirtMachineState *vms)
P
Peter Maydell 已提交
303 304
{
    int cpu;
305
    int addr_cells = 1;
306
    const MachineState *ms = MACHINE(vms);
307 308 309 310 311 312 313 314 315 316 317 318 319 320

    /*
     * From Documentation/devicetree/bindings/arm/cpus.txt
     *  On ARM v8 64-bit systems value should be set to 2,
     *  that corresponds to the MPIDR_EL1 register size.
     *  If MPIDR_EL1[63:32] value is equal to 0 on all CPUs
     *  in the system, #address-cells can be set to 1, since
     *  MPIDR_EL1[63:32] bits are not used for CPUs
     *  identification.
     *
     *  Here we actually don't know whether our system is 32- or 64-bit one.
     *  The simplest way to go is to examine affinity IDs of all our CPUs. If
     *  at least one of them has Aff3 populated, we set #address-cells to 2.
     */
321
    for (cpu = 0; cpu < vms->smp_cpus; cpu++) {
322 323 324 325 326 327 328
        ARMCPU *armcpu = ARM_CPU(qemu_get_cpu(cpu));

        if (armcpu->mp_affinity & ARM_AFF3_MASK) {
            addr_cells = 2;
            break;
        }
    }
P
Peter Maydell 已提交
329

330 331 332
    qemu_fdt_add_subnode(vms->fdt, "/cpus");
    qemu_fdt_setprop_cell(vms->fdt, "/cpus", "#address-cells", addr_cells);
    qemu_fdt_setprop_cell(vms->fdt, "/cpus", "#size-cells", 0x0);
P
Peter Maydell 已提交
333

334
    for (cpu = vms->smp_cpus - 1; cpu >= 0; cpu--) {
P
Peter Maydell 已提交
335 336
        char *nodename = g_strdup_printf("/cpus/cpu@%d", cpu);
        ARMCPU *armcpu = ARM_CPU(qemu_get_cpu(cpu));
337
        CPUState *cs = CPU(armcpu);
P
Peter Maydell 已提交
338

339 340 341
        qemu_fdt_add_subnode(vms->fdt, nodename);
        qemu_fdt_setprop_string(vms->fdt, nodename, "device_type", "cpu");
        qemu_fdt_setprop_string(vms->fdt, nodename, "compatible",
P
Peter Maydell 已提交
342 343
                                    armcpu->dtb_compatible);

344 345
        if (vms->psci_conduit != QEMU_PSCI_CONDUIT_DISABLED
            && vms->smp_cpus > 1) {
346
            qemu_fdt_setprop_string(vms->fdt, nodename,
P
Peter Maydell 已提交
347 348 349
                                        "enable-method", "psci");
        }

350
        if (addr_cells == 2) {
351
            qemu_fdt_setprop_u64(vms->fdt, nodename, "reg",
352 353
                                 armcpu->mp_affinity);
        } else {
354
            qemu_fdt_setprop_cell(vms->fdt, nodename, "reg",
355 356 357
                                  armcpu->mp_affinity);
        }

358 359 360
        if (ms->possible_cpus->cpus[cs->cpu_index].props.has_node_id) {
            qemu_fdt_setprop_cell(vms->fdt, nodename, "numa-node-id",
                ms->possible_cpus->cpus[cs->cpu_index].props.node_id);
361 362
        }

P
Peter Maydell 已提交
363 364 365 366
        g_free(nodename);
    }
}

367
/*
 * Add the /intc/its node for the GICv3 ITS.  A fresh phandle is
 * allocated for the node and recorded in vms->msi_phandle.
 */
static void fdt_add_its_gic_node(VirtMachineState *vms)
{
    vms->msi_phandle = qemu_fdt_alloc_phandle(vms->fdt);
    qemu_fdt_add_subnode(vms->fdt, "/intc/its");
    qemu_fdt_setprop_string(vms->fdt, "/intc/its", "compatible",
                            "arm,gic-v3-its");
    qemu_fdt_setprop(vms->fdt, "/intc/its", "msi-controller", NULL, 0);
    qemu_fdt_setprop_sized_cells(vms->fdt, "/intc/its", "reg",
                                 2, vms->memmap[VIRT_GIC_ITS].base,
                                 2, vms->memmap[VIRT_GIC_ITS].size);
    qemu_fdt_setprop_cell(vms->fdt, "/intc/its", "phandle", vms->msi_phandle);
}

380
/*
 * Add the /intc/v2m node for the GICv2m MSI frame.  A fresh phandle is
 * allocated for the node and recorded in vms->msi_phandle.
 */
static void fdt_add_v2m_gic_node(VirtMachineState *vms)
{
    vms->msi_phandle = qemu_fdt_alloc_phandle(vms->fdt);
    qemu_fdt_add_subnode(vms->fdt, "/intc/v2m");
    qemu_fdt_setprop_string(vms->fdt, "/intc/v2m", "compatible",
                            "arm,gic-v2m-frame");
    qemu_fdt_setprop(vms->fdt, "/intc/v2m", "msi-controller", NULL, 0);
    qemu_fdt_setprop_sized_cells(vms->fdt, "/intc/v2m", "reg",
                                 2, vms->memmap[VIRT_GIC_V2M].base,
                                 2, vms->memmap[VIRT_GIC_V2M].size);
    qemu_fdt_setprop_cell(vms->fdt, "/intc/v2m", "phandle", vms->msi_phandle);
}
P
Peter Maydell 已提交
392

393
/*
 * Add the /intc interrupt-controller node and make it the root
 * "interrupt-parent".  A fresh phandle is allocated and recorded in
 * vms->gic_phandle.
 *
 * For GIC version 3 the node describes the distributor and the
 * redistributor region, plus the maintenance interrupt when vms->virt is
 * set.  For any other version it describes a GICv2 distributor and CPU
 * interface.
 */
static void fdt_add_gic_node(VirtMachineState *vms)
{
    vms->gic_phandle = qemu_fdt_alloc_phandle(vms->fdt);
    qemu_fdt_setprop_cell(vms->fdt, "/", "interrupt-parent", vms->gic_phandle);

    qemu_fdt_add_subnode(vms->fdt, "/intc");
    qemu_fdt_setprop_cell(vms->fdt, "/intc", "#interrupt-cells", 3);
    qemu_fdt_setprop(vms->fdt, "/intc", "interrupt-controller", NULL, 0);
    qemu_fdt_setprop_cell(vms->fdt, "/intc", "#address-cells", 0x2);
    qemu_fdt_setprop_cell(vms->fdt, "/intc", "#size-cells", 0x2);
    qemu_fdt_setprop(vms->fdt, "/intc", "ranges", NULL, 0);
    if (vms->gic_version == 3) {
        qemu_fdt_setprop_string(vms->fdt, "/intc", "compatible",
                                "arm,gic-v3");
        qemu_fdt_setprop_sized_cells(vms->fdt, "/intc", "reg",
                                     2, vms->memmap[VIRT_GIC_DIST].base,
                                     2, vms->memmap[VIRT_GIC_DIST].size,
                                     2, vms->memmap[VIRT_GIC_REDIST].base,
                                     2, vms->memmap[VIRT_GIC_REDIST].size);
        if (vms->virt) {
            qemu_fdt_setprop_cells(vms->fdt, "/intc", "interrupts",
                                   GIC_FDT_IRQ_TYPE_PPI, ARCH_GICV3_MAINT_IRQ,
                                   GIC_FDT_IRQ_FLAGS_LEVEL_HI);
        }
    } else {
        /* 'cortex-a15-gic' means 'GIC v2' */
        qemu_fdt_setprop_string(vms->fdt, "/intc", "compatible",
                                "arm,cortex-a15-gic");
        qemu_fdt_setprop_sized_cells(vms->fdt, "/intc", "reg",
                                      2, vms->memmap[VIRT_GIC_DIST].base,
                                      2, vms->memmap[VIRT_GIC_DIST].size,
                                      2, vms->memmap[VIRT_GIC_CPU].base,
                                      2, vms->memmap[VIRT_GIC_CPU].size);
    }

    qemu_fdt_setprop_cell(vms->fdt, "/intc", "phandle", vms->gic_phandle);
}

431
static void fdt_add_pmu_nodes(const VirtMachineState *vms)
432 433 434 435 436 437 438
{
    CPUState *cpu;
    ARMCPU *armcpu;
    uint32_t irqflags = GIC_FDT_IRQ_FLAGS_LEVEL_HI;

    CPU_FOREACH(cpu) {
        armcpu = ARM_CPU(cpu);
439
        if (!arm_feature(&armcpu->env, ARM_FEATURE_PMU)) {
440 441
            return;
        }
442
        if (kvm_enabled()) {
443 444
            if (kvm_irqchip_in_kernel()) {
                kvm_arm_pmu_set_irq(cpu, PPI(VIRTUAL_PMU_IRQ));
445
            }
446
            kvm_arm_pmu_init(cpu);
447
        }
448 449
    }

450
    if (vms->gic_version == 2) {
451 452
        irqflags = deposit32(irqflags, GIC_FDT_IRQ_PPI_CPU_START,
                             GIC_FDT_IRQ_PPI_CPU_WIDTH,
453
                             (1 << vms->smp_cpus) - 1);
454 455 456
    }

    armcpu = ARM_CPU(qemu_get_cpu(0));
457
    qemu_fdt_add_subnode(vms->fdt, "/pmu");
458 459
    if (arm_feature(&armcpu->env, ARM_FEATURE_V8)) {
        const char compat[] = "arm,armv8-pmuv3";
460
        qemu_fdt_setprop(vms->fdt, "/pmu", "compatible",
461
                         compat, sizeof(compat));
462
        qemu_fdt_setprop_cells(vms->fdt, "/pmu", "interrupts",
463 464 465 466
                               GIC_FDT_IRQ_TYPE_PPI, VIRTUAL_PMU_IRQ, irqflags);
    }
}

467
/*
 * Create the GICv3 ITS device, link it to @gicdev through its
 * "parent-gicv3" property, map it at the VIRT_GIC_ITS base address and
 * add the matching device tree node.
 *
 * Does nothing when its_class_name() reports no ITS implementation.
 */
static void create_its(VirtMachineState *vms, DeviceState *gicdev)
{
    const char *itsclass = its_class_name();
    DeviceState *dev;

    if (!itsclass) {
        /* Do nothing if not supported */
        return;
    }

    dev = qdev_create(NULL, itsclass);

    object_property_set_link(OBJECT(dev), OBJECT(gicdev), "parent-gicv3",
                             &error_abort);
    qdev_init_nofail(dev);
    sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, vms->memmap[VIRT_GIC_ITS].base);

    fdt_add_its_gic_node(vms);
}

487
/*
 * Create the "arm-gicv2m" MSI frame, map it at the VIRT_GIC_V2M base
 * address, connect its NUM_GICV2M_SPIS outputs to consecutive entries of
 * @pic starting at irqmap[VIRT_GIC_V2M], and add the device tree node.
 */
static void create_v2m(VirtMachineState *vms, qemu_irq *pic)
{
    int i;
    int irq = vms->irqmap[VIRT_GIC_V2M];
    DeviceState *dev;

    dev = qdev_create(NULL, "arm-gicv2m");
    sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, vms->memmap[VIRT_GIC_V2M].base);
    qdev_prop_set_uint32(dev, "base-spi", irq);
    qdev_prop_set_uint32(dev, "num-spi", NUM_GICV2M_SPIS);
    qdev_init_nofail(dev);

    for (i = 0; i < NUM_GICV2M_SPIS; i++) {
        sysbus_connect_irq(SYS_BUS_DEVICE(dev), i, pic[irq + i]);
    }

    fdt_add_v2m_gic_node(vms);
}

506
/*
 * Create and wire up the interrupt controller.
 *
 * @vms: machine state; gic_version selects the GICv3 or the GICv2 model
 * @pic: array of at least NUM_IRQS entries, filled in with the GIC's
 *       external interrupt input lines
 *
 * Besides creating and mapping the GIC, this connects each CPU's timer,
 * GICv3 maintenance and PMU outputs to that CPU's PPI inputs on the GIC,
 * connects the GIC outputs to each CPU's IRQ/FIQ/VIRQ/VFIQ inputs, adds
 * the /intc device tree node and finally creates the MSI controller:
 * the ITS for GICv3 when vms->its is set, the v2m frame for GICv2.
 */
static void create_gic(VirtMachineState *vms, qemu_irq *pic)
{
    /* We create a standalone GIC */
    DeviceState *gicdev;
    SysBusDevice *gicbusdev;
    const char *gictype;
    int type = vms->gic_version, i;

    gictype = (type == 3) ? gicv3_class_name() : gic_class_name();

    gicdev = qdev_create(NULL, gictype);
    qdev_prop_set_uint32(gicdev, "revision", type);
    qdev_prop_set_uint32(gicdev, "num-cpu", smp_cpus);
    /* Note that the num-irq property counts both internal and external
     * interrupts; there are always 32 of the former (mandated by GIC spec).
     */
    qdev_prop_set_uint32(gicdev, "num-irq", NUM_IRQS + 32);
    if (!kvm_irqchip_in_kernel()) {
        qdev_prop_set_bit(gicdev, "has-security-extensions", vms->secure);
    }
    qdev_init_nofail(gicdev);
    gicbusdev = SYS_BUS_DEVICE(gicdev);
    sysbus_mmio_map(gicbusdev, 0, vms->memmap[VIRT_GIC_DIST].base);
    /* MMIO region 1 is the redistributor for GICv3, the CPU interface else */
    if (type == 3) {
        sysbus_mmio_map(gicbusdev, 1, vms->memmap[VIRT_GIC_REDIST].base);
    } else {
        sysbus_mmio_map(gicbusdev, 1, vms->memmap[VIRT_GIC_CPU].base);
    }

    /* Wire the outputs from each CPU's generic timer and the GICv3
     * maintenance interrupt signal to the appropriate GIC PPI inputs,
     * and the GIC's IRQ/FIQ/VIRQ/VFIQ interrupt outputs to the CPU's inputs.
     */
    for (i = 0; i < smp_cpus; i++) {
        DeviceState *cpudev = DEVICE(qemu_get_cpu(i));
        /* This CPU's PPI inputs follow the NUM_IRQS external lines */
        int ppibase = NUM_IRQS + i * GIC_INTERNAL + GIC_NR_SGIS;
        int irq;
        /* Mapping from the output timer irq lines from the CPU to the
         * GIC PPI inputs we use for the virt board.
         */
        const int timer_irq[] = {
            [GTIMER_PHYS] = ARCH_TIMER_NS_EL1_IRQ,
            [GTIMER_VIRT] = ARCH_TIMER_VIRT_IRQ,
            [GTIMER_HYP]  = ARCH_TIMER_NS_EL2_IRQ,
            [GTIMER_SEC]  = ARCH_TIMER_S_EL1_IRQ,
        };

        for (irq = 0; irq < ARRAY_SIZE(timer_irq); irq++) {
            qdev_connect_gpio_out(cpudev, irq,
                                  qdev_get_gpio_in(gicdev,
                                                   ppibase + timer_irq[irq]));
        }

        qdev_connect_gpio_out_named(cpudev, "gicv3-maintenance-interrupt", 0,
                                    qdev_get_gpio_in(gicdev, ppibase
                                                     + ARCH_GICV3_MAINT_IRQ));
        qdev_connect_gpio_out_named(cpudev, "pmu-interrupt", 0,
                                    qdev_get_gpio_in(gicdev, ppibase
                                                     + VIRTUAL_PMU_IRQ));

        /* GIC outputs are grouped by kind: IRQ, FIQ, VIRQ, then VFIQ */
        sysbus_connect_irq(gicbusdev, i, qdev_get_gpio_in(cpudev, ARM_CPU_IRQ));
        sysbus_connect_irq(gicbusdev, i + smp_cpus,
                           qdev_get_gpio_in(cpudev, ARM_CPU_FIQ));
        sysbus_connect_irq(gicbusdev, i + 2 * smp_cpus,
                           qdev_get_gpio_in(cpudev, ARM_CPU_VIRQ));
        sysbus_connect_irq(gicbusdev, i + 3 * smp_cpus,
                           qdev_get_gpio_in(cpudev, ARM_CPU_VFIQ));
    }

    for (i = 0; i < NUM_IRQS; i++) {
        pic[i] = qdev_get_gpio_in(gicdev, i);
    }

    fdt_add_gic_node(vms);

    if (type == 3 && vms->its) {
        create_its(vms, gicdev);
    } else if (type == 2) {
        create_v2m(vms, pic);
    }
}

588
static void create_uart(const VirtMachineState *vms, qemu_irq *pic, int uart,
589
                        MemoryRegion *mem, Chardev *chr)
P
Peter Maydell 已提交
590 591
{
    char *nodename;
592 593 594
    hwaddr base = vms->memmap[uart].base;
    hwaddr size = vms->memmap[uart].size;
    int irq = vms->irqmap[uart];
P
Peter Maydell 已提交
595 596
    const char compat[] = "arm,pl011\0arm,primecell";
    const char clocknames[] = "uartclk\0apb_pclk";
597 598
    DeviceState *dev = qdev_create(NULL, "pl011");
    SysBusDevice *s = SYS_BUS_DEVICE(dev);
P
Peter Maydell 已提交
599

X
xiaoqiang zhao 已提交
600
    qdev_prop_set_chr(dev, "chardev", chr);
601 602 603 604
    qdev_init_nofail(dev);
    memory_region_add_subregion(mem, base,
                                sysbus_mmio_get_region(s, 0));
    sysbus_connect_irq(s, 0, pic[irq]);
P
Peter Maydell 已提交
605 606

    nodename = g_strdup_printf("/pl011@%" PRIx64, base);
607
    qemu_fdt_add_subnode(vms->fdt, nodename);
P
Peter Maydell 已提交
608
    /* Note that we can't use setprop_string because of the embedded NUL */
609
    qemu_fdt_setprop(vms->fdt, nodename, "compatible",
P
Peter Maydell 已提交
610
                         compat, sizeof(compat));
611
    qemu_fdt_setprop_sized_cells(vms->fdt, nodename, "reg",
P
Peter Maydell 已提交
612
                                     2, base, 2, size);
613
    qemu_fdt_setprop_cells(vms->fdt, nodename, "interrupts",
P
Peter Maydell 已提交
614
                               GIC_FDT_IRQ_TYPE_SPI, irq,
615
                               GIC_FDT_IRQ_FLAGS_LEVEL_HI);
616 617 618
    qemu_fdt_setprop_cells(vms->fdt, nodename, "clocks",
                               vms->clock_phandle, vms->clock_phandle);
    qemu_fdt_setprop(vms->fdt, nodename, "clock-names",
P
Peter Maydell 已提交
619
                         clocknames, sizeof(clocknames));
620

621
    if (uart == VIRT_UART) {
622
        qemu_fdt_setprop_string(vms->fdt, "/chosen", "stdout-path", nodename);
623 624
    } else {
        /* Mark as not usable by the normal world */
625 626
        qemu_fdt_setprop_string(vms->fdt, nodename, "status", "disabled");
        qemu_fdt_setprop_string(vms->fdt, nodename, "secure-status", "okay");
627 628
    }

P
Peter Maydell 已提交
629 630 631
    g_free(nodename);
}

632
/*
 * Create the PL031 RTC at the VIRT_RTC base address, connect it to the
 * irqmap[VIRT_RTC] entry of @pic and add its device tree node.
 */
static void create_rtc(const VirtMachineState *vms, qemu_irq *pic)
{
    char *nodename;
    hwaddr base = vms->memmap[VIRT_RTC].base;
    hwaddr size = vms->memmap[VIRT_RTC].size;
    int irq = vms->irqmap[VIRT_RTC];
    /* Two strings separated by an embedded NUL, hence setprop() below */
    const char compat[] = "arm,pl031\0arm,primecell";

    sysbus_create_simple("pl031", base, pic[irq]);

    nodename = g_strdup_printf("/pl031@%" PRIx64, base);
    qemu_fdt_add_subnode(vms->fdt, nodename);
    qemu_fdt_setprop(vms->fdt, nodename, "compatible", compat, sizeof(compat));
    qemu_fdt_setprop_sized_cells(vms->fdt, nodename, "reg",
                                 2, base, 2, size);
    qemu_fdt_setprop_cells(vms->fdt, nodename, "interrupts",
                           GIC_FDT_IRQ_TYPE_SPI, irq,
                           GIC_FDT_IRQ_FLAGS_LEVEL_HI);
    qemu_fdt_setprop_cell(vms->fdt, nodename, "clocks", vms->clock_phandle);
    qemu_fdt_setprop_string(vms->fdt, nodename, "clock-names", "apb_pclk");
    g_free(nodename);
}

655
/* The "gpio-key" device; set by create_gpio(), used for powerdown requests */
static DeviceState *gpio_key_dev;

/*
 * Powerdown notifier callback: raise input 0 of the gpio-key device.
 * create_gpio() connects that device's output to PL061 pin 3, which the
 * device tree advertises as the power button.
 */
static void virt_powerdown_req(Notifier *n, void *opaque)
{
    /* the gpio-key device drives PL061 pin 3 for the power button event */
    qemu_set_irq(qdev_get_gpio_in(gpio_key_dev, 0), 1);
}

/* Registered by create_gpio() through qemu_register_powerdown_notifier() */
static Notifier virt_system_powerdown_notifier = {
    .notify = virt_powerdown_req
};

666
/*
 * Create the PL061 GPIO controller and the power button attached to it.
 *
 * The PL061 is created at the VIRT_GPIO base address and connected to
 * the irqmap[VIRT_GPIO] entry of @pic.  A "gpio-key" device is wired to
 * PL061 pin 3 and stored in gpio_key_dev.  The device tree gets the PL061
 * node plus a /gpio-keys/poweroff node that reports KEY_POWER on that
 * pin.  Finally the system powerdown notifier is registered.
 */
static void create_gpio(const VirtMachineState *vms, qemu_irq *pic)
{
    char *nodename;
    DeviceState *pl061_dev;
    hwaddr base = vms->memmap[VIRT_GPIO].base;
    hwaddr size = vms->memmap[VIRT_GPIO].size;
    int irq = vms->irqmap[VIRT_GPIO];
    const char compat[] = "arm,pl061\0arm,primecell";
    uint32_t phandle;

    pl061_dev = sysbus_create_simple("pl061", base, pic[irq]);

    phandle = qemu_fdt_alloc_phandle(vms->fdt);
    nodename = g_strdup_printf("/pl061@%" PRIx64, base);
    qemu_fdt_add_subnode(vms->fdt, nodename);
    qemu_fdt_setprop_sized_cells(vms->fdt, nodename, "reg",
                                 2, base, 2, size);
    qemu_fdt_setprop(vms->fdt, nodename, "compatible", compat, sizeof(compat));
    qemu_fdt_setprop_cell(vms->fdt, nodename, "#gpio-cells", 2);
    qemu_fdt_setprop(vms->fdt, nodename, "gpio-controller", NULL, 0);
    qemu_fdt_setprop_cells(vms->fdt, nodename, "interrupts",
                           GIC_FDT_IRQ_TYPE_SPI, irq,
                           GIC_FDT_IRQ_FLAGS_LEVEL_HI);
    qemu_fdt_setprop_cell(vms->fdt, nodename, "clocks", vms->clock_phandle);
    qemu_fdt_setprop_string(vms->fdt, nodename, "clock-names", "apb_pclk");
    qemu_fdt_setprop_cell(vms->fdt, nodename, "phandle", phandle);

    /* The gpio-key device has no MMIO (-1); its output drives PL061 pin 3 */
    gpio_key_dev = sysbus_create_simple("gpio-key", -1,
                                        qdev_get_gpio_in(pl061_dev, 3));
    qemu_fdt_add_subnode(vms->fdt, "/gpio-keys");
    qemu_fdt_setprop_string(vms->fdt, "/gpio-keys", "compatible", "gpio-keys");
    qemu_fdt_setprop_cell(vms->fdt, "/gpio-keys", "#size-cells", 0);
    qemu_fdt_setprop_cell(vms->fdt, "/gpio-keys", "#address-cells", 1);

    qemu_fdt_add_subnode(vms->fdt, "/gpio-keys/poweroff");
    qemu_fdt_setprop_string(vms->fdt, "/gpio-keys/poweroff",
                            "label", "GPIO Key Poweroff");
    qemu_fdt_setprop_cell(vms->fdt, "/gpio-keys/poweroff", "linux,code",
                          KEY_POWER);
    /* gpios = <PL061 phandle, pin 3, flags 0> */
    qemu_fdt_setprop_cells(vms->fdt, "/gpio-keys/poweroff",
                           "gpios", phandle, 3, 0);

    /* connect powerdown request */
    qemu_register_powerdown_notifier(&virt_system_powerdown_notifier);

    g_free(nodename);
}

713
static void create_virtio_devices(const VirtMachineState *vms, qemu_irq *pic)
{
    /* Create NUM_VIRTIO_TRANSPORTS virtio-mmio transports, each occupying
     * one VIRT_MMIO-sized slot and one interrupt line, and describe every
     * one of them in the device tree.
     *
     * vms: machine state supplying the memory map, IRQ map and fdt.
     * pic: interrupt lines, indexed by the numbers held in vms->irqmap.
     */
    int i;
    hwaddr size = vms->memmap[VIRT_MMIO].size;

    /* We create the transports in forwards order. Since qbus_realize()
     * prepends (not appends) new child buses, the incrementing loop below will
     * create a list of virtio-mmio buses with decreasing base addresses.
     *
     * When a -device option is processed from the command line,
     * qbus_find_recursive() picks the next free virtio-mmio bus in forwards
     * order. The upshot is that -device options in increasing command line
     * order are mapped to virtio-mmio buses with decreasing base addresses.
     *
     * When this code was originally written, that arrangement ensured that the
     * guest Linux kernel would give the lowest "name" (/dev/vda, eth0, etc) to
     * the first -device on the command line. (The end-to-end order is a
     * function of this loop, qbus_realize(), qbus_find_recursive(), and the
     * guest kernel's name-to-address assignment strategy.)
     *
     * Meanwhile, the kernel's traversal seems to have been reversed; see eg.
     * the message, if not necessarily the code, of commit 70161ff336.
     * Therefore the loop now establishes the inverse of the original intent.
     *
     * Unfortunately, we can't counteract the kernel change by reversing the
     * loop; it would break existing command lines.
     *
     * In any case, the kernel makes no guarantee about the stability of
     * enumeration order of virtio devices (as demonstrated by it changing
     * between kernel versions). For reliable and stable identification
     * of disks users must use UUIDs or similar mechanisms.
     */
    for (i = 0; i < NUM_VIRTIO_TRANSPORTS; i++) {
        int irq = vms->irqmap[VIRT_MMIO] + i;
        hwaddr base = vms->memmap[VIRT_MMIO].base + i * size;

        sysbus_create_simple("virtio-mmio", base, pic[irq]);
    }

    /* We add dtb nodes in reverse order so that they appear in the finished
     * device tree lowest address first.
     *
     * Note that this mapping is independent of the loop above. The previous
     * loop influences virtio device to virtio transport assignment, whereas
     * this loop controls how virtio transports are laid out in the dtb.
     */
    for (i = NUM_VIRTIO_TRANSPORTS - 1; i >= 0; i--) {
        char *nodename;
        int irq = vms->irqmap[VIRT_MMIO] + i;
        hwaddr base = vms->memmap[VIRT_MMIO].base + i * size;

        nodename = g_strdup_printf("/virtio_mmio@%" PRIx64, base);
        qemu_fdt_add_subnode(vms->fdt, nodename);
        qemu_fdt_setprop_string(vms->fdt, nodename,
                                "compatible", "virtio,mmio");
        qemu_fdt_setprop_sized_cells(vms->fdt, nodename, "reg",
                                     2, base, 2, size);
        qemu_fdt_setprop_cells(vms->fdt, nodename, "interrupts",
                               GIC_FDT_IRQ_TYPE_SPI, irq,
                               GIC_FDT_IRQ_FLAGS_EDGE_LO_HI);
        qemu_fdt_setprop(vms->fdt, nodename, "dma-coherent", NULL, 0);
        g_free(nodename);
    }
}

778
static void create_one_flash(const char *name, hwaddr flashbase,
                             hwaddr flashsize, const char *file,
                             MemoryRegion *sysmem)
{
    /* Create and map a single flash device. We use the same
     * parameters as the flash devices on the Versatile Express board.
     *
     * name:      value for the device's "name" property.
     * flashbase: offset within sysmem at which the device is mapped.
     * flashsize: device size; divided by the 256KiB sector length to
     *            obtain "num-blocks".
     * file:      optional firmware image name; when non-NULL it is looked
     *            up as a BIOS file and loaded into the flash region. Any
     *            failure on that path is fatal (exit(1)).
     * sysmem:    memory region that receives the flash mapping.
     */
    DriveInfo *dinfo = drive_get_next(IF_PFLASH);
    DeviceState *dev = qdev_create(NULL, "cfi.pflash01");
    SysBusDevice *sbd = SYS_BUS_DEVICE(dev);
    const uint64_t sectorlength = 256 * 1024;

    /* Back the flash with the next if=pflash drive, if there is one */
    if (dinfo) {
        qdev_prop_set_drive(dev, "drive", blk_by_legacy_dinfo(dinfo),
                            &error_abort);
    }

    qdev_prop_set_uint32(dev, "num-blocks", flashsize / sectorlength);
    qdev_prop_set_uint64(dev, "sector-length", sectorlength);
    qdev_prop_set_uint8(dev, "width", 4);
    qdev_prop_set_uint8(dev, "device-width", 2);
    qdev_prop_set_bit(dev, "big-endian", false);
    qdev_prop_set_uint16(dev, "id0", 0x89);
    qdev_prop_set_uint16(dev, "id1", 0x18);
    qdev_prop_set_uint16(dev, "id2", 0x00);
    qdev_prop_set_uint16(dev, "id3", 0x00);
    qdev_prop_set_string(dev, "name", name);
    qdev_init_nofail(dev);

    memory_region_add_subregion(sysmem, flashbase,
                                sysbus_mmio_get_region(sbd, 0));

    if (file) {
        char *fn;
        int image_size;

        /* An image file and pflash unit 0 would both claim this device */
        if (drive_get(IF_PFLASH, 0, 0)) {
            error_report("The contents of the first flash device may be "
                         "specified with -bios or with -drive if=pflash... "
                         "but you cannot use both options at once");
            exit(1);
        }
        fn = qemu_find_file(QEMU_FILE_TYPE_BIOS, file);
        if (!fn) {
            error_report("Could not find ROM image '%s'", file);
            exit(1);
        }
        image_size = load_image_mr(fn, sysbus_mmio_get_region(sbd, 0));
        g_free(fn);
        if (image_size < 0) {
            error_report("Could not load ROM image '%s'", file);
            exit(1);
        }
    }
}

834
static void create_flash(const VirtMachineState *vms,
                         MemoryRegion *sysmem,
                         MemoryRegion *secure_sysmem)
{
    /* Create two flash devices to fill the VIRT_FLASH space in the memmap.
     * Any file passed via -bios goes in the first of these.
     * sysmem is the system memory space. secure_sysmem is the secure view
     * of the system, and the first flash device should be made visible only
     * there. The second flash device is visible to both secure and nonsecure.
     * If sysmem == secure_sysmem this means there is no separate Secure
     * address space and both flash devices are generally visible.
     */
    hwaddr flashsize = vms->memmap[VIRT_FLASH].size / 2;
    hwaddr flashbase = vms->memmap[VIRT_FLASH].base;
    char *nodename;

    create_one_flash("virt.flash0", flashbase, flashsize,
                     bios_name, secure_sysmem);
    create_one_flash("virt.flash1", flashbase + flashsize, flashsize,
                     NULL, sysmem);

    if (sysmem == secure_sysmem) {
        /* Report both flash devices as a single node in the DT */
        nodename = g_strdup_printf("/flash@%" PRIx64, flashbase);
        qemu_fdt_add_subnode(vms->fdt, nodename);
        qemu_fdt_setprop_string(vms->fdt, nodename, "compatible", "cfi-flash");
        qemu_fdt_setprop_sized_cells(vms->fdt, nodename, "reg",
                                     2, flashbase, 2, flashsize,
                                     2, flashbase + flashsize, 2, flashsize);
        qemu_fdt_setprop_cell(vms->fdt, nodename, "bank-width", 4);
        g_free(nodename);
    } else {
        /* Report the devices as separate nodes so we can mark one as
         * only visible to the secure world.
         */
        nodename = g_strdup_printf("/secflash@%" PRIx64, flashbase);
        qemu_fdt_add_subnode(vms->fdt, nodename);
        qemu_fdt_setprop_string(vms->fdt, nodename, "compatible", "cfi-flash");
        qemu_fdt_setprop_sized_cells(vms->fdt, nodename, "reg",
                                     2, flashbase, 2, flashsize);
        qemu_fdt_setprop_cell(vms->fdt, nodename, "bank-width", 4);
        qemu_fdt_setprop_string(vms->fdt, nodename, "status", "disabled");
        qemu_fdt_setprop_string(vms->fdt, nodename, "secure-status", "okay");
        g_free(nodename);

        /* The "/flash@" node is named after flashbase but its "reg"
         * covers the second device at flashbase + flashsize.
         */
        nodename = g_strdup_printf("/flash@%" PRIx64, flashbase);
        qemu_fdt_add_subnode(vms->fdt, nodename);
        qemu_fdt_setprop_string(vms->fdt, nodename, "compatible", "cfi-flash");
        qemu_fdt_setprop_sized_cells(vms->fdt, nodename, "reg",
                                     2, flashbase + flashsize, 2, flashsize);
        qemu_fdt_setprop_cell(vms->fdt, nodename, "bank-width", 4);
        g_free(nodename);
    }
}

A
Andrew Jones 已提交
889
static FWCfgState *create_fw_cfg(const VirtMachineState *vms, AddressSpace *as)
{
    /* Create the memory-mapped fw_cfg device at VIRT_FW_CFG, publish the
     * CPU count through it and add the matching "/fw-cfg@" node to the
     * device tree.
     *
     * as: address space handed to fw_cfg_init_mem_wide().
     *
     * Returns the new FWCfgState.
     */
    hwaddr base = vms->memmap[VIRT_FW_CFG].base;
    hwaddr size = vms->memmap[VIRT_FW_CFG].size;
    FWCfgState *fw_cfg;
    char *nodename;

    fw_cfg = fw_cfg_init_mem_wide(base + 8, base, 8, base + 16, as);
    fw_cfg_add_i16(fw_cfg, FW_CFG_NB_CPUS, (uint16_t)smp_cpus);

    nodename = g_strdup_printf("/fw-cfg@%" PRIx64, base);
    qemu_fdt_add_subnode(vms->fdt, nodename);
    qemu_fdt_setprop_string(vms->fdt, nodename,
                            "compatible", "qemu,fw-cfg-mmio");
    qemu_fdt_setprop_sized_cells(vms->fdt, nodename, "reg",
                                 2, base, 2, size);
    qemu_fdt_setprop(vms->fdt, nodename, "dma-coherent", NULL, 0);
    g_free(nodename);
    return fw_cfg;
}

910
static void create_pcie_irq_map(const VirtMachineState *vms,
911
                                uint32_t gic_phandle,
912 913 914
                                int first_irq, const char *nodename)
{
    int devfn, pin;
915
    uint32_t full_irq_map[4 * 4 * 10] = { 0 };
916 917 918 919 920 921 922 923 924 925 926 927
    uint32_t *irq_map = full_irq_map;

    for (devfn = 0; devfn <= 0x18; devfn += 0x8) {
        for (pin = 0; pin < 4; pin++) {
            int irq_type = GIC_FDT_IRQ_TYPE_SPI;
            int irq_nr = first_irq + ((pin + PCI_SLOT(devfn)) % PCI_NUM_PINS);
            int irq_level = GIC_FDT_IRQ_FLAGS_LEVEL_HI;
            int i;

            uint32_t map[] = {
                devfn << 8, 0, 0,                           /* devfn */
                pin + 1,                                    /* PCI pin */
928
                gic_phandle, 0, 0, irq_type, irq_nr, irq_level }; /* GIC irq */
929 930

            /* Convert map to big endian */
931
            for (i = 0; i < 10; i++) {
932 933
                irq_map[i] = cpu_to_be32(map[i]);
            }
934
            irq_map += 10;
935 936 937
        }
    }

938
    qemu_fdt_setprop(vms->fdt, nodename, "interrupt-map",
939 940
                     full_irq_map, sizeof(full_irq_map));

941
    qemu_fdt_setprop_cells(vms->fdt, nodename, "interrupt-map-mask",
942 943 944 945
                           0x1800, 0, 0, /* devfn (PCI_SLOT(3)) */
                           0x7           /* PCI irq */);
}

946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996
static void create_smmu(const VirtMachineState *vms, qemu_irq *pic,
                        PCIBus *bus)
{
    /* Instantiate an "arm-smmuv3" device in front of the given PCI bus,
     * map it at VIRT_SMMU, wire its NUM_SMMU_IRQS interrupt lines and add
     * the "/smmuv3@" node to the device tree.
     *
     * Does nothing unless the machine selected VIRT_IOMMU_SMMUV3 and an
     * iommu phandle has already been allocated.
     */
    const char compat[] = "arm,smmu-v3";
    /* NUL-separated name list; sizeof() below covers the final NUL too */
    const char irq_names[] = "eventq\0priq\0cmdq-sync\0gerror";
    hwaddr base = vms->memmap[VIRT_SMMU].base;
    hwaddr size = vms->memmap[VIRT_SMMU].size;
    int irq = vms->irqmap[VIRT_SMMU];
    DeviceState *dev;
    SysBusDevice *sbd;
    char *nodename;
    int n;

    if (vms->iommu != VIRT_IOMMU_SMMUV3 || !vms->iommu_phandle) {
        return;
    }

    dev = qdev_create(NULL, "arm-smmuv3");
    object_property_set_link(OBJECT(dev), OBJECT(bus), "primary-bus",
                             &error_abort);
    qdev_init_nofail(dev);

    sbd = SYS_BUS_DEVICE(dev);
    sysbus_mmio_map(sbd, 0, base);
    for (n = 0; n < NUM_SMMU_IRQS; n++) {
        sysbus_connect_irq(sbd, n, pic[irq + n]);
    }

    nodename = g_strdup_printf("/smmuv3@%" PRIx64, base);
    qemu_fdt_add_subnode(vms->fdt, nodename);
    qemu_fdt_setprop(vms->fdt, nodename, "compatible",
                     compat, sizeof(compat));
    qemu_fdt_setprop_sized_cells(vms->fdt, nodename, "reg",
                                 2, base, 2, size);

    /* One edge-triggered SPI per entry of irq_names, in the same order */
    qemu_fdt_setprop_cells(vms->fdt, nodename, "interrupts",
            GIC_FDT_IRQ_TYPE_SPI, irq    , GIC_FDT_IRQ_FLAGS_EDGE_LO_HI,
            GIC_FDT_IRQ_TYPE_SPI, irq + 1, GIC_FDT_IRQ_FLAGS_EDGE_LO_HI,
            GIC_FDT_IRQ_TYPE_SPI, irq + 2, GIC_FDT_IRQ_FLAGS_EDGE_LO_HI,
            GIC_FDT_IRQ_TYPE_SPI, irq + 3, GIC_FDT_IRQ_FLAGS_EDGE_LO_HI);
    qemu_fdt_setprop(vms->fdt, nodename, "interrupt-names",
                     irq_names, sizeof(irq_names));

    qemu_fdt_setprop_cell(vms->fdt, nodename, "clocks", vms->clock_phandle);
    qemu_fdt_setprop_string(vms->fdt, nodename, "clock-names", "apb_pclk");
    qemu_fdt_setprop(vms->fdt, nodename, "dma-coherent", NULL, 0);

    qemu_fdt_setprop_cell(vms->fdt, nodename, "#iommu-cells", 1);

    qemu_fdt_setprop_cell(vms->fdt, nodename, "phandle", vms->iommu_phandle);
    g_free(nodename);
}

static void create_pcie(VirtMachineState *vms, qemu_irq *pic)
{
    /* Create the generic PCIe host bridge (TYPE_GPEX_HOST), map its ECAM,
     * MMIO, optional high MMIO and IO port windows into the system address
     * space, wire its GPEX_NUM_IRQS interrupts, plug the configured NICs
     * into its bus, and describe it in the device tree. When vms->iommu is
     * set, an iommu phandle is allocated, create_smmu() is called and an
     * "iommu-map" property is added to the PCIe node.
     */
    hwaddr base_mmio = vms->memmap[VIRT_PCIE_MMIO].base;
    hwaddr size_mmio = vms->memmap[VIRT_PCIE_MMIO].size;
    hwaddr base_mmio_high = vms->memmap[VIRT_PCIE_MMIO_HIGH].base;
    hwaddr size_mmio_high = vms->memmap[VIRT_PCIE_MMIO_HIGH].size;
    hwaddr base_pio = vms->memmap[VIRT_PCIE_PIO].base;
    hwaddr size_pio = vms->memmap[VIRT_PCIE_PIO].size;
    hwaddr base_ecam = vms->memmap[VIRT_PCIE_ECAM].base;
    hwaddr size_ecam = vms->memmap[VIRT_PCIE_ECAM].size;
    hwaddr base = base_mmio;
    int nr_pcie_buses = size_ecam / PCIE_MMCFG_SIZE_MIN;
    int irq = vms->irqmap[VIRT_PCIE];
    MemoryRegion *mmio_alias;
    MemoryRegion *mmio_reg;
    MemoryRegion *ecam_alias;
    MemoryRegion *ecam_reg;
    DeviceState *dev;
    char *nodename;
    int i;
    PCIHostState *pci;

    dev = qdev_create(NULL, TYPE_GPEX_HOST);
    qdev_init_nofail(dev);

    /* Map only the first size_ecam bytes of ECAM space */
    ecam_alias = g_new0(MemoryRegion, 1);
    ecam_reg = sysbus_mmio_get_region(SYS_BUS_DEVICE(dev), 0);
    memory_region_init_alias(ecam_alias, OBJECT(dev), "pcie-ecam",
                             ecam_reg, 0, size_ecam);
    memory_region_add_subregion(get_system_memory(), base_ecam, ecam_alias);

    /* Map the MMIO window into system address space so as to expose
     * the section of PCI MMIO space which starts at the same base address
     * (ie 1:1 mapping for that part of PCI MMIO space visible through
     * the window).
     */
    mmio_alias = g_new0(MemoryRegion, 1);
    mmio_reg = sysbus_mmio_get_region(SYS_BUS_DEVICE(dev), 1);
    memory_region_init_alias(mmio_alias, OBJECT(dev), "pcie-mmio",
                             mmio_reg, base_mmio, size_mmio);
    memory_region_add_subregion(get_system_memory(), base_mmio, mmio_alias);

    if (vms->highmem) {
        /* Map high MMIO space */
        MemoryRegion *high_mmio_alias = g_new0(MemoryRegion, 1);

        memory_region_init_alias(high_mmio_alias, OBJECT(dev), "pcie-mmio-high",
                                 mmio_reg, base_mmio_high, size_mmio_high);
        memory_region_add_subregion(get_system_memory(), base_mmio_high,
                                    high_mmio_alias);
    }

    /* Map IO port space */
    sysbus_mmio_map(SYS_BUS_DEVICE(dev), 2, base_pio);

    for (i = 0; i < GPEX_NUM_IRQS; i++) {
        sysbus_connect_irq(SYS_BUS_DEVICE(dev), i, pic[irq + i]);
        gpex_set_irq_num(GPEX_HOST(dev), i, irq + i);
    }

    /* Plug every configured NIC into the root bus; "virtio" is the
     * model used when none was requested.
     */
    pci = PCI_HOST_BRIDGE(dev);
    if (pci->bus) {
        for (i = 0; i < nb_nics; i++) {
            NICInfo *nd = &nd_table[i];

            if (!nd->model) {
                nd->model = g_strdup("virtio");
            }

            pci_nic_init_nofail(nd, pci->bus, nd->model, NULL);
        }
    }

    /* The node is named after the base of the 32-bit MMIO window */
    nodename = g_strdup_printf("/pcie@%" PRIx64, base);
    qemu_fdt_add_subnode(vms->fdt, nodename);
    qemu_fdt_setprop_string(vms->fdt, nodename,
                            "compatible", "pci-host-ecam-generic");
    qemu_fdt_setprop_string(vms->fdt, nodename, "device_type", "pci");
    qemu_fdt_setprop_cell(vms->fdt, nodename, "#address-cells", 3);
    qemu_fdt_setprop_cell(vms->fdt, nodename, "#size-cells", 2);
    qemu_fdt_setprop_cell(vms->fdt, nodename, "linux,pci-domain", 0);
    qemu_fdt_setprop_cells(vms->fdt, nodename, "bus-range", 0,
                           nr_pcie_buses - 1);
    qemu_fdt_setprop(vms->fdt, nodename, "dma-coherent", NULL, 0);

    if (vms->msi_phandle) {
        qemu_fdt_setprop_cells(vms->fdt, nodename, "msi-parent",
                               vms->msi_phandle);
    }

    qemu_fdt_setprop_sized_cells(vms->fdt, nodename, "reg",
                                 2, base_ecam, 2, size_ecam);

    /* "ranges" lists the IO port and MMIO windows, plus the 64-bit MMIO
     * window when highmem is enabled.
     */
    if (vms->highmem) {
        qemu_fdt_setprop_sized_cells(vms->fdt, nodename, "ranges",
                                     1, FDT_PCI_RANGE_IOPORT, 2, 0,
                                     2, base_pio, 2, size_pio,
                                     1, FDT_PCI_RANGE_MMIO, 2, base_mmio,
                                     2, base_mmio, 2, size_mmio,
                                     1, FDT_PCI_RANGE_MMIO_64BIT,
                                     2, base_mmio_high,
                                     2, base_mmio_high, 2, size_mmio_high);
    } else {
        qemu_fdt_setprop_sized_cells(vms->fdt, nodename, "ranges",
                                     1, FDT_PCI_RANGE_IOPORT, 2, 0,
                                     2, base_pio, 2, size_pio,
                                     1, FDT_PCI_RANGE_MMIO, 2, base_mmio,
                                     2, base_mmio, 2, size_mmio);
    }

    qemu_fdt_setprop_cell(vms->fdt, nodename, "#interrupt-cells", 1);
    create_pcie_irq_map(vms, vms->gic_phandle, irq, nodename);

    if (vms->iommu) {
        /* create_smmu() needs the phandle, so allocate it first */
        vms->iommu_phandle = qemu_fdt_alloc_phandle(vms->fdt);

        create_smmu(vms, pic, pci->bus);

        qemu_fdt_setprop_cells(vms->fdt, nodename, "iommu-map",
                               0x0, vms->iommu_phandle, 0x0, 0x10000);
    }

    g_free(nodename);
}

1122
static void create_platform_bus(VirtMachineState *vms, qemu_irq *pic)
{
    /* Create the platform bus device, record it in vms->platform_bus_dev,
     * connect its PLATFORM_BUS_NUM_IRQS interrupt lines starting at
     * irqmap[VIRT_PLATFORM_BUS], and map its MMIO region at the
     * VIRT_PLATFORM_BUS base in system memory.
     */
    DeviceState *dev;
    SysBusDevice *s;
    int i;
    MemoryRegion *sysmem = get_system_memory();

    dev = qdev_create(NULL, TYPE_PLATFORM_BUS_DEVICE);
    dev->id = TYPE_PLATFORM_BUS_DEVICE;
    qdev_prop_set_uint32(dev, "num_irqs", PLATFORM_BUS_NUM_IRQS);
    qdev_prop_set_uint32(dev, "mmio_size", vms->memmap[VIRT_PLATFORM_BUS].size);
    qdev_init_nofail(dev);
    vms->platform_bus_dev = dev;

    s = SYS_BUS_DEVICE(dev);
    for (i = 0; i < PLATFORM_BUS_NUM_IRQS; i++) {
        int irqn = vms->irqmap[VIRT_PLATFORM_BUS] + i;
        sysbus_connect_irq(s, i, pic[irqn]);
    }

    memory_region_add_subregion(sysmem,
                                vms->memmap[VIRT_PLATFORM_BUS].base,
                                sysbus_mmio_get_region(s, 0));
}

1147
static void create_secure_ram(VirtMachineState *vms,
1148
                              MemoryRegion *secure_sysmem)
1149 1150 1151
{
    MemoryRegion *secram = g_new(MemoryRegion, 1);
    char *nodename;
1152 1153
    hwaddr base = vms->memmap[VIRT_SECURE_MEM].base;
    hwaddr size = vms->memmap[VIRT_SECURE_MEM].size;
1154

1155 1156
    memory_region_init_ram(secram, NULL, "virt.secure-ram", size,
                           &error_fatal);
1157 1158 1159
    memory_region_add_subregion(secure_sysmem, base, secram);

    nodename = g_strdup_printf("/secram@%" PRIx64, base);
1160 1161 1162 1163 1164
    qemu_fdt_add_subnode(vms->fdt, nodename);
    qemu_fdt_setprop_string(vms->fdt, nodename, "device_type", "memory");
    qemu_fdt_setprop_sized_cells(vms->fdt, nodename, "reg", 2, base, 2, size);
    qemu_fdt_setprop_string(vms->fdt, nodename, "status", "disabled");
    qemu_fdt_setprop_string(vms->fdt, nodename, "secure-status", "okay");
1165 1166 1167 1168

    g_free(nodename);
}

P
Peter Maydell 已提交
1169 1170
static void *machvirt_dtb(const struct arm_boot_info *binfo, int *fdt_size)
{
1171 1172
    const VirtMachineState *board = container_of(binfo, VirtMachineState,
                                                 bootinfo);
P
Peter Maydell 已提交
1173 1174 1175 1176 1177

    *fdt_size = board->fdt_size;
    return board->fdt;
}

1178
static void virt_build_smbios(VirtMachineState *vms)
{
    /* Build the SMBIOS tables and, when an anchor was produced, publish
     * tables and anchor as fw_cfg files. Does nothing if the machine has
     * no fw_cfg device.
     */
    MachineClass *mc = MACHINE_GET_CLASS(vms);
    VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(vms);
    uint8_t *smbios_tables, *smbios_anchor;
    size_t smbios_tables_len, smbios_anchor_len;
    const char *product = "QEMU Virtual Machine";

    if (!vms->fw_cfg) {
        return;
    }

    if (kvm_enabled()) {
        product = "KVM Virtual Machine";
    }

    /* The version string is "1.0" when smbios_old_sys_ver is set on the
     * machine class, otherwise the machine class name.
     */
    smbios_set_defaults("QEMU", product,
                        vmc->smbios_old_sys_ver ? "1.0" : mc->name, false,
                        true, SMBIOS_ENTRY_POINT_30);

    smbios_get_tables(NULL, 0, &smbios_tables, &smbios_tables_len,
                      &smbios_anchor, &smbios_anchor_len);

    if (smbios_anchor) {
        fw_cfg_add_file(vms->fw_cfg, "etc/smbios/smbios-tables",
                        smbios_tables, smbios_tables_len);
        fw_cfg_add_file(vms->fw_cfg, "etc/smbios/smbios-anchor",
                        smbios_anchor, smbios_anchor_len);
    }
}

1209
static
1210
void virt_machine_done(Notifier *notifier, void *data)
1211
{
1212 1213
    VirtMachineState *vms = container_of(notifier, VirtMachineState,
                                         machine_done);
1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233
    ARMCPU *cpu = ARM_CPU(first_cpu);
    struct arm_boot_info *info = &vms->bootinfo;
    AddressSpace *as = arm_boot_address_space(cpu, info);

    /*
     * If the user provided a dtb, we assume the dynamic sysbus nodes
     * already are integrated there. This corresponds to a use case where
     * the dynamic sysbus nodes are complex and their generation is not yet
     * supported. In that case the user can take charge of the guest dt
     * while qemu takes charge of the qom stuff.
     */
    if (info->dtb_filename == NULL) {
        platform_bus_add_all_fdt_nodes(vms->fdt, "/intc",
                                       vms->memmap[VIRT_PLATFORM_BUS].base,
                                       vms->memmap[VIRT_PLATFORM_BUS].size,
                                       vms->irqmap[VIRT_PLATFORM_BUS]);
    }
    if (arm_load_dtb(info->dtb_start, info, info->dtb_limit, as) < 0) {
        exit(1);
    }
1234

1235 1236
    virt_acpi_setup(vms);
    virt_build_smbios(vms);
1237 1238
}

1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261
static uint64_t virt_cpu_mp_affinity(VirtMachineState *vms, int idx)
{
    /* Compute the MP affinity value for the CPU with index idx, using a
     * cluster size that depends on the GIC version unless the machine
     * class disallows that adjustment.
     */
    VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(vms);
    uint8_t cluster_size = ARM_DEFAULT_CPUS_PER_CLUSTER;

    /* Adjust MPIDR like 64-bit KVM hosts, which incorporate the
     * GIC's target-list limitations. 32-bit KVM hosts currently
     * always create clusters of 4 CPUs, but that is expected to
     * change when they gain support for gicv3. When KVM is enabled
     * it will override the changes we make here, therefore our
     * purposes are to make TCG consistent (with 64-bit KVM hosts)
     * and to improve SGI efficiency.
     */
    if (!vmc->disallow_affinity_adjustment) {
        cluster_size = (vms->gic_version == 3) ? GICV3_TARGETLIST_BITS
                                               : GIC_TARGETLIST_BITS;
    }

    return arm_cpu_mp_affinity(idx, cluster_size);
}

1262
static void machvirt_init(MachineState *machine)
{
    /* Machine init entry point for the "virt" board. In order, it:
     *  - resolves the GIC version ("host"/"max" are probed here),
     *  - validates the CPU type, CPU count, RAM size and KVM restrictions,
     *  - chooses the PSCI conduit,
     *  - builds the optional Secure memory view,
     *  - creates the device tree skeleton and the CPUs,
     *  - creates RAM, flash, GIC, UARTs, RTC, PCIe, GPIO, virtio-mmio
     *    transports, fw_cfg and the platform bus,
     *  - fills in the boot info, loads the kernel and registers the
     *    machine-done notifier.
     * Every validation failure is reported and ends with exit(1).
     */
    VirtMachineState *vms = VIRT_MACHINE(machine);
    VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(machine);
    MachineClass *mc = MACHINE_GET_CLASS(machine);
    const CPUArchIdList *possible_cpus;
    qemu_irq pic[NUM_IRQS];
    MemoryRegion *sysmem = get_system_memory();
    MemoryRegion *secure_sysmem = NULL;
    int n, virt_max_cpus;
    MemoryRegion *ram = g_new(MemoryRegion, 1);
    bool firmware_loaded = bios_name || drive_get(IF_PFLASH, 0, 0);

    /* We can probe only here because during property set
     * KVM is not available yet
     */
    if (vms->gic_version <= 0) {
        /* "host" or "max" */
        if (!kvm_enabled()) {
            if (vms->gic_version == 0) {
                error_report("gic-version=host requires KVM");
                exit(1);
            } else {
                /* "max": currently means 3 for TCG */
                vms->gic_version = 3;
            }
        } else {
            vms->gic_version = kvm_arm_vgic_probe();
            if (!vms->gic_version) {
                error_report(
                    "Unable to determine GIC version supported by host");
                exit(1);
            }
        }
    }

    if (!cpu_type_valid(machine->cpu_type)) {
        error_report("mach-virt: CPU type %s not supported", machine->cpu_type);
        exit(1);
    }

    /* If we have an EL3 boot ROM then the assumption is that it will
     * implement PSCI itself, so disable QEMU's internal implementation
     * so it doesn't get in the way. Instead of starting secondary
     * CPUs in PSCI powerdown state we will start them all running and
     * let the boot ROM sort them out.
     * The usual case is that we do use QEMU's PSCI implementation;
     * if the guest has EL2 then we will use SMC as the conduit,
     * and otherwise we will use HVC (for backwards compatibility and
     * because if we're using KVM then we must use HVC).
     */
    if (vms->secure && firmware_loaded) {
        vms->psci_conduit = QEMU_PSCI_CONDUIT_DISABLED;
    } else if (vms->virt) {
        vms->psci_conduit = QEMU_PSCI_CONDUIT_SMC;
    } else {
        vms->psci_conduit = QEMU_PSCI_CONDUIT_HVC;
    }

    /* The maximum number of CPUs depends on the GIC version, or on how
     * many redistributors we can fit into the memory map.
     */
    if (vms->gic_version == 3) {
        virt_max_cpus = vms->memmap[VIRT_GIC_REDIST].size / 0x20000;
    } else {
        virt_max_cpus = GIC_NCPU;
    }

    if (max_cpus > virt_max_cpus) {
        error_report("Number of SMP CPUs requested (%d) exceeds max CPUs "
                     "supported by machine 'mach-virt' (%d)",
                     max_cpus, virt_max_cpus);
        exit(1);
    }

    vms->smp_cpus = smp_cpus;

    if (machine->ram_size > vms->memmap[VIRT_MEM].size) {
        error_report("mach-virt: cannot model more than %dGB RAM", RAMLIMIT_GB);
        exit(1);
    }

    if (vms->virt && kvm_enabled()) {
        error_report("mach-virt: KVM does not support providing "
                     "Virtualization extensions to the guest CPU");
        exit(1);
    }

    if (vms->secure) {
        if (kvm_enabled()) {
            error_report("mach-virt: KVM does not support Security extensions");
            exit(1);
        }

        /* The Secure view of the world is the same as the NonSecure,
         * but with a few extra devices. Create it as a container region
         * containing the system memory at low priority; any secure-only
         * devices go in at higher priority and take precedence.
         */
        secure_sysmem = g_new(MemoryRegion, 1);
        memory_region_init(secure_sysmem, OBJECT(machine), "secure-memory",
                           UINT64_MAX);
        memory_region_add_subregion_overlap(secure_sysmem, 0, sysmem, -1);
    }

    create_fdt(vms);

    /* Create and realize the first smp_cpus entries of the possible-CPU
     * list.
     */
    possible_cpus = mc->possible_cpu_arch_ids(machine);
    for (n = 0; n < possible_cpus->len; n++) {
        Object *cpuobj;
        CPUState *cs;

        if (n >= smp_cpus) {
            break;
        }

        cpuobj = object_new(possible_cpus->cpus[n].type);
        object_property_set_int(cpuobj, possible_cpus->cpus[n].arch_id,
                                "mp-affinity", NULL);

        cs = CPU(cpuobj);
        cs->cpu_index = n;

        numa_cpu_pre_plug(&possible_cpus->cpus[cs->cpu_index], DEVICE(cpuobj),
                          &error_fatal);

        if (!vms->secure) {
            object_property_set_bool(cpuobj, false, "has_el3", NULL);
        }

        if (!vms->virt && object_property_find(cpuobj, "has_el2", NULL)) {
            object_property_set_bool(cpuobj, false, "has_el2", NULL);
        }

        if (vms->psci_conduit != QEMU_PSCI_CONDUIT_DISABLED) {
            object_property_set_int(cpuobj, vms->psci_conduit,
                                    "psci-conduit", NULL);

            /* Secondary CPUs start in PSCI powered-down state */
            if (n > 0) {
                object_property_set_bool(cpuobj, true,
                                         "start-powered-off", NULL);
            }
        }

        if (vmc->no_pmu && object_property_find(cpuobj, "pmu", NULL)) {
            object_property_set_bool(cpuobj, false, "pmu", NULL);
        }

        if (object_property_find(cpuobj, "reset-cbar", NULL)) {
            object_property_set_int(cpuobj, vms->memmap[VIRT_CPUPERIPHS].base,
                                    "reset-cbar", &error_abort);
        }

        object_property_set_link(cpuobj, OBJECT(sysmem), "memory",
                                 &error_abort);
        if (vms->secure) {
            object_property_set_link(cpuobj, OBJECT(secure_sysmem),
                                     "secure-memory", &error_abort);
        }

        object_property_set_bool(cpuobj, true, "realized", &error_fatal);
        object_unref(cpuobj);
    }
    fdt_add_timer_nodes(vms);
    fdt_add_cpu_nodes(vms);

    memory_region_allocate_system_memory(ram, NULL, "mach-virt.ram",
                                         machine->ram_size);
    memory_region_add_subregion(sysmem, vms->memmap[VIRT_MEM].base, ram);

    /* Without a Secure view, sysmem stands in for secure_sysmem */
    create_flash(vms, sysmem, secure_sysmem ? secure_sysmem : sysmem);

    create_gic(vms, pic);

    fdt_add_pmu_nodes(vms);

    create_uart(vms, pic, VIRT_UART, sysmem, serial_hd(0));

    if (vms->secure) {
        create_secure_ram(vms, secure_sysmem);
        create_uart(vms, pic, VIRT_SECURE_UART, secure_sysmem, serial_hd(1));
    }

    create_rtc(vms, pic);

    create_pcie(vms, pic);

    create_gpio(vms, pic);

    /* Create mmio transports, so the user can create virtio backends
     * (which will be automatically plugged in to the transports). If
     * no backend is created the transport will just sit harmlessly idle.
     */
    create_virtio_devices(vms, pic);

    vms->fw_cfg = create_fw_cfg(vms, &address_space_memory);
    rom_set_fw(vms->fw_cfg);

    create_platform_bus(vms, pic);

    vms->bootinfo.ram_size = machine->ram_size;
    vms->bootinfo.kernel_filename = machine->kernel_filename;
    vms->bootinfo.kernel_cmdline = machine->kernel_cmdline;
    vms->bootinfo.initrd_filename = machine->initrd_filename;
    vms->bootinfo.nb_cpus = smp_cpus;
    vms->bootinfo.board_id = -1;
    vms->bootinfo.loader_start = vms->memmap[VIRT_MEM].base;
    vms->bootinfo.get_dtb = machvirt_dtb;
    /* virt_machine_done() calls arm_load_dtb() itself */
    vms->bootinfo.skip_dtb_autoload = true;
    vms->bootinfo.firmware_loaded = firmware_loaded;
    arm_load_kernel(ARM_CPU(first_cpu), &vms->bootinfo);

    vms->machine_done.notify = virt_machine_done;
    qemu_add_machine_init_done_notifier(&vms->machine_done);
}

1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492
/* QOM getter for the "secure" machine property: returns vms->secure. */
static bool virt_get_secure(Object *obj, Error **errp)
{
    return VIRT_MACHINE(obj)->secure;
}

/* QOM setter for the "secure" machine property: stores value in vms->secure. */
static void virt_set_secure(Object *obj, bool value, Error **errp)
{
    VIRT_MACHINE(obj)->secure = value;
}

1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506
/* QOM getter for the "virtualization" machine property: returns vms->virt. */
static bool virt_get_virt(Object *obj, Error **errp)
{
    return VIRT_MACHINE(obj)->virt;
}

/* QOM setter for the "virtualization" machine property: stores value in
 * vms->virt.
 */
static void virt_set_virt(Object *obj, bool value, Error **errp)
{
    VIRT_MACHINE(obj)->virt = value;
}

1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520
/* QOM getter for the "highmem" machine property: returns vms->highmem. */
static bool virt_get_highmem(Object *obj, Error **errp)
{
    return VIRT_MACHINE(obj)->highmem;
}

/* QOM setter for the "highmem" machine property: stores value in
 * vms->highmem.
 */
static void virt_set_highmem(Object *obj, bool value, Error **errp)
{
    VIRT_MACHINE(obj)->highmem = value;
}

1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534
/* QOM getter for the "its" machine property: returns vms->its. */
static bool virt_get_its(Object *obj, Error **errp)
{
    return VIRT_MACHINE(obj)->its;
}

/* QOM setter for the "its" machine property: stores value in vms->its. */
static void virt_set_its(Object *obj, bool value, Error **errp)
{
    VIRT_MACHINE(obj)->its = value;
}

1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552
/*
 * QOM getter for the "gic-version" machine property.
 *
 * Returns a freshly g_strdup()'d string: "3" when gic_version is 3, and "2"
 * for every other stored value (this includes the 0 / -1 placeholders that
 * the setter stores for "host" / "max").
 */
static char *virt_get_gic_version(Object *obj, Error **errp)
{
    VirtMachineState *vms = VIRT_MACHINE(obj);

    if (vms->gic_version == 3) {
        return g_strdup("3");
    }
    return g_strdup("2");
}

/*
 * QOM setter for the "gic-version" machine property.
 *
 * Accepts "3", "2", "host" or "max". "host" and "max" are recorded as the
 * placeholder values 0 and -1 respectively. Any other string leaves
 * gic_version untouched and reports an error (with a hint listing the
 * accepted values) through errp.
 */
static void virt_set_gic_version(Object *obj, const char *value, Error **errp)
{
    VirtMachineState *vms = VIRT_MACHINE(obj);

    if (strcmp(value, "3") == 0) {
        vms->gic_version = 3;
        return;
    }
    if (strcmp(value, "2") == 0) {
        vms->gic_version = 2;
        return;
    }
    if (strcmp(value, "host") == 0) {
        vms->gic_version = 0; /* Will probe later */
        return;
    }
    if (strcmp(value, "max") == 0) {
        vms->gic_version = -1; /* Will probe later */
        return;
    }

    error_setg(errp, "Invalid gic-version value");
    error_append_hint(errp, "Valid values are 3, 2, host, max.\n");
}

1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588
/*
 * QOM getter for the "iommu" machine property.
 *
 * Returns a freshly g_strdup()'d "none" or "smmuv3" matching vms->iommu.
 * Any other stored value trips g_assert_not_reached().
 */
static char *virt_get_iommu(Object *obj, Error **errp)
{
    VirtMachineState *vms = VIRT_MACHINE(obj);

    if (vms->iommu == VIRT_IOMMU_NONE) {
        return g_strdup("none");
    }
    if (vms->iommu == VIRT_IOMMU_SMMUV3) {
        return g_strdup("smmuv3");
    }
    g_assert_not_reached();
}

/*
 * QOM setter for the "iommu" machine property.
 *
 * Accepts "smmuv3" or "none". Any other string leaves vms->iommu untouched
 * and reports an error (with a hint listing the accepted values) via errp.
 */
static void virt_set_iommu(Object *obj, const char *value, Error **errp)
{
    VirtMachineState *vms = VIRT_MACHINE(obj);

    if (strcmp(value, "smmuv3") == 0) {
        vms->iommu = VIRT_IOMMU_SMMUV3;
        return;
    }
    if (strcmp(value, "none") == 0) {
        vms->iommu = VIRT_IOMMU_NONE;
        return;
    }

    error_setg(errp, "Invalid iommu value");
    error_append_hint(errp, "Valid values are none, smmuv3.\n");
}

1589 1590 1591 1592 1593 1594 1595 1596 1597 1598
/*
 * Return the instance properties of the possible CPU at cpu_index.
 *
 * The list is obtained through the machine class's possible_cpu_arch_ids
 * hook; cpu_index must be below the list length (asserted).
 */
static CpuInstanceProperties
virt_cpu_index_to_props(MachineState *ms, unsigned cpu_index)
{
    const CPUArchIdList *cpu_list =
        MACHINE_GET_CLASS(ms)->possible_cpu_arch_ids(ms);

    assert(cpu_index < cpu_list->len);
    return cpu_list->cpus[cpu_index].props;
}

1599 1600 1601 1602 1603
/*
 * Default NUMA node for the CPU at index idx: CPUs are spread round-robin
 * over the nb_numa_nodes configured nodes.
 * NOTE(review): relies on nb_numa_nodes being non-zero when called -- confirm
 * against the caller.
 */
static int64_t virt_get_default_cpu_node_id(const MachineState *ms, int idx)
{
    int node = idx % nb_numa_nodes;

    return node;
}

1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617
/*
 * Return the list of possible CPUs for this machine, building it on first
 * use and caching it in ms->possible_cpus.
 *
 * The list has max_cpus entries. Entry n gets the machine's cpu_type, the
 * arch_id computed by virt_cpu_mp_affinity(vms, n), and thread_id n.
 */
static const CPUArchIdList *virt_possible_cpu_arch_ids(MachineState *ms)
{
    VirtMachineState *vms = VIRT_MACHINE(ms);
    CPUArchIdList *list;
    int i;

    if (ms->possible_cpus) {
        /* Already built: it must still match the configured maximum. */
        assert(ms->possible_cpus->len == max_cpus);
        return ms->possible_cpus;
    }

    list = g_malloc0(sizeof(CPUArchIdList) + sizeof(CPUArchId) * max_cpus);
    ms->possible_cpus = list;
    list->len = max_cpus;

    for (i = 0; i < list->len; i++) {
        CPUArchId *cpu = &list->cpus[i];

        cpu->type = ms->cpu_type;
        cpu->arch_id = virt_cpu_mp_affinity(vms, i);
        cpu->props.has_thread_id = true;
        cpu->props.thread_id = i;
    }
    return list;
}

1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649
/*
 * Hotplug "plug" callback for the virt machine.
 *
 * When the machine has a platform bus device and the plugged device is a
 * sysbus device, link it to the platform bus; otherwise do nothing.
 */
static void virt_machine_device_plug_cb(HotplugHandler *hotplug_dev,
                                        DeviceState *dev, Error **errp)
{
    VirtMachineState *vms = VIRT_MACHINE(hotplug_dev);

    if (!vms->platform_bus_dev ||
        !object_dynamic_cast(OBJECT(dev), TYPE_SYS_BUS_DEVICE)) {
        return;
    }

    platform_bus_link_device(PLATFORM_BUS_DEVICE(vms->platform_bus_dev),
                             SYS_BUS_DEVICE(dev));
}

/*
 * Select the hotplug handler for dev: the machine itself for sysbus devices,
 * NULL for everything else.
 */
static HotplugHandler *virt_machine_get_hotplug_handler(MachineState *machine,
                                                        DeviceState *dev)
{
    bool is_sysbus =
        object_dynamic_cast(OBJECT(dev), TYPE_SYS_BUS_DEVICE) != NULL;

    return is_sysbus ? HOTPLUG_HANDLER(machine) : NULL;
}

1650 1651
/*
 * Class initializer for the abstract virt machine type: fills in the
 * MachineClass defaults and hooks shared by every versioned virt machine,
 * and installs the machine as hotplug handler for sysbus devices.
 */
static void virt_machine_class_init(ObjectClass *oc, void *data)
{
    MachineClass *mc = MACHINE_CLASS(oc);
    HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc);

    /* Board basics */
    mc->init = machvirt_init;
    mc->block_default_type = IF_VIRTIO;
    mc->no_cdrom = 1;
    mc->pci_allow_0_address = true;
    /* We know we will never create a pre-ARMv7 CPU which needs 1K pages */
    mc->minimum_page_bits = 12;

    /* CPU configuration.
     * Start max_cpus at the maximum QEMU supports. We'll further restrict
     * it later in machvirt_init, where we have more information about the
     * configuration of the particular instance.
     */
    mc->max_cpus = 255;
    mc->default_cpu_type = ARM_CPU_TYPE_NAME("cortex-a15");
    mc->possible_cpu_arch_ids = virt_possible_cpu_arch_ids;
    mc->cpu_index_to_instance_props = virt_cpu_index_to_props;
    mc->get_default_cpu_node_id = virt_get_default_cpu_node_id;

    /* Sysbus devices the user may instantiate dynamically */
    machine_class_allow_dynamic_sysbus_dev(mc, TYPE_VFIO_CALXEDA_XGMAC);
    machine_class_allow_dynamic_sysbus_dev(mc, TYPE_VFIO_AMD_XGBE);
    machine_class_allow_dynamic_sysbus_dev(mc, TYPE_RAMFB_DEVICE);

    /* Hotplug: nobody may have claimed the hook before us */
    assert(!mc->get_hotplug_handler);
    mc->get_hotplug_handler = virt_machine_get_hotplug_handler;
    hc->plug = virt_machine_device_plug_cb;
}

static const TypeInfo virt_machine_info = {
    .name          = TYPE_VIRT_MACHINE,
    .parent        = TYPE_MACHINE,
    .abstract      = true,
    .instance_size = sizeof(VirtMachineState),
    .class_size    = sizeof(VirtMachineClass),
    .class_init    = virt_machine_class_init,
1685 1686 1687 1688
    .interfaces = (InterfaceInfo[]) {
         { TYPE_HOTPLUG_HANDLER },
         { }
    },
1689 1690
};

1691 1692 1693 1694 1695 1696
/* Register the abstract base virt machine type described by
 * virt_machine_info. Hooked up through type_init() below.
 */
static void machvirt_machine_init(void)
{
    type_register_static(&virt_machine_info);
}
type_init(machvirt_machine_init);

1697 1698 1699
/* Compat properties for virt-2.12: expands to HW_COMPAT_2_12. */
#define VIRT_COMPAT_2_12 \
    HW_COMPAT_2_12

1700
/*
 * Instance initializer for the virt-2.12 machine; the initializers of the
 * older versioned machines chain down to this one.
 *
 * Sets the per-instance defaults and registers the user-visible machine
 * properties "secure", "virtualization", "highmem", "gic-version", "its"
 * (only when the class does not set no_its) and "iommu", then selects the
 * a15 memory and IRQ maps.
 */
static void virt_2_12_instance_init(Object *obj)
{
    VirtMachineState *vms = VIRT_MACHINE(obj);
    VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(vms);

    /* EL3 is disabled by default on virt: this makes us consistent
     * between KVM and TCG for this board, and it also allows us to
     * boot UEFI blobs which assume no TrustZone support.
     */
    vms->secure = false;
    object_property_add_bool(obj, "secure", virt_get_secure,
                             virt_set_secure, NULL);
    object_property_set_description(obj, "secure",
                                    "Set on/off to enable/disable the ARM "
                                    "Security Extensions (TrustZone)",
                                    NULL);

    /* EL2 is also disabled by default, for similar reasons */
    vms->virt = false;
    object_property_add_bool(obj, "virtualization", virt_get_virt,
                             virt_set_virt, NULL);
    object_property_set_description(obj, "virtualization",
                                    "Set on/off to enable/disable emulating a "
                                    "guest CPU which implements the ARM "
                                    "Virtualization Extensions",
                                    NULL);

    /* High memory is enabled by default */
    vms->highmem = true;
    object_property_add_bool(obj, "highmem", virt_get_highmem,
                             virt_set_highmem, NULL);
    object_property_set_description(obj, "highmem",
                                    "Set on/off to enable/disable using "
                                    "physical address space above 32 bits",
                                    NULL);

    /* Default GIC type is v2 */
    vms->gic_version = 2;
    object_property_add_str(obj, "gic-version", virt_get_gic_version,
                            virt_set_gic_version, NULL);
    /* Keep this list in sync with the values virt_set_gic_version accepts */
    object_property_set_description(obj, "gic-version",
                                    "Set GIC version. "
                                    "Valid values are 2, 3, host and max",
                                    NULL);

    if (vmc->no_its) {
        /* No ITS for this machine version: the property is not exposed */
        vms->its = false;
    } else {
        /* Default allows ITS instantiation */
        vms->its = true;
        object_property_add_bool(obj, "its", virt_get_its,
                                 virt_set_its, NULL);
        object_property_set_description(obj, "its",
                                        "Set on/off to enable/disable "
                                        "ITS instantiation",
                                        NULL);
    }

    /* Default disallows iommu instantiation */
    vms->iommu = VIRT_IOMMU_NONE;
    object_property_add_str(obj, "iommu", virt_get_iommu, virt_set_iommu, NULL);
    object_property_set_description(obj, "iommu",
                                    "Set the IOMMU type. "
                                    "Valid values are none and smmuv3",
                                    NULL);

    vms->memmap = a15memmap;
    vms->irqmap = a15irqmap;
}

1768 1769
/* Class options for virt-2.12: only the 2.12 compat properties are applied. */
static void virt_machine_2_12_options(MachineClass *mc)
{
    SET_MACHINE_COMPAT(mc, VIRT_COMPAT_2_12);
}
DEFINE_VIRT_MACHINE_AS_LATEST(2, 12)

/* Compat properties for virt-2.11: expands to HW_COMPAT_2_11. */
#define VIRT_COMPAT_2_11 \
    HW_COMPAT_2_11

/* virt-2.11 instances are initialized exactly like virt-2.12 ones. */
static void virt_2_11_instance_init(Object *obj)
{
    virt_2_12_instance_init(obj);
}

E
Eric Auger 已提交
1782 1783
static void virt_machine_2_11_options(MachineClass *mc)
{
1784 1785
    VirtMachineClass *vmc = VIRT_MACHINE_CLASS(OBJECT_CLASS(mc));

1786 1787
    virt_machine_2_12_options(mc);
    SET_MACHINE_COMPAT(mc, VIRT_COMPAT_2_11);
1788
    vmc->smbios_old_sys_ver = true;
E
Eric Auger 已提交
1789
}
1790
DEFINE_VIRT_MACHINE(2, 11)
E
Eric Auger 已提交
1791 1792 1793 1794 1795 1796 1797 1798 1799

/* Compat properties for virt-2.10: expands to HW_COMPAT_2_10. */
#define VIRT_COMPAT_2_10 \
    HW_COMPAT_2_10

/* virt-2.10 instances are initialized exactly like virt-2.11 ones. */
static void virt_2_10_instance_init(Object *obj)
{
    virt_2_11_instance_init(obj);
}

E
Eric Auger 已提交
1800 1801
static void virt_machine_2_10_options(MachineClass *mc)
{
E
Eric Auger 已提交
1802 1803
    virt_machine_2_11_options(mc);
    SET_MACHINE_COMPAT(mc, VIRT_COMPAT_2_10);
E
Eric Auger 已提交
1804
}
E
Eric Auger 已提交
1805
DEFINE_VIRT_MACHINE(2, 10)
E
Eric Auger 已提交
1806 1807 1808 1809 1810 1811 1812 1813 1814

/* Compat properties for virt-2.9: expands to HW_COMPAT_2_9. */
#define VIRT_COMPAT_2_9 \
    HW_COMPAT_2_9

/* virt-2.9 instances are initialized exactly like virt-2.10 ones. */
static void virt_2_9_instance_init(Object *obj)
{
    virt_2_10_instance_init(obj);
}

P
Peter Maydell 已提交
1815 1816
static void virt_machine_2_9_options(MachineClass *mc)
{
E
Eric Auger 已提交
1817 1818
    virt_machine_2_10_options(mc);
    SET_MACHINE_COMPAT(mc, VIRT_COMPAT_2_9);
P
Peter Maydell 已提交
1819
}
E
Eric Auger 已提交
1820
DEFINE_VIRT_MACHINE(2, 9)
P
Peter Maydell 已提交
1821 1822 1823 1824 1825 1826 1827 1828 1829

/* Compat properties for virt-2.8: expands to HW_COMPAT_2_8. */
#define VIRT_COMPAT_2_8 \
    HW_COMPAT_2_8

/* virt-2.8 instances are initialized exactly like virt-2.9 ones. */
static void virt_2_8_instance_init(Object *obj)
{
    virt_2_9_instance_init(obj);
}

A
Andrew Jones 已提交
1830 1831
static void virt_machine_2_8_options(MachineClass *mc)
{
1832 1833
    VirtMachineClass *vmc = VIRT_MACHINE_CLASS(OBJECT_CLASS(mc));

P
Peter Maydell 已提交
1834 1835
    virt_machine_2_9_options(mc);
    SET_MACHINE_COMPAT(mc, VIRT_COMPAT_2_8);
1836 1837 1838 1839
    /* For 2.8 and earlier we falsely claimed in the DT that
     * our timers were edge-triggered, not level-triggered.
     */
    vmc->claim_edge_triggered_timers = true;
A
Andrew Jones 已提交
1840
}
P
Peter Maydell 已提交
1841
DEFINE_VIRT_MACHINE(2, 8)
A
Andrew Jones 已提交
1842 1843 1844 1845 1846 1847 1848 1849 1850

/* Compat properties for virt-2.7: expands to HW_COMPAT_2_7. */
#define VIRT_COMPAT_2_7 \
    HW_COMPAT_2_7

/* virt-2.7 instances are initialized exactly like virt-2.8 ones. */
static void virt_2_7_instance_init(Object *obj)
{
    virt_2_8_instance_init(obj);
}

1851 1852
static void virt_machine_2_7_options(MachineClass *mc)
{
1853 1854
    VirtMachineClass *vmc = VIRT_MACHINE_CLASS(OBJECT_CLASS(mc));

A
Andrew Jones 已提交
1855 1856
    virt_machine_2_8_options(mc);
    SET_MACHINE_COMPAT(mc, VIRT_COMPAT_2_7);
1857 1858
    /* ITS was introduced with 2.8 */
    vmc->no_its = true;
1859 1860
    /* Stick with 1K pages for migration compatibility */
    mc->minimum_page_bits = 0;
1861
}
A
Andrew Jones 已提交
1862
DEFINE_VIRT_MACHINE(2, 7)
1863 1864 1865 1866 1867 1868 1869 1870 1871

/* Compat properties for virt-2.6: expands to HW_COMPAT_2_6. */
#define VIRT_COMPAT_2_6 \
    HW_COMPAT_2_6

/* virt-2.6 instances are initialized exactly like virt-2.7 ones. */
static void virt_2_6_instance_init(Object *obj)
{
    virt_2_7_instance_init(obj);
}

1872
static void virt_machine_2_6_options(MachineClass *mc)
1873
{
1874 1875
    VirtMachineClass *vmc = VIRT_MACHINE_CLASS(OBJECT_CLASS(mc));

1876 1877
    virt_machine_2_7_options(mc);
    SET_MACHINE_COMPAT(mc, VIRT_COMPAT_2_6);
1878
    vmc->disallow_affinity_adjustment = true;
1879 1880
    /* Disable PMU for 2.6 as PMU support was first introduced in 2.7 */
    vmc->no_pmu = true;
1881
}
1882
DEFINE_VIRT_MACHINE(2, 6)