/*
 * QEMU PowerPC pSeries Logical Partition (aka sPAPR) hardware System Emulator
 *
 * Copyright (c) 2004-2007 Fabrice Bellard
 * Copyright (c) 2007 Jocelyn Mayer
 * Copyright (c) 2010 David Gibson, IBM Corporation.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 */
#include "sysemu/sysemu.h"
#include "sysemu/numa.h"
#include "hw/hw.h"
#include "hw/fw-path-provider.h"
#include "elf.h"
#include "net/net.h"
#include "sysemu/device_tree.h"
#include "sysemu/block-backend.h"
#include "sysemu/cpus.h"
#include "sysemu/kvm.h"
#include "kvm_ppc.h"
#include "migration/migration.h"
#include "mmu-hash64.h"
#include "qom/cpu.h"

#include "hw/boards.h"
#include "hw/ppc/ppc.h"
#include "hw/loader.h"

#include "hw/ppc/spapr.h"
#include "hw/ppc/spapr_vio.h"
#include "hw/pci-host/spapr.h"
#include "hw/ppc/xics.h"
#include "hw/pci/msi.h"

#include "hw/pci/pci.h"
#include "hw/scsi/scsi.h"
#include "hw/virtio/virtio-scsi.h"

#include "exec/address-spaces.h"
#include "hw/usb.h"
#include "qemu/config-file.h"
#include "qemu/error-report.h"
#include "trace.h"
#include "hw/nmi.h"

#include "hw/compat.h"
#include "qemu-common.h"

#include <libfdt.h>

/* SLOF memory layout:
 *
 * SLOF raw image loaded at 0, copies its romfs right below the flat
 * device-tree, then positions SLOF itself 31M below that
 *
 * So we set FW_OVERHEAD to 40MB which should account for all of that
 * and more
 *
 * We load our kernel at 4M, leaving space for SLOF initial image
 */
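
/*
 * Rough sketch of the resulting layout (see ppc_spapr_reset() below for the
 * actual RTAS/FDT placement): SLOF's raw image sits at 0 within the
 * FW_MAX_SIZE (4M) reservation, the kernel is loaded at KERNEL_LOAD_ADDR
 * (== FW_MAX_SIZE, i.e. 4M), and, as an example, with a 256M RMA the RTAS
 * blob ends up at 256M - RTAS_MAX_SIZE (64K) with the flattened device tree
 * in the FDT_MAX_SIZE (1M) window just below it.  FW_OVERHEAD (40M) is the
 * allowance for SLOF plus its romfs copy below the FDT.
 */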
#define FDT_MAX_SIZE            0x100000
#define RTAS_MAX_SIZE           0x10000
#define RTAS_MAX_ADDR           0x80000000 /* RTAS must stay below that */
#define FW_MAX_SIZE             0x400000
#define FW_FILE_NAME            "slof.bin"
#define FW_OVERHEAD             0x2800000
#define KERNEL_LOAD_ADDR        FW_MAX_SIZE

#define MIN_RMA_SLOF            128UL

#define TIMEBASE_FREQ           512000000ULL

#define PHANDLE_XICP            0x00001111

#define HTAB_SIZE(spapr)        (1ULL << ((spapr)->htab_shift))

static XICSState *try_create_xics(const char *type, int nr_servers,
                                  int nr_irqs, Error **errp)
{
    Error *err = NULL;
    DeviceState *dev;

    dev = qdev_create(NULL, type);
    qdev_prop_set_uint32(dev, "nr_servers", nr_servers);
    qdev_prop_set_uint32(dev, "nr_irqs", nr_irqs);
    object_property_set_bool(OBJECT(dev), true, "realized", &err);
    if (err) {
        error_propagate(errp, err);
        object_unparent(OBJECT(dev));
        return NULL;
    }
    return XICS_COMMON(dev);
}

static XICSState *xics_system_init(MachineState *machine,
                                   int nr_servers, int nr_irqs)
{
    XICSState *icp = NULL;

    if (kvm_enabled()) {
        Error *err = NULL;

        if (machine_kernel_irqchip_allowed(machine)) {
            icp = try_create_xics(TYPE_KVM_XICS, nr_servers, nr_irqs, &err);
        }
        if (machine_kernel_irqchip_required(machine) && !icp) {
            error_report("kernel_irqchip requested but unavailable: %s",
                         error_get_pretty(err));
        }
        error_free(err);
    }

    if (!icp) {
        icp = try_create_xics(TYPE_XICS, nr_servers, nr_irqs, &error_abort);
    }

    return icp;
}

static int spapr_fixup_cpu_smt_dt(void *fdt, int offset, PowerPCCPU *cpu,
                                  int smt_threads)
{
    int i, ret = 0;
    uint32_t servers_prop[smt_threads];
    uint32_t gservers_prop[smt_threads * 2];
    int index = ppc_get_vcpu_dt_id(cpu);

    if (cpu->cpu_version) {
        ret = fdt_setprop_cell(fdt, offset, "cpu-version", cpu->cpu_version);
        if (ret < 0) {
            return ret;
        }
    }

    /* Build interrupt servers and gservers properties */
    for (i = 0; i < smt_threads; i++) {
        servers_prop[i] = cpu_to_be32(index + i);
        /* Hack, direct the group queues back to cpu 0 */
        gservers_prop[i*2] = cpu_to_be32(index + i);
        gservers_prop[i*2 + 1] = 0;
    }
    ret = fdt_setprop(fdt, offset, "ibm,ppc-interrupt-server#s",
                      servers_prop, sizeof(servers_prop));
    if (ret < 0) {
        return ret;
    }
    ret = fdt_setprop(fdt, offset, "ibm,ppc-interrupt-gserver#s",
                      gservers_prop, sizeof(gservers_prop));

    return ret;
}
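
/*
 * For illustration: for a core whose first thread has device tree id 8 and
 * which exposes 4 threads, the loop above produces
 *   ibm,ppc-interrupt-server#s  = <8 9 10 11>
 *   ibm,ppc-interrupt-gserver#s = <8 0 9 0 10 0 11 0>
 * i.e. every gserver entry is paired with 0, reflecting the hack of
 * directing the group queues back to cpu 0.
 */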

static int spapr_fixup_cpu_numa_dt(void *fdt, int offset, CPUState *cs)
{
    int ret = 0;
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    int index = ppc_get_vcpu_dt_id(cpu);
    uint32_t associativity[] = {cpu_to_be32(0x5),
                                cpu_to_be32(0x0),
                                cpu_to_be32(0x0),
                                cpu_to_be32(0x0),
                                cpu_to_be32(cs->numa_node),
                                cpu_to_be32(index)};

    /* Advertise NUMA via ibm,associativity */
    if (nb_numa_nodes > 1) {
        ret = fdt_setprop(fdt, offset, "ibm,associativity", associativity,
                          sizeof(associativity));
    }

    return ret;
}

static int spapr_fixup_cpu_dt(void *fdt, sPAPRMachineState *spapr)
{
    int ret = 0, offset, cpus_offset;
    CPUState *cs;
    char cpu_model[32];
    int smt = kvmppc_smt_threads();
    uint32_t pft_size_prop[] = {0, cpu_to_be32(spapr->htab_shift)};

    CPU_FOREACH(cs) {
        PowerPCCPU *cpu = POWERPC_CPU(cs);
        DeviceClass *dc = DEVICE_GET_CLASS(cs);
        int index = ppc_get_vcpu_dt_id(cpu);

        if ((index % smt) != 0) {
            continue;
        }

        snprintf(cpu_model, 32, "%s@%x", dc->fw_name, index);

        cpus_offset = fdt_path_offset(fdt, "/cpus");
        if (cpus_offset < 0) {
            cpus_offset = fdt_add_subnode(fdt, fdt_path_offset(fdt, "/"),
                                          "cpus");
            if (cpus_offset < 0) {
                return cpus_offset;
            }
        }
        offset = fdt_subnode_offset(fdt, cpus_offset, cpu_model);
        if (offset < 0) {
            offset = fdt_add_subnode(fdt, cpus_offset, cpu_model);
            if (offset < 0) {
                return offset;
            }
        }

        ret = fdt_setprop(fdt, offset, "ibm,pft-size",
                          pft_size_prop, sizeof(pft_size_prop));
        if (ret < 0) {
            return ret;
        }

        ret = spapr_fixup_cpu_numa_dt(fdt, offset, cs);
        if (ret < 0) {
            return ret;
        }

        ret = spapr_fixup_cpu_smt_dt(fdt, offset, cpu,
                                     ppc_get_compat_smt_threads(cpu));
        if (ret < 0) {
            return ret;
        }
    }
    return ret;
}

static size_t create_page_sizes_prop(CPUPPCState *env, uint32_t *prop,
                                     size_t maxsize)
{
    size_t maxcells = maxsize / sizeof(uint32_t);
    int i, j, count;
    uint32_t *p = prop;

    for (i = 0; i < PPC_PAGE_SIZES_MAX_SZ; i++) {
        struct ppc_one_seg_page_size *sps = &env->sps.sps[i];

        if (!sps->page_shift) {
            break;
        }
        for (count = 0; count < PPC_PAGE_SIZES_MAX_SZ; count++) {
            if (sps->enc[count].page_shift == 0) {
                break;
            }
        }
        if ((p - prop) >= (maxcells - 3 - count * 2)) {
            break;
        }
        *(p++) = cpu_to_be32(sps->page_shift);
        *(p++) = cpu_to_be32(sps->slb_enc);
        *(p++) = cpu_to_be32(count);
        for (j = 0; j < count; j++) {
            *(p++) = cpu_to_be32(sps->enc[j].page_shift);
            *(p++) = cpu_to_be32(sps->enc[j].pte_enc);
        }
    }

    return (p - prop) * sizeof(uint32_t);
}
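
/*
 * Sketch of the cell layout produced above (the exact values come from the
 * CPU's env->sps table, so treat the numbers as an example only): each base
 * segment page size contributes
 *   <page_shift  slb_enc  count  {enc_page_shift  pte_enc} x count>
 * so a 16M base page size supporting only 16M actual pages would add the
 * five cells <0x18 slb_enc 0x1 0x18 pte_enc>.
 */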

static hwaddr spapr_node0_size(void)
{
    MachineState *machine = MACHINE(qdev_get_machine());

    if (nb_numa_nodes) {
        int i;
        for (i = 0; i < nb_numa_nodes; ++i) {
            if (numa_info[i].node_mem) {
                return MIN(pow2floor(numa_info[i].node_mem),
                           machine->ram_size);
            }
        }
    }
    return machine->ram_size;
}

#define _FDT(exp) \
    do { \
        int ret = (exp);                                           \
        if (ret < 0) {                                             \
            fprintf(stderr, "qemu: error creating device tree: %s: %s\n", \
                    #exp, fdt_strerror(ret));                      \
            exit(1);                                               \
        }                                                          \
    } while (0)
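
/*
 * Typical use, as seen throughout this file: wrap any libfdt call that
 * returns a negative error code, e.g. _FDT((fdt_setprop_cell(fdt, offset,
 * "reg", index))); on failure the offending expression is printed and
 * QEMU exits.
 */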

static void add_str(GString *s, const gchar *s1)
{
    g_string_append_len(s, s1, strlen(s1) + 1);
}

static void *spapr_create_fdt_skel(hwaddr initrd_base,
                                   hwaddr initrd_size,
                                   hwaddr kernel_size,
                                   bool little_endian,
                                   const char *kernel_cmdline,
                                   uint32_t epow_irq)
{
    void *fdt;
    uint32_t start_prop = cpu_to_be32(initrd_base);
    uint32_t end_prop = cpu_to_be32(initrd_base + initrd_size);
    GString *hypertas = g_string_sized_new(256);
    GString *qemu_hypertas = g_string_sized_new(256);
    uint32_t refpoints[] = {cpu_to_be32(0x4), cpu_to_be32(0x4)};
    uint32_t interrupt_server_ranges_prop[] = {0, cpu_to_be32(max_cpus)};
    unsigned char vec5[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x80};
    char *buf;

    add_str(hypertas, "hcall-pft");
    add_str(hypertas, "hcall-term");
    add_str(hypertas, "hcall-dabr");
    add_str(hypertas, "hcall-interrupt");
    add_str(hypertas, "hcall-tce");
    add_str(hypertas, "hcall-vio");
    add_str(hypertas, "hcall-splpar");
    add_str(hypertas, "hcall-bulk");
    add_str(hypertas, "hcall-set-mode");
    add_str(qemu_hypertas, "hcall-memop1");

    fdt = g_malloc0(FDT_MAX_SIZE);
    _FDT((fdt_create(fdt, FDT_MAX_SIZE)));

    if (kernel_size) {
        _FDT((fdt_add_reservemap_entry(fdt, KERNEL_LOAD_ADDR, kernel_size)));
    }
    if (initrd_size) {
        _FDT((fdt_add_reservemap_entry(fdt, initrd_base, initrd_size)));
    }
    _FDT((fdt_finish_reservemap(fdt)));

    /* Root node */
    _FDT((fdt_begin_node(fdt, "")));
    _FDT((fdt_property_string(fdt, "device_type", "chrp")));
    _FDT((fdt_property_string(fdt, "model", "IBM pSeries (emulated by qemu)")));
    _FDT((fdt_property_string(fdt, "compatible", "qemu,pseries")));

    /*
     * Add info to guest to identify which host it is being run on
     * and what the uuid of the guest is
     */
    if (kvmppc_get_host_model(&buf)) {
        _FDT((fdt_property_string(fdt, "host-model", buf)));
        g_free(buf);
    }
    if (kvmppc_get_host_serial(&buf)) {
        _FDT((fdt_property_string(fdt, "host-serial", buf)));
        g_free(buf);
    }

    buf = g_strdup_printf(UUID_FMT, qemu_uuid[0], qemu_uuid[1],
                          qemu_uuid[2], qemu_uuid[3], qemu_uuid[4],
                          qemu_uuid[5], qemu_uuid[6], qemu_uuid[7],
                          qemu_uuid[8], qemu_uuid[9], qemu_uuid[10],
                          qemu_uuid[11], qemu_uuid[12], qemu_uuid[13],
                          qemu_uuid[14], qemu_uuid[15]);

    _FDT((fdt_property_string(fdt, "vm,uuid", buf)));
    if (qemu_uuid_set) {
        _FDT((fdt_property_string(fdt, "system-id", buf)));
    }
    g_free(buf);

    if (qemu_get_vm_name()) {
        _FDT((fdt_property_string(fdt, "ibm,partition-name",
                                  qemu_get_vm_name())));
    }

    _FDT((fdt_property_cell(fdt, "#address-cells", 0x2)));
    _FDT((fdt_property_cell(fdt, "#size-cells", 0x2)));

    /* /chosen */
    _FDT((fdt_begin_node(fdt, "chosen")));

    /* Set Form1_affinity */
    _FDT((fdt_property(fdt, "ibm,architecture-vec-5", vec5, sizeof(vec5))));

    _FDT((fdt_property_string(fdt, "bootargs", kernel_cmdline)));
    _FDT((fdt_property(fdt, "linux,initrd-start",
                       &start_prop, sizeof(start_prop))));
    _FDT((fdt_property(fdt, "linux,initrd-end",
                       &end_prop, sizeof(end_prop))));
    if (kernel_size) {
        uint64_t kprop[2] = { cpu_to_be64(KERNEL_LOAD_ADDR),
                              cpu_to_be64(kernel_size) };

        _FDT((fdt_property(fdt, "qemu,boot-kernel", &kprop, sizeof(kprop))));
        if (little_endian) {
            _FDT((fdt_property(fdt, "qemu,boot-kernel-le", NULL, 0)));
        }
    }
    if (boot_menu) {
        _FDT((fdt_property_cell(fdt, "qemu,boot-menu", boot_menu)));
    }
    _FDT((fdt_property_cell(fdt, "qemu,graphic-width", graphic_width)));
    _FDT((fdt_property_cell(fdt, "qemu,graphic-height", graphic_height)));
    _FDT((fdt_property_cell(fdt, "qemu,graphic-depth", graphic_depth)));
    _FDT((fdt_end_node(fdt)));

    /* RTAS */
    _FDT((fdt_begin_node(fdt, "rtas")));

    if (!kvm_enabled() || kvmppc_spapr_use_multitce()) {
        add_str(hypertas, "hcall-multi-tce");
    }
    _FDT((fdt_property(fdt, "ibm,hypertas-functions", hypertas->str,
                       hypertas->len)));
    g_string_free(hypertas, TRUE);
    _FDT((fdt_property(fdt, "qemu,hypertas-functions", qemu_hypertas->str,
                       qemu_hypertas->len)));
    g_string_free(qemu_hypertas, TRUE);
    _FDT((fdt_property(fdt, "ibm,associativity-reference-points",
        refpoints, sizeof(refpoints))));

    _FDT((fdt_property_cell(fdt, "rtas-error-log-max", RTAS_ERROR_LOG_MAX)));
    _FDT((fdt_property_cell(fdt, "rtas-event-scan-rate",
                            RTAS_EVENT_SCAN_RATE)));
    if (msi_supported) {
        _FDT((fdt_property(fdt, "ibm,change-msix-capable", NULL, 0)));
    }

    /*
     * According to PAPR, rtas ibm,os-term does not guarantee a return
     * back to the guest cpu.
     *
     * While an additional ibm,extended-os-term property indicates that
     * rtas call return will always occur. Set this property.
     */
    _FDT((fdt_property(fdt, "ibm,extended-os-term", NULL, 0)));

    _FDT((fdt_end_node(fdt)));

    /* interrupt controller */
    _FDT((fdt_begin_node(fdt, "interrupt-controller")));

    _FDT((fdt_property_string(fdt, "device_type",
                              "PowerPC-External-Interrupt-Presentation")));
    _FDT((fdt_property_string(fdt, "compatible", "IBM,ppc-xicp")));
    _FDT((fdt_property(fdt, "interrupt-controller", NULL, 0)));
    _FDT((fdt_property(fdt, "ibm,interrupt-server-ranges",
                       interrupt_server_ranges_prop,
                       sizeof(interrupt_server_ranges_prop))));
    _FDT((fdt_property_cell(fdt, "#interrupt-cells", 2)));
    _FDT((fdt_property_cell(fdt, "linux,phandle", PHANDLE_XICP)));
    _FDT((fdt_property_cell(fdt, "phandle", PHANDLE_XICP)));

    _FDT((fdt_end_node(fdt)));

    /* vdevice */
    _FDT((fdt_begin_node(fdt, "vdevice")));

    _FDT((fdt_property_string(fdt, "device_type", "vdevice")));
    _FDT((fdt_property_string(fdt, "compatible", "IBM,vdevice")));
    _FDT((fdt_property_cell(fdt, "#address-cells", 0x1)));
    _FDT((fdt_property_cell(fdt, "#size-cells", 0x0)));
    _FDT((fdt_property_cell(fdt, "#interrupt-cells", 0x2)));
    _FDT((fdt_property(fdt, "interrupt-controller", NULL, 0)));

    _FDT((fdt_end_node(fdt)));

    /* event-sources */
    spapr_events_fdt_skel(fdt, epow_irq);

    /* /hypervisor node */
    if (kvm_enabled()) {
        uint8_t hypercall[16];

        /* indicate KVM hypercall interface */
        _FDT((fdt_begin_node(fdt, "hypervisor")));
        _FDT((fdt_property_string(fdt, "compatible", "linux,kvm")));
        if (kvmppc_has_cap_fixup_hcalls()) {
            /*
             * Older KVM versions with older guest kernels were broken with the
             * magic page, don't allow the guest to map it.
             */
            kvmppc_get_hypercall(first_cpu->env_ptr, hypercall,
                                 sizeof(hypercall));
            _FDT((fdt_property(fdt, "hcall-instructions", hypercall,
                              sizeof(hypercall))));
        }
        _FDT((fdt_end_node(fdt)));
    }

    _FDT((fdt_end_node(fdt))); /* close root node */
    _FDT((fdt_finish(fdt)));

    return fdt;
}

static int spapr_populate_memory_node(void *fdt, int nodeid, hwaddr start,
                                       hwaddr size)
{
    uint32_t associativity[] = {
        cpu_to_be32(0x4), /* length */
        cpu_to_be32(0x0), cpu_to_be32(0x0),
        cpu_to_be32(0x0), cpu_to_be32(nodeid)
    };
    char mem_name[32];
    uint64_t mem_reg_property[2];
    int off;

    mem_reg_property[0] = cpu_to_be64(start);
    mem_reg_property[1] = cpu_to_be64(size);

    sprintf(mem_name, "memory@" TARGET_FMT_lx, start);
    off = fdt_add_subnode(fdt, 0, mem_name);
    _FDT(off);
    _FDT((fdt_setprop_string(fdt, off, "device_type", "memory")));
    _FDT((fdt_setprop(fdt, off, "reg", mem_reg_property,
                      sizeof(mem_reg_property))));
    _FDT((fdt_setprop(fdt, off, "ibm,associativity", associativity,
                      sizeof(associativity))));
    return off;
}

static int spapr_populate_memory(sPAPRMachineState *spapr, void *fdt)
{
    MachineState *machine = MACHINE(spapr);
    hwaddr mem_start, node_size;
    int i, nb_nodes = nb_numa_nodes;
    NodeInfo *nodes = numa_info;
    NodeInfo ramnode;

    /* No NUMA nodes, assume there is just one node with whole RAM */
    if (!nb_numa_nodes) {
        nb_nodes = 1;
        ramnode.node_mem = machine->ram_size;
        nodes = &ramnode;
    }

    for (i = 0, mem_start = 0; i < nb_nodes; ++i) {
        if (!nodes[i].node_mem) {
            continue;
        }
        if (mem_start >= machine->ram_size) {
            node_size = 0;
        } else {
            node_size = nodes[i].node_mem;
            if (node_size > machine->ram_size - mem_start) {
                node_size = machine->ram_size - mem_start;
            }
        }
        if (!mem_start) {
            /* ppc_spapr_init() checks for rma_size <= node0_size already */
            spapr_populate_memory_node(fdt, i, 0, spapr->rma_size);
            mem_start += spapr->rma_size;
            node_size -= spapr->rma_size;
        }
        for ( ; node_size; ) {
            hwaddr sizetmp = pow2floor(node_size);

            /* mem_start != 0 here */
            if (ctzl(mem_start) < ctzl(sizetmp)) {
                sizetmp = 1ULL << ctzl(mem_start);
            }

            spapr_populate_memory_node(fdt, i, mem_start, sizetmp);
            node_size -= sizetmp;
            mem_start += sizetmp;
        }
    }

    return 0;
}
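
/*
 * Worked example of the splitting loop above (illustrative numbers): a 1.5G
 * node whose region starts at 768M is emitted as three memory@ nodes of
 * 256M, 1G and 256M, because each chunk must be a power of two no larger
 * than the alignment of its start address (pow2floor() plus the ctzl()
 * check).
 */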

static void spapr_populate_cpu_dt(CPUState *cs, void *fdt, int offset,
                                  sPAPRMachineState *spapr)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cs);
    int index = ppc_get_vcpu_dt_id(cpu);
    uint32_t segs[] = {cpu_to_be32(28), cpu_to_be32(40),
                       0xffffffff, 0xffffffff};
    uint32_t tbfreq = kvm_enabled() ? kvmppc_get_tbfreq() : TIMEBASE_FREQ;
    uint32_t cpufreq = kvm_enabled() ? kvmppc_get_clockfreq() : 1000000000;
    uint32_t page_sizes_prop[64];
    size_t page_sizes_prop_size;
    uint32_t vcpus_per_socket = smp_threads * smp_cores;
    uint32_t pft_size_prop[] = {0, cpu_to_be32(spapr->htab_shift)};

    /* Note: we keep CI large pages off for now because a 64K capable guest
     * provisioned with large pages might otherwise try to map a qemu
     * framebuffer (or other kind of memory mapped PCI BAR) using 64K pages
     * even if that qemu runs on a 4k host.
     *
     * We can later add this bit back when we are confident this is not
     * an issue (!HV KVM or 64K host)
     */
    uint8_t pa_features_206[] = { 6, 0,
        0xf6, 0x1f, 0xc7, 0x00, 0x80, 0xc0 };
    uint8_t pa_features_207[] = { 24, 0,
        0xf6, 0x1f, 0xc7, 0xc0, 0x80, 0xf0,
        0x80, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x80, 0x00,
        0x80, 0x00, 0x80, 0x00, 0x80, 0x00 };
    uint8_t *pa_features;
    size_t pa_size;

    _FDT((fdt_setprop_cell(fdt, offset, "reg", index)));
    _FDT((fdt_setprop_string(fdt, offset, "device_type", "cpu")));

    _FDT((fdt_setprop_cell(fdt, offset, "cpu-version", env->spr[SPR_PVR])));
    _FDT((fdt_setprop_cell(fdt, offset, "d-cache-block-size",
                           env->dcache_line_size)));
    _FDT((fdt_setprop_cell(fdt, offset, "d-cache-line-size",
                           env->dcache_line_size)));
    _FDT((fdt_setprop_cell(fdt, offset, "i-cache-block-size",
                           env->icache_line_size)));
    _FDT((fdt_setprop_cell(fdt, offset, "i-cache-line-size",
                           env->icache_line_size)));

    if (pcc->l1_dcache_size) {
        _FDT((fdt_setprop_cell(fdt, offset, "d-cache-size",
                               pcc->l1_dcache_size)));
    } else {
        fprintf(stderr, "Warning: Unknown L1 dcache size for cpu\n");
    }
    if (pcc->l1_icache_size) {
        _FDT((fdt_setprop_cell(fdt, offset, "i-cache-size",
                               pcc->l1_icache_size)));
    } else {
        fprintf(stderr, "Warning: Unknown L1 icache size for cpu\n");
    }

    _FDT((fdt_setprop_cell(fdt, offset, "timebase-frequency", tbfreq)));
    _FDT((fdt_setprop_cell(fdt, offset, "clock-frequency", cpufreq)));
    _FDT((fdt_setprop_cell(fdt, offset, "slb-size", env->slb_nr)));
    _FDT((fdt_setprop_cell(fdt, offset, "ibm,slb-size", env->slb_nr)));
    _FDT((fdt_setprop_string(fdt, offset, "status", "okay")));
    _FDT((fdt_setprop(fdt, offset, "64-bit", NULL, 0)));

    if (env->spr_cb[SPR_PURR].oea_read) {
        _FDT((fdt_setprop(fdt, offset, "ibm,purr", NULL, 0)));
    }

    if (env->mmu_model & POWERPC_MMU_1TSEG) {
        _FDT((fdt_setprop(fdt, offset, "ibm,processor-segment-sizes",
                          segs, sizeof(segs))));
    }

    /* Advertise VMX/VSX (vector extensions) if available
     *   0 / no property == no vector extensions
     *   1               == VMX / Altivec available
     *   2               == VSX available */
    if (env->insns_flags & PPC_ALTIVEC) {
        uint32_t vmx = (env->insns_flags2 & PPC2_VSX) ? 2 : 1;

        _FDT((fdt_setprop_cell(fdt, offset, "ibm,vmx", vmx)));
    }

    /* Advertise DFP (Decimal Floating Point) if available
     *   0 / no property == no DFP
     *   1               == DFP available */
    if (env->insns_flags2 & PPC2_DFP) {
        _FDT((fdt_setprop_cell(fdt, offset, "ibm,dfp", 1)));
    }

    page_sizes_prop_size = create_page_sizes_prop(env, page_sizes_prop,
                                                  sizeof(page_sizes_prop));
    if (page_sizes_prop_size) {
        _FDT((fdt_setprop(fdt, offset, "ibm,segment-page-sizes",
                          page_sizes_prop, page_sizes_prop_size)));
    }

    /* Do the ibm,pa-features property, adjust it for ci-large-pages */
    if (env->mmu_model == POWERPC_MMU_2_06) {
        pa_features = pa_features_206;
        pa_size = sizeof(pa_features_206);
    } else /* env->mmu_model == POWERPC_MMU_2_07 */ {
        pa_features = pa_features_207;
        pa_size = sizeof(pa_features_207);
    }
    if (env->ci_large_pages) {
        pa_features[3] |= 0x20;
    }
    _FDT((fdt_setprop(fdt, offset, "ibm,pa-features", pa_features, pa_size)));

    _FDT((fdt_setprop_cell(fdt, offset, "ibm,chip-id",
                           cs->cpu_index / vcpus_per_socket)));

    _FDT((fdt_setprop(fdt, offset, "ibm,pft-size",
                      pft_size_prop, sizeof(pft_size_prop))));

    _FDT(spapr_fixup_cpu_numa_dt(fdt, offset, cs));

    _FDT(spapr_fixup_cpu_smt_dt(fdt, offset, cpu,
                                ppc_get_compat_smt_threads(cpu)));
}

static void spapr_populate_cpus_dt_node(void *fdt, sPAPRMachineState *spapr)
{
    CPUState *cs;
    int cpus_offset;
    char *nodename;
    int smt = kvmppc_smt_threads();

    cpus_offset = fdt_add_subnode(fdt, 0, "cpus");
    _FDT(cpus_offset);
    _FDT((fdt_setprop_cell(fdt, cpus_offset, "#address-cells", 0x1)));
    _FDT((fdt_setprop_cell(fdt, cpus_offset, "#size-cells", 0x0)));

    /*
     * We walk the CPUs in reverse order to ensure that CPU DT nodes
     * created by fdt_add_subnode() end up in the right order in FDT
     * for the guest kernel to enumerate the CPUs correctly.
     */
    CPU_FOREACH_REVERSE(cs) {
        PowerPCCPU *cpu = POWERPC_CPU(cs);
        int index = ppc_get_vcpu_dt_id(cpu);
        DeviceClass *dc = DEVICE_GET_CLASS(cs);
        int offset;

        if ((index % smt) != 0) {
            continue;
        }

        nodename = g_strdup_printf("%s@%x", dc->fw_name, index);
        offset = fdt_add_subnode(fdt, cpus_offset, nodename);
        g_free(nodename);
        _FDT(offset);
        spapr_populate_cpu_dt(cs, fdt, offset, spapr);
    }

}

/*
 * Adds ibm,dynamic-reconfiguration-memory node.
 * Refer to docs/specs/ppc-spapr-hotplug.txt for the documentation
 * of this device tree node.
 */
static int spapr_populate_drconf_memory(sPAPRMachineState *spapr, void *fdt)
{
    MachineState *machine = MACHINE(spapr);
    int ret, i, offset;
    uint64_t lmb_size = SPAPR_MEMORY_BLOCK_SIZE;
    uint32_t prop_lmb_size[] = {0, cpu_to_be32(lmb_size)};
    uint32_t nr_lmbs = (machine->maxram_size - machine->ram_size)/lmb_size;
    uint32_t *int_buf, *cur_index, buf_len;
    int nr_nodes = nb_numa_nodes ? nb_numa_nodes : 1;

    /*
     * Allocate enough buffer size to fit in ibm,dynamic-memory
     * or ibm,associativity-lookup-arrays
     */
    buf_len = MAX(nr_lmbs * SPAPR_DR_LMB_LIST_ENTRY_SIZE + 1, nr_nodes * 4 + 2)
              * sizeof(uint32_t);
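    /*
     * Sizing note (illustrative): the same scratch buffer is reused for two
     * properties.  ibm,dynamic-memory needs one header cell plus one list
     * entry per LMB (SPAPR_DR_LMB_LIST_ENTRY_SIZE cells: address hi/lo, DRC
     * index, reserved, node, flags, as filled in below), while
     * ibm,associativity-lookup-arrays needs two header cells plus four cells
     * per NUMA node; the MAX() of the two covers both.
     */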
    cur_index = int_buf = g_malloc0(buf_len);

    offset = fdt_add_subnode(fdt, 0, "ibm,dynamic-reconfiguration-memory");

    ret = fdt_setprop(fdt, offset, "ibm,lmb-size", prop_lmb_size,
                    sizeof(prop_lmb_size));
    if (ret < 0) {
        goto out;
    }

    ret = fdt_setprop_cell(fdt, offset, "ibm,memory-flags-mask", 0xff);
    if (ret < 0) {
        goto out;
    }

    ret = fdt_setprop_cell(fdt, offset, "ibm,memory-preservation-time", 0x0);
    if (ret < 0) {
        goto out;
    }

    /* ibm,dynamic-memory */
    int_buf[0] = cpu_to_be32(nr_lmbs);
    cur_index++;
    for (i = 0; i < nr_lmbs; i++) {
        sPAPRDRConnector *drc;
        sPAPRDRConnectorClass *drck;
        uint64_t addr = i * lmb_size + spapr->hotplug_memory.base;
        uint32_t *dynamic_memory = cur_index;

        drc = spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_LMB,
                                       addr/lmb_size);
        g_assert(drc);
        drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);

        dynamic_memory[0] = cpu_to_be32(addr >> 32);
        dynamic_memory[1] = cpu_to_be32(addr & 0xffffffff);
        dynamic_memory[2] = cpu_to_be32(drck->get_index(drc));
        dynamic_memory[3] = cpu_to_be32(0); /* reserved */
        dynamic_memory[4] = cpu_to_be32(numa_get_node(addr, NULL));
        if (addr < machine->ram_size ||
                    memory_region_present(get_system_memory(), addr)) {
            dynamic_memory[5] = cpu_to_be32(SPAPR_LMB_FLAGS_ASSIGNED);
        } else {
            dynamic_memory[5] = cpu_to_be32(0);
        }

        cur_index += SPAPR_DR_LMB_LIST_ENTRY_SIZE;
    }
    ret = fdt_setprop(fdt, offset, "ibm,dynamic-memory", int_buf, buf_len);
    if (ret < 0) {
        goto out;
    }

    /* ibm,associativity-lookup-arrays */
    cur_index = int_buf;
    int_buf[0] = cpu_to_be32(nr_nodes);
    int_buf[1] = cpu_to_be32(4); /* Number of entries per associativity list */
    cur_index += 2;
    for (i = 0; i < nr_nodes; i++) {
        uint32_t associativity[] = {
            cpu_to_be32(0x0),
            cpu_to_be32(0x0),
            cpu_to_be32(0x0),
            cpu_to_be32(i)
        };
        memcpy(cur_index, associativity, sizeof(associativity));
        cur_index += 4;
    }
    ret = fdt_setprop(fdt, offset, "ibm,associativity-lookup-arrays", int_buf,
            (cur_index - int_buf) * sizeof(uint32_t));
out:
    g_free(int_buf);
    return ret;
}

int spapr_h_cas_compose_response(sPAPRMachineState *spapr,
                                 target_ulong addr, target_ulong size,
                                 bool cpu_update, bool memory_update)
{
    void *fdt, *fdt_skel;
    sPAPRDeviceTreeUpdateHeader hdr = { .version_id = 1 };
    sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(qdev_get_machine());

    size -= sizeof(hdr);

    /* Create skeleton */
    fdt_skel = g_malloc0(size);
    _FDT((fdt_create(fdt_skel, size)));
    _FDT((fdt_begin_node(fdt_skel, "")));
    _FDT((fdt_end_node(fdt_skel)));
    _FDT((fdt_finish(fdt_skel)));
    fdt = g_malloc0(size);
    _FDT((fdt_open_into(fdt_skel, fdt, size)));
    g_free(fdt_skel);

    /* Fixup cpu nodes */
    if (cpu_update) {
        _FDT((spapr_fixup_cpu_dt(fdt, spapr)));
    }

    /* Generate memory nodes or ibm,dynamic-reconfiguration-memory node */
    if (memory_update && smc->dr_lmb_enabled) {
        _FDT((spapr_populate_drconf_memory(spapr, fdt)));
    }

    /* Pack resulting tree */
    _FDT((fdt_pack(fdt)));

    if (fdt_totalsize(fdt) + sizeof(hdr) > size) {
        trace_spapr_cas_failed(size);
        return -1;
    }

    cpu_physical_memory_write(addr, &hdr, sizeof(hdr));
    cpu_physical_memory_write(addr + sizeof(hdr), fdt, fdt_totalsize(fdt));
    trace_spapr_cas_continue(fdt_totalsize(fdt) + sizeof(hdr));
    g_free(fdt);

    return 0;
}

static void spapr_finalize_fdt(sPAPRMachineState *spapr,
                               hwaddr fdt_addr,
                               hwaddr rtas_addr,
                               hwaddr rtas_size)
{
    MachineState *machine = MACHINE(qdev_get_machine());
    sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(machine);
    const char *boot_device = machine->boot_order;
    int ret, i;
    size_t cb = 0;
    char *bootlist;
    void *fdt;
    sPAPRPHBState *phb;

    fdt = g_malloc(FDT_MAX_SIZE);

    /* open out the base tree into a temp buffer for the final tweaks */
    _FDT((fdt_open_into(spapr->fdt_skel, fdt, FDT_MAX_SIZE)));

    ret = spapr_populate_memory(spapr, fdt);
    if (ret < 0) {
        fprintf(stderr, "couldn't setup memory nodes in fdt\n");
        exit(1);
    }

    ret = spapr_populate_vdevice(spapr->vio_bus, fdt);
    if (ret < 0) {
        fprintf(stderr, "couldn't setup vio devices in fdt\n");
        exit(1);
    }

    if (object_resolve_path_type("", TYPE_SPAPR_RNG, NULL)) {
        ret = spapr_rng_populate_dt(fdt);
        if (ret < 0) {
            fprintf(stderr, "could not set up rng device in the fdt\n");
            exit(1);
        }
    }

    QLIST_FOREACH(phb, &spapr->phbs, list) {
        ret = spapr_populate_pci_dt(phb, PHANDLE_XICP, fdt);
    }

    if (ret < 0) {
        fprintf(stderr, "couldn't setup PCI devices in fdt\n");
        exit(1);
    }

    /* RTAS */
    ret = spapr_rtas_device_tree_setup(fdt, rtas_addr, rtas_size);
    if (ret < 0) {
        fprintf(stderr, "Couldn't set up RTAS device tree properties\n");
    }

    /* cpus */
    spapr_populate_cpus_dt_node(fdt, spapr);

    bootlist = get_boot_devices_list(&cb, true);
    if (cb && bootlist) {
        int offset = fdt_path_offset(fdt, "/chosen");
        if (offset < 0) {
            exit(1);
        }
        for (i = 0; i < cb; i++) {
            if (bootlist[i] == '\n') {
                bootlist[i] = ' ';
            }
        }
        ret = fdt_setprop_string(fdt, offset, "qemu,boot-list", bootlist);
    }

    if (boot_device && strlen(boot_device)) {
        int offset = fdt_path_offset(fdt, "/chosen");

        if (offset < 0) {
            exit(1);
        }
        fdt_setprop_string(fdt, offset, "qemu,boot-device", boot_device);
    }

    if (!spapr->has_graphics) {
        spapr_populate_chosen_stdout(fdt, spapr->vio_bus);
    }

    if (smc->dr_lmb_enabled) {
        _FDT(spapr_drc_populate_dt(fdt, 0, NULL, SPAPR_DR_CONNECTOR_TYPE_LMB));
    }

    _FDT((fdt_pack(fdt)));

    if (fdt_totalsize(fdt) > FDT_MAX_SIZE) {
        error_report("FDT too big ! 0x%x bytes (max is 0x%x)",
                     fdt_totalsize(fdt), FDT_MAX_SIZE);
        exit(1);
    }

    qemu_fdt_dumpdtb(fdt, fdt_totalsize(fdt));
    cpu_physical_memory_write(fdt_addr, fdt, fdt_totalsize(fdt));

    g_free(bootlist);
    g_free(fdt);
}

static uint64_t translate_kernel_address(void *opaque, uint64_t addr)
{
    return (addr & 0x0fffffff) + KERNEL_LOAD_ADDR;
}
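
/*
 * For illustration: translate_kernel_address() masks the kernel's link
 * address down to its low 28 bits and rebases it at KERNEL_LOAD_ADDR, so
 * e.g. an ELF paddr of 0x2000 (or 0xc000000000002000) ends up loaded at
 * 0x402000, i.e. 8K past the 4M load base.
 */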

static void emulate_spapr_hypercall(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;

    if (msr_pr) {
        hcall_dprintf("Hypercall made with MSR[PR]=1\n");
        env->gpr[3] = H_PRIVILEGE;
    } else {
        env->gpr[3] = spapr_hypercall(cpu, env->gpr[3], &env->gpr[4]);
    }
}

#define HPTE(_table, _i)   (void *)(((uint64_t *)(_table)) + ((_i) * 2))
#define HPTE_VALID(_hpte)  (tswap64(*((uint64_t *)(_hpte))) & HPTE64_V_VALID)
#define HPTE_DIRTY(_hpte)  (tswap64(*((uint64_t *)(_hpte))) & HPTE64_V_HPTE_DIRTY)
#define CLEAN_HPTE(_hpte)  ((*(uint64_t *)(_hpte)) &= tswap64(~HPTE64_V_HPTE_DIRTY))
#define DIRTY_HPTE(_hpte)  ((*(uint64_t *)(_hpte)) |= tswap64(HPTE64_V_HPTE_DIRTY))
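
/*
 * Layout reminder (illustrative): each hashed PTE is HASH_PTE_SIZE_64 (16)
 * bytes, i.e. two 64-bit doublewords, so HPTE(table, i) advances by two
 * uint64_t per slot.  HPTE64_V_VALID is an architected bit in the first
 * doubleword, while HPTE64_V_HPTE_DIRTY is a software-only bit borrowed here
 * to mark entries that still need to be sent during migration (see the
 * htab_save_* functions below).
 */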

static void spapr_alloc_htab(sPAPRMachineState *spapr)
{
    long shift;
    int index;

    /* allocate hash page table.  For now we always make this 16mb,
     * later we should probably make it scale to the size of guest
     * RAM */

    shift = kvmppc_reset_htab(spapr->htab_shift);
    if (shift < 0) {
        /*
         * For HV KVM, host kernel will return -ENOMEM when requested
         * HTAB size can't be allocated.
         */
        error_setg(&error_abort, "Failed to allocate HTAB of requested size, try with smaller maxmem");
    } else if (shift > 0) {
        /*
         * Kernel handles htab, we don't need to allocate one
         *
         * Older kernels can fall back to lower HTAB shift values,
         * but we don't allow booting of such guests.
         */
        if (shift != spapr->htab_shift) {
            error_setg(&error_abort, "Failed to allocate HTAB of requested size, try with smaller maxmem");
        }

        spapr->htab_shift = shift;
        kvmppc_kern_htab = true;
    } else {
        /* Allocate htab */
        spapr->htab = qemu_memalign(HTAB_SIZE(spapr), HTAB_SIZE(spapr));

        /* And clear it */
        memset(spapr->htab, 0, HTAB_SIZE(spapr));

        for (index = 0; index < HTAB_SIZE(spapr) / HASH_PTE_SIZE_64; index++) {
            DIRTY_HPTE(HPTE(spapr->htab, index));
        }
    }
}

/*
 * Clear HTAB entries during reset.
 *
 * If host kernel has allocated HTAB, KVM_PPC_ALLOCATE_HTAB ioctl is
 * used to clear HTAB. Otherwise QEMU-allocated HTAB is cleared manually.
 */
static void spapr_reset_htab(sPAPRMachineState *spapr)
{
    long shift;
    int index;

    shift = kvmppc_reset_htab(spapr->htab_shift);
    if (shift < 0) {
        error_setg(&error_abort, "Failed to reset HTAB");
    } else if (shift > 0) {
        if (shift != spapr->htab_shift) {
            error_setg(&error_abort, "Requested HTAB allocation failed during reset");
        }

        /* Tell readers to update their file descriptor */
        if (spapr->htab_fd >= 0) {
            spapr->htab_fd_stale = true;
        }
    } else {
        memset(spapr->htab, 0, HTAB_SIZE(spapr));

        for (index = 0; index < HTAB_SIZE(spapr) / HASH_PTE_SIZE_64; index++) {
            DIRTY_HPTE(HPTE(spapr->htab, index));
        }
    }

    /* Update the RMA size if necessary */
    if (spapr->vrma_adjust) {
        spapr->rma_size = kvmppc_rma_size(spapr_node0_size(),
                                          spapr->htab_shift);
    }
}

static int find_unknown_sysbus_device(SysBusDevice *sbdev, void *opaque)
{
    bool matched = false;

    if (object_dynamic_cast(OBJECT(sbdev), TYPE_SPAPR_PCI_HOST_BRIDGE)) {
        matched = true;
    }

    if (!matched) {
        error_report("Device %s is not supported by this machine yet.",
                     qdev_fw_name(DEVICE(sbdev)));
        exit(1);
    }

    return 0;
}

/*
 * A guest reset will cause spapr->htab_fd to become stale if being used.
 * Reopen the file descriptor to make sure the whole HTAB is properly read.
 */
static int spapr_check_htab_fd(sPAPRMachineState *spapr)
{
    int rc = 0;

    if (spapr->htab_fd_stale) {
        close(spapr->htab_fd);
        spapr->htab_fd = kvmppc_get_htab_fd(false);
        if (spapr->htab_fd < 0) {
            error_report("Unable to open fd for reading hash table from KVM: "
                         "%s", strerror(errno));
            rc = -1;
        }
        spapr->htab_fd_stale = false;
    }

    return rc;
}

static void ppc_spapr_reset(void)
{
    sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
    PowerPCCPU *first_ppc_cpu;
    uint32_t rtas_limit;

    /* Check for unknown sysbus devices */
    foreach_dynamic_sysbus_device(find_unknown_sysbus_device, NULL);

    /* Reset the hash table & recalc the RMA */
    spapr_reset_htab(spapr);

    qemu_devices_reset();

    /*
     * We place the device tree and RTAS just below either the top of the RMA,
     * or just below 2GB, whichever is lower, so that it can be
     * processed with 32-bit real mode code if necessary
     */
    rtas_limit = MIN(spapr->rma_size, RTAS_MAX_ADDR);
    spapr->rtas_addr = rtas_limit - RTAS_MAX_SIZE;
    spapr->fdt_addr = spapr->rtas_addr - FDT_MAX_SIZE;

    /* Load the fdt */
    spapr_finalize_fdt(spapr, spapr->fdt_addr, spapr->rtas_addr,
                       spapr->rtas_size);

    /* Copy RTAS over */
    cpu_physical_memory_write(spapr->rtas_addr, spapr->rtas_blob,
                              spapr->rtas_size);

    /* Set up the entry state */
    first_ppc_cpu = POWERPC_CPU(first_cpu);
    first_ppc_cpu->env.gpr[3] = spapr->fdt_addr;
    first_ppc_cpu->env.gpr[5] = 0;
    first_cpu->halted = 0;
    first_ppc_cpu->env.nip = SPAPR_ENTRY_POINT;

}

static void spapr_cpu_reset(void *opaque)
{
    sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
    PowerPCCPU *cpu = opaque;
    CPUState *cs = CPU(cpu);
    CPUPPCState *env = &cpu->env;

    cpu_reset(cs);

    /* All CPUs start halted.  CPU0 is unhalted from the machine level
     * reset code and the rest are explicitly started up by the guest
     * using an RTAS call */
    cs->halted = 1;

    env->spr[SPR_HIOR] = 0;

    env->external_htab = (uint8_t *)spapr->htab;
    if (kvm_enabled() && !env->external_htab) {
        /*
         * HV KVM, set external_htab to 1 so our ppc_hash64_load_hpte*
         * functions do the right thing.
         */
        env->external_htab = (void *)1;
    }
    env->htab_base = -1;
    /*
     * htab_mask is the mask used to normalize hash value to PTEG index.
     * htab_shift is log2 of hash table size.
     * We have 8 hpte per group, and each hpte is 16 bytes.
     * i.e. 128 bytes per PTEG.
     */
    env->htab_mask = (1ULL << (spapr->htab_shift - 7)) - 1;
    env->spr[SPR_SDR1] = (target_ulong)(uintptr_t)spapr->htab |
        (spapr->htab_shift - 18);
}
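
/*
 * Worked example (illustrative): with the default 16M hash table,
 * htab_shift is 24, so htab_mask = (1 << (24 - 7)) - 1 = 0x1ffff, i.e.
 * 128K PTEGs of 128 bytes each, and the low bits of SDR1 encode the size
 * as htab_shift - 18 = 6.
 */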

static void spapr_create_nvram(sPAPRMachineState *spapr)
{
    DeviceState *dev = qdev_create(&spapr->vio_bus->bus, "spapr-nvram");
    DriveInfo *dinfo = drive_get(IF_PFLASH, 0, 0);

    if (dinfo) {
        qdev_prop_set_drive(dev, "drive", blk_by_legacy_dinfo(dinfo),
                            &error_fatal);
    }

    qdev_init_nofail(dev);

    spapr->nvram = (struct sPAPRNVRAM *)dev;
}

static void spapr_rtc_create(sPAPRMachineState *spapr)
{
    DeviceState *dev = qdev_create(NULL, TYPE_SPAPR_RTC);

    qdev_init_nofail(dev);
    spapr->rtc = dev;

    object_property_add_alias(qdev_get_machine(), "rtc-time",
                              OBJECT(spapr->rtc), "date", NULL);
}

/* Returns whether we want to use VGA or not */
static int spapr_vga_init(PCIBus *pci_bus)
{
    switch (vga_interface_type) {
    case VGA_NONE:
        return false;
    case VGA_DEVICE:
        return true;
    case VGA_STD:
    case VGA_VIRTIO:
        return pci_vga_init(pci_bus) != NULL;
    default:
        fprintf(stderr, "This vga model is not supported,"
                " currently it only supports -vga std\n");
        exit(0);
    }
}

static int spapr_post_load(void *opaque, int version_id)
{
    sPAPRMachineState *spapr = (sPAPRMachineState *)opaque;
    int err = 0;

    /* In earlier versions, there was no separate qdev for the PAPR
     * RTC, so the RTC offset was stored directly in sPAPREnvironment.
     * So when migrating from those versions, poke the incoming offset
     * value into the RTC device */
    if (version_id < 3) {
        err = spapr_rtc_import_offset(spapr->rtc, spapr->rtc_offset);
    }

    return err;
}

static bool version_before_3(void *opaque, int version_id)
{
    return version_id < 3;
}

static const VMStateDescription vmstate_spapr = {
    .name = "spapr",
    .version_id = 3,
    .minimum_version_id = 1,
    .post_load = spapr_post_load,
    .fields = (VMStateField[]) {
        /* used to be @next_irq */
        VMSTATE_UNUSED_BUFFER(version_before_3, 0, 4),

        /* RTC offset */
        VMSTATE_UINT64_TEST(rtc_offset, sPAPRMachineState, version_before_3),

        VMSTATE_PPC_TIMEBASE_V(tb, sPAPRMachineState, 2),
        VMSTATE_END_OF_LIST()
    },
};

static int htab_save_setup(QEMUFile *f, void *opaque)
{
    sPAPRMachineState *spapr = opaque;

    /* "Iteration" header */
    qemu_put_be32(f, spapr->htab_shift);

    if (spapr->htab) {
        spapr->htab_save_index = 0;
        spapr->htab_first_pass = true;
    } else {
        assert(kvm_enabled());

        spapr->htab_fd = kvmppc_get_htab_fd(false);
        spapr->htab_fd_stale = false;
        if (spapr->htab_fd < 0) {
            fprintf(stderr, "Unable to open fd for reading hash table from KVM: %s\n",
                    strerror(errno));
            return -1;
        }
    }


    return 0;
}

static void htab_save_first_pass(QEMUFile *f, sPAPRMachineState *spapr,
                                 int64_t max_ns)
{
    int htabslots = HTAB_SIZE(spapr) / HASH_PTE_SIZE_64;
    int index = spapr->htab_save_index;
    int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);

    assert(spapr->htab_first_pass);

    do {
        int chunkstart;

        /* Consume invalid HPTEs */
        while ((index < htabslots)
               && !HPTE_VALID(HPTE(spapr->htab, index))) {
            index++;
            CLEAN_HPTE(HPTE(spapr->htab, index));
        }

        /* Consume valid HPTEs */
        chunkstart = index;
        while ((index < htabslots) && (index - chunkstart < USHRT_MAX)
               && HPTE_VALID(HPTE(spapr->htab, index))) {
            index++;
            CLEAN_HPTE(HPTE(spapr->htab, index));
        }

        if (index > chunkstart) {
            int n_valid = index - chunkstart;

            qemu_put_be32(f, chunkstart);
            qemu_put_be16(f, n_valid);
            qemu_put_be16(f, 0);
            qemu_put_buffer(f, HPTE(spapr->htab, chunkstart),
                            HASH_PTE_SIZE_64 * n_valid);

            if ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) > max_ns) {
                break;
            }
        }
    } while ((index < htabslots) && !qemu_file_rate_limit(f));

    if (index >= htabslots) {
        assert(index == htabslots);
        index = 0;
        spapr->htab_first_pass = false;
    }
    spapr->htab_save_index = index;
}

static int htab_save_later_pass(QEMUFile *f, sPAPRMachineState *spapr,
                                int64_t max_ns)
{
    bool final = max_ns < 0;
    int htabslots = HTAB_SIZE(spapr) / HASH_PTE_SIZE_64;
    int examined = 0, sent = 0;
    int index = spapr->htab_save_index;
    int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);

    assert(!spapr->htab_first_pass);

    do {
        int chunkstart, invalidstart;

        /* Consume non-dirty HPTEs */
        while ((index < htabslots)
               && !HPTE_DIRTY(HPTE(spapr->htab, index))) {
            index++;
            examined++;
        }

        chunkstart = index;
        /* Consume valid dirty HPTEs */
        while ((index < htabslots) && (index - chunkstart < USHRT_MAX)
               && HPTE_DIRTY(HPTE(spapr->htab, index))
               && HPTE_VALID(HPTE(spapr->htab, index))) {
            CLEAN_HPTE(HPTE(spapr->htab, index));
            index++;
            examined++;
        }

        invalidstart = index;
        /* Consume invalid dirty HPTEs */
        while ((index < htabslots) && (index - invalidstart < USHRT_MAX)
               && HPTE_DIRTY(HPTE(spapr->htab, index))
               && !HPTE_VALID(HPTE(spapr->htab, index))) {
            CLEAN_HPTE(HPTE(spapr->htab, index));
            index++;
            examined++;
        }

        if (index > chunkstart) {
            int n_valid = invalidstart - chunkstart;
            int n_invalid = index - invalidstart;

            qemu_put_be32(f, chunkstart);
            qemu_put_be16(f, n_valid);
            qemu_put_be16(f, n_invalid);
            qemu_put_buffer(f, HPTE(spapr->htab, chunkstart),
                            HASH_PTE_SIZE_64 * n_valid);
            sent += index - chunkstart;

            if (!final && (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) > max_ns) {
                break;
            }
        }

        if (examined >= htabslots) {
            break;
        }

        if (index >= htabslots) {
            assert(index == htabslots);
            index = 0;
        }
    } while ((examined < htabslots) && (!qemu_file_rate_limit(f) || final));

    if (index >= htabslots) {
        assert(index == htabslots);
        index = 0;
    }

    spapr->htab_save_index = index;

    return (examined >= htabslots) && (sent == 0) ? 1 : 0;
}

#define MAX_ITERATION_NS    5000000 /* 5 ms */
#define MAX_KVM_BUF_SIZE    2048
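
/*
 * Aid for reading htab_save_*() and htab_load() below: the first section
 * carries only the hash table shift (a non-zero be32); every later section
 * starts with a be32 0 header followed by chunks of
 *   be32 index, be16 n_valid, be16 n_invalid,
 *   then n_valid * HASH_PTE_SIZE_64 bytes of HPTE data,
 * and an all-zero chunk terminates the section.
 */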

static int htab_save_iterate(QEMUFile *f, void *opaque)
{
    sPAPRMachineState *spapr = opaque;
    int rc = 0;

    /* Iteration header */
    qemu_put_be32(f, 0);

    if (!spapr->htab) {
        assert(kvm_enabled());

        rc = spapr_check_htab_fd(spapr);
        if (rc < 0) {
            return rc;
        }

        rc = kvmppc_save_htab(f, spapr->htab_fd,
                              MAX_KVM_BUF_SIZE, MAX_ITERATION_NS);
        if (rc < 0) {
            return rc;
        }
    } else  if (spapr->htab_first_pass) {
        htab_save_first_pass(f, spapr, MAX_ITERATION_NS);
    } else {
        rc = htab_save_later_pass(f, spapr, MAX_ITERATION_NS);
    }

    /* End marker */
    qemu_put_be32(f, 0);
    qemu_put_be16(f, 0);
    qemu_put_be16(f, 0);

    return rc;
}

static int htab_save_complete(QEMUFile *f, void *opaque)
{
    sPAPRMachineState *spapr = opaque;

    /* Iteration header */
    qemu_put_be32(f, 0);

    if (!spapr->htab) {
        int rc;

        assert(kvm_enabled());

        rc = spapr_check_htab_fd(spapr);
        if (rc < 0) {
            return rc;
        }

        rc = kvmppc_save_htab(f, spapr->htab_fd, MAX_KVM_BUF_SIZE, -1);
        if (rc < 0) {
            return rc;
        }
        close(spapr->htab_fd);
        spapr->htab_fd = -1;
    } else {
        htab_save_later_pass(f, spapr, -1);
    }

    /* End marker */
    qemu_put_be32(f, 0);
    qemu_put_be16(f, 0);
    qemu_put_be16(f, 0);

    return 0;
}

static int htab_load(QEMUFile *f, void *opaque, int version_id)
{
    sPAPRMachineState *spapr = opaque;
    uint32_t section_hdr;
    int fd = -1;

    if (version_id < 1 || version_id > 1) {
        fprintf(stderr, "htab_load() bad version\n");
        return -EINVAL;
    }

    section_hdr = qemu_get_be32(f);

    if (section_hdr) {
        /* First section, just the hash shift */
        if (spapr->htab_shift != section_hdr) {
            error_report("htab_shift mismatch: source %d target %d",
                         section_hdr, spapr->htab_shift);
            return -EINVAL;
        }
        return 0;
    }

    if (!spapr->htab) {
        assert(kvm_enabled());

        fd = kvmppc_get_htab_fd(true);
        if (fd < 0) {
            fprintf(stderr, "Unable to open fd to restore KVM hash table: %s\n",
                    strerror(errno));
        }
    }

    while (true) {
        uint32_t index;
        uint16_t n_valid, n_invalid;

        index = qemu_get_be32(f);
        n_valid = qemu_get_be16(f);
        n_invalid = qemu_get_be16(f);

        if ((index == 0) && (n_valid == 0) && (n_invalid == 0)) {
            /* End of Stream */
            break;
        }

        if ((index + n_valid + n_invalid) >
            (HTAB_SIZE(spapr) / HASH_PTE_SIZE_64)) {
            /* Bad index in stream */
            fprintf(stderr, "htab_load() bad index %d (%hd+%hd entries) "
1571 1572
                    "in htab stream (htab_shift=%d)\n", index, n_valid, n_invalid,
                    spapr->htab_shift);
            return -EINVAL;
        }

        if (spapr->htab) {
            if (n_valid) {
                qemu_get_buffer(f, HPTE(spapr->htab, index),
                                HASH_PTE_SIZE_64 * n_valid);
            }
            if (n_invalid) {
                memset(HPTE(spapr->htab, index + n_valid), 0,
                       HASH_PTE_SIZE_64 * n_invalid);
            }
        } else {
            int rc;

            assert(fd >= 0);

            rc = kvmppc_load_htab_chunk(f, fd, index, n_valid, n_invalid);
            if (rc < 0) {
                return rc;
            }
        }
    }

    if (!spapr->htab) {
        assert(fd >= 0);
        close(fd);
    }

    return 0;
}

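/*
 * Live migration hooks for the hash page table, registered as the
 * "spapr/htab" section from ppc_spapr_init().
 */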
static SaveVMHandlers savevm_htab_handlers = {
    .save_live_setup = htab_save_setup,
    .save_live_iterate = htab_save_iterate,
    .save_live_complete_precopy = htab_save_complete,
    .load_state = htab_load,
};

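/* qemu_register_boot_set() callback: record the boot order chosen at runtime */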
static void spapr_boot_set(void *opaque, const char *boot_device,
                           Error **errp)
{
    MachineState *machine = MACHINE(qdev_get_machine());
    machine->boot_order = g_strdup(boot_device);
}

static void spapr_cpu_init(sPAPRMachineState *spapr, PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;

    /* Set time-base frequency to 512 MHz */
    cpu_ppc_tb_init(env, TIMEBASE_FREQ);

    /* PAPR always has exception vectors in RAM not ROM. To ensure this,
     * MSR[IP] should never be set.
     */
    env->msr_mask &= ~(1 << 6);

    /* Tell KVM that we're in PAPR mode */
    if (kvm_enabled()) {
        kvmppc_set_papr(cpu);
    }

    if (cpu->max_compat) {
        if (ppc_set_compat(cpu, cpu->max_compat) < 0) {
            exit(1);
        }
    }

    xics_cpu_setup(spapr->icp, cpu);

    qemu_register_reset(spapr_cpu_reset, cpu);
}

/*
 * Reset routine for LMB DR devices.
 *
 * Unlike PCI DR devices, LMB DR devices explicitly register this reset
 * routine. Reset for PCI DR devices will be handled by PHB reset routine
 * when it walks all its children devices. LMB devices reset occurs
 * as part of spapr_ppc_reset().
 */
static void spapr_drc_reset(void *opaque)
{
    sPAPRDRConnector *drc = opaque;
    DeviceState *d = DEVICE(drc);

    if (d) {
        device_reset(d);
    }
}

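/*
 * Create one LMB DR connector per SPAPR_MEMORY_BLOCK_SIZE block of the
 * hotpluggable memory range (between ram_size and maxram_size) and
 * register the reset handler above for each of them.
 */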
static void spapr_create_lmb_dr_connectors(sPAPRMachineState *spapr)
{
    MachineState *machine = MACHINE(spapr);
    uint64_t lmb_size = SPAPR_MEMORY_BLOCK_SIZE;
    uint32_t nr_lmbs = (machine->maxram_size - machine->ram_size)/lmb_size;
    int i;

    for (i = 0; i < nr_lmbs; i++) {
        sPAPRDRConnector *drc;
        uint64_t addr;

        addr = i * lmb_size + spapr->hotplug_memory.base;
        drc = spapr_dr_connector_new(OBJECT(spapr), SPAPR_DR_CONNECTOR_TYPE_LMB,
                                     addr/lmb_size);
        qemu_register_reset(spapr_drc_reset, drc);
    }
}

/*
 * If RAM size, maxmem size and individual node mem sizes aren't aligned
 * to SPAPR_MEMORY_BLOCK_SIZE(256MB), then refuse to start the guest
 * since we can't support such unaligned sizes with DRCONF_MEMORY.
 */
static void spapr_validate_node_memory(MachineState *machine)
{
    int i;

    if (machine->maxram_size % SPAPR_MEMORY_BLOCK_SIZE ||
        machine->ram_size % SPAPR_MEMORY_BLOCK_SIZE) {
        error_report("Can't support memory configuration where RAM size "
                     "0x" RAM_ADDR_FMT " or maxmem size "
                     "0x" RAM_ADDR_FMT " isn't aligned to %llu MB",
                     machine->ram_size, machine->maxram_size,
                     SPAPR_MEMORY_BLOCK_SIZE/M_BYTE);
        exit(EXIT_FAILURE);
    }

    for (i = 0; i < nb_numa_nodes; i++) {
        if (numa_info[i].node_mem % SPAPR_MEMORY_BLOCK_SIZE) {
            error_report("Can't support memory configuration where memory size"
                         " %" PRIx64 " of node %d isn't aligned to %llu MB",
                         numa_info[i].node_mem, i,
                         SPAPR_MEMORY_BLOCK_SIZE/M_BYTE);
            exit(EXIT_FAILURE);
        }
    }
}

/* pSeries LPAR / sPAPR hardware init */
static void ppc_spapr_init(MachineState *machine)
{
    sPAPRMachineState *spapr = SPAPR_MACHINE(machine);
    sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(machine);
    const char *kernel_filename = machine->kernel_filename;
    const char *kernel_cmdline = machine->kernel_cmdline;
    const char *initrd_filename = machine->initrd_filename;
    PowerPCCPU *cpu;
    PCIHostState *phb;
    int i;
    MemoryRegion *sysmem = get_system_memory();
    MemoryRegion *ram = g_new(MemoryRegion, 1);
    MemoryRegion *rma_region;
    void *rma = NULL;
    hwaddr rma_alloc_size;
    hwaddr node0_size = spapr_node0_size();
    uint32_t initrd_base = 0;
    long kernel_size = 0, initrd_size = 0;
    long load_limit, fw_size;
    bool kernel_le = false;
    char *filename;

    msi_supported = true;

    QLIST_INIT(&spapr->phbs);

    cpu_ppc_hypercall = emulate_spapr_hypercall;

    /* Allocate RMA if necessary */
    rma_alloc_size = kvmppc_alloc_rma(&rma);

    if (rma_alloc_size == -1) {
        error_report("Unable to create RMA");
        exit(1);
    }

    if (rma_alloc_size && (rma_alloc_size < node0_size)) {
        spapr->rma_size = rma_alloc_size;
    } else {
        spapr->rma_size = node0_size;

        /* With KVM, we don't actually know whether KVM supports an
         * unbounded RMA (PR KVM) or is limited by the hash table size
         * (HV KVM using VRMA), so we always assume the latter
         *
         * In that case, we also limit the initial allocations for RTAS
         * etc... to 256M since we have no way to know what the VRMA size
         * is going to be, as it depends on the size of the hash table,
         * which isn't determined yet.
         */
        if (kvm_enabled()) {
            spapr->vrma_adjust = 1;
            spapr->rma_size = MIN(spapr->rma_size, 0x10000000);
        }
    }

    if (spapr->rma_size > node0_size) {
        fprintf(stderr, "Error: Numa node 0 has to span the RMA (%#08"HWADDR_PRIx")\n",
                spapr->rma_size);
        exit(1);
    }

    /* Setup a load limit for the ramdisk leaving room for SLOF and FDT */
    load_limit = MIN(spapr->rma_size, RTAS_MAX_ADDR) - FW_OVERHEAD;

    /* We aim for a hash table of size 1/128 the size of RAM.  The
     * normal rule of thumb is 1/64 the size of RAM, but that's much
     * more than needed for the Linux guests we support. */
    spapr->htab_shift = 18; /* Minimum architected size */
    while (spapr->htab_shift <= 46) {
        if ((1ULL << (spapr->htab_shift + 7)) >= machine->maxram_size) {
            break;
        }
        spapr->htab_shift++;
    }
    spapr_alloc_htab(spapr);

    /* Set up Interrupt Controller before we create the VCPUs */
    spapr->icp = xics_system_init(machine,
                                  DIV_ROUND_UP(max_cpus * kvmppc_smt_threads(),
                                               smp_threads),
                                  XICS_IRQS);

    if (smc->dr_lmb_enabled) {
        spapr_validate_node_memory(machine);
    }

    /* init CPUs */
    if (machine->cpu_model == NULL) {
        machine->cpu_model = kvm_enabled() ? "host" : "POWER7";
    }
    for (i = 0; i < smp_cpus; i++) {
        cpu = cpu_ppc_init(machine->cpu_model);
        if (cpu == NULL) {
            fprintf(stderr, "Unable to find PowerPC CPU definition\n");
            exit(1);
        }
        spapr_cpu_init(spapr, cpu);
    }

    if (kvm_enabled()) {
        /* Enable H_LOGICAL_CI_* so SLOF can talk to in-kernel devices */
        kvmppc_enable_logical_ci_hcalls();
        kvmppc_enable_set_mode_hcall();
    }

    /* allocate RAM */
    memory_region_allocate_system_memory(ram, NULL, "ppc_spapr.ram",
                                         machine->ram_size);
    memory_region_add_subregion(sysmem, 0, ram);

    if (rma_alloc_size && rma) {
        rma_region = g_new(MemoryRegion, 1);
        memory_region_init_ram_ptr(rma_region, NULL, "ppc_spapr.rma",
                                   rma_alloc_size, rma);
        vmstate_register_ram_global(rma_region);
        memory_region_add_subregion(sysmem, 0, rma_region);
    }

    /* initialize hotplug memory address space */
    if (machine->ram_size < machine->maxram_size) {
        ram_addr_t hotplug_mem_size = machine->maxram_size - machine->ram_size;

        if (machine->ram_slots > SPAPR_MAX_RAM_SLOTS) {
            error_report("Specified number of memory slots %"PRIu64" exceeds max supported %d",
                         machine->ram_slots, SPAPR_MAX_RAM_SLOTS);
            exit(EXIT_FAILURE);
        }

        spapr->hotplug_memory.base = ROUND_UP(machine->ram_size,
                                              SPAPR_HOTPLUG_MEM_ALIGN);
        memory_region_init(&spapr->hotplug_memory.mr, OBJECT(spapr),
                           "hotplug-memory", hotplug_mem_size);
        memory_region_add_subregion(sysmem, spapr->hotplug_memory.base,
                                    &spapr->hotplug_memory.mr);
    }

    if (smc->dr_lmb_enabled) {
        spapr_create_lmb_dr_connectors(spapr);
    }

    filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, "spapr-rtas.bin");
    if (!filename) {
        error_report("Could not find LPAR rtas '%s'", "spapr-rtas.bin");
        exit(1);
    }
    spapr->rtas_size = get_image_size(filename);
    spapr->rtas_blob = g_malloc(spapr->rtas_size);
    if (load_image_size(filename, spapr->rtas_blob, spapr->rtas_size) < 0) {
        error_report("Could not load LPAR rtas '%s'", filename);
        exit(1);
    }
    if (spapr->rtas_size > RTAS_MAX_SIZE) {
        error_report("RTAS too big! 0x%zx bytes (max is 0x%x)",
                     (size_t)spapr->rtas_size, RTAS_MAX_SIZE);
        exit(1);
    }
    g_free(filename);

    /* Set up EPOW events infrastructure */
    spapr_events_init(spapr);

    /* Set up the RTC RTAS interfaces */
    spapr_rtc_create(spapr);

    /* Set up VIO bus */
    spapr->vio_bus = spapr_vio_bus_init();

    for (i = 0; i < MAX_SERIAL_PORTS; i++) {
        if (serial_hds[i]) {
            spapr_vty_create(spapr->vio_bus, serial_hds[i]);
        }
    }

    /* We always have at least the nvram device on VIO */
    spapr_create_nvram(spapr);

    /* Set up PCI */
    spapr_pci_rtas_init();

    phb = spapr_create_phb(spapr, 0);

    for (i = 0; i < nb_nics; i++) {
        NICInfo *nd = &nd_table[i];

        if (!nd->model) {
            nd->model = g_strdup("ibmveth");
        }

        if (strcmp(nd->model, "ibmveth") == 0) {
            spapr_vlan_create(spapr->vio_bus, nd);
        } else {
            pci_nic_init_nofail(&nd_table[i], phb->bus, nd->model, NULL);
        }
    }

    for (i = 0; i <= drive_get_max_bus(IF_SCSI); i++) {
        spapr_vscsi_create(spapr->vio_bus);
    }

    /* Graphics */
    if (spapr_vga_init(phb->bus)) {
        spapr->has_graphics = true;
        machine->usb |= defaults_enabled() && !machine->usb_disabled;
    }

    if (machine->usb) {
        if (smc->use_ohci_by_default) {
            pci_create_simple(phb->bus, -1, "pci-ohci");
        } else {
            pci_create_simple(phb->bus, -1, "nec-usb-xhci");
        }

        if (spapr->has_graphics) {
            USBBus *usb_bus = usb_bus_find(-1);

            usb_create_simple(usb_bus, "usb-kbd");
            usb_create_simple(usb_bus, "usb-mouse");
        }
    }

    if (spapr->rma_size < (MIN_RMA_SLOF << 20)) {
        fprintf(stderr, "qemu: pSeries SLOF firmware requires >= "
                "%ldM guest RMA (Real Mode Area memory)\n", MIN_RMA_SLOF);
        exit(1);
    }

    if (kernel_filename) {
        uint64_t lowaddr = 0;

        kernel_size = load_elf(kernel_filename, translate_kernel_address, NULL,
                               NULL, &lowaddr, NULL, 1, PPC_ELF_MACHINE, 0);
        if (kernel_size == ELF_LOAD_WRONG_ENDIAN) {
            kernel_size = load_elf(kernel_filename,
                                   translate_kernel_address, NULL,
                                   NULL, &lowaddr, NULL, 0, PPC_ELF_MACHINE, 0);
            kernel_le = kernel_size > 0;
        }
        if (kernel_size < 0) {
            fprintf(stderr, "qemu: error loading %s: %s\n",
                    kernel_filename, load_elf_strerror(kernel_size));
            exit(1);
        }

        /* load initrd */
        if (initrd_filename) {
            /* Try to locate the initrd in the gap between the kernel
             * and the firmware. Add a bit of space just in case
             */
            initrd_base = (KERNEL_LOAD_ADDR + kernel_size + 0x1ffff) & ~0xffff;
            initrd_size = load_image_targphys(initrd_filename, initrd_base,
                                              load_limit - initrd_base);
            if (initrd_size < 0) {
                fprintf(stderr, "qemu: could not load initial ram disk '%s'\n",
                        initrd_filename);
                exit(1);
            }
        } else {
            initrd_base = 0;
            initrd_size = 0;
        }
    }

    if (bios_name == NULL) {
        bios_name = FW_FILE_NAME;
    }
    filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, bios_name);
    if (!filename) {
        error_report("Could not find LPAR firmware '%s'", bios_name);
        exit(1);
    }
    fw_size = load_image_targphys(filename, 0, FW_MAX_SIZE);
    if (fw_size <= 0) {
        error_report("Could not load LPAR firmware '%s'", filename);
        exit(1);
    }
    g_free(filename);

    /* FIXME: Should register things through the MachineState's qdev
     * interface, this is a legacy from the sPAPREnvironment structure
     * which predated MachineState but had a similar function */
    vmstate_register(NULL, 0, &vmstate_spapr, spapr);
    register_savevm_live(NULL, "spapr/htab", -1, 1,
                         &savevm_htab_handlers, spapr);

    /* Prepare the device tree */
    spapr->fdt_skel = spapr_create_fdt_skel(initrd_base, initrd_size,
                                            kernel_size, kernel_le,
                                            kernel_cmdline,
                                            spapr->check_exception_irq);
    assert(spapr->fdt_skel != NULL);

    /* used by RTAS */
    QTAILQ_INIT(&spapr->ccs_list);
    qemu_register_reset(spapr_ccs_reset_hook, spapr);

    qemu_register_boot_set(spapr_boot_set, spapr);
}

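/*
 * MachineClass::kvm_type hook: translate the user-visible kvm-type
 * option ("HV" or "PR") into the numeric VM type handed to KVM when
 * the VM is created; 0 lets KVM pick a default.
 */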
static int spapr_kvm_type(const char *vm_type)
{
    if (!vm_type) {
        return 0;
    }

    if (!strcmp(vm_type, "HV")) {
        return 1;
    }

    if (!strcmp(vm_type, "PR")) {
        return 2;
    }

    error_report("Unknown kvm-type specified '%s'", vm_type);
    exit(1);
}

/*
 * Implementation of an interface to adjust firmware path
 * for the bootindex property handling.
 */
static char *spapr_get_fw_dev_path(FWPathProvider *p, BusState *bus,
                                   DeviceState *dev)
{
#define CAST(type, obj, name) \
    ((type *)object_dynamic_cast(OBJECT(obj), (name)))
    SCSIDevice *d = CAST(SCSIDevice,  dev, TYPE_SCSI_DEVICE);
    sPAPRPHBState *phb = CAST(sPAPRPHBState, dev, TYPE_SPAPR_PCI_HOST_BRIDGE);

    if (d) {
        void *spapr = CAST(void, bus->parent, "spapr-vscsi");
        VirtIOSCSI *virtio = CAST(VirtIOSCSI, bus->parent, TYPE_VIRTIO_SCSI);
        USBDevice *usb = CAST(USBDevice, bus->parent, TYPE_USB_DEVICE);

        if (spapr) {
            /*
             * Replace "channel@0/disk@0,0" with "disk@8000000000000000":
             * We use SRP luns of the form 8000 | (bus << 8) | (id << 5) | lun
             * in the top 16 bits of the 64-bit LUN
             */
            unsigned id = 0x8000 | (d->id << 8) | d->lun;
            return g_strdup_printf("%s@%"PRIX64, qdev_fw_name(dev),
                                   (uint64_t)id << 48);
        } else if (virtio) {
            /*
             * We use SRP luns of the form 01000000 | (target << 8) | lun
             * in the top 32 bits of the 64-bit LUN
             * Note: the quote above is from SLOF and it is wrong,
             * the actual binding is:
             * swap 0100 or 10 << or 20 << ( target lun-id -- srplun )
             */
            unsigned id = 0x1000000 | (d->id << 16) | d->lun;
            return g_strdup_printf("%s@%"PRIX64, qdev_fw_name(dev),
                                   (uint64_t)id << 32);
        } else if (usb) {
            /*
             * We use SRP luns of the form 01000000 | (usb-port << 16) | lun
             * in the top 32 bits of the 64-bit LUN
             */
            unsigned usb_port = atoi(usb->port->path);
            unsigned id = 0x1000000 | (usb_port << 16) | d->lun;
            return g_strdup_printf("%s@%"PRIX64, qdev_fw_name(dev),
                                   (uint64_t)id << 32);
        }
    }

    if (phb) {
        /* Replace "pci" with "pci@800000020000000" */
        return g_strdup_printf("pci@%"PRIX64, phb->buid);
    }

    return NULL;
}

static char *spapr_get_kvm_type(Object *obj, Error **errp)
{
    sPAPRMachineState *spapr = SPAPR_MACHINE(obj);

    return g_strdup(spapr->kvm_type);
}

static void spapr_set_kvm_type(Object *obj, const char *value, Error **errp)
{
    sPAPRMachineState *spapr = SPAPR_MACHINE(obj);

    g_free(spapr->kvm_type);
    spapr->kvm_type = g_strdup(value);
}

static void spapr_machine_initfn(Object *obj)
{
    object_property_add_str(obj, "kvm-type",
                            spapr_get_kvm_type, spapr_set_kvm_type, NULL);
    object_property_set_description(obj, "kvm-type",
                                    "Specifies the KVM virtualization mode (HV, PR)",
                                    NULL);
}

static void spapr_machine_finalizefn(Object *obj)
{
    sPAPRMachineState *spapr = SPAPR_MACHINE(obj);

    g_free(spapr->kvm_type);
}

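/*
 * NMI interface: the monitor's "nmi" command is implemented on sPAPR
 * as a system reset interrupt delivered to every vCPU.
 */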
static void ppc_cpu_do_nmi_on_cpu(void *arg)
{
    CPUState *cs = arg;

    cpu_synchronize_state(cs);
    ppc_cpu_do_system_reset(cs);
}

static void spapr_nmi(NMIState *n, int cpu_index, Error **errp)
{
    CPUState *cs;

    CPU_FOREACH(cs) {
        async_run_on_cpu(cs, ppc_cpu_do_nmi_on_cpu, cs);
    }
}

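/*
 * For hotplugged DIMMs, attach the LMB DR connectors covering the new
 * memory (one per SPAPR_MEMORY_BLOCK_SIZE block) and send an add
 * notification to the guest; cold-plugged memory is instead described
 * in the device tree at boot.
 */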
static void spapr_add_lmbs(DeviceState *dev, uint64_t addr, uint64_t size,
                           uint32_t node, Error **errp)
{
    sPAPRDRConnector *drc;
    sPAPRDRConnectorClass *drck;
    uint32_t nr_lmbs = size/SPAPR_MEMORY_BLOCK_SIZE;
    int i, fdt_offset, fdt_size;
    void *fdt;

    /*
     * Check for DRC connectors and send hotplug notification to the
     * guest only in case of hotplugged memory. This allows cold plugged
     * memory to be specified at boot time.
     */
    if (!dev->hotplugged) {
        return;
    }

    for (i = 0; i < nr_lmbs; i++) {
        drc = spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_LMB,
                addr/SPAPR_MEMORY_BLOCK_SIZE);
        g_assert(drc);

        fdt = create_device_tree(&fdt_size);
        fdt_offset = spapr_populate_memory_node(fdt, node, addr,
                                                SPAPR_MEMORY_BLOCK_SIZE);

        drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
        drck->attach(drc, dev, fdt, fdt_offset, !dev->hotplugged, errp);
        addr += SPAPR_MEMORY_BLOCK_SIZE;
    }
    spapr_hotplug_req_add_by_count(SPAPR_DR_CONNECTOR_TYPE_LMB, nr_lmbs);
}

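/*
 * Plug handler for PC-DIMM devices: map the DIMM into the hotplug
 * memory region, look up its assigned address and hand the range
 * over to spapr_add_lmbs().
 */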
static void spapr_memory_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
                              uint32_t node, Error **errp)
{
    Error *local_err = NULL;
    sPAPRMachineState *ms = SPAPR_MACHINE(hotplug_dev);
    PCDIMMDevice *dimm = PC_DIMM(dev);
    PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm);
    MemoryRegion *mr = ddc->get_memory_region(dimm);
    uint64_t align = memory_region_get_alignment(mr);
    uint64_t size = memory_region_size(mr);
    uint64_t addr;

    if (size % SPAPR_MEMORY_BLOCK_SIZE) {
        error_setg(&local_err, "Hotplugged memory size must be a multiple of "
                      "%lld MB", SPAPR_MEMORY_BLOCK_SIZE/M_BYTE);
        goto out;
    }

    pc_dimm_memory_plug(dev, &ms->hotplug_memory, mr, align, &local_err);
    if (local_err) {
        goto out;
    }

    addr = object_property_get_int(OBJECT(dimm), PC_DIMM_ADDR_PROP, &local_err);
    if (local_err) {
        pc_dimm_memory_unplug(dev, &ms->hotplug_memory, mr);
        goto out;
    }

    spapr_add_lmbs(dev, addr, size, node, &error_abort);

out:
    error_propagate(errp, local_err);
}

static void spapr_machine_device_plug(HotplugHandler *hotplug_dev,
                                      DeviceState *dev, Error **errp)
{
    sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(qdev_get_machine());

    if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) {
        int node;

        if (!smc->dr_lmb_enabled) {
            error_setg(errp, "Memory hotplug not supported for this machine");
            return;
        }
        node = object_property_get_int(OBJECT(dev), PC_DIMM_NODE_PROP, errp);
        if (*errp) {
            return;
        }

        /*
         * Currently PowerPC kernel doesn't allow hot-adding memory to
         * memory-less node, but instead will silently add the memory
         * to the first node that has some memory. This causes two
         * unexpected behaviours for the user.
         *
         * - Memory gets hotplugged to a different node than what the user
         *   specified.
         * - Since pc-dimm subsystem in QEMU still thinks that memory belongs
         *   to memory-less node, a reboot will set things accordingly
         *   and the previously hotplugged memory now ends in the right node.
         *   This appears as if some memory moved from one node to another.
         *
         * So until kernel starts supporting memory hotplug to memory-less
         * nodes, just prevent such attempts upfront in QEMU.
         */
        if (nb_numa_nodes && !numa_info[node].node_mem) {
            error_setg(errp, "Can't hotplug memory to memory-less node %d",
                       node);
            return;
        }

        spapr_memory_plug(hotplug_dev, dev, node, errp);
    }
}

static void spapr_machine_device_unplug(HotplugHandler *hotplug_dev,
                                      DeviceState *dev, Error **errp)
{
    if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) {
        error_setg(errp, "Memory hot unplug not supported by sPAPR");
    }
}

static HotplugHandler *spapr_get_hotplug_handler(MachineState *machine,
                                                 DeviceState *dev)
{
    if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) {
        return HOTPLUG_HANDLER(machine);
    }
    return NULL;
}

static unsigned spapr_cpu_index_to_socket_id(unsigned cpu_index)
{
    /* Allocate to NUMA nodes on a "socket" basis (not that concept of
     * socket means much for the paravirtualized PAPR platform) */
    return cpu_index / smp_threads / smp_cores;
}

static void spapr_machine_class_init(ObjectClass *oc, void *data)
{
    MachineClass *mc = MACHINE_CLASS(oc);
    sPAPRMachineClass *smc = SPAPR_MACHINE_CLASS(oc);
    FWPathProviderClass *fwc = FW_PATH_PROVIDER_CLASS(oc);
    NMIClass *nc = NMI_CLASS(oc);
    HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc);

    mc->desc = "pSeries Logical Partition (PAPR compliant)";

    /*
     * We set up the default / latest behaviour here.  The class_init
     * functions for the specific versioned machine types can override
     * these details for backwards compatibility
     */
    mc->init = ppc_spapr_init;
    mc->reset = ppc_spapr_reset;
    mc->block_default_type = IF_SCSI;
    mc->max_cpus = MAX_CPUMASK_BITS;
    mc->no_parallel = 1;
    mc->default_boot_order = "";
    mc->default_ram_size = 512 * M_BYTE;
    mc->kvm_type = spapr_kvm_type;
    mc->has_dynamic_sysbus = true;
    mc->pci_allow_0_address = true;
    mc->get_hotplug_handler = spapr_get_hotplug_handler;
    hc->plug = spapr_machine_device_plug;
    hc->unplug = spapr_machine_device_unplug;
    mc->cpu_index_to_socket_id = spapr_cpu_index_to_socket_id;

    smc->dr_lmb_enabled = true;
    fwc->get_dev_path = spapr_get_fw_dev_path;
    nc->nmi_monitor_handler = spapr_nmi;
}

static const TypeInfo spapr_machine_info = {
    .name          = TYPE_SPAPR_MACHINE,
    .parent        = TYPE_MACHINE,
    .abstract      = true,
    .instance_size = sizeof(sPAPRMachineState),
    .instance_init = spapr_machine_initfn,
    .instance_finalize = spapr_machine_finalizefn,
    .class_size    = sizeof(sPAPRMachineClass),
    .class_init    = spapr_machine_class_init,
    .interfaces = (InterfaceInfo[]) {
        { TYPE_FW_PATH_PROVIDER },
        { TYPE_NMI },
        { TYPE_HOTPLUG_HANDLER },
        { }
    },
};

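/*
 * Boilerplate for registering a versioned "pseries-<verstr>" machine
 * type: the generated glue calls the per-version *_instance_options()
 * and *_class_options() hooks below; when "latest" is true the type
 * also gets the "pseries" alias and becomes the default machine.
 */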
#define DEFINE_SPAPR_MACHINE(suffix, verstr, latest)                 \
    static void spapr_machine_##suffix##_class_init(ObjectClass *oc, \
                                                    void *data)      \
    {                                                                \
        MachineClass *mc = MACHINE_CLASS(oc);                        \
        spapr_machine_##suffix##_class_options(mc);                  \
        if (latest) {                                                \
            mc->alias = "pseries";                                   \
            mc->is_default = 1;                                      \
        }                                                            \
    }                                                                \
    static void spapr_machine_##suffix##_instance_init(Object *obj)  \
    {                                                                \
        MachineState *machine = MACHINE(obj);                        \
        spapr_machine_##suffix##_instance_options(machine);          \
    }                                                                \
    static const TypeInfo spapr_machine_##suffix##_info = {          \
        .name = MACHINE_TYPE_NAME("pseries-" verstr),                \
        .parent = TYPE_SPAPR_MACHINE,                                \
        .class_init = spapr_machine_##suffix##_class_init,           \
        .instance_init = spapr_machine_##suffix##_instance_init,     \
    };                                                               \
    static void spapr_machine_register_##suffix(void)                \
    {                                                                \
        type_register(&spapr_machine_##suffix##_info);               \
    }                                                                \
    machine_init(spapr_machine_register_##suffix)

/*
 * pseries-2.6
 */
static void spapr_machine_2_6_instance_options(MachineState *machine)
{
}

static void spapr_machine_2_6_class_options(MachineClass *mc)
{
    /* Defaults for the latest behaviour inherited from the base class */
}

DEFINE_SPAPR_MACHINE(2_6, "2.6", true);

/*
 * pseries-2.5
 */
#define SPAPR_COMPAT_2_5 \
        HW_COMPAT_2_5

static void spapr_machine_2_5_instance_options(MachineState *machine)
{
}

static void spapr_machine_2_5_class_options(MachineClass *mc)
{
    sPAPRMachineClass *smc = SPAPR_MACHINE_CLASS(mc);

    spapr_machine_2_6_class_options(mc);
    smc->use_ohci_by_default = true;
    SET_MACHINE_COMPAT(mc, SPAPR_COMPAT_2_5);
}

DEFINE_SPAPR_MACHINE(2_5, "2.5", false);

/*
 * pseries-2.4
 */
#define SPAPR_COMPAT_2_4 \
        HW_COMPAT_2_4

static void spapr_machine_2_4_instance_options(MachineState *machine)
{
    spapr_machine_2_5_instance_options(machine);
}

static void spapr_machine_2_4_class_options(MachineClass *mc)
{
    sPAPRMachineClass *smc = SPAPR_MACHINE_CLASS(mc);

    spapr_machine_2_5_class_options(mc);
    smc->dr_lmb_enabled = false;
    SET_MACHINE_COMPAT(mc, SPAPR_COMPAT_2_4);
}

DEFINE_SPAPR_MACHINE(2_4, "2.4", false);

/*
 * pseries-2.3
 */
#define SPAPR_COMPAT_2_3 \
        SPAPR_COMPAT_2_4 \
        HW_COMPAT_2_3 \
        {\
            .driver   = "spapr-pci-host-bridge",\
            .property = "dynamic-reconfiguration",\
            .value    = "off",\
        },

static void spapr_machine_2_3_instance_options(MachineState *machine)
{
    spapr_machine_2_4_instance_options(machine);
    savevm_skip_section_footers();
    global_state_set_optional();
}

static void spapr_machine_2_3_class_options(MachineClass *mc)
{
    spapr_machine_2_4_class_options(mc);
    SET_MACHINE_COMPAT(mc, SPAPR_COMPAT_2_3);
}
DEFINE_SPAPR_MACHINE(2_3, "2.3", false);

/*
 * pseries-2.2
 */

#define SPAPR_COMPAT_2_2 \
        SPAPR_COMPAT_2_3 \
        HW_COMPAT_2_2 \
        {\
            .driver   = TYPE_SPAPR_PCI_HOST_BRIDGE,\
            .property = "mem_win_size",\
            .value    = "0x20000000",\
        },

static void spapr_machine_2_2_instance_options(MachineState *machine)
{
    spapr_machine_2_3_instance_options(machine);
}

static void spapr_machine_2_2_class_options(MachineClass *mc)
{
    spapr_machine_2_3_class_options(mc);
    SET_MACHINE_COMPAT(mc, SPAPR_COMPAT_2_2);
}
DEFINE_SPAPR_MACHINE(2_2, "2.2", false);

/*
 * pseries-2.1
 */
#define SPAPR_COMPAT_2_1 \
        SPAPR_COMPAT_2_2 \
        HW_COMPAT_2_1

static void spapr_machine_2_1_instance_options(MachineState *machine)
{
    spapr_machine_2_2_instance_options(machine);
}

static void spapr_machine_2_1_class_options(MachineClass *mc)
{
    spapr_machine_2_2_class_options(mc);
    SET_MACHINE_COMPAT(mc, SPAPR_COMPAT_2_1);
}
DEFINE_SPAPR_MACHINE(2_1, "2.1", false);

static void spapr_machine_register_types(void)
{
    type_register_static(&spapr_machine_info);
}

type_init(spapr_machine_register_types)