spapr.c 59.0 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26
/*
 * QEMU PowerPC pSeries Logical Partition (aka sPAPR) hardware System Emulator
 *
 * Copyright (c) 2004-2007 Fabrice Bellard
 * Copyright (c) 2007 Jocelyn Mayer
 * Copyright (c) 2010 David Gibson, IBM Corporation.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 */
27
#include "sysemu/sysemu.h"
28
#include "sysemu/numa.h"
29
#include "hw/hw.h"
30
#include "hw/fw-path-provider.h"
31
#include "elf.h"
P
Paolo Bonzini 已提交
32
#include "net/net.h"
33
#include "sysemu/block-backend.h"
34 35
#include "sysemu/cpus.h"
#include "sysemu/kvm.h"
36
#include "kvm_ppc.h"
37
#include "mmu-hash64.h"
38
#include "qom/cpu.h"
39 40

#include "hw/boards.h"
P
Paolo Bonzini 已提交
41
#include "hw/ppc/ppc.h"
42 43
#include "hw/loader.h"

P
Paolo Bonzini 已提交
44 45 46 47
#include "hw/ppc/spapr.h"
#include "hw/ppc/spapr_vio.h"
#include "hw/pci-host/spapr.h"
#include "hw/ppc/xics.h"
48
#include "hw/pci/msi.h"
49

50
#include "hw/pci/pci.h"
51 52
#include "hw/scsi/scsi.h"
#include "hw/virtio/virtio-scsi.h"
53

54
#include "exec/address-spaces.h"
55
#include "hw/usb.h"
56
#include "qemu/config-file.h"
57
#include "qemu/error-report.h"
58
#include "trace.h"
59
#include "hw/nmi.h"
A
Avi Kivity 已提交
60

61 62
#include "hw/compat.h"

63 64
#include <libfdt.h>

65 66 67 68 69 70 71 72 73 74
/* SLOF memory layout:
 *
 * The raw SLOF image is loaded at 0; it copies its romfs right below the
 * flat device-tree, then positions SLOF itself 31M below that.
 *
 * So we set FW_OVERHEAD to 40MB which should account for all of that
 * and more
 *
 * We load our kernel at 4M, leaving space for SLOF initial image
 */
75
/* Size of the buffer the flattened device tree is built in (256 KiB). */
#define FDT_MAX_SIZE            0x40000
/* Space reserved for RTAS directly above the device tree (64 KiB). */
#define RTAS_MAX_SIZE           0x10000
#define RTAS_MAX_ADDR           0x80000000 /* RTAS must stay below that */
/* Room left at address 0 for the initial firmware image (4 MiB). */
#define FW_MAX_SIZE             0x400000
#define FW_FILE_NAME            "slof.bin"
/* 40 MiB allowance for firmware, see the SLOF memory layout notes. */
#define FW_OVERHEAD             0x2800000
/* The kernel is loaded right after the space kept for the firmware image. */
#define KERNEL_LOAD_ADDR        FW_MAX_SIZE

/* NOTE(review): presumably the minimum RMA SLOF needs, in MiB - confirm. */
#define MIN_RMA_SLOF            128UL

/* Timebase frequency advertised to the guest when KVM is not in use. */
#define TIMEBASE_FREQ           512000000ULL

#define MAX_CPUS                255

/* phandle given to the interrupt-controller node in the device tree. */
#define PHANDLE_XICP            0x00001111

/* Size in bytes of the hash page table described by (spapr)->htab_shift. */
#define HTAB_SIZE(spapr)        (1ULL << ((spapr)->htab_shift))

93
typedef struct sPAPRMachineState sPAPRMachineState;
94

95
#define TYPE_SPAPR_MACHINE      "spapr-machine"
96
#define SPAPR_MACHINE(obj) \
97
    OBJECT_CHECK(sPAPRMachineState, (obj), TYPE_SPAPR_MACHINE)
98 99

/**
100
 * sPAPRMachineState:
101
 */
102
struct sPAPRMachineState {
103 104
    /*< private >*/
    MachineState parent_obj;
E
Eduardo Habkost 已提交
105 106 107

    /*< public >*/
    char *kvm_type;
108 109
};

110 111
/*
 * File-scope pointer to the sPAPR environment; read directly by
 * spapr_h_cas_compose_response() and ppc_spapr_reset() in this file.
 * NOTE(review): has external linkage - confirm whether other files use it.
 */
sPAPREnvironment *spapr;

112
/*
 * Create and realize an XICS interrupt controller device.
 *
 * @type:       QOM type name handed to qdev_create()
 * @nr_servers: value for the device's "nr_servers" property
 * @nr_irqs:    value for the device's "nr_irqs" property
 * @errp:       receives the error when realizing the device fails
 *
 * Returns the device viewed through XICS_COMMON(), or NULL when realize
 * failed; in that case the device is unparented again before returning.
 */
static XICSState *try_create_xics(const char *type, int nr_servers,
                                  int nr_irqs, Error **errp)
{
    Error *err = NULL;
    DeviceState *dev;

    dev = qdev_create(NULL, type);
    qdev_prop_set_uint32(dev, "nr_servers", nr_servers);
    qdev_prop_set_uint32(dev, "nr_irqs", nr_irqs);
    object_property_set_bool(OBJECT(dev), true, "realized", &err);
    if (err) {
        error_propagate(errp, err);
        object_unparent(OBJECT(dev));
        return NULL;
    }
    return XICS_COMMON(dev);
}

130 131
/*
 * Instantiate the machine's XICS interrupt controller.
 *
 * With KVM enabled and the in-kernel irqchip allowed, TYPE_KVM_XICS is
 * tried first.  If that does not yield a device, the emulated TYPE_XICS is
 * created instead; a failure there aborts via error_abort.
 *
 * @machine:    machine whose kernel_irqchip settings are consulted
 * @nr_servers: forwarded to try_create_xics()
 * @nr_irqs:    forwarded to try_create_xics()
 *
 * Returns the created controller.
 */
static XICSState *xics_system_init(MachineState *machine,
                                   int nr_servers, int nr_irqs)
{
    XICSState *icp = NULL;

    if (kvm_enabled()) {
        Error *err = NULL;

        if (machine_kernel_irqchip_allowed(machine)) {
            icp = try_create_xics(TYPE_KVM_XICS, nr_servers, nr_irqs, &err);
        }
        if (machine_kernel_irqchip_required(machine) && !icp) {
            error_report("kernel_irqchip requested but unavailable: %s",
                         error_get_pretty(err));
        }
        /* The error has been reported (or is ignored); do not leak it. */
        error_free(err);
    }

    if (!icp) {
        icp = try_create_xics(TYPE_XICS, nr_servers, nr_irqs, &error_abort);
    }

    return icp;
}

154 155 156 157 158 159 160 161
/*
 * Set the per-thread interrupt server properties (and "cpu-version" when
 * the CPU has one) on a CPU node.
 *
 * @fdt:         device tree blob being edited
 * @offset:      offset of the CPU node in @fdt
 * @cpu:         CPU the node describes
 * @smt_threads: number of threads to describe; sizes the on-stack arrays
 *
 * Returns the libfdt result of the last call made: negative on the first
 * failure, otherwise the result of the final fdt_setprop().
 */
static int spapr_fixup_cpu_smt_dt(void *fdt, int offset, PowerPCCPU *cpu,
                                  int smt_threads)
{
    int i, ret = 0;
    uint32_t servers_prop[smt_threads];
    uint32_t gservers_prop[smt_threads * 2];
    int index = ppc_get_vcpu_dt_id(cpu);

    if (cpu->cpu_version) {
        ret = fdt_setprop_cell(fdt, offset, "cpu-version", cpu->cpu_version);
        if (ret < 0) {
            return ret;
        }
    }

    /* Build interrupt servers and gservers properties */
    for (i = 0; i < smt_threads; i++) {
        servers_prop[i] = cpu_to_be32(index + i);
        /* Hack, direct the group queues back to cpu 0 */
        gservers_prop[i*2] = cpu_to_be32(index + i);
        gservers_prop[i*2 + 1] = 0;
    }
    ret = fdt_setprop(fdt, offset, "ibm,ppc-interrupt-server#s",
                      servers_prop, sizeof(servers_prop));
    if (ret < 0) {
        return ret;
    }
    ret = fdt_setprop(fdt, offset, "ibm,ppc-interrupt-gserver#s",
                      gservers_prop, sizeof(gservers_prop));

    return ret;
}

187
/*
 * Add or refresh the per-CPU properties under /cpus in an editable tree.
 *
 * For every CPU whose device-tree id is a multiple of the SMT thread count,
 * the "/cpus/<fw_name>@<id>" node is looked up (and created together with
 * /cpus if missing) and then given "ibm,associativity" (only when more than
 * one NUMA node is configured), "ibm,pft-size" and the interrupt server
 * properties.
 *
 * @fdt:   device tree blob being edited
 * @spapr: environment supplying htab_shift
 *
 * Returns a negative libfdt error from the first failing call, otherwise
 * the result of the last call made (0 if no CPU node was touched).
 */
static int spapr_fixup_cpu_dt(void *fdt, sPAPREnvironment *spapr)
{
    int ret = 0, offset, cpus_offset;
    CPUState *cs;
    char cpu_model[32];
    int smt = kvmppc_smt_threads();
    uint32_t pft_size_prop[] = {0, cpu_to_be32(spapr->htab_shift)};

    CPU_FOREACH(cs) {
        PowerPCCPU *cpu = POWERPC_CPU(cs);
        DeviceClass *dc = DEVICE_GET_CLASS(cs);
        int index = ppc_get_vcpu_dt_id(cpu);
        uint32_t associativity[] = {cpu_to_be32(0x5),
                                    cpu_to_be32(0x0),
                                    cpu_to_be32(0x0),
                                    cpu_to_be32(0x0),
                                    cpu_to_be32(cs->numa_node),
                                    cpu_to_be32(index)};

        /* Only ids that are a multiple of the SMT width get a node. */
        if ((index % smt) != 0) {
            continue;
        }

        snprintf(cpu_model, sizeof(cpu_model), "%s@%x", dc->fw_name, index);

        cpus_offset = fdt_path_offset(fdt, "/cpus");
        if (cpus_offset < 0) {
            cpus_offset = fdt_add_subnode(fdt, fdt_path_offset(fdt, "/"),
                                          "cpus");
            if (cpus_offset < 0) {
                return cpus_offset;
            }
        }
        offset = fdt_subnode_offset(fdt, cpus_offset, cpu_model);
        if (offset < 0) {
            offset = fdt_add_subnode(fdt, cpus_offset, cpu_model);
            if (offset < 0) {
                return offset;
            }
        }

        if (nb_numa_nodes > 1) {
            ret = fdt_setprop(fdt, offset, "ibm,associativity", associativity,
                              sizeof(associativity));
            if (ret < 0) {
                return ret;
            }
        }

        ret = fdt_setprop(fdt, offset, "ibm,pft-size",
                          pft_size_prop, sizeof(pft_size_prop));
        if (ret < 0) {
            return ret;
        }

        ret = spapr_fixup_cpu_smt_dt(fdt, offset, cpu,
                                     ppc_get_compat_smt_threads(cpu));
        if (ret < 0) {
            return ret;
        }
    }
    return ret;
}

251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284

/*
 * Serialize the CPU's segment/page size table into big-endian cells.
 *
 * Each segment size is emitted as: page_shift, slb_enc, number of
 * encodings, followed by a (page_shift, pte_enc) pair per encoding.  The
 * table ends at the first segment entry with a zero page_shift; the
 * encodings of a segment end at the first one with a zero page_shift.
 *
 * @env:     CPU state providing env->sps
 * @prop:    output buffer
 * @maxsize: size of @prop in bytes
 *
 * Returns the number of bytes written to @prop.  Output stops early, before
 * the first segment that does not fit.
 */
static size_t create_page_sizes_prop(CPUPPCState *env, uint32_t *prop,
                                     size_t maxsize)
{
    size_t maxcells = maxsize / sizeof(uint32_t);
    int i, j, count;
    uint32_t *p = prop;

    for (i = 0; i < PPC_PAGE_SIZES_MAX_SZ; i++) {
        struct ppc_one_seg_page_size *sps = &env->sps.sps[i];
        size_t used, needed;

        if (!sps->page_shift) {
            break;
        }
        for (count = 0; count < PPC_PAGE_SIZES_MAX_SZ; count++) {
            if (sps->enc[count].page_shift == 0) {
                break;
            }
        }
        /*
         * Same threshold as "used >= maxcells - needed", but written as an
         * addition so that it cannot wrap around when the buffer is smaller
         * than one entry (maxcells < needed).
         */
        used = p - prop;
        needed = 3 + (size_t)count * 2;
        if (used + needed >= maxcells) {
            break;
        }
        *(p++) = cpu_to_be32(sps->page_shift);
        *(p++) = cpu_to_be32(sps->slb_enc);
        *(p++) = cpu_to_be32(count);
        for (j = 0; j < count; j++) {
            *(p++) = cpu_to_be32(sps->enc[j].page_shift);
            *(p++) = cpu_to_be32(sps->enc[j].pte_enc);
        }
    }

    return (p - prop) * sizeof(uint32_t);
}

285 286 287 288 289 290 291 292 293 294 295 296 297
/*
 * Size of the first NUMA node that has memory, rounded down to a power of
 * two and capped at ram_size.  Falls back to ram_size when no NUMA node is
 * configured or none of them has memory.
 */
static hwaddr spapr_node0_size(void)
{
    int node;

    for (node = 0; node < nb_numa_nodes; ++node) {
        if (!numa_info[node].node_mem) {
            continue;
        }
        return MIN(pow2floor(numa_info[node].node_mem), ram_size);
    }

    return ram_size;
}

298 299 300 301 302 303 304 305 306 307
/*
 * Evaluate a libfdt expression once; if the result is negative, print the
 * expression text and the libfdt error string to stderr and exit(1).
 *
 * The temporary is deliberately not called "ret": callers commonly have a
 * local of that name, and _FDT(ret) would otherwise expand to a
 * self-initialization of the inner variable.
 */
#define _FDT(exp) \
    do { \
        int fdt_ret_ = (exp);                                      \
        if (fdt_ret_ < 0) {                                        \
            fprintf(stderr, "qemu: error creating device tree: %s: %s\n", \
                    #exp, fdt_strerror(fdt_ret_));                 \
            exit(1);                                               \
        }                                                          \
    } while (0)

308 309 310 311
/*
 * Append @s1 to @s together with its terminating NUL byte, so that @s
 * accumulates a sequence of NUL-separated strings.
 */
static void add_str(GString *s, const gchar *s1)
{
    gssize len_with_nul = strlen(s1) + 1;

    g_string_append_len(s, s1, len_with_nul);
}
312

313
/*
 * Build the skeleton flattened device tree for the machine.
 *
 * The tree is written sequentially into a freshly allocated buffer of
 * FDT_MAX_SIZE bytes and contains: the memory reservation map, the root
 * properties, /chosen, /cpus (one node per CPU whose device-tree id is a
 * multiple of the SMT thread count), /rtas, /interrupt-controller,
 * /vdevice, the event sources and, under KVM, /hypervisor.
 *
 * @initrd_base:    load address of the initrd
 * @initrd_size:    size of the initrd; 0 means no reservation is added
 * @kernel_size:    size of the kernel loaded at KERNEL_LOAD_ADDR; 0 means
 *                  no reservation and no "qemu,boot-kernel" property
 * @little_endian:  adds "qemu,boot-kernel-le" when a kernel is present
 * @kernel_cmdline: value of /chosen "bootargs"
 * @epow_irq:       forwarded to spapr_events_fdt_skel()
 *
 * Any libfdt failure terminates the process through _FDT().
 *
 * Returns the buffer holding the finished tree; it is not freed here.
 */
static void *spapr_create_fdt_skel(hwaddr initrd_base,
                                   hwaddr initrd_size,
                                   hwaddr kernel_size,
                                   bool little_endian,
                                   const char *kernel_cmdline,
                                   uint32_t epow_irq)
{
    void *fdt;
    CPUState *cs;
    uint32_t start_prop = cpu_to_be32(initrd_base);
    uint32_t end_prop = cpu_to_be32(initrd_base + initrd_size);
    GString *hypertas = g_string_sized_new(256);
    GString *qemu_hypertas = g_string_sized_new(256);
    uint32_t refpoints[] = {cpu_to_be32(0x4), cpu_to_be32(0x4)};
    uint32_t interrupt_server_ranges_prop[] = {0, cpu_to_be32(smp_cpus)};
    int smt = kvmppc_smt_threads();
    unsigned char vec5[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x80};
    QemuOpts *opts = qemu_opts_find(qemu_find_opts("smp-opts"), NULL);
    unsigned sockets = opts ? qemu_opt_get_number(opts, "sockets", 0) : 0;
    uint32_t cpus_per_socket = sockets ? (smp_cpus / sockets) : 1;
    char *buf;

    /*
     * With more sockets than CPUs the integer division above yields 0,
     * which would be used as a divisor for "ibm,chip-id" below.
     */
    if (!cpus_per_socket) {
        cpus_per_socket = 1;
    }

    add_str(hypertas, "hcall-pft");
    add_str(hypertas, "hcall-term");
    add_str(hypertas, "hcall-dabr");
    add_str(hypertas, "hcall-interrupt");
    add_str(hypertas, "hcall-tce");
    add_str(hypertas, "hcall-vio");
    add_str(hypertas, "hcall-splpar");
    add_str(hypertas, "hcall-bulk");
    add_str(hypertas, "hcall-set-mode");
    add_str(qemu_hypertas, "hcall-memop1");

    fdt = g_malloc0(FDT_MAX_SIZE);
    _FDT((fdt_create(fdt, FDT_MAX_SIZE)));

    if (kernel_size) {
        _FDT((fdt_add_reservemap_entry(fdt, KERNEL_LOAD_ADDR, kernel_size)));
    }
    if (initrd_size) {
        _FDT((fdt_add_reservemap_entry(fdt, initrd_base, initrd_size)));
    }
    _FDT((fdt_finish_reservemap(fdt)));

    /* Root node */
    _FDT((fdt_begin_node(fdt, "")));
    _FDT((fdt_property_string(fdt, "device_type", "chrp")));
    _FDT((fdt_property_string(fdt, "model", "IBM pSeries (emulated by qemu)")));
    _FDT((fdt_property_string(fdt, "compatible", "qemu,pseries")));

    /*
     * Add info to guest to identify which host it is being run on
     * and what the uuid of the guest is
     */
    if (kvmppc_get_host_model(&buf)) {
        _FDT((fdt_property_string(fdt, "host-model", buf)));
        g_free(buf);
    }
    if (kvmppc_get_host_serial(&buf)) {
        _FDT((fdt_property_string(fdt, "host-serial", buf)));
        g_free(buf);
    }

    buf = g_strdup_printf(UUID_FMT, qemu_uuid[0], qemu_uuid[1],
                          qemu_uuid[2], qemu_uuid[3], qemu_uuid[4],
                          qemu_uuid[5], qemu_uuid[6], qemu_uuid[7],
                          qemu_uuid[8], qemu_uuid[9], qemu_uuid[10],
                          qemu_uuid[11], qemu_uuid[12], qemu_uuid[13],
                          qemu_uuid[14], qemu_uuid[15]);

    _FDT((fdt_property_string(fdt, "vm,uuid", buf)));
    g_free(buf);

    _FDT((fdt_property_cell(fdt, "#address-cells", 0x2)));
    _FDT((fdt_property_cell(fdt, "#size-cells", 0x2)));

    /* /chosen */
    _FDT((fdt_begin_node(fdt, "chosen")));

    /* Set Form1_affinity */
    _FDT((fdt_property(fdt, "ibm,architecture-vec-5", vec5, sizeof(vec5))));

    _FDT((fdt_property_string(fdt, "bootargs", kernel_cmdline)));
    _FDT((fdt_property(fdt, "linux,initrd-start",
                       &start_prop, sizeof(start_prop))));
    _FDT((fdt_property(fdt, "linux,initrd-end",
                       &end_prop, sizeof(end_prop))));
    if (kernel_size) {
        uint64_t kprop[2] = { cpu_to_be64(KERNEL_LOAD_ADDR),
                              cpu_to_be64(kernel_size) };

        _FDT((fdt_property(fdt, "qemu,boot-kernel", &kprop, sizeof(kprop))));
        if (little_endian) {
            _FDT((fdt_property(fdt, "qemu,boot-kernel-le", NULL, 0)));
        }
    }
    if (boot_menu) {
        _FDT((fdt_property_cell(fdt, "qemu,boot-menu", boot_menu)));
    }
    _FDT((fdt_property_cell(fdt, "qemu,graphic-width", graphic_width)));
    _FDT((fdt_property_cell(fdt, "qemu,graphic-height", graphic_height)));
    _FDT((fdt_property_cell(fdt, "qemu,graphic-depth", graphic_depth)));

    _FDT((fdt_end_node(fdt)));

    /* cpus */
    _FDT((fdt_begin_node(fdt, "cpus")));

    _FDT((fdt_property_cell(fdt, "#address-cells", 0x1)));
    _FDT((fdt_property_cell(fdt, "#size-cells", 0x0)));

    CPU_FOREACH(cs) {
        PowerPCCPU *cpu = POWERPC_CPU(cs);
        CPUPPCState *env = &cpu->env;
        DeviceClass *dc = DEVICE_GET_CLASS(cs);
        PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cs);
        int index = ppc_get_vcpu_dt_id(cpu);
        char *nodename;
        uint32_t segs[] = {cpu_to_be32(28), cpu_to_be32(40),
                           0xffffffff, 0xffffffff};
        uint32_t tbfreq = kvm_enabled() ? kvmppc_get_tbfreq() : TIMEBASE_FREQ;
        uint32_t cpufreq = kvm_enabled() ? kvmppc_get_clockfreq() : 1000000000;
        uint32_t page_sizes_prop[64];
        size_t page_sizes_prop_size;

        /* Only ids that are a multiple of the SMT width get a node. */
        if ((index % smt) != 0) {
            continue;
        }

        nodename = g_strdup_printf("%s@%x", dc->fw_name, index);

        _FDT((fdt_begin_node(fdt, nodename)));

        g_free(nodename);

        _FDT((fdt_property_cell(fdt, "reg", index)));
        _FDT((fdt_property_string(fdt, "device_type", "cpu")));

        _FDT((fdt_property_cell(fdt, "cpu-version", env->spr[SPR_PVR])));
        _FDT((fdt_property_cell(fdt, "d-cache-block-size",
                                env->dcache_line_size)));
        _FDT((fdt_property_cell(fdt, "d-cache-line-size",
                                env->dcache_line_size)));
        _FDT((fdt_property_cell(fdt, "i-cache-block-size",
                                env->icache_line_size)));
        _FDT((fdt_property_cell(fdt, "i-cache-line-size",
                                env->icache_line_size)));

        if (pcc->l1_dcache_size) {
            _FDT((fdt_property_cell(fdt, "d-cache-size", pcc->l1_dcache_size)));
        } else {
            fprintf(stderr, "Warning: Unknown L1 dcache size for cpu\n");
        }
        if (pcc->l1_icache_size) {
            _FDT((fdt_property_cell(fdt, "i-cache-size", pcc->l1_icache_size)));
        } else {
            fprintf(stderr, "Warning: Unknown L1 icache size for cpu\n");
        }

        _FDT((fdt_property_cell(fdt, "timebase-frequency", tbfreq)));
        _FDT((fdt_property_cell(fdt, "clock-frequency", cpufreq)));
        _FDT((fdt_property_cell(fdt, "ibm,slb-size", env->slb_nr)));
        _FDT((fdt_property_string(fdt, "status", "okay")));
        _FDT((fdt_property(fdt, "64-bit", NULL, 0)));

        if (env->spr_cb[SPR_PURR].oea_read) {
            _FDT((fdt_property(fdt, "ibm,purr", NULL, 0)));
        }

        if (env->mmu_model & POWERPC_MMU_1TSEG) {
            _FDT((fdt_property(fdt, "ibm,processor-segment-sizes",
                               segs, sizeof(segs))));
        }

        /* Advertise VMX/VSX (vector extensions) if available
         *   0 / no property == no vector extensions
         *   1               == VMX / Altivec available
         *   2               == VSX available */
        if (env->insns_flags & PPC_ALTIVEC) {
            uint32_t vmx = (env->insns_flags2 & PPC2_VSX) ? 2 : 1;

            _FDT((fdt_property_cell(fdt, "ibm,vmx", vmx)));
        }

        /* Advertise DFP (Decimal Floating Point) if available
         *   0 / no property == no DFP
         *   1               == DFP available */
        if (env->insns_flags2 & PPC2_DFP) {
            _FDT((fdt_property_cell(fdt, "ibm,dfp", 1)));
        }

        page_sizes_prop_size = create_page_sizes_prop(env, page_sizes_prop,
                                                      sizeof(page_sizes_prop));
        if (page_sizes_prop_size) {
            _FDT((fdt_property(fdt, "ibm,segment-page-sizes",
                               page_sizes_prop, page_sizes_prop_size)));
        }

        _FDT((fdt_property_cell(fdt, "ibm,chip-id",
                                cs->cpu_index / cpus_per_socket)));

        _FDT((fdt_end_node(fdt)));
    }

    _FDT((fdt_end_node(fdt)));

    /* RTAS */
    _FDT((fdt_begin_node(fdt, "rtas")));

    if (!kvm_enabled() || kvmppc_spapr_use_multitce()) {
        add_str(hypertas, "hcall-multi-tce");
    }
    _FDT((fdt_property(fdt, "ibm,hypertas-functions", hypertas->str,
                       hypertas->len)));
    g_string_free(hypertas, TRUE);
    _FDT((fdt_property(fdt, "qemu,hypertas-functions", qemu_hypertas->str,
                       qemu_hypertas->len)));
    g_string_free(qemu_hypertas, TRUE);

    _FDT((fdt_property(fdt, "ibm,associativity-reference-points",
        refpoints, sizeof(refpoints))));

    _FDT((fdt_property_cell(fdt, "rtas-error-log-max", RTAS_ERROR_LOG_MAX)));
    _FDT((fdt_property_cell(fdt, "rtas-event-scan-rate",
                            RTAS_EVENT_SCAN_RATE)));

    /*
     * According to PAPR, rtas ibm,os-term does not guarantee a return
     * back to the guest cpu.
     *
     * While an additional ibm,extended-os-term property indicates that
     * rtas call return will always occur. Set this property.
     */
    _FDT((fdt_property(fdt, "ibm,extended-os-term", NULL, 0)));

    _FDT((fdt_end_node(fdt)));

    /* interrupt controller */
    _FDT((fdt_begin_node(fdt, "interrupt-controller")));

    _FDT((fdt_property_string(fdt, "device_type",
                              "PowerPC-External-Interrupt-Presentation")));
    _FDT((fdt_property_string(fdt, "compatible", "IBM,ppc-xicp")));
    _FDT((fdt_property(fdt, "interrupt-controller", NULL, 0)));
    _FDT((fdt_property(fdt, "ibm,interrupt-server-ranges",
                       interrupt_server_ranges_prop,
                       sizeof(interrupt_server_ranges_prop))));
    _FDT((fdt_property_cell(fdt, "#interrupt-cells", 2)));
    _FDT((fdt_property_cell(fdt, "linux,phandle", PHANDLE_XICP)));
    _FDT((fdt_property_cell(fdt, "phandle", PHANDLE_XICP)));

    _FDT((fdt_end_node(fdt)));

    /* vdevice */
    _FDT((fdt_begin_node(fdt, "vdevice")));

    _FDT((fdt_property_string(fdt, "device_type", "vdevice")));
    _FDT((fdt_property_string(fdt, "compatible", "IBM,vdevice")));
    _FDT((fdt_property_cell(fdt, "#address-cells", 0x1)));
    _FDT((fdt_property_cell(fdt, "#size-cells", 0x0)));
    _FDT((fdt_property_cell(fdt, "#interrupt-cells", 0x2)));
    _FDT((fdt_property(fdt, "interrupt-controller", NULL, 0)));

    _FDT((fdt_end_node(fdt)));

    /* event-sources */
    spapr_events_fdt_skel(fdt, epow_irq);

    /* /hypervisor node */
    if (kvm_enabled()) {
        uint8_t hypercall[16];

        /* indicate KVM hypercall interface */
        _FDT((fdt_begin_node(fdt, "hypervisor")));
        _FDT((fdt_property_string(fdt, "compatible", "linux,kvm")));
        if (kvmppc_has_cap_fixup_hcalls()) {
            /*
             * Older KVM versions with older guest kernels were broken with the
             * magic page, don't allow the guest to map it.
             */
            kvmppc_get_hypercall(first_cpu->env_ptr, hypercall,
                                 sizeof(hypercall));
            _FDT((fdt_property(fdt, "hcall-instructions", hypercall,
                              sizeof(hypercall))));
        }
        _FDT((fdt_end_node(fdt)));
    }

    _FDT((fdt_end_node(fdt))); /* close root node */
    _FDT((fdt_finish(fdt)));

    return fdt;
}

607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623
/*
 * Compose the device tree update written back to the guest for
 * client-architecture-support negotiation.
 *
 * An empty tree is created, filled in by spapr_fixup_cpu_dt(), packed and
 * then written to guest memory at @addr, preceded by an
 * sPAPRDeviceTreeUpdateHeader with version_id 1.
 *
 * @addr: guest physical address of the response buffer
 * @size: size of that buffer in bytes, header included
 *
 * libfdt failures while building the tree terminate the process through
 * _FDT().
 *
 * Returns 0 on success, -1 when the buffer is too small for the response.
 */
int spapr_h_cas_compose_response(target_ulong addr, target_ulong size)
{
    void *fdt, *fdt_skel;
    sPAPRDeviceTreeUpdateHeader hdr = { .version_id = 1 };

    /* Without this check the subtraction below would wrap around. */
    if (size <= sizeof(hdr)) {
        trace_spapr_cas_failed(size);
        return -1;
    }
    size -= sizeof(hdr);

    /* Create skeleton */
    fdt_skel = g_malloc0(size);
    _FDT((fdt_create(fdt_skel, size)));
    _FDT((fdt_begin_node(fdt_skel, "")));
    _FDT((fdt_end_node(fdt_skel)));
    _FDT((fdt_finish(fdt_skel)));
    fdt = g_malloc0(size);
    _FDT((fdt_open_into(fdt_skel, fdt, size)));
    g_free(fdt_skel);

    /* Fix skeleton up */
    _FDT((spapr_fixup_cpu_dt(fdt, spapr)));

    /* Pack resulting tree */
    _FDT((fdt_pack(fdt)));

    /*
     * NOTE(review): @size has already had the header subtracted, so this
     * comparison counts the header twice.  Kept as is because it only
     * errs on the strict side - confirm the intended limit.
     */
    if (fdt_totalsize(fdt) + sizeof(hdr) > size) {
        trace_spapr_cas_failed(size);
        g_free(fdt);
        return -1;
    }

    cpu_physical_memory_write(addr, &hdr, sizeof(hdr));
    cpu_physical_memory_write(addr + sizeof(hdr), fdt, fdt_totalsize(fdt));
    trace_spapr_cas_continue(fdt_totalsize(fdt) + sizeof(hdr));
    g_free(fdt);

    return 0;
}

643 644 645 646 647 648
/*
 * Add one "memory@<start>" node directly below the root of @fdt.
 *
 * The node gets "device_type" = "memory", a "reg" property holding the
 * big-endian (@start, @size) pair and an "ibm,associativity" property whose
 * last cell is @nodeid.
 *
 * @fdt:    device tree blob being edited
 * @nodeid: NUMA node id stored in the associativity property
 * @start:  start address of the region
 * @size:   size of the region in bytes
 *
 * Any libfdt failure terminates the process through _FDT().
 */
static void spapr_populate_memory_node(void *fdt, int nodeid, hwaddr start,
                                       hwaddr size)
{
    uint32_t associativity[] = {
        cpu_to_be32(0x4), /* length */
        cpu_to_be32(0x0), cpu_to_be32(0x0),
        cpu_to_be32(0x0), cpu_to_be32(nodeid)
    };
    char mem_name[32];
    uint64_t mem_reg_property[2];
    int off;

    mem_reg_property[0] = cpu_to_be64(start);
    mem_reg_property[1] = cpu_to_be64(size);

    /* Bounded write; the format itself is unchanged. */
    snprintf(mem_name, sizeof(mem_name), "memory@" TARGET_FMT_lx, start);
    off = fdt_add_subnode(fdt, 0, mem_name);
    _FDT(off);
    _FDT((fdt_setprop_string(fdt, off, "device_type", "memory")));
    _FDT((fdt_setprop(fdt, off, "reg", mem_reg_property,
                      sizeof(mem_reg_property))));
    _FDT((fdt_setprop(fdt, off, "ibm,associativity", associativity,
                      sizeof(associativity))));
}

668 669
/*
 * Describe guest RAM in @fdt as a series of memory nodes.
 *
 * RAM is walked NUMA node by NUMA node (or as a single node covering
 * ram_size when no NUMA nodes are configured).  The first region is the
 * RMA, emitted as one node starting at 0; the remainder of every node is
 * split into power-of-two sized chunks whose size does not exceed the
 * alignment of their start address.  Nodes reaching beyond ram_size are
 * clipped.
 *
 * @spapr: environment supplying rma_size
 * @fdt:   device tree blob being edited
 *
 * Returns 0; libfdt failures terminate the process inside
 * spapr_populate_memory_node().
 */
static int spapr_populate_memory(sPAPREnvironment *spapr, void *fdt)
{
    hwaddr mem_start, node_size;
    int i, nb_nodes = nb_numa_nodes;
    NodeInfo *nodes = numa_info;
    NodeInfo ramnode;

    /* No NUMA nodes, assume there is just one node with whole RAM */
    if (!nb_numa_nodes) {
        nb_nodes = 1;
        ramnode.node_mem = ram_size;
        nodes = &ramnode;
    }

    for (i = 0, mem_start = 0; i < nb_nodes; ++i) {
        if (!nodes[i].node_mem) {
            continue;
        }
        if (mem_start >= ram_size) {
            node_size = 0;
        } else {
            node_size = nodes[i].node_mem;
            if (node_size > ram_size - mem_start) {
                node_size = ram_size - mem_start;
            }
        }
        if (!mem_start) {
            /* ppc_spapr_init() checks for rma_size <= node0_size already */
            spapr_populate_memory_node(fdt, i, 0, spapr->rma_size);
            mem_start += spapr->rma_size;
            node_size -= spapr->rma_size;
        }
        for ( ; node_size; ) {
            hwaddr sizetmp = pow2floor(node_size);

            /*
             * mem_start != 0 here.  Both operands are 64-bit hwaddr
             * values, so use the 64-bit count rather than the host-long
             * variant, which truncates where long is 32 bits wide.
             */
            if (ctz64(mem_start) < ctz64(sizetmp)) {
                sizetmp = 1ULL << ctz64(mem_start);
            }

            spapr_populate_memory_node(fdt, i, mem_start, sizetmp);
            node_size -= sizetmp;
            mem_start += sizetmp;
        }
    }

    return 0;
}

717
/*
 * Complete the skeleton device tree and copy it into guest memory.
 *
 * spapr->fdt_skel is opened into a temporary FDT_MAX_SIZE buffer and then
 * extended with the memory nodes, VIO devices, PCI host bridges, RTAS
 * properties, per-CPU properties, the boot list / boot device entries in
 * /chosen and, without graphics, the stdout entry.  The packed result is
 * written to guest physical memory at @fdt_addr.
 *
 * @spapr:     machine environment
 * @fdt_addr:  guest physical address to write the tree to
 * @rtas_addr: forwarded to spapr_rtas_device_tree_setup()
 * @rtas_size: forwarded to spapr_rtas_device_tree_setup()
 *
 * Failures while adding memory, VIO or PCI nodes, a missing /chosen node
 * and an oversized tree terminate the process; the remaining failures are
 * only reported on stderr.
 */
static void spapr_finalize_fdt(sPAPREnvironment *spapr,
                               hwaddr fdt_addr,
                               hwaddr rtas_addr,
                               hwaddr rtas_size)
{
    MachineState *machine = MACHINE(qdev_get_machine());
    const char *boot_device = machine->boot_order;
    int ret;
    size_t i;
    size_t cb = 0;
    char *bootlist;
    void *fdt;
    sPAPRPHBState *phb;

    fdt = g_malloc(FDT_MAX_SIZE);

    /* open out the base tree into a temp buffer for the final tweaks */
    _FDT((fdt_open_into(spapr->fdt_skel, fdt, FDT_MAX_SIZE)));

    ret = spapr_populate_memory(spapr, fdt);
    if (ret < 0) {
        fprintf(stderr, "couldn't setup memory nodes in fdt\n");
        exit(1);
    }

    ret = spapr_populate_vdevice(spapr->vio_bus, fdt);
    if (ret < 0) {
        fprintf(stderr, "couldn't setup vio devices in fdt\n");
        exit(1);
    }

    QLIST_FOREACH(phb, &spapr->phbs, list) {
        ret = spapr_populate_pci_dt(phb, PHANDLE_XICP, fdt);
        /*
         * Check every bridge: testing only after the loop would let the
         * result of the last bridge hide an earlier failure.
         */
        if (ret < 0) {
            fprintf(stderr, "couldn't setup PCI devices in fdt\n");
            exit(1);
        }
    }

    /* RTAS */
    ret = spapr_rtas_device_tree_setup(fdt, rtas_addr, rtas_size);
    if (ret < 0) {
        fprintf(stderr, "Couldn't set up RTAS device tree properties\n");
    }

    /* Advertise NUMA via ibm,associativity */
    ret = spapr_fixup_cpu_dt(fdt, spapr);
    if (ret < 0) {
        fprintf(stderr, "Couldn't finalize CPU device tree properties\n");
    }

    bootlist = get_boot_devices_list(&cb, true);
    if (cb && bootlist) {
        int offset = fdt_path_offset(fdt, "/chosen");
        if (offset < 0) {
            exit(1);
        }
        /* The property is a single line: newlines become spaces. */
        for (i = 0; i < cb; i++) {
            if (bootlist[i] == '\n') {
                bootlist[i] = ' ';
            }

        }
        ret = fdt_setprop_string(fdt, offset, "qemu,boot-list", bootlist);
        if (ret < 0) {
            fprintf(stderr, "Couldn't set qemu,boot-list in fdt\n");
        }
    }

    if (boot_device && strlen(boot_device)) {
        int offset = fdt_path_offset(fdt, "/chosen");

        if (offset < 0) {
            exit(1);
        }
        ret = fdt_setprop_string(fdt, offset, "qemu,boot-device", boot_device);
        if (ret < 0) {
            fprintf(stderr, "Couldn't set qemu,boot-device in fdt\n");
        }
    }

    if (!spapr->has_graphics) {
        spapr_populate_chosen_stdout(fdt, spapr->vio_bus);
    }

    _FDT((fdt_pack(fdt)));

    if (fdt_totalsize(fdt) > FDT_MAX_SIZE) {
        error_report("FDT too big ! 0x%x bytes (max is 0x%x)",
                     fdt_totalsize(fdt), FDT_MAX_SIZE);
        exit(1);
    }

    cpu_physical_memory_write(fdt_addr, fdt, fdt_totalsize(fdt));

    g_free(bootlist);
    g_free(fdt);
}

/*
 * Map a kernel image address to its load address: keep the low 28 bits of
 * @addr and offset them by KERNEL_LOAD_ADDR.  @opaque is unused.
 */
static uint64_t translate_kernel_address(void *opaque, uint64_t addr)
{
    const uint64_t low_bits = addr & 0x0fffffff;

    return low_bits + KERNEL_LOAD_ADDR;
}

815
/*
 * Service a hypercall issued by @cpu.
 *
 * The opcode is taken from GPR3 and the arguments start at GPR4; the
 * result of spapr_hypercall() is stored back into GPR3.  When msr_pr is
 * set the call is refused with H_PRIVILEGE instead.
 *
 * NOTE(review): msr_pr is defined outside this view and presumably reads
 * MSR[PR] through the local "env" - confirm before renaming that variable.
 */
static void emulate_spapr_hypercall(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;

    if (msr_pr) {
        hcall_dprintf("Hypercall made with MSR[PR]=1\n");
        env->gpr[3] = H_PRIVILEGE;
    } else {
        env->gpr[3] = spapr_hypercall(cpu, env->gpr[3], &env->gpr[4]);
    }
}

827 828 829 830 831 832
/*
 * Hash page table entry helpers.
 *
 * HPTE(_table, _i)  - pointer to entry _i of _table; entries are two
 *                     uint64_t wide.
 * HPTE_VALID(_hpte) - non-zero if HPTE64_V_VALID is set in the first
 *                     doubleword of the entry (after tswap64()).
 * HPTE_DIRTY(_hpte) - non-zero if HPTE64_V_HPTE_DIRTY is set there.
 * CLEAN_HPTE(_hpte) - clear HPTE64_V_HPTE_DIRTY in the first doubleword.
 * DIRTY_HPTE(_hpte) - set HPTE64_V_HPTE_DIRTY in the first doubleword.
 */
#define HPTE(_table, _i)   (void *)(((uint64_t *)(_table)) + ((_i) * 2))
#define HPTE_VALID(_hpte)  (tswap64(*((uint64_t *)(_hpte))) & HPTE64_V_VALID)
#define HPTE_DIRTY(_hpte)  (tswap64(*((uint64_t *)(_hpte))) & HPTE64_V_HPTE_DIRTY)
#define CLEAN_HPTE(_hpte)  ((*(uint64_t *)(_hpte)) &= tswap64(~HPTE64_V_HPTE_DIRTY))
#define DIRTY_HPTE(_hpte)  ((*(uint64_t *)(_hpte)) |= tswap64(HPTE64_V_HPTE_DIRTY))

833 834 835
/*
 * Reset the guest hash page table and, if requested, recompute the RMA
 * size.
 *
 * kvmppc_reset_htab() is asked first.  A positive result is taken as the
 * shift of a kernel-managed table: it is recorded, kvmppc_kern_htab is set
 * and an already open htab_fd is flagged stale.  Otherwise the table lives
 * in QEMU: it is allocated on first use, zeroed, and every entry is marked
 * dirty.
 *
 * @spapr: environment owning the hash table state
 */
static void spapr_reset_htab(sPAPREnvironment *spapr)
{
    long shift;
    int index;

    /* allocate hash page table.  For now we always make this 16mb,
     * later we should probably make it scale to the size of guest
     * RAM */

    shift = kvmppc_reset_htab(spapr->htab_shift);

    if (shift > 0) {
        /* Kernel handles htab, we don't need to allocate one */
        spapr->htab_shift = shift;
        kvmppc_kern_htab = true;

        /* Tell readers to update their file descriptor */
        if (spapr->htab_fd >= 0) {
            spapr->htab_fd_stale = true;
        }
    } else {
        if (!spapr->htab) {
            /* Allocate an htab if we don't yet have one */
            spapr->htab = qemu_memalign(HTAB_SIZE(spapr), HTAB_SIZE(spapr));
        }

        /* And clear it */
        memset(spapr->htab, 0, HTAB_SIZE(spapr));

        for (index = 0; index < HTAB_SIZE(spapr) / HASH_PTE_SIZE_64; index++) {
            DIRTY_HPTE(HPTE(spapr->htab, index));
        }
    }

    /* Update the RMA size if necessary */
    if (spapr->vrma_adjust) {
        spapr->rma_size = kvmppc_rma_size(spapr_node0_size(),
                                          spapr->htab_shift);
    }
}

874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890
/*
 * Per-device callback used with foreach_dynamic_sysbus_device().
 *
 * A device that is an sPAPR PCI host bridge is accepted (returns 0).  For
 * any other device an error is reported and QEMU exits with status 1.
 */
static int find_unknown_sysbus_device(SysBusDevice *sbdev, void *opaque)
{
    if (object_dynamic_cast(OBJECT(sbdev), TYPE_SPAPR_PCI_HOST_BRIDGE)) {
        return 0;
    }

    error_report("Device %s is not supported by this machine yet.",
                 qdev_fw_name(DEVICE(sbdev)));
    exit(1);
}

891 892 893 894 895 896 897 898 899 900 901 902 903
/*
 * A guest reset flags spapr->htab_fd as stale when it is in use.  If that
 * flag is set, close the descriptor and open a new one so that the whole
 * HTAB is read again, then clear the flag.
 *
 * Returns 0 when the descriptor is usable (or was not stale), -1 when a new
 * one could not be opened.
 */
static int spapr_check_htab_fd(sPAPREnvironment *spapr)
{
    int status;

    if (!spapr->htab_fd_stale) {
        return 0;
    }

    close(spapr->htab_fd);
    spapr->htab_fd = kvmppc_get_htab_fd(false);

    status = (spapr->htab_fd < 0) ? -1 : 0;
    if (status != 0) {
        error_report("Unable to open fd for reading hash table from KVM: "
                     "%s", strerror(errno));
    }
    spapr->htab_fd_stale = false;

    return status;
}

913
static void ppc_spapr_reset(void)
914
{
915
    PowerPCCPU *first_ppc_cpu;
916
    uint32_t rtas_limit;
917

918 919 920
    /* Check for unknown sysbus devices */
    foreach_dynamic_sysbus_device(find_unknown_sysbus_device, NULL);

921 922
    /* Reset the hash table & recalc the RMA */
    spapr_reset_htab(spapr);
923

924
    qemu_devices_reset();
925

926 927 928 929 930 931 932 933 934
    /*
     * We place the device tree and RTAS just below either the top of the RMA,
     * or just below 2GB, whichever is lowere, so that it can be
     * processed with 32-bit real mode code if necessary
     */
    rtas_limit = MIN(spapr->rma_size, RTAS_MAX_ADDR);
    spapr->rtas_addr = rtas_limit - RTAS_MAX_SIZE;
    spapr->fdt_addr = spapr->rtas_addr - FDT_MAX_SIZE;

935 936 937 938
    /* Load the fdt */
    spapr_finalize_fdt(spapr, spapr->fdt_addr, spapr->rtas_addr,
                       spapr->rtas_size);

939 940 941 942
    /* Copy RTAS over */
    cpu_physical_memory_write(spapr->rtas_addr, spapr->rtas_blob,
                              spapr->rtas_size);

943
    /* Set up the entry state */
944 945 946 947 948
    first_ppc_cpu = POWERPC_CPU(first_cpu);
    first_ppc_cpu->env.gpr[3] = spapr->fdt_addr;
    first_ppc_cpu->env.gpr[5] = 0;
    first_cpu->halted = 0;
    first_ppc_cpu->env.nip = spapr->entry_point;
949 950 951

}

952 953
static void spapr_cpu_reset(void *opaque)
{
954
    PowerPCCPU *cpu = opaque;
955
    CPUState *cs = CPU(cpu);
956
    CPUPPCState *env = &cpu->env;
957

958
    cpu_reset(cs);
959 960 961 962

    /* All CPUs start halted.  CPU0 is unhalted from the machine level
     * reset code and the rest are explicitly started up by the guest
     * using an RTAS call */
963
    cs->halted = 1;
964 965

    env->spr[SPR_HIOR] = 0;
966

967
    env->external_htab = (uint8_t *)spapr->htab;
968 969 970 971 972 973 974
    if (kvm_enabled() && !env->external_htab) {
        /*
         * HV KVM, set external_htab to 1 so our ppc_hash64_load_hpte*
         * functions do the right thing.
         */
        env->external_htab = (void *)1;
    }
975
    env->htab_base = -1;
976 977 978 979 980 981 982
    /*
     * htab_mask is the mask used to normalize hash value to PTEG index.
     * htab_shift is log2 of hash table size.
     * We have 8 hpte per group, and each hpte is 16 bytes.
     * ie have 128 bytes per hpte entry.
     */
    env->htab_mask = (1ULL << ((spapr)->htab_shift - 7)) - 1;
983
    env->spr[SPR_SDR1] = (target_ulong)(uintptr_t)spapr->htab |
984
        (spapr->htab_shift - 18);
985 986
}

D
David Gibson 已提交
987 988
/*
 * Create the "spapr-nvram" device on the VIO bus and record it in
 * spapr->nvram.  If a pflash drive (bus 0, unit 0) is configured it is
 * attached to the device through its "drive" property.
 */
static void spapr_create_nvram(sPAPREnvironment *spapr)
{
    DeviceState *nvram_dev = qdev_create(&spapr->vio_bus->bus, "spapr-nvram");
    DriveInfo *pflash = drive_get(IF_PFLASH, 0, 0);

    if (pflash != NULL) {
        qdev_prop_set_drive_nofail(nvram_dev, "drive",
                                   blk_by_legacy_dinfo(pflash));
    }

    qdev_init_nofail(nvram_dev);

    spapr->nvram = (struct sPAPRNVRAM *)nvram_dev;
}

1001 1002 1003 1004 1005 1006
/*
 * Create and initialise the sPAPR RTC device, remember it in spapr->rtc and
 * alias its "date" property as "rtc-time" on the machine object.
 */
static void spapr_rtc_create(sPAPREnvironment *spapr)
{
    DeviceState *rtc_dev = qdev_create(NULL, TYPE_SPAPR_RTC);

    qdev_init_nofail(rtc_dev);
    spapr->rtc = rtc_dev;

    object_property_add_alias(qdev_get_machine(), "rtc-time",
                              OBJECT(rtc_dev), "date", NULL);
}

1012
/* Returns whether we want to use VGA or not */
1013 1014
static int spapr_vga_init(PCIBus *pci_bus)
{
1015 1016
    switch (vga_interface_type) {
    case VGA_NONE:
1017 1018 1019
        return false;
    case VGA_DEVICE:
        return true;
1020 1021
    case VGA_STD:
        return pci_vga_init(pci_bus) != NULL;
1022
    default:
1023 1024
        fprintf(stderr, "This vga model is not supported,"
                "currently it only supports -vga std\n");
1025
        exit(0);
1026 1027 1028
    }
}

1029 1030 1031 1032 1033
static int spapr_post_load(void *opaque, int version_id)
{
    sPAPREnvironment *spapr = (sPAPREnvironment *)opaque;
    int err = 0;

S
Stefan Weil 已提交
1034
    /* In earlier versions, there was no separate qdev for the PAPR
1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049
     * RTC, so the RTC offset was stored directly in sPAPREnvironment.
     * So when migrating from those versions, poke the incoming offset
     * value into the RTC device */
    if (version_id < 3) {
        err = spapr_rtc_import_offset(spapr->rtc, spapr->rtc_offset);
    }

    return err;
}

/* vmstate field test: true when the incoming stream predates version 3 */
static bool version_before_3(void *opaque, int version_id)
{
    return !(version_id >= 3);
}

1050 1051
/*
 * Migration description of the machine-wide sPAPR state (current version 3,
 * oldest accepted version 1).  Two fields are only present in streams older
 * than version 3; spapr_post_load() runs once loading has finished.
 */
static const VMStateDescription vmstate_spapr = {
    .name               = "spapr",
    .version_id         = 3,
    .minimum_version_id = 1,
    .post_load          = spapr_post_load,
    .fields             = (VMStateField[]) {
        /* 4 bytes that used to be @next_irq */
        VMSTATE_UNUSED_BUFFER(version_before_3, 0, 4),

        /* RTC offset */
        VMSTATE_UINT64_TEST(rtc_offset, sPAPREnvironment, version_before_3),

        VMSTATE_PPC_TIMEBASE_V(tb, sPAPREnvironment, 2),
        VMSTATE_END_OF_LIST()
    },
};

/*
 * Setup stage of the live HTAB save.
 *
 * Writes the table shift as the first section header.  With a table kept in
 * QEMU the save cursor is rewound and the first pass is armed.  Without one
 * (KVM must be enabled) a descriptor for reading the table is opened.
 *
 * Returns 0 on success, -1 if that descriptor could not be opened.
 */
static int htab_save_setup(QEMUFile *f, void *opaque)
{
    sPAPREnvironment *spapr = opaque;

    /* "Iteration" header */
    qemu_put_be32(f, spapr->htab_shift);

    if (spapr->htab) {
        spapr->htab_save_index = 0;
        spapr->htab_first_pass = true;
        return 0;
    }

    assert(kvm_enabled());

    spapr->htab_fd = kvmppc_get_htab_fd(false);
    spapr->htab_fd_stale = false;
    if (spapr->htab_fd < 0) {
        fprintf(stderr, "Unable to open fd for reading hash table from KVM: %s\n",
                strerror(errno));
        return -1;
    }

    return 0;
}

/*
 * First pass over a QEMU-owned hash table: send every valid HPTE, starting
 * at spapr->htab_save_index.
 *
 * Each chunk is written as a be32 start index, a be16 count of valid
 * entries, a be16 count of invalid entries (always 0 in this pass) and the
 * raw valid entries.  Entries are marked clean as they are walked.  The
 * pass stops when the end of the table is reached, when @max_ns nanoseconds
 * have elapsed after a chunk, or when the file's rate limit kicks in.
 *
 * On return spapr->htab_save_index holds the resume position; once the
 * table end is reached it is reset to 0 and htab_first_pass is cleared.
 */
static void htab_save_first_pass(QEMUFile *f, sPAPREnvironment *spapr,
                                 int64_t max_ns)
{
    int htabslots = HTAB_SIZE(spapr) / HASH_PTE_SIZE_64;
    int index = spapr->htab_save_index;
    int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);

    assert(spapr->htab_first_pass);

    do {
        int chunkstart;

        /*
         * Consume invalid HPTEs.  The entry is cleaned BEFORE the index is
         * advanced: cleaning after the increment would touch the following
         * slot instead, and one slot past the end of the table on the last
         * iteration.
         */
        while ((index < htabslots)
               && !HPTE_VALID(HPTE(spapr->htab, index))) {
            CLEAN_HPTE(HPTE(spapr->htab, index));
            index++;
        }

        /* Consume valid HPTEs; a chunk count must fit in 16 bits */
        chunkstart = index;
        while ((index < htabslots) && (index - chunkstart < USHRT_MAX)
               && HPTE_VALID(HPTE(spapr->htab, index))) {
            CLEAN_HPTE(HPTE(spapr->htab, index));
            index++;
        }

        if (index > chunkstart) {
            int n_valid = index - chunkstart;

            qemu_put_be32(f, chunkstart);
            qemu_put_be16(f, n_valid);
            qemu_put_be16(f, 0);
            qemu_put_buffer(f, HPTE(spapr->htab, chunkstart),
                            HASH_PTE_SIZE_64 * n_valid);

            if ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) > max_ns) {
                break;
            }
        }
    } while ((index < htabslots) && !qemu_file_rate_limit(f));

    if (index >= htabslots) {
        assert(index == htabslots);
        index = 0;
        spapr->htab_first_pass = false;
    }
    spapr->htab_save_index = index;
}

1143 1144
/*
 * Later pass over a QEMU-owned hash table: send the entries that are
 * flagged dirty, starting at spapr->htab_save_index and wrapping around.
 *
 * A chunk consists of a run of dirty valid entries followed by a run of
 * dirty invalid entries.  It is written as a be32 start index, a be16 valid
 * count, a be16 invalid count and the raw valid entries only.  A negative
 * @max_ns marks the final pass, which ignores both the time budget and the
 * file rate limit.
 *
 * Returns 1 when the whole table was examined and nothing had to be sent,
 * 0 otherwise.
 */
static int htab_save_later_pass(QEMUFile *f, sPAPREnvironment *spapr,
                                int64_t max_ns)
{
    bool last_round = max_ns < 0;
    int nslots = HTAB_SIZE(spapr) / HASH_PTE_SIZE_64;
    int scanned = 0;
    int emitted = 0;
    int pos = spapr->htab_save_index;
    int64_t t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);

    assert(!spapr->htab_first_pass);

    do {
        int run_start, invalid_start;

        /* Skip over entries that are not dirty */
        while ((pos < nslots)
               && !HPTE_DIRTY(HPTE(spapr->htab, pos))) {
            pos++;
            scanned++;
        }

        /* Run of dirty entries that are valid */
        run_start = pos;
        while ((pos < nslots) && (pos - run_start < USHRT_MAX)
               && HPTE_DIRTY(HPTE(spapr->htab, pos))
               && HPTE_VALID(HPTE(spapr->htab, pos))) {
            CLEAN_HPTE(HPTE(spapr->htab, pos));
            pos++;
            scanned++;
        }

        /* Run of dirty entries that are invalid */
        invalid_start = pos;
        while ((pos < nslots) && (pos - invalid_start < USHRT_MAX)
               && HPTE_DIRTY(HPTE(spapr->htab, pos))
               && !HPTE_VALID(HPTE(spapr->htab, pos))) {
            CLEAN_HPTE(HPTE(spapr->htab, pos));
            pos++;
            scanned++;
        }

        if (pos > run_start) {
            int n_valid = invalid_start - run_start;
            int n_invalid = pos - invalid_start;

            qemu_put_be32(f, run_start);
            qemu_put_be16(f, n_valid);
            qemu_put_be16(f, n_invalid);
            qemu_put_buffer(f, HPTE(spapr->htab, run_start),
                            HASH_PTE_SIZE_64 * n_valid);
            emitted += pos - run_start;

            /* Only non-final passes are subject to the time budget */
            if (!last_round
                && (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - t0) > max_ns) {
                break;
            }
        }

        if (scanned >= nslots) {
            break;
        }

        /* Wrap around to the start of the table */
        if (pos >= nslots) {
            assert(pos == nslots);
            pos = 0;
        }
    } while ((scanned < nslots) && (!qemu_file_rate_limit(f) || last_round));

    if (pos >= nslots) {
        assert(pos == nslots);
        pos = 0;
    }

    spapr->htab_save_index = pos;

    if ((scanned >= nslots) && (emitted == 0)) {
        return 1;
    }
    return 0;
}

1220 1221 1222
/* Time budget, in nanoseconds, given to each non-final HTAB save pass */
#define MAX_ITERATION_NS    5000000 /* 5 ms */
/* Buffer size argument handed to kvmppc_save_htab() */
#define MAX_KVM_BUF_SIZE    2048

1223 1224 1225
/*
 * One iteration of the live HTAB save.
 *
 * Writes a zero section header, then the table data: through
 * kvmppc_save_htab() when QEMU holds no table itself, otherwise through the
 * first or a later pass over the QEMU-owned table.  A chunk made of three
 * zero fields terminates the section.
 *
 * Returns a negative value if refreshing the KVM descriptor or saving
 * through it failed (no end marker is written in that case); otherwise the
 * result of kvmppc_save_htab() or of the later pass, or 0 after a first
 * pass.
 */
static int htab_save_iterate(QEMUFile *f, void *opaque)
{
    sPAPREnvironment *spapr = opaque;
    int status = 0;

    /* Iteration header */
    qemu_put_be32(f, 0);

    if (spapr->htab) {
        if (spapr->htab_first_pass) {
            htab_save_first_pass(f, spapr, MAX_ITERATION_NS);
        } else {
            status = htab_save_later_pass(f, spapr, MAX_ITERATION_NS);
        }
    } else {
        assert(kvm_enabled());

        status = spapr_check_htab_fd(spapr);
        if (status < 0) {
            return status;
        }

        status = kvmppc_save_htab(f, spapr->htab_fd,
                                  MAX_KVM_BUF_SIZE, MAX_ITERATION_NS);
        if (status < 0) {
            return status;
        }
    }

    /* End marker */
    qemu_put_be32(f, 0);
    qemu_put_be16(f, 0);
    qemu_put_be16(f, 0);

    return status;
}

/*
 * Final stage of the live HTAB save.
 *
 * Writes a zero section header and the remaining table data with no time
 * budget (max_ns of -1), followed by the all-zero end marker.  On the KVM
 * path the read descriptor is closed afterwards and htab_fd is set to -1.
 *
 * Returns 0 on success, or the negative value reported by
 * spapr_check_htab_fd() / kvmppc_save_htab().
 */
static int htab_save_complete(QEMUFile *f, void *opaque)
{
    sPAPREnvironment *spapr = opaque;

    /* Iteration header */
    qemu_put_be32(f, 0);

    if (spapr->htab) {
        htab_save_later_pass(f, spapr, -1);
    } else {
        int err;

        assert(kvm_enabled());

        err = spapr_check_htab_fd(spapr);
        if (err < 0) {
            return err;
        }

        err = kvmppc_save_htab(f, spapr->htab_fd, MAX_KVM_BUF_SIZE, -1);
        if (err < 0) {
            return err;
        }

        close(spapr->htab_fd);
        spapr->htab_fd = -1;
    }

    /* End marker */
    qemu_put_be32(f, 0);
    qemu_put_be16(f, 0);
    qemu_put_be16(f, 0);

    return 0;
}

/*
 * Load one "spapr/htab" section from the migration stream.
 *
 * A non-zero section header is the setup section and only carries the table
 * shift, which must match the local one.  A zero header is followed by
 * chunks (be32 index, be16 n_valid, be16 n_invalid, n_valid raw entries)
 * up to an all-zero end marker.  Chunks are written into the QEMU-owned
 * table when there is one, otherwise they are pushed to KVM through a
 * descriptor obtained from kvmppc_get_htab_fd().
 *
 * Returns 0 on success, -EINVAL for an unsupported version, a shift
 * mismatch or an out-of-range chunk, -1 if the KVM descriptor could not be
 * opened, or the negative result of kvmppc_load_htab_chunk().  The KVM
 * descriptor is closed on every exit path.
 */
static int htab_load(QEMUFile *f, void *opaque, int version_id)
{
    sPAPREnvironment *spapr = opaque;
    uint32_t section_hdr;
    int fd = -1;
    int ret = 0;

    if (version_id < 1 || version_id > 1) {
        fprintf(stderr, "htab_load() bad version\n");
        return -EINVAL;
    }

    section_hdr = qemu_get_be32(f);

    if (section_hdr) {
        /* First section, just the hash shift */
        if (spapr->htab_shift != section_hdr) {
            return -EINVAL;
        }
        return 0;
    }

    if (!spapr->htab) {
        assert(kvm_enabled());

        fd = kvmppc_get_htab_fd(true);
        if (fd < 0) {
            fprintf(stderr, "Unable to open fd to restore KVM hash table: %s\n",
                    strerror(errno));
            /* Fail the load instead of asserting on the bad fd later */
            return -1;
        }
    }

    while (true) {
        uint32_t index;
        uint16_t n_valid, n_invalid;

        index = qemu_get_be32(f);
        n_valid = qemu_get_be16(f);
        n_invalid = qemu_get_be16(f);

        if ((index == 0) && (n_valid == 0) && (n_invalid == 0)) {
            /* End of Stream */
            break;
        }

        /*
         * The sum is computed in 64 bits: in 32 bits an index close to
         * UINT32_MAX would wrap around and slip past this bounds check.
         */
        if (((uint64_t)index + n_valid + n_invalid) >
            (HTAB_SIZE(spapr) / HASH_PTE_SIZE_64)) {
            /* Bad index in stream */
            fprintf(stderr, "htab_load() bad index %u (%hu+%hu entries) "
                    "in htab stream (htab_shift=%d)\n", index, n_valid, n_invalid,
                    spapr->htab_shift);
            ret = -EINVAL;
            break;
        }

        if (spapr->htab) {
            if (n_valid) {
                qemu_get_buffer(f, HPTE(spapr->htab, index),
                                HASH_PTE_SIZE_64 * n_valid);
            }
            if (n_invalid) {
                memset(HPTE(spapr->htab, index + n_valid), 0,
                       HASH_PTE_SIZE_64 * n_invalid);
            }
        } else {
            ret = kvmppc_load_htab_chunk(f, fd, index, n_valid, n_invalid);
            if (ret < 0) {
                break;
            }
            ret = 0;
        }
    }

    /* fd is only valid on the KVM path; release it on success and failure */
    if (fd >= 0) {
        close(fd);
    }

    return ret;
}

/*
 * Live-migration callbacks for the hash page table: setup, iterate and
 * complete on the sending side, and the loader for the receiving side.
 */
static SaveVMHandlers savevm_htab_handlers = {
    .save_live_setup = htab_save_setup,
    .save_live_iterate = htab_save_iterate,
    .save_live_complete = htab_save_complete,
    .load_state = htab_load,
};

1382 1383 1384 1385 1386 1387 1388
/*
 * Boot-set handler: store a copy of @boot_device as the machine's boot
 * order.  @opaque and @errp are not used.
 */
static void spapr_boot_set(void *opaque, const char *boot_device,
                           Error **errp)
{
    MachineState *ms = MACHINE(qdev_get_machine());

    ms->boot_order = g_strdup(boot_device);
}

1389
/* pSeries LPAR / sPAPR hardware init */
1390
static void ppc_spapr_init(MachineState *machine)
1391
{
1392 1393 1394 1395 1396
    ram_addr_t ram_size = machine->ram_size;
    const char *cpu_model = machine->cpu_model;
    const char *kernel_filename = machine->kernel_filename;
    const char *kernel_cmdline = machine->kernel_cmdline;
    const char *initrd_filename = machine->initrd_filename;
1397
    PowerPCCPU *cpu;
A
Andreas Färber 已提交
1398
    CPUPPCState *env;
1399
    PCIHostState *phb;
1400
    int i;
A
Avi Kivity 已提交
1401 1402
    MemoryRegion *sysmem = get_system_memory();
    MemoryRegion *ram = g_new(MemoryRegion, 1);
1403 1404
    MemoryRegion *rma_region;
    void *rma = NULL;
A
Avi Kivity 已提交
1405
    hwaddr rma_alloc_size;
1406
    hwaddr node0_size = spapr_node0_size();
1407 1408
    uint32_t initrd_base = 0;
    long kernel_size = 0, initrd_size = 0;
1409
    long load_limit, fw_size;
1410
    bool kernel_le = false;
1411
    char *filename;
1412

1413 1414
    msi_supported = true;

1415 1416 1417
    spapr = g_malloc0(sizeof(*spapr));
    QLIST_INIT(&spapr->phbs);

1418 1419
    cpu_ppc_hypercall = emulate_spapr_hypercall;

1420
    /* Allocate RMA if necessary */
1421
    rma_alloc_size = kvmppc_alloc_rma(&rma);
1422 1423

    if (rma_alloc_size == -1) {
1424
        error_report("Unable to create RMA");
1425 1426
        exit(1);
    }
1427

1428
    if (rma_alloc_size && (rma_alloc_size < node0_size)) {
1429
        spapr->rma_size = rma_alloc_size;
1430
    } else {
1431
        spapr->rma_size = node0_size;
1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445

        /* With KVM, we don't actually know whether KVM supports an
         * unbounded RMA (PR KVM) or is limited by the hash table size
         * (HV KVM using VRMA), so we always assume the latter
         *
         * In that case, we also limit the initial allocations for RTAS
         * etc... to 256M since we have no way to know what the VRMA size
         * is going to be as it depends on the size of the hash table
         * isn't determined yet.
         */
        if (kvm_enabled()) {
            spapr->vrma_adjust = 1;
            spapr->rma_size = MIN(spapr->rma_size, 0x10000000);
        }
1446 1447
    }

1448 1449 1450 1451 1452 1453
    if (spapr->rma_size > node0_size) {
        fprintf(stderr, "Error: Numa node 0 has to span the RMA (%#08"HWADDR_PRIx")\n",
                spapr->rma_size);
        exit(1);
    }

1454 1455
    /* Setup a load limit for the ramdisk leaving room for SLOF and FDT */
    load_limit = MIN(spapr->rma_size, RTAS_MAX_ADDR) - FW_OVERHEAD;
1456

1457 1458 1459 1460 1461 1462 1463 1464 1465 1466
    /* We aim for a hash table of size 1/128 the size of RAM.  The
     * normal rule of thumb is 1/64 the size of RAM, but that's much
     * more than needed for the Linux guests we support. */
    spapr->htab_shift = 18; /* Minimum architected size */
    while (spapr->htab_shift <= 46) {
        if ((1ULL << (spapr->htab_shift + 7)) >= ram_size) {
            break;
        }
        spapr->htab_shift++;
    }
1467

1468
    /* Set up Interrupt Controller before we create the VCPUs */
1469 1470
    spapr->icp = xics_system_init(machine,
                                  smp_cpus * kvmppc_smt_threads() / smp_threads,
1471 1472
                                  XICS_IRQS);

1473 1474
    /* init CPUs */
    if (cpu_model == NULL) {
1475
        cpu_model = kvm_enabled() ? "host" : "POWER7";
1476 1477
    }
    for (i = 0; i < smp_cpus; i++) {
1478 1479
        cpu = cpu_ppc_init(cpu_model);
        if (cpu == NULL) {
1480 1481 1482
            fprintf(stderr, "Unable to find PowerPC CPU definition\n");
            exit(1);
        }
1483 1484
        env = &cpu->env;

1485 1486 1487
        /* Set time-base frequency to 512 MHz */
        cpu_ppc_tb_init(env, TIMEBASE_FREQ);

1488 1489 1490 1491
        /* PAPR always has exception vectors in RAM not ROM. To ensure this,
         * MSR[IP] should never be set.
         */
        env->msr_mask &= ~(1 << 6);
1492 1493 1494

        /* Tell KVM that we're in PAPR mode */
        if (kvm_enabled()) {
1495
            kvmppc_set_papr(cpu);
1496 1497
        }

1498 1499 1500 1501 1502 1503
        if (cpu->max_compat) {
            if (ppc_set_compat(cpu, cpu->max_compat) < 0) {
                exit(1);
            }
        }

1504 1505
        xics_cpu_setup(spapr->icp, cpu);

1506
        qemu_register_reset(spapr_cpu_reset, cpu);
1507 1508
    }

1509 1510 1511 1512 1513
    if (kvm_enabled()) {
        /* Enable H_LOGICAL_CI_* so SLOF can talk to in-kernel devices */
        kvmppc_enable_logical_ci_hcalls();
    }

1514
    /* allocate RAM */
1515
    spapr->ram_limit = ram_size;
1516 1517 1518
    memory_region_allocate_system_memory(ram, NULL, "ppc_spapr.ram",
                                         spapr->ram_limit);
    memory_region_add_subregion(sysmem, 0, ram);
1519

1520 1521 1522 1523 1524 1525 1526 1527
    if (rma_alloc_size && rma) {
        rma_region = g_new(MemoryRegion, 1);
        memory_region_init_ram_ptr(rma_region, NULL, "ppc_spapr.rma",
                                   rma_alloc_size, rma);
        vmstate_register_ram_global(rma_region);
        memory_region_add_subregion(sysmem, 0, rma_region);
    }

1528
    filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, "spapr-rtas.bin");
1529
    if (!filename) {
1530
        error_report("Could not find LPAR rtas '%s'", "spapr-rtas.bin");
1531 1532
        exit(1);
    }
1533 1534 1535
    spapr->rtas_size = get_image_size(filename);
    spapr->rtas_blob = g_malloc(spapr->rtas_size);
    if (load_image_size(filename, spapr->rtas_blob, spapr->rtas_size) < 0) {
1536
        error_report("Could not load LPAR rtas '%s'", filename);
1537 1538
        exit(1);
    }
1539
    if (spapr->rtas_size > RTAS_MAX_SIZE) {
1540 1541
        error_report("RTAS too big ! 0x%zx bytes (max is 0x%x)",
                     (size_t)spapr->rtas_size, RTAS_MAX_SIZE);
1542 1543
        exit(1);
    }
1544
    g_free(filename);
1545

1546 1547 1548
    /* Set up EPOW events infrastructure */
    spapr_events_init(spapr);

1549
    /* Set up the RTC RTAS interfaces */
1550
    spapr_rtc_create(spapr);
1551

1552
    /* Set up VIO bus */
1553 1554
    spapr->vio_bus = spapr_vio_bus_init();

P
Paolo Bonzini 已提交
1555
    for (i = 0; i < MAX_SERIAL_PORTS; i++) {
1556
        if (serial_hds[i]) {
1557
            spapr_vty_create(spapr->vio_bus, serial_hds[i]);
1558 1559
        }
    }
1560

D
David Gibson 已提交
1561 1562 1563
    /* We always have at least the nvram device on VIO */
    spapr_create_nvram(spapr);

1564
    /* Set up PCI */
1565 1566
    spapr_pci_rtas_init();

1567
    phb = spapr_create_phb(spapr, 0);
1568

P
Paolo Bonzini 已提交
1569
    for (i = 0; i < nb_nics; i++) {
1570 1571 1572
        NICInfo *nd = &nd_table[i];

        if (!nd->model) {
1573
            nd->model = g_strdup("ibmveth");
1574 1575 1576
        }

        if (strcmp(nd->model, "ibmveth") == 0) {
1577
            spapr_vlan_create(spapr->vio_bus, nd);
1578
        } else {
1579
            pci_nic_init_nofail(&nd_table[i], phb->bus, nd->model, NULL);
1580 1581 1582
        }
    }

1583
    for (i = 0; i <= drive_get_max_bus(IF_SCSI); i++) {
1584
        spapr_vscsi_create(spapr->vio_bus);
1585 1586
    }

1587
    /* Graphics */
1588
    if (spapr_vga_init(phb->bus)) {
1589
        spapr->has_graphics = true;
1590
        machine->usb |= defaults_enabled() && !machine->usb_disabled;
1591 1592
    }

1593
    if (machine->usb) {
1594
        pci_create_simple(phb->bus, -1, "pci-ohci");
1595

1596
        if (spapr->has_graphics) {
1597 1598 1599 1600
            USBBus *usb_bus = usb_bus_find(-1);

            usb_create_simple(usb_bus, "usb-kbd");
            usb_create_simple(usb_bus, "usb-mouse");
1601 1602 1603
        }
    }

1604
    if (spapr->rma_size < (MIN_RMA_SLOF << 20)) {
1605 1606 1607 1608 1609
        fprintf(stderr, "qemu: pSeries SLOF firmware requires >= "
                "%ldM guest RMA (Real Mode Area memory)\n", MIN_RMA_SLOF);
        exit(1);
    }

1610 1611 1612 1613 1614
    if (kernel_filename) {
        uint64_t lowaddr = 0;

        kernel_size = load_elf(kernel_filename, translate_kernel_address, NULL,
                               NULL, &lowaddr, NULL, 1, ELF_MACHINE, 0);
1615
        if (kernel_size == ELF_LOAD_WRONG_ENDIAN) {
1616 1617 1618 1619 1620
            kernel_size = load_elf(kernel_filename,
                                   translate_kernel_address, NULL,
                                   NULL, &lowaddr, NULL, 0, ELF_MACHINE, 0);
            kernel_le = kernel_size > 0;
        }
1621
        if (kernel_size < 0) {
1622 1623
            fprintf(stderr, "qemu: error loading %s: %s\n",
                    kernel_filename, load_elf_strerror(kernel_size));
1624 1625 1626 1627 1628
            exit(1);
        }

        /* load initrd */
        if (initrd_filename) {
1629 1630 1631 1632
            /* Try to locate the initrd in the gap between the kernel
             * and the firmware. Add a bit of space just in case
             */
            initrd_base = (KERNEL_LOAD_ADDR + kernel_size + 0x1ffff) & ~0xffff;
1633
            initrd_size = load_image_targphys(initrd_filename, initrd_base,
1634
                                              load_limit - initrd_base);
1635 1636 1637 1638 1639 1640 1641 1642 1643
            if (initrd_size < 0) {
                fprintf(stderr, "qemu: could not load initial ram disk '%s'\n",
                        initrd_filename);
                exit(1);
            }
        } else {
            initrd_base = 0;
            initrd_size = 0;
        }
1644
    }
1645

1646 1647 1648 1649
    if (bios_name == NULL) {
        bios_name = FW_FILE_NAME;
    }
    filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, bios_name);
1650
    if (!filename) {
1651
        error_report("Could not find LPAR firmware '%s'", bios_name);
1652 1653
        exit(1);
    }
1654
    fw_size = load_image_targphys(filename, 0, FW_MAX_SIZE);
1655 1656
    if (fw_size <= 0) {
        error_report("Could not load LPAR firmware '%s'", filename);
1657 1658 1659 1660 1661 1662
        exit(1);
    }
    g_free(filename);

    spapr->entry_point = 0x100;

1663 1664 1665 1666
    vmstate_register(NULL, 0, &vmstate_spapr, spapr);
    register_savevm_live(NULL, "spapr/htab", -1, 1,
                         &savevm_htab_handlers, spapr);

1667
    /* Prepare the device tree */
1668
    spapr->fdt_skel = spapr_create_fdt_skel(initrd_base, initrd_size,
1669
                                            kernel_size, kernel_le,
1670 1671
                                            kernel_cmdline,
                                            spapr->check_exception_irq);
1672
    assert(spapr->fdt_skel != NULL);
1673

1674 1675 1676 1677
    /* used by RTAS */
    QTAILQ_INIT(&spapr->ccs_list);
    qemu_register_reset(spapr_ccs_reset_hook, spapr);

1678
    qemu_register_boot_set(spapr_boot_set, spapr);
1679 1680
}

1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698
/*
 * Translate the "kvm-type" machine option into a numeric KVM type:
 * 0 when no type was given, 1 for "HV", 2 for "PR".  Any other string is
 * reported as an error and QEMU exits with status 1.
 */
static int spapr_kvm_type(const char *vm_type)
{
    if (vm_type == NULL) {
        return 0;
    }

    if (strcmp(vm_type, "HV") == 0) {
        return 1;
    }

    if (strcmp(vm_type, "PR") == 0) {
        return 2;
    }

    error_report("Unknown kvm-type specified '%s'", vm_type);
    exit(1);
}

1699
/*
 * Firmware-path provider hook used for bootindex handling.
 *
 * Produces a replacement path component for SCSI devices whose bus parent
 * is a spapr-vscsi adapter, a virtio-scsi adapter or a USB device, and for
 * sPAPR PCI host bridges.  Returns a newly allocated string, or NULL when
 * no replacement is produced for @dev.
 */
static char *spapr_get_fw_dev_path(FWPathProvider *p, BusState *bus,
                                   DeviceState *dev)
{
#define CAST(type, obj, name) \
    ((type *)object_dynamic_cast(OBJECT(obj), (name)))
    SCSIDevice *scsi = CAST(SCSIDevice,  dev, TYPE_SCSI_DEVICE);
    sPAPRPHBState *bridge = CAST(sPAPRPHBState, dev,
                                 TYPE_SPAPR_PCI_HOST_BRIDGE);

    if (scsi) {
        void *vscsi = CAST(void, bus->parent, "spapr-vscsi");
        VirtIOSCSI *virtio = CAST(VirtIOSCSI, bus->parent, TYPE_VIRTIO_SCSI);
        USBDevice *usb = CAST(USBDevice, bus->parent, TYPE_USB_DEVICE);

        if (vscsi) {
            /*
             * Replace "channel@0/disk@0,0" with "disk@8000000000000000":
             * the value built below is 0x8000 | (id << 8) | lun, placed in
             * the top 16 bits of the 64-bit LUN.
             */
            unsigned srp = 0x8000 | (scsi->id << 8) | scsi->lun;

            return g_strdup_printf("%s@%"PRIX64, qdev_fw_name(dev),
                                   (uint64_t)srp << 48);
        }

        if (virtio) {
            /*
             * The value built below is 0x1000000 | (target << 16) | lun,
             * placed in the top 32 bits of the 64-bit LUN.  SLOF's own
             * description of this binding is wrong; the actual binding is:
             * swap 0100 or 10 << or 20 << ( target lun-id -- srplun )
             */
            unsigned srp = 0x1000000 | (scsi->id << 16) | scsi->lun;

            return g_strdup_printf("%s@%"PRIX64, qdev_fw_name(dev),
                                   (uint64_t)srp << 32);
        }

        if (usb) {
            /*
             * The value built below is 0x1000000 | (usb-port << 16) | lun,
             * placed in the top 32 bits of the 64-bit LUN.
             */
            unsigned port_nr = atoi(usb->port->path);
            unsigned srp = 0x1000000 | (port_nr << 16) | scsi->lun;

            return g_strdup_printf("%s@%"PRIX64, qdev_fw_name(dev),
                                   (uint64_t)srp << 32);
        }
    }

    if (bridge) {
        /* Replace "pci" with "pci@<buid>", the buid printed in hex */
        return g_strdup_printf("pci@%"PRIX64, bridge->buid);
    }

    return NULL;
}

E
Eduardo Habkost 已提交
1756 1757
/* "kvm-type" property getter: returns a copy of the stored string */
static char *spapr_get_kvm_type(Object *obj, Error **errp)
{
    sPAPRMachineState *machine_state = SPAPR_MACHINE(obj);
    char *copy = g_strdup(machine_state->kvm_type);

    return copy;
}

/*
 * "kvm-type" property setter: frees the previously stored string and keeps
 * a copy of @value.
 */
static void spapr_set_kvm_type(Object *obj, const char *value, Error **errp)
{
    sPAPRMachineState *machine_state = SPAPR_MACHINE(obj);

    g_free(machine_state->kvm_type);
    machine_state->kvm_type = g_strdup(value);
}

/*
 * Instance initialiser for the sPAPR machine: registers the "kvm-type"
 * string property together with its description.
 */
static void spapr_machine_initfn(Object *obj)
{
    object_property_add_str(obj, "kvm-type",
                            spapr_get_kvm_type, spapr_set_kvm_type,
                            NULL);
    object_property_set_description(
        obj, "kvm-type",
        "Specifies the KVM virtualization mode (HV, PR)",
        NULL);
}

1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796
/*
 * Work item run on a CPU by spapr_nmi(): synchronise the CPU state, then
 * perform a system reset on that CPU.  @arg is the CPUState.
 */
static void ppc_cpu_do_nmi_on_cpu(void *arg)
{
    CPUState *target = arg;

    cpu_synchronize_state(target);
    ppc_cpu_do_system_reset(target);
}

/*
 * NMI monitor handler: queue ppc_cpu_do_nmi_on_cpu() on every CPU.
 * @cpu_index is not used; all CPUs are targeted.
 */
static void spapr_nmi(NMIState *n, int cpu_index, Error **errp)
{
    CPUState *each_cpu;

    CPU_FOREACH(each_cpu) {
        async_run_on_cpu(each_cpu, ppc_cpu_do_nmi_on_cpu, each_cpu);
    }
}

1797 1798 1799
/*
 * Class initializer for the base sPAPR machine type.
 *
 * Fills in the MachineClass hooks and defaults, and installs the callbacks
 * for the firmware-path-provider and NMI interfaces.  @data is not used.
 */
static void spapr_machine_class_init(ObjectClass *oc, void *data)
{
    MachineClass *machine_class = MACHINE_CLASS(oc);
    FWPathProviderClass *fw_class = FW_PATH_PROVIDER_CLASS(oc);
    NMIClass *nmi_class = NMI_CLASS(oc);

    /* Machine lifecycle and KVM hooks */
    machine_class->init = ppc_spapr_init;
    machine_class->reset = ppc_spapr_reset;
    machine_class->kvm_type = spapr_kvm_type;

    /* Board defaults and limits */
    machine_class->block_default_type = IF_SCSI;
    machine_class->max_cpus = MAX_CPUS;
    machine_class->no_parallel = 1;
    machine_class->default_boot_order = "";
    machine_class->default_ram_size = 512 * M_BYTE;
    machine_class->has_dynamic_sysbus = true;

    /* Interface callbacks */
    fw_class->get_dev_path = spapr_get_fw_dev_path;
    nmi_class->nmi_monitor_handler = spapr_nmi;
}

static const TypeInfo spapr_machine_info = {
    .name          = TYPE_SPAPR_MACHINE,
    .parent        = TYPE_MACHINE,
1820
    .abstract      = true,
1821
    .instance_size = sizeof(sPAPRMachineState),
E
Eduardo Habkost 已提交
1822
    .instance_init = spapr_machine_initfn,
1823
    .class_init    = spapr_machine_class_init,
1824 1825
    .interfaces = (InterfaceInfo[]) {
        { TYPE_FW_PATH_PROVIDER },
1826
        { TYPE_NMI },
1827 1828
        { }
    },
1829 1830
};

E
Eduardo Habkost 已提交
1831
/*
 * Compat property entries for pseries-2.3: the HW_COMPAT_2_3 entries
 * followed by one that sets "dynamic-reconfiguration" to "off" on
 * "spapr-pci-host-bridge".  Intended for use inside a GlobalProperty
 * array initializer (every entry ends with a comma).
 */
#define SPAPR_COMPAT_2_3                                   \
        HW_COMPAT_2_3                                      \
        {                                                  \
            .driver   = "spapr-pci-host-bridge",           \
            .property = "dynamic-reconfiguration",         \
            .value    = "off",                             \
        },
E
Eduardo Habkost 已提交
1838

1839
/*
 * Compat property entries for pseries-2.2: everything in SPAPR_COMPAT_2_3,
 * then the HW_COMPAT_2_2 entries, then one that sets "mem_win_size" to
 * "0x20000000" on TYPE_SPAPR_PCI_HOST_BRIDGE.
 */
#define SPAPR_COMPAT_2_2                                   \
        SPAPR_COMPAT_2_3                                   \
        HW_COMPAT_2_2                                      \
        {                                                  \
            .driver   = TYPE_SPAPR_PCI_HOST_BRIDGE,        \
            .property = "mem_win_size",                    \
            .value    = "0x20000000",                      \
        },
1847 1848

/*
 * Compat property entries for pseries-2.1: everything in SPAPR_COMPAT_2_2
 * followed by the HW_COMPAT_2_1 entries.
 */
#define SPAPR_COMPAT_2_1                                   \
        SPAPR_COMPAT_2_2                                   \
        HW_COMPAT_2_1
1851

J
Jason Wang 已提交
1852 1853 1854 1855
/*
 * Per-instance compat hook for pseries-2.3.  Currently has nothing to do;
 * it is the base of the spapr_compat_2_x() call chain.
 */
static void spapr_compat_2_3(Object *obj)
{
    /* Intentionally empty. */
}

1856 1857
/*
 * Per-instance compat hook for pseries-2.2: adds nothing of its own and
 * simply applies the 2.3 hook.
 */
static void spapr_compat_2_2(Object *obj)
{
    /* Chain to the next newer version's hook. */
    spapr_compat_2_3(obj);
}

/*
 * Per-instance compat hook for pseries-2.1: adds nothing of its own and
 * simply applies the 2.2 hook.
 */
static void spapr_compat_2_1(Object *obj)
{
    /* Chain to the next newer version's hook. */
    spapr_compat_2_2(obj);
}

J
Jason Wang 已提交
1866 1867 1868 1869 1870 1871
/*
 * Instance initializer for pseries-2.3: runs the 2.3 compat hook, then the
 * common sPAPR machine instance initializer.
 */
static void spapr_machine_2_3_instance_init(Object *obj)
{
    /* Version-specific compat hook first... */
    spapr_compat_2_3(obj);
    /* ...then the setup shared by every sPAPR machine. */
    spapr_machine_initfn(obj);
}

1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883
/*
 * Instance initializer for pseries-2.2: runs the 2.2 compat hook, then the
 * common sPAPR machine instance initializer.
 */
static void spapr_machine_2_2_instance_init(Object *obj)
{
    /* Version-specific compat hook first... */
    spapr_compat_2_2(obj);
    /* ...then the setup shared by every sPAPR machine. */
    spapr_machine_initfn(obj);
}

/*
 * Instance initializer for pseries-2.1: runs the 2.1 compat hook, then the
 * common sPAPR machine instance initializer.
 */
static void spapr_machine_2_1_instance_init(Object *obj)
{
    /* Version-specific compat hook first... */
    spapr_compat_2_1(obj);
    /* ...then the setup shared by every sPAPR machine. */
    spapr_machine_initfn(obj);
}

1884 1885 1886
/*
 * Class initializer for the "pseries-2.1" machine: sets its name and
 * description and attaches the SPAPR_COMPAT_2_1 property list.
 * @data is not used.
 */
static void spapr_machine_2_1_class_init(ObjectClass *oc, void *data)
{
    /* Static storage: the class keeps a pointer to this table. */
    static GlobalProperty pseries_2_1_compat[] = {
        SPAPR_COMPAT_2_1
        { /* end of list */ }
    };
    MachineClass *machine_class = MACHINE_CLASS(oc);

    machine_class->compat_props = pseries_2_1_compat;
    machine_class->name = "pseries-2.1";
    machine_class->desc = "pSeries Logical Partition (PAPR compliant) v2.1";
}

/* Type description of the pseries-2.1 machine, derived from the sPAPR base. */
static const TypeInfo spapr_machine_2_1_info = {
    .name          = TYPE_SPAPR_MACHINE "2.1",
    .parent        = TYPE_SPAPR_MACHINE,
    .instance_init = spapr_machine_2_1_instance_init,
    .class_init    = spapr_machine_2_1_class_init,
};

1904 1905
/*
 * Class initializer for the "pseries-2.2" machine: sets its name and
 * description and attaches the SPAPR_COMPAT_2_2 property list.
 * @data is not used.
 */
static void spapr_machine_2_2_class_init(ObjectClass *oc, void *data)
{
    /* Static storage: the class keeps a pointer to this table. */
    static GlobalProperty pseries_2_2_compat[] = {
        SPAPR_COMPAT_2_2
        { /* end of list */ }
    };
    MachineClass *machine_class = MACHINE_CLASS(oc);

    machine_class->compat_props = pseries_2_2_compat;
    machine_class->name = "pseries-2.2";
    machine_class->desc = "pSeries Logical Partition (PAPR compliant) v2.2";
}

/* Type description of the pseries-2.2 machine, derived from the sPAPR base. */
static const TypeInfo spapr_machine_2_2_info = {
    .name          = TYPE_SPAPR_MACHINE "2.2",
    .parent        = TYPE_SPAPR_MACHINE,
    .instance_init = spapr_machine_2_2_instance_init,
    .class_init    = spapr_machine_2_2_class_init,
};

1924 1925
/*
 * Class initializer for the "pseries-2.3" machine: sets its name and
 * description and attaches the SPAPR_COMPAT_2_3 property list.
 * @data is not used.
 */
static void spapr_machine_2_3_class_init(ObjectClass *oc, void *data)
{
    /* Static storage: the class keeps a pointer to this table. */
    static GlobalProperty pseries_2_3_compat[] = {
        SPAPR_COMPAT_2_3
        { /* end of list */ }
    };
    MachineClass *machine_class = MACHINE_CLASS(oc);

    machine_class->compat_props = pseries_2_3_compat;
    machine_class->name = "pseries-2.3";
    machine_class->desc = "pSeries Logical Partition (PAPR compliant) v2.3";
}

/* Type description of the pseries-2.3 machine, derived from the sPAPR base. */
static const TypeInfo spapr_machine_2_3_info = {
    .name          = TYPE_SPAPR_MACHINE "2.3",
    .parent        = TYPE_SPAPR_MACHINE,
    .instance_init = spapr_machine_2_3_instance_init,
    .class_init    = spapr_machine_2_3_class_init,
};

/*
 * Class initializer for the "pseries-2.4" machine.  Besides name and
 * description it sets the "pseries" alias and flags this machine as the
 * default; no compat property list is attached.  @data is not used.
 */
static void spapr_machine_2_4_class_init(ObjectClass *oc, void *data)
{
    MachineClass *machine_class = MACHINE_CLASS(oc);

    /* Identification */
    machine_class->name = "pseries-2.4";
    machine_class->alias = "pseries";
    machine_class->desc = "pSeries Logical Partition (PAPR compliant) v2.4";

    /* Selected as the default machine */
    machine_class->is_default = 1;
}

/*
 * Type description of the pseries-2.4 machine.  It sets no instance_init
 * of its own, unlike the 2.1-2.3 types.
 */
static const TypeInfo spapr_machine_2_4_info = {
    .parent        = TYPE_SPAPR_MACHINE,
    .name          = TYPE_SPAPR_MACHINE "2.4",
    .class_init    = spapr_machine_2_4_class_init,
};

1960
static void spapr_machine_register_types(void)
1961
{
1962
    type_register_static(&spapr_machine_info);
1963
    type_register_static(&spapr_machine_2_1_info);
1964
    type_register_static(&spapr_machine_2_2_info);
1965
    type_register_static(&spapr_machine_2_3_info);
J
Jason Wang 已提交
1966
    type_register_static(&spapr_machine_2_4_info);
1967 1968
}

1969
/* Hand spapr_machine_register_types() to type_init() so the types above get registered. */
type_init(spapr_machine_register_types)