spapr.c 41.7 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26
/*
 * QEMU PowerPC pSeries Logical Partition (aka sPAPR) hardware System Emulator
 *
 * Copyright (c) 2004-2007 Fabrice Bellard
 * Copyright (c) 2007 Jocelyn Mayer
 * Copyright (c) 2010 David Gibson, IBM Corporation.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 */
27
#include "sysemu/sysemu.h"
28
#include "hw/hw.h"
29
#include "elf.h"
P
Paolo Bonzini 已提交
30
#include "net/net.h"
31 32 33
#include "sysemu/blockdev.h"
#include "sysemu/cpus.h"
#include "sysemu/kvm.h"
34
#include "kvm_ppc.h"
35
#include "mmu-hash64.h"
36 37

#include "hw/boards.h"
P
Paolo Bonzini 已提交
38
#include "hw/ppc/ppc.h"
39 40
#include "hw/loader.h"

P
Paolo Bonzini 已提交
41 42 43 44
#include "hw/ppc/spapr.h"
#include "hw/ppc/spapr_vio.h"
#include "hw/pci-host/spapr.h"
#include "hw/ppc/xics.h"
45
#include "hw/pci/msi.h"
46

47
#include "hw/pci/pci.h"
48

49
#include "exec/address-spaces.h"
50
#include "hw/usb.h"
51
#include "qemu/config-file.h"
A
Avi Kivity 已提交
52

53 54
#include <libfdt.h>

55 56 57 58 59 60 61 62 63 64
/* SLOF memory layout:
 *
 * The SLOF raw image is loaded at 0; it copies its romfs right below the
 * flat device-tree, then positions SLOF itself 31M below that.
 *
 * So we set FW_OVERHEAD to 40MB which should account for all of that
 * and more
 *
 * We load our kernel at 4M, leaving space for SLOF initial image
 */
65
/* Size limits for the blobs handed to the guest */
#define FDT_MAX_SIZE            0x40000
#define RTAS_MAX_SIZE           0x10000

/* Firmware image: file name, size limit and reserved overhead (40MB) */
#define FW_FILE_NAME            "slof.bin"
#define FW_MAX_SIZE             0x400000
#define FW_OVERHEAD             0x2800000

/* The kernel is loaded right after the space reserved for the firmware */
#define KERNEL_LOAD_ADDR        FW_MAX_SIZE

/* NOTE(review): presumably the minimum RMA, in MB, that SLOF needs - confirm */
#define MIN_RMA_SLOF            128UL

/* Timebase frequency advertised when KVM does not supply one */
#define TIMEBASE_FREQ           512000000ULL

#define MAX_CPUS                256
#define XICS_IRQS               1024

/* phandle assigned to the interrupt-controller node of the device tree */
#define PHANDLE_XICP            0x00001111

/* Size in bytes of the hash page table described by (spapr)->htab_shift */
#define HTAB_SIZE(spapr)        (1ULL << ((spapr)->htab_shift))

83 84
/* Global machine state used by the IRQ allocation and reset helpers below */
sPAPREnvironment *spapr;

85
int spapr_allocate_irq(int hint, bool lsi)
86
{
87
    int irq;
88 89 90

    if (hint) {
        irq = hint;
91 92 93
        if (hint >= spapr->next_irq) {
            spapr->next_irq = hint + 1;
        }
94 95 96 97 98
        /* FIXME: we should probably check for collisions somehow */
    } else {
        irq = spapr->next_irq++;
    }

99 100 101
    /* Configure irq type */
    if (!xics_get_qirq(spapr->icp, irq)) {
        return 0;
102 103
    }

104
    xics_set_irq_type(spapr->icp, irq, lsi);
105

106
    return irq;
107 108
}

109 110 111 112 113
/*
 * Allocate block of consequtive IRQs, returns a number of the first.
 * If msi==true, aligns the first IRQ number to num.
 */
int spapr_allocate_irq_block(int num, bool lsi, bool msi)
114 115
{
    int first = -1;
116 117 118 119 120 121 122 123 124 125 126 127 128 129
    int i, hint = 0;

    /*
     * MSIMesage::data is used for storing VIRQ so
     * it has to be aligned to num to support multiple
     * MSI vectors. MSI-X is not affected by this.
     * The hint is used for the first IRQ, the rest should
     * be allocated continously.
     */
    if (msi) {
        assert((num == 1) || (num == 2) || (num == 4) ||
               (num == 8) || (num == 16) || (num == 32));
        hint = (spapr->next_irq + num - 1) & ~(num - 1);
    }
130 131 132 133

    for (i = 0; i < num; ++i) {
        int irq;

134
        irq = spapr_allocate_irq(hint, lsi);
135 136 137 138 139 140
        if (!irq) {
            return -1;
        }

        if (0 == i) {
            first = irq;
141
            hint = 0;
142 143 144 145 146 147 148 149 150 151
        }

        /* If the above doesn't create a consecutive block then that's
         * an internal bug */
        assert(irq == (first + i));
    }

    return first;
}

152 153 154 155 156 157 158 159 160 161 162 163
/*
 * Create a device of the given @type, set its "nr_servers" and "nr_irqs"
 * properties and initialise it.
 *
 * Returns the device as an XICSState, or NULL when qdev_init() fails.
 */
static XICSState *try_create_xics(const char *type, int nr_servers,
                                  int nr_irqs)
{
    DeviceState *xics_dev = qdev_create(NULL, type);

    qdev_prop_set_uint32(xics_dev, "nr_servers", nr_servers);
    qdev_prop_set_uint32(xics_dev, "nr_irqs", nr_irqs);

    if (qdev_init(xics_dev) >= 0) {
        return XICS_COMMON(xics_dev);
    }

    return NULL;
}

/*
 * Create the system XICS with @nr_servers servers and @nr_irqs interrupts.
 *
 * Never returns NULL: the process is aborted if the device cannot be
 * created.
 */
static XICSState *xics_system_init(int nr_servers, int nr_irqs)
{
    XICSState *icp = try_create_xics(TYPE_XICS, nr_servers, nr_irqs);

    if (!icp) {
        /*
         * Plain fprintf rather than perror(): nothing on this path sets
         * errno, so perror() would append an unrelated error string.
         */
        fprintf(stderr, "Failed to create XICS\n");
        abort();
    }

    return icp;
}

180
/*
 * Patch the per-CPU nodes of an already built device tree.
 *
 * For the first thread of every core (cpu_index divisible by the SMT thread
 * count) this sets "ibm,pft-size" and, when more than one NUMA node is
 * configured, "ibm,associativity".
 *
 * Returns the last libfdt result (>= 0) on success, or the negative libfdt
 * error of the first lookup/update that failed.
 */
static int spapr_fixup_cpu_dt(void *fdt, sPAPREnvironment *spapr)
{
    int ret = 0, offset;
    CPUState *cpu;
    int smt = kvmppc_smt_threads();
    uint32_t pft_size_prop[] = {0, cpu_to_be32(spapr->htab_shift)};

    assert(spapr->cpu_model);

    CPU_FOREACH(cpu) {
        uint32_t associativity[] = {cpu_to_be32(0x5),
                                    cpu_to_be32(0x0),
                                    cpu_to_be32(0x0),
                                    cpu_to_be32(0x0),
                                    cpu_to_be32(cpu->numa_node),
                                    cpu_to_be32(cpu->cpu_index)};
        char *node_path;

        /* Only the first thread of a core has a node in the tree */
        if ((cpu->cpu_index % smt) != 0) {
            continue;
        }

        /*
         * Build the path on the heap so that a long model name cannot be
         * truncated, which would make the lookup below fail.
         */
        node_path = g_strdup_printf("/cpus/%s@%x", spapr->cpu_model,
                                    cpu->cpu_index);
        offset = fdt_path_offset(fdt, node_path);
        g_free(node_path);
        if (offset < 0) {
            return offset;
        }

        if (nb_numa_nodes > 1) {
            ret = fdt_setprop(fdt, offset, "ibm,associativity", associativity,
                              sizeof(associativity));
            if (ret < 0) {
                return ret;
            }
        }

        ret = fdt_setprop(fdt, offset, "ibm,pft-size",
                          pft_size_prop, sizeof(pft_size_prop));
        if (ret < 0) {
            return ret;
        }
    }
    return ret;
}

227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260

/*
 * Encode the segment/page size table of @env as big-endian cells in @prop.
 *
 * Each segment entry is written as: page shift, SLB encoding, number of
 * page encodings, followed by (page shift, PTE encoding) pairs.  Encoding
 * stops at the first segment with a zero page shift, or when the next entry
 * would not fit into @maxsize bytes.
 *
 * Returns the number of bytes written to @prop.
 */
static size_t create_page_sizes_prop(CPUPPCState *env, uint32_t *prop,
                                     size_t maxsize)
{
    size_t maxcells = maxsize / sizeof(uint32_t);
    int i, j, count;
    uint32_t *p = prop;

    for (i = 0; i < PPC_PAGE_SIZES_MAX_SZ; i++) {
        struct ppc_one_seg_page_size *sps = &env->sps.sps[i];
        size_t used, needed;

        if (!sps->page_shift) {
            break;
        }
        for (count = 0; count < PPC_PAGE_SIZES_MAX_SZ; count++) {
            if (sps->enc[count].page_shift == 0) {
                break;
            }
        }

        /*
         * Compare by addition: subtracting from maxcells would wrap around
         * for small buffers and silently disable the bounds check.
         */
        used = (size_t)(p - prop);
        needed = 3 + (size_t)count * 2;
        if (used + needed >= maxcells) {
            break;
        }

        *(p++) = cpu_to_be32(sps->page_shift);
        *(p++) = cpu_to_be32(sps->slb_enc);
        *(p++) = cpu_to_be32(count);
        for (j = 0; j < count; j++) {
            *(p++) = cpu_to_be32(sps->enc[j].page_shift);
            *(p++) = cpu_to_be32(sps->enc[j].pte_enc);
        }
    }

    return (p - prop) * sizeof(uint32_t);
}

261 262 263 264 265 266 267 268 269 270 271
/*
 * Evaluate a libfdt expression and terminate QEMU with exit(1) if it
 * yields a negative value, after printing the expression text and the
 * fdt_strerror() description to stderr.
 *
 * The temporary is deliberately not called "ret": callers commonly have a
 * local of that name, and _FDT(ret) would otherwise initialise the
 * temporary from itself.
 */
#define _FDT(exp) \
    do { \
        int fdt_ret_ = (exp);                                      \
        if (fdt_ret_ < 0) {                                        \
            fprintf(stderr, "qemu: error creating device tree: %s: %s\n", \
                    #exp, fdt_strerror(fdt_ret_));                 \
            exit(1);                                               \
        }                                                          \
    } while (0)


272
/*
 * Build the skeleton flattened device tree for the machine.
 *
 * The tree is created with libfdt's sequential-write calls in a zeroed
 * buffer of FDT_MAX_SIZE bytes and contains the root node, /chosen, /cpus
 * (one node per core), /rtas, /interrupt-controller, /vdevice and the
 * event sources added by spapr_events_fdt_skel().
 *
 * @cpu_model:      CPU model name; its upper-cased form names the cpu nodes
 * @initrd_base:    start of the initrd (also reserved when @initrd_size != 0)
 * @initrd_size:    size of the initrd, 0 if there is none
 * @kernel_size:    size of the kernel loaded at KERNEL_LOAD_ADDR, 0 if none
 * @little_endian:  adds "qemu,boot-kernel-le" when a kernel is present
 * @boot_device:    optional value for "qemu,boot-device"
 * @kernel_cmdline: value for "bootargs"
 * @epow_irq:       forwarded to spapr_events_fdt_skel()
 *
 * Side effect: stores a copy of the upper-cased model name in the global
 * spapr->cpu_model.  Any libfdt failure terminates QEMU through _FDT().
 *
 * Returns the g_malloc0()-allocated buffer holding the finished tree.
 */
static void *spapr_create_fdt_skel(const char *cpu_model,
                                   hwaddr initrd_base,
                                   hwaddr initrd_size,
                                   hwaddr kernel_size,
                                   bool little_endian,
                                   const char *boot_device,
                                   const char *kernel_cmdline,
                                   uint32_t epow_irq)
{
    void *fdt;
    CPUState *cs;
    uint32_t start_prop = cpu_to_be32(initrd_base);
    uint32_t end_prop = cpu_to_be32(initrd_base + initrd_size);
    char hypertas_prop[] = "hcall-pft\0hcall-term\0hcall-dabr\0hcall-interrupt"
        "\0hcall-tce\0hcall-vio\0hcall-splpar\0hcall-bulk\0hcall-set-mode";
    char qemu_hypertas_prop[] = "hcall-memop1";
    uint32_t refpoints[] = {cpu_to_be32(0x4), cpu_to_be32(0x4)};
    uint32_t interrupt_server_ranges_prop[] = {0, cpu_to_be32(smp_cpus)};
    char *modelname;
    int i, smt = kvmppc_smt_threads();
    unsigned char vec5[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x80};

    fdt = g_malloc0(FDT_MAX_SIZE);
    _FDT((fdt_create(fdt, FDT_MAX_SIZE)));

    if (kernel_size) {
        _FDT((fdt_add_reservemap_entry(fdt, KERNEL_LOAD_ADDR, kernel_size)));
    }
    if (initrd_size) {
        _FDT((fdt_add_reservemap_entry(fdt, initrd_base, initrd_size)));
    }
    _FDT((fdt_finish_reservemap(fdt)));

    /* Root node */
    _FDT((fdt_begin_node(fdt, "")));
    _FDT((fdt_property_string(fdt, "device_type", "chrp")));
    _FDT((fdt_property_string(fdt, "model", "IBM pSeries (emulated by qemu)")));
    _FDT((fdt_property_string(fdt, "compatible", "qemu,pseries")));

    _FDT((fdt_property_cell(fdt, "#address-cells", 0x2)));
    _FDT((fdt_property_cell(fdt, "#size-cells", 0x2)));

    /* /chosen */
    _FDT((fdt_begin_node(fdt, "chosen")));

    /* Set Form1_affinity */
    _FDT((fdt_property(fdt, "ibm,architecture-vec-5", vec5, sizeof(vec5))));

    _FDT((fdt_property_string(fdt, "bootargs", kernel_cmdline)));
    _FDT((fdt_property(fdt, "linux,initrd-start",
                       &start_prop, sizeof(start_prop))));
    _FDT((fdt_property(fdt, "linux,initrd-end",
                       &end_prop, sizeof(end_prop))));
    if (kernel_size) {
        uint64_t kprop[2] = { cpu_to_be64(KERNEL_LOAD_ADDR),
                              cpu_to_be64(kernel_size) };

        _FDT((fdt_property(fdt, "qemu,boot-kernel", &kprop, sizeof(kprop))));
        if (little_endian) {
            _FDT((fdt_property(fdt, "qemu,boot-kernel-le", NULL, 0)));
        }
    }
    if (boot_device) {
        _FDT((fdt_property_string(fdt, "qemu,boot-device", boot_device)));
    }
    _FDT((fdt_property_cell(fdt, "qemu,graphic-width", graphic_width)));
    _FDT((fdt_property_cell(fdt, "qemu,graphic-height", graphic_height)));
    _FDT((fdt_property_cell(fdt, "qemu,graphic-depth", graphic_depth)));

    _FDT((fdt_end_node(fdt)));

    /* cpus */
    _FDT((fdt_begin_node(fdt, "cpus")));

    _FDT((fdt_property_cell(fdt, "#address-cells", 0x1)));
    _FDT((fdt_property_cell(fdt, "#size-cells", 0x0)));

    /*
     * Upper-case the model name for use in node names.  g_ascii_strup()
     * is locale-independent and, unlike toupper() on a plain char, is
     * well defined for bytes outside the ASCII range.
     */
    modelname = g_ascii_strup(cpu_model, -1);

    /* This is needed during FDT finalization */
    spapr->cpu_model = g_strdup(modelname);

    CPU_FOREACH(cs) {
        PowerPCCPU *cpu = POWERPC_CPU(cs);
        CPUPPCState *env = &cpu->env;
        PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cs);
        int index = cs->cpu_index;
        uint32_t servers_prop[smp_threads];
        uint32_t gservers_prop[smp_threads * 2];
        char *nodename;
        uint32_t segs[] = {cpu_to_be32(28), cpu_to_be32(40),
                           0xffffffff, 0xffffffff};
        uint32_t tbfreq = kvm_enabled() ? kvmppc_get_tbfreq() : TIMEBASE_FREQ;
        uint32_t cpufreq = kvm_enabled() ? kvmppc_get_clockfreq() : 1000000000;
        uint32_t page_sizes_prop[64];
        size_t page_sizes_prop_size;

        /* One node per core: skip every thread but the first */
        if ((index % smt) != 0) {
            continue;
        }

        nodename = g_strdup_printf("%s@%x", modelname, index);

        _FDT((fdt_begin_node(fdt, nodename)));

        g_free(nodename);

        _FDT((fdt_property_cell(fdt, "reg", index)));
        _FDT((fdt_property_string(fdt, "device_type", "cpu")));

        _FDT((fdt_property_cell(fdt, "cpu-version", env->spr[SPR_PVR])));
        _FDT((fdt_property_cell(fdt, "d-cache-block-size",
                                env->dcache_line_size)));
        _FDT((fdt_property_cell(fdt, "d-cache-line-size",
                                env->dcache_line_size)));
        _FDT((fdt_property_cell(fdt, "i-cache-block-size",
                                env->icache_line_size)));
        _FDT((fdt_property_cell(fdt, "i-cache-line-size",
                                env->icache_line_size)));

        if (pcc->l1_dcache_size) {
            _FDT((fdt_property_cell(fdt, "d-cache-size", pcc->l1_dcache_size)));
        } else {
            fprintf(stderr, "Warning: Unknown L1 dcache size for cpu\n");
        }
        if (pcc->l1_icache_size) {
            _FDT((fdt_property_cell(fdt, "i-cache-size", pcc->l1_icache_size)));
        } else {
            fprintf(stderr, "Warning: Unknown L1 icache size for cpu\n");
        }

        _FDT((fdt_property_cell(fdt, "timebase-frequency", tbfreq)));
        _FDT((fdt_property_cell(fdt, "clock-frequency", cpufreq)));
        _FDT((fdt_property_cell(fdt, "ibm,slb-size", env->slb_nr)));
        _FDT((fdt_property_string(fdt, "status", "okay")));
        _FDT((fdt_property(fdt, "64-bit", NULL, 0)));

        /* Build interrupt servers and gservers properties */
        for (i = 0; i < smp_threads; i++) {
            servers_prop[i] = cpu_to_be32(index + i);
            /* Hack, direct the group queues back to cpu 0 */
            gservers_prop[i*2] = cpu_to_be32(index + i);
            gservers_prop[i*2 + 1] = 0;
        }
        _FDT((fdt_property(fdt, "ibm,ppc-interrupt-server#s",
                           servers_prop, sizeof(servers_prop))));
        _FDT((fdt_property(fdt, "ibm,ppc-interrupt-gserver#s",
                           gservers_prop, sizeof(gservers_prop))));

        if (env->spr_cb[SPR_PURR].oea_read) {
            _FDT((fdt_property(fdt, "ibm,purr", NULL, 0)));
        }

        if (env->mmu_model & POWERPC_MMU_1TSEG) {
            _FDT((fdt_property(fdt, "ibm,processor-segment-sizes",
                               segs, sizeof(segs))));
        }

        /* Advertise VMX/VSX (vector extensions) if available
         *   0 / no property == no vector extensions
         *   1               == VMX / Altivec available
         *   2               == VSX available */
        if (env->insns_flags & PPC_ALTIVEC) {
            uint32_t vmx = (env->insns_flags2 & PPC2_VSX) ? 2 : 1;

            _FDT((fdt_property_cell(fdt, "ibm,vmx", vmx)));
        }

        /* Advertise DFP (Decimal Floating Point) if available
         *   0 / no property == no DFP
         *   1               == DFP available */
        if (env->insns_flags2 & PPC2_DFP) {
            _FDT((fdt_property_cell(fdt, "ibm,dfp", 1)));
        }

        page_sizes_prop_size = create_page_sizes_prop(env, page_sizes_prop,
                                                      sizeof(page_sizes_prop));
        if (page_sizes_prop_size) {
            _FDT((fdt_property(fdt, "ibm,segment-page-sizes",
                               page_sizes_prop, page_sizes_prop_size)));
        }

        _FDT((fdt_end_node(fdt)));
    }

    g_free(modelname);

    _FDT((fdt_end_node(fdt)));

    /* RTAS */
    _FDT((fdt_begin_node(fdt, "rtas")));

    _FDT((fdt_property(fdt, "ibm,hypertas-functions", hypertas_prop,
                       sizeof(hypertas_prop))));
    _FDT((fdt_property(fdt, "qemu,hypertas-functions", qemu_hypertas_prop,
                       sizeof(qemu_hypertas_prop))));

    _FDT((fdt_property(fdt, "ibm,associativity-reference-points",
        refpoints, sizeof(refpoints))));

    _FDT((fdt_property_cell(fdt, "rtas-error-log-max", RTAS_ERROR_LOG_MAX)));

    _FDT((fdt_end_node(fdt)));

    /* interrupt controller */
    _FDT((fdt_begin_node(fdt, "interrupt-controller")));

    _FDT((fdt_property_string(fdt, "device_type",
                              "PowerPC-External-Interrupt-Presentation")));
    _FDT((fdt_property_string(fdt, "compatible", "IBM,ppc-xicp")));
    _FDT((fdt_property(fdt, "interrupt-controller", NULL, 0)));
    _FDT((fdt_property(fdt, "ibm,interrupt-server-ranges",
                       interrupt_server_ranges_prop,
                       sizeof(interrupt_server_ranges_prop))));
    _FDT((fdt_property_cell(fdt, "#interrupt-cells", 2)));
    _FDT((fdt_property_cell(fdt, "linux,phandle", PHANDLE_XICP)));
    _FDT((fdt_property_cell(fdt, "phandle", PHANDLE_XICP)));

    _FDT((fdt_end_node(fdt)));

    /* vdevice */
    _FDT((fdt_begin_node(fdt, "vdevice")));

    _FDT((fdt_property_string(fdt, "device_type", "vdevice")));
    _FDT((fdt_property_string(fdt, "compatible", "IBM,vdevice")));
    _FDT((fdt_property_cell(fdt, "#address-cells", 0x1)));
    _FDT((fdt_property_cell(fdt, "#size-cells", 0x0)));
    _FDT((fdt_property_cell(fdt, "#interrupt-cells", 0x2)));
    _FDT((fdt_property(fdt, "interrupt-controller", NULL, 0)));

    _FDT((fdt_end_node(fdt)));

    /* event-sources */
    spapr_events_fdt_skel(fdt, epow_irq);

    _FDT((fdt_end_node(fdt))); /* close root node */
    _FDT((fdt_finish(fdt)));

    return fdt;
}

517 518 519 520 521 522
/*
 * Add the memory nodes to the device tree @fdt.
 *
 * Three kinds of nodes are created directly under the root:
 *   - "memory@0" covering the RMA,
 *   - the remainder of NUMA node 0 above the RMA, if any,
 *   - one node for each further NUMA node.
 *
 * Side effect: spapr->rma_size is clamped to the size of NUMA node 0.
 * libfdt failures terminate QEMU through _FDT().
 *
 * Always returns 0.
 */
static int spapr_populate_memory(sPAPREnvironment *spapr, void *fdt)
{
    uint32_t associativity[] = {cpu_to_be32(0x4), cpu_to_be32(0x0),
                                cpu_to_be32(0x0), cpu_to_be32(0x0),
                                cpu_to_be32(0x0)};
    char mem_name[32];
    hwaddr node0_size, mem_start;
    uint64_t mem_reg_property[2];
    int i, off;

    /* memory node(s) */
    node0_size = (nb_numa_nodes > 1) ? node_mem[0] : ram_size;
    if (spapr->rma_size > node0_size) {
        spapr->rma_size = node0_size;
    }

    /* RMA */
    mem_reg_property[0] = 0;
    mem_reg_property[1] = cpu_to_be64(spapr->rma_size);
    off = fdt_add_subnode(fdt, 0, "memory@0");
    _FDT(off);
    _FDT((fdt_setprop_string(fdt, off, "device_type", "memory")));
    _FDT((fdt_setprop(fdt, off, "reg", mem_reg_property,
                      sizeof(mem_reg_property))));
    _FDT((fdt_setprop(fdt, off, "ibm,associativity", associativity,
                      sizeof(associativity))));

    /* RAM: Node 0 */
    if (node0_size > spapr->rma_size) {
        mem_reg_property[0] = cpu_to_be64(spapr->rma_size);
        mem_reg_property[1] = cpu_to_be64(node0_size - spapr->rma_size);

        /* Bounded formatting: never write past the end of mem_name */
        snprintf(mem_name, sizeof(mem_name), "memory@" TARGET_FMT_lx,
                 spapr->rma_size);
        off = fdt_add_subnode(fdt, 0, mem_name);
        _FDT(off);
        _FDT((fdt_setprop_string(fdt, off, "device_type", "memory")));
        _FDT((fdt_setprop(fdt, off, "reg", mem_reg_property,
                          sizeof(mem_reg_property))));
        _FDT((fdt_setprop(fdt, off, "ibm,associativity", associativity,
                          sizeof(associativity))));
    }

    /* RAM: Node 1 and beyond */
    mem_start = node0_size;
    for (i = 1; i < nb_numa_nodes; i++) {
        mem_reg_property[0] = cpu_to_be64(mem_start);
        mem_reg_property[1] = cpu_to_be64(node_mem[i]);
        /* The last two associativity cells carry the NUMA node number */
        associativity[3] = associativity[4] = cpu_to_be32(i);
        snprintf(mem_name, sizeof(mem_name), "memory@" TARGET_FMT_lx,
                 mem_start);
        off = fdt_add_subnode(fdt, 0, mem_name);
        _FDT(off);
        _FDT((fdt_setprop_string(fdt, off, "device_type", "memory")));
        _FDT((fdt_setprop(fdt, off, "reg", mem_reg_property,
                          sizeof(mem_reg_property))));
        _FDT((fdt_setprop(fdt, off, "ibm,associativity", associativity,
                          sizeof(associativity))));
        mem_start += node_mem[i];
    }

    return 0;
}

579
/*
 * Produce the final device tree and copy it into guest memory.
 *
 * The skeleton tree (spapr->fdt_skel) is opened into a temporary buffer of
 * FDT_MAX_SIZE bytes, completed with the memory, VIO, PCI, RTAS and per-CPU
 * information, packed, and written to guest physical address @fdt_addr.
 *
 * @rtas_addr and @rtas_size are forwarded to
 * spapr_rtas_device_tree_setup().
 *
 * Failures while adding memory, VIO or PCI nodes terminate QEMU; failures
 * in the RTAS and CPU fix-up steps are only reported on stderr.
 */
static void spapr_finalize_fdt(sPAPREnvironment *spapr,
                               hwaddr fdt_addr,
                               hwaddr rtas_addr,
                               hwaddr rtas_size)
{
    int ret;
    void *fdt;
    sPAPRPHBState *phb;

    fdt = g_malloc(FDT_MAX_SIZE);

    /* open out the base tree into a temp buffer for the final tweaks */
    _FDT((fdt_open_into(spapr->fdt_skel, fdt, FDT_MAX_SIZE)));

    ret = spapr_populate_memory(spapr, fdt);
    if (ret < 0) {
        fprintf(stderr, "couldn't setup memory nodes in fdt\n");
        exit(1);
    }

    ret = spapr_populate_vdevice(spapr->vio_bus, fdt);
    if (ret < 0) {
        fprintf(stderr, "couldn't setup vio devices in fdt\n");
        exit(1);
    }

    QLIST_FOREACH(phb, &spapr->phbs, list) {
        ret = spapr_populate_pci_dt(phb, PHANDLE_XICP, fdt);
        /*
         * Check every bridge: testing only after the loop would let a
         * failure be masked by a later bridge that succeeds.
         */
        if (ret < 0) {
            fprintf(stderr, "couldn't setup PCI devices in fdt\n");
            exit(1);
        }
    }

    /* RTAS */
    ret = spapr_rtas_device_tree_setup(fdt, rtas_addr, rtas_size);
    if (ret < 0) {
        fprintf(stderr, "Couldn't set up RTAS device tree properties\n");
    }

    /* Advertise NUMA via ibm,associativity */
    ret = spapr_fixup_cpu_dt(fdt, spapr);
    if (ret < 0) {
        fprintf(stderr, "Couldn't finalize CPU device tree properties\n");
    }

    if (!spapr->has_graphics) {
        spapr_populate_chosen_stdout(fdt, spapr->vio_bus);
    }

    _FDT((fdt_pack(fdt)));

    if (fdt_totalsize(fdt) > FDT_MAX_SIZE) {
        hw_error("FDT too big ! 0x%x bytes (max is 0x%x)\n",
                 fdt_totalsize(fdt), FDT_MAX_SIZE);
        exit(1);
    }

    cpu_physical_memory_write(fdt_addr, fdt, fdt_totalsize(fdt));

    g_free(fdt);
}

/*
 * Map a kernel image address to its load address: keep the low 28 bits of
 * @addr and rebase them on KERNEL_LOAD_ADDR.  @opaque is not used.
 */
static uint64_t translate_kernel_address(void *opaque, uint64_t addr)
{
    const uint64_t low_bits = addr & 0x0fffffff;

    return low_bits + KERNEL_LOAD_ADDR;
}

648
/*
 * Handle a hypercall issued by @cpu.
 *
 * The hypercall number is taken from GPR3 and the arguments start at GPR4;
 * the result is written back to GPR3.  When msr_pr is set the call is
 * rejected with H_PRIVILEGE instead of being dispatched.
 */
static void emulate_spapr_hypercall(PowerPCCPU *cpu)
{
    /* NOTE(review): msr_pr presumably reads a local named "env" - confirm */
    CPUPPCState *env = &cpu->env;

    if (!msr_pr) {
        env->gpr[3] = spapr_hypercall(cpu, env->gpr[3], &env->gpr[4]);
        return;
    }

    hcall_dprintf("Hypercall made with MSR[PR]=1\n");
    env->gpr[3] = H_PRIVILEGE;
}

660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686
/*
 * Reset the hash page table and, if requested, recompute the RMA size.
 *
 * kvmppc_reset_htab() is asked first.  A positive result is taken as the
 * shift of a table handled by the kernel.  Otherwise a table of
 * HTAB_SIZE(spapr) bytes is allocated on first use and cleared.
 */
static void spapr_reset_htab(sPAPREnvironment *spapr)
{
    /* For now the table size is fixed; later it should probably scale
     * with the amount of guest RAM */
    long kvm_shift = kvmppc_reset_htab(spapr->htab_shift);

    if (kvm_shift <= 0) {
        if (spapr->htab == NULL) {
            /* No table yet: allocate one, aligned to its own size */
            spapr->htab = qemu_memalign(HTAB_SIZE(spapr), HTAB_SIZE(spapr));
        }

        /* Start from an empty table */
        memset(spapr->htab, 0, HTAB_SIZE(spapr));
    } else {
        /* Kernel handles htab, we don't need to allocate one */
        spapr->htab_shift = kvm_shift;
    }

    /* Update the RMA size if necessary */
    if (spapr->vrma_adjust) {
        spapr->rma_size = kvmppc_rma_size(ram_size, spapr->htab_shift);
    }
}

689
static void ppc_spapr_reset(void)
690
{
691
    PowerPCCPU *first_ppc_cpu;
692

693 694
    /* Reset the hash table & recalc the RMA */
    spapr_reset_htab(spapr);
695

696
    qemu_devices_reset();
697 698 699 700 701 702

    /* Load the fdt */
    spapr_finalize_fdt(spapr, spapr->fdt_addr, spapr->rtas_addr,
                       spapr->rtas_size);

    /* Set up the entry state */
703 704 705 706 707
    first_ppc_cpu = POWERPC_CPU(first_cpu);
    first_ppc_cpu->env.gpr[3] = spapr->fdt_addr;
    first_ppc_cpu->env.gpr[5] = 0;
    first_cpu->halted = 0;
    first_ppc_cpu->env.nip = spapr->entry_point;
708 709 710

}

711 712
static void spapr_cpu_reset(void *opaque)
{
713
    PowerPCCPU *cpu = opaque;
714
    CPUState *cs = CPU(cpu);
715
    CPUPPCState *env = &cpu->env;
716

717
    cpu_reset(cs);
718 719 720 721

    /* All CPUs start halted.  CPU0 is unhalted from the machine level
     * reset code and the rest are explicitly started up by the guest
     * using an RTAS call */
722
    cs->halted = 1;
723 724

    env->spr[SPR_HIOR] = 0;
725

726
    env->external_htab = (uint8_t *)spapr->htab;
727 728
    env->htab_base = -1;
    env->htab_mask = HTAB_SIZE(spapr) - 1;
729
    env->spr[SPR_SDR1] = (target_ulong)(uintptr_t)spapr->htab |
730
        (spapr->htab_shift - 18);
731 732
}

D
David Gibson 已提交
733 734
/*
 * Create the "spapr-nvram" device on the VIO bus and record it in
 * spapr->nvram.
 *
 * If the machine option "nvram" names a block device, that device backs
 * the NVRAM; an unknown name is a fatal error.
 */
static void spapr_create_nvram(sPAPREnvironment *spapr)
{
    DeviceState *nvram_dev = qdev_create(&spapr->vio_bus->bus, "spapr-nvram");
    const char *drivename = qemu_opt_get(qemu_get_machine_opts(), "nvram");

    if (drivename != NULL) {
        BlockDriverState *backing = bdrv_find(drivename);

        if (backing == NULL) {
            fprintf(stderr, "No such block device \"%s\" for nvram\n",
                    drivename);
            exit(1);
        }
        qdev_prop_set_drive_nofail(nvram_dev, "drive", backing);
    }

    qdev_init_nofail(nvram_dev);

    spapr->nvram = (struct sPAPRNVRAM *)nvram_dev;
}

755
/* Returns whether we want to use VGA or not */
756 757
static int spapr_vga_init(PCIBus *pci_bus)
{
758 759
    switch (vga_interface_type) {
    case VGA_NONE:
760 761
    case VGA_STD:
        return pci_vga_init(pci_bus) != NULL;
762
    default:
763 764
        fprintf(stderr, "This vga model is not supported,"
                "currently it only supports -vga std\n");
765 766
        exit(0);
        break;
767 768 769
    }
}

770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796
/*
 * Migration description for the machine-wide sPAPR state: the fields of
 * sPAPREnvironment listed below are saved under the section name "spapr".
 */
static const VMStateDescription vmstate_spapr = {
    .name = "spapr",
    .version_id = 1,
    .minimum_version_id = 1,
    .minimum_version_id_old = 1,
    .fields      = (VMStateField []) {
        /* IRQ allocation cursor used by spapr_allocate_irq() */
        VMSTATE_UINT32(next_irq, sPAPREnvironment),

        /* RTC offset */
        VMSTATE_UINT64(rtc_offset, sPAPREnvironment),

        VMSTATE_END_OF_LIST()
    },
};

/*
 * Helpers for walking the hash page table during migration.
 *
 * HPTE(_table, _i)   pointer to entry _i; the table is addressed as an
 *                    array of uint64_t with two words per entry.
 * HPTE_VALID(_hpte)  non-zero if HPTE64_V_VALID is set in the entry's
 *                    first word (read through tswap64()).
 * HPTE_DIRTY(_hpte)  non-zero if HPTE64_V_HPTE_DIRTY is set in the entry's
 *                    first word (read through tswap64()).
 * CLEAN_HPTE(_hpte)  clears HPTE64_V_HPTE_DIRTY in the entry's first word.
 */
#define HPTE(_table, _i)   (void *)(((uint64_t *)(_table)) + ((_i) * 2))
#define HPTE_VALID(_hpte)  (tswap64(*((uint64_t *)(_hpte))) & HPTE64_V_VALID)
#define HPTE_DIRTY(_hpte)  (tswap64(*((uint64_t *)(_hpte))) & HPTE64_V_HPTE_DIRTY)
#define CLEAN_HPTE(_hpte)  ((*(uint64_t *)(_hpte)) &= tswap64(~HPTE64_V_HPTE_DIRTY))

/*
 * Start saving the hash page table to @f; @opaque is the sPAPREnvironment.
 *
 * The table shift is written first as a header.  If QEMU owns the table
 * (spapr->htab is set) the save cursor is reset for a first pass.
 * Otherwise KVM must be enabled and a file descriptor for reading the
 * table is obtained from it.
 *
 * Returns 0 on success, -1 if that file descriptor cannot be opened.
 */
static int htab_save_setup(QEMUFile *f, void *opaque)
{
    sPAPREnvironment *spapr = opaque;

    /* "Iteration" header */
    qemu_put_be32(f, spapr->htab_shift);

    if (!spapr->htab) {
        assert(kvm_enabled());

        spapr->htab_fd = kvmppc_get_htab_fd(false);
        if (spapr->htab_fd < 0) {
            fprintf(stderr, "Unable to open fd for reading hash table from KVM: %s\n",
                    strerror(errno));
            return -1;
        }
        return 0;
    }

    spapr->htab_save_index = 0;
    spapr->htab_first_pass = true;
    return 0;
}

/*
 * First pass of the hash page table save: stream every valid HPTE to @f.
 *
 * Scanning resumes at spapr->htab_save_index.  Runs of valid entries are
 * sent as chunks made of the start index, a 16-bit count of valid entries,
 * a zero count of invalid entries and the raw entries.  Every entry that is
 * scanned has its dirty bit cleared.  The scan stops when the table end is
 * reached, when more than @max_ns nanoseconds have elapsed after a chunk,
 * or when qemu_file_rate_limit() reports that the limit was hit.
 *
 * On reaching the end of the table the cursor is rewound to 0 and
 * spapr->htab_first_pass is cleared.
 */
static void htab_save_first_pass(QEMUFile *f, sPAPREnvironment *spapr,
                                 int64_t max_ns)
{
    int htabslots = HTAB_SIZE(spapr) / HASH_PTE_SIZE_64;
    int index = spapr->htab_save_index;
    int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);

    assert(spapr->htab_first_pass);

    do {
        int chunkstart;

        /*
         * Consume invalid HPTEs.  The entry is cleaned before the index
         * moves on, so the entry just examined is the one cleaned and
         * nothing is written past the end of the table.
         */
        while ((index < htabslots)
               && !HPTE_VALID(HPTE(spapr->htab, index))) {
            CLEAN_HPTE(HPTE(spapr->htab, index));
            index++;
        }

        /*
         * Consume valid HPTEs.  The chunk header stores the count in 16
         * bits, so a run is cut before it can overflow that field; the
         * rest is picked up by the next loop iteration.
         */
        chunkstart = index;
        while ((index < htabslots) && (index - chunkstart < UINT16_MAX)
               && HPTE_VALID(HPTE(spapr->htab, index))) {
            CLEAN_HPTE(HPTE(spapr->htab, index));
            index++;
        }

        if (index > chunkstart) {
            int n_valid = index - chunkstart;

            qemu_put_be32(f, chunkstart);
            qemu_put_be16(f, n_valid);
            qemu_put_be16(f, 0);
            qemu_put_buffer(f, HPTE(spapr->htab, chunkstart),
                            HASH_PTE_SIZE_64 * n_valid);

            if ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) > max_ns) {
                break;
            }
        }
    } while ((index < htabslots) && !qemu_file_rate_limit(f));

    if (index >= htabslots) {
        assert(index == htabslots);
        index = 0;
        spapr->htab_first_pass = false;
    }
    spapr->htab_save_index = index;
}

865 866
/*
 * Second and later migration passes over a QEMU-allocated hash page table:
 * only HPTEs flagged dirty are transmitted.
 *
 * A chunk consists of a run of dirty valid HPTEs immediately followed by a
 * run of dirty invalid HPTEs.  The header carries both counts; only the
 * valid entries are followed by data:
 *
 *     be32 index of first entry | be16 n_valid | be16 n_invalid |
 *     raw data of the n_valid entries
 *
 * Every entry placed in a chunk is marked clean.
 *
 * @max_ns: time budget in nanoseconds.  A negative value selects the final
 *          pass, in which neither the time budget nor
 *          qemu_file_rate_limit() stops the walk.
 *
 * Returns 1 if the whole table was examined and nothing had to be sent,
 * 0 otherwise.
 */
static int htab_save_later_pass(QEMUFile *f, sPAPREnvironment *spapr,
                                int64_t max_ns)
{
    bool final = max_ns < 0;
    int htabslots = HTAB_SIZE(spapr) / HASH_PTE_SIZE_64;
    int examined = 0, sent = 0;
    int index = spapr->htab_save_index;
    int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
    /* Both counts in the chunk header are 16-bit fields; longer runs are
     * split over several chunks instead of being silently truncated. */
    const int max_run = 0xffff;

    assert(!spapr->htab_first_pass);

    do {
        int chunkstart, invalidstart;

        /* Consume non-dirty HPTEs */
        while ((index < htabslots)
               && !HPTE_DIRTY(HPTE(spapr->htab, index))) {
            index++;
            examined++;
        }

        chunkstart = index;
        /* Consume valid dirty HPTEs */
        while ((index < htabslots)
               && ((index - chunkstart) < max_run)
               && HPTE_DIRTY(HPTE(spapr->htab, index))
               && HPTE_VALID(HPTE(spapr->htab, index))) {
            CLEAN_HPTE(HPTE(spapr->htab, index));
            index++;
            examined++;
        }

        invalidstart = index;
        /* Consume invalid dirty HPTEs */
        while ((index < htabslots)
               && ((index - invalidstart) < max_run)
               && HPTE_DIRTY(HPTE(spapr->htab, index))
               && !HPTE_VALID(HPTE(spapr->htab, index))) {
            CLEAN_HPTE(HPTE(spapr->htab, index));
            index++;
            examined++;
        }

        if (index > chunkstart) {
            int n_valid = invalidstart - chunkstart;
            int n_invalid = index - invalidstart;

            qemu_put_be32(f, chunkstart);
            qemu_put_be16(f, n_valid);
            qemu_put_be16(f, n_invalid);
            qemu_put_buffer(f, HPTE(spapr->htab, chunkstart),
                            HASH_PTE_SIZE_64 * n_valid);
            sent += index - chunkstart;

            if (!final && (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) > max_ns) {
                break;
            }
        }

        if (examined >= htabslots) {
            break;
        }

        /* Wrap around so that a pass starting mid-table still covers
         * every slot once */
        if (index >= htabslots) {
            assert(index == htabslots);
            index = 0;
        }
    } while ((examined < htabslots) && (!qemu_file_rate_limit(f) || final));

    if (index >= htabslots) {
        assert(index == htabslots);
        index = 0;
    }

    spapr->htab_save_index = index;

    return (examined >= htabslots) && (sent == 0) ? 1 : 0;
}

942 943 944
/* Time budget, in nanoseconds, that htab_save_iterate() grants to a single
 * hash table save pass */
#define MAX_ITERATION_NS    5000000 /* 5 ms */
/* Buffer size argument handed to kvmppc_save_htab() (presumably in bytes;
 * confirm against the kvmppc_save_htab() implementation) */
#define MAX_KVM_BUF_SIZE    2048

945 946 947
/*
 * save_live_iterate handler for the hash page table.
 *
 * Emits one iteration section: a zero be32 header, the chunks produced by
 * the appropriate backend, and an all-zero end marker (be32 + 2 x be16).
 *
 * Backend selection:
 *   - no QEMU-side table (spapr->htab == NULL): the table lives in KVM and
 *     is read through spapr->htab_fd by kvmppc_save_htab();
 *   - otherwise the first pass runs until it has covered the table, after
 *     which later passes send dirty entries only.
 *
 * Returns a negative value straight from kvmppc_save_htab() on failure (in
 * which case no end marker is written), 0 after a first-pass step, or else
 * whatever the KVM / later-pass backend returned.
 */
static int htab_save_iterate(QEMUFile *f, void *opaque)
{
    sPAPREnvironment *spapr = opaque;
    int status = 0;

    /* Iteration header */
    qemu_put_be32(f, 0);

    if (spapr->htab) {
        if (spapr->htab_first_pass) {
            htab_save_first_pass(f, spapr, MAX_ITERATION_NS);
        } else {
            status = htab_save_later_pass(f, spapr, MAX_ITERATION_NS);
        }
    } else {
        assert(kvm_enabled());

        status = kvmppc_save_htab(f, spapr->htab_fd,
                                  MAX_KVM_BUF_SIZE, MAX_ITERATION_NS);
        if (status < 0) {
            return status;
        }
    }

    /* End marker */
    qemu_put_be32(f, 0);
    qemu_put_be16(f, 0);
    qemu_put_be16(f, 0);

    return status;
}

/*
 * save_live_complete handler for the hash page table.
 *
 * Writes the final iteration section: zero be32 header, every remaining
 * chunk, then the all-zero end marker.  Both backends are invoked with a
 * time budget of -1.  For the KVM backend the hash table fd is closed and
 * reset to -1 once the data has been written.
 *
 * Returns 0 on success, or the negative value reported by
 * kvmppc_save_htab() (no end marker is written in that case).
 */
static int htab_save_complete(QEMUFile *f, void *opaque)
{
    sPAPREnvironment *spapr = opaque;

    /* Iteration header */
    qemu_put_be32(f, 0);

    if (spapr->htab) {
        htab_save_later_pass(f, spapr, -1);
    } else {
        int err;

        assert(kvm_enabled());

        err = kvmppc_save_htab(f, spapr->htab_fd, MAX_KVM_BUF_SIZE, -1);
        if (err < 0) {
            return err;
        }
        close(spapr->htab_fd);
        spapr->htab_fd = -1;
    }

    /* End marker */
    qemu_put_be32(f, 0);
    qemu_put_be16(f, 0);
    qemu_put_be16(f, 0);

    return 0;
}

/*
 * load_state handler for the "spapr/htab" migration section.
 *
 * A section starts with a be32 header.  A non-zero header is the setup
 * section and carries the hash table shift, which must match the local
 * spapr->htab_shift.  A zero header introduces a list of chunks
 *
 *     be32 index | be16 n_valid | be16 n_invalid | n_valid raw HPTEs
 *
 * terminated by an all-zero chunk header.  Valid entries are copied into
 * the table and invalid ones are zeroed; when the table is owned by KVM
 * (spapr->htab == NULL) each chunk is forwarded through a KVM hash table
 * fd instead.
 *
 * The stream comes from outside this process, so the chunk bounds are
 * checked with 64-bit arithmetic before anything is written.
 *
 * Returns 0 on success, -EINVAL for an unsupported version, a hash shift
 * mismatch or an out-of-range chunk, -1 if the KVM fd cannot be opened, or
 * the negative value returned by kvmppc_load_htab_chunk().
 */
static int htab_load(QEMUFile *f, void *opaque, int version_id)
{
    sPAPREnvironment *spapr = opaque;
    uint32_t section_hdr;
    int fd = -1;
    int ret = 0;

    if (version_id < 1 || version_id > 1) {
        fprintf(stderr, "htab_load() bad version\n");
        return -EINVAL;
    }

    section_hdr = qemu_get_be32(f);

    if (section_hdr) {
        /* First section, just the hash shift */
        if (spapr->htab_shift != section_hdr) {
            return -EINVAL;
        }
        return 0;
    }

    if (!spapr->htab) {
        assert(kvm_enabled());

        fd = kvmppc_get_htab_fd(true);
        if (fd < 0) {
            fprintf(stderr, "Unable to open fd to restore KVM hash table: %s\n",
                    strerror(errno));
            /* Fail the load instead of aborting on an assertion later */
            return -1;
        }
    }

    while (true) {
        uint32_t index;
        uint16_t n_valid, n_invalid;

        index = qemu_get_be32(f);
        n_valid = qemu_get_be16(f);
        n_invalid = qemu_get_be16(f);

        if ((index == 0) && (n_valid == 0) && (n_invalid == 0)) {
            /* End of Stream */
            break;
        }

        /* Widen before adding: in 32 bits the sum can wrap around and
         * slip past the range check */
        if (((uint64_t)index + n_valid + n_invalid) >
            (HTAB_SIZE(spapr) / HASH_PTE_SIZE_64)) {
            /* Bad index in stream */
            fprintf(stderr, "htab_load() bad index %u (%hu+%hu entries) "
                    "in htab stream (htab_shift=%d)\n", index, n_valid, n_invalid,
                    spapr->htab_shift);
            ret = -EINVAL;
            break;
        }

        if (spapr->htab) {
            if (n_valid) {
                qemu_get_buffer(f, HPTE(spapr->htab, index),
                                HASH_PTE_SIZE_64 * n_valid);
            }
            if (n_invalid) {
                memset(HPTE(spapr->htab, index + n_valid), 0,
                       HASH_PTE_SIZE_64 * n_invalid);
            }
        } else {
            ret = kvmppc_load_htab_chunk(f, fd, index, n_valid, n_invalid);
            if (ret < 0) {
                break;
            }
            ret = 0;
        }
    }

    /* Single exit point so that the KVM fd is released on error paths too */
    if (fd >= 0) {
        close(fd);
    }

    return ret;
}

/* Live-migration callbacks for the hash page table; ppc_spapr_init()
 * registers them under the "spapr/htab" section name. */
static SaveVMHandlers savevm_htab_handlers = {
    .save_live_setup = htab_save_setup,
    .save_live_iterate = htab_save_iterate,
    .save_live_complete = htab_save_complete,
    .load_state = htab_load,
};

1094
/* pSeries LPAR / sPAPR hardware init */
1095
static void ppc_spapr_init(QEMUMachineInitArgs *args)
1096
{
1097 1098 1099 1100 1101
    ram_addr_t ram_size = args->ram_size;
    const char *cpu_model = args->cpu_model;
    const char *kernel_filename = args->kernel_filename;
    const char *kernel_cmdline = args->kernel_cmdline;
    const char *initrd_filename = args->initrd_filename;
1102
    const char *boot_device = args->boot_order;
1103
    PowerPCCPU *cpu;
A
Andreas Färber 已提交
1104
    CPUPPCState *env;
1105
    PCIHostState *phb;
1106
    int i;
A
Avi Kivity 已提交
1107 1108
    MemoryRegion *sysmem = get_system_memory();
    MemoryRegion *ram = g_new(MemoryRegion, 1);
A
Avi Kivity 已提交
1109
    hwaddr rma_alloc_size;
1110 1111 1112
    uint32_t initrd_base = 0;
    long kernel_size = 0, initrd_size = 0;
    long load_limit, rtas_limit, fw_size;
1113
    bool kernel_le = false;
1114
    char *filename;
1115

1116 1117
    msi_supported = true;

1118 1119 1120
    spapr = g_malloc0(sizeof(*spapr));
    QLIST_INIT(&spapr->phbs);

1121 1122
    cpu_ppc_hypercall = emulate_spapr_hypercall;

1123 1124 1125 1126 1127 1128 1129
    /* Allocate RMA if necessary */
    rma_alloc_size = kvmppc_alloc_rma("ppc_spapr.rma", sysmem);

    if (rma_alloc_size == -1) {
        hw_error("qemu: Unable to create RMA\n");
        exit(1);
    }
1130

1131
    if (rma_alloc_size && (rma_alloc_size < ram_size)) {
1132
        spapr->rma_size = rma_alloc_size;
1133
    } else {
1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148
        spapr->rma_size = ram_size;

        /* With KVM, we don't actually know whether KVM supports an
         * unbounded RMA (PR KVM) or is limited by the hash table size
         * (HV KVM using VRMA), so we always assume the latter
         *
         * In that case, we also limit the initial allocations for RTAS
         * etc... to 256M since we have no way to know what the VRMA size
         * is going to be as it depends on the size of the hash table
         * isn't determined yet.
         */
        if (kvm_enabled()) {
            spapr->vrma_adjust = 1;
            spapr->rma_size = MIN(spapr->rma_size, 0x10000000);
        }
1149 1150
    }

1151
    /* We place the device tree and RTAS just below either the top of the RMA,
1152 1153
     * or just below 2GB, whichever is lowere, so that it can be
     * processed with 32-bit real mode code if necessary */
1154
    rtas_limit = MIN(spapr->rma_size, 0x80000000);
1155 1156 1157
    spapr->rtas_addr = rtas_limit - RTAS_MAX_SIZE;
    spapr->fdt_addr = spapr->rtas_addr - FDT_MAX_SIZE;
    load_limit = spapr->fdt_addr - FW_OVERHEAD;
1158

1159 1160 1161 1162 1163 1164 1165 1166 1167 1168
    /* We aim for a hash table of size 1/128 the size of RAM.  The
     * normal rule of thumb is 1/64 the size of RAM, but that's much
     * more than needed for the Linux guests we support. */
    spapr->htab_shift = 18; /* Minimum architected size */
    while (spapr->htab_shift <= 46) {
        if ((1ULL << (spapr->htab_shift + 7)) >= ram_size) {
            break;
        }
        spapr->htab_shift++;
    }
1169

1170 1171 1172 1173 1174
    /* Set up Interrupt Controller before we create the VCPUs */
    spapr->icp = xics_system_init(smp_cpus * kvmppc_smt_threads() / smp_threads,
                                  XICS_IRQS);
    spapr->next_irq = XICS_IRQ_BASE;

1175 1176
    /* init CPUs */
    if (cpu_model == NULL) {
1177
        cpu_model = kvm_enabled() ? "host" : "POWER7";
1178 1179
    }
    for (i = 0; i < smp_cpus; i++) {
1180 1181
        cpu = cpu_ppc_init(cpu_model);
        if (cpu == NULL) {
1182 1183 1184
            fprintf(stderr, "Unable to find PowerPC CPU definition\n");
            exit(1);
        }
1185 1186
        env = &cpu->env;

1187 1188 1189
        /* Set time-base frequency to 512 MHz */
        cpu_ppc_tb_init(env, TIMEBASE_FREQ);

1190 1191 1192 1193
        /* PAPR always has exception vectors in RAM not ROM. To ensure this,
         * MSR[IP] should never be set.
         */
        env->msr_mask &= ~(1 << 6);
1194 1195 1196

        /* Tell KVM that we're in PAPR mode */
        if (kvm_enabled()) {
1197
            kvmppc_set_papr(cpu);
1198 1199
        }

1200 1201
        xics_cpu_setup(spapr->icp, cpu);

1202
        qemu_register_reset(spapr_cpu_reset, cpu);
1203 1204 1205
    }

    /* allocate RAM */
1206
    spapr->ram_limit = ram_size;
1207 1208 1209 1210
    if (spapr->ram_limit > rma_alloc_size) {
        ram_addr_t nonrma_base = rma_alloc_size;
        ram_addr_t nonrma_size = spapr->ram_limit - rma_alloc_size;

1211
        memory_region_init_ram(ram, NULL, "ppc_spapr.ram", nonrma_size);
1212
        vmstate_register_ram_global(ram);
1213 1214
        memory_region_add_subregion(sysmem, nonrma_base, ram);
    }
1215

1216
    filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, "spapr-rtas.bin");
1217
    spapr->rtas_size = load_image_targphys(filename, spapr->rtas_addr,
1218
                                           rtas_limit - spapr->rtas_addr);
1219
    if (spapr->rtas_size < 0) {
1220 1221 1222
        hw_error("qemu: could not load LPAR rtas '%s'\n", filename);
        exit(1);
    }
1223 1224 1225 1226 1227
    if (spapr->rtas_size > RTAS_MAX_SIZE) {
        hw_error("RTAS too big ! 0x%lx bytes (max is 0x%x)\n",
                 spapr->rtas_size, RTAS_MAX_SIZE);
        exit(1);
    }
1228
    g_free(filename);
1229

1230 1231 1232
    /* Set up EPOW events infrastructure */
    spapr_events_init(spapr);

1233
    /* Set up VIO bus */
1234 1235
    spapr->vio_bus = spapr_vio_bus_init();

P
Paolo Bonzini 已提交
1236
    for (i = 0; i < MAX_SERIAL_PORTS; i++) {
1237
        if (serial_hds[i]) {
1238
            spapr_vty_create(spapr->vio_bus, serial_hds[i]);
1239 1240
        }
    }
1241

D
David Gibson 已提交
1242 1243 1244
    /* We always have at least the nvram device on VIO */
    spapr_create_nvram(spapr);

1245
    /* Set up PCI */
1246
    spapr_pci_msi_init(spapr, SPAPR_PCI_MSI_WINDOW);
1247 1248
    spapr_pci_rtas_init();

1249
    phb = spapr_create_phb(spapr, 0);
1250

P
Paolo Bonzini 已提交
1251
    for (i = 0; i < nb_nics; i++) {
1252 1253 1254
        NICInfo *nd = &nd_table[i];

        if (!nd->model) {
1255
            nd->model = g_strdup("ibmveth");
1256 1257 1258
        }

        if (strcmp(nd->model, "ibmveth") == 0) {
1259
            spapr_vlan_create(spapr->vio_bus, nd);
1260
        } else {
1261
            pci_nic_init_nofail(&nd_table[i], phb->bus, nd->model, NULL);
1262 1263 1264
        }
    }

1265
    for (i = 0; i <= drive_get_max_bus(IF_SCSI); i++) {
1266
        spapr_vscsi_create(spapr->vio_bus);
1267 1268
    }

1269
    /* Graphics */
1270
    if (spapr_vga_init(phb->bus)) {
1271
        spapr->has_graphics = true;
1272 1273
    }

1274
    if (usb_enabled(spapr->has_graphics)) {
1275
        pci_create_simple(phb->bus, -1, "pci-ohci");
1276 1277 1278 1279 1280 1281
        if (spapr->has_graphics) {
            usbdevice_create("keyboard");
            usbdevice_create("mouse");
        }
    }

1282
    if (spapr->rma_size < (MIN_RMA_SLOF << 20)) {
1283 1284 1285 1286 1287
        fprintf(stderr, "qemu: pSeries SLOF firmware requires >= "
                "%ldM guest RMA (Real Mode Area memory)\n", MIN_RMA_SLOF);
        exit(1);
    }

1288 1289 1290 1291 1292
    if (kernel_filename) {
        uint64_t lowaddr = 0;

        kernel_size = load_elf(kernel_filename, translate_kernel_address, NULL,
                               NULL, &lowaddr, NULL, 1, ELF_MACHINE, 0);
1293 1294 1295 1296 1297 1298
        if (kernel_size < 0) {
            kernel_size = load_elf(kernel_filename,
                                   translate_kernel_address, NULL,
                                   NULL, &lowaddr, NULL, 0, ELF_MACHINE, 0);
            kernel_le = kernel_size > 0;
        }
1299
        if (kernel_size < 0) {
1300 1301
            kernel_size = load_image_targphys(kernel_filename,
                                              KERNEL_LOAD_ADDR,
1302
                                              load_limit - KERNEL_LOAD_ADDR);
1303 1304 1305 1306 1307 1308 1309 1310 1311
        }
        if (kernel_size < 0) {
            fprintf(stderr, "qemu: could not load kernel '%s'\n",
                    kernel_filename);
            exit(1);
        }

        /* load initrd */
        if (initrd_filename) {
1312 1313 1314 1315
            /* Try to locate the initrd in the gap between the kernel
             * and the firmware. Add a bit of space just in case
             */
            initrd_base = (KERNEL_LOAD_ADDR + kernel_size + 0x1ffff) & ~0xffff;
1316
            initrd_size = load_image_targphys(initrd_filename, initrd_base,
1317
                                              load_limit - initrd_base);
1318 1319 1320 1321 1322 1323 1324 1325 1326
            if (initrd_size < 0) {
                fprintf(stderr, "qemu: could not load initial ram disk '%s'\n",
                        initrd_filename);
                exit(1);
            }
        } else {
            initrd_base = 0;
            initrd_size = 0;
        }
1327
    }
1328

1329 1330 1331 1332
    if (bios_name == NULL) {
        bios_name = FW_FILE_NAME;
    }
    filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, bios_name);
1333 1334 1335 1336 1337 1338 1339 1340 1341
    fw_size = load_image_targphys(filename, 0, FW_MAX_SIZE);
    if (fw_size < 0) {
        hw_error("qemu: could not load LPAR rtas '%s'\n", filename);
        exit(1);
    }
    g_free(filename);

    spapr->entry_point = 0x100;

1342 1343 1344 1345
    vmstate_register(NULL, 0, &vmstate_spapr, spapr);
    register_savevm_live(NULL, "spapr/htab", -1, 1,
                         &savevm_htab_handlers, spapr);

1346
    /* Prepare the device tree */
1347
    spapr->fdt_skel = spapr_create_fdt_skel(cpu_model,
1348
                                            initrd_base, initrd_size,
1349
                                            kernel_size, kernel_le,
1350 1351
                                            boot_device, kernel_cmdline,
                                            spapr->epow_irq);
1352
    assert(spapr->fdt_skel != NULL);
1353 1354 1355 1356 1357
}

static QEMUMachine spapr_machine = {
    .name = "pseries",
    .desc = "pSeries Logical Partition (PAPR compliant)",
1358
    .is_default = 1,
1359
    .init = ppc_spapr_init,
1360
    .reset = ppc_spapr_reset,
1361
    .block_default_type = IF_SCSI,
1362 1363
    .max_cpus = MAX_CPUS,
    .no_parallel = 1,
1364
    .default_boot_order = NULL,
1365 1366 1367 1368 1369 1370 1371 1372
};

/* Registers the spapr_machine description with the machine registry. */
static void spapr_machine_init(void)
{
    qemu_register_machine(&spapr_machine);
}

/* Hooks spapr_machine_init() in through the machine_init() macro, which is
 * defined outside this file (presumably so it runs during startup; confirm
 * against the macro's definition). */
machine_init(spapr_machine_init);