spapr.c 25.2 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29
/*
 * QEMU PowerPC pSeries Logical Partition (aka sPAPR) hardware System Emulator
 *
 * Copyright (c) 2004-2007 Fabrice Bellard
 * Copyright (c) 2007 Jocelyn Mayer
 * Copyright (c) 2010 David Gibson, IBM Corporation.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 */
#include "sysemu.h"
#include "hw.h"
#include "elf.h"
30
#include "net.h"
31
#include "blockdev.h"
32 33 34
#include "cpus.h"
#include "kvm.h"
#include "kvm_ppc.h"
35 36 37 38 39 40

#include "hw/boards.h"
#include "hw/ppc.h"
#include "hw/loader.h"

#include "hw/spapr.h"
41
#include "hw/spapr_vio.h"
42
#include "hw/spapr_pci.h"
43
#include "hw/xics.h"
44

45 46
#include "kvm.h"
#include "kvm_ppc.h"
47
#include "pci.h"
48

A
Avi Kivity 已提交
49 50
#include "exec-memory.h"

51 52
#include <libfdt.h>

53 54 55 56 57 58 59 60 61 62
/*
 * SLOF memory layout:
 *
 * The SLOF raw image is loaded at 0.  SLOF copies its romfs right below the
 * flat device tree, then positions itself 31M below that.
 *
 * So we set FW_OVERHEAD to 40MB, which should account for all of that
 * and more.
 *
 * We load our kernel at 4M, leaving space for the initial SLOF image.
 */
#define FDT_MAX_SIZE            0x10000     /* size of the device tree buffer */
#define RTAS_MAX_SIZE           0x10000     /* upper bound for the RTAS image */
#define FW_MAX_SIZE             0x400000    /* upper bound for the SLOF image */
#define FW_FILE_NAME            "slof.bin"
#define FW_OVERHEAD             0x2800000   /* 40MB, see layout note above */
#define KERNEL_LOAD_ADDR        FW_MAX_SIZE /* kernel goes right above SLOF */

/* Minimum RMA size required by SLOF, in megabytes */
#define MIN_RMA_SLOF            128UL

/* Time-base frequency used when not running under KVM (512 MHz) */
#define TIMEBASE_FREQ           512000000ULL

#define MAX_CPUS                256
/* Number of interrupts handed to xics_system_init() */
#define XICS_IRQS               1024

/* Parameters of the single PCI host bridge created at machine init */
#define SPAPR_PCI_BUID          0x800000020000001ULL
#define SPAPR_PCI_MEM_WIN_ADDR  (0x10000000000ULL + 0xA0000000)
#define SPAPR_PCI_MEM_WIN_SIZE  0x20000000
#define SPAPR_PCI_IO_WIN_ADDR   (0x10000000000ULL + 0x80000000)

/* phandle assigned to the interrupt-controller node of the device tree */
#define PHANDLE_XICP            0x00001111

84 85
/* Global sPAPR machine state; allocated and filled in by ppc_spapr_init() */
sPAPREnvironment *spapr;

86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109
/*
 * Pick an interrupt number and look up the matching qemu_irq in the XICS.
 *
 * @hint:    interrupt number requested by the caller, or 0 to take the next
 *           free number from spapr->next_irq (which is then advanced)
 * @irq_num: if non-NULL, receives the chosen interrupt number on success
 *
 * Returns the qemu_irq for the chosen number, or NULL when
 * xics_find_qirq() has no line for it (*irq_num is left untouched then).
 */
qemu_irq spapr_allocate_irq(uint32_t hint, uint32_t *irq_num)
{
    /* FIXME: we should probably check for collisions somehow */
    uint32_t number = hint ? hint : spapr->next_irq++;
    qemu_irq line = xics_find_qirq(spapr->icp, number);

    if (line && irq_num) {
        *irq_num = number;
    }

    return line;
}

110 111 112
/*
 * Add an "ibm,associativity" property to the device tree node of every
 * CPU whose index is a multiple of the SMT thread count.
 *
 * The nodes are looked up as "/cpus/<spapr->cpu_model>@<cpu_index>", so
 * spapr->cpu_model must already have been set (it is asserted).
 *
 * Returns 0 (or the last non-negative libfdt result) on success, or the
 * negative libfdt error from the first lookup/setprop that failed.
 */
static int spapr_set_associativity(void *fdt, sPAPREnvironment *spapr)
{
    int ret = 0, offset;
    CPUPPCState *env;
    char cpu_model[32];
    int smt = kvmppc_smt_threads();

    assert(spapr->cpu_model);

    for (env = first_cpu; env != NULL; env = env->next_cpu) {
        /* First cell is the number of cells that follow */
        uint32_t associativity[] = {cpu_to_be32(0x5),
                                    cpu_to_be32(0x0),
                                    cpu_to_be32(0x0),
                                    cpu_to_be32(0x0),
                                    cpu_to_be32(env->numa_node),
                                    cpu_to_be32(env->cpu_index)};

        /* Only CPUs at a multiple of the SMT count have their own node */
        if ((env->cpu_index % smt) != 0) {
            continue;
        }

        /* A truncated path simply fails the lookup below */
        snprintf(cpu_model, sizeof(cpu_model), "/cpus/%s@%x",
                 spapr->cpu_model, env->cpu_index);

        offset = fdt_path_offset(fdt, cpu_model);
        if (offset < 0) {
            return offset;
        }

        ret = fdt_setprop(fdt, offset, "ibm,associativity", associativity,
                          sizeof(associativity));
        if (ret < 0) {
            return ret;
        }
    }
    return ret;
}

148
/*
 * Evaluate a libfdt call and terminate QEMU with a diagnostic if it returns
 * a negative error code.
 *
 * NOTE: this macro is deliberately left defined; it is also used by
 * spapr_finalize_fdt() below.
 */
#define _FDT(exp) \
    do { \
        int ret = (exp);                                           \
        if (ret < 0) {                                             \
            fprintf(stderr, "qemu: error creating device tree: %s: %s\n", \
                    #exp, fdt_strerror(ret));                      \
            exit(1);                                               \
        }                                                          \
    } while (0)

/*
 * Build the skeleton flattened device tree for the machine.
 *
 * The tree is written with libfdt's sequential-write calls into a freshly
 * allocated FDT_MAX_SIZE buffer and contains the root node, /chosen, the
 * memory nodes, /cpus, /rtas, /interrupt-controller and /vdevice.
 *
 * @cpu_model:      CPU model name; upper-cased to form the cpu node names
 *                  and stored (as a copy) in spapr->cpu_model
 * @rma_size:       RMA size; clamped to the size of NUMA node 0
 * @initrd_base:    load address of the initrd (0 if none)
 * @initrd_size:    size of the initrd (0 if none)
 * @kernel_size:    size of the loaded kernel (0 if none)
 * @boot_device:    value for the "qemu,boot-device" property
 * @kernel_cmdline: value for the "bootargs" property
 * @hash_shift:     second cell of the "ibm,pft-size" property
 *
 * Returns the buffer holding the finished tree. Any libfdt failure is fatal
 * (see _FDT).
 */
static void *spapr_create_fdt_skel(const char *cpu_model,
                                   target_phys_addr_t rma_size,
                                   target_phys_addr_t initrd_base,
                                   target_phys_addr_t initrd_size,
                                   target_phys_addr_t kernel_size,
                                   const char *boot_device,
                                   const char *kernel_cmdline,
                                   long hash_shift)
{
    void *fdt;
    CPUPPCState *env;
    uint64_t mem_reg_property[2];
    uint32_t start_prop = cpu_to_be32(initrd_base);
    uint32_t end_prop = cpu_to_be32(initrd_base + initrd_size);
    uint32_t pft_size_prop[] = {0, cpu_to_be32(hash_shift)};
    /* NUL-separated string list; sizeof() includes the final terminator */
    char hypertas_prop[] = "hcall-pft\0hcall-term\0hcall-dabr\0hcall-interrupt"
        "\0hcall-tce\0hcall-vio\0hcall-splpar\0hcall-bulk";
    uint32_t interrupt_server_ranges_prop[] = {0, cpu_to_be32(smp_cpus)};
    int i;
    char *modelname;
    int smt = kvmppc_smt_threads();
    unsigned char vec5[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x80};
    uint32_t refpoints[] = {cpu_to_be32(0x4), cpu_to_be32(0x4)};
    uint32_t associativity[] = {cpu_to_be32(0x4), cpu_to_be32(0x0),
                                cpu_to_be32(0x0), cpu_to_be32(0x0),
                                cpu_to_be32(0x0)};
    char mem_name[32];
    target_phys_addr_t node0_size, mem_start;

    fdt = g_malloc0(FDT_MAX_SIZE);
    _FDT((fdt_create(fdt, FDT_MAX_SIZE)));

    /* Reserve the memory occupied by the kernel and the initrd, if any */
    if (kernel_size) {
        _FDT((fdt_add_reservemap_entry(fdt, KERNEL_LOAD_ADDR, kernel_size)));
    }
    if (initrd_size) {
        _FDT((fdt_add_reservemap_entry(fdt, initrd_base, initrd_size)));
    }
    _FDT((fdt_finish_reservemap(fdt)));

    /* Root node */
    _FDT((fdt_begin_node(fdt, "")));
    _FDT((fdt_property_string(fdt, "device_type", "chrp")));
    _FDT((fdt_property_string(fdt, "model", "IBM pSeries (emulated by qemu)")));

    _FDT((fdt_property_cell(fdt, "#address-cells", 0x2)));
    _FDT((fdt_property_cell(fdt, "#size-cells", 0x2)));

    /* /chosen */
    _FDT((fdt_begin_node(fdt, "chosen")));

    /* Set Form1_affinity */
    _FDT((fdt_property(fdt, "ibm,architecture-vec-5", vec5, sizeof(vec5))));

    _FDT((fdt_property_string(fdt, "bootargs", kernel_cmdline)));
    _FDT((fdt_property(fdt, "linux,initrd-start",
                       &start_prop, sizeof(start_prop))));
    _FDT((fdt_property(fdt, "linux,initrd-end",
                       &end_prop, sizeof(end_prop))));
    if (kernel_size) {
        uint64_t kprop[2] = { cpu_to_be64(KERNEL_LOAD_ADDR),
                              cpu_to_be64(kernel_size) };

        _FDT((fdt_property(fdt, "qemu,boot-kernel", &kprop, sizeof(kprop))));
    }
    _FDT((fdt_property_string(fdt, "qemu,boot-device", boot_device)));

    _FDT((fdt_end_node(fdt)));

    /* memory node(s) */
    node0_size = (nb_numa_nodes > 1) ? node_mem[0] : ram_size;
    if (rma_size > node0_size) {
        rma_size = node0_size;
    }

    /* RMA */
    mem_reg_property[0] = 0;
    mem_reg_property[1] = cpu_to_be64(rma_size);
    _FDT((fdt_begin_node(fdt, "memory@0")));
    _FDT((fdt_property_string(fdt, "device_type", "memory")));
    _FDT((fdt_property(fdt, "reg", mem_reg_property,
                       sizeof(mem_reg_property))));
    _FDT((fdt_property(fdt, "ibm,associativity", associativity,
                       sizeof(associativity))));
    _FDT((fdt_end_node(fdt)));

    /* RAM: Node 0 (whatever part of it lies above the RMA) */
    if (node0_size > rma_size) {
        mem_reg_property[0] = cpu_to_be64(rma_size);
        mem_reg_property[1] = cpu_to_be64(node0_size - rma_size);

        snprintf(mem_name, sizeof(mem_name), "memory@" TARGET_FMT_lx,
                 rma_size);
        _FDT((fdt_begin_node(fdt, mem_name)));
        _FDT((fdt_property_string(fdt, "device_type", "memory")));
        _FDT((fdt_property(fdt, "reg", mem_reg_property,
                           sizeof(mem_reg_property))));
        _FDT((fdt_property(fdt, "ibm,associativity", associativity,
                           sizeof(associativity))));
        _FDT((fdt_end_node(fdt)));
    }

    /* RAM: Node 1 and beyond */
    mem_start = node0_size;
    for (i = 1; i < nb_numa_nodes; i++) {
        mem_reg_property[0] = cpu_to_be64(mem_start);
        mem_reg_property[1] = cpu_to_be64(node_mem[i]);
        associativity[3] = associativity[4] = cpu_to_be32(i);
        snprintf(mem_name, sizeof(mem_name), "memory@" TARGET_FMT_lx,
                 mem_start);
        _FDT((fdt_begin_node(fdt, mem_name)));
        _FDT((fdt_property_string(fdt, "device_type", "memory")));
        _FDT((fdt_property(fdt, "reg", mem_reg_property,
                           sizeof(mem_reg_property))));
        _FDT((fdt_property(fdt, "ibm,associativity", associativity,
                           sizeof(associativity))));
        _FDT((fdt_end_node(fdt)));
        mem_start += node_mem[i];
    }

    /* cpus */
    _FDT((fdt_begin_node(fdt, "cpus")));

    _FDT((fdt_property_cell(fdt, "#address-cells", 0x1)));
    _FDT((fdt_property_cell(fdt, "#size-cells", 0x0)));

    modelname = g_strdup(cpu_model);

    /* toupper() requires a value representable as unsigned char */
    for (i = 0; modelname[i] != '\0'; i++) {
        modelname[i] = toupper((unsigned char)modelname[i]);
    }

    /* This is needed during FDT finalization */
    spapr->cpu_model = g_strdup(modelname);

    for (env = first_cpu; env != NULL; env = env->next_cpu) {
        int index = env->cpu_index;
        uint32_t servers_prop[smp_threads];
        uint32_t gservers_prop[smp_threads * 2];
        char *nodename;
        uint32_t segs[] = {cpu_to_be32(28), cpu_to_be32(40),
                           0xffffffff, 0xffffffff};
        uint32_t tbfreq = kvm_enabled() ? kvmppc_get_tbfreq() : TIMEBASE_FREQ;
        uint32_t cpufreq = kvm_enabled() ? kvmppc_get_clockfreq() : 1000000000;

        /* One node per group of smt CPUs, named after the first of them */
        if ((index % smt) != 0) {
            continue;
        }

        if (asprintf(&nodename, "%s@%x", modelname, index) < 0) {
            fprintf(stderr, "Allocation failure\n");
            exit(1);
        }

        _FDT((fdt_begin_node(fdt, nodename)));

        free(nodename);

        _FDT((fdt_property_cell(fdt, "reg", index)));
        _FDT((fdt_property_string(fdt, "device_type", "cpu")));

        _FDT((fdt_property_cell(fdt, "cpu-version", env->spr[SPR_PVR])));
        _FDT((fdt_property_cell(fdt, "dcache-block-size",
                                env->dcache_line_size)));
        _FDT((fdt_property_cell(fdt, "icache-block-size",
                                env->icache_line_size)));
        _FDT((fdt_property_cell(fdt, "timebase-frequency", tbfreq)));
        _FDT((fdt_property_cell(fdt, "clock-frequency", cpufreq)));
        _FDT((fdt_property_cell(fdt, "ibm,slb-size", env->slb_nr)));
        _FDT((fdt_property(fdt, "ibm,pft-size",
                           pft_size_prop, sizeof(pft_size_prop))));
        _FDT((fdt_property_string(fdt, "status", "okay")));
        _FDT((fdt_property(fdt, "64-bit", NULL, 0)));

        /* Build interrupt servers and gservers properties */
        for (i = 0; i < smp_threads; i++) {
            servers_prop[i] = cpu_to_be32(index + i);
            /* Hack, direct the group queues back to cpu 0 */
            gservers_prop[i*2] = cpu_to_be32(index + i);
            gservers_prop[i*2 + 1] = 0;
        }
        _FDT((fdt_property(fdt, "ibm,ppc-interrupt-server#s",
                           servers_prop, sizeof(servers_prop))));
        _FDT((fdt_property(fdt, "ibm,ppc-interrupt-gserver#s",
                           gservers_prop, sizeof(gservers_prop))));

        if (env->mmu_model & POWERPC_MMU_1TSEG) {
            _FDT((fdt_property(fdt, "ibm,processor-segment-sizes",
                               segs, sizeof(segs))));
        }

        /* Advertise VMX/VSX (vector extensions) if available
         *   0 / no property == no vector extensions
         *   1               == VMX / Altivec available
         *   2               == VSX available */
        if (env->insns_flags & PPC_ALTIVEC) {
            uint32_t vmx = (env->insns_flags2 & PPC2_VSX) ? 2 : 1;

            _FDT((fdt_property_cell(fdt, "ibm,vmx", vmx)));
        }

        /* Advertise DFP (Decimal Floating Point) if available
         *   0 / no property == no DFP
         *   1               == DFP available */
        if (env->insns_flags2 & PPC2_DFP) {
            _FDT((fdt_property_cell(fdt, "ibm,dfp", 1)));
        }

        _FDT((fdt_end_node(fdt)));
    }

    g_free(modelname);

    _FDT((fdt_end_node(fdt)));

    /* RTAS */
    _FDT((fdt_begin_node(fdt, "rtas")));

    _FDT((fdt_property(fdt, "ibm,hypertas-functions", hypertas_prop,
                       sizeof(hypertas_prop))));

    _FDT((fdt_property(fdt, "ibm,associativity-reference-points",
                       refpoints, sizeof(refpoints))));

    _FDT((fdt_end_node(fdt)));

    /* interrupt controller */
    _FDT((fdt_begin_node(fdt, "interrupt-controller")));

    _FDT((fdt_property_string(fdt, "device_type",
                              "PowerPC-External-Interrupt-Presentation")));
    _FDT((fdt_property_string(fdt, "compatible", "IBM,ppc-xicp")));
    _FDT((fdt_property(fdt, "interrupt-controller", NULL, 0)));
    _FDT((fdt_property(fdt, "ibm,interrupt-server-ranges",
                       interrupt_server_ranges_prop,
                       sizeof(interrupt_server_ranges_prop))));
    _FDT((fdt_property_cell(fdt, "#interrupt-cells", 2)));
    _FDT((fdt_property_cell(fdt, "linux,phandle", PHANDLE_XICP)));
    _FDT((fdt_property_cell(fdt, "phandle", PHANDLE_XICP)));

    _FDT((fdt_end_node(fdt)));

    /* vdevice */
    _FDT((fdt_begin_node(fdt, "vdevice")));

    _FDT((fdt_property_string(fdt, "device_type", "vdevice")));
    _FDT((fdt_property_string(fdt, "compatible", "IBM,vdevice")));
    _FDT((fdt_property_cell(fdt, "#address-cells", 0x1)));
    _FDT((fdt_property_cell(fdt, "#size-cells", 0x0)));
    _FDT((fdt_property_cell(fdt, "#interrupt-cells", 0x2)));
    _FDT((fdt_property(fdt, "interrupt-controller", NULL, 0)));

    _FDT((fdt_end_node(fdt)));

    _FDT((fdt_end_node(fdt))); /* close root node */
    _FDT((fdt_finish(fdt)));

    return fdt;
}

/*
 * Produce the final device tree and copy it into guest memory.
 *
 * The skeleton in spapr->fdt_skel is opened into a temporary FDT_MAX_SIZE
 * buffer, completed with the VIO devices, the PCI devices of every host
 * bridge, the RTAS properties, the per-CPU NUMA associativity (only when
 * more than one NUMA node is configured) and the /chosen stdout entry,
 * then packed and written to guest physical address @fdt_addr.
 *
 * @rtas_addr and @rtas_size are forwarded to
 * spapr_rtas_device_tree_setup().
 *
 * VIO, PCI and libfdt failures are fatal; RTAS and NUMA setup failures are
 * only reported on stderr.
 */
static void spapr_finalize_fdt(sPAPREnvironment *spapr,
                               target_phys_addr_t fdt_addr,
                               target_phys_addr_t rtas_addr,
                               target_phys_addr_t rtas_size)
{
    int ret;
    void *fdt;
    sPAPRPHBState *phb;

    fdt = g_malloc(FDT_MAX_SIZE);

    /* open out the base tree into a temp buffer for the final tweaks */
    _FDT((fdt_open_into(spapr->fdt_skel, fdt, FDT_MAX_SIZE)));

    ret = spapr_populate_vdevice(spapr->vio_bus, fdt);
    if (ret < 0) {
        fprintf(stderr, "couldn't setup vio devices in fdt\n");
        exit(1);
    }

    QLIST_FOREACH(phb, &spapr->phbs, list) {
        ret = spapr_populate_pci_devices(phb, PHANDLE_XICP, fdt);
        /* Check every bridge, not just the last one in the list */
        if (ret < 0) {
            fprintf(stderr, "couldn't setup PCI devices in fdt\n");
            exit(1);
        }
    }

    /* RTAS */
    ret = spapr_rtas_device_tree_setup(fdt, rtas_addr, rtas_size);
    if (ret < 0) {
        fprintf(stderr, "Couldn't set up RTAS device tree properties\n");
    }

    /* Advertise NUMA via ibm,associativity */
    if (nb_numa_nodes > 1) {
        ret = spapr_set_associativity(fdt, spapr);
        if (ret < 0) {
            fprintf(stderr, "Couldn't set up NUMA device tree properties\n");
        }
    }

    spapr_populate_chosen_stdout(fdt, spapr->vio_bus);

    _FDT((fdt_pack(fdt)));

    if (fdt_totalsize(fdt) > FDT_MAX_SIZE) {
        hw_error("FDT too big ! 0x%x bytes (max is 0x%x)\n",
                 fdt_totalsize(fdt), FDT_MAX_SIZE);
        exit(1);
    }

    cpu_physical_memory_write(fdt_addr, fdt, fdt_totalsize(fdt));

    g_free(fdt);
}

/*
 * Address translation callback handed to load_elf(): keep the low 28 bits
 * of @addr and rebase them onto KERNEL_LOAD_ADDR. @opaque is unused.
 */
static uint64_t translate_kernel_address(void *opaque, uint64_t addr)
{
    uint64_t offset = addr & 0x0fffffff;

    return offset + KERNEL_LOAD_ADDR;
}

A
Andreas Färber 已提交
479
/*
 * Hypercall hook installed into cpu_ppc_hypercall by ppc_spapr_init().
 *
 * GPR3 is passed to spapr_hypercall() as its second argument, the
 * registers starting at GPR4 as its argument array, and the return value
 * is stored back into GPR3.
 */
static void emulate_spapr_hypercall(CPUPPCState *env)
{
    env->gpr[3] = spapr_hypercall(env, env->gpr[3], &env->gpr[4]);
}

484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504
/*
 * Machine reset handler; @opaque is the sPAPREnvironment it was registered
 * with. Clears the hash page table, regenerates the device tree in guest
 * memory and puts the first CPU into its entry state.
 */
static void spapr_reset(void *opaque)
{
    sPAPREnvironment *machine = opaque;

    fprintf(stderr, "sPAPR reset\n");

    /* Wipe the hash page table */
    memset(machine->htab, 0, machine->htab_size);

    /* Rebuild the device tree and copy it into guest memory */
    spapr_finalize_fdt(machine, machine->fdt_addr, machine->rtas_addr,
                       machine->rtas_size);

    /* Entry state: GPR3 holds the device tree address, GPR5 is zero */
    first_cpu->gpr[3] = machine->fdt_addr;
    first_cpu->gpr[5] = 0;
    first_cpu->halted = 0;
    first_cpu->nip = machine->entry_point;
}

505 506
static void spapr_cpu_reset(void *opaque)
{
A
Andreas Färber 已提交
507
    CPUPPCState *env = opaque;
508 509 510 511

    cpu_state_reset(env);
}

512 513 514 515 516 517 518 519
/*
 * pSeries LPAR / sPAPR hardware init
 *
 * Allocates the global sPAPR state and then, in order: sets up the RMA,
 * creates the CPUs, maps the RAM that lies above the RMA, allocates the
 * hash page table, loads the RTAS blob, creates the interrupt controller,
 * the VIO bus with its vty/vlan/vscsi devices and the PCI host bridge,
 * loads the optional kernel and initrd plus the SLOF firmware, builds the
 * skeleton device tree and registers the machine reset handler.
 *
 * Every failure in here is fatal.
 */
static void ppc_spapr_init(ram_addr_t ram_size,
                           const char *boot_device,
                           const char *kernel_filename,
                           const char *kernel_cmdline,
                           const char *initrd_filename,
                           const char *cpu_model)
{
    CPUPPCState *env;
    int i;
    MemoryRegion *sysmem = get_system_memory();
    target_phys_addr_t rma_alloc_size, rma_size;
    uint32_t initrd_base = 0;
    long kernel_size = 0, initrd_size = 0;
    long load_limit, rtas_limit, fw_size;
    long pteg_shift = 17;
    char *filename;

    spapr = g_malloc0(sizeof(*spapr));
    QLIST_INIT(&spapr->phbs);

    cpu_ppc_hypercall = emulate_spapr_hypercall;

    /* Allocate RMA if necessary */
    rma_alloc_size = kvmppc_alloc_rma("ppc_spapr.rma", sysmem);

    if (rma_alloc_size == -1) {
        hw_error("qemu: Unable to create RMA\n");
        exit(1);
    }
    if (rma_alloc_size && (rma_alloc_size < ram_size)) {
        rma_size = rma_alloc_size;
    } else {
        rma_size = ram_size;
    }

    /* We place the device tree and RTAS just below either the top of the RMA,
     * or just below 2GB, whichever is lower, so that it can be
     * processed with 32-bit real mode code if necessary */
    rtas_limit = MIN(rma_size, 0x80000000);
    spapr->rtas_addr = rtas_limit - RTAS_MAX_SIZE;
    spapr->fdt_addr = spapr->rtas_addr - FDT_MAX_SIZE;
    load_limit = spapr->fdt_addr - FW_OVERHEAD;

    /* init CPUs */
    if (cpu_model == NULL) {
        cpu_model = kvm_enabled() ? "host" : "POWER7";
    }
    for (i = 0; i < smp_cpus; i++) {
        env = cpu_init(cpu_model);

        if (!env) {
            fprintf(stderr, "Unable to find PowerPC CPU definition\n");
            exit(1);
        }
        /* Set time-base frequency to 512 MHz */
        cpu_ppc_tb_init(env, TIMEBASE_FREQ);
        qemu_register_reset(spapr_cpu_reset, env);

        env->hreset_vector = 0x60;
        env->hreset_excp_prefix = 0;
        env->gpr[3] = env->cpu_index;
    }

    /* allocate RAM */
    spapr->ram_limit = ram_size;
    if (spapr->ram_limit > rma_alloc_size) {
        ram_addr_t nonrma_base = rma_alloc_size;
        ram_addr_t nonrma_size = spapr->ram_limit - rma_alloc_size;
        /* Only allocated when there really is RAM above the RMA */
        MemoryRegion *ram = g_new(MemoryRegion, 1);

        memory_region_init_ram(ram, "ppc_spapr.ram", nonrma_size);
        vmstate_register_ram_global(ram);
        memory_region_add_subregion(sysmem, nonrma_base, ram);
    }

    /* allocate hash page table.  For now we always make this 16mb,
     * later we should probably make it scale to the size of guest
     * RAM */
    spapr->htab_size = 1ULL << (pteg_shift + 7);
    spapr->htab = qemu_memalign(spapr->htab_size, spapr->htab_size);

    for (env = first_cpu; env != NULL; env = env->next_cpu) {
        env->external_htab = spapr->htab;
        env->htab_base = -1;
        env->htab_mask = spapr->htab_size - 1;

        /* Tell KVM that we're in PAPR mode */
        env->spr[SPR_SDR1] = (unsigned long)spapr->htab |
                             ((pteg_shift + 7) - 18);
        env->spr[SPR_HIOR] = 0;

        if (kvm_enabled()) {
            kvmppc_set_papr(env);
        }
    }

    filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, "spapr-rtas.bin");
    spapr->rtas_size = load_image_targphys(filename, spapr->rtas_addr,
                                           rtas_limit - spapr->rtas_addr);
    if (spapr->rtas_size < 0) {
        hw_error("qemu: could not load LPAR rtas '%s'\n", filename);
        exit(1);
    }
    if (spapr->rtas_size > RTAS_MAX_SIZE) {
        hw_error("RTAS too big ! 0x%lx bytes (max is 0x%x)\n",
                 spapr->rtas_size, RTAS_MAX_SIZE);
        exit(1);
    }
    g_free(filename);

    /* Set up Interrupt Controller */
    spapr->icp = xics_system_init(XICS_IRQS);
    spapr->next_irq = 16;

    /* Set up VIO bus */
    spapr->vio_bus = spapr_vio_bus_init();

    for (i = 0; i < MAX_SERIAL_PORTS; i++) {
        if (serial_hds[i]) {
            spapr_vty_create(spapr->vio_bus, SPAPR_VTY_BASE_ADDRESS + i,
                             serial_hds[i]);
        }
    }

    /* Set up PCI */
    spapr_create_phb(spapr, "pci", SPAPR_PCI_BUID,
                     SPAPR_PCI_MEM_WIN_ADDR,
                     SPAPR_PCI_MEM_WIN_SIZE,
                     SPAPR_PCI_IO_WIN_ADDR);

    /* NICs default to the "ibmveth" VIO model; anything else goes on PCI */
    for (i = 0; i < nb_nics; i++) {
        NICInfo *nd = &nd_table[i];

        if (!nd->model) {
            nd->model = g_strdup("ibmveth");
        }

        if (strcmp(nd->model, "ibmveth") == 0) {
            spapr_vlan_create(spapr->vio_bus, 0x1000 + i, nd);
        } else {
            pci_nic_init_nofail(&nd_table[i], nd->model, NULL);
        }
    }

    for (i = 0; i <= drive_get_max_bus(IF_SCSI); i++) {
        spapr_vscsi_create(spapr->vio_bus, 0x2000 + i);
    }

    if (rma_size < (MIN_RMA_SLOF << 20)) {
        /* MIN_RMA_SLOF is an unsigned long, hence %lu */
        fprintf(stderr, "qemu: pSeries SLOF firmware requires >= "
                "%luM guest RMA (Real Mode Area memory)\n", MIN_RMA_SLOF);
        exit(1);
    }

    fprintf(stderr, "sPAPR memory map:\n");
    fprintf(stderr, "RTAS                 : 0x%08lx..%08lx\n",
            (unsigned long)spapr->rtas_addr,
            (unsigned long)(spapr->rtas_addr + spapr->rtas_size - 1));
    fprintf(stderr, "FDT                  : 0x%08lx..%08lx\n",
            (unsigned long)spapr->fdt_addr,
            (unsigned long)(spapr->fdt_addr + FDT_MAX_SIZE - 1));

    if (kernel_filename) {
        uint64_t lowaddr = 0;

        /* Try ELF first, then fall back to a raw image at KERNEL_LOAD_ADDR */
        kernel_size = load_elf(kernel_filename, translate_kernel_address, NULL,
                               NULL, &lowaddr, NULL, 1, ELF_MACHINE, 0);
        if (kernel_size < 0) {
            kernel_size = load_image_targphys(kernel_filename,
                                              KERNEL_LOAD_ADDR,
                                              load_limit - KERNEL_LOAD_ADDR);
        }
        if (kernel_size < 0) {
            fprintf(stderr, "qemu: could not load kernel '%s'\n",
                    kernel_filename);
            exit(1);
        }
        fprintf(stderr, "Kernel               : 0x%08x..%08lx\n",
                KERNEL_LOAD_ADDR, KERNEL_LOAD_ADDR + kernel_size - 1);

        /* load initrd */
        if (initrd_filename) {
            /* Try to locate the initrd in the gap between the kernel
             * and the firmware. Add a bit of space just in case
             */
            initrd_base = (KERNEL_LOAD_ADDR + kernel_size + 0x1ffff) & ~0xffff;
            initrd_size = load_image_targphys(initrd_filename, initrd_base,
                                              load_limit - initrd_base);
            if (initrd_size < 0) {
                fprintf(stderr, "qemu: could not load initial ram disk '%s'\n",
                        initrd_filename);
                exit(1);
            }
            fprintf(stderr, "Ramdisk              : 0x%08lx..%08lx\n",
                    (long)initrd_base, (long)(initrd_base + initrd_size - 1));
        } else {
            initrd_base = 0;
            initrd_size = 0;
        }
    }

    filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, FW_FILE_NAME);
    fw_size = load_image_targphys(filename, 0, FW_MAX_SIZE);
    if (fw_size < 0) {
        hw_error("qemu: could not load LPAR firmware '%s'\n", filename);
        exit(1);
    }
    g_free(filename);
    fprintf(stderr, "Firmware load        : 0x%08x..%08lx\n",
            0, fw_size);
    fprintf(stderr, "Firmware runtime     : 0x%08lx..%08lx\n",
            load_limit, (unsigned long)spapr->fdt_addr);

    spapr->entry_point = 0x100;

    /* SLOF will startup the secondary CPUs using RTAS */
    for (env = first_cpu; env != NULL; env = env->next_cpu) {
        env->halted = 1;
    }

    /* Prepare the device tree */
    spapr->fdt_skel = spapr_create_fdt_skel(cpu_model, rma_size,
                                            initrd_base, initrd_size,
                                            kernel_size,
                                            boot_device, kernel_cmdline,
                                            pteg_shift + 7);
    assert(spapr->fdt_skel != NULL);

    qemu_register_reset(spapr_reset, spapr);
}

/* Machine description registered under the name "pseries" */
static QEMUMachine spapr_machine = {
    .name = "pseries",
    .desc = "pSeries Logical Partition (PAPR compliant)",
    .init = ppc_spapr_init,
    .max_cpus = MAX_CPUS,
    .no_parallel = 1,
    .use_scsi = 1,
};

/* Register the "pseries" machine description with QEMU */
static void spapr_machine_init(void)
{
    qemu_register_machine(&spapr_machine);
}

/* Hook the registration function into QEMU's machine_init mechanism */
machine_init(spapr_machine_init);