spapr.c 25.1 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29
/*
 * QEMU PowerPC pSeries Logical Partition (aka sPAPR) hardware System Emulator
 *
 * Copyright (c) 2004-2007 Fabrice Bellard
 * Copyright (c) 2007 Jocelyn Mayer
 * Copyright (c) 2010 David Gibson, IBM Corporation.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 */
#include "sysemu.h"
#include "hw.h"
#include "elf.h"
30
#include "net.h"
31
#include "blockdev.h"
32 33 34
#include "cpus.h"
#include "kvm.h"
#include "kvm_ppc.h"
35 36 37 38 39 40

#include "hw/boards.h"
#include "hw/ppc.h"
#include "hw/loader.h"

#include "hw/spapr.h"
41
#include "hw/spapr_vio.h"
42
#include "hw/spapr_pci.h"
43
#include "hw/xics.h"
44

45 46
#include "kvm.h"
#include "kvm_ppc.h"
47
#include "pci.h"
48

A
Avi Kivity 已提交
49 50
#include "exec-memory.h"

51 52
#include <libfdt.h>

53 54 55 56 57 58 59 60 61 62
/* SLOF memory layout:
 *
 * The SLOF raw image is loaded at 0. It copies its romfs right below the
 * flat device-tree, then positions SLOF itself 31M below that.
 *
 * So we set FW_OVERHEAD to 40MB, which should account for all of that
 * and more.
 *
 * We load our kernel at 4M, leaving space for the initial SLOF image.
 */
63
#define FDT_MAX_SIZE            0x10000
64
#define RTAS_MAX_SIZE           0x10000
65 66
#define FW_MAX_SIZE             0x400000
#define FW_FILE_NAME            "slof.bin"
67 68
#define FW_OVERHEAD             0x2800000
#define KERNEL_LOAD_ADDR        FW_MAX_SIZE
69

70
#define MIN_RMA_SLOF            128UL
71 72 73

#define TIMEBASE_FREQ           512000000ULL

74
#define MAX_CPUS                256
75
#define XICS_IRQS               1024
76

77 78 79 80 81
#define SPAPR_PCI_BUID          0x800000020000001ULL
#define SPAPR_PCI_MEM_WIN_ADDR  (0x10000000000ULL + 0xA0000000)
#define SPAPR_PCI_MEM_WIN_SIZE  0x20000000
#define SPAPR_PCI_IO_WIN_ADDR   (0x10000000000ULL + 0x80000000)

82 83
#define PHANDLE_XICP            0x00001111

84 85
sPAPREnvironment *spapr;

86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109
/*
 * Pick an interrupt number and look up its qemu_irq in the XICS.
 *
 * A non-zero @hint is used verbatim as the interrupt number; a zero @hint
 * takes the next number from spapr->next_irq and advances that counter.
 *
 * Returns the qemu_irq found by xics_find_qirq(), or NULL when there is
 * none.  On success the chosen number is stored through @irq_num, which
 * may be NULL if the caller does not need it.
 */
qemu_irq spapr_allocate_irq(uint32_t hint, uint32_t *irq_num)
{
    /* FIXME: we should probably check hinted numbers for collisions somehow */
    uint32_t chosen = hint ? hint : spapr->next_irq++;
    qemu_irq line = xics_find_qirq(spapr->icp, chosen);

    if (line && irq_num) {
        *irq_num = chosen;
    }

    return line;
}

110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147
/*
 * Attach an "ibm,associativity" property to CPU nodes of @fdt.
 *
 * Walks the CPU list and, for every CPU whose index is a multiple of the
 * SMT thread count, looks up the node "/cpus/<spapr->cpu_model>@<index>"
 * and sets a six-cell property on it whose last two cells are the CPU's
 * numa_node and cpu_index.
 *
 * Returns a negative libfdt error code as soon as a node lookup or a
 * property update fails; otherwise the result of the last update (0 when
 * no node was touched).
 */
static int spapr_set_associativity(void *fdt, sPAPREnvironment *spapr)
{
    const int threads_per_core = kvmppc_smt_threads();
    char node_path[32];
    CPUState *cpu;
    int rc = 0;

    assert(spapr->cpu_model);

    for (cpu = first_cpu; cpu != NULL; cpu = cpu->next_cpu) {
        uint32_t assoc[6];
        int node_off;

        if (cpu->cpu_index % threads_per_core) {
            continue;
        }

        assoc[0] = cpu_to_be32(0x5);
        assoc[1] = assoc[2] = assoc[3] = cpu_to_be32(0x0);
        assoc[4] = cpu_to_be32(cpu->numa_node);
        assoc[5] = cpu_to_be32(cpu->cpu_index);

        snprintf(node_path, sizeof(node_path), "/cpus/%s@%x",
                 spapr->cpu_model, cpu->cpu_index);

        node_off = fdt_path_offset(fdt, node_path);
        if (node_off < 0) {
            return node_off;
        }

        rc = fdt_setprop(fdt, node_off, "ibm,associativity",
                         assoc, sizeof(assoc));
        if (rc < 0) {
            return rc;
        }
    }

    return rc;
}

148
static void *spapr_create_fdt_skel(const char *cpu_model,
149
                                   target_phys_addr_t rma_size,
150 151
                                   target_phys_addr_t initrd_base,
                                   target_phys_addr_t initrd_size,
152
                                   target_phys_addr_t kernel_size,
153 154 155
                                   const char *boot_device,
                                   const char *kernel_cmdline,
                                   long hash_shift)
156 157
{
    void *fdt;
D
David Gibson 已提交
158
    CPUState *env;
159
    uint64_t mem_reg_property[2];
160 161
    uint32_t start_prop = cpu_to_be32(initrd_base);
    uint32_t end_prop = cpu_to_be32(initrd_base + initrd_size);
162
    uint32_t pft_size_prop[] = {0, cpu_to_be32(hash_shift)};
163
    char hypertas_prop[] = "hcall-pft\0hcall-term\0hcall-dabr\0hcall-interrupt"
164
        "\0hcall-tce\0hcall-vio\0hcall-splpar\0hcall-bulk";
165
    uint32_t interrupt_server_ranges_prop[] = {0, cpu_to_be32(smp_cpus)};
166 167
    int i;
    char *modelname;
168
    int smt = kvmppc_smt_threads();
169 170 171 172 173 174 175
    unsigned char vec5[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x80};
    uint32_t refpoints[] = {cpu_to_be32(0x4), cpu_to_be32(0x4)};
    uint32_t associativity[] = {cpu_to_be32(0x4), cpu_to_be32(0x0),
                                cpu_to_be32(0x0), cpu_to_be32(0x0),
                                cpu_to_be32(0x0)};
    char mem_name[32];
    target_phys_addr_t node0_size, mem_start;
176 177 178 179 180 181 182 183 184 185 186

#define _FDT(exp) \
    do { \
        int ret = (exp);                                           \
        if (ret < 0) {                                             \
            fprintf(stderr, "qemu: error creating device tree: %s: %s\n", \
                    #exp, fdt_strerror(ret));                      \
            exit(1);                                               \
        }                                                          \
    } while (0)

187
    fdt = g_malloc0(FDT_MAX_SIZE);
188 189
    _FDT((fdt_create(fdt, FDT_MAX_SIZE)));

190 191 192 193 194 195
    if (kernel_size) {
        _FDT((fdt_add_reservemap_entry(fdt, KERNEL_LOAD_ADDR, kernel_size)));
    }
    if (initrd_size) {
        _FDT((fdt_add_reservemap_entry(fdt, initrd_base, initrd_size)));
    }
196 197 198 199 200
    _FDT((fdt_finish_reservemap(fdt)));

    /* Root node */
    _FDT((fdt_begin_node(fdt, "")));
    _FDT((fdt_property_string(fdt, "device_type", "chrp")));
201
    _FDT((fdt_property_string(fdt, "model", "IBM pSeries (emulated by qemu)")));
202 203 204 205 206 207 208

    _FDT((fdt_property_cell(fdt, "#address-cells", 0x2)));
    _FDT((fdt_property_cell(fdt, "#size-cells", 0x2)));

    /* /chosen */
    _FDT((fdt_begin_node(fdt, "chosen")));

209 210 211
    /* Set Form1_affinity */
    _FDT((fdt_property(fdt, "ibm,architecture-vec-5", vec5, sizeof(vec5))));

212 213 214 215 216
    _FDT((fdt_property_string(fdt, "bootargs", kernel_cmdline)));
    _FDT((fdt_property(fdt, "linux,initrd-start",
                       &start_prop, sizeof(start_prop))));
    _FDT((fdt_property(fdt, "linux,initrd-end",
                       &end_prop, sizeof(end_prop))));
217 218 219
    if (kernel_size) {
        uint64_t kprop[2] = { cpu_to_be64(KERNEL_LOAD_ADDR),
                              cpu_to_be64(kernel_size) };
220

221 222 223
        _FDT((fdt_property(fdt, "qemu,boot-kernel", &kprop, sizeof(kprop))));
    }
    _FDT((fdt_property_string(fdt, "qemu,boot-device", boot_device)));
224

225 226
    _FDT((fdt_end_node(fdt)));

227
    /* memory node(s) */
228 229 230 231
    node0_size = (nb_numa_nodes > 1) ? node_mem[0] : ram_size;
    if (rma_size > node0_size) {
        rma_size = node0_size;
    }
232

233 234 235 236
    /* RMA */
    mem_reg_property[0] = 0;
    mem_reg_property[1] = cpu_to_be64(rma_size);
    _FDT((fdt_begin_node(fdt, "memory@0")));
237
    _FDT((fdt_property_string(fdt, "device_type", "memory")));
238 239 240 241
    _FDT((fdt_property(fdt, "reg", mem_reg_property,
        sizeof(mem_reg_property))));
    _FDT((fdt_property(fdt, "ibm,associativity", associativity,
        sizeof(associativity))));
242 243
    _FDT((fdt_end_node(fdt)));

244 245 246 247
    /* RAM: Node 0 */
    if (node0_size > rma_size) {
        mem_reg_property[0] = cpu_to_be64(rma_size);
        mem_reg_property[1] = cpu_to_be64(node0_size - rma_size);
248

249
        sprintf(mem_name, "memory@" TARGET_FMT_lx, rma_size);
250 251
        _FDT((fdt_begin_node(fdt, mem_name)));
        _FDT((fdt_property_string(fdt, "device_type", "memory")));
252 253 254 255
        _FDT((fdt_property(fdt, "reg", mem_reg_property,
                           sizeof(mem_reg_property))));
        _FDT((fdt_property(fdt, "ibm,associativity", associativity,
                           sizeof(associativity))));
256 257 258
        _FDT((fdt_end_node(fdt)));
    }

259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275
    /* RAM: Node 1 and beyond */
    mem_start = node0_size;
    for (i = 1; i < nb_numa_nodes; i++) {
        mem_reg_property[0] = cpu_to_be64(mem_start);
        mem_reg_property[1] = cpu_to_be64(node_mem[i]);
        associativity[3] = associativity[4] = cpu_to_be32(i);
        sprintf(mem_name, "memory@" TARGET_FMT_lx, mem_start);
        _FDT((fdt_begin_node(fdt, mem_name)));
        _FDT((fdt_property_string(fdt, "device_type", "memory")));
        _FDT((fdt_property(fdt, "reg", mem_reg_property,
            sizeof(mem_reg_property))));
        _FDT((fdt_property(fdt, "ibm,associativity", associativity,
            sizeof(associativity))));
        _FDT((fdt_end_node(fdt)));
        mem_start += node_mem[i];
    }

276 277 278 279 280 281
    /* cpus */
    _FDT((fdt_begin_node(fdt, "cpus")));

    _FDT((fdt_property_cell(fdt, "#address-cells", 0x1)));
    _FDT((fdt_property_cell(fdt, "#size-cells", 0x0)));

282
    modelname = g_strdup(cpu_model);
283 284 285 286 287

    for (i = 0; i < strlen(modelname); i++) {
        modelname[i] = toupper(modelname[i]);
    }

288 289 290
    /* This is needed during FDT finalization */
    spapr->cpu_model = g_strdup(modelname);

D
David Gibson 已提交
291 292
    for (env = first_cpu; env != NULL; env = env->next_cpu) {
        int index = env->cpu_index;
293 294
        uint32_t servers_prop[smp_threads];
        uint32_t gservers_prop[smp_threads * 2];
295 296 297
        char *nodename;
        uint32_t segs[] = {cpu_to_be32(28), cpu_to_be32(40),
                           0xffffffff, 0xffffffff};
298 299
        uint32_t tbfreq = kvm_enabled() ? kvmppc_get_tbfreq() : TIMEBASE_FREQ;
        uint32_t cpufreq = kvm_enabled() ? kvmppc_get_clockfreq() : 1000000000;
300

301 302 303 304
        if ((index % smt) != 0) {
            continue;
        }

D
David Gibson 已提交
305
        if (asprintf(&nodename, "%s@%x", modelname, index) < 0) {
306 307 308 309 310 311 312 313
            fprintf(stderr, "Allocation failure\n");
            exit(1);
        }

        _FDT((fdt_begin_node(fdt, nodename)));

        free(nodename);

D
David Gibson 已提交
314
        _FDT((fdt_property_cell(fdt, "reg", index)));
315 316 317 318 319 320 321
        _FDT((fdt_property_string(fdt, "device_type", "cpu")));

        _FDT((fdt_property_cell(fdt, "cpu-version", env->spr[SPR_PVR])));
        _FDT((fdt_property_cell(fdt, "dcache-block-size",
                                env->dcache_line_size)));
        _FDT((fdt_property_cell(fdt, "icache-block-size",
                                env->icache_line_size)));
322 323
        _FDT((fdt_property_cell(fdt, "timebase-frequency", tbfreq)));
        _FDT((fdt_property_cell(fdt, "clock-frequency", cpufreq)));
324
        _FDT((fdt_property_cell(fdt, "ibm,slb-size", env->slb_nr)));
325 326
        _FDT((fdt_property(fdt, "ibm,pft-size",
                           pft_size_prop, sizeof(pft_size_prop))));
327 328
        _FDT((fdt_property_string(fdt, "status", "okay")));
        _FDT((fdt_property(fdt, "64-bit", NULL, 0)));
329 330 331 332 333 334 335 336 337 338

        /* Build interrupt servers and gservers properties */
        for (i = 0; i < smp_threads; i++) {
            servers_prop[i] = cpu_to_be32(index + i);
            /* Hack, direct the group queues back to cpu 0 */
            gservers_prop[i*2] = cpu_to_be32(index + i);
            gservers_prop[i*2 + 1] = 0;
        }
        _FDT((fdt_property(fdt, "ibm,ppc-interrupt-server#s",
                           servers_prop, sizeof(servers_prop))));
339
        _FDT((fdt_property(fdt, "ibm,ppc-interrupt-gserver#s",
340
                           gservers_prop, sizeof(gservers_prop))));
341

D
David Gibson 已提交
342
        if (env->mmu_model & POWERPC_MMU_1TSEG) {
343 344 345 346
            _FDT((fdt_property(fdt, "ibm,processor-segment-sizes",
                               segs, sizeof(segs))));
        }

347 348 349 350
        /* Advertise VMX/VSX (vector extensions) if available
         *   0 / no property == no vector extensions
         *   1               == VMX / Altivec available
         *   2               == VSX available */
351 352 353
        if (env->insns_flags & PPC_ALTIVEC) {
            uint32_t vmx = (env->insns_flags2 & PPC2_VSX) ? 2 : 1;

354 355 356 357 358 359
            _FDT((fdt_property_cell(fdt, "ibm,vmx", vmx)));
        }

        /* Advertise DFP (Decimal Floating Point) if available
         *   0 / no property == no DFP
         *   1               == DFP available */
360 361
        if (env->insns_flags2 & PPC2_DFP) {
            _FDT((fdt_property_cell(fdt, "ibm,dfp", 1)));
362 363
        }

364 365 366
        _FDT((fdt_end_node(fdt)));
    }

367
    g_free(modelname);
368 369 370

    _FDT((fdt_end_node(fdt)));

371 372 373 374 375 376
    /* RTAS */
    _FDT((fdt_begin_node(fdt, "rtas")));

    _FDT((fdt_property(fdt, "ibm,hypertas-functions", hypertas_prop,
                       sizeof(hypertas_prop))));

377 378 379
    _FDT((fdt_property(fdt, "ibm,associativity-reference-points",
        refpoints, sizeof(refpoints))));

380 381
    _FDT((fdt_end_node(fdt)));

382
    /* interrupt controller */
383
    _FDT((fdt_begin_node(fdt, "interrupt-controller")));
384 385 386 387 388 389 390 391

    _FDT((fdt_property_string(fdt, "device_type",
                              "PowerPC-External-Interrupt-Presentation")));
    _FDT((fdt_property_string(fdt, "compatible", "IBM,ppc-xicp")));
    _FDT((fdt_property(fdt, "interrupt-controller", NULL, 0)));
    _FDT((fdt_property(fdt, "ibm,interrupt-server-ranges",
                       interrupt_server_ranges_prop,
                       sizeof(interrupt_server_ranges_prop))));
392 393 394
    _FDT((fdt_property_cell(fdt, "#interrupt-cells", 2)));
    _FDT((fdt_property_cell(fdt, "linux,phandle", PHANDLE_XICP)));
    _FDT((fdt_property_cell(fdt, "phandle", PHANDLE_XICP)));
395 396 397

    _FDT((fdt_end_node(fdt)));

398 399 400 401 402 403 404
    /* vdevice */
    _FDT((fdt_begin_node(fdt, "vdevice")));

    _FDT((fdt_property_string(fdt, "device_type", "vdevice")));
    _FDT((fdt_property_string(fdt, "compatible", "IBM,vdevice")));
    _FDT((fdt_property_cell(fdt, "#address-cells", 0x1)));
    _FDT((fdt_property_cell(fdt, "#size-cells", 0x0)));
405 406
    _FDT((fdt_property_cell(fdt, "#interrupt-cells", 0x2)));
    _FDT((fdt_property(fdt, "interrupt-controller", NULL, 0)));
407 408 409

    _FDT((fdt_end_node(fdt)));

410 411 412
    _FDT((fdt_end_node(fdt))); /* close root node */
    _FDT((fdt_finish(fdt)));

413 414 415 416 417 418 419 420 421 422
    return fdt;
}

/*
 * Produce the final device tree and copy it into guest memory.
 *
 * Expands spapr->fdt_skel into a temporary FDT_MAX_SIZE buffer, adds the
 * VIO devices, the PCI devices of every PHB on spapr->phbs, the RTAS
 * properties, the NUMA associativity properties (only when there is more
 * than one NUMA node) and the /chosen stdout information, packs the
 * result and writes it to guest physical address @fdt_addr.
 *
 * @rtas_addr and @rtas_size are forwarded to
 * spapr_rtas_device_tree_setup().
 *
 * Failure to add VIO or PCI devices terminates QEMU; failure to add the
 * RTAS or NUMA properties is only reported on stderr.
 */
static void spapr_finalize_fdt(sPAPREnvironment *spapr,
                               target_phys_addr_t fdt_addr,
                               target_phys_addr_t rtas_addr,
                               target_phys_addr_t rtas_size)
{
    int ret;
    void *fdt;
    sPAPRPHBState *phb;

    fdt = g_malloc(FDT_MAX_SIZE);

    /* open out the base tree into a temp buffer for the final tweaks */
    _FDT((fdt_open_into(spapr->fdt_skel, fdt, FDT_MAX_SIZE)));

    ret = spapr_populate_vdevice(spapr->vio_bus, fdt);
    if (ret < 0) {
        fprintf(stderr, "couldn't setup vio devices in fdt\n");
        exit(1);
    }

    QLIST_FOREACH(phb, &spapr->phbs, list) {
        ret = spapr_populate_pci_devices(phb, PHANDLE_XICP, fdt);
        /* Check each bridge: a later success must not mask a failure */
        if (ret < 0) {
            fprintf(stderr, "couldn't setup PCI devices in fdt\n");
            exit(1);
        }
    }

    /* RTAS */
    ret = spapr_rtas_device_tree_setup(fdt, rtas_addr, rtas_size);
    if (ret < 0) {
        fprintf(stderr, "Couldn't set up RTAS device tree properties\n");
    }

    /* Advertise NUMA via ibm,associativity */
    if (nb_numa_nodes > 1) {
        ret = spapr_set_associativity(fdt, spapr);
        if (ret < 0) {
            fprintf(stderr, "Couldn't set up NUMA device tree properties\n");
        }
    }

    spapr_populate_chosen_stdout(fdt, spapr->vio_bus);

    _FDT((fdt_pack(fdt)));

    if (fdt_totalsize(fdt) > FDT_MAX_SIZE) {
        hw_error("FDT too big ! 0x%x bytes (max is 0x%x)\n",
                 fdt_totalsize(fdt), FDT_MAX_SIZE);
        exit(1);
    }

    cpu_physical_memory_write(fdt_addr, fdt, fdt_totalsize(fdt));

    g_free(fdt);
}

/*
 * Address translation callback passed to load_elf() for the kernel image:
 * keeps the low 28 bits of @addr and rebases them onto KERNEL_LOAD_ADDR.
 * @opaque is not used.
 */
static uint64_t translate_kernel_address(void *opaque, uint64_t addr)
{
    const uint64_t low_bits = addr & 0x0fffffff;

    return low_bits + KERNEL_LOAD_ADDR;
}

/*
 * Hypercall hook installed as cpu_ppc_hypercall by ppc_spapr_init().
 * Hands the value of GPR3 and a pointer to GPR4 to spapr_hypercall() and
 * stores that function's result back into GPR3.
 */
static void emulate_spapr_hypercall(CPUState *env)
{
    env->gpr[3] = spapr_hypercall(env,
                                  env->gpr[3],
                                  env->gpr + 4);
}

484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504
static void spapr_reset(void *opaque)
{
    sPAPREnvironment *spapr = (sPAPREnvironment *)opaque;

    fprintf(stderr, "sPAPR reset\n");

    /* flush out the hash table */
    memset(spapr->htab, 0, spapr->htab_size);

    /* Load the fdt */
    spapr_finalize_fdt(spapr, spapr->fdt_addr, spapr->rtas_addr,
                       spapr->rtas_size);

    /* Set up the entry state */
    first_cpu->gpr[3] = spapr->fdt_addr;
    first_cpu->gpr[5] = 0;
    first_cpu->halted = 0;
    first_cpu->nip = spapr->entry_point;

}

505 506 507 508 509 510 511 512
/* pSeries LPAR / sPAPR hardware init */
static void ppc_spapr_init(ram_addr_t ram_size,
                           const char *boot_device,
                           const char *kernel_filename,
                           const char *kernel_cmdline,
                           const char *initrd_filename,
                           const char *cpu_model)
{
D
David Gibson 已提交
513
    CPUState *env;
514
    int i;
A
Avi Kivity 已提交
515 516
    MemoryRegion *sysmem = get_system_memory();
    MemoryRegion *ram = g_new(MemoryRegion, 1);
517
    target_phys_addr_t rma_alloc_size, rma_size;
518 519 520
    uint32_t initrd_base = 0;
    long kernel_size = 0, initrd_size = 0;
    long load_limit, rtas_limit, fw_size;
521
    long pteg_shift = 17;
522
    char *filename;
523

524 525 526
    spapr = g_malloc0(sizeof(*spapr));
    QLIST_INIT(&spapr->phbs);

527 528
    cpu_ppc_hypercall = emulate_spapr_hypercall;

529 530 531 532 533 534 535 536 537 538 539 540 541
    /* Allocate RMA if necessary */
    rma_alloc_size = kvmppc_alloc_rma("ppc_spapr.rma", sysmem);

    if (rma_alloc_size == -1) {
        hw_error("qemu: Unable to create RMA\n");
        exit(1);
    }
    if (rma_alloc_size && (rma_alloc_size < ram_size)) {
        rma_size = rma_alloc_size;
    } else {
        rma_size = ram_size;
    }

542
    /* We place the device tree and RTAS just below either the top of the RMA,
543 544
     * or just below 2GB, whichever is lowere, so that it can be
     * processed with 32-bit real mode code if necessary */
545 546 547 548
    rtas_limit = MIN(rma_size, 0x80000000);
    spapr->rtas_addr = rtas_limit - RTAS_MAX_SIZE;
    spapr->fdt_addr = spapr->rtas_addr - FDT_MAX_SIZE;
    load_limit = spapr->fdt_addr - FW_OVERHEAD;
549 550 551

    /* init CPUs */
    if (cpu_model == NULL) {
552
        cpu_model = kvm_enabled() ? "host" : "POWER7";
553 554
    }
    for (i = 0; i < smp_cpus; i++) {
D
David Gibson 已提交
555
        env = cpu_init(cpu_model);
556 557 558 559 560 561 562 563 564 565 566

        if (!env) {
            fprintf(stderr, "Unable to find PowerPC CPU definition\n");
            exit(1);
        }
        /* Set time-base frequency to 512 MHz */
        cpu_ppc_tb_init(env, TIMEBASE_FREQ);
        qemu_register_reset((QEMUResetHandler *)&cpu_reset, env);

        env->hreset_vector = 0x60;
        env->hreset_excp_prefix = 0;
D
David Gibson 已提交
567
        env->gpr[3] = env->cpu_index;
568 569 570
    }

    /* allocate RAM */
571
    spapr->ram_limit = ram_size;
572 573 574 575
    if (spapr->ram_limit > rma_alloc_size) {
        ram_addr_t nonrma_base = rma_alloc_size;
        ram_addr_t nonrma_size = spapr->ram_limit - rma_alloc_size;

576 577
        memory_region_init_ram(ram, "ppc_spapr.ram", nonrma_size);
        vmstate_register_ram_global(ram);
578 579
        memory_region_add_subregion(sysmem, nonrma_base, ram);
    }
580

581 582 583
    /* allocate hash page table.  For now we always make this 16mb,
     * later we should probably make it scale to the size of guest
     * RAM */
584
    spapr->htab_size = 1ULL << (pteg_shift + 7);
585
    spapr->htab = qemu_memalign(spapr->htab_size, spapr->htab_size);
586

D
David Gibson 已提交
587
    for (env = first_cpu; env != NULL; env = env->next_cpu) {
588
        env->external_htab = spapr->htab;
D
David Gibson 已提交
589
        env->htab_base = -1;
590
        env->htab_mask = spapr->htab_size - 1;
591 592 593 594 595 596 597 598 599

        /* Tell KVM that we're in PAPR mode */
        env->spr[SPR_SDR1] = (unsigned long)spapr->htab |
                             ((pteg_shift + 7) - 18);
        env->spr[SPR_HIOR] = 0;

        if (kvm_enabled()) {
            kvmppc_set_papr(env);
        }
600 601
    }

602
    filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, "spapr-rtas.bin");
603
    spapr->rtas_size = load_image_targphys(filename, spapr->rtas_addr,
604
                                           rtas_limit - spapr->rtas_addr);
605
    if (spapr->rtas_size < 0) {
606 607 608
        hw_error("qemu: could not load LPAR rtas '%s'\n", filename);
        exit(1);
    }
609 610 611 612 613
    if (spapr->rtas_size > RTAS_MAX_SIZE) {
        hw_error("RTAS too big ! 0x%lx bytes (max is 0x%x)\n",
                 spapr->rtas_size, RTAS_MAX_SIZE);
        exit(1);
    }
614
    g_free(filename);
615

616

617
    /* Set up Interrupt Controller */
D
David Gibson 已提交
618
    spapr->icp = xics_system_init(XICS_IRQS);
619
    spapr->next_irq = 16;
620 621

    /* Set up VIO bus */
622 623
    spapr->vio_bus = spapr_vio_bus_init();

P
Paolo Bonzini 已提交
624
    for (i = 0; i < MAX_SERIAL_PORTS; i++) {
625
        if (serial_hds[i]) {
626
            spapr_vty_create(spapr->vio_bus, SPAPR_VTY_BASE_ADDRESS + i,
P
Paolo Bonzini 已提交
627
                             serial_hds[i]);
628 629
        }
    }
630

631 632 633 634 635 636
    /* Set up PCI */
    spapr_create_phb(spapr, "pci", SPAPR_PCI_BUID,
                     SPAPR_PCI_MEM_WIN_ADDR,
                     SPAPR_PCI_MEM_WIN_SIZE,
                     SPAPR_PCI_IO_WIN_ADDR);

P
Paolo Bonzini 已提交
637
    for (i = 0; i < nb_nics; i++) {
638 639 640
        NICInfo *nd = &nd_table[i];

        if (!nd->model) {
641
            nd->model = g_strdup("ibmveth");
642 643 644
        }

        if (strcmp(nd->model, "ibmveth") == 0) {
P
Paolo Bonzini 已提交
645
            spapr_vlan_create(spapr->vio_bus, 0x1000 + i, nd);
646
        } else {
647
            pci_nic_init_nofail(&nd_table[i], nd->model, NULL);
648 649 650
        }
    }

651
    for (i = 0; i <= drive_get_max_bus(IF_SCSI); i++) {
P
Paolo Bonzini 已提交
652
        spapr_vscsi_create(spapr->vio_bus, 0x2000 + i);
653 654
    }

655 656 657 658 659 660 661 662 663 664 665 666 667 668
    if (rma_size < (MIN_RMA_SLOF << 20)) {
        fprintf(stderr, "qemu: pSeries SLOF firmware requires >= "
                "%ldM guest RMA (Real Mode Area memory)\n", MIN_RMA_SLOF);
        exit(1);
    }

    fprintf(stderr, "sPAPR memory map:\n");
    fprintf(stderr, "RTAS                 : 0x%08lx..%08lx\n",
            (unsigned long)spapr->rtas_addr,
            (unsigned long)(spapr->rtas_addr + spapr->rtas_size - 1));
    fprintf(stderr, "FDT                  : 0x%08lx..%08lx\n",
            (unsigned long)spapr->fdt_addr,
            (unsigned long)(spapr->fdt_addr + FDT_MAX_SIZE - 1));

669 670 671 672 673 674
    if (kernel_filename) {
        uint64_t lowaddr = 0;

        kernel_size = load_elf(kernel_filename, translate_kernel_address, NULL,
                               NULL, &lowaddr, NULL, 1, ELF_MACHINE, 0);
        if (kernel_size < 0) {
675 676
            kernel_size = load_image_targphys(kernel_filename,
                                              KERNEL_LOAD_ADDR,
677
                                              load_limit - KERNEL_LOAD_ADDR);
678 679 680 681 682 683
        }
        if (kernel_size < 0) {
            fprintf(stderr, "qemu: could not load kernel '%s'\n",
                    kernel_filename);
            exit(1);
        }
684 685
        fprintf(stderr, "Kernel               : 0x%08x..%08lx\n",
                KERNEL_LOAD_ADDR, KERNEL_LOAD_ADDR + kernel_size - 1);
686 687 688

        /* load initrd */
        if (initrd_filename) {
689 690 691 692
            /* Try to locate the initrd in the gap between the kernel
             * and the firmware. Add a bit of space just in case
             */
            initrd_base = (KERNEL_LOAD_ADDR + kernel_size + 0x1ffff) & ~0xffff;
693
            initrd_size = load_image_targphys(initrd_filename, initrd_base,
694
                                              load_limit - initrd_base);
695 696 697 698 699
            if (initrd_size < 0) {
                fprintf(stderr, "qemu: could not load initial ram disk '%s'\n",
                        initrd_filename);
                exit(1);
            }
700 701
            fprintf(stderr, "Ramdisk              : 0x%08lx..%08lx\n",
                    (long)initrd_base, (long)(initrd_base + initrd_size - 1));
702 703 704 705
        } else {
            initrd_base = 0;
            initrd_size = 0;
        }
706
    }
707

708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724
    filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, FW_FILE_NAME);
    fw_size = load_image_targphys(filename, 0, FW_MAX_SIZE);
    if (fw_size < 0) {
        hw_error("qemu: could not load LPAR rtas '%s'\n", filename);
        exit(1);
    }
    g_free(filename);
    fprintf(stderr, "Firmware load        : 0x%08x..%08lx\n",
            0, fw_size);
    fprintf(stderr, "Firmware runtime     : 0x%08lx..%08lx\n",
            load_limit, (unsigned long)spapr->fdt_addr);

    spapr->entry_point = 0x100;

    /* SLOF will startup the secondary CPUs using RTAS */
    for (env = first_cpu; env != NULL; env = env->next_cpu) {
        env->halted = 1;
725 726 727
    }

    /* Prepare the device tree */
728
    spapr->fdt_skel = spapr_create_fdt_skel(cpu_model, rma_size,
729
                                            initrd_base, initrd_size,
730
                                            kernel_size,
731 732 733
                                            boot_device, kernel_cmdline,
                                            pteg_shift + 7);
    assert(spapr->fdt_skel != NULL);
734

735
    qemu_register_reset(spapr_reset, spapr);
736 737 738 739 740 741 742 743
}

/*
 * Machine description for the "pseries" machine type; handed to
 * qemu_register_machine() by spapr_machine_init().
 */
static QEMUMachine spapr_machine = {
    .name = "pseries",
    .desc = "pSeries Logical Partition (PAPR compliant)",
    .init = ppc_spapr_init,
    .max_cpus = MAX_CPUS,
    .no_parallel = 1,
    .use_scsi = 1,
};

static void spapr_machine_init(void)
{
    qemu_register_machine(&spapr_machine);
}

machine_init(spapr_machine_init);