/*
 *  64-bit pSeries and RS/6000 setup code.
 *
 *  Copyright (C) 1995  Linus Torvalds
 *  Adapted from 'alpha' version by Gary Thomas
 *  Modified by Cort Dougan (cort@cs.nmt.edu)
 *  Modified by PPC64 Team, IBM Corp
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

/*
 * bootup setup stuff..
 */

#include <linux/cpu.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/stddef.h>
#include <linux/unistd.h>
#include <linux/user.h>
#include <linux/tty.h>
#include <linux/major.h>
#include <linux/interrupt.h>
#include <linux/reboot.h>
#include <linux/init.h>
#include <linux/ioport.h>
#include <linux/console.h>
#include <linux/pci.h>
#include <linux/utsname.h>
#include <linux/adb.h>
#include <linux/export.h>
#include <linux/delay.h>
#include <linux/irq.h>
#include <linux/seq_file.h>
#include <linux/root_dev.h>
#include <linux/of.h>
#include <linux/of_pci.h>
#include <linux/memblock.h>

#include <asm/mmu.h>
#include <asm/processor.h>
#include <asm/io.h>
#include <asm/pgtable.h>
#include <asm/prom.h>
#include <asm/rtas.h>
#include <asm/pci-bridge.h>
#include <asm/iommu.h>
#include <asm/dma.h>
#include <asm/machdep.h>
#include <asm/irq.h>
#include <asm/time.h>
#include <asm/nvram.h>
#include <asm/pmc.h>
#include <asm/xics.h>
#include <asm/xive.h>
#include <asm/ppc-pci.h>
#include <asm/i8259.h>
#include <asm/udbg.h>
#include <asm/smp.h>
#include <asm/firmware.h>
#include <asm/eeh.h>
#include <asm/reg.h>
#include <asm/plpar_wrappers.h>
#include <asm/kexec.h>
#include <asm/isa-bridge.h>
#include <asm/security_features.h>
#include <asm/asm-const.h>

#include "pseries.h"
#include "../../../../drivers/pci/pci.h"

int CMO_PrPSP = -1;
int CMO_SecPSP = -1;
unsigned long CMO_PageSize = (ASM_CONST(1) << IOMMU_PAGE_SHIFT_4K);
EXPORT_SYMBOL(CMO_PageSize);

int fwnmi_active;  /* TRUE if an FWNMI handler is present */

static void pSeries_show_cpuinfo(struct seq_file *m)
{
	struct device_node *root;
	const char *model = "";

	root = of_find_node_by_path("/");
	if (root)
		model = of_get_property(root, "model", NULL);
	seq_printf(m, "machine\t\t: CHRP %s\n", model);
	of_node_put(root);
	if (radix_enabled())
		seq_printf(m, "MMU\t\t: Radix\n");
	else
		seq_printf(m, "MMU\t\t: Hash\n");
}

/* Initialize firmware assisted non-maskable interrupts if
 * the firmware supports this feature.
 */
static void __init fwnmi_init(void)
{
	unsigned long system_reset_addr, machine_check_addr;
	u8 *mce_data_buf;
	unsigned int i;
	int nr_cpus = num_possible_cpus();
#ifdef CONFIG_PPC_BOOK3S_64
	struct slb_entry *slb_ptr;
	size_t size;
#endif

	int ibm_nmi_register = rtas_token("ibm,nmi-register");
	if (ibm_nmi_register == RTAS_UNKNOWN_SERVICE)
		return;

	/* If the kernel's not linked at zero we point the firmware at low
	 * addresses anyway, and use a trampoline to get to the real code. */
	system_reset_addr  = __pa(system_reset_fwnmi) - PHYSICAL_START;
	machine_check_addr = __pa(machine_check_fwnmi) - PHYSICAL_START;

	if (0 == rtas_call(ibm_nmi_register, 2, 1, NULL, system_reset_addr,
				machine_check_addr))
		fwnmi_active = 1;

	/*
	 * Allocate a chunk for per cpu buffer to hold rtas errorlog.
	 * It will be used in real mode mce handler, hence it needs to be
	 * below RMA.
	 */
	mce_data_buf = __va(memblock_alloc_base(RTAS_ERROR_LOG_MAX * nr_cpus,
					RTAS_ERROR_LOG_MAX, ppc64_rma_size));
	for_each_possible_cpu(i) {
		paca_ptrs[i]->mce_data_buf = mce_data_buf +
						(RTAS_ERROR_LOG_MAX * i);
	}

#ifdef CONFIG_PPC_BOOK3S_64
	/* Allocate per cpu slb area to save old slb contents during MCE */
	size = sizeof(struct slb_entry) * mmu_slb_size * nr_cpus;
	slb_ptr = __va(memblock_alloc_base(size, sizeof(struct slb_entry),
					   ppc64_rma_size));
	for_each_possible_cpu(i)
		paca_ptrs[i]->mce_faulty_slbs = slb_ptr + (mmu_slb_size * i);
#endif
}

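/*
 * Chained handler for interrupts cascaded through the legacy i8259 PIC:
 * fetch the pending 8259 vector, hand it to the generic IRQ layer, then
 * EOI the cascade interrupt at the parent controller.
 */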
static void pseries_8259_cascade(struct irq_desc *desc)
{
	struct irq_chip *chip = irq_desc_get_chip(desc);
	unsigned int cascade_irq = i8259_irq();

	if (cascade_irq)
		generic_handle_irq(cascade_irq);

	chip->irq_eoi(&desc->irq_data);
}

static void __init pseries_setup_i8259_cascade(void)
{
	struct device_node *np, *old, *found = NULL;
	unsigned int cascade;
	const u32 *addrp;
	unsigned long intack = 0;
	int naddr;

	for_each_node_by_type(np, "interrupt-controller") {
		if (of_device_is_compatible(np, "chrp,iic")) {
			found = np;
			break;
		}
	}

	if (found == NULL) {
		printk(KERN_DEBUG "pic: no ISA interrupt controller\n");
		return;
	}

	cascade = irq_of_parse_and_map(found, 0);
	if (!cascade) {
		printk(KERN_ERR "pic: failed to map cascade interrupt");
		return;
	}
	pr_debug("pic: cascade mapped to irq %d\n", cascade);

	for (old = of_node_get(found); old != NULL ; old = np) {
		np = of_get_parent(old);
		of_node_put(old);
		if (np == NULL)
			break;
		if (strcmp(np->name, "pci") != 0)
			continue;
		addrp = of_get_property(np, "8259-interrupt-acknowledge", NULL);
		if (addrp == NULL)
			continue;
		naddr = of_n_addr_cells(np);
		intack = addrp[naddr-1];
		if (naddr > 1)
			intack |= ((unsigned long)addrp[naddr-2]) << 32;
	}
	if (intack)
		printk(KERN_DEBUG "pic: PCI 8259 intack at 0x%016lx\n", intack);
	i8259_init(found, intack);
	of_node_put(found);
	irq_set_chained_handler(cascade, pseries_8259_cascade);
}

static void __init pseries_init_irq(void)
{
	/* Try using a XIVE if available, otherwise use a XICS */
	if (!xive_spapr_init()) {
		xics_init();
		pseries_setup_i8259_cascade();
	}
}

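/*
 * Ask the hypervisor to enable the performance monitor facility for
 * this partition via the H_PERFMON hcall.
 */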
static void pseries_lpar_enable_pmcs(void)
{
	unsigned long set, reset;

	set = 1UL << 63;
	reset = 0;
	plpar_hcall_norets(H_PERFMON, set, reset);
}

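/*
 * Keep the pci_dn hierarchy in sync with device tree reconfiguration
 * (e.g. DLPAR): add pci_dn info when a node is attached and unlink it
 * when the node is detached.
 */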
static int pci_dn_reconfig_notifier(struct notifier_block *nb, unsigned long action, void *data)
{
	struct of_reconfig_data *rd = data;
	struct device_node *parent, *np = rd->dn;
	struct pci_dn *pdn;
	int err = NOTIFY_OK;

	switch (action) {
	case OF_RECONFIG_ATTACH_NODE:
		parent = of_get_parent(np);
		pdn = parent ? PCI_DN(parent) : NULL;
		if (pdn)
			pci_add_device_node_info(pdn->phb, np);

		of_node_put(parent);
		break;
	case OF_RECONFIG_DETACH_NODE:
		pdn = PCI_DN(np);
		if (pdn)
			list_del(&pdn->list);
		break;
	default:
		err = NOTIFY_DONE;
		break;
	}
	return err;
}

static struct notifier_block pci_dn_reconfig_nb = {
	.notifier_call = pci_dn_reconfig_notifier,
};

struct kmem_cache *dtl_cache;

#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
/*
 * Allocate space for the dispatch trace log for all possible cpus
 * and register the buffers with the hypervisor.  This is used for
 * computing time stolen by the hypervisor.
 */
static int alloc_dispatch_logs(void)
{
	int cpu, ret;
	struct paca_struct *pp;
	struct dtl_entry *dtl;

	if (!firmware_has_feature(FW_FEATURE_SPLPAR))
		return 0;

	if (!dtl_cache)
		return 0;

	for_each_possible_cpu(cpu) {
		pp = paca_ptrs[cpu];
		dtl = kmem_cache_alloc(dtl_cache, GFP_KERNEL);
		if (!dtl) {
			pr_warn("Failed to allocate dispatch trace log for cpu %d\n",
				cpu);
			pr_warn("Stolen time statistics will be unreliable\n");
			break;
		}

		pp->dtl_ridx = 0;
		pp->dispatch_log = dtl;
		pp->dispatch_log_end = dtl + N_DISPATCH_LOG;
		pp->dtl_curr = dtl;
	}

	/* Register the DTL for the current (boot) cpu */
	dtl = get_paca()->dispatch_log;
	get_paca()->dtl_ridx = 0;
	get_paca()->dtl_curr = dtl;
	get_paca()->lppaca_ptr->dtl_idx = 0;

	/* hypervisor reads buffer length from this field */
	dtl->enqueue_to_dispatch_time = cpu_to_be32(DISPATCH_LOG_BYTES);
	ret = register_dtl(hard_smp_processor_id(), __pa(dtl));
	if (ret)
		pr_err("WARNING: DTL registration of cpu %d (hw %d) failed "
		       "with %d\n", smp_processor_id(),
		       hard_smp_processor_id(), ret);
	get_paca()->lppaca_ptr->dtl_enable_mask = 2;

	return 0;
}
#else /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
static inline int alloc_dispatch_logs(void)
{
	return 0;
}
#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */

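/*
 * Create the kmem cache used for dispatch trace log buffers, then
 * allocate and register a log for each possible CPU.
 */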
static int alloc_dispatch_log_kmem_cache(void)
{
	dtl_cache = kmem_cache_create("dtl", DISPATCH_LOG_BYTES,
						DISPATCH_LOG_BYTES, 0, NULL);
	if (!dtl_cache) {
		pr_warn("Failed to create dispatch trace log buffer cache\n");
		pr_warn("Stolen time statistics will be unreliable\n");
		return 0;
	}

	return alloc_dispatch_logs();
}
machine_early_initcall(pseries, alloc_dispatch_log_kmem_cache);

static void pseries_lpar_idle(void)
{
	/*
	 * Default handler to go into low thread priority and possibly
	 * low power mode by ceding processor to hypervisor
	 */

	/* Indicate to hypervisor that we are idle. */
	get_lppaca()->idle = 1;

	/*
	 * Yield the processor to the hypervisor.  We return if
	 * an external interrupt occurs (which are driven prior
	 * to returning here) or if a prod occurs from another
	 * processor. When returning here, external interrupts
	 * are enabled.
	 */
	cede_processor();

	get_lppaca()->idle = 0;
}

/*
 * Enable relocation on during exceptions. This has partition wide scope and
 * may take a while to complete, if it takes longer than one second we will
 * just give up rather than wasting any more time on this - if that turns out
 * to ever be a problem in practice we can move this into a kernel thread to
 * finish off the process later in boot.
 */
void pseries_enable_reloc_on_exc(void)
{
	long rc;
	unsigned int delay, total_delay = 0;

	while (1) {
		rc = enable_reloc_on_exceptions();
		if (!H_IS_LONG_BUSY(rc)) {
			if (rc == H_P2) {
				pr_info("Relocation on exceptions not"
					" supported\n");
			} else if (rc != H_SUCCESS) {
				pr_warn("Unable to enable relocation"
					" on exceptions: %ld\n", rc);
			}
			break;
		}

		delay = get_longbusy_msecs(rc);
		total_delay += delay;
		if (total_delay > 1000) {
			pr_warn("Warning: Giving up waiting to enable "
				"relocation on exceptions (%u msec)!\n",
				total_delay);
			return;
		}

		mdelay(delay);
	}
}
EXPORT_SYMBOL(pseries_enable_reloc_on_exc);

void pseries_disable_reloc_on_exc(void)
{
	long rc;

	while (1) {
		rc = disable_reloc_on_exceptions();
		if (!H_IS_LONG_BUSY(rc))
			break;
		mdelay(get_longbusy_msecs(rc));
	}
	if (rc != H_SUCCESS)
		pr_warn("Warning: Failed to disable relocation on exceptions: %ld\n",
			rc);
}
EXPORT_SYMBOL(pseries_disable_reloc_on_exc);

#ifdef CONFIG_KEXEC_CORE
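/*
 * Switch relocation-on exceptions back off before handing control to
 * the new kernel, then fall through to the generic kexec path.
 */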
static void pSeries_machine_kexec(struct kimage *image)
{
	if (firmware_has_feature(FW_FEATURE_SET_MODE))
		pseries_disable_reloc_on_exc();

	default_machine_kexec(image);
}
#endif

#ifdef __LITTLE_ENDIAN__
void pseries_big_endian_exceptions(void)
{
	long rc;

	while (1) {
		rc = enable_big_endian_exceptions();
		if (!H_IS_LONG_BUSY(rc))
			break;
		mdelay(get_longbusy_msecs(rc));
	}

	/*
	 * At this point it is unlikely panic() will get anything
	 * out to the user, since this is called very late in kexec
	 * but at least this will stop us from continuing on further
	 * and creating an even more difficult to debug situation.
	 *
	 * There is a known problem when kdump'ing, if cpus are offline
	 * the above call will fail. Rather than panicking again, keep
	 * going and hope the kdump kernel is also little endian, which
	 * it usually is.
	 */
	if (rc && !kdump_in_progress())
		panic("Could not enable big endian exceptions");
}

void pseries_little_endian_exceptions(void)
{
	long rc;

	while (1) {
		rc = enable_little_endian_exceptions();
		if (!H_IS_LONG_BUSY(rc))
			break;
		mdelay(get_longbusy_msecs(rc));
	}
	if (rc) {
		ppc_md.progress("H_SET_MODE LE exception fail", 0);
		panic("Could not enable little endian exceptions");
	}
}
#endif

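/*
 * Walk the children of the device tree root looking for PCI host
 * bridge nodes ("pci"/"pciex") and set up a pci_controller for each.
 */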
static void __init find_and_init_phbs(void)
{
	struct device_node *node;
	struct pci_controller *phb;
	struct device_node *root = of_find_node_by_path("/");

	for_each_child_of_node(root, node) {
		if (!of_node_is_type(node, "pci") &&
		    !of_node_is_type(node, "pciex"))
			continue;

		phb = pcibios_alloc_controller(node);
		if (!phb)
			continue;
		rtas_setup_phb(phb);
		pci_process_bridge_OF_ranges(phb, node, 0);
		isa_bridge_find_early(phb);
		phb->controller_ops = pseries_pci_controller_ops;
	}

	of_node_put(root);

	/*
	 * PCI_PROBE_ONLY and PCI_REASSIGN_ALL_BUS can be set via properties
	 * in chosen.
	 */
	of_pci_check_probe_only();
}

static void init_cpu_char_feature_flags(struct h_cpu_char_result *result)
{
	/*
	 * The features below are disabled by default, so we instead look to see
	 * if firmware has *enabled* them, and set them if so.
	 */
	if (result->character & H_CPU_CHAR_SPEC_BAR_ORI31)
		security_ftr_set(SEC_FTR_SPEC_BAR_ORI31);

	if (result->character & H_CPU_CHAR_BCCTRL_SERIALISED)
		security_ftr_set(SEC_FTR_BCCTRL_SERIALISED);

	if (result->character & H_CPU_CHAR_L1D_FLUSH_ORI30)
		security_ftr_set(SEC_FTR_L1D_FLUSH_ORI30);

	if (result->character & H_CPU_CHAR_L1D_FLUSH_TRIG2)
		security_ftr_set(SEC_FTR_L1D_FLUSH_TRIG2);

	if (result->character & H_CPU_CHAR_L1D_THREAD_PRIV)
		security_ftr_set(SEC_FTR_L1D_THREAD_PRIV);

	if (result->character & H_CPU_CHAR_COUNT_CACHE_DISABLED)
		security_ftr_set(SEC_FTR_COUNT_CACHE_DISABLED);

	if (result->character & H_CPU_CHAR_BCCTR_FLUSH_ASSIST)
		security_ftr_set(SEC_FTR_BCCTR_FLUSH_ASSIST);

	if (result->behaviour & H_CPU_BEHAV_FLUSH_COUNT_CACHE)
		security_ftr_set(SEC_FTR_FLUSH_COUNT_CACHE);

	/*
	 * The features below are enabled by default, so we instead look to see
	 * if firmware has *disabled* them, and clear them if so.
	 */
	if (!(result->behaviour & H_CPU_BEHAV_FAVOUR_SECURITY))
		security_ftr_clear(SEC_FTR_FAVOUR_SECURITY);

	if (!(result->behaviour & H_CPU_BEHAV_L1D_FLUSH_PR))
		security_ftr_clear(SEC_FTR_L1D_FLUSH_PR);

	if (!(result->behaviour & H_CPU_BEHAV_BNDS_CHK_SPEC_BAR))
		security_ftr_clear(SEC_FTR_BNDS_CHK_SPEC_BAR);
}

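/*
 * Query the hypervisor's CPU characteristics and configure the L1D
 * flush and count cache flush mitigations accordingly.
 */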
void pseries_setup_rfi_flush(void)
{
	struct h_cpu_char_result result;
	enum l1d_flush_type types;
	bool enable;
	long rc;

	/*
	 * Set features to the defaults assumed by init_cpu_char_feature_flags()
	 * so it can set/clear again any features that might have changed after
	 * migration, and in case the hypercall fails and it is not even called.
	 */
	powerpc_security_features = SEC_FTR_DEFAULT;

	rc = plpar_get_cpu_characteristics(&result);
	if (rc == H_SUCCESS)
		init_cpu_char_feature_flags(&result);

	/*
	 * We're the guest so this doesn't apply to us, clear it to simplify
	 * handling of it elsewhere.
	 */
	security_ftr_clear(SEC_FTR_L1D_FLUSH_HV);

	types = L1D_FLUSH_FALLBACK;

	if (security_ftr_enabled(SEC_FTR_L1D_FLUSH_TRIG2))
		types |= L1D_FLUSH_MTTRIG;

	if (security_ftr_enabled(SEC_FTR_L1D_FLUSH_ORI30))
		types |= L1D_FLUSH_ORI;

	enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && \
		 security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR);

	setup_rfi_flush(types, enable);
	setup_count_cache_flush();
}

#ifdef CONFIG_PCI_IOV
enum rtas_iov_fw_value_map {
	NUM_RES_PROPERTY  = 0, /* Number of Resources */
	LOW_INT           = 1, /* Lowest 32 bits of Address */
	START_OF_ENTRIES  = 2, /* Always start of entry */
	APERTURE_PROPERTY = 2, /* Start of entry+ to  Aperture Size */
	WDW_SIZE_PROPERTY = 4, /* Start of entry+ to Window Size */
	NEXT_ENTRY        = 7  /* Go to next entry on array */
};

enum get_iov_fw_value_index {
	BAR_ADDRS     = 1,    /*  Get Bar Address */
	APERTURE_SIZE = 2,    /*  Get Aperture Size */
	WDW_SIZE      = 3     /*  Get Window Size */
};

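/*
 * Read one value (BAR address, aperture size or window size) for a VF
 * BAR from the "ibm,open-sriov-vf-bar-info" property supplied by firmware.
 */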
resource_size_t pseries_get_iov_fw_value(struct pci_dev *dev, int resno,
					 enum get_iov_fw_value_index value)
{
	const int *indexes;
	struct device_node *dn = pci_device_to_OF_node(dev);
	int i, num_res, ret = 0;

	indexes = of_get_property(dn, "ibm,open-sriov-vf-bar-info", NULL);
	if (!indexes)
		return  0;

	/*
	 * First element in the array is the number of Bars
	 * returned.  Search through the list to find the matching
	 * bar
	 */
	num_res = of_read_number(&indexes[NUM_RES_PROPERTY], 1);
	if (resno >= num_res)
		return 0; /* or an error */

	i = START_OF_ENTRIES + NEXT_ENTRY * resno;
	switch (value) {
	case BAR_ADDRS:
		ret = of_read_number(&indexes[i], 2);
		break;
	case APERTURE_SIZE:
		ret = of_read_number(&indexes[i + APERTURE_PROPERTY], 2);
		break;
	case WDW_SIZE:
		ret = of_read_number(&indexes[i + WDW_SIZE_PROPERTY], 2);
		break;
	}

	return ret;
}

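/*
 * Populate the IOV BAR resources with the sizes and flags provided by
 * firmware, marking them as fixed 64-bit memory resources.
 */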
void of_pci_set_vf_bar_size(struct pci_dev *dev, const int *indexes)
{
	struct resource *res;
	resource_size_t base, size;
	int i, r, num_res;

	num_res = of_read_number(&indexes[NUM_RES_PROPERTY], 1);
	num_res = min_t(int, num_res, PCI_SRIOV_NUM_BARS);
	for (i = START_OF_ENTRIES, r = 0; r < num_res && r < PCI_SRIOV_NUM_BARS;
	     i += NEXT_ENTRY, r++) {
		res = &dev->resource[r + PCI_IOV_RESOURCES];
		base = of_read_number(&indexes[i], 2);
		size = of_read_number(&indexes[i + APERTURE_PROPERTY], 2);
		res->flags = pci_parse_of_flags(of_read_number
						(&indexes[i + LOW_INT], 1), 0);
		res->flags |= (IORESOURCE_MEM_64 | IORESOURCE_PCI_FIXED);
		res->name = pci_name(dev);
		res->start = base;
		res->end = base + size - 1;
	}
}

void of_pci_parse_iov_addrs(struct pci_dev *dev, const int *indexes)
{
	struct resource *res, *root, *conflict;
	resource_size_t base, size;
	int i, r, num_res;

	/*
	 * First element in the array is the number of Bars
	 * returned.  Search through the list to find the matching
	 * bars and assign them from firmware into the resources structure.
	 */
	num_res = of_read_number(&indexes[NUM_RES_PROPERTY], 1);
	for (i = START_OF_ENTRIES, r = 0; r < num_res && r < PCI_SRIOV_NUM_BARS;
	     i += NEXT_ENTRY, r++) {
		res = &dev->resource[r + PCI_IOV_RESOURCES];
		base = of_read_number(&indexes[i], 2);
		size = of_read_number(&indexes[i + WDW_SIZE_PROPERTY], 2);
		res->name = pci_name(dev);
		res->start = base;
		res->end = base + size - 1;
		root = &iomem_resource;
		dev_dbg(&dev->dev,
			"pSeries IOV BAR %d: trying firmware assignment %pR\n",
			 r + PCI_IOV_RESOURCES, res);
		conflict = request_resource_conflict(root, res);
		if (conflict) {
			dev_info(&dev->dev,
				 "BAR %d: %pR conflicts with %s %pR\n",
				 r + PCI_IOV_RESOURCES, res,
				 conflict->name, conflict);
			res->flags |= IORESOURCE_UNSET;
		}
	}
}

static void pseries_disable_sriov_resources(struct pci_dev *pdev)
{
	int i;

	pci_warn(pdev, "No hypervisor support for SR-IOV on this device, IOV BARs disabled.\n");
	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++)
		pdev->resource[i + PCI_IOV_RESOURCES].flags = 0;
}

static void pseries_pci_fixup_resources(struct pci_dev *pdev)
{
	const int *indexes;
	struct device_node *dn = pci_device_to_OF_node(pdev);

	/* Firmware must support open SR-IOV, otherwise don't configure */
	indexes = of_get_property(dn, "ibm,open-sriov-vf-bar-info", NULL);
	if (indexes)
		of_pci_set_vf_bar_size(pdev, indexes);
	else
		pseries_disable_sriov_resources(pdev);
}

static void pseries_pci_fixup_iov_resources(struct pci_dev *pdev)
{
	const int *indexes;
	struct device_node *dn = pci_device_to_OF_node(pdev);

	if (!pdev->is_physfn || pci_dev_is_added(pdev))
		return;
	/* Firmware must support open SR-IOV, otherwise don't configure */
	indexes = of_get_property(dn, "ibm,open-sriov-vf-bar-info", NULL);
	if (indexes)
		of_pci_parse_iov_addrs(pdev, indexes);
	else
		pseries_disable_sriov_resources(pdev);
}

static resource_size_t pseries_pci_iov_resource_alignment(struct pci_dev *pdev,
							  int resno)
{
	const __be32 *reg;
	struct device_node *dn = pci_device_to_OF_node(pdev);

	/* Firmware must support open SR-IOV, otherwise report regular alignment */
	reg = of_get_property(dn, "ibm,is-open-sriov-pf", NULL);
	if (!reg)
		return pci_iov_resource_size(pdev, resno);

	if (!pdev->is_physfn)
		return 0;
	return pseries_get_iov_fw_value(pdev,
					resno - PCI_IOV_RESOURCES,
					APERTURE_SIZE);
}
#endif

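/* Top-level platform setup: interrupts, NMI, PCI, NVRAM and LPAR hooks. */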
static void __init pSeries_setup_arch(void)
{
	set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT);

	/* Discover PIC type and setup ppc_md accordingly */
	smp_init_pseries();


	/* openpic global configuration register (64-bit format). */
	/* openpic Interrupt Source Unit pointer (64-bit format). */
	/* python0 facility area (mmio) (64-bit format) REAL address. */

	/* init to some ~sane value until calibrate_delay() runs */
	loops_per_jiffy = 50000000;

	fwnmi_init();

	pseries_setup_rfi_flush();
	setup_stf_barrier();

	/* By default, only probe PCI (can be overridden by rtas_pci) */
	pci_add_flags(PCI_PROBE_ONLY);

	/* Find and initialize PCI host bridges */
	init_pci_config_tokens();
	find_and_init_phbs();
	of_reconfig_notifier_register(&pci_dn_reconfig_nb);

	pSeries_nvram_init();

	if (firmware_has_feature(FW_FEATURE_LPAR)) {
		vpa_init(boot_cpuid);
		ppc_md.power_save = pseries_lpar_idle;
		ppc_md.enable_pmcs = pseries_lpar_enable_pmcs;
#ifdef CONFIG_PCI_IOV
		ppc_md.pcibios_fixup_resources =
			pseries_pci_fixup_resources;
		ppc_md.pcibios_fixup_sriov =
			pseries_pci_fixup_iov_resources;
		ppc_md.pcibios_iov_resource_alignment =
			pseries_pci_iov_resource_alignment;
#endif
	} else {
		/* No special idle routine */
		ppc_md.enable_pmcs = power4_enable_pmcs;
	}

	ppc_md.pcibios_root_bridge_prepare = pseries_root_bridge_prepare;
}

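/* Flush any buffered panic output, then ask RTAS to terminate the OS. */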
static void pseries_panic(char *str)
{
	panic_flush_kmsg_end();
	rtas_os_term(str);
}

static int __init pSeries_init_panel(void)
{
	/* Manually leave the kernel version on the panel. */
#ifdef __BIG_ENDIAN__
	ppc_md.progress("Linux ppc64\n", 0);
#else
	ppc_md.progress("Linux ppc64le\n", 0);
#endif
	ppc_md.progress(init_utsname()->version, 0);

	return 0;
}
machine_arch_initcall(pseries, pSeries_init_panel);

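/* Hypercall wrappers for setting the hardware breakpoint registers. */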
static int pseries_set_dabr(unsigned long dabr, unsigned long dabrx)
{
	return plpar_hcall_norets(H_SET_DABR, dabr);
}

static int pseries_set_xdabr(unsigned long dabr, unsigned long dabrx)
{
	/* Have to set at least one bit in the DABRX according to PAPR */
	if (dabrx == 0 && dabr == 0)
		dabrx = DABRX_USER;
	/* PAPR says we can only set kernel and user bits */
	dabrx &= DABRX_KERNEL | DABRX_USER;

	return plpar_hcall_norets(H_SET_XDABR, dabr, dabrx);
}

static int pseries_set_dawr(unsigned long dawr, unsigned long dawrx)
{
	/* PAPR says we can't set HYP */
	dawrx &= ~DAWRX_HYP;

	return  plpar_set_watchpoint0(dawr, dawrx);
}

#define CMO_CHARACTERISTICS_TOKEN 44
#define CMO_MAXLENGTH 1026

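/*
 * Probe whether the platform supports extended CMO (memory coalescing)
 * and set or clear FW_FEATURE_XCMO accordingly.
 */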
void pSeries_coalesce_init(void)
{
	struct hvcall_mpp_x_data mpp_x_data;

	if (firmware_has_feature(FW_FEATURE_CMO) && !h_get_mpp_x(&mpp_x_data))
		powerpc_firmware_features |= FW_FEATURE_XCMO;
	else
		powerpc_firmware_features &= ~FW_FEATURE_XCMO;
}

/**
 * pSeries_cmo_feature_init - FW_FEATURE_CMO is not stored in ibm,hypertas-functions,
 * handle that here. (Stolen from parse_system_parameter_string)
 */
static void pSeries_cmo_feature_init(void)
{
	char *ptr, *key, *value, *end;
	int call_status;
	int page_order = IOMMU_PAGE_SHIFT_4K;

	pr_debug(" -> fw_cmo_feature_init()\n");
	spin_lock(&rtas_data_buf_lock);
	memset(rtas_data_buf, 0, RTAS_DATA_BUF_SIZE);
	call_status = rtas_call(rtas_token("ibm,get-system-parameter"), 3, 1,
				NULL,
				CMO_CHARACTERISTICS_TOKEN,
				__pa(rtas_data_buf),
				RTAS_DATA_BUF_SIZE);

	if (call_status != 0) {
		spin_unlock(&rtas_data_buf_lock);
		pr_debug("CMO not available\n");
		pr_debug(" <- fw_cmo_feature_init()\n");
		return;
	}

	end = rtas_data_buf + CMO_MAXLENGTH - 2;
	ptr = rtas_data_buf + 2;	/* step over strlen value */
	key = value = ptr;

	while (*ptr && (ptr <= end)) {
		/* Separate the key and value by replacing '=' with '\0' and
		 * point the value at the string after the '='
		 */
		if (ptr[0] == '=') {
			ptr[0] = '\0';
			value = ptr + 1;
		} else if (ptr[0] == '\0' || ptr[0] == ',') {
			/* Terminate the string containing the key/value pair */
			ptr[0] = '\0';

			if (key == value) {
				pr_debug("Malformed key/value pair\n");
				/* Never found a '=', end processing */
				break;
			}

			if (0 == strcmp(key, "CMOPageSize"))
				page_order = simple_strtol(value, NULL, 10);
			else if (0 == strcmp(key, "PrPSP"))
				CMO_PrPSP = simple_strtol(value, NULL, 10);
			else if (0 == strcmp(key, "SecPSP"))
				CMO_SecPSP = simple_strtol(value, NULL, 10);
			value = key = ptr + 1;
		}
		ptr++;
	}

	/* Page size is returned as the power of 2 of the page size,
	 * convert to the page size in bytes before returning
	 */
	CMO_PageSize = 1 << page_order;
	pr_debug("CMO_PageSize = %lu\n", CMO_PageSize);

	if (CMO_PrPSP != -1 || CMO_SecPSP != -1) {
		pr_info("CMO enabled\n");
		pr_debug("CMO enabled, PrPSP=%d, SecPSP=%d\n", CMO_PrPSP,
		         CMO_SecPSP);
		powerpc_firmware_features |= FW_FEATURE_CMO;
		pSeries_coalesce_init();
	} else
		pr_debug("CMO not enabled, PrPSP=%d, SecPSP=%d\n", CMO_PrPSP,
		         CMO_SecPSP);
	spin_unlock(&rtas_data_buf_lock);
	pr_debug(" <- fw_cmo_feature_init()\n");
}

/*
 * Early initialization.  Relocation is on but do not reference unbolted pages
 */
static void __init pseries_init(void)
{
	pr_debug(" -> pseries_init()\n");

#ifdef CONFIG_HVC_CONSOLE
	if (firmware_has_feature(FW_FEATURE_LPAR))
		hvc_vio_init_early();
#endif
	if (firmware_has_feature(FW_FEATURE_XDABR))
		ppc_md.set_dabr = pseries_set_xdabr;
	else if (firmware_has_feature(FW_FEATURE_DABR))
		ppc_md.set_dabr = pseries_set_dabr;

	if (firmware_has_feature(FW_FEATURE_SET_MODE))
		ppc_md.set_dawr = pseries_set_dawr;

	pSeries_cmo_feature_init();
	iommu_init_early_pSeries();

	pr_debug(" <- pseries_init()\n");
}

/**
 * pseries_power_off - tell firmware about how to power off the system.
 *
 * This function calls either the power-off rtas token in normal cases
 * or the ibm,power-off-ups token (if present & requested) in case of
 * a power failure. If power-off token is used, power on will only be
 * possible with power button press. If ibm,power-off-ups token is used
 * it will allow auto poweron after power is restored.
 */
static void pseries_power_off(void)
{
	int rc;
	int rtas_poweroff_ups_token = rtas_token("ibm,power-off-ups");

	if (rtas_flash_term_hook)
		rtas_flash_term_hook(SYS_POWER_OFF);

	if (rtas_poweron_auto == 0 ||
		rtas_poweroff_ups_token == RTAS_UNKNOWN_SERVICE) {
		rc = rtas_call(rtas_token("power-off"), 2, 1, NULL, -1, -1);
		printk(KERN_INFO "RTAS power-off returned %d\n", rc);
	} else {
		rc = rtas_call(rtas_poweroff_ups_token, 0, 1, NULL);
		printk(KERN_INFO "RTAS ibm,power-off-ups returned %d\n", rc);
	}
	for (;;);
}

static int __init pSeries_probe(void)
{
	if (!of_node_is_type(of_root, "chrp"))
		return 0;

	/* Cell blades firmware claims to be chrp while it's not. Until this
	 * is fixed, we need to avoid those here.
	 */
	if (of_machine_is_compatible("IBM,CPBW-1.0") ||
	    of_machine_is_compatible("IBM,CBEA"))
		return 0;

	pm_power_off = pseries_power_off;

	pr_debug("Machine is%s LPAR !\n",
	         (powerpc_firmware_features & FW_FEATURE_LPAR) ? "" : " not");

	pseries_init();

	return 1;
}

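/*
 * Under an LPAR, PCI devices are discovered from the device tree rather
 * than by probing config space directly.
 */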
static int pSeries_pci_probe_mode(struct pci_bus *bus)
{
	if (firmware_has_feature(FW_FEATURE_LPAR))
		return PCI_PROBE_DEVTREE;
	return PCI_PROBE_NORMAL;
}

struct pci_controller_ops pseries_pci_controller_ops = {
	.probe_mode		= pSeries_pci_probe_mode,
};

define_machine(pseries) {
	.name			= "pSeries",
	.probe			= pSeries_probe,
	.setup_arch		= pSeries_setup_arch,
	.init_IRQ		= pseries_init_irq,
	.show_cpuinfo		= pSeries_show_cpuinfo,
	.log_error		= pSeries_log_error,
	.pcibios_fixup		= pSeries_final_fixup,
	.restart		= rtas_restart,
	.halt			= rtas_halt,
	.panic			= pseries_panic,
	.get_boot_time		= rtas_get_boot_time,
	.get_rtc_time		= rtas_get_rtc_time,
	.set_rtc_time		= rtas_set_rtc_time,
	.calibrate_decr		= generic_calibrate_decr,
	.progress		= rtas_progress,
	.system_reset_exception = pSeries_system_reset_exception,
	.machine_check_early	= pseries_machine_check_realmode,
	.machine_check_exception = pSeries_machine_check_exception,
#ifdef CONFIG_KEXEC_CORE
	.machine_kexec          = pSeries_machine_kexec,
	.kexec_cpu_down         = pseries_kexec_cpu_down,
#endif
#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
	.memory_block_size	= pseries_memory_block_size,
#endif
};