setup.c 26.6 KB
Newer Older
L
Linus Torvalds 已提交
1
/*
2
 *  64-bit pSeries and RS/6000 setup code.
L
Linus Torvalds 已提交
3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
 *
 *  Copyright (C) 1995  Linus Torvalds
 *  Adapted from 'alpha' version by Gary Thomas
 *  Modified by Cort Dougan (cort@cs.nmt.edu)
 *  Modified by PPC64 Team, IBM Corp
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

/*
 * bootup setup stuff..
 */

19
#include <linux/cpu.h>
L
Linus Torvalds 已提交
20 21 22 23 24 25 26 27 28 29 30 31 32 33 34
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/stddef.h>
#include <linux/unistd.h>
#include <linux/user.h>
#include <linux/tty.h>
#include <linux/major.h>
#include <linux/interrupt.h>
#include <linux/reboot.h>
#include <linux/init.h>
#include <linux/ioport.h>
#include <linux/console.h>
#include <linux/pci.h>
35
#include <linux/utsname.h>
L
Linus Torvalds 已提交
36
#include <linux/adb.h>
37
#include <linux/export.h>
L
Linus Torvalds 已提交
38 39 40 41
#include <linux/delay.h>
#include <linux/irq.h>
#include <linux/seq_file.h>
#include <linux/root_dev.h>
42
#include <linux/of.h>
43
#include <linux/of_pci.h>
44
#include <linux/memblock.h>
L
Linus Torvalds 已提交
45 46 47 48 49 50 51 52 53 54 55 56 57 58

#include <asm/mmu.h>
#include <asm/processor.h>
#include <asm/io.h>
#include <asm/pgtable.h>
#include <asm/prom.h>
#include <asm/rtas.h>
#include <asm/pci-bridge.h>
#include <asm/iommu.h>
#include <asm/dma.h>
#include <asm/machdep.h>
#include <asm/irq.h>
#include <asm/time.h>
#include <asm/nvram.h>
59
#include <asm/pmc.h>
60
#include <asm/xics.h>
61
#include <asm/xive.h>
62
#include <asm/ppc-pci.h>
63 64
#include <asm/i8259.h>
#include <asm/udbg.h>
P
Paul Mackerras 已提交
65
#include <asm/smp.h>
66
#include <asm/firmware.h>
67
#include <asm/eeh.h>
68
#include <asm/reg.h>
69
#include <asm/plpar_wrappers.h>
70
#include <asm/kexec.h>
71
#include <asm/isa-bridge.h>
72
#include <asm/security_features.h>
73
#include <asm/asm-const.h>
L
Linus Torvalds 已提交
74

75
#include "pseries.h"
76

77 78
/* Cooperative Memory Overcommit (CMO) parameters reported by firmware;
 * -1 means "not reported" until pSeries_cmo_feature_init() parses them. */
int CMO_PrPSP = -1;	/* Primary Paging Subsystem Partition */
int CMO_SecPSP = -1;	/* Secondary Paging Subsystem Partition */
/* CMO page size in bytes; defaults to the 4K IOMMU page size. */
unsigned long CMO_PageSize = (ASM_CONST(1) << IOMMU_PAGE_SHIFT_4K);
EXPORT_SYMBOL(CMO_PageSize);

int fwnmi_active;  /* TRUE if an FWNMI handler is present */

84
static void pSeries_show_cpuinfo(struct seq_file *m)
L
Linus Torvalds 已提交
85 86 87 88 89 90
{
	struct device_node *root;
	const char *model = "";

	root = of_find_node_by_path("/");
	if (root)
91
		model = of_get_property(root, "model", NULL);
L
Linus Torvalds 已提交
92 93
	seq_printf(m, "machine\t\t: CHRP %s\n", model);
	of_node_put(root);
94 95 96 97
	if (radix_enabled())
		seq_printf(m, "MMU\t\t: Radix\n");
	else
		seq_printf(m, "MMU\t\t: Hash\n");
L
Linus Torvalds 已提交
98 99 100 101 102 103 104
}

/* Initialize firmware assisted non-maskable interrupts if
 * the firmware supports this feature.
 */
static void __init fwnmi_init(void)
{
	unsigned long system_reset_addr, machine_check_addr;
	u8 *mce_data_buf;
	unsigned int i;
	int nr_cpus = num_possible_cpus();

	/* Bail out if firmware does not offer the registration service. */
	int ibm_nmi_register = rtas_token("ibm,nmi-register");
	if (ibm_nmi_register == RTAS_UNKNOWN_SERVICE)
		return;

	/* If the kernel's not linked at zero we point the firmware at low
	 * addresses anyway, and use a trampoline to get to the real code. */
	system_reset_addr  = __pa(system_reset_fwnmi) - PHYSICAL_START;
	machine_check_addr = __pa(machine_check_fwnmi) - PHYSICAL_START;

	/* Register both entry points with RTAS; 0 return means success. */
	if (0 == rtas_call(ibm_nmi_register, 2, 1, NULL, system_reset_addr,
				machine_check_addr))
		fwnmi_active = 1;

	/*
	 * Allocate a chunk for per cpu buffer to hold rtas errorlog.
	 * It will be used in real mode mce handler, hence it needs to be
	 * below RMA.
	 */
	mce_data_buf = __va(memblock_alloc_base(RTAS_ERROR_LOG_MAX * nr_cpus,
					RTAS_ERROR_LOG_MAX, ppc64_rma_size));
	/* Carve the chunk into one RTAS_ERROR_LOG_MAX slice per CPU. */
	for_each_possible_cpu(i) {
		paca_ptrs[i]->mce_data_buf = mce_data_buf +
						(RTAS_ERROR_LOG_MAX * i);
	}
}

136
static void pseries_8259_cascade(struct irq_desc *desc)
137
{
138
	struct irq_chip *chip = irq_desc_get_chip(desc);
O
Olaf Hering 已提交
139
	unsigned int cascade_irq = i8259_irq();
140

141
	if (cascade_irq)
142
		generic_handle_irq(cascade_irq);
143 144

	chip->irq_eoi(&desc->irq_data);
145 146
}

147
/*
 * Locate the legacy i8259 interrupt controller in the device tree,
 * discover its PCI interrupt-acknowledge address, initialise it and
 * chain its cascade interrupt to pseries_8259_cascade().
 */
static void __init pseries_setup_i8259_cascade(void)
{
	struct device_node *np, *old, *found = NULL;
	unsigned int cascade;
	const u32 *addrp;
	unsigned long intack = 0;
	int naddr;

	/* The i8259 is the interrupt controller compatible with "chrp,iic". */
	for_each_node_by_type(np, "interrupt-controller") {
		if (of_device_is_compatible(np, "chrp,iic")) {
			found = np;
			break;
		}
	}

	if (found == NULL) {
		printk(KERN_DEBUG "pic: no ISA interrupt controller\n");
		return;
	}

	/* Map the cascade interrupt the 8259 raises into the virq space. */
	cascade = irq_of_parse_and_map(found, 0);
	if (!cascade) {
		printk(KERN_ERR "pic: failed to map cascade interrupt");
		return;
	}
	pr_debug("pic: cascade mapped to irq %d\n", cascade);

	/* Walk up the tree looking for a parent PCI bridge that publishes
	 * an "8259-interrupt-acknowledge" address; each of_get_parent()
	 * takes a reference which is dropped on the next iteration. */
	for (old = of_node_get(found); old != NULL ; old = np) {
		np = of_get_parent(old);
		of_node_put(old);
		if (np == NULL)
			break;
		if (strcmp(np->name, "pci") != 0)
			continue;
		addrp = of_get_property(np, "8259-interrupt-acknowledge", NULL);
		if (addrp == NULL)
			continue;
		naddr = of_n_addr_cells(np);
		/* Assemble a 64-bit address from the last one or two cells. */
		intack = addrp[naddr-1];
		if (naddr > 1)
			intack |= ((unsigned long)addrp[naddr-2]) << 32;
	}
	if (intack)
		printk(KERN_DEBUG "pic: PCI 8259 intack at 0x%016lx\n", intack);
	i8259_init(found, intack);
	of_node_put(found);
	irq_set_chained_handler(cascade, pseries_8259_cascade);
}

196
/*
 * Interrupt controller setup: prefer a XIVE controller when the
 * hypervisor provides one, otherwise fall back to XICS (plus the
 * legacy i8259 cascade, if present).
 */
static void __init pseries_init_irq(void)
{
	if (xive_spapr_init())
		return;		/* XIVE is up; nothing more to do */

	xics_init();
	pseries_setup_i8259_cascade();
}

205 206 207 208 209 210 211 212 213
/*
 * Ask the hypervisor to enable performance-monitor counters for this
 * partition via the H_PERFMON hcall (set MSB, clear nothing).
 */
static void pseries_lpar_enable_pmcs(void)
{
	unsigned long enable = 1UL << 63;

	plpar_hcall_norets(H_PERFMON, enable, 0UL);
}

214
/*
 * Device-tree reconfiguration notifier: keep the pci_dn bookkeeping in
 * sync when PCI device nodes are hot-added or hot-removed.
 * Returns NOTIFY_OK for handled actions, NOTIFY_DONE otherwise.
 */
static int pci_dn_reconfig_notifier(struct notifier_block *nb, unsigned long action, void *data)
{
	struct of_reconfig_data *rd = data;
	struct device_node *parent, *np = rd->dn;
	struct pci_dn *pdn;
	int err = NOTIFY_OK;

	switch (action) {
	case OF_RECONFIG_ATTACH_NODE:
		/* New node: register it under its parent's PHB, if any. */
		parent = of_get_parent(np);
		pdn = parent ? PCI_DN(parent) : NULL;
		if (pdn)
			pci_add_device_node_info(pdn->phb, np);

		of_node_put(parent);
		break;
	case OF_RECONFIG_DETACH_NODE:
		/* Node going away: unlink its pci_dn from the PHB list. */
		pdn = PCI_DN(np);
		if (pdn)
			list_del(&pdn->list);
		break;
	default:
		err = NOTIFY_DONE;
		break;
	}
	return err;
}

static struct notifier_block pci_dn_reconfig_nb = {
	.notifier_call = pci_dn_reconfig_notifier,
};

246 247
/* Slab cache for per-CPU dispatch trace log buffers (see below). */
struct kmem_cache *dtl_cache;

248
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
/*
 * Allocate space for the dispatch trace log for all possible cpus
 * and register the buffers with the hypervisor.  This is used for
 * computing time stolen by the hypervisor.
 */
static int alloc_dispatch_logs(void)
{
	int cpu, ret;
	struct paca_struct *pp;
	struct dtl_entry *dtl;

	if (!firmware_has_feature(FW_FEATURE_SPLPAR))
		return 0;

	if (!dtl_cache)
		return 0;

	for_each_possible_cpu(cpu) {
		pp = paca_ptrs[cpu];
		dtl = kmem_cache_alloc(dtl_cache, GFP_KERNEL);
		if (!dtl) {
			pr_warn("Failed to allocate dispatch trace log for cpu %d\n",
				cpu);
			pr_warn("Stolen time statistics will be unreliable\n");
			break;
		}

		pp->dtl_ridx = 0;
		pp->dispatch_log = dtl;
		pp->dispatch_log_end = dtl + N_DISPATCH_LOG;
		pp->dtl_curr = dtl;
	}

	/* Register the DTL for the current (boot) cpu */
	dtl = get_paca()->dispatch_log;
	/*
	 * If the allocation loop above bailed out before reaching the boot
	 * CPU, dispatch_log is still NULL; registering it would dereference
	 * a NULL pointer below, so give up on DTL entirely.
	 */
	if (!dtl)
		return 0;
	get_paca()->dtl_ridx = 0;
	get_paca()->dtl_curr = dtl;
	get_paca()->lppaca_ptr->dtl_idx = 0;

	/* hypervisor reads buffer length from this field */
	dtl->enqueue_to_dispatch_time = cpu_to_be32(DISPATCH_LOG_BYTES);
	ret = register_dtl(hard_smp_processor_id(), __pa(dtl));
	if (ret)
		pr_err("WARNING: DTL registration of cpu %d (hw %d) failed "
		       "with %d\n", smp_processor_id(),
		       hard_smp_processor_id(), ret);
	get_paca()->lppaca_ptr->dtl_enable_mask = 2;

	return 0;
}
#else /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
static inline int alloc_dispatch_logs(void)
{
	return 0;
}
#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
305

306 307 308 309 310 311 312 313 314 315 316 317
/*
 * Early initcall: create the slab cache for dispatch trace log buffers,
 * then allocate and register the per-CPU logs.  Always returns 0 so
 * boot continues even when the cache cannot be created.
 */
static int alloc_dispatch_log_kmem_cache(void)
{
	dtl_cache = kmem_cache_create("dtl", DISPATCH_LOG_BYTES,
						DISPATCH_LOG_BYTES, 0, NULL);
	if (!dtl_cache) {
		pr_warn("Failed to create dispatch trace log buffer cache\n");
		pr_warn("Stolen time statistics will be unreliable\n");
		return 0;
	}

	return alloc_dispatch_logs();
}
machine_early_initcall(pseries, alloc_dispatch_log_kmem_cache);
319

320
/* Shared-processor LPAR idle loop, installed as ppc_md.power_save. */
static void pseries_lpar_idle(void)
{
	/*
	 * Default handler to go into low thread priority and possibly
	 * low power mode by ceding processor to hypervisor
	 */

	/* Indicate to hypervisor that we are idle. */
	get_lppaca()->idle = 1;

	/*
	 * Yield the processor to the hypervisor.  We return if
	 * an external interrupt occurs (which are driven prior
	 * to returning here) or if a prod occurs from another
	 * processor. When returning here, external interrupts
	 * are enabled.
	 */
	cede_processor();

	get_lppaca()->idle = 0;
}

342 343 344 345 346 347 348
/*
 * Enable relocation on during exceptions. This has partition wide scope and
 * may take a while to complete, if it takes longer than one second we will
 * just give up rather than wasting any more time on this - if that turns out
 * to ever be a problem in practice we can move this into a kernel thread to
 * finish off the process later in boot.
 */
349
void pseries_enable_reloc_on_exc(void)
350 351 352 353 354 355
{
	long rc;
	unsigned int delay, total_delay = 0;

	while (1) {
		rc = enable_reloc_on_exceptions();
356 357 358 359 360 361 362 363 364 365
		if (!H_IS_LONG_BUSY(rc)) {
			if (rc == H_P2) {
				pr_info("Relocation on exceptions not"
					" supported\n");
			} else if (rc != H_SUCCESS) {
				pr_warn("Unable to enable relocation"
					" on exceptions: %ld\n", rc);
			}
			break;
		}
366 367 368 369 370 371 372

		delay = get_longbusy_msecs(rc);
		total_delay += delay;
		if (total_delay > 1000) {
			pr_warn("Warning: Giving up waiting to enable "
				"relocation on exceptions (%u msec)!\n",
				total_delay);
373
			return;
374 375 376 377 378
		}

		mdelay(delay);
	}
}
379
EXPORT_SYMBOL(pseries_enable_reloc_on_exc);
380

381
void pseries_disable_reloc_on_exc(void)
382 383 384 385 386 387
{
	long rc;

	while (1) {
		rc = disable_reloc_on_exceptions();
		if (!H_IS_LONG_BUSY(rc))
388
			break;
389 390
		mdelay(get_longbusy_msecs(rc));
	}
391
	if (rc != H_SUCCESS)
392 393
		pr_warn("Warning: Failed to disable relocation on exceptions: %ld\n",
			rc);
394
}
395
EXPORT_SYMBOL(pseries_disable_reloc_on_exc);
396

397
#ifdef CONFIG_KEXEC_CORE
398 399
static void pSeries_machine_kexec(struct kimage *image)
{
400 401
	if (firmware_has_feature(FW_FEATURE_SET_MODE))
		pseries_disable_reloc_on_exc();
402 403 404 405 406

	default_machine_kexec(image);
}
#endif

407
#ifdef __LITTLE_ENDIAN__
408
void pseries_big_endian_exceptions(void)
409 410 411 412 413 414
{
	long rc;

	while (1) {
		rc = enable_big_endian_exceptions();
		if (!H_IS_LONG_BUSY(rc))
415
			break;
416 417
		mdelay(get_longbusy_msecs(rc));
	}
418 419 420 421 422 423 424 425 426 427 428 429 430 431

	/*
	 * At this point it is unlikely panic() will get anything
	 * out to the user, since this is called very late in kexec
	 * but at least this will stop us from continuing on further
	 * and creating an even more difficult to debug situation.
	 *
	 * There is a known problem when kdump'ing, if cpus are offline
	 * the above call will fail. Rather than panicking again, keep
	 * going and hope the kdump kernel is also little endian, which
	 * it usually is.
	 */
	if (rc && !kdump_in_progress())
		panic("Could not enable big endian exceptions");
432 433
}

434
void pseries_little_endian_exceptions(void)
435 436 437 438 439 440
{
	long rc;

	while (1) {
		rc = enable_little_endian_exceptions();
		if (!H_IS_LONG_BUSY(rc))
441
			break;
442 443
		mdelay(get_longbusy_msecs(rc));
	}
444 445 446 447
	if (rc) {
		ppc_md.progress("H_SET_MODE LE exception fail", 0);
		panic("Could not enable little endian exceptions");
	}
448 449 450
}
#endif

451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467
/*
 * Scan the device-tree root for PCI host bridges ("pci"/"pciex" nodes),
 * allocate and set up a pci_controller for each one.
 */
static void __init find_and_init_phbs(void)
{
	struct device_node *node;
	struct pci_controller *phb;
	struct device_node *root = of_find_node_by_path("/");

	for_each_child_of_node(root, node) {
		/* Only top-level PCI/PCIe bridge nodes are of interest. */
		if (node->type == NULL || (strcmp(node->type, "pci") != 0 &&
					   strcmp(node->type, "pciex") != 0))
			continue;

		phb = pcibios_alloc_controller(node);
		if (!phb)
			continue;
		rtas_setup_phb(phb);
		pci_process_bridge_OF_ranges(phb, node, 0);
		isa_bridge_find_early(phb);
		phb->controller_ops = pseries_pci_controller_ops;
	}

	of_node_put(root);

	/*
	 * PCI_PROBE_ONLY and PCI_REASSIGN_ALL_BUS can be set via properties
	 * in chosen.
	 */
	of_pci_check_probe_only();
}

480 481
static void init_cpu_char_feature_flags(struct h_cpu_char_result *result)
{
482 483 484 485
	/*
	 * The features below are disabled by default, so we instead look to see
	 * if firmware has *enabled* them, and set them if so.
	 */
486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507
	if (result->character & H_CPU_CHAR_SPEC_BAR_ORI31)
		security_ftr_set(SEC_FTR_SPEC_BAR_ORI31);

	if (result->character & H_CPU_CHAR_BCCTRL_SERIALISED)
		security_ftr_set(SEC_FTR_BCCTRL_SERIALISED);

	if (result->character & H_CPU_CHAR_L1D_FLUSH_ORI30)
		security_ftr_set(SEC_FTR_L1D_FLUSH_ORI30);

	if (result->character & H_CPU_CHAR_L1D_FLUSH_TRIG2)
		security_ftr_set(SEC_FTR_L1D_FLUSH_TRIG2);

	if (result->character & H_CPU_CHAR_L1D_THREAD_PRIV)
		security_ftr_set(SEC_FTR_L1D_THREAD_PRIV);

	if (result->character & H_CPU_CHAR_COUNT_CACHE_DISABLED)
		security_ftr_set(SEC_FTR_COUNT_CACHE_DISABLED);

	/*
	 * The features below are enabled by default, so we instead look to see
	 * if firmware has *disabled* them, and clear them if so.
	 */
508
	if (!(result->behaviour & H_CPU_BEHAV_FAVOUR_SECURITY))
509 510
		security_ftr_clear(SEC_FTR_FAVOUR_SECURITY);

511
	if (!(result->behaviour & H_CPU_BEHAV_L1D_FLUSH_PR))
512 513
		security_ftr_clear(SEC_FTR_L1D_FLUSH_PR);

514
	if (!(result->behaviour & H_CPU_BEHAV_BNDS_CHK_SPEC_BAR))
515 516 517
		security_ftr_clear(SEC_FTR_BNDS_CHK_SPEC_BAR);
}

518
/*
 * Query the hypervisor's CPU security characteristics and configure the
 * L1D flush mitigation (RFI flush) accordingly.  Also called again after
 * partition migration, when the characteristics may have changed.
 */
void pseries_setup_rfi_flush(void)
{
	struct h_cpu_char_result result;
	enum l1d_flush_type types;
	bool enable;
	long rc;

	/*
	 * Set features to the defaults assumed by init_cpu_char_feature_flags()
	 * so it can set/clear again any features that might have changed after
	 * migration, and in case the hypercall fails and it is not even called.
	 */
	powerpc_security_features = SEC_FTR_DEFAULT;

	rc = plpar_get_cpu_characteristics(&result);
	if (rc == H_SUCCESS)
		init_cpu_char_feature_flags(&result);

	/*
	 * We're the guest so this doesn't apply to us, clear it to simplify
	 * handling of it elsewhere.
	 */
	security_ftr_clear(SEC_FTR_L1D_FLUSH_HV);

	/* Fallback software flush is always available as a last resort. */
	types = L1D_FLUSH_FALLBACK;

	if (security_ftr_enabled(SEC_FTR_L1D_FLUSH_TRIG2))
		types |= L1D_FLUSH_MTTRIG;

	if (security_ftr_enabled(SEC_FTR_L1D_FLUSH_ORI30))
		types |= L1D_FLUSH_ORI;

	/* Only enable the flush when firmware both favours security and
	 * requests a flush on kernel->user transitions. */
	enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && \
		 security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR);

	setup_rfi_flush(types, enable);
}

556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664
#ifdef CONFIG_PCI_IOV
/* Layout of the "ibm,open-sriov-vf-bar-info" property: each BAR entry is
 * NEXT_ENTRY cells long, starting at START_OF_ENTRIES. */
enum rtas_iov_fw_value_map {
	NUM_RES_PROPERTY  = 0, /* Number of Resources */
	LOW_INT           = 1, /* Lowest 32 bits of Address */
	START_OF_ENTRIES  = 2, /* Always start of entry */
	APERTURE_PROPERTY = 2, /* Start of entry+ to  Aperture Size */
	WDW_SIZE_PROPERTY = 4, /* Start of entry+ to Window Size */
	NEXT_ENTRY        = 7  /* Go to next entry on array */
};

/* Which firmware-provided value pseries_get_iov_fw_value() should fetch. */
enum get_iov_fw_value_index {
	BAR_ADDRS     = 1,    /*  Get Bar Address */
	APERTURE_SIZE = 2,    /*  Get Aperture Size */
	WDW_SIZE      = 3     /*  Get Window Size */
};

/*
 * Fetch a firmware-assigned SR-IOV value (BAR address, aperture size or
 * window size) for VF BAR @resno from the device's
 * "ibm,open-sriov-vf-bar-info" property.  Returns 0 when the property is
 * absent or @resno is out of range.
 */
resource_size_t pseries_get_iov_fw_value(struct pci_dev *dev, int resno,
					 enum get_iov_fw_value_index value)
{
	const int *indexes;
	struct device_node *dn = pci_device_to_OF_node(dev);
	int i, num_res;
	/*
	 * Firmware values are two cells (64 bits) wide; accumulate them in
	 * resource_size_t so addresses/sizes above 4GB are not truncated
	 * through a 32-bit int.
	 */
	resource_size_t ret = 0;

	indexes = of_get_property(dn, "ibm,open-sriov-vf-bar-info", NULL);
	if (!indexes)
		return  0;

	/*
	 * First element in the array is the number of Bars
	 * returned.  Search through the list to find the matching
	 * bar
	 */
	num_res = of_read_number(&indexes[NUM_RES_PROPERTY], 1);
	if (resno >= num_res)
		return 0; /* or an error */

	i = START_OF_ENTRIES + NEXT_ENTRY * resno;
	switch (value) {
	case BAR_ADDRS:
		ret = of_read_number(&indexes[i], 2);
		break;
	case APERTURE_SIZE:
		ret = of_read_number(&indexes[i + APERTURE_PROPERTY], 2);
		break;
	case WDW_SIZE:
		ret = of_read_number(&indexes[i + WDW_SIZE_PROPERTY], 2);
		break;
	}

	return ret;
}

/*
 * Size the VF BAR resources of @dev from the firmware-provided
 * "ibm,open-sriov-vf-bar-info" property values in @indexes.
 */
void of_pci_set_vf_bar_size(struct pci_dev *dev, const int *indexes)
{
	struct resource *res;
	resource_size_t base, size;
	int i, r, num_res;

	/* First cell is the BAR count; clamp to what the pci_dev can hold. */
	num_res = of_read_number(&indexes[NUM_RES_PROPERTY], 1);
	num_res = min_t(int, num_res, PCI_SRIOV_NUM_BARS);
	for (i = START_OF_ENTRIES, r = 0; r < num_res && r < PCI_SRIOV_NUM_BARS;
	     i += NEXT_ENTRY, r++) {
		res = &dev->resource[r + PCI_IOV_RESOURCES];
		base = of_read_number(&indexes[i], 2);
		size = of_read_number(&indexes[i + APERTURE_PROPERTY], 2);
		/* Flags come from the low 32 bits of the entry address. */
		res->flags = pci_parse_of_flags(of_read_number
						(&indexes[i + LOW_INT], 1), 0);
		/* Firmware-assigned: the core must not move these BARs. */
		res->flags |= (IORESOURCE_MEM_64 | IORESOURCE_PCI_FIXED);
		res->name = pci_name(dev);
		res->start = base;
		res->end = base + size - 1;
	}
}

/*
 * Claim firmware-assigned VF BAR address ranges for @dev in the iomem
 * resource tree; BARs whose range conflicts are marked IORESOURCE_UNSET.
 */
void of_pci_parse_iov_addrs(struct pci_dev *dev, const int *indexes)
{
	struct resource *res, *root, *conflict;
	resource_size_t base, size;
	int i, r, num_res;

	/*
	 * First element in the array is the number of Bars
	 * returned.  Search through the list to find the matching
	 * bars assign them from firmware into resources structure.
	 */
	num_res = of_read_number(&indexes[NUM_RES_PROPERTY], 1);
	for (i = START_OF_ENTRIES, r = 0; r < num_res && r < PCI_SRIOV_NUM_BARS;
	     i += NEXT_ENTRY, r++) {
		res = &dev->resource[r + PCI_IOV_RESOURCES];
		base = of_read_number(&indexes[i], 2);
		size = of_read_number(&indexes[i + WDW_SIZE_PROPERTY], 2);
		res->name = pci_name(dev);
		res->start = base;
		res->end = base + size - 1;
		root = &iomem_resource;
		dev_dbg(&dev->dev,
			"pSeries IOV BAR %d: trying firmware assignment %pR\n",
			 r + PCI_IOV_RESOURCES, res);
		conflict = request_resource_conflict(root, res);
		if (conflict) {
			dev_info(&dev->dev,
				 "BAR %d: %pR conflicts with %s %pR\n",
				 r + PCI_IOV_RESOURCES, res,
				 conflict->name, conflict);
			res->flags |= IORESOURCE_UNSET;
		}
	}
}

665 666 667 668 669 670 671 672 673
/*
 * Firmware offers no SR-IOV support for this device: warn once and
 * zero the flags of every IOV BAR so the core ignores them.
 */
static void pseries_disable_sriov_resources(struct pci_dev *pdev)
{
	int bar;

	pci_warn(pdev, "No hypervisor support for SR-IOV on this device, IOV BARs disabled.\n");
	for (bar = 0; bar < PCI_SRIOV_NUM_BARS; bar++)
		pdev->resource[PCI_IOV_RESOURCES + bar].flags = 0;
}

674 675 676 677 678 679 680
/*
 * Header fixup hook: size VF BARs from firmware data, or disable them
 * when the firmware does not support open SR-IOV for this device.
 */
static void pseries_pci_fixup_resources(struct pci_dev *pdev)
{
	struct device_node *dn = pci_device_to_OF_node(pdev);
	const int *indexes;

	/*Firmware must support open sriov otherwise dont configure*/
	indexes = of_get_property(dn, "ibm,open-sriov-vf-bar-info", NULL);
	if (!indexes) {
		pseries_disable_sriov_resources(pdev);
		return;
	}

	of_pci_set_vf_bar_size(pdev, indexes);
}

/*
 * SR-IOV fixup hook: claim firmware-assigned VF BAR addresses for a
 * physical function that has just been discovered.
 */
static void pseries_pci_fixup_iov_resources(struct pci_dev *pdev)
{
	struct device_node *dn = pci_device_to_OF_node(pdev);
	const int *indexes;

	/* Only newly-discovered physical functions need this fixup. */
	if (!pdev->is_physfn || pdev->is_added)
		return;

	/*Firmware must support open sriov otherwise dont configure*/
	indexes = of_get_property(dn, "ibm,open-sriov-vf-bar-info", NULL);
	if (!indexes) {
		pseries_disable_sriov_resources(pdev);
		return;
	}

	of_pci_parse_iov_addrs(pdev, indexes);
}

/*
 * Report the alignment of an IOV resource.  With open SR-IOV firmware
 * the aperture size from the device tree is authoritative; otherwise
 * fall back to the standard per-VF resource size.
 */
static resource_size_t pseries_pci_iov_resource_alignment(struct pci_dev *pdev,
							  int resno)
{
	const __be32 *reg;
	struct device_node *dn = pci_device_to_OF_node(pdev);

	/*Firmware must support open sriov otherwise report regular alignment*/
	reg = of_get_property(dn, "ibm,is-open-sriov-pf", NULL);
	if (!reg)
		return pci_iov_resource_size(pdev, resno);

	if (!pdev->is_physfn)
		return 0;
	return pseries_get_iov_fw_value(pdev,
					resno - PCI_IOV_RESOURCES,
					APERTURE_SIZE);
}
#endif

721 722
/*
 * Main pseries platform setup: panic timeout, SMP, NMI, security
 * mitigations, PCI host bridges, NVRAM and the LPAR-specific ppc_md
 * hooks.  Ordering of the calls below matters.
 */
static void __init pSeries_setup_arch(void)
{
	set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT);

	/* Discover PIC type and setup ppc_md accordingly */
	smp_init_pseries();


	/* openpic global configuration register (64-bit format). */
	/* openpic Interrupt Source Unit pointer (64-bit format). */
	/* python0 facility area (mmio) (64-bit format) REAL address. */

	/* init to some ~sane value until calibrate_delay() runs */
	loops_per_jiffy = 50000000;

	fwnmi_init();

	/* Spectre/Meltdown-class mitigations, queried from the hypervisor. */
	pseries_setup_rfi_flush();
	setup_stf_barrier();

	/* By default, only probe PCI (can be overridden by rtas_pci) */
	pci_add_flags(PCI_PROBE_ONLY);

	/* Find and initialize PCI host bridges */
	init_pci_config_tokens();
	find_and_init_phbs();
	of_reconfig_notifier_register(&pci_dn_reconfig_nb);

	pSeries_nvram_init();

	if (firmware_has_feature(FW_FEATURE_LPAR)) {
		/* Running under a hypervisor: register the VPA and install
		 * the LPAR idle/PMC/SR-IOV hooks. */
		vpa_init(boot_cpuid);
		ppc_md.power_save = pseries_lpar_idle;
		ppc_md.enable_pmcs = pseries_lpar_enable_pmcs;
#ifdef CONFIG_PCI_IOV
		ppc_md.pcibios_fixup_resources =
			pseries_pci_fixup_resources;
		ppc_md.pcibios_fixup_sriov =
			pseries_pci_fixup_iov_resources;
		ppc_md.pcibios_iov_resource_alignment =
			pseries_pci_iov_resource_alignment;
#endif
	} else {
		/* No special idle routine */
		ppc_md.enable_pmcs = power4_enable_pmcs;
	}

	ppc_md.pcibios_root_bridge_prepare = pseries_root_bridge_prepare;
}

771 772 773 774 775 776
/* Panic hook: flush any buffered kernel messages, then let RTAS
 * terminate the OS with the panic string. */
static void pseries_panic(char *str)
{
	panic_flush_kmsg_end();
	rtas_os_term(str);
}

L
Linus Torvalds 已提交
777 778 779
/* Write the kernel identity to the operator panel at arch initcall time. */
static int __init pSeries_init_panel(void)
{
	/* Manually leave the kernel version on the panel. */
#ifdef __BIG_ENDIAN__
	ppc_md.progress("Linux ppc64\n", 0);
#else
	ppc_md.progress("Linux ppc64le\n", 0);
#endif
	ppc_md.progress(init_utsname()->version, 0);

	return 0;
}
machine_arch_initcall(pseries, pSeries_init_panel);
L
Linus Torvalds 已提交
790

791
/* Set the Data Address Breakpoint Register via H_SET_DABR.
 * The dabrx argument is ignored: this older hcall takes no DABRX. */
static int pseries_set_dabr(unsigned long dabr, unsigned long dabrx)
{
	return plpar_hcall_norets(H_SET_DABR, dabr);
}

796
static int pseries_set_xdabr(unsigned long dabr, unsigned long dabrx)
797
{
798 799 800 801
	/* Have to set at least one bit in the DABRX according to PAPR */
	if (dabrx == 0 && dabr == 0)
		dabrx = DABRX_USER;
	/* PAPR says we can only set kernel and user bits */
802
	dabrx &= DABRX_KERNEL | DABRX_USER;
803 804

	return plpar_hcall_norets(H_SET_XDABR, dabr, dabrx);
805
}
L
Linus Torvalds 已提交
806

807 808 809 810 811
/*
 * Program the Data Address Watchpoint Register through the hypervisor.
 */
static int pseries_set_dawr(unsigned long dawr, unsigned long dawrx)
{
	/* PAPR says we can't set HYP */
	return plpar_set_watchpoint0(dawr, dawrx & ~DAWRX_HYP);
}

815 816 817
/* RTAS system-parameter token and maximum buffer length for the CMO
 * characteristics string parsed in pSeries_cmo_feature_init(). */
#define CMO_CHARACTERISTICS_TOKEN 44
#define CMO_MAXLENGTH 1026

/* Probe for the extended-CMO (page coalescing) firmware feature and
 * update powerpc_firmware_features accordingly. */
void pSeries_coalesce_init(void)
{
	struct hvcall_mpp_x_data mpp_x_data;

	if (firmware_has_feature(FW_FEATURE_CMO) && !h_get_mpp_x(&mpp_x_data))
		powerpc_firmware_features |= FW_FEATURE_XCMO;
	else
		powerpc_firmware_features &= ~FW_FEATURE_XCMO;
}

828 829 830 831
/**
 * fw_cmo_feature_init - FW_FEATURE_CMO is not stored in ibm,hypertas-functions,
 * handle that here. (Stolen from parse_system_parameter_string)
 *
 * Fetches the CMO characteristics string from firmware, parses its
 * "key=value,key=value" pairs in place, and sets CMO_PrPSP/CMO_SecPSP/
 * CMO_PageSize plus FW_FEATURE_CMO when CMO is enabled.
 */
static void pSeries_cmo_feature_init(void)
{
	char *ptr, *key, *value, *end;
	int call_status;
	int page_order = IOMMU_PAGE_SHIFT_4K;

	pr_debug(" -> fw_cmo_feature_init()\n");
	/* rtas_data_buf is a shared buffer; hold its lock for the whole parse. */
	spin_lock(&rtas_data_buf_lock);
	memset(rtas_data_buf, 0, RTAS_DATA_BUF_SIZE);
	call_status = rtas_call(rtas_token("ibm,get-system-parameter"), 3, 1,
				NULL,
				CMO_CHARACTERISTICS_TOKEN,
				__pa(rtas_data_buf),
				RTAS_DATA_BUF_SIZE);

	if (call_status != 0) {
		spin_unlock(&rtas_data_buf_lock);
		pr_debug("CMO not available\n");
		pr_debug(" <- fw_cmo_feature_init()\n");
		return;
	}

	end = rtas_data_buf + CMO_MAXLENGTH - 2;
	ptr = rtas_data_buf + 2;	/* step over strlen value */
	key = value = ptr;

	while (*ptr && (ptr <= end)) {
		/* Separate the key and value by replacing '=' with '\0' and
		 * point the value at the string after the '='
		 */
		if (ptr[0] == '=') {
			ptr[0] = '\0';
			value = ptr + 1;
		} else if (ptr[0] == '\0' || ptr[0] == ',') {
			/* Terminate the string containing the key/value pair */
			ptr[0] = '\0';

			if (key == value) {
				pr_debug("Malformed key/value pair\n");
				/* Never found a '=', end processing */
				break;
			}

			if (0 == strcmp(key, "CMOPageSize"))
				page_order = simple_strtol(value, NULL, 10);
			else if (0 == strcmp(key, "PrPSP"))
				CMO_PrPSP = simple_strtol(value, NULL, 10);
			else if (0 == strcmp(key, "SecPSP"))
				CMO_SecPSP = simple_strtol(value, NULL, 10);
			value = key = ptr + 1;
		}
		ptr++;
	}

	/* Page size is returned as the power of 2 of the page size,
	 * convert to the page size in bytes before returning
	 */
	CMO_PageSize = 1 << page_order;
	pr_debug("CMO_PageSize = %lu\n", CMO_PageSize);

	/* Either PSP value being reported means CMO is active. */
	if (CMO_PrPSP != -1 || CMO_SecPSP != -1) {
		pr_info("CMO enabled\n");
		pr_debug("CMO enabled, PrPSP=%d, SecPSP=%d\n", CMO_PrPSP,
		         CMO_SecPSP);
		powerpc_firmware_features |= FW_FEATURE_CMO;
		pSeries_coalesce_init();
	} else
		pr_debug("CMO not enabled, PrPSP=%d, SecPSP=%d\n", CMO_PrPSP,
		         CMO_SecPSP);
	spin_unlock(&rtas_data_buf_lock);
	pr_debug(" <- fw_cmo_feature_init()\n");
}

L
Linus Torvalds 已提交
905 906 907
/*
 * Early initialization.  Relocation is on but do not reference unbolted pages
 */
static void __init pseries_init(void)
{
	pr_debug(" -> pseries_init()\n");

#ifdef CONFIG_HVC_CONSOLE
	/* Bring up the hypervisor virtual console as early as possible. */
	if (firmware_has_feature(FW_FEATURE_LPAR))
		hvc_vio_init_early();
#endif
	/* Pick the breakpoint hcall variant the firmware supports;
	 * XDABR is preferred over the plain DABR interface. */
	if (firmware_has_feature(FW_FEATURE_XDABR))
		ppc_md.set_dabr = pseries_set_xdabr;
	else if (firmware_has_feature(FW_FEATURE_DABR))
		ppc_md.set_dabr = pseries_set_dabr;

	if (firmware_has_feature(FW_FEATURE_SET_MODE))
		ppc_md.set_dawr = pseries_set_dawr;

	pSeries_cmo_feature_init();
	iommu_init_early_pSeries();

	pr_debug(" <- pseries_init()\n");
}

930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957
/**
 * pseries_power_off - tell firmware about how to power off the system.
 *
 * This function calls either the power-off rtas token in normal cases
 * or the ibm,power-off-ups token (if present & requested) in case of
 * a power failure. If power-off token is used, power on will only be
 * possible with power button press. If ibm,power-off-ups token is used
 * it will allow auto poweron after power is restored.
 */
static void pseries_power_off(void)
{
	int rc;
	int rtas_poweroff_ups_token = rtas_token("ibm,power-off-ups");

	if (rtas_flash_term_hook)
		rtas_flash_term_hook(SYS_POWER_OFF);

	if (rtas_poweron_auto == 0 ||
		rtas_poweroff_ups_token == RTAS_UNKNOWN_SERVICE) {
		rc = rtas_call(rtas_token("power-off"), 2, 1, NULL, -1, -1);
		printk(KERN_INFO "RTAS power-off returned %d\n", rc);
	} else {
		rc = rtas_call(rtas_poweroff_ups_token, 0, 1, NULL);
		printk(KERN_INFO "RTAS ibm,power-off-ups returned %d\n", rc);
	}
	/* RTAS should not return; spin forever if it does. */
	for (;;);
}

958 959
/*
 * Platform probe: claim the machine (return 1) when the device tree
 * identifies it as CHRP-class hardware that is not a Cell blade.
 */
static int __init pSeries_probe(void)
{
	const char *dtype = of_get_property(of_root, "device_type", NULL);

	/* (indentation fixed: these lines had a stray leading space) */
	if (dtype == NULL)
		return 0;
	if (strcmp(dtype, "chrp"))
		return 0;

	/* Cell blades firmware claims to be chrp while it's not. Until this
	 * is fixed, we need to avoid those here.
	 */
	if (of_machine_is_compatible("IBM,CPBW-1.0") ||
	    of_machine_is_compatible("IBM,CBEA"))
		return 0;

	pm_power_off = pseries_power_off;

	pr_debug("Machine is%s LPAR !\n",
	         (powerpc_firmware_features & FW_FEATURE_LPAR) ? "" : " not");

	pseries_init();

	return 1;
}

984 985
static int pSeries_pci_probe_mode(struct pci_bus *bus)
{
986
	if (firmware_has_feature(FW_FEATURE_LPAR))
987 988 989 990
		return PCI_PROBE_DEVTREE;
	return PCI_PROBE_NORMAL;
}

991 992 993 994
/* Controller ops attached to every PHB by find_and_init_phbs(). */
struct pci_controller_ops pseries_pci_controller_ops = {
	.probe_mode		= pSeries_pci_probe_mode,
};

995 996
/* Machine description: wires the pseries platform hooks into ppc_md. */
define_machine(pseries) {
	.name			= "pSeries",
	.probe			= pSeries_probe,
	.setup_arch		= pSeries_setup_arch,
	.init_IRQ		= pseries_init_irq,
	.show_cpuinfo		= pSeries_show_cpuinfo,
	.log_error		= pSeries_log_error,
	.pcibios_fixup		= pSeries_final_fixup,
	.restart		= rtas_restart,
	.halt			= rtas_halt,
	.panic			= pseries_panic,
	.get_boot_time		= rtas_get_boot_time,
	.get_rtc_time		= rtas_get_rtc_time,
	.set_rtc_time		= rtas_set_rtc_time,
	.calibrate_decr		= generic_calibrate_decr,
	.progress		= rtas_progress,
	.system_reset_exception = pSeries_system_reset_exception,
	.machine_check_exception = pSeries_machine_check_exception,
#ifdef CONFIG_KEXEC_CORE
	.machine_kexec          = pSeries_machine_kexec,
	.kexec_cpu_down         = pseries_kexec_cpu_down,
#endif
#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
	.memory_block_size	= pseries_memory_block_size,
#endif
};