/*
 * Support PCI/PCIe on PowerNV platforms
 *
 * Currently supports only P5IOC2
 *
 * Copyright 2011 Benjamin Herrenschmidt, IBM Corp.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#include <linux/kernel.h>
#include <linux/pci.h>
#include <linux/delay.h>
#include <linux/string.h>
#include <linux/init.h>
#include <linux/bootmem.h>
#include <linux/irq.h>
#include <linux/io.h>
#include <linux/msi.h>
#include <linux/iommu.h>

#include <asm/sections.h>
#include <asm/io.h>
#include <asm/prom.h>
#include <asm/pci-bridge.h>
#include <asm/machdep.h>
#include <asm/msi_bitmap.h>
#include <asm/ppc-pci.h>
#include <asm/opal.h>
#include <asm/iommu.h>
#include <asm/tce.h>
#include <asm/firmware.h>
#include <asm/eeh_event.h>
#include <asm/eeh.h>

#include "powernv.h"
#include "pci.h"

/* Delay in usec */
#define PCI_RESET_DELAY_US	3000000
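/*
 * pnv_pci_probe_mode() below waits out this settle time, measured from
 * the firmware-recorded reset-clear timestamp, before the bus is probed.
 */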

#define cfg_dbg(fmt...)	do { } while (0)
//#define cfg_dbg(fmt...)	printk(fmt)

#ifdef CONFIG_PCI_MSI
static int pnv_msi_check_device(struct pci_dev *pdev, int nvec, int type)
{
	struct pci_controller *hose = pci_bus_to_host(pdev->bus);
	struct pnv_phb *phb = hose->private_data;
	struct pci_dn *pdn = pci_get_pdn(pdev);

	if (pdn && pdn->force_32bit_msi && !phb->msi32_support)
		return -ENODEV;

	return (phb && phb->msi_bmp.bitmap) ? 0 : -ENODEV;
}

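/*
 * For each MSI descriptor on the device: allocate a hardware IRQ from
 * the PHB's MSI bitmap, map it to a Linux virq, then let the PHB-type
 * specific msi_setup() hook compose the message that gets written out.
 */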
static int pnv_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
{
	struct pci_controller *hose = pci_bus_to_host(pdev->bus);
	struct pnv_phb *phb = hose->private_data;
	struct msi_desc *entry;
	struct msi_msg msg;
	int hwirq;
	unsigned int virq;
	int rc;

	if (WARN_ON(!phb))
		return -ENODEV;

	list_for_each_entry(entry, &pdev->msi_list, list) {
		if (!entry->msi_attrib.is_64 && !phb->msi32_support) {
			pr_warn("%s: Supports only 64-bit MSIs\n",
				pci_name(pdev));
			return -ENXIO;
		}
		hwirq = msi_bitmap_alloc_hwirqs(&phb->msi_bmp, 1);
		if (hwirq < 0) {
			pr_warn("%s: Failed to find a free MSI\n",
				pci_name(pdev));
			return -ENOSPC;
		}
		virq = irq_create_mapping(NULL, phb->msi_base + hwirq);
		if (virq == NO_IRQ) {
			pr_warn("%s: Failed to map MSI to linux irq\n",
				pci_name(pdev));
			msi_bitmap_free_hwirqs(&phb->msi_bmp, hwirq, 1);
			return -ENOMEM;
		}
		rc = phb->msi_setup(phb, pdev, phb->msi_base + hwirq,
				    virq, entry->msi_attrib.is_64, &msg);
		if (rc) {
			pr_warn("%s: Failed to setup MSI\n", pci_name(pdev));
			irq_dispose_mapping(virq);
			msi_bitmap_free_hwirqs(&phb->msi_bmp, hwirq, 1);
			return rc;
		}
		irq_set_msi_desc(virq, entry);
		write_msi_msg(virq, &msg);
	}
	return 0;
}

static void pnv_teardown_msi_irqs(struct pci_dev *pdev)
{
	struct pci_controller *hose = pci_bus_to_host(pdev->bus);
	struct pnv_phb *phb = hose->private_data;
	struct msi_desc *entry;

	if (WARN_ON(!phb))
		return;

	list_for_each_entry(entry, &pdev->msi_list, list) {
		if (entry->irq == NO_IRQ)
			continue;
		irq_set_msi_desc(entry->irq, NULL);
		msi_bitmap_free_hwirqs(&phb->msi_bmp,
			virq_to_hw(entry->irq) - phb->msi_base, 1);
		irq_dispose_mapping(entry->irq);
	}
}
#endif /* CONFIG_PCI_MSI */

static void pnv_pci_dump_p7ioc_diag_data(struct pci_controller *hose,
					 struct OpalIoPhbErrorCommon *common)
{
	struct OpalIoP7IOCPhbErrorData *data;
	int i;

	data = (struct OpalIoP7IOCPhbErrorData *)common;
	pr_info("P7IOC PHB#%d Diag-data (Version: %d)\n",
		hose->global_number, common->version);

	if (data->brdgCtl)
		pr_info("brdgCtl:     %08x\n",
			data->brdgCtl);
	if (data->portStatusReg || data->rootCmplxStatus ||
	    data->busAgentStatus)
		pr_info("UtlSts:      %08x %08x %08x\n",
			data->portStatusReg, data->rootCmplxStatus,
			data->busAgentStatus);
	if (data->deviceStatus || data->slotStatus   ||
	    data->linkStatus   || data->devCmdStatus ||
	    data->devSecStatus)
		pr_info("RootSts:     %08x %08x %08x %08x %08x\n",
			data->deviceStatus, data->slotStatus,
			data->linkStatus, data->devCmdStatus,
			data->devSecStatus);
	if (data->rootErrorStatus   || data->uncorrErrorStatus ||
	    data->corrErrorStatus)
		pr_info("RootErrSts:  %08x %08x %08x\n",
			data->rootErrorStatus, data->uncorrErrorStatus,
			data->corrErrorStatus);
	if (data->tlpHdr1 || data->tlpHdr2 ||
	    data->tlpHdr3 || data->tlpHdr4)
		pr_info("RootErrLog:  %08x %08x %08x %08x\n",
			data->tlpHdr1, data->tlpHdr2,
			data->tlpHdr3, data->tlpHdr4);
	if (data->sourceId || data->errorClass ||
	    data->correlator)
		pr_info("RootErrLog1: %08x %016llx %016llx\n",
			data->sourceId, data->errorClass,
			data->correlator);
	if (data->p7iocPlssr || data->p7iocCsr)
		pr_info("PhbSts:      %016llx %016llx\n",
			data->p7iocPlssr, data->p7iocCsr);
	if (data->lemFir)
		pr_info("Lem:         %016llx %016llx %016llx\n",
			data->lemFir, data->lemErrorMask,
			data->lemWOF);
	if (data->phbErrorStatus)
		pr_info("PhbErr:      %016llx %016llx %016llx %016llx\n",
			data->phbErrorStatus, data->phbFirstErrorStatus,
			data->phbErrorLog0, data->phbErrorLog1);
	if (data->mmioErrorStatus)
		pr_info("OutErr:      %016llx %016llx %016llx %016llx\n",
			data->mmioErrorStatus, data->mmioFirstErrorStatus,
			data->mmioErrorLog0, data->mmioErrorLog1);
	if (data->dma0ErrorStatus)
		pr_info("InAErr:      %016llx %016llx %016llx %016llx\n",
			data->dma0ErrorStatus, data->dma0FirstErrorStatus,
			data->dma0ErrorLog0, data->dma0ErrorLog1);
	if (data->dma1ErrorStatus)
		pr_info("InBErr:      %016llx %016llx %016llx %016llx\n",
			data->dma1ErrorStatus, data->dma1FirstErrorStatus,
			data->dma1ErrorLog0, data->dma1ErrorLog1);

	for (i = 0; i < OPAL_P7IOC_NUM_PEST_REGS; i++) {
		if ((data->pestA[i] >> 63) == 0 &&
		    (data->pestB[i] >> 63) == 0)
			continue;

		pr_info("PE[%3d] A/B: %016llx %016llx\n",
			i, data->pestA[i], data->pestB[i]);
	}
}

static void pnv_pci_dump_phb3_diag_data(struct pci_controller *hose,
					struct OpalIoPhbErrorCommon *common)
{
	struct OpalIoPhb3ErrorData *data;
	int i;

	data = (struct OpalIoPhb3ErrorData *)common;
	pr_info("PHB3 PHB#%d Diag-data (Version: %d)\n",
		hose->global_number, be32_to_cpu(common->version));
	if (data->brdgCtl)
		pr_info("brdgCtl:     %08x\n",
			be32_to_cpu(data->brdgCtl));
	if (data->portStatusReg || data->rootCmplxStatus ||
	    data->busAgentStatus)
		pr_info("UtlSts:      %08x %08x %08x\n",
			be32_to_cpu(data->portStatusReg),
			be32_to_cpu(data->rootCmplxStatus),
			be32_to_cpu(data->busAgentStatus));
	if (data->deviceStatus || data->slotStatus   ||
	    data->linkStatus   || data->devCmdStatus ||
	    data->devSecStatus)
		pr_info("RootSts:     %08x %08x %08x %08x %08x\n",
			be32_to_cpu(data->deviceStatus),
			be32_to_cpu(data->slotStatus),
			be32_to_cpu(data->linkStatus),
			be32_to_cpu(data->devCmdStatus),
			be32_to_cpu(data->devSecStatus));
	if (data->rootErrorStatus || data->uncorrErrorStatus ||
	    data->corrErrorStatus)
		pr_info("RootErrSts:  %08x %08x %08x\n",
			be32_to_cpu(data->rootErrorStatus),
			be32_to_cpu(data->uncorrErrorStatus),
			be32_to_cpu(data->corrErrorStatus));
	if (data->tlpHdr1 || data->tlpHdr2 ||
	    data->tlpHdr3 || data->tlpHdr4)
		pr_info("RootErrLog:  %08x %08x %08x %08x\n",
			be32_to_cpu(data->tlpHdr1),
			be32_to_cpu(data->tlpHdr2),
			be32_to_cpu(data->tlpHdr3),
			be32_to_cpu(data->tlpHdr4));
	if (data->sourceId || data->errorClass ||
	    data->correlator)
		pr_info("RootErrLog1: %08x %016llx %016llx\n",
			be32_to_cpu(data->sourceId),
			be64_to_cpu(data->errorClass),
			be64_to_cpu(data->correlator));
	if (data->nFir)
		pr_info("nFir:        %016llx %016llx %016llx\n",
			be64_to_cpu(data->nFir),
			be64_to_cpu(data->nFirMask),
			be64_to_cpu(data->nFirWOF));
	if (data->phbPlssr || data->phbCsr)
		pr_info("PhbSts:      %016llx %016llx\n",
			be64_to_cpu(data->phbPlssr),
			be64_to_cpu(data->phbCsr));
	if (data->lemFir)
		pr_info("Lem:         %016llx %016llx %016llx\n",
			be64_to_cpu(data->lemFir),
			be64_to_cpu(data->lemErrorMask),
			be64_to_cpu(data->lemWOF));
	if (data->phbErrorStatus)
		pr_info("PhbErr:      %016llx %016llx %016llx %016llx\n",
			be64_to_cpu(data->phbErrorStatus),
			be64_to_cpu(data->phbFirstErrorStatus),
			be64_to_cpu(data->phbErrorLog0),
			be64_to_cpu(data->phbErrorLog1));
	if (data->mmioErrorStatus)
		pr_info("OutErr:      %016llx %016llx %016llx %016llx\n",
			be64_to_cpu(data->mmioErrorStatus),
			be64_to_cpu(data->mmioFirstErrorStatus),
			be64_to_cpu(data->mmioErrorLog0),
			be64_to_cpu(data->mmioErrorLog1));
	if (data->dma0ErrorStatus)
		pr_info("InAErr:      %016llx %016llx %016llx %016llx\n",
			be64_to_cpu(data->dma0ErrorStatus),
			be64_to_cpu(data->dma0FirstErrorStatus),
			be64_to_cpu(data->dma0ErrorLog0),
			be64_to_cpu(data->dma0ErrorLog1));
	if (data->dma1ErrorStatus)
		pr_info("InBErr:      %016llx %016llx %016llx %016llx\n",
			be64_to_cpu(data->dma1ErrorStatus),
			be64_to_cpu(data->dma1FirstErrorStatus),
			be64_to_cpu(data->dma1ErrorLog0),
			be64_to_cpu(data->dma1ErrorLog1));

	for (i = 0; i < OPAL_PHB3_NUM_PEST_REGS; i++) {
		if ((be64_to_cpu(data->pestA[i]) >> 63) == 0 &&
		    (be64_to_cpu(data->pestB[i]) >> 63) == 0)
			continue;

		pr_info("PE[%3d] A/B: %016llx %016llx\n",
				i, be64_to_cpu(data->pestA[i]),
				be64_to_cpu(data->pestB[i]));
	}
}

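/*
 * Top-level diag-data dump: the ioType field of the common header
 * selects the matching hub-specific decoder above.
 */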
void pnv_pci_dump_phb_diag_data(struct pci_controller *hose,
				unsigned char *log_buff)
{
	struct OpalIoPhbErrorCommon *common;

	if (!hose || !log_buff)
		return;

	common = (struct OpalIoPhbErrorCommon *)log_buff;
	switch (be32_to_cpu(common->ioType)) {
	case OPAL_PHB_ERROR_DATA_TYPE_P7IOC:
		pnv_pci_dump_p7ioc_diag_data(hose, common);
		break;
	case OPAL_PHB_ERROR_DATA_TYPE_PHB3:
		pnv_pci_dump_phb3_diag_data(hose, common);
		break;
	default:
		pr_warn("%s: Unrecognized ioType %d\n",
			__func__, be32_to_cpu(common->ioType));
	}
}

static void pnv_pci_handle_eeh_config(struct pnv_phb *phb, u32 pe_no)
{
	unsigned long flags, rc;
	int has_diag;

	spin_lock_irqsave(&phb->lock, flags);

	rc = opal_pci_get_phb_diag_data2(phb->opal_id, phb->diag.blob,
					 PNV_PCI_DIAG_BUF_SIZE);
	has_diag = (rc == OPAL_SUCCESS);

	rc = opal_pci_eeh_freeze_clear(phb->opal_id, pe_no,
				       OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
	if (rc) {
		pr_warning("PCI %d: Failed to clear EEH freeze state"
			   " for PE#%d, err %ld\n",
			   phb->hose->global_number, pe_no, rc);

		/* For now, let's only display the diag buffer when we fail to clear
		 * the EEH status. We'll do more sensible things later when we have
		 * proper EEH support. We need to make sure we don't pollute ourselves
		 * with the normal errors generated when probing empty slots
		 */
		if (has_diag)
			pnv_pci_dump_phb_diag_data(phb->hose, phb->diag.blob);
		else
			pr_warning("PCI %d: No diag data available\n",
				   phb->hose->global_number);
	}

	spin_unlock_irqrestore(&phb->lock, flags);
}

static void pnv_pci_config_check_eeh(struct pnv_phb *phb,
				     struct device_node *dn)
{
	s64	rc;
	u8	fstate;
	__be16	pcierr;
	u32	pe_no;

	/*
	 * Get the PE#. During the PCI probe stage, the PE# might not
	 * have been assigned yet, so map all ER errors to the reserved PE.
	 */
	pe_no = PCI_DN(dn)->pe_number;
	if (pe_no == IODA_INVALID_PE) {
		if (phb->type == PNV_PHB_P5IOC2)
			pe_no = 0;
		else
			pe_no = phb->ioda.reserved_pe;
	}

	/* Read freeze status */
	rc = opal_pci_eeh_freeze_status(phb->opal_id, pe_no, &fstate, &pcierr,
					NULL);
	if (rc) {
		pr_warning("%s: Can't read EEH status (PE#%d) for "
			   "%s, err %lld\n",
			   __func__, pe_no, dn->full_name, rc);
		return;
	}
	cfg_dbg(" -> EEH check, bdfn=%04x PE#%d fstate=%x\n",
		(PCI_DN(dn)->busno << 8) | (PCI_DN(dn)->devfn),
		pe_no, fstate);
	if (fstate != 0)
		pnv_pci_handle_eeh_config(phb, pe_no);
}

int pnv_pci_cfg_read(struct device_node *dn,
		     int where, int size, u32 *val)
{
	struct pci_dn *pdn = PCI_DN(dn);
	struct pnv_phb *phb = pdn->phb->private_data;
	u32 bdfn = (pdn->busno << 8) | pdn->devfn;
	s64 rc;

	switch (size) {
	case 1: {
		u8 v8;
		rc = opal_pci_config_read_byte(phb->opal_id, bdfn, where, &v8);
		*val = (rc == OPAL_SUCCESS) ? v8 : 0xff;
		break;
	}
	case 2: {
		__be16 v16;
		rc = opal_pci_config_read_half_word(phb->opal_id, bdfn, where,
						   &v16);
		*val = (rc == OPAL_SUCCESS) ? be16_to_cpu(v16) : 0xffff;
		break;
	}
	case 4: {
		__be32 v32;
		rc = opal_pci_config_read_word(phb->opal_id, bdfn, where, &v32);
		*val = (rc == OPAL_SUCCESS) ? be32_to_cpu(v32) : 0xffffffff;
		break;
	}
	default:
		return PCIBIOS_FUNC_NOT_SUPPORTED;
	}

	cfg_dbg("%s: bus: %x devfn: %x +%x/%x -> %08x\n",
		__func__, pdn->busno, pdn->devfn, where, size, *val);
	return PCIBIOS_SUCCESSFUL;
}

int pnv_pci_cfg_write(struct device_node *dn,
		      int where, int size, u32 val)
{
	struct pci_dn *pdn = PCI_DN(dn);
	struct pnv_phb *phb = pdn->phb->private_data;
	u32 bdfn = (pdn->busno << 8) | pdn->devfn;

	cfg_dbg("%s: bus: %x devfn: %x +%x/%x -> %08x\n",
		__func__, pdn->busno, pdn->devfn, where, size, val);
	switch (size) {
	case 1:
		opal_pci_config_write_byte(phb->opal_id, bdfn, where, val);
		break;
	case 2:
		opal_pci_config_write_half_word(phb->opal_id, bdfn, where, val);
		break;
	case 4:
		opal_pci_config_write_word(phb->opal_id, bdfn, where, val);
		break;
	default:
		return PCIBIOS_FUNC_NOT_SUPPORTED;
	}

	return PCIBIOS_SUCCESSFUL;
}

#ifdef CONFIG_EEH
static bool pnv_pci_cfg_check(struct pci_controller *hose,
			      struct device_node *dn)
{
	struct eeh_dev *edev = NULL;
	struct pnv_phb *phb = hose->private_data;

	/* EEH not enabled ? */
	if (!(phb->flags & PNV_PHB_FLAG_EEH))
		return true;

	/* PE reset or device removed ? */
	edev = of_node_to_eeh_dev(dn);
	if (edev) {
		if (edev->pe &&
		    (edev->pe->state & EEH_PE_RESET))
			return false;

		if (edev->mode & EEH_DEV_REMOVED)
			return false;
	}

	return true;
}
#else
static inline bool pnv_pci_cfg_check(struct pci_controller *hose,
				     struct device_node *dn)
{
	return true;
}
#endif /* CONFIG_EEH */

static int pnv_pci_read_config(struct pci_bus *bus,
			       unsigned int devfn,
			       int where, int size, u32 *val)
{
	struct device_node *dn, *busdn = pci_bus_to_OF_node(bus);
	struct pci_dn *pdn;
	struct pnv_phb *phb;
	bool found = false;
	int ret;

	*val = 0xFFFFFFFF;
	for (dn = busdn->child; dn; dn = dn->sibling) {
		pdn = PCI_DN(dn);
		if (pdn && pdn->devfn == devfn) {
			phb = pdn->phb->private_data;
			found = true;
			break;
		}
	}

	if (!found || !pnv_pci_cfg_check(pdn->phb, dn))
		return PCIBIOS_DEVICE_NOT_FOUND;

	ret = pnv_pci_cfg_read(dn, where, size, val);
	if (phb->flags & PNV_PHB_FLAG_EEH) {
		if (*val == EEH_IO_ERROR_VALUE(size) &&
		    eeh_dev_check_failure(of_node_to_eeh_dev(dn)))
			return PCIBIOS_DEVICE_NOT_FOUND;
	} else {
		pnv_pci_config_check_eeh(phb, dn);
	}

	return ret;
}

static int pnv_pci_write_config(struct pci_bus *bus,
				unsigned int devfn,
				int where, int size, u32 val)
{
	struct device_node *dn, *busdn = pci_bus_to_OF_node(bus);
	struct pci_dn *pdn;
	struct pnv_phb *phb;
	bool found = false;
	int ret;

	for (dn = busdn->child; dn; dn = dn->sibling) {
		pdn = PCI_DN(dn);
		if (pdn && pdn->devfn == devfn) {
			phb = pdn->phb->private_data;
			found = true;
			break;
		}
	}

	if (!found || !pnv_pci_cfg_check(pdn->phb, dn))
		return PCIBIOS_DEVICE_NOT_FOUND;

	ret = pnv_pci_cfg_write(dn, where, size, val);
	if (!(phb->flags & PNV_PHB_FLAG_EEH))
		pnv_pci_config_check_eeh(phb, dn);

	return ret;
}

struct pci_ops pnv_pci_ops = {
	.read  = pnv_pci_read_config,
	.write = pnv_pci_write_config,
};

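/*
 * Populate TCEs: each 64-bit entry is the real page number shifted into
 * place, OR'ed with TCE_PCI_READ/TCE_PCI_WRITE permission bits; "rm"
 * selects the real-mode flavour of the hardware invalidate.
 */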
static int pnv_tce_build(struct iommu_table *tbl, long index, long npages,
			 unsigned long uaddr, enum dma_data_direction direction,
			 struct dma_attrs *attrs, bool rm)
{
	u64 proto_tce;
	__be64 *tcep, *tces;
	u64 rpn;

	proto_tce = TCE_PCI_READ; /* Read allowed */

	if (direction != DMA_TO_DEVICE)
		proto_tce |= TCE_PCI_WRITE;

	tces = tcep = ((__be64 *)tbl->it_base) + index - tbl->it_offset;
	rpn = __pa(uaddr) >> tbl->it_page_shift;

	while (npages--)
		*(tcep++) = cpu_to_be64(proto_tce |
				(rpn++ << tbl->it_page_shift));

	/* Some implementations won't cache invalid TCEs and thus may not
	 * need that flush. We'll probably turn it_type into a bit mask
	 * of flags if that becomes the case
	 */
	if (tbl->it_type & TCE_PCI_SWINV_CREATE)
		pnv_pci_ioda_tce_invalidate(tbl, tces, tcep - 1, rm);

	return 0;
}

static int pnv_tce_build_vm(struct iommu_table *tbl, long index, long npages,
			    unsigned long uaddr,
			    enum dma_data_direction direction,
			    struct dma_attrs *attrs)
{
	return pnv_tce_build(tbl, index, npages, uaddr, direction, attrs,
			false);
}

static void pnv_tce_free(struct iommu_table *tbl, long index, long npages,
		bool rm)
{
	__be64 *tcep, *tces;

	tces = tcep = ((__be64 *)tbl->it_base) + index - tbl->it_offset;

	while (npages--)
		*(tcep++) = cpu_to_be64(0);

	if (tbl->it_type & TCE_PCI_SWINV_FREE)
		pnv_pci_ioda_tce_invalidate(tbl, tces, tcep - 1, rm);
}

static void pnv_tce_free_vm(struct iommu_table *tbl, long index, long npages)
{
	pnv_tce_free(tbl, index, npages, false);
}

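/* Returns the TCE exactly as stored in the table, without endian conversion */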
static unsigned long pnv_tce_get(struct iommu_table *tbl, long index)
{
	return ((u64 *)tbl->it_base)[index - tbl->it_offset];
}

static int pnv_tce_build_rm(struct iommu_table *tbl, long index, long npages,
			    unsigned long uaddr,
			    enum dma_data_direction direction,
			    struct dma_attrs *attrs)
{
	return pnv_tce_build(tbl, index, npages, uaddr, direction, attrs, true);
}

static void pnv_tce_free_rm(struct iommu_table *tbl, long index, long npages)
{
	pnv_tce_free(tbl, index, npages, true);
}

void pnv_pci_setup_iommu_table(struct iommu_table *tbl,
			       void *tce_mem, u64 tce_size,
			       u64 dma_offset)
{
	tbl->it_blocksize = 16;
	tbl->it_base = (unsigned long)tce_mem;
	tbl->it_page_shift = IOMMU_PAGE_SHIFT_4K;
	tbl->it_offset = dma_offset >> tbl->it_page_shift;
	tbl->it_index = 0;
	tbl->it_size = tce_size >> 3;
	tbl->it_busno = 0;
	tbl->it_type = TCE_PCI;
}
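
/*
 * Example with hypothetical numbers: a 0x40000-byte TCE table holds
 * 32768 8-byte entries, so with 4K IOMMU pages it maps a 128MB DMA
 * window starting at bus address dma_offset:
 *
 *	pnv_pci_setup_iommu_table(tbl, tce_mem, 0x40000, 0);
 */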

static struct iommu_table *pnv_pci_setup_bml_iommu(struct pci_controller *hose)
{
	struct iommu_table *tbl;
	const __be64 *basep, *swinvp;
	const __be32 *sizep;

	basep = of_get_property(hose->dn, "linux,tce-base", NULL);
	sizep = of_get_property(hose->dn, "linux,tce-size", NULL);
	if (basep == NULL || sizep == NULL) {
		pr_err("PCI: %s has missing tce entries !\n",
		       hose->dn->full_name);
		return NULL;
	}
	tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL, hose->node);
	if (WARN_ON(!tbl))
		return NULL;
	pnv_pci_setup_iommu_table(tbl, __va(be64_to_cpup(basep)),
				  be32_to_cpup(sizep), 0);
	iommu_init_table(tbl, hose->node);
	iommu_register_group(tbl, pci_domain_nr(hose->bus), 0);

	/* Deal with SW invalidated TCEs when needed (BML way) */
	swinvp = of_get_property(hose->dn, "linux,tce-sw-invalidate-info",
				 NULL);
	if (swinvp) {
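		/* The property's first cell (ioremapped) is the invalidate
		 * register, stashed in it_index; its second cell goes in
		 * it_busno. */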
		tbl->it_busno = be64_to_cpu(swinvp[1]);
		tbl->it_index = (unsigned long)ioremap(be64_to_cpup(swinvp), 8);
		tbl->it_type = TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE;
	}
	return tbl;
}

static void pnv_pci_dma_fallback_setup(struct pci_controller *hose,
				       struct pci_dev *pdev)
{
	struct device_node *np = pci_bus_to_OF_node(hose->bus);
	struct pci_dn *pdn;

	if (np == NULL)
		return;
	pdn = PCI_DN(np);
	if (!pdn->iommu_table)
		pdn->iommu_table = pnv_pci_setup_bml_iommu(hose);
	if (!pdn->iommu_table)
		return;
	set_iommu_table_base_and_group(&pdev->dev, pdn->iommu_table);
}

static void pnv_pci_dma_dev_setup(struct pci_dev *pdev)
{
	struct pci_controller *hose = pci_bus_to_host(pdev->bus);
	struct pnv_phb *phb = hose->private_data;

	/* If we have no phb structure, try to set up a fallback based on
	 * the device-tree (RTAS PCI for example)
	 */
	if (phb && phb->dma_dev_setup)
		phb->dma_dev_setup(phb, pdev);
	else
		pnv_pci_dma_fallback_setup(hose, pdev);
}

int pnv_pci_dma_set_mask(struct pci_dev *pdev, u64 dma_mask)
{
	struct pci_controller *hose = pci_bus_to_host(pdev->bus);
	struct pnv_phb *phb = hose->private_data;

	if (phb && phb->dma_set_mask)
		return phb->dma_set_mask(phb, pdev, dma_mask);
	return __dma_set_mask(&pdev->dev, dma_mask);
}

void pnv_pci_shutdown(void)
{
	struct pci_controller *hose;

	list_for_each_entry(hose, &hose_list, list_node) {
		struct pnv_phb *phb = hose->private_data;

		if (phb && phb->shutdown)
			phb->shutdown(phb);
	}
}

/* Fixup wrong class code in p7ioc and p8 root complex */
static void pnv_p7ioc_rc_quirk(struct pci_dev *dev)
{
	dev->class = PCI_CLASS_BRIDGE_PCI << 8;
}
DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_IBM, 0x3b9, pnv_p7ioc_rc_quirk);

static int pnv_pci_probe_mode(struct pci_bus *bus)
{
	struct pci_controller *hose = pci_bus_to_host(bus);
	const __be64 *tstamp;
	u64 now, target;

	/* We hijack this as a way to ensure we have waited long
	 * enough since the reset was lifted on the PCI bus
	 */
	if (bus != hose->bus)
		return PCI_PROBE_NORMAL;
	tstamp = of_get_property(hose->dn, "reset-clear-timestamp", NULL);
	if (!tstamp || !*tstamp)
		return PCI_PROBE_NORMAL;

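	/*
	 * Timebase ticks divided by tb_ticks_per_usec yield microseconds;
	 * sleep out whatever remains of PCI_RESET_DELAY_US measured from
	 * the recorded reset-clear timestamp.
	 */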
	now = mftb() / tb_ticks_per_usec;
	target = (be64_to_cpup(tstamp) / tb_ticks_per_usec)
		+ PCI_RESET_DELAY_US;

	pr_devel("pci %04d: Reset target: 0x%llx now: 0x%llx\n",
		 hose->global_number, target, now);

	if (now < target)
		msleep((target - now + 999) / 1000);

	return PCI_PROBE_NORMAL;
}

void __init pnv_pci_init(void)
{
	struct device_node *np;

	pci_add_flags(PCI_CAN_SKIP_ISA_ALIGN);

	/* OPAL absent, try POPAL first then RTAS detection of PHBs */
	if (!firmware_has_feature(FW_FEATURE_OPAL)) {
#ifdef CONFIG_PPC_POWERNV_RTAS
		init_pci_config_tokens();
		find_and_init_phbs();
#endif /* CONFIG_PPC_POWERNV_RTAS */
	}
	/* OPAL is here, do our normal stuff */
	else {
		int found_ioda = 0;

		/* Look for IODA IO-Hubs. We don't support mixing IODA
		 * and p5ioc2 due to the need to change some global
		 * probing flags
		 */
		for_each_compatible_node(np, NULL, "ibm,ioda-hub") {
			pnv_pci_init_ioda_hub(np);
			found_ioda = 1;
		}

		/* Look for p5ioc2 IO-Hubs */
		if (!found_ioda)
			for_each_compatible_node(np, NULL, "ibm,p5ioc2")
				pnv_pci_init_p5ioc2_hub(np);

		/* Look for ioda2 built-in PHB3's */
		for_each_compatible_node(np, NULL, "ibm,ioda2-phb")
			pnv_pci_init_ioda2_phb(np);
	}

	/* Setup the linkage between OF nodes and PHBs */
	pci_devs_phb_init();

	/* Configure IOMMU DMA hooks */
	ppc_md.pci_dma_dev_setup = pnv_pci_dma_dev_setup;
	ppc_md.tce_build = pnv_tce_build_vm;
	ppc_md.tce_free = pnv_tce_free_vm;
	ppc_md.tce_build_rm = pnv_tce_build_rm;
	ppc_md.tce_free_rm = pnv_tce_free_rm;
	ppc_md.tce_get = pnv_tce_get;
	ppc_md.pci_probe_mode = pnv_pci_probe_mode;
	set_pci_dma_ops(&dma_iommu_ops);

	/* Configure MSIs */
#ifdef CONFIG_PCI_MSI
	ppc_md.msi_check_device = pnv_msi_check_device;
	ppc_md.setup_msi_irqs = pnv_setup_msi_irqs;
	ppc_md.teardown_msi_irqs = pnv_teardown_msi_irqs;
#endif
}

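/*
 * Keep IOMMU group membership in step with PCI hotplug: devices are
 * attached to their group on BUS_NOTIFY_ADD_DEVICE and detached on
 * BUS_NOTIFY_DEL_DEVICE.
 */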
static int tce_iommu_bus_notifier(struct notifier_block *nb,
		unsigned long action, void *data)
{
	struct device *dev = data;

	switch (action) {
	case BUS_NOTIFY_ADD_DEVICE:
		return iommu_add_device(dev);
	case BUS_NOTIFY_DEL_DEVICE:
		if (dev->iommu_group)
			iommu_del_device(dev);
		return 0;
	default:
		return 0;
	}
}

static struct notifier_block tce_iommu_bus_nb = {
	.notifier_call = tce_iommu_bus_notifier,
};

static int __init tce_iommu_bus_notifier_init(void)
{
	bus_register_notifier(&pci_bus_type, &tce_iommu_bus_nb);
	return 0;
}

subsys_initcall_sync(tce_iommu_bus_notifier_init);