/* pci_sun4v.c: SUN4V specific PCI controller support.
 *
 * Copyright (C) 2006, 2007, 2008 David S. Miller (davem@davemloft.net)
 */

#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/pci.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/interrupt.h>
#include <linux/percpu.h>
#include <linux/irq.h>
#include <linux/msi.h>
#include <linux/log2.h>
#include <linux/of_device.h>

#include <asm/iommu.h>
#include <asm/irq.h>
#include <asm/hypervisor.h>
#include <asm/prom.h>

#include "pci_impl.h"
#include "iommu_common.h"

#include "pci_sun4v.h"

#define DRIVER_NAME	"pci_sun4v"
#define PFX		DRIVER_NAME ": "

static unsigned long vpci_major = 1;
static unsigned long vpci_minor = 1;

#define PGLIST_NENTS	(PAGE_SIZE / sizeof(u64))

struct iommu_batch {
	struct device	*dev;		/* Device mapping is for.	*/
	unsigned long	prot;		/* IOMMU page protections	*/
	unsigned long	entry;		/* Index into IOTSB.		*/
	u64		*pglist;	/* List of physical pages	*/
	unsigned long	npages;		/* Number of pages in list.	*/
};

static DEFINE_PER_CPU(struct iommu_batch, iommu_batch);
static int iommu_batch_initialized;

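/* Each CPU collects IOMMU map requests in its per-cpu pglist page and
 * hands them to the hypervisor via pci_sun4v_iommu_map(), amortizing the
 * hypervisor trap cost over up to PGLIST_NENTS pages per flush.
 */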
/* Interrupts must be disabled.  */
static inline void iommu_batch_start(struct device *dev, unsigned long prot, unsigned long entry)
{
	struct iommu_batch *p = &__get_cpu_var(iommu_batch);

	p->dev		= dev;
	p->prot		= prot;
	p->entry	= entry;
	p->npages	= 0;
}

/* Interrupts must be disabled.  */
static long iommu_batch_flush(struct iommu_batch *p)
{
	struct pci_pbm_info *pbm = p->dev->archdata.host_controller;
	unsigned long devhandle = pbm->devhandle;
	unsigned long prot = p->prot;
	unsigned long entry = p->entry;
	u64 *pglist = p->pglist;
	unsigned long npages = p->npages;

	while (npages != 0) {
		long num;

		num = pci_sun4v_iommu_map(devhandle, HV_PCI_TSBID(0, entry),
					  npages, prot, __pa(pglist));
		if (unlikely(num < 0)) {
			if (printk_ratelimit())
				printk("iommu_batch_flush: IOMMU map of "
				       "[%08lx:%08llx:%lx:%lx:%lx] failed with "
				       "status %ld\n",
				       devhandle, HV_PCI_TSBID(0, entry),
				       npages, prot, __pa(pglist), num);
			return -1;
		}

		entry += num;
		npages -= num;
		pglist += num;
	}

	p->entry = entry;
	p->npages = 0;

	return 0;
}

static inline void iommu_batch_new_entry(unsigned long entry)
{
	struct iommu_batch *p = &__get_cpu_var(iommu_batch);

	if (p->entry + p->npages == entry)
		return;
	if (p->entry != ~0UL)
		iommu_batch_flush(p);
	p->entry = entry;
}

/* Interrupts must be disabled.  */
static inline long iommu_batch_add(u64 phys_page)
{
	struct iommu_batch *p = &__get_cpu_var(iommu_batch);

	BUG_ON(p->npages >= PGLIST_NENTS);

	p->pglist[p->npages++] = phys_page;
	if (p->npages == PGLIST_NENTS)
		return iommu_batch_flush(p);

	return 0;
}

/* Interrupts must be disabled.  */
static inline long iommu_batch_end(void)
{
	struct iommu_batch *p = &__get_cpu_var(iommu_batch);

	BUG_ON(p->npages >= PGLIST_NENTS);

	return iommu_batch_flush(p);
}

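/* Allocate zeroed pages on the device's NUMA node, reserve a run of IOTSB
 * entries for them, and program the translations with both read and write
 * attributes via the batching helpers.
 */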
static void *dma_4v_alloc_coherent(struct device *dev, size_t size,
				   dma_addr_t *dma_addrp, gfp_t gfp)
{
	unsigned long flags, order, first_page, npages, n;
	struct iommu *iommu;
	struct page *page;
	void *ret;
	long entry;
	int nid;

	size = IO_PAGE_ALIGN(size);
	order = get_order(size);
	if (unlikely(order >= MAX_ORDER))
		return NULL;

	npages = size >> IO_PAGE_SHIFT;

	nid = dev->archdata.numa_node;
	page = alloc_pages_node(nid, gfp, order);
	if (unlikely(!page))
		return NULL;

	first_page = (unsigned long) page_address(page);
	memset((char *)first_page, 0, PAGE_SIZE << order);

	iommu = dev->archdata.iommu;

	spin_lock_irqsave(&iommu->lock, flags);
	entry = iommu_range_alloc(dev, iommu, npages, NULL);
	spin_unlock_irqrestore(&iommu->lock, flags);

	if (unlikely(entry == DMA_ERROR_CODE))
		goto range_alloc_fail;

	*dma_addrp = (iommu->page_table_map_base +
		      (entry << IO_PAGE_SHIFT));
	ret = (void *) first_page;
	first_page = __pa(first_page);

	local_irq_save(flags);

	iommu_batch_start(dev,
			  (HV_PCI_MAP_ATTR_READ |
			   HV_PCI_MAP_ATTR_WRITE),
			  entry);

	for (n = 0; n < npages; n++) {
		long err = iommu_batch_add(first_page + (n * PAGE_SIZE));
		if (unlikely(err < 0L))
			goto iommu_map_fail;
	}

	if (unlikely(iommu_batch_end() < 0L))
		goto iommu_map_fail;

	local_irq_restore(flags);

	return ret;

iommu_map_fail:
	/* Interrupts are disabled.  */
	spin_lock(&iommu->lock);
	iommu_range_free(iommu, *dma_addrp, npages);
	spin_unlock_irqrestore(&iommu->lock, flags);

range_alloc_fail:
	free_pages(first_page, order);
	return NULL;
}

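/* The hypervisor may demap fewer entries than requested in one call, so
 * pci_sun4v_iommu_demap() is retried until the whole range is torn down.
 */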
static void dma_4v_free_coherent(struct device *dev, size_t size, void *cpu,
				 dma_addr_t dvma)
{
	struct pci_pbm_info *pbm;
	struct iommu *iommu;
	unsigned long flags, order, npages, entry;
	u32 devhandle;

	npages = IO_PAGE_ALIGN(size) >> IO_PAGE_SHIFT;
	iommu = dev->archdata.iommu;
	pbm = dev->archdata.host_controller;
	devhandle = pbm->devhandle;
	entry = ((dvma - iommu->page_table_map_base) >> IO_PAGE_SHIFT);

	spin_lock_irqsave(&iommu->lock, flags);

	iommu_range_free(iommu, dvma, npages);

	do {
		unsigned long num;

		num = pci_sun4v_iommu_demap(devhandle, HV_PCI_TSBID(0, entry),
					    npages);
		entry += num;
		npages -= num;
	} while (npages != 0);

	spin_unlock_irqrestore(&iommu->lock, flags);

	order = get_order(size);
	if (order < 10)
		free_pages((unsigned long)cpu, order);
}

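/* Streaming map of a single page: reserve IOTSB entries covering the
 * buffer, then batch-map them with read access (plus write access unless
 * the direction is DMA_TO_DEVICE).
 */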
static dma_addr_t dma_4v_map_page(struct device *dev, struct page *page,
				  unsigned long offset, size_t sz,
				  enum dma_data_direction direction,
				  struct dma_attrs *attrs)
{
	struct iommu *iommu;
	unsigned long flags, npages, oaddr;
	unsigned long i, base_paddr;
	u32 bus_addr, ret;
	unsigned long prot;
	long entry;

	iommu = dev->archdata.iommu;

	if (unlikely(direction == DMA_NONE))
		goto bad;

	oaddr = (unsigned long)(page_address(page) + offset);
	npages = IO_PAGE_ALIGN(oaddr + sz) - (oaddr & IO_PAGE_MASK);
	npages >>= IO_PAGE_SHIFT;

	spin_lock_irqsave(&iommu->lock, flags);
	entry = iommu_range_alloc(dev, iommu, npages, NULL);
	spin_unlock_irqrestore(&iommu->lock, flags);

	if (unlikely(entry == DMA_ERROR_CODE))
		goto bad;

	bus_addr = (iommu->page_table_map_base +
		    (entry << IO_PAGE_SHIFT));
	ret = bus_addr | (oaddr & ~IO_PAGE_MASK);
	base_paddr = __pa(oaddr & IO_PAGE_MASK);
	prot = HV_PCI_MAP_ATTR_READ;
	if (direction != DMA_TO_DEVICE)
		prot |= HV_PCI_MAP_ATTR_WRITE;

	local_irq_save(flags);

	iommu_batch_start(dev, prot, entry);

	for (i = 0; i < npages; i++, base_paddr += IO_PAGE_SIZE) {
		long err = iommu_batch_add(base_paddr);
		if (unlikely(err < 0L))
			goto iommu_map_fail;
	}
	if (unlikely(iommu_batch_end() < 0L))
		goto iommu_map_fail;

	local_irq_restore(flags);

	return ret;

bad:
	if (printk_ratelimit())
		WARN_ON(1);
	return DMA_ERROR_CODE;

iommu_map_fail:
	/* Interrupts are disabled.  */
	spin_lock(&iommu->lock);
	iommu_range_free(iommu, bus_addr, npages);
	spin_unlock_irqrestore(&iommu->lock, flags);

	return DMA_ERROR_CODE;
}

static void dma_4v_unmap_page(struct device *dev, dma_addr_t bus_addr,
			      size_t sz, enum dma_data_direction direction,
			      struct dma_attrs *attrs)
{
	struct pci_pbm_info *pbm;
	struct iommu *iommu;
	unsigned long flags, npages;
	long entry;
	u32 devhandle;

	if (unlikely(direction == DMA_NONE)) {
		if (printk_ratelimit())
			WARN_ON(1);
		return;
	}

	iommu = dev->archdata.iommu;
	pbm = dev->archdata.host_controller;
	devhandle = pbm->devhandle;

	npages = IO_PAGE_ALIGN(bus_addr + sz) - (bus_addr & IO_PAGE_MASK);
	npages >>= IO_PAGE_SHIFT;
	bus_addr &= IO_PAGE_MASK;

	spin_lock_irqsave(&iommu->lock, flags);

	iommu_range_free(iommu, bus_addr, npages);

	entry = (bus_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT;
	do {
		unsigned long num;

		num = pci_sun4v_iommu_demap(devhandle, HV_PCI_TSBID(0, entry),
					    npages);
		entry += num;
		npages -= num;
	} while (npages != 0);

	spin_unlock_irqrestore(&iommu->lock, flags);
}

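/* Map a scatterlist, coalescing entries whose IOVAs come out contiguous
 * as long as the merged segment stays within the device's maximum segment
 * size and does not cross its DMA segment boundary.
 */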
static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist,
			 int nelems, enum dma_data_direction direction,
			 struct dma_attrs *attrs)
{
	struct scatterlist *s, *outs, *segstart;
	unsigned long flags, handle, prot;
	dma_addr_t dma_next = 0, dma_addr;
	unsigned int max_seg_size;
	unsigned long seg_boundary_size;
	int outcount, incount, i;
	struct iommu *iommu;
	unsigned long base_shift;
	long err;

	BUG_ON(direction == DMA_NONE);

	iommu = dev->archdata.iommu;
	if (nelems == 0 || !iommu)
		return 0;

	prot = HV_PCI_MAP_ATTR_READ;
	if (direction != DMA_TO_DEVICE)
		prot |= HV_PCI_MAP_ATTR_WRITE;

	outs = s = segstart = &sglist[0];
	outcount = 1;
	incount = nelems;
	handle = 0;

	/* Init first segment length for backout at failure */
	outs->dma_length = 0;

	spin_lock_irqsave(&iommu->lock, flags);

	iommu_batch_start(dev, prot, ~0UL);

	max_seg_size = dma_get_max_seg_size(dev);
	seg_boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
				  IO_PAGE_SIZE) >> IO_PAGE_SHIFT;
	base_shift = iommu->page_table_map_base >> IO_PAGE_SHIFT;
	for_each_sg(sglist, s, nelems, i) {
		unsigned long paddr, npages, entry, out_entry = 0, slen;

		slen = s->length;
		/* Sanity check */
		if (slen == 0) {
			dma_next = 0;
			continue;
		}
		/* Allocate iommu entries for that segment */
		paddr = (unsigned long) SG_ENT_PHYS_ADDRESS(s);
		npages = iommu_num_pages(paddr, slen, IO_PAGE_SIZE);
		entry = iommu_range_alloc(dev, iommu, npages, &handle);

		/* Handle failure */
		if (unlikely(entry == DMA_ERROR_CODE)) {
			if (printk_ratelimit())
				printk(KERN_INFO "iommu_alloc failed, iommu %p paddr %lx"
				       " npages %lx\n", iommu, paddr, npages);
			goto iommu_map_failed;
		}

		iommu_batch_new_entry(entry);

		/* Convert entry to a dma_addr_t */
		dma_addr = iommu->page_table_map_base +
			(entry << IO_PAGE_SHIFT);
		dma_addr |= (s->offset & ~IO_PAGE_MASK);

		/* Insert into HW table */
		paddr &= IO_PAGE_MASK;
		while (npages--) {
			err = iommu_batch_add(paddr);
			if (unlikely(err < 0L))
				goto iommu_map_failed;
			paddr += IO_PAGE_SIZE;
		}

		/* If we are in an open segment, try merging */
		if (segstart != s) {
			/* We cannot merge if:
			 * - allocated dma_addr isn't contiguous to previous allocation
			 */
			if ((dma_addr != dma_next) ||
			    (outs->dma_length + s->length > max_seg_size) ||
			    (is_span_boundary(out_entry, base_shift,
					      seg_boundary_size, outs, s))) {
				/* Can't merge: create a new segment */
				segstart = s;
				outcount++;
				outs = sg_next(outs);
			} else {
				outs->dma_length += s->length;
			}
		}

		if (segstart == s) {
			/* This is a new segment, fill entries */
			outs->dma_address = dma_addr;
			outs->dma_length = slen;
			out_entry = entry;
		}

		/* Calculate next page pointer for contiguous check */
		dma_next = dma_addr + slen;
	}

	err = iommu_batch_end();

	if (unlikely(err < 0L))
		goto iommu_map_failed;

	spin_unlock_irqrestore(&iommu->lock, flags);

	if (outcount < incount) {
		outs = sg_next(outs);
		outs->dma_address = DMA_ERROR_CODE;
		outs->dma_length = 0;
	}

	return outcount;

iommu_map_failed:
	for_each_sg(sglist, s, nelems, i) {
		if (s->dma_length != 0) {
			unsigned long vaddr, npages;

			vaddr = s->dma_address & IO_PAGE_MASK;
			npages = iommu_num_pages(s->dma_address, s->dma_length,
						 IO_PAGE_SIZE);
			iommu_range_free(iommu, vaddr, npages);
			/* XXX demap? XXX */
			s->dma_address = DMA_ERROR_CODE;
			s->dma_length = 0;
		}
		if (s == outs)
			break;
	}
	spin_unlock_irqrestore(&iommu->lock, flags);

	return 0;
}

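/* Undo dma_4v_map_sg(): for each mapped segment, release its arena range
 * and demap the IOTSB entries; the demap hypervisor call may handle fewer
 * entries than asked for, hence the inner retry loop.
 */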
static void dma_4v_unmap_sg(struct device *dev, struct scatterlist *sglist,
			    int nelems, enum dma_data_direction direction,
			    struct dma_attrs *attrs)
{
	struct pci_pbm_info *pbm;
	struct scatterlist *sg;
	struct iommu *iommu;
	unsigned long flags;
	u32 devhandle;

	BUG_ON(direction == DMA_NONE);

	iommu = dev->archdata.iommu;
	pbm = dev->archdata.host_controller;
	devhandle = pbm->devhandle;

	spin_lock_irqsave(&iommu->lock, flags);

	sg = sglist;
	while (nelems--) {
		dma_addr_t dma_handle = sg->dma_address;
		unsigned int len = sg->dma_length;
		unsigned long npages, entry;

		if (!len)
			break;
		npages = iommu_num_pages(dma_handle, len, IO_PAGE_SIZE);
		iommu_range_free(iommu, dma_handle, npages);

		entry = ((dma_handle - iommu->page_table_map_base) >> IO_PAGE_SHIFT);
		while (npages) {
			unsigned long num;

			num = pci_sun4v_iommu_demap(devhandle, HV_PCI_TSBID(0, entry),
						    npages);
			entry += num;
			npages -= num;
		}

		sg = sg_next(sg);
	}

	spin_unlock_irqrestore(&iommu->lock, flags);
}

static struct dma_map_ops sun4v_dma_ops = {
	.alloc_coherent			= dma_4v_alloc_coherent,
	.free_coherent			= dma_4v_free_coherent,
	.map_page			= dma_4v_map_page,
	.unmap_page			= dma_4v_unmap_page,
	.map_sg				= dma_4v_map_sg,
	.unmap_sg			= dma_4v_unmap_sg,
};

static void __devinit pci_sun4v_scan_bus(struct pci_pbm_info *pbm,
					 struct device *parent)
{
	struct property *prop;
	struct device_node *dp;

	dp = pbm->op->dev.of_node;
	prop = of_find_property(dp, "66mhz-capable", NULL);
	pbm->is_66mhz_capable = (prop != NULL);
	pbm->pci_bus = pci_scan_one_pbm(pbm, parent);

	/* XXX register error interrupt handlers XXX */
}

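/* Scan the IOTSB for translations that the firmware (OBP) left behind.
 * Entries whose target page belongs to memory the kernel owns are stale
 * and get demapped; the rest are kept and marked busy in the arena so
 * they are never handed out again.
 */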
static unsigned long __devinit probe_existing_entries(struct pci_pbm_info *pbm,
						      struct iommu *iommu)
{
	struct iommu_arena *arena = &iommu->arena;
	unsigned long i, cnt = 0;
	u32 devhandle;

	devhandle = pbm->devhandle;
	for (i = 0; i < arena->limit; i++) {
		unsigned long ret, io_attrs, ra;

		ret = pci_sun4v_iommu_getmap(devhandle,
					     HV_PCI_TSBID(0, i),
					     &io_attrs, &ra);
		if (ret == HV_EOK) {
			if (page_in_phys_avail(ra)) {
				pci_sun4v_iommu_demap(devhandle,
						      HV_PCI_TSBID(0, i), 1);
			} else {
				cnt++;
				__set_bit(i, arena->map);
			}
		}
	}

	return cnt;
}

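/* The IOVA window comes from the "virtual-dma" OF property as a
 * (base, size) pair; if the property is missing, default to a 2GB window
 * starting at 0x80000000.
 */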
static int __devinit pci_sun4v_iommu_init(struct pci_pbm_info *pbm)
{
	static const u32 vdma_default[] = { 0x80000000, 0x80000000 };
	struct iommu *iommu = pbm->iommu;
	unsigned long num_tsb_entries, sz, tsbsize;
	u32 dma_mask, dma_offset;
	const u32 *vdma;

	vdma = of_get_property(pbm->op->dev.of_node, "virtual-dma", NULL);
	if (!vdma)
		vdma = vdma_default;

	if ((vdma[0] | vdma[1]) & ~IO_PAGE_MASK) {
		printk(KERN_ERR PFX "Strange virtual-dma[%08x:%08x].\n",
		       vdma[0], vdma[1]);
		return -EINVAL;
	}

	dma_mask = (roundup_pow_of_two(vdma[1]) - 1UL);
	num_tsb_entries = vdma[1] / IO_PAGE_SIZE;
	tsbsize = num_tsb_entries * sizeof(iopte_t);

	dma_offset = vdma[0];

	/* Setup initial software IOMMU state. */
	spin_lock_init(&iommu->lock);
	iommu->ctx_lowest_free = 1;
	iommu->page_table_map_base = dma_offset;
	iommu->dma_addr_mask = dma_mask;

	/* Allocate and initialize the free area map.  */
	sz = (num_tsb_entries + 7) / 8;
	sz = (sz + 7UL) & ~7UL;
	iommu->arena.map = kzalloc(sz, GFP_KERNEL);
	if (!iommu->arena.map) {
		printk(KERN_ERR PFX "Error, kmalloc(arena.map) failed.\n");
		return -ENOMEM;
	}
	iommu->arena.limit = num_tsb_entries;

	sz = probe_existing_entries(pbm, iommu);
	if (sz)
		printk("%s: Imported %lu TSB entries from OBP\n",
		       pbm->name, sz);

	return 0;
}

#ifdef CONFIG_PCI_MSI
struct pci_sun4v_msiq_entry {
	u64		version_type;
#define MSIQ_VERSION_MASK		0xffffffff00000000UL
#define MSIQ_VERSION_SHIFT		32
#define MSIQ_TYPE_MASK			0x00000000000000ffUL
#define MSIQ_TYPE_SHIFT			0
#define MSIQ_TYPE_NONE			0x00
#define MSIQ_TYPE_MSG			0x01
#define MSIQ_TYPE_MSI32			0x02
#define MSIQ_TYPE_MSI64			0x03
#define MSIQ_TYPE_INTX			0x08
#define MSIQ_TYPE_NONE2			0xff

	u64		intx_sysino;
	u64		reserved1;
	u64		stick;
	u64		req_id;  /* bus/device/func */
#define MSIQ_REQID_BUS_MASK		0xff00UL
#define MSIQ_REQID_BUS_SHIFT		8
#define MSIQ_REQID_DEVICE_MASK		0x00f8UL
#define MSIQ_REQID_DEVICE_SHIFT		3
#define MSIQ_REQID_FUNC_MASK		0x0007UL
#define MSIQ_REQID_FUNC_SHIFT		0

	u64		msi_address;

	/* The format of this value is message type dependent.
	 * For MSI bits 15:0 are the data from the MSI packet.
	 * For MSI-X bits 31:0 are the data from the MSI packet.
	 * For MSG, the message code and message routing code where:
	 * 	bits 39:32 is the bus/device/fn of the msg target-id
	 *	bits 18:16 is the message routing code
	 *	bits 7:0 is the message code
	 * For INTx the low order 2-bits are:
	 *	00 - INTA
	 *	01 - INTB
	 *	10 - INTC
	 *	11 - INTD
	 */
	u64		msi_data;

	u64		reserved2;
};

static int pci_sun4v_get_head(struct pci_pbm_info *pbm, unsigned long msiqid,
			      unsigned long *head)
{
	unsigned long err, limit;

	err = pci_sun4v_msiq_gethead(pbm->devhandle, msiqid, head);
	if (unlikely(err))
		return -ENXIO;

	limit = pbm->msiq_ent_count * sizeof(struct pci_sun4v_msiq_entry);
	if (unlikely(*head >= limit))
		return -EFBIG;

	return 0;
}

static int pci_sun4v_dequeue_msi(struct pci_pbm_info *pbm,
				 unsigned long msiqid, unsigned long *head,
				 unsigned long *msi)
{
	struct pci_sun4v_msiq_entry *ep;
	unsigned long err, type;

	/* Note: void pointer arithmetic, 'head' is a byte offset  */
	ep = (pbm->msi_queues + ((msiqid - pbm->msiq_first) *
				 (pbm->msiq_ent_count *
				  sizeof(struct pci_sun4v_msiq_entry))) +
	      *head);

	if ((ep->version_type & MSIQ_TYPE_MASK) == 0)
		return 0;

	type = (ep->version_type & MSIQ_TYPE_MASK) >> MSIQ_TYPE_SHIFT;
	if (unlikely(type != MSIQ_TYPE_MSI32 &&
		     type != MSIQ_TYPE_MSI64))
		return -EINVAL;

	*msi = ep->msi_data;

	err = pci_sun4v_msi_setstate(pbm->devhandle,
				     ep->msi_data /* msi_num */,
				     HV_MSISTATE_IDLE);
	if (unlikely(err))
		return -ENXIO;

	/* Clear the entry.  */
	ep->version_type &= ~MSIQ_TYPE_MASK;

	(*head) += sizeof(struct pci_sun4v_msiq_entry);
	if (*head >=
	    (pbm->msiq_ent_count * sizeof(struct pci_sun4v_msiq_entry)))
		*head = 0;

	return 1;
}

static int pci_sun4v_set_head(struct pci_pbm_info *pbm, unsigned long msiqid,
			      unsigned long head)
{
	unsigned long err;

	err = pci_sun4v_msiq_sethead(pbm->devhandle, msiqid, head);
	if (unlikely(err))
		return -EINVAL;

	return 0;
}

static int pci_sun4v_msi_setup(struct pci_pbm_info *pbm, unsigned long msiqid,
			       unsigned long msi, int is_msi64)
{
	if (pci_sun4v_msi_setmsiq(pbm->devhandle, msi, msiqid,
				  (is_msi64 ?
				   HV_MSITYPE_MSI64 : HV_MSITYPE_MSI32)))
		return -ENXIO;
	if (pci_sun4v_msi_setstate(pbm->devhandle, msi, HV_MSISTATE_IDLE))
		return -ENXIO;
	if (pci_sun4v_msi_setvalid(pbm->devhandle, msi, HV_MSIVALID_VALID))
		return -ENXIO;
	return 0;
}

static int pci_sun4v_msi_teardown(struct pci_pbm_info *pbm, unsigned long msi)
{
	unsigned long err, msiqid;

	err = pci_sun4v_msi_getmsiq(pbm->devhandle, msi, &msiqid);
	if (err)
		return -ENXIO;

	pci_sun4v_msi_setvalid(pbm->devhandle, msi, HV_MSIVALID_INVALID);

	return 0;
}

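/* All MSI event queues live in one physically contiguous allocation.  Each
 * queue is registered with the hypervisor and its configuration read back
 * to verify that the base address and entry count took effect.
 */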
static int pci_sun4v_msiq_alloc(struct pci_pbm_info *pbm)
{
	unsigned long q_size, alloc_size, pages, order;
	int i;

	q_size = pbm->msiq_ent_count * sizeof(struct pci_sun4v_msiq_entry);
	alloc_size = (pbm->msiq_num * q_size);
	order = get_order(alloc_size);
	pages = __get_free_pages(GFP_KERNEL | __GFP_COMP, order);
	if (pages == 0UL) {
		printk(KERN_ERR "MSI: Cannot allocate MSI queues (o=%lu).\n",
		       order);
		return -ENOMEM;
	}
	memset((char *)pages, 0, PAGE_SIZE << order);
	pbm->msi_queues = (void *) pages;

	for (i = 0; i < pbm->msiq_num; i++) {
		unsigned long err, base = __pa(pages + (i * q_size));
		unsigned long ret1, ret2;

		err = pci_sun4v_msiq_conf(pbm->devhandle,
					  pbm->msiq_first + i,
					  base, pbm->msiq_ent_count);
		if (err) {
			printk(KERN_ERR "MSI: msiq register fails (err=%lu)\n",
			       err);
			goto h_error;
		}

		err = pci_sun4v_msiq_info(pbm->devhandle,
					  pbm->msiq_first + i,
					  &ret1, &ret2);
		if (err) {
			printk(KERN_ERR "MSI: Cannot read msiq (err=%lu)\n",
			       err);
			goto h_error;
		}
		if (ret1 != base || ret2 != pbm->msiq_ent_count) {
			printk(KERN_ERR "MSI: Bogus qconf "
			       "expected[%lx:%x] got[%lx:%lx]\n",
			       base, pbm->msiq_ent_count,
			       ret1, ret2);
			goto h_error;
		}
	}

	return 0;

h_error:
	free_pages(pages, order);
	return -EINVAL;
}

static void pci_sun4v_msiq_free(struct pci_pbm_info *pbm)
{
	unsigned long q_size, alloc_size, pages, order;
	int i;

	for (i = 0; i < pbm->msiq_num; i++) {
		unsigned long msiqid = pbm->msiq_first + i;

		(void) pci_sun4v_msiq_conf(pbm->devhandle, msiqid, 0UL, 0);
	}

	q_size = pbm->msiq_ent_count * sizeof(struct pci_sun4v_msiq_entry);
	alloc_size = (pbm->msiq_num * q_size);
	order = get_order(alloc_size);

	pages = (unsigned long) pbm->msi_queues;

	free_pages(pages, order);

	pbm->msi_queues = NULL;
}

static int pci_sun4v_msiq_build_irq(struct pci_pbm_info *pbm,
				    unsigned long msiqid,
				    unsigned long devino)
{
	unsigned int virt_irq = sun4v_build_irq(pbm->devhandle, devino);

	if (!virt_irq)
		return -ENOMEM;

	if (pci_sun4v_msiq_setstate(pbm->devhandle, msiqid, HV_MSIQSTATE_IDLE))
		return -EINVAL;
	if (pci_sun4v_msiq_setvalid(pbm->devhandle, msiqid, HV_MSIQ_VALID))
		return -EINVAL;

	return virt_irq;
}

static const struct sparc64_msiq_ops pci_sun4v_msiq_ops = {
	.get_head	=	pci_sun4v_get_head,
	.dequeue_msi	=	pci_sun4v_dequeue_msi,
	.set_head	=	pci_sun4v_set_head,
	.msi_setup	=	pci_sun4v_msi_setup,
	.msi_teardown	=	pci_sun4v_msi_teardown,
	.msiq_alloc	=	pci_sun4v_msiq_alloc,
	.msiq_free	=	pci_sun4v_msiq_free,
	.msiq_build_irq	=	pci_sun4v_msiq_build_irq,
};

static void pci_sun4v_msi_init(struct pci_pbm_info *pbm)
{
	sparc64_pbm_msi_init(pbm, &pci_sun4v_msiq_ops);
}
#else /* CONFIG_PCI_MSI */
static void pci_sun4v_msi_init(struct pci_pbm_info *pbm)
{
}
#endif /* !(CONFIG_PCI_MSI) */

static int __devinit pci_sun4v_pbm_init(struct pci_pbm_info *pbm,
					struct platform_device *op, u32 devhandle)
{
	struct device_node *dp = op->dev.of_node;
	int err;

	pbm->numa_node = of_node_to_nid(dp);

	pbm->pci_ops = &sun4v_pci_ops;
	pbm->config_space_reg_bits = 12;

	pbm->index = pci_num_pbms++;

	pbm->op = op;

	pbm->devhandle = devhandle;

	pbm->name = dp->full_name;

	printk("%s: SUN4V PCI Bus Module\n", pbm->name);
	printk("%s: On NUMA node %d\n", pbm->name, pbm->numa_node);

	pci_determine_mem_io_space(pbm);

	pci_get_pbm_props(pbm);

	err = pci_sun4v_iommu_init(pbm);
	if (err)
		return err;

	pci_sun4v_msi_init(pbm);

	pci_sun4v_scan_bus(pbm, &op->dev);

	pbm->next = pci_pbm_root;
	pci_pbm_root = pbm;

	return 0;
}

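/* The first probe negotiates the PCI hypervisor API group and installs
 * sun4v_dma_ops; the device handle used for all hypervisor calls is taken
 * from the upper half of the node's first "reg" entry.
 */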
static int __devinit pci_sun4v_probe(struct platform_device *op,
				     const struct of_device_id *match)
{
	const struct linux_prom64_registers *regs;
	static int hvapi_negotiated = 0;
	struct pci_pbm_info *pbm;
	struct device_node *dp;
	struct iommu *iommu;
	u32 devhandle;
	int i, err;

	dp = op->dev.of_node;

	if (!hvapi_negotiated++) {
		err = sun4v_hvapi_register(HV_GRP_PCI,
					   vpci_major,
					   &vpci_minor);

		if (err) {
			printk(KERN_ERR PFX "Could not register hvapi, "
			       "err=%d\n", err);
			return err;
		}
		printk(KERN_INFO PFX "Registered hvapi major[%lu] minor[%lu]\n",
		       vpci_major, vpci_minor);

		dma_ops = &sun4v_dma_ops;
	}

	regs = of_get_property(dp, "reg", NULL);
	err = -ENODEV;
	if (!regs) {
		printk(KERN_ERR PFX "Could not find config registers\n");
		goto out_err;
	}
	devhandle = (regs->phys_addr >> 32UL) & 0x0fffffff;

	err = -ENOMEM;
	if (!iommu_batch_initialized) {
		for_each_possible_cpu(i) {
			unsigned long page = get_zeroed_page(GFP_KERNEL);

			if (!page)
				goto out_err;

			per_cpu(iommu_batch, i).pglist = (u64 *) page;
		}
		iommu_batch_initialized = 1;
	}

	pbm = kzalloc(sizeof(*pbm), GFP_KERNEL);
	if (!pbm) {
		printk(KERN_ERR PFX "Could not allocate pci_pbm_info\n");
		goto out_err;
	}

	iommu = kzalloc(sizeof(struct iommu), GFP_KERNEL);
	if (!iommu) {
		printk(KERN_ERR PFX "Could not allocate pbm iommu\n");
		goto out_free_controller;
	}

	pbm->iommu = iommu;

	err = pci_sun4v_pbm_init(pbm, op, devhandle);
	if (err)
		goto out_free_iommu;

	dev_set_drvdata(&op->dev, pbm);

	return 0;

out_free_iommu:
	kfree(pbm->iommu);

out_free_controller:
	kfree(pbm);

out_err:
	return err;
}

static struct of_device_id __initdata pci_sun4v_match[] = {
	{
		.name = "pci",
		.compatible = "SUNW,sun4v-pci",
	},
	{},
};

static struct of_platform_driver pci_sun4v_driver = {
	.driver = {
		.name = DRIVER_NAME,
		.owner = THIS_MODULE,
		.of_match_table = pci_sun4v_match,
	},
	.probe		= pci_sun4v_probe,
};

static int __init pci_sun4v_init(void)
{
	return of_register_platform_driver(&pci_sun4v_driver);
}

subsys_initcall(pci_sun4v_init);