/*
 * Copyright IBM Corp. 2012
 *
 * Author(s):
 *   Jan Glauber <jang@linux.vnet.ibm.com>
 */

#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/export.h>
#include <linux/iommu-helper.h>
#include <linux/dma-mapping.h>
#include <linux/vmalloc.h>
#include <linux/pci.h>
#include <asm/pci_dma.h>

static struct kmem_cache *dma_region_table_cache;
static struct kmem_cache *dma_page_table_cache;
static int s390_iommu_strict;

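/* Refresh the device TLB (rpcit) for the complete iommu address range. */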
static int zpci_refresh_global(struct zpci_dev *zdev)
{
	return zpci_refresh_trans((u64) zdev->fh << 32, zdev->start_dma,
				  zdev->iommu_pages * PAGE_SIZE);
}

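/*
 * Allocate a region or segment table (both share the same format) with
 * all entries initialized as invalid.
 */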
unsigned long *dma_alloc_cpu_table(void)
{
	unsigned long *table, *entry;

	table = kmem_cache_alloc(dma_region_table_cache, GFP_ATOMIC);
	if (!table)
		return NULL;

	for (entry = table; entry < table + ZPCI_TABLE_ENTRIES; entry++)
		*entry = ZPCI_TABLE_INVALID;
	return table;
}

static void dma_free_cpu_table(void *table)
{
	kmem_cache_free(dma_region_table_cache, table);
}

static unsigned long *dma_alloc_page_table(void)
{
	unsigned long *table, *entry;

	table = kmem_cache_alloc(dma_page_table_cache, GFP_ATOMIC);
	if (!table)
		return NULL;

	for (entry = table; entry < table + ZPCI_PT_ENTRIES; entry++)
		*entry = ZPCI_PTE_INVALID;
	return table;
}

static void dma_free_page_table(void *table)
{
	kmem_cache_free(dma_page_table_cache, table);
}

static unsigned long *dma_get_seg_table_origin(unsigned long *entry)
{
	unsigned long *sto;

	if (reg_entry_isvalid(*entry))
		sto = get_rt_sto(*entry);
	else {
		sto = dma_alloc_cpu_table();
		if (!sto)
			return NULL;

		set_rt_sto(entry, sto);
		validate_rt_entry(entry);
		entry_clr_protected(entry);
	}
	return sto;
}

static unsigned long *dma_get_page_table_origin(unsigned long *entry)
{
	unsigned long *pto;

	if (reg_entry_isvalid(*entry))
		pto = get_st_pto(*entry);
	else {
		pto = dma_alloc_page_table();
		if (!pto)
			return NULL;
		set_st_pto(entry, pto);
		validate_st_entry(entry);
		entry_clr_protected(entry);
	}
	return pto;
}

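/*
 * Walk the translation table for @dma_addr, allocating missing segment
 * and page tables on the way, and return a pointer to the page table
 * entry (or NULL if a table allocation fails).
 */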
unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr)
{
	unsigned long *sto, *pto;
	unsigned int rtx, sx, px;

	rtx = calc_rtx(dma_addr);
	sto = dma_get_seg_table_origin(&rto[rtx]);
	if (!sto)
		return NULL;

	sx = calc_sx(dma_addr);
	pto = dma_get_page_table_origin(&sto[sx]);
	if (!pto)
		return NULL;

	px = calc_px(dma_addr);
	return &pto[px];
}

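/*
 * Validate or invalidate a single page table entry according to @flags
 * and update its protection bit.
 */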
void dma_update_cpu_trans(unsigned long *entry, void *page_addr, int flags)
{
	if (flags & ZPCI_PTE_INVALID) {
		invalidate_pt_entry(entry);
	} else {
		set_pt_pfaa(entry, page_addr);
		validate_pt_entry(entry);
	}

	if (flags & ZPCI_TABLE_PROTECTED)
		entry_set_protected(entry);
	else
		entry_clr_protected(entry);
}

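/*
 * Update the translation table entries for a range of pages and refresh
 * the device TLB where required. If the refresh fails while establishing
 * a new mapping, all entries validated so far are rolled back.
 */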
static int dma_update_trans(struct zpci_dev *zdev, unsigned long pa,
			    dma_addr_t dma_addr, size_t size, int flags)
{
	unsigned int nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
	u8 *page_addr = (u8 *) (pa & PAGE_MASK);
	dma_addr_t start_dma_addr = dma_addr;
	unsigned long irq_flags;
	unsigned long *entry;
	int i, rc = 0;

	if (!nr_pages)
		return -EINVAL;

	spin_lock_irqsave(&zdev->dma_table_lock, irq_flags);
	if (!zdev->dma_table) {
		rc = -EINVAL;
		goto no_refresh;
	}

	for (i = 0; i < nr_pages; i++) {
		entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr);
		if (!entry) {
			rc = -ENOMEM;
			goto undo_cpu_trans;
		}
		dma_update_cpu_trans(entry, page_addr, flags);
		page_addr += PAGE_SIZE;
		dma_addr += PAGE_SIZE;
	}

	/*
	 * With zdev->tlb_refresh == 0, rpcit is not required to establish new
	 * translations when previously invalid translation-table entries are
	 * validated. With lazy unmap, it also is skipped for previously valid
	 * entries, but a global rpcit is then required before any address can
	 * be re-used, i.e. after each iommu bitmap wrap-around.
	 */
	if (!zdev->tlb_refresh &&
			(!s390_iommu_strict ||
			((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID)))
		goto no_refresh;

	rc = zpci_refresh_trans((u64) zdev->fh << 32, start_dma_addr,
				nr_pages * PAGE_SIZE);
undo_cpu_trans:
	if (rc && ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID)) {
		flags = ZPCI_PTE_INVALID;
		while (i-- > 0) {
			page_addr -= PAGE_SIZE;
			dma_addr -= PAGE_SIZE;
			entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr);
			if (!entry)
				break;
			dma_update_cpu_trans(entry, page_addr, flags);
		}
	}

no_refresh:
	spin_unlock_irqrestore(&zdev->dma_table_lock, irq_flags);
	return rc;
}

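/*
 * Free all page tables referenced from the segment table behind region
 * table entry @entry, then free the segment table itself.
 */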
void dma_free_seg_table(unsigned long entry)
{
	unsigned long *sto = get_rt_sto(entry);
	int sx;

	for (sx = 0; sx < ZPCI_TABLE_ENTRIES; sx++)
		if (reg_entry_isvalid(sto[sx]))
			dma_free_page_table(get_st_pto(sto[sx]));

	dma_free_cpu_table(sto);
}

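/*
 * Free a complete translation table hierarchy: all page tables, all
 * segment tables, and the region table itself.
 */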
void dma_cleanup_tables(unsigned long *table)
{
	int rtx;

	if (!table)
		return;

	for (rtx = 0; rtx < ZPCI_TABLE_ENTRIES; rtx++)
		if (reg_entry_isvalid(table[rtx]))
			dma_free_seg_table(table[rtx]);

	dma_free_cpu_table(table);
}

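/*
 * Find a free range of @size pages in the iommu bitmap, beginning the
 * search at page @start and honoring the segment boundary of @dev.
 */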
static unsigned long __dma_alloc_iommu(struct device *dev,
				       unsigned long start, int size)
{
	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
	unsigned long boundary_size;

	boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
			      PAGE_SIZE) >> PAGE_SHIFT;
	return iommu_area_alloc(zdev->iommu_bitmap, zdev->iommu_pages,
				start, size, zdev->start_dma >> PAGE_SHIFT,
				boundary_size, 0);
}

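/*
 * Next-fit allocation: continue behind the previous allocation and wrap
 * around to the bitmap start when no free range is found. With lazy
 * unmap, a wrap-around requires a global TLB refresh before addresses
 * may be re-used.
 */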
static unsigned long dma_alloc_iommu(struct device *dev, int size)
{
	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
	unsigned long offset, flags;
	int wrap = 0;

	spin_lock_irqsave(&zdev->iommu_bitmap_lock, flags);
	offset = __dma_alloc_iommu(dev, zdev->next_bit, size);
	if (offset == -1) {
		/* wrap-around */
		offset = __dma_alloc_iommu(dev, 0, size);
		wrap = 1;
	}

	if (offset != -1) {
		zdev->next_bit = offset + size;
		if (!zdev->tlb_refresh && !s390_iommu_strict && wrap)
			/* global flush after wrap-around with lazy unmap */
			zpci_refresh_global(zdev);
	}
	spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags);
	return offset;
}

static void dma_free_iommu(struct device *dev, unsigned long offset, int size)
{
	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
	unsigned long flags;

	spin_lock_irqsave(&zdev->iommu_bitmap_lock, flags);
	if (!zdev->iommu_bitmap)
		goto out;
	bitmap_clear(zdev->iommu_bitmap, offset, size);
	/*
	 * Lazy flush for unmap: need to move next_bit to avoid address re-use
	 * until wrap-around.
	 */
	if (!s390_iommu_strict && offset >= zdev->next_bit)
		zdev->next_bit = offset + size;
out:
	spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags);
}

static inline void zpci_err_dma(unsigned long rc, unsigned long addr)
{
	struct {
		unsigned long rc;
		unsigned long addr;
	} __packed data = {rc, addr};

	zpci_err_hex(&data, sizeof(data));
}

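/*
 * Allocate an iommu address range for @size bytes of @page, install the
 * translation table entries, and return the DMA address of the mapping
 * (DMA_ERROR_CODE on failure).
 */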
static dma_addr_t s390_dma_map_pages(struct device *dev, struct page *page,
				     unsigned long offset, size_t size,
				     enum dma_data_direction direction,
				     struct dma_attrs *attrs)
{
	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
	unsigned long nr_pages, iommu_page_index;
	unsigned long pa = page_to_phys(page) + offset;
	int flags = ZPCI_PTE_VALID;
	dma_addr_t dma_addr;
	int ret;

	/* This rounds up number of pages based on size and offset */
	nr_pages = iommu_num_pages(pa, size, PAGE_SIZE);
	iommu_page_index = dma_alloc_iommu(dev, nr_pages);
	if (iommu_page_index == -1) {
		ret = -ENOSPC;
		goto out_err;
	}

	/* Use rounded up size */
	size = nr_pages * PAGE_SIZE;

	dma_addr = zdev->start_dma + iommu_page_index * PAGE_SIZE;
	if (dma_addr + size > zdev->end_dma) {
		ret = -ERANGE;
		goto out_free;
	}

	if (direction == DMA_NONE || direction == DMA_TO_DEVICE)
		flags |= ZPCI_TABLE_PROTECTED;

	ret = dma_update_trans(zdev, pa, dma_addr, size, flags);
	if (ret)
		goto out_free;

	atomic64_add(nr_pages, &zdev->mapped_pages);
	return dma_addr + (offset & ~PAGE_MASK);

out_free:
	dma_free_iommu(dev, iommu_page_index, nr_pages);
out_err:
	zpci_err("map error:\n");
	zpci_err_dma(ret, pa);
	return DMA_ERROR_CODE;
}

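/*
 * Invalidate the translation table entries of a mapping and return its
 * address range to the iommu bitmap.
 */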
static void s390_dma_unmap_pages(struct device *dev, dma_addr_t dma_addr,
				 size_t size, enum dma_data_direction direction,
				 struct dma_attrs *attrs)
{
	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
	unsigned long iommu_page_index;
	int npages, ret;

	npages = iommu_num_pages(dma_addr, size, PAGE_SIZE);
	dma_addr = dma_addr & PAGE_MASK;
	ret = dma_update_trans(zdev, 0, dma_addr, npages * PAGE_SIZE,
			       ZPCI_PTE_INVALID);
	if (ret) {
		zpci_err("unmap error:\n");
		zpci_err_dma(ret, dma_addr);
		return;
	}

	atomic64_add(npages, &zdev->unmapped_pages);
	iommu_page_index = (dma_addr - zdev->start_dma) >> PAGE_SHIFT;
	dma_free_iommu(dev, iommu_page_index, npages);
}

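/*
 * Allocate zeroed, page-aligned memory and map it bidirectionally for
 * DMA; the DMA address is returned via @dma_handle.
 */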
static void *s390_dma_alloc(struct device *dev, size_t size,
			    dma_addr_t *dma_handle, gfp_t flag,
			    struct dma_attrs *attrs)
{
	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
	struct page *page;
	unsigned long pa;
	dma_addr_t map;

	size = PAGE_ALIGN(size);
	page = alloc_pages(flag, get_order(size));
	if (!page)
		return NULL;

	pa = page_to_phys(page);
	memset((void *) pa, 0, size);

	map = s390_dma_map_pages(dev, page, 0, size, DMA_BIDIRECTIONAL, NULL);
	if (dma_mapping_error(dev, map)) {
		free_pages(pa, get_order(size));
		return NULL;
	}

	atomic64_add(size / PAGE_SIZE, &zdev->allocated_pages);
	if (dma_handle)
		*dma_handle = map;
	return (void *) pa;
}

static void s390_dma_free(struct device *dev, size_t size,
			  void *pa, dma_addr_t dma_handle,
			  struct dma_attrs *attrs)
{
	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));

	size = PAGE_ALIGN(size);
	atomic64_sub(size / PAGE_SIZE, &zdev->allocated_pages);
	s390_dma_unmap_pages(dev, dma_handle, size, DMA_BIDIRECTIONAL, NULL);
	free_pages((unsigned long) pa, get_order(size));
}

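/*
 * Map each scatterlist element with a separate s390_dma_map_pages()
 * call; on failure, unmap all elements mapped so far and return 0.
 */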
static int s390_dma_map_sg(struct device *dev, struct scatterlist *sg,
			   int nr_elements, enum dma_data_direction dir,
			   struct dma_attrs *attrs)
{
	int mapped_elements = 0;
	struct scatterlist *s;
	int i;

	for_each_sg(sg, s, nr_elements, i) {
		struct page *page = sg_page(s);
		s->dma_address = s390_dma_map_pages(dev, page, s->offset,
						    s->length, dir, NULL);
		if (!dma_mapping_error(dev, s->dma_address)) {
			s->dma_length = s->length;
			mapped_elements++;
		} else
			goto unmap;
	}
out:
	return mapped_elements;

unmap:
	for_each_sg(sg, s, mapped_elements, i) {
		if (s->dma_address)
			s390_dma_unmap_pages(dev, s->dma_address, s->dma_length,
					     dir, NULL);
		s->dma_address = 0;
		s->dma_length = 0;
	}
	mapped_elements = 0;
	goto out;
}

static void s390_dma_unmap_sg(struct device *dev, struct scatterlist *sg,
			      int nr_elements, enum dma_data_direction dir,
			      struct dma_attrs *attrs)
{
	struct scatterlist *s;
	int i;

	for_each_sg(sg, s, nr_elements, i) {
		s390_dma_unmap_pages(dev, s->dma_address, s->dma_length, dir, NULL);
		s->dma_address = 0;
		s->dma_length = 0;
	}
}

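/*
 * Set up DMA translation for @zdev: allocate the root translation table
 * and the iommu bitmap, then register the translation table with the
 * device.
 */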
int zpci_dma_init_device(struct zpci_dev *zdev)
{
	int rc;

	/*
	 * At this point, if the device is part of an IOMMU domain, this would
	 * be a strong hint towards a bug in the IOMMU API (common) code and/or
	 * simultaneous access via IOMMU and DMA API. So let's issue a warning.
	 */
	WARN_ON(zdev->s390_domain);

	spin_lock_init(&zdev->iommu_bitmap_lock);
	spin_lock_init(&zdev->dma_table_lock);

	zdev->dma_table = dma_alloc_cpu_table();
	if (!zdev->dma_table) {
		rc = -ENOMEM;
		goto out;
	}

	/*
	 * Restrict the iommu bitmap size to the minimum of the following:
	 * - main memory size
	 * - 3-level pagetable address limit minus start_dma offset
	 * - DMA address range allowed by the hardware (clp query pci fn)
	 *
	 * Also set zdev->end_dma to the actual end address of the usable
	 * range, instead of the theoretical maximum as reported by hardware.
	 */
	zdev->start_dma = PAGE_ALIGN(zdev->start_dma);
	zdev->iommu_size = min3((u64) high_memory,
				ZPCI_TABLE_SIZE_RT - zdev->start_dma,
				zdev->end_dma - zdev->start_dma + 1);
	zdev->end_dma = zdev->start_dma + zdev->iommu_size - 1;
	zdev->iommu_pages = zdev->iommu_size >> PAGE_SHIFT;
	zdev->iommu_bitmap = vzalloc(zdev->iommu_pages / 8);
	if (!zdev->iommu_bitmap) {
		rc = -ENOMEM;
		goto free_dma_table;
	}

	rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
				(u64) zdev->dma_table);
	if (rc)
		goto free_bitmap;

	return 0;
free_bitmap:
	vfree(zdev->iommu_bitmap);
	zdev->iommu_bitmap = NULL;
free_dma_table:
	dma_free_cpu_table(zdev->dma_table);
	zdev->dma_table = NULL;
out:
	return rc;
}

void zpci_dma_exit_device(struct zpci_dev *zdev)
{
	/*
	 * At this point, if the device is part of an IOMMU domain, this would
	 * be a strong hint towards a bug in the IOMMU API (common) code and/or
	 * simultaneous access via IOMMU and DMA API. So let's issue a warning.
	 */
	WARN_ON(zdev->s390_domain);

	zpci_unregister_ioat(zdev, 0);
	dma_cleanup_tables(zdev->dma_table);
	zdev->dma_table = NULL;
	vfree(zdev->iommu_bitmap);
	zdev->iommu_bitmap = NULL;
	zdev->next_bit = 0;
}

static int __init dma_alloc_cpu_table_caches(void)
{
	dma_region_table_cache = kmem_cache_create("PCI_DMA_region_tables",
					ZPCI_TABLE_SIZE, ZPCI_TABLE_ALIGN,
					0, NULL);
	if (!dma_region_table_cache)
		return -ENOMEM;

	dma_page_table_cache = kmem_cache_create("PCI_DMA_page_tables",
					ZPCI_PT_SIZE, ZPCI_PT_ALIGN,
					0, NULL);
	if (!dma_page_table_cache) {
		kmem_cache_destroy(dma_region_table_cache);
		return -ENOMEM;
	}
	return 0;
}

int __init zpci_dma_init(void)
{
	return dma_alloc_cpu_table_caches();
}

void zpci_dma_exit(void)
{
	kmem_cache_destroy(dma_page_table_cache);
	kmem_cache_destroy(dma_region_table_cache);
}

#define PREALLOC_DMA_DEBUG_ENTRIES	(1 << 16)

static int __init dma_debug_do_init(void)
{
	dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES);
	return 0;
}
fs_initcall(dma_debug_do_init);

struct dma_map_ops s390_pci_dma_ops = {
	.alloc		= s390_dma_alloc,
	.free		= s390_dma_free,
	.map_sg		= s390_dma_map_sg,
	.unmap_sg	= s390_dma_unmap_sg,
	.map_page	= s390_dma_map_pages,
	.unmap_page	= s390_dma_unmap_pages,
	/* if we support direct DMA this must be conditional */
	.is_phys	= 0,
	/* dma_supported is unconditionally true without a callback */
};
EXPORT_SYMBOL_GPL(s390_pci_dma_ops);

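/*
 * "s390_iommu=strict" on the kernel command line makes every unmap
 * refresh the device TLB instead of using lazy flushing.
 */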
static int __init s390_iommu_setup(char *str)
{
	if (!strncmp(str, "strict", 6))
		s390_iommu_strict = 1;
	return 0;
}

__setup("s390_iommu=", s390_iommu_setup);