/*
 * Copyright IBM Corp. 2012
 *
 * Author(s):
 *   Jan Glauber <jang@linux.vnet.ibm.com>
 */

#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/export.h>
#include <linux/iommu-helper.h>
#include <linux/dma-mapping.h>
#include <linux/vmalloc.h>
#include <linux/pci.h>
#include <asm/pci_dma.h>

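/*
 * Slab caches for the two table sizes used below: region/segment tables
 * and the smaller leaf page tables.
 */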
static struct kmem_cache *dma_region_table_cache;
static struct kmem_cache *dma_page_table_cache;
static int s390_iommu_strict;

static int zpci_refresh_global(struct zpci_dev *zdev)
{
	return zpci_refresh_trans((u64) zdev->fh << 32, zdev->start_dma,
				  zdev->iommu_pages * PAGE_SIZE);
}

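/*
 * Allocate a region or segment table with all entries marked invalid.
 * GFP_ATOMIC is used since callers may hold zdev->dma_table_lock.
 */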
unsigned long *dma_alloc_cpu_table(void)
{
	unsigned long *table, *entry;

	table = kmem_cache_alloc(dma_region_table_cache, GFP_ATOMIC);
	if (!table)
		return NULL;

	for (entry = table; entry < table + ZPCI_TABLE_ENTRIES; entry++)
		*entry = ZPCI_TABLE_INVALID;
	return table;
}

static void dma_free_cpu_table(void *table)
{
	kmem_cache_free(dma_region_table_cache, table);
}

static unsigned long *dma_alloc_page_table(void)
{
	unsigned long *table, *entry;

	table = kmem_cache_alloc(dma_page_table_cache, GFP_ATOMIC);
	if (!table)
		return NULL;

	for (entry = table; entry < table + ZPCI_PT_ENTRIES; entry++)
		*entry = ZPCI_PTE_INVALID;
	return table;
}

static void dma_free_page_table(void *table)
{
	kmem_cache_free(dma_page_table_cache, table);
}

static unsigned long *dma_get_seg_table_origin(unsigned long *entry)
{
	unsigned long *sto;

	if (reg_entry_isvalid(*entry))
		sto = get_rt_sto(*entry);
	else {
		sto = dma_alloc_cpu_table();
		if (!sto)
			return NULL;

		set_rt_sto(entry, sto);
		validate_rt_entry(entry);
		entry_clr_protected(entry);
	}
	return sto;
}

static unsigned long *dma_get_page_table_origin(unsigned long *entry)
{
	unsigned long *pto;

	if (reg_entry_isvalid(*entry))
		pto = get_st_pto(*entry);
	else {
		pto = dma_alloc_page_table();
		if (!pto)
			return NULL;
		set_st_pto(entry, pto);
		validate_st_entry(entry);
		entry_clr_protected(entry);
	}
	return pto;
}

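/*
 * Walk the translation table for @dma_addr, creating missing segment
 * and page tables along the way, and return a pointer to the leaf
 * page-table entry, or NULL if a table allocation failed.
 */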
unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr)
{
	unsigned long *sto, *pto;
	unsigned int rtx, sx, px;

	rtx = calc_rtx(dma_addr);
	sto = dma_get_seg_table_origin(&rto[rtx]);
	if (!sto)
		return NULL;

	sx = calc_sx(dma_addr);
	pto = dma_get_page_table_origin(&sto[sx]);
	if (!pto)
		return NULL;

	px = calc_px(dma_addr);
	return &pto[px];
}

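/*
 * Validate or invalidate a single page-table entry and set its
 * protection bit according to @flags.
 */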
void dma_update_cpu_trans(unsigned long *entry, void *page_addr, int flags)
{
	if (flags & ZPCI_PTE_INVALID) {
		invalidate_pt_entry(entry);
	} else {
		set_pt_pfaa(entry, page_addr);
		validate_pt_entry(entry);
	}

	if (flags & ZPCI_TABLE_PROTECTED)
		entry_set_protected(entry);
	else
		entry_clr_protected(entry);
}

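/*
 * Update the translation entries covering [dma_addr, dma_addr + size)
 * and, unless it can be skipped (see the comment below), refresh the
 * device TLB with RPCIT. If the refresh fails for newly validated
 * entries, the CPU-side updates are rolled back.
 */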
static int dma_update_trans(struct zpci_dev *zdev, unsigned long pa,
			    dma_addr_t dma_addr, size_t size, int flags)
{
	unsigned int nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
	u8 *page_addr = (u8 *) (pa & PAGE_MASK);
	dma_addr_t start_dma_addr = dma_addr;
	unsigned long irq_flags;
	unsigned long *entry;
	int i, rc = 0;

	if (!nr_pages)
		return -EINVAL;

	spin_lock_irqsave(&zdev->dma_table_lock, irq_flags);
	if (!zdev->dma_table) {
		rc = -EINVAL;
		goto no_refresh;
	}

	for (i = 0; i < nr_pages; i++) {
		entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr);
		if (!entry) {
			rc = -ENOMEM;
			goto undo_cpu_trans;
		}
		dma_update_cpu_trans(entry, page_addr, flags);
		page_addr += PAGE_SIZE;
		dma_addr += PAGE_SIZE;
	}

	/*
	 * With zdev->tlb_refresh == 0, rpcit is not required to establish new
	 * translations when previously invalid translation-table entries are
	 * validated. With lazy unmap, it also is skipped for previously valid
	 * entries, but a global rpcit is then required before any address can
	 * be re-used, i.e. after each iommu bitmap wrap-around.
	 */
	if (!zdev->tlb_refresh &&
			(!s390_iommu_strict ||
			((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID)))
		goto no_refresh;

	rc = zpci_refresh_trans((u64) zdev->fh << 32, start_dma_addr,
				nr_pages * PAGE_SIZE);
undo_cpu_trans:
	if (rc && ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID)) {
		flags = ZPCI_PTE_INVALID;
		while (i-- > 0) {
			page_addr -= PAGE_SIZE;
			dma_addr -= PAGE_SIZE;
			entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr);
			if (!entry)
				break;
			dma_update_cpu_trans(entry, page_addr, flags);
		}
	}

no_refresh:
	spin_unlock_irqrestore(&zdev->dma_table_lock, irq_flags);
	return rc;
}

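/*
 * Free a segment table including all page tables still attached to it.
 */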
void dma_free_seg_table(unsigned long entry)
{
	unsigned long *sto = get_rt_sto(entry);
	int sx;

	for (sx = 0; sx < ZPCI_TABLE_ENTRIES; sx++)
		if (reg_entry_isvalid(sto[sx]))
			dma_free_page_table(get_st_pto(sto[sx]));

	dma_free_cpu_table(sto);
}

void dma_cleanup_tables(unsigned long *table)
{
	int rtx;

	if (!table)
		return;

	for (rtx = 0; rtx < ZPCI_TABLE_ENTRIES; rtx++)
		if (reg_entry_isvalid(table[rtx]))
			dma_free_seg_table(table[rtx]);

	dma_free_cpu_table(table);
}

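/*
 * Bitmap allocator for the DMA address space. dma_alloc_iommu() first
 * searches behind the previous allocation (next_bit) and wraps around
 * to the start of the bitmap on failure; with lazy unmap, a wrap-around
 * requires a global TLB refresh before freed addresses may be re-used.
 */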
static unsigned long __dma_alloc_iommu(struct device *dev,
				       unsigned long start, int size)
{
	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
	unsigned long boundary_size;

	boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
			      PAGE_SIZE) >> PAGE_SHIFT;
	return iommu_area_alloc(zdev->iommu_bitmap, zdev->iommu_pages,
				start, size, zdev->start_dma >> PAGE_SHIFT,
				boundary_size, 0);
}

static unsigned long dma_alloc_iommu(struct device *dev, int size)
{
	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
	unsigned long offset, flags;
	int wrap = 0;

	spin_lock_irqsave(&zdev->iommu_bitmap_lock, flags);
	offset = __dma_alloc_iommu(dev, zdev->next_bit, size);
	if (offset == -1) {
		/* wrap-around */
		offset = __dma_alloc_iommu(dev, 0, size);
		wrap = 1;
	}

	if (offset != -1) {
		zdev->next_bit = offset + size;
		if (!zdev->tlb_refresh && !s390_iommu_strict && wrap)
			/* global flush after wrap-around with lazy unmap */
			zpci_refresh_global(zdev);
	}
	spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags);
	return offset;
}

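/*
 * Return an address range to the iommu bitmap. With lazy unmap, next_bit
 * is moved behind the freed range so those addresses are not handed out
 * again before the next wrap-around (and its global refresh).
 */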
static void dma_free_iommu(struct device *dev, unsigned long offset, int size)
{
	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
	unsigned long flags;

	spin_lock_irqsave(&zdev->iommu_bitmap_lock, flags);
	if (!zdev->iommu_bitmap)
		goto out;
	bitmap_clear(zdev->iommu_bitmap, offset, size);
	/*
	 * Lazy flush for unmap: need to move next_bit to avoid address re-use
	 * until wrap-around.
	 */
	if (!s390_iommu_strict && offset >= zdev->next_bit)
		zdev->next_bit = offset + size;
out:
	spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags);
}

static inline void zpci_err_dma(unsigned long rc, unsigned long addr)
{
	struct {
		unsigned long rc;
		unsigned long addr;
	} __packed data = {rc, addr};

	zpci_err_hex(&data, sizeof(data));
}

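/*
 * Map a page range for DMA: allocate iommu addresses, install valid
 * (and, for DMA_TO_DEVICE, protected) translation entries and return
 * the bus address including the offset into the first page.
 */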
static dma_addr_t s390_dma_map_pages(struct device *dev, struct page *page,
				     unsigned long offset, size_t size,
				     enum dma_data_direction direction,
				     unsigned long attrs)
{
	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
	unsigned long nr_pages, iommu_page_index;
	unsigned long pa = page_to_phys(page) + offset;
	int flags = ZPCI_PTE_VALID;
	dma_addr_t dma_addr;
	int ret;

	/* This rounds up number of pages based on size and offset */
	nr_pages = iommu_num_pages(pa, size, PAGE_SIZE);
	iommu_page_index = dma_alloc_iommu(dev, nr_pages);
	if (iommu_page_index == -1) {
		ret = -ENOSPC;
		goto out_err;
	}

	/* Use rounded up size */
	size = nr_pages * PAGE_SIZE;
	dma_addr = zdev->start_dma + iommu_page_index * PAGE_SIZE;

	if (direction == DMA_NONE || direction == DMA_TO_DEVICE)
		flags |= ZPCI_TABLE_PROTECTED;

	ret = dma_update_trans(zdev, pa, dma_addr, size, flags);
	if (ret)
		goto out_free;

	atomic64_add(nr_pages, &zdev->mapped_pages);
	return dma_addr + (offset & ~PAGE_MASK);

out_free:
	dma_free_iommu(dev, iommu_page_index, nr_pages);
out_err:
	zpci_err("map error:\n");
	zpci_err_dma(ret, pa);
	return DMA_ERROR_CODE;
}

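/*
 * Invalidate the translation entries of a mapped range and give its
 * iommu addresses back to the bitmap.
 */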
static void s390_dma_unmap_pages(struct device *dev, dma_addr_t dma_addr,
				 size_t size, enum dma_data_direction direction,
				 unsigned long attrs)
{
	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
	unsigned long iommu_page_index;
	int npages, ret;

	npages = iommu_num_pages(dma_addr, size, PAGE_SIZE);
	dma_addr = dma_addr & PAGE_MASK;
	ret = dma_update_trans(zdev, 0, dma_addr, npages * PAGE_SIZE,
			       ZPCI_PTE_INVALID);
	if (ret) {
		zpci_err("unmap error:\n");
		zpci_err_dma(ret, dma_addr);
		return;
	}

	atomic64_add(npages, &zdev->unmapped_pages);
	iommu_page_index = (dma_addr - zdev->start_dma) >> PAGE_SHIFT;
	dma_free_iommu(dev, iommu_page_index, npages);
}

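/*
 * Allocate a zeroed, page-aligned buffer and map it DMA_BIDIRECTIONAL;
 * the kernel address is returned, the bus address via @dma_handle.
 */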
static void *s390_dma_alloc(struct device *dev, size_t size,
			    dma_addr_t *dma_handle, gfp_t flag,
			    unsigned long attrs)
{
	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
	struct page *page;
	unsigned long pa;
	dma_addr_t map;

	size = PAGE_ALIGN(size);
	page = alloc_pages(flag, get_order(size));
	if (!page)
		return NULL;

	pa = page_to_phys(page);
	memset((void *) pa, 0, size);

	map = s390_dma_map_pages(dev, page, 0, size, DMA_BIDIRECTIONAL, 0);
	if (dma_mapping_error(dev, map)) {
		free_pages(pa, get_order(size));
		return NULL;
	}

	atomic64_add(size / PAGE_SIZE, &zdev->allocated_pages);
	if (dma_handle)
		*dma_handle = map;
	return (void *) pa;
}

static void s390_dma_free(struct device *dev, size_t size,
			  void *pa, dma_addr_t dma_handle,
			  unsigned long attrs)
{
	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));

	size = PAGE_ALIGN(size);
	atomic64_sub(size / PAGE_SIZE, &zdev->allocated_pages);
	s390_dma_unmap_pages(dev, dma_handle, size, DMA_BIDIRECTIONAL, 0);
	free_pages((unsigned long) pa, get_order(size));
}

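/*
 * Map a scatterlist element by element; if any element fails, everything
 * mapped so far is unmapped again and 0 is returned.
 */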
static int s390_dma_map_sg(struct device *dev, struct scatterlist *sg,
			   int nr_elements, enum dma_data_direction dir,
			   unsigned long attrs)
{
	int mapped_elements = 0;
	struct scatterlist *s;
	int i;

	for_each_sg(sg, s, nr_elements, i) {
		struct page *page = sg_page(s);
		s->dma_address = s390_dma_map_pages(dev, page, s->offset,
						    s->length, dir, 0);
		if (!dma_mapping_error(dev, s->dma_address)) {
			s->dma_length = s->length;
			mapped_elements++;
		} else
			goto unmap;
	}
out:
	return mapped_elements;

unmap:
	for_each_sg(sg, s, mapped_elements, i) {
		if (s->dma_address)
			s390_dma_unmap_pages(dev, s->dma_address, s->dma_length,
					     dir, 0);
		s->dma_address = 0;
		s->dma_length = 0;
	}
	mapped_elements = 0;
	goto out;
}

static void s390_dma_unmap_sg(struct device *dev, struct scatterlist *sg,
			      int nr_elements, enum dma_data_direction dir,
			      unsigned long attrs)
{
	struct scatterlist *s;
	int i;

	for_each_sg(sg, s, nr_elements, i) {
		s390_dma_unmap_pages(dev, s->dma_address, s->dma_length, dir,
				     0);
		s->dma_address = 0;
		s->dma_length = 0;
	}
}

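/*
 * Set up per-device DMA state: the translation-table root, an iommu
 * bitmap sized to the usable DMA address range, and registration of
 * the table with the hardware via zpci_register_ioat().
 */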
int zpci_dma_init_device(struct zpci_dev *zdev)
{
	int rc;

	/*
	 * At this point, if the device is part of an IOMMU domain, this would
	 * be a strong hint towards a bug in the IOMMU API (common) code and/or
	 * simultaneous access via IOMMU and DMA API. So let's issue a warning.
	 */
	WARN_ON(zdev->s390_domain);

	spin_lock_init(&zdev->iommu_bitmap_lock);
	spin_lock_init(&zdev->dma_table_lock);

	zdev->dma_table = dma_alloc_cpu_table();
	if (!zdev->dma_table) {
		rc = -ENOMEM;
		goto out;
	}

	/*
	 * Restrict the iommu bitmap size to the minimum of the following:
	 * - main memory size
	 * - 3-level pagetable address limit minus start_dma offset
	 * - DMA address range allowed by the hardware (clp query pci fn)
	 *
	 * Also set zdev->end_dma to the actual end address of the usable
	 * range, instead of the theoretical maximum as reported by hardware.
	 */
	zdev->start_dma = PAGE_ALIGN(zdev->start_dma);
	zdev->iommu_size = min3((u64) high_memory,
				ZPCI_TABLE_SIZE_RT - zdev->start_dma,
				zdev->end_dma - zdev->start_dma + 1);
	zdev->end_dma = zdev->start_dma + zdev->iommu_size - 1;
	zdev->iommu_pages = zdev->iommu_size >> PAGE_SHIFT;
	zdev->iommu_bitmap = vzalloc(zdev->iommu_pages / 8);
	if (!zdev->iommu_bitmap) {
		rc = -ENOMEM;
		goto free_dma_table;
	}

	rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
				(u64) zdev->dma_table);
	if (rc)
		goto free_bitmap;

	return 0;
free_bitmap:
	vfree(zdev->iommu_bitmap);
	zdev->iommu_bitmap = NULL;
free_dma_table:
	dma_free_cpu_table(zdev->dma_table);
	zdev->dma_table = NULL;
out:
	return rc;
}

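/* Tear down everything that zpci_dma_init_device() set up. */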
void zpci_dma_exit_device(struct zpci_dev *zdev)
{
	/*
	 * At this point, if the device is part of an IOMMU domain, this would
	 * be a strong hint towards a bug in the IOMMU API (common) code and/or
	 * simultaneous access via IOMMU and DMA API. So let's issue a warning.
	 */
	WARN_ON(zdev->s390_domain);

	zpci_unregister_ioat(zdev, 0);
	dma_cleanup_tables(zdev->dma_table);
	zdev->dma_table = NULL;
	vfree(zdev->iommu_bitmap);
	zdev->iommu_bitmap = NULL;
	zdev->next_bit = 0;
}

static int __init dma_alloc_cpu_table_caches(void)
{
	dma_region_table_cache = kmem_cache_create("PCI_DMA_region_tables",
					ZPCI_TABLE_SIZE, ZPCI_TABLE_ALIGN,
					0, NULL);
	if (!dma_region_table_cache)
		return -ENOMEM;

	dma_page_table_cache = kmem_cache_create("PCI_DMA_page_tables",
					ZPCI_PT_SIZE, ZPCI_PT_ALIGN,
					0, NULL);
	if (!dma_page_table_cache) {
		kmem_cache_destroy(dma_region_table_cache);
		return -ENOMEM;
	}
	return 0;
}

int __init zpci_dma_init(void)
{
	return dma_alloc_cpu_table_caches();
}

void zpci_dma_exit(void)
{
	kmem_cache_destroy(dma_page_table_cache);
	kmem_cache_destroy(dma_region_table_cache);
}

#define PREALLOC_DMA_DEBUG_ENTRIES	(1 << 16)

static int __init dma_debug_do_init(void)
{
	dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES);
	return 0;
}
fs_initcall(dma_debug_do_init);

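/* The dma_map_ops used for all zPCI devices. */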
struct dma_map_ops s390_pci_dma_ops = {
	.alloc		= s390_dma_alloc,
	.free		= s390_dma_free,
	.map_sg		= s390_dma_map_sg,
	.unmap_sg	= s390_dma_unmap_sg,
	.map_page	= s390_dma_map_pages,
	.unmap_page	= s390_dma_unmap_pages,
	/* if we support direct DMA this must be conditional */
	.is_phys	= 0,
	/* dma_supported is unconditionally true without a callback */
};
563
EXPORT_SYMBOL_GPL(s390_pci_dma_ops);

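/*
 * "s390_iommu=strict" disables the default lazy unmap: every unmap then
 * triggers an immediate TLB refresh in dma_update_trans().
 */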
static int __init s390_iommu_setup(char *str)
{
	if (!strncmp(str, "strict", 6))
		s390_iommu_strict = 1;
	return 0;
}

__setup("s390_iommu=", s390_iommu_setup);