/*
 * Dynamic DMA mapping support for AMD Hammer.
 *
 * Use the integrated AGP GART in the Hammer northbridge as an IOMMU for PCI.
 * This allows to use PCI devices that only support 32bit addresses on systems
 * with more than 4GB.
 *
 * See Documentation/DMA-mapping.txt for the interface specification.
 *
 * Copyright 2002 Andi Kleen, SuSE Labs.
 * Subject to the GNU General Public License v2 only.
 */

#include <linux/types.h>
#include <linux/ctype.h>
#include <linux/agp_backend.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/string.h>
#include <linux/spinlock.h>
#include <linux/pci.h>
#include <linux/module.h>
#include <linux/topology.h>
#include <linux/interrupt.h>
#include <linux/bitops.h>
#include <linux/kdebug.h>
#include <linux/scatterlist.h>
#include <linux/iommu-helper.h>
#include <linux/sysdev.h>
#include <asm/atomic.h>
#include <asm/io.h>
#include <asm/mtrr.h>
#include <asm/pgtable.h>
#include <asm/proto.h>
#include <asm/iommu.h>
#include <asm/gart.h>
#include <asm/cacheflush.h>
#include <asm/swiotlb.h>
#include <asm/dma.h>
#include <asm/k8.h>

static unsigned long iommu_bus_base;	/* GART remapping area (physical) */
43
static unsigned long iommu_size;	/* size of remapping area bytes */
L
Linus Torvalds 已提交
44 45
static unsigned long iommu_pages;	/* .. and in pages */

46
static u32 *iommu_gatt_base;		/* Remapping table */
L
Linus Torvalds 已提交
47

48
/* Allocation bitmap for the remapping area: */
L
Linus Torvalds 已提交
49
static DEFINE_SPINLOCK(iommu_bitmap_lock);
50 51
/* Guarded by iommu_bitmap_lock: */
static unsigned long *iommu_gart_bitmap;
L
Linus Torvalds 已提交
52

53
static u32 gart_unmapped_entry;
L
Linus Torvalds 已提交
54 55 56 57 58 59 60

#define GPTE_VALID    1
#define GPTE_COHERENT 2
#define GPTE_ENCODE(x) \
	(((x) & 0xfffff000) | (((x) >> 32) << 4) | GPTE_VALID | GPTE_COHERENT)
#define GPTE_DECODE(x) (((x) & 0xfffff000) | (((u64)(x) & 0xff0) << 28))

61
#define EMERGENCY_PAGES 32 /* = 128KB */
L
Linus Torvalds 已提交
62 63 64 65 66 67 68 69 70 71 72 73

#ifdef CONFIG_AGP
#define AGPEXTERN extern
#else
#define AGPEXTERN
#endif

/* backdoor interface to AGP driver */
AGPEXTERN int agp_memory_reserved;
AGPEXTERN __u32 *agp_gatt_table;

static unsigned long next_bit;  /* protected by iommu_bitmap_lock */
74
static int need_flush;		/* global flush state. set for each gart wrap */
L
Linus Torvalds 已提交
75

76
static unsigned long alloc_iommu(struct device *dev, int size,
77
				 unsigned long align_mask, u64 dma_mask)
78
{
L
Linus Torvalds 已提交
79
	unsigned long offset, flags;
80 81
	unsigned long boundary_size;
	unsigned long base_index;
82
	unsigned long limit;
83 84 85

	base_index = ALIGN(iommu_bus_base & dma_get_seg_boundary(dev),
			   PAGE_SIZE) >> PAGE_SHIFT;
86
	boundary_size = ALIGN((unsigned long long)dma_get_seg_boundary(dev) + 1,
87
			      PAGE_SIZE) >> PAGE_SHIFT;
L
Linus Torvalds 已提交
88

89 90 91 92
	limit = iommu_device_max_index(iommu_pages,
				       DIV_ROUND_UP(iommu_bus_base, PAGE_SIZE),
				       dma_mask >> PAGE_SHIFT);

93
	spin_lock_irqsave(&iommu_bitmap_lock, flags);
94 95 96 97 98 99 100

	if (limit <= next_bit) {
		need_flush = 1;
		next_bit = 0;
	}

	offset = iommu_area_alloc(iommu_gart_bitmap, limit, next_bit,
101
				  size, base_index, boundary_size, align_mask);
102
	if (offset == -1 && next_bit) {
L
Linus Torvalds 已提交
103
		need_flush = 1;
104
		offset = iommu_area_alloc(iommu_gart_bitmap, limit, 0,
105 106
					  size, base_index, boundary_size,
					  align_mask);
L
Linus Torvalds 已提交
107
	}
108 109 110
	if (offset != -1) {
		next_bit = offset+size;
		if (next_bit >= iommu_pages) {
L
Linus Torvalds 已提交
111 112
			next_bit = 0;
			need_flush = 1;
113 114
		}
	}
L
Linus Torvalds 已提交
115 116
	if (iommu_fullflush)
		need_flush = 1;
117 118
	spin_unlock_irqrestore(&iommu_bitmap_lock, flags);

L
Linus Torvalds 已提交
119
	return offset;
120
}
L
Linus Torvalds 已提交
121 122

static void free_iommu(unsigned long offset, int size)
123
{
L
Linus Torvalds 已提交
124
	unsigned long flags;
125

L
Linus Torvalds 已提交
126
	spin_lock_irqsave(&iommu_bitmap_lock, flags);
127
	iommu_area_free(iommu_gart_bitmap, offset, size);
L
Linus Torvalds 已提交
128
	spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
129
}
L
Linus Torvalds 已提交
130

131
/*
L
Linus Torvalds 已提交
132 133
 * Use global flush state to avoid races with multiple flushers.
 */
134
static void flush_gart(void)
135
{
L
Linus Torvalds 已提交
136
	unsigned long flags;
137

L
Linus Torvalds 已提交
138
	spin_lock_irqsave(&iommu_bitmap_lock, flags);
139 140
	if (need_flush) {
		k8_flush_garts();
L
Linus Torvalds 已提交
141
		need_flush = 0;
142
	}
L
Linus Torvalds 已提交
143
	spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
144
}
L
Linus Torvalds 已提交
145 146 147

#ifdef CONFIG_IOMMU_LEAK

148 149 150 151 152 153 154 155 156 157 158
#define SET_LEAK(x)							\
	do {								\
		if (iommu_leak_tab)					\
			iommu_leak_tab[x] = __builtin_return_address(0);\
	} while (0)

#define CLEAR_LEAK(x)							\
	do {								\
		if (iommu_leak_tab)					\
			iommu_leak_tab[x] = NULL;			\
	} while (0)
L
Linus Torvalds 已提交
159 160

/* Debugging aid for drivers that don't free their IOMMU tables */
161
static void **iommu_leak_tab;
L
Linus Torvalds 已提交
162
static int leak_trace;
163
static int iommu_leak_pages = 20;
164

165
static void dump_leak(void)
L
Linus Torvalds 已提交
166 167
{
	int i;
168 169 170 171
	static int dump;

	if (dump || !iommu_leak_tab)
		return;
L
Linus Torvalds 已提交
172
	dump = 1;
173 174 175 176 177 178 179
	show_stack(NULL, NULL);

	/* Very crude. dump some from the end of the table too */
	printk(KERN_DEBUG "Dumping %d pages from end of IOMMU:\n",
	       iommu_leak_pages);
	for (i = 0; i < iommu_leak_pages; i += 2) {
		printk(KERN_DEBUG "%lu: ", iommu_pages-i);
180
		printk_address((unsigned long) iommu_leak_tab[iommu_pages-i], 0);
181 182 183
		printk(KERN_CONT "%c", (i+1)%2 == 0 ? '\n' : ' ');
	}
	printk(KERN_DEBUG "\n");
L
Linus Torvalds 已提交
184 185
}
#else
186 187
# define SET_LEAK(x)
# define CLEAR_LEAK(x)
L
Linus Torvalds 已提交
188 189
#endif

190
static void iommu_full(struct device *dev, size_t size, int dir)
L
Linus Torvalds 已提交
191
{
192
	/*
L
Linus Torvalds 已提交
193 194
	 * Ran out of IOMMU space for this operation. This is very bad.
	 * Unfortunately the drivers cannot handle this operation properly.
195
	 * Return some non mapped prereserved space in the aperture and
L
Linus Torvalds 已提交
196 197
	 * let the Northbridge deal with it. This will result in garbage
	 * in the IO operation. When the size exceeds the prereserved space
198
	 * memory corruption will occur or random memory will be DMAed
L
Linus Torvalds 已提交
199
	 * out. Hopefully no network devices use single mappings that big.
200 201
	 */

202
	dev_err(dev, "PCI-DMA: Out of IOMMU space for %lu bytes\n", size);
L
Linus Torvalds 已提交
203

204
	if (size > PAGE_SIZE*EMERGENCY_PAGES) {
L
Linus Torvalds 已提交
205 206
		if (dir == PCI_DMA_FROMDEVICE || dir == PCI_DMA_BIDIRECTIONAL)
			panic("PCI-DMA: Memory would be corrupted\n");
207 208 209 210
		if (dir == PCI_DMA_TODEVICE || dir == PCI_DMA_BIDIRECTIONAL)
			panic(KERN_ERR
				"PCI-DMA: Random memory would be DMAed\n");
	}
L
Linus Torvalds 已提交
211
#ifdef CONFIG_IOMMU_LEAK
212
	dump_leak();
L
Linus Torvalds 已提交
213
#endif
214
}
L
Linus Torvalds 已提交
215

216 217 218
static inline int
need_iommu(struct device *dev, unsigned long addr, size_t size)
{
219 220
	return force_iommu ||
		!is_buffer_dma_capable(*dev->dma_mask, addr, size);
L
Linus Torvalds 已提交
221 222
}

223 224 225
static inline int
nonforced_iommu(struct device *dev, unsigned long addr, size_t size)
{
226
	return !is_buffer_dma_capable(*dev->dma_mask, addr, size);
L
Linus Torvalds 已提交
227 228 229 230 231
}

/* Map a single continuous physical area into the IOMMU.
 * Caller needs to check if the iommu is needed and flush.
 */
232
static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem,
233 234
			       size_t size, int dir, unsigned long align_mask,
			       u64 dma_mask)
235
{
236
	unsigned long npages = iommu_num_pages(phys_mem, size);
237
	unsigned long iommu_page;
L
Linus Torvalds 已提交
238
	int i;
239

240
	iommu_page = alloc_iommu(dev, npages, align_mask, dma_mask);
L
Linus Torvalds 已提交
241 242
	if (iommu_page == -1) {
		if (!nonforced_iommu(dev, phys_mem, size))
243
			return phys_mem;
L
Linus Torvalds 已提交
244 245
		if (panic_on_overflow)
			panic("dma_map_area overflow %lu bytes\n", size);
246
		iommu_full(dev, size, dir);
L
Linus Torvalds 已提交
247 248 249 250 251 252 253 254 255 256 257 258
		return bad_dma_address;
	}

	for (i = 0; i < npages; i++) {
		iommu_gatt_base[iommu_page + i] = GPTE_ENCODE(phys_mem);
		SET_LEAK(iommu_page + i);
		phys_mem += PAGE_SIZE;
	}
	return iommu_bus_base + iommu_page*PAGE_SIZE + (phys_mem & ~PAGE_MASK);
}

/* Map a single area into the IOMMU */
259
static dma_addr_t
I
Ingo Molnar 已提交
260
gart_map_single(struct device *dev, phys_addr_t paddr, size_t size, int dir)
L
Linus Torvalds 已提交
261
{
I
Ingo Molnar 已提交
262
	unsigned long bus;
L
Linus Torvalds 已提交
263 264

	if (!dev)
265
		dev = &x86_dma_fallback_dev;
L
Linus Torvalds 已提交
266

I
Ingo Molnar 已提交
267 268
	if (!need_iommu(dev, paddr, size))
		return paddr;
L
Linus Torvalds 已提交
269

270
	bus = dma_map_area(dev, paddr, size, dir, 0, dma_get_mask(dev));
271
	flush_gart();
272 273

	return bus;
274 275
}

276 277 278
/*
 * Free a DMA mapping.
 */
279
static void gart_unmap_single(struct device *dev, dma_addr_t dma_addr,
280
			      size_t size, int direction)
281 282 283 284 285 286 287 288
{
	unsigned long iommu_page;
	int npages;
	int i;

	if (dma_addr < iommu_bus_base + EMERGENCY_PAGES*PAGE_SIZE ||
	    dma_addr >= iommu_bus_base + iommu_size)
		return;
289

290
	iommu_page = (dma_addr - iommu_bus_base)>>PAGE_SHIFT;
291
	npages = iommu_num_pages(dma_addr, size);
292 293 294 295 296 297 298
	for (i = 0; i < npages; i++) {
		iommu_gatt_base[iommu_page + i] = gart_unmapped_entry;
		CLEAR_LEAK(iommu_page + i);
	}
	free_iommu(iommu_page, npages);
}

299 300 301
/*
 * Wrapper for pci_unmap_single working with scatterlists.
 */
302 303
static void
gart_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, int dir)
304
{
305
	struct scatterlist *s;
306 307
	int i;

308
	for_each_sg(sg, s, nents, i) {
309
		if (!s->dma_length || !s->length)
310
			break;
311
		gart_unmap_single(dev, s->dma_address, s->dma_length, dir);
312 313
	}
}
L
Linus Torvalds 已提交
314 315 316 317 318

/* Fallback for dma_map_sg in case of overflow */
static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg,
			       int nents, int dir)
{
319
	struct scatterlist *s;
L
Linus Torvalds 已提交
320
	int i;
321
	u64 dma_mask = dma_get_mask(dev);
L
Linus Torvalds 已提交
322 323 324 325 326

#ifdef CONFIG_IOMMU_DEBUG
	printk(KERN_DEBUG "dma_map_sg overflow\n");
#endif

327
	for_each_sg(sg, s, nents, i) {
J
Jens Axboe 已提交
328
		unsigned long addr = sg_phys(s);
329 330

		if (nonforced_iommu(dev, addr, s->length)) {
331 332
			addr = dma_map_area(dev, addr, s->length, dir, 0,
					    dma_mask);
333 334
			if (addr == bad_dma_address) {
				if (i > 0)
335
					gart_unmap_sg(dev, sg, i, dir);
336
				nents = 0;
L
Linus Torvalds 已提交
337 338 339 340 341 342 343
				sg[0].dma_length = 0;
				break;
			}
		}
		s->dma_address = addr;
		s->dma_length = s->length;
	}
344
	flush_gart();
345

L
Linus Torvalds 已提交
346 347 348 349
	return nents;
}

/* Map multiple scatterlist entries continuous into the first. */
350 351 352
static int __dma_map_cont(struct device *dev, struct scatterlist *start,
			  int nelems, struct scatterlist *sout,
			  unsigned long pages)
L
Linus Torvalds 已提交
353
{
354 355
	unsigned long iommu_start;
	unsigned long iommu_page;
356
	struct scatterlist *s;
L
Linus Torvalds 已提交
357 358
	int i;

359
	iommu_start = alloc_iommu(dev, pages, 0, dma_get_mask(dev));
L
Linus Torvalds 已提交
360 361
	if (iommu_start == -1)
		return -1;
362

363
	iommu_page = iommu_start;
364
	for_each_sg(start, s, nelems, i) {
L
Linus Torvalds 已提交
365 366
		unsigned long pages, addr;
		unsigned long phys_addr = s->dma_address;
367

368 369
		BUG_ON(s != start && s->offset);
		if (s == start) {
L
Linus Torvalds 已提交
370 371 372
			sout->dma_address = iommu_bus_base;
			sout->dma_address += iommu_page*PAGE_SIZE + s->offset;
			sout->dma_length = s->length;
373 374
		} else {
			sout->dma_length += s->length;
L
Linus Torvalds 已提交
375 376 377
		}

		addr = phys_addr;
378
		pages = iommu_num_pages(s->offset, s->length);
379 380
		while (pages--) {
			iommu_gatt_base[iommu_page] = GPTE_ENCODE(addr);
L
Linus Torvalds 已提交
381 382 383
			SET_LEAK(iommu_page);
			addr += PAGE_SIZE;
			iommu_page++;
384
		}
385 386 387
	}
	BUG_ON(iommu_page - iommu_start != pages);

L
Linus Torvalds 已提交
388 389 390
	return 0;
}

391
static inline int
392 393
dma_map_cont(struct device *dev, struct scatterlist *start, int nelems,
	     struct scatterlist *sout, unsigned long pages, int need)
L
Linus Torvalds 已提交
394
{
395 396
	if (!need) {
		BUG_ON(nelems != 1);
F
FUJITA Tomonori 已提交
397
		sout->dma_address = start->dma_address;
398
		sout->dma_length = start->length;
L
Linus Torvalds 已提交
399
		return 0;
400
	}
401
	return __dma_map_cont(dev, start, nelems, sout, pages);
L
Linus Torvalds 已提交
402
}
403

L
Linus Torvalds 已提交
404 405
/*
 * DMA map all entries in a scatterlist.
406
 * Merge chunks that have page aligned sizes into a continuous mapping.
L
Linus Torvalds 已提交
407
 */
408 409
static int
gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir)
L
Linus Torvalds 已提交
410
{
411
	struct scatterlist *s, *ps, *start_sg, *sgmap;
412 413
	int need = 0, nextneed, i, out, start;
	unsigned long pages = 0;
414 415
	unsigned int seg_size;
	unsigned int max_seg_size;
L
Linus Torvalds 已提交
416

417
	if (nents == 0)
L
Linus Torvalds 已提交
418 419 420
		return 0;

	if (!dev)
421
		dev = &x86_dma_fallback_dev;
L
Linus Torvalds 已提交
422 423 424

	out = 0;
	start = 0;
425
	start_sg = sgmap = sg;
426 427
	seg_size = 0;
	max_seg_size = dma_get_max_seg_size(dev);
428 429
	ps = NULL; /* shut up gcc */
	for_each_sg(sg, s, nents, i) {
J
Jens Axboe 已提交
430
		dma_addr_t addr = sg_phys(s);
431

L
Linus Torvalds 已提交
432
		s->dma_address = addr;
433
		BUG_ON(s->length == 0);
L
Linus Torvalds 已提交
434

435
		nextneed = need_iommu(dev, addr, s->length);
L
Linus Torvalds 已提交
436 437 438

		/* Handle the previous not yet processed entries */
		if (i > start) {
439 440 441 442 443
			/*
			 * Can only merge when the last chunk ends on a
			 * page boundary and the new one doesn't have an
			 * offset.
			 */
L
Linus Torvalds 已提交
444
			if (!iommu_merge || !nextneed || !need || s->offset ||
445
			    (s->length + seg_size > max_seg_size) ||
446
			    (ps->offset + ps->length) % PAGE_SIZE) {
447 448
				if (dma_map_cont(dev, start_sg, i - start,
						 sgmap, pages, need) < 0)
L
Linus Torvalds 已提交
449 450
					goto error;
				out++;
451
				seg_size = 0;
452
				sgmap = sg_next(sgmap);
L
Linus Torvalds 已提交
453
				pages = 0;
454 455
				start = i;
				start_sg = s;
L
Linus Torvalds 已提交
456 457 458
			}
		}

459
		seg_size += s->length;
L
Linus Torvalds 已提交
460
		need = nextneed;
461
		pages += iommu_num_pages(s->offset, s->length);
462
		ps = s;
L
Linus Torvalds 已提交
463
	}
464
	if (dma_map_cont(dev, start_sg, i - start, sgmap, pages, need) < 0)
L
Linus Torvalds 已提交
465 466
		goto error;
	out++;
467
	flush_gart();
468 469 470 471
	if (out < nents) {
		sgmap = sg_next(sgmap);
		sgmap->dma_length = 0;
	}
L
Linus Torvalds 已提交
472 473 474
	return out;

error:
475
	flush_gart();
476
	gart_unmap_sg(dev, sg, out, dir);
477

478 479 480 481 482 483
	/* When it was forced or merged try again in a dumb way */
	if (force_iommu || iommu_merge) {
		out = dma_map_sg_nonforce(dev, sg, nents, dir);
		if (out > 0)
			return out;
	}
L
Linus Torvalds 已提交
484 485
	if (panic_on_overflow)
		panic("dma_map_sg: overflow on %lu pages\n", pages);
486

487
	iommu_full(dev, pages << PAGE_SHIFT, dir);
488 489
	for_each_sg(sg, s, nents, i)
		s->dma_address = bad_dma_address;
L
Linus Torvalds 已提交
490
	return 0;
491
}
L
Linus Torvalds 已提交
492

493 494 495 496 497 498
/* allocate and map a coherent mapping */
static void *
gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr,
		    gfp_t flag)
{
	void *vaddr;
499
	dma_addr_t paddr;
500
	unsigned long align_mask;
501
	u64 dma_mask = dma_alloc_coherent_mask(dev, flag);
502 503 504 505 506

	vaddr = (void *)__get_free_pages(flag | __GFP_ZERO, get_order(size));
	if (!vaddr)
		return NULL;

507 508 509 510 511 512
	paddr = virt_to_phys(vaddr);
	if (is_buffer_dma_capable(dma_mask, paddr, size)) {
		*dma_addr = paddr;
		return vaddr;
	}

513 514
	align_mask = (1UL << get_order(size)) - 1;

515
	*dma_addr = dma_map_area(dev, paddr, size, DMA_BIDIRECTIONAL,
516
				 align_mask, dma_mask);
517 518
	flush_gart();

519 520 521 522 523 524 525 526
	if (*dma_addr != bad_dma_address)
		return vaddr;

	free_pages((unsigned long)vaddr, get_order(size));

	return NULL;
}

527 528 529 530 531 532 533 534 535
/* free a coherent mapping */
static void
gart_free_coherent(struct device *dev, size_t size, void *vaddr,
		   dma_addr_t dma_addr)
{
	gart_unmap_single(dev, dma_addr, size, DMA_BIDIRECTIONAL);
	free_pages((unsigned long)vaddr, get_order(size));
}

536
static int no_agp;
L
Linus Torvalds 已提交
537 538

static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size)
539 540 541 542 543 544 545 546 547 548
{
	unsigned long a;

	if (!iommu_size) {
		iommu_size = aper_size;
		if (!no_agp)
			iommu_size /= 2;
	}

	a = aper + iommu_size;
549
	iommu_size -= round_up(a, PMD_PAGE_SIZE) - a;
L
Linus Torvalds 已提交
550

551
	if (iommu_size < 64*1024*1024) {
L
Linus Torvalds 已提交
552
		printk(KERN_WARNING
553 554 555 556 557
			"PCI-DMA: Warning: Small IOMMU %luMB."
			" Consider increasing the AGP aperture in BIOS\n",
				iommu_size >> 20);
	}

L
Linus Torvalds 已提交
558
	return iommu_size;
559
}
L
Linus Torvalds 已提交
560

561 562 563
static __init unsigned read_aperture(struct pci_dev *dev, u32 *size)
{
	unsigned aper_size = 0, aper_base_32, aper_order;
L
Linus Torvalds 已提交
564 565
	u64 aper_base;

P
Pavel Machek 已提交
566 567
	pci_read_config_dword(dev, AMD64_GARTAPERTUREBASE, &aper_base_32);
	pci_read_config_dword(dev, AMD64_GARTAPERTURECTL, &aper_order);
568
	aper_order = (aper_order >> 1) & 7;
L
Linus Torvalds 已提交
569

570
	aper_base = aper_base_32 & 0x7fff;
L
Linus Torvalds 已提交
571 572
	aper_base <<= 25;

573 574
	aper_size = (32 * 1024 * 1024) << aper_order;
	if (aper_base + aper_size > 0x100000000UL || !aper_size)
L
Linus Torvalds 已提交
575 576 577 578
		aper_base = 0;

	*size = aper_size;
	return aper_base;
579
}
L
Linus Torvalds 已提交
580

581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606
static void enable_gart_translations(void)
{
	int i;

	for (i = 0; i < num_k8_northbridges; i++) {
		struct pci_dev *dev = k8_northbridges[i];

		enable_gart_translation(dev, __pa(agp_gatt_table));
	}
}

/*
 * If fix_up_north_bridges is set, the north bridges have to be fixed up on
 * resume in the same way as they are handled in gart_iommu_hole_init().
 */
static bool fix_up_north_bridges;
static u32 aperture_order;
static u32 aperture_alloc;

void set_up_gart_resume(u32 aper_order, u32 aper_alloc)
{
	fix_up_north_bridges = true;
	aperture_order = aper_order;
	aperture_alloc = aper_alloc;
}

607 608
static int gart_resume(struct sys_device *dev)
{
609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631
	printk(KERN_INFO "PCI-DMA: Resuming GART IOMMU\n");

	if (fix_up_north_bridges) {
		int i;

		printk(KERN_INFO "PCI-DMA: Restoring GART aperture settings\n");

		for (i = 0; i < num_k8_northbridges; i++) {
			struct pci_dev *dev = k8_northbridges[i];

			/*
			 * Don't enable translations just yet.  That is the next
			 * step.  Restore the pre-suspend aperture settings.
			 */
			pci_write_config_dword(dev, AMD64_GARTAPERTURECTL,
						aperture_order << 1);
			pci_write_config_dword(dev, AMD64_GARTAPERTUREBASE,
						aperture_alloc >> 25);
		}
	}

	enable_gart_translations();

632 633 634 635 636
	return 0;
}

static int gart_suspend(struct sys_device *dev, pm_message_t state)
{
637
	return 0;
638 639 640 641 642 643 644 645 646 647 648 649 650 651
}

static struct sysdev_class gart_sysdev_class = {
	.name = "gart",
	.suspend = gart_suspend,
	.resume = gart_resume,

};

static struct sys_device device_gart = {
	.id	= 0,
	.cls	= &gart_sysdev_class,
};

652
/*
L
Linus Torvalds 已提交
653
 * Private Northbridge GATT initialization in case we cannot use the
654
 * AGP driver for some reason.
L
Linus Torvalds 已提交
655 656
 */
static __init int init_k8_gatt(struct agp_kern_info *info)
657 658 659
{
	unsigned aper_size, gatt_size, new_aper_size;
	unsigned aper_base, new_aper_base;
L
Linus Torvalds 已提交
660 661
	struct pci_dev *dev;
	void *gatt;
662
	int i, error;
Y
Yinghai Lu 已提交
663
	unsigned long start_pfn, end_pfn;
664

L
Linus Torvalds 已提交
665 666
	printk(KERN_INFO "PCI-DMA: Disabling AGP.\n");
	aper_size = aper_base = info->aper_size = 0;
667 668 669
	dev = NULL;
	for (i = 0; i < num_k8_northbridges; i++) {
		dev = k8_northbridges[i];
670 671 672 673 674
		new_aper_base = read_aperture(dev, &new_aper_size);
		if (!new_aper_base)
			goto nommu;

		if (!aper_base) {
L
Linus Torvalds 已提交
675 676
			aper_size = new_aper_size;
			aper_base = new_aper_base;
677 678
		}
		if (aper_size != new_aper_size || aper_base != new_aper_base)
L
Linus Torvalds 已提交
679 680 681
			goto nommu;
	}
	if (!aper_base)
682
		goto nommu;
L
Linus Torvalds 已提交
683
	info->aper_base = aper_base;
684
	info->aper_size = aper_size >> 20;
L
Linus Torvalds 已提交
685

686 687 688
	gatt_size = (aper_size >> PAGE_SHIFT) * sizeof(u32);
	gatt = (void *)__get_free_pages(GFP_KERNEL, get_order(gatt_size));
	if (!gatt)
689
		panic("Cannot allocate GATT table");
690
	if (set_memory_uc((unsigned long)gatt, gatt_size >> PAGE_SHIFT))
691 692
		panic("Could not set GART PTEs to uncacheable pages");

693
	memset(gatt, 0, gatt_size);
L
Linus Torvalds 已提交
694
	agp_gatt_table = gatt;
695

696
	enable_gart_translations();
697 698 699 700 701 702

	error = sysdev_class_register(&gart_sysdev_class);
	if (!error)
		error = sysdev_register(&device_gart);
	if (error)
		panic("Could not register gart_sysdev -- would corrupt data on next suspend");
703

704
	flush_gart();
705 706 707

	printk(KERN_INFO "PCI-DMA: aperture base @ %x size %u KB\n",
	       aper_base, aper_size>>10);
Y
Yinghai Lu 已提交
708 709 710 711

	/* need to map that range */
	end_pfn = (aper_base>>PAGE_SHIFT) + (aper_size>>PAGE_SHIFT);
	if (end_pfn > max_low_pfn_mapped) {
Y
Yinghai Lu 已提交
712 713
		start_pfn = (aper_base>>PAGE_SHIFT);
		init_memory_mapping(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
Y
Yinghai Lu 已提交
714
	}
L
Linus Torvalds 已提交
715 716 717
	return 0;

 nommu:
718
	/* Should not happen anymore */
719 720
	printk(KERN_WARNING "PCI-DMA: More than 4GB of RAM and no IOMMU\n"
	       KERN_WARNING "falling back to iommu=soft.\n");
721 722
	return -1;
}
L
Linus Torvalds 已提交
723 724 725

extern int agp_amd64_init(void);

726
static struct dma_mapping_ops gart_dma_ops = {
727 728 729 730 731 732 733 734 735 736
	.map_single			= gart_map_single,
	.unmap_single			= gart_unmap_single,
	.sync_single_for_cpu		= NULL,
	.sync_single_for_device		= NULL,
	.sync_single_range_for_cpu	= NULL,
	.sync_single_range_for_device	= NULL,
	.sync_sg_for_cpu		= NULL,
	.sync_sg_for_device		= NULL,
	.map_sg				= gart_map_sg,
	.unmap_sg			= gart_unmap_sg,
737
	.alloc_coherent			= gart_alloc_coherent,
738
	.free_coherent			= gart_free_coherent,
739 740
};

741 742 743 744 745 746 747 748
void gart_iommu_shutdown(void)
{
	struct pci_dev *dev;
	int i;

	if (no_agp && (dma_ops != &gart_dma_ops))
		return;

749 750
	for (i = 0; i < num_k8_northbridges; i++) {
		u32 ctl;
751

752
		dev = k8_northbridges[i];
P
Pavel Machek 已提交
753
		pci_read_config_dword(dev, AMD64_GARTAPERTURECTL, &ctl);
754

P
Pavel Machek 已提交
755
		ctl &= ~GARTEN;
756

P
Pavel Machek 已提交
757
		pci_write_config_dword(dev, AMD64_GARTAPERTURECTL, ctl);
758
	}
759 760
}

761
void __init gart_iommu_init(void)
762
{
L
Linus Torvalds 已提交
763 764
	struct agp_kern_info info;
	unsigned long iommu_start;
765
	unsigned long aper_size;
L
Linus Torvalds 已提交
766 767 768
	unsigned long scratch;
	long i;

769 770
	if (cache_k8_northbridges() < 0 || num_k8_northbridges == 0) {
		printk(KERN_INFO "PCI-GART: No AMD northbridge found.\n");
771
		return;
772 773
	}

L
Linus Torvalds 已提交
774
#ifndef CONFIG_AGP_AMD64
775
	no_agp = 1;
L
Linus Torvalds 已提交
776 777 778
#else
	/* Makefile puts PCI initialization via subsys_initcall first. */
	/* Add other K8 AGP bridge drivers here */
779 780
	no_agp = no_agp ||
		(agp_amd64_init() < 0) ||
L
Linus Torvalds 已提交
781
		(agp_copy_info(agp_bridge, &info) < 0);
782
#endif
L
Linus Torvalds 已提交
783

784
	if (swiotlb)
785
		return;
786

787
	/* Did we detect a different HW IOMMU? */
788
	if (iommu_detected && !gart_iommu_aperture)
789
		return;
790

L
Linus Torvalds 已提交
791
	if (no_iommu ||
Y
Yinghai Lu 已提交
792
	    (!force_iommu && max_pfn <= MAX_DMA32_PFN) ||
793
	    !gart_iommu_aperture ||
L
Linus Torvalds 已提交
794
	    (no_agp && init_k8_gatt(&info) < 0)) {
Y
Yinghai Lu 已提交
795
		if (max_pfn > MAX_DMA32_PFN) {
796 797 798
			printk(KERN_WARNING "More than 4GB of memory "
			       	          "but GART IOMMU not available.\n"
			       KERN_WARNING "falling back to iommu=soft.\n");
J
Jon Mason 已提交
799
		}
800
		return;
L
Linus Torvalds 已提交
801 802
	}

J
Jon Mason 已提交
803
	printk(KERN_INFO "PCI-DMA: using GART IOMMU.\n");
804 805 806 807 808 809 810 811
	aper_size = info.aper_size * 1024 * 1024;
	iommu_size = check_iommu_size(info.aper_base, aper_size);
	iommu_pages = iommu_size >> PAGE_SHIFT;

	iommu_gart_bitmap = (void *) __get_free_pages(GFP_KERNEL,
						      get_order(iommu_pages/8));
	if (!iommu_gart_bitmap)
		panic("Cannot allocate iommu bitmap\n");
L
Linus Torvalds 已提交
812 813 814
	memset(iommu_gart_bitmap, 0, iommu_pages/8);

#ifdef CONFIG_IOMMU_LEAK
815 816
	if (leak_trace) {
		iommu_leak_tab = (void *)__get_free_pages(GFP_KERNEL,
L
Linus Torvalds 已提交
817
				  get_order(iommu_pages*sizeof(void *)));
818 819
		if (iommu_leak_tab)
			memset(iommu_leak_tab, 0, iommu_pages * 8);
L
Linus Torvalds 已提交
820
		else
821 822 823
			printk(KERN_DEBUG
			       "PCI-DMA: Cannot allocate leak trace area\n");
	}
L
Linus Torvalds 已提交
824 825
#endif

826
	/*
L
Linus Torvalds 已提交
827
	 * Out of IOMMU space handling.
828 829
	 * Reserve some invalid pages at the beginning of the GART.
	 */
830
	iommu_area_reserve(iommu_gart_bitmap, 0, EMERGENCY_PAGES);
L
Linus Torvalds 已提交
831

832
	agp_memory_reserved = iommu_size;
L
Linus Torvalds 已提交
833 834
	printk(KERN_INFO
	       "PCI-DMA: Reserving %luMB of IOMMU area in the AGP aperture\n",
835
	       iommu_size >> 20);
L
Linus Torvalds 已提交
836

837 838
	iommu_start = aper_size - iommu_size;
	iommu_bus_base = info.aper_base + iommu_start;
L
Linus Torvalds 已提交
839 840 841
	bad_dma_address = iommu_bus_base;
	iommu_gatt_base = agp_gatt_table + (iommu_start>>PAGE_SHIFT);

842
	/*
L
Linus Torvalds 已提交
843 844 845 846 847 848
	 * Unmap the IOMMU part of the GART. The alias of the page is
	 * always mapped with cache enabled and there is no full cache
	 * coherency across the GART remapping. The unmapping avoids
	 * automatic prefetches from the CPU allocating cache lines in
	 * there. All CPU accesses are done via the direct mapping to
	 * the backing memory. The GART address is only used by PCI
849
	 * devices.
L
Linus Torvalds 已提交
850
	 */
851 852
	set_memory_np((unsigned long)__va(iommu_bus_base),
				iommu_size >> PAGE_SHIFT);
I
Ingo Molnar 已提交
853 854 855 856 857 858 859 860 861
	/*
	 * Tricky. The GART table remaps the physical memory range,
	 * so the CPU wont notice potential aliases and if the memory
	 * is remapped to UC later on, we might surprise the PCI devices
	 * with a stray writeout of a cacheline. So play it sure and
	 * do an explicit, full-scale wbinvd() _after_ having marked all
	 * the pages as Not-Present:
	 */
	wbinvd();
L
Linus Torvalds 已提交
862

863
	/*
864
	 * Try to workaround a bug (thanks to BenH):
865
	 * Set unmapped entries to a scratch page instead of 0.
L
Linus Torvalds 已提交
866
	 * Any prefetches that hit unmapped entries won't get an bus abort
867
	 * then. (P2P bridge may be prefetching on DMA reads).
L
Linus Torvalds 已提交
868
	 */
869 870
	scratch = get_zeroed_page(GFP_KERNEL);
	if (!scratch)
L
Linus Torvalds 已提交
871 872
		panic("Cannot allocate iommu scratch page");
	gart_unmapped_entry = GPTE_ENCODE(__pa(scratch));
873
	for (i = EMERGENCY_PAGES; i < iommu_pages; i++)
L
Linus Torvalds 已提交
874 875
		iommu_gatt_base[i] = gart_unmapped_entry;

876
	flush_gart();
877
	dma_ops = &gart_dma_ops;
878
}
L
Linus Torvalds 已提交
879

880
void __init gart_parse_options(char *p)
881 882 883
{
	int arg;

L
Linus Torvalds 已提交
884
#ifdef CONFIG_IOMMU_LEAK
885
	if (!strncmp(p, "leak", 4)) {
886 887 888 889 890 891
		leak_trace = 1;
		p += 4;
		if (*p == '=') ++p;
		if (isdigit(*p) && get_option(&p, &arg))
			iommu_leak_pages = arg;
	}
L
Linus Torvalds 已提交
892
#endif
893 894
	if (isdigit(*p) && get_option(&p, &arg))
		iommu_size = arg;
895
	if (!strncmp(p, "noagp", 5))
896
		no_agp = 1;
897
	if (!strncmp(p, "noaperture", 10))
898 899
		fix_aperture = 0;
	/* duplicated from pci-dma.c */
900
	if (!strncmp(p, "force", 5))
901
		gart_iommu_aperture_allowed = 1;
902
	if (!strncmp(p, "allowed", 7))
903
		gart_iommu_aperture_allowed = 1;
904 905 906 907 908 909 910 911 912 913
	if (!strncmp(p, "memaper", 7)) {
		fallback_aper_force = 1;
		p += 7;
		if (*p == '=') {
			++p;
			if (get_option(&p, &arg))
				fallback_aper_order = arg;
		}
	}
}