/*
 * Dynamic DMA mapping support for AMD Hammer.
 *
 * Use the integrated AGP GART in the Hammer northbridge as an IOMMU for PCI.
 * This allows the use of PCI devices that only support 32-bit addresses on
 * systems with more than 4GB of memory.
 *
 * See Documentation/PCI/PCI-DMA-mapping.txt for the interface specification.
 *
 * Copyright 2002 Andi Kleen, SuSE Labs.
 * Subject to the GNU General Public License v2 only.
 */

#include <linux/types.h>
#include <linux/ctype.h>
#include <linux/agp_backend.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/string.h>
#include <linux/spinlock.h>
#include <linux/pci.h>
#include <linux/module.h>
#include <linux/topology.h>
#include <linux/interrupt.h>
#include <linux/bitops.h>
#include <linux/kdebug.h>
#include <linux/scatterlist.h>
#include <linux/iommu-helper.h>
#include <linux/sysdev.h>
#include <linux/io.h>
#include <asm/atomic.h>
#include <asm/mtrr.h>
#include <asm/pgtable.h>
#include <asm/proto.h>
#include <asm/iommu.h>
#include <asm/gart.h>
#include <asm/cacheflush.h>
#include <asm/swiotlb.h>
#include <asm/dma.h>
#include <asm/k8.h>

static unsigned long iommu_bus_base;	/* GART remapping area (physical) */
static unsigned long iommu_size;	/* size of remapping area bytes */
static unsigned long iommu_pages;	/* .. and in pages */

static u32 *iommu_gatt_base;		/* Remapping table */

/*
 * If this is disabled the IOMMU will use an optimized flushing strategy
 * of only flushing when a mapping is reused. With it true the GART is
 * flushed for every mapping. The problem is that the lazy flush seems
 * to trigger bugs with some popular PCI cards, in particular 3ware (but
 * it has also been seen with Qlogic at least).
 */
static int iommu_fullflush = 1;

/* Allocation bitmap for the remapping area: */
static DEFINE_SPINLOCK(iommu_bitmap_lock);
/* Guarded by iommu_bitmap_lock: */
static unsigned long *iommu_gart_bitmap;

static u32 gart_unmapped_entry;

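/*
 * A GART PTE is 32 bits wide but encodes a 40-bit physical address:
 * address bits 12-31 stay in place and bits 32-39 are stored in PTE
 * bits 4-11 (GPTE_DECODE moves them back up).
 */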
#define GPTE_VALID    1
#define GPTE_COHERENT 2
#define GPTE_ENCODE(x) \
	(((x) & 0xfffff000) | (((x) >> 32) << 4) | GPTE_VALID | GPTE_COHERENT)
#define GPTE_DECODE(x) (((x) & 0xfffff000) | (((u64)(x) & 0xff0) << 28))

#define EMERGENCY_PAGES 32 /* = 128KB */

#ifdef CONFIG_AGP
#define AGPEXTERN extern
#else
#define AGPEXTERN
#endif

/* backdoor interface to AGP driver */
AGPEXTERN int agp_memory_reserved;
AGPEXTERN __u32 *agp_gatt_table;

static unsigned long next_bit;  /* protected by iommu_bitmap_lock */
static bool need_flush;		/* global flush state. set for each gart wrap */

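/*
 * Allocate a run of IOMMU pages from the bitmap.  Allocation is next-fit,
 * starting at next_bit; when the search wraps back to the start the GART
 * TLB must be flushed before freed entries are handed out again, which is
 * what need_flush tracks.
 */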
static unsigned long alloc_iommu(struct device *dev, int size,
				 unsigned long align_mask)
{
	unsigned long offset, flags;
	unsigned long boundary_size;
	unsigned long base_index;

	base_index = ALIGN(iommu_bus_base & dma_get_seg_boundary(dev),
			   PAGE_SIZE) >> PAGE_SHIFT;
	boundary_size = ALIGN((unsigned long long)dma_get_seg_boundary(dev) + 1,
			      PAGE_SIZE) >> PAGE_SHIFT;

	spin_lock_irqsave(&iommu_bitmap_lock, flags);
	offset = iommu_area_alloc(iommu_gart_bitmap, iommu_pages, next_bit,
				  size, base_index, boundary_size, align_mask);
	if (offset == -1) {
		need_flush = true;
		offset = iommu_area_alloc(iommu_gart_bitmap, iommu_pages, 0,
					  size, base_index, boundary_size,
					  align_mask);
	}
	if (offset != -1) {
		next_bit = offset + size;
		if (next_bit >= iommu_pages) {
			next_bit = 0;
			need_flush = true;
		}
	}
	if (iommu_fullflush)
		need_flush = true;
	spin_unlock_irqrestore(&iommu_bitmap_lock, flags);

	return offset;
}

static void free_iommu(unsigned long offset, int size)
{
	unsigned long flags;

	spin_lock_irqsave(&iommu_bitmap_lock, flags);
	iommu_area_free(iommu_gart_bitmap, offset, size);
	if (offset >= next_bit)
		next_bit = offset + size;
	spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
}

/*
 * Use global flush state to avoid races with multiple flushers.
 */
static void flush_gart(void)
{
	unsigned long flags;

	spin_lock_irqsave(&iommu_bitmap_lock, flags);
	if (need_flush) {
		k8_flush_garts();
		need_flush = false;
	}
	spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
}

#ifdef CONFIG_IOMMU_LEAK
/* Debugging aid for drivers that don't free their IOMMU tables */
static int leak_trace;
static int iommu_leak_pages = 20;

static void dump_leak(void)
{
	static int dump;

	if (dump)
		return;
	dump = 1;

	show_stack(NULL, NULL);
	debug_dma_dump_mappings(NULL);
}
#endif

static void iommu_full(struct device *dev, size_t size, int dir)
{
	/*
	 * Ran out of IOMMU space for this operation. This is very bad.
	 * Unfortunately the drivers cannot handle this operation properly.
	 * Return some non-mapped pre-reserved space in the aperture and
	 * let the Northbridge deal with it. This will result in garbage
	 * in the IO operation. When the size exceeds the pre-reserved space,
	 * memory corruption will occur or random memory will be DMAed
	 * out. Hopefully no network devices use single mappings that big.
	 */

	dev_err(dev, "PCI-DMA: Out of IOMMU space for %lu bytes\n", size);

	if (size > PAGE_SIZE*EMERGENCY_PAGES) {
		if (dir == PCI_DMA_FROMDEVICE || dir == PCI_DMA_BIDIRECTIONAL)
			panic("PCI-DMA: Memory would be corrupted\n");
		if (dir == PCI_DMA_TODEVICE || dir == PCI_DMA_BIDIRECTIONAL)
			panic("PCI-DMA: Random memory would be DMAed\n");
	}
#ifdef CONFIG_IOMMU_LEAK
	dump_leak();
#endif
}

static inline int
need_iommu(struct device *dev, unsigned long addr, size_t size)
{
	return force_iommu || !dma_capable(dev, addr, size);
}

static inline int
nonforced_iommu(struct device *dev, unsigned long addr, size_t size)
{
	return !dma_capable(dev, addr, size);
}

/* Map a single contiguous physical area into the IOMMU.
 * Caller needs to check if the iommu is needed and flush.
 */
static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem,
				size_t size, int dir, unsigned long align_mask)
{
	unsigned long npages = iommu_num_pages(phys_mem, size, PAGE_SIZE);
	unsigned long iommu_page = alloc_iommu(dev, npages, align_mask);
	int i;

	if (iommu_page == -1) {
		if (!nonforced_iommu(dev, phys_mem, size))
			return phys_mem;
		if (panic_on_overflow)
			panic("dma_map_area overflow %lu bytes\n", size);
		iommu_full(dev, size, dir);
		return bad_dma_address;
	}

	for (i = 0; i < npages; i++) {
		iommu_gatt_base[iommu_page + i] = GPTE_ENCODE(phys_mem);
		phys_mem += PAGE_SIZE;
	}
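	/*
	 * phys_mem's offset within the page is unchanged by the whole-page
	 * increments above, so it still supplies the in-page offset here.
	 */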
	return iommu_bus_base + iommu_page*PAGE_SIZE + (phys_mem & ~PAGE_MASK);
}

/* Map a single area into the IOMMU */
static dma_addr_t gart_map_page(struct device *dev, struct page *page,
				unsigned long offset, size_t size,
				enum dma_data_direction dir,
				struct dma_attrs *attrs)
{
	unsigned long bus;
	phys_addr_t paddr = page_to_phys(page) + offset;

	if (!dev)
		dev = &x86_dma_fallback_dev;

	if (!need_iommu(dev, paddr, size))
		return paddr;

	bus = dma_map_area(dev, paddr, size, dir, 0);
	flush_gart();

	return bus;
}

/*
 * Free a DMA mapping.
 */
static void gart_unmap_page(struct device *dev, dma_addr_t dma_addr,
			    size_t size, enum dma_data_direction dir,
			    struct dma_attrs *attrs)
{
	unsigned long iommu_page;
	int npages;
	int i;

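	/*
	 * Addresses outside the remapping window (including the emergency
	 * pages handed out by iommu_full()) were never remapped, so there
	 * is nothing to free for them.
	 */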
	if (dma_addr < iommu_bus_base + EMERGENCY_PAGES*PAGE_SIZE ||
	    dma_addr >= iommu_bus_base + iommu_size)
		return;

	iommu_page = (dma_addr - iommu_bus_base)>>PAGE_SHIFT;
	npages = iommu_num_pages(dma_addr, size, PAGE_SIZE);
	for (i = 0; i < npages; i++) {
		iommu_gatt_base[iommu_page + i] = gart_unmapped_entry;
	}
	free_iommu(iommu_page, npages);
}

/*
 * Wrapper for pci_unmap_single working with scatterlists.
 */
static void gart_unmap_sg(struct device *dev, struct scatterlist *sg, int nents,
			  enum dma_data_direction dir, struct dma_attrs *attrs)
{
	struct scatterlist *s;
	int i;

	for_each_sg(sg, s, nents, i) {
		if (!s->dma_length || !s->length)
			break;
		gart_unmap_page(dev, s->dma_address, s->dma_length, dir, NULL);
	}
}

/* Fallback for dma_map_sg in case of overflow */
static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg,
			       int nents, int dir)
{
	struct scatterlist *s;
	int i;

#ifdef CONFIG_IOMMU_DEBUG
	printk(KERN_DEBUG "dma_map_sg overflow\n");
#endif

	for_each_sg(sg, s, nents, i) {
		unsigned long addr = sg_phys(s);

		if (nonforced_iommu(dev, addr, s->length)) {
			addr = dma_map_area(dev, addr, s->length, dir, 0);
			if (addr == bad_dma_address) {
				if (i > 0)
					gart_unmap_sg(dev, sg, i, dir, NULL);
				nents = 0;
				sg[0].dma_length = 0;
				break;
			}
		}
		s->dma_address = addr;
		s->dma_length = s->length;
	}
	flush_gart();

	return nents;
}

/* Map multiple scatterlist entries contiguously into the first. */
static int __dma_map_cont(struct device *dev, struct scatterlist *start,
			  int nelems, struct scatterlist *sout,
			  unsigned long pages)
{
	unsigned long iommu_start = alloc_iommu(dev, pages, 0);
	unsigned long iommu_page = iommu_start;
	struct scatterlist *s;
	int i;

	if (iommu_start == -1)
		return -1;

	for_each_sg(start, s, nelems, i) {
		unsigned long pages, addr;
		unsigned long phys_addr = s->dma_address;

		BUG_ON(s != start && s->offset);
		if (s == start) {
			sout->dma_address = iommu_bus_base;
			sout->dma_address += iommu_page*PAGE_SIZE + s->offset;
			sout->dma_length = s->length;
		} else {
			sout->dma_length += s->length;
		}

		addr = phys_addr;
		pages = iommu_num_pages(s->offset, s->length, PAGE_SIZE);
		while (pages--) {
			iommu_gatt_base[iommu_page] = GPTE_ENCODE(addr);
			addr += PAGE_SIZE;
			iommu_page++;
		}
	}
	BUG_ON(iommu_page - iommu_start != pages);

	return 0;
}

static inline int
dma_map_cont(struct device *dev, struct scatterlist *start, int nelems,
	     struct scatterlist *sout, unsigned long pages, int need)
{
	if (!need) {
		BUG_ON(nelems != 1);
		sout->dma_address = start->dma_address;
		sout->dma_length = start->length;
		return 0;
	}
	return __dma_map_cont(dev, start, nelems, sout, pages);
}

/*
 * DMA map all entries in a scatterlist.
 * Merge chunks that have page-aligned sizes into a contiguous mapping.
 */
static int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents,
		       enum dma_data_direction dir, struct dma_attrs *attrs)
{
	struct scatterlist *s, *ps, *start_sg, *sgmap;
	int need = 0, nextneed, i, out, start;
	unsigned long pages = 0;
	unsigned int seg_size;
	unsigned int max_seg_size;

	if (nents == 0)
		return 0;

	if (!dev)
		dev = &x86_dma_fallback_dev;

	out = 0;
	start = 0;
	start_sg = sgmap = sg;
	seg_size = 0;
	max_seg_size = dma_get_max_seg_size(dev);
	ps = NULL; /* shut up gcc */
	for_each_sg(sg, s, nents, i) {
		dma_addr_t addr = sg_phys(s);

		s->dma_address = addr;
		BUG_ON(s->length == 0);

		nextneed = need_iommu(dev, addr, s->length);

		/* Handle the previous not-yet-processed entries */
		if (i > start) {
			/*
			 * Can only merge when the last chunk ends on a
			 * page boundary and the new one doesn't have an
			 * offset.
			 */
			if (!iommu_merge || !nextneed || !need || s->offset ||
			    (s->length + seg_size > max_seg_size) ||
			    (ps->offset + ps->length) % PAGE_SIZE) {
				if (dma_map_cont(dev, start_sg, i - start,
						 sgmap, pages, need) < 0)
					goto error;
				out++;
				seg_size = 0;
				sgmap = sg_next(sgmap);
				pages = 0;
				start = i;
				start_sg = s;
			}
		}

		seg_size += s->length;
		need = nextneed;
		pages += iommu_num_pages(s->offset, s->length, PAGE_SIZE);
		ps = s;
	}
	if (dma_map_cont(dev, start_sg, i - start, sgmap, pages, need) < 0)
		goto error;
	out++;
	flush_gart();
	if (out < nents) {
		sgmap = sg_next(sgmap);
		sgmap->dma_length = 0;
	}
	return out;

error:
	flush_gart();
	gart_unmap_sg(dev, sg, out, dir, NULL);

	/* When it was forced or merged try again in a dumb way */
	if (force_iommu || iommu_merge) {
		out = dma_map_sg_nonforce(dev, sg, nents, dir);
		if (out > 0)
			return out;
	}
	if (panic_on_overflow)
		panic("dma_map_sg: overflow on %lu pages\n", pages);

	iommu_full(dev, pages << PAGE_SHIFT, dir);
	for_each_sg(sg, s, nents, i)
		s->dma_address = bad_dma_address;
	return 0;
}

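/*
 * Note: the align_mask passed to dma_map_area() below makes the returned
 * bus address aligned to the allocation order, matching the DMA API
 * guarantee that coherent mappings are size-aligned.
 */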
/* allocate and map a coherent mapping */
static void *
gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr,
		    gfp_t flag)
{
	dma_addr_t paddr;
	unsigned long align_mask;
	struct page *page;

	if (force_iommu && !(flag & GFP_DMA)) {
		flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32);
		page = alloc_pages(flag | __GFP_ZERO, get_order(size));
		if (!page)
			return NULL;

		align_mask = (1UL << get_order(size)) - 1;
		paddr = dma_map_area(dev, page_to_phys(page), size,
				     DMA_BIDIRECTIONAL, align_mask);

		flush_gart();
		if (paddr != bad_dma_address) {
			*dma_addr = paddr;
			return page_address(page);
		}
		__free_pages(page, get_order(size));
	} else
		return dma_generic_alloc_coherent(dev, size, dma_addr, flag);

	return NULL;
}

/* free a coherent mapping */
static void
gart_free_coherent(struct device *dev, size_t size, void *vaddr,
		   dma_addr_t dma_addr)
{
	gart_unmap_page(dev, dma_addr, size, DMA_BIDIRECTIONAL, NULL);
	free_pages((unsigned long)vaddr, get_order(size));
}

static int no_agp;

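/*
 * Decide how much of the aperture the IOMMU may use.  When the AGP driver
 * shares the aperture, only half of it is taken; the IOMMU region is later
 * carved out of the top of the aperture (see gart_iommu_init()).
 */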
static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size)
{
	unsigned long a;

	if (!iommu_size) {
		iommu_size = aper_size;
		if (!no_agp)
			iommu_size /= 2;
	}

	a = aper + iommu_size;
	iommu_size -= round_up(a, PMD_PAGE_SIZE) - a;

	if (iommu_size < 64*1024*1024) {
		printk(KERN_WARNING
			"PCI-DMA: Warning: Small IOMMU %luMB."
			" Consider increasing the AGP aperture in BIOS\n",
				iommu_size >> 20);
	}

	return iommu_size;
}
static __init unsigned read_aperture(struct pci_dev *dev, u32 *size)
{
	unsigned aper_size = 0, aper_base_32, aper_order;
	u64 aper_base;

	pci_read_config_dword(dev, AMD64_GARTAPERTUREBASE, &aper_base_32);
	pci_read_config_dword(dev, AMD64_GARTAPERTURECTL, &aper_order);
	aper_order = (aper_order >> 1) & 7;

	aper_base = aper_base_32 & 0x7fff;
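	/* The base register holds bits 39-25 of the physical address. */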
	aper_base <<= 25;

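	/* Aperture size is 32MB << order, i.e. 32MB up to 4GB. */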
	aper_size = (32 * 1024 * 1024) << aper_order;
	if (aper_base + aper_size > 0x100000000UL || !aper_size)
		aper_base = 0;

	*size = aper_size;
	return aper_base;
}
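/* Point every northbridge's GART at the shared GATT and enable it. */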
static void enable_gart_translations(void)
{
	int i;

	for (i = 0; i < num_k8_northbridges; i++) {
		struct pci_dev *dev = k8_northbridges[i];

		enable_gart_translation(dev, __pa(agp_gatt_table));
	}
}

/*
 * If fix_up_north_bridges is set, the north bridges have to be fixed up on
 * resume in the same way as they are handled in gart_iommu_hole_init().
 */
static bool fix_up_north_bridges;
static u32 aperture_order;
static u32 aperture_alloc;

void set_up_gart_resume(u32 aper_order, u32 aper_alloc)
{
	fix_up_north_bridges = true;
	aperture_order = aper_order;
	aperture_alloc = aper_alloc;
}

static int gart_resume(struct sys_device *dev)
{
	printk(KERN_INFO "PCI-DMA: Resuming GART IOMMU\n");

	if (fix_up_north_bridges) {
		int i;

		printk(KERN_INFO "PCI-DMA: Restoring GART aperture settings\n");

		for (i = 0; i < num_k8_northbridges; i++) {
			struct pci_dev *dev = k8_northbridges[i];

			/*
			 * Don't enable translations just yet.  That is the next
			 * step.  Restore the pre-suspend aperture settings.
			 */
			pci_write_config_dword(dev, AMD64_GARTAPERTURECTL,
						aperture_order << 1);
			pci_write_config_dword(dev, AMD64_GARTAPERTUREBASE,
						aperture_alloc >> 25);
		}
	}

	enable_gart_translations();

	return 0;
}

static int gart_suspend(struct sys_device *dev, pm_message_t state)
{
	return 0;
}

static struct sysdev_class gart_sysdev_class = {
	.name = "gart",
	.suspend = gart_suspend,
	.resume = gart_resume,
};

static struct sys_device device_gart = {
	.id	= 0,
	.cls	= &gart_sysdev_class,
};

/*
 * Private Northbridge GATT initialization in case we cannot use the
 * AGP driver for some reason.
 */
static __init int init_k8_gatt(struct agp_kern_info *info)
{
	unsigned aper_size, gatt_size, new_aper_size;
	unsigned aper_base, new_aper_base;
	struct pci_dev *dev;
	void *gatt;
	int i, error;

	printk(KERN_INFO "PCI-DMA: Disabling AGP.\n");
	aper_size = aper_base = info->aper_size = 0;
	dev = NULL;
	for (i = 0; i < num_k8_northbridges; i++) {
		dev = k8_northbridges[i];
		new_aper_base = read_aperture(dev, &new_aper_size);
		if (!new_aper_base)
			goto nommu;

		if (!aper_base) {
			aper_size = new_aper_size;
			aper_base = new_aper_base;
		}
		if (aper_size != new_aper_size || aper_base != new_aper_base)
			goto nommu;
	}
	if (!aper_base)
		goto nommu;

	info->aper_base = aper_base;
	info->aper_size = aper_size >> 20;

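	/* One 32-bit GATT entry per aperture page. */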
	gatt_size = (aper_size >> PAGE_SHIFT) * sizeof(u32);
	gatt = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
					get_order(gatt_size));
	if (!gatt)
		panic("Cannot allocate GATT table");
	if (set_memory_uc((unsigned long)gatt, gatt_size >> PAGE_SHIFT))
		panic("Could not set GART PTEs to uncacheable pages");

	agp_gatt_table = gatt;

	error = sysdev_class_register(&gart_sysdev_class);
	if (!error)
		error = sysdev_register(&device_gart);
	if (error)
		panic("Could not register gart_sysdev -- "
		      "would corrupt data on next suspend");

	flush_gart();

	printk(KERN_INFO "PCI-DMA: aperture base @ %x size %u KB\n",
	       aper_base, aper_size>>10);

 nommu:
675
	/* Should not happen anymore */
676
	printk(KERN_WARNING "PCI-DMA: More than 4GB of RAM and no IOMMU\n"
677
	       "falling back to iommu=soft.\n");
678 679
	return -1;
}
L

static struct dma_map_ops gart_dma_ops = {
	.map_sg				= gart_map_sg,
	.unmap_sg			= gart_unmap_sg,
	.map_page			= gart_map_page,
	.unmap_page			= gart_unmap_page,
	.alloc_coherent			= gart_alloc_coherent,
	.free_coherent			= gart_free_coherent,
};

{
	struct pci_dev *dev;
	int i;

	if (no_agp && (dma_ops != &gart_dma_ops))
		return;

	for (i = 0; i < num_k8_northbridges; i++) {
		u32 ctl;

		dev = k8_northbridges[i];
		pci_read_config_dword(dev, AMD64_GARTAPERTURECTL, &ctl);

		ctl &= ~GARTEN;

		pci_write_config_dword(dev, AMD64_GARTAPERTURECTL, ctl);
	}
}

void __init gart_iommu_init(void)
{
	struct agp_kern_info info;
	unsigned long iommu_start;
	unsigned long aper_base, aper_size;
	unsigned long start_pfn, end_pfn;
	unsigned long scratch;
	long i;

	if (cache_k8_northbridges() < 0 || num_k8_northbridges == 0)
		return;

#ifndef CONFIG_AGP_AMD64
	no_agp = 1;
#else
	/* Makefile puts PCI initialization via subsys_initcall first. */
	/* Add other K8 AGP bridge drivers here */
	no_agp = no_agp ||
		(agp_amd64_init() < 0) ||
		(agp_copy_info(agp_bridge, &info) < 0);
#endif

	if (swiotlb)
		return;

	/* Did we detect a different HW IOMMU? */
	if (iommu_detected && !gart_iommu_aperture)
		return;

	if (no_iommu ||
	    (!force_iommu && max_pfn <= MAX_DMA32_PFN) ||
	    !gart_iommu_aperture ||
	    (no_agp && init_k8_gatt(&info) < 0)) {
		if (max_pfn > MAX_DMA32_PFN) {
			printk(KERN_WARNING "More than 4GB of memory "
			       "but GART IOMMU not available.\n");
			printk(KERN_WARNING "falling back to iommu=soft.\n");
		}
		return;
	}

	/* need to map that range */
	aper_size = info.aper_size << 20;
	aper_base = info.aper_base;
	end_pfn = (aper_base>>PAGE_SHIFT) + (aper_size>>PAGE_SHIFT);
	if (end_pfn > max_low_pfn_mapped) {
		start_pfn = (aper_base>>PAGE_SHIFT);
		init_memory_mapping(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
	}

	printk(KERN_INFO "PCI-DMA: using GART IOMMU.\n");
	iommu_size = check_iommu_size(info.aper_base, aper_size);
	iommu_pages = iommu_size >> PAGE_SHIFT;

	iommu_gart_bitmap = (void *) __get_free_pages(GFP_KERNEL | __GFP_ZERO,
						      get_order(iommu_pages/8));
	if (!iommu_gart_bitmap)
		panic("Cannot allocate iommu bitmap\n");

#ifdef CONFIG_IOMMU_LEAK
	if (leak_trace) {
		int ret;

		ret = dma_debug_resize_entries(iommu_pages);
		if (ret)
			printk(KERN_DEBUG
			       "PCI-DMA: Cannot trace all the entries\n");
	}
#endif

	/*
	 * Out of IOMMU space handling.
	 * Reserve some invalid pages at the beginning of the GART.
	 */
	iommu_area_reserve(iommu_gart_bitmap, 0, EMERGENCY_PAGES);

	agp_memory_reserved = iommu_size;
	printk(KERN_INFO
	       "PCI-DMA: Reserving %luMB of IOMMU area in the AGP aperture\n",
	       iommu_size >> 20);

	iommu_start = aper_size - iommu_size;
	iommu_bus_base = info.aper_base + iommu_start;
	bad_dma_address = iommu_bus_base;
	iommu_gatt_base = agp_gatt_table + (iommu_start>>PAGE_SHIFT);

	/*
	 * Unmap the IOMMU part of the GART. The alias of the page is
	 * always mapped with cache enabled and there is no full cache
	 * coherency across the GART remapping. The unmapping avoids
	 * automatic prefetches from the CPU allocating cache lines in
	 * there. All CPU accesses are done via the direct mapping to
	 * the backing memory. The GART address is only used by PCI
	 * devices.
	 */
	set_memory_np((unsigned long)__va(iommu_bus_base),
				iommu_size >> PAGE_SHIFT);
	/*
	 * Tricky. The GART table remaps the physical memory range,
	 * so the CPU won't notice potential aliases and if the memory
	 * is remapped to UC later on, we might surprise the PCI devices
	 * with a stray writeout of a cacheline. So play it safe and
	 * do an explicit, full-scale wbinvd() _after_ having marked all
	 * the pages as Not-Present:
	 */
	wbinvd();
	/*
	 * Now all caches are flushed and we can safely enable
	 * GART hardware.  Doing it early leaves the possibility
	 * of stale cache entries that can lead to GART PTE
	 * errors.
	 */
	enable_gart_translations();
	/*
	 * Try to work around a bug (thanks to BenH):
	 * Set unmapped entries to a scratch page instead of 0.
	 * Any prefetches that hit unmapped entries won't get a bus abort
	 * then. (P2P bridge may be prefetching on DMA reads).
	 */
	scratch = get_zeroed_page(GFP_KERNEL);
	if (!scratch)
		panic("Cannot allocate iommu scratch page");
	gart_unmapped_entry = GPTE_ENCODE(__pa(scratch));
	for (i = EMERGENCY_PAGES; i < iommu_pages; i++)
		iommu_gatt_base[i] = gart_unmapped_entry;

	flush_gart();
	dma_ops = &gart_dma_ops;
}
void __init gart_parse_options(char *p)
{
	int arg;

#ifdef CONFIG_IOMMU_LEAK
	if (!strncmp(p, "leak", 4)) {
		leak_trace = 1;
		p += 4;
		if (*p == '=')
			++p;
		if (isdigit(*p) && get_option(&p, &arg))
			iommu_leak_pages = arg;
	}
#endif
	if (isdigit(*p) && get_option(&p, &arg))
		iommu_size = arg;
	if (!strncmp(p, "fullflush", 8))
		iommu_fullflush = 1;
	if (!strncmp(p, "nofullflush", 11))
		iommu_fullflush = 0;
	if (!strncmp(p, "noagp", 5))
		no_agp = 1;
	if (!strncmp(p, "noaperture", 10))
		fix_aperture = 0;
	/* duplicated from pci-dma.c */
	if (!strncmp(p, "force", 5))
		gart_iommu_aperture_allowed = 1;
	if (!strncmp(p, "allowed", 7))
		gart_iommu_aperture_allowed = 1;
	if (!strncmp(p, "memaper", 7)) {
		fallback_aper_force = 1;
		p += 7;
		if (*p == '=') {
			++p;
			if (get_option(&p, &arg))
				fallback_aper_order = arg;
		}
	}
}