/*
 * Re-map IO memory to kernel address space so that we can access it.
 * This is needed for high PCI addresses that aren't mapped in the
 * 640k-1MB IO memory area on PC's
 *
 * (C) Copyright 1995 1996 Linus Torvalds
 */

#include <linux/bootmem.h>
#include <linux/init.h>
#include <linux/io.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/mmiotrace.h>

#include <asm/cacheflush.h>
#include <asm/e820.h>
#include <asm/fixmap.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <asm/pgalloc.h>
#include <asm/pat.h>

#include "physaddr.h"

/*
 * Fix up the linear direct mapping of the kernel to avoid cache attribute
 * conflicts.
 */
int ioremap_change_attr(unsigned long vaddr, unsigned long size,
			enum page_cache_mode pcm)
{
	unsigned long nrpages = size >> PAGE_SHIFT;
	int err;

	switch (pcm) {
	case _PAGE_CACHE_MODE_UC:
	default:
		err = _set_memory_uc(vaddr, nrpages);
		break;
	case _PAGE_CACHE_MODE_WC:
		err = _set_memory_wc(vaddr, nrpages);
		break;
	case _PAGE_CACHE_MODE_WT:
		err = _set_memory_wt(vaddr, nrpages);
		break;
	case _PAGE_CACHE_MODE_WB:
		err = _set_memory_wb(vaddr, nrpages);
		break;
	}

	return err;
}

static int __ioremap_check_ram(unsigned long start_pfn, unsigned long nr_pages,
			       void *arg)
{
	unsigned long i;

	for (i = 0; i < nr_pages; ++i)
		if (pfn_valid(start_pfn + i) &&
		    !PageReserved(pfn_to_page(start_pfn + i)))
			return 1;

	return 0;
}

/*
 * Remap an arbitrary physical address space into the kernel virtual
 * address space. It transparently creates a kernel huge I/O mapping when
 * the physical address is aligned to a huge page size (1GB or 2MB) and
 * the requested size is at least the huge page size.
 *
 * NOTE: MTRRs can override PAT memory types with a 4KB granularity.
 * Therefore, the mapping code falls back to using smaller pages, down to
 * 4KB, when a mapping range is covered by non-WB MTRRs.
 *
 * NOTE! We need to allow non-page-aligned mappings too: we will obviously
 * have to convert them into an offset in a page-aligned mapping, but the
 * caller shouldn't need to know that small detail.
 */
static void __iomem *__ioremap_caller(resource_size_t phys_addr,
		unsigned long size, enum page_cache_mode pcm, void *caller)
{
	unsigned long offset, vaddr;
	resource_size_t pfn, last_pfn, last_addr;
	const resource_size_t unaligned_phys_addr = phys_addr;
	const unsigned long unaligned_size = size;
	struct vm_struct *area;
	enum page_cache_mode new_pcm;
	pgprot_t prot;
	int retval;
	void __iomem *ret_addr;
	int ram_region;

	/* Don't allow wraparound or zero size */
	last_addr = phys_addr + size - 1;
	if (!size || last_addr < phys_addr)
		return NULL;

	if (!phys_addr_valid(phys_addr)) {
		printk(KERN_WARNING "ioremap: invalid physical address %llx\n",
		       (unsigned long long)phys_addr);
		WARN_ON_ONCE(1);
		return NULL;
	}

	/*
	 * Don't remap the low PCI/ISA area, it's always mapped..
	 */
	if (is_ISA_range(phys_addr, last_addr))
		return (__force void __iomem *)phys_to_virt(phys_addr);

	/*
	 * Don't allow anybody to remap normal RAM that we're using..
	 */
	/* First check if the whole region can be identified as RAM or not */
	ram_region = region_is_ram(phys_addr, size);
	if (ram_region > 0) {
		WARN_ONCE(1, "ioremap on RAM at 0x%lx - 0x%lx\n",
				(unsigned long int)phys_addr,
				(unsigned long int)last_addr);
		return NULL;
	}

	/* If the region could not be identified (-1), check page by page */
	if (ram_region < 0) {
		pfn      = phys_addr >> PAGE_SHIFT;
		last_pfn = last_addr >> PAGE_SHIFT;
		if (walk_system_ram_range(pfn, last_pfn - pfn + 1, NULL,
					  __ioremap_check_ram) == 1) {
			WARN_ONCE(1, "ioremap on RAM at 0x%llx - 0x%llx\n",
					phys_addr, last_addr);
			return NULL;
		}
	}
	/*
	 * Mappings have to be page-aligned
	 */
	offset = phys_addr & ~PAGE_MASK;
	phys_addr &= PHYSICAL_PAGE_MASK;
	size = PAGE_ALIGN(last_addr+1) - phys_addr;

	retval = reserve_memtype(phys_addr, (u64)phys_addr + size,
						pcm, &new_pcm);
	if (retval) {
		printk(KERN_ERR "ioremap reserve_memtype failed %d\n", retval);
		return NULL;
	}

	if (pcm != new_pcm) {
		if (!is_new_memtype_allowed(phys_addr, size, pcm, new_pcm)) {
			printk(KERN_ERR
		"ioremap error for 0x%llx-0x%llx, requested 0x%x, got 0x%x\n",
				(unsigned long long)phys_addr,
				(unsigned long long)(phys_addr + size),
				pcm, new_pcm);
			goto err_free_memtype;
		}
		pcm = new_pcm;
	}

	prot = PAGE_KERNEL_IO;
	switch (pcm) {
	case _PAGE_CACHE_MODE_UC:
	default:
		prot = __pgprot(pgprot_val(prot) |
				cachemode2protval(_PAGE_CACHE_MODE_UC));
		break;
	case _PAGE_CACHE_MODE_UC_MINUS:
		prot = __pgprot(pgprot_val(prot) |
				cachemode2protval(_PAGE_CACHE_MODE_UC_MINUS));
		break;
	case _PAGE_CACHE_MODE_WC:
		prot = __pgprot(pgprot_val(prot) |
				cachemode2protval(_PAGE_CACHE_MODE_WC));
		break;
	case _PAGE_CACHE_MODE_WT:
		prot = __pgprot(pgprot_val(prot) |
				cachemode2protval(_PAGE_CACHE_MODE_WT));
		break;
	case _PAGE_CACHE_MODE_WB:
		break;
	}

	/*
	 * Ok, go for it..
	 */
	area = get_vm_area_caller(size, VM_IOREMAP, caller);
	if (!area)
		goto err_free_memtype;
	area->phys_addr = phys_addr;
	vaddr = (unsigned long) area->addr;

	if (kernel_map_sync_memtype(phys_addr, size, pcm))
		goto err_free_area;

	if (ioremap_page_range(vaddr, vaddr + size, phys_addr, prot))
		goto err_free_area;

	ret_addr = (void __iomem *) (vaddr + offset);
	mmiotrace_ioremap(unaligned_phys_addr, unaligned_size, ret_addr);

	/*
	 * Check if the request spans more than any BAR in the iomem resource
	 * tree.
	 */
	WARN_ONCE(iomem_map_sanity_check(unaligned_phys_addr, unaligned_size),
		  KERN_INFO "Info: mapping multiple BARs. Your kernel is fine.");

	return ret_addr;
err_free_area:
	free_vm_area(area);
err_free_memtype:
	free_memtype(phys_addr, phys_addr + size);
	return NULL;
}

/**
 * ioremap_nocache     -   map bus memory into CPU space
 * @phys_addr:    bus address of the memory
 * @size:      size of the resource to map
 *
 * ioremap_nocache performs a platform specific sequence of operations to
 * make bus memory CPU accessible via the readb/readw/readl/writeb/
 * writew/writel functions and the other mmio helpers. The returned
 * address is not guaranteed to be usable directly as a virtual
 * address.
 *
 * This version of ioremap ensures that the memory is marked uncachable
 * on the CPU as well as honouring existing caching rules from things like
 * the PCI bus. Note that there are other caches and buffers on many
 * busses. In particular, driver authors should read up on PCI writes.
 *
 * It's useful if some control registers are in such an area and
 * write combining or read caching is not desirable:
 *
 * Must be freed with iounmap.
 */
void __iomem *ioremap_nocache(resource_size_t phys_addr, unsigned long size)
{
	/*
	 * Ideally, this should be:
	 *	pat_enabled() ? _PAGE_CACHE_MODE_UC : _PAGE_CACHE_MODE_UC_MINUS;
	 *
	 * Till we fix all X drivers to use ioremap_wc(), we will use
	 * UC MINUS. Drivers that are certain they need or can already
	 * be converted over to strong UC can use ioremap_uc().
	 */
	enum page_cache_mode pcm = _PAGE_CACHE_MODE_UC_MINUS;

	return __ioremap_caller(phys_addr, size, pcm,
				__builtin_return_address(0));
}
EXPORT_SYMBOL(ioremap_nocache);
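
/*
 * Illustrative sketch: the typical driver-side pairing of ioremap_nocache()
 * with iounmap(). The helper name, BAR address, length and register offset
 * below are hypothetical.
 */
static int example_toggle_ctrl_bit(resource_size_t bar_phys,
				   unsigned long bar_len)
{
	void __iomem *regs = ioremap_nocache(bar_phys, bar_len);
	u32 ctrl;

	if (!regs)
		return -ENOMEM;

	ctrl = readl(regs + 0x10);		/* hypothetical CTRL register */
	writel(ctrl | 0x1, regs + 0x10);	/* set a hypothetical enable bit */

	iounmap(regs);
	return 0;
}
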
/**
 * ioremap_uc     -   map bus memory into CPU space as strongly uncachable
 * @phys_addr:    bus address of the memory
 * @size:      size of the resource to map
 *
 * ioremap_uc performs a platform specific sequence of operations to
 * make bus memory CPU accessible via the readb/readw/readl/writeb/
 * writew/writel functions and the other mmio helpers. The returned
 * address is not guaranteed to be usable directly as a virtual
 * address.
 *
 * This version of ioremap ensures that the memory is marked with a strong
 * preference as completely uncachable on the CPU when possible. For non-PAT
 * systems this ends up setting page-attribute flags PCD=1, PWT=1. For PAT
 * systems this will set the PAT entry for the pages as strong UC.  This call
 * will honor existing caching rules from things like the PCI bus. Note that
 * there are other caches and buffers on many busses. In particular driver
 * authors should read up on PCI writes.
 *
 * It's useful if some control registers are in such an area and
 * write combining or read caching is not desirable:
 *
 * Must be freed with iounmap.
 */
void __iomem *ioremap_uc(resource_size_t phys_addr, unsigned long size)
{
	enum page_cache_mode pcm = _PAGE_CACHE_MODE_UC;

	return __ioremap_caller(phys_addr, size, pcm,
				__builtin_return_address(0));
}
EXPORT_SYMBOL_GPL(ioremap_uc);

/**
 * ioremap_wc	-	map memory into CPU space write combined
 * @phys_addr:	bus address of the memory
 * @size:	size of the resource to map
 *
 * This version of ioremap ensures that the memory is marked write combining.
 * Write combining allows faster writes to some hardware devices.
 *
 * Must be freed with iounmap.
 */
void __iomem *ioremap_wc(resource_size_t phys_addr, unsigned long size)
{
	return __ioremap_caller(phys_addr, size, _PAGE_CACHE_MODE_WC,
					__builtin_return_address(0));
}
EXPORT_SYMBOL(ioremap_wc);
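
/*
 * Illustrative sketch: a write-combined mapping of a hypothetical framebuffer
 * aperture, cleared through memset_io(). The caller owns the mapping and must
 * iounmap() it when done.
 */
static void __iomem *example_map_framebuffer(resource_size_t fb_phys,
					     unsigned long fb_len)
{
	void __iomem *fb = ioremap_wc(fb_phys, fb_len);

	if (fb)
		memset_io(fb, 0, fb_len);	/* blank the screen via the WC mapping */

	return fb;
}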

/**
 * ioremap_wt	-	map memory into CPU space write through
 * @phys_addr:	bus address of the memory
 * @size:	size of the resource to map
 *
 * This version of ioremap ensures that the memory is marked write through.
 * Write through stores data into memory while keeping the cache up-to-date.
 *
 * Must be freed with iounmap.
 */
void __iomem *ioremap_wt(resource_size_t phys_addr, unsigned long size)
{
	return __ioremap_caller(phys_addr, size, _PAGE_CACHE_MODE_WT,
					__builtin_return_address(0));
}
EXPORT_SYMBOL(ioremap_wt);

void __iomem *ioremap_cache(resource_size_t phys_addr, unsigned long size)
{
	return __ioremap_caller(phys_addr, size, _PAGE_CACHE_MODE_WB,
				__builtin_return_address(0));
}
EXPORT_SYMBOL(ioremap_cache);
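
/*
 * Illustrative sketch: ioremap_cache() is the cacheable (WB) variant, which
 * suits memory-like regions such as firmware tables. The helper name, table
 * address and length here are hypothetical.
 */
static int example_copy_firmware_table(resource_size_t table_phys,
				       void *buf, size_t len)
{
	void __iomem *table = ioremap_cache(table_phys, len);

	if (!table)
		return -ENOMEM;

	memcpy_fromio(buf, table, len);	/* copy into normal kernel memory */
	iounmap(table);
	return 0;
}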

void __iomem *ioremap_prot(resource_size_t phys_addr, unsigned long size,
				unsigned long prot_val)
{
	return __ioremap_caller(phys_addr, size,
				pgprot2cachemode(__pgprot(prot_val)),
				__builtin_return_address(0));
}
EXPORT_SYMBOL(ioremap_prot);

/**
 * iounmap - Free a IO remapping
 * @addr: virtual address from ioremap_*
 *
 * Caller must ensure there is only one unmapping for the same pointer.
 */
void iounmap(volatile void __iomem *addr)
{
	struct vm_struct *p, *o;

	if ((void __force *)addr <= high_memory)
		return;

	/*
	 * __ioremap special-cases the PCI/ISA range by not instantiating a
	 * vm_area and by simply returning an address into the kernel mapping
	 * of ISA space.   So handle that here.
	 */
	if ((void __force *)addr >= phys_to_virt(ISA_START_ADDRESS) &&
	    (void __force *)addr < phys_to_virt(ISA_END_ADDRESS))
		return;

	addr = (volatile void __iomem *)
		(PAGE_MASK & (unsigned long __force)addr);

	mmiotrace_iounmap(addr);

	/* Use the vm area unlocked, assuming the caller
	   ensures there isn't another iounmap for the same address
	   in parallel. Reuse of the virtual address is prevented by
	   leaving it in the global lists until we're done with it.
	   cpa takes care of the direct mappings. */
	p = find_vm_area((void __force *)addr);

	if (!p) {
		printk(KERN_ERR "iounmap: bad address %p\n", addr);
		dump_stack();
		return;
	}

	free_memtype(p->phys_addr, p->phys_addr + get_vm_area_size(p));

	/* Finally remove it */
	o = remove_vm_area((void __force *)addr);
	BUG_ON(p != o || o == NULL);
	kfree(p);
}
EXPORT_SYMBOL(iounmap);

int __init arch_ioremap_pud_supported(void)
{
#ifdef CONFIG_X86_64
	return cpu_has_gbpages;
#else
	return 0;
#endif
}

int __init arch_ioremap_pmd_supported(void)
{
	return cpu_has_pse;
}

/*
 * Convert a physical pointer to a virtual kernel pointer for /dev/mem
 * access
 */
void *xlate_dev_mem_ptr(phys_addr_t phys)
{
	unsigned long start  = phys &  PAGE_MASK;
	unsigned long offset = phys & ~PAGE_MASK;
	void *vaddr;

	/* If page is RAM, we can use __va. Otherwise ioremap and unmap. */
	if (page_is_ram(start >> PAGE_SHIFT))
		return __va(phys);

	vaddr = ioremap_cache(start, PAGE_SIZE);
	/* Only add the offset on success and return NULL if the ioremap() failed: */
	if (vaddr)
		vaddr += offset;

	return vaddr;
}

void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr)
{
	if (page_is_ram(phys >> PAGE_SHIFT))
		return;

	iounmap((void __iomem *)((unsigned long)addr & PAGE_MASK));
}
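
/*
 * Illustrative sketch of how the two helpers above are meant to be paired,
 * in the style of the /dev/mem read path: translate, access, release. The
 * helper name is hypothetical.
 */
static int example_peek_phys_byte(phys_addr_t phys, u8 *val)
{
	void *p = xlate_dev_mem_ptr(phys);

	if (!p)
		return -EFAULT;

	*val = *(u8 *)p;
	unxlate_dev_mem_ptr(phys, p);
	return 0;
}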

static pte_t bm_pte[PAGE_SIZE/sizeof(pte_t)] __page_aligned_bss;

static inline pmd_t * __init early_ioremap_pmd(unsigned long addr)
{
	/* Don't assume we're using swapper_pg_dir at this point */
	pgd_t *base = __va(read_cr3());
	pgd_t *pgd = &base[pgd_index(addr)];
	pud_t *pud = pud_offset(pgd, addr);
	pmd_t *pmd = pmd_offset(pud, addr);

	return pmd;
}

static inline pte_t * __init early_ioremap_pte(unsigned long addr)
{
	return &bm_pte[pte_index(addr)];
}

bool __init is_early_ioremap_ptep(pte_t *ptep)
{
	return ptep >= &bm_pte[0] && ptep < &bm_pte[PAGE_SIZE/sizeof(pte_t)];
}

void __init early_ioremap_init(void)
{
	pmd_t *pmd;

#ifdef CONFIG_X86_64
	BUILD_BUG_ON((fix_to_virt(0) + PAGE_SIZE) & ((1 << PMD_SHIFT) - 1));
#else
	WARN_ON((fix_to_virt(0) + PAGE_SIZE) & ((1 << PMD_SHIFT) - 1));
#endif

	early_ioremap_setup();

	pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN));
	memset(bm_pte, 0, sizeof(bm_pte));
	pmd_populate_kernel(&init_mm, pmd, bm_pte);

	/*
	 * The boot-ioremap range spans multiple pmds, for which
	 * we are not prepared:
	 */
#define __FIXADDR_TOP (-PAGE_SIZE)
	BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT)
		     != (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT));
#undef __FIXADDR_TOP
	if (pmd != early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END))) {
		WARN_ON(1);
		printk(KERN_WARNING "pmd %p != %p\n",
		       pmd, early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END)));
		printk(KERN_WARNING "fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n",
			fix_to_virt(FIX_BTMAP_BEGIN));
		printk(KERN_WARNING "fix_to_virt(FIX_BTMAP_END):   %08lx\n",
			fix_to_virt(FIX_BTMAP_END));

		printk(KERN_WARNING "FIX_BTMAP_END:       %d\n", FIX_BTMAP_END);
		printk(KERN_WARNING "FIX_BTMAP_BEGIN:     %d\n",
		       FIX_BTMAP_BEGIN);
	}
}

void __init __early_set_fixmap(enum fixed_addresses idx,
			       phys_addr_t phys, pgprot_t flags)
{
	unsigned long addr = __fix_to_virt(idx);
	pte_t *pte;

	if (idx >= __end_of_fixed_addresses) {
		BUG();
		return;
	}
	pte = early_ioremap_pte(addr);

	if (pgprot_val(flags))
		set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags));
	else
		pte_clear(&init_mm, addr, pte);
	__flush_tlb_one(addr);
}