/*
 * Re-map IO memory to kernel address space so that we can access it.
 * This is needed for high PCI addresses that aren't mapped in the
 * 640k-1MB IO memory area on PC's
 *
 * (C) Copyright 1995 1996 Linus Torvalds
 */

#include <linux/bootmem.h>
#include <linux/init.h>
#include <linux/io.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/mmiotrace.h>

#include <asm/cacheflush.h>
#include <asm/e820.h>
#include <asm/fixmap.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <asm/pgalloc.h>
#include <asm/pat.h>

#include "physaddr.h"

/*
 * Fix up the linear direct mapping of the kernel to avoid cache attribute
 * conflicts.
 */
int ioremap_change_attr(unsigned long vaddr, unsigned long size,
			enum page_cache_mode pcm)
{
	unsigned long nrpages = size >> PAGE_SHIFT;
	int err;

	switch (pcm) {
	case _PAGE_CACHE_MODE_UC:
	default:
		err = _set_memory_uc(vaddr, nrpages);
		break;
	case _PAGE_CACHE_MODE_WC:
		err = _set_memory_wc(vaddr, nrpages);
		break;
	case _PAGE_CACHE_MODE_WT:
		err = _set_memory_wt(vaddr, nrpages);
		break;
	case _PAGE_CACHE_MODE_WB:
		err = _set_memory_wb(vaddr, nrpages);
		break;
	}

	return err;
}

static int __ioremap_check_ram(unsigned long start_pfn, unsigned long nr_pages,
			       void *arg)
{
	unsigned long i;

	for (i = 0; i < nr_pages; ++i)
		if (pfn_valid(start_pfn + i) &&
		    !PageReserved(pfn_to_page(start_pfn + i)))
			return 1;

	return 0;
}

/*
 * Remap an arbitrary physical address space into the kernel virtual
 * address space. It transparently creates kernel huge I/O mapping when
 * the physical address is aligned by a huge page size (1GB or 2MB) and
 * the requested size is at least the huge page size.
 *
 * NOTE: MTRRs can override PAT memory types with a 4KB granularity.
 * Therefore, the mapping code falls back to use a smaller page toward 4KB
 * when a mapping range is covered by non-WB type of MTRRs.
 *
 * NOTE! We need to allow non-page-aligned mappings too: we will obviously
 * have to convert them into an offset in a page-aligned mapping, but the
 * caller shouldn't need to know that small detail.
 */
static void __iomem *__ioremap_caller(resource_size_t phys_addr,
		unsigned long size, enum page_cache_mode pcm, void *caller)
{
	unsigned long offset, vaddr;
	resource_size_t pfn, last_pfn, last_addr;
	const resource_size_t unaligned_phys_addr = phys_addr;
	const unsigned long unaligned_size = size;
	struct vm_struct *area;
	enum page_cache_mode new_pcm;
	pgprot_t prot;
	int retval;
	void __iomem *ret_addr;

	/* Don't allow wraparound or zero size */
	last_addr = phys_addr + size - 1;
	if (!size || last_addr < phys_addr)
		return NULL;

	if (!phys_addr_valid(phys_addr)) {
		printk(KERN_WARNING "ioremap: invalid physical address %llx\n",
		       (unsigned long long)phys_addr);
		WARN_ON_ONCE(1);
		return NULL;
	}

	/*
	 * Don't remap the low PCI/ISA area, it's always mapped..
	 */
	if (is_ISA_range(phys_addr, last_addr))
		return (__force void __iomem *)phys_to_virt(phys_addr);

	/*
	 * Don't allow anybody to remap normal RAM that we're using..
	 */
	pfn      = phys_addr >> PAGE_SHIFT;
	last_pfn = last_addr >> PAGE_SHIFT;
	if (walk_system_ram_range(pfn, last_pfn - pfn + 1, NULL,
					  __ioremap_check_ram) == 1) {
		WARN_ONCE(1, "ioremap on RAM at %pa - %pa\n",
			  &phys_addr, &last_addr);
		return NULL;
	}

	/*
	 * Mappings have to be page-aligned
	 */
	offset = phys_addr & ~PAGE_MASK;
	phys_addr &= PHYSICAL_PAGE_MASK;
	size = PAGE_ALIGN(last_addr+1) - phys_addr;

	retval = reserve_memtype(phys_addr, (u64)phys_addr + size,
						pcm, &new_pcm);
	if (retval) {
		printk(KERN_ERR "ioremap reserve_memtype failed %d\n", retval);
		return NULL;
	}

	if (pcm != new_pcm) {
		if (!is_new_memtype_allowed(phys_addr, size, pcm, new_pcm)) {
			printk(KERN_ERR
		"ioremap error for 0x%llx-0x%llx, requested 0x%x, got 0x%x\n",
				(unsigned long long)phys_addr,
				(unsigned long long)(phys_addr + size),
				pcm, new_pcm);
			goto err_free_memtype;
		}
		pcm = new_pcm;
	}

	prot = PAGE_KERNEL_IO;
	switch (pcm) {
	case _PAGE_CACHE_MODE_UC:
	default:
		prot = __pgprot(pgprot_val(prot) |
				cachemode2protval(_PAGE_CACHE_MODE_UC));
		break;
	case _PAGE_CACHE_MODE_UC_MINUS:
		prot = __pgprot(pgprot_val(prot) |
				cachemode2protval(_PAGE_CACHE_MODE_UC_MINUS));
		break;
	case _PAGE_CACHE_MODE_WC:
		prot = __pgprot(pgprot_val(prot) |
				cachemode2protval(_PAGE_CACHE_MODE_WC));
		break;
	case _PAGE_CACHE_MODE_WT:
		prot = __pgprot(pgprot_val(prot) |
				cachemode2protval(_PAGE_CACHE_MODE_WT));
		break;
	case _PAGE_CACHE_MODE_WB:
		break;
	}

	/*
	 * Ok, go for it..
	 */
	area = get_vm_area_caller(size, VM_IOREMAP, caller);
	if (!area)
		goto err_free_memtype;
	area->phys_addr = phys_addr;
	vaddr = (unsigned long) area->addr;

	if (kernel_map_sync_memtype(phys_addr, size, pcm))
		goto err_free_area;

	if (ioremap_page_range(vaddr, vaddr + size, phys_addr, prot))
		goto err_free_area;

	ret_addr = (void __iomem *) (vaddr + offset);
	mmiotrace_ioremap(unaligned_phys_addr, unaligned_size, ret_addr);

	/*
	 * Check if the request spans more than any BAR in the iomem resource
	 * tree.
	 */
	WARN_ONCE(iomem_map_sanity_check(unaligned_phys_addr, unaligned_size),
		  KERN_INFO "Info: mapping multiple BARs. Your kernel is fine.");

	return ret_addr;
err_free_area:
	free_vm_area(area);
err_free_memtype:
	free_memtype(phys_addr, phys_addr + size);
	return NULL;
}

/**
 * ioremap_nocache     -   map bus memory into CPU space
 * @phys_addr:    bus address of the memory
 * @size:      size of the resource to map
 *
 * ioremap_nocache performs a platform specific sequence of operations to
 * make bus memory CPU accessible via the readb/readw/readl/writeb/
 * writew/writel functions and the other mmio helpers. The returned
 * address is not guaranteed to be usable directly as a virtual
 * address.
 *
 * This version of ioremap ensures that the memory is marked uncachable
 * on the CPU as well as honouring existing caching rules from things like
 * the PCI bus. Note that there are other caches and buffers on many
 * busses. In particular driver authors should read up on PCI writes.
 *
 * It's useful if some control registers are in such an area and
 * write combining or read caching is not desirable:
 *
 * Must be freed with iounmap.
 */
void __iomem *ioremap_nocache(resource_size_t phys_addr, unsigned long size)
{
	/*
	 * Ideally, this should be:
	 *	pat_enabled() ? _PAGE_CACHE_MODE_UC : _PAGE_CACHE_MODE_UC_MINUS;
	 *
	 * Till we fix all X drivers to use ioremap_wc(), we will use
	 * UC MINUS. Drivers that are certain they need or can already
	 * be converted over to strong UC can use ioremap_uc().
	 */
	enum page_cache_mode pcm = _PAGE_CACHE_MODE_UC_MINUS;

	return __ioremap_caller(phys_addr, size, pcm,
				__builtin_return_address(0));
}
EXPORT_SYMBOL(ioremap_nocache);
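
/*
 * Illustrative usage sketch (not part of this file): a driver typically
 * maps an MMIO BAR once at probe time and releases it with iounmap().
 * "bar_phys" and the 0x10 register offset below are hypothetical.
 *
 *	void __iomem *regs = ioremap_nocache(bar_phys, 0x1000);
 *	if (!regs)
 *		return -ENOMEM;
 *	writel(1, regs + 0x10);
 *	...
 *	iounmap(regs);
 */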

/**
 * ioremap_uc     -   map bus memory into CPU space as strongly uncachable
 * @phys_addr:    bus address of the memory
 * @size:      size of the resource to map
 *
 * ioremap_uc performs a platform specific sequence of operations to
 * make bus memory CPU accessible via the readb/readw/readl/writeb/
 * writew/writel functions and the other mmio helpers. The returned
 * address is not guaranteed to be usable directly as a virtual
 * address.
 *
 * This version of ioremap ensures that the memory is marked with a strong
 * preference as completely uncachable on the CPU when possible. For non-PAT
 * systems this ends up setting page-attribute flags PCD=1, PWT=1. For PAT
 * systems this will set the PAT entry for the pages as strong UC.  This call
 * will honor existing caching rules from things like the PCI bus. Note that
 * there are other caches and buffers on many busses. In particular driver
 * authors should read up on PCI writes.
 *
 * It's useful if some control registers are in such an area and
 * write combining or read caching is not desirable:
 *
 * Must be freed with iounmap.
 */
void __iomem *ioremap_uc(resource_size_t phys_addr, unsigned long size)
{
	enum page_cache_mode pcm = _PAGE_CACHE_MODE_UC;

	return __ioremap_caller(phys_addr, size, pcm,
				__builtin_return_address(0));
}
EXPORT_SYMBOL_GPL(ioremap_uc);
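
/*
 * Note: ioremap_nocache() above currently maps UC- (see the comment in it),
 * which an overlapping WC MTRR can still combine down to write-combining;
 * ioremap_uc() requests strong UC, which is not overridden that way.
 */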

/**
 * ioremap_wc	-	map memory into CPU space write combined
 * @phys_addr:	bus address of the memory
 * @size:	size of the resource to map
 *
 * This version of ioremap ensures that the memory is marked write combining.
 * Write combining allows faster writes to some hardware devices.
 *
 * Must be freed with iounmap.
 */
void __iomem *ioremap_wc(resource_size_t phys_addr, unsigned long size)
{
	return __ioremap_caller(phys_addr, size, _PAGE_CACHE_MODE_WC,
					__builtin_return_address(0));
}
EXPORT_SYMBOL(ioremap_wc);
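
/*
 * Illustrative sketch: write-combining is typically used for large,
 * write-mostly apertures such as a graphics frame buffer.  "fb_phys" and
 * "fb_len" below are hypothetical.
 *
 *	void __iomem *fb = ioremap_wc(fb_phys, fb_len);
 *	if (!fb)
 *		return -ENOMEM;
 *	memset_io(fb, 0, fb_len);
 *	...
 *	iounmap(fb);
 */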

/**
 * ioremap_wt	-	map memory into CPU space write through
 * @phys_addr:	bus address of the memory
 * @size:	size of the resource to map
 *
 * This version of ioremap ensures that the memory is marked write through.
 * Write through stores data into memory while keeping the cache up-to-date.
 *
 * Must be freed with iounmap.
 */
void __iomem *ioremap_wt(resource_size_t phys_addr, unsigned long size)
{
	return __ioremap_caller(phys_addr, size, _PAGE_CACHE_MODE_WT,
					__builtin_return_address(0));
}
EXPORT_SYMBOL(ioremap_wt);
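
/*
 * ioremap_cache() below maps the range with normal write-back caching
 * (_PAGE_CACHE_MODE_WB).  xlate_dev_mem_ptr() in this file uses it to
 * access non-RAM pages through /dev/mem.
 */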

void __iomem *ioremap_cache(resource_size_t phys_addr, unsigned long size)
{
	return __ioremap_caller(phys_addr, size, _PAGE_CACHE_MODE_WB,
				__builtin_return_address(0));
}
EXPORT_SYMBOL(ioremap_cache);

void __iomem *ioremap_prot(resource_size_t phys_addr, unsigned long size,
				unsigned long prot_val)
{
	return __ioremap_caller(phys_addr, size,
				pgprot2cachemode(__pgprot(prot_val)),
				__builtin_return_address(0));
}
EXPORT_SYMBOL(ioremap_prot);
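
/*
 * Note on ioremap_prot() above: only the cache-mode bits of @prot_val are
 * honoured here; the value is reduced with pgprot2cachemode() and the final
 * protections are rebuilt from PAGE_KERNEL_IO in __ioremap_caller().
 */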

/**
 * iounmap - Free a IO remapping
 * @addr: virtual address from ioremap_*
 *
 * Caller must ensure there is only one unmapping for the same pointer.
 */
void iounmap(volatile void __iomem *addr)
{
	struct vm_struct *p, *o;

	if ((void __force *)addr <= high_memory)
		return;

	/*
	 * __ioremap special-cases the PCI/ISA range by not instantiating a
	 * vm_area and by simply returning an address into the kernel mapping
	 * of ISA space.   So handle that here.
	 */
	if ((void __force *)addr >= phys_to_virt(ISA_START_ADDRESS) &&
	    (void __force *)addr < phys_to_virt(ISA_END_ADDRESS))
		return;

	addr = (volatile void __iomem *)
		(PAGE_MASK & (unsigned long __force)addr);

	mmiotrace_iounmap(addr);

	/* Use the vm area unlocked, assuming the caller
	   ensures there isn't another iounmap for the same address
	   in parallel. Reuse of the virtual address is prevented by
	   leaving it in the global lists until we're done with it.
	   cpa takes care of the direct mappings. */
	p = find_vm_area((void __force *)addr);

	if (!p) {
		printk(KERN_ERR "iounmap: bad address %p\n", addr);
		dump_stack();
		return;
	}

	free_memtype(p->phys_addr, p->phys_addr + get_vm_area_size(p));

	/* Finally remove it */
	o = remove_vm_area((void __force *)addr);
	BUG_ON(p != o || o == NULL);
	kfree(p);
}
EXPORT_SYMBOL(iounmap);
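
/*
 * The two helpers below report whether huge-page I/O mappings may be used:
 * PUD-sized (1GB) mappings require gbpages support, PMD-sized (2MB) mappings
 * require PSE.  See the note on huge I/O mappings above __ioremap_caller().
 */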

int __init arch_ioremap_pud_supported(void)
{
#ifdef CONFIG_X86_64
	return cpu_has_gbpages;
#else
	return 0;
#endif
}

int __init arch_ioremap_pmd_supported(void)
{
	return cpu_has_pse;
}

/*
 * Convert a physical pointer to a virtual kernel pointer for /dev/mem
 * access
 */
void *xlate_dev_mem_ptr(phys_addr_t phys)
{
	unsigned long start  = phys &  PAGE_MASK;
	unsigned long offset = phys & ~PAGE_MASK;
	void *vaddr;

	/* If page is RAM, we can use __va. Otherwise ioremap and unmap. */
	if (page_is_ram(start >> PAGE_SHIFT))
		return __va(phys);

	vaddr = ioremap_cache(start, PAGE_SIZE);
	/* Only add the offset on success and return NULL if the ioremap() failed: */
	if (vaddr)
		vaddr += offset;

	return vaddr;
}
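
/*
 * unxlate_dev_mem_ptr() below undoes the above: RAM pages came from __va()
 * and need no teardown, while non-RAM pages are iounmap()'d again (the
 * in-page offset is masked off to recover the mapping base).
 */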

void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr)
{
	if (page_is_ram(phys >> PAGE_SHIFT))
		return;

	iounmap((void __iomem *)((unsigned long)addr & PAGE_MASK));
}

static pte_t bm_pte[PAGE_SIZE/sizeof(pte_t)] __page_aligned_bss;

static inline pmd_t * __init early_ioremap_pmd(unsigned long addr)
{
	/* Don't assume we're using swapper_pg_dir at this point */
	pgd_t *base = __va(read_cr3());
	pgd_t *pgd = &base[pgd_index(addr)];
	pud_t *pud = pud_offset(pgd, addr);
	pmd_t *pmd = pmd_offset(pud, addr);

	return pmd;
}

static inline pte_t * __init early_ioremap_pte(unsigned long addr)
{
	return &bm_pte[pte_index(addr)];
}

bool __init is_early_ioremap_ptep(pte_t *ptep)
{
	return ptep >= &bm_pte[0] && ptep < &bm_pte[PAGE_SIZE/sizeof(pte_t)];
}
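
/*
 * early_ioremap_init() below wires the static bm_pte[] page table into the
 * pmd covering the FIX_BTMAP fixmap slots, so that __early_set_fixmap() and
 * the early_ioremap() machinery can create temporary mappings before the
 * normal ioremap path (vmalloc area, memtype tracking) is available.
 */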

void __init early_ioremap_init(void)
{
	pmd_t *pmd;

#ifdef CONFIG_X86_64
	BUILD_BUG_ON((fix_to_virt(0) + PAGE_SIZE) & ((1 << PMD_SHIFT) - 1));
#else
	WARN_ON((fix_to_virt(0) + PAGE_SIZE) & ((1 << PMD_SHIFT) - 1));
#endif

	early_ioremap_setup();

	pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN));
	memset(bm_pte, 0, sizeof(bm_pte));
	pmd_populate_kernel(&init_mm, pmd, bm_pte);

	/*
	 * The boot-ioremap range spans multiple pmds, for which
	 * we are not prepared:
	 */
#define __FIXADDR_TOP (-PAGE_SIZE)
	BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT)
		     != (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT));
#undef __FIXADDR_TOP
	if (pmd != early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END))) {
		WARN_ON(1);
		printk(KERN_WARNING "pmd %p != %p\n",
		       pmd, early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END)));
		printk(KERN_WARNING "fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n",
			fix_to_virt(FIX_BTMAP_BEGIN));
		printk(KERN_WARNING "fix_to_virt(FIX_BTMAP_END):   %08lx\n",
			fix_to_virt(FIX_BTMAP_END));

		printk(KERN_WARNING "FIX_BTMAP_END:       %d\n", FIX_BTMAP_END);
		printk(KERN_WARNING "FIX_BTMAP_BEGIN:     %d\n",
		       FIX_BTMAP_BEGIN);
	}
}

void __init __early_set_fixmap(enum fixed_addresses idx,
			       phys_addr_t phys, pgprot_t flags)
{
	unsigned long addr = __fix_to_virt(idx);
	pte_t *pte;

	if (idx >= __end_of_fixed_addresses) {
		BUG();
		return;
	}
	pte = early_ioremap_pte(addr);

	if (pgprot_val(flags))
		set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags));
	else
		pte_clear(&init_mm, addr, pte);
	__flush_tlb_one(addr);
}