/*
 * Re-map IO memory to kernel address space so that we can access it.
 * This is needed for high PCI addresses that aren't mapped in the
 * 640KB-1MB IO memory area on PCs
 *
 * (C) Copyright 1995 1996 Linus Torvalds
 */

#include <linux/bootmem.h>
#include <linux/init.h>
#include <linux/io.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>

#include <asm/cacheflush.h>
#include <asm/e820.h>
#include <asm/fixmap.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <asm/pgalloc.h>
#include <asm/pat.h>

#ifdef CONFIG_X86_64

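/*
 * Convert a kernel virtual address to a physical address.  Addresses in
 * the kernel text mapping (at or above __START_KERNEL_map) are translated
 * using phys_base; anything else is assumed to lie in the direct mapping
 * that starts at PAGE_OFFSET.
 */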
unsigned long __phys_addr(unsigned long x)
{
	if (x >= __START_KERNEL_map)
		return x - __START_KERNEL_map + phys_base;
	return x - PAGE_OFFSET;
}
EXPORT_SYMBOL(__phys_addr);

static inline int phys_addr_valid(unsigned long addr)
{
	return addr < (1UL << boot_cpu_data.x86_phys_bits);
}

#else

static inline int phys_addr_valid(unsigned long addr)
{
	return 1;
}

#endif

int page_is_ram(unsigned long pagenr)
{
	resource_size_t addr, end;
	int i;

	/*
	 * A special case is the first 4KB of memory: it is a BIOS-owned
	 * area, not kernel RAM, but it is generally not listed as such
	 * in the E820 table.
	 */
	if (pagenr == 0)
		return 0;

	/*
	 * Second special case: some BIOSes report the PC BIOS
	 * area (640KB->1MB) as RAM even though it is not.
	 */
	if (pagenr >= (BIOS_BEGIN >> PAGE_SHIFT) &&
		    pagenr < (BIOS_END >> PAGE_SHIFT))
		return 0;

	for (i = 0; i < e820.nr_map; i++) {
		/*
		 * Not usable memory:
		 */
		if (e820.map[i].type != E820_RAM)
			continue;
		addr = (e820.map[i].addr + PAGE_SIZE-1) >> PAGE_SHIFT;
		end = (e820.map[i].addr + e820.map[i].size) >> PAGE_SHIFT;


		if ((pagenr >= addr) && (pagenr < end))
			return 1;
	}
	return 0;
}

/*
 * Fix up the linear direct mapping of the kernel to avoid cache attribute
 * conflicts.
 */
int ioremap_change_attr(unsigned long vaddr, unsigned long size,
			       unsigned long prot_val)
{
	unsigned long nrpages = size >> PAGE_SHIFT;
	int err;

	switch (prot_val) {
	case _PAGE_CACHE_UC:
	default:
		err = _set_memory_uc(vaddr, nrpages);
		break;
	case _PAGE_CACHE_WC:
		err = _set_memory_wc(vaddr, nrpages);
		break;
	case _PAGE_CACHE_WB:
		err = _set_memory_wb(vaddr, nrpages);
		break;
	}

	return err;
}

/*
 * Remap an arbitrary physical address space into the kernel virtual
 * address space. Needed when the kernel wants to access high addresses
 * directly.
 *
 * NOTE! We need to allow non-page-aligned mappings too: we will obviously
 * have to convert them into an offset in a page-aligned mapping, but the
 * caller shouldn't need to know that small detail.
 */
static void __iomem *__ioremap(resource_size_t phys_addr, unsigned long size,
			       unsigned long prot_val)
{
	unsigned long pfn, offset, vaddr;
	resource_size_t last_addr;
	struct vm_struct *area;
	unsigned long new_prot_val;
	pgprot_t prot;
	int retval;

	/* Don't allow wraparound or zero size */
	last_addr = phys_addr + size - 1;
	if (!size || last_addr < phys_addr)
		return NULL;

	if (!phys_addr_valid(phys_addr)) {
		printk(KERN_WARNING "ioremap: invalid physical address %llx\n",
		       (unsigned long long)phys_addr);
		WARN_ON_ONCE(1);
		return NULL;
	}

	/*
	 * Don't remap the low PCI/ISA area, it's always mapped..
	 */
	if (phys_addr >= ISA_START_ADDRESS && last_addr < ISA_END_ADDRESS)
		return (__force void __iomem *)phys_to_virt(phys_addr);

	/*
	 * Don't allow anybody to remap normal RAM that we're using..
	 */
	for (pfn = phys_addr >> PAGE_SHIFT;
				(pfn << PAGE_SHIFT) < last_addr; pfn++) {

		int is_ram = page_is_ram(pfn);

		if (is_ram && pfn_valid(pfn) && !PageReserved(pfn_to_page(pfn)))
			return NULL;
		WARN_ON_ONCE(is_ram);
	}

	/*
	 * Mappings have to be page-aligned
	 */
	offset = phys_addr & ~PAGE_MASK;
	phys_addr &= PAGE_MASK;
	size = PAGE_ALIGN(last_addr+1) - phys_addr;

	retval = reserve_memtype(phys_addr, phys_addr + size,
						prot_val, &new_prot_val);
	if (retval) {
		pr_debug("Warning: reserve_memtype returned %d\n", retval);
		return NULL;
	}

	if (prot_val != new_prot_val) {
		/*
		 * Do not fallback to certain memory types with certain
		 * requested type:
		 * - request is uncached, return cannot be write-back
		 * - request is uncached, return cannot be write-combine
		 * - request is write-combine, return cannot be write-back
		 */
		if ((prot_val == _PAGE_CACHE_UC &&
		     (new_prot_val == _PAGE_CACHE_WB ||
		      new_prot_val == _PAGE_CACHE_WC)) ||
		    (prot_val == _PAGE_CACHE_WC &&
		     new_prot_val == _PAGE_CACHE_WB)) {
			pr_debug(
		"ioremap error for 0x%llx-0x%llx, requested 0x%lx, got 0x%lx\n",
				(unsigned long long)phys_addr,
				(unsigned long long)(phys_addr + size),
				prot_val, new_prot_val);
			free_memtype(phys_addr, phys_addr + size);
			return NULL;
		}
		prot_val = new_prot_val;
	}

	switch (prot_val) {
	case _PAGE_CACHE_UC:
	default:
		prot = PAGE_KERNEL_NOCACHE;
		break;
	case _PAGE_CACHE_WC:
		prot = PAGE_KERNEL_WC;
		break;
	case _PAGE_CACHE_WB:
		prot = PAGE_KERNEL;
		break;
	}

	/*
	 * Ok, go for it..
	 */
	area = get_vm_area(size, VM_IOREMAP);
	if (!area)
		return NULL;
	area->phys_addr = phys_addr;
	vaddr = (unsigned long) area->addr;
	if (ioremap_page_range(vaddr, vaddr + size, phys_addr, prot)) {
		free_memtype(phys_addr, phys_addr + size);
		free_vm_area(area);
		return NULL;
	}

	if (ioremap_change_attr(vaddr, size, prot_val) < 0) {
		free_memtype(phys_addr, phys_addr + size);
		vunmap(area->addr);
		return NULL;
	}

	return (void __iomem *) (vaddr + offset);
}

/**
 * ioremap_nocache     -   map bus memory into CPU space
 * @phys_addr:    bus address of the memory
 * @size:      size of the resource to map
 *
 * ioremap_nocache performs a platform specific sequence of operations to
 * make bus memory CPU accessible via the readb/readw/readl/writeb/
 * writew/writel functions and the other mmio helpers. The returned
 * address is not guaranteed to be usable directly as a virtual
 * address.
 *
 * This version of ioremap ensures that the memory is marked uncachable
 * on the CPU as well as honouring existing caching rules from things like
 * the PCI bus. Note that there are other caches and buffers on many
 * busses. In particular, driver authors should read up on PCI writes.
 *
 * It's useful if some control registers are in such an area and
 * write combining or read caching is not desirable:
 *
 * Must be freed with iounmap.
 */
void __iomem *ioremap_nocache(resource_size_t phys_addr, unsigned long size)
{
	return __ioremap(phys_addr, size, _PAGE_CACHE_UC);
}
EXPORT_SYMBOL(ioremap_nocache);
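
/*
 * Illustrative usage sketch (hypothetical driver; pdev and FOO_CTRL are
 * made-up names): map a PCI device's first BAR uncached, poke a register,
 * and tear the mapping down again with iounmap().
 *
 *	void __iomem *regs;
 *
 *	regs = ioremap_nocache(pci_resource_start(pdev, 0),
 *			       pci_resource_len(pdev, 0));
 *	if (!regs)
 *		return -ENOMEM;
 *	writel(1, regs + FOO_CTRL);
 *	iounmap(regs);
 */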

/**
 * ioremap_wc	-	map memory into CPU space write combined
 * @phys_addr:	bus address of the memory
 * @size:	size of the resource to map
 *
 * This version of ioremap ensures that the memory is marked write combining.
 * Write combining allows faster writes to some hardware devices.
 *
 * Must be freed with iounmap.
 */
void __iomem *ioremap_wc(unsigned long phys_addr, unsigned long size)
{
	if (pat_wc_enabled)
		return __ioremap(phys_addr, size, _PAGE_CACHE_WC);
	else
		return ioremap_nocache(phys_addr, size);
}
EXPORT_SYMBOL(ioremap_wc);
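
/*
 * Illustrative usage sketch (fb_phys and fb_len are made-up names): a frame
 * buffer driver might prefer a write-combined mapping, but must cope with
 * the transparent fallback to an uncached mapping when PAT is unavailable.
 *
 *	void __iomem *fb = ioremap_wc(fb_phys, fb_len);
 *	if (!fb)
 *		return -ENOMEM;
 */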

void __iomem *ioremap_cache(resource_size_t phys_addr, unsigned long size)
{
	return __ioremap(phys_addr, size, _PAGE_CACHE_WB);
}
EXPORT_SYMBOL(ioremap_cache);

/**
 * iounmap - Free an IO remapping
 * @addr: virtual address from ioremap_*
 *
 * Caller must ensure there is only one unmapping for the same pointer.
 */
void iounmap(volatile void __iomem *addr)
{
	struct vm_struct *p, *o;

	if ((void __force *)addr <= high_memory)
		return;

	/*
	 * __ioremap special-cases the PCI/ISA range by not instantiating a
	 * vm_area and by simply returning an address into the kernel mapping
	 * of ISA space.   So handle that here.
	 */
	if (addr >= phys_to_virt(ISA_START_ADDRESS) &&
	    addr < phys_to_virt(ISA_END_ADDRESS))
		return;

	addr = (volatile void __iomem *)
		(PAGE_MASK & (unsigned long __force)addr);

	/*
	 * Use the vm area unlocked, assuming the caller ensures there isn't
	 * another iounmap for the same address in parallel.  Reuse of the
	 * virtual address is prevented by leaving it in the global lists
	 * until we're done with it.  cpa takes care of the direct mappings.
	 */
	read_lock(&vmlist_lock);
	for (p = vmlist; p; p = p->next) {
		if (p->addr == addr)
			break;
	}
	read_unlock(&vmlist_lock);

	if (!p) {
		printk(KERN_ERR "iounmap: bad address %p\n", addr);
		dump_stack();
		return;
	}

	free_memtype(p->phys_addr, p->phys_addr + get_vm_area_size(p));

	/* Finally remove it */
	o = remove_vm_area((void *)addr);
	BUG_ON(p != o || o == NULL);
	kfree(p);
}
EXPORT_SYMBOL(iounmap);

#ifdef CONFIG_X86_32

int __initdata early_ioremap_debug;

static int __init early_ioremap_debug_setup(char *str)
{
	early_ioremap_debug = 1;

	return 0;
}
early_param("early_ioremap_debug", early_ioremap_debug_setup);

static __initdata int after_paging_init;
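
/*
 * One page-aligned page-table page backing the FIX_BTMAP fixmap slots;
 * early_ioremap() installs its boot-time mappings into these ptes.
 */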
static pte_t bm_pte[PAGE_SIZE/sizeof(pte_t)]
		__section(.bss.page_aligned);

static inline pmd_t * __init early_ioremap_pmd(unsigned long addr)
{
	/* Don't assume we're using swapper_pg_dir at this point */
	pgd_t *base = __va(read_cr3());
	pgd_t *pgd = &base[pgd_index(addr)];
	pud_t *pud = pud_offset(pgd, addr);
	pmd_t *pmd = pmd_offset(pud, addr);

	return pmd;
}

static inline pte_t * __init early_ioremap_pte(unsigned long addr)
{
	return &bm_pte[pte_index(addr)];
}

void __init early_ioremap_init(void)
{
	pmd_t *pmd;

	if (early_ioremap_debug)
		printk(KERN_INFO "early_ioremap_init()\n");

	pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN));
	memset(bm_pte, 0, sizeof(bm_pte));
	pmd_populate_kernel(&init_mm, pmd, bm_pte);

	/*
	 * The boot-ioremap range spans multiple pmds, for which
	 * we are not prepared:
	 */
	if (pmd != early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END))) {
		WARN_ON(1);
		printk(KERN_WARNING "pmd %p != %p\n",
		       pmd, early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END)));
		printk(KERN_WARNING "fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n",
			fix_to_virt(FIX_BTMAP_BEGIN));
		printk(KERN_WARNING "fix_to_virt(FIX_BTMAP_END):   %08lx\n",
			fix_to_virt(FIX_BTMAP_END));

		printk(KERN_WARNING "FIX_BTMAP_END:       %d\n", FIX_BTMAP_END);
		printk(KERN_WARNING "FIX_BTMAP_BEGIN:     %d\n",
		       FIX_BTMAP_BEGIN);
	}
}

void __init early_ioremap_clear(void)
{
	pmd_t *pmd;

	if (early_ioremap_debug)
		printk(KERN_INFO "early_ioremap_clear()\n");

	pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN));
	pmd_clear(pmd);
	paravirt_release_pt(__pa(bm_pte) >> PAGE_SHIFT);
	__flush_tlb_all();
}

void __init early_ioremap_reset(void)
{
	enum fixed_addresses idx;
	unsigned long addr, phys;
	pte_t *pte;

	after_paging_init = 1;
	for (idx = FIX_BTMAP_BEGIN; idx >= FIX_BTMAP_END; idx--) {
		addr = fix_to_virt(idx);
		pte = early_ioremap_pte(addr);
		if (pte_present(*pte)) {
			phys = pte_val(*pte) & PAGE_MASK;
			set_fixmap(idx, phys);
		}
	}
}

static void __init __early_set_fixmap(enum fixed_addresses idx,
				   unsigned long phys, pgprot_t flags)
{
	unsigned long addr = __fix_to_virt(idx);
	pte_t *pte;

	if (idx >= __end_of_fixed_addresses) {
		BUG();
		return;
	}
	pte = early_ioremap_pte(addr);
	if (pgprot_val(flags))
		set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags));
	else
		pte_clear(NULL, addr, pte);
	__flush_tlb_one(addr);
}

static inline void __init early_set_fixmap(enum fixed_addresses idx,
					unsigned long phys)
{
	if (after_paging_init)
		set_fixmap(idx, phys);
	else
		__early_set_fixmap(idx, phys, PAGE_KERNEL);
}

static inline void __init early_clear_fixmap(enum fixed_addresses idx)
{
	if (after_paging_init)
		clear_fixmap(idx);
	else
		__early_set_fixmap(idx, 0, __pgprot(0));
}


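/*
 * Depth of currently outstanding early_ioremap() mappings; bounded by
 * FIX_BTMAPS_NESTING and checked for leaks from a late_initcall.
 */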
int __initdata early_ioremap_nested;

static int __init check_early_ioremap_leak(void)
{
	if (!early_ioremap_nested)
		return 0;

	printk(KERN_WARNING
	       "Debug warning: early ioremap leak of %d areas detected.\n",
	       early_ioremap_nested);
	printk(KERN_WARNING
	       "please boot with early_ioremap_debug and report the dmesg.\n");
	WARN_ON(1);

	return 1;
}
late_initcall(check_early_ioremap_leak);

void __init *early_ioremap(unsigned long phys_addr, unsigned long size)
{
	unsigned long offset, last_addr;
	unsigned int nrpages, nesting;
	enum fixed_addresses idx0, idx;

	WARN_ON(system_state != SYSTEM_BOOTING);

	nesting = early_ioremap_nested;
	if (early_ioremap_debug) {
		printk(KERN_INFO "early_ioremap(%08lx, %08lx) [%d] => ",
		       phys_addr, size, nesting);
		dump_stack();
	}

	/* Don't allow wraparound or zero size */
	last_addr = phys_addr + size - 1;
	if (!size || last_addr < phys_addr) {
		WARN_ON(1);
		return NULL;
	}

	if (nesting >= FIX_BTMAPS_NESTING) {
		WARN_ON(1);
		return NULL;
	}
	early_ioremap_nested++;
	/*
	 * Mappings have to be page-aligned
	 */
	offset = phys_addr & ~PAGE_MASK;
	phys_addr &= PAGE_MASK;
	size = PAGE_ALIGN(last_addr + 1) - phys_addr;

	/*
	 * Mappings have to fit in the FIX_BTMAP area.
	 */
	nrpages = size >> PAGE_SHIFT;
	if (nrpages > NR_FIX_BTMAPS) {
		WARN_ON(1);
		return NULL;
	}

	/*
	 * Ok, go for it..
	 */
	idx0 = FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*nesting;
	idx = idx0;
	while (nrpages > 0) {
		early_set_fixmap(idx, phys_addr);
		phys_addr += PAGE_SIZE;
		--idx;
		--nrpages;
	}
	if (early_ioremap_debug)
		printk(KERN_CONT "%08lx + %08lx\n", offset, fix_to_virt(idx0));

	return (void *) (offset + fix_to_virt(idx0));
}
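
/*
 * Illustrative usage sketch (table_phys and hdr are made-up names): peek at
 * a firmware table during early boot, before the normal ioremap() machinery
 * is available, and drop the temporary mapping right away.
 *
 *	hdr = early_ioremap(table_phys, sizeof(*hdr));
 *	length = hdr->length;
 *	early_iounmap(hdr, sizeof(*hdr));
 */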

void __init early_iounmap(void *addr, unsigned long size)
{
	unsigned long virt_addr;
	unsigned long offset;
	unsigned int nrpages;
	enum fixed_addresses idx;
	unsigned int nesting;

	nesting = --early_ioremap_nested;
	WARN_ON(early_ioremap_nested < 0);

	if (early_ioremap_debug) {
		printk(KERN_INFO "early_iounmap(%p, %08lx) [%d]\n", addr,
		       size, nesting);
		dump_stack();
	}

	virt_addr = (unsigned long)addr;
	if (virt_addr < fix_to_virt(FIX_BTMAP_BEGIN)) {
		WARN_ON(1);
		return;
	}
	offset = virt_addr & ~PAGE_MASK;
	nrpages = PAGE_ALIGN(offset + size - 1) >> PAGE_SHIFT;

	idx = FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*nesting;
	while (nrpages > 0) {
		early_clear_fixmap(idx);
		--idx;
		--nrpages;
	}
}

void __this_fixmap_does_not_exist(void)
{
	WARN_ON(1);
}

#endif /* CONFIG_X86_32 */