/*
 *  linux/arch/x86_64/mm/init.c
 *
 *  Copyright (C) 1995  Linus Torvalds
 *  Copyright (C) 2000  Pavel Machek <pavel@suse.cz>
 *  Copyright (C) 2002,2003 Andi Kleen <ak@suse.de>
 */

#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/initrd.h>
#include <linux/pagemap.h>
#include <linux/bootmem.h>
#include <linux/proc_fs.h>
#include <linux/pci.h>
#include <linux/pfn.h>
#include <linux/poison.h>
#include <linux/dma-mapping.h>
#include <linux/module.h>
#include <linux/memory_hotplug.h>
#include <linux/nmi.h>

#include <asm/processor.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/dma.h>
#include <asm/fixmap.h>
#include <asm/e820.h>
#include <asm/apic.h>
#include <asm/tlb.h>
#include <asm/mmu_context.h>
#include <asm/proto.h>
#include <asm/smp.h>
#include <asm/sections.h>
#include <asm/kdebug.h>
#include <asm/numa.h>
#include <asm/cacheflush.h>

/*
 * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries.
 * The direct mapping extends to max_pfn_mapped, so that we can directly access
 * apertures, ACPI and other tables without having to play with fixmaps.
 */
unsigned long max_pfn_mapped;

static unsigned long dma_reserve __initdata;

DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);

int direct_gbpages __meminitdata
#ifdef CONFIG_DIRECT_GBPAGES
				= 1
#endif
;

static int __init parse_direct_gbpages_off(char *arg)
{
	direct_gbpages = 0;
	return 0;
}
early_param("nogbpages", parse_direct_gbpages_off);

static int __init parse_direct_gbpages_on(char *arg)
{
	direct_gbpages = 1;
	return 0;
}
early_param("gbpages", parse_direct_gbpages_on);

/*
 * NOTE: pagetable_init() allocates all the fixmap page tables
 * contiguously in physical memory, so we can cache the location of the
 * first one and move around without checking the pgd every time.
 */

void show_mem(void)
{
	long i, total = 0, reserved = 0;
	long shared = 0, cached = 0;
	struct page *page;
	pg_data_t *pgdat;

	printk(KERN_INFO "Mem-info:\n");
	show_free_areas();
	for_each_online_pgdat(pgdat) {
		for (i = 0; i < pgdat->node_spanned_pages; ++i) {
			/*
			 * This loop can take a while with 256 GB and
			 * 4k pages so defer the NMI watchdog:
			 */
			if (unlikely(i % MAX_ORDER_NR_PAGES == 0))
				touch_nmi_watchdog();

			if (!pfn_valid(pgdat->node_start_pfn + i))
				continue;

			page = pfn_to_page(pgdat->node_start_pfn + i);
			total++;
			if (PageReserved(page))
				reserved++;
			else if (PageSwapCache(page))
				cached++;
			else if (page_count(page))
				shared += page_count(page) - 1;
		}
	}
	printk(KERN_INFO "%ld pages of RAM\n",		total);
	printk(KERN_INFO "%ld reserved pages\n",	reserved);
	printk(KERN_INFO "%ld pages shared\n",		shared);
	printk(KERN_INFO "%ld pages swap cached\n",	cached);
}

int after_bootmem;

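/*
 * Allocate one zeroed, page-aligned page for an early page-table level.
 * Once the buddy allocator is up (after_bootmem) use get_zeroed_page(),
 * before that fall back to the bootmem allocator. Panics on failure,
 * since early page-table setup cannot proceed without it.
 */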
static __init void *spp_getpage(void)
{
	void *ptr;

	if (after_bootmem)
		ptr = (void *) get_zeroed_page(GFP_ATOMIC);
	else
		ptr = alloc_bootmem_pages(PAGE_SIZE);

	if (!ptr || ((unsigned long)ptr & ~PAGE_MASK)) {
		panic("set_pte_phys: cannot allocate page data %s\n",
			after_bootmem ? "after bootmem" : "");
	}

	pr_debug("spp_getpage %p\n", ptr);

	return ptr;
}

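/*
 * Install 'new_pte' for 'vaddr' underneath the given pud page,
 * allocating intermediate pmd/pte pages via spp_getpage() as needed,
 * then flush the single affected TLB entry.
 */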
void
set_pte_vaddr_pud(pud_t *pud_page, unsigned long vaddr, pte_t new_pte)
{
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;

	pud = pud_page + pud_index(vaddr);
	if (pud_none(*pud)) {
		pmd = (pmd_t *) spp_getpage();
		pud_populate(&init_mm, pud, pmd);
		if (pmd != pmd_offset(pud, 0)) {
			printk(KERN_ERR "PAGETABLE BUG #01! %p <-> %p\n",
				pmd, pmd_offset(pud, 0));
			return;
		}
	}
	pmd = pmd_offset(pud, vaddr);
	if (pmd_none(*pmd)) {
		pte = (pte_t *) spp_getpage();
		pmd_populate_kernel(&init_mm, pmd, pte);
		if (pte != pte_offset_kernel(pmd, 0)) {
			printk(KERN_ERR "PAGETABLE BUG #02!\n");
			return;
		}
	}

	pte = pte_offset_kernel(pmd, vaddr);
	if (!pte_none(*pte) && pte_val(new_pte) &&
	    pte_val(*pte) != (pte_val(new_pte) & __supported_pte_mask))
		pte_ERROR(*pte);
	set_pte(pte, new_pte);

	/*
	 * It's enough to flush this one mapping.
	 * (PGE mappings get flushed as well)
	 */
	__flush_tlb_one(vaddr);
}

void
set_pte_vaddr(unsigned long vaddr, pte_t pteval)
{
	pgd_t *pgd;
	pud_t *pud_page;

	pr_debug("set_pte_vaddr %lx to %lx\n", vaddr, native_pte_val(pteval));

	pgd = pgd_offset_k(vaddr);
	if (pgd_none(*pgd)) {
		printk(KERN_ERR
			"PGD FIXMAP MISSING, it should be set up in head.S!\n");
		return;
	}
	pud_page = (pud_t *)pgd_page_vaddr(*pgd);
	set_pte_vaddr_pud(pud_page, vaddr, pteval);
}

/*
 * Create large page table mappings for a range of physical addresses.
 */
static void __init __init_extra_mapping(unsigned long phys, unsigned long size,
						pgprot_t prot)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;

	BUG_ON((phys & ~PMD_MASK) || (size & ~PMD_MASK));
	for (; size; phys += PMD_SIZE, size -= PMD_SIZE) {
		pgd = pgd_offset_k((unsigned long)__va(phys));
		if (pgd_none(*pgd)) {
			pud = (pud_t *) spp_getpage();
			set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE |
						_PAGE_USER));
		}
		pud = pud_offset(pgd, (unsigned long)__va(phys));
		if (pud_none(*pud)) {
			pmd = (pmd_t *) spp_getpage();
			set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE |
						_PAGE_USER));
		}
		pmd = pmd_offset(pud, phys);
		BUG_ON(!pmd_none(*pmd));
		set_pmd(pmd, __pmd(phys | pgprot_val(prot)));
	}
}

void __init init_extra_mapping_wb(unsigned long phys, unsigned long size)
{
	__init_extra_mapping(phys, size, PAGE_KERNEL_LARGE);
}

void __init init_extra_mapping_uc(unsigned long phys, unsigned long size)
{
	__init_extra_mapping(phys, size, PAGE_KERNEL_LARGE_NOCACHE);
}

/*
 * The head.S code sets up the kernel high mapping:
 *
 *   from __START_KERNEL_map to __START_KERNEL_map + size (== _end-_text)
 *
 * phys_addr holds the negative offset to the kernel, which is added
 * to the compile-time generated pmds. This results in invalid pmds up
 * to the point where we hit the physaddr 0 mapping.
 *
 * We limit the mappings to the region from _text to _end.  _end is
 * rounded up to the 2MB boundary. This catches the invalid pmds as
 * well, as they are located before _text:
 */
void __init cleanup_highmap(void)
{
	unsigned long vaddr = __START_KERNEL_map;
	unsigned long end = round_up((unsigned long)_end, PMD_SIZE) - 1;
	pmd_t *pmd = level2_kernel_pgt;
	pmd_t *last_pmd = pmd + PTRS_PER_PMD;

	for (; pmd < last_pmd; pmd++, vaddr += PMD_SIZE) {
		if (pmd_none(*pmd))
			continue;
		if (vaddr < (unsigned long) _text || vaddr > end)
			set_pmd(pmd, __pmd(0));
	}
}

static unsigned long __initdata table_start;
static unsigned long __meminitdata table_end;
static unsigned long __meminitdata table_top;

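/*
 * Return a zeroed page for page-table use, and its physical address in
 * *phys. Once the buddy allocator is up the page allocator is used
 * directly; during early boot the pages come from the window reserved
 * by find_early_table_space() and are reached via early_ioremap().
 */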
static __meminit void *alloc_low_page(unsigned long *phys)
{
	unsigned long pfn = table_end++;
	void *adr;

	if (after_bootmem) {
		adr = (void *)get_zeroed_page(GFP_ATOMIC);
		*phys = __pa(adr);

		return adr;
	}

	if (pfn >= table_top)
		panic("alloc_low_page: ran out of memory");

	adr = early_ioremap(pfn * PAGE_SIZE, PAGE_SIZE);
	memset(adr, 0, PAGE_SIZE);
	*phys  = pfn * PAGE_SIZE;
	return adr;
}

static __meminit void unmap_low_page(void *adr)
{
	if (after_bootmem)
		return;

	early_iounmap(adr, PAGE_SIZE);
}

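/*
 * Fill in the pte page at 'pte_page' with 4k kernel mappings for the
 * physical range [addr, end). Already-present entries are left alone;
 * during early boot the remainder of the page is cleared.
 */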
static void __meminit
phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end)
{
	unsigned pages = 0;
	int i;
	pte_t *pte = pte_page + pte_index(addr);

	for (i = pte_index(addr); i < PTRS_PER_PTE;
	     i++, addr += PAGE_SIZE, pte++) {

		if (addr >= end) {
			if (!after_bootmem) {
				for (; i < PTRS_PER_PTE; i++, pte++)
					set_pte(pte, __pte(0));
			}
			break;
		}

		if (pte_val(*pte))
			continue;

		if (0)
			printk("   pte=%p addr=%lx pte=%016lx\n",
			       pte, addr, pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL).pte);
		set_pte(pte, pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL));
		pages++;
	}
	update_page_count(PG_LEVEL_4K, pages);
}

static void __meminit
phys_pte_update(pmd_t *pmd, unsigned long address, unsigned long end)
{
	pte_t *pte = (pte_t *)pmd_page_vaddr(*pmd);

	phys_pte_init(pte, address, end);
}

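/*
 * Map the physical range [address, end) at the pmd level: use a 2MB
 * page per entry when PG_LEVEL_2M is allowed in page_size_mask,
 * otherwise allocate a pte page and fill it with 4k mappings.
 */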
static unsigned long __meminit
phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
			 unsigned long page_size_mask)
{
	unsigned long pages = 0;

	int i = pmd_index(address);

	for (; i < PTRS_PER_PMD; i++, address += PMD_SIZE) {
		unsigned long pte_phys;
		pmd_t *pmd = pmd_page + pmd_index(address);
		pte_t *pte;

		if (address >= end) {
			if (!after_bootmem) {
				for (; i < PTRS_PER_PMD; i++, pmd++)
					set_pmd(pmd, __pmd(0));
			}
			break;
		}

		if (pmd_val(*pmd)) {
			if (!pmd_large(*pmd))
				phys_pte_update(pmd, address, end);
			continue;
		}

		if (page_size_mask & (1<<PG_LEVEL_2M)) {
			pages++;
			set_pte((pte_t *)pmd,
				pfn_pte(address >> PAGE_SHIFT, PAGE_KERNEL_LARGE));
			continue;
		}

		pte = alloc_low_page(&pte_phys);
		phys_pte_init(pte, address, end);
		unmap_low_page(pte);

		pmd_populate_kernel(&init_mm, pmd, __va(pte_phys));
	}
	update_page_count(PG_LEVEL_2M, pages);
	return address;
}

static unsigned long __meminit
phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end,
			 unsigned long page_size_mask)
{
	pmd_t *pmd = pmd_offset(pud, 0);
	unsigned long last_map_addr;

	spin_lock(&init_mm.page_table_lock);
	last_map_addr = phys_pmd_init(pmd, address, end, page_size_mask);
	spin_unlock(&init_mm.page_table_lock);
	__flush_tlb_all();
	return last_map_addr;
}

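/*
 * Map the physical range [addr, end) at the pud level: use a 1GB page
 * per entry when PG_LEVEL_1G is allowed in page_size_mask, otherwise
 * descend to phys_pmd_init(). Returns the last address mapped.
 */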
static unsigned long __meminit
phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
			 unsigned long page_size_mask)
{
	unsigned long pages = 0;
	unsigned long last_map_addr = end;
	int i = pud_index(addr);

	for (; i < PTRS_PER_PUD; i++, addr = (addr & PUD_MASK) + PUD_SIZE) {
		unsigned long pmd_phys;
		pud_t *pud = pud_page + pud_index(addr);
		pmd_t *pmd;

		if (addr >= end)
			break;

		if (!after_bootmem &&
				!e820_any_mapped(addr, addr+PUD_SIZE, 0)) {
			set_pud(pud, __pud(0));
			continue;
		}

		if (pud_val(*pud)) {
			if (!pud_large(*pud))
				last_map_addr = phys_pmd_update(pud, addr, end,
							 page_size_mask);
			continue;
		}

		if (page_size_mask & (1<<PG_LEVEL_1G)) {
			pages++;
			set_pte((pte_t *)pud,
				pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL_LARGE));
			last_map_addr = (addr & PUD_MASK) + PUD_SIZE;
			continue;
		}

		pmd = alloc_low_page(&pmd_phys);

		spin_lock(&init_mm.page_table_lock);
		last_map_addr = phys_pmd_init(pmd, addr, end, page_size_mask);
		unmap_low_page(pmd);
		pud_populate(&init_mm, pud, __va(pmd_phys));
		spin_unlock(&init_mm.page_table_lock);

	}
	__flush_tlb_all();
	update_page_count(PG_LEVEL_1G, pages);

	return last_map_addr;
}

static unsigned long __meminit
phys_pud_update(pgd_t *pgd, unsigned long addr, unsigned long end,
		 unsigned long page_size_mask)
{
	pud_t *pud;

	pud = (pud_t *)pgd_page_vaddr(*pgd);

	return phys_pud_init(pud, addr, end, page_size_mask);
}

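/*
 * Estimate the worst-case number of pud/pmd/pte pages needed to map
 * memory up to 'end' (fewer pmds/ptes are needed when gbpages/PSE
 * large pages are available) and reserve a physical window for them
 * from the e820 map.
 */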
static void __init find_early_table_space(unsigned long end)
{
	unsigned long puds, pmds, ptes, tables, start;

	puds = (end + PUD_SIZE - 1) >> PUD_SHIFT;
	tables = round_up(puds * sizeof(pud_t), PAGE_SIZE);
	if (direct_gbpages) {
		unsigned long extra;
		extra = end - ((end>>PUD_SHIFT) << PUD_SHIFT);
		pmds = (extra + PMD_SIZE - 1) >> PMD_SHIFT;
	} else
		pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
	tables += round_up(pmds * sizeof(pmd_t), PAGE_SIZE);

	if (cpu_has_pse) {
		unsigned long extra;
		extra = end - ((end>>PMD_SHIFT) << PMD_SHIFT);
		ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT;
	} else
		ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT;
	tables += round_up(ptes * sizeof(pte_t), PAGE_SIZE);

	/*
	 * RED-PEN putting page tables only on node 0 could
	 * cause a hotspot and fill up ZONE_DMA. The page tables
	 * need roughly 0.5KB per GB.
	 */
	start = 0x8000;
	table_start = find_e820_area(start, end, tables, PAGE_SIZE);
	if (table_start == -1UL)
		panic("Cannot find space for the kernel page tables");

	table_start >>= PAGE_SHIFT;
	table_end = table_start;
	table_top = table_start + (tables >> PAGE_SHIFT);

	printk(KERN_DEBUG "kernel direct mapping tables up to %lx @ %lx-%lx\n",
		end, table_start << PAGE_SHIFT, table_top << PAGE_SHIFT);
}

static void __init init_gbpages(void)
{
	if (direct_gbpages && cpu_has_gbpages)
		printk(KERN_INFO "Using GB pages for direct mapping\n");
	else
		direct_gbpages = 0;
}

#ifdef CONFIG_MEMTEST
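/*
 * Write 'pattern' over the physical range [start_phys, start_phys+size)
 * and read it back, word by word. Defective ranges are reported and
 * taken out of use with reserve_early().
 */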
static void __init memtest(unsigned long start_phys, unsigned long size,
				 unsigned pattern)
{
	unsigned long i;
	unsigned long *start;
	unsigned long start_bad;
	unsigned long last_bad;
	unsigned long val;
	unsigned long start_phys_aligned;
	unsigned long count;
	unsigned long incr;

	switch (pattern) {
	case 0:
		val = 0UL;
		break;
	case 1:
		val = -1UL;
		break;
	case 2:
		val = 0x5555555555555555UL;
		break;
	case 3:
		val = 0xaaaaaaaaaaaaaaaaUL;
		break;
	default:
		return;
	}

	incr = sizeof(unsigned long);
	start_phys_aligned = ALIGN(start_phys, incr);
	count = (size - (start_phys_aligned - start_phys))/incr;
	start = __va(start_phys_aligned);
	start_bad = 0;
	last_bad = 0;

	for (i = 0; i < count; i++)
		start[i] = val;
	for (i = 0; i < count; i++, start++, start_phys_aligned += incr) {
		if (*start != val) {
			if (start_phys_aligned == last_bad + incr) {
				last_bad += incr;
			} else {
				if (start_bad) {
					printk(KERN_CONT "\n  %016lx bad mem addr %016lx - %016lx reserved",
						val, start_bad, last_bad + incr);
					reserve_early(start_bad, last_bad - start_bad, "BAD RAM");
				}
				start_bad = last_bad = start_phys_aligned;
			}
		}
	}
	if (start_bad) {
		printk(KERN_CONT "\n  %016lx bad mem addr %016lx - %016lx reserved",
			val, start_bad, last_bad + incr);
		reserve_early(start_bad, last_bad - start_bad, "BAD RAM");
	}

}

/* default is disabled */
static int memtest_pattern __initdata;

static int __init parse_memtest(char *arg)
{
	if (arg)
		memtest_pattern = simple_strtoul(arg, NULL, 0);
	return 0;
}

early_param("memtest", parse_memtest);

static void __init early_memtest(unsigned long start, unsigned long end)
{
	u64 t_start, t_size;
	unsigned pattern;

	if (!memtest_pattern)
		return;

	printk(KERN_INFO "early_memtest: pattern num %d", memtest_pattern);
	for (pattern = 0; pattern < memtest_pattern; pattern++) {
		t_start = start;
		t_size = 0;
		while (t_start < end) {
			t_start = find_e820_area_size(t_start, &t_size, 1);

			/* done ? */
			if (t_start >= end)
				break;
			if (t_start + t_size > end)
				t_size = end - t_start;

			printk(KERN_CONT "\n  %016llx - %016llx pattern %d",
				(unsigned long long)t_start,
				(unsigned long long)t_start + t_size, pattern);

			memtest(t_start, t_size, pattern);

			t_start += t_size;
		}
	}
	printk(KERN_CONT "\n");
}
#else
static void __init early_memtest(unsigned long start, unsigned long end)
{
}
#endif

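/*
 * Create the direct mapping for the physical range [start, end), one
 * pgd entry at a time, using the largest page size permitted by
 * page_size_mask. Returns the last physical address mapped.
 */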
static unsigned long __init kernel_physical_mapping_init(unsigned long start,
						unsigned long end,
						unsigned long page_size_mask)
{

	unsigned long next, last_map_addr = end;

	start = (unsigned long)__va(start);
	end = (unsigned long)__va(end);

	for (; start < end; start = next) {
		pgd_t *pgd = pgd_offset_k(start);
		unsigned long pud_phys;
		pud_t *pud;

		next = start + PGDIR_SIZE;
		if (next > end)
			next = end;

		if (pgd_val(*pgd)) {
			last_map_addr = phys_pud_update(pgd, __pa(start),
						 __pa(end), page_size_mask);
			continue;
		}

		if (after_bootmem)
			pud = pud_offset(pgd, start & PGDIR_MASK);
		else
			pud = alloc_low_page(&pud_phys);

		last_map_addr = phys_pud_init(pud, __pa(start), __pa(next),
						 page_size_mask);
		unmap_low_page(pud);
		pgd_populate(&init_mm, pgd_offset_k(start),
			     __va(pud_phys));
	}

	return last_map_addr;
}
/*
 * Set up the direct mapping of the physical memory at PAGE_OFFSET.
 * This runs before bootmem is initialized and gets pages directly from
 * the physical memory. To access them they are temporarily mapped.
 */
unsigned long __init_refok init_memory_mapping(unsigned long start,
					       unsigned long end)
{
	unsigned long last_map_addr = end;
	unsigned long page_size_mask = 0;
	unsigned long start_pfn, end_pfn;

	printk(KERN_INFO "init_memory_mapping\n");

	/*
	 * Find space for the kernel direct mapping tables.
	 *
	 * Later we should allocate these tables in the local node of the
	 * memory mapped. Unfortunately this is done currently before the
	 * nodes are discovered.
	 */
	if (!after_bootmem) {
		init_gbpages();
		find_early_table_space(end);
	}

	if (direct_gbpages)
		page_size_mask |= 1 << PG_LEVEL_1G;
	if (cpu_has_pse)
		page_size_mask |= 1 << PG_LEVEL_2M;

	/* head if not big page alignment? */
	start_pfn = start >> PAGE_SHIFT;
	end_pfn = ((start + (PMD_SIZE - 1)) >> PMD_SHIFT)
			<< (PMD_SHIFT - PAGE_SHIFT);
	if (start_pfn < end_pfn)
		last_map_addr = kernel_physical_mapping_init(
					start_pfn<<PAGE_SHIFT,
					end_pfn<<PAGE_SHIFT, 0);

	/* big page (2M) range */
	start_pfn = ((start + (PMD_SIZE - 1))>>PMD_SHIFT)
			 << (PMD_SHIFT - PAGE_SHIFT);
	end_pfn = ((start + (PUD_SIZE - 1))>>PUD_SHIFT)
			 << (PUD_SHIFT - PAGE_SHIFT);
	if (end_pfn > ((end>>PUD_SHIFT)<<(PUD_SHIFT - PAGE_SHIFT)))
		end_pfn = ((end>>PUD_SHIFT)<<(PUD_SHIFT - PAGE_SHIFT));
	if (start_pfn < end_pfn)
		last_map_addr = kernel_physical_mapping_init(
					     start_pfn<<PAGE_SHIFT,
					     end_pfn<<PAGE_SHIFT,
					     page_size_mask & (1<<PG_LEVEL_2M));

	/* big page (1G) range */
	start_pfn = end_pfn;
	end_pfn = (end>>PUD_SHIFT) << (PUD_SHIFT - PAGE_SHIFT);
	if (start_pfn < end_pfn)
		last_map_addr = kernel_physical_mapping_init(
					     start_pfn<<PAGE_SHIFT,
					     end_pfn<<PAGE_SHIFT,
					     page_size_mask & ((1<<PG_LEVEL_2M)
							 | (1<<PG_LEVEL_1G)));

	/* tail is not big page (1G) alignment */
	start_pfn = end_pfn;
	end_pfn = (end>>PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT);
	if (start_pfn < end_pfn)
		last_map_addr = kernel_physical_mapping_init(
					     start_pfn<<PAGE_SHIFT,
					     end_pfn<<PAGE_SHIFT,
					     page_size_mask & (1<<PG_LEVEL_2M));
	/* tail is not big page (2M) alignment */
	start_pfn = end_pfn;
	end_pfn = end>>PAGE_SHIFT;
	if (start_pfn < end_pfn)
		last_map_addr = kernel_physical_mapping_init(
					     start_pfn<<PAGE_SHIFT,
					     end_pfn<<PAGE_SHIFT, 0);

	if (!after_bootmem)
		mmu_cr4_features = read_cr4();
	__flush_tlb_all();

	if (!after_bootmem && table_end > table_start)
		reserve_early(table_start << PAGE_SHIFT,
				 table_end << PAGE_SHIFT, "PGTABLE");

	printk(KERN_INFO "last_map_addr: %lx end: %lx\n",
			 last_map_addr, end);

	if (!after_bootmem)
		early_memtest(start, end);

	return last_map_addr >> PAGE_SHIFT;
}

#ifndef CONFIG_NUMA
void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn)
{
	unsigned long bootmap_size, bootmap;

	bootmap_size = bootmem_bootmap_pages(end_pfn)<<PAGE_SHIFT;
	bootmap = find_e820_area(0, end_pfn<<PAGE_SHIFT, bootmap_size,
				 PAGE_SIZE);
	if (bootmap == -1L)
		panic("Cannot find bootmem map of size %ld\n", bootmap_size);
	/* don't touch min_low_pfn */
	bootmap_size = init_bootmem_node(NODE_DATA(0), bootmap >> PAGE_SHIFT,
					 0, end_pfn);
	e820_register_active_regions(0, start_pfn, end_pfn);
	free_bootmem_with_active_regions(0, end_pfn);
	early_res_to_bootmem(0, end_pfn<<PAGE_SHIFT);
	reserve_bootmem(bootmap, bootmap_size, BOOTMEM_DEFAULT);
}

void __init paging_init(void)
{
	unsigned long max_zone_pfns[MAX_NR_ZONES];

	memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
	max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN;
	max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN;
	max_zone_pfns[ZONE_NORMAL] = max_pfn;

	memory_present(0, 0, max_pfn);
	sparse_init();
	free_area_init_nodes(max_zone_pfns);
}
#endif

/*
 * Memory hotplug specific functions
 */
#ifdef CONFIG_MEMORY_HOTPLUG
/*
 * Memory is added always to NORMAL zone. This means you will never get
 * additional DMA/DMA32 memory.
 */
int arch_add_memory(int nid, u64 start, u64 size)
{
	struct pglist_data *pgdat = NODE_DATA(nid);
	struct zone *zone = pgdat->node_zones + ZONE_NORMAL;
	unsigned long last_mapped_pfn, start_pfn = start >> PAGE_SHIFT;
	unsigned long nr_pages = size >> PAGE_SHIFT;
	int ret;

	last_mapped_pfn = init_memory_mapping(start, start + size-1);
	if (last_mapped_pfn > max_pfn_mapped)
		max_pfn_mapped = last_mapped_pfn;

	ret = __add_pages(zone, start_pfn, nr_pages);
	WARN_ON(ret);

	return ret;
}
EXPORT_SYMBOL_GPL(arch_add_memory);

#if !defined(CONFIG_ACPI_NUMA) && defined(CONFIG_NUMA)
int memory_add_physaddr_to_nid(u64 start)
{
	return 0;
}
EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
#endif

#endif /* CONFIG_MEMORY_HOTPLUG */

/*
 * devmem_is_allowed() checks to see if /dev/mem access to a certain address
 * is valid. The argument is a physical page number.
 *
 * On x86, access has to be given to the first megabyte of ram because that area
 * contains bios code and data regions used by X and dosemu and similar apps.
 * Access has to be given to non-kernel-ram areas as well, these contain the PCI
 * mmio resources as well as potential bios/acpi data regions.
 */
int devmem_is_allowed(unsigned long pagenr)
{
	if (pagenr <= 256)
		return 1;
	if (!page_is_ram(pagenr))
		return 1;
	return 0;
}

static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel,
			 kcore_modules, kcore_vsyscall;

void __init mem_init(void)
{
	long codesize, reservedpages, datasize, initsize;

	pci_iommu_alloc();

	/* clear_bss() already cleared the empty_zero_page */

	reservedpages = 0;

	/* this will put all low memory onto the freelists */
#ifdef CONFIG_NUMA
	totalram_pages = numa_free_all_bootmem();
#else
	totalram_pages = free_all_bootmem();
#endif
	reservedpages = max_pfn - totalram_pages -
					absent_pages_in_range(0, max_pfn);
	after_bootmem = 1;

	codesize =  (unsigned long) &_etext - (unsigned long) &_text;
	datasize =  (unsigned long) &_edata - (unsigned long) &_etext;
	initsize =  (unsigned long) &__init_end - (unsigned long) &__init_begin;

	/* Register memory areas for /proc/kcore */
	kclist_add(&kcore_mem, __va(0), max_low_pfn << PAGE_SHIFT);
	kclist_add(&kcore_vmalloc, (void *)VMALLOC_START,
		   VMALLOC_END-VMALLOC_START);
	kclist_add(&kcore_kernel, &_stext, _end - _stext);
	kclist_add(&kcore_modules, (void *)MODULES_VADDR, MODULES_LEN);
	kclist_add(&kcore_vsyscall, (void *)VSYSCALL_START,
				 VSYSCALL_END - VSYSCALL_START);

	printk(KERN_INFO "Memory: %luk/%luk available (%ldk kernel code, "
				"%ldk reserved, %ldk data, %ldk init)\n",
		(unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
		max_pfn << (PAGE_SHIFT-10),
		codesize >> 10,
		reservedpages << (PAGE_SHIFT-10),
		datasize >> 10,
		initsize >> 10);

	cpa_init();
}

void free_init_pages(char *what, unsigned long begin, unsigned long end)
{
	unsigned long addr = begin;

	if (addr >= end)
		return;

	/*
	 * If debugging page accesses then do not free this memory but
	 * mark them not present - any buggy init-section access will
	 * create a kernel page fault:
	 */
#ifdef CONFIG_DEBUG_PAGEALLOC
	printk(KERN_INFO "debug: unmapping init memory %08lx..%08lx\n",
		begin, PAGE_ALIGN(end));
	set_memory_np(begin, (end - begin) >> PAGE_SHIFT);
#else
	printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10);

	for (; addr < end; addr += PAGE_SIZE) {
		ClearPageReserved(virt_to_page(addr));
		init_page_count(virt_to_page(addr));
		memset((void *)(addr & ~(PAGE_SIZE-1)),
			POISON_FREE_INITMEM, PAGE_SIZE);
		free_page(addr);
		totalram_pages++;
	}
#endif
}

void free_initmem(void)
{
	free_init_pages("unused kernel memory",
			(unsigned long)(&__init_begin),
			(unsigned long)(&__init_end));
}

#ifdef CONFIG_DEBUG_RODATA
const int rodata_test_data = 0xC3;
EXPORT_SYMBOL_GPL(rodata_test_data);

void mark_rodata_ro(void)
{
	unsigned long start = PFN_ALIGN(_stext), end = PFN_ALIGN(__end_rodata);

	printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n",
	       (end - start) >> 10);
	set_memory_ro(start, (end - start) >> PAGE_SHIFT);

	/*
	 * The rodata section (but not the kernel text!) should also be
	 * not-executable.
	 */
	start = ((unsigned long)__start_rodata + PAGE_SIZE - 1) & PAGE_MASK;
	set_memory_nx(start, (end - start) >> PAGE_SHIFT);

	rodata_test();

#ifdef CONFIG_CPA_DEBUG
	printk(KERN_INFO "Testing CPA: undo %lx-%lx\n", start, end);
	set_memory_rw(start, (end - start) >> PAGE_SHIFT);

	printk(KERN_INFO "Testing CPA: again\n");
	set_memory_ro(start, (end - start) >> PAGE_SHIFT);
#endif
}

#endif

#ifdef CONFIG_BLK_DEV_INITRD
void free_initrd_mem(unsigned long start, unsigned long end)
{
	free_init_pages("initrd memory", start, end);
}
#endif

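/*
 * Reserve [phys, phys+len) in the bootmem allocator, routing the
 * reservation to the owning node(s) under NUMA and accounting
 * reservations below the DMA limit via set_dma_reserve().
 */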
int __init reserve_bootmem_generic(unsigned long phys, unsigned long len,
				   int flags)
{
#ifdef CONFIG_NUMA
	int nid, next_nid;
	int ret;
#endif
	unsigned long pfn = phys >> PAGE_SHIFT;

	if (pfn >= max_pfn) {
		/*
		 * This can happen with kdump kernels when accessing
		 * firmware tables:
		 */
		if (pfn < max_pfn_mapped)
			return -EFAULT;

		printk(KERN_ERR "reserve_bootmem: illegal reserve %lx %lu\n",
				phys, len);
		return -EFAULT;
	}

	/* Should check here against the e820 map to avoid double free */
#ifdef CONFIG_NUMA
	nid = phys_to_nid(phys);
	next_nid = phys_to_nid(phys + len - 1);
	if (nid == next_nid)
		ret = reserve_bootmem_node(NODE_DATA(nid), phys, len, flags);
	else
		ret = reserve_bootmem(phys, len, flags);

	if (ret != 0)
		return ret;

#else
	reserve_bootmem(phys, len, BOOTMEM_DEFAULT);
#endif

	if (phys+len <= MAX_DMA_PFN*PAGE_SIZE) {
		dma_reserve += len / PAGE_SIZE;
		set_dma_reserve(dma_reserve);
	}

	return 0;
}

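/*
 * Check that 'addr' is a canonical kernel virtual address backed by a
 * valid pfn, by walking the page tables down to the pte (or a large
 * pmd) level.
 */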
int kern_addr_valid(unsigned long addr)
{
	unsigned long above = ((long)addr) >> __VIRTUAL_MASK_SHIFT;
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;

	if (above != 0 && above != -1UL)
		return 0;

	pgd = pgd_offset_k(addr);
	if (pgd_none(*pgd))
		return 0;

	pud = pud_offset(pgd, addr);
	if (pud_none(*pud))
		return 0;

	pmd = pmd_offset(pud, addr);
	if (pmd_none(*pmd))
		return 0;

	if (pmd_large(*pmd))
		return pfn_valid(pmd_pfn(*pmd));

	pte = pte_offset_kernel(pmd, addr);
	if (pte_none(*pte))
		return 0;

	return pfn_valid(pte_pfn(*pte));
}

/*
 * A pseudo VMA to allow ptrace access for the vsyscall page.  This only
 * covers the 64bit vsyscall page now. 32bit has a real VMA now and does
 * not need special handling anymore:
 */
static struct vm_area_struct gate_vma = {
	.vm_start	= VSYSCALL_START,
	.vm_end		= VSYSCALL_START + (VSYSCALL_MAPPED_PAGES * PAGE_SIZE),
	.vm_page_prot	= PAGE_READONLY_EXEC,
	.vm_flags	= VM_READ | VM_EXEC
};

struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
{
#ifdef CONFIG_IA32_EMULATION
	if (test_tsk_thread_flag(tsk, TIF_IA32))
		return NULL;
#endif
	return &gate_vma;
}

int in_gate_area(struct task_struct *task, unsigned long addr)
{
	struct vm_area_struct *vma = get_gate_vma(task);

	if (!vma)
		return 0;

	return (addr >= vma->vm_start) && (addr < vma->vm_end);
}

/*
 * Use this when you have no reliable task/vma, typically from interrupt
 * context. It is less reliable than using the task's vma and may give
 * false positives:
 */
int in_gate_area_no_task(unsigned long addr)
{
	return (addr >= VSYSCALL_START) && (addr < VSYSCALL_END);
}

const char *arch_vma_name(struct vm_area_struct *vma)
{
	if (vma->vm_mm && vma->vm_start == (long)vma->vm_mm->context.vdso)
		return "[vdso]";
	if (vma == &gate_vma)
		return "[vsyscall]";
	return NULL;
}

#ifdef CONFIG_SPARSEMEM_VMEMMAP
/*
 * Initialise the sparsemem vmemmap using huge-pages at the PMD level.
 */
static long __meminitdata addr_start, addr_end;
static void __meminitdata *p_start, *p_end;
static int __meminitdata node_start;

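/*
 * Populate the virtual memory map for the given struct page range:
 * with PSE, back it with 2MB pages allocated per node; without PSE,
 * fall back to 4k pages one pte at a time.
 */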
int __meminit
vmemmap_populate(struct page *start_page, unsigned long size, int node)
{
	unsigned long addr = (unsigned long)start_page;
	unsigned long end = (unsigned long)(start_page + size);
	unsigned long next;
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;

	for (; addr < end; addr = next) {
		void *p = NULL;

		pgd = vmemmap_pgd_populate(addr, node);
		if (!pgd)
			return -ENOMEM;

		pud = vmemmap_pud_populate(pgd, addr, node);
		if (!pud)
			return -ENOMEM;

		if (!cpu_has_pse) {
			next = (addr + PAGE_SIZE) & PAGE_MASK;
			pmd = vmemmap_pmd_populate(pud, addr, node);

			if (!pmd)
				return -ENOMEM;

			p = vmemmap_pte_populate(pmd, addr, node);

			if (!p)
				return -ENOMEM;

			addr_end = addr + PAGE_SIZE;
			p_end = p + PAGE_SIZE;
		} else {
			next = pmd_addr_end(addr, end);

			pmd = pmd_offset(pud, addr);
			if (pmd_none(*pmd)) {
				pte_t entry;

				p = vmemmap_alloc_block(PMD_SIZE, node);
				if (!p)
					return -ENOMEM;

				entry = pfn_pte(__pa(p) >> PAGE_SHIFT,
						PAGE_KERNEL_LARGE);
				set_pmd(pmd, __pmd(pte_val(entry)));

				/* check to see if we have contiguous blocks */
				if (p_end != p || node_start != node) {
					if (p_start)
						printk(KERN_DEBUG " [%lx-%lx] PMD -> [%p-%p] on node %d\n",
						       addr_start, addr_end-1, p_start, p_end-1, node_start);
					addr_start = addr;
					node_start = node;
					p_start = p;
				}

				addr_end = addr + PMD_SIZE;
				p_end = p + PMD_SIZE;
			} else
				vmemmap_verify((pte_t *)pmd, node, addr, next);
		}

	}
	return 0;
}

void __meminit vmemmap_populate_print_last(void)
{
	if (p_start) {
		printk(KERN_DEBUG " [%lx-%lx] PMD -> [%p-%p] on node %d\n",
			addr_start, addr_end-1, p_start, p_end-1, node_start);
		p_start = NULL;
		p_end = NULL;
		node_start = 0;
	}
}
#endif