e820_64.c 22.8 KB
Newer Older
1
/*
 * Handle the memory map.
 * The functions here do the job until bootmem takes over.
 *
 *  Getting sanitize_e820_map() in sync with i386 version by applying change:
 *  -  Provisions for empty E820 memory regions (reported by certain BIOSes).
 *     Alex Achenbach <xela@slit.de>, December 2002.
 *  Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
 *
 */
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/bootmem.h>
#include <linux/ioport.h>
#include <linux/string.h>
17
#include <linux/kexec.h>
18
#include <linux/module.h>
19
#include <linux/mm.h>
20 21
#include <linux/suspend.h>
#include <linux/pfn.h>
22

23
#include <asm/pgtable.h>
L
Linus Torvalds 已提交
24 25 26
#include <asm/page.h>
#include <asm/e820.h>
#include <asm/proto.h>
27
#include <asm/setup.h>
28
#include <asm/sections.h>
29
#include <asm/kdebug.h>
30
#include <asm/trampoline.h>
L
Linus Torvalds 已提交
31

32
struct e820map e820;
A
Andi Kleen 已提交
33

34
/*
L
Linus Torvalds 已提交
35 36
 * PFN of last memory page.
 */
37
unsigned long end_pfn;
L
Linus Torvalds 已提交
38

39
/*
40 41
 * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries.
 * The direct mapping extends to max_pfn_mapped, so that we can directly access
L
Linus Torvalds 已提交
42
 * apertures, ACPI and other tables without having to play with fixmaps.
43
 */
44
unsigned long max_pfn_mapped;
L
Linus Torvalds 已提交
45

46
/*
L
Linus Torvalds 已提交
47 48
 * Last pfn which the user wants to use.
 */
49
static unsigned long __initdata end_user_pfn = MAXMEM>>PAGE_SHIFT;
L
Linus Torvalds 已提交
50

51 52 53 54
/*
 * Early reserved memory areas.
 *
 * Each entry describes the half-open physical range [start, end).
 * An entry whose 'end' is zero marks the first unused slot: every walker
 * of this table stops there.
 */
#define MAX_EARLY_RES 20

struct early_res {
	unsigned long start, end;
	char name[16];
};
static struct early_res early_res[MAX_EARLY_RES] __initdata = {
	{ 0, PAGE_SIZE, "BIOS data page" },			/* BIOS data page */
#ifdef CONFIG_X86_TRAMPOLINE
	{ TRAMPOLINE_BASE, TRAMPOLINE_BASE + 2 * PAGE_SIZE, "TRAMPOLINE" },
#endif
	{}
};

68
/*
 * Record the range [start, end) in the early reservation table.
 *
 * @start: first byte of the range
 * @end:   one past the last byte of the range
 * @name:  optional label (may be NULL); at most 15 characters are kept
 *
 * Panics if the range overlaps an existing reservation or if the table
 * is already full.
 */
void __init reserve_early(unsigned long start, unsigned long end, char *name)
{
	int i;
	struct early_res *r;

	/* Walk the used slots; the first slot with end == 0 is free. */
	for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
		r = &early_res[i];
		if (end > r->start && start < r->end)
			panic("Overlapping early reservations %lx-%lx %s to %lx-%lx %s\n",
			      start, end - 1, name ? name : "", r->start, r->end - 1, r->name);
	}
	if (i >= MAX_EARLY_RES)
		panic("Too many early reservations");
	r = &early_res[i];
	r->start = start;
	r->end = end;
	/*
	 * Copy at most sizeof(name) - 1 bytes: the table is statically
	 * zero-initialised, so the final byte stays NUL.
	 */
	if (name)
		strncpy(r->name, name, sizeof(r->name) - 1);
}
86

87 88 89 90 91
/*
 * Log every early reservation and hand it to reserve_bootmem_generic().
 */
void __init early_res_to_bootmem(void)
{
	int i;

	for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
		struct early_res *r = &early_res[i];

		printk(KERN_INFO "early res: %d [%lx-%lx] %s\n", i,
			r->start, r->end - 1, r->name);
		reserve_bootmem_generic(r->start, r->end - r->start);
	}
}
97

98
/* Check for already reserved areas */
99
static inline int __init
100
bad_addr(unsigned long *addrp, unsigned long size, unsigned long align)
101 102 103 104 105 106 107 108 109
{
	int i;
	unsigned long addr = *addrp, last;
	int changed = 0;
again:
	last = addr + size;
	for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
		struct early_res *r = &early_res[i];
		if (last >= r->start && addr < r->end) {
110
			*addrp = addr = round_up(r->end, align);
111 112 113
			changed = 1;
			goto again;
		}
114
	}
115
	return changed;
116
}
L
Linus Torvalds 已提交
117

Y
Yinghai Lu 已提交
118
/* Check for already reserved areas */
119
static inline int __init
Y
Yinghai Lu 已提交
120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151
bad_addr_size(unsigned long *addrp, unsigned long *sizep, unsigned long align)
{
	int i;
	unsigned long addr = *addrp, last;
	unsigned long size = *sizep;
	int changed = 0;
again:
	last = addr + size;
	for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
		struct early_res *r = &early_res[i];
		if (last > r->start && addr < r->start) {
			size = r->start - addr;
			changed = 1;
			goto again;
		}
		if (last > r->end && addr < r->end) {
			addr = round_up(r->end, align);
			size = last - addr;
			changed = 1;
			goto again;
		}
		if (last <= r->end && addr >= r->start) {
			(*sizep)++;
			return 0;
		}
	}
	if (changed) {
		*addrp = addr;
		*sizep = size;
	}
	return changed;
}
152 153 154 155
/*
 * This function checks if any part of the range <start,end> is mapped
 * with type.
 */
156
int
157
e820_any_mapped(unsigned long start, unsigned long end, unsigned type)
158
{
L
Linus Torvalds 已提交
159
	int i;
160 161 162 163 164

	for (i = 0; i < e820.nr_map; i++) {
		struct e820entry *ei = &e820.map[i];

		if (type && ei->type != type)
L
Linus Torvalds 已提交
165
			continue;
166
		if (ei->addr >= end || ei->addr + ei->size <= start)
167 168 169
			continue;
		return 1;
	}
L
Linus Torvalds 已提交
170 171
	return 0;
}
172
EXPORT_SYMBOL_GPL(e820_any_mapped);
L
Linus Torvalds 已提交
173

174 175 176 177 178 179
/*
 * This function checks if the entire range <start,end> is mapped with type.
 *
 * Note: this function only works correct if the e820 table is sorted and
 * not-overlapping, which is the case
 */
180 181
int __init e820_all_mapped(unsigned long start, unsigned long end,
			   unsigned type)
182 183
{
	int i;
184

185 186
	for (i = 0; i < e820.nr_map; i++) {
		struct e820entry *ei = &e820.map[i];
187

188 189 190 191 192 193 194 195 196 197 198
		if (type && ei->type != type)
			continue;
		/* is the region (part) in overlap with the current region ?*/
		if (ei->addr >= end || ei->addr + ei->size <= start)
			continue;

		/* if the region is at the beginning of <start,end> we move
		 * start to the end of the region since it's ok until there
		 */
		if (ei->addr <= start)
			start = ei->addr + ei->size;
199 200 201 202
		/*
		 * if start is now at or beyond end, we're done, full
		 * coverage
		 */
203
		if (start >= end)
204
			return 1;
205 206 207 208
	}
	return 0;
}

209
/*
 * Find a free area with specified alignment in a specific range.
 *
 * @start: lowest acceptable address
 * @end:   the area must end at or before this address
 * @size:  number of bytes needed
 * @align: required alignment of the returned address
 *
 * Only E820_RAM entries are considered, and early reservations are
 * skipped. Returns the address of the area, or -1UL if none was found.
 */
unsigned long __init find_e820_area(unsigned long start, unsigned long end,
				    unsigned long size, unsigned long align)
{
	int i;

	for (i = 0; i < e820.nr_map; i++) {
		struct e820entry *ei = &e820.map[i];
		unsigned long addr, last;
		unsigned long ei_last;

		if (ei->type != E820_RAM)
			continue;
		addr = round_up(ei->addr, align);
		ei_last = ei->addr + ei->size;
		if (addr < start)
			addr = round_up(start, align);
		if (addr >= ei_last)
			continue;
		/* step over early reservations while still inside this entry */
		while (bad_addr(&addr, size, align) && addr+size <= ei_last)
			;
		last = addr + size;
		if (last > ei_last)
			continue;
		if (last > end)
			continue;
		return addr;
	}
	return -1UL;
}
L
Linus Torvalds 已提交
241

Y
Yinghai Lu 已提交
242 243 244
/*
 * Find next free range after *start
 */
Y
Yinghai Lu 已提交
245 246 247
unsigned long __init find_e820_area_size(unsigned long start,
					 unsigned long *sizep,
					 unsigned long align)
Y
Yinghai Lu 已提交
248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264
{
	int i;

	for (i = 0; i < e820.nr_map; i++) {
		struct e820entry *ei = &e820.map[i];
		unsigned long addr, last;
		unsigned long ei_last;

		if (ei->type != E820_RAM)
			continue;
		addr = round_up(ei->addr, align);
		ei_last = ei->addr + ei->size;
		if (addr < start)
			addr = round_up(start, align);
		if (addr >= ei_last)
			continue;
		*sizep = ei_last - addr;
Y
Yinghai Lu 已提交
265 266
		while (bad_addr_size(&addr, sizep, align) &&
			addr + *sizep <= ei_last)
Y
Yinghai Lu 已提交
267 268 269 270 271 272 273 274 275
			;
		last = addr + *sizep;
		if (last > ei_last)
			continue;
		return addr;
	}
	return -1UL;

}
L
Linus Torvalds 已提交
276 277 278 279 280
/*
 * Find the highest page frame number we have available
 *
 * Also raises max_pfn_mapped to cover that pfn and caps it at the MAXMEM
 * limit. The returned value is additionally limited by end_user_pfn and
 * by max_pfn_mapped. The global end_pfn is not written here; the caller
 * gets the result as the return value.
 */
unsigned long __init e820_end_of_ram(void)
{
	/* named so that it does not shadow the global end_pfn */
	unsigned long last_pfn;

	last_pfn = find_max_pfn_with_active_regions();

	if (last_pfn > max_pfn_mapped)
		max_pfn_mapped = last_pfn;
	if (max_pfn_mapped > MAXMEM>>PAGE_SHIFT)
		max_pfn_mapped = MAXMEM>>PAGE_SHIFT;
	if (last_pfn > end_user_pfn)
		last_pfn = end_user_pfn;
	if (last_pfn > max_pfn_mapped)
		last_pfn = max_pfn_mapped;

	printk(KERN_INFO "max_pfn_mapped = %lu\n", max_pfn_mapped);
	return last_pfn;
}

298
/*
L
Linus Torvalds 已提交
299 300
 * Mark e820 reserved areas as busy for the resource manager.
 */
301
void __init e820_reserve_resources(void)
L
Linus Torvalds 已提交
302 303
{
	int i;
304 305 306
	struct resource *res;

	res = alloc_bootmem_low(sizeof(struct resource) * e820.nr_map);
L
Linus Torvalds 已提交
307 308 309 310 311 312 313 314 315 316
	for (i = 0; i < e820.nr_map; i++) {
		switch (e820.map[i].type) {
		case E820_RAM:	res->name = "System RAM"; break;
		case E820_ACPI:	res->name = "ACPI Tables"; break;
		case E820_NVS:	res->name = "ACPI Non-volatile Storage"; break;
		default:	res->name = "reserved";
		}
		res->start = e820.map[i].addr;
		res->end = res->start + e820.map[i].size - 1;
		res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
317
		insert_resource(&iomem_resource, res);
318
		res++;
L
Linus Torvalds 已提交
319 320 321
	}
}

322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339
/*
 * Find the ranges of physical addresses that do not correspond to
 * e820 RAM areas and mark the corresponding pages as nosave for software
 * suspend and suspend to RAM.
 *
 * This function requires the e820 map to be sorted and without any
 * overlapping entries and assumes the first e820 area to be RAM.
 */
void __init e820_mark_nosave_regions(void)
{
	int i;
	unsigned long paddr;

	/* paddr tracks the page-aligned end of the previous entry */
	paddr = round_down(e820.map[0].addr + e820.map[0].size, PAGE_SIZE);
	for (i = 1; i < e820.nr_map; i++) {
		struct e820entry *ei = &e820.map[i];

		/* hole between the previous entry and this one */
		if (paddr < ei->addr)
			register_nosave_region(PFN_DOWN(paddr),
						PFN_UP(ei->addr));

		paddr = round_down(ei->addr + ei->size, PAGE_SIZE);
		/* the entry itself, if it is not RAM */
		if (ei->type != E820_RAM)
			register_nosave_region(PFN_UP(ei->addr),
						PFN_DOWN(paddr));

		/* nothing of interest beyond the last RAM page */
		if (paddr >= (end_pfn << PAGE_SHIFT))
			break;
	}
}

353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369
/*
 * Finds an active region in the address range from start_pfn to end_pfn and
 * returns its range in ei_startpfn and ei_endpfn for the e820 entry.
 *
 * Only whole pages of an E820_RAM entry count; the result is clipped to
 * <start_pfn,end_pfn> and to end_user_pfn.
 *
 * Side effect: for a non-RAM entry of at least one page, max_pfn_mapped
 * is raised to the end of that entry.
 *
 * Returns 1 if *ei_startpfn / *ei_endpfn describe a usable region, 0 if
 * the entry contributes nothing (the outputs are still written).
 */
static int __init e820_find_active_region(const struct e820entry *ei,
					  unsigned long start_pfn,
					  unsigned long end_pfn,
					  unsigned long *ei_startpfn,
					  unsigned long *ei_endpfn)
{
	*ei_startpfn = round_up(ei->addr, PAGE_SIZE) >> PAGE_SHIFT;
	*ei_endpfn = round_down(ei->addr + ei->size, PAGE_SIZE) >> PAGE_SHIFT;

	/* Skip map entries smaller than a page */
	if (*ei_startpfn >= *ei_endpfn)
		return 0;

	/* Check if max_pfn_mapped should be updated */
	if (ei->type != E820_RAM && *ei_endpfn > max_pfn_mapped)
		max_pfn_mapped = *ei_endpfn;

	/* Skip if map is outside the node */
	if (ei->type != E820_RAM || *ei_endpfn <= start_pfn ||
				    *ei_startpfn >= end_pfn)
		return 0;

	/* Check for overlaps */
	if (*ei_startpfn < start_pfn)
		*ei_startpfn = start_pfn;
	if (*ei_endpfn > end_pfn)
		*ei_endpfn = end_pfn;

	/* Obey end_user_pfn to save on memmap */
	if (*ei_startpfn >= end_user_pfn)
		return 0;
	if (*ei_endpfn > end_user_pfn)
		*ei_endpfn = end_user_pfn;

	return 1;
}

394 395 396 397 398
/*
 * Walk the e820 map and register active regions within a node.
 *
 * Every usable region found in <start_pfn,end_pfn> is passed to
 * add_active_range() for node @nid.
 */
void __init
e820_register_active_regions(int nid, unsigned long start_pfn,
							unsigned long end_pfn)
{
	unsigned long ei_startpfn;
	unsigned long ei_endpfn;
	int i;

	for (i = 0; i < e820.nr_map; i++)
		if (e820_find_active_region(&e820.map[i],
					    start_pfn, end_pfn,
					    &ei_startpfn, &ei_endpfn))
			add_active_range(nid, ei_startpfn, ei_endpfn);
}

410
/*
L
Linus Torvalds 已提交
411
 * Add a memory region to the kernel e820 map.
412
 */
L
Linus Torvalds 已提交
413 414 415 416 417 418 419 420 421 422 423 424 425 426 427
void __init add_memory_region(unsigned long start, unsigned long size, int type)
{
	int x = e820.nr_map;

	if (x == E820MAX) {
		printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
		return;
	}

	e820.map[x].addr = start;
	e820.map[x].size = size;
	e820.map[x].type = type;
	e820.nr_map++;
}

428 429 430 431 432 433 434 435 436
/*
 * Find the hole size (in bytes) in the memory range.
 * @start: starting address of the memory range to scan
 * @end: ending address of the memory range to scan
 *
 * Returns the size of the range minus the RAM pages that
 * e820_find_active_region() reports inside it.
 */
unsigned long __init e820_hole_size(unsigned long start, unsigned long end)
{
	unsigned long start_pfn = start >> PAGE_SHIFT;
	unsigned long end_pfn = end >> PAGE_SHIFT;
	unsigned long ei_startpfn, ei_endpfn, ram = 0;
	int i;

	for (i = 0; i < e820.nr_map; i++) {
		if (e820_find_active_region(&e820.map[i],
					    start_pfn, end_pfn,
					    &ei_startpfn, &ei_endpfn))
			ram += ei_endpfn - ei_startpfn;
	}
	return end - start - (ram << PAGE_SHIFT);
}

449
/*
 * Log every entry of the kernel e820 map, one line per entry.
 * @who: prefix printed at the start of each line
 */
static void __init e820_print_map(char *who)
{
	int i;

	for (i = 0; i < e820.nr_map; i++) {
		printk(KERN_INFO " %s: %016Lx - %016Lx ", who,
		       (unsigned long long) e820.map[i].addr,
		       (unsigned long long)
		       (e820.map[i].addr + e820.map[i].size));
		switch (e820.map[i].type) {
		case E820_RAM:
			printk(KERN_CONT "(usable)\n");
			break;
		case E820_RESERVED:
			printk(KERN_CONT "(reserved)\n");
			break;
		case E820_ACPI:
			printk(KERN_CONT "(ACPI data)\n");
			break;
		case E820_NVS:
			printk(KERN_CONT "(ACPI NVS)\n");
			break;
		default:
			printk(KERN_CONT "type %u\n", e820.map[i].type);
			break;
		}
	}
}

/*
 * Sanitize the BIOS e820 map.
 *
481
 * Some e820 responses include overlapping entries. The following
L
Linus Torvalds 已提交
482 483 484
 * replaces the original e820 map with a new one, removing overlaps.
 *
 */
485
static int __init sanitize_e820_map(struct e820entry *biosmap, char *pnr_map)
L
Linus Torvalds 已提交
486 487 488 489 490 491 492 493 494 495 496 497 498 499 500
{
	struct change_member {
		struct e820entry *pbios; /* pointer to original bios entry */
		unsigned long long addr; /* address for this change point */
	};
	static struct change_member change_point_list[2*E820MAX] __initdata;
	static struct change_member *change_point[2*E820MAX] __initdata;
	static struct e820entry *overlap_list[E820MAX] __initdata;
	static struct e820entry new_bios[E820MAX] __initdata;
	struct change_member *change_tmp;
	unsigned long current_type, last_type;
	unsigned long long last_addr;
	int chgidx, still_changing;
	int overlap_entries;
	int new_bios_entry;
501
	int old_nr, new_nr, chg_nr;
L
Linus Torvalds 已提交
502 503 504
	int i;

	/*
505 506
		Visually we're performing the following
		(1,2,3,4 = memory types)...
L
Linus Torvalds 已提交
507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547

		Sample memory map (w/overlaps):
		   ____22__________________
		   ______________________4_
		   ____1111________________
		   _44_____________________
		   11111111________________
		   ____________________33__
		   ___________44___________
		   __________33333_________
		   ______________22________
		   ___________________2222_
		   _________111111111______
		   _____________________11_
		   _________________4______

		Sanitized equivalent (no overlap):
		   1_______________________
		   _44_____________________
		   ___1____________________
		   ____22__________________
		   ______11________________
		   _________1______________
		   __________3_____________
		   ___________44___________
		   _____________33_________
		   _______________2________
		   ________________1_______
		   _________________4______
		   ___________________2____
		   ____________________33__
		   ______________________4_
	*/

	/* if there's only one memory region, don't bother */
	if (*pnr_map < 2)
		return -1;

	old_nr = *pnr_map;

	/* bail out if we find any unreasonable addresses in bios map */
548
	for (i = 0; i < old_nr; i++)
L
Linus Torvalds 已提交
549 550 551 552
		if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr)
			return -1;

	/* create pointers for initial change-point information (for sorting) */
553
	for (i = 0; i < 2 * old_nr; i++)
L
Linus Torvalds 已提交
554 555
		change_point[i] = &change_point_list[i];

556 557
	/* record all known change-points (starting and ending addresses),
	   omitting those that are for empty memory regions */
L
Linus Torvalds 已提交
558
	chgidx = 0;
559
	for (i = 0; i < old_nr; i++)	{
560 561 562
		if (biosmap[i].size != 0) {
			change_point[chgidx]->addr = biosmap[i].addr;
			change_point[chgidx++]->pbios = &biosmap[i];
563 564
			change_point[chgidx]->addr = biosmap[i].addr +
				biosmap[i].size;
565 566
			change_point[chgidx++]->pbios = &biosmap[i];
		}
L
Linus Torvalds 已提交
567
	}
568
	chg_nr = chgidx;
L
Linus Torvalds 已提交
569 570 571 572 573

	/* sort change-point list by memory addresses (low -> high) */
	still_changing = 1;
	while (still_changing)	{
		still_changing = 0;
574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592
		for (i = 1; i < chg_nr; i++)  {
			unsigned long long curaddr, lastaddr;
			unsigned long long curpbaddr, lastpbaddr;

			curaddr = change_point[i]->addr;
			lastaddr = change_point[i - 1]->addr;
			curpbaddr = change_point[i]->pbios->addr;
			lastpbaddr = change_point[i - 1]->pbios->addr;

			/*
			 * swap entries, when:
			 *
			 * curaddr > lastaddr or
			 * curaddr == lastaddr and curaddr == curpbaddr and
			 * lastaddr != lastpbaddr
			 */
			if (curaddr < lastaddr ||
			    (curaddr == lastaddr && curaddr == curpbaddr &&
			     lastaddr != lastpbaddr)) {
L
Linus Torvalds 已提交
593 594 595
				change_tmp = change_point[i];
				change_point[i] = change_point[i-1];
				change_point[i-1] = change_tmp;
596
				still_changing = 1;
L
Linus Torvalds 已提交
597 598 599 600 601
			}
		}
	}

	/* create a new bios memory map, removing overlaps */
602 603
	overlap_entries = 0;	 /* number of entries in the overlap table */
	new_bios_entry = 0;	 /* index for creating new bios map entries */
L
Linus Torvalds 已提交
604 605
	last_type = 0;		 /* start with undefined memory type */
	last_addr = 0;		 /* start with 0 as last starting address */
606

L
Linus Torvalds 已提交
607
	/* loop through change-points, determining affect on the new bios map */
608
	for (chgidx = 0; chgidx < chg_nr; chgidx++) {
L
Linus Torvalds 已提交
609
		/* keep track of all overlapping bios entries */
610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627
		if (change_point[chgidx]->addr ==
		    change_point[chgidx]->pbios->addr) {
			/*
			 * add map entry to overlap list (> 1 entry
			 * implies an overlap)
			 */
			overlap_list[overlap_entries++] =
				change_point[chgidx]->pbios;
		} else {
			/*
			 * remove entry from list (order independent,
			 * so swap with last)
			 */
			for (i = 0; i < overlap_entries; i++) {
				if (overlap_list[i] ==
				    change_point[chgidx]->pbios)
					overlap_list[i] =
						overlap_list[overlap_entries-1];
L
Linus Torvalds 已提交
628 629 630
			}
			overlap_entries--;
		}
631 632 633 634 635
		/*
		 * if there are overlapping entries, decide which
		 * "type" to use (larger value takes precedence --
		 * 1=usable, 2,3,4,4+=unusable)
		 */
L
Linus Torvalds 已提交
636
		current_type = 0;
637
		for (i = 0; i < overlap_entries; i++)
L
Linus Torvalds 已提交
638 639
			if (overlap_list[i]->type > current_type)
				current_type = overlap_list[i]->type;
640 641 642 643
		/*
		 * continue building up new bios map based on this
		 * information
		 */
L
Linus Torvalds 已提交
644 645 646 647
		if (current_type != last_type)	{
			if (last_type != 0)	 {
				new_bios[new_bios_entry].size =
					change_point[chgidx]->addr - last_addr;
648 649 650 651
				/*
				 * move forward only if the new size
				 * was non-zero
				 */
L
Linus Torvalds 已提交
652
				if (new_bios[new_bios_entry].size != 0)
653 654 655 656
					/*
					 * no more space left for new
					 * bios entries ?
					 */
L
Linus Torvalds 已提交
657
					if (++new_bios_entry >= E820MAX)
658
						break;
L
Linus Torvalds 已提交
659 660
			}
			if (current_type != 0)	{
661 662
				new_bios[new_bios_entry].addr =
					change_point[chgidx]->addr;
L
Linus Torvalds 已提交
663
				new_bios[new_bios_entry].type = current_type;
664
				last_addr = change_point[chgidx]->addr;
L
Linus Torvalds 已提交
665 666 667 668
			}
			last_type = current_type;
		}
	}
669 670
	/* retain count for new bios entries */
	new_nr = new_bios_entry;
L
Linus Torvalds 已提交
671 672

	/* copy new bios mapping into original location */
673
	memcpy(biosmap, new_bios, new_nr * sizeof(struct e820entry));
L
Linus Torvalds 已提交
674 675 676 677 678 679 680 681 682 683 684 685 686 687
	*pnr_map = new_nr;

	return 0;
}

/*
 * Copy the BIOS e820 map into a safe place.
 *
 * Sanity-check it while we're at it..
 *
 * If we're lucky and live on a modern system, the setup code
 * will have given us a memory map that we can use to properly
 * set up memory.  If we aren't, we'll fake a memory map.
 */
688
static int __init copy_e820_map(struct e820entry *biosmap, int nr_map)
L
Linus Torvalds 已提交
689 690 691 692 693 694
{
	/* Only one memory region (or negative)? Ignore it */
	if (nr_map < 2)
		return -1;

	do {
695 696 697 698
		u64 start = biosmap->addr;
		u64 size = biosmap->size;
		u64 end = start + size;
		u32 type = biosmap->type;
L
Linus Torvalds 已提交
699 700 701 702 703 704

		/* Overflow in 64 bits? Ignore the memory map. */
		if (start > end)
			return -1;

		add_memory_region(start, size, type);
705
	} while (biosmap++, --nr_map);
L
Linus Torvalds 已提交
706 707 708
	return 0;
}

709
/*
 * Print @msg through early_printk() and then panic with the same text.
 *
 * The message is passed as an argument to a "%s" format rather than as
 * the format string itself, so a '%' in @msg is printed literally.
 */
static void early_panic(char *msg)
{
	early_printk("%s", msg);
	panic("%s", msg);
}
L
Linus Torvalds 已提交
714

715 716
/*
 * Build the kernel e820 map from the BIOS-supplied map in boot_params.
 *
 * We're not void only for x86 32-bit compat.
 * Returns the label used when printing the map.
 */
char * __init machine_specific_memory_setup(void)
{
	char *who = "BIOS-e820";
	/*
	 * Try to copy the BIOS-supplied E820-map.
	 *
	 * Otherwise fake a memory map; one section from 0k->640k,
	 * the next section from 1mb->appropriate_mem_k
	 */
	/* the result of sanitizing is not checked; copy_e820_map() validates */
	sanitize_e820_map(boot_params.e820_map, &boot_params.e820_entries);
	if (copy_e820_map(boot_params.e820_map, boot_params.e820_entries) < 0)
		early_panic("Cannot find a valid memory map");
	printk(KERN_INFO "BIOS-provided physical RAM map:\n");
	e820_print_map(who);

	/* In case someone cares... */
	return who;
}

735 736 737 738 739
static int __init parse_memopt(char *p)
{
	if (!p)
		return -EINVAL;
	end_user_pfn = memparse(p, &p);
740
	end_user_pfn >>= PAGE_SHIFT;
741
	return 0;
742
}
743
early_param("mem", parse_memopt);
L
Linus Torvalds 已提交
744

745
static int userdef __initdata;
L
Linus Torvalds 已提交
746

747
static int __init parse_memmap_opt(char *p)
748
{
749
	char *oldp;
750 751
	unsigned long long start_at, mem_size;

752 753
	if (!strcmp(p, "exactmap")) {
#ifdef CONFIG_CRASH_DUMP
754 755 756
		/*
		 * If we are doing a crash dump, we still need to know
		 * the real mem size before original memory map is
757 758
		 * reset.
		 */
759
		e820_register_active_regions(0, 0, -1UL);
760
		saved_max_pfn = e820_end_of_ram();
761
		remove_all_active_ranges();
762
#endif
763
		max_pfn_mapped = 0;
764 765 766 767 768 769 770 771 772
		e820.nr_map = 0;
		userdef = 1;
		return 0;
	}

	oldp = p;
	mem_size = memparse(p, &p);
	if (p == oldp)
		return -EINVAL;
773 774

	userdef = 1;
775
	if (*p == '@') {
776
		start_at = memparse(p+1, &p);
777 778
		add_memory_region(start_at, mem_size, E820_RAM);
	} else if (*p == '#') {
779
		start_at = memparse(p+1, &p);
780 781
		add_memory_region(start_at, mem_size, E820_ACPI);
	} else if (*p == '$') {
782
		start_at = memparse(p+1, &p);
783 784 785 786
		add_memory_region(start_at, mem_size, E820_RESERVED);
	} else {
		end_user_pfn = (mem_size >> PAGE_SHIFT);
	}
787 788 789 790
	return *p == '\0' ? 0 : -EINVAL;
}
early_param("memmap", parse_memmap_opt);

791
/*
 * If the user changed the memory map with "memmap=", sanitize the
 * resulting map and print it. Panics if the map cannot be sanitized.
 */
void __init finish_e820_parsing(void)
{
	if (userdef) {
		/*
		 * Keep the count in an unsigned byte, as update_e820() does,
		 * so the value copied back into e820.nr_map can never be
		 * sign-extended into a negative count.
		 */
		u8 nr = e820.nr_map;

		if (sanitize_e820_map(e820.map, &nr) < 0)
			early_panic("Invalid user supplied memory map");
		e820.nr_map = nr;

		printk(KERN_INFO "user-defined physical RAM map:\n");
		e820_print_map("user");
	}
}

805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831
/*
 * Change the type of the part of the e820 map that lies inside
 * <start, start + size> from @old_type to @new_type.
 *
 * An entry of @old_type that lies completely inside the range is retyped
 * in place. For an entry that only partly overlaps the range, a new entry
 * of @new_type covering just the overlap is appended; the original entry
 * is left as it is, so the map then contains overlapping entries.
 *
 * @old_type and @new_type must differ.
 */
void __init update_memory_range(u64 start, u64 size, unsigned old_type,
				unsigned new_type)
{
	int i;
	u64 end = start + size;

	BUG_ON(old_type == new_type);

	for (i = 0; i < e820.nr_map; i++) {
		struct e820entry *ei = &e820.map[i];
		u64 ei_end, final_start, final_end;

		if (ei->type != old_type)
			continue;
		ei_end = ei->addr + ei->size;
		/*
		 * totally covered? Compare end addresses, not sizes: an
		 * entry that starts inside the range but runs past its end
		 * must not be retyped as a whole.
		 */
		if (ei->addr >= start && ei_end <= end) {
			ei->type = new_type;
			continue;
		}
		/* partially covered */
		final_start = max(start, ei->addr);
		final_end = min(end, ei_end);
		if (final_start >= final_end)
			continue;
		add_memory_region(final_start, final_end - final_start,
					 new_type);
	}
}

Y
Yinghai Lu 已提交
832 833 834 835 836 837 838 839 840 841 842 843
/*
 * Sanitize the kernel e820 map in place. If that succeeds, store the new
 * entry count and print the resulting map; otherwise leave the map alone.
 */
void __init update_e820(void)
{
	u8 count = e820.nr_map;

	if (sanitize_e820_map(e820.map, &count) != 0)
		return;

	e820.nr_map = count;
	printk(KERN_INFO "modified physical RAM map:\n");
	e820_print_map("modified");
}

844
unsigned long pci_mem_start = 0xaeedbabe;
845
EXPORT_SYMBOL(pci_mem_start);
846 847 848 849 850 851 852 853 854

/*
 * Search for the biggest gap in the low 32 bits of the e820
 * memory space.  We pass this space to PCI to assign MMIO resources
 * for hotplug or unconfigured devices in.
 * Hopefully the BIOS let enough space left.
 */
__init void e820_setup_gap(void)
{
855
	unsigned long gapstart, gapsize, round;
856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886
	unsigned long last;
	int i;
	int found = 0;

	last = 0x100000000ull;
	gapstart = 0x10000000;
	gapsize = 0x400000;
	i = e820.nr_map;
	while (--i >= 0) {
		unsigned long long start = e820.map[i].addr;
		unsigned long long end = start + e820.map[i].size;

		/*
		 * Since "last" is at most 4GB, we know we'll
		 * fit in 32 bits if this condition is true
		 */
		if (last > end) {
			unsigned long gap = last - end;

			if (gap > gapsize) {
				gapsize = gap;
				gapstart = end;
				found = 1;
			}
		}
		if (start < last)
			last = start;
	}

	if (!found) {
		gapstart = (end_pfn << PAGE_SHIFT) + 1024*1024;
887 888 889 890
		printk(KERN_ERR "PCI: Warning: Cannot find a gap in the 32bit "
		       "address range\n"
		       KERN_ERR "PCI: Unassigned devices with 32bit resource "
		       "registers may break!\n");
891 892 893
	}

	/*
894 895
	 * See how much we want to round up: start off with
	 * rounding to the next 1MB area.
896
	 */
897 898 899 900 901
	round = 0x100000;
	while ((gapsize >> 4) > round)
		round += round;
	/* Fun with two's complement */
	pci_mem_start = (gapstart + round) & -round;
902

903 904 905
	printk(KERN_INFO
	       "Allocating PCI resources starting at %lx (gap: %lx:%lx)\n",
	       pci_mem_start, gapstart, gapsize);
906
}
907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925

int __init arch_get_ram_range(int slot, u64 *addr, u64 *size)
{
	int i;

	if (slot < 0 || slot >= e820.nr_map)
		return -1;
	for (i = slot; i < e820.nr_map; i++) {
		if (e820.map[i].type != E820_RAM)
			continue;
		break;
	}
	if (i == e820.nr_map || e820.map[i].addr > (max_pfn << PAGE_SHIFT))
		return -1;
	*addr = e820.map[i].addr;
	*size = min_t(u64, e820.map[i].size + e820.map[i].addr,
		max_pfn << PAGE_SHIFT) - *addr;
	return i + 1;
}