e820_64.c 23.3 KB
Newer Older
1
/*
 * Handle the memory map.
 * The functions here do the job until bootmem takes over.
 *
 *  Getting sanitize_e820_map() in sync with i386 version by applying change:
 *  -  Provisions for empty E820 memory regions (reported by certain BIOSes).
 *     Alex Achenbach <xela@slit.de>, December 2002.
 *  Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
 *
 */
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/bootmem.h>
#include <linux/ioport.h>
#include <linux/string.h>
17
#include <linux/kexec.h>
18
#include <linux/module.h>
19
#include <linux/mm.h>
20 21
#include <linux/suspend.h>
#include <linux/pfn.h>
22

23
#include <asm/pgtable.h>
L
Linus Torvalds 已提交
24 25 26
#include <asm/page.h>
#include <asm/e820.h>
#include <asm/proto.h>
27
#include <asm/setup.h>
28
#include <asm/sections.h>
29
#include <asm/kdebug.h>
30
#include <asm/trampoline.h>
L
Linus Torvalds 已提交
31

32
struct e820map e820;
A
Andi Kleen 已提交
33

34
/*
L
Linus Torvalds 已提交
35 36
 * PFN of last memory page.
 */
37
unsigned long end_pfn;
L
Linus Torvalds 已提交
38

39
/*
40 41
 * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries.
 * The direct mapping extends to max_pfn_mapped, so that we can directly access
L
Linus Torvalds 已提交
42
 * apertures, ACPI and other tables without having to play with fixmaps.
43
 */
44
unsigned long max_pfn_mapped;
L
Linus Torvalds 已提交
45

46
/*
L
Linus Torvalds 已提交
47 48
 * Last pfn which the user wants to use.
 */
49
static unsigned long __initdata end_user_pfn = MAXMEM>>PAGE_SHIFT;
L
Linus Torvalds 已提交
50

51 52 53 54
/*
 * Early reserved memory areas.
 */
#define MAX_EARLY_RES 20

/* One early reservation: the byte range [start, end) plus a short label. */
struct early_res {
	unsigned long start, end;
	char name[16];
};

/*
 * Table of early reservations.  Entries are packed from index 0 and the
 * first entry whose end is 0 terminates the list.
 */
static struct early_res early_res[MAX_EARLY_RES] __initdata = {
	{ 0, PAGE_SIZE, "BIOS data page" },			/* BIOS data page */
#ifdef CONFIG_X86_TRAMPOLINE
	{ TRAMPOLINE_BASE, TRAMPOLINE_BASE + 2 * PAGE_SIZE, "TRAMPOLINE" },
#endif
	{}
};

68
/*
 * Record an early reservation covering [start, end).
 *
 * @start: first byte of the range
 * @end:   one past the last byte of the range
 * @name:  optional label (may be NULL); truncated to fit early_res.name
 *
 * Panics if the range overlaps an existing reservation or if the table
 * is already full.
 */
void __init reserve_early(unsigned long start, unsigned long end, char *name)
{
	int i;
	struct early_res *r;

	for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
		r = &early_res[i];
		if (end > r->start && start < r->end)
			panic("Overlapping early reservations %lx-%lx %s to %lx-%lx %s\n",
			      start, end - 1, name?name:"", r->start, r->end - 1, r->name);
	}
	if (i >= MAX_EARLY_RES)
		panic("Too many early reservations");
	r = &early_res[i];
	r->start = start;
	r->end = end;
	if (name) {
		strncpy(r->name, name, sizeof(r->name) - 1);
		/* strncpy() does not terminate when the source is too long */
		r->name[sizeof(r->name) - 1] = '\0';
	} else {
		/*
		 * free_early() only clears ->end of the slot it vacates, so
		 * a recycled slot may still carry the previous owner's label.
		 */
		r->name[0] = '\0';
	}
}
86

87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108
/*
 * Drop the early reservation that exactly matches [start, end).
 *
 * The remaining entries are shifted down by one so the table stays packed,
 * and the vacated last slot is marked free by zeroing its ->end.
 *
 * Panics if no reservation with exactly these bounds exists.
 */
void __init free_early(unsigned long start, unsigned long end)
{
	struct early_res *r;
	int i, j;

	for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
		r = &early_res[i];
		if (start == r->start && end == r->end)
			break;
	}
	if (i >= MAX_EARLY_RES || !early_res[i].end)
		panic("free_early on not reserved area: %lx-%lx!", start, end);

	/* find the end of the used part of the table */
	for (j = i + 1; j < MAX_EARLY_RES && early_res[j].end; j++)
		;

	/*
	 * Source and destination overlap whenever more than one entry is
	 * moved, so memmove() is required here; memcpy() is undefined for
	 * overlapping regions.
	 */
	memmove(&early_res[i], &early_res[i + 1],
		(j - 1 - i) * sizeof(struct early_res));

	early_res[j - 1].end = 0;
}

109 110 111 112 113
/*
 * Hand every recorded early reservation over to the bootmem allocator,
 * logging each one as it goes.
 */
void __init early_res_to_bootmem(void)
{
	int i;
	for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
		struct early_res *r = &early_res[i];
		printk(KERN_INFO "early res: %d [%lx-%lx] %s\n", i,
			r->start, r->end - 1, r->name);
		reserve_bootmem_generic(r->start, r->end - r->start);
	}
}
119

120
/* Check for already reserved areas */
121
static inline int __init
122
bad_addr(unsigned long *addrp, unsigned long size, unsigned long align)
123 124 125 126 127 128 129 130 131
{
	int i;
	unsigned long addr = *addrp, last;
	int changed = 0;
again:
	last = addr + size;
	for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
		struct early_res *r = &early_res[i];
		if (last >= r->start && addr < r->end) {
132
			*addrp = addr = round_up(r->end, align);
133 134 135
			changed = 1;
			goto again;
		}
136
	}
137
	return changed;
138
}
L
Linus Torvalds 已提交
139

Y
Yinghai Lu 已提交
140
/* Check for already reserved areas */
141
static inline int __init
Y
Yinghai Lu 已提交
142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173
bad_addr_size(unsigned long *addrp, unsigned long *sizep, unsigned long align)
{
	int i;
	unsigned long addr = *addrp, last;
	unsigned long size = *sizep;
	int changed = 0;
again:
	last = addr + size;
	for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
		struct early_res *r = &early_res[i];
		if (last > r->start && addr < r->start) {
			size = r->start - addr;
			changed = 1;
			goto again;
		}
		if (last > r->end && addr < r->end) {
			addr = round_up(r->end, align);
			size = last - addr;
			changed = 1;
			goto again;
		}
		if (last <= r->end && addr >= r->start) {
			(*sizep)++;
			return 0;
		}
	}
	if (changed) {
		*addrp = addr;
		*sizep = size;
	}
	return changed;
}
174 175 176 177
/*
 * This function checks if any part of the range <start,end> is mapped
 * with type.
 */
178
int
179
e820_any_mapped(unsigned long start, unsigned long end, unsigned type)
180
{
L
Linus Torvalds 已提交
181
	int i;
182 183 184 185 186

	for (i = 0; i < e820.nr_map; i++) {
		struct e820entry *ei = &e820.map[i];

		if (type && ei->type != type)
L
Linus Torvalds 已提交
187
			continue;
188
		if (ei->addr >= end || ei->addr + ei->size <= start)
189 190 191
			continue;
		return 1;
	}
L
Linus Torvalds 已提交
192 193
	return 0;
}
194
EXPORT_SYMBOL_GPL(e820_any_mapped);
L
Linus Torvalds 已提交
195

196 197 198 199 200 201
/*
 * This function checks if the entire range <start,end> is mapped with type.
 *
 * Note: this function only works correct if the e820 table is sorted and
 * not-overlapping, which is the case
 */
202 203
int __init e820_all_mapped(unsigned long start, unsigned long end,
			   unsigned type)
204 205
{
	int i;
206

207 208
	for (i = 0; i < e820.nr_map; i++) {
		struct e820entry *ei = &e820.map[i];
209

210 211 212 213 214 215 216 217 218 219 220
		if (type && ei->type != type)
			continue;
		/* is the region (part) in overlap with the current region ?*/
		if (ei->addr >= end || ei->addr + ei->size <= start)
			continue;

		/* if the region is at the beginning of <start,end> we move
		 * start to the end of the region since it's ok until there
		 */
		if (ei->addr <= start)
			start = ei->addr + ei->size;
221 222 223 224
		/*
		 * if start is now at or beyond end, we're done, full
		 * coverage
		 */
225
		if (start >= end)
226
			return 1;
227 228 229 230
	}
	return 0;
}

231
/*
 * Find a free area with specified alignment in a specific range.
 *
 * Walks the E820_RAM entries looking for @size bytes, aligned to @align,
 * that start at or after @start, end at or before @end, and avoid every
 * early reservation (see bad_addr()).
 *
 * Returns the start address of the area, or -1UL if none was found.
 */
unsigned long __init find_e820_area(unsigned long start, unsigned long end,
				    unsigned long size, unsigned long align)
{
	int i;

	for (i = 0; i < e820.nr_map; i++) {
		struct e820entry *ei = &e820.map[i];
		unsigned long addr, last;
		unsigned long ei_last;

		if (ei->type != E820_RAM)
			continue;
		addr = round_up(ei->addr, align);
		ei_last = ei->addr + ei->size;
		if (addr < start)
			addr = round_up(start, align);
		if (addr >= ei_last)
			continue;
		/* keep stepping past reservations while we still fit */
		while (bad_addr(&addr, size, align) && addr+size <= ei_last)
			;
		last = addr + size;
		if (last > ei_last)
			continue;
		if (last > end)
			continue;
		return addr;
	}
	return -1UL;
}
L
Linus Torvalds 已提交
263

Y
Yinghai Lu 已提交
264 265 266
/*
 * Find next free range after *start
 */
Y
Yinghai Lu 已提交
267 268 269
unsigned long __init find_e820_area_size(unsigned long start,
					 unsigned long *sizep,
					 unsigned long align)
Y
Yinghai Lu 已提交
270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286
{
	int i;

	for (i = 0; i < e820.nr_map; i++) {
		struct e820entry *ei = &e820.map[i];
		unsigned long addr, last;
		unsigned long ei_last;

		if (ei->type != E820_RAM)
			continue;
		addr = round_up(ei->addr, align);
		ei_last = ei->addr + ei->size;
		if (addr < start)
			addr = round_up(start, align);
		if (addr >= ei_last)
			continue;
		*sizep = ei_last - addr;
Y
Yinghai Lu 已提交
287 288
		while (bad_addr_size(&addr, sizep, align) &&
			addr + *sizep <= ei_last)
Y
Yinghai Lu 已提交
289 290 291 292 293 294 295 296 297
			;
		last = addr + *sizep;
		if (last > ei_last)
			continue;
		return addr;
	}
	return -1UL;

}
L
Linus Torvalds 已提交
298 299 300 301 302
/*
 * Find the highest page frame number we have available
 *
 * Side effect: raises max_pfn_mapped to cover the result and then clamps
 * it to MAXMEM.  The returned pfn is additionally limited by end_user_pfn
 * and by max_pfn_mapped.
 *
 * Note that the local end_pfn below shadows the global of the same name;
 * the global is not modified here.
 */
unsigned long __init e820_end_of_ram(void)
{
	unsigned long end_pfn;

	end_pfn = find_max_pfn_with_active_regions();

	if (end_pfn > max_pfn_mapped)
		max_pfn_mapped = end_pfn;
	if (max_pfn_mapped > MAXMEM>>PAGE_SHIFT)
		max_pfn_mapped = MAXMEM>>PAGE_SHIFT;
	if (end_pfn > end_user_pfn)
		end_pfn = end_user_pfn;
	if (end_pfn > max_pfn_mapped)
		end_pfn = max_pfn_mapped;

	printk(KERN_INFO "max_pfn_mapped = %lu\n", max_pfn_mapped);
	return end_pfn;
}

320
/*
L
Linus Torvalds 已提交
321 322
 * Mark e820 reserved areas as busy for the resource manager.
 */
323
void __init e820_reserve_resources(void)
L
Linus Torvalds 已提交
324 325
{
	int i;
326 327 328
	struct resource *res;

	res = alloc_bootmem_low(sizeof(struct resource) * e820.nr_map);
L
Linus Torvalds 已提交
329 330 331 332 333 334 335 336 337 338
	for (i = 0; i < e820.nr_map; i++) {
		switch (e820.map[i].type) {
		case E820_RAM:	res->name = "System RAM"; break;
		case E820_ACPI:	res->name = "ACPI Tables"; break;
		case E820_NVS:	res->name = "ACPI Non-volatile Storage"; break;
		default:	res->name = "reserved";
		}
		res->start = e820.map[i].addr;
		res->end = res->start + e820.map[i].size - 1;
		res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
339
		insert_resource(&iomem_resource, res);
340
		res++;
L
Linus Torvalds 已提交
341 342 343
	}
}

344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361
/*
 * Find the ranges of physical addresses that do not correspond to
 * e820 RAM areas and mark the corresponding pages as nosave for software
 * suspend and suspend to RAM.
 *
 * This function requires the e820 map to be sorted and without any
 * overlapping entries and assumes the first e820 area to be RAM.
 */
void __init e820_mark_nosave_regions(void)
{
	int i;
	unsigned long paddr;

	/* paddr tracks the page-aligned end of the previous entry */
	paddr = round_down(e820.map[0].addr + e820.map[0].size, PAGE_SIZE);
	for (i = 1; i < e820.nr_map; i++) {
		struct e820entry *ei = &e820.map[i];

		/* hole between the previous entry and this one */
		if (paddr < ei->addr)
			register_nosave_region(PFN_DOWN(paddr),
						PFN_UP(ei->addr));

		paddr = round_down(ei->addr + ei->size, PAGE_SIZE);
		/* the entry itself, when it is not RAM */
		if (ei->type != E820_RAM)
			register_nosave_region(PFN_UP(ei->addr),
						PFN_DOWN(paddr));

		/* nothing beyond end_pfn needs to be considered */
		if (paddr >= (end_pfn << PAGE_SHIFT))
			break;
	}
}

375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391
/*
 * Finds an active region in the address range from start_pfn to end_pfn and
 * returns its range in ei_startpfn and ei_endpfn for the e820 entry.
 *
 * Returns 1 when *ei_startpfn / *ei_endpfn describe a usable RAM range
 * clipped to [start_pfn, end_pfn) and to end_user_pfn, 0 otherwise (the
 * output values are then not meaningful).
 *
 * Side effect: a non-RAM entry that is at least one page long and ends
 * above max_pfn_mapped raises max_pfn_mapped.
 */
static int __init e820_find_active_region(const struct e820entry *ei,
					  unsigned long start_pfn,
					  unsigned long end_pfn,
					  unsigned long *ei_startpfn,
					  unsigned long *ei_endpfn)
{
	*ei_startpfn = round_up(ei->addr, PAGE_SIZE) >> PAGE_SHIFT;
	*ei_endpfn = round_down(ei->addr + ei->size, PAGE_SIZE) >> PAGE_SHIFT;

	/* Skip map entries smaller than a page */
	if (*ei_startpfn >= *ei_endpfn)
		return 0;

	/* Check if max_pfn_mapped should be updated */
	if (ei->type != E820_RAM && *ei_endpfn > max_pfn_mapped)
		max_pfn_mapped = *ei_endpfn;

	/* Skip if map is outside the node */
	if (ei->type != E820_RAM || *ei_endpfn <= start_pfn ||
				    *ei_startpfn >= end_pfn)
		return 0;

	/* Check for overlaps */
	if (*ei_startpfn < start_pfn)
		*ei_startpfn = start_pfn;
	if (*ei_endpfn > end_pfn)
		*ei_endpfn = end_pfn;

	/* Obey end_user_pfn to save on memmap */
	if (*ei_startpfn >= end_user_pfn)
		return 0;
	if (*ei_endpfn > end_user_pfn)
		*ei_endpfn = end_user_pfn;

	return 1;
}

416 417 418 419 420
/*
 * Walk the e820 map and register active regions within a node.
 *
 * Every RAM range that e820_find_active_region() reports inside
 * [start_pfn, end_pfn) is passed to add_active_range() for node @nid.
 */
void __init
e820_register_active_regions(int nid, unsigned long start_pfn,
							unsigned long end_pfn)
{
	unsigned long ei_startpfn;
	unsigned long ei_endpfn;
	int i;

	for (i = 0; i < e820.nr_map; i++)
		if (e820_find_active_region(&e820.map[i],
					    start_pfn, end_pfn,
					    &ei_startpfn, &ei_endpfn))
			add_active_range(nid, ei_startpfn, ei_endpfn);
}

432
/*
L
Linus Torvalds 已提交
433
 * Add a memory region to the kernel e820 map.
434
 */
L
Linus Torvalds 已提交
435 436 437 438 439 440 441 442 443 444 445 446 447 448 449
void __init add_memory_region(unsigned long start, unsigned long size, int type)
{
	int x = e820.nr_map;

	if (x == E820MAX) {
		printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
		return;
	}

	e820.map[x].addr = start;
	e820.map[x].size = size;
	e820.map[x].type = type;
	e820.nr_map++;
}

450 451 452 453 454 455 456 457 458
/*
 * Find the hole size (in bytes) in the memory range.
 * @start: starting address of the memory range to scan
 * @end: ending address of the memory range to scan
 *
 * The result is the length of the range minus the whole pages of RAM that
 * e820_find_active_region() reports inside it.
 */
unsigned long __init e820_hole_size(unsigned long start, unsigned long end)
{
	unsigned long start_pfn = start >> PAGE_SHIFT;
	unsigned long end_pfn = end >> PAGE_SHIFT;
	unsigned long ei_startpfn, ei_endpfn, ram = 0;
	int i;

	for (i = 0; i < e820.nr_map; i++) {
		if (e820_find_active_region(&e820.map[i],
					    start_pfn, end_pfn,
					    &ei_startpfn, &ei_endpfn))
			ram += ei_endpfn - ei_startpfn;
	}
	return end - start - (ram << PAGE_SHIFT);
}

471
/*
 * Log every entry of the e820 map as "<who>: <start> - <end> (<type>)".
 * Types without a known name are printed numerically.
 */
static void __init e820_print_map(char *who)
{
	int i;

	for (i = 0; i < e820.nr_map; i++) {
		printk(KERN_INFO " %s: %016Lx - %016Lx ", who,
		       (unsigned long long) e820.map[i].addr,
		       (unsigned long long)
		       (e820.map[i].addr + e820.map[i].size));
		switch (e820.map[i].type) {
		case E820_RAM:
			printk(KERN_CONT "(usable)\n");
			break;
		case E820_RESERVED:
			printk(KERN_CONT "(reserved)\n");
			break;
		case E820_ACPI:
			printk(KERN_CONT "(ACPI data)\n");
			break;
		case E820_NVS:
			printk(KERN_CONT "(ACPI NVS)\n");
			break;
		default:
			printk(KERN_CONT "type %u\n", e820.map[i].type);
			break;
		}
	}
}

/*
 * Sanitize the BIOS e820 map.
 *
503
 * Some e820 responses include overlapping entries. The following
L
Linus Torvalds 已提交
504 505 506
 * replaces the original e820 map with a new one, removing overlaps.
 *
 */
507
static int __init sanitize_e820_map(struct e820entry *biosmap, char *pnr_map)
L
Linus Torvalds 已提交
508 509 510 511 512 513 514 515 516 517 518 519 520 521 522
{
	struct change_member {
		struct e820entry *pbios; /* pointer to original bios entry */
		unsigned long long addr; /* address for this change point */
	};
	static struct change_member change_point_list[2*E820MAX] __initdata;
	static struct change_member *change_point[2*E820MAX] __initdata;
	static struct e820entry *overlap_list[E820MAX] __initdata;
	static struct e820entry new_bios[E820MAX] __initdata;
	struct change_member *change_tmp;
	unsigned long current_type, last_type;
	unsigned long long last_addr;
	int chgidx, still_changing;
	int overlap_entries;
	int new_bios_entry;
523
	int old_nr, new_nr, chg_nr;
L
Linus Torvalds 已提交
524 525 526
	int i;

	/*
527 528
		Visually we're performing the following
		(1,2,3,4 = memory types)...
L
Linus Torvalds 已提交
529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569

		Sample memory map (w/overlaps):
		   ____22__________________
		   ______________________4_
		   ____1111________________
		   _44_____________________
		   11111111________________
		   ____________________33__
		   ___________44___________
		   __________33333_________
		   ______________22________
		   ___________________2222_
		   _________111111111______
		   _____________________11_
		   _________________4______

		Sanitized equivalent (no overlap):
		   1_______________________
		   _44_____________________
		   ___1____________________
		   ____22__________________
		   ______11________________
		   _________1______________
		   __________3_____________
		   ___________44___________
		   _____________33_________
		   _______________2________
		   ________________1_______
		   _________________4______
		   ___________________2____
		   ____________________33__
		   ______________________4_
	*/

	/* if there's only one memory region, don't bother */
	if (*pnr_map < 2)
		return -1;

	old_nr = *pnr_map;

	/* bail out if we find any unreasonable addresses in bios map */
570
	for (i = 0; i < old_nr; i++)
L
Linus Torvalds 已提交
571 572 573 574
		if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr)
			return -1;

	/* create pointers for initial change-point information (for sorting) */
575
	for (i = 0; i < 2 * old_nr; i++)
L
Linus Torvalds 已提交
576 577
		change_point[i] = &change_point_list[i];

578 579
	/* record all known change-points (starting and ending addresses),
	   omitting those that are for empty memory regions */
L
Linus Torvalds 已提交
580
	chgidx = 0;
581
	for (i = 0; i < old_nr; i++)	{
582 583 584
		if (biosmap[i].size != 0) {
			change_point[chgidx]->addr = biosmap[i].addr;
			change_point[chgidx++]->pbios = &biosmap[i];
585 586
			change_point[chgidx]->addr = biosmap[i].addr +
				biosmap[i].size;
587 588
			change_point[chgidx++]->pbios = &biosmap[i];
		}
L
Linus Torvalds 已提交
589
	}
590
	chg_nr = chgidx;
L
Linus Torvalds 已提交
591 592 593 594 595

	/* sort change-point list by memory addresses (low -> high) */
	still_changing = 1;
	while (still_changing)	{
		still_changing = 0;
596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614
		for (i = 1; i < chg_nr; i++)  {
			unsigned long long curaddr, lastaddr;
			unsigned long long curpbaddr, lastpbaddr;

			curaddr = change_point[i]->addr;
			lastaddr = change_point[i - 1]->addr;
			curpbaddr = change_point[i]->pbios->addr;
			lastpbaddr = change_point[i - 1]->pbios->addr;

			/*
			 * swap entries, when:
			 *
			 * curaddr > lastaddr or
			 * curaddr == lastaddr and curaddr == curpbaddr and
			 * lastaddr != lastpbaddr
			 */
			if (curaddr < lastaddr ||
			    (curaddr == lastaddr && curaddr == curpbaddr &&
			     lastaddr != lastpbaddr)) {
L
Linus Torvalds 已提交
615 616 617
				change_tmp = change_point[i];
				change_point[i] = change_point[i-1];
				change_point[i-1] = change_tmp;
618
				still_changing = 1;
L
Linus Torvalds 已提交
619 620 621 622 623
			}
		}
	}

	/* create a new bios memory map, removing overlaps */
624 625
	overlap_entries = 0;	 /* number of entries in the overlap table */
	new_bios_entry = 0;	 /* index for creating new bios map entries */
L
Linus Torvalds 已提交
626 627
	last_type = 0;		 /* start with undefined memory type */
	last_addr = 0;		 /* start with 0 as last starting address */
628

L
Linus Torvalds 已提交
629
	/* loop through change-points, determining affect on the new bios map */
630
	for (chgidx = 0; chgidx < chg_nr; chgidx++) {
L
Linus Torvalds 已提交
631
		/* keep track of all overlapping bios entries */
632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649
		if (change_point[chgidx]->addr ==
		    change_point[chgidx]->pbios->addr) {
			/*
			 * add map entry to overlap list (> 1 entry
			 * implies an overlap)
			 */
			overlap_list[overlap_entries++] =
				change_point[chgidx]->pbios;
		} else {
			/*
			 * remove entry from list (order independent,
			 * so swap with last)
			 */
			for (i = 0; i < overlap_entries; i++) {
				if (overlap_list[i] ==
				    change_point[chgidx]->pbios)
					overlap_list[i] =
						overlap_list[overlap_entries-1];
L
Linus Torvalds 已提交
650 651 652
			}
			overlap_entries--;
		}
653 654 655 656 657
		/*
		 * if there are overlapping entries, decide which
		 * "type" to use (larger value takes precedence --
		 * 1=usable, 2,3,4,4+=unusable)
		 */
L
Linus Torvalds 已提交
658
		current_type = 0;
659
		for (i = 0; i < overlap_entries; i++)
L
Linus Torvalds 已提交
660 661
			if (overlap_list[i]->type > current_type)
				current_type = overlap_list[i]->type;
662 663 664 665
		/*
		 * continue building up new bios map based on this
		 * information
		 */
L
Linus Torvalds 已提交
666 667 668 669
		if (current_type != last_type)	{
			if (last_type != 0)	 {
				new_bios[new_bios_entry].size =
					change_point[chgidx]->addr - last_addr;
670 671 672 673
				/*
				 * move forward only if the new size
				 * was non-zero
				 */
L
Linus Torvalds 已提交
674
				if (new_bios[new_bios_entry].size != 0)
675 676 677 678
					/*
					 * no more space left for new
					 * bios entries ?
					 */
L
Linus Torvalds 已提交
679
					if (++new_bios_entry >= E820MAX)
680
						break;
L
Linus Torvalds 已提交
681 682
			}
			if (current_type != 0)	{
683 684
				new_bios[new_bios_entry].addr =
					change_point[chgidx]->addr;
L
Linus Torvalds 已提交
685
				new_bios[new_bios_entry].type = current_type;
686
				last_addr = change_point[chgidx]->addr;
L
Linus Torvalds 已提交
687 688 689 690
			}
			last_type = current_type;
		}
	}
691 692
	/* retain count for new bios entries */
	new_nr = new_bios_entry;
L
Linus Torvalds 已提交
693 694

	/* copy new bios mapping into original location */
695
	memcpy(biosmap, new_bios, new_nr * sizeof(struct e820entry));
L
Linus Torvalds 已提交
696 697 698 699 700 701 702 703 704 705 706 707 708 709
	*pnr_map = new_nr;

	return 0;
}

/*
 * Copy the BIOS e820 map into a safe place.
 *
 * Sanity-check it while we're at it..
 *
 * If we're lucky and live on a modern system, the setup code
 * will have given us a memory map that we can use to properly
 * set up memory.  If we aren't, we'll fake a memory map.
 */
710
static int __init copy_e820_map(struct e820entry *biosmap, int nr_map)
L
Linus Torvalds 已提交
711 712 713 714 715 716
{
	/* Only one memory region (or negative)? Ignore it */
	if (nr_map < 2)
		return -1;

	do {
717 718 719 720
		u64 start = biosmap->addr;
		u64 size = biosmap->size;
		u64 end = start + size;
		u32 type = biosmap->type;
L
Linus Torvalds 已提交
721 722 723 724 725 726

		/* Overflow in 64 bits? Ignore the memory map. */
		if (start > end)
			return -1;

		add_memory_region(start, size, type);
727
	} while (biosmap++, --nr_map);
L
Linus Torvalds 已提交
728 729 730
	return 0;
}

731
/*
 * Report a fatal error through the early console and then panic.
 *
 * @msg is printed verbatim: it is passed as an argument to a "%s" format
 * rather than used as the format string itself, so a '%' in the message
 * cannot be misinterpreted as a conversion.
 */
static void early_panic(char *msg)
{
	early_printk("%s", msg);
	panic("%s", msg);
}
L
Linus Torvalds 已提交
736

737 738
/*
 * Build the kernel e820 map from the boot parameters.
 *
 * Returns the label used when printing the map.
 * We're not void only for x86 32-bit compat.
 */
char * __init machine_specific_memory_setup(void)
{
	char *who = "BIOS-e820";
	/*
	 * Try to copy the BIOS-supplied E820-map.
	 *
	 * Otherwise fake a memory map; one section from 0k->640k,
	 * the next section from 1mb->appropriate_mem_k
	 */
	sanitize_e820_map(boot_params.e820_map, &boot_params.e820_entries);
	if (copy_e820_map(boot_params.e820_map, boot_params.e820_entries) < 0)
		early_panic("Cannot find a valid memory map");
	printk(KERN_INFO "BIOS-provided physical RAM map:\n");
	e820_print_map(who);

	/* In case someone cares... */
	return who;
}

757 758 759 760 761
static int __init parse_memopt(char *p)
{
	if (!p)
		return -EINVAL;
	end_user_pfn = memparse(p, &p);
762
	end_user_pfn >>= PAGE_SHIFT;
763
	return 0;
764
}
765
early_param("mem", parse_memopt);
L
Linus Torvalds 已提交
766

767
static int userdef __initdata;
L
Linus Torvalds 已提交
768

769
static int __init parse_memmap_opt(char *p)
770
{
771
	char *oldp;
772 773
	unsigned long long start_at, mem_size;

774 775
	if (!strcmp(p, "exactmap")) {
#ifdef CONFIG_CRASH_DUMP
776 777 778
		/*
		 * If we are doing a crash dump, we still need to know
		 * the real mem size before original memory map is
779 780
		 * reset.
		 */
781
		e820_register_active_regions(0, 0, -1UL);
782
		saved_max_pfn = e820_end_of_ram();
783
		remove_all_active_ranges();
784
#endif
785
		max_pfn_mapped = 0;
786 787 788 789 790 791 792 793 794
		e820.nr_map = 0;
		userdef = 1;
		return 0;
	}

	oldp = p;
	mem_size = memparse(p, &p);
	if (p == oldp)
		return -EINVAL;
795 796

	userdef = 1;
797
	if (*p == '@') {
798
		start_at = memparse(p+1, &p);
799 800
		add_memory_region(start_at, mem_size, E820_RAM);
	} else if (*p == '#') {
801
		start_at = memparse(p+1, &p);
802 803
		add_memory_region(start_at, mem_size, E820_ACPI);
	} else if (*p == '$') {
804
		start_at = memparse(p+1, &p);
805 806 807 808
		add_memory_region(start_at, mem_size, E820_RESERVED);
	} else {
		end_user_pfn = (mem_size >> PAGE_SHIFT);
	}
809 810 811 812
	return *p == '\0' ? 0 : -EINVAL;
}
early_param("memmap", parse_memmap_opt);

813
/*
 * If "memmap=" options changed the map, sanitize and print the result.
 * Panics when the user-supplied map is rejected by sanitize_e820_map().
 */
void __init finish_e820_parsing(void)
{
	if (userdef) {
		char nr = e820.nr_map;

		if (sanitize_e820_map(e820.map, &nr) < 0)
			early_panic("Invalid user supplied memory map");
		e820.nr_map = nr;

		printk(KERN_INFO "user-defined physical RAM map:\n");
		e820_print_map("user");
	}
}

827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853
/*
 * Change the type of the part of [start, start + size) that is currently
 * of type @old_type to @new_type.
 *
 * An entry that lies completely inside the range is retyped in place.  For
 * an entry that only partly overlaps, a new @new_type entry covering the
 * intersection is appended; the original entry is left as it is, so the map
 * then contains overlapping entries until it is sanitized again.
 *
 * @old_type and @new_type must differ.
 */
void __init update_memory_range(u64 start, u64 size, unsigned old_type,
				unsigned new_type)
{
	int i;

	BUG_ON(old_type == new_type);

	for (i = 0; i < e820.nr_map; i++) {
		struct e820entry *ei = &e820.map[i];
		u64 final_start, final_end;
		if (ei->type != old_type)
			continue;
		/*
		 * totally covered?  Compare the end addresses, not the
		 * sizes: an entry that starts inside the range and is no
		 * longer than it can still extend past the end of the range.
		 */
		if (ei->addr >= start &&
		    (ei->addr + ei->size) <= (start + size)) {
			ei->type = new_type;
			continue;
		}
		/* partially covered */
		final_start = max(start, ei->addr);
		final_end = min(start + size, ei->addr + ei->size);
		if (final_start >= final_end)
			continue;
		add_memory_region(final_start, final_end - final_start,
					 new_type);
	}
}

Y
Yinghai Lu 已提交
854 855 856 857 858 859 860 861 862 863 864 865
/*
 * Re-sanitize the kernel e820 map after it has been modified and print
 * the result.  If sanitizing reports failure the map is left as it was
 * and nothing is printed.
 */
void __init update_e820(void)
{
	u8 count = e820.nr_map;

	if (sanitize_e820_map(e820.map, &count) != 0)
		return;

	e820.nr_map = count;
	printk(KERN_INFO "modified physical RAM map:\n");
	e820_print_map("modified");
}

866
/*
 * Start of the address range handed to PCI for unassigned resources.
 * The initial value is a placeholder; e820_setup_gap() computes the real one.
 */
unsigned long pci_mem_start = 0xaeedbabe;
EXPORT_SYMBOL(pci_mem_start);
868 869 870 871 872 873 874 875 876

/*
 * Search for the biggest gap in the low 32 bits of the e820
 * memory space.  We pass this space to PCI to assign MMIO resources
 * for hotplug or unconfigured devices in.
 * Hopefully the BIOS let enough space left.
 */
__init void e820_setup_gap(void)
{
877
	unsigned long gapstart, gapsize, round;
878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908
	unsigned long last;
	int i;
	int found = 0;

	last = 0x100000000ull;
	gapstart = 0x10000000;
	gapsize = 0x400000;
	i = e820.nr_map;
	while (--i >= 0) {
		unsigned long long start = e820.map[i].addr;
		unsigned long long end = start + e820.map[i].size;

		/*
		 * Since "last" is at most 4GB, we know we'll
		 * fit in 32 bits if this condition is true
		 */
		if (last > end) {
			unsigned long gap = last - end;

			if (gap > gapsize) {
				gapsize = gap;
				gapstart = end;
				found = 1;
			}
		}
		if (start < last)
			last = start;
	}

	if (!found) {
		gapstart = (end_pfn << PAGE_SHIFT) + 1024*1024;
909 910 911 912
		printk(KERN_ERR "PCI: Warning: Cannot find a gap in the 32bit "
		       "address range\n"
		       KERN_ERR "PCI: Unassigned devices with 32bit resource "
		       "registers may break!\n");
913 914 915
	}

	/*
916 917
	 * See how much we want to round up: start off with
	 * rounding to the next 1MB area.
918
	 */
919 920 921 922 923
	round = 0x100000;
	while ((gapsize >> 4) > round)
		round += round;
	/* Fun with two's complement */
	pci_mem_start = (gapstart + round) & -round;
924

925 926 927
	printk(KERN_INFO
	       "Allocating PCI resources starting at %lx (gap: %lx:%lx)\n",
	       pci_mem_start, gapstart, gapsize);
928
}
929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947

/*
 * Report the first RAM entry found at map index @slot or later.
 *
 * On success *addr receives the entry's start and *size its length,
 * clipped so that the range does not extend beyond max_pfn; the return
 * value is the index following the reported entry, suitable as the next
 * @slot.  Returns -1 when @slot is out of range, no RAM entry remains, or
 * the entry starts above max_pfn.
 */
int __init arch_get_ram_range(int slot, u64 *addr, u64 *size)
{
	int idx = slot;

	if (slot < 0 || slot >= e820.nr_map)
		return -1;

	/* advance to the next RAM entry, if there is one */
	while (idx < e820.nr_map && e820.map[idx].type != E820_RAM)
		idx++;

	if (idx == e820.nr_map)
		return -1;
	if (e820.map[idx].addr > (max_pfn << PAGE_SHIFT))
		return -1;

	*addr = e820.map[idx].addr;
	*size = min_t(u64, e820.map[idx].size + e820.map[idx].addr,
		      max_pfn << PAGE_SHIFT) - *addr;
	return idx + 1;
}