setup.c 26.7 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
/*
 *  Copyright (C) 1995  Linus Torvalds
 *
 *  Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
 *
 *  Memory region support
 *	David Parsons <orc@pell.chi.il.us>, July-August 1999
 *
 *  Added E820 sanitization routine (removes overlapping memory regions);
 *  Brian Moyle <bmoyle@mvista.com>, February 2001
 *
 * Moved CPU detection code to cpu/${cpu}.c
 *    Patrick Mochel <mochel@osdl.org>, March 2002
 *
 *  Provisions for empty E820 memory regions (reported by certain BIOSes).
 *  Alex Achenbach <xela@slit.de>, December 2002.
 *
 */

/*
 * This file handles the architecture-dependent parts of initialization
 */

#include <linux/sched.h>
#include <linux/mm.h>
26
#include <linux/mmzone.h>
27
#include <linux/screen_info.h>
L
Linus Torvalds 已提交
28 29 30 31 32 33 34 35 36 37 38 39 40 41
#include <linux/ioport.h>
#include <linux/acpi.h>
#include <linux/apm_bios.h>
#include <linux/initrd.h>
#include <linux/bootmem.h>
#include <linux/seq_file.h>
#include <linux/console.h>
#include <linux/mca.h>
#include <linux/root_dev.h>
#include <linux/highmem.h>
#include <linux/module.h>
#include <linux/efi.h>
#include <linux/init.h>
#include <linux/edd.h>
42
#include <linux/iscsi_ibft.h>
L
Linus Torvalds 已提交
43
#include <linux/nodemask.h>
44
#include <linux/kexec.h>
45
#include <linux/dmi.h>
D
Dave Hansen 已提交
46
#include <linux/pfn.h>
47
#include <linux/pci.h>
48
#include <asm/pci-direct.h>
49
#include <linux/init_ohci1394_dma.h>
50
#include <linux/kvm_para.h>
51

52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/stddef.h>
#include <linux/unistd.h>
#include <linux/ptrace.h>
#include <linux/slab.h>
#include <linux/user.h>
#include <linux/delay.h>

#include <linux/kallsyms.h>
#include <linux/cpufreq.h>
#include <linux/dma-mapping.h>
#include <linux/ctype.h>
#include <linux/uaccess.h>

#include <linux/percpu.h>
#include <linux/crash_dump.h>
69
#include <linux/tboot.h>
70

L
Linus Torvalds 已提交
71
#include <video/edid.h>
72

73
#include <asm/mtrr.h>
74
#include <asm/apic.h>
L
Linus Torvalds 已提交
75 76 77
#include <asm/e820.h>
#include <asm/mpspec.h>
#include <asm/setup.h>
Y
Yinghai Lu 已提交
78
#include <asm/efi.h>
I
Ingo Molnar 已提交
79 80
#include <asm/timer.h>
#include <asm/i8259.h>
L
Linus Torvalds 已提交
81
#include <asm/sections.h>
82
#include <asm/dmi.h>
L
Linus Torvalds 已提交
83 84
#include <asm/io_apic.h>
#include <asm/ist.h>
85
#include <asm/vmi.h>
86
#include <asm/setup_arch.h>
87
#include <asm/bios_ebda.h>
B
Bernhard Walle 已提交
88
#include <asm/cacheflush.h>
89
#include <asm/processor.h>
Y
Yinghai Lu 已提交
90
#include <asm/bugs.h>
L
Linus Torvalds 已提交
91

92 93
#include <asm/system.h>
#include <asm/vsyscall.h>
94
#include <asm/cpu.h>
95 96
#include <asm/desc.h>
#include <asm/dma.h>
97
#include <asm/iommu.h>
98
#include <asm/gart.h>
99 100 101 102
#include <asm/mmu_context.h>
#include <asm/proto.h>

#include <asm/paravirt.h>
103
#include <asm/hypervisor.h>
104 105 106 107

#include <asm/percpu.h>
#include <asm/topology.h>
#include <asm/apicdef.h>
Y
Yinghai Lu 已提交
108 109 110
#ifdef CONFIG_X86_64
#include <asm/numa_64.h>
#endif
111

Y
Yinghai Lu 已提交
112 113 114 115
#ifndef ARCH_SETUP
#define ARCH_SETUP
#endif

116 117 118 119 120 121 122 123
/*
 * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries.
 * The direct mapping extends to max_pfn_mapped, so that we can directly access
 * apertures, ACPI and other tables without having to play with fixmaps.
 */
unsigned long max_low_pfn_mapped;
unsigned long max_pfn_mapped;

124 125
RESERVE_BRK(dmi_alloc, 65536);

I
Ingo Molnar 已提交
126 127
unsigned int boot_cpu_id __read_mostly;

128 129 130
static __initdata unsigned long _brk_start = (unsigned long)__brk_base;
unsigned long _brk_end = (unsigned long)__brk_base;

I
Ingo Molnar 已提交
131 132 133 134 135 136 137 138 139 140 141 142
#ifdef CONFIG_X86_64
/*
 * Out-of-line wrapper: passes @mps_cpu to __default_cpu_present_to_apicid()
 * and returns whatever that helper returns.
 */
int default_cpu_present_to_apicid(int mps_cpu)
{
	int apicid = __default_cpu_present_to_apicid(mps_cpu);

	return apicid;
}

/*
 * Out-of-line wrapper: passes @boot_cpu_physical_apicid to
 * __default_check_phys_apicid_present() and returns its result.
 */
int default_check_phys_apicid_present(int boot_cpu_physical_apicid)
{
	int present;

	present = __default_check_phys_apicid_present(boot_cpu_physical_apicid);

	return present;
}
#endif

143 144 145 146 147 148
#ifndef CONFIG_DEBUG_BOOT_PARAMS
struct boot_params __initdata boot_params;
#else
struct boot_params boot_params;
#endif

L
Linus Torvalds 已提交
149 150 151
/*
 * Machine setup..
 */
152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172
/*
 * Busy memory resources for the kernel image sections.  The address
 * ranges start out as zero; setup_arch() fills them in and inserts the
 * resources into iomem_resource.
 */
static struct resource data_resource = {
	.flags	= IORESOURCE_BUSY | IORESOURCE_MEM,
	.name	= "Kernel data",
	.start	= 0,
	.end	= 0,
};

static struct resource code_resource = {
	.flags	= IORESOURCE_BUSY | IORESOURCE_MEM,
	.name	= "Kernel code",
	.start	= 0,
	.end	= 0,
};

static struct resource bss_resource = {
	.flags	= IORESOURCE_BUSY | IORESOURCE_MEM,
	.name	= "Kernel bss",
	.start	= 0,
	.end	= 0,
};

173 174

#ifdef CONFIG_X86_32
175 176 177 178 179 180 181
/* Busy memory resource covering 0xa0000 - 0xbffff (32-bit builds only). */
static struct resource video_ram_resource = {
	.flags	= IORESOURCE_BUSY | IORESOURCE_MEM,
	.name	= "Video RAM area",
	.start	= 0xa0000,
	.end	= 0xbffff,
};

L
Linus Torvalds 已提交
182
/* cpu data as detected by the assembly code in head.S */
183
struct cpuinfo_x86 new_cpu_data __cpuinitdata = {0, 0, 0, 0, -1, 1, 0, 0, -1};
L
Linus Torvalds 已提交
184
/* common cpu data for all cpus */
185
struct cpuinfo_x86 boot_cpu_data __read_mostly = {0, 0, 0, 0, -1, 1, 0, 0, -1};
186
EXPORT_SYMBOL(boot_cpu_data);
187 188 189 190 191 192
/* Store @x in MCA_bus; a no-op when CONFIG_MCA is not set. */
#ifdef CONFIG_MCA
static void set_mca_bus(int x)
{
	MCA_bus = x;
}
#else
static void set_mca_bus(int x)
{
}
#endif
L
Linus Torvalds 已提交
193

194 195
unsigned int def_to_bigsmp;

L
Linus Torvalds 已提交
196 197 198 199 200
/* for MCA, but anyone else can use it if they want */
unsigned int machine_id;
unsigned int machine_submodel_id;
unsigned int BIOS_revision;

201 202 203 204 205 206 207 208 209 210 211 212
struct apm_info apm_info;
EXPORT_SYMBOL(apm_info);

/*
 * ist_info is always defined; it is only exported when the SpeedStep SMI
 * driver is configured (built in or as a module).
 */
struct ist_info ist_info;
#if defined(CONFIG_X86_SPEEDSTEP_SMI) || \
	defined(CONFIG_X86_SPEEDSTEP_SMI_MODULE)
EXPORT_SYMBOL(ist_info);
#endif

#else
213 214 215
struct cpuinfo_x86 boot_cpu_data __read_mostly = {
	.x86_phys_bits = MAX_PHYSMEM_BITS,
};
216 217 218 219 220 221 222 223 224 225
EXPORT_SYMBOL(boot_cpu_data);
#endif


#if !defined(CONFIG_X86_PAE) || defined(CONFIG_X86_64)
unsigned long mmu_cr4_features;
#else
unsigned long mmu_cr4_features = X86_CR4_PAE;
#endif

226 227
/* Boot loader ID and version as integers, for the benefit of proc_dointvec */
int bootloader_type, bootloader_version;
L
Linus Torvalds 已提交
228 229 230 231 232

/*
 * Setup options
 */
struct screen_info screen_info;
233
EXPORT_SYMBOL(screen_info);
L
Linus Torvalds 已提交
234
struct edid_info edid_info;
235
EXPORT_SYMBOL_GPL(edid_info);
L
Linus Torvalds 已提交
236 237 238

extern int root_mountflags;

239
unsigned long saved_video_mode;
L
Linus Torvalds 已提交
240

241
#define RAMDISK_IMAGE_START_MASK	0x07FF
L
Linus Torvalds 已提交
242
#define RAMDISK_PROMPT_FLAG		0x8000
243
#define RAMDISK_LOAD_FLAG		0x4000
L
Linus Torvalds 已提交
244

245
static char __initdata command_line[COMMAND_LINE_SIZE];
246 247 248
#ifdef CONFIG_CMDLINE_BOOL
static char __initdata builtin_cmdline[COMMAND_LINE_SIZE] = CONFIG_CMDLINE;
#endif
L
Linus Torvalds 已提交
249 250 251 252 253 254 255 256 257 258 259 260 261

#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
struct edd edd;
#ifdef CONFIG_EDD_MODULE
EXPORT_SYMBOL(edd);
#endif
/**
 * copy_edd() - Copy the BIOS EDD information
 *              from boot_params into a safe place.
 *
 */
static inline void copy_edd(void)
{
262 263 264 265 266
     memcpy(edd.mbr_signature, boot_params.edd_mbr_sig_buffer,
	    sizeof(edd.mbr_signature));
     memcpy(edd.edd_info, boot_params.eddbuf, sizeof(edd.edd_info));
     edd.mbr_signature_nr = boot_params.edd_mbr_sig_buf_entries;
     edd.edd_info_nr = boot_params.eddbuf_entries;
L
Linus Torvalds 已提交
267 268 269 270 271 272 273
}
#else
static inline void copy_edd(void)
{
}
#endif

274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292
/**
 * extend_brk() - hand out a zeroed chunk of the early brk area
 * @size:  number of bytes to allocate
 * @align: alignment of the returned address; must be a non-zero power
 *         of two
 *
 * Rounds _brk_end up to @align, advances it by @size and returns the start
 * of the new chunk, cleared to zero.
 *
 * BUGs if the brk area has already been locked down (_brk_start == 0), if
 * @align is not a non-zero power of two, or if the request does not fit
 * below __brk_limit.
 */
void * __init extend_brk(size_t size, size_t align)
{
	size_t mask = align - 1;
	void *ret;

	BUG_ON(_brk_start == 0);
	/*
	 * align == 0 would pass the power-of-two test below (0 & mask == 0)
	 * and then round _brk_end down to 0, so reject it explicitly.
	 */
	BUG_ON(align == 0);
	BUG_ON(align & mask);

	_brk_end = (_brk_end + mask) & ~mask;
	/* Catch a size large enough to wrap the address past zero. */
	BUG_ON(_brk_end + size < _brk_end);
	BUG_ON((char *)(_brk_end + size) > __brk_limit);

	ret = (void *)_brk_end;
	_brk_end += size;

	memset(ret, 0, size);

	return ret;
}

293 294 295 296 297 298 299 300 301 302 303 304 305 306
#ifdef CONFIG_X86_64
/*
 * Announce GB pages for the direct mapping when they are both requested
 * (direct_gbpages) and available (cpu_has_gbpages); otherwise clear
 * direct_gbpages.
 */
static void __init init_gbpages(void)
{
	if (!direct_gbpages || !cpu_has_gbpages) {
		direct_gbpages = 0;
		return;
	}

	printk(KERN_INFO "Using GB pages for direct mapping\n");
}
#else
/* Nothing to do outside CONFIG_X86_64. */
static inline void init_gbpages(void)
{
}
#endif

307 308 309 310 311 312 313 314 315 316
/*
 * Reserve the part of the brk area that has been handed out so far, then
 * close the area for any further allocation.
 */
static void __init reserve_brk(void)
{
	if (_brk_start < _brk_end) {
		unsigned long pa_first = __pa(_brk_start);
		unsigned long pa_last = __pa(_brk_end);

		reserve_early(pa_first, pa_last, "BRK");
	}

	/*
	 * A zero _brk_start marks the brk area as locked down;
	 * extend_brk() BUGs on it.
	 */
	_brk_start = 0;
}

317 318
#ifdef CONFIG_BLK_DEV_INITRD

319 320
#define MAX_MAP_CHUNK	(NR_FIX_BTMAPS << PAGE_SHIFT)
/*
 * relocate_initrd() - copy the initrd into directly mapped low memory
 *
 * Finds a page-aligned area of boot_params.hdr.ramdisk_size bytes below
 * the end of mapped lowmem, reserves it, points initrd_start/initrd_end
 * at it and copies the image there.  Any part of the image that already
 * lies below end_of_lowmem is copied through the direct mapping; the
 * remainder is copied in chunks of at most MAX_MAP_CHUNK bytes through
 * temporary early_memremap() mappings.
 *
 * Panics if no suitable area can be found.
 */
static void __init relocate_initrd(void)
{
	u64 ramdisk_image = boot_params.hdr.ramdisk_image;
	u64 ramdisk_size  = boot_params.hdr.ramdisk_size;
	u64 end_of_lowmem = max_low_pfn_mapped << PAGE_SHIFT;
	u64 ramdisk_here;
	unsigned long slop, clen, mapaddr;
	char *p, *q;

	/* We need to move the initrd down into lowmem */
	ramdisk_here = find_e820_area(0, end_of_lowmem, ramdisk_size,
					 PAGE_SIZE);

	if (ramdisk_here == -1ULL)
		panic("Cannot find place for new RAMDISK of size %llu\n",
			 ramdisk_size);

	/* Note: this includes all the lowmem currently occupied by
	   the initrd, we rely on that fact to keep the data intact. */
	reserve_early(ramdisk_here, ramdisk_here + ramdisk_size,
			 "NEW RAMDISK");
	initrd_start = ramdisk_here + PAGE_OFFSET;
	initrd_end   = initrd_start + ramdisk_size;
	printk(KERN_INFO "Allocated new RAMDISK: %08llx - %08llx\n",
			 ramdisk_here, ramdisk_here + ramdisk_size);

	q = (char *)initrd_start;

	/* Copy any lowmem portion of the initrd */
	if (ramdisk_image < end_of_lowmem) {
		clen = end_of_lowmem - ramdisk_image;
		p = (char *)__va(ramdisk_image);
		memcpy(q, p, clen);
		q += clen;
		ramdisk_image += clen;
		ramdisk_size  -= clen;
	}

	/* Copy the highmem portion of the initrd */
	while (ramdisk_size) {
		/* slop: offset of the source within its first page */
		slop = ramdisk_image & ~PAGE_MASK;
		clen = ramdisk_size;
		if (clen > MAX_MAP_CHUNK-slop)
			clen = MAX_MAP_CHUNK-slop;
		mapaddr = ramdisk_image & PAGE_MASK;
		p = early_memremap(mapaddr, clen+slop);
		memcpy(q, p+slop, clen);
		early_iounmap(p, clen+slop);
		q += clen;
		ramdisk_image += clen;
		ramdisk_size  -= clen;
	}
	/* high pages are not converted by early_res_to_bootmem */
	/* Reload the original location and size for the message below. */
	ramdisk_image = boot_params.hdr.ramdisk_image;
	ramdisk_size  = boot_params.hdr.ramdisk_size;
	printk(KERN_INFO "Move RAMDISK from %016llx - %016llx to"
		" %08llx - %08llx\n",
		ramdisk_image, ramdisk_image + ramdisk_size - 1,
		ramdisk_here, ramdisk_here + ramdisk_size - 1);
}
Y
Yinghai Lu 已提交
381

382 383 384 385 386
/*
 * reserve_initrd() - set up initrd_start/initrd_end for the boot loader's
 * initrd
 *
 * Returns without touching anything if the boot loader supplied no
 * initrd.  An initrd of at least half the mapped lowmem is freed and
 * disabled (initrd_start stays 0).  An initrd that lies entirely in
 * lowmem is used in place; otherwise it is relocated into lowmem and the
 * original range is freed.
 */
static void __init reserve_initrd(void)
{
	u64 ramdisk_image = boot_params.hdr.ramdisk_image;
	u64 ramdisk_size  = boot_params.hdr.ramdisk_size;
	u64 ramdisk_end   = ramdisk_image + ramdisk_size;
	u64 end_of_lowmem = max_low_pfn_mapped << PAGE_SHIFT;

	if (!boot_params.hdr.type_of_loader ||
	    !ramdisk_image || !ramdisk_size)
		return;		/* No initrd provided by bootloader */

	initrd_start = 0;

	if (ramdisk_size >= (end_of_lowmem>>1)) {
		free_early(ramdisk_image, ramdisk_end);
		printk(KERN_ERR "initrd too large to handle, "
		       "disabling initrd\n");
		return;
	}

	printk(KERN_INFO "RAMDISK: %08llx - %08llx\n", ramdisk_image,
			ramdisk_end);


	if (ramdisk_end <= end_of_lowmem) {
		/* All in lowmem, easy case */
		/*
		 * don't need to reserve again, already reserved early
		 * in i386_start_kernel
		 */
		initrd_start = ramdisk_image + PAGE_OFFSET;
		initrd_end = initrd_start + ramdisk_size;
		return;
	}

	relocate_initrd();

	/* The image now lives at its new place; release the old range. */
	free_early(ramdisk_image, ramdisk_end);
}
Y
Yinghai Lu 已提交
421
#else
422
static void __init reserve_initrd(void)
Y
Yinghai Lu 已提交
423 424
{
}
425 426
#endif /* CONFIG_BLK_DEV_INITRD */

427
/*
 * parse_setup_data() - walk the boot loader's setup_data list
 *
 * Follows the physical-address chain starting at
 * boot_params.hdr.setup_data, mapping each node temporarily, and hands
 * SETUP_E820_EXT nodes to parse_e820_ext().  Other node types are
 * skipped.  Does nothing when boot_params.hdr.version is below 0x0209.
 */
static void __init parse_setup_data(void)
{
	struct setup_data *data;
	u64 pa_data;

	if (boot_params.hdr.version < 0x0209)
		return;
	pa_data = boot_params.hdr.setup_data;
	while (pa_data) {
		data = early_memremap(pa_data, PAGE_SIZE);
		switch (data->type) {
		case SETUP_E820_EXT:
			parse_e820_ext(data, pa_data);
			break;
		default:
			break;
		}
		/* Fetch the link before the mapping goes away. */
		pa_data = data->next;
		early_iounmap(data, PAGE_SIZE);
	}
}

449
/*
 * e820_reserve_setup_data() - retype setup_data memory in the e820 map
 *
 * For every node on the setup_data list, changes the range holding the
 * node header and its payload from E820_RAM to E820_RESERVED_KERN.  If at
 * least one node was found, the map is sanitized, copied to e820_saved
 * and printed.  Does nothing when boot_params.hdr.version is below 0x0209.
 */
static void __init e820_reserve_setup_data(void)
{
	struct setup_data *data;
	u64 pa_data;
	int found = 0;

	if (boot_params.hdr.version < 0x0209)
		return;
	pa_data = boot_params.hdr.setup_data;
	while (pa_data) {
		data = early_memremap(pa_data, sizeof(*data));
		e820_update_range(pa_data, sizeof(*data)+data->len,
			 E820_RAM, E820_RESERVED_KERN);
		found = 1;
		pa_data = data->next;
		early_iounmap(data, sizeof(*data));
	}
	if (!found)
		return;

	sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
	memcpy(&e820_saved, &e820, sizeof(struct e820map));
	printk(KERN_INFO "extended physical RAM map:\n");
	e820_print_map("reserve setup_data");
}

475 476 477 478 479 480 481 482 483 484
/*
 * reserve_early_setup_data() - early-reserve every setup_data node
 *
 * For every node on the setup_data list, reserves the range holding the
 * node header and its payload under the name "setup data <type in hex>".
 * Does nothing when boot_params.hdr.version is below 0x0209.
 */
static void __init reserve_early_setup_data(void)
{
	struct setup_data *data;
	u64 pa_data;
	char buf[32];

	if (boot_params.hdr.version < 0x0209)
		return;
	pa_data = boot_params.hdr.setup_data;
	while (pa_data) {
		data = early_memremap(pa_data, sizeof(*data));
		/* Bounded formatting: never write past the end of buf. */
		snprintf(buf, sizeof(buf), "setup data %x", data->type);
		reserve_early(pa_data, pa_data+sizeof(*data)+data->len, buf);
		pa_data = data->next;
		early_iounmap(data, sizeof(*data));
	}
}

493 494 495 496 497
/*
 * --------- Crashkernel reservation ------------------------------
 */

#ifdef CONFIG_KEXEC
498 499 500 501 502 503 504

/**
 * Reserve @size bytes of crashkernel memory at any suitable offset.
 *
 * @size: Size of the crashkernel memory to reserve.
 * Returns the base address on success, and -1ULL on failure.
 */
505
static
506
unsigned long long __init find_and_reserve_crashkernel(unsigned long long size)
507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526
{
	const unsigned long long alignment = 16<<20; 	/* 16M */
	unsigned long long start = 0LL;

	while (1) {
		int ret;

		start = find_e820_area(start, ULONG_MAX, size, alignment);
		if (start == -1ULL)
			return start;

		/* try to reserve it */
		ret = reserve_bootmem_generic(start, size, BOOTMEM_EXCLUSIVE);
		if (ret >= 0)
			return start;

		start += alignment;
	}
}

527 528 529 530 531 532 533 534 535 536 537 538
/*
 * Return the size in bytes of the page frame range
 * [min_low_pfn, max_low_pfn) plus, with CONFIG_HIGHMEM,
 * [highstart_pfn, highend_pfn).
 */
static inline unsigned long long get_total_mem(void)
{
	unsigned long long nr_pages = max_low_pfn - min_low_pfn;

#ifdef CONFIG_HIGHMEM
	nr_pages += highend_pfn - highstart_pfn;
#endif

	return nr_pages << PAGE_SHIFT;
}

539
/*
 * reserve_crashkernel() - reserve memory requested via "crashkernel="
 *
 * Parses the crashkernel option from boot_command_line.  With a base of
 * 0 a suitable area is searched for automatically; otherwise exactly the
 * requested range is reserved.  On success the range is published as
 * crashk_res in iomem_resource; on failure a message is logged and
 * nothing is reserved.
 */
static void __init reserve_crashkernel(void)
{
	unsigned long long total_mem;
	unsigned long long crash_size, crash_base;
	int ret;

	total_mem = get_total_mem();

	ret = parse_crashkernel(boot_command_line, total_mem,
			&crash_size, &crash_base);
	/* Both values are unsigned, so only zero means "not given". */
	if (ret != 0 || crash_size == 0)
		return;

	/* 0 means: find the address automatically */
	if (crash_base == 0) {
		crash_base = find_and_reserve_crashkernel(crash_size);
		if (crash_base == -1ULL) {
			pr_info("crashkernel reservation failed. "
				"No suitable area found.\n");
			return;
		}
	} else {
		ret = reserve_bootmem_generic(crash_base, crash_size,
					BOOTMEM_EXCLUSIVE);
		if (ret < 0) {
			pr_info("crashkernel reservation failed - "
				"memory is in use\n");
			return;
		}
	}

	/* The arguments are unsigned long, hence %lu rather than %ld. */
	printk(KERN_INFO "Reserving %luMB of memory at %luMB "
			"for crashkernel (System RAM: %luMB)\n",
			(unsigned long)(crash_size >> 20),
			(unsigned long)(crash_base >> 20),
			(unsigned long)(total_mem >> 20));

	crashk_res.start = crash_base;
	crashk_res.end   = crash_base + crash_size - 1;
	insert_resource(&iomem_resource, &crashk_res);
}
#else
581
static void __init reserve_crashkernel(void)
582 583 584 585
{
}
#endif

586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608
/*
 * Busy I/O port ranges that reserve_standard_io_resources() requests
 * from ioport_resource.
 */
static struct resource standard_io_resources[] = {
	{
		.name	= "dma1",
		.start	= 0x00,
		.end	= 0x1f,
		.flags	= IORESOURCE_BUSY | IORESOURCE_IO,
	},
	{
		.name	= "pic1",
		.start	= 0x20,
		.end	= 0x21,
		.flags	= IORESOURCE_BUSY | IORESOURCE_IO,
	},
	{
		.name	= "timer0",
		.start	= 0x40,
		.end	= 0x43,
		.flags	= IORESOURCE_BUSY | IORESOURCE_IO,
	},
	{
		.name	= "timer1",
		.start	= 0x50,
		.end	= 0x53,
		.flags	= IORESOURCE_BUSY | IORESOURCE_IO,
	},
	{
		.name	= "keyboard",
		.start	= 0x60,
		.end	= 0x60,
		.flags	= IORESOURCE_BUSY | IORESOURCE_IO,
	},
	{
		.name	= "keyboard",
		.start	= 0x64,
		.end	= 0x64,
		.flags	= IORESOURCE_BUSY | IORESOURCE_IO,
	},
	{
		.name	= "dma page reg",
		.start	= 0x80,
		.end	= 0x8f,
		.flags	= IORESOURCE_BUSY | IORESOURCE_IO,
	},
	{
		.name	= "pic2",
		.start	= 0xa0,
		.end	= 0xa1,
		.flags	= IORESOURCE_BUSY | IORESOURCE_IO,
	},
	{
		.name	= "dma2",
		.start	= 0xc0,
		.end	= 0xdf,
		.flags	= IORESOURCE_BUSY | IORESOURCE_IO,
	},
	{
		.name	= "fpu",
		.start	= 0xf0,
		.end	= 0xff,
		.flags	= IORESOURCE_BUSY | IORESOURCE_IO,
	},
};

609
/*
 * Request every entry of standard_io_resources[] from ioport_resource.
 * The result of each individual request is not checked.
 */
static void __init reserve_standard_io_resources(void)
{
	unsigned int i;

	/* request I/O space for devices used on all i[345]86 PCs */
	for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++)
		request_resource(&ioport_resource, &standard_io_resources[i]);

}

619 620 621 622 623 624 625
/*
 * Note: elfcorehdr_addr is not just limited to vmcore. It is also used by
 * is_kdump_kernel() to determine if we are booting after a panic. Hence
 * ifdef it under CONFIG_CRASH_DUMP and not CONFIG_PROC_VMCORE.
 */

#ifdef CONFIG_CRASH_DUMP
626 627 628 629 630 631 632 633 634 635 636 637 638 639 640
/* elfcorehdr= specifies the location of elf core header
 * stored by the crashed kernel. This option will be passed
 * by kexec loader to the capture kernel.
 */
static int __init setup_elfcorehdr(char *arg)
{
	char *parsed_to;

	if (arg == NULL)
		return -EINVAL;

	elfcorehdr_addr = memparse(arg, &parsed_to);

	/* memparse() consumed nothing: the argument held no number. */
	if (parsed_to <= arg)
		return -EINVAL;

	return 0;
}
early_param("elfcorehdr", setup_elfcorehdr);
#endif

641
static struct x86_quirks default_x86_quirks __initdata;
642

Y
Yinghai Lu 已提交
643
struct x86_quirks *x86_quirks __initdata = &default_x86_quirks;
644

645
#ifdef CONFIG_X86_RESERVE_LOW_64K
646 647 648
/*
 * DMI match callback: logs the matched entry's ident, retypes the first
 * 0x10000 bytes of memory from E820_RAM to E820_RESERVED and re-sanitizes
 * the e820 map.  Always returns 0.
 */
static int __init dmi_low_memory_corruption(const struct dmi_system_id *d)
{
	printk(KERN_NOTICE
		"%s detected: BIOS may corrupt low RAM, working around it.\n",
		d->ident);

	e820_update_range(0, 0x10000, E820_RAM, E820_RESERVED);
	sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);

	return 0;
}
657
#endif
658 659 660

/* List of systems that have known low memory corruption BIOS problems */
/*
 * Each entry runs dmi_low_memory_corruption() when it matches.  Without
 * CONFIG_X86_RESERVE_LOW_64K the table holds only the terminating entry.
 */
static struct dmi_system_id __initdata bad_bios_dmi_table[] = {
#ifdef CONFIG_X86_RESERVE_LOW_64K
	{
		.callback = dmi_low_memory_corruption,
		.ident = "AMI BIOS",
		.matches = {
			DMI_MATCH(DMI_BIOS_VENDOR, "American Megatrends Inc."),
		},
	},
	{
		.callback = dmi_low_memory_corruption,
		.ident = "Phoenix BIOS",
		.matches = {
			DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies"),
		},
	},
	{
	/*
	 * AMI BIOS with low memory corruption was found on Intel DG45ID board.
	 * It has a different DMI_BIOS_VENDOR = "Intel Corp.", for now we will
	 * match only DMI_BOARD_NAME and see if there are more bad products
	 * with this vendor.
	 */
		.callback = dmi_low_memory_corruption,
		.ident = "AMI BIOS",
		.matches = {
			DMI_MATCH(DMI_BOARD_NAME, "DG45ID"),
		},
	},
#endif
	{}
};

L
Linus Torvalds 已提交
693 694 695 696 697 698 699
/*
 * Determine if we were loaded by an EFI loader.  If so, then we have also been
 * passed the efi memmap, systab, etc., so we should use these data structures
 * for initialization.  Note, the efi init code path is determined by the
 * global efi_enabled. This allows the same kernel image to be used on existing
 * systems (with a traditional BIOS) as well as on EFI systems.
 */
700 701 702 703 704 705
/*
 * setup_arch - architecture-specific boot-time initializations
 *
 * Note: On x86_64, fixmaps are ready for use even before this is called.
 */

L
Linus Torvalds 已提交
706 707
void __init setup_arch(char **cmdline_p)
{
#ifdef CONFIG_X86_32
	memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
	visws_early_detect();
#else
	printk(KERN_INFO "Command line: %s\n", boot_command_line);
#endif

	/*
	 * Merge the built-in command line before boot_command_line is
	 * copied and before parse_early_param() runs, so that built-in
	 * options are seen by the early parameter handlers and are part of
	 * the command line handed back through @cmdline_p.
	 */
#ifdef CONFIG_CMDLINE_BOOL
#ifdef CONFIG_CMDLINE_OVERRIDE
	strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE);
#else
	if (builtin_cmdline[0]) {
		/* append boot loader cmdline to builtin */
		strlcat(builtin_cmdline, " ", COMMAND_LINE_SIZE);
		strlcat(builtin_cmdline, boot_command_line, COMMAND_LINE_SIZE);
		strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE);
	}
#endif
#endif

	strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE);
	*cmdline_p = command_line;

#ifdef CONFIG_X86_64
	/*
	 * Must call this twice: Once just to detect whether hardware doesn't
	 * support NX (so that the early EHCI debug console setup can safely
	 * call set_fixmap()), and then again after parsing early parameters
	 * to honor the respective command line option.
	 */
	check_efer();
#endif

	parse_early_param();

	/* VMI may relocate the fixmap; do this before touching ioremap area */
	vmi_init();

	early_cpu_init();
	early_ioremap_init();

	ROOT_DEV = old_decode_dev(boot_params.hdr.root_dev);
	screen_info = boot_params.screen_info;
	edid_info = boot_params.edid_info;
#ifdef CONFIG_X86_32
	apm_info.bios = boot_params.apm_bios_info;
	ist_info = boot_params.ist_info;
	if (boot_params.sys_desc_table.length != 0) {
		set_mca_bus(boot_params.sys_desc_table.table[3] & 0x2);
		machine_id = boot_params.sys_desc_table.table[0];
		machine_submodel_id = boot_params.sys_desc_table.table[1];
		BIOS_revision = boot_params.sys_desc_table.table[2];
	}
#endif
	saved_video_mode = boot_params.hdr.vid_mode;
	bootloader_type = boot_params.hdr.type_of_loader;
	/* Loader type 0xe in the high nibble: take the type from ext_loader_type */
	if ((bootloader_type >> 4) == 0xe) {
		bootloader_type &= 0xf;
		bootloader_type |= (boot_params.hdr.ext_loader_type+0x10) << 4;
	}
	bootloader_version  = bootloader_type & 0xf;
	bootloader_version |= boot_params.hdr.ext_loader_ver << 4;

#ifdef CONFIG_BLK_DEV_RAM
	rd_image_start = boot_params.hdr.ram_size & RAMDISK_IMAGE_START_MASK;
	rd_prompt = ((boot_params.hdr.ram_size & RAMDISK_PROMPT_FLAG) != 0);
	rd_doload = ((boot_params.hdr.ram_size & RAMDISK_LOAD_FLAG) != 0);
#endif
#ifdef CONFIG_EFI
	if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature,
#ifdef CONFIG_X86_32
		     "EL32",
#else
		     "EL64",
#endif
	 4)) {
		efi_enabled = 1;
		efi_reserve_early();
	}
#endif

	ARCH_SETUP

	setup_memory_map();
	parse_setup_data();
	/* update the e820_saved too */
	e820_reserve_setup_data();

	copy_edd();

	if (!boot_params.hdr.root_flags)
		root_mountflags &= ~MS_RDONLY;
	init_mm.start_code = (unsigned long) _text;
	init_mm.end_code = (unsigned long) _etext;
	init_mm.end_data = (unsigned long) _edata;
	init_mm.brk = _brk_end;

	code_resource.start = virt_to_phys(_text);
	code_resource.end = virt_to_phys(_etext)-1;
	data_resource.start = virt_to_phys(_etext);
	data_resource.end = virt_to_phys(_edata)-1;
	bss_resource.start = virt_to_phys(&__bss_start);
	bss_resource.end = virt_to_phys(&__bss_stop)-1;

#ifdef CONFIG_X86_64
	check_efer();
#endif

	/* Must be before kernel pagetables are setup */
	vmi_activate();

	/* after early param, so could get panic from serial */
	reserve_early_setup_data();

	if (acpi_mps_check()) {
#ifdef CONFIG_X86_LOCAL_APIC
		disable_apic = 1;
#endif
		setup_clear_cpu_cap(X86_FEATURE_APIC);
	}

#ifdef CONFIG_PCI
	if (pci_early_dump_regs)
		early_dump_pci_devices();
#endif

	finish_e820_parsing();

	if (efi_enabled)
		efi_init();

	dmi_scan_machine();

	dmi_check_system(bad_bios_dmi_table);

	/*
	 * VMware detection requires dmi to be available, so this
	 * needs to be done after dmi_scan_machine, for the BP.
	 */
	init_hypervisor(&boot_cpu_data);

#ifdef CONFIG_X86_32
	probe_roms();
#endif

	/* after parse_early_param, so could debug it */
	insert_resource(&iomem_resource, &code_resource);
	insert_resource(&iomem_resource, &data_resource);
	insert_resource(&iomem_resource, &bss_resource);


#ifdef CONFIG_X86_32
	if (ppro_with_ram_bug()) {
		e820_update_range(0x70000000ULL, 0x40000ULL, E820_RAM,
				  E820_RESERVED);
		sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
		printk(KERN_INFO "fixed physical RAM map:\n");
		e820_print_map("bad_ppro");
	}
#else
	early_gart_iommu_check();
#endif

	/*
	 * partially used pages are not usable - thus
	 * we are rounding upwards:
	 */
	max_pfn = e820_end_of_ram_pfn();

	/* preallocate 4k for mptable mpc */
	early_reserve_e820_mpc_new();
	/* update e820 for memory not covered by WB MTRRs */
	mtrr_bp_init();
	if (mtrr_trim_uncached_memory(max_pfn))
		max_pfn = e820_end_of_ram_pfn();

#ifdef CONFIG_X86_32
	/* max_low_pfn get updated here */
	find_low_pfn_range();
#else
	num_physpages = max_pfn;

	check_x2apic();

	/* How many end-of-memory variables you have, grandma! */
	/* need this before calling reserve_initrd */
	if (max_pfn > (1UL<<(32 - PAGE_SHIFT)))
		max_low_pfn = e820_end_of_low_ram_pfn();
	else
		max_low_pfn = max_pfn;

	high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1;
	max_pfn_mapped = KERNEL_IMAGE_SIZE >> PAGE_SHIFT;
#endif

#ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION
	setup_bios_corruption_check();
#endif

	printk(KERN_DEBUG "initial memory mapped : 0 - %08lx\n",
			max_pfn_mapped<<PAGE_SHIFT);

	reserve_brk();

	init_gbpages();

	/* max_pfn_mapped is updated here */
	max_low_pfn_mapped = init_memory_mapping(0, max_low_pfn<<PAGE_SHIFT);
	max_pfn_mapped = max_low_pfn_mapped;

#ifdef CONFIG_X86_64
	if (max_pfn > max_low_pfn) {
		max_pfn_mapped = init_memory_mapping(1UL<<32,
						     max_pfn<<PAGE_SHIFT);
		/* can we preserve max_low_pfn? */
		max_low_pfn = max_pfn;
	}
#endif

	/*
	 * NOTE: On x86-32, only from this point on, fixmaps are ready for use.
	 */

#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT
	if (init_ohci1394_dma_early)
		init_ohci1394_dma_on_all_controllers();
#endif

	reserve_initrd();

	vsmp_init();

	io_delay_init();

	/*
	 * Parse the ACPI tables for possible boot-time SMP configuration.
	 */
	acpi_boot_table_init();

	early_acpi_boot_init();

#ifdef CONFIG_ACPI_NUMA
	/*
	 * Parse SRAT to discover nodes.
	 */
	acpi_numa_init();
#endif

	initmem_init(0, max_pfn);

#ifdef CONFIG_ACPI_SLEEP
	/*
	 * Reserve low memory region for sleep support.
	 */
	acpi_reserve_bootmem();
#endif
	/*
	 * Find and reserve possible boot-time SMP configuration:
	 */
	find_smp_config();

	reserve_crashkernel();

#ifdef CONFIG_X86_64
	/*
	 * dma32_reserve_bootmem() allocates bootmem which may conflict
	 * with the crashkernel command line, so do that after
	 * reserve_crashkernel()
	 */
	dma32_reserve_bootmem();
#endif

	reserve_ibft_region();

#ifdef CONFIG_KVM_CLOCK
	kvmclock_init();
#endif

	paravirt_pagetable_setup_start(swapper_pg_dir);
	paging_init();
	paravirt_pagetable_setup_done(swapper_pg_dir);
	paravirt_post_allocator_init();

	tboot_probe();

#ifdef CONFIG_X86_64
	map_vsyscall();
#endif

	generic_apic_probe();

	early_quirks();

	/*
	 * Read APIC and some other early information from ACPI tables.
	 */
	acpi_boot_init();

#if defined(CONFIG_X86_MPPARSE) || defined(CONFIG_X86_VISWS)
	/*
	 * get boot-time SMP configuration:
	 */
	if (smp_found_config)
		get_smp_config();
#endif

	prefill_possible_map();

#ifdef CONFIG_X86_64
	init_cpu_to_node();
#endif

	init_apic_mappings();
	ioapic_init_mappings();

	/* need to wait until the io_apic is mapped */
	probe_nr_irqs_gsi();

	kvm_guest_init();

	e820_reserve_resources();
	e820_mark_nosave_regions(max_low_pfn);

#ifdef CONFIG_X86_32
	request_resource(&iomem_resource, &video_ram_resource);
#endif
	reserve_standard_io_resources();

	e820_setup_gap();

#ifdef CONFIG_VT
#if defined(CONFIG_VGA_CONSOLE)
	if (!efi_enabled || (efi_mem_type(0xa0000) != EFI_CONVENTIONAL_MEMORY))
		conswitchp = &vga_con;
#elif defined(CONFIG_DUMMY_CONSOLE)
	conswitchp = &dummy_con;
#endif
#endif
}
1047

1048 1049 1050
#ifdef CONFIG_X86_32

/**
I
Ingo Molnar 已提交
1051
 * x86_quirk_intr_init - post gate setup interrupt initialisation
1052 1053 1054 1055 1056 1057 1058
 *
 * Description:
 *	Fill in any interrupts that may have been left out by the general
 *	init_IRQ() routine.  interrupts having to do with the machine rather
 *	than the devices on the I/O bus (like APIC interrupts in intel MP
 *	systems) are started here.
 **/
I
Ingo Molnar 已提交
1059
void __init x86_quirk_intr_init(void)
1060 1061 1062 1063 1064 1065 1066 1067
{
	if (x86_quirks->arch_intr_init) {
		if (x86_quirks->arch_intr_init())
			return;
	}
}

/**
I
Ingo Molnar 已提交
1068
 * x86_quirk_trap_init - initialise system specific traps
1069 1070 1071 1072 1073
 *
 * Description:
 *	Called as the final act of trap_init().  Used in VISWS to initialise
 *	the various board specific APIC traps.
 **/
I
Ingo Molnar 已提交
1074
void __init x86_quirk_trap_init(void)
1075 1076 1077 1078 1079 1080 1081 1082 1083
{
	if (x86_quirks->arch_trap_init) {
		if (x86_quirks->arch_trap_init())
			return;
	}
}

static struct irqaction irq0  = {
	.handler = timer_interrupt,
I
Ingo Molnar 已提交
1084
	.flags = IRQF_DISABLED | IRQF_NOBALANCING | IRQF_IRQPOLL | IRQF_TIMER,
1085 1086 1087 1088
	.name = "timer"
};

/**
I
Ingo Molnar 已提交
1089
 * x86_quirk_pre_time_init - do any specific initialisations before.
1090 1091
 *
 **/
I
Ingo Molnar 已提交
1092
void __init x86_quirk_pre_time_init(void)
1093 1094 1095 1096 1097 1098
{
	if (x86_quirks->arch_pre_time_init)
		x86_quirks->arch_pre_time_init();
}

/**
I
Ingo Molnar 已提交
1099
 * x86_quirk_time_init - do any specific initialisations for the system timer.
1100 1101 1102 1103 1104
 *
 * Description:
 *	Must plug the system timer interrupt source at HZ into the IRQ listed
 *	in irq_vectors.h:TIMER_IRQ
 **/
I
Ingo Molnar 已提交
1105
void __init x86_quirk_time_init(void)
1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120
{
	if (x86_quirks->arch_time_init) {
		/*
		 * A nonzero return code does not mean failure, it means
		 * that the architecture quirk does not want any
		 * generic (timer) setup to be performed after this:
		 */
		if (x86_quirks->arch_time_init())
			return;
	}

	irq0.mask = cpumask_of_cpu(0);
	setup_irq(0, &irq0);
}
#endif /* CONFIG_X86_32 */