head_32.S 14.3 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
/*
 *  linux/arch/i386/kernel/head.S -- the 32-bit startup code.
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *
 *  Enhanced CPU detection and feature setting code by Mike Jagdis
 *  and Martin Mares, November 1997.
 */

.text
#include <linux/threads.h>
#include <linux/linkage.h>
#include <asm/segment.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/desc.h>
#include <asm/cache.h>
#include <asm/thread_info.h>
19
#include <asm/asm-offsets.h>
L
Linus Torvalds 已提交
20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36
#include <asm/setup.h>

/*
 * References to members of the new_cpu_data structure.
 */

#define X86		new_cpu_data+CPUINFO_x86
#define X86_VENDOR	new_cpu_data+CPUINFO_x86_vendor
#define X86_MODEL	new_cpu_data+CPUINFO_x86_model
#define X86_MASK	new_cpu_data+CPUINFO_x86_mask
#define X86_HARD_MATH	new_cpu_data+CPUINFO_hard_math
#define X86_CPUID	new_cpu_data+CPUINFO_cpuid_level
#define X86_CAPABILITY	new_cpu_data+CPUINFO_x86_capability
#define X86_VENDOR_ID	new_cpu_data+CPUINFO_x86_vendor_id

/*
 * This is how much memory *in addition to the memory covered up to
37 38 39 40 41 42 43 44 45
 * and including _end* we need mapped initially.
 * We need:
 *  - one bit for each possible page, but only in low memory, which means
 *     2^32/4096/8 = 128K worst case (4G/4G split.)
 *  - enough space to map all low memory, which means
 *     (2^32/4096) / 1024 pages (worst case, non PAE)
 *     (2^32/4096) / 512 + 4 pages (worst case for PAE)
 *  - a few pages for allocator use before the kernel pagetable has
 *     been set up
L
Linus Torvalds 已提交
46 47 48 49 50 51
 *
 * Modulo rounding, each megabyte assigned here requires a kilobyte of
 * memory, which is currently unreclaimed.
 *
 * This should be a multiple of a page.
 */
52
LOW_PAGES = 1<<(32-PAGE_SHIFT_asm)
L
Linus Torvalds 已提交
53

54 55 56 57 58 59 60 61 62
/*
 * To preserve the DMA pool in PAGEALLOC kernels, we'll allocate
 * pagetables from above the 16MB DMA limit, so we'll have to set
 * up pagetables 16MB more (worst-case):
 */
#ifdef CONFIG_DEBUG_PAGEALLOC
LOW_PAGES = LOW_PAGES + 0x1000000
#endif

63 64 65 66 67 68 69 70 71
#if PTRS_PER_PMD > 1
PAGE_TABLE_SIZE = (LOW_PAGES / PTRS_PER_PMD) + PTRS_PER_PGD
#else
PAGE_TABLE_SIZE = (LOW_PAGES / PTRS_PER_PGD)
#endif
BOOTBITMAP_SIZE = LOW_PAGES / 8
ALLOCATOR_SLOP = 4

INIT_MAP_BEYOND_END = BOOTBITMAP_SIZE + (PAGE_TABLE_SIZE + ALLOCATOR_SLOP)*PAGE_SIZE_asm
L
Linus Torvalds 已提交
72 73 74 75 76 77 78 79

/*
 * 32-bit kernel entrypoint; only used by the boot CPU.  On entry,
 * %esi points to the real-mode code as a 32-bit pointer.
 * CS and DS must be 4 GB flat segments, but we don't depend on
 * any particular GDT layout, because we load our own as soon as we
 * can.
 */
80
.section .text.head,"ax",@progbits
L
Linus Torvalds 已提交
81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103
ENTRY(startup_32)

/*
 * Set segments to known values.
 */
	cld
	lgdt boot_gdt_descr - __PAGE_OFFSET
	movl $(__BOOT_DS),%eax
	movl %eax,%ds
	movl %eax,%es
	movl %eax,%fs
	movl %eax,%gs

/*
 * Clear BSS first so that there are no surprises...
 * No need to cld as DF is already clear from cld above...
 */
	xorl %eax,%eax
	movl $__bss_start - __PAGE_OFFSET,%edi
	movl $__bss_stop - __PAGE_OFFSET,%ecx
	subl %edi,%ecx
	shrl $2,%ecx
	rep ; stosl
104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124
/*
 * Copy bootup parameters out of the way.
 * Note: %esi still has the pointer to the real-mode data.
 * With the kexec as boot loader, parameter segment might be loaded beyond
 * kernel image and might not even be addressable by early boot page tables.
 * (kexec on panic case). Hence copy out the parameters before initializing
 * page tables.
 */
	movl $(boot_params - __PAGE_OFFSET),%edi
	movl $(PARAM_SIZE/4),%ecx
	cld
	rep
	movsl
	movl boot_params - __PAGE_OFFSET + NEW_CL_POINTER,%esi
	andl %esi,%esi
	jnz 2f			# New command line protocol
	cmpw $(OLD_CL_MAGIC),OLD_CL_MAGIC_ADDR
	jne 1f
	movzwl OLD_CL_OFFSET,%esi
	addl $(OLD_CL_BASE_ADDR),%esi
2:
125
	movl $(boot_command_line - __PAGE_OFFSET),%edi
126 127 128 129
	movl $(COMMAND_LINE_SIZE/4),%ecx
	rep
	movsl
1:
L
Linus Torvalds 已提交
130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167

/*
 * Initialize page tables.  This creates a PDE and a set of page
 * tables, which are located immediately beyond _end.  The variable
 * init_pg_tables_end is set up to point to the first "safe" location.
 * Mappings are created both at virtual address 0 (identity mapping)
 * and PAGE_OFFSET for up to _end+sizeof(page tables)+INIT_MAP_BEYOND_END.
 *
 * Warning: don't use %esi or the stack in this code.  However, %esp
 * can be used as a GPR if you really need it...
 */
page_pde_offset = (__PAGE_OFFSET >> 20);

	movl $(pg0 - __PAGE_OFFSET), %edi
	movl $(swapper_pg_dir - __PAGE_OFFSET), %edx
	movl $0x007, %eax			/* 0x007 = PRESENT+RW+USER */
10:
	leal 0x007(%edi),%ecx			/* Create PDE entry */
	movl %ecx,(%edx)			/* Store identity PDE entry */
	movl %ecx,page_pde_offset(%edx)		/* Store kernel PDE entry */
	addl $4,%edx
	movl $1024, %ecx
11:
	stosl
	addl $0x1000,%eax
	loop 11b
	/* End condition: we must map up to and including INIT_MAP_BEYOND_END */
	/* bytes beyond the end of our own page tables; the +0x007 is the attribute bits */
	leal (INIT_MAP_BEYOND_END+0x007)(%edi),%ebp
	cmpl %ebp,%eax
	jb 10b
	movl %edi,(init_pg_tables_end - __PAGE_OFFSET)

	xorl %ebx,%ebx				/* This is the boot CPU (BSP) */
	jmp 3f
/*
 * Non-boot CPU entry point; entered from trampoline.S
 * We can't lgdt here, because lgdt itself uses a data segment, but
168
 * we know the trampoline has already loaded the boot_gdt for us.
169 170 171
 *
 * If cpu hotplug is not supported then this code can go in init section
 * which will be freed later
L
Linus Torvalds 已提交
172
 */
173

A
Andi Kleen 已提交
174
#ifndef CONFIG_HOTPLUG_CPU
175 176 177
.section .init.text,"ax",@progbits
#endif

E
Eric W. Biderman 已提交
178 179 180 181 182 183
	/* Do an early initialization of the fixmap area */
	movl $(swapper_pg_dir - __PAGE_OFFSET), %edx
	movl $(swapper_pg_pmd - __PAGE_OFFSET), %eax
	addl $0x007, %eax			/* 0x007 = PRESENT+RW+USER */
	movl %eax, 4092(%edx)

184
#ifdef CONFIG_SMP
L
Linus Torvalds 已提交
185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242
ENTRY(startup_32_smp)
	cld
	movl $(__BOOT_DS),%eax
	movl %eax,%ds
	movl %eax,%es
	movl %eax,%fs
	movl %eax,%gs

/*
 *	New page tables may be in 4Mbyte page mode and may
 *	be using the global pages. 
 *
 *	NOTE! If we are on a 486 we may have no cr4 at all!
 *	So we do not try to touch it unless we really have
 *	some bits in it to set.  This won't work if the BSP
 *	implements cr4 but this AP does not -- very unlikely
 *	but be warned!  The same applies to the pse feature
 *	if not equally supported. --macro
 *
 *	NOTE! We have to correct for the fact that we're
 *	not yet offset PAGE_OFFSET..
 */
#define cr4_bits mmu_cr4_features-__PAGE_OFFSET
	movl cr4_bits,%edx
	andl %edx,%edx
	jz 6f
	movl %cr4,%eax		# Turn on paging options (PSE,PAE,..)
	orl %edx,%eax
	movl %eax,%cr4

	btl $5, %eax		# check if PAE is enabled
	jnc 6f

	/* Check if extended functions are implemented */
	movl $0x80000000, %eax
	cpuid
	cmpl $0x80000000, %eax
	jbe 6f
	mov $0x80000001, %eax
	cpuid
	/* Execute Disable bit supported? */
	btl $20, %edx
	jnc 6f

	/* Setup EFER (Extended Feature Enable Register) */
	movl $0xc0000080, %ecx
	rdmsr

	btsl $11, %eax
	/* Make changes effective */
	wrmsr

6:
	/* This is a secondary processor (AP) */
	xorl %ebx,%ebx
	incl %ebx

#endif /* CONFIG_SMP */
243
3:
L
Linus Torvalds 已提交
244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341

/*
 * Enable paging
 */
	movl $swapper_pg_dir-__PAGE_OFFSET,%eax
	movl %eax,%cr3		/* set the page table pointer.. */
	movl %cr0,%eax
	orl $0x80000000,%eax
	movl %eax,%cr0		/* ..and set paging (PG) bit */
	ljmp $__BOOT_CS,$1f	/* Clear prefetch and normalize %eip */
1:
	/* Set up the stack pointer */
	lss stack_start,%esp

/*
 * Initialize eflags.  Some BIOS's leave bits like NT set.  This would
 * confuse the debugger if this code is traced.
 * XXX - best to initialize before switching to protected mode.
 */
	pushl $0
	popfl

#ifdef CONFIG_SMP
	andl %ebx,%ebx
	jz  1f				/* Initial CPU cleans BSS */
	jmp checkCPUtype
1:
#endif /* CONFIG_SMP */

/*
 * start system 32-bit setup. We need to re-do some of the things done
 * in 16-bit mode for the "real" operations.
 */
	call setup_idt

checkCPUtype:

	movl $-1,X86_CPUID		#  -1 for no CPUID initially

/* check if it is 486 or 386. */
/*
 * XXX - this does a lot of unnecessary setup.  Alignment checks don't
 * apply at our cpl of 0 and the stack ought to be aligned already, and
 * we don't need to preserve eflags.
 */

	movb $3,X86		# at least 386
	pushfl			# push EFLAGS
	popl %eax		# get EFLAGS
	movl %eax,%ecx		# save original EFLAGS
	xorl $0x240000,%eax	# flip AC and ID bits in EFLAGS
	pushl %eax		# copy to EFLAGS
	popfl			# set EFLAGS
	pushfl			# get new EFLAGS
	popl %eax		# put it in eax
	xorl %ecx,%eax		# change in flags
	pushl %ecx		# restore original EFLAGS
	popfl
	testl $0x40000,%eax	# check if AC bit changed
	je is386

	movb $4,X86		# at least 486
	testl $0x200000,%eax	# check if ID bit changed
	je is486

	/* get vendor info */
	xorl %eax,%eax			# call CPUID with 0 -> return vendor ID
	cpuid
	movl %eax,X86_CPUID		# save CPUID level
	movl %ebx,X86_VENDOR_ID		# lo 4 chars
	movl %edx,X86_VENDOR_ID+4	# next 4 chars
	movl %ecx,X86_VENDOR_ID+8	# last 4 chars

	orl %eax,%eax			# do we have processor info as well?
	je is486

	movl $1,%eax		# Use the CPUID instruction to get CPU type
	cpuid
	movb %al,%cl		# save reg for future use
	andb $0x0f,%ah		# mask processor family
	movb %ah,X86
	andb $0xf0,%al		# mask model
	shrb $4,%al
	movb %al,X86_MODEL
	andb $0x0f,%cl		# mask mask revision
	movb %cl,X86_MASK
	movl %edx,X86_CAPABILITY

is486:	movl $0x50022,%ecx	# set AM, WP, NE and MP
	jmp 2f

is386:	movl $2,%ecx		# set MP
2:	movl %cr0,%eax
	andl $0x80000011,%eax	# Save PG,PE,ET
	orl %ecx,%eax
	movl %eax,%cr0

	call check_x87
342
	lgdt early_gdt_descr
L
Linus Torvalds 已提交
343 344 345 346
	lidt idt_descr
	ljmp $(__KERNEL_CS),$1f
1:	movl $(__KERNEL_DS),%eax	# reload all the segment registers
	movl %eax,%ss			# after changing gdt.
347
	movl %eax,%fs			# gets reset once there's real percpu
L
Linus Torvalds 已提交
348 349 350 351 352

	movl $(__USER_DS),%eax		# DS/ES contains default USER segment
	movl %eax,%ds
	movl %eax,%es

353 354
	xorl %eax,%eax			# Clear GS and LDT
	movl %eax,%gs
L
Linus Torvalds 已提交
355
	lldt %ax
356

L
Linus Torvalds 已提交
357
	cld			# gcc2 wants the direction flag cleared at all times
358
	pushl $0		# fake return address for unwinder
L
Linus Torvalds 已提交
359
#ifdef CONFIG_SMP
360 361
	movb ready, %cl
	movb $1, ready
362
	cmpb $0,%cl		# the first CPU calls start_kernel
363 364 365 366 367
	je   1f
	movl $(__KERNEL_PERCPU), %eax
	movl %eax,%fs		# set this cpu's percpu
	jmp initialize_secondary # all other CPUs call initialize_secondary
1:
L
Linus Torvalds 已提交
368
#endif /* CONFIG_SMP */
369
	jmp start_kernel
L
Linus Torvalds 已提交
370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415

/*
 * We depend on ET to be correct. This checks for 287/387.
 */
check_x87:
	movb $0,X86_HARD_MATH
	clts
	fninit
	fstsw %ax
	cmpb $0,%al
	je 1f
	movl %cr0,%eax		/* no coprocessor: have to set bits */
	xorl $4,%eax		/* set EM */
	movl %eax,%cr0
	ret
	ALIGN
1:	movb $1,X86_HARD_MATH
	.byte 0xDB,0xE4		/* fsetpm for 287, ignored by 387 */
	ret

/*
 *  setup_idt
 *
 *  sets up a idt with 256 entries pointing to
 *  ignore_int, interrupt gates. It doesn't actually load
 *  idt - that can be done only after paging has been enabled
 *  and the kernel moved to PAGE_OFFSET. Interrupts
 *  are enabled elsewhere, when we can be relatively
 *  sure everything is ok.
 *
 *  Warning: %esi is live across this function.
 */
setup_idt:
	lea ignore_int,%edx
	movl $(__KERNEL_CS << 16),%eax
	movw %dx,%ax		/* selector = 0x0010 = cs */
	movw $0x8E00,%dx	/* interrupt gate - dpl=0, present */

	lea idt_table,%edi
	mov $256,%ecx
rp_sidt:
	movl %eax,(%edi)
	movl %edx,4(%edi)
	addl $8,%edi
	dec %ecx
	jne rp_sidt
416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431

.macro	set_early_handler handler,trapno
	lea \handler,%edx
	movl $(__KERNEL_CS << 16),%eax
	movw %dx,%ax
	movw $0x8E00,%dx	/* interrupt gate - dpl=0, present */
	lea idt_table,%edi
	movl %eax,8*\trapno(%edi)
	movl %edx,8*\trapno+4(%edi)
.endm

	set_early_handler handler=early_divide_err,trapno=0
	set_early_handler handler=early_illegal_opcode,trapno=6
	set_early_handler handler=early_protection_fault,trapno=13
	set_early_handler handler=early_page_fault,trapno=14

L
Linus Torvalds 已提交
432 433
	ret

434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454
early_divide_err:
	xor %edx,%edx
	pushl $0	/* fake errcode */
	jmp early_fault

early_illegal_opcode:
	movl $6,%edx
	pushl $0	/* fake errcode */
	jmp early_fault

early_protection_fault:
	movl $13,%edx
	jmp early_fault

early_page_fault:
	movl $14,%edx
	jmp early_fault

early_fault:
	cld
#ifdef CONFIG_PRINTK
I
Ingo Molnar 已提交
455
	pusha
456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475
	movl $(__KERNEL_DS),%eax
	movl %eax,%ds
	movl %eax,%es
	cmpl $2,early_recursion_flag
	je hlt_loop
	incl early_recursion_flag
	movl %cr2,%eax
	pushl %eax
	pushl %edx		/* trapno */
	pushl $fault_msg
#ifdef CONFIG_EARLY_PRINTK
	call early_printk
#else
	call printk
#endif
#endif
hlt_loop:
	hlt
	jmp hlt_loop

L
Linus Torvalds 已提交
476 477 478 479
/* This is the default interrupt "handler" :-) */
	ALIGN
ignore_int:
	cld
M
Matt Mackall 已提交
480
#ifdef CONFIG_PRINTK
L
Linus Torvalds 已提交
481 482 483 484 485 486 487 488
	pushl %eax
	pushl %ecx
	pushl %edx
	pushl %es
	pushl %ds
	movl $(__KERNEL_DS),%eax
	movl %eax,%ds
	movl %eax,%es
489 490 491
	cmpl $2,early_recursion_flag
	je hlt_loop
	incl early_recursion_flag
L
Linus Torvalds 已提交
492 493 494 495 496
	pushl 16(%esp)
	pushl 24(%esp)
	pushl 32(%esp)
	pushl 40(%esp)
	pushl $int_msg
497 498 499
#ifdef CONFIG_EARLY_PRINTK
	call early_printk
#else
L
Linus Torvalds 已提交
500
	call printk
501
#endif
L
Linus Torvalds 已提交
502 503 504 505 506 507
	addl $(5*4),%esp
	popl %ds
	popl %es
	popl %edx
	popl %ecx
	popl %eax
M
Matt Mackall 已提交
508
#endif
L
Linus Torvalds 已提交
509 510
	iret

511
.section .text
L
Linus Torvalds 已提交
512 513 514 515 516 517 518 519 520
/*
 * Real beginning of normal "text" segment
 */
ENTRY(stext)
ENTRY(_stext)

/*
 * BSS section
 */
521 522
.section ".bss.page_aligned","wa"
	.align PAGE_SIZE_asm
L
Linus Torvalds 已提交
523 524
ENTRY(swapper_pg_dir)
	.fill 1024,4,0
E
Eric W. Biderman 已提交
525 526
ENTRY(swapper_pg_pmd)
	.fill 1024,4,0
L
Linus Torvalds 已提交
527 528 529 530 531 532 533 534 535 536 537 538 539
ENTRY(empty_zero_page)
	.fill 4096,1,0

/*
 * This starts the data section.
 */
.data
ENTRY(stack_start)
	.long init_thread_union+THREAD_SIZE
	.long __BOOT_DS

ready:	.byte 0

540 541 542
early_recursion_flag:
	.long 0

L
Linus Torvalds 已提交
543 544 545
int_msg:
	.asciz "Unknown interrupt or fault at EIP %p %p %p\n"

546
fault_msg:
I
Ingo Molnar 已提交
547 548 549 550 551 552 553 554 555
	.ascii								\
/* fault info: */	"BUG: Int %d: CR2 %p\n"				\
/* pusha regs: */	"     EDI %p  ESI %p  EBP %p  ESP %p\n"		\
			"     EBX %p  EDX %p  ECX %p  EAX %p\n"		\
/* fault frame: */	"     err %p  EIP %p   CS %p  flg %p\n"		\
									\
			"Stack: %p %p %p %p %p %p %p %p\n"		\
			"       %p %p %p %p %p %p %p %p\n"		\
			"       %p %p %p %p %p %p %p %p\n"
556

T
Thomas Gleixner 已提交
557
#include "../../x86/xen/xen-head.S"
558

L
Linus Torvalds 已提交
559 560 561 562 563 564 565 566 567 568 569 570 571 572 573
/*
 * The IDT and GDT 'descriptors' are a strange 48-bit object
 * only used by the lidt and lgdt instructions. They are not
 * like usual segment descriptors - they consist of a 16-bit
 * segment size, and 32-bit linear address value:
 */

.globl boot_gdt_descr
.globl idt_descr

	ALIGN
# early boot GDT descriptor (must use 1:1 address mapping)
	.word 0				# 32 bit align gdt_desc.address
boot_gdt_descr:
	.word __BOOT_DS+7
574
	.long boot_gdt - __PAGE_OFFSET
L
Linus Torvalds 已提交
575 576 577 578 579 580 581 582

	.word 0				# 32-bit align idt_desc.address
idt_descr:
	.word IDT_ENTRIES*8-1		# idt contains 256 entries
	.long idt_table

# boot GDT descriptor (later on used by CPU#0):
	.word 0				# 32 bit align gdt_desc.address
583
ENTRY(early_gdt_descr)
L
Linus Torvalds 已提交
584
	.word GDT_ENTRIES*8-1
585
	.long per_cpu__gdt_page		/* Overwritten for secondary CPUs */
L
Linus Torvalds 已提交
586 587

/*
588
 * The boot_gdt must mirror the equivalent in setup.S and is
L
Linus Torvalds 已提交
589 590 591
 * used only for booting.
 */
	.align L1_CACHE_BYTES
592
ENTRY(boot_gdt)
L
Linus Torvalds 已提交
593 594 595
	.fill GDT_ENTRY_BOOT_CS,8,0
	.quad 0x00cf9a000000ffff	/* kernel 4GB code at 0x00000000 */
	.quad 0x00cf92000000ffff	/* kernel 4GB data at 0x00000000 */