/*
 *  linux/arch/i386/entry.S
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 */

/*
 * entry.S contains the system-call and fault low-level handling routines.
 * This also contains the timer-interrupt handler, as well as all interrupts
 * and faults that can result in a task-switch.
 *
 * NOTE: This code handles signal-recognition, which happens every time
 * after a timer-interrupt and after each system call.
 *
 * I changed all the .align's to 4 (16 byte alignment), as that's faster
 * on a 486.
 *
 * Stack layout in 'ret_from_system_call':
 * 	ptrace needs to have all regs on the stack.
 *	if the order here is changed, it needs to be
 *	updated in fork.c:copy_process, signal.c:do_signal,
 *	ptrace.c and ptrace.h
 *
 *	 0(%esp) - %ebx
 *	 4(%esp) - %ecx
 *	 8(%esp) - %edx
 *       C(%esp) - %esi
 *	10(%esp) - %edi
 *	14(%esp) - %ebp
 *	18(%esp) - %eax
 *	1C(%esp) - %ds
 *	20(%esp) - %es
 *	24(%esp) - orig_eax
 *	28(%esp) - %eip
 *	2C(%esp) - %cs
 *	30(%esp) - %eflags
 *	34(%esp) - %oldesp
 *	38(%esp) - %oldss
 *
 * "current" is in register %ebx during any slow entries.
 */

#include <linux/linkage.h>
#include <asm/thread_info.h>
#include <asm/irqflags.h>
#include <asm/errno.h>
#include <asm/segment.h>
#include <asm/smp.h>
#include <asm/page.h>
#include <asm/desc.h>
#include <asm/dwarf2.h>
#include "irq_vectors.h"

/*
 * Number of system-call slots: syscall_table_size is the byte length of
 * sys_call_table (computed at the end of this file) and the table is
 * indexed with a scale of 4 by the dispatch code.
 */
#define nr_syscalls ((syscall_table_size)/4)

/*
 * Byte offsets of the saved registers relative to %esp once the full
 * register frame has been pushed.  They mirror the "Stack layout" table
 * in the comment at the top of this file and must be kept in sync with it.
 */
EBX		= 0x00
ECX		= 0x04
EDX		= 0x08
ESI		= 0x0C
EDI		= 0x10
EBP		= 0x14
EAX		= 0x18
DS		= 0x1C
ES		= 0x20
ORIG_EAX	= 0x24
EIP		= 0x28
CS		= 0x2C
EFLAGS		= 0x30
OLDESP		= 0x34
OLDSS		= 0x38

/*
 * EFLAGS bit masks.  TF_MASK, IF_MASK and VM_MASK are tested against the
 * saved EFLAGS slot (EFLAGS(%esp)) by the code in this file.
 */
CF_MASK		= 0x00000001
TF_MASK		= 0x00000100
IF_MASK		= 0x00000200
DF_MASK		= 0x00000400 
NT_MASK		= 0x00004000
VM_MASK		= 0x00020000

/*
 * These are replacements for paravirtualization: every privileged
 * instruction used by the entry code goes through one of these macros so
 * that it can be substituted in a single place.
 */
#define DISABLE_INTERRUPTS		cli
#define ENABLE_INTERRUPTS		sti
#define ENABLE_INTERRUPTS_SYSEXIT	sti; sysexit
#define INTERRUPT_RETURN		iret
#define GET_CR0_INTO_EAX		movl %cr0, %eax

/*
 * With CONFIG_PREEMPT, preempt_stop disables interrupts (and records that
 * via TRACE_IRQS_OFF) and resume_kernel is a real entry point defined
 * further down.  Without it, preempt_stop expands to nothing and
 * resume_kernel is simply an alias for restore_nocheck.
 */
#ifdef CONFIG_PREEMPT
#define preempt_stop		DISABLE_INTERRUPTS; TRACE_IRQS_OFF
#else
#define preempt_stop
#define resume_kernel		restore_nocheck
#endif

/*
 * Used right before returning through the saved frame: when
 * CONFIG_TRACE_IRQFLAGS is set and the saved EFLAGS image has IF set,
 * invoke TRACE_IRQS_ON.  Expands to nothing when the option is off.
 * Expects the register frame at %esp and modifies the flags.
 */
.macro TRACE_IRQS_IRET
#ifdef CONFIG_TRACE_IRQFLAGS
	testl $IF_MASK,EFLAGS(%esp)     # interrupts off?
	jz 1f
	TRACE_IRQS_ON
1:
#endif
.endm

/*
 * Where the signal-delivery path jumps after do_notify_resume: with
 * CONFIG_VM86 it goes back through check_userspace (which re-examines the
 * saved CS/EFLAGS), otherwise straight to resume_userspace.
 */
#ifdef CONFIG_VM86
#define resume_userspace_sig	check_userspace
#else
#define resume_userspace_sig	resume_userspace
#endif

/*
 * SAVE_ALL: clear the direction flag, push %es, %ds, %eax, %ebp, %edi,
 * %esi, %edx, %ecx, %ebx (in that order, so that the EBX..ES offsets
 * defined above describe the result), keep the CFI state in step with each
 * push, and finally load __USER_DS into %ds and %es.
 * Clobbers %edx.
 */
#define SAVE_ALL \
	cld; \
	pushl %es; \
	CFI_ADJUST_CFA_OFFSET 4;\
	/*CFI_REL_OFFSET es, 0;*/\
	pushl %ds; \
	CFI_ADJUST_CFA_OFFSET 4;\
	/*CFI_REL_OFFSET ds, 0;*/\
	pushl %eax; \
	CFI_ADJUST_CFA_OFFSET 4;\
	CFI_REL_OFFSET eax, 0;\
	pushl %ebp; \
	CFI_ADJUST_CFA_OFFSET 4;\
	CFI_REL_OFFSET ebp, 0;\
	pushl %edi; \
	CFI_ADJUST_CFA_OFFSET 4;\
	CFI_REL_OFFSET edi, 0;\
	pushl %esi; \
	CFI_ADJUST_CFA_OFFSET 4;\
	CFI_REL_OFFSET esi, 0;\
	pushl %edx; \
	CFI_ADJUST_CFA_OFFSET 4;\
	CFI_REL_OFFSET edx, 0;\
	pushl %ecx; \
	CFI_ADJUST_CFA_OFFSET 4;\
	CFI_REL_OFFSET ecx, 0;\
	pushl %ebx; \
	CFI_ADJUST_CFA_OFFSET 4;\
	CFI_REL_OFFSET ebx, 0;\
	movl $(__USER_DS), %edx; \
	movl %edx, %ds; \
	movl %edx, %es;

/*
 * RESTORE_INT_REGS: pop the seven general-purpose registers saved by
 * SAVE_ALL (%ebx, %ecx, %edx, %esi, %edi, %ebp, %eax), updating the CFI
 * state after each pop.  The segment registers are left on the stack.
 */
#define RESTORE_INT_REGS \
	popl %ebx;	\
	CFI_ADJUST_CFA_OFFSET -4;\
	CFI_RESTORE ebx;\
	popl %ecx;	\
	CFI_ADJUST_CFA_OFFSET -4;\
	CFI_RESTORE ecx;\
	popl %edx;	\
	CFI_ADJUST_CFA_OFFSET -4;\
	CFI_RESTORE edx;\
	popl %esi;	\
	CFI_ADJUST_CFA_OFFSET -4;\
	CFI_RESTORE esi;\
	popl %edi;	\
	CFI_ADJUST_CFA_OFFSET -4;\
	CFI_RESTORE edi;\
	popl %ebp;	\
	CFI_ADJUST_CFA_OFFSET -4;\
	CFI_RESTORE ebp;\
	popl %eax;	\
	CFI_ADJUST_CFA_OFFSET -4;\
	CFI_RESTORE eax

/*
 * RESTORE_REGS: RESTORE_INT_REGS followed by popping %ds and %es.
 * Each segment pop has an __ex_table entry: if the pop at 1: (or 2:)
 * faults, the fixup at 3: (or 4:) overwrites the stack slot with 0 and
 * retries the pop.
 * Uses the numeric local labels 1-4, so callers must place their own
 * "1:" label after this macro if they refer to it with "1b".
 */
#define RESTORE_REGS	\
	RESTORE_INT_REGS; \
1:	popl %ds;	\
	CFI_ADJUST_CFA_OFFSET -4;\
	/*CFI_RESTORE ds;*/\
2:	popl %es;	\
	CFI_ADJUST_CFA_OFFSET -4;\
	/*CFI_RESTORE es;*/\
.section .fixup,"ax";	\
3:	movl $0,(%esp);	\
	jmp 1b;		\
4:	movl $0,(%esp);	\
	jmp 2b;		\
.previous;		\
.section __ex_table,"a";\
	.align 4;	\
	.long 1b,3b;	\
	.long 2b,4b;	\
.previous

/*
 * Open a CFI region for code entered with a three-word frame on the stack
 * and no error code: the CFA is %esp + 3*4 and the return eip is stored
 * at CFA - 3*4.
 */
#define RING0_INT_FRAME \
	CFI_STARTPROC simple;\
	CFI_SIGNAL_FRAME;\
	CFI_DEF_CFA esp, 3*4;\
	/*CFI_OFFSET cs, -2*4;*/\
	CFI_OFFSET eip, -3*4

/*
 * Same as RING0_INT_FRAME but for entry points that start with one extra
 * word (the error code) on the stack: the CFA is %esp + 4*4 and the
 * return eip is still at CFA - 3*4.
 */
#define RING0_EC_FRAME \
	CFI_STARTPROC simple;\
	CFI_SIGNAL_FRAME;\
	CFI_DEF_CFA esp, 4*4;\
	/*CFI_OFFSET cs, -2*4;*/\
	CFI_OFFSET eip, -3*4

/*
 * Open a CFI region for code that runs with the complete saved-register
 * frame at %esp: the CFA is %esp + (OLDESP-EBX) and every general-purpose
 * register plus eip is described by its frame offset relative to OLDESP.
 */
#define RING0_PTREGS_FRAME \
	CFI_STARTPROC simple;\
	CFI_SIGNAL_FRAME;\
	CFI_DEF_CFA esp, OLDESP-EBX;\
	/*CFI_OFFSET cs, CS-OLDESP;*/\
	CFI_OFFSET eip, EIP-OLDESP;\
	/*CFI_OFFSET es, ES-OLDESP;*/\
	/*CFI_OFFSET ds, DS-OLDESP;*/\
	CFI_OFFSET eax, EAX-OLDESP;\
	CFI_OFFSET ebp, EBP-OLDESP;\
	CFI_OFFSET edi, EDI-OLDESP;\
	CFI_OFFSET esi, ESI-OLDESP;\
	CFI_OFFSET edx, EDX-OLDESP;\
	CFI_OFFSET ecx, ECX-OLDESP;\
	CFI_OFFSET ebx, EBX-OLDESP

/*
 * ret_from_fork: preserves %eax around a call to schedule_tail, loads the
 * thread_info pointer into %ebp, forces EFLAGS to 0x0202 and then joins
 * the normal system-call exit path at syscall_exit.
 */
ENTRY(ret_from_fork)
	CFI_STARTPROC
	pushl %eax
	CFI_ADJUST_CFA_OFFSET 4
	call schedule_tail
	GET_THREAD_INFO(%ebp)
	popl %eax
	CFI_ADJUST_CFA_OFFSET -4
	pushl $0x0202			# Reset kernel eflags
	CFI_ADJUST_CFA_OFFSET 4
	popfl
	CFI_ADJUST_CFA_OFFSET -4
	jmp syscall_exit
	CFI_ENDPROC

/*
 * Return to user mode is not as complex as all this looks,
 * but we want the default path for a system call return to
 * go as quickly as possible which is why some of this is
 * less clear than it otherwise should be.
 */

	# userspace resumption stub bypassing syscall exit tracing
	/*
	 * Entry points, all expecting the full register frame at %esp:
	 *   ret_from_exception - runs preempt_stop first
	 *   ret_from_intr      - loads thread_info into %ebp
	 *   check_userspace    - decides between kernel and user return
	 *   resume_userspace   - checks _TIF_WORK_MASK with interrupts off
	 *   resume_kernel      - CONFIG_PREEMPT only; otherwise an alias
	 *                        for restore_nocheck (see above)
	 */
	ALIGN
	RING0_PTREGS_FRAME
ret_from_exception:
	preempt_stop
ret_from_intr:
	GET_THREAD_INFO(%ebp)
check_userspace:
	movl EFLAGS(%esp), %eax		# mix EFLAGS and CS
	movb CS(%esp), %al
	andl $(VM_MASK | SEGMENT_RPL_MASK), %eax
	cmpl $USER_RPL, %eax
	jb resume_kernel		# not returning to v8086 or userspace
ENTRY(resume_userspace)
	DISABLE_INTERRUPTS		# make sure we don't miss an interrupt
					# setting need_resched or sigpending
					# between sampling and the iret
	movl TI_flags(%ebp), %ecx
	andl $_TIF_WORK_MASK, %ecx	# is there any work to be done on
					# int/exception return?
	jne work_pending
	jmp restore_all

#ifdef CONFIG_PREEMPT
ENTRY(resume_kernel)
	DISABLE_INTERRUPTS
	cmpl $0,TI_preempt_count(%ebp)	# non-zero preempt_count ?
	jnz restore_nocheck
need_resched:
	movl TI_flags(%ebp), %ecx	# need_resched set ?
	testb $_TIF_NEED_RESCHED, %cl
	jz restore_all
	testl $IF_MASK,EFLAGS(%esp)     # interrupts off (exception path) ?
	jz restore_all
	call preempt_schedule_irq
	jmp need_resched
#endif
	CFI_ENDPROC

/* SYSENTER_RETURN points to after the "sysenter" instruction in
   the vsyscall page.  See vsyscall-sysentry.S, which defines the symbol.  */

	# sysenter call handler stub
	/*
	 * Flow: load the real kernel stack pointer, hand-build the five-word
	 * return frame (ss, esp, eflags, cs, eip), fetch the sixth argument
	 * from the user stack, save the registers and dispatch through
	 * sys_call_table.  If no exit work is flagged the return goes through
	 * ENABLE_INTERRUPTS_SYSEXIT with %edx = saved EIP and
	 * %ecx = saved OLDESP; otherwise it branches to syscall_exit_work.
	 */
ENTRY(sysenter_entry)
	CFI_STARTPROC simple
	CFI_SIGNAL_FRAME
	CFI_DEF_CFA esp, 0
	CFI_REGISTER esp, ebp
	movl TSS_sysenter_esp0(%esp),%esp
sysenter_past_esp:
	/*
	 * No need to follow this irqs on/off section: the syscall
	 * disabled irqs and here we enable it straight after entry:
	 */
	ENABLE_INTERRUPTS
	pushl $(__USER_DS)
	CFI_ADJUST_CFA_OFFSET 4
	/*CFI_REL_OFFSET ss, 0*/
	pushl %ebp
	CFI_ADJUST_CFA_OFFSET 4
	CFI_REL_OFFSET esp, 0
	pushfl
	CFI_ADJUST_CFA_OFFSET 4
	pushl $(__USER_CS)
	CFI_ADJUST_CFA_OFFSET 4
	/*CFI_REL_OFFSET cs, 0*/
	/*
	 * Push current_thread_info()->sysenter_return to the stack.
	 * A tiny bit of offset fixup is necessary - 4*4 means the 4 words
	 * pushed above; +8 corresponds to copy_thread's esp0 setting.
	 */
	pushl (TI_sysenter_return-THREAD_SIZE+8+4*4)(%esp)
	CFI_ADJUST_CFA_OFFSET 4
	CFI_REL_OFFSET eip, 0

/*
 * Load the potential sixth argument from user stack.
 * Careful about security.
 */
	cmpl $__PAGE_OFFSET-3,%ebp
	jae syscall_fault
1:	movl (%ebp),%ebp
.section __ex_table,"a"
	.align 4
	.long 1b,syscall_fault
.previous

	pushl %eax
	CFI_ADJUST_CFA_OFFSET 4
	SAVE_ALL
	GET_THREAD_INFO(%ebp)

	/* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */
	testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
	jnz syscall_trace_entry
	cmpl $(nr_syscalls), %eax
	jae syscall_badsys
	call *sys_call_table(,%eax,4)
	movl %eax,EAX(%esp)
	DISABLE_INTERRUPTS
	TRACE_IRQS_OFF
	movl TI_flags(%ebp), %ecx
	testw $_TIF_ALLWORK_MASK, %cx
	jne syscall_exit_work
/* if something modifies registers it must also disable sysexit */
	movl EIP(%esp), %edx
	movl OLDESP(%esp), %ecx
	xorl %ebp,%ebp
	TRACE_IRQS_ON
	ENABLE_INTERRUPTS_SYSEXIT
	CFI_ENDPROC


	# system call handler stub
	/*
	 * Labels in this region:
	 *   system_call             - entry; saves orig_eax and all registers
	 *   syscall_call            - dispatch through sys_call_table
	 *   syscall_exit            - re-check work flags with interrupts off
	 *   restore_all             - pick the normal or the LDT-SS return
	 *   restore_nocheck         - normal return with irq-flags tracing
	 *   restore_nocheck_notrace - normal return without tracing
	 *   iret_exc                - fixup taken when INTERRUPT_RETURN faults
	 *   ldt_ss                  - return to a 16-bit user stack
	 */
ENTRY(system_call)
	RING0_INT_FRAME			# can't unwind into user space anyway
	pushl %eax			# save orig_eax
	CFI_ADJUST_CFA_OFFSET 4
	SAVE_ALL
	GET_THREAD_INFO(%ebp)
	testl $TF_MASK,EFLAGS(%esp)
	jz no_singlestep
	orl $_TIF_SINGLESTEP,TI_flags(%ebp)
no_singlestep:
					# system call tracing in operation / emulation
	/* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */
	testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
	jnz syscall_trace_entry
	cmpl $(nr_syscalls), %eax
	jae syscall_badsys
syscall_call:
	call *sys_call_table(,%eax,4)
	movl %eax,EAX(%esp)		# store the return value
syscall_exit:
	DISABLE_INTERRUPTS		# make sure we don't miss an interrupt
					# setting need_resched or sigpending
					# between sampling and the iret
	TRACE_IRQS_OFF
	movl TI_flags(%ebp), %ecx
	testw $_TIF_ALLWORK_MASK, %cx	# current->work
	jne syscall_exit_work

restore_all:
	movl EFLAGS(%esp), %eax		# mix EFLAGS, SS and CS
	# Warning: OLDSS(%esp) contains the wrong/random values if we
	# are returning to the kernel.
	# See comments in process.c:copy_thread() for details.
	movb OLDSS(%esp), %ah
	movb CS(%esp), %al
	andl $(VM_MASK | (SEGMENT_TI_MASK << 8) | SEGMENT_RPL_MASK), %eax
	cmpl $((SEGMENT_LDT << 8) | USER_RPL), %eax
	CFI_REMEMBER_STATE
	je ldt_ss			# returning to user-space with LDT SS
restore_nocheck:
	TRACE_IRQS_IRET
restore_nocheck_notrace:
	RESTORE_REGS
	addl $4, %esp			/* drop the orig_eax slot */
	CFI_ADJUST_CFA_OFFSET -4
1:	INTERRUPT_RETURN
.section .fixup,"ax"
iret_exc:
	TRACE_IRQS_ON
	ENABLE_INTERRUPTS
	pushl $0			# no error code
	pushl $do_iret_error
	jmp error_code
.previous
.section __ex_table,"a"
	.align 4
	.long 1b,iret_exc
.previous

	CFI_RESTORE_STATE
ldt_ss:
	larl OLDSS(%esp), %eax
	jnz restore_nocheck
	testl $0x00400000, %eax		# returning to 32bit stack?
	jnz restore_nocheck		# all right, normal return
	/* If returning to userspace with 16bit stack,
	 * try to fix the higher word of ESP, as the CPU
	 * won't restore it.
	 * This is an "official" bug of all the x86-compatible
	 * CPUs, which we can try to work around to make
	 * dosemu and wine happy. */
	subl $8, %esp		# reserve space for switch16 pointer
	CFI_ADJUST_CFA_OFFSET 8
	DISABLE_INTERRUPTS
	TRACE_IRQS_OFF
	movl %esp, %eax
	/* Set up the 16bit stack frame with switch32 pointer on top,
	 * and a switch16 pointer on top of the current frame. */
	call setup_x86_bogus_stack
	CFI_ADJUST_CFA_OFFSET -8	# frame has moved
	TRACE_IRQS_IRET
	RESTORE_REGS
	lss 20+4(%esp), %esp	# switch to 16bit stack
1:	INTERRUPT_RETURN
.section __ex_table,"a"
	.align 4
	.long 1b,iret_exc
.previous
	CFI_ENDPROC

	# perform work that needs to be done immediately before resumption
	/*
	 * On entry to work_pending, %ecx holds the thread_info flags that
	 * were just sampled and %ebp points at thread_info.
	 */
	ALIGN
	RING0_PTREGS_FRAME		# can't unwind into user space anyway
work_pending:
	testb $_TIF_NEED_RESCHED, %cl
	jz work_notifysig
work_resched:
	call schedule
	DISABLE_INTERRUPTS		# make sure we don't miss an interrupt
					# setting need_resched or sigpending
					# between sampling and the iret
	TRACE_IRQS_OFF
	movl TI_flags(%ebp), %ecx
	andl $_TIF_WORK_MASK, %ecx	# is there any work to be done other
					# than syscall tracing?
	jz restore_all
	testb $_TIF_NEED_RESCHED, %cl
	jnz work_resched

work_notifysig:				# deal with pending signals and
					# notify-resume requests
	testl $VM_MASK, EFLAGS(%esp)
	movl %esp, %eax
	jne work_notifysig_v86		# returning to kernel-space or
					# vm86-space
	xorl %edx, %edx
	call do_notify_resume
	jmp resume_userspace_sig

	ALIGN
work_notifysig_v86:
#ifdef CONFIG_VM86
	pushl %ecx			# save ti_flags for do_notify_resume
	CFI_ADJUST_CFA_OFFSET 4
	call save_v86_state		# %eax contains pt_regs pointer
	popl %ecx
	CFI_ADJUST_CFA_OFFSET -4
	movl %eax, %esp
	xorl %edx, %edx
	call do_notify_resume
	jmp resume_userspace_sig
#endif

	# perform syscall entry tracing
	/*
	 * Presets the saved return value to -ENOSYS, calls
	 * do_syscall_trace(regs, 0), then reloads the syscall number from
	 * ORIG_EAX and dispatches it only if it is below nr_syscalls.
	 */
	ALIGN
syscall_trace_entry:
	movl $-ENOSYS,EAX(%esp)
	movl %esp, %eax
	xorl %edx,%edx
	call do_syscall_trace
	cmpl $0, %eax
	jne resume_userspace		# ret != 0 -> running under PTRACE_SYSEMU,
					# so must skip actual syscall
	movl ORIG_EAX(%esp), %eax
	cmpl $(nr_syscalls), %eax
	jnae syscall_call
	jmp syscall_exit

	# perform syscall exit tracing
	ALIGN
syscall_exit_work:
	testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP), %cl
	jz work_pending
	TRACE_IRQS_ON
	ENABLE_INTERRUPTS		# could let do_syscall_trace() call
					# schedule() instead
	movl %esp, %eax
	movl $1, %edx
	call do_syscall_trace
	jmp resume_userspace
	CFI_ENDPROC

512
	RING0_INT_FRAME			# can't unwind into user space anyway
L
Linus Torvalds 已提交
513 514
syscall_fault:
	pushl %eax			# save orig_eax
515
	CFI_ADJUST_CFA_OFFSET 4
L
Linus Torvalds 已提交
516 517 518 519 520 521 522 523
	SAVE_ALL
	GET_THREAD_INFO(%ebp)
	movl $-EFAULT,EAX(%esp)
	jmp resume_userspace

syscall_badsys:
	movl $-ENOSYS,EAX(%esp)
	jmp resume_userspace
524
	CFI_ENDPROC
L
Linus Torvalds 已提交
525 526 527 528 529 530 531 532 533 534 535

/*
 * FIXUP_ESPFIX_STACK: leave the 16-bit stack.  Saves the current %esp in
 * %eax, reloads %ss:%esp from the far pointer stored at
 * %ss:CPU_16BIT_STACK_SIZE-8, calls fixup_x86_bogus_stack and then loads
 * %esp from the value left in %eax.
 */
#define FIXUP_ESPFIX_STACK \
	movl %esp, %eax; \
	/* switch to 32bit stack using the pointer on top of 16bit stack */ \
	lss %ss:CPU_16BIT_STACK_SIZE-8, %esp; \
	/* copy data from 16bit stack to 32bit stack */ \
	call fixup_x86_bogus_stack; \
	/* put ESP to the proper location */ \
	movl %eax, %esp;
/*
 * UNWIND_ESPFIX_STACK: if %ss is __ESPFIX_SS, branch to out-of-line
 * .fixup code that loads __KERNEL_DS into %ds/%es and runs
 * FIXUP_ESPFIX_STACK, then rejoin at 27:.  %eax is saved on the stack at
 * the start and popped again at 27:.  Uses the numeric local labels 27
 * and 28.
 */
#define UNWIND_ESPFIX_STACK \
	pushl %eax; \
	CFI_ADJUST_CFA_OFFSET 4; \
	movl %ss, %eax; \
	/* see if on 16bit stack */ \
	cmpw $__ESPFIX_SS, %ax; \
	je 28f; \
27:	popl %eax; \
	CFI_ADJUST_CFA_OFFSET -4; \
.section .fixup,"ax"; \
28:	movl $__KERNEL_DS, %eax; \
	movl %eax, %ds; \
	movl %eax, %es; \
	/* switch to 32bit stack */ \
	FIXUP_ESPFIX_STACK; \
	jmp 27b; \
.previous

/*
 * Build the entry stubs and pointer table with
 * some assembler magic.
 *
 * For each vector in 0..NR_IRQS-1 the .rept body emits, in .text, a stub
 * that pushes ~vector and jumps to common_interrupt, and, in .data, one
 * .long holding that stub's address, so the "interrupt" symbol labels a
 * table of NR_IRQS stub pointers.
 */
.data
ENTRY(interrupt)
.text

vector=0
ENTRY(irq_entries_start)
	RING0_INT_FRAME
.rept NR_IRQS
	ALIGN
 .if vector
	/* undo the CFA adjustment recorded by the previous stub's push */
	CFI_ADJUST_CFA_OFFSET -4
 .endif
1:	pushl $~(vector)
	CFI_ADJUST_CFA_OFFSET 4
	jmp common_interrupt
.data
	.long 1b
.text
vector=vector+1
.endr

/*
 * the CPU automatically disables interrupts when executing an IRQ vector,
 * so IRQ-flags tracing has to follow that:
 *
 * common_interrupt is reached from the per-vector stubs above with
 * ~vector already pushed.  It saves all registers, passes the frame
 * pointer to do_IRQ in %eax and returns through ret_from_intr.  The
 * CFI_ENDPROC here closes the region opened at irq_entries_start.
 */
	ALIGN
common_interrupt:
	SAVE_ALL
	TRACE_IRQS_OFF
	movl %esp,%eax
	call do_IRQ
	jmp ret_from_intr
	CFI_ENDPROC

/*
 * BUILD_INTERRUPT(name, nr): emit the entry point "name", which pushes
 * ~nr, saves all registers, calls smp_<name> with the frame pointer in
 * %eax and returns through ret_from_intr.
 *
 * The empty comment in "smp_" followed by "name" is the traditional-cpp
 * way of pasting the two tokens together.  TRACE_IRQS_OFF is followed by
 * an explicit ';' so the macro stays well-formed whether or not the
 * TRACE_IRQS_OFF definition carries its own statement separator; an extra
 * empty statement is harmless (SAVE_ALL; already produces one).
 */
#define BUILD_INTERRUPT(name, nr)	\
ENTRY(name)				\
	RING0_INT_FRAME;		\
	pushl $~(nr);			\
	CFI_ADJUST_CFA_OFFSET 4;	\
	SAVE_ALL;			\
	TRACE_IRQS_OFF;			\
	movl %esp,%eax;			\
	call smp_/**/name;		\
	jmp ret_from_intr;		\
	CFI_ENDPROC

/* The include is where all of the SMP etc. interrupts come from */
#include "entry_arch.h"

/*
 * page_fault pushes the address of do_page_fault and falls into
 * error_code, the common path shared by the exception stubs in this file.
 *
 * error_code expects, from the top of the stack down: the handler address,
 * the error code, then the three-word return frame.  It builds the
 * register frame by hand, so the handler address initially sits in the ES
 * slot and the error code in the ORIG_EAX slot.  They are then moved to
 * %edi and %edx, ORIG_EAX is set to -1 and the real %es is stored in its
 * slot.  The handler is called with %eax = frame pointer and
 * %edx = error code.
 */
KPROBE_ENTRY(page_fault)
	RING0_EC_FRAME
	pushl $do_page_fault
	CFI_ADJUST_CFA_OFFSET 4
	ALIGN
error_code:
	pushl %ds
	CFI_ADJUST_CFA_OFFSET 4
	/*CFI_REL_OFFSET ds, 0*/
	pushl %eax
	CFI_ADJUST_CFA_OFFSET 4
	CFI_REL_OFFSET eax, 0
	xorl %eax, %eax
	pushl %ebp
	CFI_ADJUST_CFA_OFFSET 4
	CFI_REL_OFFSET ebp, 0
	pushl %edi
	CFI_ADJUST_CFA_OFFSET 4
	CFI_REL_OFFSET edi, 0
	pushl %esi
	CFI_ADJUST_CFA_OFFSET 4
	CFI_REL_OFFSET esi, 0
	pushl %edx
	CFI_ADJUST_CFA_OFFSET 4
	CFI_REL_OFFSET edx, 0
	decl %eax			# eax = -1
	pushl %ecx
	CFI_ADJUST_CFA_OFFSET 4
	CFI_REL_OFFSET ecx, 0
	pushl %ebx
	CFI_ADJUST_CFA_OFFSET 4
	CFI_REL_OFFSET ebx, 0
	cld
	pushl %es
	CFI_ADJUST_CFA_OFFSET 4
	/*CFI_REL_OFFSET es, 0*/
	UNWIND_ESPFIX_STACK
	popl %ecx
	CFI_ADJUST_CFA_OFFSET -4
	/*CFI_REGISTER es, ecx*/
	movl ES(%esp), %edi		# get the function address
	movl ORIG_EAX(%esp), %edx	# get the error code
	movl %eax, ORIG_EAX(%esp)
	movl %ecx, ES(%esp)
	/*CFI_REL_OFFSET es, ES*/
	movl $(__USER_DS), %ecx
	movl %ecx, %ds
	movl %ecx, %es
	movl %esp,%eax			# pt_regs pointer
	call *%edi
	jmp ret_from_exception
	CFI_ENDPROC
KPROBE_END(page_fault)

/*
 * Entry stub: pushes a zero error code and the address of
 * do_coprocessor_error, then joins the common error_code path.
 */
ENTRY(coprocessor_error)
	RING0_INT_FRAME
	pushl $0
	CFI_ADJUST_CFA_OFFSET 4
	pushl $do_coprocessor_error
	CFI_ADJUST_CFA_OFFSET 4
	jmp error_code
	CFI_ENDPROC

/*
 * Entry stub: pushes a zero error code and the address of
 * do_simd_coprocessor_error, then joins the common error_code path.
 */
ENTRY(simd_coprocessor_error)
	RING0_INT_FRAME
	pushl $0
	CFI_ADJUST_CFA_OFFSET 4
	pushl $do_simd_coprocessor_error
	CFI_ADJUST_CFA_OFFSET 4
	jmp error_code
	CFI_ENDPROC

/*
 * device_not_available: builds a full register frame with -1 in the
 * orig_eax slot, then reads CR0.  If the EM bit (0x4) is clear it calls
 * math_state_restore; if it is set it calls math_emulate with one extra
 * zero word pushed on the stack.  Both paths leave through
 * ret_from_exception.
 */
ENTRY(device_not_available)
	RING0_INT_FRAME
	pushl $-1			# mark this as an int
	CFI_ADJUST_CFA_OFFSET 4
	SAVE_ALL
	GET_CR0_INTO_EAX
	testl $0x4, %eax		# EM (math emulation bit)
	jne device_not_available_emulate
	preempt_stop
	call math_state_restore
	jmp ret_from_exception
device_not_available_emulate:
	pushl $0			# temporary storage for ORIG_EIP
	CFI_ADJUST_CFA_OFFSET 4
	call math_emulate
	addl $4, %esp
	CFI_ADJUST_CFA_OFFSET -4
	jmp ret_from_exception
	CFI_ENDPROC

/*
 * Debug traps and NMI can happen at the one SYSENTER instruction
 * that sets up the real kernel stack. Check here, since we can't
 * allow the wrong stack to be used.
 *
 * "TSS_sysenter_esp0+12" is because the NMI/debug handler will have
 * already pushed 3 words if it hits on the sysenter instruction:
 * eflags, cs and eip.
 *
 * We just load the right stack, and push the three (known) values
 * by hand onto the new stack - while updating the return eip past
 * the instruction that would have done it for sysenter.
 *
 * FIX_STACK(offset, ok, label):
 *   offset - bytes already pushed above the sysenter stack pointer
 *   ok     - branch target used when the saved CS is not __KERNEL_CS
 *   label  - label emitted on the stack-switching instruction
 */
#define FIX_STACK(offset, ok, label)		\
	cmpw $__KERNEL_CS,4(%esp);		\
	jne ok;					\
label:						\
	movl TSS_sysenter_esp0+offset(%esp),%esp;	\
	CFI_DEF_CFA esp, 0;			\
	CFI_UNDEFINED eip;			\
	pushfl;					\
	CFI_ADJUST_CFA_OFFSET 4;		\
	pushl $__KERNEL_CS;			\
	CFI_ADJUST_CFA_OFFSET 4;		\
	pushl $sysenter_past_esp;		\
	CFI_ADJUST_CFA_OFFSET 4;		\
	CFI_REL_OFFSET eip, 0

/*
 * debug: if the saved eip is exactly sysenter_entry, the trap hit before
 * the kernel stack was loaded, so FIX_STACK switches stacks first.  Then
 * build a full frame with -1 in the orig_eax slot and call
 * do_debug(regs, 0).
 */
KPROBE_ENTRY(debug)
	RING0_INT_FRAME
	cmpl $sysenter_entry,(%esp)
	jne debug_stack_correct
	FIX_STACK(12, debug_stack_correct, debug_esp_fix_insn)
debug_stack_correct:
	pushl $-1			# mark this as an int
	CFI_ADJUST_CFA_OFFSET 4
	SAVE_ALL
	xorl %edx,%edx			# error code 0
	movl %esp,%eax			# pt_regs pointer
	call do_debug
	jmp ret_from_exception
	CFI_ENDPROC
KPROBE_END(debug)

/*
 * NMI is doubly nasty. It can happen _while_ we're handling
 * a debug fault, and the debug fault hasn't yet been able to
 * clear up the stack. So we first check whether we got  an
 * NMI on the sysenter entry path, but after that we need to
 * check whether we got an NMI on the debug path where the debug
 * fault happened on the sysenter path.
 *
 * Paths below:
 *   nmi_16bit_stack       - %ss is __ESPFIX_SS
 *   nmi_stack_fixup       - saved eip is sysenter_entry
 *   nmi_debug_stack_check - the word 12 bytes up is sysenter_entry, i.e.
 *                           a debug frame may sit under the NMI frame
 *   nmi_stack_correct     - normal case: save registers, call do_nmi
 */
KPROBE_ENTRY(nmi)
	RING0_INT_FRAME
	pushl %eax
	CFI_ADJUST_CFA_OFFSET 4
	movl %ss, %eax
	cmpw $__ESPFIX_SS, %ax
	popl %eax
	CFI_ADJUST_CFA_OFFSET -4
	je nmi_16bit_stack
	cmpl $sysenter_entry,(%esp)
	je nmi_stack_fixup
	pushl %eax
	CFI_ADJUST_CFA_OFFSET 4
	movl %esp,%eax
	/* Do not access memory above the end of our stack page,
	 * it might not exist.
	 */
	andl $(THREAD_SIZE-1),%eax
	cmpl $(THREAD_SIZE-20),%eax
	popl %eax
	CFI_ADJUST_CFA_OFFSET -4
	jae nmi_stack_correct
	cmpl $sysenter_entry,12(%esp)
	je nmi_debug_stack_check
nmi_stack_correct:
	/* We have a RING0_INT_FRAME here */
	pushl %eax
	CFI_ADJUST_CFA_OFFSET 4
	SAVE_ALL
	xorl %edx,%edx		# zero error code
	movl %esp,%eax		# pt_regs pointer
	call do_nmi
	jmp restore_nocheck_notrace
	CFI_ENDPROC

nmi_stack_fixup:
	RING0_INT_FRAME
	FIX_STACK(12,nmi_stack_correct, 1)
	jmp nmi_stack_correct

nmi_debug_stack_check:
	/* We have a RING0_INT_FRAME here */
	cmpw $__KERNEL_CS,16(%esp)
	jne nmi_stack_correct
	cmpl $debug,(%esp)
	jb nmi_stack_correct
	cmpl $debug_esp_fix_insn,(%esp)
	ja nmi_stack_correct
	FIX_STACK(24,nmi_stack_correct, 1)
	jmp nmi_stack_correct

nmi_16bit_stack:
	/* We have a RING0_INT_FRAME here.
	 *
	 * create the pointer to lss back
	 */
	pushl %ss
	CFI_ADJUST_CFA_OFFSET 4
	pushl %esp
	CFI_ADJUST_CFA_OFFSET 4
	movzwl %sp, %esp
	addw $4, (%esp)
	/* copy the iret frame of 12 bytes */
	.rept 3
	pushl 16(%esp)
	CFI_ADJUST_CFA_OFFSET 4
	.endr
	pushl %eax
	CFI_ADJUST_CFA_OFFSET 4
	SAVE_ALL
	FIXUP_ESPFIX_STACK		# %eax == %esp
	CFI_ADJUST_CFA_OFFSET -20	# the frame has now moved
	xorl %edx,%edx			# zero error code
	call do_nmi
	RESTORE_REGS
	lss 12+4(%esp), %esp		# back to 16bit stack
1:	INTERRUPT_RETURN
	CFI_ENDPROC
.section __ex_table,"a"
	.align 4
	.long 1b,iret_exc
.previous
KPROBE_END(nmi)

/*
 * int3: build a full register frame with -1 in the orig_eax slot and
 * call do_int3(regs, 0); returns through ret_from_exception.
 */
KPROBE_ENTRY(int3)
	RING0_INT_FRAME
	pushl $-1			# mark this as an int
	CFI_ADJUST_CFA_OFFSET 4
	SAVE_ALL
	xorl %edx,%edx		# zero error code
	movl %esp,%eax		# pt_regs pointer
	call do_int3
	jmp ret_from_exception
	CFI_ENDPROC
KPROBE_END(int3)

/*
 * Entry stub: pushes a zero error code and the address of do_overflow,
 * then joins the common error_code path.
 */
ENTRY(overflow)
	RING0_INT_FRAME
	pushl $0
	CFI_ADJUST_CFA_OFFSET 4
	pushl $do_overflow
	CFI_ADJUST_CFA_OFFSET 4
	jmp error_code
	CFI_ENDPROC

/*
 * Entry stub: pushes a zero error code and the address of do_bounds,
 * then joins the common error_code path.
 */
ENTRY(bounds)
	RING0_INT_FRAME
	pushl $0
	CFI_ADJUST_CFA_OFFSET 4
	pushl $do_bounds
	CFI_ADJUST_CFA_OFFSET 4
	jmp error_code
	CFI_ENDPROC

/*
 * Entry stub: pushes a zero error code and the address of do_invalid_op,
 * then joins the common error_code path.
 */
ENTRY(invalid_op)
	RING0_INT_FRAME
	pushl $0
	CFI_ADJUST_CFA_OFFSET 4
	pushl $do_invalid_op
	CFI_ADJUST_CFA_OFFSET 4
	jmp error_code
	CFI_ENDPROC

/*
 * Entry stub: pushes a zero error code and the address of
 * do_coprocessor_segment_overrun, then joins the common error_code path.
 */
ENTRY(coprocessor_segment_overrun)
	RING0_INT_FRAME
	pushl $0
	CFI_ADJUST_CFA_OFFSET 4
	pushl $do_coprocessor_segment_overrun
	CFI_ADJUST_CFA_OFFSET 4
	jmp error_code
	CFI_ENDPROC

/*
 * Entry stub for a frame that already carries an error code
 * (RING0_EC_FRAME): pushes only the address of do_invalid_TSS and joins
 * the common error_code path.
 */
ENTRY(invalid_TSS)
	RING0_EC_FRAME
	pushl $do_invalid_TSS
	CFI_ADJUST_CFA_OFFSET 4
	jmp error_code
	CFI_ENDPROC

/*
 * Entry stub for a frame that already carries an error code
 * (RING0_EC_FRAME): pushes only the address of do_segment_not_present and
 * joins the common error_code path.
 */
ENTRY(segment_not_present)
	RING0_EC_FRAME
	pushl $do_segment_not_present
	CFI_ADJUST_CFA_OFFSET 4
	jmp error_code
	CFI_ENDPROC

/*
 * Entry stub for a frame that already carries an error code
 * (RING0_EC_FRAME): pushes only the address of do_stack_segment and joins
 * the common error_code path.
 */
ENTRY(stack_segment)
	RING0_EC_FRAME
	pushl $do_stack_segment
	CFI_ADJUST_CFA_OFFSET 4
	jmp error_code
	CFI_ENDPROC

/*
 * Entry stub for a frame that already carries an error code
 * (RING0_EC_FRAME): pushes only the address of do_general_protection and
 * joins the common error_code path.
 */
KPROBE_ENTRY(general_protection)
	RING0_EC_FRAME
	pushl $do_general_protection
	CFI_ADJUST_CFA_OFFSET 4
	jmp error_code
	CFI_ENDPROC
KPROBE_END(general_protection)

/*
 * Entry stub for a frame that already carries an error code
 * (RING0_EC_FRAME): pushes only the address of do_alignment_check and
 * joins the common error_code path.
 */
ENTRY(alignment_check)
	RING0_EC_FRAME
	pushl $do_alignment_check
	CFI_ADJUST_CFA_OFFSET 4
	jmp error_code
	CFI_ENDPROC

/*
 * Entry stub: pushes a zero error code and the address of
 * do_divide_error, then joins the common error_code path.
 */
ENTRY(divide_error)
	RING0_INT_FRAME
	pushl $0			# no error code
	CFI_ADJUST_CFA_OFFSET 4
	pushl $do_divide_error
	CFI_ADJUST_CFA_OFFSET 4
	jmp error_code
	CFI_ENDPROC

/*
 * Entry stub (CONFIG_X86_MCE only): pushes a zero error code and the
 * current contents of the machine_check_vector variable, then joins the
 * common error_code path.  Unlike the other stubs, the handler address is
 * loaded from memory rather than pushed as an immediate.
 */
#ifdef CONFIG_X86_MCE
ENTRY(machine_check)
	RING0_INT_FRAME
	pushl $0
	CFI_ADJUST_CFA_OFFSET 4
	pushl machine_check_vector
	CFI_ADJUST_CFA_OFFSET 4
	jmp error_code
	CFI_ENDPROC
#endif

/*
 * Entry stub: pushes a zero error code and the address of
 * do_spurious_interrupt_bug, then joins the common error_code path.
 */
ENTRY(spurious_interrupt_bug)
	RING0_INT_FRAME
	pushl $0
	CFI_ADJUST_CFA_OFFSET 4
	pushl $do_spurious_interrupt_bug
	CFI_ADJUST_CFA_OFFSET 4
	jmp error_code
	CFI_ENDPROC

/*
 * arch_unwind_init_running (CONFIG_STACK_UNWIND only).
 *
 * Stack on entry: (%esp) = return address, 4(%esp) = pointer to a
 * register frame to fill in, 8(%esp) = address to jump to at the end,
 * 12(%esp) = a further word that is moved into the 8(%esp) slot before
 * the jump.
 *
 * Fills the frame with the caller's %ebx/%esi/%edi/%ebp, zeroes for
 * ECX/EDX/EAX/ORIG_EAX/EFLAGS, the return address as EIP, the caller's
 * stack pointer (address of 4(%esp)) as OLDESP, and fixed segment
 * selectors, then tail-jumps to the address taken from 8(%esp).
 * %ebx is restored before the jump.
 */
#ifdef CONFIG_STACK_UNWIND
ENTRY(arch_unwind_init_running)
	CFI_STARTPROC
	movl	4(%esp), %edx
	movl	(%esp), %ecx
	leal	4(%esp), %eax
	movl	%ebx, EBX(%edx)
	xorl	%ebx, %ebx
	movl	%ebx, ECX(%edx)
	movl	%ebx, EDX(%edx)
	movl	%esi, ESI(%edx)
	movl	%edi, EDI(%edx)
	movl	%ebp, EBP(%edx)
	movl	%ebx, EAX(%edx)
	movl	$__USER_DS, DS(%edx)
	movl	$__USER_DS, ES(%edx)
	movl	%ebx, ORIG_EAX(%edx)
	movl	%ecx, EIP(%edx)
	movl	12(%esp), %ecx
	movl	$__KERNEL_CS, CS(%edx)
	movl	%ebx, EFLAGS(%edx)
	movl	%eax, OLDESP(%edx)
	movl	8(%esp), %eax
	movl	%ecx, 8(%esp)
	movl	EBX(%edx), %ebx
	movl	$__KERNEL_DS, OLDSS(%edx)
	jmpl	*%eax
	CFI_ENDPROC
ENDPROC(arch_unwind_init_running)
#endif

/*
 * kernel_thread_helper: calls the function whose address is in %ebx with
 * the value from %edx supplied both in %eax and on the stack, then passes
 * that function's return value (again in %eax and on the stack) to
 * do_exit.  There is no return instruction after the call to do_exit.
 */
ENTRY(kernel_thread_helper)
	pushl $0		# fake return address for unwinder
	CFI_STARTPROC
	movl %edx,%eax
	push %edx
	CFI_ADJUST_CFA_OFFSET 4
	call *%ebx
	push %eax
	CFI_ADJUST_CFA_OFFSET 4
	call do_exit
	CFI_ENDPROC
ENDPROC(kernel_thread_helper)

/*
 * The system-call table is pulled in from syscall_table.S and placed in
 * .rodata.  syscall_table_size is the distance in bytes from
 * sys_call_table to the end of the included table; nr_syscalls at the top
 * of this file is derived from it.
 */
.section .rodata,"a"
#include "syscall_table.S"

syscall_table_size=(.-sys_call_table)