entry.S 23.0 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32
/*
 *  linux/arch/i386/entry.S
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 */

/*
 * entry.S contains the system-call and fault low-level handling routines.
 * This also contains the timer-interrupt handler, as well as all interrupts
 * and faults that can result in a task-switch.
 *
 * NOTE: This code handles signal-recognition, which happens every time
 * after a timer-interrupt and after each system call.
 *
 * I changed all the .align's to 4 (16 byte alignment), as that's faster
 * on a 486.
 *
 * Stack layout in 'ret_from_system_call':
 * 	ptrace needs to have all regs on the stack.
 *	if the order here is changed, it needs to be
 *	updated in fork.c:copy_process, signal.c:do_signal,
 *	ptrace.c and ptrace.h
 *
 *	 0(%esp) - %ebx
 *	 4(%esp) - %ecx
 *	 8(%esp) - %edx
 *	 C(%esp) - %esi
 *	10(%esp) - %edi
 *	14(%esp) - %ebp
 *	18(%esp) - %eax
 *	1C(%esp) - %ds
 *	20(%esp) - %es
 *	24(%esp) - %fs
 *	28(%esp) - orig_eax
 *	2C(%esp) - %eip
 *	30(%esp) - %cs
 *	34(%esp) - %eflags
 *	38(%esp) - %oldesp
 *	3C(%esp) - %oldss
 *
 * "current" is in register %ebx during any slow entries.
 */

#include <linux/linkage.h>
#include <asm/thread_info.h>
46
#include <asm/irqflags.h>
L
Linus Torvalds 已提交
47 48 49 50 51
#include <asm/errno.h>
#include <asm/segment.h>
#include <asm/smp.h>
#include <asm/page.h>
#include <asm/desc.h>
S
Stas Sergeev 已提交
52
#include <asm/percpu.h>
53
#include <asm/dwarf2.h>
L
Linus Torvalds 已提交
54 55
#include "irq_vectors.h"

56 57 58 59 60 61 62 63 64 65 66 67 68
/*
 * We use macros for low-level operations which need to be overridden
 * for paravirtualization.  The following will never clobber any registers:
 *   INTERRUPT_RETURN (aka. "iret")
 *   GET_CR0_INTO_EAX (aka. "movl %cr0, %eax")
 *   ENABLE_INTERRUPTS_SYSEXIT (aka "sti; sysexit").
 *
 * For DISABLE_INTERRUPTS/ENABLE_INTERRUPTS (aka "cli"/"sti"), you must
 * specify what registers can be overwritten (CLBR_NONE, CLBR_EAX/EDX/ECX/ANY).
 * Allowing a register to be clobbered can shrink the paravirt replacement
 * enough to patch inline, increasing performance.
 */

L
Linus Torvalds 已提交
69 70 71 72 73 74 75 76 77 78
/*
 * Number of entries in sys_call_table.  syscall_table_size is the byte size
 * of the table and each entry is 4 bytes wide (the dispatchers index it with
 * a scale of 4).
 */
#define nr_syscalls ((syscall_table_size)/4)

/* Bit masks for the EFLAGS register image saved on the stack. */
CF_MASK		= 0x00000001	/* carry flag */
TF_MASK		= 0x00000100	/* trap (single-step) flag */
IF_MASK		= 0x00000200	/* interrupt-enable flag */
DF_MASK		= 0x00000400 	/* direction flag */
NT_MASK		= 0x00004000	/* nested-task flag */
VM_MASK		= 0x00020000	/* virtual-8086 mode flag */

/*
 * preempt_stop(clobbers):
 *   CONFIG_PREEMPT:  disable interrupts (registers in 'clobbers' may be
 *                    overwritten) and notify the irq-flags tracer.
 *   !CONFIG_PREEMPT: expands to nothing, and resume_kernel becomes an alias
 *                    for restore_nocheck because no resume_kernel label is
 *                    assembled in that configuration.
 */
#ifdef CONFIG_PREEMPT
#define preempt_stop(clobbers)	DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF
#else
#define preempt_stop(clobbers)
#define resume_kernel		restore_nocheck
#endif

85 86
/*
 * TRACE_IRQS_IRET: used just before restoring registers for an iret.
 * With CONFIG_TRACE_IRQFLAGS it invokes TRACE_IRQS_ON when the saved EFLAGS
 * image (PT_EFLAGS on the stack) has IF set; otherwise it emits nothing.
 * Modifies the CPU flags (testl).
 */
.macro TRACE_IRQS_IRET
#ifdef CONFIG_TRACE_IRQFLAGS
	testl $IF_MASK,PT_EFLAGS(%esp)     # interrupts off?
	jz 1f
	TRACE_IRQS_ON
1:
#endif
.endm

94 95 96 97 98 99
/*
 * resume_userspace_sig: jump target used after do_notify_resume() returns.
 * With CONFIG_VM86 it is check_userspace, so the saved EFLAGS/CS image is
 * re-examined; without it, control goes straight to resume_userspace.
 */
#ifdef CONFIG_VM86
#define resume_userspace_sig	check_userspace
#else
#define resume_userspace_sig	resume_userspace
#endif

L
Linus Torvalds 已提交
100 101
/*
 * SAVE_ALL: build the register part of the pt_regs frame described at the
 * top of this file.  Clears the direction flag, pushes %fs, %es, %ds, %eax,
 * %ebp, %edi, %esi, %edx, %ecx, %ebx (so %ebx ends up at 0(%esp)), keeps the
 * CFI annotations in step with every push, then loads %ds/%es with
 * __USER_DS and %fs with __KERNEL_PDA.
 * Clobbers: %edx (its original value has already been saved on the stack).
 */
#define SAVE_ALL \
	cld; \
	pushl %fs; \
	CFI_ADJUST_CFA_OFFSET 4;\
	/*CFI_REL_OFFSET fs, 0;*/\
	pushl %es; \
	CFI_ADJUST_CFA_OFFSET 4;\
	/*CFI_REL_OFFSET es, 0;*/\
	pushl %ds; \
	CFI_ADJUST_CFA_OFFSET 4;\
	/*CFI_REL_OFFSET ds, 0;*/\
	pushl %eax; \
	CFI_ADJUST_CFA_OFFSET 4;\
	CFI_REL_OFFSET eax, 0;\
	pushl %ebp; \
	CFI_ADJUST_CFA_OFFSET 4;\
	CFI_REL_OFFSET ebp, 0;\
	pushl %edi; \
	CFI_ADJUST_CFA_OFFSET 4;\
	CFI_REL_OFFSET edi, 0;\
	pushl %esi; \
	CFI_ADJUST_CFA_OFFSET 4;\
	CFI_REL_OFFSET esi, 0;\
	pushl %edx; \
	CFI_ADJUST_CFA_OFFSET 4;\
	CFI_REL_OFFSET edx, 0;\
	pushl %ecx; \
	CFI_ADJUST_CFA_OFFSET 4;\
	CFI_REL_OFFSET ecx, 0;\
	pushl %ebx; \
	CFI_ADJUST_CFA_OFFSET 4;\
	CFI_REL_OFFSET ebx, 0;\
	movl $(__USER_DS), %edx; \
	movl %edx, %ds; \
	movl %edx, %es; \
	movl $(__KERNEL_PDA), %edx; \
	movl %edx, %fs
L
Linus Torvalds 已提交
137 138 139

/*
 * RESTORE_INT_REGS: pop the seven general-purpose registers saved by
 * SAVE_ALL, in stack order (%ebx, %ecx, %edx, %esi, %edi, %ebp, %eax),
 * updating the CFI state after each pop.  Segment registers are left on
 * the stack for the caller to handle.
 */
#define RESTORE_INT_REGS \
	popl %ebx;	\
	CFI_ADJUST_CFA_OFFSET -4;\
	CFI_RESTORE ebx;\
	popl %ecx;	\
	CFI_ADJUST_CFA_OFFSET -4;\
	CFI_RESTORE ecx;\
	popl %edx;	\
	CFI_ADJUST_CFA_OFFSET -4;\
	CFI_RESTORE edx;\
	popl %esi;	\
	CFI_ADJUST_CFA_OFFSET -4;\
	CFI_RESTORE esi;\
	popl %edi;	\
	CFI_ADJUST_CFA_OFFSET -4;\
	CFI_RESTORE edi;\
	popl %ebp;	\
	CFI_ADJUST_CFA_OFFSET -4;\
	CFI_RESTORE ebp;\
	popl %eax;	\
	CFI_ADJUST_CFA_OFFSET -4;\
	CFI_RESTORE eax
L
Linus Torvalds 已提交
160 161 162 163

/*
 * RESTORE_REGS: RESTORE_INT_REGS followed by popping %ds, %es and %fs.
 * Each segment pop (labels 1, 2, 3) has an __ex_table entry pointing at a
 * .fixup stub (labels 4, 5, 6) that stores 0 into the stack slot about to
 * be popped and retries the pop, so a faulting selector is replaced by the
 * null selector.
 * Uses numeric local labels 1-6; callers must not rely on those numbers
 * across this macro.
 */
#define RESTORE_REGS	\
	RESTORE_INT_REGS; \
1:	popl %ds;	\
	CFI_ADJUST_CFA_OFFSET -4;\
	/*CFI_RESTORE ds;*/\
2:	popl %es;	\
	CFI_ADJUST_CFA_OFFSET -4;\
	/*CFI_RESTORE es;*/\
3:	popl %fs;	\
	CFI_ADJUST_CFA_OFFSET -4;\
	/*CFI_RESTORE fs;*/\
.pushsection .fixup,"ax";	\
4:	movl $0,(%esp);	\
	jmp 1b;		\
5:	movl $0,(%esp);	\
	jmp 2b;		\
6:	movl $0,(%esp);	\
	jmp 3b;		\
.section __ex_table,"a";\
	.align 4;	\
	.long 1b,4b;	\
	.long 2b,5b;	\
	.long 3b,6b;	\
.popsection
L
Linus Torvalds 已提交
185

186 187
/*
 * RING0_INT_FRAME: open a CFI signal-frame procedure for an entry point
 * whose stack holds three words (CFA = %esp + 3*4), with the return %eip
 * stored at CFA - 3*4, i.e. at 0(%esp).
 */
#define RING0_INT_FRAME \
	CFI_STARTPROC simple;\
	CFI_SIGNAL_FRAME;\
	CFI_DEF_CFA esp, 3*4;\
	/*CFI_OFFSET cs, -2*4;*/\
	CFI_OFFSET eip, -3*4

/*
 * RING0_EC_FRAME: like RING0_INT_FRAME but for entry points whose stack
 * carries one extra word below the return %eip (CFA = %esp + 4*4, %eip at
 * CFA - 3*4, i.e. at 4(%esp)).  It is used by the handlers in this file
 * that do not push their own error-code slot.
 */
#define RING0_EC_FRAME \
	CFI_STARTPROC simple;\
	CFI_SIGNAL_FRAME;\
	CFI_DEF_CFA esp, 4*4;\
	/*CFI_OFFSET cs, -2*4;*/\
	CFI_OFFSET eip, -3*4

/*
 * RING0_PTREGS_FRAME: open a CFI signal-frame procedure for code that runs
 * with a full pt_regs frame at %esp.  The CFA is placed at the PT_OLDESP
 * slot and the saved location of %eip and every general-purpose register
 * is described relative to it using the PT_* offsets.
 */
#define RING0_PTREGS_FRAME \
	CFI_STARTPROC simple;\
	CFI_SIGNAL_FRAME;\
	CFI_DEF_CFA esp, PT_OLDESP-PT_EBX;\
	/*CFI_OFFSET cs, PT_CS-PT_OLDESP;*/\
	CFI_OFFSET eip, PT_EIP-PT_OLDESP;\
	/*CFI_OFFSET es, PT_ES-PT_OLDESP;*/\
	/*CFI_OFFSET ds, PT_DS-PT_OLDESP;*/\
	CFI_OFFSET eax, PT_EAX-PT_OLDESP;\
	CFI_OFFSET ebp, PT_EBP-PT_OLDESP;\
	CFI_OFFSET edi, PT_EDI-PT_OLDESP;\
	CFI_OFFSET esi, PT_ESI-PT_OLDESP;\
	CFI_OFFSET edx, PT_EDX-PT_OLDESP;\
	CFI_OFFSET ecx, PT_ECX-PT_OLDESP;\
	CFI_OFFSET ebx, PT_EBX-PT_OLDESP
L
Linus Torvalds 已提交
215 216

/*
 * ret_from_fork: preserves %eax across a call to schedule_tail, loads the
 * thread_info pointer into %ebp, resets EFLAGS to 0x0202 (IF set) and
 * leaves through the normal syscall_exit path.
 */
ENTRY(ret_from_fork)
	CFI_STARTPROC
	pushl %eax
	CFI_ADJUST_CFA_OFFSET 4
	call schedule_tail
	GET_THREAD_INFO(%ebp)
	popl %eax
	CFI_ADJUST_CFA_OFFSET -4
	pushl $0x0202			# Reset kernel eflags
	CFI_ADJUST_CFA_OFFSET 4
	popfl
	CFI_ADJUST_CFA_OFFSET -4
	jmp syscall_exit
	CFI_ENDPROC
L
Linus Torvalds 已提交
230 231 232 233 234 235 236 237 238 239

/*
 * Return to user mode is not as complex as all this looks,
 * but we want the default path for a system call return to
 * go as quickly as possible which is why some of this is
 * less clear than it otherwise should be.
 */

	# userspace resumption stub bypassing syscall exit tracing
	/*
	 * Common return path for exceptions (ret_from_exception) and
	 * interrupts (ret_from_intr).  Expects a pt_regs frame at %esp.
	 * %ebp is loaded with the thread_info pointer.  check_userspace
	 * merges the saved EFLAGS and the low byte of the saved CS to
	 * decide between resume_kernel and resume_userspace.
	 */
	ALIGN
	RING0_PTREGS_FRAME
ret_from_exception:
	preempt_stop(CLBR_ANY)
ret_from_intr:
	GET_THREAD_INFO(%ebp)
check_userspace:
	movl PT_EFLAGS(%esp), %eax	# mix EFLAGS and CS
	movb PT_CS(%esp), %al
	andl $(VM_MASK | SEGMENT_RPL_MASK), %eax
	cmpl $USER_RPL, %eax
	jb resume_kernel		# not returning to v8086 or userspace

ENTRY(resume_userspace)
	DISABLE_INTERRUPTS(CLBR_ANY)	# make sure we don't miss an interrupt
					# setting need_resched or sigpending
					# between sampling and the iret
	movl TI_flags(%ebp), %ecx
	andl $_TIF_WORK_MASK, %ecx	# is there any work to be done on
					# int/exception return?
	jne work_pending
	jmp restore_all

#ifdef CONFIG_PREEMPT
	/*
	 * Kernel preemption on return to kernel mode: only when
	 * preempt_count is zero, need_resched is set and the interrupted
	 * context had interrupts enabled.  Loops until need_resched clears.
	 */
ENTRY(resume_kernel)
	DISABLE_INTERRUPTS(CLBR_ANY)
	cmpl $0,TI_preempt_count(%ebp)	# non-zero preempt_count ?
	jnz restore_nocheck
need_resched:
	movl TI_flags(%ebp), %ecx	# need_resched set ?
	testb $_TIF_NEED_RESCHED, %cl
	jz restore_all
	testl $IF_MASK,PT_EFLAGS(%esp)	# interrupts off (exception path) ?
	jz restore_all
	call preempt_schedule_irq
	jmp need_resched
#endif
	CFI_ENDPROC
L
Linus Torvalds 已提交
277 278 279 280 281 282

/* SYSENTER_RETURN points to after the "sysenter" instruction in
   the vsyscall page.  See vsyscall-sysentry.S, which defines the symbol.  */

/*
 * sysenter_entry: fast system-call entry.
 *   In:  %eax = system call number, %ebp = user stack pointer (pushed as
 *        the saved user %esp; the sixth argument is then loaded from it).
 *   The code switches to the stack found via TSS_sysenter_esp0, hand-builds
 *   the ss/esp/eflags/cs/eip part of the frame, completes it with SAVE_ALL,
 *   dispatches through sys_call_table and, when no exit work is pending,
 *   returns with ENABLE_INTERRUPTS_SYSEXIT (%edx = return eip, %ecx =
 *   user esp).  Any pending work diverts to syscall_exit_work.
 */
	# sysenter call handler stub
ENTRY(sysenter_entry)
	CFI_STARTPROC simple
	CFI_SIGNAL_FRAME
	CFI_DEF_CFA esp, 0
	CFI_REGISTER esp, ebp
	movl TSS_sysenter_esp0(%esp),%esp
sysenter_past_esp:
	/*
	 * No need to follow this irqs on/off section: the syscall
	 * disabled irqs and here we enable it straight after entry:
	 */
	ENABLE_INTERRUPTS(CLBR_NONE)
	pushl $(__USER_DS)
	CFI_ADJUST_CFA_OFFSET 4
	/*CFI_REL_OFFSET ss, 0*/
	pushl %ebp
	CFI_ADJUST_CFA_OFFSET 4
	CFI_REL_OFFSET esp, 0
	pushfl
	CFI_ADJUST_CFA_OFFSET 4
	pushl $(__USER_CS)
	CFI_ADJUST_CFA_OFFSET 4
	/*CFI_REL_OFFSET cs, 0*/
#ifndef CONFIG_COMPAT_VDSO
	/*
	 * Push current_thread_info()->sysenter_return to the stack.
	 * A tiny bit of offset fixup is necessary - 4*4 means the 4 words
	 * pushed above; +8 corresponds to copy_thread's esp0 setting.
	 */
	pushl (TI_sysenter_return-THREAD_SIZE+8+4*4)(%esp)
#else
	pushl $SYSENTER_RETURN
#endif
	CFI_ADJUST_CFA_OFFSET 4
	CFI_REL_OFFSET eip, 0

/*
 * Load the potential sixth argument from user stack.
 * Careful about security.
 */
	cmpl $__PAGE_OFFSET-3,%ebp
	jae syscall_fault
1:	movl (%ebp),%ebp
.section __ex_table,"a"
	.align 4
	.long 1b,syscall_fault
.previous

	pushl %eax
	CFI_ADJUST_CFA_OFFSET 4
	SAVE_ALL
	GET_THREAD_INFO(%ebp)

	/* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */
	testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
	jnz syscall_trace_entry
	cmpl $(nr_syscalls), %eax
	jae syscall_badsys
	call *sys_call_table(,%eax,4)
	movl %eax,PT_EAX(%esp)
	DISABLE_INTERRUPTS(CLBR_ECX|CLBR_EDX)
	TRACE_IRQS_OFF
	movl TI_flags(%ebp), %ecx
	testw $_TIF_ALLWORK_MASK, %cx
	jne syscall_exit_work
/* if something modifies registers it must also disable sysexit */
	movl PT_EIP(%esp), %edx
	movl PT_OLDESP(%esp), %ecx
	xorl %ebp,%ebp
	TRACE_IRQS_ON
1:	mov  PT_FS(%esp), %fs
	ENABLE_INTERRUPTS_SYSEXIT
	CFI_ENDPROC
/* A fault while reloading %fs zeroes the saved selector and retries. */
.pushsection .fixup,"ax"
2:	movl $0,PT_FS(%esp)
	jmp 1b
.section __ex_table,"a"
	.align 4
	.long 1b,2b
.popsection
L
Linus Torvalds 已提交
362 363 364

	# system call handler stub
	/*
	 * In:  %eax = system call number (also saved as orig_eax).
	 * Builds the pt_regs frame, records a pending single-step if TF
	 * was set in the saved EFLAGS, diverts to syscall_trace_entry when
	 * tracing/seccomp/audit flags are set, rejects out-of-range
	 * numbers via syscall_badsys, and otherwise calls the handler from
	 * sys_call_table and stores its result in the saved %eax slot.
	 * Falls through into restore_all when no exit work is pending.
	 */
ENTRY(system_call)
	RING0_INT_FRAME			# can't unwind into user space anyway
	pushl %eax			# save orig_eax
	CFI_ADJUST_CFA_OFFSET 4
	SAVE_ALL
	GET_THREAD_INFO(%ebp)
	testl $TF_MASK,PT_EFLAGS(%esp)
	jz no_singlestep
	orl $_TIF_SINGLESTEP,TI_flags(%ebp)
no_singlestep:
					# system call tracing in operation / emulation
	/* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */
	testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
	jnz syscall_trace_entry
	cmpl $(nr_syscalls), %eax
	jae syscall_badsys
syscall_call:
	call *sys_call_table(,%eax,4)
	movl %eax,PT_EAX(%esp)		# store the return value
syscall_exit:
	DISABLE_INTERRUPTS(CLBR_ANY)	# make sure we don't miss an interrupt
					# setting need_resched or sigpending
					# between sampling and the iret
	TRACE_IRQS_OFF
	movl TI_flags(%ebp), %ecx
	testw $_TIF_ALLWORK_MASK, %cx	# current->work
	jne syscall_exit_work

/*
 * restore_all: final return path.  Combines the saved EFLAGS, the low byte
 * of the saved SS (into %ah) and of the saved CS (into %al) to detect a
 * return to user space on an LDT stack segment, which is handled by ldt_ss.
 * Otherwise the registers are restored, the orig_eax/error_code slot is
 * skipped and INTERRUPT_RETURN is executed.  A fault on that instruction is
 * routed through the exception table to iret_exc, which re-enables
 * interrupts and raises do_iret_error via error_code.
 */
restore_all:
	movl PT_EFLAGS(%esp), %eax	# mix EFLAGS, SS and CS
	# Warning: PT_OLDSS(%esp) contains the wrong/random values if we
	# are returning to the kernel.
	# See comments in process.c:copy_thread() for details.
	movb PT_OLDSS(%esp), %ah
	movb PT_CS(%esp), %al
	andl $(VM_MASK | (SEGMENT_TI_MASK << 8) | SEGMENT_RPL_MASK), %eax
	cmpl $((SEGMENT_LDT << 8) | USER_RPL), %eax
	CFI_REMEMBER_STATE
	je ldt_ss			# returning to user-space with LDT SS
restore_nocheck:
	TRACE_IRQS_IRET
restore_nocheck_notrace:
	RESTORE_REGS
	addl $4, %esp			# skip orig_eax/error_code
	CFI_ADJUST_CFA_OFFSET -4
1:	INTERRUPT_RETURN
.section .fixup,"ax"
iret_exc:
	TRACE_IRQS_ON
	ENABLE_INTERRUPTS(CLBR_NONE)
	pushl $0			# no error code
	pushl $do_iret_error
	jmp error_code
.previous
.section __ex_table,"a"
	.align 4
	.long 1b,iret_exc
.previous

423
	CFI_RESTORE_STATE
	/*
	 * ldt_ss: reached from restore_all when returning to user space
	 * with an LDT-based stack segment.  If the segment turns out to be
	 * a 32-bit one (or cannot be examined) the normal restore_nocheck
	 * path is used; otherwise the return goes through the espfix stack.
	 */
ldt_ss:
	larl PT_OLDSS(%esp), %eax
	jnz restore_nocheck
	testl $0x00400000, %eax		# returning to 32bit stack?
	jnz restore_nocheck		# all right, normal return

#ifdef CONFIG_PARAVIRT
	/*
	 * The kernel can't run on a non-flat stack if paravirt mode
	 * is active.  Rather than try to fixup the high bits of
	 * ESP, bypass this code entirely.  This may break DOSemu
	 * and/or Wine support in a paravirt VM, although the option
	 * is still available to implement the setting of the high
	 * 16-bits in the INTERRUPT_RETURN paravirt-op.
	 */
	cmpl $0, paravirt_ops+PARAVIRT_enabled
	jne restore_nocheck
#endif

	/* If returning to userspace with 16bit stack,
	 * try to fix the higher word of ESP, as the CPU
	 * won't restore it.
	 * This is an "official" bug of all the x86-compatible
	 * CPUs, which we can try to work around to make
	 * dosemu and wine happy. */
	movl PT_OLDESP(%esp), %eax
	movl %esp, %edx
	call patch_espfix_desc
	pushl $__ESPFIX_SS
	CFI_ADJUST_CFA_OFFSET 4
	pushl %eax
	CFI_ADJUST_CFA_OFFSET 4
	DISABLE_INTERRUPTS(CLBR_EAX)
	TRACE_IRQS_OFF
	lss (%esp), %esp		# switch to the ss:esp pair just pushed
	CFI_ADJUST_CFA_OFFSET -8
	jmp restore_nocheck
	CFI_ENDPROC
L
Linus Torvalds 已提交
462 463 464

	# perform work that needs to be done immediately before resumption
	/*
	 * In:  %ecx = thread_info flags (already masked by the caller),
	 *      %ebp = thread_info pointer, %esp = pt_regs frame.
	 * Reschedules while _TIF_NEED_RESCHED is set, then hands any
	 * remaining work to do_notify_resume(regs in %eax, 0 in %edx,
	 * flags in %ecx) and continues at resume_userspace_sig.
	 */
	ALIGN
	RING0_PTREGS_FRAME		# can't unwind into user space anyway
work_pending:
	testb $_TIF_NEED_RESCHED, %cl
	jz work_notifysig
work_resched:
	call schedule
	DISABLE_INTERRUPTS(CLBR_ANY)	# make sure we don't miss an interrupt
					# setting need_resched or sigpending
					# between sampling and the iret
	TRACE_IRQS_OFF
	movl TI_flags(%ebp), %ecx
	andl $_TIF_WORK_MASK, %ecx	# is there any work to be done other
					# than syscall tracing?
	jz restore_all
	testb $_TIF_NEED_RESCHED, %cl
	jnz work_resched

work_notifysig:				# deal with pending signals and
					# notify-resume requests
#ifdef CONFIG_VM86
	testl $VM_MASK, PT_EFLAGS(%esp)
	movl %esp, %eax			# movl leaves the testl flags intact
	jne work_notifysig_v86		# returning to kernel-space or
					# vm86-space
	xorl %edx, %edx
	call do_notify_resume
	jmp resume_userspace_sig

	ALIGN
work_notifysig_v86:
	pushl %ecx			# save ti_flags for do_notify_resume
	CFI_ADJUST_CFA_OFFSET 4
	call save_v86_state		# %eax contains pt_regs pointer
	popl %ecx
	CFI_ADJUST_CFA_OFFSET -4
	movl %eax, %esp
#else
	movl %esp, %eax
#endif
	xorl %edx, %edx
	call do_notify_resume
	jmp resume_userspace_sig
L
Linus Torvalds 已提交
507 508 509 510

	# perform syscall entry tracing
	/*
	 * Presets the saved %eax to -ENOSYS, calls
	 * do_syscall_trace(regs in %eax, 0 in %edx) and then either skips
	 * the system call (non-zero return), dispatches it via
	 * syscall_call using the possibly updated orig_eax, or goes to
	 * syscall_exit when that number is out of range.
	 */
	ALIGN
syscall_trace_entry:
	movl $-ENOSYS,PT_EAX(%esp)
	movl %esp, %eax
	xorl %edx,%edx
	call do_syscall_trace
	cmpl $0, %eax
	jne resume_userspace		# ret != 0 -> running under PTRACE_SYSEMU,
					# so must skip actual syscall
	movl PT_ORIG_EAX(%esp), %eax
	cmpl $(nr_syscalls), %eax
	jnae syscall_call
	jmp syscall_exit

	# perform syscall exit tracing
	/*
	 * In:  %ecx = thread_info flags.  If none of the trace, audit or
	 * single-step flags is set the generic work_pending path is used;
	 * otherwise interrupts are re-enabled and
	 * do_syscall_trace(regs in %eax, 1 in %edx) is called before
	 * resuming user space.
	 */
	ALIGN
syscall_exit_work:
	testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP), %cl
	jz work_pending
	TRACE_IRQS_ON
	ENABLE_INTERRUPTS(CLBR_ANY)	# could let do_syscall_trace() call
					# schedule() instead
	movl %esp, %eax
	movl $1, %edx
	call do_syscall_trace
	jmp resume_userspace
	CFI_ENDPROC
L
Linus Torvalds 已提交
536

537
	RING0_INT_FRAME			# can't unwind into user space anyway
	/*
	 * syscall_fault: target of the sysenter sixth-argument check and of
	 * its exception-table entry.  Completes the pt_regs frame and
	 * returns -EFAULT in the saved %eax.
	 */
syscall_fault:
	pushl %eax			# save orig_eax
	CFI_ADJUST_CFA_OFFSET 4
	SAVE_ALL
	GET_THREAD_INFO(%ebp)
	movl $-EFAULT,PT_EAX(%esp)
	jmp resume_userspace

	/* syscall_badsys: system call number out of range -> -ENOSYS. */
syscall_badsys:
	movl $-ENOSYS,PT_EAX(%esp)
	jmp resume_userspace
	CFI_ENDPROC
L
Linus Torvalds 已提交
550 551

/*
 * FIXUP_ESPFIX_STACK: leave the espfix stack segment.  Reads the base of
 * the GDT_ENTRY_ESPFIX_SS descriptor from this CPU's GDT, adds the current
 * %esp to it and loads __KERNEL_DS:<that address> into %ss:%esp with lss.
 * Clobbers: %eax (left holding the new %esp value), %ebx.
 */
#define FIXUP_ESPFIX_STACK \
	/* since we are on a wrong stack, we cannot make it a C code :( */ \
	movl %fs:PDA_cpu, %ebx; \
	PER_CPU(cpu_gdt_descr, %ebx); \
	movl GDS_address(%ebx), %ebx; \
	GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah); \
	addl %esp, %eax; \
	pushl $__KERNEL_DS; \
	CFI_ADJUST_CFA_OFFSET 4; \
	pushl %eax; \
	CFI_ADJUST_CFA_OFFSET 4; \
	lss (%esp), %esp; \
	CFI_ADJUST_CFA_OFFSET -8;
/*
 * UNWIND_ESPFIX_STACK: if %ss is __ESPFIX_SS, load %ds/%es with __KERNEL_DS
 * and switch to the normal stack via FIXUP_ESPFIX_STACK; otherwise do
 * nothing beyond the test.
 * Clobbers: %eax always, plus %ebx when the switch is performed.
 * Uses local label 27.
 */
#define UNWIND_ESPFIX_STACK \
	movl %ss, %eax; \
	/* see if on espfix stack */ \
	cmpw $__ESPFIX_SS, %ax; \
	jne 27f; \
	movl $__KERNEL_DS, %eax; \
	movl %eax, %ds; \
	movl %eax, %es; \
	/* switch to normal stack */ \
	FIXUP_ESPFIX_STACK; \
27:;
L
Linus Torvalds 已提交
575 576 577 578 579 580 581 582 583 584 585

/*
 * Build the entry stubs and pointer table with
 * some assembler magic.
 *
 * For each of the NR_IRQS vectors a stub is emitted in .text that pushes
 * ~vector and jumps to common_interrupt, and the stub's address is appended
 * in .data to the table that starts at 'interrupt'.  The CFA adjustment
 * before every stub but the first cancels the previous stub's push, since
 * all stubs share one CFI procedure.
 */
.data
ENTRY(interrupt)
.text

vector=0
ENTRY(irq_entries_start)
	RING0_INT_FRAME
.rept NR_IRQS
	ALIGN
 .if vector
	CFI_ADJUST_CFA_OFFSET -4
 .endif
1:	pushl $~(vector)
	CFI_ADJUST_CFA_OFFSET 4
	jmp common_interrupt
.data
	.long 1b
.text
vector=vector+1
.endr

601 602 603 604
/*
 * the CPU automatically disables interrupts when executing an IRQ vector,
 * so IRQ-flags tracing has to follow that:
 *
 * common_interrupt is entered from the per-vector stubs with ~vector
 * already pushed; it completes the pt_regs frame, calls do_IRQ with the
 * frame pointer in %eax and leaves through ret_from_intr.
 */
	ALIGN
common_interrupt:
	SAVE_ALL
	TRACE_IRQS_OFF
	movl %esp,%eax
	call do_IRQ
	jmp ret_from_intr
	CFI_ENDPROC
L
Linus Torvalds 已提交
613 614 615

/*
 * BUILD_INTERRUPT(name, nr): emit the entry point 'name' for interrupt
 * vector 'nr'.  The stub pushes ~nr, saves all registers, notifies the
 * irq-flags tracer, calls smp_<name> with the pt_regs pointer in %eax and
 * returns through ret_from_intr.
 *
 * TRACE_IRQS_OFF is followed by an explicit ';' like every other statement
 * here, so the macro does not depend on that macro's expansion ending in
 * its own separator.
 */
#define BUILD_INTERRUPT(name, nr)	\
ENTRY(name)				\
	RING0_INT_FRAME;		\
	pushl $~(nr);			\
	CFI_ADJUST_CFA_OFFSET 4;	\
	SAVE_ALL;			\
	TRACE_IRQS_OFF;			\
	movl %esp,%eax;			\
	call smp_/**/name;		\
	jmp ret_from_intr;		\
	CFI_ENDPROC
L
Linus Torvalds 已提交
625 626 627 628

/* The include is where all of the SMP etc. interrupts come from */
#include "entry_arch.h"

629 630 631
/*
 * page_fault pushes the address of do_page_fault and falls into error_code,
 * the common exception path shared by the other handlers in this file.
 *
 * error_code expects, from the top of the stack: handler address, error
 * code, then the return frame.  It saves the registers in pt_regs order
 * (the handler address temporarily occupies the %fs slot), leaves the
 * espfix stack if necessary, then swaps things into place: the real %fs
 * goes into PT_FS, the error code is taken from PT_ORIG_EAX (which is set
 * to -1) and the handler is called with %eax = pt_regs pointer and
 * %edx = error code.  Returns through ret_from_exception.
 */
KPROBE_ENTRY(page_fault)
	RING0_EC_FRAME
	pushl $do_page_fault
	CFI_ADJUST_CFA_OFFSET 4
	ALIGN
error_code:
	/* the function address is in %fs's slot on the stack */
	pushl %es
	CFI_ADJUST_CFA_OFFSET 4
	/*CFI_REL_OFFSET es, 0*/
	pushl %ds
	CFI_ADJUST_CFA_OFFSET 4
	/*CFI_REL_OFFSET ds, 0*/
	pushl %eax
	CFI_ADJUST_CFA_OFFSET 4
	CFI_REL_OFFSET eax, 0
	pushl %ebp
	CFI_ADJUST_CFA_OFFSET 4
	CFI_REL_OFFSET ebp, 0
	pushl %edi
	CFI_ADJUST_CFA_OFFSET 4
	CFI_REL_OFFSET edi, 0
	pushl %esi
	CFI_ADJUST_CFA_OFFSET 4
	CFI_REL_OFFSET esi, 0
	pushl %edx
	CFI_ADJUST_CFA_OFFSET 4
	CFI_REL_OFFSET edx, 0
	pushl %ecx
	CFI_ADJUST_CFA_OFFSET 4
	CFI_REL_OFFSET ecx, 0
	pushl %ebx
	CFI_ADJUST_CFA_OFFSET 4
	CFI_REL_OFFSET ebx, 0
	cld
	pushl %fs
	CFI_ADJUST_CFA_OFFSET 4
	/*CFI_REL_OFFSET fs, 0*/
	movl $(__KERNEL_PDA), %ecx
	movl %ecx, %fs
	UNWIND_ESPFIX_STACK
	popl %ecx			/* %ecx = %fs value saved above */
	CFI_ADJUST_CFA_OFFSET -4
	/*CFI_REGISTER es, ecx*/
	movl PT_FS(%esp), %edi		# get the function address
	movl PT_ORIG_EAX(%esp), %edx	# get the error code
	movl $-1, PT_ORIG_EAX(%esp)	# no syscall to restart
	mov  %ecx, PT_FS(%esp)
	/*CFI_REL_OFFSET fs, ES*/
	movl $(__USER_DS), %ecx
	movl %ecx, %ds
	movl %ecx, %es
	movl %esp,%eax			# pt_regs pointer
	call *%edi
	jmp ret_from_exception
	CFI_ENDPROC
KPROBE_END(page_fault)
L
Linus Torvalds 已提交
686 687

/*
 * coprocessor_error: push a zero error code and the address of
 * do_coprocessor_error, then join the common error_code path.
 */
ENTRY(coprocessor_error)
	RING0_INT_FRAME
	pushl $0
	CFI_ADJUST_CFA_OFFSET 4
	pushl $do_coprocessor_error
	CFI_ADJUST_CFA_OFFSET 4
	jmp error_code
	CFI_ENDPROC
L
Linus Torvalds 已提交
695 696

/*
 * simd_coprocessor_error: push a zero error code and the address of
 * do_simd_coprocessor_error, then join the common error_code path.
 */
ENTRY(simd_coprocessor_error)
	RING0_INT_FRAME
	pushl $0
	CFI_ADJUST_CFA_OFFSET 4
	pushl $do_simd_coprocessor_error
	CFI_ADJUST_CFA_OFFSET 4
	jmp error_code
	CFI_ENDPROC
L
Linus Torvalds 已提交
704 705

/*
 * device_not_available: builds a pt_regs frame with orig_eax = -1, reads
 * CR0 and tests bit 2 (EM).  If EM is clear, math_state_restore is called;
 * if it is set, math_emulate is called with one extra zero word pushed as
 * scratch storage.  Both paths return through ret_from_exception.
 */
ENTRY(device_not_available)
	RING0_INT_FRAME
	pushl $-1			# mark this as an int
	CFI_ADJUST_CFA_OFFSET 4
	SAVE_ALL
	GET_CR0_INTO_EAX
	testl $0x4, %eax		# EM (math emulation bit)
	jne device_not_available_emulate
	preempt_stop(CLBR_ANY)
	call math_state_restore
	jmp ret_from_exception
device_not_available_emulate:
	pushl $0			# temporary storage for ORIG_EIP
	CFI_ADJUST_CFA_OFFSET 4
	call math_emulate
	addl $4, %esp
	CFI_ADJUST_CFA_OFFSET -4
	jmp ret_from_exception
	CFI_ENDPROC
L
Linus Torvalds 已提交
724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742

/*
 * Debug traps and NMI can happen at the one SYSENTER instruction
 * that sets up the real kernel stack. Check here, since we can't
 * allow the wrong stack to be used.
 *
 * "TSS_sysenter_esp0+12" is because the NMI/debug handler will have
 * already pushed 3 words if it hits on the sysenter instruction:
 * eflags, cs and eip.
 *
 * We just load the right stack, and push the three (known) values
 * by hand onto the new stack - while updating the return eip past
 * the instruction that would have done it for sysenter.
 *
 * FIX_STACK(offset, ok, label):
 *   offset - bytes already pushed on the wrong stack, added to
 *            TSS_sysenter_esp0 when locating the real stack pointer
 *   ok     - label to branch to when the saved CS is not __KERNEL_CS
 *            (nothing to fix)
 *   label  - label placed on the instruction that loads the new %esp
 */
#define FIX_STACK(offset, ok, label)		\
	cmpw $__KERNEL_CS,4(%esp);		\
	jne ok;					\
label:						\
	movl TSS_sysenter_esp0+offset(%esp),%esp;	\
	CFI_DEF_CFA esp, 0;			\
	CFI_UNDEFINED eip;			\
	pushfl;					\
	CFI_ADJUST_CFA_OFFSET 4;		\
	pushl $__KERNEL_CS;			\
	CFI_ADJUST_CFA_OFFSET 4;		\
	pushl $sysenter_past_esp;		\
	CFI_ADJUST_CFA_OFFSET 4;		\
	CFI_REL_OFFSET eip, 0
L
Linus Torvalds 已提交
752

753
/*
 * debug: if the trap hit exactly at sysenter_entry (return eip on the stack
 * equals that address) the stack is repaired with FIX_STACK first.  Then a
 * pt_regs frame with orig_eax = -1 is built and do_debug is called with
 * %eax = pt_regs pointer and %edx = 0.  Returns via ret_from_exception.
 */
KPROBE_ENTRY(debug)
	RING0_INT_FRAME
	cmpl $sysenter_entry,(%esp)
	jne debug_stack_correct
	FIX_STACK(12, debug_stack_correct, debug_esp_fix_insn)
debug_stack_correct:
	pushl $-1			# mark this as an int
	CFI_ADJUST_CFA_OFFSET 4
	SAVE_ALL
	xorl %edx,%edx			# error code 0
	movl %esp,%eax			# pt_regs pointer
	call do_debug
	jmp ret_from_exception
	CFI_ENDPROC
KPROBE_END(debug)

L
Linus Torvalds 已提交
769 770 771 772 773 774 775 776
/*
 * NMI is doubly nasty. It can happen _while_ we're handling
 * a debug fault, and the debug fault hasn't yet been able to
 * clear up the stack. So we first check whether we got an
 * NMI on the sysenter entry path, but after that we need to
 * check whether we got an NMI on the debug path where the debug
 * fault happened on the sysenter path.
 *
 * Paths below:
 *   nmi_espfix_stack      - NMI arrived while %ss was __ESPFIX_SS
 *   nmi_stack_fixup       - NMI hit exactly at sysenter_entry
 *   nmi_debug_stack_check - NMI hit inside 'debug' before it fixed a
 *                           sysenter stack
 *   nmi_stack_correct     - normal case: build pt_regs and call do_nmi
 */
KPROBE_ENTRY(nmi)
	RING0_INT_FRAME
	pushl %eax
	CFI_ADJUST_CFA_OFFSET 4
	movl %ss, %eax
	cmpw $__ESPFIX_SS, %ax
	popl %eax			/* popl leaves the cmpw flags intact */
	CFI_ADJUST_CFA_OFFSET -4
	je nmi_espfix_stack
	cmpl $sysenter_entry,(%esp)
	je nmi_stack_fixup
	pushl %eax
	CFI_ADJUST_CFA_OFFSET 4
	movl %esp,%eax
	/* Do not access memory above the end of our stack page,
	 * it might not exist.
	 */
	andl $(THREAD_SIZE-1),%eax
	cmpl $(THREAD_SIZE-20),%eax
	popl %eax
	CFI_ADJUST_CFA_OFFSET -4
	jae nmi_stack_correct
	cmpl $sysenter_entry,12(%esp)
	je nmi_debug_stack_check
nmi_stack_correct:
	/* We have a RING0_INT_FRAME here */
	pushl %eax
	CFI_ADJUST_CFA_OFFSET 4
	SAVE_ALL
	xorl %edx,%edx		# zero error code
	movl %esp,%eax		# pt_regs pointer
	call do_nmi
	jmp restore_nocheck_notrace
	CFI_ENDPROC

nmi_stack_fixup:
	RING0_INT_FRAME
	FIX_STACK(12,nmi_stack_correct, 1)
	jmp nmi_stack_correct

nmi_debug_stack_check:
	/* We have a RING0_INT_FRAME here */
	cmpw $__KERNEL_CS,16(%esp)
	jne nmi_stack_correct
	cmpl $debug,(%esp)
	jb nmi_stack_correct
	cmpl $debug_esp_fix_insn,(%esp)
	ja nmi_stack_correct
	FIX_STACK(24,nmi_stack_correct, 1)
	jmp nmi_stack_correct

nmi_espfix_stack:
	/* We have a RING0_INT_FRAME here.
	 *
	 * create the pointer to lss back
	 */
	pushl %ss
	CFI_ADJUST_CFA_OFFSET 4
	pushl %esp
	CFI_ADJUST_CFA_OFFSET 4
	addw $4, (%esp)
	/* copy the iret frame of 12 bytes */
	.rept 3
	pushl 16(%esp)
	CFI_ADJUST_CFA_OFFSET 4
	.endr
	pushl %eax
	CFI_ADJUST_CFA_OFFSET 4
	SAVE_ALL
	FIXUP_ESPFIX_STACK		# %eax == %esp
	xorl %edx,%edx			# zero error code
	call do_nmi
	RESTORE_REGS
	lss 12+4(%esp), %esp		# back to espfix stack
	CFI_ADJUST_CFA_OFFSET -24
1:	INTERRUPT_RETURN
	CFI_ENDPROC
.section __ex_table,"a"
	.align 4
	.long 1b,iret_exc
.previous
KPROBE_END(nmi)
L
Linus Torvalds 已提交
859

860 861 862 863 864 865 866 867 868 869 870 871 872
/*
 * Native implementations of the two paravirt return operations, only built
 * with CONFIG_PARAVIRT.
 *
 * native_iret: a bare iret whose faults are routed to iret_exc through the
 * exception table.
 * native_irq_enable_sysexit: sti immediately followed by sysexit.
 */
#ifdef CONFIG_PARAVIRT
ENTRY(native_iret)
1:	iret
.section __ex_table,"a"
	.align 4
	.long 1b,iret_exc
.previous

ENTRY(native_irq_enable_sysexit)
	sti
	sysexit
#endif

873
/*
 * int3: build a pt_regs frame with orig_eax = -1 and call do_int3 with
 * %eax = pt_regs pointer and %edx = 0.  Returns via ret_from_exception.
 */
KPROBE_ENTRY(int3)
	RING0_INT_FRAME
	pushl $-1			# mark this as an int
	CFI_ADJUST_CFA_OFFSET 4
	SAVE_ALL
	xorl %edx,%edx		# zero error code
	movl %esp,%eax		# pt_regs pointer
	call do_int3
	jmp ret_from_exception
	CFI_ENDPROC
KPROBE_END(int3)
L
Linus Torvalds 已提交
884 885

/*
 * overflow: push a zero error code and the address of do_overflow, then
 * join the common error_code path.
 */
ENTRY(overflow)
	RING0_INT_FRAME
	pushl $0
	CFI_ADJUST_CFA_OFFSET 4
	pushl $do_overflow
	CFI_ADJUST_CFA_OFFSET 4
	jmp error_code
	CFI_ENDPROC
L
Linus Torvalds 已提交
893 894

/*
 * bounds: push a zero error code and the address of do_bounds, then join
 * the common error_code path.
 */
ENTRY(bounds)
	RING0_INT_FRAME
	pushl $0
	CFI_ADJUST_CFA_OFFSET 4
	pushl $do_bounds
	CFI_ADJUST_CFA_OFFSET 4
	jmp error_code
	CFI_ENDPROC
L
Linus Torvalds 已提交
902 903

/*
 * invalid_op: push a zero error code and the address of do_invalid_op,
 * then join the common error_code path.
 */
ENTRY(invalid_op)
	RING0_INT_FRAME
	pushl $0
	CFI_ADJUST_CFA_OFFSET 4
	pushl $do_invalid_op
	CFI_ADJUST_CFA_OFFSET 4
	jmp error_code
	CFI_ENDPROC
L
Linus Torvalds 已提交
911 912

/*
 * coprocessor_segment_overrun: push a zero error code and the address of
 * do_coprocessor_segment_overrun, then join the common error_code path.
 */
ENTRY(coprocessor_segment_overrun)
	RING0_INT_FRAME
	pushl $0
	CFI_ADJUST_CFA_OFFSET 4
	pushl $do_coprocessor_segment_overrun
	CFI_ADJUST_CFA_OFFSET 4
	jmp error_code
	CFI_ENDPROC
L
Linus Torvalds 已提交
920 921

/*
 * invalid_TSS: entered with an error code already on the stack
 * (RING0_EC_FRAME); push the address of do_invalid_TSS and join the common
 * error_code path.
 */
ENTRY(invalid_TSS)
	RING0_EC_FRAME
	pushl $do_invalid_TSS
	CFI_ADJUST_CFA_OFFSET 4
	jmp error_code
	CFI_ENDPROC
L
Linus Torvalds 已提交
927 928

/*
 * segment_not_present: entered with an error code already on the stack
 * (RING0_EC_FRAME); push the address of do_segment_not_present and join the
 * common error_code path.
 */
ENTRY(segment_not_present)
	RING0_EC_FRAME
	pushl $do_segment_not_present
	CFI_ADJUST_CFA_OFFSET 4
	jmp error_code
	CFI_ENDPROC
L
Linus Torvalds 已提交
934 935

/*
 * stack_segment: entered with an error code already on the stack
 * (RING0_EC_FRAME); push the address of do_stack_segment and join the
 * common error_code path.
 */
ENTRY(stack_segment)
	RING0_EC_FRAME
	pushl $do_stack_segment
	CFI_ADJUST_CFA_OFFSET 4
	jmp error_code
	CFI_ENDPROC
L
Linus Torvalds 已提交
941

942
/*
 * general_protection: entered with an error code already on the stack
 * (RING0_EC_FRAME); push the address of do_general_protection and join the
 * common error_code path.
 */
KPROBE_ENTRY(general_protection)
	RING0_EC_FRAME
	pushl $do_general_protection
	CFI_ADJUST_CFA_OFFSET 4
	jmp error_code
	CFI_ENDPROC
KPROBE_END(general_protection)
L
Linus Torvalds 已提交
949 950

/*
 * alignment_check: entered with an error code already on the stack
 * (RING0_EC_FRAME); push the address of do_alignment_check and join the
 * common error_code path.
 */
ENTRY(alignment_check)
	RING0_EC_FRAME
	pushl $do_alignment_check
	CFI_ADJUST_CFA_OFFSET 4
	jmp error_code
	CFI_ENDPROC
L
Linus Torvalds 已提交
956

957 958 959 960 961
/*
 * divide_error: push a zero error code and the address of do_divide_error,
 * then join the common error_code path.
 */
ENTRY(divide_error)
	RING0_INT_FRAME
	pushl $0			# no error code
	CFI_ADJUST_CFA_OFFSET 4
	pushl $do_divide_error
	CFI_ADJUST_CFA_OFFSET 4
	jmp error_code
	CFI_ENDPROC
L
Linus Torvalds 已提交
965 966 967

/*
 * machine_check (CONFIG_X86_MCE only): push a zero error code and the
 * handler address read from the machine_check_vector variable (a memory
 * operand, not an immediate), then join the common error_code path.
 */
#ifdef CONFIG_X86_MCE
ENTRY(machine_check)
	RING0_INT_FRAME
	pushl $0
	CFI_ADJUST_CFA_OFFSET 4
	pushl machine_check_vector
	CFI_ADJUST_CFA_OFFSET 4
	jmp error_code
	CFI_ENDPROC
#endif

/*
 * spurious_interrupt_bug: push a zero error code and the address of
 * do_spurious_interrupt_bug, then join the common error_code path.
 */
ENTRY(spurious_interrupt_bug)
	RING0_INT_FRAME
	pushl $0
	CFI_ADJUST_CFA_OFFSET 4
	pushl $do_spurious_interrupt_bug
	CFI_ADJUST_CFA_OFFSET 4
	jmp error_code
	CFI_ENDPROC
L
Linus Torvalds 已提交
985

986 987 988 989 990 991 992 993 994 995 996 997 998
/*
 * kernel_thread_helper
 *   In:  %ebx = address of the function to run, %edx = its argument.
 * The argument is copied to %eax and also pushed on the stack before the
 * indirect call through %ebx.  The value the function leaves in %eax is
 * then pushed and do_exit is called.  No return instruction follows the
 * call to do_exit.
 */
ENTRY(kernel_thread_helper)
	pushl $0		# fake return address for unwinder
	CFI_STARTPROC
	movl %edx,%eax
	push %edx
	CFI_ADJUST_CFA_OFFSET 4
	call *%ebx
	push %eax
	CFI_ADJUST_CFA_OFFSET 4
	call do_exit
	CFI_ENDPROC
ENDPROC(kernel_thread_helper)

999
/*
 * The system call table is placed in .rodata.  syscall_table_size is the
 * distance in bytes from sys_call_table to the end of the included table
 * and feeds the nr_syscalls definition near the top of this file.
 */
.section .rodata,"a"
#include "syscall_table.S"

syscall_table_size=(.-sys_call_table)