/*
 *  linux/arch/i386/entry.S
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 */

/*
 * entry.S contains the system-call and fault low-level handling routines.
 * This also contains the timer-interrupt handler, as well as all interrupts
 * and faults that can result in a task-switch.
 *
 * NOTE: This code handles signal-recognition, which happens every time
 * after a timer-interrupt and after each system call.
 *
 * I changed all the .align's to 4 (16 byte alignment), as that's faster
 * on a 486.
 *
 * Stack layout in 'ret_from_system_call':
 * 	ptrace needs to have all regs on the stack.
 *	if the order here is changed, it needs to be
 *	updated in fork.c:copy_process, signal.c:do_signal,
 *	ptrace.c and ptrace.h
 *
 *	 0(%esp) - %ebx
 *	 4(%esp) - %ecx
 *	 8(%esp) - %edx
 *	 C(%esp) - %esi
 *	10(%esp) - %edi
 *	14(%esp) - %ebp
 *	18(%esp) - %eax
 *	1C(%esp) - %ds
 *	20(%esp) - %es
 *	24(%esp) - %gs
 *	28(%esp) - orig_eax
 *	2C(%esp) - %eip
 *	30(%esp) - %cs
 *	34(%esp) - %eflags
 *	38(%esp) - %oldesp
 *	3C(%esp) - %oldss
 *
 * "current" is in register %ebx during any slow entries.
 */

#include <linux/linkage.h>
#include <asm/thread_info.h>
#include <asm/irqflags.h>
#include <asm/errno.h>
#include <asm/segment.h>
#include <asm/smp.h>
#include <asm/page.h>
#include <asm/desc.h>
#include <asm/percpu.h>
#include <asm/dwarf2.h>
#include "irq_vectors.h"

/* Number of entries in sys_call_table (each entry is a 4-byte pointer). */
#define nr_syscalls ((syscall_table_size)/4)

/* EFLAGS bit masks */
CF_MASK		= 0x00000001
TF_MASK		= 0x00000100
IF_MASK		= 0x00000200
DF_MASK		= 0x00000400
NT_MASK		= 0x00004000
VM_MASK		= 0x00020000

/* These are replaced for paravirtualization */
#define DISABLE_INTERRUPTS		cli
#define ENABLE_INTERRUPTS		sti
#define ENABLE_INTERRUPTS_SYSEXIT	sti; sysexit
#define INTERRUPT_RETURN		iret
#define GET_CR0_INTO_EAX		movl %cr0, %eax

/*
 * preempt_stop is placed at the start of exception-return paths.
 * With CONFIG_PREEMPT it disables interrupts and tells the irq-flags
 * tracer; without it the macro is empty and resume_kernel is simply an
 * alias for restore_nocheck (no kernel-preemption check is assembled).
 */
#ifdef CONFIG_PREEMPT
#define preempt_stop		DISABLE_INTERRUPTS; TRACE_IRQS_OFF
#else
#define preempt_stop
#define resume_kernel		restore_nocheck
#endif

/*
 * TRACE_IRQS_IRET: used right before restoring registers for an iret.
 * With CONFIG_TRACE_IRQFLAGS, emits TRACE_IRQS_ON when the EFLAGS image
 * saved in the frame at %esp has IF set; otherwise expands to nothing.
 * Modifies the CPU flags.
 */
.macro TRACE_IRQS_IRET
#ifdef CONFIG_TRACE_IRQFLAGS
	testl $IF_MASK,PT_EFLAGS(%esp)     # interrupts off?
	jz 1f
	TRACE_IRQS_ON
1:
#endif
.endm

/*
 * Continuation point used after do_notify_resume: with CONFIG_VM86 the
 * return goes back through check_userspace, otherwise straight to
 * resume_userspace.
 */
#ifdef CONFIG_VM86
#define resume_userspace_sig	check_userspace
#else
#define resume_userspace_sig	resume_userspace
#endif

/*
 * SAVE_ALL: push the register part of the frame. Clears DF, then pushes
 * %gs, %es, %ds, %eax, %ebp, %edi, %esi, %edx, %ecx, %ebx in that order
 * (so %ebx ends up at 0(%esp)), keeping the CFI annotations in step.
 * Finally loads __USER_DS into %ds and %es and __KERNEL_PDA into %gs.
 * Clobbers %edx (after it has been saved).
 */
#define SAVE_ALL \
	cld; \
	pushl %gs; \
	CFI_ADJUST_CFA_OFFSET 4;\
	/*CFI_REL_OFFSET gs, 0;*/\
	pushl %es; \
	CFI_ADJUST_CFA_OFFSET 4;\
	/*CFI_REL_OFFSET es, 0;*/\
	pushl %ds; \
	CFI_ADJUST_CFA_OFFSET 4;\
	/*CFI_REL_OFFSET ds, 0;*/\
	pushl %eax; \
	CFI_ADJUST_CFA_OFFSET 4;\
	CFI_REL_OFFSET eax, 0;\
	pushl %ebp; \
	CFI_ADJUST_CFA_OFFSET 4;\
	CFI_REL_OFFSET ebp, 0;\
	pushl %edi; \
	CFI_ADJUST_CFA_OFFSET 4;\
	CFI_REL_OFFSET edi, 0;\
	pushl %esi; \
	CFI_ADJUST_CFA_OFFSET 4;\
	CFI_REL_OFFSET esi, 0;\
	pushl %edx; \
	CFI_ADJUST_CFA_OFFSET 4;\
	CFI_REL_OFFSET edx, 0;\
	pushl %ecx; \
	CFI_ADJUST_CFA_OFFSET 4;\
	CFI_REL_OFFSET ecx, 0;\
	pushl %ebx; \
	CFI_ADJUST_CFA_OFFSET 4;\
	CFI_REL_OFFSET ebx, 0;\
	movl $(__USER_DS), %edx; \
	movl %edx, %ds; \
	movl %edx, %es; \
	movl $(__KERNEL_PDA), %edx; \
	movl %edx, %gs

/*
 * RESTORE_INT_REGS: pop the general-purpose registers in the reverse of
 * the SAVE_ALL push order: %ebx, %ecx, %edx, %esi, %edi, %ebp, %eax.
 * Segment registers are not touched here.
 */
#define RESTORE_INT_REGS \
	popl %ebx;	\
	CFI_ADJUST_CFA_OFFSET -4;\
	CFI_RESTORE ebx;\
	popl %ecx;	\
	CFI_ADJUST_CFA_OFFSET -4;\
	CFI_RESTORE ecx;\
	popl %edx;	\
	CFI_ADJUST_CFA_OFFSET -4;\
	CFI_RESTORE edx;\
	popl %esi;	\
	CFI_ADJUST_CFA_OFFSET -4;\
	CFI_RESTORE esi;\
	popl %edi;	\
	CFI_ADJUST_CFA_OFFSET -4;\
	CFI_RESTORE edi;\
	popl %ebp;	\
	CFI_ADJUST_CFA_OFFSET -4;\
	CFI_RESTORE ebp;\
	popl %eax;	\
	CFI_ADJUST_CFA_OFFSET -4;\
	CFI_RESTORE eax

/*
 * RESTORE_REGS: RESTORE_INT_REGS followed by popping %ds, %es and %gs.
 * Each segment pop (labels 1, 2, 3) has an __ex_table entry pointing at
 * a .fixup stub (labels 4, 5, 6) that stores 0 into the stack slot and
 * retries the pop, so a faulting selector is replaced by the null
 * selector. Uses numeric local labels 1-6.
 */
#define RESTORE_REGS	\
	RESTORE_INT_REGS; \
1:	popl %ds;	\
	CFI_ADJUST_CFA_OFFSET -4;\
	/*CFI_RESTORE ds;*/\
2:	popl %es;	\
	CFI_ADJUST_CFA_OFFSET -4;\
	/*CFI_RESTORE es;*/\
3:	popl %gs;	\
	CFI_ADJUST_CFA_OFFSET -4;\
	/*CFI_RESTORE gs;*/\
.pushsection .fixup,"ax";	\
4:	movl $0,(%esp);	\
	jmp 1b;		\
5:	movl $0,(%esp);	\
	jmp 2b;		\
6:	movl $0,(%esp);	\
	jmp 3b;		\
.section __ex_table,"a";\
	.align 4;	\
	.long 1b,4b;	\
	.long 2b,5b;	\
	.long 3b,6b;	\
.popsection

/*
 * RING0_INT_FRAME: open a CFI signal-frame procedure for an entry point
 * reached with three words on the stack; the CFA is %esp + 3*4 and the
 * return %eip is recorded at CFA - 3*4 (i.e. at 0(%esp)).
 */
#define RING0_INT_FRAME \
	CFI_STARTPROC simple;\
	CFI_SIGNAL_FRAME;\
	CFI_DEF_CFA esp, 3*4;\
	/*CFI_OFFSET cs, -2*4;*/\
	CFI_OFFSET eip, -3*4

/*
 * RING0_EC_FRAME: like RING0_INT_FRAME but for entry points reached with
 * one extra word (the error code) on the stack; the CFA is %esp + 4*4
 * and the return %eip is recorded at CFA - 3*4 (i.e. at 4(%esp)).
 */
#define RING0_EC_FRAME \
	CFI_STARTPROC simple;\
	CFI_SIGNAL_FRAME;\
	CFI_DEF_CFA esp, 4*4;\
	/*CFI_OFFSET cs, -2*4;*/\
	CFI_OFFSET eip, -3*4

/*
 * RING0_PTREGS_FRAME: open a CFI signal-frame procedure for code that
 * runs with a full saved-register frame at %esp. The CFA is placed at
 * the PT_OLDESP slot and %eip plus the general-purpose registers are
 * described relative to it using the PT_* offsets.
 */
#define RING0_PTREGS_FRAME \
	CFI_STARTPROC simple;\
	CFI_SIGNAL_FRAME;\
	CFI_DEF_CFA esp, PT_OLDESP-PT_EBX;\
	/*CFI_OFFSET cs, PT_CS-PT_OLDESP;*/\
	CFI_OFFSET eip, PT_EIP-PT_OLDESP;\
	/*CFI_OFFSET es, PT_ES-PT_OLDESP;*/\
	/*CFI_OFFSET ds, PT_DS-PT_OLDESP;*/\
	CFI_OFFSET eax, PT_EAX-PT_OLDESP;\
	CFI_OFFSET ebp, PT_EBP-PT_OLDESP;\
	CFI_OFFSET edi, PT_EDI-PT_OLDESP;\
	CFI_OFFSET esi, PT_ESI-PT_OLDESP;\
	CFI_OFFSET edx, PT_EDX-PT_OLDESP;\
	CFI_OFFSET ecx, PT_ECX-PT_OLDESP;\
	CFI_OFFSET ebx, PT_EBX-PT_OLDESP

/*
 * ret_from_fork: calls schedule_tail with %eax preserved across the
 * call, loads the thread_info pointer into %ebp, resets EFLAGS to
 * 0x0202 (IF set) and continues at syscall_exit.
 */
ENTRY(ret_from_fork)
	CFI_STARTPROC
	pushl %eax
	CFI_ADJUST_CFA_OFFSET 4
	call schedule_tail
	GET_THREAD_INFO(%ebp)
	popl %eax
	CFI_ADJUST_CFA_OFFSET -4
	pushl $0x0202			# Reset kernel eflags
	CFI_ADJUST_CFA_OFFSET 4
	popfl
	CFI_ADJUST_CFA_OFFSET -4
	jmp syscall_exit
	CFI_ENDPROC

/*
 * Return to user mode is not as complex as all this looks,
 * but we want the default path for a system call return to
 * go as quickly as possible which is why some of this is
 * less clear than it otherwise should be.
 */

	# userspace resumption stub bypassing syscall exit tracing
	/*
	 * Common return path for exceptions (ret_from_exception) and
	 * interrupts (ret_from_intr). Expects a full saved-register frame
	 * at %esp. Loads the thread_info pointer into %ebp, then looks at
	 * the saved EFLAGS.VM bit and the RPL of the saved CS to choose
	 * between resume_kernel and resume_userspace.
	 */
	ALIGN
	RING0_PTREGS_FRAME
ret_from_exception:
	preempt_stop
ret_from_intr:
	GET_THREAD_INFO(%ebp)
check_userspace:
	movl PT_EFLAGS(%esp), %eax	# mix EFLAGS and CS
	movb PT_CS(%esp), %al
	andl $(VM_MASK | SEGMENT_RPL_MASK), %eax
	cmpl $USER_RPL, %eax
	jb resume_kernel		# not returning to v8086 or userspace

ENTRY(resume_userspace)
	DISABLE_INTERRUPTS		# make sure we don't miss an interrupt
					# setting need_resched or sigpending
					# between sampling and the iret
	movl TI_flags(%ebp), %ecx
	andl $_TIF_WORK_MASK, %ecx	# is there any work to be done on
					# int/exception return?
	jne work_pending
	jmp restore_all

#ifdef CONFIG_PREEMPT
	/*
	 * Returning to kernel mode: reschedule only when preempt_count is
	 * zero, _TIF_NEED_RESCHED is set and the interrupted context had
	 * interrupts enabled; loop until need_resched is clear.
	 */
ENTRY(resume_kernel)
	DISABLE_INTERRUPTS
	cmpl $0,TI_preempt_count(%ebp)	# non-zero preempt_count ?
	jnz restore_nocheck
need_resched:
	movl TI_flags(%ebp), %ecx	# need_resched set ?
	testb $_TIF_NEED_RESCHED, %cl
	jz restore_all
	testl $IF_MASK,PT_EFLAGS(%esp)	# interrupts off (exception path) ?
	jz restore_all
	call preempt_schedule_irq
	jmp need_resched
#endif
	CFI_ENDPROC

/* SYSENTER_RETURN points to after the "sysenter" instruction in
   the vsyscall page.  See vsyscall-sysentry.S, which defines the symbol.  */

/*
 * sysenter_entry: switches to the stack read from TSS_sysenter_esp0(%esp),
 * then builds the top of the frame by hand (__USER_DS, %ebp as the old
 * stack pointer, eflags, __USER_CS, the thread's sysenter_return address).
 * The sixth argument is fetched from (%ebp) after a range check, with an
 * exception-table entry redirecting a faulting load to syscall_fault.
 * When no work flags are pending the exit is done with sysexit, with the
 * saved eip in %edx and the saved user stack pointer in %ecx.
 */
	# sysenter call handler stub
ENTRY(sysenter_entry)
	CFI_STARTPROC simple
	CFI_SIGNAL_FRAME
	CFI_DEF_CFA esp, 0
	CFI_REGISTER esp, ebp
	movl TSS_sysenter_esp0(%esp),%esp
sysenter_past_esp:
	/*
	 * No need to follow this irqs on/off section: the syscall
	 * disabled irqs and here we enable it straight after entry:
	 */
	ENABLE_INTERRUPTS
	pushl $(__USER_DS)
	CFI_ADJUST_CFA_OFFSET 4
	/*CFI_REL_OFFSET ss, 0*/
	pushl %ebp
	CFI_ADJUST_CFA_OFFSET 4
	CFI_REL_OFFSET esp, 0
	pushfl
	CFI_ADJUST_CFA_OFFSET 4
	pushl $(__USER_CS)
	CFI_ADJUST_CFA_OFFSET 4
	/*CFI_REL_OFFSET cs, 0*/
	/*
	 * Push current_thread_info()->sysenter_return to the stack.
	 * A tiny bit of offset fixup is necessary - 4*4 means the 4 words
	 * pushed above; +8 corresponds to copy_thread's esp0 setting.
	 */
	pushl (TI_sysenter_return-THREAD_SIZE+8+4*4)(%esp)
	CFI_ADJUST_CFA_OFFSET 4
	CFI_REL_OFFSET eip, 0

/*
 * Load the potential sixth argument from user stack.
 * Careful about security.
 */
	cmpl $__PAGE_OFFSET-3,%ebp
	jae syscall_fault
1:	movl (%ebp),%ebp
.section __ex_table,"a"
	.align 4
	.long 1b,syscall_fault
.previous

	pushl %eax
	CFI_ADJUST_CFA_OFFSET 4
	SAVE_ALL
	GET_THREAD_INFO(%ebp)

	/* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */
	testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
	jnz syscall_trace_entry
	cmpl $(nr_syscalls), %eax
	jae syscall_badsys
	call *sys_call_table(,%eax,4)
	movl %eax,PT_EAX(%esp)
	DISABLE_INTERRUPTS
	TRACE_IRQS_OFF
	movl TI_flags(%ebp), %ecx
	testw $_TIF_ALLWORK_MASK, %cx
	jne syscall_exit_work
/* if something modifies registers it must also disable sysexit */
	movl PT_EIP(%esp), %edx
	movl PT_OLDESP(%esp), %ecx
	xorl %ebp,%ebp
	TRACE_IRQS_ON
1:	mov  PT_GS(%esp), %gs
	ENABLE_INTERRUPTS_SYSEXIT
	CFI_ENDPROC
	/* if loading %gs faults, zero the saved selector and retry */
.pushsection .fixup,"ax"
2:	movl $0,PT_GS(%esp)
	jmp 1b
.section __ex_table,"a"
	.align 4
	.long 1b,2b
.popsection

	/*
	 * system_call: saves orig_eax and all registers, loads the
	 * thread_info pointer into %ebp, range-checks the syscall number
	 * in %eax against nr_syscalls and dispatches through
	 * sys_call_table. The result is stored in the saved-%eax slot.
	 * syscall_exit re-checks the work flags with interrupts disabled;
	 * restore_all / restore_nocheck restore the registers and iret.
	 * A faulting iret is redirected via __ex_table to iret_exc, and
	 * ldt_ss handles a return to user space on an LDT stack segment.
	 */
	# system call handler stub
ENTRY(system_call)
	RING0_INT_FRAME			# can't unwind into user space anyway
	pushl %eax			# save orig_eax
	CFI_ADJUST_CFA_OFFSET 4
	SAVE_ALL
	GET_THREAD_INFO(%ebp)
	testl $TF_MASK,PT_EFLAGS(%esp)
	jz no_singlestep
	orl $_TIF_SINGLESTEP,TI_flags(%ebp)
no_singlestep:
					# system call tracing in operation / emulation
	/* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */
	testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
	jnz syscall_trace_entry
	cmpl $(nr_syscalls), %eax
	jae syscall_badsys
syscall_call:
	call *sys_call_table(,%eax,4)
	movl %eax,PT_EAX(%esp)		# store the return value
syscall_exit:
	DISABLE_INTERRUPTS		# make sure we don't miss an interrupt
					# setting need_resched or sigpending
					# between sampling and the iret
	TRACE_IRQS_OFF
	movl TI_flags(%ebp), %ecx
	testw $_TIF_ALLWORK_MASK, %cx	# current->work
	jne syscall_exit_work

restore_all:
	movl PT_EFLAGS(%esp), %eax	# mix EFLAGS, SS and CS
	# Warning: PT_OLDSS(%esp) contains the wrong/random values if we
	# are returning to the kernel.
	# See comments in process.c:copy_thread() for details.
	movb PT_OLDSS(%esp), %ah
	movb PT_CS(%esp), %al
	andl $(VM_MASK | (SEGMENT_TI_MASK << 8) | SEGMENT_RPL_MASK), %eax
	cmpl $((SEGMENT_LDT << 8) | USER_RPL), %eax
	CFI_REMEMBER_STATE
	je ldt_ss			# returning to user-space with LDT SS
restore_nocheck:
	TRACE_IRQS_IRET
restore_nocheck_notrace:
	RESTORE_REGS
	addl $4, %esp			# skip orig_eax/error_code
	CFI_ADJUST_CFA_OFFSET -4
1:	INTERRUPT_RETURN
.section .fixup,"ax"
iret_exc:
	TRACE_IRQS_ON
	ENABLE_INTERRUPTS
	pushl $0			# no error code
	pushl $do_iret_error
	jmp error_code
.previous
.section __ex_table,"a"
	.align 4
	.long 1b,iret_exc
.previous

	CFI_RESTORE_STATE
ldt_ss:
	larl PT_OLDSS(%esp), %eax
	jnz restore_nocheck
	testl $0x00400000, %eax		# returning to 32bit stack?
	jnz restore_nocheck		# all right, normal return
	/* If returning to userspace with 16bit stack,
	 * try to fix the higher word of ESP, as the CPU
	 * won't restore it.
	 * This is an "official" bug of all the x86-compatible
	 * CPUs, which we can try to work around to make
	 * dosemu and wine happy. */
	movl PT_OLDESP(%esp), %eax
	movl %esp, %edx
	call patch_espfix_desc
	pushl $__ESPFIX_SS
	CFI_ADJUST_CFA_OFFSET 4
	pushl %eax
	CFI_ADJUST_CFA_OFFSET 4
	DISABLE_INTERRUPTS
	TRACE_IRQS_OFF
	lss (%esp), %esp
	CFI_ADJUST_CFA_OFFSET -8
	jmp restore_nocheck
	CFI_ENDPROC

	# perform work that needs to be done immediately before resumption
	/*
	 * Entered with the masked thread-info flags in %ecx and the
	 * thread_info pointer in %ebp. Reschedules while
	 * _TIF_NEED_RESCHED is set; otherwise hands the frame to
	 * do_notify_resume (%eax = pt_regs pointer, %edx = 0).
	 */
	ALIGN
	RING0_PTREGS_FRAME		# can't unwind into user space anyway
work_pending:
	testb $_TIF_NEED_RESCHED, %cl
	jz work_notifysig
work_resched:
	call schedule
	DISABLE_INTERRUPTS		# make sure we don't miss an interrupt
					# setting need_resched or sigpending
					# between sampling and the iret
	TRACE_IRQS_OFF
	movl TI_flags(%ebp), %ecx
	andl $_TIF_WORK_MASK, %ecx	# is there any work to be done other
					# than syscall tracing?
	jz restore_all
	testb $_TIF_NEED_RESCHED, %cl
	jnz work_resched

work_notifysig:				# deal with pending signals and
					# notify-resume requests
#ifdef CONFIG_VM86
	testl $VM_MASK, PT_EFLAGS(%esp)
	movl %esp, %eax
	jne work_notifysig_v86		# returning to vm86-space
	xorl %edx, %edx
	call do_notify_resume
	jmp resume_userspace_sig

	ALIGN
work_notifysig_v86:
	pushl %ecx			# save ti_flags for do_notify_resume
	CFI_ADJUST_CFA_OFFSET 4
	call save_v86_state		# %eax contains pt_regs pointer
	popl %ecx
	CFI_ADJUST_CFA_OFFSET -4
	movl %eax, %esp
#else
	movl %esp, %eax
#endif
	xorl %edx, %edx
	call do_notify_resume
	jmp resume_userspace_sig

	# perform syscall entry tracing
	/*
	 * Presets the saved %eax to -ENOSYS and calls do_syscall_trace
	 * with %eax = pt_regs pointer and %edx = 0. A non-zero return skips
	 * the system call; otherwise the (possibly changed) orig_eax is
	 * reloaded and range-checked again before dispatching.
	 */
	ALIGN
syscall_trace_entry:
	movl $-ENOSYS,PT_EAX(%esp)
	movl %esp, %eax
	xorl %edx,%edx
	call do_syscall_trace
	cmpl $0, %eax
	jne resume_userspace		# ret != 0 -> running under PTRACE_SYSEMU,
					# so must skip actual syscall
	movl PT_ORIG_EAX(%esp), %eax
	cmpl $(nr_syscalls), %eax
	jnae syscall_call
	jmp syscall_exit

	# perform syscall exit tracing
	ALIGN
syscall_exit_work:
	testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP), %cl
	jz work_pending
	TRACE_IRQS_ON
	ENABLE_INTERRUPTS		# could let do_syscall_trace() call
					# schedule() instead
	movl %esp, %eax
	movl $1, %edx
	call do_syscall_trace
	jmp resume_userspace
	CFI_ENDPROC

	/*
	 * syscall_fault: reached from sysenter_entry when the sixth
	 * argument cannot be read; completes the frame and returns
	 * -EFAULT in the saved %eax.
	 * syscall_badsys: reached with a complete frame when the syscall
	 * number is out of range; returns -ENOSYS in the saved %eax.
	 */
	RING0_INT_FRAME			# can't unwind into user space anyway
syscall_fault:
	pushl %eax			# save orig_eax
	CFI_ADJUST_CFA_OFFSET 4
	SAVE_ALL
	GET_THREAD_INFO(%ebp)
	movl $-EFAULT,PT_EAX(%esp)
	jmp resume_userspace

syscall_badsys:
	movl $-ENOSYS,PT_EAX(%esp)
	jmp resume_userspace
	CFI_ENDPROC

/*
 * FIXUP_ESPFIX_STACK: reads the base of the GDT_ENTRY_ESPFIX_SS
 * descriptor from this CPU's GDT, adds %esp to it and switches to
 * __KERNEL_DS:<that address> with lss. Clobbers %eax and %ebx; on exit
 * %eax holds the new %esp value.
 */
#define FIXUP_ESPFIX_STACK \
	/* since we are on a wrong stack, we cant make it a C code :( */ \
	movl %gs:PDA_cpu, %ebx; \
	PER_CPU(cpu_gdt_descr, %ebx); \
	movl GDS_address(%ebx), %ebx; \
	GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah); \
	addl %esp, %eax; \
	pushl $__KERNEL_DS; \
	CFI_ADJUST_CFA_OFFSET 4; \
	pushl %eax; \
	CFI_ADJUST_CFA_OFFSET 4; \
	lss (%esp), %esp; \
	CFI_ADJUST_CFA_OFFSET -8;
/*
 * UNWIND_ESPFIX_STACK: if %ss is __ESPFIX_SS, load __KERNEL_DS into
 * %ds and %es and run FIXUP_ESPFIX_STACK to get back onto the normal
 * stack; otherwise do nothing. Clobbers %eax (and whatever
 * FIXUP_ESPFIX_STACK clobbers). Uses numeric local label 27.
 */
#define UNWIND_ESPFIX_STACK \
	movl %ss, %eax; \
	/* see if on espfix stack */ \
	cmpw $__ESPFIX_SS, %ax; \
	jne 27f; \
	movl $__KERNEL_DS, %eax; \
	movl %eax, %ds; \
	movl %eax, %es; \
	/* switch to normal stack */ \
	FIXUP_ESPFIX_STACK; \
27:;

/*
 * Build the entry stubs and pointer table with
 * some assembler magic.
 *
 * For each of the NR_IRQS vectors a stub in .text pushes ~vector and
 * jumps to common_interrupt, and the stub's address is appended to the
 * 'interrupt' table in .data.
 */
.data
ENTRY(interrupt)
.text

vector=0
ENTRY(irq_entries_start)
	RING0_INT_FRAME
.rept NR_IRQS
	ALIGN
 .if vector
	CFI_ADJUST_CFA_OFFSET -4
 .endif
1:	pushl $~(vector)
	CFI_ADJUST_CFA_OFFSET 4
	jmp common_interrupt
.data
	.long 1b
.text
vector=vector+1
.endr

/*
 * the CPU automatically disables interrupts when executing an IRQ vector,
 * so IRQ-flags tracing has to follow that:
 */
	ALIGN
common_interrupt:
	SAVE_ALL
	TRACE_IRQS_OFF
	movl %esp,%eax
	call do_IRQ
	jmp ret_from_intr
	CFI_ENDPROC

/*
 * BUILD_INTERRUPT(name, nr): emit an entry point called name that pushes
 * ~nr, saves all registers, and calls smp_<name> with the pt_regs
 * pointer in %eax before returning through ret_from_intr.
 * TRACE_IRQS_OFF is terminated with ';' like every other statement so
 * the macro does not depend on how TRACE_IRQS_OFF happens to expand.
 */
#define BUILD_INTERRUPT(name, nr)	\
ENTRY(name)				\
	RING0_INT_FRAME;		\
	pushl $~(nr);			\
	CFI_ADJUST_CFA_OFFSET 4;	\
	SAVE_ALL;			\
	TRACE_IRQS_OFF;			\
	movl %esp,%eax;			\
	call smp_/**/name;		\
	jmp ret_from_intr;		\
	CFI_ENDPROC

/* The include is where all of the SMP etc. interrupts come from */
#include "entry_arch.h"

/*
 * page_fault pushes the address of do_page_fault and falls into
 * error_code, the common path shared by the exception stubs.
 *
 * error_code expects, from the top of the stack: handler address,
 * error code, then the exception frame. It saves the registers in the
 * same order as SAVE_ALL (the handler address occupies the slot that
 * will hold %gs), then calls the handler with %eax = pt_regs pointer and
 * %edx = error code, after setting the orig_eax slot to -1 and loading
 * __USER_DS into %ds/%es. Returns through ret_from_exception.
 */
KPROBE_ENTRY(page_fault)
	RING0_EC_FRAME
	pushl $do_page_fault
	CFI_ADJUST_CFA_OFFSET 4
	ALIGN
error_code:
	/* the function address is in %gs's slot on the stack */
	pushl %es
	CFI_ADJUST_CFA_OFFSET 4
	/*CFI_REL_OFFSET es, 0*/
	pushl %ds
	CFI_ADJUST_CFA_OFFSET 4
	/*CFI_REL_OFFSET ds, 0*/
	pushl %eax
	CFI_ADJUST_CFA_OFFSET 4
	CFI_REL_OFFSET eax, 0
	pushl %ebp
	CFI_ADJUST_CFA_OFFSET 4
	CFI_REL_OFFSET ebp, 0
	pushl %edi
	CFI_ADJUST_CFA_OFFSET 4
	CFI_REL_OFFSET edi, 0
	pushl %esi
	CFI_ADJUST_CFA_OFFSET 4
	CFI_REL_OFFSET esi, 0
	pushl %edx
	CFI_ADJUST_CFA_OFFSET 4
	CFI_REL_OFFSET edx, 0
	pushl %ecx
	CFI_ADJUST_CFA_OFFSET 4
	CFI_REL_OFFSET ecx, 0
	pushl %ebx
	CFI_ADJUST_CFA_OFFSET 4
	CFI_REL_OFFSET ebx, 0
	cld
	pushl %gs
	CFI_ADJUST_CFA_OFFSET 4
	/*CFI_REL_OFFSET gs, 0*/
	movl $(__KERNEL_PDA), %ecx
	movl %ecx, %gs
	UNWIND_ESPFIX_STACK
	popl %ecx
	CFI_ADJUST_CFA_OFFSET -4
	/*CFI_REGISTER es, ecx*/
	movl PT_GS(%esp), %edi		# get the function address
	movl PT_ORIG_EAX(%esp), %edx	# get the error code
	movl $-1, PT_ORIG_EAX(%esp)	# no syscall to restart
	mov  %ecx, PT_GS(%esp)
	/*CFI_REL_OFFSET gs, ES*/
	movl $(__USER_DS), %ecx
	movl %ecx, %ds
	movl %ecx, %es
	movl %esp,%eax			# pt_regs pointer
	call *%edi
	jmp ret_from_exception
	CFI_ENDPROC
KPROBE_END(page_fault)

/*
 * coprocessor_error: entered with a 3-word frame; pushes 0 in the
 * error-code slot and the address of do_coprocessor_error, then joins
 * error_code.
 */
ENTRY(coprocessor_error)
	RING0_INT_FRAME
	pushl $0
	CFI_ADJUST_CFA_OFFSET 4
	pushl $do_coprocessor_error
	CFI_ADJUST_CFA_OFFSET 4
	jmp error_code
	CFI_ENDPROC

/*
 * simd_coprocessor_error: entered with a 3-word frame; pushes 0 in the
 * error-code slot and the address of do_simd_coprocessor_error, then
 * joins error_code.
 */
ENTRY(simd_coprocessor_error)
	RING0_INT_FRAME
	pushl $0
	CFI_ADJUST_CFA_OFFSET 4
	pushl $do_simd_coprocessor_error
	CFI_ADJUST_CFA_OFFSET 4
	jmp error_code
	CFI_ENDPROC

/*
 * device_not_available: saves a full frame with orig_eax = -1, then
 * reads CR0. If bit 2 (EM) is set it calls math_emulate with one extra
 * zero word pushed on the stack; otherwise it calls math_state_restore.
 * Both paths return through ret_from_exception.
 */
ENTRY(device_not_available)
	RING0_INT_FRAME
	pushl $-1			# mark this as an int
	CFI_ADJUST_CFA_OFFSET 4
	SAVE_ALL
	GET_CR0_INTO_EAX
	testl $0x4, %eax		# EM (math emulation bit)
	jne device_not_available_emulate
	preempt_stop
	call math_state_restore
	jmp ret_from_exception
device_not_available_emulate:
	pushl $0			# temporary storage for ORIG_EIP
	CFI_ADJUST_CFA_OFFSET 4
	call math_emulate
	addl $4, %esp
	CFI_ADJUST_CFA_OFFSET -4
	jmp ret_from_exception
	CFI_ENDPROC

/*
 * Debug traps and NMI can happen at the one SYSENTER instruction
 * that sets up the real kernel stack. Check here, since we can't
 * allow the wrong stack to be used.
 *
 * "TSS_sysenter_esp0+12" is because the NMI/debug handler will have
 * already pushed 3 words if it hits on the sysenter instruction:
 * eflags, cs and eip.
 *
 * We just load the right stack, and push the three (known) values
 * by hand onto the new stack - while updating the return eip past
 * the instruction that would have done it for sysenter.
 */
/*
 * FIX_STACK(offset, ok, label):
 *   offset - byte offset added to TSS_sysenter_esp0 when loading the
 *            stack pointer relative to the current %esp
 *   ok     - label to branch to when the saved CS at 4(%esp) is not
 *            __KERNEL_CS (no fix needed)
 *   label  - label placed on the instruction that loads the new %esp
 * After switching stacks it pushes eflags, __KERNEL_CS and the address
 * of sysenter_past_esp as the new return frame.
 */
#define FIX_STACK(offset, ok, label)		\
	cmpw $__KERNEL_CS,4(%esp);		\
	jne ok;					\
label:						\
	movl TSS_sysenter_esp0+offset(%esp),%esp;	\
	CFI_DEF_CFA esp, 0;			\
	CFI_UNDEFINED eip;			\
	pushfl;					\
	CFI_ADJUST_CFA_OFFSET 4;		\
	pushl $__KERNEL_CS;			\
	CFI_ADJUST_CFA_OFFSET 4;		\
	pushl $sysenter_past_esp;		\
	CFI_ADJUST_CFA_OFFSET 4;		\
	CFI_REL_OFFSET eip, 0

/*
 * debug: if the saved eip is sysenter_entry, FIX_STACK moves onto the
 * proper kernel stack first. Then saves a full frame with orig_eax = -1
 * and calls do_debug with %eax = pt_regs pointer and %edx = 0.
 */
KPROBE_ENTRY(debug)
	RING0_INT_FRAME
	cmpl $sysenter_entry,(%esp)
	jne debug_stack_correct
	FIX_STACK(12, debug_stack_correct, debug_esp_fix_insn)
debug_stack_correct:
	pushl $-1			# mark this as an int
	CFI_ADJUST_CFA_OFFSET 4
	SAVE_ALL
	xorl %edx,%edx			# error code 0
	movl %esp,%eax			# pt_regs pointer
	call do_debug
	jmp ret_from_exception
	CFI_ENDPROC
KPROBE_END(debug)

/*
 * NMI is doubly nasty. It can happen _while_ we're handling
 * a debug fault, and the debug fault hasn't yet been able to
 * clear up the stack. So we first check whether we got  an
 * NMI on the sysenter entry path, but after that we need to
 * check whether we got an NMI on the debug path where the debug
 * fault happened on the sysenter path.
 *
 * An NMI taken while %ss is __ESPFIX_SS is handled separately in
 * nmi_espfix_stack, which copies the iret frame, runs do_nmi on the
 * normal stack and switches back with lss before the iret.
 */
KPROBE_ENTRY(nmi)
	RING0_INT_FRAME
	pushl %eax
	CFI_ADJUST_CFA_OFFSET 4
	movl %ss, %eax
	cmpw $__ESPFIX_SS, %ax
	popl %eax
	CFI_ADJUST_CFA_OFFSET -4
	je nmi_espfix_stack
	cmpl $sysenter_entry,(%esp)
	je nmi_stack_fixup
	pushl %eax
	CFI_ADJUST_CFA_OFFSET 4
	movl %esp,%eax
	/* Do not access memory above the end of our stack page,
	 * it might not exist.
	 */
	andl $(THREAD_SIZE-1),%eax
	cmpl $(THREAD_SIZE-20),%eax
	popl %eax
	CFI_ADJUST_CFA_OFFSET -4
	jae nmi_stack_correct
	cmpl $sysenter_entry,12(%esp)
	je nmi_debug_stack_check
nmi_stack_correct:
	/* We have a RING0_INT_FRAME here */
	pushl %eax
	CFI_ADJUST_CFA_OFFSET 4
	SAVE_ALL
	xorl %edx,%edx		# zero error code
	movl %esp,%eax		# pt_regs pointer
	call do_nmi
	jmp restore_nocheck_notrace
	CFI_ENDPROC

nmi_stack_fixup:
	RING0_INT_FRAME
	FIX_STACK(12,nmi_stack_correct, 1)
	jmp nmi_stack_correct

nmi_debug_stack_check:
	/* We have a RING0_INT_FRAME here */
	cmpw $__KERNEL_CS,16(%esp)
	jne nmi_stack_correct
	cmpl $debug,(%esp)
	jb nmi_stack_correct
	cmpl $debug_esp_fix_insn,(%esp)
	ja nmi_stack_correct
	FIX_STACK(24,nmi_stack_correct, 1)
	jmp nmi_stack_correct

nmi_espfix_stack:
	/* We have a RING0_INT_FRAME here.
	 *
	 * create the pointer to lss back
	 */
	pushl %ss
	CFI_ADJUST_CFA_OFFSET 4
	pushl %esp
	CFI_ADJUST_CFA_OFFSET 4
	addw $4, (%esp)
	/* copy the iret frame of 12 bytes */
	.rept 3
	pushl 16(%esp)
	CFI_ADJUST_CFA_OFFSET 4
	.endr
	pushl %eax
	CFI_ADJUST_CFA_OFFSET 4
	SAVE_ALL
	FIXUP_ESPFIX_STACK		# %eax == %esp
	xorl %edx,%edx			# zero error code
	call do_nmi
	RESTORE_REGS
	lss 12+4(%esp), %esp		# back to espfix stack
	CFI_ADJUST_CFA_OFFSET -24
1:	INTERRUPT_RETURN
	CFI_ENDPROC
.section __ex_table,"a"
	.align 4
	.long 1b,iret_exc
.previous
KPROBE_END(nmi)

/*
 * int3: saves a full frame with orig_eax = -1 and calls do_int3 with
 * %eax = pt_regs pointer and %edx = 0, then returns through
 * ret_from_exception.
 */
KPROBE_ENTRY(int3)
	RING0_INT_FRAME
	pushl $-1			# mark this as an int
	CFI_ADJUST_CFA_OFFSET 4
	SAVE_ALL
	xorl %edx,%edx		# zero error code
	movl %esp,%eax		# pt_regs pointer
	call do_int3
	jmp ret_from_exception
	CFI_ENDPROC
KPROBE_END(int3)

/*
 * overflow: entered with a 3-word frame; pushes 0 in the error-code
 * slot and the address of do_overflow, then joins error_code.
 */
ENTRY(overflow)
	RING0_INT_FRAME
	pushl $0
	CFI_ADJUST_CFA_OFFSET 4
	pushl $do_overflow
	CFI_ADJUST_CFA_OFFSET 4
	jmp error_code
	CFI_ENDPROC

/*
 * bounds: entered with a 3-word frame; pushes 0 in the error-code slot
 * and the address of do_bounds, then joins error_code.
 */
ENTRY(bounds)
	RING0_INT_FRAME
	pushl $0
	CFI_ADJUST_CFA_OFFSET 4
	pushl $do_bounds
	CFI_ADJUST_CFA_OFFSET 4
	jmp error_code
	CFI_ENDPROC

/*
 * invalid_op: entered with a 3-word frame; pushes 0 in the error-code
 * slot and the address of do_invalid_op, then joins error_code.
 */
ENTRY(invalid_op)
	RING0_INT_FRAME
	pushl $0
	CFI_ADJUST_CFA_OFFSET 4
	pushl $do_invalid_op
	CFI_ADJUST_CFA_OFFSET 4
	jmp error_code
	CFI_ENDPROC

/*
 * coprocessor_segment_overrun: entered with a 3-word frame; pushes 0 in
 * the error-code slot and the address of
 * do_coprocessor_segment_overrun, then joins error_code.
 */
ENTRY(coprocessor_segment_overrun)
	RING0_INT_FRAME
	pushl $0
	CFI_ADJUST_CFA_OFFSET 4
	pushl $do_coprocessor_segment_overrun
	CFI_ADJUST_CFA_OFFSET 4
	jmp error_code
	CFI_ENDPROC

/*
 * invalid_TSS: entered with an error code already on the stack; pushes
 * the address of do_invalid_TSS and joins error_code.
 */
ENTRY(invalid_TSS)
	RING0_EC_FRAME
	pushl $do_invalid_TSS
	CFI_ADJUST_CFA_OFFSET 4
	jmp error_code
	CFI_ENDPROC

/*
 * segment_not_present: entered with an error code already on the stack;
 * pushes the address of do_segment_not_present and joins error_code.
 */
ENTRY(segment_not_present)
	RING0_EC_FRAME
	pushl $do_segment_not_present
	CFI_ADJUST_CFA_OFFSET 4
	jmp error_code
	CFI_ENDPROC

/*
 * stack_segment: entered with an error code already on the stack;
 * pushes the address of do_stack_segment and joins error_code.
 */
ENTRY(stack_segment)
	RING0_EC_FRAME
	pushl $do_stack_segment
	CFI_ADJUST_CFA_OFFSET 4
	jmp error_code
	CFI_ENDPROC

/*
 * general_protection: entered with an error code already on the stack;
 * pushes the address of do_general_protection and joins error_code.
 */
KPROBE_ENTRY(general_protection)
	RING0_EC_FRAME
	pushl $do_general_protection
	CFI_ADJUST_CFA_OFFSET 4
	jmp error_code
	CFI_ENDPROC
KPROBE_END(general_protection)

/*
 * alignment_check: entered with an error code already on the stack;
 * pushes the address of do_alignment_check and joins error_code.
 */
ENTRY(alignment_check)
	RING0_EC_FRAME
	pushl $do_alignment_check
	CFI_ADJUST_CFA_OFFSET 4
	jmp error_code
	CFI_ENDPROC

/*
 * divide_error: entered with a 3-word frame; pushes 0 in the error-code
 * slot and the address of do_divide_error, then joins error_code.
 */
ENTRY(divide_error)
	RING0_INT_FRAME
	pushl $0			# no error code
	CFI_ADJUST_CFA_OFFSET 4
	pushl $do_divide_error
	CFI_ADJUST_CFA_OFFSET 4
	jmp error_code
	CFI_ENDPROC

#ifdef CONFIG_X86_MCE
/*
 * machine_check: entered with a 3-word frame; pushes 0 in the
 * error-code slot and the handler address read from the
 * machine_check_vector variable (a memory operand, not an immediate),
 * then joins error_code.
 */
ENTRY(machine_check)
	RING0_INT_FRAME
	pushl $0
	CFI_ADJUST_CFA_OFFSET 4
	pushl machine_check_vector
	CFI_ADJUST_CFA_OFFSET 4
	jmp error_code
	CFI_ENDPROC
#endif

/*
 * spurious_interrupt_bug: entered with a 3-word frame; pushes 0 in the
 * error-code slot and the address of do_spurious_interrupt_bug, then
 * joins error_code.
 */
ENTRY(spurious_interrupt_bug)
	RING0_INT_FRAME
	pushl $0
	CFI_ADJUST_CFA_OFFSET 4
	pushl $do_spurious_interrupt_bug
	CFI_ADJUST_CFA_OFFSET 4
	jmp error_code
	CFI_ENDPROC

#ifdef CONFIG_STACK_UNWIND
/*
 * arch_unwind_init_running: fills the register block pointed to by the
 * first stack argument (4(%esp)) using the PT_* offsets:
 *   - %ebx, %esi, %edi, %ebp are stored as they are on entry
 *   - ECX, EDX, EAX, ORIG_EAX and EFLAGS are stored as 0
 *   - EIP is the caller's return address, OLDESP is %esp + 4
 *   - DS/ES = __USER_DS, GS = 0, CS = __KERNEL_CS, OLDSS = __KERNEL_DS
 * It then moves the stack word at 12(%esp) into 8(%esp) and tail-jumps
 * to the function pointer that was at 8(%esp). %ebx is restored before
 * the jump; %eax, %ecx and %edx are clobbered.
 */
ENTRY(arch_unwind_init_running)
	CFI_STARTPROC
	movl	4(%esp), %edx
	movl	(%esp), %ecx
	leal	4(%esp), %eax
	movl	%ebx, PT_EBX(%edx)
	xorl	%ebx, %ebx
	movl	%ebx, PT_ECX(%edx)
	movl	%ebx, PT_EDX(%edx)
	movl	%esi, PT_ESI(%edx)
	movl	%edi, PT_EDI(%edx)
	movl	%ebp, PT_EBP(%edx)
	movl	%ebx, PT_EAX(%edx)
	movl	$__USER_DS, PT_DS(%edx)
	movl	$__USER_DS, PT_ES(%edx)
	movl	$0, PT_GS(%edx)
	movl	%ebx, PT_ORIG_EAX(%edx)
	movl	%ecx, PT_EIP(%edx)
	movl	12(%esp), %ecx
	movl	$__KERNEL_CS, PT_CS(%edx)
	movl	%ebx, PT_EFLAGS(%edx)
	movl	%eax, PT_OLDESP(%edx)
	movl	8(%esp), %eax
	movl	%ecx, 8(%esp)
	movl	PT_EBX(%edx), %ebx
	movl	$__KERNEL_DS, PT_OLDSS(%edx)
	jmpl	*%eax
	CFI_ENDPROC
ENDPROC(arch_unwind_init_running)
#endif

/*
 * kernel_thread_helper: calls the function whose address is in %ebx with
 * the value from %edx (passed both in %eax and on the stack), then
 * calls do_exit with that function's return value (left in %eax and
 * pushed on the stack). A zero word is pushed first as a fake return
 * address for the unwinder.
 */
ENTRY(kernel_thread_helper)
	pushl $0		# fake return address for unwinder
	CFI_STARTPROC
	movl %edx,%eax
	pushl %edx
	CFI_ADJUST_CFA_OFFSET 4
	call *%ebx
	pushl %eax
	CFI_ADJUST_CFA_OFFSET 4
	call do_exit
	CFI_ENDPROC
ENDPROC(kernel_thread_helper)

/*
 * The system call table is placed in .rodata; syscall_table_size is its
 * size in bytes and is what nr_syscalls is derived from.
 */
.section .rodata,"a"
#include "syscall_table.S"

syscall_table_size=(.-sys_call_table)