entry_32.S 31.7 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
/*
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 */

/*
 * entry.S contains the system-call and fault low-level handling routines.
 * This also contains the timer-interrupt handler, as well as all interrupts
 * and faults that can result in a task-switch.
 *
 * NOTE: This code handles signal-recognition, which happens every time
 * after a timer-interrupt and after each system call.
 *
 * I changed all the .align's to 4 (16 byte alignment), as that's faster
 * on a 486.
 *
17
 * Stack layout in 'syscall_exit':
L
Linus Torvalds 已提交
18 19 20 21 22 23 24 25 26 27 28 29 30 31
 * 	ptrace needs to have all regs on the stack.
 *	if the order here is changed, it needs to be
 *	updated in fork.c:copy_process, signal.c:do_signal,
 *	ptrace.c and ptrace.h
 *
 *	 0(%esp) - %ebx
 *	 4(%esp) - %ecx
 *	 8(%esp) - %edx
 *       C(%esp) - %esi
 *	10(%esp) - %edi
 *	14(%esp) - %ebp
 *	18(%esp) - %eax
 *	1C(%esp) - %ds
 *	20(%esp) - %es
32
 *	24(%esp) - %fs
33 34 35 36 37 38 39
 *	28(%esp) - %gs		saved iff !CONFIG_X86_32_LAZY_GS
 *	2C(%esp) - orig_eax
 *	30(%esp) - %eip
 *	34(%esp) - %cs
 *	38(%esp) - %eflags
 *	3C(%esp) - %oldesp
 *	40(%esp) - %oldss
L
Linus Torvalds 已提交
40 41 42 43 44 45
 *
 * "current" is in register %ebx during any slow entries.
 */

#include <linux/linkage.h>
#include <asm/thread_info.h>
46
#include <asm/irqflags.h>
L
Linus Torvalds 已提交
47 48 49
#include <asm/errno.h>
#include <asm/segment.h>
#include <asm/smp.h>
50
#include <asm/page_types.h>
S
Stas Sergeev 已提交
51
#include <asm/percpu.h>
52
#include <asm/dwarf2.h>
53
#include <asm/processor-flags.h>
54
#include <asm/ftrace.h>
55
#include <asm/irq_vectors.h>
56
#include <asm/cpufeature.h>
L
Linus Torvalds 已提交
57

R
Roland McGrath 已提交
58 59 60 61 62 63 64 65 66 67
/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this.  */
#include <linux/elf-em.h>
#define AUDIT_ARCH_I386		(EM_386|__AUDIT_ARCH_LE)
#define __AUDIT_ARCH_LE	   0x40000000

#ifndef CONFIG_AUDITSYSCALL
#define sysenter_audit	syscall_trace_entry
#define sysexit_audit	syscall_exit_work
#endif

J
Jiri Olsa 已提交
68 69
	.section .entry.text, "ax"

70 71 72 73 74
/*
 * We use macros for low-level operations which need to be overridden
 * for paravirtualization.  The following will never clobber any registers:
 *   INTERRUPT_RETURN (aka. "iret")
 *   GET_CR0_INTO_EAX (aka. "movl %cr0, %eax")
75
 *   ENABLE_INTERRUPTS_SYSEXIT (aka "sti; sysexit").
76 77 78 79 80 81 82
 *
 * For DISABLE_INTERRUPTS/ENABLE_INTERRUPTS (aka "cli"/"sti"), you must
 * specify what registers can be overwritten (CLBR_NONE, CLBR_EAX/EDX/ECX/ANY).
 * Allowing a register to be clobbered can shrink the paravirt replacement
 * enough to patch inline, increasing performance.
 */

L
Linus Torvalds 已提交
83 84 85
#define nr_syscalls ((syscall_table_size)/4)

#ifdef CONFIG_PREEMPT
86
#define preempt_stop(clobbers)	DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF
L
Linus Torvalds 已提交
87
#else
88
#define preempt_stop(clobbers)
89
#define resume_kernel		restore_all
L
Linus Torvalds 已提交
90 91
#endif

92 93
.macro TRACE_IRQS_IRET
#ifdef CONFIG_TRACE_IRQFLAGS
94
	testl $X86_EFLAGS_IF,PT_EFLAGS(%esp)     # interrupts off?
95 96 97 98 99 100
	jz 1f
	TRACE_IRQS_ON
1:
#endif
.endm

101 102 103 104 105 106
#ifdef CONFIG_VM86
#define resume_userspace_sig	check_userspace
#else
#define resume_userspace_sig	resume_userspace
#endif

107 108 109 110 111 112 113 114 115 116 117 118 119
/*
 * User gs save/restore
 *
 * %gs is used for userland TLS and kernel only uses it for stack
 * canary which is required to be at %gs:20 by gcc.  Read the comment
 * at the top of stackprotector.h for more info.
 *
 * Local labels 98 and 99 are used.
 */
#ifdef CONFIG_X86_32_LAZY_GS

 /* unfortunately push/pop can't be no-op */
.macro PUSH_GS
120
	pushl_cfi $0
121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143
.endm
/*
 * Lazy-GS flavour: PUSH_GS pushed a $0 placeholder (push/pop can't be
 * no-ops), so POP_GS just discards that slot plus \pop extra bytes
 * requested by the caller, keeping the CFI annotations in sync.
 */
.macro POP_GS pop=0
	addl $(4 + \pop), %esp
	CFI_ADJUST_CFA_OFFSET -(4 + \pop)
.endm
/* No %gs segment load happens in lazy mode, so no fault fixup is needed. */
.macro POP_GS_EX
.endm

 /* all the rest are no-op */
.macro PTGS_TO_GS
.endm
.macro PTGS_TO_GS_EX
.endm
.macro GS_TO_REG reg
.endm
.macro REG_TO_PTGS reg
.endm
.macro SET_KERNEL_GS reg
.endm

#else	/* CONFIG_X86_32_LAZY_GS */

.macro PUSH_GS
144
	pushl_cfi %gs
145 146 147 148
	/*CFI_REL_OFFSET gs, 0*/
.endm

.macro POP_GS pop=0
149
98:	popl_cfi %gs
150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187
	/*CFI_RESTORE gs*/
  .if \pop <> 0
	add $\pop, %esp
	CFI_ADJUST_CFA_OFFSET -\pop
  .endif
.endm
/*
 * Exception fixup for POP_GS: if the "popl %gs" at local label 98 faults
 * on a bad user selector, the __ex_table entry redirects to label 99,
 * which overwrites the saved value on the stack with 0 and retries the
 * pop (loading the null selector instead).
 */
.macro POP_GS_EX
.pushsection .fixup, "ax"
99:	movl $0, (%esp)
	jmp 98b
.section __ex_table, "a"
	.align 4
	.long 98b, 99b
.popsection
.endm

/* Load user %gs from the pt_regs gs slot; label 98 anchors the fixup below. */
.macro PTGS_TO_GS
98:	mov PT_GS(%esp), %gs
.endm
/*
 * Fixup for PTGS_TO_GS: on a faulting %gs load, zero the pt_regs gs slot
 * and retry, so a bogus saved selector degrades to the null selector.
 */
.macro PTGS_TO_GS_EX
.pushsection .fixup, "ax"
99:	movl $0, PT_GS(%esp)
	jmp 98b
.section __ex_table, "a"
	.align 4
	.long 98b, 99b
.popsection
.endm

/* Copy the current %gs selector into \reg (CFI annotation kept disabled,
 * matching the commented-out CFI style used elsewhere in this file). */
.macro GS_TO_REG reg
	movl %gs, \reg
	/*CFI_REGISTER gs, \reg*/
.endm
/* Store \reg back into the pt_regs gs slot on the stack. */
.macro REG_TO_PTGS reg
	movl \reg, PT_GS(%esp)
	/*CFI_REL_OFFSET gs, PT_GS*/
.endm
.macro SET_KERNEL_GS reg
188
	movl $(__KERNEL_STACK_CANARY), \reg
189 190 191 192 193
	movl \reg, %gs
.endm

#endif	/* CONFIG_X86_32_LAZY_GS */

194 195
.macro SAVE_ALL
	cld
196
	PUSH_GS
197
	pushl_cfi %fs
198
	/*CFI_REL_OFFSET fs, 0;*/
199
	pushl_cfi %es
200
	/*CFI_REL_OFFSET es, 0;*/
201
	pushl_cfi %ds
202
	/*CFI_REL_OFFSET ds, 0;*/
203
	pushl_cfi %eax
204
	CFI_REL_OFFSET eax, 0
205
	pushl_cfi %ebp
206
	CFI_REL_OFFSET ebp, 0
207
	pushl_cfi %edi
208
	CFI_REL_OFFSET edi, 0
209
	pushl_cfi %esi
210
	CFI_REL_OFFSET esi, 0
211
	pushl_cfi %edx
212
	CFI_REL_OFFSET edx, 0
213
	pushl_cfi %ecx
214
	CFI_REL_OFFSET ecx, 0
215
	pushl_cfi %ebx
216 217 218 219 220
	CFI_REL_OFFSET ebx, 0
	movl $(__USER_DS), %edx
	movl %edx, %ds
	movl %edx, %es
	movl $(__KERNEL_PERCPU), %edx
221
	movl %edx, %fs
222
	SET_KERNEL_GS %edx
223
.endm
L
Linus Torvalds 已提交
224

225
.macro RESTORE_INT_REGS
226
	popl_cfi %ebx
227
	CFI_RESTORE ebx
228
	popl_cfi %ecx
229
	CFI_RESTORE ecx
230
	popl_cfi %edx
231
	CFI_RESTORE edx
232
	popl_cfi %esi
233
	CFI_RESTORE esi
234
	popl_cfi %edi
235
	CFI_RESTORE edi
236
	popl_cfi %ebp
237
	CFI_RESTORE ebp
238
	popl_cfi %eax
239
	CFI_RESTORE eax
240
.endm
L
Linus Torvalds 已提交
241

242
.macro RESTORE_REGS pop=0
243
	RESTORE_INT_REGS
244
1:	popl_cfi %ds
245
	/*CFI_RESTORE ds;*/
246
2:	popl_cfi %es
247
	/*CFI_RESTORE es;*/
248
3:	popl_cfi %fs
249
	/*CFI_RESTORE fs;*/
250
	POP_GS \pop
251 252 253 254 255 256 257 258 259 260 261 262
.pushsection .fixup, "ax"
4:	movl $0, (%esp)
	jmp 1b
5:	movl $0, (%esp)
	jmp 2b
6:	movl $0, (%esp)
	jmp 3b
.section __ex_table, "a"
	.align 4
	.long 1b, 4b
	.long 2b, 5b
	.long 3b, 6b
263
.popsection
264
	POP_GS_EX
265
.endm
L
Linus Torvalds 已提交
266

267 268 269 270 271
.macro RING0_INT_FRAME
	CFI_STARTPROC simple
	CFI_SIGNAL_FRAME
	CFI_DEF_CFA esp, 3*4
	/*CFI_OFFSET cs, -2*4;*/
272
	CFI_OFFSET eip, -3*4
273
.endm
274

275 276 277 278 279
.macro RING0_EC_FRAME
	CFI_STARTPROC simple
	CFI_SIGNAL_FRAME
	CFI_DEF_CFA esp, 4*4
	/*CFI_OFFSET cs, -2*4;*/
280
	CFI_OFFSET eip, -3*4
281
.endm
282

283 284 285 286 287 288 289 290 291 292 293 294 295 296
.macro RING0_PTREGS_FRAME
	CFI_STARTPROC simple
	CFI_SIGNAL_FRAME
	CFI_DEF_CFA esp, PT_OLDESP-PT_EBX
	/*CFI_OFFSET cs, PT_CS-PT_OLDESP;*/
	CFI_OFFSET eip, PT_EIP-PT_OLDESP
	/*CFI_OFFSET es, PT_ES-PT_OLDESP;*/
	/*CFI_OFFSET ds, PT_DS-PT_OLDESP;*/
	CFI_OFFSET eax, PT_EAX-PT_OLDESP
	CFI_OFFSET ebp, PT_EBP-PT_OLDESP
	CFI_OFFSET edi, PT_EDI-PT_OLDESP
	CFI_OFFSET esi, PT_ESI-PT_OLDESP
	CFI_OFFSET edx, PT_EDX-PT_OLDESP
	CFI_OFFSET ecx, PT_ECX-PT_OLDESP
297
	CFI_OFFSET ebx, PT_EBX-PT_OLDESP
298
.endm
L
Linus Torvalds 已提交
299 300

ENTRY(ret_from_fork)
301
	CFI_STARTPROC
302
	pushl_cfi %eax
L
Linus Torvalds 已提交
303 304
	call schedule_tail
	GET_THREAD_INFO(%ebp)
305 306 307
	popl_cfi %eax
	pushl_cfi $0x0202		# Reset kernel eflags
	popfl_cfi
L
Linus Torvalds 已提交
308
	jmp syscall_exit
309
	CFI_ENDPROC
310
END(ret_from_fork)
L
Linus Torvalds 已提交
311

312 313 314 315
/*
 * Interrupt exit functions should be protected against kprobes
 */
	.pushsection .kprobes.text, "ax"
L
Linus Torvalds 已提交
316 317 318 319 320 321 322 323 324
/*
 * Return to user mode is not as complex as all this looks,
 * but we want the default path for a system call return to
 * go as quickly as possible which is why some of this is
 * less clear than it otherwise should be.
 */

	# userspace resumption stub bypassing syscall exit tracing
	ALIGN
325
	RING0_PTREGS_FRAME
L
Linus Torvalds 已提交
326
ret_from_exception:
327
	preempt_stop(CLBR_ANY)
L
Linus Torvalds 已提交
328 329
ret_from_intr:
	GET_THREAD_INFO(%ebp)
330
check_userspace:
331 332
	movl PT_EFLAGS(%esp), %eax	# mix EFLAGS and CS
	movb PT_CS(%esp), %al
333
	andl $(X86_EFLAGS_VM | SEGMENT_RPL_MASK), %eax
334 335
	cmpl $USER_RPL, %eax
	jb resume_kernel		# not returning to v8086 or userspace
336

L
Linus Torvalds 已提交
337
ENTRY(resume_userspace)
338
	LOCKDEP_SYS_EXIT
339
 	DISABLE_INTERRUPTS(CLBR_ANY)	# make sure we don't miss an interrupt
L
Linus Torvalds 已提交
340 341
					# setting need_resched or sigpending
					# between sampling and the iret
342
	TRACE_IRQS_OFF
L
Linus Torvalds 已提交
343 344 345 346 347
	movl TI_flags(%ebp), %ecx
	andl $_TIF_WORK_MASK, %ecx	# is there any work to be done on
					# int/exception return?
	jne work_pending
	jmp restore_all
348
END(ret_from_exception)
L
Linus Torvalds 已提交
349 350 351

#ifdef CONFIG_PREEMPT
ENTRY(resume_kernel)
352
	DISABLE_INTERRUPTS(CLBR_ANY)
L
Linus Torvalds 已提交
353
	cmpl $0,TI_preempt_count(%ebp)	# non-zero preempt_count ?
354
	jnz restore_all
L
Linus Torvalds 已提交
355 356 357 358
need_resched:
	movl TI_flags(%ebp), %ecx	# need_resched set ?
	testb $_TIF_NEED_RESCHED, %cl
	jz restore_all
359
	testl $X86_EFLAGS_IF,PT_EFLAGS(%esp)	# interrupts off (exception path) ?
L
Linus Torvalds 已提交
360 361 362
	jz restore_all
	call preempt_schedule_irq
	jmp need_resched
363
END(resume_kernel)
L
Linus Torvalds 已提交
364
#endif
365
	CFI_ENDPROC
366 367 368 369
/*
 * End of kprobes section
 */
	.popsection
L
Linus Torvalds 已提交
370 371 372 373 374

/* SYSENTER_RETURN points to after the "sysenter" instruction in
   the vsyscall page.  See vsyscall-sysenter.S, which defines the symbol.  */

	# sysenter call handler stub
R
Roland McGrath 已提交
375
ENTRY(ia32_sysenter_target)
376
	CFI_STARTPROC simple
377
	CFI_SIGNAL_FRAME
378 379
	CFI_DEF_CFA esp, 0
	CFI_REGISTER esp, ebp
380
	movl TSS_sysenter_sp0(%esp),%esp
L
Linus Torvalds 已提交
381
sysenter_past_esp:
382
	/*
383 384 385
	 * Interrupts are disabled here, but we can't trace it until
	 * enough kernel state to call TRACE_IRQS_OFF can be called - but
	 * we immediately enable interrupts at that point anyway.
386
	 */
387
	pushl_cfi $__USER_DS
388
	/*CFI_REL_OFFSET ss, 0*/
389
	pushl_cfi %ebp
390
	CFI_REL_OFFSET esp, 0
391
	pushfl_cfi
392
	orl $X86_EFLAGS_IF, (%esp)
393
	pushl_cfi $__USER_CS
394
	/*CFI_REL_OFFSET cs, 0*/
395 396 397 398 399
	/*
	 * Push current_thread_info()->sysenter_return to the stack.
	 * A tiny bit of offset fixup is necessary - 4*4 means the 4 words
	 * pushed above; +8 corresponds to copy_thread's esp0 setting.
	 */
400
	pushl_cfi ((TI_sysenter_return)-THREAD_SIZE+8+4*4)(%esp)
401
	CFI_REL_OFFSET eip, 0
L
Linus Torvalds 已提交
402

403
	pushl_cfi %eax
404 405 406
	SAVE_ALL
	ENABLE_INTERRUPTS(CLBR_NONE)

L
Linus Torvalds 已提交
407 408 409 410 411 412 413
/*
 * Load the potential sixth argument from user stack.
 * Careful about security.
 */
	cmpl $__PAGE_OFFSET-3,%ebp
	jae syscall_fault
1:	movl (%ebp),%ebp
414
	movl %ebp,PT_EBP(%esp)
L
Linus Torvalds 已提交
415 416 417 418 419 420 421
.section __ex_table,"a"
	.align 4
	.long 1b,syscall_fault
.previous

	GET_THREAD_INFO(%ebp)

422
	testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp)
R
Roland McGrath 已提交
423 424
	jnz sysenter_audit
sysenter_do_call:
L
Linus Torvalds 已提交
425 426 427
	cmpl $(nr_syscalls), %eax
	jae syscall_badsys
	call *sys_call_table(,%eax,4)
428
	movl %eax,PT_EAX(%esp)
429
	LOCKDEP_SYS_EXIT
430
	DISABLE_INTERRUPTS(CLBR_ANY)
431
	TRACE_IRQS_OFF
L
Linus Torvalds 已提交
432
	movl TI_flags(%ebp), %ecx
433
	testl $_TIF_ALLWORK_MASK, %ecx
R
Roland McGrath 已提交
434 435
	jne sysexit_audit
sysenter_exit:
L
Linus Torvalds 已提交
436
/* if something modifies registers it must also disable sysexit */
437 438
	movl PT_EIP(%esp), %edx
	movl PT_OLDESP(%esp), %ecx
L
Linus Torvalds 已提交
439
	xorl %ebp,%ebp
440
	TRACE_IRQS_ON
441
1:	mov  PT_FS(%esp), %fs
442
	PTGS_TO_GS
443
	ENABLE_INTERRUPTS_SYSEXIT
R
Roland McGrath 已提交
444 445 446

#ifdef CONFIG_AUDITSYSCALL
sysenter_audit:
447
	testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
R
Roland McGrath 已提交
448 449 450 451 452 453 454 455 456 457
	jnz syscall_trace_entry
	addl $4,%esp
	CFI_ADJUST_CFA_OFFSET -4
	/* %esi already in 8(%esp)	   6th arg: 4th syscall arg */
	/* %edx already in 4(%esp)	   5th arg: 3rd syscall arg */
	/* %ecx already in 0(%esp)	   4th arg: 2nd syscall arg */
	movl %ebx,%ecx			/* 3rd arg: 1st syscall arg */
	movl %eax,%edx			/* 2nd arg: syscall number */
	movl $AUDIT_ARCH_I386,%eax	/* 1st arg: audit arch */
	call audit_syscall_entry
458
	pushl_cfi %ebx
R
Roland McGrath 已提交
459 460 461 462
	movl PT_EAX(%esp),%eax		/* reload syscall number */
	jmp sysenter_do_call

sysexit_audit:
463
	testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %ecx
R
Roland McGrath 已提交
464 465 466 467 468 469 470 471 472 473 474 475
	jne syscall_exit_work
	TRACE_IRQS_ON
	ENABLE_INTERRUPTS(CLBR_ANY)
	movl %eax,%edx		/* second arg, syscall return value */
	cmpl $0,%eax		/* is it < 0? */
	setl %al		/* 1 if so, 0 if not */
	movzbl %al,%eax		/* zero-extend that */
	inc %eax /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */
	call audit_syscall_exit
	DISABLE_INTERRUPTS(CLBR_ANY)
	TRACE_IRQS_OFF
	movl TI_flags(%ebp), %ecx
476
	testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %ecx
R
Roland McGrath 已提交
477 478 479 480 481
	jne syscall_exit_work
	movl PT_EAX(%esp),%eax	/* reload syscall return value */
	jmp sysenter_exit
#endif

482
	CFI_ENDPROC
483
.pushsection .fixup,"ax"
484
2:	movl $0,PT_FS(%esp)
485 486 487 488 489
	jmp 1b
.section __ex_table,"a"
	.align 4
	.long 1b,2b
.popsection
490
	PTGS_TO_GS_EX
R
Roland McGrath 已提交
491
ENDPROC(ia32_sysenter_target)
L
Linus Torvalds 已提交
492

493 494 495 496
/*
 * syscall stub including irq exit should be protected against kprobes
 */
	.pushsection .kprobes.text, "ax"
L
Linus Torvalds 已提交
497 498
	# system call handler stub
ENTRY(system_call)
499
	RING0_INT_FRAME			# can't unwind into user space anyway
500
	pushl_cfi %eax			# save orig_eax
L
Linus Torvalds 已提交
501 502
	SAVE_ALL
	GET_THREAD_INFO(%ebp)
503
					# system call tracing in operation / emulation
504
	testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp)
L
Linus Torvalds 已提交
505 506 507 508 509
	jnz syscall_trace_entry
	cmpl $(nr_syscalls), %eax
	jae syscall_badsys
syscall_call:
	call *sys_call_table(,%eax,4)
510
	movl %eax,PT_EAX(%esp)		# store the return value
L
Linus Torvalds 已提交
511
syscall_exit:
512
	LOCKDEP_SYS_EXIT
513
	DISABLE_INTERRUPTS(CLBR_ANY)	# make sure we don't miss an interrupt
L
Linus Torvalds 已提交
514 515
					# setting need_resched or sigpending
					# between sampling and the iret
516
	TRACE_IRQS_OFF
L
Linus Torvalds 已提交
517
	movl TI_flags(%ebp), %ecx
518
	testl $_TIF_ALLWORK_MASK, %ecx	# current->work
L
Linus Torvalds 已提交
519 520 521
	jne syscall_exit_work

restore_all:
522 523
	TRACE_IRQS_IRET
restore_all_notrace:
524 525
	movl PT_EFLAGS(%esp), %eax	# mix EFLAGS, SS and CS
	# Warning: PT_OLDSS(%esp) contains the wrong/random values if we
526 527
	# are returning to the kernel.
	# See comments in process.c:copy_thread() for details.
528 529
	movb PT_OLDSS(%esp), %ah
	movb PT_CS(%esp), %al
530
	andl $(X86_EFLAGS_VM | (SEGMENT_TI_MASK << 8) | SEGMENT_RPL_MASK), %eax
531
	cmpl $((SEGMENT_LDT << 8) | USER_RPL), %eax
532
	CFI_REMEMBER_STATE
L
Linus Torvalds 已提交
533 534
	je ldt_ss			# returning to user-space with LDT SS
restore_nocheck:
535
	RESTORE_REGS 4			# skip orig_eax/error_code
A
Adrian Bunk 已提交
536
irq_return:
I
Ingo Molnar 已提交
537
	INTERRUPT_RETURN
L
Linus Torvalds 已提交
538
.section .fixup,"ax"
539
ENTRY(iret_exc)
540 541 542
	pushl $0			# no error code
	pushl $do_iret_error
	jmp error_code
L
Linus Torvalds 已提交
543 544 545
.previous
.section __ex_table,"a"
	.align 4
I
Ingo Molnar 已提交
546
	.long irq_return,iret_exc
L
Linus Torvalds 已提交
547 548
.previous

549
	CFI_RESTORE_STATE
L
Linus Torvalds 已提交
550
ldt_ss:
551
	larl PT_OLDSS(%esp), %eax
L
Linus Torvalds 已提交
552 553 554
	jnz restore_nocheck
	testl $0x00400000, %eax		# returning to 32bit stack?
	jnz restore_nocheck		# allright, normal return
555 556 557 558 559 560 561 562 563 564

#ifdef CONFIG_PARAVIRT
	/*
	 * The kernel can't run on a non-flat stack if paravirt mode
	 * is active.  Rather than try to fixup the high bits of
	 * ESP, bypass this code entirely.  This may break DOSemu
	 * and/or Wine support in a paravirt VM, although the option
	 * is still available to implement the setting of the high
	 * 16-bits in the INTERRUPT_RETURN paravirt-op.
	 */
565
	cmpl $0, pv_info+PARAVIRT_enabled
566 567 568
	jne restore_nocheck
#endif

569 570 571 572 573 574 575 576 577 578 579
/*
 * Setup and switch to ESPFIX stack
 *
 * We're returning to userspace with a 16 bit stack. The CPU will not
 * restore the high word of ESP for us on executing iret... This is an
 * "official" bug of all the x86-compatible CPUs, which we can work
 * around to make dosemu and wine happy. We do this by preloading the
 * high word of ESP with the high word of the userspace ESP while
 * compensating for the offset by changing to the ESPFIX segment with
 * a base address that matches for the difference.
 */
580
#define GDT_ESPFIX_SS PER_CPU_VAR(gdt_page) + (GDT_ENTRY_ESPFIX_SS * 8)
581 582 583 584 585
	mov %esp, %edx			/* load kernel esp */
	mov PT_OLDESP(%esp), %eax	/* load userspace esp */
	mov %dx, %ax			/* eax: new kernel esp */
	sub %eax, %edx			/* offset (low word is 0) */
	shr $16, %edx
586 587
	mov %dl, GDT_ESPFIX_SS + 4 /* bits 16..23 */
	mov %dh, GDT_ESPFIX_SS + 7 /* bits 24..31 */
588 589
	pushl_cfi $__ESPFIX_SS
	pushl_cfi %eax			/* new kernel esp */
590 591 592
	/* Disable interrupts, but do not irqtrace this section: we
	 * will soon execute iret and the tracer was already set to
	 * the irqstate after the iret */
593
	DISABLE_INTERRUPTS(CLBR_EAX)
594
	lss (%esp), %esp		/* switch to espfix segment */
S
Stas Sergeev 已提交
595 596
	CFI_ADJUST_CFA_OFFSET -8
	jmp restore_nocheck
597
	CFI_ENDPROC
598
ENDPROC(system_call)
L
Linus Torvalds 已提交
599 600 601

	# perform work that needs to be done immediately before resumption
	ALIGN
602
	RING0_PTREGS_FRAME		# can't unwind into user space anyway
L
Linus Torvalds 已提交
603 604 605 606 607
work_pending:
	testb $_TIF_NEED_RESCHED, %cl
	jz work_notifysig
work_resched:
	call schedule
608
	LOCKDEP_SYS_EXIT
609
	DISABLE_INTERRUPTS(CLBR_ANY)	# make sure we don't miss an interrupt
L
Linus Torvalds 已提交
610 611
					# setting need_resched or sigpending
					# between sampling and the iret
612
	TRACE_IRQS_OFF
L
Linus Torvalds 已提交
613 614 615 616 617 618 619 620 621
	movl TI_flags(%ebp), %ecx
	andl $_TIF_WORK_MASK, %ecx	# is there any work to be done other
					# than syscall tracing?
	jz restore_all
	testb $_TIF_NEED_RESCHED, %cl
	jnz work_resched

work_notifysig:				# deal with pending signals and
					# notify-resume requests
622
#ifdef CONFIG_VM86
623
	testl $X86_EFLAGS_VM, PT_EFLAGS(%esp)
L
Linus Torvalds 已提交
624 625 626 627 628
	movl %esp, %eax
	jne work_notifysig_v86		# returning to kernel-space or
					# vm86-space
	xorl %edx, %edx
	call do_notify_resume
629
	jmp resume_userspace_sig
L
Linus Torvalds 已提交
630 631 632

	ALIGN
work_notifysig_v86:
633
	pushl_cfi %ecx			# save ti_flags for do_notify_resume
L
Linus Torvalds 已提交
634
	call save_v86_state		# %eax contains pt_regs pointer
635
	popl_cfi %ecx
L
Linus Torvalds 已提交
636
	movl %eax, %esp
637 638 639
#else
	movl %esp, %eax
#endif
L
Linus Torvalds 已提交
640 641
	xorl %edx, %edx
	call do_notify_resume
642
	jmp resume_userspace_sig
643
END(work_pending)
L
Linus Torvalds 已提交
644 645 646 647

	# perform syscall exit tracing
	ALIGN
syscall_trace_entry:
648
	movl $-ENOSYS,PT_EAX(%esp)
L
Linus Torvalds 已提交
649
	movl %esp, %eax
650 651
	call syscall_trace_enter
	/* What it returned is what we'll actually use.  */
L
Linus Torvalds 已提交
652 653 654
	cmpl $(nr_syscalls), %eax
	jnae syscall_call
	jmp syscall_exit
655
END(syscall_trace_entry)
L
Linus Torvalds 已提交
656 657 658 659

	# perform syscall exit tracing
	ALIGN
syscall_exit_work:
660
	testl $_TIF_WORK_SYSCALL_EXIT, %ecx
L
Linus Torvalds 已提交
661
	jz work_pending
662
	TRACE_IRQS_ON
663
	ENABLE_INTERRUPTS(CLBR_ANY)	# could let syscall_trace_leave() call
L
Linus Torvalds 已提交
664 665
					# schedule() instead
	movl %esp, %eax
666
	call syscall_trace_leave
L
Linus Torvalds 已提交
667
	jmp resume_userspace
668
END(syscall_exit_work)
669
	CFI_ENDPROC
L
Linus Torvalds 已提交
670

671
	RING0_INT_FRAME			# can't unwind into user space anyway
L
Linus Torvalds 已提交
672 673
syscall_fault:
	GET_THREAD_INFO(%ebp)
674
	movl $-EFAULT,PT_EAX(%esp)
L
Linus Torvalds 已提交
675
	jmp resume_userspace
676
END(syscall_fault)
L
Linus Torvalds 已提交
677 678

syscall_badsys:
679
	movl $-ENOSYS,PT_EAX(%esp)
L
Linus Torvalds 已提交
680
	jmp resume_userspace
681
END(syscall_badsys)
682
	CFI_ENDPROC
683 684 685 686
/*
 * End of kprobes section
 */
	.popsection
L
Linus Torvalds 已提交
687

688 689 690
/*
 * System calls that need a pt_regs pointer.
 */
B
Brian Gerst 已提交
691
#define PTREGSCALL0(name) \
692 693 694 695 696
	ALIGN; \
ptregs_##name: \
	leal 4(%esp),%eax; \
	jmp sys_##name;

B
Brian Gerst 已提交
697 698 699 700
#define PTREGSCALL1(name) \
	ALIGN; \
ptregs_##name: \
	leal 4(%esp),%edx; \
701
	movl (PT_EBX+4)(%esp),%eax; \
B
Brian Gerst 已提交
702 703 704 705 706 707
	jmp sys_##name;

#define PTREGSCALL2(name) \
	ALIGN; \
ptregs_##name: \
	leal 4(%esp),%ecx; \
708 709
	movl (PT_ECX+4)(%esp),%edx; \
	movl (PT_EBX+4)(%esp),%eax; \
B
Brian Gerst 已提交
710 711 712 713 714
	jmp sys_##name;

#define PTREGSCALL3(name) \
	ALIGN; \
ptregs_##name: \
715
	CFI_STARTPROC; \
B
Brian Gerst 已提交
716
	leal 4(%esp),%eax; \
717
	pushl_cfi %eax; \
B
Brian Gerst 已提交
718 719 720 721 722
	movl PT_EDX(%eax),%ecx; \
	movl PT_ECX(%eax),%edx; \
	movl PT_EBX(%eax),%eax; \
	call sys_##name; \
	addl $4,%esp; \
723 724 725 726
	CFI_ADJUST_CFA_OFFSET -4; \
	ret; \
	CFI_ENDPROC; \
ENDPROC(ptregs_##name)
B
Brian Gerst 已提交
727

B
Brian Gerst 已提交
728
PTREGSCALL1(iopl)
B
Brian Gerst 已提交
729 730
PTREGSCALL0(fork)
PTREGSCALL0(vfork)
B
Brian Gerst 已提交
731
PTREGSCALL3(execve)
B
Brian Gerst 已提交
732
PTREGSCALL2(sigaltstack)
B
Brian Gerst 已提交
733 734
PTREGSCALL0(sigreturn)
PTREGSCALL0(rt_sigreturn)
735 736
PTREGSCALL2(vm86)
PTREGSCALL1(vm86old)
737

B
Brian Gerst 已提交
738 739 740
/* Clone is an oddball.  The 4th arg is in %edi */
	ALIGN;
ptregs_clone:
741
	CFI_STARTPROC
B
Brian Gerst 已提交
742
	leal 4(%esp),%eax
743 744
	pushl_cfi %eax
	pushl_cfi PT_EDI(%eax)
B
Brian Gerst 已提交
745 746 747 748 749
	movl PT_EDX(%eax),%ecx
	movl PT_ECX(%eax),%edx
	movl PT_EBX(%eax),%eax
	call sys_clone
	addl $8,%esp
750
	CFI_ADJUST_CFA_OFFSET -8
B
Brian Gerst 已提交
751
	ret
752 753
	CFI_ENDPROC
ENDPROC(ptregs_clone)
B
Brian Gerst 已提交
754

755
.macro FIXUP_ESPFIX_STACK
756 757 758 759 760 761 762 763
/*
 * Switch back for ESPFIX stack to the normal zerobased stack
 *
 * We can't call C functions using the ESPFIX stack. This code reads
 * the high word of the segment base from the GDT and switches to the
 * normal stack and adjusts ESP with the matching offset.
 */
	/* fixup the stack */
764 765
	mov GDT_ESPFIX_SS + 4, %al /* bits 16..23 */
	mov GDT_ESPFIX_SS + 7, %ah /* bits 24..31 */
766 767
	shl $16, %eax
	addl %esp, %eax			/* the adjusted stack pointer */
768 769
	pushl_cfi $__KERNEL_DS
	pushl_cfi %eax
770
	lss (%esp), %esp		/* switch to the normal stack segment */
771 772 773 774 775 776 777 778 779 780 781 782 783 784
	CFI_ADJUST_CFA_OFFSET -8
.endm
/*
 * If we entered the kernel on the espfix stack (%ss == __ESPFIX_SS),
 * reload flat kernel data segments and switch back to the normal,
 * zero-based kernel stack via FIXUP_ESPFIX_STACK; otherwise this is a
 * no-op and we fall straight through to local label 27.
 */
.macro UNWIND_ESPFIX_STACK
	movl %ss, %eax
	/* see if on espfix stack */
	cmpw $__ESPFIX_SS, %ax
	jne 27f
	movl $__KERNEL_DS, %eax
	movl %eax, %ds
	movl %eax, %es
	/* switch to normal stack */
	FIXUP_ESPFIX_STACK
27:
.endm
L
Linus Torvalds 已提交
785 786

/*
787 788 789
 * Build the entry stubs and pointer table with some assembler magic.
 * We pack 7 stubs into a single 32-byte chunk, which will fit in a
 * single cache line on all modern x86 implementations.
L
Linus Torvalds 已提交
790
 */
791
.section .init.rodata,"a"
L
Linus Torvalds 已提交
792
ENTRY(interrupt)
J
Jiri Olsa 已提交
793
.section .entry.text, "ax"
794 795
	.p2align 5
	.p2align CONFIG_X86_L1_CACHE_SHIFT
L
Linus Torvalds 已提交
796
ENTRY(irq_entries_start)
797
	RING0_INT_FRAME
798
vector=FIRST_EXTERNAL_VECTOR
799 800 801 802
.rept (NR_VECTORS-FIRST_EXTERNAL_VECTOR+6)/7
	.balign 32
  .rept	7
    .if vector < NR_VECTORS
803
      .if vector <> FIRST_EXTERNAL_VECTOR
804
	CFI_ADJUST_CFA_OFFSET -4
805
      .endif
806
1:	pushl_cfi $(~vector+0x80)	/* Note: always in signed byte range */
807
      .if ((vector-FIRST_EXTERNAL_VECTOR)%7) <> 6
808 809 810
	jmp 2f
      .endif
      .previous
L
Linus Torvalds 已提交
811
	.long 1b
J
Jiri Olsa 已提交
812
      .section .entry.text, "ax"
L
Linus Torvalds 已提交
813
vector=vector+1
814 815 816
    .endif
  .endr
2:	jmp common_interrupt
L
Linus Torvalds 已提交
817
.endr
818 819 820 821 822
END(irq_entries_start)

.previous
END(interrupt)
.previous
L
Linus Torvalds 已提交
823

824 825 826 827
/*
 * the CPU automatically disables interrupts when executing an IRQ vector,
 * so IRQ-flags tracing has to follow that:
 */
828
	.p2align CONFIG_X86_L1_CACHE_SHIFT
L
Linus Torvalds 已提交
829
common_interrupt:
830
	addl $-0x80,(%esp)	/* Adjust vector into the [-256,-1] range */
L
Linus Torvalds 已提交
831
	SAVE_ALL
832
	TRACE_IRQS_OFF
L
Linus Torvalds 已提交
833 834 835
	movl %esp,%eax
	call do_IRQ
	jmp ret_from_intr
836
ENDPROC(common_interrupt)
837
	CFI_ENDPROC
L
Linus Torvalds 已提交
838

839 840 841 842
/*
 *  Irq entries should be protected against kprobes
 */
	.pushsection .kprobes.text, "ax"
T
Tejun Heo 已提交
843
#define BUILD_INTERRUPT3(name, nr, fn)	\
L
Linus Torvalds 已提交
844
ENTRY(name)				\
845
	RING0_INT_FRAME;		\
846
	pushl_cfi $~(nr);		\
847
	SAVE_ALL;			\
848
	TRACE_IRQS_OFF			\
L
Linus Torvalds 已提交
849
	movl %esp,%eax;			\
T
Tejun Heo 已提交
850
	call fn;			\
851
	jmp ret_from_intr;		\
852 853
	CFI_ENDPROC;			\
ENDPROC(name)
L
Linus Torvalds 已提交
854

T
Tejun Heo 已提交
855 856
#define BUILD_INTERRUPT(name, nr)	BUILD_INTERRUPT3(name, nr, smp_##name)

L
Linus Torvalds 已提交
857
/* The include is where all of the SMP etc. interrupts come from */
858
#include <asm/entry_arch.h>
L
Linus Torvalds 已提交
859 860

ENTRY(coprocessor_error)
861
	RING0_INT_FRAME
862 863
	pushl_cfi $0
	pushl_cfi $do_coprocessor_error
L
Linus Torvalds 已提交
864
	jmp error_code
865
	CFI_ENDPROC
866
END(coprocessor_error)
L
Linus Torvalds 已提交
867 868

ENTRY(simd_coprocessor_error)
869
	RING0_INT_FRAME
870
	pushl_cfi $0
871 872
#ifdef CONFIG_X86_INVD_BUG
	/* AMD 486 bug: invd from userspace calls exception 19 instead of #GP */
873
661:	pushl_cfi $do_general_protection
874 875 876 877 878
662:
.section .altinstructions,"a"
	.balign 4
	.long 661b
	.long 663f
879
	.word X86_FEATURE_XMM
880 881 882 883 884 885 886 887
	.byte 662b-661b
	.byte 664f-663f
.previous
.section .altinstr_replacement,"ax"
663:	pushl $do_simd_coprocessor_error
664:
.previous
#else
888
	pushl_cfi $do_simd_coprocessor_error
889
#endif
L
Linus Torvalds 已提交
890
	jmp error_code
891
	CFI_ENDPROC
892
END(simd_coprocessor_error)
L
Linus Torvalds 已提交
893 894

ENTRY(device_not_available)
895
	RING0_INT_FRAME
896 897
	pushl_cfi $-1			# mark this as an int
	pushl_cfi $do_device_not_available
898
	jmp error_code
899
	CFI_ENDPROC
900
END(device_not_available)
L
Linus Torvalds 已提交
901

902 903
#ifdef CONFIG_PARAVIRT
ENTRY(native_iret)
I
Ingo Molnar 已提交
904
	iret
905 906
.section __ex_table,"a"
	.align 4
I
Ingo Molnar 已提交
907
	.long native_iret, iret_exc
908
.previous
909
END(native_iret)
910

911
ENTRY(native_irq_enable_sysexit)
912 913
	sti
	sysexit
914
END(native_irq_enable_sysexit)
915 916
#endif

L
Linus Torvalds 已提交
917
ENTRY(overflow)
918
	RING0_INT_FRAME
919 920
	pushl_cfi $0
	pushl_cfi $do_overflow
L
Linus Torvalds 已提交
921
	jmp error_code
922
	CFI_ENDPROC
923
END(overflow)
L
Linus Torvalds 已提交
924 925

ENTRY(bounds)
926
	RING0_INT_FRAME
927 928
	pushl_cfi $0
	pushl_cfi $do_bounds
L
Linus Torvalds 已提交
929
	jmp error_code
930
	CFI_ENDPROC
931
END(bounds)
L
Linus Torvalds 已提交
932 933

ENTRY(invalid_op)
934
	RING0_INT_FRAME
935 936
	pushl_cfi $0
	pushl_cfi $do_invalid_op
L
Linus Torvalds 已提交
937
	jmp error_code
938
	CFI_ENDPROC
939
END(invalid_op)
L
Linus Torvalds 已提交
940 941

ENTRY(coprocessor_segment_overrun)
942
	RING0_INT_FRAME
943 944
	pushl_cfi $0
	pushl_cfi $do_coprocessor_segment_overrun
L
Linus Torvalds 已提交
945
	jmp error_code
946
	CFI_ENDPROC
947
END(coprocessor_segment_overrun)
L
Linus Torvalds 已提交
948 949

ENTRY(invalid_TSS)
950
	RING0_EC_FRAME
951
	pushl_cfi $do_invalid_TSS
L
Linus Torvalds 已提交
952
	jmp error_code
953
	CFI_ENDPROC
954
END(invalid_TSS)
L
Linus Torvalds 已提交
955 956

ENTRY(segment_not_present)
957
	RING0_EC_FRAME
958
	pushl_cfi $do_segment_not_present
L
Linus Torvalds 已提交
959
	jmp error_code
960
	CFI_ENDPROC
961
END(segment_not_present)
L
Linus Torvalds 已提交
962 963

ENTRY(stack_segment)
964
	RING0_EC_FRAME
965
	pushl_cfi $do_stack_segment
L
Linus Torvalds 已提交
966
	jmp error_code
967
	CFI_ENDPROC
968
END(stack_segment)
L
Linus Torvalds 已提交
969 970

ENTRY(alignment_check)
971
	RING0_EC_FRAME
972
	pushl_cfi $do_alignment_check
L
Linus Torvalds 已提交
973
	jmp error_code
974
	CFI_ENDPROC
975
END(alignment_check)
L
Linus Torvalds 已提交
976

977 978
ENTRY(divide_error)
	RING0_INT_FRAME
979 980
	pushl_cfi $0			# no error code
	pushl_cfi $do_divide_error
L
Linus Torvalds 已提交
981
	jmp error_code
982
	CFI_ENDPROC
983
END(divide_error)
L
Linus Torvalds 已提交
984 985 986

#ifdef CONFIG_X86_MCE
ENTRY(machine_check)
987
	RING0_INT_FRAME
988 989
	pushl_cfi $0
	pushl_cfi machine_check_vector
L
Linus Torvalds 已提交
990
	jmp error_code
991
	CFI_ENDPROC
992
END(machine_check)
L
Linus Torvalds 已提交
993 994 995
#endif

ENTRY(spurious_interrupt_bug)
996
	RING0_INT_FRAME
997 998
	pushl_cfi $0
	pushl_cfi $do_spurious_interrupt_bug
L
Linus Torvalds 已提交
999
	jmp error_code
1000
	CFI_ENDPROC
1001
END(spurious_interrupt_bug)
1002 1003 1004 1005
/*
 * End of kprobes section
 */
	.popsection
L
Linus Torvalds 已提交
1006

1007 1008 1009
ENTRY(kernel_thread_helper)
	pushl $0		# fake return address for unwinder
	CFI_STARTPROC
1010 1011
	movl %edi,%eax
	call *%esi
1012
	call do_exit
1013
	ud2			# padding for call trace
1014 1015 1016
	CFI_ENDPROC
ENDPROC(kernel_thread_helper)

1017
#ifdef CONFIG_XEN
/* Xen doesn't set %esp to be precisely what the normal sysenter
   entrypoint expects, so fix it up before using the normal path. */
ENTRY(xen_sysenter_target)
	RING0_INT_FRAME
	addl $5*4, %esp		/* remove xen-provided frame */
	CFI_ADJUST_CFA_OFFSET -5*4
	jmp sysenter_past_esp
	CFI_ENDPROC
/*
 * Xen event-channel upcall entry.  Builds a pt_regs frame (with a
 * dummy error code) and dispatches to xen_evtchn_do_upcall().
 */
ENTRY(xen_hypervisor_callback)
	CFI_STARTPROC
	pushl_cfi $0			# dummy error code
	SAVE_ALL
	TRACE_IRQS_OFF

	/* Check to see if we got the event in the critical
	   region in xen_iret_direct, after we've reenabled
	   events and checked for pending events.  This simulates
	   iret instruction's behaviour where it delivers a
	   pending interrupt when enabling interrupts. */
	movl PT_EIP(%esp),%eax
	cmpl $xen_iret_start_crit,%eax
	jb   1f
	cmpl $xen_iret_end_crit,%eax
	jae  1f

	jmp  xen_iret_crit_fixup

ENTRY(xen_do_upcall)
1:	mov %esp, %eax			# pt_regs pointer
	call xen_evtchn_do_upcall
	jmp  ret_from_intr
	CFI_ENDPROC
ENDPROC(xen_hypervisor_callback)
# Hypervisor uses this for application faults while it executes.
# We get here for two reasons:
#  1. Fault while reloading DS, ES, FS or GS
#  2. Fault while executing IRET
# Category 1 we fix up by reattempting the load, and zeroing the segment
# register if the load fails.
# Category 2 we fix up by jumping to do_iret_error. We cannot use the
# normal Linux return path in this case because if we use the IRET hypercall
# to pop the stack frame we end up in an infinite loop of failsafe callbacks.
# We distinguish between categories by maintaining a status value in EAX.
ENTRY(xen_failsafe_callback)
	CFI_STARTPROC
	pushl_cfi %eax
	movl $1,%eax			# assume Category 2 until all loads succeed
1:	mov 4(%esp),%ds
2:	mov 8(%esp),%es
3:	mov 12(%esp),%fs
4:	mov 16(%esp),%gs
	testl %eax,%eax
	popl_cfi %eax
	lea 16(%esp),%esp		# drop the four saved segment slots
	CFI_ADJUST_CFA_OFFSET -16
	jz 5f
	addl $16,%esp
	jmp iret_exc		# EAX != 0 => Category 2 (Bad IRET)
5:	pushl_cfi $0		# EAX == 0 => Category 1 (Bad segment)
	SAVE_ALL
	jmp ret_from_exception
	CFI_ENDPROC

/* Faulting segment loads land here: zero the bad selector and retry. */
.section .fixup,"ax"
6:	xorl %eax,%eax
	movl %eax,4(%esp)
	jmp 1b
7:	xorl %eax,%eax
	movl %eax,8(%esp)
	jmp 2b
8:	xorl %eax,%eax
	movl %eax,12(%esp)
	jmp 3b
9:	xorl %eax,%eax
	movl %eax,16(%esp)
	jmp 4b
.previous
.section __ex_table,"a"
	.align 4
	.long 1b,6b
	.long 2b,7b
	.long 3b,8b
	.long 4b,9b
.previous
ENDPROC(xen_failsafe_callback)
/* HVM guests get event-channel upcalls via a real interrupt vector. */
BUILD_INTERRUPT3(xen_hvm_callback_vector, XEN_HVM_EVTCHN_CALLBACK,
		xen_evtchn_do_upcall)

#endif	/* CONFIG_XEN */
#ifdef CONFIG_FUNCTION_TRACER
#ifdef CONFIG_DYNAMIC_FTRACE

/*
 * With dynamic ftrace, mcount() is a plain ret; call sites are
 * live-patched to call ftrace_caller when tracing is enabled.
 */
ENTRY(mcount)
	ret
END(mcount)
/*
 * Dynamic-ftrace trampoline.  Saves the caller-clobbered registers,
 * computes the traced function's address (return addr on stack minus
 * the mcount call size) and its parent (from the caller's frame),
 * then calls through the patchable ftrace_call site.
 */
ENTRY(ftrace_caller)
	cmpl $0, function_trace_stop
	jne  ftrace_stub

	pushl %eax
	pushl %ecx
	pushl %edx
	movl 0xc(%esp), %eax		# traced function's return address
	movl 0x4(%ebp), %edx		# parent's return address
	subl $MCOUNT_INSN_SIZE, %eax	# -> address of the traced function

.globl ftrace_call
ftrace_call:
	call ftrace_stub		# patched to the real tracer at runtime

	popl %edx
	popl %ecx
	popl %eax
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
.globl ftrace_graph_call
ftrace_graph_call:
	jmp ftrace_stub			# patched to ftrace_graph_caller at runtime
#endif

.globl ftrace_stub
ftrace_stub:
	ret
END(ftrace_caller)
#else /* ! CONFIG_DYNAMIC_FTRACE */

/*
 * Static mcount(): every traced call site enters here.  Fast-path
 * out when no tracer is registered, otherwise save scratch regs
 * and dispatch to the registered trace function.
 */
ENTRY(mcount)
	cmpl $0, function_trace_stop
	jne  ftrace_stub

	cmpl $ftrace_stub, ftrace_trace_function
	jnz trace
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
	cmpl $ftrace_stub, ftrace_graph_return
	jnz ftrace_graph_caller

	cmpl $ftrace_graph_entry_stub, ftrace_graph_entry
	jnz ftrace_graph_caller
#endif
.globl ftrace_stub
ftrace_stub:
	ret

	/* taken from glibc */
trace:
	pushl %eax
	pushl %ecx
	pushl %edx
	movl 0xc(%esp), %eax		# traced function's return address
	movl 0x4(%ebp), %edx		# parent's return address
	subl $MCOUNT_INSN_SIZE, %eax	# -> address of the traced function

	call *ftrace_trace_function

	popl %edx
	popl %ecx
	popl %eax
	jmp ftrace_stub
END(mcount)
#endif /* CONFIG_DYNAMIC_FTRACE */
#endif /* CONFIG_FUNCTION_TRACER */
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
/*
 * Hooks the traced function's return: prepare_ftrace_return()
 * swaps the return address on the stack for return_to_handler.
 */
ENTRY(ftrace_graph_caller)
	cmpl $0, function_trace_stop
	jne ftrace_stub

	pushl %eax
	pushl %ecx
	pushl %edx
	movl 0xc(%esp), %edx		# traced function's return address
	lea 0x4(%ebp), %eax		# &parent return address slot
	movl (%ebp), %ecx		# caller's frame pointer
	subl $MCOUNT_INSN_SIZE, %edx	# -> address of the traced function
	call prepare_ftrace_return
	popl %edx
	popl %ecx
	popl %eax
	ret
END(ftrace_graph_caller)

/*
 * Landed on when a graph-traced function returns: recover the real
 * return address from the ftrace return stack and jump to it.
 * %eax/%edx must be preserved (they may carry the return value).
 */
.globl return_to_handler
return_to_handler:
	pushl %eax
	pushl %edx
	movl %ebp, %eax			# frame pointer for stack matching
	call ftrace_return_to_handler
	movl %eax, %ecx			# original return address
	popl %edx
	popl %eax
	jmp *%ecx
#endif
/*
 * The 32-bit system-call table lives in .rodata; the entries are
 * provided by syscall_table_32.S.
 */
.section .rodata,"a"
#include "syscall_table_32.S"

syscall_table_size=(.-sys_call_table)

/*
 * Some functions should be protected against kprobes
 */
	.pushsection .kprobes.text, "ax"

/*
 * Vector 14: page fault, and below it the common exception tail
 * (error_code) shared by all trap stubs.  On entry to error_code the
 * stack holds: handler address (in the %gs slot), error code, and the
 * hardware iret frame.  It builds a full pt_regs, loads kernel segments,
 * and calls the handler as handler(pt_regs *, error_code).
 */
ENTRY(page_fault)
	RING0_EC_FRAME
	pushl_cfi $do_page_fault
	ALIGN
error_code:
	/* the function address is in %gs's slot on the stack */
	pushl_cfi %fs
	/*CFI_REL_OFFSET fs, 0*/
	pushl_cfi %es
	/*CFI_REL_OFFSET es, 0*/
	pushl_cfi %ds
	/*CFI_REL_OFFSET ds, 0*/
	pushl_cfi %eax
	CFI_REL_OFFSET eax, 0
	pushl_cfi %ebp
	CFI_REL_OFFSET ebp, 0
	pushl_cfi %edi
	CFI_REL_OFFSET edi, 0
	pushl_cfi %esi
	CFI_REL_OFFSET esi, 0
	pushl_cfi %edx
	CFI_REL_OFFSET edx, 0
	pushl_cfi %ecx
	CFI_REL_OFFSET ecx, 0
	pushl_cfi %ebx
	CFI_REL_OFFSET ebx, 0
	cld
	movl $(__KERNEL_PERCPU), %ecx
	movl %ecx, %fs			# per-cpu data via %fs
	UNWIND_ESPFIX_STACK
	GS_TO_REG %ecx
	movl PT_GS(%esp), %edi		# get the function address
	movl PT_ORIG_EAX(%esp), %edx	# get the error code
	movl $-1, PT_ORIG_EAX(%esp)	# no syscall to restart
	REG_TO_PTGS %ecx
	SET_KERNEL_GS %ecx
	movl $(__USER_DS), %ecx
	movl %ecx, %ds
	movl %ecx, %es
	TRACE_IRQS_OFF
	movl %esp,%eax			# pt_regs pointer
	call *%edi
	jmp ret_from_exception
	CFI_ENDPROC
END(page_fault)

/*
 * Debug traps and NMI can happen at the one SYSENTER instruction
 * that sets up the real kernel stack. Check here, since we can't
 * allow the wrong stack to be used.
 *
 * "TSS_sysenter_sp0+12" is because the NMI/debug handler will have
 * already pushed 3 words if it hits on the sysenter instruction:
 * eflags, cs and eip.
 *
 * We just load the right stack, and push the three (known) values
 * by hand onto the new stack - while updating the return eip past
 * the instruction that would have done it for sysenter.
 */
.macro FIX_STACK offset ok label
	cmpw $__KERNEL_CS, 4(%esp)
	jne \ok				# not a kernel-CS hit: stack is fine
\label:
	movl TSS_sysenter_sp0 + \offset(%esp), %esp
	CFI_DEF_CFA esp, 0
	CFI_UNDEFINED eip
	pushfl_cfi
	pushl_cfi $__KERNEL_CS
	pushl_cfi $sysenter_past_esp	# resume past the sysenter setup
	CFI_REL_OFFSET eip, 0
.endm
/*
 * Vector 1: debug trap.  May hit on the sysenter instruction before
 * the kernel stack is set up, in which case FIX_STACK repairs %esp.
 */
ENTRY(debug)
	RING0_INT_FRAME
	cmpl $ia32_sysenter_target,(%esp)
	jne debug_stack_correct
	FIX_STACK 12, debug_stack_correct, debug_esp_fix_insn
debug_stack_correct:
	pushl_cfi $-1			# mark this as an int
	SAVE_ALL
	TRACE_IRQS_OFF
	xorl %edx,%edx			# error code 0
	movl %esp,%eax			# pt_regs pointer
	call do_debug
	jmp ret_from_exception
	CFI_ENDPROC
END(debug)

/*
 * NMI is doubly nasty. It can happen _while_ we're handling
 * a debug fault, and the debug fault hasn't yet been able to
 * clear up the stack. So we first check whether we got  an
 * NMI on the sysenter entry path, but after that we need to
 * check whether we got an NMI on the debug path where the debug
 * fault happened on the sysenter path.
 */
ENTRY(nmi)
	RING0_INT_FRAME
	pushl_cfi %eax
	movl %ss, %eax
	cmpw $__ESPFIX_SS, %ax		# NMI while on the espfix stack?
	popl_cfi %eax
	je nmi_espfix_stack
	cmpl $ia32_sysenter_target,(%esp)
	je nmi_stack_fixup
	pushl_cfi %eax
	movl %esp,%eax
	/* Do not access memory above the end of our stack page,
	 * it might not exist.
	 */
	andl $(THREAD_SIZE-1),%eax
	cmpl $(THREAD_SIZE-20),%eax
	popl_cfi %eax
	jae nmi_stack_correct
	cmpl $ia32_sysenter_target,12(%esp)
	je nmi_debug_stack_check
nmi_stack_correct:
	/* We have a RING0_INT_FRAME here */
	pushl_cfi %eax
	SAVE_ALL
	xorl %edx,%edx		# zero error code
	movl %esp,%eax		# pt_regs pointer
	call do_nmi
	jmp restore_all_notrace	# NMIs must not go through the TRACE_IRQS path
	CFI_ENDPROC

nmi_stack_fixup:
	RING0_INT_FRAME
	FIX_STACK 12, nmi_stack_correct, 1
	jmp nmi_stack_correct

nmi_debug_stack_check:
	/* We have a RING0_INT_FRAME here */
	cmpw $__KERNEL_CS,16(%esp)
	jne nmi_stack_correct
	cmpl $debug,(%esp)
	jb nmi_stack_correct
	cmpl $debug_esp_fix_insn,(%esp)
	ja nmi_stack_correct
	FIX_STACK 24, nmi_stack_correct, 1	# 24: debug already pushed 3 words
	jmp nmi_stack_correct

nmi_espfix_stack:
	/* We have a RING0_INT_FRAME here.
	 *
	 * create the pointer to lss back
	 */
	pushl_cfi %ss
	pushl_cfi %esp
	addl $4, (%esp)
	/* copy the iret frame of 12 bytes */
	.rept 3
	pushl_cfi 16(%esp)
	.endr
	pushl_cfi %eax
	SAVE_ALL
	FIXUP_ESPFIX_STACK		# %eax == %esp
	xorl %edx,%edx			# zero error code
	call do_nmi
	RESTORE_REGS
	lss 12+4(%esp), %esp		# back to espfix stack
	CFI_ADJUST_CFA_OFFSET -24
	jmp irq_return
	CFI_ENDPROC
END(nmi)

/*
 * Vector 3: breakpoint (int3).  No CPU error code; -1 marks a
 * non-syscall entry in the orig_eax slot.
 */
ENTRY(int3)
	RING0_INT_FRAME
	pushl_cfi $-1			# mark this as an int
	SAVE_ALL
	TRACE_IRQS_OFF
	xorl %edx,%edx		# zero error code
	movl %esp,%eax		# pt_regs pointer
	call do_int3
	jmp ret_from_exception
	CFI_ENDPROC
END(int3)

/*
 * Vector 13: general protection fault.  CPU pushes an error code;
 * push the handler and join the common exception path.
 */
ENTRY(general_protection)
	RING0_EC_FRAME
	pushl_cfi $do_general_protection	# C handler address
	jmp error_code
	CFI_ENDPROC
END(general_protection)

#ifdef CONFIG_KVM_GUEST
/*
 * KVM paravirtual async page fault.  The host injects this with an
 * error code on the stack, like a real page fault.
 */
ENTRY(async_page_fault)
	RING0_EC_FRAME
	pushl_cfi $do_async_page_fault		# C handler address
	jmp error_code
	CFI_ENDPROC
END(async_page_fault)
#endif

/*
 * End of kprobes section
 */
	.popsection