entry_32.S 32.5 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
/*
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 */

/*
 * entry.S contains the system-call and fault low-level handling routines.
 * This also contains the timer-interrupt handler, as well as all interrupts
 * and faults that can result in a task-switch.
 *
 * NOTE: This code handles signal-recognition, which happens every time
 * after a timer-interrupt and after each system call.
 *
 * I changed all the .align's to 4 (16 byte alignment), as that's faster
 * on a 486.
 *
17
 * Stack layout in 'syscall_exit':
L
Linus Torvalds 已提交
18 19 20 21 22 23 24 25 26 27 28 29 30 31
 * 	ptrace needs to have all regs on the stack.
 *	if the order here is changed, it needs to be
 *	updated in fork.c:copy_process, signal.c:do_signal,
 *	ptrace.c and ptrace.h
 *
 *	 0(%esp) - %ebx
 *	 4(%esp) - %ecx
 *	 8(%esp) - %edx
 *       C(%esp) - %esi
 *	10(%esp) - %edi
 *	14(%esp) - %ebp
 *	18(%esp) - %eax
 *	1C(%esp) - %ds
 *	20(%esp) - %es
32
 *	24(%esp) - %fs
33 34 35 36 37 38 39
 *	28(%esp) - %gs		saved iff !CONFIG_X86_32_LAZY_GS
 *	2C(%esp) - orig_eax
 *	30(%esp) - %eip
 *	34(%esp) - %cs
 *	38(%esp) - %eflags
 *	3C(%esp) - %oldesp
 *	40(%esp) - %oldss
L
Linus Torvalds 已提交
40 41 42 43 44
 *
 * "current" is in register %ebx during any slow entries.
 */

#include <linux/linkage.h>
45
#include <linux/err.h>
L
Linus Torvalds 已提交
46
#include <asm/thread_info.h>
47
#include <asm/irqflags.h>
L
Linus Torvalds 已提交
48 49 50
#include <asm/errno.h>
#include <asm/segment.h>
#include <asm/smp.h>
51
#include <asm/page_types.h>
S
Stas Sergeev 已提交
52
#include <asm/percpu.h>
53
#include <asm/dwarf2.h>
54
#include <asm/processor-flags.h>
55
#include <asm/ftrace.h>
56
#include <asm/irq_vectors.h>
57
#include <asm/cpufeature.h>
58
#include <asm/alternative-asm.h>
59
#include <asm/asm.h>
60
#include <asm/smap.h>
L
Linus Torvalds 已提交
61

R
Roland McGrath 已提交
62 63 64 65 66 67 68 69 70 71
/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this.  */
#include <linux/elf-em.h>
#define AUDIT_ARCH_I386		(EM_386|__AUDIT_ARCH_LE)
#define __AUDIT_ARCH_LE	   0x40000000

#ifndef CONFIG_AUDITSYSCALL
#define sysenter_audit	syscall_trace_entry
#define sysexit_audit	syscall_exit_work
#endif

J
Jiri Olsa 已提交
72 73
	.section .entry.text, "ax"

74 75 76 77 78
/*
 * We use macros for low-level operations which need to be overridden
 * for paravirtualization.  The following will never clobber any registers:
 *   INTERRUPT_RETURN (aka. "iret")
 *   GET_CR0_INTO_EAX (aka. "movl %cr0, %eax")
79
 *   ENABLE_INTERRUPTS_SYSEXIT (aka "sti; sysexit").
80 81 82 83 84 85 86
 *
 * For DISABLE_INTERRUPTS/ENABLE_INTERRUPTS (aka "cli"/"sti"), you must
 * specify what registers can be overwritten (CLBR_NONE, CLBR_EAX/EDX/ECX/ANY).
 * Allowing a register to be clobbered can shrink the paravirt replacement
 * enough to patch inline, increasing performance.
 */

L
Linus Torvalds 已提交
87
#ifdef CONFIG_PREEMPT
88
#define preempt_stop(clobbers)	DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF
L
Linus Torvalds 已提交
89
#else
90
#define preempt_stop(clobbers)
91
#define resume_kernel		restore_all
L
Linus Torvalds 已提交
92 93
#endif

94 95
.macro TRACE_IRQS_IRET
#ifdef CONFIG_TRACE_IRQFLAGS
96
	testl $X86_EFLAGS_IF,PT_EFLAGS(%esp)     # interrupts off?
97 98 99 100 101 102
	jz 1f
	TRACE_IRQS_ON
1:
#endif
.endm

103 104 105 106 107 108 109 110 111 112 113 114 115
/*
 * User gs save/restore
 *
 * %gs is used for userland TLS and kernel only uses it for stack
 * canary which is required to be at %gs:20 by gcc.  Read the comment
 * at the top of stackprotector.h for more info.
 *
 * Local labels 98 and 99 are used.
 */
#ifdef CONFIG_X86_32_LAZY_GS

 /* unfortunately push/pop can't be no-op */
.macro PUSH_GS
116
	pushl_cfi $0
117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139
.endm
.macro POP_GS pop=0
	addl $(4 + \pop), %esp
	CFI_ADJUST_CFA_OFFSET -(4 + \pop)
.endm
.macro POP_GS_EX
.endm

 /* all the rest are no-op */
.macro PTGS_TO_GS
.endm
.macro PTGS_TO_GS_EX
.endm
.macro GS_TO_REG reg
.endm
.macro REG_TO_PTGS reg
.endm
.macro SET_KERNEL_GS reg
.endm

#else	/* CONFIG_X86_32_LAZY_GS */

.macro PUSH_GS
140
	pushl_cfi %gs
141 142 143 144
	/*CFI_REL_OFFSET gs, 0*/
.endm

.macro POP_GS pop=0
145
98:	popl_cfi %gs
146 147 148 149 150 151 152 153 154 155 156
	/*CFI_RESTORE gs*/
  .if \pop <> 0
	add $\pop, %esp
	CFI_ADJUST_CFA_OFFSET -\pop
  .endif
.endm
.macro POP_GS_EX
.pushsection .fixup, "ax"
99:	movl $0, (%esp)
	jmp 98b
.popsection
157
	_ASM_EXTABLE(98b,99b)
158 159 160 161 162 163 164 165 166 167
.endm

.macro PTGS_TO_GS
98:	mov PT_GS(%esp), %gs
.endm
.macro PTGS_TO_GS_EX
.pushsection .fixup, "ax"
99:	movl $0, PT_GS(%esp)
	jmp 98b
.popsection
168
	_ASM_EXTABLE(98b,99b)
169 170 171 172 173 174 175 176 177 178 179
.endm

.macro GS_TO_REG reg
	movl %gs, \reg
	/*CFI_REGISTER gs, \reg*/
.endm
.macro REG_TO_PTGS reg
	movl \reg, PT_GS(%esp)
	/*CFI_REL_OFFSET gs, PT_GS*/
.endm
.macro SET_KERNEL_GS reg
180
	movl $(__KERNEL_STACK_CANARY), \reg
181 182 183 184 185
	movl \reg, %gs
.endm

#endif	/* CONFIG_X86_32_LAZY_GS */

186 187
.macro SAVE_ALL
	cld
188
	PUSH_GS
189
	pushl_cfi %fs
190
	/*CFI_REL_OFFSET fs, 0;*/
191
	pushl_cfi %es
192
	/*CFI_REL_OFFSET es, 0;*/
193
	pushl_cfi %ds
194
	/*CFI_REL_OFFSET ds, 0;*/
195
	pushl_cfi %eax
196
	CFI_REL_OFFSET eax, 0
197
	pushl_cfi %ebp
198
	CFI_REL_OFFSET ebp, 0
199
	pushl_cfi %edi
200
	CFI_REL_OFFSET edi, 0
201
	pushl_cfi %esi
202
	CFI_REL_OFFSET esi, 0
203
	pushl_cfi %edx
204
	CFI_REL_OFFSET edx, 0
205
	pushl_cfi %ecx
206
	CFI_REL_OFFSET ecx, 0
207
	pushl_cfi %ebx
208 209 210 211 212
	CFI_REL_OFFSET ebx, 0
	movl $(__USER_DS), %edx
	movl %edx, %ds
	movl %edx, %es
	movl $(__KERNEL_PERCPU), %edx
213
	movl %edx, %fs
214
	SET_KERNEL_GS %edx
215
.endm
L
Linus Torvalds 已提交
216

217
.macro RESTORE_INT_REGS
218
	popl_cfi %ebx
219
	CFI_RESTORE ebx
220
	popl_cfi %ecx
221
	CFI_RESTORE ecx
222
	popl_cfi %edx
223
	CFI_RESTORE edx
224
	popl_cfi %esi
225
	CFI_RESTORE esi
226
	popl_cfi %edi
227
	CFI_RESTORE edi
228
	popl_cfi %ebp
229
	CFI_RESTORE ebp
230
	popl_cfi %eax
231
	CFI_RESTORE eax
232
.endm
L
Linus Torvalds 已提交
233

234
.macro RESTORE_REGS pop=0
235
	RESTORE_INT_REGS
236
1:	popl_cfi %ds
237
	/*CFI_RESTORE ds;*/
238
2:	popl_cfi %es
239
	/*CFI_RESTORE es;*/
240
3:	popl_cfi %fs
241
	/*CFI_RESTORE fs;*/
242
	POP_GS \pop
243 244 245 246 247 248 249
.pushsection .fixup, "ax"
4:	movl $0, (%esp)
	jmp 1b
5:	movl $0, (%esp)
	jmp 2b
6:	movl $0, (%esp)
	jmp 3b
250
.popsection
251 252 253
	_ASM_EXTABLE(1b,4b)
	_ASM_EXTABLE(2b,5b)
	_ASM_EXTABLE(3b,6b)
254
	POP_GS_EX
255
.endm
L
Linus Torvalds 已提交
256

257 258 259 260 261
.macro RING0_INT_FRAME
	CFI_STARTPROC simple
	CFI_SIGNAL_FRAME
	CFI_DEF_CFA esp, 3*4
	/*CFI_OFFSET cs, -2*4;*/
262
	CFI_OFFSET eip, -3*4
263
.endm
264

265 266 267 268 269
.macro RING0_EC_FRAME
	CFI_STARTPROC simple
	CFI_SIGNAL_FRAME
	CFI_DEF_CFA esp, 4*4
	/*CFI_OFFSET cs, -2*4;*/
270
	CFI_OFFSET eip, -3*4
271
.endm
272

273 274 275 276 277 278 279 280 281 282 283 284 285 286
.macro RING0_PTREGS_FRAME
	CFI_STARTPROC simple
	CFI_SIGNAL_FRAME
	CFI_DEF_CFA esp, PT_OLDESP-PT_EBX
	/*CFI_OFFSET cs, PT_CS-PT_OLDESP;*/
	CFI_OFFSET eip, PT_EIP-PT_OLDESP
	/*CFI_OFFSET es, PT_ES-PT_OLDESP;*/
	/*CFI_OFFSET ds, PT_DS-PT_OLDESP;*/
	CFI_OFFSET eax, PT_EAX-PT_OLDESP
	CFI_OFFSET ebp, PT_EBP-PT_OLDESP
	CFI_OFFSET edi, PT_EDI-PT_OLDESP
	CFI_OFFSET esi, PT_ESI-PT_OLDESP
	CFI_OFFSET edx, PT_EDX-PT_OLDESP
	CFI_OFFSET ecx, PT_ECX-PT_OLDESP
287
	CFI_OFFSET ebx, PT_EBX-PT_OLDESP
288
.endm
L
Linus Torvalds 已提交
289 290

ENTRY(ret_from_fork)
291
	CFI_STARTPROC
292
	pushl_cfi %eax
L
Linus Torvalds 已提交
293 294
	call schedule_tail
	GET_THREAD_INFO(%ebp)
295 296 297
	popl_cfi %eax
	pushl_cfi $0x0202		# Reset kernel eflags
	popfl_cfi
L
Linus Torvalds 已提交
298
	jmp syscall_exit
299
	CFI_ENDPROC
300
END(ret_from_fork)
L
Linus Torvalds 已提交
301

302 303 304 305
ENTRY(ret_from_kernel_thread)
	CFI_STARTPROC
	pushl_cfi %eax
	call schedule_tail
306
	GET_THREAD_INFO(%ebp)
307 308 309 310 311 312
	popl_cfi %eax
	pushl_cfi $0x0202		# Reset kernel eflags
	popfl_cfi
	movl PT_EBP(%esp),%eax
	call *PT_EBX(%esp)
	movl $0,PT_EAX(%esp)
313
	jmp syscall_exit
314 315
	CFI_ENDPROC
ENDPROC(ret_from_kernel_thread)
316

L
Linus Torvalds 已提交
317 318 319 320 321 322 323 324 325
/*
 * Return to user mode is not as complex as all this looks,
 * but we want the default path for a system call return to
 * go as quickly as possible which is why some of this is
 * less clear than it otherwise should be.
 */

	# userspace resumption stub bypassing syscall exit tracing
	ALIGN
326
	RING0_PTREGS_FRAME
L
Linus Torvalds 已提交
327
ret_from_exception:
328
	preempt_stop(CLBR_ANY)
L
Linus Torvalds 已提交
329 330
ret_from_intr:
	GET_THREAD_INFO(%ebp)
331
#ifdef CONFIG_VM86
332 333
	movl PT_EFLAGS(%esp), %eax	# mix EFLAGS and CS
	movb PT_CS(%esp), %al
334
	andl $(X86_EFLAGS_VM | SEGMENT_RPL_MASK), %eax
335 336
#else
	/*
337
	 * We can be coming here from child spawned by kernel_thread().
338 339 340 341
	 */
	movl PT_CS(%esp), %eax
	andl $SEGMENT_RPL_MASK, %eax
#endif
342 343
	cmpl $USER_RPL, %eax
	jb resume_kernel		# not returning to v8086 or userspace
344

L
Linus Torvalds 已提交
345
ENTRY(resume_userspace)
346
	LOCKDEP_SYS_EXIT
347
 	DISABLE_INTERRUPTS(CLBR_ANY)	# make sure we don't miss an interrupt
L
Linus Torvalds 已提交
348 349
					# setting need_resched or sigpending
					# between sampling and the iret
350
	TRACE_IRQS_OFF
L
Linus Torvalds 已提交
351 352 353 354 355
	movl TI_flags(%ebp), %ecx
	andl $_TIF_WORK_MASK, %ecx	# is there any work to be done on
					# int/exception return?
	jne work_pending
	jmp restore_all
356
END(ret_from_exception)
L
Linus Torvalds 已提交
357 358 359

#ifdef CONFIG_PREEMPT
ENTRY(resume_kernel)
360
	DISABLE_INTERRUPTS(CLBR_ANY)
L
Linus Torvalds 已提交
361
need_resched:
362 363
	cmpl $0,PER_CPU_VAR(__preempt_count)
	jnz restore_all
364
	testl $X86_EFLAGS_IF,PT_EFLAGS(%esp)	# interrupts off (exception path) ?
L
Linus Torvalds 已提交
365 366 367
	jz restore_all
	call preempt_schedule_irq
	jmp need_resched
368
END(resume_kernel)
L
Linus Torvalds 已提交
369
#endif
370
	CFI_ENDPROC
L
Linus Torvalds 已提交
371 372 373 374 375

/* SYSENTER_RETURN points to after the "sysenter" instruction in
   the vsyscall page.  See vsyscall-sysentry.S, which defines the symbol.  */

	# sysenter call handler stub
R
Roland McGrath 已提交
376
ENTRY(ia32_sysenter_target)
377
	CFI_STARTPROC simple
378
	CFI_SIGNAL_FRAME
379 380
	CFI_DEF_CFA esp, 0
	CFI_REGISTER esp, ebp
381
	movl TSS_sysenter_sp0(%esp),%esp
L
Linus Torvalds 已提交
382
sysenter_past_esp:
383
	/*
384 385 386
	 * Interrupts are disabled here, but we can't trace it until
	 * enough kernel state to call TRACE_IRQS_OFF can be called - but
	 * we immediately enable interrupts at that point anyway.
387
	 */
388
	pushl_cfi $__USER_DS
389
	/*CFI_REL_OFFSET ss, 0*/
390
	pushl_cfi %ebp
391
	CFI_REL_OFFSET esp, 0
392
	pushfl_cfi
393
	orl $X86_EFLAGS_IF, (%esp)
394
	pushl_cfi $__USER_CS
395
	/*CFI_REL_OFFSET cs, 0*/
396 397 398 399 400
	/*
	 * Push current_thread_info()->sysenter_return to the stack.
	 * A tiny bit of offset fixup is necessary - 4*4 means the 4 words
	 * pushed above; +8 corresponds to copy_thread's esp0 setting.
	 */
401
	pushl_cfi ((TI_sysenter_return)-THREAD_SIZE+8+4*4)(%esp)
402
	CFI_REL_OFFSET eip, 0
L
Linus Torvalds 已提交
403

404
	pushl_cfi %eax
405 406 407
	SAVE_ALL
	ENABLE_INTERRUPTS(CLBR_NONE)

L
Linus Torvalds 已提交
408 409 410 411 412 413
/*
 * Load the potential sixth argument from user stack.
 * Careful about security.
 */
	cmpl $__PAGE_OFFSET-3,%ebp
	jae syscall_fault
414
	ASM_STAC
L
Linus Torvalds 已提交
415
1:	movl (%ebp),%ebp
416
	ASM_CLAC
417
	movl %ebp,PT_EBP(%esp)
418
	_ASM_EXTABLE(1b,syscall_fault)
L
Linus Torvalds 已提交
419 420 421

	GET_THREAD_INFO(%ebp)

422
	testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp)
R
Roland McGrath 已提交
423 424
	jnz sysenter_audit
sysenter_do_call:
425
	cmpl $(NR_syscalls), %eax
426
	jae sysenter_badsys
L
Linus Torvalds 已提交
427
	call *sys_call_table(,%eax,4)
428
sysenter_after_call:
429
	movl %eax,PT_EAX(%esp)
430
	LOCKDEP_SYS_EXIT
431
	DISABLE_INTERRUPTS(CLBR_ANY)
432
	TRACE_IRQS_OFF
L
Linus Torvalds 已提交
433
	movl TI_flags(%ebp), %ecx
434
	testl $_TIF_ALLWORK_MASK, %ecx
R
Roland McGrath 已提交
435 436
	jne sysexit_audit
sysenter_exit:
L
Linus Torvalds 已提交
437
/* if something modifies registers it must also disable sysexit */
438 439
	movl PT_EIP(%esp), %edx
	movl PT_OLDESP(%esp), %ecx
L
Linus Torvalds 已提交
440
	xorl %ebp,%ebp
441
	TRACE_IRQS_ON
442
1:	mov  PT_FS(%esp), %fs
443
	PTGS_TO_GS
444
	ENABLE_INTERRUPTS_SYSEXIT
R
Roland McGrath 已提交
445 446 447

#ifdef CONFIG_AUDITSYSCALL
sysenter_audit:
448
	testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
R
Roland McGrath 已提交
449 450 451 452 453 454 455 456 457
	jnz syscall_trace_entry
	addl $4,%esp
	CFI_ADJUST_CFA_OFFSET -4
	/* %esi already in 8(%esp)	   6th arg: 4th syscall arg */
	/* %edx already in 4(%esp)	   5th arg: 3rd syscall arg */
	/* %ecx already in 0(%esp)	   4th arg: 2nd syscall arg */
	movl %ebx,%ecx			/* 3rd arg: 1st syscall arg */
	movl %eax,%edx			/* 2nd arg: syscall number */
	movl $AUDIT_ARCH_I386,%eax	/* 1st arg: audit arch */
458
	call __audit_syscall_entry
459
	pushl_cfi %ebx
R
Roland McGrath 已提交
460 461 462 463
	movl PT_EAX(%esp),%eax		/* reload syscall number */
	jmp sysenter_do_call

sysexit_audit:
464
	testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %ecx
R
Roland McGrath 已提交
465 466 467 468
	jne syscall_exit_work
	TRACE_IRQS_ON
	ENABLE_INTERRUPTS(CLBR_ANY)
	movl %eax,%edx		/* second arg, syscall return value */
469 470
	cmpl $-MAX_ERRNO,%eax	/* is it an error ? */
	setbe %al		/* 1 if so, 0 if not */
R
Roland McGrath 已提交
471
	movzbl %al,%eax		/* zero-extend that */
472
	call __audit_syscall_exit
R
Roland McGrath 已提交
473 474 475
	DISABLE_INTERRUPTS(CLBR_ANY)
	TRACE_IRQS_OFF
	movl TI_flags(%ebp), %ecx
476
	testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %ecx
R
Roland McGrath 已提交
477 478 479 480 481
	jne syscall_exit_work
	movl PT_EAX(%esp),%eax	/* reload syscall return value */
	jmp sysenter_exit
#endif

482
	CFI_ENDPROC
483
.pushsection .fixup,"ax"
484
2:	movl $0,PT_FS(%esp)
485 486
	jmp 1b
.popsection
487
	_ASM_EXTABLE(1b,2b)
488
	PTGS_TO_GS_EX
R
Roland McGrath 已提交
489
ENDPROC(ia32_sysenter_target)
L
Linus Torvalds 已提交
490 491 492

	# system call handler stub
ENTRY(system_call)
493
	RING0_INT_FRAME			# can't unwind into user space anyway
494
	ASM_CLAC
495
	pushl_cfi %eax			# save orig_eax
L
Linus Torvalds 已提交
496 497
	SAVE_ALL
	GET_THREAD_INFO(%ebp)
498
					# system call tracing in operation / emulation
499
	testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp)
L
Linus Torvalds 已提交
500
	jnz syscall_trace_entry
501
	cmpl $(NR_syscalls), %eax
L
Linus Torvalds 已提交
502 503 504
	jae syscall_badsys
syscall_call:
	call *sys_call_table(,%eax,4)
505
syscall_after_call:
506
	movl %eax,PT_EAX(%esp)		# store the return value
L
Linus Torvalds 已提交
507
syscall_exit:
508
	LOCKDEP_SYS_EXIT
509
	DISABLE_INTERRUPTS(CLBR_ANY)	# make sure we don't miss an interrupt
L
Linus Torvalds 已提交
510 511
					# setting need_resched or sigpending
					# between sampling and the iret
512
	TRACE_IRQS_OFF
L
Linus Torvalds 已提交
513
	movl TI_flags(%ebp), %ecx
514
	testl $_TIF_ALLWORK_MASK, %ecx	# current->work
L
Linus Torvalds 已提交
515 516 517
	jne syscall_exit_work

restore_all:
518 519
	TRACE_IRQS_IRET
restore_all_notrace:
520
#ifdef CONFIG_X86_ESPFIX32
521 522
	movl PT_EFLAGS(%esp), %eax	# mix EFLAGS, SS and CS
	# Warning: PT_OLDSS(%esp) contains the wrong/random values if we
523 524
	# are returning to the kernel.
	# See comments in process.c:copy_thread() for details.
525 526
	movb PT_OLDSS(%esp), %ah
	movb PT_CS(%esp), %al
527
	andl $(X86_EFLAGS_VM | (SEGMENT_TI_MASK << 8) | SEGMENT_RPL_MASK), %eax
528
	cmpl $((SEGMENT_LDT << 8) | USER_RPL), %eax
529
	CFI_REMEMBER_STATE
L
Linus Torvalds 已提交
530
	je ldt_ss			# returning to user-space with LDT SS
531
#endif
L
Linus Torvalds 已提交
532
restore_nocheck:
533
	RESTORE_REGS 4			# skip orig_eax/error_code
A
Adrian Bunk 已提交
534
irq_return:
I
Ingo Molnar 已提交
535
	INTERRUPT_RETURN
L
Linus Torvalds 已提交
536
.section .fixup,"ax"
537
ENTRY(iret_exc)
538 539 540
	pushl $0			# no error code
	pushl $do_iret_error
	jmp error_code
L
Linus Torvalds 已提交
541
.previous
542
	_ASM_EXTABLE(irq_return,iret_exc)
L
Linus Torvalds 已提交
543

544
#ifdef CONFIG_X86_ESPFIX32
545
	CFI_RESTORE_STATE
L
Linus Torvalds 已提交
546
ldt_ss:
547 548 549 550 551 552 553 554 555
#ifdef CONFIG_PARAVIRT
	/*
	 * The kernel can't run on a non-flat stack if paravirt mode
	 * is active.  Rather than try to fixup the high bits of
	 * ESP, bypass this code entirely.  This may break DOSemu
	 * and/or Wine support in a paravirt VM, although the option
	 * is still available to implement the setting of the high
	 * 16-bits in the INTERRUPT_RETURN paravirt-op.
	 */
556
	cmpl $0, pv_info+PARAVIRT_enabled
557 558 559
	jne restore_nocheck
#endif

560 561 562 563 564 565 566 567 568 569 570
/*
 * Setup and switch to ESPFIX stack
 *
 * We're returning to userspace with a 16 bit stack. The CPU will not
 * restore the high word of ESP for us on executing iret... This is an
 * "official" bug of all the x86-compatible CPUs, which we can work
 * around to make dosemu and wine happy. We do this by preloading the
 * high word of ESP with the high word of the userspace ESP while
 * compensating for the offset by changing to the ESPFIX segment with
 * a base address that matches for the difference.
 */
571
#define GDT_ESPFIX_SS PER_CPU_VAR(gdt_page) + (GDT_ENTRY_ESPFIX_SS * 8)
572 573 574 575 576
	mov %esp, %edx			/* load kernel esp */
	mov PT_OLDESP(%esp), %eax	/* load userspace esp */
	mov %dx, %ax			/* eax: new kernel esp */
	sub %eax, %edx			/* offset (low word is 0) */
	shr $16, %edx
577 578
	mov %dl, GDT_ESPFIX_SS + 4 /* bits 16..23 */
	mov %dh, GDT_ESPFIX_SS + 7 /* bits 24..31 */
579 580
	pushl_cfi $__ESPFIX_SS
	pushl_cfi %eax			/* new kernel esp */
581 582 583
	/* Disable interrupts, but do not irqtrace this section: we
	 * will soon execute iret and the tracer was already set to
	 * the irqstate after the iret */
584
	DISABLE_INTERRUPTS(CLBR_EAX)
585
	lss (%esp), %esp		/* switch to espfix segment */
S
Stas Sergeev 已提交
586 587
	CFI_ADJUST_CFA_OFFSET -8
	jmp restore_nocheck
588
#endif
589
	CFI_ENDPROC
590
ENDPROC(system_call)
L
Linus Torvalds 已提交
591 592 593

	# perform work that needs to be done immediately before resumption
	ALIGN
594
	RING0_PTREGS_FRAME		# can't unwind into user space anyway
L
Linus Torvalds 已提交
595 596 597 598 599
work_pending:
	testb $_TIF_NEED_RESCHED, %cl
	jz work_notifysig
work_resched:
	call schedule
600
	LOCKDEP_SYS_EXIT
601
	DISABLE_INTERRUPTS(CLBR_ANY)	# make sure we don't miss an interrupt
L
Linus Torvalds 已提交
602 603
					# setting need_resched or sigpending
					# between sampling and the iret
604
	TRACE_IRQS_OFF
L
Linus Torvalds 已提交
605 606 607 608 609 610 611 612 613
	movl TI_flags(%ebp), %ecx
	andl $_TIF_WORK_MASK, %ecx	# is there any work to be done other
					# than syscall tracing?
	jz restore_all
	testb $_TIF_NEED_RESCHED, %cl
	jnz work_resched

work_notifysig:				# deal with pending signals and
					# notify-resume requests
614
#ifdef CONFIG_VM86
615
	testl $X86_EFLAGS_VM, PT_EFLAGS(%esp)
L
Linus Torvalds 已提交
616 617 618
	movl %esp, %eax
	jne work_notifysig_v86		# returning to kernel-space or
					# vm86-space
619 620 621 622
1:
#else
	movl %esp, %eax
#endif
623 624
	TRACE_IRQS_ON
	ENABLE_INTERRUPTS(CLBR_NONE)
625 626 627 628
	movb PT_CS(%esp), %bl
	andb $SEGMENT_RPL_MASK, %bl
	cmpb $USER_RPL, %bl
	jb resume_kernel
L
Linus Torvalds 已提交
629 630
	xorl %edx, %edx
	call do_notify_resume
631
	jmp resume_userspace
L
Linus Torvalds 已提交
632

633
#ifdef CONFIG_VM86
L
Linus Torvalds 已提交
634 635
	ALIGN
work_notifysig_v86:
636
	pushl_cfi %ecx			# save ti_flags for do_notify_resume
L
Linus Torvalds 已提交
637
	call save_v86_state		# %eax contains pt_regs pointer
638
	popl_cfi %ecx
L
Linus Torvalds 已提交
639
	movl %eax, %esp
640
	jmp 1b
641
#endif
642
END(work_pending)
L
Linus Torvalds 已提交
643 644 645 646

	# perform syscall exit tracing
	ALIGN
syscall_trace_entry:
647
	movl $-ENOSYS,PT_EAX(%esp)
L
Linus Torvalds 已提交
648
	movl %esp, %eax
649 650
	call syscall_trace_enter
	/* What it returned is what we'll actually use.  */
651
	cmpl $(NR_syscalls), %eax
L
Linus Torvalds 已提交
652 653
	jnae syscall_call
	jmp syscall_exit
654
END(syscall_trace_entry)
L
Linus Torvalds 已提交
655 656 657 658

	# perform syscall exit tracing
	ALIGN
syscall_exit_work:
659
	testl $_TIF_WORK_SYSCALL_EXIT, %ecx
L
Linus Torvalds 已提交
660
	jz work_pending
661
	TRACE_IRQS_ON
662
	ENABLE_INTERRUPTS(CLBR_ANY)	# could let syscall_trace_leave() call
L
Linus Torvalds 已提交
663 664
					# schedule() instead
	movl %esp, %eax
665
	call syscall_trace_leave
L
Linus Torvalds 已提交
666
	jmp resume_userspace
667
END(syscall_exit_work)
668
	CFI_ENDPROC
L
Linus Torvalds 已提交
669

670
	RING0_INT_FRAME			# can't unwind into user space anyway
L
Linus Torvalds 已提交
671
syscall_fault:
672
	ASM_CLAC
L
Linus Torvalds 已提交
673
	GET_THREAD_INFO(%ebp)
674
	movl $-EFAULT,PT_EAX(%esp)
L
Linus Torvalds 已提交
675
	jmp resume_userspace
676
END(syscall_fault)
L
Linus Torvalds 已提交
677 678

syscall_badsys:
679 680
	movl $-ENOSYS,%eax
	jmp syscall_after_call
681 682 683
END(syscall_badsys)

sysenter_badsys:
684
	movl $-ENOSYS,%eax
685
	jmp sysenter_after_call
686
END(syscall_badsys)
687
	CFI_ENDPROC
L
Linus Torvalds 已提交
688

689
.macro FIXUP_ESPFIX_STACK
690 691 692 693 694 695 696
/*
 * Switch back for ESPFIX stack to the normal zerobased stack
 *
 * We can't call C functions using the ESPFIX stack. This code reads
 * the high word of the segment base from the GDT and swiches to the
 * normal stack and adjusts ESP with the matching offset.
 */
697
#ifdef CONFIG_X86_ESPFIX32
698
	/* fixup the stack */
699 700
	mov GDT_ESPFIX_SS + 4, %al /* bits 16..23 */
	mov GDT_ESPFIX_SS + 7, %ah /* bits 24..31 */
701 702
	shl $16, %eax
	addl %esp, %eax			/* the adjusted stack pointer */
703 704
	pushl_cfi $__KERNEL_DS
	pushl_cfi %eax
705
	lss (%esp), %esp		/* switch to the normal stack segment */
706
	CFI_ADJUST_CFA_OFFSET -8
707
#endif
708 709
.endm
.macro UNWIND_ESPFIX_STACK
710
#ifdef CONFIG_X86_ESPFIX32
711 712 713 714 715 716 717 718 719 720
	movl %ss, %eax
	/* see if on espfix stack */
	cmpw $__ESPFIX_SS, %ax
	jne 27f
	movl $__KERNEL_DS, %eax
	movl %eax, %ds
	movl %eax, %es
	/* switch to normal stack */
	FIXUP_ESPFIX_STACK
27:
721
#endif
722
.endm
L
Linus Torvalds 已提交
723 724

/*
725 726 727
 * Build the entry stubs and pointer table with some assembler magic.
 * We pack 7 stubs into a single 32-byte chunk, which will fit in a
 * single cache line on all modern x86 implementations.
L
Linus Torvalds 已提交
728
 */
729
.section .init.rodata,"a"
L
Linus Torvalds 已提交
730
ENTRY(interrupt)
J
Jiri Olsa 已提交
731
.section .entry.text, "ax"
732 733
	.p2align 5
	.p2align CONFIG_X86_L1_CACHE_SHIFT
L
Linus Torvalds 已提交
734
ENTRY(irq_entries_start)
735
	RING0_INT_FRAME
736
vector=FIRST_EXTERNAL_VECTOR
737 738 739 740
.rept (NR_VECTORS-FIRST_EXTERNAL_VECTOR+6)/7
	.balign 32
  .rept	7
    .if vector < NR_VECTORS
741
      .if vector <> FIRST_EXTERNAL_VECTOR
742
	CFI_ADJUST_CFA_OFFSET -4
743
      .endif
744
1:	pushl_cfi $(~vector+0x80)	/* Note: always in signed byte range */
745
      .if ((vector-FIRST_EXTERNAL_VECTOR)%7) <> 6
746 747 748
	jmp 2f
      .endif
      .previous
L
Linus Torvalds 已提交
749
	.long 1b
J
Jiri Olsa 已提交
750
      .section .entry.text, "ax"
L
Linus Torvalds 已提交
751
vector=vector+1
752 753 754
    .endif
  .endr
2:	jmp common_interrupt
L
Linus Torvalds 已提交
755
.endr
756 757 758 759 760
END(irq_entries_start)

.previous
END(interrupt)
.previous
L
Linus Torvalds 已提交
761

762 763 764 765
/*
 * the CPU automatically disables interrupts when executing an IRQ vector,
 * so IRQ-flags tracing has to follow that:
 */
766
	.p2align CONFIG_X86_L1_CACHE_SHIFT
L
Linus Torvalds 已提交
767
common_interrupt:
768
	ASM_CLAC
769
	addl $-0x80,(%esp)	/* Adjust vector into the [-256,-1] range */
L
Linus Torvalds 已提交
770
	SAVE_ALL
771
	TRACE_IRQS_OFF
L
Linus Torvalds 已提交
772 773 774
	movl %esp,%eax
	call do_IRQ
	jmp ret_from_intr
775
ENDPROC(common_interrupt)
776
	CFI_ENDPROC
L
Linus Torvalds 已提交
777

T
Tejun Heo 已提交
778
#define BUILD_INTERRUPT3(name, nr, fn)	\
L
Linus Torvalds 已提交
779
ENTRY(name)				\
780
	RING0_INT_FRAME;		\
781
	ASM_CLAC;			\
782
	pushl_cfi $~(nr);		\
783
	SAVE_ALL;			\
784
	TRACE_IRQS_OFF			\
L
Linus Torvalds 已提交
785
	movl %esp,%eax;			\
T
Tejun Heo 已提交
786
	call fn;			\
787
	jmp ret_from_intr;		\
788 789
	CFI_ENDPROC;			\
ENDPROC(name)
L
Linus Torvalds 已提交
790

791 792 793 794 795 796 797 798 799 800 801

#ifdef CONFIG_TRACING
#define TRACE_BUILD_INTERRUPT(name, nr)		\
	BUILD_INTERRUPT3(trace_##name, nr, smp_trace_##name)
#else
#define TRACE_BUILD_INTERRUPT(name, nr)
#endif

#define BUILD_INTERRUPT(name, nr) \
	BUILD_INTERRUPT3(name, nr, smp_##name); \
	TRACE_BUILD_INTERRUPT(name, nr)
T
Tejun Heo 已提交
802

L
Linus Torvalds 已提交
803
/* The include is where all of the SMP etc. interrupts come from */
804
#include <asm/entry_arch.h>
L
Linus Torvalds 已提交
805 806

ENTRY(coprocessor_error)
807
	RING0_INT_FRAME
808
	ASM_CLAC
809 810
	pushl_cfi $0
	pushl_cfi $do_coprocessor_error
L
Linus Torvalds 已提交
811
	jmp error_code
812
	CFI_ENDPROC
813
END(coprocessor_error)
L
Linus Torvalds 已提交
814 815

ENTRY(simd_coprocessor_error)
816
	RING0_INT_FRAME
817
	ASM_CLAC
818
	pushl_cfi $0
819 820
#ifdef CONFIG_X86_INVD_BUG
	/* AMD 486 bug: invd from userspace calls exception 19 instead of #GP */
821
661:	pushl_cfi $do_general_protection
822 823
662:
.section .altinstructions,"a"
824
	altinstruction_entry 661b, 663f, X86_FEATURE_XMM, 662b-661b, 664f-663f
825 826 827 828 829 830
.previous
.section .altinstr_replacement,"ax"
663:	pushl $do_simd_coprocessor_error
664:
.previous
#else
831
	pushl_cfi $do_simd_coprocessor_error
832
#endif
L
Linus Torvalds 已提交
833
	jmp error_code
834
	CFI_ENDPROC
835
END(simd_coprocessor_error)
L
Linus Torvalds 已提交
836 837

ENTRY(device_not_available)
838
	RING0_INT_FRAME
839
	ASM_CLAC
840 841
	pushl_cfi $-1			# mark this as an int
	pushl_cfi $do_device_not_available
842
	jmp error_code
843
	CFI_ENDPROC
844
END(device_not_available)
L
Linus Torvalds 已提交
845

846 847
#ifdef CONFIG_PARAVIRT
ENTRY(native_iret)
I
Ingo Molnar 已提交
848
	iret
849
	_ASM_EXTABLE(native_iret, iret_exc)
850
END(native_iret)
851

852
ENTRY(native_irq_enable_sysexit)
853 854
	sti
	sysexit
855
END(native_irq_enable_sysexit)
856 857
#endif

L
Linus Torvalds 已提交
858
ENTRY(overflow)
859
	RING0_INT_FRAME
860
	ASM_CLAC
861 862
	pushl_cfi $0
	pushl_cfi $do_overflow
L
Linus Torvalds 已提交
863
	jmp error_code
864
	CFI_ENDPROC
865
END(overflow)
L
Linus Torvalds 已提交
866 867

ENTRY(bounds)
868
	RING0_INT_FRAME
869
	ASM_CLAC
870 871
	pushl_cfi $0
	pushl_cfi $do_bounds
L
Linus Torvalds 已提交
872
	jmp error_code
873
	CFI_ENDPROC
874
END(bounds)
L
Linus Torvalds 已提交
875 876

ENTRY(invalid_op)
877
	RING0_INT_FRAME
878
	ASM_CLAC
879 880
	pushl_cfi $0
	pushl_cfi $do_invalid_op
L
Linus Torvalds 已提交
881
	jmp error_code
882
	CFI_ENDPROC
883
END(invalid_op)
L
Linus Torvalds 已提交
884 885

ENTRY(coprocessor_segment_overrun)
886
	RING0_INT_FRAME
887
	ASM_CLAC
888 889
	pushl_cfi $0
	pushl_cfi $do_coprocessor_segment_overrun
L
Linus Torvalds 已提交
890
	jmp error_code
891
	CFI_ENDPROC
892
END(coprocessor_segment_overrun)
L
Linus Torvalds 已提交
893 894

ENTRY(invalid_TSS)
895
	RING0_EC_FRAME
896
	ASM_CLAC
897
	pushl_cfi $do_invalid_TSS
L
Linus Torvalds 已提交
898
	jmp error_code
899
	CFI_ENDPROC
900
END(invalid_TSS)
L
Linus Torvalds 已提交
901 902

ENTRY(segment_not_present)
903
	RING0_EC_FRAME
904
	ASM_CLAC
905
	pushl_cfi $do_segment_not_present
L
Linus Torvalds 已提交
906
	jmp error_code
907
	CFI_ENDPROC
908
END(segment_not_present)
L
Linus Torvalds 已提交
909 910

ENTRY(stack_segment)
911
	RING0_EC_FRAME
912
	ASM_CLAC
913
	pushl_cfi $do_stack_segment
L
Linus Torvalds 已提交
914
	jmp error_code
915
	CFI_ENDPROC
916
END(stack_segment)
L
Linus Torvalds 已提交
917 918

ENTRY(alignment_check)
919
	RING0_EC_FRAME
920
	ASM_CLAC
921
	pushl_cfi $do_alignment_check
L
Linus Torvalds 已提交
922
	jmp error_code
923
	CFI_ENDPROC
924
END(alignment_check)
L
Linus Torvalds 已提交
925

926 927
ENTRY(divide_error)
	RING0_INT_FRAME
928
	ASM_CLAC
929 930
	pushl_cfi $0			# no error code
	pushl_cfi $do_divide_error
L
Linus Torvalds 已提交
931
	jmp error_code
932
	CFI_ENDPROC
933
END(divide_error)
L
Linus Torvalds 已提交
934 935 936

#ifdef CONFIG_X86_MCE
ENTRY(machine_check)
937
	RING0_INT_FRAME
938
	ASM_CLAC
939 940
	pushl_cfi $0
	pushl_cfi machine_check_vector
L
Linus Torvalds 已提交
941
	jmp error_code
942
	CFI_ENDPROC
943
END(machine_check)
L
Linus Torvalds 已提交
944 945 946
#endif

ENTRY(spurious_interrupt_bug)
947
	RING0_INT_FRAME
948
	ASM_CLAC
949 950
	pushl_cfi $0
	pushl_cfi $do_spurious_interrupt_bug
L
Linus Torvalds 已提交
951
	jmp error_code
952
	CFI_ENDPROC
953
END(spurious_interrupt_bug)
L
Linus Torvalds 已提交
954

955
#ifdef CONFIG_XEN
956 957 958 959 960
/* Xen doesn't set %esp to be precisely what the normal sysenter
   entrypoint expects, so fix it up before using the normal path. */
ENTRY(xen_sysenter_target)
	RING0_INT_FRAME
	addl $5*4, %esp		/* remove xen-provided frame */
961
	CFI_ADJUST_CFA_OFFSET -5*4
962
	jmp sysenter_past_esp
G
Glauber Costa 已提交
963
	CFI_ENDPROC
964

965 966
ENTRY(xen_hypervisor_callback)
	CFI_STARTPROC
967
	pushl_cfi $-1 /* orig_ax = -1 => not a system call */
968 969
	SAVE_ALL
	TRACE_IRQS_OFF
970 971 972 973 974 975 976 977 978 979 980 981

	/* Check to see if we got the event in the critical
	   region in xen_iret_direct, after we've reenabled
	   events and checked for pending events.  This simulates
	   iret instruction's behaviour where it delivers a
	   pending interrupt when enabling interrupts. */
	movl PT_EIP(%esp),%eax
	cmpl $xen_iret_start_crit,%eax
	jb   1f
	cmpl $xen_iret_end_crit,%eax
	jae  1f

J
Jeremy Fitzhardinge 已提交
982
	jmp  xen_iret_crit_fixup
983 984

ENTRY(xen_do_upcall)
985
1:	mov %esp, %eax
986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002
	call xen_evtchn_do_upcall
	jmp  ret_from_intr
	CFI_ENDPROC
ENDPROC(xen_hypervisor_callback)

# Hypervisor uses this for application faults while it executes.
# We get here for two reasons:
#  1. Fault while reloading DS, ES, FS or GS
#  2. Fault while executing IRET
# Category 1 we fix up by reattempting the load, and zeroing the segment
# register if the load fails.
# Category 2 we fix up by jumping to do_iret_error. We cannot use the
# normal Linux return path in this case because if we use the IRET hypercall
# to pop the stack frame we end up in an infinite loop of failsafe callbacks.
# We distinguish between categories by maintaining a status value in EAX.
ENTRY(xen_failsafe_callback)
	CFI_STARTPROC
1003
	pushl_cfi %eax
1004 1005 1006 1007 1008
	movl $1,%eax
1:	mov 4(%esp),%ds
2:	mov 8(%esp),%es
3:	mov 12(%esp),%fs
4:	mov 16(%esp),%gs
1009 1010
	/* EAX == 0 => Category 1 (Bad segment)
	   EAX != 0 => Category 2 (Bad IRET) */
1011
	testl %eax,%eax
1012
	popl_cfi %eax
1013 1014 1015
	lea 16(%esp),%esp
	CFI_ADJUST_CFA_OFFSET -16
	jz 5f
1016 1017
	jmp iret_exc
5:	pushl_cfi $-1 /* orig_ax = -1 => not a system call */
1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035
	SAVE_ALL
	jmp ret_from_exception
	CFI_ENDPROC

.section .fixup,"ax"
6:	xorl %eax,%eax
	movl %eax,4(%esp)
	jmp 1b
7:	xorl %eax,%eax
	movl %eax,8(%esp)
	jmp 2b
8:	xorl %eax,%eax
	movl %eax,12(%esp)
	jmp 3b
9:	xorl %eax,%eax
	movl %eax,16(%esp)
	jmp 4b
.previous
1036 1037 1038 1039
	_ASM_EXTABLE(1b,6b)
	_ASM_EXTABLE(2b,7b)
	_ASM_EXTABLE(3b,8b)
	_ASM_EXTABLE(4b,9b)
1040 1041
ENDPROC(xen_failsafe_callback)

/* Xen HVM guests deliver event-channel upcalls through this vector. */
BUILD_INTERRUPT3(xen_hvm_callback_vector, HYPERVISOR_CALLBACK_VECTOR,
		xen_evtchn_do_upcall)

#endif	/* CONFIG_XEN */

#if IS_ENABLED(CONFIG_HYPERV)

/* Hyper-V guests receive hypervisor callbacks through the same vector. */
BUILD_INTERRUPT3(hyperv_callback_vector, HYPERVISOR_CALLBACK_VECTOR,
	hyperv_vector_handler)

#endif /* CONFIG_HYPERV */

#ifdef CONFIG_FUNCTION_TRACER
#ifdef CONFIG_DYNAMIC_FTRACE

/* With dynamic ftrace, mcount call sites are patched at runtime;
 * the default mcount is just a return. */
ENTRY(mcount)
	ret
END(mcount)

/*
 * Patched-in tracing entry. Builds the (ip, parent_ip, op, regs=NULL)
 * argument set expected by the ftrace_func_t callback installed at
 * the ftrace_call site.
 */
ENTRY(ftrace_caller)
	cmpl $0, function_trace_stop
	jne  ftrace_stub

	pushl %eax
	pushl %ecx
	pushl %edx
	pushl $0	/* Pass NULL as regs pointer */
	movl 4*4(%esp), %eax		/* ip: return address of mcount call */
	movl 0x4(%ebp), %edx		/* parent ip, from caller's frame */
	movl function_trace_op, %ecx	/* 3rd argument: ftrace_ops */
	subl $MCOUNT_INSN_SIZE, %eax	/* point ip at the mcount call itself */

.globl ftrace_call
ftrace_call:
	call ftrace_stub		/* patched to the active tracer */

	addl $4,%esp	/* skip NULL pointer */
	popl %edx
	popl %ecx
	popl %eax
ftrace_ret:
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
.globl ftrace_graph_call
ftrace_graph_call:
	jmp ftrace_stub			/* patched to ftrace_graph_caller */
#endif

.globl ftrace_stub
ftrace_stub:
	ret
END(ftrace_caller)

/*
 * Like ftrace_caller, but hand-builds a full pt_regs on the stack and
 * passes its address as the 4th argument, for tracers that request
 * register access (FTRACE_OPS_FL_SAVE_REGS).
 */
ENTRY(ftrace_regs_caller)
	pushf	/* push flags before compare (in cs location) */
	cmpl $0, function_trace_stop
	jne ftrace_restore_flags

	/*
	 * i386 does not save SS and ESP when coming from kernel.
	 * Instead, to get sp, &regs->sp is used (see ptrace.h).
	 * Unfortunately, that means eflags must be at the same location
	 * as the current return ip is. We move the return ip into the
	 * ip location, and move flags into the return ip location.
	 */
	pushl 4(%esp)	/* save return ip into ip slot */

	pushl $0	/* Load 0 into orig_ax */
	pushl %gs
	pushl %fs
	pushl %es
	pushl %ds
	pushl %eax
	pushl %ebp
	pushl %edi
	pushl %esi
	pushl %edx
	pushl %ecx
	pushl %ebx

	movl 13*4(%esp), %eax	/* Get the saved flags */
	movl %eax, 14*4(%esp)	/* Move saved flags into regs->flags location */
				/* clobbering return ip */
	movl $__KERNEL_CS,13*4(%esp)

	movl 12*4(%esp), %eax	/* Load ip (1st parameter) */
	subl $MCOUNT_INSN_SIZE, %eax	/* Adjust ip */
	movl 0x4(%ebp), %edx	/* Load parent ip (2nd parameter) */
	movl function_trace_op, %ecx /* Save ftrace_pos in 3rd parameter */
	pushl %esp		/* Save pt_regs as 4th parameter */

GLOBAL(ftrace_regs_call)
	call ftrace_stub	/* patched to the active tracer */

	addl $4, %esp		/* Skip pt_regs */
	movl 14*4(%esp), %eax	/* Move flags back into cs */
	movl %eax, 13*4(%esp)	/* Needed to keep addl from modifying flags */
	movl 12*4(%esp), %eax	/* Get return ip from regs->ip */
	movl %eax, 14*4(%esp)	/* Put return ip back for ret */

	popl %ebx
	popl %ecx
	popl %edx
	popl %esi
	popl %edi
	popl %ebp
	popl %eax
	popl %ds
	popl %es
	popl %fs
	popl %gs
	addl $8, %esp		/* Skip orig_ax and ip */
	popf			/* Pop flags at end (no addl to corrupt flags) */
	jmp ftrace_ret

ftrace_restore_flags:
	popf
	jmp  ftrace_stub
#else /* ! CONFIG_DYNAMIC_FTRACE */

/*
 * Static ftrace: every function calls into here. Bail out fast when
 * tracing is disabled, otherwise dispatch to the installed trace
 * function (and/or the graph tracer).
 */
ENTRY(mcount)
	cmpl $__PAGE_OFFSET, %esp
	jb ftrace_stub		/* Paging not enabled yet? */

	cmpl $0, function_trace_stop
	jne  ftrace_stub

	cmpl $ftrace_stub, ftrace_trace_function
	jnz trace
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
	cmpl $ftrace_stub, ftrace_graph_return
	jnz ftrace_graph_caller

	cmpl $ftrace_graph_entry_stub, ftrace_graph_entry
	jnz ftrace_graph_caller
#endif
.globl ftrace_stub
ftrace_stub:
	ret

	/* taken from glibc */
trace:
	pushl %eax
	pushl %ecx
	pushl %edx
	movl 0xc(%esp), %eax	/* ip: return address of mcount call */
	movl 0x4(%ebp), %edx	/* parent ip, from caller's frame */
	subl $MCOUNT_INSN_SIZE, %eax

	call *ftrace_trace_function

	popl %edx
	popl %ecx
	popl %eax
	jmp ftrace_stub
END(mcount)
#endif /* CONFIG_DYNAMIC_FTRACE */
#endif /* CONFIG_FUNCTION_TRACER */

#ifdef CONFIG_FUNCTION_GRAPH_TRACER
/*
 * Hook the function entry for the graph tracer: hand the caller's
 * return-address slot to prepare_ftrace_return() so it can be
 * redirected to return_to_handler below.
 */
ENTRY(ftrace_graph_caller)
	pushl %eax
	pushl %ecx
	pushl %edx
	movl 0xc(%esp), %edx	/* ip of the traced function */
	lea 0x4(%ebp), %eax	/* &parent return-address slot */
	movl (%ebp), %ecx	/* frame pointer of the caller */
	subl $MCOUNT_INSN_SIZE, %edx
	call prepare_ftrace_return
	popl %edx
	popl %ecx
	popl %eax
	ret
END(ftrace_graph_caller)

/* Traced functions "return" here; recover the original return address
 * from the graph-tracer shadow stack and jump to it. */
.globl return_to_handler
return_to_handler:
	pushl %eax
	pushl %edx
	movl %ebp, %eax
	call ftrace_return_to_handler
	movl %eax, %ecx		/* real return address */
	popl %edx
	popl %eax
	jmp *%ecx
#endif

#ifdef CONFIG_TRACING
/* Traced variant of page_fault: identical entry, but dispatches to
 * trace_do_page_fault via the common error_code path. */
ENTRY(trace_page_fault)
	RING0_EC_FRAME
	ASM_CLAC
	pushl_cfi $trace_do_page_fault
	jmp error_code
	CFI_ENDPROC
END(trace_page_fault)
#endif

/*
 * Page-fault entry: push the C handler address, then fall into the
 * common error_code path, which builds a pt_regs frame, loads kernel
 * segments, and calls the pushed handler with (pt_regs *, error_code).
 */
ENTRY(page_fault)
	RING0_EC_FRAME
	ASM_CLAC
	pushl_cfi $do_page_fault
	ALIGN
error_code:
	/* the function address is in %gs's slot on the stack */
	pushl_cfi %fs
	/*CFI_REL_OFFSET fs, 0*/
	pushl_cfi %es
	/*CFI_REL_OFFSET es, 0*/
	pushl_cfi %ds
	/*CFI_REL_OFFSET ds, 0*/
	pushl_cfi %eax
	CFI_REL_OFFSET eax, 0
	pushl_cfi %ebp
	CFI_REL_OFFSET ebp, 0
	pushl_cfi %edi
	CFI_REL_OFFSET edi, 0
	pushl_cfi %esi
	CFI_REL_OFFSET esi, 0
	pushl_cfi %edx
	CFI_REL_OFFSET edx, 0
	pushl_cfi %ecx
	CFI_REL_OFFSET ecx, 0
	pushl_cfi %ebx
	CFI_REL_OFFSET ebx, 0
	cld
	movl $(__KERNEL_PERCPU), %ecx
	movl %ecx, %fs
	UNWIND_ESPFIX_STACK
	GS_TO_REG %ecx
	movl PT_GS(%esp), %edi		# get the function address
	movl PT_ORIG_EAX(%esp), %edx	# get the error code
	movl $-1, PT_ORIG_EAX(%esp)	# no syscall to restart
	REG_TO_PTGS %ecx
	SET_KERNEL_GS %ecx
	movl $(__USER_DS), %ecx
	movl %ecx, %ds
	movl %ecx, %es
	TRACE_IRQS_OFF
	movl %esp,%eax			# pt_regs pointer
	call *%edi
	jmp ret_from_exception
	CFI_ENDPROC
END(page_fault)

/*
 * Debug traps and NMI can happen at the one SYSENTER instruction
 * that sets up the real kernel stack. Check here, since we can't
 * allow the wrong stack to be used.
 *
 * "TSS_sysenter_sp0+12" is because the NMI/debug handler will have
 * already pushed 3 words if it hits on the sysenter instruction:
 * eflags, cs and eip.
 *
 * We just load the right stack, and push the three (known) values
 * by hand onto the new stack - while updating the return eip past
 * the instruction that would have done it for sysenter.
 */
.macro FIX_STACK offset ok label
	cmpw $__KERNEL_CS, 4(%esp)
	jne \ok				# fault was not on the sysenter path
\label:
	movl TSS_sysenter_sp0 + \offset(%esp), %esp
	CFI_DEF_CFA esp, 0
	CFI_UNDEFINED eip
	pushfl_cfi
	pushl_cfi $__KERNEL_CS
	pushl_cfi $sysenter_past_esp	# resume past the stack-switch insn
	CFI_REL_OFFSET eip, 0
.endm

/* #DB entry: fix up the stack if the trap hit on the sysenter
 * instruction, then dispatch to do_debug. */
ENTRY(debug)
	RING0_INT_FRAME
	ASM_CLAC
	cmpl $ia32_sysenter_target,(%esp)
	jne debug_stack_correct
	FIX_STACK 12, debug_stack_correct, debug_esp_fix_insn
debug_stack_correct:
	pushl_cfi $-1			# mark this as an int
	SAVE_ALL
	TRACE_IRQS_OFF
	xorl %edx,%edx			# error code 0
	movl %esp,%eax			# pt_regs pointer
	call do_debug
	jmp ret_from_exception
	CFI_ENDPROC
END(debug)

/*
 * NMI is doubly nasty. It can happen _while_ we're handling
 * a debug fault, and the debug fault hasn't yet been able to
 * clear up the stack. So we first check whether we got  an
 * NMI on the sysenter entry path, but after that we need to
 * check whether we got an NMI on the debug path where the debug
 * fault happened on the sysenter path.
 */
ENTRY(nmi)
	RING0_INT_FRAME
	ASM_CLAC
#ifdef CONFIG_X86_ESPFIX32
	pushl_cfi %eax
	movl %ss, %eax
	cmpw $__ESPFIX_SS, %ax		# NMI landed on the espfix stack?
	popl_cfi %eax
	je nmi_espfix_stack
#endif
	cmpl $ia32_sysenter_target,(%esp)
	je nmi_stack_fixup
	pushl_cfi %eax
	movl %esp,%eax
	/* Do not access memory above the end of our stack page,
	 * it might not exist.
	 */
	andl $(THREAD_SIZE-1),%eax
	cmpl $(THREAD_SIZE-20),%eax
	popl_cfi %eax
	jae nmi_stack_correct
	cmpl $ia32_sysenter_target,12(%esp)
	je nmi_debug_stack_check
nmi_stack_correct:
	/* We have a RING0_INT_FRAME here */
	pushl_cfi %eax
	SAVE_ALL
	xorl %edx,%edx		# zero error code
	movl %esp,%eax		# pt_regs pointer
	call do_nmi
	jmp restore_all_notrace
	CFI_ENDPROC

nmi_stack_fixup:
	RING0_INT_FRAME
	FIX_STACK 12, nmi_stack_correct, 1
	jmp nmi_stack_correct

nmi_debug_stack_check:
	/* We have a RING0_INT_FRAME here */
	cmpw $__KERNEL_CS,16(%esp)
	jne nmi_stack_correct
	cmpl $debug,(%esp)		# NMI inside the debug fixup window?
	jb nmi_stack_correct
	cmpl $debug_esp_fix_insn,(%esp)
	ja nmi_stack_correct
	FIX_STACK 24, nmi_stack_correct, 1
	jmp nmi_stack_correct

#ifdef CONFIG_X86_ESPFIX32
nmi_espfix_stack:
	/* We have a RING0_INT_FRAME here.
	 *
	 * create the pointer to lss back
	 */
	pushl_cfi %ss
	pushl_cfi %esp
	addl $4, (%esp)
	/* copy the iret frame of 12 bytes */
	.rept 3
	pushl_cfi 16(%esp)
	.endr
	pushl_cfi %eax
	SAVE_ALL
	FIXUP_ESPFIX_STACK		# %eax == %esp
	xorl %edx,%edx			# zero error code
	call do_nmi
	RESTORE_REGS
	lss 12+4(%esp), %esp		# back to espfix stack
	CFI_ADJUST_CFA_OFFSET -24
	jmp irq_return
#endif
	CFI_ENDPROC
END(nmi)

/* #BP (breakpoint) entry: no error code; dispatch to do_int3. */
ENTRY(int3)
	RING0_INT_FRAME
	ASM_CLAC
	pushl_cfi $-1			# mark this as an int
	SAVE_ALL
	TRACE_IRQS_OFF
	xorl %edx,%edx		# zero error code
	movl %esp,%eax		# pt_regs pointer
	call do_int3
	jmp ret_from_exception
	CFI_ENDPROC
END(int3)

/* #GP entry: error code is on the stack; use the common error_code path. */
ENTRY(general_protection)
	RING0_EC_FRAME
	pushl_cfi $do_general_protection
	jmp error_code
	CFI_ENDPROC
END(general_protection)

#ifdef CONFIG_KVM_GUEST
/* KVM async page fault: delivered like a page fault with an error
 * code; routed through the common error_code path. */
ENTRY(async_page_fault)
	RING0_EC_FRAME
	ASM_CLAC
	pushl_cfi $do_async_page_fault
	jmp error_code
	CFI_ENDPROC
END(async_page_fault)
#endif