/*
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 */

/*
 * entry.S contains the system-call and fault low-level handling routines.
 * This also contains the timer-interrupt handler, as well as all interrupts
 * and faults that can result in a task-switch.
 *
 * NOTE: This code handles signal-recognition, which happens every time
 * after a timer-interrupt and after each system call.
 *
 * I changed all the .align's to 4 (16 byte alignment), as that's faster
 * on a 486.
 *
 * Stack layout in 'syscall_exit':
 * 	ptrace needs to have all regs on the stack.
 *	if the order here is changed, it needs to be
 *	updated in fork.c:copy_process, signal.c:do_signal,
 *	ptrace.c and ptrace.h
 *
 *	 0(%esp) - %ebx
 *	 4(%esp) - %ecx
 *	 8(%esp) - %edx
 *	 C(%esp) - %esi
 *	10(%esp) - %edi
 *	14(%esp) - %ebp
 *	18(%esp) - %eax
 *	1C(%esp) - %ds
 *	20(%esp) - %es
 *	24(%esp) - %fs
 *	28(%esp) - %gs		saved iff !CONFIG_X86_32_LAZY_GS
 *	2C(%esp) - orig_eax
 *	30(%esp) - %eip
 *	34(%esp) - %cs
 *	38(%esp) - %eflags
 *	3C(%esp) - %oldesp
 *	40(%esp) - %oldss
 *
 * "current" is in register %ebx during any slow entries.
 */
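
/*
 * For reference, the layout above is struct pt_regs for 32-bit, from
 * <asm/ptrace.h> (sketched from memory here, not authoritative - the
 * PT_* offsets used below are generated from it by asm-offsets.c):
 *
 *	struct pt_regs {
 *		unsigned long bx, cx, dx, si, di, bp, ax;
 *		unsigned long ds, es, fs, gs;
 *		unsigned long orig_ax, ip, cs, flags, sp, ss;
 *	};
 */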

#include <linux/linkage.h>
#include <linux/err.h>
#include <asm/thread_info.h>
#include <asm/irqflags.h>
#include <asm/errno.h>
#include <asm/segment.h>
#include <asm/smp.h>
#include <asm/page_types.h>
#include <asm/percpu.h>
#include <asm/dwarf2.h>
#include <asm/processor-flags.h>
#include <asm/ftrace.h>
#include <asm/irq_vectors.h>
#include <asm/cpufeature.h>
#include <asm/alternative-asm.h>
#include <asm/asm.h>
#include <asm/smap.h>

/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this.  */
#include <linux/elf-em.h>
#define AUDIT_ARCH_I386		(EM_386|__AUDIT_ARCH_LE)
#define __AUDIT_ARCH_LE	   0x40000000

#ifndef CONFIG_AUDITSYSCALL
#define sysenter_audit	syscall_trace_entry
#define sysexit_audit	syscall_exit_work
#endif

	.section .entry.text, "ax"

/*
 * We use macros for low-level operations which need to be overridden
 * for paravirtualization.  The following will never clobber any registers:
 *   INTERRUPT_RETURN (aka. "iret")
 *   GET_CR0_INTO_EAX (aka. "movl %cr0, %eax")
 *   ENABLE_INTERRUPTS_SYSEXIT (aka "sti; sysexit").
 *
 * For DISABLE_INTERRUPTS/ENABLE_INTERRUPTS (aka "cli"/"sti"), you must
 * specify what registers can be overwritten (CLBR_NONE, CLBR_EAX/EDX/ECX/ANY).
 * Allowing a register to be clobbered can shrink the paravirt replacement
 * enough to patch inline, increasing performance.
 */
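
/*
 * For orientation, the native (non-paravirt) expansions of these are
 * just (see <asm/irqflags.h> and the native_* stubs later in this file):
 *
 *	DISABLE_INTERRUPTS(clbr)	->  cli
 *	ENABLE_INTERRUPTS(clbr)		->  sti
 *	INTERRUPT_RETURN		->  iret
 *	ENABLE_INTERRUPTS_SYSEXIT	->  sti; sysexit
 */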

#ifdef CONFIG_PREEMPT
#define preempt_stop(clobbers)	DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF
#else
#define preempt_stop(clobbers)
#define resume_kernel		restore_all
#endif

.macro TRACE_IRQS_IRET
#ifdef CONFIG_TRACE_IRQFLAGS
	testl $X86_EFLAGS_IF,PT_EFLAGS(%esp)     # interrupts off?
	jz 1f
	TRACE_IRQS_ON
1:
#endif
.endm

/*
 * User gs save/restore
 *
 * %gs is used for userland TLS, while the kernel uses it only for the
 * stack canary, which gcc requires to live at %gs:20.  Read the comment
 * at the top of stackprotector.h for more info.
 *
 * Local labels 98 and 99 are used.
 */
#ifdef CONFIG_X86_32_LAZY_GS

 /* unfortunately push/pop can't be no-op */
.macro PUSH_GS
	pushl_cfi $0
.endm
.macro POP_GS pop=0
	addl $(4 + \pop), %esp
	CFI_ADJUST_CFA_OFFSET -(4 + \pop)
.endm
.macro POP_GS_EX
.endm

 /* all the rest are no-op */
.macro PTGS_TO_GS
.endm
.macro PTGS_TO_GS_EX
.endm
.macro GS_TO_REG reg
.endm
.macro REG_TO_PTGS reg
.endm
.macro SET_KERNEL_GS reg
.endm

#else	/* CONFIG_X86_32_LAZY_GS */

.macro PUSH_GS
	pushl_cfi %gs
	/*CFI_REL_OFFSET gs, 0*/
.endm

.macro POP_GS pop=0
98:	popl_cfi %gs
	/*CFI_RESTORE gs*/
  .if \pop <> 0
	add $\pop, %esp
	CFI_ADJUST_CFA_OFFSET -\pop
  .endif
.endm
.macro POP_GS_EX
.pushsection .fixup, "ax"
99:	movl $0, (%esp)
	jmp 98b
.popsection
	_ASM_EXTABLE(98b,99b)
.endm

.macro PTGS_TO_GS
98:	mov PT_GS(%esp), %gs
.endm
.macro PTGS_TO_GS_EX
.pushsection .fixup, "ax"
99:	movl $0, PT_GS(%esp)
	jmp 98b
.popsection
	_ASM_EXTABLE(98b,99b)
.endm

.macro GS_TO_REG reg
	movl %gs, \reg
	/*CFI_REGISTER gs, \reg*/
.endm
.macro REG_TO_PTGS reg
	movl \reg, PT_GS(%esp)
	/*CFI_REL_OFFSET gs, PT_GS*/
.endm
.macro SET_KERNEL_GS reg
	movl $(__KERNEL_STACK_CANARY), \reg
	movl \reg, %gs
.endm

#endif	/* CONFIG_X86_32_LAZY_GS */

.macro SAVE_ALL
	cld
	PUSH_GS
	pushl_cfi %fs
	/*CFI_REL_OFFSET fs, 0;*/
	pushl_cfi %es
	/*CFI_REL_OFFSET es, 0;*/
	pushl_cfi %ds
	/*CFI_REL_OFFSET ds, 0;*/
	pushl_cfi %eax
	CFI_REL_OFFSET eax, 0
	pushl_cfi %ebp
	CFI_REL_OFFSET ebp, 0
	pushl_cfi %edi
	CFI_REL_OFFSET edi, 0
	pushl_cfi %esi
	CFI_REL_OFFSET esi, 0
	pushl_cfi %edx
	CFI_REL_OFFSET edx, 0
	pushl_cfi %ecx
	CFI_REL_OFFSET ecx, 0
	pushl_cfi %ebx
	CFI_REL_OFFSET ebx, 0
	movl $(__USER_DS), %edx
	movl %edx, %ds
	movl %edx, %es
	movl $(__KERNEL_PERCPU), %edx
	movl %edx, %fs
	SET_KERNEL_GS %edx
.endm

.macro RESTORE_INT_REGS
	popl_cfi %ebx
	CFI_RESTORE ebx
	popl_cfi %ecx
	CFI_RESTORE ecx
	popl_cfi %edx
	CFI_RESTORE edx
	popl_cfi %esi
	CFI_RESTORE esi
	popl_cfi %edi
	CFI_RESTORE edi
	popl_cfi %ebp
	CFI_RESTORE ebp
	popl_cfi %eax
	CFI_RESTORE eax
.endm

.macro RESTORE_REGS pop=0
	RESTORE_INT_REGS
1:	popl_cfi %ds
	/*CFI_RESTORE ds;*/
2:	popl_cfi %es
	/*CFI_RESTORE es;*/
3:	popl_cfi %fs
	/*CFI_RESTORE fs;*/
	POP_GS \pop
.pushsection .fixup, "ax"
4:	movl $0, (%esp)
	jmp 1b
5:	movl $0, (%esp)
	jmp 2b
6:	movl $0, (%esp)
	jmp 3b
.popsection
	_ASM_EXTABLE(1b,4b)
	_ASM_EXTABLE(2b,5b)
	_ASM_EXTABLE(3b,6b)
	POP_GS_EX
.endm

.macro RING0_INT_FRAME
	CFI_STARTPROC simple
	CFI_SIGNAL_FRAME
	CFI_DEF_CFA esp, 3*4
	/*CFI_OFFSET cs, -2*4;*/
	CFI_OFFSET eip, -3*4
.endm

.macro RING0_EC_FRAME
	CFI_STARTPROC simple
	CFI_SIGNAL_FRAME
	CFI_DEF_CFA esp, 4*4
	/*CFI_OFFSET cs, -2*4;*/
	CFI_OFFSET eip, -3*4
.endm

.macro RING0_PTREGS_FRAME
	CFI_STARTPROC simple
	CFI_SIGNAL_FRAME
	CFI_DEF_CFA esp, PT_OLDESP-PT_EBX
	/*CFI_OFFSET cs, PT_CS-PT_OLDESP;*/
	CFI_OFFSET eip, PT_EIP-PT_OLDESP
	/*CFI_OFFSET es, PT_ES-PT_OLDESP;*/
	/*CFI_OFFSET ds, PT_DS-PT_OLDESP;*/
	CFI_OFFSET eax, PT_EAX-PT_OLDESP
	CFI_OFFSET ebp, PT_EBP-PT_OLDESP
	CFI_OFFSET edi, PT_EDI-PT_OLDESP
	CFI_OFFSET esi, PT_ESI-PT_OLDESP
	CFI_OFFSET edx, PT_EDX-PT_OLDESP
	CFI_OFFSET ecx, PT_ECX-PT_OLDESP
	CFI_OFFSET ebx, PT_EBX-PT_OLDESP
.endm

ENTRY(ret_from_fork)
	CFI_STARTPROC
	pushl_cfi %eax
	call schedule_tail
	GET_THREAD_INFO(%ebp)
	popl_cfi %eax
	pushl_cfi $0x0202		# Reset kernel eflags
	popfl_cfi
	jmp syscall_exit
	CFI_ENDPROC
END(ret_from_fork)

ENTRY(ret_from_kernel_thread)
	CFI_STARTPROC
	pushl_cfi %eax
	call schedule_tail
	GET_THREAD_INFO(%ebp)
	popl_cfi %eax
	pushl_cfi $0x0202		# Reset kernel eflags
	popfl_cfi
	movl PT_EBP(%esp),%eax
	call *PT_EBX(%esp)
	movl $0,PT_EAX(%esp)
	jmp syscall_exit
	CFI_ENDPROC
ENDPROC(ret_from_kernel_thread)

/*
 * Interrupt exit functions should be protected against kprobes
 */
	.pushsection .kprobes.text, "ax"
/*
 * Return to user mode is not as complex as all this looks,
 * but we want the default path for a system call return to
 * go as quickly as possible, which is why some of this is
 * less clear than it otherwise should be.
 */

	# userspace resumption stub bypassing syscall exit tracing
	ALIGN
	RING0_PTREGS_FRAME
ret_from_exception:
	preempt_stop(CLBR_ANY)
ret_from_intr:
	GET_THREAD_INFO(%ebp)
#ifdef CONFIG_VM86
	movl PT_EFLAGS(%esp), %eax	# mix EFLAGS and CS
	movb PT_CS(%esp), %al
	andl $(X86_EFLAGS_VM | SEGMENT_RPL_MASK), %eax
#else
	/*
	 * We can be coming here from a child spawned by kernel_thread().
	 */
	movl PT_CS(%esp), %eax
	andl $SEGMENT_RPL_MASK, %eax
#endif
	cmpl $USER_RPL, %eax
	jb resume_kernel		# not returning to v8086 or userspace

ENTRY(resume_userspace)
	LOCKDEP_SYS_EXIT
	DISABLE_INTERRUPTS(CLBR_ANY)	# make sure we don't miss an interrupt
					# setting need_resched or sigpending
					# between sampling and the iret
	TRACE_IRQS_OFF
	movl TI_flags(%ebp), %ecx
	andl $_TIF_WORK_MASK, %ecx	# is there any work to be done on
					# int/exception return?
	jne work_pending
	jmp restore_all
END(ret_from_exception)

#ifdef CONFIG_PREEMPT
ENTRY(resume_kernel)
	DISABLE_INTERRUPTS(CLBR_ANY)
need_resched:
	cmpl $0,PER_CPU_VAR(__preempt_count)
	jnz restore_all
	testl $X86_EFLAGS_IF,PT_EFLAGS(%esp)	# interrupts off (exception path) ?
	jz restore_all
	call preempt_schedule_irq
	jmp need_resched
END(resume_kernel)
#endif
	CFI_ENDPROC
/*
 * End of kprobes section
 */
	.popsection

/* SYSENTER_RETURN points to after the "sysenter" instruction in
   the vsyscall page.  See vsyscall-sysentry.S, which defines the symbol.  */

	# sysenter call handler stub
ENTRY(ia32_sysenter_target)
	CFI_STARTPROC simple
	CFI_SIGNAL_FRAME
	CFI_DEF_CFA esp, 0
	CFI_REGISTER esp, ebp
	movl TSS_sysenter_sp0(%esp),%esp
sysenter_past_esp:
	/*
	 * Interrupts are disabled here, but we can't trace that until
	 * enough kernel state has been set up to call TRACE_IRQS_OFF -
	 * and we immediately enable interrupts at that point anyway.
	 */
	pushl_cfi $__USER_DS
	/*CFI_REL_OFFSET ss, 0*/
	pushl_cfi %ebp
	CFI_REL_OFFSET esp, 0
	pushfl_cfi
	orl $X86_EFLAGS_IF, (%esp)
	pushl_cfi $__USER_CS
	/*CFI_REL_OFFSET cs, 0*/
	/*
	 * Push current_thread_info()->sysenter_return to the stack.
	 * A tiny bit of offset fixup is necessary - 4*4 means the 4 words
	 * pushed above; +8 corresponds to copy_thread's esp0 setting.
	 */
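	/*
	 * Sketch of why the operand below resolves to the field's
	 * address (assuming thread_info sits at the bottom of the
	 * THREAD_SIZE-aligned stack):
	 *
	 *	esp      = top_of_stack - 8 - 4*4
	 *	ti       = top_of_stack - THREAD_SIZE
	 *	ti - esp = 8 + 4*4 - THREAD_SIZE
	 *
	 * hence TI_sysenter_return - THREAD_SIZE + 8 + 4*4 relative to %esp.
	 */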
	pushl_cfi ((TI_sysenter_return)-THREAD_SIZE+8+4*4)(%esp)
	CFI_REL_OFFSET eip, 0

	pushl_cfi %eax
	SAVE_ALL
	ENABLE_INTERRUPTS(CLBR_NONE)

/*
 * Load the potential sixth argument from the user stack.
 * Careful about security: the four-byte load at (%ebp) must lie
 * entirely below __PAGE_OFFSET, hence the -3 in the limit check.
 */
	cmpl $__PAGE_OFFSET-3,%ebp
	jae syscall_fault
	ASM_STAC
1:	movl (%ebp),%ebp
	ASM_CLAC
	movl %ebp,PT_EBP(%esp)
	_ASM_EXTABLE(1b,syscall_fault)

	GET_THREAD_INFO(%ebp)

	testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp)
	jnz sysenter_audit
sysenter_do_call:
	cmpl $(NR_syscalls), %eax
	jae syscall_badsys
	call *sys_call_table(,%eax,4)
	movl %eax,PT_EAX(%esp)
	LOCKDEP_SYS_EXIT
	DISABLE_INTERRUPTS(CLBR_ANY)
	TRACE_IRQS_OFF
	movl TI_flags(%ebp), %ecx
	testl $_TIF_ALLWORK_MASK, %ecx
	jne sysexit_audit
sysenter_exit:
/* if something modifies registers it must also disable sysexit */
	movl PT_EIP(%esp), %edx
	movl PT_OLDESP(%esp), %ecx
	xorl %ebp,%ebp
	TRACE_IRQS_ON
1:	mov  PT_FS(%esp), %fs
	PTGS_TO_GS
	ENABLE_INTERRUPTS_SYSEXIT

#ifdef CONFIG_AUDITSYSCALL
sysenter_audit:
	testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
	jnz syscall_trace_entry
	addl $4,%esp
	CFI_ADJUST_CFA_OFFSET -4
	/* %esi already in 8(%esp)	   6th arg: 4th syscall arg */
	/* %edx already in 4(%esp)	   5th arg: 3rd syscall arg */
	/* %ecx already in 0(%esp)	   4th arg: 2nd syscall arg */
	movl %ebx,%ecx			/* 3rd arg: 1st syscall arg */
	movl %eax,%edx			/* 2nd arg: syscall number */
	movl $AUDIT_ARCH_I386,%eax	/* 1st arg: audit arch */
	call __audit_syscall_entry
	pushl_cfi %ebx
	movl PT_EAX(%esp),%eax		/* reload syscall number */
	jmp sysenter_do_call

sysexit_audit:
	testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %ecx
	jne syscall_exit_work
	TRACE_IRQS_ON
	ENABLE_INTERRUPTS(CLBR_ANY)
	movl %eax,%edx		/* second arg, syscall return value */
	cmpl $-MAX_ERRNO,%eax	/* is it an error ? */
	setbe %al		/* 1 if so, 0 if not */
	movzbl %al,%eax		/* zero-extend that */
	call __audit_syscall_exit
	DISABLE_INTERRUPTS(CLBR_ANY)
	TRACE_IRQS_OFF
	movl TI_flags(%ebp), %ecx
	testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %ecx
	jne syscall_exit_work
	movl PT_EAX(%esp),%eax	/* reload syscall return value */
	jmp sysenter_exit
#endif

	CFI_ENDPROC
.pushsection .fixup,"ax"
2:	movl $0,PT_FS(%esp)
	jmp 1b
.popsection
	_ASM_EXTABLE(1b,2b)
	PTGS_TO_GS_EX
ENDPROC(ia32_sysenter_target)

/*
 * syscall stub including irq exit should be protected against kprobes
 */
	.pushsection .kprobes.text, "ax"

	# system call handler stub
ENTRY(system_call)
	RING0_INT_FRAME			# can't unwind into user space anyway
	ASM_CLAC
	pushl_cfi %eax			# save orig_eax
	SAVE_ALL
	GET_THREAD_INFO(%ebp)
					# system call tracing in operation / emulation
	testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp)
	jnz syscall_trace_entry
	cmpl $(NR_syscalls), %eax
	jae syscall_badsys
syscall_call:
	call *sys_call_table(,%eax,4)
	movl %eax,PT_EAX(%esp)		# store the return value
syscall_exit:
	LOCKDEP_SYS_EXIT
	DISABLE_INTERRUPTS(CLBR_ANY)	# make sure we don't miss an interrupt
					# setting need_resched or sigpending
					# between sampling and the iret
	TRACE_IRQS_OFF
	movl TI_flags(%ebp), %ecx
	testl $_TIF_ALLWORK_MASK, %ecx	# current->work
	jne syscall_exit_work

restore_all:
	TRACE_IRQS_IRET
restore_all_notrace:
#ifdef CONFIG_X86_ESPFIX32
	movl PT_EFLAGS(%esp), %eax	# mix EFLAGS, SS and CS
	# Warning: PT_OLDSS(%esp) contains the wrong/random values if we
	# are returning to the kernel.
	# See comments in process.c:copy_thread() for details.
	movb PT_OLDSS(%esp), %ah
	movb PT_CS(%esp), %al
	andl $(X86_EFLAGS_VM | (SEGMENT_TI_MASK << 8) | SEGMENT_RPL_MASK), %eax
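	# After the two movb's above, %eax holds CS.RPL in bits 0-1,
	# SS's TI bit in bit 10 and EFLAGS.VM in bit 17, so the cmpl
	# below matches only "user RPL, LDT stack segment, VM off".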
	cmpl $((SEGMENT_LDT << 8) | USER_RPL), %eax
	CFI_REMEMBER_STATE
	je ldt_ss			# returning to user-space with LDT SS
#endif
restore_nocheck:
	RESTORE_REGS 4			# skip orig_eax/error_code
irq_return:
	INTERRUPT_RETURN
.section .fixup,"ax"
ENTRY(iret_exc)
	pushl $0			# no error code
	pushl $do_iret_error
	jmp error_code
.previous
	_ASM_EXTABLE(irq_return,iret_exc)

#ifdef CONFIG_X86_ESPFIX32
	CFI_RESTORE_STATE
ldt_ss:
#ifdef CONFIG_PARAVIRT
	/*
	 * The kernel can't run on a non-flat stack if paravirt mode
	 * is active.  Rather than try to fix up the high bits of
	 * ESP, bypass this code entirely.  This may break DOSemu
	 * and/or Wine support in a paravirt VM, although the option
	 * is still available to implement the setting of the high
	 * 16-bits in the INTERRUPT_RETURN paravirt-op.
	 */
	cmpl $0, pv_info+PARAVIRT_enabled
	jne restore_nocheck
#endif

/*
 * Set up and switch to the ESPFIX stack
 *
 * We're returning to userspace with a 16 bit stack. The CPU will not
 * restore the high word of ESP for us on executing iret... This is an
 * "official" bug of all the x86-compatible CPUs, which we can work
 * around to make dosemu and wine happy. We do this by preloading the
 * high word of ESP with the high word of the userspace ESP while
 * compensating for the offset by changing to the ESPFIX segment with
 * a base address that matches for the difference.
 */
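/*
 * A worked example (values picked for illustration only): with
 * kernel esp = 0xc2011f50 and user esp = 0x00abcdef, the code below
 * builds eax = 0x00ab1f50 (user high word, kernel low word) and
 * edx = (0xc2011f50 - 0x00ab1f50) >> 16 = 0xc156, which is patched
 * into bits 16..31 of the espfix segment base, so that
 * base + eax = 0xc1560000 + 0x00ab1f50 = 0xc2011f50: kernel accesses
 * still hit the real stack while ESP's high word already carries the
 * user value that iret will fail to restore.
 */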
#define GDT_ESPFIX_SS PER_CPU_VAR(gdt_page) + (GDT_ENTRY_ESPFIX_SS * 8)
	mov %esp, %edx			/* load kernel esp */
	mov PT_OLDESP(%esp), %eax	/* load userspace esp */
	mov %dx, %ax			/* eax: new kernel esp */
	sub %eax, %edx			/* offset (low word is 0) */
	shr $16, %edx
	mov %dl, GDT_ESPFIX_SS + 4 /* bits 16..23 */
	mov %dh, GDT_ESPFIX_SS + 7 /* bits 24..31 */
	pushl_cfi $__ESPFIX_SS
	pushl_cfi %eax			/* new kernel esp */
	/* Disable interrupts, but do not irqtrace this section: we
	 * will soon execute iret and the tracer was already set to
	 * the irqstate after the iret */
	DISABLE_INTERRUPTS(CLBR_EAX)
	lss (%esp), %esp		/* switch to espfix segment */
	CFI_ADJUST_CFA_OFFSET -8
	jmp restore_nocheck
#endif
	CFI_ENDPROC
ENDPROC(system_call)

	# perform work that needs to be done immediately before resumption
	ALIGN
	RING0_PTREGS_FRAME		# can't unwind into user space anyway
work_pending:
	testb $_TIF_NEED_RESCHED, %cl
	jz work_notifysig
work_resched:
	call schedule
	LOCKDEP_SYS_EXIT
	DISABLE_INTERRUPTS(CLBR_ANY)	# make sure we don't miss an interrupt
					# setting need_resched or sigpending
					# between sampling and the iret
	TRACE_IRQS_OFF
	movl TI_flags(%ebp), %ecx
	andl $_TIF_WORK_MASK, %ecx	# is there any work to be done other
					# than syscall tracing?
	jz restore_all
	testb $_TIF_NEED_RESCHED, %cl
	jnz work_resched

work_notifysig:				# deal with pending signals and
					# notify-resume requests
#ifdef CONFIG_VM86
	testl $X86_EFLAGS_VM, PT_EFLAGS(%esp)
	movl %esp, %eax
	jne work_notifysig_v86		# returning to kernel-space or
					# vm86-space
1:
#else
	movl %esp, %eax
#endif
	TRACE_IRQS_ON
	ENABLE_INTERRUPTS(CLBR_NONE)
	movb PT_CS(%esp), %bl
	andb $SEGMENT_RPL_MASK, %bl
	cmpb $USER_RPL, %bl
	jb resume_kernel
	xorl %edx, %edx
	call do_notify_resume
	jmp resume_userspace

#ifdef CONFIG_VM86
	ALIGN
work_notifysig_v86:
	pushl_cfi %ecx			# save ti_flags for do_notify_resume
	call save_v86_state		# %eax contains pt_regs pointer
	popl_cfi %ecx
	movl %eax, %esp
	jmp 1b
#endif
END(work_pending)

	# perform syscall entry tracing
	ALIGN
syscall_trace_entry:
	movl $-ENOSYS,PT_EAX(%esp)
	movl %esp, %eax
	call syscall_trace_enter
	/* What it returned is what we'll actually use.  */
	cmpl $(NR_syscalls), %eax
	jnae syscall_call
	jmp syscall_exit
END(syscall_trace_entry)

	# perform syscall exit tracing
	ALIGN
syscall_exit_work:
	testl $_TIF_WORK_SYSCALL_EXIT, %ecx
	jz work_pending
	TRACE_IRQS_ON
	ENABLE_INTERRUPTS(CLBR_ANY)	# could let syscall_trace_leave() call
					# schedule() instead
	movl %esp, %eax
	call syscall_trace_leave
	jmp resume_userspace
END(syscall_exit_work)
	CFI_ENDPROC

	RING0_INT_FRAME			# can't unwind into user space anyway
syscall_fault:
	ASM_CLAC
	GET_THREAD_INFO(%ebp)
	movl $-EFAULT,PT_EAX(%esp)
	jmp resume_userspace
END(syscall_fault)

syscall_badsys:
	movl $-ENOSYS,PT_EAX(%esp)
	jmp resume_userspace
END(syscall_badsys)
	CFI_ENDPROC
/*
 * End of kprobes section
 */
	.popsection

.macro FIXUP_ESPFIX_STACK
/*
 * Switch back from the ESPFIX stack to the normal zero-based stack
 *
 * We can't call C functions using the ESPFIX stack. This code reads
 * the high word of the segment base from the GDT and switches to the
 * normal stack and adjusts ESP with the matching offset.
 */
#ifdef CONFIG_X86_ESPFIX32
	/* fixup the stack */
	mov GDT_ESPFIX_SS + 4, %al /* bits 16..23 */
	mov GDT_ESPFIX_SS + 7, %ah /* bits 24..31 */
	shl $16, %eax
	addl %esp, %eax			/* the adjusted stack pointer */
	pushl_cfi $__KERNEL_DS
	pushl_cfi %eax
	lss (%esp), %esp		/* switch to the normal stack segment */
	CFI_ADJUST_CFA_OFFSET -8
#endif
.endm
.macro UNWIND_ESPFIX_STACK
#ifdef CONFIG_X86_ESPFIX32
	movl %ss, %eax
	/* see if on espfix stack */
	cmpw $__ESPFIX_SS, %ax
	jne 27f
	movl $__KERNEL_DS, %eax
	movl %eax, %ds
	movl %eax, %es
	/* switch to normal stack */
	FIXUP_ESPFIX_STACK
27:
#endif
.endm

/*
 * Build the entry stubs and pointer table with some assembler magic.
 * We pack 7 stubs into a single 32-byte chunk, which will fit in a
 * single cache line on all modern x86 implementations.
 */
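/*
 * Roughly (a sketch of the shape, not the exact encoding), each
 * generated stub is
 *
 *	1:	pushl $(~vector+0x80)	# short push of a signed byte
 *		jmp 2f			# short jump
 *
 * except the last stub of a chunk, which omits the "jmp 2f" and
 * falls through to the shared "2: jmp common_interrupt".
 */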
.section .init.rodata,"a"
ENTRY(interrupt)
.section .entry.text, "ax"
	.p2align 5
	.p2align CONFIG_X86_L1_CACHE_SHIFT
ENTRY(irq_entries_start)
	RING0_INT_FRAME
vector=FIRST_EXTERNAL_VECTOR
.rept (NR_VECTORS-FIRST_EXTERNAL_VECTOR+6)/7
	.balign 32
  .rept	7
    .if vector < NR_VECTORS
      .if vector <> FIRST_EXTERNAL_VECTOR
	CFI_ADJUST_CFA_OFFSET -4
      .endif
1:	pushl_cfi $(~vector+0x80)	/* Note: always in signed byte range */
      .if ((vector-FIRST_EXTERNAL_VECTOR)%7) <> 6
	jmp 2f
      .endif
      .previous
	.long 1b
      .section .entry.text, "ax"
vector=vector+1
    .endif
  .endr
2:	jmp common_interrupt
.endr
END(irq_entries_start)

.previous
END(interrupt)
.previous

/*
 * the CPU automatically disables interrupts when executing an IRQ vector,
 * so IRQ-flags tracing has to follow that:
 */
	.p2align CONFIG_X86_L1_CACHE_SHIFT
common_interrupt:
	ASM_CLAC
	addl $-0x80,(%esp)	/* Adjust vector into the [-256,-1] range */
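	/*
	 * Worked example: for vector 0x20 the stub pushed
	 * ~0x20+0x80 = 0x5f, and 0x5f-0x80 = -0x21 = ~0x20, so
	 * orig_eax now holds ~vector, in [-256,-1], and do_IRQ can
	 * recover the vector by complementing it again.
	 */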
	SAVE_ALL
	TRACE_IRQS_OFF
	movl %esp,%eax
	call do_IRQ
	jmp ret_from_intr
ENDPROC(common_interrupt)
	CFI_ENDPROC

/*
 *  Irq entries should be protected against kprobes
 */
	.pushsection .kprobes.text, "ax"

#define BUILD_INTERRUPT3(name, nr, fn)	\
ENTRY(name)				\
	RING0_INT_FRAME;		\
	ASM_CLAC;			\
	pushl_cfi $~(nr);		\
	SAVE_ALL;			\
	TRACE_IRQS_OFF			\
	movl %esp,%eax;			\
	call fn;			\
	jmp ret_from_intr;		\
	CFI_ENDPROC;			\
ENDPROC(name)


#ifdef CONFIG_TRACING
#define TRACE_BUILD_INTERRUPT(name, nr)		\
	BUILD_INTERRUPT3(trace_##name, nr, smp_trace_##name)
#else
#define TRACE_BUILD_INTERRUPT(name, nr)
#endif

#define BUILD_INTERRUPT(name, nr) \
	BUILD_INTERRUPT3(name, nr, smp_##name); \
	TRACE_BUILD_INTERRUPT(name, nr)
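
/*
 * For example (illustrative - the real invocations live in
 * <asm/entry_arch.h>), BUILD_INTERRUPT(reschedule_interrupt,
 * RESCHEDULE_VECTOR) would expand to an entry stub that pushes
 * $~(RESCHEDULE_VECTOR) and calls smp_reschedule_interrupt().
 */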

/* The include is where all of the SMP etc. interrupts come from */
#include <asm/entry_arch.h>

ENTRY(coprocessor_error)
	RING0_INT_FRAME
	ASM_CLAC
	pushl_cfi $0
	pushl_cfi $do_coprocessor_error
	jmp error_code
	CFI_ENDPROC
END(coprocessor_error)

ENTRY(simd_coprocessor_error)
	RING0_INT_FRAME
	ASM_CLAC
	pushl_cfi $0
#ifdef CONFIG_X86_INVD_BUG
	/* AMD 486 bug: invd from userspace calls exception 19 instead of #GP */
661:	pushl_cfi $do_general_protection
662:
.section .altinstructions,"a"
	altinstruction_entry 661b, 663f, X86_FEATURE_XMM, 662b-661b, 664f-663f
.previous
.section .altinstr_replacement,"ax"
663:	pushl $do_simd_coprocessor_error
664:
.previous
#else
	pushl_cfi $do_simd_coprocessor_error
#endif
	jmp error_code
	CFI_ENDPROC
END(simd_coprocessor_error)

ENTRY(device_not_available)
	RING0_INT_FRAME
	ASM_CLAC
	pushl_cfi $-1			# mark this as an int
	pushl_cfi $do_device_not_available
	jmp error_code
	CFI_ENDPROC
END(device_not_available)

#ifdef CONFIG_PARAVIRT
ENTRY(native_iret)
	iret
	_ASM_EXTABLE(native_iret, iret_exc)
END(native_iret)

ENTRY(native_irq_enable_sysexit)
	sti
	sysexit
END(native_irq_enable_sysexit)
#endif

ENTRY(overflow)
	RING0_INT_FRAME
	ASM_CLAC
	pushl_cfi $0
	pushl_cfi $do_overflow
	jmp error_code
	CFI_ENDPROC
END(overflow)

ENTRY(bounds)
	RING0_INT_FRAME
	ASM_CLAC
	pushl_cfi $0
	pushl_cfi $do_bounds
	jmp error_code
	CFI_ENDPROC
END(bounds)

ENTRY(invalid_op)
	RING0_INT_FRAME
	ASM_CLAC
	pushl_cfi $0
	pushl_cfi $do_invalid_op
	jmp error_code
	CFI_ENDPROC
END(invalid_op)

ENTRY(coprocessor_segment_overrun)
	RING0_INT_FRAME
	ASM_CLAC
	pushl_cfi $0
	pushl_cfi $do_coprocessor_segment_overrun
	jmp error_code
	CFI_ENDPROC
END(coprocessor_segment_overrun)

ENTRY(invalid_TSS)
	RING0_EC_FRAME
	ASM_CLAC
	pushl_cfi $do_invalid_TSS
	jmp error_code
	CFI_ENDPROC
END(invalid_TSS)

ENTRY(segment_not_present)
	RING0_EC_FRAME
	ASM_CLAC
	pushl_cfi $do_segment_not_present
	jmp error_code
	CFI_ENDPROC
END(segment_not_present)

ENTRY(stack_segment)
	RING0_EC_FRAME
	ASM_CLAC
	pushl_cfi $do_stack_segment
	jmp error_code
	CFI_ENDPROC
END(stack_segment)

ENTRY(alignment_check)
	RING0_EC_FRAME
	ASM_CLAC
	pushl_cfi $do_alignment_check
	jmp error_code
	CFI_ENDPROC
END(alignment_check)

ENTRY(divide_error)
	RING0_INT_FRAME
	ASM_CLAC
	pushl_cfi $0			# no error code
	pushl_cfi $do_divide_error
	jmp error_code
	CFI_ENDPROC
END(divide_error)

#ifdef CONFIG_X86_MCE
ENTRY(machine_check)
	RING0_INT_FRAME
	ASM_CLAC
	pushl_cfi $0
	pushl_cfi machine_check_vector
	jmp error_code
	CFI_ENDPROC
END(machine_check)
#endif

ENTRY(spurious_interrupt_bug)
	RING0_INT_FRAME
	ASM_CLAC
	pushl_cfi $0
	pushl_cfi $do_spurious_interrupt_bug
	jmp error_code
	CFI_ENDPROC
END(spurious_interrupt_bug)
/*
 * End of kprobes section
 */
	.popsection

#ifdef CONFIG_XEN
/* Xen doesn't set %esp to be precisely what the normal sysenter
   entrypoint expects, so fix it up before using the normal path. */
ENTRY(xen_sysenter_target)
	RING0_INT_FRAME
	addl $5*4, %esp		/* remove xen-provided frame */
	CFI_ADJUST_CFA_OFFSET -5*4
	jmp sysenter_past_esp
	CFI_ENDPROC

ENTRY(xen_hypervisor_callback)
	CFI_STARTPROC
	pushl_cfi $-1 /* orig_ax = -1 => not a system call */
	SAVE_ALL
	TRACE_IRQS_OFF

	/* Check to see if we got the event in the critical
	   region in xen_iret_direct, after we've reenabled
	   events and checked for pending events.  This simulates
	   iret instruction's behaviour where it delivers a
	   pending interrupt when enabling interrupts. */
	movl PT_EIP(%esp),%eax
	cmpl $xen_iret_start_crit,%eax
	jb   1f
	cmpl $xen_iret_end_crit,%eax
	jae  1f

	jmp  xen_iret_crit_fixup

ENTRY(xen_do_upcall)
1:	mov %esp, %eax
	call xen_evtchn_do_upcall
	jmp  ret_from_intr
	CFI_ENDPROC
ENDPROC(xen_hypervisor_callback)

# Hypervisor uses this for application faults while it executes.
# We get here for two reasons:
#  1. Fault while reloading DS, ES, FS or GS
#  2. Fault while executing IRET
# Category 1 we fix up by reattempting the load, and zeroing the segment
# register if the load fails.
# Category 2 we fix up by jumping to do_iret_error. We cannot use the
# normal Linux return path in this case because if we use the IRET hypercall
# to pop the stack frame we end up in an infinite loop of failsafe callbacks.
# We distinguish between categories by maintaining a status value in EAX.
ENTRY(xen_failsafe_callback)
	CFI_STARTPROC
	pushl_cfi %eax
	movl $1,%eax
1:	mov 4(%esp),%ds
2:	mov 8(%esp),%es
3:	mov 12(%esp),%fs
4:	mov 16(%esp),%gs
	/* EAX == 0 => Category 1 (Bad segment)
	   EAX != 0 => Category 2 (Bad IRET) */
	testl %eax,%eax
	popl_cfi %eax
	lea 16(%esp),%esp
	CFI_ADJUST_CFA_OFFSET -16
	jz 5f
	jmp iret_exc
5:	pushl_cfi $-1 /* orig_ax = -1 => not a system call */
	SAVE_ALL
	jmp ret_from_exception
	CFI_ENDPROC

.section .fixup,"ax"
6:	xorl %eax,%eax
	movl %eax,4(%esp)
	jmp 1b
7:	xorl %eax,%eax
	movl %eax,8(%esp)
	jmp 2b
8:	xorl %eax,%eax
	movl %eax,12(%esp)
	jmp 3b
9:	xorl %eax,%eax
	movl %eax,16(%esp)
	jmp 4b
.previous
	_ASM_EXTABLE(1b,6b)
	_ASM_EXTABLE(2b,7b)
	_ASM_EXTABLE(3b,8b)
	_ASM_EXTABLE(4b,9b)
ENDPROC(xen_failsafe_callback)

BUILD_INTERRUPT3(xen_hvm_callback_vector, HYPERVISOR_CALLBACK_VECTOR,
		xen_evtchn_do_upcall)

#endif	/* CONFIG_XEN */

#if IS_ENABLED(CONFIG_HYPERV)

BUILD_INTERRUPT3(hyperv_callback_vector, HYPERVISOR_CALLBACK_VECTOR,
	hyperv_vector_handler)

#endif /* CONFIG_HYPERV */

#ifdef CONFIG_FUNCTION_TRACER
#ifdef CONFIG_DYNAMIC_FTRACE

ENTRY(mcount)
	ret
END(mcount)

ENTRY(ftrace_caller)
	cmpl $0, function_trace_stop
	jne  ftrace_stub

	pushl %eax
	pushl %ecx
	pushl %edx
	pushl $0	/* Pass NULL as regs pointer */
	movl 4*4(%esp), %eax
	movl 0x4(%ebp), %edx
	movl function_trace_op, %ecx
	subl $MCOUNT_INSN_SIZE, %eax

.globl ftrace_call
ftrace_call:
	call ftrace_stub

	addl $4,%esp	/* skip NULL pointer */
	popl %edx
	popl %ecx
	popl %eax
ftrace_ret:
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
.globl ftrace_graph_call
ftrace_graph_call:
	jmp ftrace_stub
#endif

.globl ftrace_stub
ftrace_stub:
	ret
END(ftrace_caller)

ENTRY(ftrace_regs_caller)
	pushf	/* push flags before compare (in cs location) */
	cmpl $0, function_trace_stop
	jne ftrace_restore_flags

	/*
	 * i386 does not save SS and ESP when coming from kernel.
	 * Instead, to get sp, &regs->sp is used (see ptrace.h).
	 * Unfortunately, that means eflags must be at the same location
	 * as the current return ip is. We move the return ip into the
	 * ip location, and move flags into the return ip location.
	 */
	pushl 4(%esp)	/* save return ip into ip slot */

	pushl $0	/* Load 0 into orig_ax */
	pushl %gs
	pushl %fs
	pushl %es
	pushl %ds
	pushl %eax
	pushl %ebp
	pushl %edi
	pushl %esi
	pushl %edx
	pushl %ecx
	pushl %ebx

	movl 13*4(%esp), %eax	/* Get the saved flags */
	movl %eax, 14*4(%esp)	/* Move saved flags into regs->flags location */
				/* clobbering return ip */
	movl $__KERNEL_CS,13*4(%esp)

	movl 12*4(%esp), %eax	/* Load ip (1st parameter) */
	subl $MCOUNT_INSN_SIZE, %eax	/* Adjust ip */
	movl 0x4(%ebp), %edx	/* Load parent ip (2nd parameter) */
	movl function_trace_op, %ecx /* Save ftrace_pos in 3rd parameter */
	pushl %esp		/* Save pt_regs as 4th parameter */

GLOBAL(ftrace_regs_call)
	call ftrace_stub

	addl $4, %esp		/* Skip pt_regs */
	movl 14*4(%esp), %eax	/* Move flags back into cs */
	movl %eax, 13*4(%esp)	/* Needed to keep addl from modifying flags */
	movl 12*4(%esp), %eax	/* Get return ip from regs->ip */
	movl %eax, 14*4(%esp)	/* Put return ip back for ret */

	popl %ebx
	popl %ecx
	popl %edx
	popl %esi
	popl %edi
	popl %ebp
	popl %eax
	popl %ds
	popl %es
	popl %fs
	popl %gs
	addl $8, %esp		/* Skip orig_ax and ip */
	popf			/* Pop flags at end (no addl to corrupt flags) */
	jmp ftrace_ret

ftrace_restore_flags:
	popf
	jmp  ftrace_stub
#else /* ! CONFIG_DYNAMIC_FTRACE */

ENTRY(mcount)
	cmpl $__PAGE_OFFSET, %esp
	jb ftrace_stub		/* Paging not enabled yet? */

	cmpl $0, function_trace_stop
	jne  ftrace_stub

	cmpl $ftrace_stub, ftrace_trace_function
	jnz trace
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
	cmpl $ftrace_stub, ftrace_graph_return
	jnz ftrace_graph_caller

	cmpl $ftrace_graph_entry_stub, ftrace_graph_entry
	jnz ftrace_graph_caller
#endif
.globl ftrace_stub
ftrace_stub:
	ret

	/* taken from glibc */
trace:
	pushl %eax
	pushl %ecx
	pushl %edx
	movl 0xc(%esp), %eax
	movl 0x4(%ebp), %edx
	subl $MCOUNT_INSN_SIZE, %eax

	call *ftrace_trace_function

	popl %edx
	popl %ecx
	popl %eax
	jmp ftrace_stub
END(mcount)
#endif /* CONFIG_DYNAMIC_FTRACE */
#endif /* CONFIG_FUNCTION_TRACER */

#ifdef CONFIG_FUNCTION_GRAPH_TRACER
ENTRY(ftrace_graph_caller)
	pushl %eax
	pushl %ecx
	pushl %edx
	movl 0xc(%esp), %edx
	lea 0x4(%ebp), %eax
	movl (%ebp), %ecx
	subl $MCOUNT_INSN_SIZE, %edx
	call prepare_ftrace_return
	popl %edx
	popl %ecx
	popl %eax
	ret
END(ftrace_graph_caller)

.globl return_to_handler
return_to_handler:
	pushl %eax
	pushl %edx
	movl %ebp, %eax
	call ftrace_return_to_handler
	movl %eax, %ecx
	popl %edx
	popl %eax
	jmp *%ecx
#endif

/*
 * Some functions should be protected against kprobes
 */
	.pushsection .kprobes.text, "ax"

#ifdef CONFIG_TRACING
ENTRY(trace_page_fault)
	RING0_EC_FRAME
	ASM_CLAC
	pushl_cfi $trace_do_page_fault
	jmp error_code
	CFI_ENDPROC
END(trace_page_fault)
#endif

ENTRY(page_fault)
	RING0_EC_FRAME
	ASM_CLAC
	pushl_cfi $do_page_fault
	ALIGN
error_code:
	/* the function address is in %gs's slot on the stack */
	pushl_cfi %fs
	/*CFI_REL_OFFSET fs, 0*/
	pushl_cfi %es
	/*CFI_REL_OFFSET es, 0*/
	pushl_cfi %ds
	/*CFI_REL_OFFSET ds, 0*/
	pushl_cfi %eax
	CFI_REL_OFFSET eax, 0
	pushl_cfi %ebp
	CFI_REL_OFFSET ebp, 0
	pushl_cfi %edi
	CFI_REL_OFFSET edi, 0
	pushl_cfi %esi
	CFI_REL_OFFSET esi, 0
	pushl_cfi %edx
	CFI_REL_OFFSET edx, 0
	pushl_cfi %ecx
	CFI_REL_OFFSET ecx, 0
	pushl_cfi %ebx
	CFI_REL_OFFSET ebx, 0
	cld
	movl $(__KERNEL_PERCPU), %ecx
	movl %ecx, %fs
	UNWIND_ESPFIX_STACK
	GS_TO_REG %ecx
	movl PT_GS(%esp), %edi		# get the function address
	movl PT_ORIG_EAX(%esp), %edx	# get the error code
	movl $-1, PT_ORIG_EAX(%esp)	# no syscall to restart
	REG_TO_PTGS %ecx
	SET_KERNEL_GS %ecx
	movl $(__USER_DS), %ecx
	movl %ecx, %ds
	movl %ecx, %es
	TRACE_IRQS_OFF
	movl %esp,%eax			# pt_regs pointer
	call *%edi
	jmp ret_from_exception
	CFI_ENDPROC
END(page_fault)

/*
 * Debug traps and NMI can happen at the one SYSENTER instruction
 * that sets up the real kernel stack. Check here, since we can't
 * allow the wrong stack to be used.
 *
 * "TSS_sysenter_sp0+12" is because the NMI/debug handler will have
 * already pushed 3 words if it hits on the sysenter instruction:
 * eflags, cs and eip.
 *
 * We just load the right stack, and push the three (known) values
 * by hand onto the new stack - while updating the return eip past
 * the instruction that would have done it for sysenter.
 */
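/*
 * Stack at that point (a sketch): the trap pushed three words onto
 * the SYSENTER stack, so
 *
 *	 0(%esp) - eip
 *	 4(%esp) - cs
 *	 8(%esp) - eflags
 *
 * and FIX_STACK below therefore finds the real stack pointer at
 * TSS_sysenter_sp0 + 12 relative to the shifted %esp.
 */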
.macro FIX_STACK offset ok label
	cmpw $__KERNEL_CS, 4(%esp)
	jne \ok
\label:
	movl TSS_sysenter_sp0 + \offset(%esp), %esp
	CFI_DEF_CFA esp, 0
	CFI_UNDEFINED eip
	pushfl_cfi
	pushl_cfi $__KERNEL_CS
	pushl_cfi $sysenter_past_esp
	CFI_REL_OFFSET eip, 0
.endm

ENTRY(debug)
	RING0_INT_FRAME
	ASM_CLAC
	cmpl $ia32_sysenter_target,(%esp)
	jne debug_stack_correct
	FIX_STACK 12, debug_stack_correct, debug_esp_fix_insn
debug_stack_correct:
	pushl_cfi $-1			# mark this as an int
	SAVE_ALL
	TRACE_IRQS_OFF
	xorl %edx,%edx			# error code 0
	movl %esp,%eax			# pt_regs pointer
	call do_debug
	jmp ret_from_exception
	CFI_ENDPROC
END(debug)

/*
 * NMI is doubly nasty. It can happen _while_ we're handling
 * a debug fault, and the debug fault hasn't yet been able to
 * clear up the stack. So we first check whether we got an
 * NMI on the sysenter entry path, but after that we need to
 * check whether we got an NMI on the debug path where the debug
 * fault happened on the sysenter path.
 */
ENTRY(nmi)
	RING0_INT_FRAME
	ASM_CLAC
#ifdef CONFIG_X86_ESPFIX32
	pushl_cfi %eax
	movl %ss, %eax
	cmpw $__ESPFIX_SS, %ax
	popl_cfi %eax
	je nmi_espfix_stack
#endif
	cmpl $ia32_sysenter_target,(%esp)
	je nmi_stack_fixup
	pushl_cfi %eax
	movl %esp,%eax
	/* Do not access memory above the end of our stack page,
	 * it might not exist.
	 */
	andl $(THREAD_SIZE-1),%eax
	cmpl $(THREAD_SIZE-20),%eax
	popl_cfi %eax
	jae nmi_stack_correct
	cmpl $ia32_sysenter_target,12(%esp)
	je nmi_debug_stack_check
nmi_stack_correct:
	/* We have a RING0_INT_FRAME here */
	pushl_cfi %eax
	SAVE_ALL
	xorl %edx,%edx		# zero error code
	movl %esp,%eax		# pt_regs pointer
	call do_nmi
	jmp restore_all_notrace
	CFI_ENDPROC

nmi_stack_fixup:
	RING0_INT_FRAME
	FIX_STACK 12, nmi_stack_correct, 1
	jmp nmi_stack_correct

nmi_debug_stack_check:
	/* We have a RING0_INT_FRAME here */
	cmpw $__KERNEL_CS,16(%esp)
	jne nmi_stack_correct
	cmpl $debug,(%esp)
	jb nmi_stack_correct
	cmpl $debug_esp_fix_insn,(%esp)
	ja nmi_stack_correct
	FIX_STACK 24, nmi_stack_correct, 1
	jmp nmi_stack_correct

#ifdef CONFIG_X86_ESPFIX32
nmi_espfix_stack:
	/* We have a RING0_INT_FRAME here.
	 *
	 * Create a pointer for lss to get back to the espfix stack.
	 */
	pushl_cfi %ss
	pushl_cfi %esp
	addl $4, (%esp)
	/* copy the iret frame of 12 bytes */
	.rept 3
	pushl_cfi 16(%esp)
	.endr
	pushl_cfi %eax
	SAVE_ALL
	FIXUP_ESPFIX_STACK		# %eax == %esp
	xorl %edx,%edx			# zero error code
	call do_nmi
	RESTORE_REGS
	lss 12+4(%esp), %esp		# back to espfix stack
	CFI_ADJUST_CFA_OFFSET -24
	jmp irq_return
#endif
	CFI_ENDPROC
END(nmi)

ENTRY(int3)
	RING0_INT_FRAME
	ASM_CLAC
	pushl_cfi $-1			# mark this as an int
	SAVE_ALL
	TRACE_IRQS_OFF
	xorl %edx,%edx		# zero error code
	movl %esp,%eax		# pt_regs pointer
	call do_int3
	jmp ret_from_exception
	CFI_ENDPROC
END(int3)

ENTRY(general_protection)
	RING0_EC_FRAME
	pushl_cfi $do_general_protection
	jmp error_code
	CFI_ENDPROC
END(general_protection)

#ifdef CONFIG_KVM_GUEST
ENTRY(async_page_fault)
	RING0_EC_FRAME
	ASM_CLAC
	pushl_cfi $do_async_page_fault
	jmp error_code
	CFI_ENDPROC
END(async_page_fault)
#endif

/*
 * End of kprobes section
 */
	.popsection