ia32entry.S 15.4 KB
Newer Older
L
Linus Torvalds 已提交
1
/*
2 3
 * Compatibility mode system call entry point for x86-64.
 *
L
Linus Torvalds 已提交
4
 * Copyright 2000-2002 Andi Kleen, SuSE Labs.
5
 */
6
#include "calling.h"
7
#include <asm/asm-offsets.h>
L
Linus Torvalds 已提交
8 9
#include <asm/current.h>
#include <asm/errno.h>
10 11
#include <asm/ia32_unistd.h>
#include <asm/thread_info.h>
L
Linus Torvalds 已提交
12
#include <asm/segment.h>
13
#include <asm/irqflags.h>
14
#include <asm/asm.h>
15
#include <asm/smap.h>
L
Linus Torvalds 已提交
16
#include <linux/linkage.h>
17
#include <linux/err.h>
L
Linus Torvalds 已提交
18

19 20 21
/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this.  */
#include <linux/elf-em.h>
/*
 * Local copies of two audit architecture constants.
 * AUDIT_ARCH_I386 names __AUDIT_ARCH_LE before the latter is #defined;
 * that is harmless because the preprocessor expands AUDIT_ARCH_I386 only
 * where it is used.
 * NOTE(review): neither macro is referenced in the visible part of this
 * file -- confirm whether they are still needed.
 */
#define AUDIT_ARCH_I386		(EM_386|__AUDIT_ARCH_LE)
22
#define __AUDIT_ARCH_LE		0x40000000
23 24

/*
 * Without CONFIG_AUDITSYSCALL the two audit exit labels are not assembled;
 * alias them to ia32_ret_from_sys_call so that the "jnz sysexit_audit" /
 * "jnz sysretl_audit" branches in the fast paths go straight to that
 * slow return path.
 */
#ifndef CONFIG_AUDITSYSCALL
25 26
# define sysexit_audit		ia32_ret_from_sys_call
# define sysretl_audit		ia32_ret_from_sys_call
27 28
#endif

J
Jiri Olsa 已提交
29 30
	.section .entry.text, "ax"

31 32 33 34 35 36 37
#ifdef CONFIG_PARAVIRT
/*
 * native_usergs_sysret32: execute SWAPGS followed by SYSRETL.
 * Only assembled when CONFIG_PARAVIRT is set.
 * NOTE(review): presumably the out-of-line native backend for the
 * USERGS_SYSRET32 operation used below -- confirm against the paravirt
 * headers.
 */
ENTRY(native_usergs_sysret32)
	swapgs
	sysretl
ENDPROC(native_usergs_sysret32)
#endif

L
Linus Torvalds 已提交
38
/*
39
 * 32-bit SYSENTER instruction entry.
L
Linus Torvalds 已提交
40
 *
41 42 43 44 45
 * SYSENTER loads ss, rsp, cs, and rip from previously programmed MSRs.
 * IF and VM in rflags are cleared (IOW: interrupts are off).
 * SYSENTER does not save anything on the stack,
 * and does not save old rip (!!!) and rflags.
 *
L
Linus Torvalds 已提交
46
 * Arguments:
47 48 49 50 51 52 53 54 55
 * eax  system call number
 * ebx  arg1
 * ecx  arg2
 * edx  arg3
 * esi  arg4
 * edi  arg5
 * ebp  user stack
 * 0(%ebp) arg6
 *
L
Linus Torvalds 已提交
56
 * This is purely a fast path. For anything complicated we use the int 0x80
57
 * path below. We set up a complete hardware stack frame to share code
L
Linus Torvalds 已提交
58
 * with the int 0x80 path.
59
 */
L
Linus Torvalds 已提交
60
ENTRY(ia32_sysenter_target)
61
	/*
62 63 64
	 * Interrupts are off on entry.
	 * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON,
	 * it is too small to ever cause noticeable irq latency.
65
	 */
66
	SWAPGS_UNSAFE_STACK
67
	/* Switch to the stack pointer held in per-CPU cpu_current_top_of_stack. */
	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp
68
	ENABLE_INTERRUPTS(CLBR_NONE)
69

70 71 72 73
	/* Zero-extending 32-bit regs, do not remove */
	movl	%ebp, %ebp
	movl	%eax, %eax

74 75 76
	/* r10d = thread_info->sysenter_return; pushed below as pt_regs->ip. */
	movl	ASM_THREAD_INFO(TI_sysenter_return, %rsp, 0), %r10d

	/* Construct struct pt_regs on stack */
77 78 79 80
	pushq	$__USER32_DS		/* pt_regs->ss */
	pushq	%rbp			/* pt_regs->sp */
	pushfq				/* pt_regs->flags */
	pushq	$__USER32_CS		/* pt_regs->cs */
81
	pushq	%r10			/* pt_regs->ip = thread_info->sysenter_return */
82 83 84 85 86 87
	pushq	%rax			/* pt_regs->orig_ax */
	pushq	%rdi			/* pt_regs->di */
	pushq	%rsi			/* pt_regs->si */
	pushq	%rdx			/* pt_regs->dx */
	pushq	%rcx			/* pt_regs->cx */
	pushq	$-ENOSYS		/* pt_regs->ax */
L
Linus Torvalds 已提交
88
	cld
89
	sub	$(10*8), %rsp /* pt_regs->r8-11, bp, bx, r12-15 not saved */
90

91 92
	/*
	 * no need to do an access_ok check here because rbp has been
93
	 * 32-bit zero extended
94
	 */
95
	ASM_STAC
96 97
	/*
	 * Load arg6 from the user stack (0(%ebp) in the SYSENTER convention).
	 * The _ASM_EXTABLE entry names ia32_badarg as the fixup target if this
	 * load faults.
	 */
1:	movl	(%rbp), %ebp
	_ASM_EXTABLE(1b, ia32_badarg)
98
	ASM_CLAC
99 100 101 102 103 104

	/*
	 * Sysenter doesn't filter flags, so we need to clear NT
	 * ourselves.  To save a few cycles, we can check whether
	 * NT was set instead of doing an unconditional popfq.
	 */
105 106
	testl	$X86_EFLAGS_NT, EFLAGS(%rsp)
	jnz	sysenter_fix_flags
107 108
sysenter_flags_fixed:

109 110
	/*
	 * Set TS_COMPAT in thread_info->status, then take the
	 * sysenter_tracesys path if any _TIF_WORK_SYSCALL_ENTRY flag is set.
	 */
	orl     $TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS)
	testl   $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
111
	jnz	sysenter_tracesys
112

113
/*
 * sysenter_do_call: move the 32-bit syscall arguments into the 64-bit C ABI
 * argument registers, then fall into sysenter_dispatch.
 * sysenter_dispatch: call through ia32_sys_call_table when %rax is a valid
 * index and store the result in pt_regs->ax.  An out-of-range number skips
 * the call and the store, so pt_regs->ax keeps the -ENOSYS written by the
 * entry prologue.  Afterwards interrupts are disabled and any
 * _TIF_ALLWORK_MASK flag diverts to sysexit_audit.
 */
sysenter_do_call:
114 115 116 117 118 119
	/* 32-bit syscall -> 64-bit C ABI argument conversion */
	movl	%edi, %r8d		/* arg5 */
	movl	%ebp, %r9d		/* arg6 */
	xchg	%ecx, %esi		/* rsi:arg2, rcx:arg4 */
	movl	%ebx, %edi		/* arg1 */
	movl	%edx, %edx		/* arg3 (zero extension) */
120
sysenter_dispatch:
121
	cmpq	$(IA32_NR_syscalls-1), %rax
122
	ja	1f			/* unsigned compare: number too large, skip the call */
123 124
	call	*ia32_sys_call_table(, %rax, 8)
	movq	%rax, RAX(%rsp)
125
1:
126
	DISABLE_INTERRUPTS(CLBR_NONE)
127
	TRACE_IRQS_OFF
128
	testl	$_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
129 130
	jnz	sysexit_audit
sysexit_from_sys_call:
	/*
	 * Fast return to 32-bit user mode for the SYSENTER entry.  Reached
	 * with interrupts disabled, either by falling through from
	 * sysenter_dispatch or via the auditsys_exit macro.
	 *
	 * NB: SYSEXIT is not obviously safe for 64-bit kernels -- an
	 * NMI between STI and SYSEXIT has poorly specified behavior,
	 * and an NMI followed by an IRQ with usergs is fatal.  So
	 * we just pretend we're using SYSEXIT but we really use
	 * SYSRETL instead.
	 *
	 * This code path is still called 'sysexit' because it pairs
	 * with 'sysenter' and it uses the SYSENTER calling convention.
	 */
	andl	$~TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS)
	movl	RIP(%rsp), %ecx		/* User %eip */
	/*
	 * Reload the return value from pt_regs->ax.  When the syscall number
	 * is out of range, sysenter_dispatch skips both the call and the
	 * store to pt_regs->ax, so %rax still holds the syscall number while
	 * pt_regs->ax holds the -ENOSYS written by the entry prologue.
	 * Userspace must see the pt_regs value.
	 */
	movq	RAX(%rsp), %rax
	RESTORE_RSI_RDI
	xorl	%edx, %edx		/* Do not leak kernel information */
	xorq	%r8, %r8
	xorq	%r9, %r9
	xorq	%r10, %r10
	movl	EFLAGS(%rsp), %r11d	/* User eflags */
	TRACE_IRQS_ON

	/*
	 * SYSRETL works even on Intel CPUs.  Use it in preference to SYSEXIT,
	 * since it avoids a dicey window with interrupts enabled.
	 */
	movl	RSP(%rsp), %esp

	/*
	 * USERGS_SYSRET32 does:
	 *  gsbase = user's gs base
	 *  eip = ecx
	 *  rflags = r11
	 *  cs = __USER32_CS
	 *  ss = __USER_DS
	 *
	 * The prologue set RIP(%rsp) to VDSO32_SYSENTER_RETURN, which does:
	 *
	 *  pop %ebp
	 *  pop %edx
	 *  pop %ecx
	 *
	 * Therefore, we invoke SYSRETL with EDX and R8-R10 zeroed to
	 * avoid info leaks.  ECX holds VDSO32_SYSENTER_RETURN's address
	 * (already known to user code), R11 holds the user's own eflags
	 * as saved in pt_regs, and R12-R15 are callee-saved and therefore
	 * don't contain any interesting kernel data.
	 */
	USERGS_SYSRET32
L
Linus Torvalds 已提交
178

179 180
#ifdef CONFIG_AUDITSYSCALL
	/*
	 * auditsys_entry_common: call __audit_syscall_entry() from a fast
	 * entry path.
	 *
	 * On entry the registers still hold the raw 32-bit syscall state:
	 * eax = syscall number, ebx/ecx/edx/esi = syscall args 1-4.  They are
	 * shuffled into the C ABI argument registers for the call (number in
	 * edi, args 1-4 in esi/edx/ecx/r8d).
	 *
	 * After the call the macro leaves the registers in the layout
	 * expected at the *_dispatch labels: eax = syscall number, and
	 * edi/esi/edx/ecx/r8d = syscall args 1-5, reloaded from %ebx and
	 * pt_regs.  %r9 is used as scratch here, so the users of this macro
	 * reload arg6 into %r9d afterwards.
	 */
	.macro auditsys_entry_common
181 182 183 184 185 186 187 188 189 190 191 192 193
	movl	%esi, %r8d		/* 5th arg: 4th syscall arg */
	movl	%ecx, %r9d		/* swap with edx */
	movl	%edx, %ecx		/* 4th arg: 3rd syscall arg */
	movl	%r9d, %edx		/* 3rd arg: 2nd syscall arg */
	movl	%ebx, %esi		/* 2nd arg: 1st syscall arg */
	movl	%eax, %edi		/* 1st arg: syscall number */
	call	__audit_syscall_entry
	movl	ORIG_RAX(%rsp), %eax	/* reload syscall number */
	movl	%ebx, %edi		/* reload 1st syscall arg */
	movl	RCX(%rsp), %esi		/* reload 2nd syscall arg */
	movl	RDX(%rsp), %edx		/* reload 3rd syscall arg */
	movl	RSI(%rsp), %ecx		/* reload 4th syscall arg */
	movl	RDI(%rsp), %r8d		/* reload 5th syscall arg */
194 195
	.endm

196
	/*
	 * auditsys_exit: audit hook for the fast syscall exit paths.
	 *
	 * \exit is the label of the fast return path to resume when nothing
	 * but auditing was pending.
	 *
	 * Flow:
	 *  - if any _TIF_ALLWORK_MASK flag other than _TIF_SYSCALL_AUDIT is
	 *    set, jump to ia32_ret_from_sys_call;
	 *  - otherwise re-enable interrupts and call
	 *    __audit_syscall_exit(%edi, %rsi), with %edi = 1 when the return
	 *    value is not an error and %rsi = the return value;
	 *  - reload %rax from pt_regs->ax, disable interrupts and re-test the
	 *    flags: none set -> jump to \exit; otherwise zero pt_regs
	 *    r8-r11 and jump to int_with_check with the flag mask in %edi.
	 */
	.macro auditsys_exit exit
197 198
	testl	$(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
	jnz	ia32_ret_from_sys_call
199
	TRACE_IRQS_ON
200
	ENABLE_INTERRUPTS(CLBR_NONE)
201 202 203 204 205 206 207 208 209
	movl	%eax, %esi		/* second arg, syscall return value */
	cmpl	$-MAX_ERRNO, %eax	/* is it an error ? */
	jbe	1f			/* unsigned eax <= -MAX_ERRNO: not an error */
	movslq	%eax, %rsi		/* if error sign extend to 64 bits */
1:	setbe	%al			/* 1 if no error (cmpl flags still valid), 0 if error */
	movzbl	%al, %edi		/* zero-extend that into %edi */
	call	__audit_syscall_exit
	movq	RAX(%rsp), %rax		/* reload syscall return value */
	movl	$(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %edi
210
	DISABLE_INTERRUPTS(CLBR_NONE)
211
	TRACE_IRQS_OFF
212 213 214
	testl	%edi, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
	jz	\exit
	xorl	%eax, %eax		/* Do not leak kernel information */
215 216 217 218
	movq	%rax, R11(%rsp)
	movq	%rax, R10(%rsp)
	movq	%rax, R9(%rsp)
	movq	%rax, R8(%rsp)
219
	jmp	int_with_check
220 221 222 223
	.endm

/*
 * SYSENTER entry with only audit work pending (reached from
 * sysenter_tracesys): run the audit entry hook, restore arg6 from %ebp into
 * %r9d (the macro uses %r9 as scratch) and continue at sysenter_dispatch.
 */
sysenter_auditsys:
	auditsys_entry_common
224 225
	movl	%ebp, %r9d		/* reload 6th syscall arg */
	jmp	sysenter_dispatch
226 227 228 229 230

/*
 * SYSENTER exit with work flags set: instantiate auditsys_exit so that the
 * audit-only case resumes the fast return at sysexit_from_sys_call.
 */
sysexit_audit:
	auditsys_exit sysexit_from_sys_call
#endif

231
/*
 * Out-of-line path taken when the saved user flags had NT set: load RFLAGS
 * with just IF and the always-set fixed bit (which leaves NT clear in the
 * live flags) and resume the entry sequence at sysenter_flags_fixed.
 */
sysenter_fix_flags:
232 233
	pushq $(X86_EFLAGS_IF|X86_EFLAGS_FIXED)
	popfq
234
	jmp	sysenter_flags_fixed
235

236 237
/*
 * Slow path for SYSENTER when a _TIF_WORK_SYSCALL_ENTRY flag is set.
 * With CONFIG_AUDITSYSCALL, the audit-only case branches to
 * sysenter_auditsys.  Otherwise pt_regs is completed (extra registers
 * saved, r8-r11 slots zeroed), syscall_trace_enter(&pt_regs) is called, the
 * argument registers are reloaded and the syscall continues at
 * sysenter_do_call with the syscall number that syscall_trace_enter()
 * returned.
 */
sysenter_tracesys:
#ifdef CONFIG_AUDITSYSCALL
238
	testl	$(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT), ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
239 240
	jz	sysenter_auditsys
#endif
241
	SAVE_EXTRA_REGS
242
	xorl	%eax, %eax		/* Do not leak kernel information */
243 244 245 246
	movq	%rax, R11(%rsp)
	movq	%rax, R10(%rsp)
	movq	%rax, R9(%rsp)
	movq	%rax, R8(%rsp)
247
	movq	%rsp, %rdi		/* &pt_regs -> arg1 */
L
Linus Torvalds 已提交
248
	call	syscall_trace_enter
249 250 251 252 253 254

	/* Reload arg registers from stack. (see ia32_tracesys for the rationale) */
	movl	RCX(%rsp), %ecx
	movl	RDX(%rsp), %edx
	movl	RSI(%rsp), %esi
	movl	RDI(%rsp), %edi
255
	movl	%eax, %eax		/* zero extension */
256

257
	RESTORE_EXTRA_REGS
L
Linus Torvalds 已提交
258
	jmp	sysenter_do_call
259
ENDPROC(ia32_sysenter_target)
L
Linus Torvalds 已提交
260 261

/*
262
 * 32-bit SYSCALL instruction entry.
L
Linus Torvalds 已提交
263
 *
264
 * 32-bit SYSCALL saves rip to rcx, clears rflags.RF, then saves rflags to r11,
265 266 267 268 269 270
 * then loads new ss, cs, and rip from previously programmed MSRs.
 * rflags gets masked by a value from another MSR (so CLD and CLAC
 * are not needed). SYSCALL does not save anything on the stack
 * and does not change rsp.
 *
 * Note: rflags saving+masking-with-MSR happens only in Long mode
271
 * (in legacy 32-bit mode, IF, RF and VM bits are cleared and that's it).
272 273 274 275
 * Don't get confused: rflags saving+masking depends on Long Mode Active bit
 * (EFER.LMA=1), NOT on bitness of userspace where SYSCALL executes
 * or target CS descriptor's L bit (SYSCALL does not read segment descriptors).
 *
L
Linus Torvalds 已提交
276
 * Arguments:
277 278 279 280 281 282 283 284 285 286
 * eax  system call number
 * ecx  return address
 * ebx  arg1
 * ebp  arg2	(note: not saved in the stack frame, should not be touched)
 * edx  arg3
 * esi  arg4
 * edi  arg5
 * esp  user stack
 * 0(%esp) arg6
 *
L
Linus Torvalds 已提交
287
 * This is purely a fast path. For anything complicated we use the int 0x80
288 289 290
 * path below. We set up a complete hardware stack frame to share code
 * with the int 0x80 path.
 */
L
Linus Torvalds 已提交
291
ENTRY(ia32_cstar_target)
292 293 294 295 296
	/*
	 * Interrupts are off on entry.
	 * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON,
	 * it is too small to ever cause noticeable irq latency.
	 */
297
	SWAPGS_UNSAFE_STACK
298 299
	/*
	 * Keep the zero-extended user %esp in %r8 (SYSCALL does not change
	 * rsp), then switch to the stack pointer held in per-CPU
	 * cpu_current_top_of_stack.
	 */
	movl	%esp, %r8d
	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp
300
	ENABLE_INTERRUPTS(CLBR_NONE)
301

302
	/* Zero-extending 32-bit regs, do not remove */
303
	movl	%eax, %eax
304

305
	/* Construct struct pt_regs on stack */
306 307 308 309 310 311 312 313 314 315
	pushq	$__USER32_DS		/* pt_regs->ss */
	pushq	%r8			/* pt_regs->sp */
	pushq	%r11			/* pt_regs->flags */
	pushq	$__USER32_CS		/* pt_regs->cs */
	pushq	%rcx			/* pt_regs->ip */
	pushq	%rax			/* pt_regs->orig_ax */
	pushq	%rdi			/* pt_regs->di */
	pushq	%rsi			/* pt_regs->si */
	pushq	%rdx			/* pt_regs->dx */
	pushq	%rbp			/* pt_regs->cx */
316
	/* arg2 arrives in %ebp; copy it to %ecx, where the shared code expects it. */
	movl	%ebp, %ecx
317
	pushq	$-ENOSYS		/* pt_regs->ax */
318
	sub	$(10*8), %rsp		/* pt_regs->r8-11, bp, bx, r12-15 not saved */
319

320
	/*
321 322
	 * No need to do an access_ok check here because r8 has been
	 * 32-bit zero extended:
323
	 */
324
	ASM_STAC
325 326
	/*
	 * Load arg6 from the user stack (0(%esp) in the SYSCALL convention).
	 * The _ASM_EXTABLE entry names ia32_badarg as the fixup target if this
	 * load faults.
	 */
1:	movl	(%r8), %ebp
	_ASM_EXTABLE(1b, ia32_badarg)
327
	ASM_CLAC
328 329
	/*
	 * Set TS_COMPAT in thread_info->status, then take the cstar_tracesys
	 * path if any _TIF_WORK_SYSCALL_ENTRY flag is set.
	 */
	orl     $TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS)
	testl   $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
L
Linus Torvalds 已提交
330
	jnz   cstar_tracesys
331

332
/*
 * cstar_do_call: move the 32-bit syscall arguments into the 64-bit C ABI
 * argument registers, then fall into cstar_dispatch.
 * cstar_dispatch: call through ia32_sys_call_table when %rax is a valid
 * index and store the result in pt_regs->ax.  An out-of-range number skips
 * the call and the store, so pt_regs->ax keeps the -ENOSYS written by the
 * entry prologue.  Afterwards interrupts are disabled and any
 * _TIF_ALLWORK_MASK flag diverts to sysretl_audit.
 */
cstar_do_call:
333 334 335 336 337 338 339
	/* 32-bit syscall -> 64-bit C ABI argument conversion */
	movl	%edi, %r8d		/* arg5 */
	movl	%ebp, %r9d		/* arg6 */
	xchg	%ecx, %esi		/* rsi:arg2, rcx:arg4 */
	movl	%ebx, %edi		/* arg1 */
	movl	%edx, %edx		/* arg3 (zero extension) */

340
cstar_dispatch:
341
	cmpq	$(IA32_NR_syscalls-1), %rax
342
	ja	1f			/* unsigned compare: number too large, skip the call */
343 344 345

	call	*ia32_sys_call_table(, %rax, 8)
	movq	%rax, RAX(%rsp)
346
1:
347
	DISABLE_INTERRUPTS(CLBR_NONE)
348
	TRACE_IRQS_OFF
349 350 351
	testl	$_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
	jnz	sysretl_audit

352
sysretl_from_sys_call:
	/*
	 * Fast return to 32-bit user mode for the SYSCALL entry.  Reached
	 * with interrupts disabled, either by falling through from
	 * cstar_dispatch or via the auditsys_exit macro.
	 */
	andl	$~TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS)
	movl	RCX(%rsp), %ebp		/* user %ebp (arg2) was saved in pt_regs->cx */
	RESTORE_RSI_RDI_RDX
	movl	RIP(%rsp), %ecx		/* user %eip for SYSRETL */
	movl	EFLAGS(%rsp), %r11d	/* user eflags for SYSRETL */
	/*
	 * Reload the return value from pt_regs->ax.  When the syscall number
	 * is out of range, cstar_dispatch skips both the call and the store
	 * to pt_regs->ax, so %rax still holds the syscall number while
	 * pt_regs->ax holds the -ENOSYS written by the entry prologue.
	 * Userspace must see the pt_regs value.
	 */
	movq	RAX(%rsp), %rax
	xorq	%r10, %r10		/* Do not leak kernel information */
	xorq	%r9, %r9
	xorq	%r8, %r8
	TRACE_IRQS_ON
	movl	RSP(%rsp), %esp
	/*
	 * 64-bit->32-bit SYSRET restores eip from ecx,
	 * eflags from r11 (but RF and VM bits are forced to 0),
	 * cs and ss are loaded from MSRs.
	 * (Note: 32-bit->32-bit SYSRET is different: since r11
	 * does not exist, it merely sets eflags.IF=1).
	 *
	 * NB: On AMD CPUs with the X86_BUG_SYSRET_SS_ATTRS bug, the ss
	 * descriptor is not reinitialized.  This means that we must
	 * avoid SYSRET with SS == NULL, which could happen if we schedule,
	 * exit the kernel, and re-enter using an interrupt vector.  (All
	 * interrupt entries on x86_64 set SS to NULL.)  We prevent that
	 * from happening by reloading SS in __switch_to.
	 */
	USERGS_SYSRET32
378

379 380 381
#ifdef CONFIG_AUDITSYSCALL
/*
 * SYSCALL entry with only audit work pending (reached from cstar_tracesys):
 * run the audit entry hook, restore arg6 from %ebp into %r9d (the macro
 * uses %r9 as scratch) and continue at cstar_dispatch.
 */
cstar_auditsys:
	auditsys_entry_common
382 383
	movl	%ebp, %r9d		/* reload 6th syscall arg */
	jmp	cstar_dispatch
384 385

/*
 * SYSCALL exit with work flags set: instantiate auditsys_exit so that the
 * audit-only case resumes the fast return at sysretl_from_sys_call.
 */
sysretl_audit:
386
	auditsys_exit sysretl_from_sys_call
387 388 389 390
#endif

/*
 * Slow path for SYSCALL when a _TIF_WORK_SYSCALL_ENTRY flag is set.
 * With CONFIG_AUDITSYSCALL, the audit-only case branches to cstar_auditsys.
 * Otherwise pt_regs is completed (extra registers saved, r8-r11 slots
 * zeroed), syscall_trace_enter(&pt_regs) is called, the argument registers
 * are reloaded and the syscall continues at cstar_do_call with the syscall
 * number that syscall_trace_enter() returned.
 */
cstar_tracesys:
#ifdef CONFIG_AUDITSYSCALL
391 392
	testl	$(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT), ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
	jz	cstar_auditsys
393
#endif
394
	SAVE_EXTRA_REGS
395
	xorl	%eax, %eax		/* Do not leak kernel information */
396 397
	movq	%rax, R11(%rsp)
	movq	%rax, R10(%rsp)
398
	movq	%rax, R9(%rsp)
399
	movq	%rax, R8(%rsp)
400
	movq	%rsp, %rdi		/* &pt_regs -> arg1 */
401
	call	syscall_trace_enter
402 403 404 405 406 407

	/* Reload arg registers from stack. (see ia32_tracesys for the rationale) */
	movl	RCX(%rsp), %ecx
	movl	RDX(%rsp), %edx
	movl	RSI(%rsp), %esi
	movl	RDI(%rsp), %edi
408
	movl	%eax, %eax		/* zero extension */
409

410
	RESTORE_EXTRA_REGS
411
	jmp	cstar_do_call
412
END(ia32_cstar_target)
413

L
Linus Torvalds 已提交
414
/*
 * ia32_badarg: exception-table fixup target for the user-stack load of arg6
 * in the SYSENTER and SYSCALL fast entries.
 *
 * At this point the fast prologue has allocated pt_regs but left the
 * r8-r11 slots uninitialised ("sub $(10*8), %rsp").  Store -EFAULT as the
 * return value directly in pt_regs->ax and leave through
 * ia32_ret_from_sys_call, which zeroes those slots before entering the
 * common return path, so no stale kernel stack contents are restored into
 * user-visible registers.
 */
ia32_badarg:
	ASM_CLAC			/* undo the ASM_STAC done before the faulting load */
	movq	$-EFAULT, RAX(%rsp)	/* pt_regs->ax = -EFAULT */
	jmp	ia32_ret_from_sys_call
L
Linus Torvalds 已提交
418

419
/*
 * Slow return used by the fast entry paths.  Those paths do not save
 * r8-r11, so clear the matching pt_regs slots before handing over to
 * int_ret_from_sys_call; otherwise whatever was on the kernel stack could
 * be exposed.  %eax is zero on exit from this stub.
 */
ia32_ret_from_sys_call:
	xorl	%eax, %eax		/* zero source for the stores below */
	movq	%rax, R8(%rsp)
	movq	%rax, R9(%rsp)
	movq	%rax, R10(%rsp)
	movq	%rax, R11(%rsp)
	jmp	int_ret_from_sys_call
426

427 428
/*
 * Emulated IA32 system calls via int 0x80.
L
Linus Torvalds 已提交
429
 *
430 431 432 433 434 435 436 437
 * Arguments:
 * eax  system call number
 * ebx  arg1
 * ecx  arg2
 * edx  arg3
 * esi  arg4
 * edi  arg5
 * ebp  arg6	(note: not saved in the stack frame, should not be touched)
L
Linus Torvalds 已提交
438 439
 *
 * Notes:
440 441
 * Uses the same stack frame as the x86-64 version.
 * All registers except eax must be saved (but ptrace may violate that).
L
Linus Torvalds 已提交
442 443 444
 * Arguments are zero extended. For system calls that want sign extension and
 * take long arguments a wrapper is needed. Most calls can just be called
 * directly.
445 446
 * Assumes it is only called from user space and entered with interrupts off.
 */
L
Linus Torvalds 已提交
447 448

ENTRY(ia32_syscall)
449
	/*
450 451 452
	 * Interrupts are off on entry.
	 * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON,
	 * it is too small to ever cause noticeable irq latency.
453
	 */
454 455
	PARAVIRT_ADJUST_EXCEPTION_FRAME
	SWAPGS
456
	ENABLE_INTERRUPTS(CLBR_NONE)
457

458
	/* Zero-extending 32-bit regs, do not remove */
459
	movl	%eax, %eax
460

461
	/* Construct struct pt_regs on stack (iret frame is already on stack) */
462 463 464 465 466 467
	pushq	%rax			/* pt_regs->orig_ax */
	pushq	%rdi			/* pt_regs->di */
	pushq	%rsi			/* pt_regs->si */
	pushq	%rdx			/* pt_regs->dx */
	pushq	%rcx			/* pt_regs->cx */
	pushq	$-ENOSYS		/* pt_regs->ax */
	/* Unlike the fast entries, the r8-r11 slots are written as zero here. */
468 469 470 471
	pushq	$0			/* pt_regs->r8 */
	pushq	$0			/* pt_regs->r9 */
	pushq	$0			/* pt_regs->r10 */
	pushq	$0			/* pt_regs->r11 */
L
Linus Torvalds 已提交
472
	cld
473 474 475 476 477
	sub	$(6*8), %rsp /* pt_regs->bp, bx, r12-15 not saved */

	/*
	 * Set TS_COMPAT in thread_info->status, then take the ia32_tracesys
	 * path if any _TIF_WORK_SYSCALL_ENTRY flag is set.
	 */
	orl	$TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS)
	testl	$_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
	jnz	ia32_tracesys
478

479
/*
 * ia32_do_call: convert the int 0x80 register arguments to the 64-bit C ABI
 * and call through ia32_sys_call_table when %rax is a valid index.
 * ia32_sysret: store %rax in pt_regs->ax and leave through
 * int_ret_from_sys_call (also the jump target of ia32_badarg).
 * An out-of-range syscall number jumps to the local label 1, bypassing the
 * store, so pt_regs->ax keeps the -ENOSYS written by the entry prologue.
 */
ia32_do_call:
480 481 482 483 484 485 486
	/* 32-bit syscall -> 64-bit C ABI argument conversion */
	movl	%edi, %r8d		/* arg5 */
	movl	%ebp, %r9d		/* arg6 */
	xchg	%ecx, %esi		/* rsi:arg2, rcx:arg4 */
	movl	%ebx, %edi		/* arg1 */
	movl	%edx, %edx		/* arg3 (zero extension) */
	cmpq	$(IA32_NR_syscalls-1), %rax
487
	ja	1f
488 489 490

	call	*ia32_sys_call_table(, %rax, 8) /* table base + nr*8; an indexed operand is not RIP relative */

L
Linus Torvalds 已提交
491
ia32_sysret:
492
	movq	%rax, RAX(%rsp)
493
1:
494
	jmp	int_ret_from_sys_call
L
Linus Torvalds 已提交
495

496 497
/*
 * Slow path for int 0x80 when a _TIF_WORK_SYSCALL_ENTRY flag is set:
 * complete pt_regs by saving the extra registers, call
 * syscall_trace_enter(&pt_regs), reload the argument registers and resume
 * at ia32_do_call.  The r8-r11 slots need no clearing here because the
 * int 0x80 prologue already pushed zeros for them.
 */
ia32_tracesys:
	SAVE_EXTRA_REGS
498 499
	movq	%rsp, %rdi			/* &pt_regs -> arg1 */
	call	syscall_trace_enter
500 501 502 503 504 505 506 507 508 509 510
	/*
	 * Reload arg registers from stack in case ptrace changed them.
	 * Don't reload %eax because syscall_trace_enter() returned
	 * the %rax value we should see.  But do truncate it to 32 bits.
	 * If it's -1 to make us punt the syscall, then (u32)-1 is still
	 * an appropriately invalid value.
	 */
	movl	RCX(%rsp), %ecx
	movl	RDX(%rsp), %edx
	movl	RSI(%rsp), %esi
	movl	RDI(%rsp), %edi
511
	movl	%eax, %eax		/* zero extension */
512
	RESTORE_EXTRA_REGS
513
	jmp	ia32_do_call
514
END(ia32_syscall)
L
Linus Torvalds 已提交
515

516
	/*
	 * PTREGSCALL label, func
	 *
	 * Emit an aligned global stub named \label that loads the address of
	 * \func into %rax (RIP-relative) and jumps to ia32_ptregs_common,
	 * which makes the actual call.
	 */
	.macro PTREGSCALL label, func
517 518
	ALIGN
GLOBAL(\label)
519 520
	leaq	\func(%rip), %rax
	jmp	ia32_ptregs_common
L
Linus Torvalds 已提交
521 522
	.endm

523 524 525 526
	/* Stubs that reach their C handler through ia32_ptregs_common. */
	PTREGSCALL stub32_rt_sigreturn,	sys32_rt_sigreturn
	PTREGSCALL stub32_sigreturn,	sys32_sigreturn
	PTREGSCALL stub32_fork,		sys_fork
	PTREGSCALL stub32_vfork,	sys_vfork
L
Linus Torvalds 已提交
527

528 529
	ALIGN
GLOBAL(stub32_clone)
	/*
	 * Argument order differs between the two clone ABIs:
	 *   32-bit: clone(..., int tls_val, int *child_tidptr)
	 *   64-bit: clone(..., int *child_tidptr, int tls_val)
	 * The native sys_clone() called here uses the 64-bit order, so
	 * child_tidptr has to move from arg5 to arg4.  tls_val is ignored by
	 * sys_clone(), so a one-way copy (arg4 = arg5) is enough and a full
	 * swap is not needed.
	 */
	movq	%r8, %rcx			/* arg4 = arg5 */
	leaq	sys_clone(%rip), %rax		/* handler for ia32_ptregs_common */
	jmp	ia32_ptregs_common
541

542 543
	ALIGN
/*
 * Common tail of the PTREGSCALL stubs and stub32_clone.
 * In:  %rax = address of the C handler to call (loaded by the stub).
 * Saves the extra registers into pt_regs, calls the handler, restores the
 * extra registers and returns to the stub's caller.
 * NOTE(review): the offset argument 8 presumably accounts for the return
 * address that sits between %rsp and pt_regs here -- confirm against
 * calling.h.
 */
ia32_ptregs_common:
544
	SAVE_EXTRA_REGS 8
545
	call	*%rax
546 547
	RESTORE_EXTRA_REGS 8
	ret
548
END(ia32_ptregs_common)