Commit 69734b64 authored by Linus Torvalds

Merge branch 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

* 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (21 commits)
  x86: Fix atomic64_xxx_cx8() functions
  x86: Fix and improve cmpxchg_double{,_local}()
  x86_64, asm: Optimise fls(), ffs() and fls64()
  x86, bitops: Move fls64.h inside __KERNEL__
  x86: Fix and improve percpu_cmpxchg{8,16}b_double()
  x86: Report cpb and eff_freq_ro flags correctly
  x86/i386: Use less assembly in strlen(), speed things up a bit
  x86: Use the same node_distance for 32 and 64-bit
  x86: Fix rflags in FAKE_STACK_FRAME
  x86: Clean up and extend do_int3()
  x86: Call do_notify_resume() with interrupts enabled
  x86/div64: Add a micro-optimization shortcut if base is power of two
  x86-64: Cleanup some assembly entry points
  x86-64: Slightly shorten line system call entry and exit paths
  x86-64: Reduce amount of redundant code generated for invalidate_interruptNN
  x86-64: Slightly shorten int_ret_from_sys_call
  x86, efi: Convert efi_phys_get_time() args to physical addresses
  x86: Default to vsyscall=emulate
  x86-64: Set siginfo and context on vsyscall emulation faults
  x86: consolidate xchg and xadd macros
  ...
@@ -2755,11 +2755,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 			functions are at fixed addresses, they make nice
 			targets for exploits that can control RIP.
-			emulate     Vsyscalls turn into traps and are emulated
-			            reasonably safely.
-			native      [default] Vsyscalls are native syscall
-			            instructions.
+			emulate     [default] Vsyscalls turn into traps and are
+			            emulated reasonably safely.
+			native      Vsyscalls are native syscall instructions.
 			This is a little bit faster than trapping
 			and makes a few dynamic recompilers work
 			better than they would in emulation mode.
......
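A note on the documentation hunk above: the legacy vsyscall page maps a handful of system calls at fixed addresses in every process, which is exactly what makes it a convenient target for return-to-fixed-address exploits and why emulate becomes the default. The snippet below is my own user-space illustration, not part of the commit; the address 0xffffffffff600000 is the traditional location of the vsyscall gettimeofday entry, and the program assumes the kernel was not booted with vsyscall=none.

/* Hypothetical demo: call the legacy x86-64 vsyscall gettimeofday(),
 * which lives at a fixed address in every process.  Under
 * vsyscall=emulate the call traps into the kernel and is emulated;
 * under vsyscall=native it runs a real syscall instruction. */
#include <stdio.h>
#include <sys/time.h>

#define VSYSCALL_ADDR 0xffffffffff600000UL   /* fixed vsyscall page */

typedef int (*vgettimeofday_t)(struct timeval *, struct timezone *);

int main(void)
{
	vgettimeofday_t vgtod = (vgettimeofday_t)VSYSCALL_ADDR;
	struct timeval tv;

	if (vgtod(&tv, NULL) == 0)
		printf("vsyscall gettimeofday: %ld.%06ld\n",
		       (long)tv.tv_sec, (long)tv.tv_usec);
	return 0;
}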
@@ -134,7 +134,7 @@ ENTRY(ia32_sysenter_target)
 	CFI_REL_OFFSET rsp,0
 	pushfq_cfi
 	/*CFI_REL_OFFSET rflags,0*/
-	movl	8*3-THREAD_SIZE+TI_sysenter_return(%rsp), %r10d
+	movl	TI_sysenter_return+THREAD_INFO(%rsp,3*8-KERNEL_STACK_OFFSET),%r10d
 	CFI_REGISTER rip,r10
 	pushq_cfi $__USER32_CS
 	/*CFI_REL_OFFSET cs,0*/
@@ -150,9 +150,8 @@ ENTRY(ia32_sysenter_target)
 	.section __ex_table,"a"
 	.quad 1b,ia32_badarg
 	.previous
-	GET_THREAD_INFO(%r10)
-	orl    $TS_COMPAT,TI_status(%r10)
-	testl  $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10)
+	orl     $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+	testl   $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
 	CFI_REMEMBER_STATE
 	jnz  sysenter_tracesys
 	cmpq	$(IA32_NR_syscalls-1),%rax
@@ -162,13 +161,12 @@ sysenter_do_call:
 sysenter_dispatch:
 	call	*ia32_sys_call_table(,%rax,8)
 	movq	%rax,RAX-ARGOFFSET(%rsp)
-	GET_THREAD_INFO(%r10)
 	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
-	testl	$_TIF_ALLWORK_MASK,TI_flags(%r10)
+	testl	$_TIF_ALLWORK_MASK,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
 	jnz	sysexit_audit
 sysexit_from_sys_call:
-	andl    $~TS_COMPAT,TI_status(%r10)
+	andl    $~TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
 	/* clear IF, that popfq doesn't enable interrupts early */
 	andl  $~0x200,EFLAGS-R11(%rsp)
 	movl	RIP-R11(%rsp),%edx		/* User %eip */
@@ -205,7 +203,7 @@ sysexit_from_sys_call:
 	.endm

 	.macro auditsys_exit exit
-	testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),TI_flags(%r10)
+	testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
 	jnz ia32_ret_from_sys_call
 	TRACE_IRQS_ON
 	sti
@@ -215,12 +213,11 @@ sysexit_from_sys_call:
 	movzbl %al,%edi		/* zero-extend that into %edi */
 	inc %edi /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */
 	call audit_syscall_exit
-	GET_THREAD_INFO(%r10)
 	movl RAX-ARGOFFSET(%rsp),%eax	/* reload syscall return value */
 	movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi
 	cli
 	TRACE_IRQS_OFF
-	testl %edi,TI_flags(%r10)
+	testl %edi,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
 	jz \exit
 	CLEAR_RREGS -ARGOFFSET
 	jmp int_with_check
@@ -238,7 +235,7 @@ sysexit_audit:

 sysenter_tracesys:
 #ifdef CONFIG_AUDITSYSCALL
-	testl	$(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%r10)
+	testl	$(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
 	jz	sysenter_auditsys
 #endif
 	SAVE_REST
@@ -309,9 +306,8 @@ ENTRY(ia32_cstar_target)
 	.section __ex_table,"a"
 	.quad 1b,ia32_badarg
 	.previous
-	GET_THREAD_INFO(%r10)
-	orl   $TS_COMPAT,TI_status(%r10)
-	testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10)
+	orl   $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+	testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
 	CFI_REMEMBER_STATE
 	jnz   cstar_tracesys
 	cmpq $IA32_NR_syscalls-1,%rax
@@ -321,13 +317,12 @@ cstar_do_call:
 cstar_dispatch:
 	call *ia32_sys_call_table(,%rax,8)
 	movq %rax,RAX-ARGOFFSET(%rsp)
-	GET_THREAD_INFO(%r10)
 	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
-	testl $_TIF_ALLWORK_MASK,TI_flags(%r10)
+	testl $_TIF_ALLWORK_MASK,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
 	jnz sysretl_audit
 sysretl_from_sys_call:
-	andl $~TS_COMPAT,TI_status(%r10)
+	andl $~TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
 	RESTORE_ARGS 0,-ARG_SKIP,0,0,0
 	movl RIP-ARGOFFSET(%rsp),%ecx
 	CFI_REGISTER rip,rcx
@@ -355,7 +350,7 @@ sysretl_audit:

 cstar_tracesys:
 #ifdef CONFIG_AUDITSYSCALL
-	testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%r10)
+	testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
 	jz cstar_auditsys
 #endif
 	xchgl %r9d,%ebp
@@ -420,9 +415,8 @@ ENTRY(ia32_syscall)
 	/* note the registers are not zero extended to the sf.
 	   this could be a problem. */
 	SAVE_ARGS 0,1,0
-	GET_THREAD_INFO(%r10)
-	orl   $TS_COMPAT,TI_status(%r10)
-	testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10)
+	orl   $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+	testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
 	jnz ia32_tracesys
 	cmpq $(IA32_NR_syscalls-1),%rax
 	ja ia32_badsys
@@ -459,8 +453,8 @@ quiet_ni_syscall:
 	CFI_ENDPROC

 	.macro PTREGSCALL label, func, arg
-	.globl \label
-\label:
+	ALIGN
+GLOBAL(\label)
 	leaq \func(%rip),%rax
 	leaq -ARGOFFSET+8(%rsp),\arg	/* 8 for return address */
 	jmp  ia32_ptregs_common
@@ -477,7 +471,8 @@ quiet_ni_syscall:
 	PTREGSCALL stub32_vfork, sys_vfork, %rdi
 	PTREGSCALL stub32_iopl, sys_iopl, %rsi

-ENTRY(ia32_ptregs_common)
+	ALIGN
+ia32_ptregs_common:
 	popq %r11
 	CFI_ENDPROC
 	CFI_STARTPROC32	simple
......
@@ -4,10 +4,10 @@
 #ifdef CONFIG_SMP
 	.macro LOCK_PREFIX
-1:	lock
+672:	lock
 	.section .smp_locks,"a"
 	.balign 4
-	.long 1b - .
+	.long 672b - .
 	.previous
 	.endm
 #else
......
@@ -380,6 +380,8 @@ static inline unsigned long __fls(unsigned long word)
 	return word;
 }
+#undef ADDR
 #ifdef __KERNEL__
 /**
  * ffs - find first set bit in word
@@ -395,10 +397,25 @@ static inline unsigned long __fls(unsigned long word)
 static inline int ffs(int x)
 {
 	int r;
-#ifdef CONFIG_X86_CMOV
+
+#ifdef CONFIG_X86_64
+	/*
+	 * AMD64 says BSFL won't clobber the dest reg if x==0; Intel64 says the
+	 * dest reg is undefined if x==0, but their CPU architect says its
+	 * value is written to set it to the same as before, except that the
+	 * top 32 bits will be cleared.
+	 *
+	 * We cannot do this on 32 bits because at the very least some
+	 * 486 CPUs did not behave this way.
+	 */
+	long tmp = -1;
+	asm("bsfl %1,%0"
+	    : "=r" (r)
+	    : "rm" (x), "0" (tmp));
+#elif defined(CONFIG_X86_CMOV)
 	asm("bsfl %1,%0\n\t"
 	    "cmovzl %2,%0"
-	    : "=r" (r) : "rm" (x), "r" (-1));
+	    : "=&r" (r) : "rm" (x), "r" (-1));
 #else
 	asm("bsfl %1,%0\n\t"
 	    "jnz 1f\n\t"
@@ -422,7 +439,22 @@ static inline int ffs(int x)
 static inline int fls(int x)
 {
 	int r;
-#ifdef CONFIG_X86_CMOV
+
+#ifdef CONFIG_X86_64
+	/*
+	 * AMD64 says BSRL won't clobber the dest reg if x==0; Intel64 says the
+	 * dest reg is undefined if x==0, but their CPU architect says its
+	 * value is written to set it to the same as before, except that the
+	 * top 32 bits will be cleared.
+	 *
+	 * We cannot do this on 32 bits because at the very least some
+	 * 486 CPUs did not behave this way.
+	 */
+	long tmp = -1;
+	asm("bsrl %1,%0"
+	    : "=r" (r)
+	    : "rm" (x), "0" (tmp));
+#elif defined(CONFIG_X86_CMOV)
 	asm("bsrl %1,%0\n\t"
 	    "cmovzl %2,%0"
 	    : "=&r" (r) : "rm" (x), "rm" (-1));
@@ -434,11 +466,35 @@ static inline int fls(int x)
 #endif
 	return r + 1;
 }
-#endif /* __KERNEL__ */
-
-#undef ADDR
-
-#ifdef __KERNEL__
+
+/**
+ * fls64 - find last set bit in a 64-bit word
+ * @x: the word to search
+ *
+ * This is defined in a similar way as the libc and compiler builtin
+ * ffsll, but returns the position of the most significant set bit.
+ *
+ * fls64(value) returns 0 if value is 0 or the position of the last
+ * set bit if value is nonzero. The last (most significant) bit is
+ * at position 64.
+ */
+#ifdef CONFIG_X86_64
+static __always_inline int fls64(__u64 x)
+{
+	long bitpos = -1;
+	/*
+	 * AMD64 says BSRQ won't clobber the dest reg if x==0; Intel64 says the
+	 * dest reg is undefined if x==0, but their CPU architect says its
+	 * value is written to set it to the same as before.
+	 */
+	asm("bsrq %1,%0"
+	    : "+r" (bitpos)
+	    : "rm" (x));
+	return bitpos + 1;
+}
+#else
+#include <asm-generic/bitops/fls64.h>
+#endif

 #include <asm-generic/bitops/find.h>
@@ -450,12 +506,6 @@ static inline int fls(int x)
 #include <asm-generic/bitops/const_hweight.h>
-#endif /* __KERNEL__ */
-
-#include <asm-generic/bitops/fls64.h>
-
-#ifdef __KERNEL__
 #include <asm-generic/bitops/le.h>
 #include <asm-generic/bitops/ext2-atomic-setbit.h>
......
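To make the bitops.h change above concrete: the new 64-bit fls64() leans on the architected behaviour that BSR leaves its destination register unchanged when the source is zero, so seeding the destination with -1 yields 0 for fls64(0). Below is a small user-space sketch of the same idea (mine, not from the patch); it assumes an x86-64 toolchain.

/* User-space sketch of the BSR-based fls64() above (x86-64 only). */
#include <assert.h>
#include <stdint.h>

static inline int fls64_sketch(uint64_t x)
{
	long bitpos = -1;
	/* bsrq leaves the destination untouched when x == 0, the
	 * documented behaviour the kernel version relies on. */
	asm("bsrq %1,%0" : "+r" (bitpos) : "rm" (x));
	return bitpos + 1;
}

int main(void)
{
	assert(fls64_sketch(0) == 0);
	assert(fls64_sketch(1) == 1);
	assert(fls64_sketch((1ull << 32) | 1) == 33);
	assert(fls64_sketch(0x8000000000000000ull) == 64);
	return 0;
}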
@@ -14,6 +14,8 @@ extern void __cmpxchg_wrong_size(void)
 	__compiletime_error("Bad argument size for cmpxchg");
 extern void __xadd_wrong_size(void)
 	__compiletime_error("Bad argument size for xadd");
+extern void __add_wrong_size(void)
+	__compiletime_error("Bad argument size for add");

 /*
  * Constants for operation sizes. On 32-bit, the 64-bit size it set to
@@ -31,60 +33,47 @@ extern void __xadd_wrong_size(void)
 #define	__X86_CASE_Q	-1		/* sizeof will never return -1 */
 #endif

+/*
+ * An exchange-type operation, which takes a value and a pointer, and
+ * returns a the old value.
+ */
+#define __xchg_op(ptr, arg, op, lock)					\
+	({								\
+	        __typeof__ (*(ptr)) __ret = (arg);			\
+		switch (sizeof(*(ptr))) {				\
+		case __X86_CASE_B:					\
+			asm volatile (lock #op "b %b0, %1\n"		\
+				      : "+r" (__ret), "+m" (*(ptr))	\
+				      : : "memory", "cc");		\
+			break;						\
+		case __X86_CASE_W:					\
+			asm volatile (lock #op "w %w0, %1\n"		\
+				      : "+r" (__ret), "+m" (*(ptr))	\
+				      : : "memory", "cc");		\
+			break;						\
+		case __X86_CASE_L:					\
+			asm volatile (lock #op "l %0, %1\n"		\
+				      : "+r" (__ret), "+m" (*(ptr))	\
+				      : : "memory", "cc");		\
+			break;						\
+		case __X86_CASE_Q:					\
+			asm volatile (lock #op "q %q0, %1\n"		\
+				      : "+r" (__ret), "+m" (*(ptr))	\
+				      : : "memory", "cc");		\
+			break;						\
+		default:						\
+			__ ## op ## _wrong_size();			\
+		}							\
+		__ret;							\
+	})
+
 /*
  * Note: no "lock" prefix even on SMP: xchg always implies lock anyway.
  * Since this is generally used to protect other memory information, we
  * use "asm volatile" and "memory" clobbers to prevent gcc from moving
  * information around.
  */
-#define __xchg(x, ptr, size)						\
-({									\
-	__typeof(*(ptr)) __x = (x);					\
-	switch (size) {							\
-	case __X86_CASE_B:						\
-	{								\
-		volatile u8 *__ptr = (volatile u8 *)(ptr);		\
-		asm volatile("xchgb %0,%1"				\
-			     : "=q" (__x), "+m" (*__ptr)		\
-			     : "0" (__x)				\
-			     : "memory");				\
-		break;							\
-	}								\
-	case __X86_CASE_W:						\
-	{								\
-		volatile u16 *__ptr = (volatile u16 *)(ptr);		\
-		asm volatile("xchgw %0,%1"				\
-			     : "=r" (__x), "+m" (*__ptr)		\
-			     : "0" (__x)				\
-			     : "memory");				\
-		break;							\
-	}								\
-	case __X86_CASE_L:						\
-	{								\
-		volatile u32 *__ptr = (volatile u32 *)(ptr);		\
-		asm volatile("xchgl %0,%1"				\
-			     : "=r" (__x), "+m" (*__ptr)		\
-			     : "0" (__x)				\
-			     : "memory");				\
-		break;							\
-	}								\
-	case __X86_CASE_Q:						\
-	{								\
-		volatile u64 *__ptr = (volatile u64 *)(ptr);		\
-		asm volatile("xchgq %0,%1"				\
-			     : "=r" (__x), "+m" (*__ptr)		\
-			     : "0" (__x)				\
-			     : "memory");				\
-		break;							\
-	}								\
-	default:							\
-		__xchg_wrong_size();					\
-	}								\
-	__x;								\
-})
-
-#define xchg(ptr, v)							\
-	__xchg((v), (ptr), sizeof(*ptr))
+#define xchg(ptr, v)	__xchg_op((ptr), (v), xchg, "")

 /*
  * Atomic compare and exchange.  Compare OLD with MEM, if identical,
@@ -165,46 +154,80 @@ extern void __xadd_wrong_size(void)
 	__cmpxchg_local((ptr), (old), (new), sizeof(*ptr))
 #endif

-#define __xadd(ptr, inc, lock)						\
-	({								\
-	        __typeof__ (*(ptr)) __ret = (inc);			\
-		switch (sizeof(*(ptr))) {				\
-		case __X86_CASE_B:					\
-			asm volatile (lock "xaddb %b0, %1\n"		\
-				      : "+r" (__ret), "+m" (*(ptr))	\
-				      : : "memory", "cc");		\
-			break;						\
-		case __X86_CASE_W:					\
-			asm volatile (lock "xaddw %w0, %1\n"		\
-				      : "+r" (__ret), "+m" (*(ptr))	\
-				      : : "memory", "cc");		\
-			break;						\
-		case __X86_CASE_L:					\
-			asm volatile (lock "xaddl %0, %1\n"		\
-				      : "+r" (__ret), "+m" (*(ptr))	\
-				      : : "memory", "cc");		\
-			break;						\
-		case __X86_CASE_Q:					\
-			asm volatile (lock "xaddq %q0, %1\n"		\
-				      : "+r" (__ret), "+m" (*(ptr))	\
-				      : : "memory", "cc");		\
-			break;						\
-		default:						\
-			__xadd_wrong_size();				\
-		}							\
-		__ret;							\
-	})
-
-/*
- * xadd() adds "inc" to "*ptr" and atomically returns the previous
- * value of "*ptr".
- *
- * xadd() is locked when multiple CPUs are online
- * xadd_sync() is always locked
- * xadd_local() is never locked
- */
-#define xadd(ptr, inc)		__xadd((ptr), (inc), LOCK_PREFIX)
-#define xadd_sync(ptr, inc)	__xadd((ptr), (inc), "lock; ")
-#define xadd_local(ptr, inc)	__xadd((ptr), (inc), "")
+/*
+ * xadd() adds "inc" to "*ptr" and atomically returns the previous
+ * value of "*ptr".
+ *
+ * xadd() is locked when multiple CPUs are online
+ * xadd_sync() is always locked
+ * xadd_local() is never locked
+ */
+#define __xadd(ptr, inc, lock)	__xchg_op((ptr), (inc), xadd, lock)
+#define xadd(ptr, inc)		__xadd((ptr), (inc), LOCK_PREFIX)
+#define xadd_sync(ptr, inc)	__xadd((ptr), (inc), "lock; ")
+#define xadd_local(ptr, inc)	__xadd((ptr), (inc), "")
+
+#define __add(ptr, inc, lock)						\
+	({								\
+	        __typeof__ (*(ptr)) __ret = (inc);			\
+		switch (sizeof(*(ptr))) {				\
+		case __X86_CASE_B:					\
+			asm volatile (lock "addb %b1, %0\n"		\
+				      : "+m" (*(ptr)) : "ri" (inc)	\
+				      : "memory", "cc");		\
+			break;						\
+		case __X86_CASE_W:					\
+			asm volatile (lock "addw %w1, %0\n"		\
+				      : "+m" (*(ptr)) : "ri" (inc)	\
+				      : "memory", "cc");		\
+			break;						\
+		case __X86_CASE_L:					\
+			asm volatile (lock "addl %1, %0\n"		\
+				      : "+m" (*(ptr)) : "ri" (inc)	\
+				      : "memory", "cc");		\
+			break;						\
+		case __X86_CASE_Q:					\
+			asm volatile (lock "addq %1, %0\n"		\
+				      : "+m" (*(ptr)) : "ri" (inc)	\
+				      : "memory", "cc");		\
+			break;						\
+		default:						\
+			__add_wrong_size();				\
+		}							\
+		__ret;							\
+	})
+
+/*
+ * add_*() adds "inc" to "*ptr"
+ *
+ * __add() takes a lock prefix
+ * add_smp() is locked when multiple CPUs are online
+ * add_sync() is always locked
+ */
+#define add_smp(ptr, inc)	__add((ptr), (inc), LOCK_PREFIX)
+#define add_sync(ptr, inc)	__add((ptr), (inc), "lock; ")
+
+#define __cmpxchg_double(pfx, p1, p2, o1, o2, n1, n2)			\
+({									\
+	bool __ret;							\
+	__typeof__(*(p1)) __old1 = (o1), __new1 = (n1);			\
+	__typeof__(*(p2)) __old2 = (o2), __new2 = (n2);			\
+	BUILD_BUG_ON(sizeof(*(p1)) != sizeof(long));			\
+	BUILD_BUG_ON(sizeof(*(p2)) != sizeof(long));			\
+	VM_BUG_ON((unsigned long)(p1) % (2 * sizeof(long)));		\
+	VM_BUG_ON((unsigned long)((p1) + 1) != (unsigned long)(p2));	\
+	asm volatile(pfx "cmpxchg%c4b %2; sete %0"			\
+		     : "=a" (__ret), "+d" (__old2),			\
+		       "+m" (*(p1)), "+m" (*(p2))			\
+		     : "i" (2 * sizeof(long)), "a" (__old1),		\
+		       "b" (__new1), "c" (__new2));			\
+	__ret;								\
+})
+
+#define cmpxchg_double(p1, p2, o1, o2, n1, n2) \
+	__cmpxchg_double(LOCK_PREFIX, p1, p2, o1, o2, n1, n2)
+
+#define cmpxchg_double_local(p1, p2, o1, o2, n1, n2) \
+	__cmpxchg_double(, p1, p2, o1, o2, n1, n2)
+
 #endif	/* ASM_X86_CMPXCHG_H */
@@ -166,52 +166,6 @@ static inline unsigned long cmpxchg_386(volatile void *ptr, unsigned long old,
 #endif

-#define cmpxchg8b(ptr, o1, o2, n1, n2)				\
-({								\
-	char __ret;						\
-	__typeof__(o2) __dummy;					\
-	__typeof__(*(ptr)) __old1 = (o1);			\
-	__typeof__(o2) __old2 = (o2);				\
-	__typeof__(*(ptr)) __new1 = (n1);			\
-	__typeof__(o2) __new2 = (n2);				\
-	asm volatile(LOCK_PREFIX "cmpxchg8b %2; setz %1"	\
-		       : "=d"(__dummy), "=a" (__ret), "+m" (*ptr)\
-		       : "a" (__old1), "d"(__old2),		\
-		         "b" (__new1), "c" (__new2)		\
-		       : "memory");				\
-	__ret; })
-
-#define cmpxchg8b_local(ptr, o1, o2, n1, n2)			\
-({								\
-	char __ret;						\
-	__typeof__(o2) __dummy;					\
-	__typeof__(*(ptr)) __old1 = (o1);			\
-	__typeof__(o2) __old2 = (o2);				\
-	__typeof__(*(ptr)) __new1 = (n1);			\
-	__typeof__(o2) __new2 = (n2);				\
-	asm volatile("cmpxchg8b %2; setz %1"			\
-		       : "=d"(__dummy), "=a"(__ret), "+m" (*ptr)\
-		       : "a" (__old), "d"(__old2),		\
-		         "b" (__new1), "c" (__new2),		\
-		       : "memory");				\
-	__ret; })
-
-#define cmpxchg_double(ptr, o1, o2, n1, n2)			\
-({								\
-	BUILD_BUG_ON(sizeof(*(ptr)) != 4);			\
-	VM_BUG_ON((unsigned long)(ptr) % 8);			\
-	cmpxchg8b((ptr), (o1), (o2), (n1), (n2));		\
-})
-
-#define cmpxchg_double_local(ptr, o1, o2, n1, n2)		\
-({								\
-	BUILD_BUG_ON(sizeof(*(ptr)) != 4);			\
-	VM_BUG_ON((unsigned long)(ptr) % 8);			\
-	cmpxchg16b_local((ptr), (o1), (o2), (n1), (n2));	\
-})
-
 #define system_has_cmpxchg_double() cpu_has_cx8

 #endif	/* _ASM_X86_CMPXCHG_32_H */
@@ -20,49 +20,6 @@ static inline void set_64bit(volatile u64 *ptr, u64 val)
 	cmpxchg_local((ptr), (o), (n));					\
 })

-#define cmpxchg16b(ptr, o1, o2, n1, n2)				\
-({								\
-	char __ret;						\
-	__typeof__(o2) __junk;					\
-	__typeof__(*(ptr)) __old1 = (o1);			\
-	__typeof__(o2) __old2 = (o2);				\
-	__typeof__(*(ptr)) __new1 = (n1);			\
-	__typeof__(o2) __new2 = (n2);				\
-	asm volatile(LOCK_PREFIX "cmpxchg16b %2;setz %1"	\
-		       : "=d"(__junk), "=a"(__ret), "+m" (*ptr)	\
-		       : "b"(__new1), "c"(__new2),		\
-		         "a"(__old1), "d"(__old2));		\
-	__ret; })
-
-#define cmpxchg16b_local(ptr, o1, o2, n1, n2)			\
-({								\
-	char __ret;						\
-	__typeof__(o2) __junk;					\
-	__typeof__(*(ptr)) __old1 = (o1);			\
-	__typeof__(o2) __old2 = (o2);				\
-	__typeof__(*(ptr)) __new1 = (n1);			\
-	__typeof__(o2) __new2 = (n2);				\
-	asm volatile("cmpxchg16b %2;setz %1"			\
-		       : "=d"(__junk), "=a"(__ret), "+m" (*ptr)	\
-		       : "b"(__new1), "c"(__new2),		\
-		         "a"(__old1), "d"(__old2));		\
-	__ret; })
-
-#define cmpxchg_double(ptr, o1, o2, n1, n2)			\
-({								\
-	BUILD_BUG_ON(sizeof(*(ptr)) != 8);			\
-	VM_BUG_ON((unsigned long)(ptr) % 16);			\
-	cmpxchg16b((ptr), (o1), (o2), (n1), (n2));		\
-})
-
-#define cmpxchg_double_local(ptr, o1, o2, n1, n2)		\
-({								\
-	BUILD_BUG_ON(sizeof(*(ptr)) != 8);			\
-	VM_BUG_ON((unsigned long)(ptr) % 16);			\
-	cmpxchg16b_local((ptr), (o1), (o2), (n1), (n2));	\
-})
-
 #define system_has_cmpxchg_double() cpu_has_cx16

 #endif	/* _ASM_X86_CMPXCHG_64_H */
@@ -4,6 +4,7 @@
 #ifdef CONFIG_X86_32

 #include <linux/types.h>
+#include <linux/log2.h>

 /*
  * do_div() is NOT a C function. It wants to return
@@ -21,15 +22,20 @@
 ({								\
 	unsigned long __upper, __low, __high, __mod, __base;	\
 	__base = (base);					\
-	asm("":"=a" (__low), "=d" (__high) : "A" (n));		\
-	__upper = __high;					\
-	if (__high) {						\
-		__upper = __high % (__base);			\
-		__high = __high / (__base);			\
+	if (__builtin_constant_p(__base) && is_power_of_2(__base)) { \
+		__mod = n & (__base - 1);			\
+		n >>= ilog2(__base);				\
+	} else {						\
+		asm("" : "=a" (__low), "=d" (__high) : "A" (n));\
+		__upper = __high;				\
+		if (__high) {					\
+			__upper = __high % (__base);		\
+			__high = __high / (__base);		\
+		}						\
+		asm("divl %2" : "=a" (__low), "=d" (__mod)	\
+			: "rm" (__base), "0" (__low), "1" (__upper)); \
+		asm("" : "=A" (n) : "a" (__low), "d" (__high)); \
 	}							\
-	asm("divl %2":"=a" (__low), "=d" (__mod)		\
-	    : "rm" (__base), "0" (__low), "1" (__upper));	\
-	asm("":"=A" (n) : "a" (__low), "d" (__high));		\
 	__mod;							\
 })
......
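The do_div() change above adds a shortcut when the divisor is a compile-time constant power of two: the remainder becomes a mask and the quotient a shift, skipping the divl sequence entirely. A quick stand-alone check of that equivalence (my example, not from the patch):

/* Check that the power-of-two shortcut matches real division. */
#include <assert.h>
#include <stdint.h>

int main(void)
{
	uint64_t n = 0x123456789abcdefULL;
	uint64_t base = 8;                 /* constant power of two */

	uint64_t mod  = n & (base - 1);    /* n % 8 */
	uint64_t quot = n >> 3;            /* n / 8, since ilog2(8) == 3 */

	assert(mod  == n % base);
	assert(quot == n / base);
	return 0;
}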
@@ -451,23 +451,20 @@ do {									\
 #endif /* !CONFIG_M386 */

 #ifdef CONFIG_X86_CMPXCHG64
-#define percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2)			\
+#define percpu_cmpxchg8b_double(pcp1, pcp2, o1, o2, n1, n2)		\
 ({									\
-	char __ret;							\
-	typeof(o1) __o1 = o1;						\
-	typeof(o1) __n1 = n1;						\
-	typeof(o2) __o2 = o2;						\
-	typeof(o2) __n2 = n2;						\
-	typeof(o2) __dummy = n2;					\
+	bool __ret;							\
+	typeof(pcp1) __o1 = (o1), __n1 = (n1);				\
+	typeof(pcp2) __o2 = (o2), __n2 = (n2);				\
 	asm volatile("cmpxchg8b "__percpu_arg(1)"\n\tsetz %0\n\t"	\
-		    : "=a"(__ret), "=m" (pcp1), "=d"(__dummy)		\
-		    : "b"(__n1), "c"(__n2), "a"(__o1), "d"(__o2));	\
+		    : "=a" (__ret), "+m" (pcp1), "+m" (pcp2), "+d" (__o2) \
+		    : "b" (__n1), "c" (__n2), "a" (__o1));		\
 	__ret;								\
 })

-#define __this_cpu_cmpxchg_double_4(pcp1, pcp2, o1, o2, n1, n2)	percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2)
-#define this_cpu_cmpxchg_double_4(pcp1, pcp2, o1, o2, n1, n2)		percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2)
-#define irqsafe_cpu_cmpxchg_double_4(pcp1, pcp2, o1, o2, n1, n2)	percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2)
+#define __this_cpu_cmpxchg_double_4	percpu_cmpxchg8b_double
+#define this_cpu_cmpxchg_double_4	percpu_cmpxchg8b_double
+#define irqsafe_cpu_cmpxchg_double_4	percpu_cmpxchg8b_double
 #endif /* CONFIG_X86_CMPXCHG64 */

 /*
@@ -508,31 +505,23 @@ do {									\
  * it in software. The address used in the cmpxchg16 instruction must be
  * aligned to a 16 byte boundary.
  */
-#ifdef CONFIG_SMP
-#define CMPXCHG16B_EMU_CALL "call this_cpu_cmpxchg16b_emu\n\t" ASM_NOP3
-#else
-#define CMPXCHG16B_EMU_CALL "call this_cpu_cmpxchg16b_emu\n\t" ASM_NOP2
-#endif
-#define percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2)			\
+#define percpu_cmpxchg16b_double(pcp1, pcp2, o1, o2, n1, n2)		\
 ({									\
-	char __ret;							\
-	typeof(o1) __o1 = o1;						\
-	typeof(o1) __n1 = n1;						\
-	typeof(o2) __o2 = o2;						\
-	typeof(o2) __n2 = n2;						\
-	typeof(o2) __dummy;						\
-	alternative_io(CMPXCHG16B_EMU_CALL,				\
-		       "cmpxchg16b " __percpu_prefix "(%%rsi)\n\tsetz %0\n\t", \
+	bool __ret;							\
+	typeof(pcp1) __o1 = (o1), __n1 = (n1);				\
+	typeof(pcp2) __o2 = (o2), __n2 = (n2);				\
+	alternative_io("leaq %P1,%%rsi\n\tcall this_cpu_cmpxchg16b_emu\n\t", \
+		       "cmpxchg16b " __percpu_arg(1) "\n\tsetz %0\n\t",	\
 		       X86_FEATURE_CX16,				\
-		       ASM_OUTPUT2("=a"(__ret), "=d"(__dummy)),		\
-		       "S" (&pcp1), "b"(__n1), "c"(__n2),		\
-		       "a"(__o1), "d"(__o2) : "memory");		\
+		       ASM_OUTPUT2("=a" (__ret), "+m" (pcp1),		\
+				   "+m" (pcp2), "+d" (__o2)),		\
+		       "b" (__n1), "c" (__n2), "a" (__o1) : "rsi");	\
 	__ret;								\
 })

-#define __this_cpu_cmpxchg_double_8(pcp1, pcp2, o1, o2, n1, n2)	percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2)
-#define this_cpu_cmpxchg_double_8(pcp1, pcp2, o1, o2, n1, n2)		percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2)
-#define irqsafe_cpu_cmpxchg_double_8(pcp1, pcp2, o1, o2, n1, n2)	percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2)
+#define __this_cpu_cmpxchg_double_8	percpu_cmpxchg16b_double
+#define this_cpu_cmpxchg_double_8	percpu_cmpxchg16b_double
+#define irqsafe_cpu_cmpxchg_double_8	percpu_cmpxchg16b_double

 #endif
......
@@ -6,6 +6,7 @@
  * EFLAGS bits
  */
 #define X86_EFLAGS_CF	0x00000001 /* Carry Flag */
+#define X86_EFLAGS_BIT1	0x00000002 /* Bit 1 - always on */
 #define X86_EFLAGS_PF	0x00000004 /* Parity Flag */
 #define X86_EFLAGS_AF	0x00000010 /* Auxiliary carry Flag */
 #define X86_EFLAGS_ZF	0x00000040 /* Zero Flag */
......
@@ -79,23 +79,10 @@ static __always_inline int __ticket_spin_trylock(arch_spinlock_t *lock)
 	return cmpxchg(&lock->head_tail, old.head_tail, new.head_tail) == old.head_tail;
 }

-#if (NR_CPUS < 256)
 static __always_inline void __ticket_spin_unlock(arch_spinlock_t *lock)
 {
-	asm volatile(UNLOCK_LOCK_PREFIX "incb %0"
-		     : "+m" (lock->head_tail)
-		     :
-		     : "memory", "cc");
+	__add(&lock->tickets.head, 1, UNLOCK_LOCK_PREFIX);
 }
-#else
-static __always_inline void __ticket_spin_unlock(arch_spinlock_t *lock)
-{
-	asm volatile(UNLOCK_LOCK_PREFIX "incw %0"
-		     : "+m" (lock->head_tail)
-		     :
-		     : "memory", "cc");
-}
-#endif

 static inline int __ticket_spin_is_locked(arch_spinlock_t *lock)
 {
......
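The spinlock.h hunk above replaces the two NR_CPUS-dependent unlock variants with a single __add(&lock->tickets.head, 1, ...). For readers unfamiliar with ticket locks, a minimal user-space model of the head/tail scheme follows; it is an illustration only, the field names merely mirror the kernel's tickets.head/tickets.tail layout and none of this code is from the kernel.

/* Minimal ticket-lock model: lock takes a ticket (tail++), unlock
 * advances head; a thread owns the lock when its ticket equals head. */
#include <assert.h>

struct ticket_lock {
	unsigned short head;   /* ticket currently being served */
	unsigned short tail;   /* next ticket to hand out */
};

static unsigned short ticket_lock(struct ticket_lock *l)
{
	unsigned short me = __sync_fetch_and_add(&l->tail, 1);

	while (__sync_fetch_and_add(&l->head, 0) != me)
		;              /* spin until our ticket is served */
	return me;
}

static void ticket_unlock(struct ticket_lock *l)
{
	__sync_fetch_and_add(&l->head, 1);   /* the "__add(&head, 1)" step */
}

int main(void)
{
	struct ticket_lock l = { 0, 0 };

	ticket_lock(&l);
	ticket_unlock(&l);
	assert(l.head == 1 && l.tail == 1);
	return 0;
}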
@@ -40,7 +40,8 @@ struct thread_info {
 	 */
 	__u8			supervisor_stack[0];
 #endif
-	int			uaccess_err;
+	int			sig_on_uaccess_error:1;
+	int			uaccess_err:1;	/* uaccess failed */
 };

 #define INIT_THREAD_INFO(tsk)			\
@@ -231,6 +232,12 @@ static inline struct thread_info *current_thread_info(void)
 	movq PER_CPU_VAR(kernel_stack),reg ; \
 	subq $(THREAD_SIZE-KERNEL_STACK_OFFSET),reg

+/*
+ * Same if PER_CPU_VAR(kernel_stack) is, perhaps with some offset, already in
+ * a certain register (to be used in assembler memory operands).
+ */
+#define THREAD_INFO(reg, off) KERNEL_STACK_OFFSET+(off)-THREAD_SIZE(reg)
+
 #endif

 #endif /* !X86_32 */
......
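The new THREAD_INFO(reg, off) macro above lets entry code reach thread_info fields through an %rsp-relative memory operand instead of first materialising a pointer with GET_THREAD_INFO, which is what removes all the GET_THREAD_INFO(%r10)/(%rcx) lines in the ia32entry.S and entry_64.S hunks. The layout fact it relies on is that struct thread_info sits at the bottom of the THREAD_SIZE-aligned kernel stack. A hedged C sketch of that relationship; THREAD_SIZE and the addresses are made-up values for illustration:

/* Sketch of the "thread_info sits at the bottom of the stack" layout
 * that THREAD_INFO()/GET_THREAD_INFO rely on.  THREAD_SIZE here is an
 * assumed value (8 KB), purely for the demonstration. */
#include <assert.h>
#include <stdint.h>

#define THREAD_SIZE (2 * 4096UL)

static uintptr_t thread_info_of(uintptr_t sp)
{
	return sp & ~(THREAD_SIZE - 1);    /* round down to stack base */
}

int main(void)
{
	uintptr_t stack_base = 0xffff880012340000UL;      /* aligned */
	uintptr_t sp = stack_base + THREAD_SIZE - 200;    /* somewhere on it */

	assert(thread_info_of(sp) == stack_base);
	return 0;
}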
@@ -130,10 +130,8 @@ extern void setup_node_to_cpumask_map(void);
 	.balance_interval	= 1,					\
 }

-#ifdef CONFIG_X86_64
 extern int __node_distance(int, int);
 #define node_distance(a, b) __node_distance(a, b)
-#endif

 #else /* !CONFIG_NUMA */
......
@@ -462,7 +462,7 @@ struct __large_struct { unsigned long buf[100]; };
 	barrier();

 #define uaccess_catch(err)						\
-	(err) |= current_thread_info()->uaccess_err;			\
+	(err) |= (current_thread_info()->uaccess_err ? -EFAULT : 0);	\
 	current_thread_info()->uaccess_err = prev_err;			\
 } while (0)
......
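uaccess_err shrinks to a one-bit flag (see also the mm/extable.c hunk further down, which now stores 1 instead of -EFAULT) and uaccess_catch() converts it to -EFAULT itself. Callers keep the same uaccess_try/uaccess_catch shape; the sketch below is modelled on how such callers typically look and is not copied from the kernel sources:

/* Sketch (kernel context, not standalone): copy a value to user space
 * with the exception-table based *_ex() helpers; any fault inside the
 * block surfaces as -EFAULT via uaccess_catch(). */
static int put_word_to_user(int __user *uptr, int val)
{
	int err = 0;

	uaccess_try;               /* arm thread_info->uaccess_err */
	put_user_ex(val, uptr);    /* faults are recorded, not fixed up here */
	uaccess_catch(err);        /* err |= uaccess_err ? -EFAULT : 0 */

	return err;
}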
@@ -16,5 +16,6 @@ const char *const x86_power_flags[32] = {
 	"100mhzsteps",
 	"hwpstate",
 	"",	/* tsc invariant mapped to constant_tsc */
-		/* nothing */
+	"cpb",  /* core performance boost */
+	"eff_freq_ro", /* Readonly aperf/mperf */
 };
@@ -625,6 +625,8 @@ work_notifysig:				# deal with pending signals and
 	movl %esp, %eax
 	jne work_notifysig_v86		# returning to kernel-space or
 					# vm86-space
+	TRACE_IRQS_ON
+	ENABLE_INTERRUPTS(CLBR_NONE)
 	xorl %edx, %edx
 	call do_notify_resume
 	jmp resume_userspace_sig
@@ -638,6 +640,8 @@ work_notifysig_v86:
 #else
 	movl %esp, %eax
 #endif
+	TRACE_IRQS_ON
+	ENABLE_INTERRUPTS(CLBR_NONE)
 	xorl %edx, %edx
 	call do_notify_resume
 	jmp resume_userspace_sig
......
@@ -221,7 +221,7 @@ ENDPROC(native_usergs_sysret64)
 	/*CFI_REL_OFFSET	ss,0*/
 	pushq_cfi %rax /* rsp */
 	CFI_REL_OFFSET	rsp,0
-	pushq_cfi $X86_EFLAGS_IF /* eflags - interrupts on */
+	pushq_cfi $(X86_EFLAGS_IF|X86_EFLAGS_BIT1) /* eflags - interrupts on */
 	/*CFI_REL_OFFSET	rflags,0*/
 	pushq_cfi $__KERNEL_CS /* cs */
 	/*CFI_REL_OFFSET	cs,0*/
@@ -411,7 +411,7 @@ ENTRY(ret_from_fork)
 	RESTORE_REST

 	testl $3, CS-ARGOFFSET(%rsp)		# from kernel_thread?
-	je   int_ret_from_sys_call
+	jz   retint_restore_args

 	testl $_TIF_IA32, TI_flags(%rcx)	# 32-bit compat task needs IRET
 	jnz  int_ret_from_sys_call
@@ -465,7 +465,7 @@ ENTRY(system_call)
 	 * after the swapgs, so that it can do the swapgs
 	 * for the guest and jump here on syscall.
 	 */
-ENTRY(system_call_after_swapgs)
+GLOBAL(system_call_after_swapgs)

 	movq	%rsp,PER_CPU_VAR(old_rsp)
 	movq	PER_CPU_VAR(kernel_stack),%rsp
@@ -478,8 +478,7 @@ ENTRY(system_call_after_swapgs)
 	movq  %rax,ORIG_RAX-ARGOFFSET(%rsp)
 	movq  %rcx,RIP-ARGOFFSET(%rsp)
 	CFI_REL_OFFSET rip,RIP-ARGOFFSET
-	GET_THREAD_INFO(%rcx)
-	testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%rcx)
+	testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
 	jnz tracesys
 system_call_fastpath:
 	cmpq $__NR_syscall_max,%rax
@@ -496,10 +495,9 @@ ret_from_sys_call:
 	/* edi:	flagmask */
 sysret_check:
 	LOCKDEP_SYS_EXIT
-	GET_THREAD_INFO(%rcx)
 	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
-	movl TI_flags(%rcx),%edx
+	movl TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET),%edx
 	andl %edi,%edx
 	jnz  sysret_careful
 	CFI_REMEMBER_STATE
@@ -583,7 +581,7 @@ sysret_audit:
 	/* Do syscall tracing */
 tracesys:
 #ifdef CONFIG_AUDITSYSCALL
-	testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%rcx)
+	testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
 	jz auditsys
 #endif
 	SAVE_REST
@@ -612,8 +610,6 @@ tracesys:
 GLOBAL(int_ret_from_sys_call)
 	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
-	testl $3,CS-ARGOFFSET(%rsp)
-	je retint_restore_args
 	movl $_TIF_ALLWORK_MASK,%edi
 	/* edi: mask to check */
 GLOBAL(int_with_check)
@@ -953,6 +949,7 @@ END(common_interrupt)
 ENTRY(\sym)
 	INTR_FRAME
 	pushq_cfi $~(\num)
+.Lcommon_\sym:
 	interrupt \do_sym
 	jmp ret_from_intr
 	CFI_ENDPROC
@@ -976,13 +973,21 @@ apicinterrupt X86_PLATFORM_IPI_VECTOR \
 	x86_platform_ipi smp_x86_platform_ipi

 #ifdef CONFIG_SMP
-.irp idx,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, \
+	ALIGN
+	INTR_FRAME
+.irp idx,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, \
 	16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
 .if NUM_INVALIDATE_TLB_VECTORS > \idx
-apicinterrupt (INVALIDATE_TLB_VECTOR_START)+\idx \
-	invalidate_interrupt\idx smp_invalidate_interrupt
+ENTRY(invalidate_interrupt\idx)
+	pushq_cfi $~(INVALIDATE_TLB_VECTOR_START+\idx)
+	jmp .Lcommon_invalidate_interrupt0
+	CFI_ADJUST_CFA_OFFSET -8
+END(invalidate_interrupt\idx)
 .endif
 .endr
+	CFI_ENDPROC
+apicinterrupt INVALIDATE_TLB_VECTOR_START, \
+	invalidate_interrupt0, smp_invalidate_interrupt
 #endif

 apicinterrupt THRESHOLD_APIC_VECTOR \
......
@@ -293,7 +293,7 @@ int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
 	regs.orig_ax = -1;
 	regs.ip = (unsigned long) kernel_thread_helper;
 	regs.cs = __KERNEL_CS | get_kernel_rpl();
-	regs.flags = X86_EFLAGS_IF | 0x2;
+	regs.flags = X86_EFLAGS_IF | X86_EFLAGS_BIT1;

 	/* Ok, create the new process.. */
 	return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs, 0, NULL, NULL);
......
@@ -306,15 +306,10 @@ dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code)
 			== NOTIFY_STOP)
 		return;
 #endif /* CONFIG_KGDB_LOW_LEVEL_TRAP */
-#ifdef CONFIG_KPROBES
+
 	if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP)
 			== NOTIFY_STOP)
 		return;
-#else
-	if (notify_die(DIE_TRAP, "int3", regs, error_code, 3, SIGTRAP)
-			== NOTIFY_STOP)
-		return;
-#endif

 	preempt_conditional_sti(regs);
 	do_trap(3, SIGTRAP, "int3", regs, error_code, NULL);
......
@@ -57,7 +57,7 @@ DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data) =
 	.lock = __SEQLOCK_UNLOCKED(__vsyscall_gtod_data.lock),
 };

-static enum { EMULATE, NATIVE, NONE } vsyscall_mode = NATIVE;
+static enum { EMULATE, NATIVE, NONE } vsyscall_mode = EMULATE;

 static int __init vsyscall_setup(char *str)
 {
@@ -140,11 +140,40 @@ static int addr_to_vsyscall_nr(unsigned long addr)
 	return nr;
 }

+static bool write_ok_or_segv(unsigned long ptr, size_t size)
+{
+	/*
+	 * XXX: if access_ok, get_user, and put_user handled
+	 * sig_on_uaccess_error, this could go away.
+	 */
+	if (!access_ok(VERIFY_WRITE, (void __user *)ptr, size)) {
+		siginfo_t info;
+		struct thread_struct *thread = &current->thread;
+
+		thread->error_code	= 6;  /* user fault, no page, write */
+		thread->cr2		= ptr;
+		thread->trap_no		= 14;
+
+		memset(&info, 0, sizeof(info));
+		info.si_signo		= SIGSEGV;
+		info.si_errno		= 0;
+		info.si_code		= SEGV_MAPERR;
+		info.si_addr		= (void __user *)ptr;
+
+		force_sig_info(SIGSEGV, &info, current);
+		return false;
+	} else {
+		return true;
+	}
+}
+
 bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
 {
 	struct task_struct *tsk;
 	unsigned long caller;
 	int vsyscall_nr;
+	int prev_sig_on_uaccess_error;
 	long ret;

 	/*
@@ -180,35 +209,65 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
 	if (seccomp_mode(&tsk->seccomp))
 		do_exit(SIGKILL);

+	/*
+	 * With a real vsyscall, page faults cause SIGSEGV.  We want to
+	 * preserve that behavior to make writing exploits harder.
+	 */
+	prev_sig_on_uaccess_error = current_thread_info()->sig_on_uaccess_error;
+	current_thread_info()->sig_on_uaccess_error = 1;
+
+	/*
+	 * 0 is a valid user pointer (in the access_ok sense) on 32-bit and
+	 * 64-bit, so we don't need to special-case it here.  For all the
+	 * vsyscalls, 0 means "don't write anything" not "write it at
+	 * address 0".
+	 */
+	ret = -EFAULT;
 	switch (vsyscall_nr) {
 	case 0:
+		if (!write_ok_or_segv(regs->di, sizeof(struct timeval)) ||
+		    !write_ok_or_segv(regs->si, sizeof(struct timezone)))
+			break;
+
 		ret = sys_gettimeofday(
 			(struct timeval __user *)regs->di,
 			(struct timezone __user *)regs->si);
 		break;
 	case 1:
+		if (!write_ok_or_segv(regs->di, sizeof(time_t)))
+			break;
+
 		ret = sys_time((time_t __user *)regs->di);
 		break;
 	case 2:
+		if (!write_ok_or_segv(regs->di, sizeof(unsigned)) ||
+		    !write_ok_or_segv(regs->si, sizeof(unsigned)))
+			break;
+
 		ret = sys_getcpu((unsigned __user *)regs->di,
 				 (unsigned __user *)regs->si,
 				 0);
 		break;
 	}

+	current_thread_info()->sig_on_uaccess_error = prev_sig_on_uaccess_error;
+
 	if (ret == -EFAULT) {
-		/*
-		 * Bad news -- userspace fed a bad pointer to a vsyscall.
-		 *
-		 * With a real vsyscall, that would have caused SIGSEGV.
-		 * To make writing reliable exploits using the emulated
-		 * vsyscalls harder, generate SIGSEGV here as well.
-		 */
+		/* Bad news -- userspace fed a bad pointer to a vsyscall. */
 		warn_bad_vsyscall(KERN_INFO, regs,
 				  "vsyscall fault (exploit attempt?)");
-		goto sigsegv;
+
+		/*
+		 * If we failed to generate a signal for any reason,
+		 * generate one here.  (This should be impossible.)
+		 */
+		if (WARN_ON_ONCE(!sigismember(&tsk->pending.signal, SIGBUS) &&
+				 !sigismember(&tsk->pending.signal, SIGSEGV)))
+			goto sigsegv;
+
+		return true;  /* Don't emulate the ret. */
 	}

 	regs->ax = ret;
......
@@ -164,15 +164,13 @@ EXPORT_SYMBOL(strchr);
 size_t strlen(const char *s)
 {
 	int d0;
-	int res;
+	size_t res;
 	asm volatile("repne\n\t"
-		"scasb\n\t"
-		"notl %0\n\t"
-		"decl %0"
+		"scasb"
 		: "=c" (res), "=&D" (d0)
 		: "1" (s), "a" (0), "0" (0xffffffffu)
 		: "memory");
-	return res;
+	return ~res - 1;
 }
 EXPORT_SYMBOL(strlen);
 #endif
......
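About the strlen() rewrite above: repne scasb decrements %ecx once per byte scanned, including the terminating NUL, so starting the count at 0xffffffff leaves ~(len + 1) in it, and the C expression ~res - 1 recovers len without the old notl/decl instructions. A user-space check of that identity (my code, x86 only, kept to 32-bit arithmetic as in string_32.c):

/* Verify the ~res - 1 identity used by the i386 strlen() above. */
#include <assert.h>
#include <stdint.h>
#include <string.h>

static uint32_t strlen_scasb(const char *s)
{
	uint32_t res;
	const char *d0;

	asm volatile("repne\n\t"
		     "scasb"
		     : "=c" (res), "=&D" (d0)
		     : "1" (s), "a" (0), "0" (0xffffffffu)
		     : "memory");
	return ~res - 1;     /* ecx ended at ~(len + 1) */
}

int main(void)
{
	assert(strlen_scasb("") == 0);
	assert(strlen_scasb("kernel") == strlen("kernel"));
	return 0;
}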
@@ -25,7 +25,7 @@ int fixup_exception(struct pt_regs *regs)
 	if (fixup) {
 		/* If fixup is less than 16, it means uaccess error */
 		if (fixup->fixup < 16) {
-			current_thread_info()->uaccess_err = -EFAULT;
+			current_thread_info()->uaccess_err = 1;
 			regs->ip += fixup->fixup;
 			return 1;
 		}
......
@@ -626,7 +626,7 @@ pgtable_bad(struct pt_regs *regs, unsigned long error_code,

 static noinline void
 no_context(struct pt_regs *regs, unsigned long error_code,
-	   unsigned long address)
+	   unsigned long address, int signal, int si_code)
 {
 	struct task_struct *tsk = current;
 	unsigned long *stackend;
@@ -634,8 +634,17 @@ no_context(struct pt_regs *regs, unsigned long error_code,
 	int sig;

 	/* Are we prepared to handle this kernel fault? */
-	if (fixup_exception(regs))
+	if (fixup_exception(regs)) {
+		if (current_thread_info()->sig_on_uaccess_error && signal) {
+			tsk->thread.trap_no = 14;
+			tsk->thread.error_code = error_code | PF_USER;
+			tsk->thread.cr2 = address;
+
+			/* XXX: hwpoison faults will set the wrong code. */
+			force_sig_info_fault(signal, si_code, address, tsk, 0);
+		}
 		return;
+	}

 	/*
 	 * 32-bit:
@@ -755,7 +764,7 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
 	if (is_f00f_bug(regs, address))
 		return;

-	no_context(regs, error_code, address);
+	no_context(regs, error_code, address, SIGSEGV, si_code);
 }

 static noinline void
@@ -819,7 +828,7 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address,
 	/* Kernel mode? Handle exceptions or die: */
 	if (!(error_code & PF_USER)) {
-		no_context(regs, error_code, address);
+		no_context(regs, error_code, address, SIGBUS, BUS_ADRERR);
 		return;
 	}
@@ -854,7 +863,7 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code,
 		if (!(fault & VM_FAULT_RETRY))
 			up_read(&current->mm->mmap_sem);
 		if (!(error_code & PF_USER))
-			no_context(regs, error_code, address);
+			no_context(regs, error_code, address, 0, 0);
 		return 1;
 	}
 	if (!(fault & VM_FAULT_ERROR))
@@ -864,7 +873,8 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code,
 		/* Kernel mode? Handle exceptions or die: */
 		if (!(error_code & PF_USER)) {
 			up_read(&current->mm->mmap_sem);
-			no_context(regs, error_code, address);
+			no_context(regs, error_code, address,
+				   SIGSEGV, SEGV_MAPERR);
 			return 1;
 		}
......
@@ -238,7 +238,8 @@ static efi_status_t __init phys_efi_get_time(efi_time_t *tm,

 	spin_lock_irqsave(&rtc_lock, flags);
 	efi_call_phys_prelog();
-	status = efi_call_phys2(efi_phys.get_time, tm, tc);
+	status = efi_call_phys2(efi_phys.get_time, virt_to_phys(tm),
+				virt_to_phys(tc));
 	efi_call_phys_epilog();
 	spin_unlock_irqrestore(&rtc_lock, flags);
 	return status;
......
@@ -697,7 +697,7 @@ void lguest_arch_setup_regs(struct lg_cpu *cpu, unsigned long start)
 	 * interrupts are enabled.  We always leave interrupts enabled while
 	 * running the Guest.
 	 */
-	regs->eflags = X86_EFLAGS_IF | 0x2;
+	regs->eflags = X86_EFLAGS_IF | X86_EFLAGS_BIT1;

 	/*
 	 * The "Extended Instruction Pointer" register says where the Guest is
......
@@ -368,7 +368,7 @@ static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct page *page
 	VM_BUG_ON(!irqs_disabled());
 #ifdef CONFIG_CMPXCHG_DOUBLE
 	if (s->flags & __CMPXCHG_DOUBLE) {
-		if (cmpxchg_double(&page->freelist,
+		if (cmpxchg_double(&page->freelist, &page->counters,
 			freelist_old, counters_old,
 			freelist_new, counters_new))
 		return 1;
@@ -402,7 +402,7 @@ static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
 {
 #ifdef CONFIG_CMPXCHG_DOUBLE
 	if (s->flags & __CMPXCHG_DOUBLE) {
-		if (cmpxchg_double(&page->freelist,
+		if (cmpxchg_double(&page->freelist, &page->counters,
 			freelist_old, counters_old,
 			freelist_new, counters_new))
 		return 1;
......