diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 0293fc8daca34ab9e3055d534bcc5aebc0dc91d6..e229769606f2f4c59debdea548891f482da64633 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -2755,11 +2755,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted. functions are at fixed addresses, they make nice targets for exploits that can control RIP. - emulate Vsyscalls turn into traps and are emulated - reasonably safely. + emulate [default] Vsyscalls turn into traps and are + emulated reasonably safely. - native [default] Vsyscalls are native syscall - instructions. + native Vsyscalls are native syscall instructions. This is a little bit faster than trapping and makes a few dynamic recompilers work better than they would in emulation mode. diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index a6253ec1b28461e12d6f650683c6d05c99a7a7cc..3e274564f6bf0d89f06b489484eb7d86faa049d6 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -134,7 +134,7 @@ ENTRY(ia32_sysenter_target) CFI_REL_OFFSET rsp,0 pushfq_cfi /*CFI_REL_OFFSET rflags,0*/ - movl 8*3-THREAD_SIZE+TI_sysenter_return(%rsp), %r10d + movl TI_sysenter_return+THREAD_INFO(%rsp,3*8-KERNEL_STACK_OFFSET),%r10d CFI_REGISTER rip,r10 pushq_cfi $__USER32_CS /*CFI_REL_OFFSET cs,0*/ @@ -150,9 +150,8 @@ ENTRY(ia32_sysenter_target) .section __ex_table,"a" .quad 1b,ia32_badarg .previous - GET_THREAD_INFO(%r10) - orl $TS_COMPAT,TI_status(%r10) - testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10) + orl $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET) + testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) CFI_REMEMBER_STATE jnz sysenter_tracesys cmpq $(IA32_NR_syscalls-1),%rax @@ -162,13 +161,12 @@ sysenter_do_call: sysenter_dispatch: call *ia32_sys_call_table(,%rax,8) movq %rax,RAX-ARGOFFSET(%rsp) - GET_THREAD_INFO(%r10) DISABLE_INTERRUPTS(CLBR_NONE) TRACE_IRQS_OFF - testl $_TIF_ALLWORK_MASK,TI_flags(%r10) + testl $_TIF_ALLWORK_MASK,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) jnz sysexit_audit sysexit_from_sys_call: - andl $~TS_COMPAT,TI_status(%r10) + andl $~TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET) /* clear IF, that popfq doesn't enable interrupts early */ andl $~0x200,EFLAGS-R11(%rsp) movl RIP-R11(%rsp),%edx /* User %eip */ @@ -205,7 +203,7 @@ sysexit_from_sys_call: .endm .macro auditsys_exit exit - testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),TI_flags(%r10) + testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) jnz ia32_ret_from_sys_call TRACE_IRQS_ON sti @@ -215,12 +213,11 @@ sysexit_from_sys_call: movzbl %al,%edi /* zero-extend that into %edi */ inc %edi /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */ call audit_syscall_exit - GET_THREAD_INFO(%r10) movl RAX-ARGOFFSET(%rsp),%eax /* reload syscall return value */ movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi cli TRACE_IRQS_OFF - testl %edi,TI_flags(%r10) + testl %edi,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) jz \exit CLEAR_RREGS -ARGOFFSET jmp int_with_check @@ -238,7 +235,7 @@ sysexit_audit: sysenter_tracesys: #ifdef CONFIG_AUDITSYSCALL - testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%r10) + testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) jz sysenter_auditsys #endif SAVE_REST @@ -309,9 +306,8 @@ ENTRY(ia32_cstar_target) .section __ex_table,"a" .quad 1b,ia32_badarg .previous - GET_THREAD_INFO(%r10) - orl $TS_COMPAT,TI_status(%r10) - testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10) + orl $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET) + testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) CFI_REMEMBER_STATE jnz cstar_tracesys cmpq $IA32_NR_syscalls-1,%rax @@ -321,13 +317,12 @@ cstar_do_call: cstar_dispatch: call *ia32_sys_call_table(,%rax,8) movq %rax,RAX-ARGOFFSET(%rsp) - GET_THREAD_INFO(%r10) DISABLE_INTERRUPTS(CLBR_NONE) TRACE_IRQS_OFF - testl $_TIF_ALLWORK_MASK,TI_flags(%r10) + testl $_TIF_ALLWORK_MASK,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) jnz sysretl_audit sysretl_from_sys_call: - andl $~TS_COMPAT,TI_status(%r10) + andl $~TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET) RESTORE_ARGS 0,-ARG_SKIP,0,0,0 movl RIP-ARGOFFSET(%rsp),%ecx CFI_REGISTER rip,rcx @@ -355,7 +350,7 @@ sysretl_audit: cstar_tracesys: #ifdef CONFIG_AUDITSYSCALL - testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%r10) + testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) jz cstar_auditsys #endif xchgl %r9d,%ebp @@ -420,9 +415,8 @@ ENTRY(ia32_syscall) /* note the registers are not zero extended to the sf. this could be a problem. */ SAVE_ARGS 0,1,0 - GET_THREAD_INFO(%r10) - orl $TS_COMPAT,TI_status(%r10) - testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10) + orl $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET) + testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) jnz ia32_tracesys cmpq $(IA32_NR_syscalls-1),%rax ja ia32_badsys @@ -459,8 +453,8 @@ quiet_ni_syscall: CFI_ENDPROC .macro PTREGSCALL label, func, arg - .globl \label -\label: + ALIGN +GLOBAL(\label) leaq \func(%rip),%rax leaq -ARGOFFSET+8(%rsp),\arg /* 8 for return address */ jmp ia32_ptregs_common @@ -477,7 +471,8 @@ quiet_ni_syscall: PTREGSCALL stub32_vfork, sys_vfork, %rdi PTREGSCALL stub32_iopl, sys_iopl, %rsi -ENTRY(ia32_ptregs_common) + ALIGN +ia32_ptregs_common: popq %r11 CFI_ENDPROC CFI_STARTPROC32 simple diff --git a/arch/x86/include/asm/alternative-asm.h b/arch/x86/include/asm/alternative-asm.h index 091508b533b46810603bb3dfa86a207d24e1e9c0..952bd0100c5c050289baff430ab0e4e30f56e0fc 100644 --- a/arch/x86/include/asm/alternative-asm.h +++ b/arch/x86/include/asm/alternative-asm.h @@ -4,10 +4,10 @@ #ifdef CONFIG_SMP .macro LOCK_PREFIX -1: lock +672: lock .section .smp_locks,"a" .balign 4 - .long 1b - . + .long 672b - . .previous .endm #else diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h index 1775d6e5920e1b8f2a463600a048a9e3b097d978..b97596e2b68c7ea3f62eebb38cd1f155719c150e 100644 --- a/arch/x86/include/asm/bitops.h +++ b/arch/x86/include/asm/bitops.h @@ -380,6 +380,8 @@ static inline unsigned long __fls(unsigned long word) return word; } +#undef ADDR + #ifdef __KERNEL__ /** * ffs - find first set bit in word @@ -395,10 +397,25 @@ static inline unsigned long __fls(unsigned long word) static inline int ffs(int x) { int r; -#ifdef CONFIG_X86_CMOV + +#ifdef CONFIG_X86_64 + /* + * AMD64 says BSFL won't clobber the dest reg if x==0; Intel64 says the + * dest reg is undefined if x==0, but their CPU architect says its + * value is written to set it to the same as before, except that the + * top 32 bits will be cleared. + * + * We cannot do this on 32 bits because at the very least some + * 486 CPUs did not behave this way. + */ + long tmp = -1; + asm("bsfl %1,%0" + : "=r" (r) + : "rm" (x), "0" (tmp)); +#elif defined(CONFIG_X86_CMOV) asm("bsfl %1,%0\n\t" "cmovzl %2,%0" - : "=r" (r) : "rm" (x), "r" (-1)); + : "=&r" (r) : "rm" (x), "r" (-1)); #else asm("bsfl %1,%0\n\t" "jnz 1f\n\t" @@ -422,7 +439,22 @@ static inline int ffs(int x) static inline int fls(int x) { int r; -#ifdef CONFIG_X86_CMOV + +#ifdef CONFIG_X86_64 + /* + * AMD64 says BSRL won't clobber the dest reg if x==0; Intel64 says the + * dest reg is undefined if x==0, but their CPU architect says its + * value is written to set it to the same as before, except that the + * top 32 bits will be cleared. + * + * We cannot do this on 32 bits because at the very least some + * 486 CPUs did not behave this way. + */ + long tmp = -1; + asm("bsrl %1,%0" + : "=r" (r) + : "rm" (x), "0" (tmp)); +#elif defined(CONFIG_X86_CMOV) asm("bsrl %1,%0\n\t" "cmovzl %2,%0" : "=&r" (r) : "rm" (x), "rm" (-1)); @@ -434,11 +466,35 @@ static inline int fls(int x) #endif return r + 1; } -#endif /* __KERNEL__ */ - -#undef ADDR -#ifdef __KERNEL__ +/** + * fls64 - find last set bit in a 64-bit word + * @x: the word to search + * + * This is defined in a similar way as the libc and compiler builtin + * ffsll, but returns the position of the most significant set bit. + * + * fls64(value) returns 0 if value is 0 or the position of the last + * set bit if value is nonzero. The last (most significant) bit is + * at position 64. + */ +#ifdef CONFIG_X86_64 +static __always_inline int fls64(__u64 x) +{ + long bitpos = -1; + /* + * AMD64 says BSRQ won't clobber the dest reg if x==0; Intel64 says the + * dest reg is undefined if x==0, but their CPU architect says its + * value is written to set it to the same as before. + */ + asm("bsrq %1,%0" + : "+r" (bitpos) + : "rm" (x)); + return bitpos + 1; +} +#else +#include +#endif #include @@ -450,12 +506,6 @@ static inline int fls(int x) #include -#endif /* __KERNEL__ */ - -#include - -#ifdef __KERNEL__ - #include #include diff --git a/arch/x86/include/asm/cmpxchg.h b/arch/x86/include/asm/cmpxchg.h index 5d3acdf5a7a682d96681f78c86548a18203aaf98..0c9fa2745f13e4c1f242d3f37813f25849aebd6d 100644 --- a/arch/x86/include/asm/cmpxchg.h +++ b/arch/x86/include/asm/cmpxchg.h @@ -14,6 +14,8 @@ extern void __cmpxchg_wrong_size(void) __compiletime_error("Bad argument size for cmpxchg"); extern void __xadd_wrong_size(void) __compiletime_error("Bad argument size for xadd"); +extern void __add_wrong_size(void) + __compiletime_error("Bad argument size for add"); /* * Constants for operation sizes. On 32-bit, the 64-bit size it set to @@ -31,60 +33,47 @@ extern void __xadd_wrong_size(void) #define __X86_CASE_Q -1 /* sizeof will never return -1 */ #endif +/* + * An exchange-type operation, which takes a value and a pointer, and + * returns a the old value. + */ +#define __xchg_op(ptr, arg, op, lock) \ + ({ \ + __typeof__ (*(ptr)) __ret = (arg); \ + switch (sizeof(*(ptr))) { \ + case __X86_CASE_B: \ + asm volatile (lock #op "b %b0, %1\n" \ + : "+r" (__ret), "+m" (*(ptr)) \ + : : "memory", "cc"); \ + break; \ + case __X86_CASE_W: \ + asm volatile (lock #op "w %w0, %1\n" \ + : "+r" (__ret), "+m" (*(ptr)) \ + : : "memory", "cc"); \ + break; \ + case __X86_CASE_L: \ + asm volatile (lock #op "l %0, %1\n" \ + : "+r" (__ret), "+m" (*(ptr)) \ + : : "memory", "cc"); \ + break; \ + case __X86_CASE_Q: \ + asm volatile (lock #op "q %q0, %1\n" \ + : "+r" (__ret), "+m" (*(ptr)) \ + : : "memory", "cc"); \ + break; \ + default: \ + __ ## op ## _wrong_size(); \ + } \ + __ret; \ + }) + /* * Note: no "lock" prefix even on SMP: xchg always implies lock anyway. * Since this is generally used to protect other memory information, we * use "asm volatile" and "memory" clobbers to prevent gcc from moving * information around. */ -#define __xchg(x, ptr, size) \ -({ \ - __typeof(*(ptr)) __x = (x); \ - switch (size) { \ - case __X86_CASE_B: \ - { \ - volatile u8 *__ptr = (volatile u8 *)(ptr); \ - asm volatile("xchgb %0,%1" \ - : "=q" (__x), "+m" (*__ptr) \ - : "0" (__x) \ - : "memory"); \ - break; \ - } \ - case __X86_CASE_W: \ - { \ - volatile u16 *__ptr = (volatile u16 *)(ptr); \ - asm volatile("xchgw %0,%1" \ - : "=r" (__x), "+m" (*__ptr) \ - : "0" (__x) \ - : "memory"); \ - break; \ - } \ - case __X86_CASE_L: \ - { \ - volatile u32 *__ptr = (volatile u32 *)(ptr); \ - asm volatile("xchgl %0,%1" \ - : "=r" (__x), "+m" (*__ptr) \ - : "0" (__x) \ - : "memory"); \ - break; \ - } \ - case __X86_CASE_Q: \ - { \ - volatile u64 *__ptr = (volatile u64 *)(ptr); \ - asm volatile("xchgq %0,%1" \ - : "=r" (__x), "+m" (*__ptr) \ - : "0" (__x) \ - : "memory"); \ - break; \ - } \ - default: \ - __xchg_wrong_size(); \ - } \ - __x; \ -}) - -#define xchg(ptr, v) \ - __xchg((v), (ptr), sizeof(*ptr)) +#define xchg(ptr, v) __xchg_op((ptr), (v), xchg, "") /* * Atomic compare and exchange. Compare OLD with MEM, if identical, @@ -165,46 +154,80 @@ extern void __xadd_wrong_size(void) __cmpxchg_local((ptr), (old), (new), sizeof(*ptr)) #endif -#define __xadd(ptr, inc, lock) \ +/* + * xadd() adds "inc" to "*ptr" and atomically returns the previous + * value of "*ptr". + * + * xadd() is locked when multiple CPUs are online + * xadd_sync() is always locked + * xadd_local() is never locked + */ +#define __xadd(ptr, inc, lock) __xchg_op((ptr), (inc), xadd, lock) +#define xadd(ptr, inc) __xadd((ptr), (inc), LOCK_PREFIX) +#define xadd_sync(ptr, inc) __xadd((ptr), (inc), "lock; ") +#define xadd_local(ptr, inc) __xadd((ptr), (inc), "") + +#define __add(ptr, inc, lock) \ ({ \ __typeof__ (*(ptr)) __ret = (inc); \ switch (sizeof(*(ptr))) { \ case __X86_CASE_B: \ - asm volatile (lock "xaddb %b0, %1\n" \ - : "+r" (__ret), "+m" (*(ptr)) \ - : : "memory", "cc"); \ + asm volatile (lock "addb %b1, %0\n" \ + : "+m" (*(ptr)) : "ri" (inc) \ + : "memory", "cc"); \ break; \ case __X86_CASE_W: \ - asm volatile (lock "xaddw %w0, %1\n" \ - : "+r" (__ret), "+m" (*(ptr)) \ - : : "memory", "cc"); \ + asm volatile (lock "addw %w1, %0\n" \ + : "+m" (*(ptr)) : "ri" (inc) \ + : "memory", "cc"); \ break; \ case __X86_CASE_L: \ - asm volatile (lock "xaddl %0, %1\n" \ - : "+r" (__ret), "+m" (*(ptr)) \ - : : "memory", "cc"); \ + asm volatile (lock "addl %1, %0\n" \ + : "+m" (*(ptr)) : "ri" (inc) \ + : "memory", "cc"); \ break; \ case __X86_CASE_Q: \ - asm volatile (lock "xaddq %q0, %1\n" \ - : "+r" (__ret), "+m" (*(ptr)) \ - : : "memory", "cc"); \ + asm volatile (lock "addq %1, %0\n" \ + : "+m" (*(ptr)) : "ri" (inc) \ + : "memory", "cc"); \ break; \ default: \ - __xadd_wrong_size(); \ + __add_wrong_size(); \ } \ __ret; \ }) /* - * xadd() adds "inc" to "*ptr" and atomically returns the previous - * value of "*ptr". + * add_*() adds "inc" to "*ptr" * - * xadd() is locked when multiple CPUs are online - * xadd_sync() is always locked - * xadd_local() is never locked + * __add() takes a lock prefix + * add_smp() is locked when multiple CPUs are online + * add_sync() is always locked */ -#define xadd(ptr, inc) __xadd((ptr), (inc), LOCK_PREFIX) -#define xadd_sync(ptr, inc) __xadd((ptr), (inc), "lock; ") -#define xadd_local(ptr, inc) __xadd((ptr), (inc), "") +#define add_smp(ptr, inc) __add((ptr), (inc), LOCK_PREFIX) +#define add_sync(ptr, inc) __add((ptr), (inc), "lock; ") + +#define __cmpxchg_double(pfx, p1, p2, o1, o2, n1, n2) \ +({ \ + bool __ret; \ + __typeof__(*(p1)) __old1 = (o1), __new1 = (n1); \ + __typeof__(*(p2)) __old2 = (o2), __new2 = (n2); \ + BUILD_BUG_ON(sizeof(*(p1)) != sizeof(long)); \ + BUILD_BUG_ON(sizeof(*(p2)) != sizeof(long)); \ + VM_BUG_ON((unsigned long)(p1) % (2 * sizeof(long))); \ + VM_BUG_ON((unsigned long)((p1) + 1) != (unsigned long)(p2)); \ + asm volatile(pfx "cmpxchg%c4b %2; sete %0" \ + : "=a" (__ret), "+d" (__old2), \ + "+m" (*(p1)), "+m" (*(p2)) \ + : "i" (2 * sizeof(long)), "a" (__old1), \ + "b" (__new1), "c" (__new2)); \ + __ret; \ +}) + +#define cmpxchg_double(p1, p2, o1, o2, n1, n2) \ + __cmpxchg_double(LOCK_PREFIX, p1, p2, o1, o2, n1, n2) + +#define cmpxchg_double_local(p1, p2, o1, o2, n1, n2) \ + __cmpxchg_double(, p1, p2, o1, o2, n1, n2) #endif /* ASM_X86_CMPXCHG_H */ diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h index fbebb07dd80bbed5ccec9d6fd868dc0df964e74f..53f4b219336be527e017c821eee57b7bcd1bd966 100644 --- a/arch/x86/include/asm/cmpxchg_32.h +++ b/arch/x86/include/asm/cmpxchg_32.h @@ -166,52 +166,6 @@ static inline unsigned long cmpxchg_386(volatile void *ptr, unsigned long old, #endif -#define cmpxchg8b(ptr, o1, o2, n1, n2) \ -({ \ - char __ret; \ - __typeof__(o2) __dummy; \ - __typeof__(*(ptr)) __old1 = (o1); \ - __typeof__(o2) __old2 = (o2); \ - __typeof__(*(ptr)) __new1 = (n1); \ - __typeof__(o2) __new2 = (n2); \ - asm volatile(LOCK_PREFIX "cmpxchg8b %2; setz %1" \ - : "=d"(__dummy), "=a" (__ret), "+m" (*ptr)\ - : "a" (__old1), "d"(__old2), \ - "b" (__new1), "c" (__new2) \ - : "memory"); \ - __ret; }) - - -#define cmpxchg8b_local(ptr, o1, o2, n1, n2) \ -({ \ - char __ret; \ - __typeof__(o2) __dummy; \ - __typeof__(*(ptr)) __old1 = (o1); \ - __typeof__(o2) __old2 = (o2); \ - __typeof__(*(ptr)) __new1 = (n1); \ - __typeof__(o2) __new2 = (n2); \ - asm volatile("cmpxchg8b %2; setz %1" \ - : "=d"(__dummy), "=a"(__ret), "+m" (*ptr)\ - : "a" (__old), "d"(__old2), \ - "b" (__new1), "c" (__new2), \ - : "memory"); \ - __ret; }) - - -#define cmpxchg_double(ptr, o1, o2, n1, n2) \ -({ \ - BUILD_BUG_ON(sizeof(*(ptr)) != 4); \ - VM_BUG_ON((unsigned long)(ptr) % 8); \ - cmpxchg8b((ptr), (o1), (o2), (n1), (n2)); \ -}) - -#define cmpxchg_double_local(ptr, o1, o2, n1, n2) \ -({ \ - BUILD_BUG_ON(sizeof(*(ptr)) != 4); \ - VM_BUG_ON((unsigned long)(ptr) % 8); \ - cmpxchg16b_local((ptr), (o1), (o2), (n1), (n2)); \ -}) - #define system_has_cmpxchg_double() cpu_has_cx8 #endif /* _ASM_X86_CMPXCHG_32_H */ diff --git a/arch/x86/include/asm/cmpxchg_64.h b/arch/x86/include/asm/cmpxchg_64.h index 285da02c38faa843695afc868a16ca848c1498d8..614be87f1a9b94ac5276436931fc844df2610c54 100644 --- a/arch/x86/include/asm/cmpxchg_64.h +++ b/arch/x86/include/asm/cmpxchg_64.h @@ -20,49 +20,6 @@ static inline void set_64bit(volatile u64 *ptr, u64 val) cmpxchg_local((ptr), (o), (n)); \ }) -#define cmpxchg16b(ptr, o1, o2, n1, n2) \ -({ \ - char __ret; \ - __typeof__(o2) __junk; \ - __typeof__(*(ptr)) __old1 = (o1); \ - __typeof__(o2) __old2 = (o2); \ - __typeof__(*(ptr)) __new1 = (n1); \ - __typeof__(o2) __new2 = (n2); \ - asm volatile(LOCK_PREFIX "cmpxchg16b %2;setz %1" \ - : "=d"(__junk), "=a"(__ret), "+m" (*ptr) \ - : "b"(__new1), "c"(__new2), \ - "a"(__old1), "d"(__old2)); \ - __ret; }) - - -#define cmpxchg16b_local(ptr, o1, o2, n1, n2) \ -({ \ - char __ret; \ - __typeof__(o2) __junk; \ - __typeof__(*(ptr)) __old1 = (o1); \ - __typeof__(o2) __old2 = (o2); \ - __typeof__(*(ptr)) __new1 = (n1); \ - __typeof__(o2) __new2 = (n2); \ - asm volatile("cmpxchg16b %2;setz %1" \ - : "=d"(__junk), "=a"(__ret), "+m" (*ptr) \ - : "b"(__new1), "c"(__new2), \ - "a"(__old1), "d"(__old2)); \ - __ret; }) - -#define cmpxchg_double(ptr, o1, o2, n1, n2) \ -({ \ - BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ - VM_BUG_ON((unsigned long)(ptr) % 16); \ - cmpxchg16b((ptr), (o1), (o2), (n1), (n2)); \ -}) - -#define cmpxchg_double_local(ptr, o1, o2, n1, n2) \ -({ \ - BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ - VM_BUG_ON((unsigned long)(ptr) % 16); \ - cmpxchg16b_local((ptr), (o1), (o2), (n1), (n2)); \ -}) - #define system_has_cmpxchg_double() cpu_has_cx16 #endif /* _ASM_X86_CMPXCHG_64_H */ diff --git a/arch/x86/include/asm/div64.h b/arch/x86/include/asm/div64.h index 9a2d644c08efc0981dbc13b1700e81917fef7c63..ced283ac79dfff3ca6c580da509136845ed1ce5b 100644 --- a/arch/x86/include/asm/div64.h +++ b/arch/x86/include/asm/div64.h @@ -4,6 +4,7 @@ #ifdef CONFIG_X86_32 #include +#include /* * do_div() is NOT a C function. It wants to return @@ -21,15 +22,20 @@ ({ \ unsigned long __upper, __low, __high, __mod, __base; \ __base = (base); \ - asm("":"=a" (__low), "=d" (__high) : "A" (n)); \ - __upper = __high; \ - if (__high) { \ - __upper = __high % (__base); \ - __high = __high / (__base); \ + if (__builtin_constant_p(__base) && is_power_of_2(__base)) { \ + __mod = n & (__base - 1); \ + n >>= ilog2(__base); \ + } else { \ + asm("" : "=a" (__low), "=d" (__high) : "A" (n));\ + __upper = __high; \ + if (__high) { \ + __upper = __high % (__base); \ + __high = __high / (__base); \ + } \ + asm("divl %2" : "=a" (__low), "=d" (__mod) \ + : "rm" (__base), "0" (__low), "1" (__upper)); \ + asm("" : "=A" (n) : "a" (__low), "d" (__high)); \ } \ - asm("divl %2":"=a" (__low), "=d" (__mod) \ - : "rm" (__base), "0" (__low), "1" (__upper)); \ - asm("":"=A" (n) : "a" (__low), "d" (__high)); \ __mod; \ }) diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index 3470c9d0ebba4b67a03eb293ed689c905e097598..529bf07e8067fa21b40f823d58064427def4c814 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -451,23 +451,20 @@ do { \ #endif /* !CONFIG_M386 */ #ifdef CONFIG_X86_CMPXCHG64 -#define percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2) \ +#define percpu_cmpxchg8b_double(pcp1, pcp2, o1, o2, n1, n2) \ ({ \ - char __ret; \ - typeof(o1) __o1 = o1; \ - typeof(o1) __n1 = n1; \ - typeof(o2) __o2 = o2; \ - typeof(o2) __n2 = n2; \ - typeof(o2) __dummy = n2; \ + bool __ret; \ + typeof(pcp1) __o1 = (o1), __n1 = (n1); \ + typeof(pcp2) __o2 = (o2), __n2 = (n2); \ asm volatile("cmpxchg8b "__percpu_arg(1)"\n\tsetz %0\n\t" \ - : "=a"(__ret), "=m" (pcp1), "=d"(__dummy) \ - : "b"(__n1), "c"(__n2), "a"(__o1), "d"(__o2)); \ + : "=a" (__ret), "+m" (pcp1), "+m" (pcp2), "+d" (__o2) \ + : "b" (__n1), "c" (__n2), "a" (__o1)); \ __ret; \ }) -#define __this_cpu_cmpxchg_double_4(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2) -#define this_cpu_cmpxchg_double_4(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2) -#define irqsafe_cpu_cmpxchg_double_4(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2) +#define __this_cpu_cmpxchg_double_4 percpu_cmpxchg8b_double +#define this_cpu_cmpxchg_double_4 percpu_cmpxchg8b_double +#define irqsafe_cpu_cmpxchg_double_4 percpu_cmpxchg8b_double #endif /* CONFIG_X86_CMPXCHG64 */ /* @@ -508,31 +505,23 @@ do { \ * it in software. The address used in the cmpxchg16 instruction must be * aligned to a 16 byte boundary. */ -#ifdef CONFIG_SMP -#define CMPXCHG16B_EMU_CALL "call this_cpu_cmpxchg16b_emu\n\t" ASM_NOP3 -#else -#define CMPXCHG16B_EMU_CALL "call this_cpu_cmpxchg16b_emu\n\t" ASM_NOP2 -#endif -#define percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2) \ +#define percpu_cmpxchg16b_double(pcp1, pcp2, o1, o2, n1, n2) \ ({ \ - char __ret; \ - typeof(o1) __o1 = o1; \ - typeof(o1) __n1 = n1; \ - typeof(o2) __o2 = o2; \ - typeof(o2) __n2 = n2; \ - typeof(o2) __dummy; \ - alternative_io(CMPXCHG16B_EMU_CALL, \ - "cmpxchg16b " __percpu_prefix "(%%rsi)\n\tsetz %0\n\t", \ + bool __ret; \ + typeof(pcp1) __o1 = (o1), __n1 = (n1); \ + typeof(pcp2) __o2 = (o2), __n2 = (n2); \ + alternative_io("leaq %P1,%%rsi\n\tcall this_cpu_cmpxchg16b_emu\n\t", \ + "cmpxchg16b " __percpu_arg(1) "\n\tsetz %0\n\t", \ X86_FEATURE_CX16, \ - ASM_OUTPUT2("=a"(__ret), "=d"(__dummy)), \ - "S" (&pcp1), "b"(__n1), "c"(__n2), \ - "a"(__o1), "d"(__o2) : "memory"); \ + ASM_OUTPUT2("=a" (__ret), "+m" (pcp1), \ + "+m" (pcp2), "+d" (__o2)), \ + "b" (__n1), "c" (__n2), "a" (__o1) : "rsi"); \ __ret; \ }) -#define __this_cpu_cmpxchg_double_8(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2) -#define this_cpu_cmpxchg_double_8(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2) -#define irqsafe_cpu_cmpxchg_double_8(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2) +#define __this_cpu_cmpxchg_double_8 percpu_cmpxchg16b_double +#define this_cpu_cmpxchg_double_8 percpu_cmpxchg16b_double +#define irqsafe_cpu_cmpxchg_double_8 percpu_cmpxchg16b_double #endif diff --git a/arch/x86/include/asm/processor-flags.h b/arch/x86/include/asm/processor-flags.h index 2dddb317bb39c7e9693d56f52a2264e81e6837fb..f8ab3eaad128dd32b485563855612ebede463711 100644 --- a/arch/x86/include/asm/processor-flags.h +++ b/arch/x86/include/asm/processor-flags.h @@ -6,6 +6,7 @@ * EFLAGS bits */ #define X86_EFLAGS_CF 0x00000001 /* Carry Flag */ +#define X86_EFLAGS_BIT1 0x00000002 /* Bit 1 - always on */ #define X86_EFLAGS_PF 0x00000004 /* Parity Flag */ #define X86_EFLAGS_AF 0x00000010 /* Auxiliary carry Flag */ #define X86_EFLAGS_ZF 0x00000040 /* Zero Flag */ diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h index 972c260919a394881523ff2001f9ab19da2cb9b6..a82c2bf504b60be3fac409131d56253db124a906 100644 --- a/arch/x86/include/asm/spinlock.h +++ b/arch/x86/include/asm/spinlock.h @@ -79,23 +79,10 @@ static __always_inline int __ticket_spin_trylock(arch_spinlock_t *lock) return cmpxchg(&lock->head_tail, old.head_tail, new.head_tail) == old.head_tail; } -#if (NR_CPUS < 256) static __always_inline void __ticket_spin_unlock(arch_spinlock_t *lock) { - asm volatile(UNLOCK_LOCK_PREFIX "incb %0" - : "+m" (lock->head_tail) - : - : "memory", "cc"); + __add(&lock->tickets.head, 1, UNLOCK_LOCK_PREFIX); } -#else -static __always_inline void __ticket_spin_unlock(arch_spinlock_t *lock) -{ - asm volatile(UNLOCK_LOCK_PREFIX "incw %0" - : "+m" (lock->head_tail) - : - : "memory", "cc"); -} -#endif static inline int __ticket_spin_is_locked(arch_spinlock_t *lock) { diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index a1fe5c127b52f353ba55c3f12eb14ad71e0010fb..185b719ec61a29bcdaeed612e238d998adb66fdd 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -40,7 +40,8 @@ struct thread_info { */ __u8 supervisor_stack[0]; #endif - int uaccess_err; + int sig_on_uaccess_error:1; + int uaccess_err:1; /* uaccess failed */ }; #define INIT_THREAD_INFO(tsk) \ @@ -231,6 +232,12 @@ static inline struct thread_info *current_thread_info(void) movq PER_CPU_VAR(kernel_stack),reg ; \ subq $(THREAD_SIZE-KERNEL_STACK_OFFSET),reg +/* + * Same if PER_CPU_VAR(kernel_stack) is, perhaps with some offset, already in + * a certain register (to be used in assembler memory operands). + */ +#define THREAD_INFO(reg, off) KERNEL_STACK_OFFSET+(off)-THREAD_SIZE(reg) + #endif #endif /* !X86_32 */ diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index c00692476e9fc6f824b77b0c420aadd18c1fae7e..800f77c600510e431947e2e92f32d950d3d6c6b7 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -130,10 +130,8 @@ extern void setup_node_to_cpumask_map(void); .balance_interval = 1, \ } -#ifdef CONFIG_X86_64 extern int __node_distance(int, int); #define node_distance(a, b) __node_distance(a, b) -#endif #else /* !CONFIG_NUMA */ diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 36361bf6fdd1ed2e977d87a6c0332410a742c658..8be5f54d93606a374943cba5080dde65ff5e9e1d 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -462,7 +462,7 @@ struct __large_struct { unsigned long buf[100]; }; barrier(); #define uaccess_catch(err) \ - (err) |= current_thread_info()->uaccess_err; \ + (err) |= (current_thread_info()->uaccess_err ? -EFAULT : 0); \ current_thread_info()->uaccess_err = prev_err; \ } while (0) diff --git a/arch/x86/kernel/cpu/powerflags.c b/arch/x86/kernel/cpu/powerflags.c index 5abbea297e0c9a84375c4fbf0189cb69ccc1e77b..7b3fe56b1c214fe4ef1a9071fe28fb62aa406a58 100644 --- a/arch/x86/kernel/cpu/powerflags.c +++ b/arch/x86/kernel/cpu/powerflags.c @@ -16,5 +16,6 @@ const char *const x86_power_flags[32] = { "100mhzsteps", "hwpstate", "", /* tsc invariant mapped to constant_tsc */ - /* nothing */ + "cpb", /* core performance boost */ + "eff_freq_ro", /* Readonly aperf/mperf */ }; diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index f3f6f5344001ee47b17eb0ff029179751dfd5171..22d0e21b4dd793fa509e52541bf9b57e7778d1d7 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -625,6 +625,8 @@ work_notifysig: # deal with pending signals and movl %esp, %eax jne work_notifysig_v86 # returning to kernel-space or # vm86-space + TRACE_IRQS_ON + ENABLE_INTERRUPTS(CLBR_NONE) xorl %edx, %edx call do_notify_resume jmp resume_userspace_sig @@ -638,6 +640,8 @@ work_notifysig_v86: #else movl %esp, %eax #endif + TRACE_IRQS_ON + ENABLE_INTERRUPTS(CLBR_NONE) xorl %edx, %edx call do_notify_resume jmp resume_userspace_sig diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index faf8d5e74b0be311198f74dabd88293587d06025..a20e1cb9dc87fea597cbd69cda747d314f4ad68e 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -221,7 +221,7 @@ ENDPROC(native_usergs_sysret64) /*CFI_REL_OFFSET ss,0*/ pushq_cfi %rax /* rsp */ CFI_REL_OFFSET rsp,0 - pushq_cfi $X86_EFLAGS_IF /* eflags - interrupts on */ + pushq_cfi $(X86_EFLAGS_IF|X86_EFLAGS_BIT1) /* eflags - interrupts on */ /*CFI_REL_OFFSET rflags,0*/ pushq_cfi $__KERNEL_CS /* cs */ /*CFI_REL_OFFSET cs,0*/ @@ -411,7 +411,7 @@ ENTRY(ret_from_fork) RESTORE_REST testl $3, CS-ARGOFFSET(%rsp) # from kernel_thread? - je int_ret_from_sys_call + jz retint_restore_args testl $_TIF_IA32, TI_flags(%rcx) # 32-bit compat task needs IRET jnz int_ret_from_sys_call @@ -465,7 +465,7 @@ ENTRY(system_call) * after the swapgs, so that it can do the swapgs * for the guest and jump here on syscall. */ -ENTRY(system_call_after_swapgs) +GLOBAL(system_call_after_swapgs) movq %rsp,PER_CPU_VAR(old_rsp) movq PER_CPU_VAR(kernel_stack),%rsp @@ -478,8 +478,7 @@ ENTRY(system_call_after_swapgs) movq %rax,ORIG_RAX-ARGOFFSET(%rsp) movq %rcx,RIP-ARGOFFSET(%rsp) CFI_REL_OFFSET rip,RIP-ARGOFFSET - GET_THREAD_INFO(%rcx) - testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%rcx) + testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) jnz tracesys system_call_fastpath: cmpq $__NR_syscall_max,%rax @@ -496,10 +495,9 @@ ret_from_sys_call: /* edi: flagmask */ sysret_check: LOCKDEP_SYS_EXIT - GET_THREAD_INFO(%rcx) DISABLE_INTERRUPTS(CLBR_NONE) TRACE_IRQS_OFF - movl TI_flags(%rcx),%edx + movl TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET),%edx andl %edi,%edx jnz sysret_careful CFI_REMEMBER_STATE @@ -583,7 +581,7 @@ sysret_audit: /* Do syscall tracing */ tracesys: #ifdef CONFIG_AUDITSYSCALL - testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%rcx) + testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) jz auditsys #endif SAVE_REST @@ -612,8 +610,6 @@ tracesys: GLOBAL(int_ret_from_sys_call) DISABLE_INTERRUPTS(CLBR_NONE) TRACE_IRQS_OFF - testl $3,CS-ARGOFFSET(%rsp) - je retint_restore_args movl $_TIF_ALLWORK_MASK,%edi /* edi: mask to check */ GLOBAL(int_with_check) @@ -953,6 +949,7 @@ END(common_interrupt) ENTRY(\sym) INTR_FRAME pushq_cfi $~(\num) +.Lcommon_\sym: interrupt \do_sym jmp ret_from_intr CFI_ENDPROC @@ -976,13 +973,21 @@ apicinterrupt X86_PLATFORM_IPI_VECTOR \ x86_platform_ipi smp_x86_platform_ipi #ifdef CONFIG_SMP -.irp idx,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, \ + ALIGN + INTR_FRAME +.irp idx,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, \ 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 .if NUM_INVALIDATE_TLB_VECTORS > \idx -apicinterrupt (INVALIDATE_TLB_VECTOR_START)+\idx \ - invalidate_interrupt\idx smp_invalidate_interrupt +ENTRY(invalidate_interrupt\idx) + pushq_cfi $~(INVALIDATE_TLB_VECTOR_START+\idx) + jmp .Lcommon_invalidate_interrupt0 + CFI_ADJUST_CFA_OFFSET -8 +END(invalidate_interrupt\idx) .endif .endr + CFI_ENDPROC +apicinterrupt INVALIDATE_TLB_VECTOR_START, \ + invalidate_interrupt0, smp_invalidate_interrupt #endif apicinterrupt THRESHOLD_APIC_VECTOR \ diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index ee5d4fbd53b4bac72d19e3aa3000e77467f0c25e..15763af7bfe3b6202af28810c3aafbeaf14d2a0c 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -293,7 +293,7 @@ int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags) regs.orig_ax = -1; regs.ip = (unsigned long) kernel_thread_helper; regs.cs = __KERNEL_CS | get_kernel_rpl(); - regs.flags = X86_EFLAGS_IF | 0x2; + regs.flags = X86_EFLAGS_IF | X86_EFLAGS_BIT1; /* Ok, create the new process.. */ return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, ®s, 0, NULL, NULL); diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index a8e3eb83466cd191afa95dc1ba6839f317955b68..fa1191fb679d5bcf3dbc44a259aee1fd143a4db5 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -306,15 +306,10 @@ dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code) == NOTIFY_STOP) return; #endif /* CONFIG_KGDB_LOW_LEVEL_TRAP */ -#ifdef CONFIG_KPROBES + if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP) == NOTIFY_STOP) return; -#else - if (notify_die(DIE_TRAP, "int3", regs, error_code, 3, SIGTRAP) - == NOTIFY_STOP) - return; -#endif preempt_conditional_sti(regs); do_trap(3, SIGTRAP, "int3", regs, error_code, NULL); diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index e4d4a22e8b9430661d28aaa9650fc2f79af6c44b..b07ba9393564ddce2413cbc07e8fec27f47fa9c9 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c @@ -57,7 +57,7 @@ DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data) = .lock = __SEQLOCK_UNLOCKED(__vsyscall_gtod_data.lock), }; -static enum { EMULATE, NATIVE, NONE } vsyscall_mode = NATIVE; +static enum { EMULATE, NATIVE, NONE } vsyscall_mode = EMULATE; static int __init vsyscall_setup(char *str) { @@ -140,11 +140,40 @@ static int addr_to_vsyscall_nr(unsigned long addr) return nr; } +static bool write_ok_or_segv(unsigned long ptr, size_t size) +{ + /* + * XXX: if access_ok, get_user, and put_user handled + * sig_on_uaccess_error, this could go away. + */ + + if (!access_ok(VERIFY_WRITE, (void __user *)ptr, size)) { + siginfo_t info; + struct thread_struct *thread = ¤t->thread; + + thread->error_code = 6; /* user fault, no page, write */ + thread->cr2 = ptr; + thread->trap_no = 14; + + memset(&info, 0, sizeof(info)); + info.si_signo = SIGSEGV; + info.si_errno = 0; + info.si_code = SEGV_MAPERR; + info.si_addr = (void __user *)ptr; + + force_sig_info(SIGSEGV, &info, current); + return false; + } else { + return true; + } +} + bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) { struct task_struct *tsk; unsigned long caller; int vsyscall_nr; + int prev_sig_on_uaccess_error; long ret; /* @@ -180,35 +209,65 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) if (seccomp_mode(&tsk->seccomp)) do_exit(SIGKILL); + /* + * With a real vsyscall, page faults cause SIGSEGV. We want to + * preserve that behavior to make writing exploits harder. + */ + prev_sig_on_uaccess_error = current_thread_info()->sig_on_uaccess_error; + current_thread_info()->sig_on_uaccess_error = 1; + + /* + * 0 is a valid user pointer (in the access_ok sense) on 32-bit and + * 64-bit, so we don't need to special-case it here. For all the + * vsyscalls, 0 means "don't write anything" not "write it at + * address 0". + */ + ret = -EFAULT; switch (vsyscall_nr) { case 0: + if (!write_ok_or_segv(regs->di, sizeof(struct timeval)) || + !write_ok_or_segv(regs->si, sizeof(struct timezone))) + break; + ret = sys_gettimeofday( (struct timeval __user *)regs->di, (struct timezone __user *)regs->si); break; case 1: + if (!write_ok_or_segv(regs->di, sizeof(time_t))) + break; + ret = sys_time((time_t __user *)regs->di); break; case 2: + if (!write_ok_or_segv(regs->di, sizeof(unsigned)) || + !write_ok_or_segv(regs->si, sizeof(unsigned))) + break; + ret = sys_getcpu((unsigned __user *)regs->di, (unsigned __user *)regs->si, 0); break; } + current_thread_info()->sig_on_uaccess_error = prev_sig_on_uaccess_error; + if (ret == -EFAULT) { - /* - * Bad news -- userspace fed a bad pointer to a vsyscall. - * - * With a real vsyscall, that would have caused SIGSEGV. - * To make writing reliable exploits using the emulated - * vsyscalls harder, generate SIGSEGV here as well. - */ + /* Bad news -- userspace fed a bad pointer to a vsyscall. */ warn_bad_vsyscall(KERN_INFO, regs, "vsyscall fault (exploit attempt?)"); - goto sigsegv; + + /* + * If we failed to generate a signal for any reason, + * generate one here. (This should be impossible.) + */ + if (WARN_ON_ONCE(!sigismember(&tsk->pending.signal, SIGBUS) && + !sigismember(&tsk->pending.signal, SIGSEGV))) + goto sigsegv; + + return true; /* Don't emulate the ret. */ } regs->ax = ret; diff --git a/arch/x86/lib/string_32.c b/arch/x86/lib/string_32.c index 82004d2bf05e160bfa6faf15745f38eb4ad89f46..bd59090825dbabb96821c30c498d18cf2220857a 100644 --- a/arch/x86/lib/string_32.c +++ b/arch/x86/lib/string_32.c @@ -164,15 +164,13 @@ EXPORT_SYMBOL(strchr); size_t strlen(const char *s) { int d0; - int res; + size_t res; asm volatile("repne\n\t" - "scasb\n\t" - "notl %0\n\t" - "decl %0" + "scasb" : "=c" (res), "=&D" (d0) : "1" (s), "a" (0), "0" (0xffffffffu) : "memory"); - return res; + return ~res - 1; } EXPORT_SYMBOL(strlen); #endif diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c index d0474ad2a6e528eb87ef0904edf9311fc035acf3..1fb85dbe390ab7a1810c88ff5e60ee4f380bb93a 100644 --- a/arch/x86/mm/extable.c +++ b/arch/x86/mm/extable.c @@ -25,7 +25,7 @@ int fixup_exception(struct pt_regs *regs) if (fixup) { /* If fixup is less than 16, it means uaccess error */ if (fixup->fixup < 16) { - current_thread_info()->uaccess_err = -EFAULT; + current_thread_info()->uaccess_err = 1; regs->ip += fixup->fixup; return 1; } diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 5db0490deb070d12368aeee042b8edc35ed456d6..9d74824a708dcdd1cd3b0e6ce4798e79fbbf1c01 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -626,7 +626,7 @@ pgtable_bad(struct pt_regs *regs, unsigned long error_code, static noinline void no_context(struct pt_regs *regs, unsigned long error_code, - unsigned long address) + unsigned long address, int signal, int si_code) { struct task_struct *tsk = current; unsigned long *stackend; @@ -634,8 +634,17 @@ no_context(struct pt_regs *regs, unsigned long error_code, int sig; /* Are we prepared to handle this kernel fault? */ - if (fixup_exception(regs)) + if (fixup_exception(regs)) { + if (current_thread_info()->sig_on_uaccess_error && signal) { + tsk->thread.trap_no = 14; + tsk->thread.error_code = error_code | PF_USER; + tsk->thread.cr2 = address; + + /* XXX: hwpoison faults will set the wrong code. */ + force_sig_info_fault(signal, si_code, address, tsk, 0); + } return; + } /* * 32-bit: @@ -755,7 +764,7 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, if (is_f00f_bug(regs, address)) return; - no_context(regs, error_code, address); + no_context(regs, error_code, address, SIGSEGV, si_code); } static noinline void @@ -819,7 +828,7 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address, /* Kernel mode? Handle exceptions or die: */ if (!(error_code & PF_USER)) { - no_context(regs, error_code, address); + no_context(regs, error_code, address, SIGBUS, BUS_ADRERR); return; } @@ -854,7 +863,7 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code, if (!(fault & VM_FAULT_RETRY)) up_read(¤t->mm->mmap_sem); if (!(error_code & PF_USER)) - no_context(regs, error_code, address); + no_context(regs, error_code, address, 0, 0); return 1; } if (!(fault & VM_FAULT_ERROR)) @@ -864,7 +873,8 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code, /* Kernel mode? Handle exceptions or die: */ if (!(error_code & PF_USER)) { up_read(¤t->mm->mmap_sem); - no_context(regs, error_code, address); + no_context(regs, error_code, address, + SIGSEGV, SEGV_MAPERR); return 1; } diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 4a01967f02e76c5d0156e35b94ef303162d661e6..4cf9bd0a16537d8acea91674e4022ed400c3c835 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -238,7 +238,8 @@ static efi_status_t __init phys_efi_get_time(efi_time_t *tm, spin_lock_irqsave(&rtc_lock, flags); efi_call_phys_prelog(); - status = efi_call_phys2(efi_phys.get_time, tm, tc); + status = efi_call_phys2(efi_phys.get_time, virt_to_phys(tm), + virt_to_phys(tc)); efi_call_phys_epilog(); spin_unlock_irqrestore(&rtc_lock, flags); return status; diff --git a/drivers/lguest/x86/core.c b/drivers/lguest/x86/core.c index 65af42f2d593cbc352b5d65318ed90ff0cc6ac92..39809035320a89c468809a5efe5ff824cfb93a27 100644 --- a/drivers/lguest/x86/core.c +++ b/drivers/lguest/x86/core.c @@ -697,7 +697,7 @@ void lguest_arch_setup_regs(struct lg_cpu *cpu, unsigned long start) * interrupts are enabled. We always leave interrupts enabled while * running the Guest. */ - regs->eflags = X86_EFLAGS_IF | 0x2; + regs->eflags = X86_EFLAGS_IF | X86_EFLAGS_BIT1; /* * The "Extended Instruction Pointer" register says where the Guest is diff --git a/mm/slub.c b/mm/slub.c index ed3334d9b6da77f64796f1933f7672284b159fc2..09ccee8fb58efc5ca25dca0183c16b4a99e3de2a 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -368,7 +368,7 @@ static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct page *page VM_BUG_ON(!irqs_disabled()); #ifdef CONFIG_CMPXCHG_DOUBLE if (s->flags & __CMPXCHG_DOUBLE) { - if (cmpxchg_double(&page->freelist, + if (cmpxchg_double(&page->freelist, &page->counters, freelist_old, counters_old, freelist_new, counters_new)) return 1; @@ -402,7 +402,7 @@ static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct page *page, { #ifdef CONFIG_CMPXCHG_DOUBLE if (s->flags & __CMPXCHG_DOUBLE) { - if (cmpxchg_double(&page->freelist, + if (cmpxchg_double(&page->freelist, &page->counters, freelist_old, counters_old, freelist_new, counters_new)) return 1;