diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h index 004c1bc95d2b9082de58e0626134a779705620b6..e4448e16046dd32ab69ca17ef65a6b21745a3e0b 100644 --- a/arch/arm/include/asm/cacheflush.h +++ b/arch/arm/include/asm/cacheflush.h @@ -215,7 +215,9 @@ static inline void vivt_flush_cache_mm(struct mm_struct *mm) static inline void vivt_flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { - if (cpumask_test_cpu(smp_processor_id(), mm_cpumask(vma->vm_mm))) + struct mm_struct *mm = vma->vm_mm; + + if (!mm || cpumask_test_cpu(smp_processor_id(), mm_cpumask(mm))) __cpuc_flush_user_range(start & PAGE_MASK, PAGE_ALIGN(end), vma->vm_flags); } @@ -223,7 +225,9 @@ vivt_flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned static inline void vivt_flush_cache_page(struct vm_area_struct *vma, unsigned long user_addr, unsigned long pfn) { - if (cpumask_test_cpu(smp_processor_id(), mm_cpumask(vma->vm_mm))) { + struct mm_struct *mm = vma->vm_mm; + + if (!mm || cpumask_test_cpu(smp_processor_id(), mm_cpumask(mm))) { unsigned long addr = user_addr & PAGE_MASK; __cpuc_flush_user_range(addr, addr + PAGE_SIZE, vma->vm_flags); } diff --git a/arch/arm/include/asm/mutex.h b/arch/arm/include/asm/mutex.h index 93226cf23ae0a838e192be1d675ee886be9dd52e..b1479fd04a951f1d82d55f77fd01cb9f0b072f38 100644 --- a/arch/arm/include/asm/mutex.h +++ b/arch/arm/include/asm/mutex.h @@ -7,121 +7,10 @@ */ #ifndef _ASM_MUTEX_H #define _ASM_MUTEX_H - -#if __LINUX_ARM_ARCH__ < 6 -/* On pre-ARMv6 hardware the swp based implementation is the most efficient. */ -# include <asm-generic/mutex-xchg.h> -#else - /* - * Attempting to lock a mutex on ARMv6+ can be done with a bastardized - * atomic decrement (it is not a reliable atomic decrement but it satisfies - * the defined semantics for our purpose, while being smaller and faster - * than a real atomic decrement or atomic swap. The idea is to attempt - * decrementing the lock value only once. 
If once decremented it isn't zero, - * or if its store-back fails due to a dispute on the exclusive store, we - * simply bail out immediately through the slow path where the lock will be - * reattempted until it succeeds. + * On pre-ARMv6 hardware this results in a swp-based implementation, + * which is the most efficient. For ARMv6+, we emit a pair of exclusive + * accesses instead. */ -static inline void -__mutex_fastpath_lock(atomic_t *count, void (*fail_fn)(atomic_t *)) -{ - int __ex_flag, __res; - - __asm__ ( - - "ldrex %0, [%2] \n\t" - "sub %0, %0, #1 \n\t" - "strex %1, %0, [%2] " - - : "=&r" (__res), "=&r" (__ex_flag) - : "r" (&(count)->counter) - : "cc","memory" ); - - __res |= __ex_flag; - if (unlikely(__res != 0)) - fail_fn(count); -} - -static inline int -__mutex_fastpath_lock_retval(atomic_t *count, int (*fail_fn)(atomic_t *)) -{ - int __ex_flag, __res; - - __asm__ ( - - "ldrex %0, [%2] \n\t" - "sub %0, %0, #1 \n\t" - "strex %1, %0, [%2] " - - : "=&r" (__res), "=&r" (__ex_flag) - : "r" (&(count)->counter) - : "cc","memory" ); - - __res |= __ex_flag; - if (unlikely(__res != 0)) - __res = fail_fn(count); - return __res; -} - -/* - * Same trick is used for the unlock fast path. However the original value, - * rather than the result, is used to test for success in order to have - * better generated assembly. - */ -static inline void -__mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *)) -{ - int __ex_flag, __res, __orig; - - __asm__ ( - - "ldrex %0, [%3] \n\t" - "add %1, %0, #1 \n\t" - "strex %2, %1, [%3] " - - : "=&r" (__orig), "=&r" (__res), "=&r" (__ex_flag) - : "r" (&(count)->counter) - : "cc","memory" ); - - __orig |= __ex_flag; - if (unlikely(__orig != 0)) - fail_fn(count); -} - -/* - * If the unlock was done on a contended lock, or if the unlock simply fails - * then the mutex remains locked. 
- */ -#define __mutex_slowpath_needs_to_unlock() 1 - -/* - * For __mutex_fastpath_trylock we use another construct which could be - * described as a "single value cmpxchg". - * - * This provides the needed trylock semantics like cmpxchg would, but it is - * lighter and less generic than a true cmpxchg implementation. - */ -static inline int -__mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *)) -{ - int __ex_flag, __res, __orig; - - __asm__ ( - - "1: ldrex %0, [%3] \n\t" - "subs %1, %0, #1 \n\t" - "strexeq %2, %1, [%3] \n\t" - "movlt %0, #0 \n\t" - "cmpeq %2, #0 \n\t" - "bgt 1b " - - : "=&r" (__orig), "=&r" (__res), "=&r" (__ex_flag) - : "r" (&count->counter) - : "cc", "memory" ); - - return __orig; -} - -#endif +#include <asm-generic/mutex-xchg.h> #endif diff --git a/arch/arm/include/asm/setup.h b/arch/arm/include/asm/setup.h index 23ebc0c82a3975ae5c455dd39598e93ab33922e7..24d284a1bfc75faa0ed90fb7882a67f00128f336 100644 --- a/arch/arm/include/asm/setup.h +++ b/arch/arm/include/asm/setup.h @@ -196,7 +196,7 @@ static const struct tagtable __tagtable_##fn __tag = { tag, fn } struct membank { phys_addr_t start; - unsigned long size; + phys_addr_t size; unsigned int highmem; }; @@ -217,7 +217,7 @@ extern struct meminfo meminfo; #define bank_phys_end(bank) ((bank)->start + (bank)->size) #define bank_phys_size(bank) (bank)->size -extern int arm_add_memory(phys_addr_t start, unsigned long size); +extern int arm_add_memory(phys_addr_t start, phys_addr_t size); extern void early_print(const char *str, ...); extern void dump_machine_table(void); diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index 0d1851ca6eb993a628f623c257487200fe529604..0f82098c9bfe3618115dcc4e8b2d74d95c7ddcf6 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -244,6 +244,19 @@ svc_preempt: b 1b #endif +__und_fault: + @ Correct the PC such that it is pointing at the instruction + @ which caused the fault. 
If the faulting instruction was ARM + @ the PC will be pointing at the next instruction, and have to + @ subtract 4. Otherwise, it is Thumb, and the PC will be + @ pointing at the second half of the Thumb instruction. We + @ have to subtract 2. + ldr r2, [r0, #S_PC] + sub r2, r2, r1 + str r2, [r0, #S_PC] + b do_undefinstr +ENDPROC(__und_fault) + .align 5 __und_svc: #ifdef CONFIG_KPROBES @@ -261,25 +274,32 @@ __und_svc: @ @ r0 - instruction @ -#ifndef CONFIG_THUMB2_KERNEL +#ifndef CONFIG_THUMB2_KERNEL ldr r0, [r4, #-4] #else + mov r1, #2 ldrh r0, [r4, #-2] @ Thumb instruction at LR - 2 cmp r0, #0xe800 @ 32-bit instruction if xx >= 0 - ldrhhs r9, [r4] @ bottom 16 bits - orrhs r0, r9, r0, lsl #16 + blo __und_svc_fault + ldrh r9, [r4] @ bottom 16 bits + add r4, r4, #2 + str r4, [sp, #S_PC] + orr r0, r9, r0, lsl #16 #endif - adr r9, BSYM(1f) + adr r9, BSYM(__und_svc_finish) mov r2, r4 bl call_fpe + mov r1, #4 @ PC correction to apply +__und_svc_fault: mov r0, sp @ struct pt_regs *regs - bl do_undefinstr + bl __und_fault @ @ IRQs off again before pulling preserved data off the stack @ -1: disable_irq_notrace +__und_svc_finish: + disable_irq_notrace @ @ restore SPSR and restart the instruction @@ -423,25 +443,33 @@ __und_usr: mov r2, r4 mov r3, r5 + @ r2 = regs->ARM_pc, which is either 2 or 4 bytes ahead of the + @ faulting instruction depending on Thumb mode. + @ r3 = regs->ARM_cpsr @ - @ fall through to the emulation code, which returns using r9 if - @ it has emulated the instruction, or the more conventional lr - @ if we are to treat this as a real undefined instruction - @ - @ r0 - instruction + @ The emulation code returns using r9 if it has emulated the + @ instruction, or the more conventional lr if we are to treat + @ this as a real undefined instruction @ adr r9, BSYM(ret_from_exception) - adr lr, BSYM(__und_usr_unknown) + tst r3, #PSR_T_BIT @ Thumb mode? 
- itet eq @ explicit IT needed for the 1f label - subeq r4, r2, #4 @ ARM instr at LR - 4 - subne r4, r2, #2 @ Thumb instr at LR - 2 -1: ldreqt r0, [r4] + bne __und_usr_thumb + sub r4, r2, #4 @ ARM instr at LR - 4 +1: ldrt r0, [r4] #ifdef CONFIG_CPU_ENDIAN_BE8 - reveq r0, r0 @ little endian instruction + rev r0, r0 @ little endian instruction #endif - beq call_fpe + @ r0 = 32-bit ARM instruction which caused the exception + @ r2 = PC value for the following instruction (:= regs->ARM_pc) + @ r4 = PC value for the faulting instruction + @ lr = 32-bit undefined instruction function + adr lr, BSYM(__und_usr_fault_32) + b call_fpe + +__und_usr_thumb: @ Thumb instruction + sub r4, r2, #2 @ First half of thumb instr at LR - 2 #if CONFIG_ARM_THUMB && __LINUX_ARM_ARCH__ >= 6 && CONFIG_CPU_V7 /* * Thumb-2 instruction handling. Note that because pre-v6 and >= v6 platforms @@ -455,7 +483,7 @@ __und_usr: ldr r5, .LCcpu_architecture ldr r5, [r5] cmp r5, #CPU_ARCH_ARMv7 - blo __und_usr_unknown + blo __und_usr_fault_16 @ 16bit undefined instruction /* * The following code won't get run unless the running CPU really is v7, so * coding round the lack of ldrht on older arches is pointless. 
Temporarily @@ -463,15 +491,18 @@ __und_usr: */ .arch armv6t2 #endif -2: - ARM( ldrht r5, [r4], #2 ) - THUMB( ldrht r5, [r4] ) - THUMB( add r4, r4, #2 ) +2: ldrht r5, [r4] cmp r5, #0xe800 @ 32bit instruction if xx != 0 - blo __und_usr_unknown -3: ldrht r0, [r4] + blo __und_usr_fault_16 @ 16bit undefined instruction +3: ldrht r0, [r2] add r2, r2, #2 @ r2 is PC + 2, make it PC + 4 + str r2, [sp, #S_PC] @ it's a 2x16bit instr, update orr r0, r0, r5, lsl #16 + adr lr, BSYM(__und_usr_fault_32) + @ r0 = the two 16-bit Thumb instructions which caused the exception + @ r2 = PC value for the following Thumb instruction (:= regs->ARM_pc) + @ r4 = PC value for the first 16-bit Thumb instruction + @ lr = 32bit undefined instruction function #if __LINUX_ARM_ARCH__ < 7 /* If the target arch was overridden, change it back: */ @@ -482,17 +513,13 @@ __und_usr: #endif #endif /* __LINUX_ARM_ARCH__ < 7 */ #else /* !(CONFIG_ARM_THUMB && __LINUX_ARM_ARCH__ >= 6 && CONFIG_CPU_V7) */ - b __und_usr_unknown + b __und_usr_fault_16 #endif - UNWIND(.fnend ) + UNWIND(.fnend) ENDPROC(__und_usr) - @ - @ fallthrough to call_fpe - @ - /* - * The out of line fixup for the ldrt above. + * The out of line fixup for the ldrt instructions above. */ .pushsection .fixup, "ax" .align 2 @@ -524,11 +551,12 @@ ENDPROC(__und_usr) * NEON handler code. * * Emulators may wish to make use of the following registers: - * r0 = instruction opcode. - * r2 = PC+4 + * r0 = instruction opcode (32-bit ARM or two 16-bit Thumb) + * r2 = PC value to resume execution after successful emulation * r9 = normal "successful" return address - * r10 = this threads thread_info structure. + * r10 = this threads thread_info structure * lr = unrecognised instruction return address + * IRQs disabled, FIQs enabled. 
*/ @ @ Fall-through from Thumb-2 __und_usr @@ -659,12 +687,17 @@ ENTRY(no_fp) mov pc, lr ENDPROC(no_fp) -__und_usr_unknown: - enable_irq +__und_usr_fault_32: + mov r1, #4 + b 1f +__und_usr_fault_16: + mov r1, #2 +1: enable_irq mov r0, sp adr lr, BSYM(ret_from_exception) - b do_undefinstr -ENDPROC(__und_usr_unknown) + b __und_fault +ENDPROC(__und_usr_fault_32) +ENDPROC(__und_usr_fault_16) .align 5 __pabt_usr: diff --git a/arch/arm/kernel/ftrace.c b/arch/arm/kernel/ftrace.c index df0bf0c8cb790a5ee501c55a204a45ddbcb48aab..34e56647dceeee88d99f65d5fd0a6e00fb46a0fd 100644 --- a/arch/arm/kernel/ftrace.c +++ b/arch/arm/kernel/ftrace.c @@ -179,19 +179,20 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, old = *parent; *parent = return_hooker; - err = ftrace_push_return_trace(old, self_addr, &trace.depth, - frame_pointer); - if (err == -EBUSY) { - *parent = old; - return; - } - trace.func = self_addr; + trace.depth = current->curr_ret_stack + 1; /* Only trace if the calling function expects to */ if (!ftrace_graph_entry(&trace)) { - current->curr_ret_stack--; *parent = old; + return; + } + + err = ftrace_push_return_trace(old, self_addr, &trace.depth, + frame_pointer); + if (err == -EBUSY) { + *parent = old; + return; } } diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index 19c95ea65b2f25c25371734a7a03f147e929978e..693b744fd572f163f053c9fef42d563e246e04d3 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -247,6 +247,7 @@ void machine_shutdown(void) void machine_halt(void) { machine_shutdown(); + local_irq_disable(); while (1); } @@ -268,6 +269,7 @@ void machine_restart(char *cmd) /* Whoops - the platform was unable to reboot. Tell the user! 
*/ printk("Reboot failed -- System halted\n"); + local_irq_disable(); while (1); } diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index e15d83bb4ea378c1316db6a7d705eab30248a98b..a81dcecc734388f7745e399e38504645f4e0e758 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -508,7 +508,7 @@ void __init dump_machine_table(void) /* can't use cpu_relax() here as it may require MMU setup */; } -int __init arm_add_memory(phys_addr_t start, unsigned long size) +int __init arm_add_memory(phys_addr_t start, phys_addr_t size) { struct membank *bank = &meminfo.bank[meminfo.nr_banks]; @@ -538,7 +538,7 @@ int __init arm_add_memory(phys_addr_t start, unsigned long size) } #endif - bank->size = size & PAGE_MASK; + bank->size = size & ~(phys_addr_t)(PAGE_SIZE - 1); /* * Check whether this memory region has non-zero size or @@ -558,7 +558,7 @@ int __init arm_add_memory(phys_addr_t start, unsigned long size) static int __init early_mem(char *p) { static int usermem __initdata = 0; - unsigned long size; + phys_addr_t size; phys_addr_t start; char *endp; diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index aea74f5bc34abdcefbd90570e3c1b7e534b29763..ebd8ad274d76bb82488240e9543d7a1d99b5c674 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -563,7 +563,8 @@ void smp_send_stop(void) cpumask_copy(&mask, cpu_online_mask); cpumask_clear_cpu(smp_processor_id(), &mask); - smp_cross_call(&mask, IPI_CPU_STOP); + if (!cpumask_empty(&mask)) + smp_cross_call(&mask, IPI_CPU_STOP); /* Wait up to one second for other CPUs to stop */ timeout = USEC_PER_SEC; diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index 8b97d739b17b1040f4f3e4bb7fe069d84808fa72..7978d4f0f3aef59ee177a0525db44c6819110ed6 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -402,18 +402,10 @@ static int call_undef_hook(struct pt_regs *regs, unsigned int instr) asmlinkage void __exception do_undefinstr(struct pt_regs *regs) { - unsigned int 
correction = thumb_mode(regs) ? 2 : 4; unsigned int instr; siginfo_t info; void __user *pc; - /* - * According to the ARM ARM, PC is 2 or 4 bytes ahead, - * depending whether we're in Thumb mode or not. - * Correct this offset. - */ - regs->ARM_pc -= correction; - pc = (void __user *)instruction_pointer(regs); if (processor_mode(regs) == SVC_MODE) { diff --git a/arch/arm/mm/tlb-v7.S b/arch/arm/mm/tlb-v7.S index 845f461f8ec16847e69414f5c966a9ca4f961db6..c2021139cb563fd14a74c327d453f3cf0cee183d 100644 --- a/arch/arm/mm/tlb-v7.S +++ b/arch/arm/mm/tlb-v7.S @@ -38,11 +38,19 @@ ENTRY(v7wbi_flush_user_tlb_range) dsb mov r0, r0, lsr #PAGE_SHIFT @ align address mov r1, r1, lsr #PAGE_SHIFT +#ifdef CONFIG_ARM_ERRATA_720789 + mov r3, #0 +#else asid r3, r3 @ mask ASID +#endif orr r0, r3, r0, lsl #PAGE_SHIFT @ Create initial MVA mov r1, r1, lsl #PAGE_SHIFT 1: +#ifdef CONFIG_ARM_ERRATA_720789 + ALT_SMP(mcr p15, 0, r0, c8, c3, 3) @ TLB invalidate U MVA all ASID (shareable) +#else ALT_SMP(mcr p15, 0, r0, c8, c3, 1) @ TLB invalidate U MVA (shareable) +#endif ALT_UP(mcr p15, 0, r0, c8, c7, 1) @ TLB invalidate U MVA add r0, r0, #PAGE_SZ @@ -67,7 +75,11 @@ ENTRY(v7wbi_flush_kern_tlb_range) mov r0, r0, lsl #PAGE_SHIFT mov r1, r1, lsl #PAGE_SHIFT 1: +#ifdef CONFIG_ARM_ERRATA_720789 + ALT_SMP(mcr p15, 0, r0, c8, c3, 3) @ TLB invalidate U MVA all ASID (shareable) +#else ALT_SMP(mcr p15, 0, r0, c8, c3, 1) @ TLB invalidate U MVA (shareable) +#endif ALT_UP(mcr p15, 0, r0, c8, c7, 1) @ TLB invalidate U MVA add r0, r0, #PAGE_SZ cmp r0, r1 diff --git a/arch/arm/vfp/entry.S b/arch/arm/vfp/entry.S index 4fa9903b83cf5dbb54a15623ec45fd00552d8521..cc926c98598141a2fbcdb762148ec17782982d06 100644 --- a/arch/arm/vfp/entry.S +++ b/arch/arm/vfp/entry.S @@ -7,18 +7,20 @@ * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. 
- * - * Basic entry code, called from the kernel's undefined instruction trap. - * r0 = faulted instruction - * r5 = faulted PC+4 - * r9 = successful return - * r10 = thread_info structure - * lr = failure return */ #include #include #include "../kernel/entry-header.S" +@ VFP entry point. +@ +@ r0 = instruction opcode (32-bit ARM or two 16-bit Thumb) +@ r2 = PC value to resume execution after successful emulation +@ r9 = normal "successful" return address +@ r10 = this threads thread_info structure +@ lr = unrecognised instruction return address +@ IRQs disabled. +@ ENTRY(do_vfp) #ifdef CONFIG_PREEMPT ldr r4, [r10, #TI_PREEMPT] @ get preempt count diff --git a/arch/arm/vfp/vfphw.S b/arch/arm/vfp/vfphw.S index d50f0e486cf2b322b6e8eff1baa7c73272e66c70..ea0349f6358658065b52aa1473e877ee4fa8f5ba 100644 --- a/arch/arm/vfp/vfphw.S +++ b/arch/arm/vfp/vfphw.S @@ -62,13 +62,13 @@ @ VFP hardware support entry point. @ -@ r0 = faulted instruction -@ r2 = faulted PC+4 -@ r9 = successful return +@ r0 = instruction opcode (32-bit ARM or two 16-bit Thumb) +@ r2 = PC value to resume execution after successful emulation +@ r9 = normal "successful" return address @ r10 = vfp_state union @ r11 = CPU number -@ lr = failure return - +@ lr = unrecognised instruction return address +@ IRQs enabled. ENTRY(vfp_support_entry) DBGSTR3 "instr %08x pc %08x state %p", r0, r2, r10 @@ -162,9 +162,12 @@ vfp_hw_state_valid: @ exception before retrying branch @ out before setting an FPEXC that @ stops us reading stuff - VFPFMXR FPEXC, r1 @ restore FPEXC last - sub r2, r2, #4 - str r2, [sp, #S_PC] @ retry the instruction + VFPFMXR FPEXC, r1 @ Restore FPEXC last + sub r2, r2, #4 @ Retry current instruction - if Thumb + str r2, [sp, #S_PC] @ mode it's two 16-bit instructions, + @ else it's one 32-bit instruction, so + @ always subtract 4 from the following + @ instruction address. 
#ifdef CONFIG_PREEMPT get_thread_info r10 ldr r4, [r10, #TI_PREEMPT] @ get preempt count diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c index 586961929e964ce63d70987c0c7c36184aff4db8..fb849d044bde9b231d481201c985f84cc37c0a3d 100644 --- a/arch/arm/vfp/vfpmodule.c +++ b/arch/arm/vfp/vfpmodule.c @@ -457,10 +457,16 @@ static int vfp_pm_suspend(void) /* disable, just in case */ fmxr(FPEXC, fmrx(FPEXC) & ~FPEXC_EN); + } else if (vfp_current_hw_state[ti->cpu]) { +#ifndef CONFIG_SMP + fmxr(FPEXC, fpexc | FPEXC_EN); + vfp_save_state(vfp_current_hw_state[ti->cpu], fpexc); + fmxr(FPEXC, fpexc); +#endif } /* clear any information we had about last context state */ - memset(vfp_current_hw_state, 0, sizeof(vfp_current_hw_state)); + vfp_current_hw_state[ti->cpu] = NULL; return 0; }