diff --git a/arch/sh/include/asm/fpu.h b/arch/sh/include/asm/fpu.h index fb6bbb9b1cc8e704191f5a6fb73ce001d3c9644f..06c4281aab65604af414c5081dde2b9871a3f952 100644 --- a/arch/sh/include/asm/fpu.h +++ b/arch/sh/include/asm/fpu.h @@ -2,8 +2,8 @@ #define __ASM_SH_FPU_H #ifndef __ASSEMBLY__ -#include -#include + +struct task_struct; #ifdef CONFIG_SH_FPU static inline void release_fpu(struct pt_regs *regs) @@ -16,22 +16,23 @@ static inline void grab_fpu(struct pt_regs *regs) regs->sr &= ~SR_FD; } -struct task_struct; - extern void save_fpu(struct task_struct *__tsk); -void fpu_state_restore(struct pt_regs *regs); +extern void restore_fpu(struct task_struct *__tsk); +extern void fpu_state_restore(struct pt_regs *regs); +extern void __fpu_state_restore(void); #else - -#define save_fpu(tsk) do { } while (0) -#define release_fpu(regs) do { } while (0) -#define grab_fpu(regs) do { } while (0) -#define fpu_state_restore(regs) do { } while (0) - +#define save_fpu(tsk) do { } while (0) +#define restore_fpu(tsk) do { } while (0) +#define release_fpu(regs) do { } while (0) +#define grab_fpu(regs) do { } while (0) +#define fpu_state_restore(regs) do { } while (0) +#define __fpu_state_restore(regs) do { } while (0) #endif struct user_regset; extern int do_fpu_inst(unsigned short, struct pt_regs *); +extern int init_fpu(struct task_struct *); extern int fpregs_get(struct task_struct *target, const struct user_regset *regset, @@ -65,18 +66,6 @@ static inline void clear_fpu(struct task_struct *tsk, struct pt_regs *regs) preempt_enable(); } -static inline int init_fpu(struct task_struct *tsk) -{ - if (tsk_used_math(tsk)) { - if ((boot_cpu_data.flags & CPU_HAS_FPU) && tsk == current) - unlazy_fpu(tsk, task_pt_regs(tsk)); - return 0; - } - - set_stopped_child_used_math(tsk); - return 0; -} - #endif /* __ASSEMBLY__ */ #endif /* __ASM_SH_FPU_H */ diff --git a/arch/sh/include/asm/processor_32.h b/arch/sh/include/asm/processor_32.h index 50b8c9c3fa4cfdacca0c1f7f423dfeff70f3fcef..a359898206e84603eca41bdf54ff3cae2c81d0ce 100644 --- a/arch/sh/include/asm/processor_32.h +++ b/arch/sh/include/asm/processor_32.h @@ -90,11 +90,15 @@ struct sh_fpu_soft_struct { unsigned long entry_pc; }; -union sh_fpu_union { - struct sh_fpu_hard_struct hard; - struct sh_fpu_soft_struct soft; +union thread_xstate { + struct sh_fpu_hard_struct hardfpu; + struct sh_fpu_soft_struct softfpu; }; +extern unsigned int xstate_size; +extern void free_thread_xstate(struct task_struct *); +extern struct kmem_cache *task_xstate_cachep; + struct thread_struct { /* Saved registers when thread is descheduled */ unsigned long sp; @@ -103,13 +107,13 @@ struct thread_struct { /* Hardware debugging registers */ unsigned long ubc_pc; - /* floating point info */ - union sh_fpu_union fpu; - #ifdef CONFIG_SH_DSP /* Dsp status information */ struct sh_dsp_struct dsp_status; #endif + + /* Extended processor state */ + union thread_xstate *xstate; }; /* Count of active tasks with UBC settings */ diff --git a/arch/sh/include/asm/thread_info.h b/arch/sh/include/asm/thread_info.h index 2c5b48edeab9cbd1f6ae9cee09fd34ec49193136..55a36fef6875d2cb3bb985143d3ea07781887e76 100644 --- a/arch/sh/include/asm/thread_info.h +++ b/arch/sh/include/asm/thread_info.h @@ -97,6 +97,10 @@ static inline struct thread_info *current_thread_info(void) extern struct thread_info *alloc_thread_info(struct task_struct *tsk); extern void free_thread_info(struct thread_info *ti); +extern void arch_task_cache_init(void); +#define arch_task_cache_init arch_task_cache_init +extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src); +extern void init_thread_xstate(void); #define __HAVE_ARCH_THREAD_INFO_ALLOCATOR diff --git a/arch/sh/kernel/cpu/Makefile b/arch/sh/kernel/cpu/Makefile index d97c803719ecb939431c910d084a3c774b366ddd..0e48bc61c272e4eb4ebc1dfe4e1a8b7c755a0cd5 100644 --- a/arch/sh/kernel/cpu/Makefile +++ b/arch/sh/kernel/cpu/Makefile @@ -17,5 +17,7 @@ obj-$(CONFIG_ARCH_SHMOBILE) += shmobile/ obj-$(CONFIG_SH_ADC) += adc.o obj-$(CONFIG_SH_CLK_CPG) += clock-cpg.o +obj-$(CONFIG_SH_FPU) += fpu.o +obj-$(CONFIG_SH_FPU_EMU) += fpu.o obj-y += irq/ init.o clock.o hwblk.o diff --git a/arch/sh/kernel/cpu/fpu.c b/arch/sh/kernel/cpu/fpu.c new file mode 100644 index 0000000000000000000000000000000000000000..c23e6727002a470928741c3782d4f06e4774bc8b --- /dev/null +++ b/arch/sh/kernel/cpu/fpu.c @@ -0,0 +1,82 @@ +#include +#include +#include + +int init_fpu(struct task_struct *tsk) +{ + if (tsk_used_math(tsk)) { + if ((boot_cpu_data.flags & CPU_HAS_FPU) && tsk == current) + unlazy_fpu(tsk, task_pt_regs(tsk)); + return 0; + } + + /* + * Memory allocation at the first usage of the FPU and other state. + */ + if (!tsk->thread.xstate) { + tsk->thread.xstate = kmem_cache_alloc(task_xstate_cachep, + GFP_KERNEL); + if (!tsk->thread.xstate) + return -ENOMEM; + } + + if (boot_cpu_data.flags & CPU_HAS_FPU) { + struct sh_fpu_hard_struct *fp = &tsk->thread.xstate->hardfpu; + memset(fp, 0, xstate_size); + fp->fpscr = FPSCR_INIT; + } else { + struct sh_fpu_soft_struct *fp = &tsk->thread.xstate->softfpu; + memset(fp, 0, xstate_size); + fp->fpscr = FPSCR_INIT; + } + + set_stopped_child_used_math(tsk); + return 0; +} + +#ifdef CONFIG_SH_FPU +void __fpu_state_restore(void) +{ + struct task_struct *tsk = current; + + restore_fpu(tsk); + + task_thread_info(tsk)->status |= TS_USEDFPU; + tsk->fpu_counter++; +} + +void fpu_state_restore(struct pt_regs *regs) +{ + struct task_struct *tsk = current; + + if (unlikely(!user_mode(regs))) { + printk(KERN_ERR "BUG: FPU is used in kernel mode.\n"); + BUG(); + return; + } + + if (!tsk_used_math(tsk)) { + /* + * does a slab alloc which can sleep + */ + if (init_fpu(tsk)) { + /* + * ran out of memory! + */ + do_group_exit(SIGKILL); + return; + } + } + + grab_fpu(regs); + + __fpu_state_restore(); +} + +BUILD_TRAP_HANDLER(fpu_state_restore) +{ + TRAP_HANDLER_DECL; + + fpu_state_restore(regs); +} +#endif /* CONFIG_SH_FPU */ diff --git a/arch/sh/kernel/cpu/init.c b/arch/sh/kernel/cpu/init.c index 89b4b76c0d763d58f6c77229ddc8963fbc8d5508..2e23422280a790b7b7bae11947b1de9355731d58 100644 --- a/arch/sh/kernel/cpu/init.c +++ b/arch/sh/kernel/cpu/init.c @@ -28,18 +28,30 @@ #include #endif +#ifdef CONFIG_SH_FPU +#define cpu_has_fpu 1 +#else +#define cpu_has_fpu 0 +#endif + +#ifdef CONFIG_SH_DSP +#define cpu_has_dsp 1 +#else +#define cpu_has_dsp 0 +#endif + /* * Generic wrapper for command line arguments to disable on-chip * peripherals (nofpu, nodsp, and so forth). */ -#define onchip_setup(x) \ -static int x##_disabled __initdata = 0; \ - \ -static int __init x##_setup(char *opts) \ -{ \ - x##_disabled = 1; \ - return 1; \ -} \ +#define onchip_setup(x) \ +static int x##_disabled __initdata = !cpu_has_##x; \ + \ +static int __init x##_setup(char *opts) \ +{ \ + x##_disabled = 1; \ + return 1; \ +} \ __setup("no" __stringify(x), x##_setup); onchip_setup(fpu); @@ -207,6 +219,18 @@ static void detect_cache_shape(void) l2_cache_shape = -1; /* No S-cache */ } +static void __init fpu_init(void) +{ + /* Disable the FPU */ + if (fpu_disabled && (current_cpu_data.flags & CPU_HAS_FPU)) { + printk("FPU Disabled\n"); + current_cpu_data.flags &= ~CPU_HAS_FPU; + } + + disable_fpu(); + clear_used_math(); +} + #ifdef CONFIG_SH_DSP static void __init release_dsp(void) { @@ -244,9 +268,17 @@ static void __init dsp_init(void) if (sr & SR_DSP) current_cpu_data.flags |= CPU_HAS_DSP; + /* Disable the DSP */ + if (dsp_disabled && (current_cpu_data.flags & CPU_HAS_DSP)) { + printk("DSP Disabled\n"); + current_cpu_data.flags &= ~CPU_HAS_DSP; + } + /* Now that we've determined the DSP status, clear the DSP bit. */ release_dsp(); } +#else +static inline void __init dsp_init(void) { } #endif /* CONFIG_SH_DSP */ /** @@ -302,18 +334,8 @@ asmlinkage void __init sh_cpu_init(void) detect_cache_shape(); } - /* Disable the FPU */ - if (fpu_disabled) { - printk("FPU Disabled\n"); - current_cpu_data.flags &= ~CPU_HAS_FPU; - } - - /* FPU initialization */ - disable_fpu(); - if ((current_cpu_data.flags & CPU_HAS_FPU)) { - current_thread_info()->status &= ~TS_USEDFPU; - clear_used_math(); - } + fpu_init(); + dsp_init(); /* * Initialize the per-CPU ASID cache very early, since the @@ -321,18 +343,12 @@ asmlinkage void __init sh_cpu_init(void) */ current_cpu_data.asid_cache = NO_CONTEXT; -#ifdef CONFIG_SH_DSP - /* Probe for DSP */ - dsp_init(); - - /* Disable the DSP */ - if (dsp_disabled) { - printk("DSP Disabled\n"); - current_cpu_data.flags &= ~CPU_HAS_DSP; - release_dsp(); - } -#endif - speculative_execution_init(); expmask_init(); + + /* + * Boot processor to setup the FP and extended state context info. + */ + if (raw_smp_processor_id() == 0) + init_thread_xstate(); } diff --git a/arch/sh/kernel/cpu/sh2a/fpu.c b/arch/sh/kernel/cpu/sh2a/fpu.c index d395ce5740e7dff1cd66c3f786e592a0ba9c97b2..488d24e0cdf0b1aa59988a17afc7a9edd9a7e898 100644 --- a/arch/sh/kernel/cpu/sh2a/fpu.c +++ b/arch/sh/kernel/cpu/sh2a/fpu.c @@ -26,8 +26,7 @@ /* * Save FPU registers onto task structure. */ -void -save_fpu(struct task_struct *tsk) +void save_fpu(struct task_struct *tsk) { unsigned long dummy; @@ -52,7 +51,7 @@ save_fpu(struct task_struct *tsk) "fmov.s fr0, @-%0\n\t" "lds %3, fpscr\n\t" : "=r" (dummy) - : "0" ((char *)(&tsk->thread.fpu.hard.status)), + : "0" ((char *)(&tsk->thread.xstate->hardfpu.status)), "r" (FPSCR_RCHG), "r" (FPSCR_INIT) : "memory"); @@ -60,8 +59,7 @@ save_fpu(struct task_struct *tsk) disable_fpu(); } -static void -restore_fpu(struct task_struct *tsk) +void restore_fpu(struct task_struct *tsk) { unsigned long dummy; @@ -85,44 +83,11 @@ restore_fpu(struct task_struct *tsk) "lds.l @%0+, fpscr\n\t" "lds.l @%0+, fpul\n\t" : "=r" (dummy) - : "0" (&tsk->thread.fpu), "r" (FPSCR_RCHG) + : "0" (tsk->thread.xstate), "r" (FPSCR_RCHG) : "memory"); disable_fpu(); } -/* - * Load the FPU with signalling NANS. This bit pattern we're using - * has the property that no matter wether considered as single or as - * double precission represents signaling NANS. - */ - -static void -fpu_init(void) -{ - enable_fpu(); - asm volatile("lds %0, fpul\n\t" - "fsts fpul, fr0\n\t" - "fsts fpul, fr1\n\t" - "fsts fpul, fr2\n\t" - "fsts fpul, fr3\n\t" - "fsts fpul, fr4\n\t" - "fsts fpul, fr5\n\t" - "fsts fpul, fr6\n\t" - "fsts fpul, fr7\n\t" - "fsts fpul, fr8\n\t" - "fsts fpul, fr9\n\t" - "fsts fpul, fr10\n\t" - "fsts fpul, fr11\n\t" - "fsts fpul, fr12\n\t" - "fsts fpul, fr13\n\t" - "fsts fpul, fr14\n\t" - "fsts fpul, fr15\n\t" - "lds %2, fpscr\n\t" - : /* no output */ - : "r" (0), "r" (FPSCR_RCHG), "r" (FPSCR_INIT)); - disable_fpu(); -} - /* * Emulate arithmetic ops on denormalized number for some FPU insns. */ @@ -490,9 +455,9 @@ ieee_fpe_handler (struct pt_regs *regs) if ((finsn & 0xf1ff) == 0xf0ad) { /* fcnvsd */ struct task_struct *tsk = current; - if ((tsk->thread.fpu.hard.fpscr & FPSCR_FPU_ERROR)) { + if ((tsk->thread.xstate->hardfpu.fpscr & FPSCR_FPU_ERROR)) { /* FPU error */ - denormal_to_double (&tsk->thread.fpu.hard, + denormal_to_double (&tsk->thread.xstate->hardfpu, (finsn >> 8) & 0xf); } else return 0; @@ -507,9 +472,9 @@ ieee_fpe_handler (struct pt_regs *regs) n = (finsn >> 8) & 0xf; m = (finsn >> 4) & 0xf; - hx = tsk->thread.fpu.hard.fp_regs[n]; - hy = tsk->thread.fpu.hard.fp_regs[m]; - fpscr = tsk->thread.fpu.hard.fpscr; + hx = tsk->thread.xstate->hardfpu.fp_regs[n]; + hy = tsk->thread.xstate->hardfpu.fp_regs[m]; + fpscr = tsk->thread.xstate->hardfpu.fpscr; prec = fpscr & (1 << 19); if ((fpscr & FPSCR_FPU_ERROR) @@ -519,15 +484,15 @@ ieee_fpe_handler (struct pt_regs *regs) /* FPU error because of denormal */ llx = ((long long) hx << 32) - | tsk->thread.fpu.hard.fp_regs[n+1]; + | tsk->thread.xstate->hardfpu.fp_regs[n+1]; lly = ((long long) hy << 32) - | tsk->thread.fpu.hard.fp_regs[m+1]; + | tsk->thread.xstate->hardfpu.fp_regs[m+1]; if ((hx & 0x7fffffff) >= 0x00100000) llx = denormal_muld(lly, llx); else llx = denormal_muld(llx, lly); - tsk->thread.fpu.hard.fp_regs[n] = llx >> 32; - tsk->thread.fpu.hard.fp_regs[n+1] = llx & 0xffffffff; + tsk->thread.xstate->hardfpu.fp_regs[n] = llx >> 32; + tsk->thread.xstate->hardfpu.fp_regs[n+1] = llx & 0xffffffff; } else if ((fpscr & FPSCR_FPU_ERROR) && (!prec && ((hx & 0x7fffffff) < 0x00800000 || (hy & 0x7fffffff) < 0x00800000))) { @@ -536,7 +501,7 @@ ieee_fpe_handler (struct pt_regs *regs) hx = denormal_mulf(hy, hx); else hx = denormal_mulf(hx, hy); - tsk->thread.fpu.hard.fp_regs[n] = hx; + tsk->thread.xstate->hardfpu.fp_regs[n] = hx; } else return 0; @@ -550,9 +515,9 @@ ieee_fpe_handler (struct pt_regs *regs) n = (finsn >> 8) & 0xf; m = (finsn >> 4) & 0xf; - hx = tsk->thread.fpu.hard.fp_regs[n]; - hy = tsk->thread.fpu.hard.fp_regs[m]; - fpscr = tsk->thread.fpu.hard.fpscr; + hx = tsk->thread.xstate->hardfpu.fp_regs[n]; + hy = tsk->thread.xstate->hardfpu.fp_regs[m]; + fpscr = tsk->thread.xstate->hardfpu.fpscr; prec = fpscr & (1 << 19); if ((fpscr & FPSCR_FPU_ERROR) @@ -562,15 +527,15 @@ ieee_fpe_handler (struct pt_regs *regs) /* FPU error because of denormal */ llx = ((long long) hx << 32) - | tsk->thread.fpu.hard.fp_regs[n+1]; + | tsk->thread.xstate->hardfpu.fp_regs[n+1]; lly = ((long long) hy << 32) - | tsk->thread.fpu.hard.fp_regs[m+1]; + | tsk->thread.xstate->hardfpu.fp_regs[m+1]; if ((finsn & 0xf00f) == 0xf000) llx = denormal_addd(llx, lly); else llx = denormal_addd(llx, lly ^ (1LL << 63)); - tsk->thread.fpu.hard.fp_regs[n] = llx >> 32; - tsk->thread.fpu.hard.fp_regs[n+1] = llx & 0xffffffff; + tsk->thread.xstate->hardfpu.fp_regs[n] = llx >> 32; + tsk->thread.xstate->hardfpu.fp_regs[n+1] = llx & 0xffffffff; } else if ((fpscr & FPSCR_FPU_ERROR) && (!prec && ((hx & 0x7fffffff) < 0x00800000 || (hy & 0x7fffffff) < 0x00800000))) { @@ -579,7 +544,7 @@ ieee_fpe_handler (struct pt_regs *regs) hx = denormal_addf(hx, hy); else hx = denormal_addf(hx, hy ^ 0x80000000); - tsk->thread.fpu.hard.fp_regs[n] = hx; + tsk->thread.xstate->hardfpu.fp_regs[n] = hx; } else return 0; @@ -597,7 +562,7 @@ BUILD_TRAP_HANDLER(fpu_error) __unlazy_fpu(tsk, regs); if (ieee_fpe_handler(regs)) { - tsk->thread.fpu.hard.fpscr &= + tsk->thread.xstate->hardfpu.fpscr &= ~(FPSCR_CAUSE_MASK | FPSCR_FLAG_MASK); grab_fpu(regs); restore_fpu(tsk); @@ -607,33 +572,3 @@ BUILD_TRAP_HANDLER(fpu_error) force_sig(SIGFPE, tsk); } - -void fpu_state_restore(struct pt_regs *regs) -{ - struct task_struct *tsk = current; - - grab_fpu(regs); - if (unlikely(!user_mode(regs))) { - printk(KERN_ERR "BUG: FPU is used in kernel mode.\n"); - BUG(); - return; - } - - if (likely(used_math())) { - /* Using the FPU again. */ - restore_fpu(tsk); - } else { - /* First time FPU user. */ - fpu_init(); - set_used_math(); - } - task_thread_info(tsk)->status |= TS_USEDFPU; - tsk->fpu_counter++; -} - -BUILD_TRAP_HANDLER(fpu_state_restore) -{ - TRAP_HANDLER_DECL; - - fpu_state_restore(regs); -} diff --git a/arch/sh/kernel/cpu/sh4/fpu.c b/arch/sh/kernel/cpu/sh4/fpu.c index e97857aec8a0461c496d5512170a55e7c54816d1..447482d7f65e6bdf573c33dabe67317afbe19cdd 100644 --- a/arch/sh/kernel/cpu/sh4/fpu.c +++ b/arch/sh/kernel/cpu/sh4/fpu.c @@ -85,14 +85,14 @@ void save_fpu(struct task_struct *tsk) "fmov.s fr1, @-%0\n\t" "fmov.s fr0, @-%0\n\t" "lds %3, fpscr\n\t":"=r" (dummy) - :"0"((char *)(&tsk->thread.fpu.hard.status)), + :"0"((char *)(&tsk->thread.xstate->hardfpu.status)), "r"(FPSCR_RCHG), "r"(FPSCR_INIT) :"memory"); disable_fpu(); } -static void restore_fpu(struct task_struct *tsk) +void restore_fpu(struct task_struct *tsk) { unsigned long dummy; @@ -135,62 +135,11 @@ static void restore_fpu(struct task_struct *tsk) "lds.l @%0+, fpscr\n\t" "lds.l @%0+, fpul\n\t" :"=r" (dummy) - :"0"(&tsk->thread.fpu), "r"(FPSCR_RCHG) + :"0" (tsk->thread.xstate), "r" (FPSCR_RCHG) :"memory"); disable_fpu(); } -/* - * Load the FPU with signalling NANS. This bit pattern we're using - * has the property that no matter wether considered as single or as - * double precision represents signaling NANS. - */ - -static void fpu_init(void) -{ - enable_fpu(); - asm volatile ( "lds %0, fpul\n\t" - "lds %1, fpscr\n\t" - "fsts fpul, fr0\n\t" - "fsts fpul, fr1\n\t" - "fsts fpul, fr2\n\t" - "fsts fpul, fr3\n\t" - "fsts fpul, fr4\n\t" - "fsts fpul, fr5\n\t" - "fsts fpul, fr6\n\t" - "fsts fpul, fr7\n\t" - "fsts fpul, fr8\n\t" - "fsts fpul, fr9\n\t" - "fsts fpul, fr10\n\t" - "fsts fpul, fr11\n\t" - "fsts fpul, fr12\n\t" - "fsts fpul, fr13\n\t" - "fsts fpul, fr14\n\t" - "fsts fpul, fr15\n\t" - "frchg\n\t" - "fsts fpul, fr0\n\t" - "fsts fpul, fr1\n\t" - "fsts fpul, fr2\n\t" - "fsts fpul, fr3\n\t" - "fsts fpul, fr4\n\t" - "fsts fpul, fr5\n\t" - "fsts fpul, fr6\n\t" - "fsts fpul, fr7\n\t" - "fsts fpul, fr8\n\t" - "fsts fpul, fr9\n\t" - "fsts fpul, fr10\n\t" - "fsts fpul, fr11\n\t" - "fsts fpul, fr12\n\t" - "fsts fpul, fr13\n\t" - "fsts fpul, fr14\n\t" - "fsts fpul, fr15\n\t" - "frchg\n\t" - "lds %2, fpscr\n\t" - : /* no output */ - :"r" (0), "r"(FPSCR_RCHG), "r"(FPSCR_INIT)); - disable_fpu(); -} - /** * denormal_to_double - Given denormalized float number, * store double float @@ -282,9 +231,9 @@ static int ieee_fpe_handler(struct pt_regs *regs) /* fcnvsd */ struct task_struct *tsk = current; - if ((tsk->thread.fpu.hard.fpscr & FPSCR_CAUSE_ERROR)) + if ((tsk->thread.xstate->hardfpu.fpscr & FPSCR_CAUSE_ERROR)) /* FPU error */ - denormal_to_double(&tsk->thread.fpu.hard, + denormal_to_double(&tsk->thread.xstate->hardfpu, (finsn >> 8) & 0xf); else return 0; @@ -300,9 +249,9 @@ static int ieee_fpe_handler(struct pt_regs *regs) n = (finsn >> 8) & 0xf; m = (finsn >> 4) & 0xf; - hx = tsk->thread.fpu.hard.fp_regs[n]; - hy = tsk->thread.fpu.hard.fp_regs[m]; - fpscr = tsk->thread.fpu.hard.fpscr; + hx = tsk->thread.xstate->hardfpu.fp_regs[n]; + hy = tsk->thread.xstate->hardfpu.fp_regs[m]; + fpscr = tsk->thread.xstate->hardfpu.fpscr; prec = fpscr & FPSCR_DBL_PRECISION; if ((fpscr & FPSCR_CAUSE_ERROR) @@ -312,18 +261,18 @@ static int ieee_fpe_handler(struct pt_regs *regs) /* FPU error because of denormal (doubles) */ llx = ((long long)hx << 32) - | tsk->thread.fpu.hard.fp_regs[n + 1]; + | tsk->thread.xstate->hardfpu.fp_regs[n + 1]; lly = ((long long)hy << 32) - | tsk->thread.fpu.hard.fp_regs[m + 1]; + | tsk->thread.xstate->hardfpu.fp_regs[m + 1]; llx = float64_mul(llx, lly); - tsk->thread.fpu.hard.fp_regs[n] = llx >> 32; - tsk->thread.fpu.hard.fp_regs[n + 1] = llx & 0xffffffff; + tsk->thread.xstate->hardfpu.fp_regs[n] = llx >> 32; + tsk->thread.xstate->hardfpu.fp_regs[n + 1] = llx & 0xffffffff; } else if ((fpscr & FPSCR_CAUSE_ERROR) && (!prec && ((hx & 0x7fffffff) < 0x00800000 || (hy & 0x7fffffff) < 0x00800000))) { /* FPU error because of denormal (floats) */ hx = float32_mul(hx, hy); - tsk->thread.fpu.hard.fp_regs[n] = hx; + tsk->thread.xstate->hardfpu.fp_regs[n] = hx; } else return 0; @@ -338,9 +287,9 @@ static int ieee_fpe_handler(struct pt_regs *regs) n = (finsn >> 8) & 0xf; m = (finsn >> 4) & 0xf; - hx = tsk->thread.fpu.hard.fp_regs[n]; - hy = tsk->thread.fpu.hard.fp_regs[m]; - fpscr = tsk->thread.fpu.hard.fpscr; + hx = tsk->thread.xstate->hardfpu.fp_regs[n]; + hy = tsk->thread.xstate->hardfpu.fp_regs[m]; + fpscr = tsk->thread.xstate->hardfpu.fpscr; prec = fpscr & FPSCR_DBL_PRECISION; if ((fpscr & FPSCR_CAUSE_ERROR) @@ -350,15 +299,15 @@ static int ieee_fpe_handler(struct pt_regs *regs) /* FPU error because of denormal (doubles) */ llx = ((long long)hx << 32) - | tsk->thread.fpu.hard.fp_regs[n + 1]; + | tsk->thread.xstate->hardfpu.fp_regs[n + 1]; lly = ((long long)hy << 32) - | tsk->thread.fpu.hard.fp_regs[m + 1]; + | tsk->thread.xstate->hardfpu.fp_regs[m + 1]; if ((finsn & 0xf00f) == 0xf000) llx = float64_add(llx, lly); else llx = float64_sub(llx, lly); - tsk->thread.fpu.hard.fp_regs[n] = llx >> 32; - tsk->thread.fpu.hard.fp_regs[n + 1] = llx & 0xffffffff; + tsk->thread.xstate->hardfpu.fp_regs[n] = llx >> 32; + tsk->thread.xstate->hardfpu.fp_regs[n + 1] = llx & 0xffffffff; } else if ((fpscr & FPSCR_CAUSE_ERROR) && (!prec && ((hx & 0x7fffffff) < 0x00800000 || (hy & 0x7fffffff) < 0x00800000))) { @@ -367,7 +316,7 @@ static int ieee_fpe_handler(struct pt_regs *regs) hx = float32_add(hx, hy); else hx = float32_sub(hx, hy); - tsk->thread.fpu.hard.fp_regs[n] = hx; + tsk->thread.xstate->hardfpu.fp_regs[n] = hx; } else return 0; @@ -382,9 +331,9 @@ static int ieee_fpe_handler(struct pt_regs *regs) n = (finsn >> 8) & 0xf; m = (finsn >> 4) & 0xf; - hx = tsk->thread.fpu.hard.fp_regs[n]; - hy = tsk->thread.fpu.hard.fp_regs[m]; - fpscr = tsk->thread.fpu.hard.fpscr; + hx = tsk->thread.xstate->hardfpu.fp_regs[n]; + hy = tsk->thread.xstate->hardfpu.fp_regs[m]; + fpscr = tsk->thread.xstate->hardfpu.fpscr; prec = fpscr & FPSCR_DBL_PRECISION; if ((fpscr & FPSCR_CAUSE_ERROR) @@ -394,20 +343,20 @@ static int ieee_fpe_handler(struct pt_regs *regs) /* FPU error because of denormal (doubles) */ llx = ((long long)hx << 32) - | tsk->thread.fpu.hard.fp_regs[n + 1]; + | tsk->thread.xstate->hardfpu.fp_regs[n + 1]; lly = ((long long)hy << 32) - | tsk->thread.fpu.hard.fp_regs[m + 1]; + | tsk->thread.xstate->hardfpu.fp_regs[m + 1]; llx = float64_div(llx, lly); - tsk->thread.fpu.hard.fp_regs[n] = llx >> 32; - tsk->thread.fpu.hard.fp_regs[n + 1] = llx & 0xffffffff; + tsk->thread.xstate->hardfpu.fp_regs[n] = llx >> 32; + tsk->thread.xstate->hardfpu.fp_regs[n + 1] = llx & 0xffffffff; } else if ((fpscr & FPSCR_CAUSE_ERROR) && (!prec && ((hx & 0x7fffffff) < 0x00800000 || (hy & 0x7fffffff) < 0x00800000))) { /* FPU error because of denormal (floats) */ hx = float32_div(hx, hy); - tsk->thread.fpu.hard.fp_regs[n] = hx; + tsk->thread.xstate->hardfpu.fp_regs[n] = hx; } else return 0; @@ -420,17 +369,17 @@ static int ieee_fpe_handler(struct pt_regs *regs) unsigned int hx; m = (finsn >> 8) & 0x7; - hx = tsk->thread.fpu.hard.fp_regs[m]; + hx = tsk->thread.xstate->hardfpu.fp_regs[m]; - if ((tsk->thread.fpu.hard.fpscr & FPSCR_CAUSE_ERROR) + if ((tsk->thread.xstate->hardfpu.fpscr & FPSCR_CAUSE_ERROR) && ((hx & 0x7fffffff) < 0x00100000)) { /* subnormal double to float conversion */ long long llx; - llx = ((long long)tsk->thread.fpu.hard.fp_regs[m] << 32) - | tsk->thread.fpu.hard.fp_regs[m + 1]; + llx = ((long long)tsk->thread.xstate->hardfpu.fp_regs[m] << 32) + | tsk->thread.xstate->hardfpu.fp_regs[m + 1]; - tsk->thread.fpu.hard.fpul = float64_to_float32(llx); + tsk->thread.xstate->hardfpu.fpul = float64_to_float32(llx); } else return 0; @@ -449,7 +398,7 @@ void float_raise(unsigned int flags) int float_rounding_mode(void) { struct task_struct *tsk = current; - int roundingMode = FPSCR_ROUNDING_MODE(tsk->thread.fpu.hard.fpscr); + int roundingMode = FPSCR_ROUNDING_MODE(tsk->thread.xstate->hardfpu.fpscr); return roundingMode; } @@ -461,16 +410,16 @@ BUILD_TRAP_HANDLER(fpu_error) __unlazy_fpu(tsk, regs); fpu_exception_flags = 0; if (ieee_fpe_handler(regs)) { - tsk->thread.fpu.hard.fpscr &= + tsk->thread.xstate->hardfpu.fpscr &= ~(FPSCR_CAUSE_MASK | FPSCR_FLAG_MASK); - tsk->thread.fpu.hard.fpscr |= fpu_exception_flags; + tsk->thread.xstate->hardfpu.fpscr |= fpu_exception_flags; /* Set the FPSCR flag as well as cause bits - simply * replicate the cause */ - tsk->thread.fpu.hard.fpscr |= (fpu_exception_flags >> 10); + tsk->thread.xstate->hardfpu.fpscr |= (fpu_exception_flags >> 10); grab_fpu(regs); restore_fpu(tsk); task_thread_info(tsk)->status |= TS_USEDFPU; - if ((((tsk->thread.fpu.hard.fpscr & FPSCR_ENABLE_MASK) >> 7) & + if ((((tsk->thread.xstate->hardfpu.fpscr & FPSCR_ENABLE_MASK) >> 7) & (fpu_exception_flags >> 2)) == 0) { return; } @@ -478,33 +427,3 @@ BUILD_TRAP_HANDLER(fpu_error) force_sig(SIGFPE, tsk); } - -void fpu_state_restore(struct pt_regs *regs) -{ - struct task_struct *tsk = current; - - grab_fpu(regs); - if (unlikely(!user_mode(regs))) { - printk(KERN_ERR "BUG: FPU is used in kernel mode.\n"); - BUG(); - return; - } - - if (likely(used_math())) { - /* Using the FPU again. */ - restore_fpu(tsk); - } else { - /* First time FPU user. */ - fpu_init(); - set_used_math(); - } - task_thread_info(tsk)->status |= TS_USEDFPU; - tsk->fpu_counter++; -} - -BUILD_TRAP_HANDLER(fpu_state_restore) -{ - TRAP_HANDLER_DECL; - - fpu_state_restore(regs); -} diff --git a/arch/sh/kernel/process.c b/arch/sh/kernel/process.c index 077e06e1a8899a5175118b5dd5b8de9d8660d7cc..81add9b9ea6eccb8380b685b94fda321b7b63218 100644 --- a/arch/sh/kernel/process.c +++ b/arch/sh/kernel/process.c @@ -2,6 +2,32 @@ #include #include +struct kmem_cache *task_xstate_cachep = NULL; +unsigned int xstate_size; + +int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) +{ + *dst = *src; + + if (src->thread.xstate) { + dst->thread.xstate = kmem_cache_alloc(task_xstate_cachep, + GFP_KERNEL); + if (!dst->thread.xstate) + return -ENOMEM; + memcpy(dst->thread.xstate, src->thread.xstate, xstate_size); + } + + return 0; +} + +void free_thread_xstate(struct task_struct *tsk) +{ + if (tsk->thread.xstate) { + kmem_cache_free(task_xstate_cachep, tsk->thread.xstate); + tsk->thread.xstate = NULL; + } +} + #if THREAD_SHIFT < PAGE_SHIFT static struct kmem_cache *thread_info_cache; @@ -20,6 +46,7 @@ struct thread_info *alloc_thread_info(struct task_struct *tsk) void free_thread_info(struct thread_info *ti) { + free_thread_xstate(ti->task); kmem_cache_free(thread_info_cache, ti); } @@ -41,6 +68,33 @@ struct thread_info *alloc_thread_info(struct task_struct *tsk) void free_thread_info(struct thread_info *ti) { + free_thread_xstate(ti->task); free_pages((unsigned long)ti, THREAD_SIZE_ORDER); } #endif /* THREAD_SHIFT < PAGE_SHIFT */ + +void arch_task_cache_init(void) +{ + if (!xstate_size) + return; + + task_xstate_cachep = kmem_cache_create("task_xstate", xstate_size, + __alignof__(union thread_xstate), + SLAB_PANIC | SLAB_NOTRACK, NULL); +} + +#ifdef CONFIG_SH_FPU_EMU +# define HAVE_SOFTFP 1 +#else +# define HAVE_SOFTFP 0 +#endif + +void init_thread_xstate(void) +{ + if (boot_cpu_data.flags & CPU_HAS_FPU) + xstate_size = sizeof(struct sh_fpu_hard_struct); + else if (HAVE_SOFTFP) + xstate_size = sizeof(struct sh_fpu_soft_struct); + else + xstate_size = 0; +} diff --git a/arch/sh/kernel/process_32.c b/arch/sh/kernel/process_32.c index c4361402ec5eb74fa61ad03ca149280eea0133cd..03de6573aa76bb21f76b5bdf6c1b214745e51d17 100644 --- a/arch/sh/kernel/process_32.c +++ b/arch/sh/kernel/process_32.c @@ -156,6 +156,8 @@ void start_thread(struct pt_regs *regs, unsigned long new_pc, regs->sr = SR_FD; regs->pc = new_pc; regs->regs[15] = new_sp; + + free_thread_xstate(current); } EXPORT_SYMBOL(start_thread); @@ -316,7 +318,7 @@ __switch_to(struct task_struct *prev, struct task_struct *next) /* we're going to use this soon, after a few expensive things */ if (next->fpu_counter > 5) - prefetch(&next_t->fpu.hard); + prefetch(next_t->xstate); #ifdef CONFIG_MMU /* @@ -353,7 +355,7 @@ __switch_to(struct task_struct *prev, struct task_struct *next) * chances of needing FPU soon are obviously high now */ if (next->fpu_counter > 5) - fpu_state_restore(task_pt_regs(next)); + __fpu_state_restore(); return prev; } diff --git a/arch/sh/kernel/ptrace_32.c b/arch/sh/kernel/ptrace_32.c index 9be35f34809356f40b006c91fea36ba77deea458..be9b5dcb4021a42dadb9437fdf0f4a851d1eaa13 100644 --- a/arch/sh/kernel/ptrace_32.c +++ b/arch/sh/kernel/ptrace_32.c @@ -163,10 +163,10 @@ int fpregs_get(struct task_struct *target, if ((boot_cpu_data.flags & CPU_HAS_FPU)) return user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.fpu.hard, 0, -1); + &target->thread.xstate->hardfpu, 0, -1); return user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.fpu.soft, 0, -1); + &target->thread.xstate->softfpu, 0, -1); } static int fpregs_set(struct task_struct *target, @@ -184,10 +184,10 @@ static int fpregs_set(struct task_struct *target, if ((boot_cpu_data.flags & CPU_HAS_FPU)) return user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &target->thread.fpu.hard, 0, -1); + &target->thread.xstate->hardfpu, 0, -1); return user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &target->thread.fpu.soft, 0, -1); + &target->thread.xstate->softfpu, 0, -1); } static int fpregs_active(struct task_struct *target, @@ -333,7 +333,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) else tmp = 0; } else - tmp = ((long *)&child->thread.fpu) + tmp = ((long *)child->thread.xstate) [(addr - (long)&dummy->fpu) >> 2]; } else if (addr == (long) &dummy->u_fpvalid) tmp = !!tsk_used_math(child); @@ -362,7 +362,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) else if (addr >= (long) &dummy->fpu && addr < (long) &dummy->u_fpvalid) { set_stopped_child_used_math(child); - ((long *)&child->thread.fpu) + ((long *)child->thread.xstate) [(addr - (long)&dummy->fpu) >> 2] = data; ret = 0; } else if (addr == (long) &dummy->u_fpvalid) { diff --git a/arch/sh/kernel/signal_32.c b/arch/sh/kernel/signal_32.c index 12815ce01ecd935022eb2f13779d26eb6e74e59a..6a7cce79eb4ed9561f04f6d64ead03593e6ff510 100644 --- a/arch/sh/kernel/signal_32.c +++ b/arch/sh/kernel/signal_32.c @@ -150,7 +150,7 @@ static inline int restore_sigcontext_fpu(struct sigcontext __user *sc) return 0; set_used_math(); - return __copy_from_user(&tsk->thread.fpu.hard, &sc->sc_fpregs[0], + return __copy_from_user(&tsk->thread.xstate->hardfpu, &sc->sc_fpregs[0], sizeof(long)*(16*2+2)); } @@ -175,7 +175,7 @@ static inline int save_sigcontext_fpu(struct sigcontext __user *sc, clear_used_math(); unlazy_fpu(tsk, regs); - return __copy_to_user(&sc->sc_fpregs[0], &tsk->thread.fpu.hard, + return __copy_to_user(&sc->sc_fpregs[0], &tsk->thread.xstate->hardfpu, sizeof(long)*(16*2+2)); } #endif /* CONFIG_SH_FPU */ diff --git a/arch/sh/math-emu/math.c b/arch/sh/math-emu/math.c index d6c15cae0912b487549efc059a7370fb76cacc73..1fcdb12209754ed20fb92721ca7906a05b7d3bb9 100644 --- a/arch/sh/math-emu/math.c +++ b/arch/sh/math-emu/math.c @@ -471,10 +471,10 @@ static int fpu_emulate(u16 code, struct sh_fpu_soft_struct *fregs, struct pt_reg * denormal_to_double - Given denormalized float number, * store double float * - * @fpu: Pointer to sh_fpu_hard structure + * @fpu: Pointer to sh_fpu_soft structure * @n: Index to FP register */ -static void denormal_to_double(struct sh_fpu_hard_struct *fpu, int n) +static void denormal_to_double(struct sh_fpu_soft_struct *fpu, int n) { unsigned long du, dl; unsigned long x = fpu->fpul; @@ -552,11 +552,11 @@ static int ieee_fpe_handler(struct pt_regs *regs) if ((finsn & 0xf1ff) == 0xf0ad) { /* fcnvsd */ struct task_struct *tsk = current; - if ((tsk->thread.fpu.hard.fpscr & (1 << 17))) { + if ((tsk->thread.xstate->softfpu.fpscr & (1 << 17))) { /* FPU error */ - denormal_to_double (&tsk->thread.fpu.hard, + denormal_to_double (&tsk->thread.xstate->softfpu, (finsn >> 8) & 0xf); - tsk->thread.fpu.hard.fpscr &= + tsk->thread.xstate->softfpu.fpscr &= ~(FPSCR_CAUSE_MASK | FPSCR_FLAG_MASK); task_thread_info(tsk)->status |= TS_USEDFPU; } else { @@ -617,7 +617,7 @@ static void fpu_init(struct sh_fpu_soft_struct *fpu) int do_fpu_inst(unsigned short inst, struct pt_regs *regs) { struct task_struct *tsk = current; - struct sh_fpu_soft_struct *fpu = &(tsk->thread.fpu.soft); + struct sh_fpu_soft_struct *fpu = &(tsk->thread.xstate->softfpu); if (!(task_thread_info(tsk)->status & TS_USEDFPU)) { /* initialize once. */