diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index f627739eca494e189f19495a4f12217752471239..50d854ce7f5997b84e2d52184ac32f294699979c 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -2090,10 +2090,11 @@ config CPU_HAS_MSA help MIPS SIMD Architecture (MSA) introduces 128 bit wide vector registers and a set of SIMD instructions to operate on them. When this option - is enabled the kernel will support detection of the MSA ASE. If you - know that your kernel will only be running on CPUs which do not - support MSA then you may wish to say N here to reduce the size of - your kernel. + is enabled the kernel will support allocating & switching MSA + vector register contexts. If you know that your kernel will only be + running on CPUs which do not support MSA or that your userland will + not be making use of it then you may wish to say N here to reduce + the size & complexity of your kernel. If unsure, say Y. diff --git a/arch/mips/include/asm/asmmacro.h b/arch/mips/include/asm/asmmacro.h index f571db371b3e6c8afbb861352d5e2dc66353facc..fe3b03c793e5fcdbaa96ef434e3734208de5f1c4 100644 --- a/arch/mips/include/asm/asmmacro.h +++ b/arch/mips/include/asm/asmmacro.h @@ -328,4 +328,74 @@ .endm #endif + .macro msa_save_all thread + st_d 0, THREAD_FPR0, \thread + st_d 1, THREAD_FPR1, \thread + st_d 2, THREAD_FPR2, \thread + st_d 3, THREAD_FPR3, \thread + st_d 4, THREAD_FPR4, \thread + st_d 5, THREAD_FPR5, \thread + st_d 6, THREAD_FPR6, \thread + st_d 7, THREAD_FPR7, \thread + st_d 8, THREAD_FPR8, \thread + st_d 9, THREAD_FPR9, \thread + st_d 10, THREAD_FPR10, \thread + st_d 11, THREAD_FPR11, \thread + st_d 12, THREAD_FPR12, \thread + st_d 13, THREAD_FPR13, \thread + st_d 14, THREAD_FPR14, \thread + st_d 15, THREAD_FPR15, \thread + st_d 16, THREAD_FPR16, \thread + st_d 17, THREAD_FPR17, \thread + st_d 18, THREAD_FPR18, \thread + st_d 19, THREAD_FPR19, \thread + st_d 20, THREAD_FPR20, \thread + st_d 21, THREAD_FPR21, \thread + st_d 22, THREAD_FPR22, \thread + st_d 23, THREAD_FPR23, \thread + st_d 24, THREAD_FPR24, \thread + st_d 25, THREAD_FPR25, \thread + st_d 26, THREAD_FPR26, \thread + st_d 27, THREAD_FPR27, \thread + st_d 28, THREAD_FPR28, \thread + st_d 29, THREAD_FPR29, \thread + st_d 30, THREAD_FPR30, \thread + st_d 31, THREAD_FPR31, \thread + .endm + + .macro msa_restore_all thread + ld_d 0, THREAD_FPR0, \thread + ld_d 1, THREAD_FPR1, \thread + ld_d 2, THREAD_FPR2, \thread + ld_d 3, THREAD_FPR3, \thread + ld_d 4, THREAD_FPR4, \thread + ld_d 5, THREAD_FPR5, \thread + ld_d 6, THREAD_FPR6, \thread + ld_d 7, THREAD_FPR7, \thread + ld_d 8, THREAD_FPR8, \thread + ld_d 9, THREAD_FPR9, \thread + ld_d 10, THREAD_FPR10, \thread + ld_d 11, THREAD_FPR11, \thread + ld_d 12, THREAD_FPR12, \thread + ld_d 13, THREAD_FPR13, \thread + ld_d 14, THREAD_FPR14, \thread + ld_d 15, THREAD_FPR15, \thread + ld_d 16, THREAD_FPR16, \thread + ld_d 17, THREAD_FPR17, \thread + ld_d 18, THREAD_FPR18, \thread + ld_d 19, THREAD_FPR19, \thread + ld_d 20, THREAD_FPR20, \thread + ld_d 21, THREAD_FPR21, \thread + ld_d 22, THREAD_FPR22, \thread + ld_d 23, THREAD_FPR23, \thread + ld_d 24, THREAD_FPR24, \thread + ld_d 25, THREAD_FPR25, \thread + ld_d 26, THREAD_FPR26, \thread + ld_d 27, THREAD_FPR27, \thread + ld_d 28, THREAD_FPR28, \thread + ld_d 29, THREAD_FPR29, \thread + ld_d 30, THREAD_FPR30, \thread + ld_d 31, THREAD_FPR31, \thread + .endm + #endif /* _ASM_ASMMACRO_H */ diff --git a/arch/mips/include/asm/msa.h b/arch/mips/include/asm/msa.h index 0a614fb83348225933c7627c9d44c2a63b17ffe2..a2aba6c3ec05ed31b0bc2d7d53ce43c7dfbe1a25 100644 --- a/arch/mips/include/asm/msa.h +++ b/arch/mips/include/asm/msa.h @@ -12,6 +12,9 @@ #include +extern void _save_msa(struct task_struct *); +extern void _restore_msa(struct task_struct *); + static inline void enable_msa(void) { if (cpu_has_msa) { @@ -36,6 +39,31 @@ static inline int is_msa_enabled(void) return read_c0_config5() & MIPS_CONF5_MSAEN; } +static inline int thread_msa_context_live(void) +{ + /* + * Check cpu_has_msa only if it's a constant. This will allow the + * compiler to optimise out code for CPUs without MSA without adding + * an extra redundant check for CPUs with MSA. + */ + if (__builtin_constant_p(cpu_has_msa) && !cpu_has_msa) + return 0; + + return test_thread_flag(TIF_MSA_CTX_LIVE); +} + +static inline void save_msa(struct task_struct *t) +{ + if (cpu_has_msa) + _save_msa(t); +} + +static inline void restore_msa(struct task_struct *t) +{ + if (cpu_has_msa) + _restore_msa(t); +} + #ifdef TOOLCHAIN_SUPPORTS_MSA #define __BUILD_MSA_CTL_REG(name, cs) \ diff --git a/arch/mips/include/asm/processor.h b/arch/mips/include/asm/processor.h index 50cf4c3431185e12e81320bc10fe0e298d5cd4ba..ad70cba8daffaeaeb7f62a7a47308dc1597a8be4 100644 --- a/arch/mips/include/asm/processor.h +++ b/arch/mips/include/asm/processor.h @@ -96,7 +96,12 @@ extern unsigned int vced_count, vcei_count; #define NUM_FPU_REGS 32 -#define FPU_REG_WIDTH 64 + +#ifdef CONFIG_CPU_HAS_MSA +# define FPU_REG_WIDTH 128 +#else +# define FPU_REG_WIDTH 64 +#endif union fpureg { __u32 val32[FPU_REG_WIDTH / 32]; @@ -133,6 +138,7 @@ BUILD_FPR_ACCESS(64) struct mips_fpu_struct { union fpureg fpr[NUM_FPU_REGS]; unsigned int fcr31; + unsigned int msacsr; }; #define NUM_DSP_REGS 6 @@ -310,6 +316,7 @@ struct thread_struct { .fpu = { \ .fpr = {{{0,},},}, \ .fcr31 = 0, \ + .msacsr = 0, \ }, \ /* \ * FPU affinity state (null if not FPAFF) \ diff --git a/arch/mips/include/asm/switch_to.h b/arch/mips/include/asm/switch_to.h index 278d45a097286034bfba656c9d626b1ff3d810b2..495c1041a2cc24e3ca1207ac31e7e3ff41f45382 100644 --- a/arch/mips/include/asm/switch_to.h +++ b/arch/mips/include/asm/switch_to.h @@ -16,22 +16,29 @@ #include #include #include +#include struct task_struct; +enum { + FP_SAVE_NONE = 0, + FP_SAVE_VECTOR = -1, + FP_SAVE_SCALAR = 1, +}; + /** * resume - resume execution of a task * @prev: The task previously executed. * @next: The task to begin executing. * @next_ti: task_thread_info(next). - * @usedfpu: Non-zero if prev's FP context should be saved. + * @fp_save: Which, if any, FP context to save for prev. * * This function is used whilst scheduling to save the context of prev & load * the context of next. Returns prev. */ extern asmlinkage struct task_struct *resume(struct task_struct *prev, struct task_struct *next, struct thread_info *next_ti, - u32 usedfpu); + s32 fp_save); extern unsigned int ll_bit; extern struct task_struct *ll_task; @@ -75,7 +82,8 @@ do { \ #define switch_to(prev, next, last) \ do { \ - u32 __usedfpu, __c0_stat; \ + u32 __c0_stat; \ + s32 __fpsave = FP_SAVE_NONE; \ __mips_mt_fpaff_switch_to(prev); \ if (cpu_has_dsp) \ __save_dsp(prev); \ @@ -88,8 +96,12 @@ do { \ write_c0_status(__c0_stat & ~ST0_CU2); \ } \ __clear_software_ll_bit(); \ - __usedfpu = test_and_clear_tsk_thread_flag(prev, TIF_USEDFPU); \ - (last) = resume(prev, next, task_thread_info(next), __usedfpu); \ + if (test_and_clear_tsk_thread_flag(prev, TIF_USEDFPU)) \ + __fpsave = FP_SAVE_SCALAR; \ + if (test_and_clear_tsk_thread_flag(prev, TIF_USEDMSA)) \ + __fpsave = FP_SAVE_VECTOR; \ + (last) = resume(prev, next, task_thread_info(next), __fpsave); \ + disable_msa(); \ } while (0) #define finish_arch_switch(prev) \ diff --git a/arch/mips/include/asm/thread_info.h b/arch/mips/include/asm/thread_info.h index e80ae50cae80c916700b789a707ab9b2dad65be7..d2d961d6cb86fc1edded8f0ade0adeedc0c995be 100644 --- a/arch/mips/include/asm/thread_info.h +++ b/arch/mips/include/asm/thread_info.h @@ -116,6 +116,8 @@ static inline struct thread_info *current_thread_info(void) #define TIF_LOAD_WATCH 25 /* If set, load watch registers */ #define TIF_SYSCALL_TRACEPOINT 26 /* syscall tracepoint instrumentation */ #define TIF_32BIT_FPREGS 27 /* 32-bit floating point registers */ +#define TIF_USEDMSA 29 /* MSA has been used this quantum */ +#define TIF_MSA_CTX_LIVE 30 /* MSA context must be preserved */ #define TIF_SYSCALL_TRACE 31 /* syscall trace active */ #define _TIF_SYSCALL_TRACE (1< #include #include +#include #include #include #include @@ -65,6 +66,8 @@ void start_thread(struct pt_regs * regs, unsigned long pc, unsigned long sp) clear_used_math(); clear_fpu_owner(); init_dsp(); + clear_thread_flag(TIF_MSA_CTX_LIVE); + disable_msa(); regs->cp0_epc = pc; regs->regs[29] = sp; } @@ -89,7 +92,9 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, preempt_disable(); - if (is_fpu_owner()) + if (is_msa_enabled()) + save_msa(p); + else if (is_fpu_owner()) save_fp(p); if (cpu_has_dsp) diff --git a/arch/mips/kernel/r4k_switch.S b/arch/mips/kernel/r4k_switch.S index cc78dd9a17c788412e2254f0e505aadb789be34b..f938ecd22af039fd965398ebbb5fab86ff08283e 100644 --- a/arch/mips/kernel/r4k_switch.S +++ b/arch/mips/kernel/r4k_switch.S @@ -28,19 +28,9 @@ */ #define ST_OFF (_THREAD_SIZE - 32 - PT_SIZE + PT_STATUS) -/* - * FPU context is saved iff the process has used it's FPU in the current - * time slice as indicated by _TIF_USEDFPU. In any case, the CU1 bit for user - * space STATUS register should be 0, so that a process *always* starts its - * userland with FPU disabled after each context switch. - * - * FPU will be enabled as soon as the process accesses FPU again, through - * do_cpu() trap. - */ - /* * task_struct *resume(task_struct *prev, task_struct *next, - * struct thread_info *next_ti, int usedfpu) + * struct thread_info *next_ti, s32 fp_save) */ .align 5 LEAF(resume) @@ -50,23 +40,37 @@ LONG_S ra, THREAD_REG31(a0) /* - * check if we need to save FPU registers + * Check whether we need to save any FP context. FP context is saved + * iff the process has used the context with the scalar FPU or the MSA + * ASE in the current time slice, as indicated by _TIF_USEDFPU and + * _TIF_USEDMSA respectively. switch_to will have set fp_save + * accordingly to an FP_SAVE_ enum value. */ + beqz a3, 2f - beqz a3, 1f - - PTR_L t3, TASK_THREAD_INFO(a0) /* - * clear saved user stack CU1 bit + * We do. Clear the saved CU1 bit for prev, such that next time it is + * scheduled it will start in userland with the FPU disabled. If the + * task uses the FPU then it will be enabled again via the do_cpu trap. + * This allows us to lazily restore the FP context. */ + PTR_L t3, TASK_THREAD_INFO(a0) LONG_L t0, ST_OFF(t3) li t1, ~ST0_CU1 and t0, t0, t1 LONG_S t0, ST_OFF(t3) + /* Check whether we're saving scalar or vector context. */ + bgtz a3, 1f + + /* Save 128b MSA vector context. */ + msa_save_all a0 + b 2f + +1: /* Save 32b/64b scalar FP context. */ fpu_save_double a0 t0 t1 # c0_status passed in t0 # clobbers t1 -1: +2: #if defined(CONFIG_CC_STACKPROTECTOR) && !defined(CONFIG_SMP) PTR_LA t8, __stack_chk_guard @@ -141,6 +145,26 @@ LEAF(_restore_fp) jr ra END(_restore_fp) +#ifdef CONFIG_CPU_HAS_MSA + +/* + * Save a thread's MSA vector context. + */ +LEAF(_save_msa) + msa_save_all a0 + jr ra + END(_save_msa) + +/* + * Restore a thread's MSA vector context. + */ +LEAF(_restore_msa) + msa_restore_all a0 + jr ra + END(_restore_msa) + +#endif + /* * Load the FPU with signalling NANS. This bit pattern we're using has * the property that no matter whether considered as single or as double diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index b0c7f80e05e51cee6d83387875ff98357ffd0307..4f27cbfad1e57d4cc93aa4efb09eb56a92664ad3 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -47,6 +47,7 @@ #include #include #include +#include #include #include #include @@ -79,6 +80,7 @@ extern asmlinkage void handle_ov(void); extern asmlinkage void handle_tr(void); extern asmlinkage void handle_fpe(void); extern asmlinkage void handle_ftlb(void); +extern asmlinkage void handle_msa(void); extern asmlinkage void handle_mdmx(void); extern asmlinkage void handle_watch(void); extern asmlinkage void handle_mt(void); @@ -1074,6 +1076,76 @@ static int default_cu2_call(struct notifier_block *nfb, unsigned long action, return NOTIFY_OK; } +static int enable_restore_fp_context(int msa) +{ + int err, was_fpu_owner; + + if (!used_math()) { + /* First time FP context user. */ + err = init_fpu(); + if (msa && !err) + enable_msa(); + if (!err) + set_used_math(); + return err; + } + + /* + * This task has formerly used the FP context. + * + * If this thread has no live MSA vector context then we can simply + * restore the scalar FP context. If it has live MSA vector context + * (that is, it has or may have used MSA since last performing a + * function call) then we'll need to restore the vector context. This + * applies even if we're currently only executing a scalar FP + * instruction. This is because if we were to later execute an MSA + * instruction then we'd either have to: + * + * - Restore the vector context & clobber any registers modified by + * scalar FP instructions between now & then. + * + * or + * + * - Not restore the vector context & lose the most significant bits + * of all vector registers. + * + * Neither of those options is acceptable. We cannot restore the least + * significant bits of the registers now & only restore the most + * significant bits later because the most significant bits of any + * vector registers whose aliased FP register is modified now will have + * been zeroed. We'd have no way to know that when restoring the vector + * context & thus may load an outdated value for the most significant + * bits of a vector register. + */ + if (!msa && !thread_msa_context_live()) + return own_fpu(1); + + /* + * This task is using or has previously used MSA. Thus we require + * that Status.FR == 1. + */ + was_fpu_owner = is_fpu_owner(); + err = own_fpu(0); + if (err) + return err; + + enable_msa(); + write_msa_csr(current->thread.fpu.msacsr); + set_thread_flag(TIF_USEDMSA); + + /* + * If this is the first time that the task is using MSA and it has + * previously used scalar FP in this time slice then we already nave + * FP context which we shouldn't clobber. + */ + if (!test_and_set_thread_flag(TIF_MSA_CTX_LIVE) && was_fpu_owner) + return 0; + + /* We need to restore the vector context. */ + restore_msa(current); + return 0; +} + asmlinkage void do_cpu(struct pt_regs *regs) { enum ctx_state prev_state; @@ -1153,12 +1225,7 @@ asmlinkage void do_cpu(struct pt_regs *regs) /* Fall through. */ case 1: - if (used_math()) /* Using the FPU again. */ - err = own_fpu(1); - else { /* First time FPU user. */ - err = init_fpu(); - set_used_math(); - } + err = enable_restore_fp_context(0); if (!raw_cpu_has_fpu || err) { int sig; @@ -1183,6 +1250,27 @@ asmlinkage void do_cpu(struct pt_regs *regs) exception_exit(prev_state); } +asmlinkage void do_msa(struct pt_regs *regs) +{ + enum ctx_state prev_state; + int err; + + prev_state = exception_enter(); + + if (!cpu_has_msa || test_thread_flag(TIF_32BIT_FPREGS)) { + force_sig(SIGILL, current); + goto out; + } + + die_if_kernel("do_msa invoked from kernel context!", regs); + + err = enable_restore_fp_context(1); + if (err) + force_sig(SIGILL, current); +out: + exception_exit(prev_state); +} + asmlinkage void do_mdmx(struct pt_regs *regs) { enum ctx_state prev_state; @@ -2041,6 +2129,7 @@ void __init trap_init(void) set_except_vector(15, handle_fpe); set_except_vector(16, handle_ftlb); + set_except_vector(21, handle_msa); set_except_vector(22, handle_mdmx); if (cpu_has_mcheck)