diff --git a/arch/nds32/Kconfig.cpu b/arch/nds32/Kconfig.cpu index bb06a1b7eef0c0914b1dc7013f7774a960088e7b..6482ed877f974b787e711e046a92ec69617c6e8b 100644 --- a/arch/nds32/Kconfig.cpu +++ b/arch/nds32/Kconfig.cpu @@ -28,6 +28,19 @@ config LAZY_FPU For nomal case, say Y. +config SUPPORT_DENORMAL_ARITHMETIC + bool "Denormal arithmetic support" + depends on FPU + default n + help + Say Y here to enable arithmetic on denormalized numbers. Enabling + this feature can improve the precision of very small results. + However, the performance loss in floating-point calculations may + be significant because of the additional FPU exceptions. + + If the precision of very small results is not critical, + say N to avoid the performance loss. + config HWZOL bool "hardware zero overhead loop support" depends on CPU_D10 || CPU_D15 diff --git a/arch/nds32/include/asm/elf.h b/arch/nds32/include/asm/elf.h index f5f9cf7e054401431f89b72af891c5a44b20baba..95f3ea253e4c34e269c9b5d3ba1ccbc0a98d1f20 100644 --- a/arch/nds32/include/asm/elf.h +++ b/arch/nds32/include/asm/elf.h @@ -9,6 +9,7 @@ */ #include +#include typedef unsigned long elf_greg_t; typedef unsigned long elf_freg_t[3]; @@ -159,8 +160,18 @@ struct elf32_hdr; #endif + +#if IS_ENABLED(CONFIG_FPU) +#define FPU_AUX_ENT NEW_AUX_ENT(AT_FPUCW, FPCSR_INIT) +#else +#define FPU_AUX_ENT NEW_AUX_ENT(AT_IGNORE, 0) +#endif + #define ARCH_DLINFO \ do { \ + /* Optional FPU initialization */ \ + FPU_AUX_ENT; \ + \ NEW_AUX_ENT(AT_SYSINFO_EHDR, \ (elf_addr_t)current->mm->context.vdso); \ } while (0) diff --git a/arch/nds32/include/asm/fpu.h b/arch/nds32/include/asm/fpu.h index 9b1107b58e23ec74729f29caf00a09a51ac3773f..019f1bcfc5ee17e4f5390ebaa7806ec90395e9f2 100644 --- a/arch/nds32/include/asm/fpu.h +++ b/arch/nds32/include/asm/fpu.h @@ -28,7 +28,18 @@ extern int do_fpuemu(struct pt_regs *regs, struct fpu_struct *fpu); #define sNAN64 0xFFFFFFFFFFFFFFFFULL #define sNAN32 0xFFFFFFFFUL +#if IS_ENABLED(CONFIG_SUPPORT_DENORMAL_ARITHMETIC) +/* + * 
Denormalized numbers are not supported by the nds32 FPU. Hence an + * operation is treated as an underflow when its final result is a + * denormalized number. To enhance precision, the underflow exception + * trap is enabled by default and the kernel re-executes the operation + * in the FPU emulator when it gets an underflow exception. + */ +#define FPCSR_INIT FPCSR_mskUDFE +#else #define FPCSR_INIT 0x0UL +#endif extern const struct fpu_struct init_fpuregs; diff --git a/arch/nds32/include/asm/syscalls.h b/arch/nds32/include/asm/syscalls.h index 78778ecff60c44efd0cb2f663699d22bc2dc5b42..da32101b455d764a125649f3791cc3f4332db15c 100644 --- a/arch/nds32/include/asm/syscalls.h +++ b/arch/nds32/include/asm/syscalls.h @@ -7,6 +7,7 @@ asmlinkage long sys_cacheflush(unsigned long addr, unsigned long len, unsigned int op); asmlinkage long sys_fadvise64_64_wrapper(int fd, int advice, loff_t offset, loff_t len); asmlinkage long sys_rt_sigreturn_wrapper(void); +asmlinkage long sys_udftrap(int option); #include diff --git a/arch/nds32/include/uapi/asm/auxvec.h b/arch/nds32/include/uapi/asm/auxvec.h index 56043ce4972f052ea9da1c7c52b19a8232abaf68..2d3213f5e59583162babb3b6ad05eafa116828ad 100644 --- a/arch/nds32/include/uapi/asm/auxvec.h +++ b/arch/nds32/include/uapi/asm/auxvec.h @@ -4,6 +4,13 @@ #ifndef __ASM_AUXVEC_H #define __ASM_AUXVEC_H +/* + * This entry gives some information about the FPU initialization + * performed by the kernel. + */ +#define AT_FPUCW 18 /* Used FPU control word. */ + + /* VDSO location */ #define AT_SYSINFO_EHDR 33 diff --git a/arch/nds32/include/uapi/asm/sigcontext.h b/arch/nds32/include/uapi/asm/sigcontext.h index 1257a78e3ae19ffd35fb2b80e6eeb0e65571dfe7..58afc416473e578817a23073693dfd87a61251c1 100644 --- a/arch/nds32/include/uapi/asm/sigcontext.h +++ b/arch/nds32/include/uapi/asm/sigcontext.h @@ -12,6 +12,15 @@ struct fpu_struct { unsigned long long fd_regs[32]; unsigned long fpcsr; + /* + * UDF_trap is used to recognize whether underflow trap is enabled + * or not. 
When UDF_trap is non-zero, this process will be trapped and then + * get a SIGFPE signal when encountering an underflow exception. + * UDF_trap is only modified through the udftrap syscall. Therefore, + * UDF_trap needn't be saved or loaded to context in each context + * switch. + */ + unsigned long UDF_trap; }; struct zol_struct { diff --git a/arch/nds32/include/uapi/asm/udftrap.h b/arch/nds32/include/uapi/asm/udftrap.h new file mode 100644 index 0000000000000000000000000000000000000000..433f79d679c0acedcbf4dfd6e356b3fc2047998d --- /dev/null +++ b/arch/nds32/include/uapi/asm/udftrap.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2005-2018 Andes Technology Corporation */ +#ifndef _ASM_SETFPUTRAP +#define _ASM_SETFPUTRAP + +/* + * Options for the udftrap system call + */ +#define DISABLE_UDFTRAP 0 /* disable underflow exception trap */ +#define ENABLE_UDFTRAP 1 /* enable underflow exception trap */ +#define GET_UDFTRAP 2 /* only get underflow exception trap status */ + +#endif /* _ASM_SETFPUTRAP */ diff --git a/arch/nds32/include/uapi/asm/unistd.h b/arch/nds32/include/uapi/asm/unistd.h index 603e826e04491a5167156961829e9f4ed9ff6e6d..c2c3a3e340836ff3bee656fb31ca9578ce904f08 100644 --- a/arch/nds32/include/uapi/asm/unistd.h +++ b/arch/nds32/include/uapi/asm/unistd.h @@ -9,4 +9,6 @@ /* Additional NDS32 specific syscalls. */ #define __NR_cacheflush (__NR_arch_specific_syscall) +#define __NR_udftrap (__NR_arch_specific_syscall + 1) __SYSCALL(__NR_cacheflush, sys_cacheflush) +__SYSCALL(__NR_udftrap, sys_udftrap) diff --git a/arch/nds32/kernel/fpu.c b/arch/nds32/kernel/fpu.c index 2942df6f93e66a00684c96ee8231730e86b5eff9..fddd40c7a16f9880d2082cb71aa3431ac4192ac5 100644 --- a/arch/nds32/kernel/fpu.c +++ b/arch/nds32/kernel/fpu.c @@ -12,7 +12,10 @@ const struct fpu_struct init_fpuregs = { .fd_regs = {[0 ... 
31] = sNAN64}, - .fpcsr = FPCSR_INIT + .fpcsr = FPCSR_INIT, +#if IS_ENABLED(CONFIG_SUPPORT_DENORMAL_ARITHMETIC) + .UDF_trap = 0 +#endif }; void save_fpu(struct task_struct *tsk) @@ -174,6 +177,9 @@ inline void do_fpu_context_switch(struct pt_regs *regs) } else { /* First time FPU user. */ load_fpu(&init_fpuregs); +#if IS_ENABLED(CONFIG_SUPPORT_DENORMAL_ARITHMETIC) + current->thread.fpu.UDF_trap = init_fpuregs.UDF_trap; +#endif set_used_math(); } @@ -183,10 +189,12 @@ inline void fill_sigfpe_signo(unsigned int fpcsr, int *signo) { if (fpcsr & FPCSR_mskOVFT) *signo = FPE_FLTOVF; - else if (fpcsr & FPCSR_mskIVOT) - *signo = FPE_FLTINV; +#ifndef CONFIG_SUPPORT_DENORMAL_ARITHMETIC else if (fpcsr & FPCSR_mskUDFT) *signo = FPE_FLTUND; +#endif + else if (fpcsr & FPCSR_mskIVOT) + *signo = FPE_FLTINV; else if (fpcsr & FPCSR_mskDBZT) *signo = FPE_FLTDIV; else if (fpcsr & FPCSR_mskIEXT) @@ -197,11 +205,21 @@ inline void handle_fpu_exception(struct pt_regs *regs) { unsigned int fpcsr; int si_code = 0, si_signo = SIGFPE; + /* Trap causes that are handed to do_fpuemu() for re-execution. */ +#if IS_ENABLED(CONFIG_SUPPORT_DENORMAL_ARITHMETIC) + unsigned long redo_except = FPCSR_mskDNIT|FPCSR_mskUDFT; +#else + unsigned long redo_except = FPCSR_mskDNIT; +#endif lose_fpu(); fpcsr = current->thread.fpu.fpcsr; - if (fpcsr & FPCSR_mskDNIT) { + if (fpcsr & redo_except) { +#if IS_ENABLED(CONFIG_SUPPORT_DENORMAL_ARITHMETIC) + if (fpcsr & FPCSR_mskUDFT) + current->thread.fpu.fpcsr &= ~FPCSR_mskIEX; +#endif si_signo = do_fpuemu(regs, &current->thread.fpu); fpcsr = current->thread.fpu.fpcsr; if (!si_signo) diff --git a/arch/nds32/kernel/sys_nds32.c b/arch/nds32/kernel/sys_nds32.c index 9de93ab4c52b7244960ec83163f92d9253df67bb..0835277636ce31f1a3ba75dbbcd33a7d0f0d31eb 100644 --- a/arch/nds32/kernel/sys_nds32.c +++ b/arch/nds32/kernel/sys_nds32.c @@ -6,6 +6,8 @@ #include #include +#include +#include SYSCALL_DEFINE6(mmap2, unsigned long, addr, unsigned long, len, unsigned long, prot, unsigned long, flags, @@ -48,3 +50,33 @@ SYSCALL_DEFINE3(cacheflush, unsigned int, 
start, unsigned int, end, int, cache) return 0; } + +SYSCALL_DEFINE1(udftrap, int, option) +{ +#if IS_ENABLED(CONFIG_SUPPORT_DENORMAL_ARITHMETIC) + int old_udftrap; /* previous setting, returned to the caller */ + + if (!used_math()) { + load_fpu(&init_fpuregs); + current->thread.fpu.UDF_trap = init_fpuregs.UDF_trap; + set_used_math(); + } + + old_udftrap = current->thread.fpu.UDF_trap; + switch (option) { + case DISABLE_UDFTRAP: + current->thread.fpu.UDF_trap = 0; + break; + case ENABLE_UDFTRAP: + current->thread.fpu.UDF_trap = FPCSR_mskUDFE; + break; + case GET_UDFTRAP: + break; + default: + return -EINVAL; + } + return old_udftrap; +#else + return -EOPNOTSUPP; /* ENOTSUPP is kernel-internal, not a userspace errno */ +#endif +} diff --git a/arch/nds32/math-emu/fpuemu.c b/arch/nds32/math-emu/fpuemu.c index 2a01333d6e5fef75d500e02d8e2c619bbd031d30..75cf1643fa78e6ede16924169cdddb49187e05ce 100644 --- a/arch/nds32/math-emu/fpuemu.c +++ b/arch/nds32/math-emu/fpuemu.c @@ -304,7 +304,12 @@ static int fpu_emu(struct fpu_struct *fpu_reg, unsigned long insn) /* * If an exception is required, generate a tidy SIGFPE exception. */ +#if IS_ENABLED(CONFIG_SUPPORT_DENORMAL_ARITHMETIC) + if (((fpu_reg->fpcsr << 5) & fpu_reg->fpcsr & FPCSR_mskALLE_NO_UDFE) || + ((fpu_reg->fpcsr & FPCSR_mskUDF) && (fpu_reg->UDF_trap))) +#else if ((fpu_reg->fpcsr << 5) & fpu_reg->fpcsr & FPCSR_mskALLE) +#endif return SIGFPE; return 0; }