提交 44e92e03 编写于 作者: V Vincent Chen 提交者: Greentime Hu

nds32: support denormalized result through FP emulator

Currently, the nds32 FPU does not support the arithmetic of denormalized
numbers. When the nds32 FPU finds the result of the instruction is a
denormalized number, the nds32 FPU considers it to be an underflow condition
and rounds the result to an appropriate number. This may cause some loss
of precision. This commit proposes a solution to re-execute the
instruction by the FPU emulator to enhance the precision. To transfer
calculations from user space to kernel space, this feature will enable
the underflow exception trap by default. Enabling this feature may cause
some side effects:
  1. Performance loss due to extra FPU exception
  2. Need another scheme to control real underflow trap
     A new parameter, UDF_trap, which belongs to the FPU context, is used
     to control the underflow trap.

User can configure this feature via CONFIG_SUPPORT_DENORMAL_ARITHMETIC
Signed-off-by: Vincent Chen <vincentc@andestech.com>
Acked-by: Greentime Hu <greentime@andestech.com>
Signed-off-by: Greentime Hu <greentime@andestech.com>
上级 1ac83250
...@@ -28,6 +28,19 @@ config LAZY_FPU ...@@ -28,6 +28,19 @@ config LAZY_FPU
For nomal case, say Y. For nomal case, say Y.
config SUPPORT_DENORMAL_ARITHMETIC
bool "Denormal arithmetic support"
depends on FPU
default n
help
Say Y here to enable arithmetic on denormalized numbers. Enabling
this feature can enhance the precision for tiny numbers.
However, the performance loss in floating-point calculations may
be significant due to the additional FPU exceptions.
If the precision of calculations on tiny numbers is not critical,
say N to prevent the performance loss.
config HWZOL config HWZOL
bool "hardware zero overhead loop support" bool "hardware zero overhead loop support"
depends on CPU_D10 || CPU_D15 depends on CPU_D10 || CPU_D15
......
...@@ -9,6 +9,7 @@ ...@@ -9,6 +9,7 @@
*/ */
#include <asm/ptrace.h> #include <asm/ptrace.h>
#include <asm/fpu.h>
typedef unsigned long elf_greg_t; typedef unsigned long elf_greg_t;
typedef unsigned long elf_freg_t[3]; typedef unsigned long elf_freg_t[3];
...@@ -159,8 +160,18 @@ struct elf32_hdr; ...@@ -159,8 +160,18 @@ struct elf32_hdr;
#endif #endif
#if IS_ENABLED(CONFIG_FPU)
#define FPU_AUX_ENT NEW_AUX_ENT(AT_FPUCW, FPCSR_INIT)
#else
#define FPU_AUX_ENT NEW_AUX_ENT(AT_IGNORE, 0)
#endif
#define ARCH_DLINFO \ #define ARCH_DLINFO \
do { \ do { \
/* Optional FPU initialization */ \
FPU_AUX_ENT; \
\
NEW_AUX_ENT(AT_SYSINFO_EHDR, \ NEW_AUX_ENT(AT_SYSINFO_EHDR, \
(elf_addr_t)current->mm->context.vdso); \ (elf_addr_t)current->mm->context.vdso); \
} while (0) } while (0)
......
...@@ -28,7 +28,18 @@ extern int do_fpuemu(struct pt_regs *regs, struct fpu_struct *fpu); ...@@ -28,7 +28,18 @@ extern int do_fpuemu(struct pt_regs *regs, struct fpu_struct *fpu);
#define sNAN64 0xFFFFFFFFFFFFFFFFULL #define sNAN64 0xFFFFFFFFFFFFFFFFULL
#define sNAN32 0xFFFFFFFFUL #define sNAN32 0xFFFFFFFFUL
#if IS_ENABLED(CONFIG_SUPPORT_DENORMAL_ARITHMETIC)
/*
* Denormalized number is unsupported by nds32 FPU. Hence the operation
* is treated as underflow cases when the final result is a denormalized
* number. To enhance precision, underflow exception trap should be
* enabled by default and the kernel will re-execute it by the FPU emulator
* when getting underflow exception.
*/
#define FPCSR_INIT FPCSR_mskUDFE
#else
#define FPCSR_INIT 0x0UL #define FPCSR_INIT 0x0UL
#endif
extern const struct fpu_struct init_fpuregs; extern const struct fpu_struct init_fpuregs;
......
...@@ -7,6 +7,7 @@ ...@@ -7,6 +7,7 @@
asmlinkage long sys_cacheflush(unsigned long addr, unsigned long len, unsigned int op); asmlinkage long sys_cacheflush(unsigned long addr, unsigned long len, unsigned int op);
asmlinkage long sys_fadvise64_64_wrapper(int fd, int advice, loff_t offset, loff_t len); asmlinkage long sys_fadvise64_64_wrapper(int fd, int advice, loff_t offset, loff_t len);
asmlinkage long sys_rt_sigreturn_wrapper(void); asmlinkage long sys_rt_sigreturn_wrapper(void);
asmlinkage long sys_udftrap(int option);
#include <asm-generic/syscalls.h> #include <asm-generic/syscalls.h>
......
...@@ -4,6 +4,13 @@ ...@@ -4,6 +4,13 @@
#ifndef __ASM_AUXVEC_H #ifndef __ASM_AUXVEC_H
#define __ASM_AUXVEC_H #define __ASM_AUXVEC_H
/*
* This entry gives some information about the FPU initialization
* performed by the kernel.
*/
#define AT_FPUCW 18 /* Used FPU control word. */
/* VDSO location */ /* VDSO location */
#define AT_SYSINFO_EHDR 33 #define AT_SYSINFO_EHDR 33
......
...@@ -12,6 +12,15 @@ ...@@ -12,6 +12,15 @@
struct fpu_struct { struct fpu_struct {
unsigned long long fd_regs[32]; unsigned long long fd_regs[32];
unsigned long fpcsr; unsigned long fpcsr;
/*
* UDF_trap is used to recognize whether the underflow trap is enabled
* or not. When UDF_trap is non-zero, this process will be trapped and
* then get a SIGFPE signal when encountering an underflow exception.
* UDF_trap is only modified through the udftrap syscall. Therefore,
* UDF_trap needn't be saved or loaded to context in each context
* switch.
*/
unsigned long UDF_trap;
}; };
struct zol_struct { struct zol_struct {
......
/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright (C) 2005-2018 Andes Technology Corporation */
#ifndef _ASM_SETFPUTRAP
#define _ASM_SETFPUTRAP
/*
 * Values accepted as the "option" argument of the udftrap system call.
 */
#define DISABLE_UDFTRAP 0 /* disable underflow exception trap */
#define ENABLE_UDFTRAP 1 /* enable underflow exception trap */
#define GET_UDFTRAP 2 /* only get underflow exception trap status */
#endif /* _ASM_SETFPUTRAP */
...@@ -9,4 +9,6 @@ ...@@ -9,4 +9,6 @@
/* Additional NDS32 specific syscalls. */ /* Additional NDS32 specific syscalls. */
#define __NR_cacheflush (__NR_arch_specific_syscall) #define __NR_cacheflush (__NR_arch_specific_syscall)
#define __NR_udftrap (__NR_arch_specific_syscall + 1)
__SYSCALL(__NR_cacheflush, sys_cacheflush) __SYSCALL(__NR_cacheflush, sys_cacheflush)
__SYSCALL(__NR_udftrap, sys_udftrap)
...@@ -12,7 +12,10 @@ ...@@ -12,7 +12,10 @@
const struct fpu_struct init_fpuregs = { const struct fpu_struct init_fpuregs = {
.fd_regs = {[0 ... 31] = sNAN64}, .fd_regs = {[0 ... 31] = sNAN64},
.fpcsr = FPCSR_INIT .fpcsr = FPCSR_INIT,
#if IS_ENABLED(CONFIG_SUPPORT_DENORMAL_ARITHMETIC)
.UDF_trap = 0
#endif
}; };
void save_fpu(struct task_struct *tsk) void save_fpu(struct task_struct *tsk)
...@@ -174,6 +177,9 @@ inline void do_fpu_context_switch(struct pt_regs *regs) ...@@ -174,6 +177,9 @@ inline void do_fpu_context_switch(struct pt_regs *regs)
} else { } else {
/* First time FPU user. */ /* First time FPU user. */
load_fpu(&init_fpuregs); load_fpu(&init_fpuregs);
#if IS_ENABLED(CONFIG_SUPPORT_DENORMAL_ARITHMETIC)
current->thread.fpu.UDF_trap = init_fpuregs.UDF_trap;
#endif
set_used_math(); set_used_math();
} }
...@@ -183,10 +189,12 @@ inline void fill_sigfpe_signo(unsigned int fpcsr, int *signo) ...@@ -183,10 +189,12 @@ inline void fill_sigfpe_signo(unsigned int fpcsr, int *signo)
{ {
if (fpcsr & FPCSR_mskOVFT) if (fpcsr & FPCSR_mskOVFT)
*signo = FPE_FLTOVF; *signo = FPE_FLTOVF;
else if (fpcsr & FPCSR_mskIVOT) #ifndef CONFIG_SUPPORT_DENORMAL_ARITHMETIC
*signo = FPE_FLTINV;
else if (fpcsr & FPCSR_mskUDFT) else if (fpcsr & FPCSR_mskUDFT)
*signo = FPE_FLTUND; *signo = FPE_FLTUND;
#endif
else if (fpcsr & FPCSR_mskIVOT)
*signo = FPE_FLTINV;
else if (fpcsr & FPCSR_mskDBZT) else if (fpcsr & FPCSR_mskDBZT)
*signo = FPE_FLTDIV; *signo = FPE_FLTDIV;
else if (fpcsr & FPCSR_mskIEXT) else if (fpcsr & FPCSR_mskIEXT)
...@@ -197,11 +205,20 @@ inline void handle_fpu_exception(struct pt_regs *regs) ...@@ -197,11 +205,20 @@ inline void handle_fpu_exception(struct pt_regs *regs)
{ {
unsigned int fpcsr; unsigned int fpcsr;
int si_code = 0, si_signo = SIGFPE; int si_code = 0, si_signo = SIGFPE;
#if IS_ENABLED(CONFIG_SUPPORT_DENORMAL_ARITHMETIC)
unsigned long redo_except = FPCSR_mskDNIT|FPCSR_mskUDFT;
#else
unsigned long redo_except = FPCSR_mskDNIT;
#endif
lose_fpu(); lose_fpu();
fpcsr = current->thread.fpu.fpcsr; fpcsr = current->thread.fpu.fpcsr;
if (fpcsr & FPCSR_mskDNIT) { if (fpcsr & redo_except) {
#if IS_ENABLED(CONFIG_SUPPORT_DENORMAL_ARITHMETIC)
if (fpcsr & FPCSR_mskUDFT)
current->thread.fpu.fpcsr &= ~FPCSR_mskIEX;
#endif
si_signo = do_fpuemu(regs, &current->thread.fpu); si_signo = do_fpuemu(regs, &current->thread.fpu);
fpcsr = current->thread.fpu.fpcsr; fpcsr = current->thread.fpu.fpcsr;
if (!si_signo) if (!si_signo)
......
...@@ -6,6 +6,8 @@ ...@@ -6,6 +6,8 @@
#include <asm/cachectl.h> #include <asm/cachectl.h>
#include <asm/proc-fns.h> #include <asm/proc-fns.h>
#include <asm/udftrap.h>
#include <asm/fpu.h>
SYSCALL_DEFINE6(mmap2, unsigned long, addr, unsigned long, len, SYSCALL_DEFINE6(mmap2, unsigned long, addr, unsigned long, len,
unsigned long, prot, unsigned long, flags, unsigned long, prot, unsigned long, flags,
...@@ -48,3 +50,33 @@ SYSCALL_DEFINE3(cacheflush, unsigned int, start, unsigned int, end, int, cache) ...@@ -48,3 +50,33 @@ SYSCALL_DEFINE3(cacheflush, unsigned int, start, unsigned int, end, int, cache)
return 0; return 0;
} }
/*
 * udftrap - query or change the calling task's underflow-trap setting.
 *
 * @option: DISABLE_UDFTRAP, ENABLE_UDFTRAP or GET_UDFTRAP.
 *
 * The setting is kept in current->thread.fpu.UDF_trap. It is stored as 0
 * when disabled and as FPCSR_mskUDFE (not 1) when enabled.
 *
 * If the task has not used the FPU yet, its FPU state is initialised from
 * init_fpuregs first, in the same way as the first-use path of
 * do_fpu_context_switch(). This happens before @option is validated, so it
 * also takes place when -EINVAL is returned.
 *
 * Return: the value of UDF_trap before this call on success, -EINVAL for
 * an unknown @option, or -EOPNOTSUPP when the kernel is built without
 * CONFIG_SUPPORT_DENORMAL_ARITHMETIC.
 */
SYSCALL_DEFINE1(udftrap, int, option)
{
#if IS_ENABLED(CONFIG_SUPPORT_DENORMAL_ARITHMETIC)
	int old_udftrap;

	if (!used_math()) {
		load_fpu(&init_fpuregs);
		current->thread.fpu.UDF_trap = init_fpuregs.UDF_trap;
		set_used_math();
	}

	old_udftrap = current->thread.fpu.UDF_trap;

	switch (option) {
	case DISABLE_UDFTRAP:
		current->thread.fpu.UDF_trap = 0;
		break;
	case ENABLE_UDFTRAP:
		current->thread.fpu.UDF_trap = FPCSR_mskUDFE;
		break;
	case GET_UDFTRAP:
		/* Query only: leave the setting untouched. */
		break;
	default:
		return -EINVAL;
	}

	return old_udftrap;
#else
	/*
	 * ENOTSUPP (524) is a kernel-internal code that userspace cannot
	 * decode; EOPNOTSUPP is the errno meant to cross the syscall
	 * boundary.
	 */
	return -EOPNOTSUPP;
#endif
}
...@@ -304,7 +304,12 @@ static int fpu_emu(struct fpu_struct *fpu_reg, unsigned long insn) ...@@ -304,7 +304,12 @@ static int fpu_emu(struct fpu_struct *fpu_reg, unsigned long insn)
/* /*
* If an exception is required, generate a tidy SIGFPE exception. * If an exception is required, generate a tidy SIGFPE exception.
*/ */
#if IS_ENABLED(CONFIG_SUPPORT_DENORMAL_ARITHMETIC)
if (((fpu_reg->fpcsr << 5) & fpu_reg->fpcsr & FPCSR_mskALLE_NO_UDFE) ||
((fpu_reg->fpcsr & FPCSR_mskUDF) && (fpu_reg->UDF_trap)))
#else
if ((fpu_reg->fpcsr << 5) & fpu_reg->fpcsr & FPCSR_mskALLE) if ((fpu_reg->fpcsr << 5) & fpu_reg->fpcsr & FPCSR_mskALLE)
#endif
return SIGFPE; return SIGFPE;
return 0; return 0;
} }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册