Commit b7bcbe95 authored by B bellard

ARM VFP support (Paul Brook)


git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@1309 c046a42c-6fe2-441c-8c8c-71466251a162
Parent 55754d9e
......@@ -15,6 +15,7 @@ version 0.6.2:
- PC parallel port support (Mark Jonckheere)
- initial SPARC64 support (Blue Swirl)
- armv5te user mode support (Paul Brook)
- ARM VFP support (Paul Brook)
version 0.6.1:
......
......@@ -259,6 +259,10 @@ ifeq ($(TARGET_BASE_ARCH), sparc)
LIBOBJS+= op_helper.o helper.o
endif
ifeq ($(TARGET_BASE_ARCH), arm)
LIBOBJS+= op_helper.o
endif
# NOTE: the disassembler code is only needed for debugging
LIBOBJS+=disas.o
ifeq ($(findstring i386, $(TARGET_ARCH) $(ARCH)),i386)
......
......@@ -346,7 +346,8 @@ int cpu_exec(CPUState *env1)
cs_base = env->segs[R_CS].base;
pc = cs_base + env->eip;
#elif defined(TARGET_ARM)
flags = env->thumb;
flags = env->thumb | (env->vfp.vec_len << 1)
| (env->vfp.vec_stride << 4);
cs_base = 0;
pc = env->regs[15];
#elif defined(TARGET_SPARC)
......@@ -619,6 +620,7 @@ int cpu_exec(CPUState *env1)
#endif
#elif defined(TARGET_ARM)
env->cpsr = compute_cpsr();
/* XXX: Save/restore host fpu exception state?. */
#elif defined(TARGET_SPARC)
#elif defined(TARGET_PPC)
#else
......
......@@ -29,6 +29,14 @@
#define EXCP_PREFETCH_ABORT 3
#define EXCP_DATA_ABORT 4
/* We currently assume float and double are IEEE single and double
precision respectively.
Doing runtime conversions is tricky because VFP registers may contain
integer values (eg. as the result of a FTOSI instruction).
A double precision register load/store must also load/store the
corresponding single precision pair, although it is undefined how
these overlap. */
typedef struct CPUARMState {
uint32_t regs[16];
uint32_t cpsr;
......@@ -50,6 +58,7 @@ typedef struct CPUARMState {
int interrupt_request;
struct TranslationBlock *current_tb;
int user_mode_only;
uint32_t address;
/* in order to avoid passing too many arguments to the memory
write helpers, we store some rarely used information in the CPU
......@@ -58,6 +67,25 @@ typedef struct CPUARMState {
written */
unsigned long mem_write_vaddr; /* target virtual addr at which the
memory was written */
/* VFP coprocessor state. */
struct {
union {
float s[32];
double d[16];
} regs;
/* We store these fpcsr fields separately for convenience. */
int vec_len;
int vec_stride;
uint32_t fpscr;
/* Temporary variables if we don't have spare fp regs. */
float tmp0s, tmp1s;
double tmp0d, tmp1d;
} vfp;
/* user data */
void *opaque;
} CPUARMState;
......
......@@ -24,13 +24,16 @@ register uint32_t T0 asm(AREG1);
register uint32_t T1 asm(AREG2);
register uint32_t T2 asm(AREG3);
/* TODO: Put these in FP regs on targets that have such things. */
/* It is ok for FT0s and FT0d to overlap. Likewise FT1s and FT1d. */
#define FT0s env->vfp.tmp0s
#define FT1s env->vfp.tmp1s
#define FT0d env->vfp.tmp0d
#define FT1d env->vfp.tmp1d
#include "cpu.h"
#include "exec-all.h"
void cpu_lock(void);
void cpu_unlock(void);
void cpu_loop_exit(void);
/* Implemented CPSR bits. */
#define CACHED_CPSR_BITS 0xf8000000
static inline int compute_cpsr(void)
......@@ -51,3 +54,24 @@ static inline void regs_to_env(void)
int cpu_arm_handle_mmu_fault (CPUState *env, target_ulong address, int rw,
int is_user, int is_softmmu);
/* In op_helper.c */
void cpu_lock(void);
void cpu_unlock(void);
void cpu_loop_exit(void);
void raise_exception(int);
void do_vfp_abss(void);
void do_vfp_absd(void);
void do_vfp_negs(void);
void do_vfp_negd(void);
void do_vfp_sqrts(void);
void do_vfp_sqrtd(void);
void do_vfp_cmps(void);
void do_vfp_cmpd(void);
void do_vfp_cmpes(void);
void do_vfp_cmped(void);
void do_vfp_set_fpscr(void);
void do_vfp_get_fpscr(void);
......@@ -2,6 +2,7 @@
* ARM micro operations
*
* Copyright (c) 2003 Fabrice Bellard
* Copyright (c) 2005 CodeSourcery, LLC
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
......@@ -857,17 +858,259 @@ void OPPROTO op_undef_insn(void)
cpu_loop_exit();
}
/* thread support */
/* VFP support.  We follow the convention used for VFP instructions:
   single precision routines have an "s" suffix, double precision a
   "d" suffix. */
spinlock_t global_cpu_lock = SPIN_LOCK_UNLOCKED;
#define VFP_OP(name, p) void OPPROTO op_vfp_##name##p(void)
void cpu_lock(void)
/* Arithmetic on the VFP operand temporaries.  Each VFP_BINOP expansion
   defines both the single ("s") and double ("d") precision micro
   operation: FT0 = FT0 <op> FT1, result left in FT0. */
#define VFP_BINOP(name, op) \
VFP_OP(name, s) \
{ \
FT0s = FT0s op FT1s; \
} \
VFP_OP(name, d) \
{ \
FT0d = FT0d op FT1d; \
}
VFP_BINOP(add, +)
VFP_BINOP(sub, -)
VFP_BINOP(mul, *)
VFP_BINOP(div, /)
#undef VFP_BINOP
/* Ops whose bodies live in op_helper.c: each expansion forwards to the
   corresponding do_vfp_<name>{s,d}() helper. */
#define VFP_HELPER(name) \
VFP_OP(name, s) \
{ \
do_vfp_##name##s(); \
} \
VFP_OP(name, d) \
{ \
do_vfp_##name##d(); \
}
VFP_HELPER(abs)
VFP_HELPER(sqrt)
VFP_HELPER(cmp)
VFP_HELPER(cmpe)
#undef VFP_HELPER
/* Negation via the C unary minus operator.
   XXX: Will this do the right thing for NaNs?  Architecturally this
   should just invert the sign bit without looking at the rest of the
   value. */
VFP_OP(neg, s)
{
FT0s = -FT0s;
}
VFP_OP(neg, d)
{
FT0d = -FT0d;
}
/* Load +0.0 into the FT1 operand (used e.g. for compare-with-zero). */
VFP_OP(F1_ld0, s)
{
FT1s = 0.0f;
}
VFP_OP(F1_ld0, d)
{
FT1d = 0.0;
}
/* Helper routines to perform bitwise copies between float and int. */
/* Reinterpret a 32-bit unsigned integer's bit pattern as a single
   precision float (no numeric conversion takes place). */
static inline float vfp_itos(uint32_t i)
{
    union {
        uint32_t as_int;
        float as_float;
    } pun;

    pun.as_int = i;
    return pun.as_float;
}
/* Reinterpret a single precision float's bit pattern as a 32-bit
   unsigned integer (no numeric conversion takes place). */
static inline uint32_t vfp_stoi(float s)
{
    union {
        float as_float;
        uint32_t as_int;
    } pun;

    pun.as_float = s;
    return pun.as_int;
}
/* Integer to float conversion.  The integer source operand is held as
   a raw bit pattern in the single precision temporary FT0s (VFP keeps
   integer values in single precision registers); vfp_stoi extracts the
   bits without conversion. */
VFP_OP(uito, s)
{
FT0s = (float)(uint32_t)vfp_stoi(FT0s);
}
VFP_OP(uito, d)
{
FT0d = (double)(uint32_t)vfp_stoi(FT0s);
}
/* Signed variants. */
VFP_OP(sito, s)
{
FT0s = (float)(int32_t)vfp_stoi(FT0s);
}
VFP_OP(sito, d)
{
FT0d = (double)(int32_t)vfp_stoi(FT0s);
}
/* Float to integer conversion.  The integer result is stored back as a
   raw bit pattern in FT0s. */
VFP_OP(toui, s)
{
FT0s = vfp_itos((uint32_t)FT0s);
}
VFP_OP(toui, d)
{
FT0s = vfp_itos((uint32_t)FT0d);
}
VFP_OP(tosi, s)
{
FT0s = vfp_itos((int32_t)FT0s);
}
VFP_OP(tosi, d)
{
FT0s = vfp_itos((int32_t)FT0d);
}
/* TODO: Set rounding mode properly.  C float-to-int casts always
   truncate toward zero, so these "z" (round-towards-zero) variants are
   correct — but they are currently identical to the non-"z" versions
   above, which should instead honour the FPSCR rounding mode. */
VFP_OP(touiz, s)
{
FT0s = vfp_itos((uint32_t)FT0s);
}
VFP_OP(touiz, d)
{
FT0s = vfp_itos((uint32_t)FT0d);
}
VFP_OP(tosiz, s)
{
FT0s = vfp_itos((int32_t)FT0s);
}
VFP_OP(tosiz, d)
{
spin_lock(&global_cpu_lock);
FT0s = vfp_itos((int32_t)FT0d);
}
void cpu_unlock(void)
/* floating point conversion */
VFP_OP(fcvtd, s)
{
spin_unlock(&global_cpu_lock);
FT0d = (double)FT0s;
}
/* Double to single precision conversion. */
VFP_OP(fcvts, d)
{
FT0s = (float)FT0d;
}
/* Get and Put values from registers.  PARAM1 is the byte offset of the
   VFP register within CPUARMState, computed by the translator. */
VFP_OP(getreg_F0, d)
{
FT0d = *(double *)((char *) env + PARAM1);
}
VFP_OP(getreg_F0, s)
{
FT0s = *(float *)((char *) env + PARAM1);
}
VFP_OP(getreg_F1, d)
{
FT1d = *(double *)((char *) env + PARAM1);
}
VFP_OP(getreg_F1, s)
{
FT1s = *(float *)((char *) env + PARAM1);
}
VFP_OP(setreg_F0, d)
{
*(double *)((char *) env + PARAM1) = FT0d;
}
VFP_OP(setreg_F0, s)
{
*(float *)((char *) env + PARAM1) = FT0s;
}
/* NOTE(review): apparent debug leftover — loads single precision
   register s[3] into the double temporary FT0d (with an implicit
   float-to-double conversion).  No VFP instruction obviously maps to
   this; confirm whether it can be removed. */
VFP_OP(foobar, d)
{
FT0d = env->vfp.regs.s[3];
}
/* Read the full FPSCR into T0 via the do_vfp_get_fpscr() helper. */
void OPPROTO op_vfp_movl_T0_fpscr(void)
{
do_vfp_get_fpscr ();
}
/* Read only the condition flag bits (FPSCR[31:28]) from the cached
   fpscr field. */
void OPPROTO op_vfp_movl_T0_fpscr_flags(void)
{
T0 = env->vfp.fpscr & (0xf << 28);
}
/* Write T0 into the FPSCR via the do_vfp_set_fpscr() helper. */
void OPPROTO op_vfp_movl_fpscr_T0(void)
{
do_vfp_set_fpscr();
}
/* Move between FT0s and T0 (bitwise copy, no conversion). */
void OPPROTO op_vfp_mrs(void)
{
T0 = vfp_stoi(FT0s);
}
void OPPROTO op_vfp_msr(void)
{
FT0s = vfp_itos(T0);
}
/* Move between FT0d and {T0,T1}: split/assemble the double into its
   low (T0) and high (T1) 32-bit halves using the CPU_DoubleU union. */
void OPPROTO op_vfp_mrrd(void)
{
CPU_DoubleU u;
u.d = FT0d;
T0 = u.l.lower;
T1 = u.l.upper;
}
void OPPROTO op_vfp_mdrr(void)
{
CPU_DoubleU u;
u.l.lower = T0;
u.l.upper = T1;
FT0d = u.d;
}
/* Floating point load/store.  Address is in T1. */
/* Load a single precision value into FT0s. */
void OPPROTO op_vfp_lds(void)
{
FT0s = ldfl((void *)T1);
}
/* Load a double precision value into FT0d. */
void OPPROTO op_vfp_ldd(void)
{
FT0d = ldfq((void *)T1);
}
/* Store FT0s. */
void OPPROTO op_vfp_sts(void)
{
stfl((void *)T1, FT0s);
}
/* Store FT0d. */
void OPPROTO op_vfp_std(void)
{
stfq((void *)T1, FT0d);
}
/*
* ARM helper routines
*
* Copyright (c) 2005 CodeSourcery, LLC
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <math.h>
#include <fenv.h>
#include "exec.h"
/* If the host doesn't define C99 math intrinsics then use the normal
   operators.  This may generate excess exceptions, but it's probably
   near enough for most things.  Arguments are fully parenthesized so
   that compound expressions (ternaries, comparisons) expand safely. */
#ifndef isless
#define isless(x, y) ((x) < (y))
#endif
#ifndef isgreater
#define isgreater(x, y) ((x) > (y))
#endif
#ifndef isunordered
#define isunordered(x, y) (!(((x) < (y)) || ((x) >= (y))))
#endif
/* Record the pending exception number in the CPU state and longjmp
   back to the main execution loop; does not return. */
void raise_exception(int tt)
{
env->exception_index = tt;
cpu_loop_exit();
}
/* thread support */
/* Single global lock serialising CPU emulation across threads. */
spinlock_t global_cpu_lock = SPIN_LOCK_UNLOCKED;
/* Acquire the global CPU lock. */
void cpu_lock(void)
{
spin_lock(&global_cpu_lock);
}
/* Release the global CPU lock. */
void cpu_unlock(void)
{
spin_unlock(&global_cpu_lock);
}
/* VFP support. */
/* Single precision absolute value. */
void do_vfp_abss(void)
{
FT0s = fabsf(FT0s);
}
/* Double precision absolute value. */
void do_vfp_absd(void)
{
FT0d = fabs(FT0d);
}
/* Single precision square root; negative input raises a host FP
   exception, later collected via fetestexcept in do_vfp_get_fpscr. */
void do_vfp_sqrts(void)
{
FT0s = sqrtf(FT0s);
}
/* Double precision square root. */
void do_vfp_sqrtd(void)
{
FT0d = sqrt(FT0d);
}
/* We use an == operator first to generate the correct floating point
   exception.  Subsequent comparisons use the exception-safe macros.
   The 4-bit result written to FPSCR[31:28] encodes the compare flags:
   0xc equal, 0x8 less, 0x2 greater, 0x3 unordered. */
#define DO_VFP_cmp(p) \
void do_vfp_cmp##p(void) \
{ \
uint32_t flags; \
if (FT0##p == FT1##p) \
flags = 0xc; \
else if (isless (FT0##p, FT1##p)) \
flags = 0x8; \
else if (isgreater (FT0##p, FT1##p)) \
flags = 0x2; \
else /* unordered */ \
flags = 0x3; \
env->vfp.fpscr = (flags << 28) | (env->vfp.fpscr & 0x0fffffff); \
FORCE_RET(); \
}
DO_VFP_cmp(s)
DO_VFP_cmp(d)
#undef DO_VFP_cmp
/* We use a > operator first to get FP exceptions right: the plain
   comparison performs a signalling compare (raising invalid for NaNs,
   as FCMPE requires); the remaining tests use the quiet macros.
   Flag encoding is the same as DO_VFP_cmp. */
#define DO_VFP_cmpe(p) \
void do_vfp_cmpe##p(void) \
{ \
uint32_t flags; \
if (FT0##p > FT1##p) \
flags = 0x2; \
else if (isless (FT0##p, FT1##p)) \
flags = 0x8; \
else if (isunordered (FT0##p, FT1##p)) \
flags = 0x3; \
else /* equal */ \
flags = 0xc; \
env->vfp.fpscr = (flags << 28) | (env->vfp.fpscr & 0x0fffffff); \
FORCE_RET(); \
}
DO_VFP_cmpe(s)
DO_VFP_cmpe(d)
#undef DO_VFP_cmpe
/* Convert host <fenv.h> exception flag bits to the VFP cumulative
   exception bit layout: bit 0 invalid, bit 1 divide-by-zero,
   bit 2 overflow, bit 3 underflow, bit 4 inexact. */
int vfp_exceptbits_from_host(int host_bits)
{
    int vfp_bits = 0;

#ifdef FE_INVALID
    vfp_bits |= (host_bits & FE_INVALID) ? 1 : 0;
#endif
#ifdef FE_DIVBYZERO
    vfp_bits |= (host_bits & FE_DIVBYZERO) ? 2 : 0;
#endif
#ifdef FE_OVERFLOW
    vfp_bits |= (host_bits & FE_OVERFLOW) ? 4 : 0;
#endif
#ifdef FE_UNDERFLOW
    vfp_bits |= (host_bits & FE_UNDERFLOW) ? 8 : 0;
#endif
#ifdef FE_INEXACT
    vfp_bits |= (host_bits & FE_INEXACT) ? 0x10 : 0;
#endif
    return vfp_bits;
}
/* Convert VFP exception flag bits to host <fenv.h> form (the inverse
   of vfp_exceptbits_from_host).  The original definition was misnamed
   vfp_host_exceptbits_to_host, while the caller in do_vfp_set_fpscr
   invokes vfp_exceptbits_to_host — which previously had no definition.
   Define the correctly named function and keep the old name as a
   backward-compatible alias. */
int vfp_exceptbits_to_host(int target_bits)
{
    int host_bits = 0;
#ifdef FE_INVALID
    if (target_bits & 1)
        host_bits |= FE_INVALID;
#endif
#ifdef FE_DIVBYZERO
    if (target_bits & 2)
        host_bits |= FE_DIVBYZERO;
#endif
#ifdef FE_OVERFLOW
    if (target_bits & 4)
        host_bits |= FE_OVERFLOW;
#endif
#ifdef FE_UNDERFLOW
    if (target_bits & 8)
        host_bits |= FE_UNDERFLOW;
#endif
#ifdef FE_INEXACT
    if (target_bits & 0x10)
        host_bits |= FE_INEXACT;
#endif
    return host_bits;
}

/* Backward-compatible alias for the original (misspelled) name. */
int vfp_host_exceptbits_to_host(int target_bits)
{
    return vfp_exceptbits_to_host(target_bits);
}
/* Write T0 to the emulated FPSCR: caches the vector length/stride
   fields separately, and propagates rounding-mode and exception-enable
   changes to the host FPU via <fenv.h>. */
void do_vfp_set_fpscr(void)
{
int i;
uint32_t changed;
changed = env->vfp.fpscr;
/* Mask out the fields stored separately (vector length/stride). */
env->vfp.fpscr = (T0 & 0xffc8ffff);
env->vfp.vec_len = (T0 >> 16) & 7;
env->vfp.vec_stride = (T0 >> 20) & 3;
changed ^= T0;
/* Rounding-mode field (bits 23:22) changed: reprogram the host FPU. */
if (changed & (3 << 22)) {
i = (T0 >> 22) & 3;
switch (i) {
case 0:
i = FE_TONEAREST;
break;
case 1:
i = FE_UPWARD;
break;
case 2:
i = FE_DOWNWARD;
break;
case 3:
i = FE_TOWARDZERO;
break;
}
fesetround (i);
}
/* Clear host exception flags. */
feclearexcept(FE_ALL_EXCEPT);
/* NOTE(review): glibc declares feenableexcept as a function, not a
   macro, so this #ifdef block is likely never compiled in — verify.
   It also calls vfp_exceptbits_to_host(); confirm that name resolves. */
#ifdef feenableexcept
if (changed & 0x1f00) {
i = vfp_exceptbits_to_host((T0 >> 8) & 0x1f);
feenableexcept (i);
fedisableexcept (FE_ALL_EXCEPT & ~i);
}
#endif
/* XXX: FZ and DN are not implemented. */
}
/* Assemble the guest-visible FPSCR value into T0: the stored fpscr
   bits, the separately cached vector length/stride fields, and the
   floating point exception flags accumulated by the host FPU. */
void do_vfp_get_fpscr(void)
{
    int host_exc;

    T0 = (env->vfp.fpscr & 0xffc8ffff)
         | (env->vfp.vec_len << 16)
         | (env->vfp.vec_stride << 20);
    host_exc = fetestexcept(FE_ALL_EXCEPT);
    T0 |= vfp_exceptbits_from_host(host_exc);
}
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册