From 34b1a49cb11adf2a544e5737c1ea87499dd76d6c Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 4 Mar 2014 13:39:48 -0800 Subject: [PATCH] tcg-sparc: Use 64-bit registers with sparcv8plus Quite a lot of effort was spent composing and decomposing 64-bit quantities in registers, when we should just create them and leave them as one 64-bit register. Signed-off-by: Richard Henderson --- tcg/sparc/tcg-target.c | 525 ++++++++++++++--------------------------- tcg/sparc/tcg-target.h | 14 +- tcg/tcg.c | 80 ++++++- 3 files changed, 258 insertions(+), 361 deletions(-) diff --git a/tcg/sparc/tcg-target.c b/tcg/sparc/tcg-target.c index 4287bbefa6..86229d5d70 100644 --- a/tcg/sparc/tcg-target.c +++ b/tcg/sparc/tcg-target.c @@ -67,6 +67,18 @@ static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { # define SPARC64 0 #endif +/* Note that sparcv8plus can only hold 64 bit quantities in %g and %o + registers. These are saved manually by the kernel in full 64-bit + slots. The %i and %l registers are saved by the register window + mechanism, which only allocates space for 32 bits. Given that this + window spill/fill can happen on any signal, we must consider the + high bits of the %i and %l registers garbage at all times. */ +#if SPARC64 +# define ALL_64 0xffffffffu +#else +# define ALL_64 0xffffu +#endif + /* Define some temporary registers. T2 is used for constant generation. */ #define TCG_REG_T1 TCG_REG_G1 #define TCG_REG_T2 TCG_REG_O7 @@ -307,14 +319,27 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) ct->ct |= TCG_CT_REG; tcg_regset_set32(ct->u.regs, 0, 0xffffffff); break; - case 'L': /* qemu_ld/st constraint */ + case 'R': ct->ct |= TCG_CT_REG; - tcg_regset_set32(ct->u.regs, 0, 0xffffffff); - // Helper args + tcg_regset_set32(ct->u.regs, 0, ALL_64); + break; + case 'A': /* qemu_ld/st address constraint */ + ct->ct |= TCG_CT_REG; + tcg_regset_set32(ct->u.regs, 0, + TARGET_LONG_BITS == 64 ? ALL_64 : 0xffffffff); + reserve_helpers: tcg_regset_reset_reg(ct->u.regs, TCG_REG_O0); tcg_regset_reset_reg(ct->u.regs, TCG_REG_O1); tcg_regset_reset_reg(ct->u.regs, TCG_REG_O2); break; + case 's': /* qemu_st data 32-bit constraint */ + ct->ct |= TCG_CT_REG; + tcg_regset_set32(ct->u.regs, 0, 0xffffffff); + goto reserve_helpers; + case 'S': /* qemu_st data 64-bit constraint */ + ct->ct |= TCG_CT_REG; + tcg_regset_set32(ct->u.regs, 0, ALL_64); + goto reserve_helpers; case 'I': ct->ct |= TCG_CT_CONST_S11; break; @@ -408,7 +433,7 @@ static void tcg_out_movi(TCGContext *s, TCGType type, } /* A 32-bit constant, or 32-bit zero-extended to 64-bits. */ - if (type == TCG_TYPE_I32 || (arg & ~0xffffffffu) == 0) { + if (type == TCG_TYPE_I32 || arg == (uint32_t)arg) { tcg_out_sethi(s, ret, arg); if (arg & 0x3ff) { tcg_out_arithi(s, ret, ret, arg & 0x3ff, ARITH_OR); @@ -426,12 +451,12 @@ static void tcg_out_movi(TCGContext *s, TCGType type, /* A 64-bit constant decomposed into 2 32-bit pieces. 
*/ lo = (int32_t)arg; if (check_fit_tl(lo, 13)) { - hi = (arg - lo) >> 31 >> 1; + hi = (arg - lo) >> 32; tcg_out_movi(s, TCG_TYPE_I32, ret, hi); tcg_out_arithi(s, ret, ret, 32, SHIFT_SLLX); tcg_out_arithi(s, ret, ret, lo, ARITH_ADD); } else { - hi = arg >> 31 >> 1; + hi = arg >> 32; tcg_out_movi(s, TCG_TYPE_I32, ret, hi); tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_T2, lo); tcg_out_arithi(s, ret, ret, 32, SHIFT_SLLX); @@ -592,7 +617,6 @@ static void tcg_out_movcond_i32(TCGContext *s, TCGCond cond, TCGArg ret, tcg_out_movcc(s, cond, MOVCC_ICC, ret, v1, v1const); } -#if SPARC64 static void tcg_out_brcond_i64(TCGContext *s, TCGCond cond, TCGArg arg1, TCGArg arg2, int const_arg2, int label) { @@ -640,45 +664,6 @@ static void tcg_out_movcond_i64(TCGContext *s, TCGCond cond, TCGArg ret, tcg_out_movcc(s, cond, MOVCC_XCC, ret, v1, v1const); } } -#else -static void tcg_out_brcond2_i32(TCGContext *s, TCGCond cond, - TCGArg al, TCGArg ah, - TCGArg bl, int blconst, - TCGArg bh, int bhconst, int label_dest) -{ - int scond, label_next = gen_new_label(); - - tcg_out_cmp(s, ah, bh, bhconst); - - /* Note that we fill one of the delay slots with the second compare. */ - switch (cond) { - case TCG_COND_EQ: - tcg_out_bpcc(s, COND_NE, BPCC_ICC | BPCC_PT, label_next); - tcg_out_cmp(s, al, bl, blconst); - tcg_out_bpcc(s, COND_E, BPCC_ICC | BPCC_PT, label_dest); - break; - - case TCG_COND_NE: - tcg_out_bpcc(s, COND_NE, BPCC_ICC | BPCC_PT, label_dest); - tcg_out_cmp(s, al, bl, blconst); - tcg_out_bpcc(s, COND_NE, BPCC_ICC | BPCC_PT, label_dest); - break; - - default: - scond = tcg_cond_to_bcond[tcg_high_cond(cond)]; - tcg_out_bpcc(s, scond, BPCC_ICC | BPCC_PT, label_dest); - tcg_out_nop(s); - tcg_out_bpcc(s, COND_NE, BPCC_ICC | BPCC_PT, label_next); - tcg_out_cmp(s, al, bl, blconst); - scond = tcg_cond_to_bcond[tcg_unsigned_cond(cond)]; - tcg_out_bpcc(s, scond, BPCC_ICC | BPCC_PT, label_dest); - break; - } - tcg_out_nop(s); - - tcg_out_label(s, label_next, s->code_ptr); -} -#endif static void tcg_out_setcond_i32(TCGContext *s, TCGCond cond, TCGArg ret, TCGArg c1, TCGArg c2, int c2const) @@ -730,7 +715,6 @@ static void tcg_out_setcond_i32(TCGContext *s, TCGCond cond, TCGArg ret, } } -#if SPARC64 static void tcg_out_setcond_i64(TCGContext *s, TCGCond cond, TCGArg ret, TCGArg c1, TCGArg c2, int c2const) { @@ -745,48 +729,6 @@ static void tcg_out_setcond_i64(TCGContext *s, TCGCond cond, TCGArg ret, tcg_out_movcc(s, cond, MOVCC_XCC, ret, 1, 1); } } -#else -static void tcg_out_setcond2_i32(TCGContext *s, TCGCond cond, TCGArg ret, - TCGArg al, TCGArg ah, - TCGArg bl, int blconst, - TCGArg bh, int bhconst) -{ - int tmp = TCG_REG_T1; - - /* Note that the low parts are fully consumed before tmp is set. 
*/ - if (ret != ah && (bhconst || ret != bh)) { - tmp = ret; - } - - switch (cond) { - case TCG_COND_EQ: - case TCG_COND_NE: - if (bl == 0 && bh == 0) { - if (cond == TCG_COND_EQ) { - tcg_out_arith(s, TCG_REG_G0, al, ah, ARITH_ORCC); - tcg_out_movi(s, TCG_TYPE_I32, ret, 1); - } else { - tcg_out_arith(s, ret, al, ah, ARITH_ORCC); - } - } else { - tcg_out_setcond_i32(s, cond, tmp, al, bl, blconst); - tcg_out_cmp(s, ah, bh, bhconst); - tcg_out_mov(s, TCG_TYPE_I32, ret, tmp); - } - tcg_out_movcc(s, TCG_COND_NE, MOVCC_ICC, ret, cond == TCG_COND_NE, 1); - break; - - default: - /* <= : ah < bh | (ah == bh && al <= bl) */ - tcg_out_setcond_i32(s, tcg_unsigned_cond(cond), tmp, al, bl, blconst); - tcg_out_cmp(s, ah, bh, bhconst); - tcg_out_mov(s, TCG_TYPE_I32, ret, tmp); - tcg_out_movcc(s, TCG_COND_NE, MOVCC_ICC, ret, 0, 1); - tcg_out_movcc(s, tcg_high_cond(cond), MOVCC_ICC, ret, 1, 1); - break; - } -} -#endif static void tcg_out_addsub2(TCGContext *s, TCGArg rl, TCGArg rh, TCGArg al, TCGArg ah, TCGArg bl, int blconst, @@ -861,8 +803,13 @@ static void build_trampolines(TCGContext *s) } qemu_ld_trampoline[i] = tramp; - /* Find the retaddr argument register. */ - ra = TCG_REG_O3 + (!SPARC64 && TARGET_LONG_BITS == 64); + if (SPARC64 || TARGET_LONG_BITS == 32) { + ra = TCG_REG_O3; + } else { + /* Install the high part of the address. */ + tcg_out_arithi(s, TCG_REG_O1, TCG_REG_O2, 32, SHIFT_SRLX); + ra = TCG_REG_O4; + } /* Set the retaddr operand. */ tcg_out_mov(s, TCG_TYPE_PTR, ra, TCG_REG_O7); @@ -886,12 +833,28 @@ static void build_trampolines(TCGContext *s) } qemu_st_trampoline[i] = tramp; - /* Find the retaddr argument. For 32-bit, this may be past the - last argument register, and need passing on the stack. */ - ra = (TCG_REG_O4 - + (!SPARC64 && TARGET_LONG_BITS == 64) - + (!SPARC64 && (i & MO_SIZE) == MO_64)); - + if (SPARC64) { + ra = TCG_REG_O4; + } else { + ra = TCG_REG_O1; + if (TARGET_LONG_BITS == 64) { + /* Install the high part of the address. */ + tcg_out_arithi(s, ra, ra + 1, 32, SHIFT_SRLX); + ra += 2; + } else { + ra += 1; + } + if ((i & MO_SIZE) == MO_64) { + /* Install the high part of the data. */ + tcg_out_arithi(s, ra, ra + 1, 32, SHIFT_SRLX); + ra += 2; + } else { + ra += 1; + } + /* Skip the mem_index argument. */ + ra += 1; + } + /* Set the retaddr operand. */ if (ra >= TCG_REG_O6) { tcg_out_st(s, TCG_TYPE_PTR, TCG_REG_O7, TCG_REG_CALL_STACK, @@ -960,25 +923,16 @@ static void tcg_target_qemu_prologue(TCGContext *s) The result of the TLB comparison is in %[ix]cc. The sanitized address is in the returned register, maybe %o0. The TLB addend is in %o1. */ -static TCGReg tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi, - int mem_index, TCGMemOp s_bits, int which) +static TCGReg tcg_out_tlb_load(TCGContext *s, TCGReg addr, int mem_index, + TCGMemOp s_bits, int which) { const TCGReg r0 = TCG_REG_O0; const TCGReg r1 = TCG_REG_O1; const TCGReg r2 = TCG_REG_O2; - TCGReg addr = addrlo; int tlb_ofs; - if (!SPARC64 && TARGET_LONG_BITS == 64) { - /* Assemble the 64-bit address in R0. */ - tcg_out_arithi(s, r0, addrlo, 0, SHIFT_SRL); - tcg_out_arithi(s, r1, addrhi, 32, SHIFT_SLLX); - tcg_out_arith(s, r0, r0, r1, ARITH_OR); - addr = r0; - } - /* Shift the page number down. */ - tcg_out_arithi(s, r1, addrlo, TARGET_PAGE_BITS, SHIFT_SRL); + tcg_out_arithi(s, r1, addr, TARGET_PAGE_BITS, SHIFT_SRL); /* Mask out the page offset, except for the required alignment. 
*/ tcg_out_movi(s, TCG_TYPE_TL, TCG_REG_T1, @@ -1012,10 +966,10 @@ static TCGReg tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi, /* If the guest address must be zero-extended, do so now. */ if (SPARC64 && TARGET_LONG_BITS == 32) { - tcg_out_arithi(s, r0, addrlo, 0, SHIFT_SRL); + tcg_out_arithi(s, r0, addr, 0, SHIFT_SRL); return r0; } - return addrlo; + return addr; } #endif /* CONFIG_SOFTMMU */ @@ -1048,78 +1002,37 @@ static const int qemu_st_opc[16] = { [MO_LEQ] = STX_LE, }; -static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64) +static void tcg_out_qemu_ld(TCGContext *s, TCGReg data, TCGReg addr, + TCGMemOp memop, int memi, bool is_64) { - TCGReg addrlo, datalo, datahi, addrhi __attribute__((unused)); - TCGMemOp memop, s_bits; -#if defined(CONFIG_SOFTMMU) +#ifdef CONFIG_SOFTMMU + TCGMemOp s_bits = memop & MO_SIZE; TCGReg addrz, param; uintptr_t func; - int memi; - uint32_t *label_ptr[2]; -#endif - - datalo = *args++; - datahi = (!SPARC64 && is64 ? *args++ : 0); - addrlo = *args++; - addrhi = (!SPARC64 && TARGET_LONG_BITS == 64 ? *args++ : 0); - memop = *args++; - s_bits = memop & MO_SIZE; + uint32_t *label_ptr; -#if defined(CONFIG_SOFTMMU) - memi = *args++; - addrz = tcg_out_tlb_load(s, addrlo, addrhi, memi, s_bits, + addrz = tcg_out_tlb_load(s, addr, memi, s_bits, offsetof(CPUTLBEntry, addr_read)); - if (!SPARC64 && s_bits == MO_64) { - int reg64; - - /* bne,pn %[xi]cc, label0 */ - label_ptr[0] = (uint32_t *)s->code_ptr; - tcg_out_bpcc0(s, COND_NE, BPCC_PN - | (TARGET_LONG_BITS == 64 ? BPCC_XCC : BPCC_ICC), 0); - tcg_out_nop(s); - - /* TLB Hit. */ - /* Load all 64-bits into an O/G register. */ - reg64 = (datalo < 16 ? datalo : TCG_REG_O0); - tcg_out_ldst_rr(s, reg64, addrz, TCG_REG_O1, qemu_ld_opc[memop]); + /* The fast path is exactly one insn. Thus we can perform the + entire TLB Hit in the (annulled) delay slot of the branch + over the TLB Miss case. */ - /* Move the two 32-bit pieces into the destination registers. */ - tcg_out_arithi(s, datahi, reg64, 32, SHIFT_SRLX); - if (reg64 != datalo) { - tcg_out_mov(s, TCG_TYPE_I32, datalo, reg64); - } - - /* b,a,pt label1 */ - label_ptr[1] = (uint32_t *)s->code_ptr; - tcg_out_bpcc0(s, COND_A, BPCC_A | BPCC_PT, 0); - } else { - /* The fast path is exactly one insn. Thus we can perform the - entire TLB Hit in the (annulled) delay slot of the branch - over the TLB Miss case. */ - - /* beq,a,pt %[xi]cc, label0 */ - label_ptr[0] = NULL; - label_ptr[1] = (uint32_t *)s->code_ptr; - tcg_out_bpcc0(s, COND_E, BPCC_A | BPCC_PT - | (TARGET_LONG_BITS == 64 ? BPCC_XCC : BPCC_ICC), 0); - /* delay slot */ - tcg_out_ldst_rr(s, datalo, addrz, TCG_REG_O1, qemu_ld_opc[memop]); - } + /* beq,a,pt %[xi]cc, label0 */ + label_ptr = (uint32_t *)s->code_ptr; + tcg_out_bpcc0(s, COND_E, BPCC_A | BPCC_PT + | (TARGET_LONG_BITS == 64 ? BPCC_XCC : BPCC_ICC), 0); + /* delay slot */ + tcg_out_ldst_rr(s, data, addrz, TCG_REG_O1, qemu_ld_opc[memop]); /* TLB Miss. */ - if (label_ptr[0]) { - *label_ptr[0] |= INSN_OFF19((unsigned long)s->code_ptr - - (unsigned long)label_ptr[0]); - } - param = TCG_REG_O1; - if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) { - tcg_out_mov(s, TCG_TYPE_REG, param++, addrhi); + if (!SPARC64 && TARGET_LONG_BITS == 64) { + /* Skip the high-part; we'll perform the extract in the trampoline. */ + param++; } - tcg_out_mov(s, TCG_TYPE_REG, param++, addrlo); + tcg_out_mov(s, TCG_TYPE_REG, param++, addr); /* We use the helpers to extend SB and SW data, leaving the case of SL needing explicit extending below. 
*/ @@ -1133,81 +1046,54 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64) /* delay slot */ tcg_out_movi(s, TCG_TYPE_I32, param, memi); - switch (memop & ~MO_BSWAP) { - case MO_SL: - tcg_out_arithi(s, datalo, TCG_REG_O0, 0, SHIFT_SRA); - break; - case MO_Q: - if (TCG_TARGET_REG_BITS == 32) { - tcg_out_mov(s, TCG_TYPE_REG, datahi, TCG_REG_O0); - tcg_out_mov(s, TCG_TYPE_REG, datalo, TCG_REG_O1); - break; + /* Recall that all of the helpers return 64-bit results. + Which complicates things for sparcv8plus. */ + if (SPARC64) { + /* We let the helper sign-extend SB and SW, but leave SL for here. */ + if (is_64 && (memop & ~MO_BSWAP) == MO_SL) { + tcg_out_arithi(s, data, TCG_REG_O0, 0, SHIFT_SRA); + } else { + tcg_out_mov(s, TCG_TYPE_REG, data, TCG_REG_O0); + } + } else { + if (s_bits == MO_64) { + tcg_out_arithi(s, TCG_REG_O0, TCG_REG_O0, 32, SHIFT_SLLX); + tcg_out_arithi(s, TCG_REG_O1, TCG_REG_O1, 0, SHIFT_SRL); + tcg_out_arith(s, data, TCG_REG_O0, TCG_REG_O1, ARITH_OR); + } else if (is_64) { + /* Re-extend from 32-bit rather than reassembling when we + know the high register must be an extension. */ + tcg_out_arithi(s, data, TCG_REG_O1, 0, + memop & MO_SIGN ? SHIFT_SRA : SHIFT_SRL); + } else { + tcg_out_mov(s, TCG_TYPE_I32, data, TCG_REG_O1); } - /* FALLTHRU */ - default: - /* mov */ - tcg_out_mov(s, TCG_TYPE_REG, datalo, TCG_REG_O0); - break; } - *label_ptr[1] |= INSN_OFF19((unsigned long)s->code_ptr - - (unsigned long)label_ptr[1]); + *label_ptr |= INSN_OFF19((uintptr_t)s->code_ptr - (uintptr_t)label_ptr); #else if (SPARC64 && TARGET_LONG_BITS == 32) { - tcg_out_arithi(s, TCG_REG_T1, addrlo, 0, SHIFT_SRL); - addrlo = TCG_REG_T1; - } - if (!SPARC64 && s_bits == MO_64) { - int reg64 = (datalo < 16 ? datalo : TCG_REG_O0); - - tcg_out_ldst_rr(s, reg64, addrlo, - (GUEST_BASE ? TCG_GUEST_BASE_REG : TCG_REG_G0), - qemu_ld_opc[memop]); - - tcg_out_arithi(s, datahi, reg64, 32, SHIFT_SRLX); - if (reg64 != datalo) { - tcg_out_mov(s, TCG_TYPE_I32, datalo, reg64); - } - } else { - tcg_out_ldst_rr(s, datalo, addrlo, - (GUEST_BASE ? TCG_GUEST_BASE_REG : TCG_REG_G0), - qemu_ld_opc[memop]); + tcg_out_arithi(s, TCG_REG_T1, addr, 0, SHIFT_SRL); + addr = TCG_REG_T1; } + tcg_out_ldst_rr(s, data, addr, + (GUEST_BASE ? TCG_GUEST_BASE_REG : TCG_REG_G0), + qemu_ld_opc[memop]); #endif /* CONFIG_SOFTMMU */ } -static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64) +static void tcg_out_qemu_st(TCGContext *s, TCGReg data, TCGReg addr, + TCGMemOp memop, int memi) { - TCGReg addrlo, datalo, datahi, addrhi __attribute__((unused)); - TCGMemOp memop, s_bits; -#if defined(CONFIG_SOFTMMU) - TCGReg addrz, datafull, param; +#ifdef CONFIG_SOFTMMU + TCGMemOp s_bits = memop & MO_SIZE; + TCGReg addrz, param; uintptr_t func; - int memi; uint32_t *label_ptr; -#endif - - datalo = *args++; - datahi = (!SPARC64 && is64 ? *args++ : 0); - addrlo = *args++; - addrhi = (!SPARC64 && TARGET_LONG_BITS == 64 ? *args++ : 0); - memop = *args++; - s_bits = memop & MO_SIZE; -#if defined(CONFIG_SOFTMMU) - memi = *args++; - addrz = tcg_out_tlb_load(s, addrlo, addrhi, memi, s_bits, + addrz = tcg_out_tlb_load(s, addr, memi, s_bits, offsetof(CPUTLBEntry, addr_write)); - datafull = datalo; - if (!SPARC64 && s_bits == MO_64) { - /* Reconstruct the full 64-bit value. */ - tcg_out_arithi(s, TCG_REG_T1, datalo, 0, SHIFT_SRL); - tcg_out_arithi(s, TCG_REG_O2, datahi, 32, SHIFT_SLLX); - tcg_out_arith(s, TCG_REG_O2, TCG_REG_T1, TCG_REG_O2, ARITH_OR); - datafull = TCG_REG_O2; - } - /* The fast path is exactly one insn. 
Thus we can perform the entire TLB Hit in the (annulled) delay slot of the branch over TLB Miss. */ /* beq,a,pt %[xi]cc, label0 */ @@ -1215,19 +1101,21 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64) tcg_out_bpcc0(s, COND_E, BPCC_A | BPCC_PT | (TARGET_LONG_BITS == 64 ? BPCC_XCC : BPCC_ICC), 0); /* delay slot */ - tcg_out_ldst_rr(s, datafull, addrz, TCG_REG_O1, qemu_st_opc[memop]); + tcg_out_ldst_rr(s, data, addrz, TCG_REG_O1, qemu_st_opc[memop]); /* TLB Miss. */ param = TCG_REG_O1; - if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) { - tcg_out_mov(s, TCG_TYPE_REG, param++, addrhi); + if (!SPARC64 && TARGET_LONG_BITS == 64) { + /* Skip the high-part; we'll perform the extract in the trampoline. */ + param++; } - tcg_out_mov(s, TCG_TYPE_REG, param++, addrlo); - if (TCG_TARGET_REG_BITS == 32 && s_bits == MO_64) { - tcg_out_mov(s, TCG_TYPE_REG, param++, datahi); + tcg_out_mov(s, TCG_TYPE_REG, param++, addr); + if (!SPARC64 && s_bits == MO_64) { + /* Skip the high-part; we'll perform the extract in the trampoline. */ + param++; } - tcg_out_mov(s, TCG_TYPE_REG, param++, datalo); + tcg_out_mov(s, TCG_TYPE_REG, param++, data); func = qemu_st_trampoline[memop]; assert(func != 0); @@ -1235,20 +1123,13 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64) /* delay slot */ tcg_out_movi(s, TCG_TYPE_REG, param, memi); - *label_ptr |= INSN_OFF19((unsigned long)s->code_ptr - - (unsigned long)label_ptr); + *label_ptr |= INSN_OFF19((uintptr_t)s->code_ptr - (uintptr_t)label_ptr); #else if (SPARC64 && TARGET_LONG_BITS == 32) { - tcg_out_arithi(s, TCG_REG_T1, addrlo, 0, SHIFT_SRL); - addrlo = TCG_REG_T1; - } - if (!SPARC64 && s_bits == MO_64) { - tcg_out_arithi(s, TCG_REG_T1, datalo, 0, SHIFT_SRL); - tcg_out_arithi(s, TCG_REG_O2, datahi, 32, SHIFT_SLLX); - tcg_out_arith(s, TCG_REG_O2, TCG_REG_T1, TCG_REG_O2, ARITH_OR); - datalo = TCG_REG_O2; + tcg_out_arithi(s, TCG_REG_T1, addr, 0, SHIFT_SRL); + addr = TCG_REG_T1; } - tcg_out_ldst_rr(s, datalo, addrlo, + tcg_out_ldst_rr(s, data, addr, (GUEST_BASE ? 
TCG_GUEST_BASE_REG : TCG_REG_G0), qemu_st_opc[memop]); #endif /* CONFIG_SOFTMMU */ @@ -1298,14 +1179,10 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, tcg_out_movi(s, TCG_TYPE_I32, args[0], (uint32_t)args[1]); break; -#if SPARC64 #define OP_32_64(x) \ glue(glue(case INDEX_op_, x), _i32): \ glue(glue(case INDEX_op_, x), _i64) -#else -#define OP_32_64(x) \ - glue(glue(case INDEX_op_, x), _i32) -#endif + OP_32_64(ld8u): tcg_out_ldst(s, args[0], args[1], args[2], LDUB); break; @@ -1319,9 +1196,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, tcg_out_ldst(s, args[0], args[1], args[2], LDSH); break; case INDEX_op_ld_i32: -#if SPARC64 case INDEX_op_ld32u_i64: -#endif tcg_out_ldst(s, args[0], args[1], args[2], LDUW); break; OP_32_64(st8): @@ -1331,9 +1206,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, tcg_out_ldst(s, args[0], args[1], args[2], STH); break; case INDEX_op_st_i32: -#if SPARC64 case INDEX_op_st32_i64: -#endif tcg_out_ldst(s, args[0], args[1], args[2], STW); break; OP_32_64(add): @@ -1400,19 +1273,6 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, args[2], const_args[2], args[3], const_args[3]); break; -#if !SPARC64 - case INDEX_op_brcond2_i32: - tcg_out_brcond2_i32(s, args[4], args[0], args[1], - args[2], const_args[2], - args[3], const_args[3], args[5]); - break; - case INDEX_op_setcond2_i32: - tcg_out_setcond2_i32(s, args[5], args[0], args[1], args[2], - args[3], const_args[3], - args[4], const_args[4]); - break; -#endif - case INDEX_op_add2_i32: tcg_out_addsub2(s, args[0], args[1], args[2], args[3], args[4], const_args[4], args[5], const_args[5], @@ -1430,19 +1290,18 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, break; case INDEX_op_qemu_ld_i32: - tcg_out_qemu_ld(s, args, 0); + tcg_out_qemu_ld(s, args[0], args[1], args[2], args[3], false); break; case INDEX_op_qemu_ld_i64: - tcg_out_qemu_ld(s, args, 1); + tcg_out_qemu_ld(s, args[0], args[1], args[2], args[3], true); break; case INDEX_op_qemu_st_i32: - tcg_out_qemu_st(s, args, 0); + tcg_out_qemu_st(s, args[0], args[1], args[2], args[3]); break; case INDEX_op_qemu_st_i64: - tcg_out_qemu_st(s, args, 1); + tcg_out_qemu_st(s, args[0], args[1], args[2], args[3]); break; -#if SPARC64 case INDEX_op_movi_i64: tcg_out_movi(s, TCG_TYPE_I64, args[0], args[1]); break; @@ -1502,7 +1361,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, tcg_out_movcond_i64(s, args[5], args[0], args[1], args[2], const_args[2], args[3], const_args[3]); break; -#endif + gen_arith: tcg_out_arithc(s, args[0], args[1], args[2], const_args[2], c); break; @@ -1556,73 +1415,54 @@ static const TCGTargetOpDef sparc_op_defs[] = { { INDEX_op_setcond_i32, { "r", "rZ", "rJ" } }, { INDEX_op_movcond_i32, { "r", "rZ", "rJ", "rI", "0" } }, -#if !SPARC64 - { INDEX_op_brcond2_i32, { "rZ", "rZ", "rJ", "rJ" } }, - { INDEX_op_setcond2_i32, { "r", "rZ", "rZ", "rJ", "rJ" } }, -#endif - { INDEX_op_add2_i32, { "r", "r", "rZ", "rZ", "rJ", "rJ" } }, { INDEX_op_sub2_i32, { "r", "r", "rZ", "rZ", "rJ", "rJ" } }, { INDEX_op_mulu2_i32, { "r", "r", "rZ", "rJ" } }, -#if SPARC64 - { INDEX_op_mov_i64, { "r", "r" } }, - { INDEX_op_movi_i64, { "r" } }, - { INDEX_op_ld8u_i64, { "r", "r" } }, - { INDEX_op_ld8s_i64, { "r", "r" } }, - { INDEX_op_ld16u_i64, { "r", "r" } }, - { INDEX_op_ld16s_i64, { "r", "r" } }, - { INDEX_op_ld32u_i64, { "r", "r" } }, - { INDEX_op_ld32s_i64, { "r", "r" } }, - { 
INDEX_op_ld_i64, { "r", "r" } }, - { INDEX_op_st8_i64, { "rZ", "r" } }, - { INDEX_op_st16_i64, { "rZ", "r" } }, - { INDEX_op_st32_i64, { "rZ", "r" } }, - { INDEX_op_st_i64, { "rZ", "r" } }, - - { INDEX_op_add_i64, { "r", "rZ", "rJ" } }, - { INDEX_op_mul_i64, { "r", "rZ", "rJ" } }, - { INDEX_op_div_i64, { "r", "rZ", "rJ" } }, - { INDEX_op_divu_i64, { "r", "rZ", "rJ" } }, - { INDEX_op_sub_i64, { "r", "rZ", "rJ" } }, - { INDEX_op_and_i64, { "r", "rZ", "rJ" } }, - { INDEX_op_andc_i64, { "r", "rZ", "rJ" } }, - { INDEX_op_or_i64, { "r", "rZ", "rJ" } }, - { INDEX_op_orc_i64, { "r", "rZ", "rJ" } }, - { INDEX_op_xor_i64, { "r", "rZ", "rJ" } }, - - { INDEX_op_shl_i64, { "r", "rZ", "rJ" } }, - { INDEX_op_shr_i64, { "r", "rZ", "rJ" } }, - { INDEX_op_sar_i64, { "r", "rZ", "rJ" } }, - - { INDEX_op_neg_i64, { "r", "rJ" } }, - { INDEX_op_not_i64, { "r", "rJ" } }, - - { INDEX_op_ext32s_i64, { "r", "r" } }, - { INDEX_op_ext32u_i64, { "r", "r" } }, - { INDEX_op_trunc_shr_i32, { "r", "r" } }, - - { INDEX_op_brcond_i64, { "rZ", "rJ" } }, - { INDEX_op_setcond_i64, { "r", "rZ", "rJ" } }, - { INDEX_op_movcond_i64, { "r", "rZ", "rJ", "rI", "0" } }, -#endif - -#if SPARC64 - { INDEX_op_qemu_ld_i32, { "r", "L" } }, - { INDEX_op_qemu_ld_i64, { "r", "L" } }, - { INDEX_op_qemu_st_i32, { "L", "L" } }, - { INDEX_op_qemu_st_i64, { "L", "L" } }, -#elif TARGET_LONG_BITS == 32 - { INDEX_op_qemu_ld_i32, { "r", "L" } }, - { INDEX_op_qemu_ld_i64, { "r", "r", "L" } }, - { INDEX_op_qemu_st_i32, { "L", "L" } }, - { INDEX_op_qemu_st_i64, { "L", "L", "L" } }, -#else - { INDEX_op_qemu_ld_i32, { "r", "L", "L" } }, - { INDEX_op_qemu_ld_i64, { "L", "L", "L", "L" } }, - { INDEX_op_qemu_st_i32, { "L", "L", "L" } }, - { INDEX_op_qemu_st_i64, { "L", "L", "L", "L" } }, -#endif + { INDEX_op_mov_i64, { "R", "R" } }, + { INDEX_op_movi_i64, { "R" } }, + { INDEX_op_ld8u_i64, { "R", "r" } }, + { INDEX_op_ld8s_i64, { "R", "r" } }, + { INDEX_op_ld16u_i64, { "R", "r" } }, + { INDEX_op_ld16s_i64, { "R", "r" } }, + { INDEX_op_ld32u_i64, { "R", "r" } }, + { INDEX_op_ld32s_i64, { "R", "r" } }, + { INDEX_op_ld_i64, { "R", "r" } }, + { INDEX_op_st8_i64, { "RZ", "r" } }, + { INDEX_op_st16_i64, { "RZ", "r" } }, + { INDEX_op_st32_i64, { "RZ", "r" } }, + { INDEX_op_st_i64, { "RZ", "r" } }, + + { INDEX_op_add_i64, { "R", "RZ", "RJ" } }, + { INDEX_op_mul_i64, { "R", "RZ", "RJ" } }, + { INDEX_op_div_i64, { "R", "RZ", "RJ" } }, + { INDEX_op_divu_i64, { "R", "RZ", "RJ" } }, + { INDEX_op_sub_i64, { "R", "RZ", "RJ" } }, + { INDEX_op_and_i64, { "R", "RZ", "RJ" } }, + { INDEX_op_andc_i64, { "R", "RZ", "RJ" } }, + { INDEX_op_or_i64, { "R", "RZ", "RJ" } }, + { INDEX_op_orc_i64, { "R", "RZ", "RJ" } }, + { INDEX_op_xor_i64, { "R", "RZ", "RJ" } }, + + { INDEX_op_shl_i64, { "R", "RZ", "RJ" } }, + { INDEX_op_shr_i64, { "R", "RZ", "RJ" } }, + { INDEX_op_sar_i64, { "R", "RZ", "RJ" } }, + + { INDEX_op_neg_i64, { "R", "RJ" } }, + { INDEX_op_not_i64, { "R", "RJ" } }, + + { INDEX_op_ext32s_i64, { "R", "r" } }, + { INDEX_op_ext32u_i64, { "R", "r" } }, + { INDEX_op_trunc_shr_i32, { "r", "R" } }, + + { INDEX_op_brcond_i64, { "RZ", "RJ" } }, + { INDEX_op_setcond_i64, { "R", "RZ", "RJ" } }, + { INDEX_op_movcond_i64, { "R", "RZ", "RJ", "RI", "0" } }, + + { INDEX_op_qemu_ld_i32, { "r", "A" } }, + { INDEX_op_qemu_ld_i64, { "R", "A" } }, + { INDEX_op_qemu_st_i32, { "s", "A" } }, + { INDEX_op_qemu_st_i64, { "S", "A" } }, { -1 }, }; @@ -1630,9 +1470,8 @@ static const TCGTargetOpDef sparc_op_defs[] = { static void tcg_target_init(TCGContext *s) { 
tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffffffff); -#if SPARC64 - tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffffffff); -#endif + tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, ALL_64); + tcg_regset_set32(tcg_target_call_clobber_regs, 0, (1 << TCG_REG_G1) | (1 << TCG_REG_G2) | diff --git a/tcg/sparc/tcg-target.h b/tcg/sparc/tcg-target.h index 61fd6b8da9..8ce85f8247 100644 --- a/tcg/sparc/tcg-target.h +++ b/tcg/sparc/tcg-target.h @@ -24,13 +24,7 @@ #ifndef TCG_TARGET_SPARC #define TCG_TARGET_SPARC 1 -#if UINTPTR_MAX == UINT32_MAX -# define TCG_TARGET_REG_BITS 32 -#elif UINTPTR_MAX == UINT64_MAX -# define TCG_TARGET_REG_BITS 64 -#else -# error Unknown pointer size for tcg target -#endif +#define TCG_TARGET_REG_BITS 64 #define TCG_TARGET_NB_REGS 32 @@ -76,7 +70,7 @@ typedef enum { /* used for function call generation */ #define TCG_REG_CALL_STACK TCG_REG_O6 -#if TCG_TARGET_REG_BITS == 64 +#ifdef __arch64__ #define TCG_TARGET_STACK_BIAS 2047 #define TCG_TARGET_STACK_ALIGN 16 #define TCG_TARGET_CALL_STACK_OFFSET (128 + 6*8 + TCG_TARGET_STACK_BIAS) @@ -86,7 +80,7 @@ typedef enum { #define TCG_TARGET_CALL_STACK_OFFSET (64 + 4 + 6*4) #endif -#if TCG_TARGET_REG_BITS == 64 +#ifdef __arch64__ #define TCG_TARGET_EXTEND_ARGS 1 #endif @@ -116,7 +110,6 @@ typedef enum { #define TCG_TARGET_HAS_muluh_i32 0 #define TCG_TARGET_HAS_mulsh_i32 0 -#if TCG_TARGET_REG_BITS == 64 #define TCG_TARGET_HAS_trunc_shr_i32 1 #define TCG_TARGET_HAS_div_i64 1 #define TCG_TARGET_HAS_rem_i64 0 @@ -145,7 +138,6 @@ typedef enum { #define TCG_TARGET_HAS_muls2_i64 0 #define TCG_TARGET_HAS_muluh_i64 0 #define TCG_TARGET_HAS_mulsh_i64 0 -#endif #define TCG_TARGET_HAS_new_ldst 1 diff --git a/tcg/tcg.c b/tcg/tcg.c index 21ce9fb505..e71f7a0975 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -664,7 +664,36 @@ void tcg_gen_callN(TCGContext *s, TCGv_ptr func, unsigned int flags, int nb_rets; TCGArg *nparam; -#if defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64 +#if defined(__sparc__) && !defined(__arch64__) \ + && !defined(CONFIG_TCG_INTERPRETER) + /* We have 64-bit values in one register, but need to pass as two + separate parameters. Split them. */ + int orig_sizemask = sizemask; + int orig_nargs = nargs; + TCGv_i64 retl, reth; + + TCGV_UNUSED_I64(retl); + TCGV_UNUSED_I64(reth); + if (sizemask != 0) { + TCGArg *split_args = __builtin_alloca(sizeof(TCGArg) * nargs * 2); + for (i = real_args = 0; i < nargs; ++i) { + int is_64bit = sizemask & (1 << (i+1)*2); + if (is_64bit) { + TCGv_i64 orig = MAKE_TCGV_I64(args[i]); + TCGv_i32 h = tcg_temp_new_i32(); + TCGv_i32 l = tcg_temp_new_i32(); + tcg_gen_extr_i64_i32(l, h, orig); + split_args[real_args++] = GET_TCGV_I32(h); + split_args[real_args++] = GET_TCGV_I32(l); + } else { + split_args[real_args++] = args[i]; + } + } + nargs = real_args; + args = split_args; + sizemask = 0; + } +#elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64 for (i = 0; i < nargs; ++i) { int is_64bit = sizemask & (1 << (i+1)*2); int is_signed = sizemask & (2 << (i+1)*2); @@ -684,8 +713,23 @@ void tcg_gen_callN(TCGContext *s, TCGv_ptr func, unsigned int flags, *s->gen_opc_ptr++ = INDEX_op_call; nparam = s->gen_opparam_ptr++; if (ret != TCG_CALL_DUMMY_ARG) { -#if TCG_TARGET_REG_BITS < 64 - if (sizemask & 1) { +#if defined(__sparc__) && !defined(__arch64__) \ + && !defined(CONFIG_TCG_INTERPRETER) + if (orig_sizemask & 1) { + /* The 32-bit ABI is going to return the 64-bit value in + the %o0/%o1 register pair. 
Prepare for this by using + two return temporaries, and reassemble below. */ + retl = tcg_temp_new_i64(); + reth = tcg_temp_new_i64(); + *s->gen_opparam_ptr++ = GET_TCGV_I64(reth); + *s->gen_opparam_ptr++ = GET_TCGV_I64(retl); + nb_rets = 2; + } else { + *s->gen_opparam_ptr++ = ret; + nb_rets = 1; + } +#else + if (TCG_TARGET_REG_BITS < 64 && (sizemask & 1)) { #ifdef HOST_WORDS_BIGENDIAN *s->gen_opparam_ptr++ = ret + 1; *s->gen_opparam_ptr++ = ret; @@ -694,12 +738,11 @@ void tcg_gen_callN(TCGContext *s, TCGv_ptr func, unsigned int flags, *s->gen_opparam_ptr++ = ret + 1; #endif nb_rets = 2; - } else -#endif - { + } else { *s->gen_opparam_ptr++ = ret; nb_rets = 1; } +#endif } else { nb_rets = 0; } @@ -749,7 +792,29 @@ void tcg_gen_callN(TCGContext *s, TCGv_ptr func, unsigned int flags, /* total parameters, needed to go backward in the instruction stream */ *s->gen_opparam_ptr++ = 1 + nb_rets + real_args + 3; -#if defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64 +#if defined(__sparc__) && !defined(__arch64__) \ + && !defined(CONFIG_TCG_INTERPRETER) + /* Free all of the parts we allocated above. */ + for (i = real_args = 0; i < orig_nargs; ++i) { + int is_64bit = orig_sizemask & (1 << (i+1)*2); + if (is_64bit) { + TCGv_i32 h = MAKE_TCGV_I32(args[real_args++]); + TCGv_i32 l = MAKE_TCGV_I32(args[real_args++]); + tcg_temp_free_i32(h); + tcg_temp_free_i32(l); + } else { + real_args++; + } + } + if (orig_sizemask & 1) { + /* The 32-bit ABI returned two 32-bit pieces. Re-assemble them. + Note that describing these as TCGv_i64 eliminates an unnecessary + zero-extension that tcg_gen_concat_i32_i64 would create. */ + tcg_gen_concat32_i64(MAKE_TCGV_I64(ret), retl, reth); + tcg_temp_free_i64(retl); + tcg_temp_free_i64(reth); + } +#elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64 for (i = 0; i < nargs; ++i) { int is_64bit = sizemask & (1 << (i+1)*2); if (!is_64bit) { @@ -2411,6 +2476,7 @@ static int tcg_reg_alloc_call(TCGContext *s, const TCGOpDef *def, ts = &s->temps[arg]; reg = tcg_target_call_oarg_regs[i]; assert(s->reg_to_temp[reg] == -1); + if (ts->fixed_reg) { if (ts->reg != reg) { tcg_out_mov(s, ts->type, ts->reg, reg); -- GitLab
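P.S. (editor's note, not part of the patch): for readers unfamiliar with the
v8plus idioms above, the standalone C sketch below models with plain host
arithmetic the two bit tricks the backend now emits as sparc instructions:
the hi/lo split used by tcg_out_movi, and the reassembly of a 64-bit helper
result from the %o0/%o1 pair, whose high bits must be treated as garbage.
All function names and test values here are illustrative assumptions.

/* Standalone sketch, not QEMU code: models with plain host arithmetic
   the sparc instruction sequences emitted by the patch above. */
#include <assert.h>
#include <stdint.h>

/* tcg_out_movi path: when lo = (int32_t)arg fits in a signed 13-bit
   immediate, the constant is rebuilt as (hi << 32) + lo with
   hi = (arg - lo) >> 32.  Since arg is now always a 64-bit type, a
   single ">> 32" replaces the old ">> 31 >> 1" workaround for shifts
   that would be out of range on a 32-bit tcg_target_long. */
static uint64_t movi_decompose(uint64_t arg)
{
    int64_t lo = (int32_t)arg;               /* sign-extended low word */
    int64_t hi = ((int64_t)arg - lo) >> 32;  /* exact: low 32 bits are 0 */
    return (uint64_t)((hi << 32) + lo);      /* movi hi; SLLX 32; ADD lo */
}

/* Helper-return path on sparcv8plus: a 64-bit result comes back as two
   32-bit halves in %o0 (high) and %o1 (low), and the high bits of both
   registers may be garbage.  SLLX discards %o0's garbage, SRL
   zero-extends %o1, OR merges. */
static uint64_t reassemble(uint64_t o0, uint64_t o1)
{
    return (o0 << 32) | (uint32_t)o1;        /* SLLX 32; SRL 0; OR */
}

int main(void)
{
    /* Low parts 0x42f and -1024 both fit a signed 13-bit immediate. */
    assert(movi_decompose(0x123456780000042full) == 0x123456780000042full);
    assert(movi_decompose(0x12345678fffffc00ull) == 0x12345678fffffc00ull);

    /* Garbage (here 0xffffffff) in the high words must not leak. */
    assert(reassemble(0xffffffff12345678ull, 0xffffffff9abcdef0ull)
           == 0x123456789abcdef0ull);
    return 0;
}

The same zero-extension trick (a logical 32-bit shift by 0) is what the
backend uses to sanitize a 32-bit guest address on a 64-bit host, which is
why tcg_out_tlb_load can return the address register unchanged otherwise.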