/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#ifdef CONFIG_DEBUG_TCG
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
    "%g0",
    "%g1",
    "%g2",
    "%g3",
    "%g4",
    "%g5",
    "%g6",
    "%g7",
    "%o0",
    "%o1",
    "%o2",
    "%o3",
    "%o4",
    "%o5",
    "%o6",
    "%o7",
    "%l0",
    "%l1",
    "%l2",
    "%l3",
    "%l4",
    "%l5",
    "%l6",
    "%l7",
    "%i0",
    "%i1",
    "%i2",
    "%i3",
    "%i4",
    "%i5",
    "%i6",
    "%i7",
};
#endif

#ifdef __arch64__
# define SPARC64 1
#else
# define SPARC64 0
#endif

/* Note that sparcv8plus can only hold 64 bit quantities in %g and %o
   registers.  These are saved manually by the kernel in full 64-bit
   slots.  The %i and %l registers are saved by the register window
   mechanism, which only allocates space for 32 bits.  Given that this
   window spill/fill can happen on any signal, we must consider the
   high bits of the %i and %l registers garbage at all times.  */
#if SPARC64
# define ALL_64  0xffffffffu
#else
# define ALL_64  0xffffu
#endif

/* Define some temporary registers.  T2 is used for constant generation.  */
#define TCG_REG_T1  TCG_REG_G1
#define TCG_REG_T2  TCG_REG_O7

#ifndef CONFIG_SOFTMMU
# define TCG_GUEST_BASE_REG TCG_REG_I5
#endif

#define TCG_REG_TB  TCG_REG_I1
#define USE_REG_TB  (sizeof(void *) > 4)

static const int tcg_target_reg_alloc_order[] = {
    TCG_REG_L0,
    TCG_REG_L1,
    TCG_REG_L2,
    TCG_REG_L3,
    TCG_REG_L4,
    TCG_REG_L5,
    TCG_REG_L6,
    TCG_REG_L7,

    TCG_REG_I0,
    TCG_REG_I1,
    TCG_REG_I2,
    TCG_REG_I3,
    TCG_REG_I4,
    TCG_REG_I5,

    TCG_REG_G2,
    TCG_REG_G3,
    TCG_REG_G4,
    TCG_REG_G5,

    TCG_REG_O0,
    TCG_REG_O1,
    TCG_REG_O2,
    TCG_REG_O3,
    TCG_REG_O4,
    TCG_REG_O5,
};

static const int tcg_target_call_iarg_regs[6] = {
    TCG_REG_O0,
    TCG_REG_O1,
    TCG_REG_O2,
    TCG_REG_O3,
    TCG_REG_O4,
    TCG_REG_O5,
};

static const int tcg_target_call_oarg_regs[] = {
    TCG_REG_O0,
    TCG_REG_O1,
    TCG_REG_O2,
    TCG_REG_O3,
};

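/* Fields used to assemble a SPARC instruction word.  */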
#define INSN_OP(x)  ((x) << 30)
#define INSN_OP2(x) ((x) << 22)
#define INSN_OP3(x) ((x) << 19)
#define INSN_OPF(x) ((x) << 5)
#define INSN_RD(x)  ((x) << 25)
#define INSN_RS1(x) ((x) << 14)
#define INSN_RS2(x) (x)
#define INSN_ASI(x) ((x) << 5)

#define INSN_IMM10(x) ((1 << 13) | ((x) & 0x3ff))
#define INSN_IMM11(x) ((1 << 13) | ((x) & 0x7ff))
#define INSN_IMM13(x) ((1 << 13) | ((x) & 0x1fff))
#define INSN_OFF16(x) ((((x) >> 2) & 0x3fff) | ((((x) >> 16) & 3) << 20))
#define INSN_OFF19(x) (((x) >> 2) & 0x07ffff)
#define INSN_COND(x) ((x) << 25)

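/* Integer condition codes, as used in the cond field of Bicc/BPcc and MOVcc.  */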
#define COND_N     0x0
#define COND_E     0x1
#define COND_LE    0x2
#define COND_L     0x3
#define COND_LEU   0x4
#define COND_CS    0x5
#define COND_NEG   0x6
#define COND_VS    0x7
#define COND_A     0x8
#define COND_NE    0x9
#define COND_G     0xa
#define COND_GE    0xb
#define COND_GU    0xc
#define COND_CC    0xd
#define COND_POS   0xe
#define COND_VC    0xf
#define BA         (INSN_OP(0) | INSN_COND(COND_A) | INSN_OP2(0x2))

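/* Register conditions (tests against zero), as used by BPr and MOVr.  */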
#define RCOND_Z    1
#define RCOND_LEZ  2
#define RCOND_LZ   3
#define RCOND_NZ   5
#define RCOND_GZ   6
#define RCOND_GEZ  7

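/* cc field values selecting the 32-bit (icc) or 64-bit (xcc) condition codes.  */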
#define MOVCC_ICC  (1 << 18)
#define MOVCC_XCC  (1 << 18 | 1 << 12)

#define BPCC_ICC   0
#define BPCC_XCC   (2 << 20)
#define BPCC_PT    (1 << 19)
#define BPCC_PN    0
#define BPCC_A     (1 << 29)

#define BPR_PT     BPCC_PT

#define ARITH_ADD  (INSN_OP(2) | INSN_OP3(0x00))
#define ARITH_ADDCC (INSN_OP(2) | INSN_OP3(0x10))
#define ARITH_AND  (INSN_OP(2) | INSN_OP3(0x01))
#define ARITH_ANDN (INSN_OP(2) | INSN_OP3(0x05))
#define ARITH_OR   (INSN_OP(2) | INSN_OP3(0x02))
#define ARITH_ORCC (INSN_OP(2) | INSN_OP3(0x12))
#define ARITH_ORN  (INSN_OP(2) | INSN_OP3(0x06))
#define ARITH_XOR  (INSN_OP(2) | INSN_OP3(0x03))
#define ARITH_SUB  (INSN_OP(2) | INSN_OP3(0x04))
#define ARITH_SUBCC (INSN_OP(2) | INSN_OP3(0x14))
#define ARITH_ADDC (INSN_OP(2) | INSN_OP3(0x08))
#define ARITH_SUBC (INSN_OP(2) | INSN_OP3(0x0c))
#define ARITH_UMUL (INSN_OP(2) | INSN_OP3(0x0a))
#define ARITH_SMUL (INSN_OP(2) | INSN_OP3(0x0b))
#define ARITH_UDIV (INSN_OP(2) | INSN_OP3(0x0e))
#define ARITH_SDIV (INSN_OP(2) | INSN_OP3(0x0f))
#define ARITH_MULX (INSN_OP(2) | INSN_OP3(0x09))
#define ARITH_UDIVX (INSN_OP(2) | INSN_OP3(0x0d))
#define ARITH_SDIVX (INSN_OP(2) | INSN_OP3(0x2d))
#define ARITH_MOVCC (INSN_OP(2) | INSN_OP3(0x2c))
#define ARITH_MOVR (INSN_OP(2) | INSN_OP3(0x2f))

#define ARITH_ADDXC (INSN_OP(2) | INSN_OP3(0x36) | INSN_OPF(0x11))
#define ARITH_UMULXHI (INSN_OP(2) | INSN_OP3(0x36) | INSN_OPF(0x16))

#define SHIFT_SLL  (INSN_OP(2) | INSN_OP3(0x25))
#define SHIFT_SRL  (INSN_OP(2) | INSN_OP3(0x26))
#define SHIFT_SRA  (INSN_OP(2) | INSN_OP3(0x27))

#define SHIFT_SLLX (INSN_OP(2) | INSN_OP3(0x25) | (1 << 12))
#define SHIFT_SRLX (INSN_OP(2) | INSN_OP3(0x26) | (1 << 12))
#define SHIFT_SRAX (INSN_OP(2) | INSN_OP3(0x27) | (1 << 12))

#define RDY        (INSN_OP(2) | INSN_OP3(0x28) | INSN_RS1(0))
#define WRY        (INSN_OP(2) | INSN_OP3(0x30) | INSN_RD(0))
#define JMPL       (INSN_OP(2) | INSN_OP3(0x38))
#define RETURN     (INSN_OP(2) | INSN_OP3(0x39))
#define SAVE       (INSN_OP(2) | INSN_OP3(0x3c))
#define RESTORE    (INSN_OP(2) | INSN_OP3(0x3d))
#define SETHI      (INSN_OP(0) | INSN_OP2(0x4))
#define CALL       INSN_OP(1)
#define LDUB       (INSN_OP(3) | INSN_OP3(0x01))
#define LDSB       (INSN_OP(3) | INSN_OP3(0x09))
#define LDUH       (INSN_OP(3) | INSN_OP3(0x02))
#define LDSH       (INSN_OP(3) | INSN_OP3(0x0a))
#define LDUW       (INSN_OP(3) | INSN_OP3(0x00))
#define LDSW       (INSN_OP(3) | INSN_OP3(0x08))
#define LDX        (INSN_OP(3) | INSN_OP3(0x0b))
#define STB        (INSN_OP(3) | INSN_OP3(0x05))
#define STH        (INSN_OP(3) | INSN_OP3(0x06))
#define STW        (INSN_OP(3) | INSN_OP3(0x04))
#define STX        (INSN_OP(3) | INSN_OP3(0x0e))
#define LDUBA      (INSN_OP(3) | INSN_OP3(0x11))
#define LDSBA      (INSN_OP(3) | INSN_OP3(0x19))
#define LDUHA      (INSN_OP(3) | INSN_OP3(0x12))
#define LDSHA      (INSN_OP(3) | INSN_OP3(0x1a))
#define LDUWA      (INSN_OP(3) | INSN_OP3(0x10))
#define LDSWA      (INSN_OP(3) | INSN_OP3(0x18))
#define LDXA       (INSN_OP(3) | INSN_OP3(0x1b))
#define STBA       (INSN_OP(3) | INSN_OP3(0x15))
#define STHA       (INSN_OP(3) | INSN_OP3(0x16))
#define STWA       (INSN_OP(3) | INSN_OP3(0x14))
#define STXA       (INSN_OP(3) | INSN_OP3(0x1e))

#define MEMBAR     (INSN_OP(2) | INSN_OP3(0x28) | INSN_RS1(15) | (1 << 13))

#define NOP        (SETHI | INSN_RD(TCG_REG_G0) | 0)

#ifndef ASI_PRIMARY_LITTLE
#define ASI_PRIMARY_LITTLE 0x88
#endif

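/* Little-endian loads and stores, via the ASI_PRIMARY_LITTLE address
   space identifier.  */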
#define LDUH_LE    (LDUHA | INSN_ASI(ASI_PRIMARY_LITTLE))
#define LDSH_LE    (LDSHA | INSN_ASI(ASI_PRIMARY_LITTLE))
#define LDUW_LE    (LDUWA | INSN_ASI(ASI_PRIMARY_LITTLE))
#define LDSW_LE    (LDSWA | INSN_ASI(ASI_PRIMARY_LITTLE))
#define LDX_LE     (LDXA  | INSN_ASI(ASI_PRIMARY_LITTLE))

#define STH_LE     (STHA  | INSN_ASI(ASI_PRIMARY_LITTLE))
#define STW_LE     (STWA  | INSN_ASI(ASI_PRIMARY_LITTLE))
#define STX_LE     (STXA  | INSN_ASI(ASI_PRIMARY_LITTLE))

#ifndef use_vis3_instructions
bool use_vis3_instructions;
#endif

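/* Test whether VAL fits in a signed field of the given number of bits.  */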
static inline int check_fit_i64(int64_t val, unsigned int bits)
{
    return val == sextract64(val, 0, bits);
}

static inline int check_fit_i32(int32_t val, unsigned int bits)
{
    return val == sextract32(val, 0, bits);
}

#define check_fit_tl    check_fit_i64
#if SPARC64
# define check_fit_ptr  check_fit_i64
#else
# define check_fit_ptr  check_fit_i32
#endif

static void patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend)
{
    uint32_t insn;

    tcg_debug_assert(addend == 0);
    value = tcg_ptr_byte_diff((tcg_insn_unit *)value, code_ptr);

    switch (type) {
    case R_SPARC_WDISP16:
        if (!check_fit_ptr(value >> 2, 16)) {
            tcg_abort();
        }
        insn = *code_ptr;
        insn &= ~INSN_OFF16(-1);
        insn |= INSN_OFF16(value);
        *code_ptr = insn;
        break;
    case R_SPARC_WDISP19:
        if (!check_fit_ptr(value >> 2, 19)) {
            tcg_abort();
        }
        insn = *code_ptr;
        insn &= ~INSN_OFF19(-1);
        insn |= INSN_OFF19(value);
        *code_ptr = insn;
        break;
    default:
        tcg_abort();
    }
}

/* parse target specific constraints */
static const char *target_parse_constraint(TCGArgConstraint *ct,
                                           const char *ct_str, TCGType type)
{
    switch (*ct_str++) {
    case 'r':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set32(ct->u.regs, 0, 0xffffffff);
        break;
    case 'R':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set32(ct->u.regs, 0, ALL_64);
        break;
    case 'A': /* qemu_ld/st address constraint */
        ct->ct |= TCG_CT_REG;
        tcg_regset_set32(ct->u.regs, 0,
                         TARGET_LONG_BITS == 64 ? ALL_64 : 0xffffffff);
    reserve_helpers:
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_O0);
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_O1);
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_O2);
        break;
    case 's': /* qemu_st data 32-bit constraint */
        ct->ct |= TCG_CT_REG;
        tcg_regset_set32(ct->u.regs, 0, 0xffffffff);
        goto reserve_helpers;
    case 'S': /* qemu_st data 64-bit constraint */
        ct->ct |= TCG_CT_REG;
        tcg_regset_set32(ct->u.regs, 0, ALL_64);
        goto reserve_helpers;
    case 'I':
        ct->ct |= TCG_CT_CONST_S11;
        break;
    case 'J':
        ct->ct |= TCG_CT_CONST_S13;
        break;
    case 'Z':
        ct->ct |= TCG_CT_CONST_ZERO;
        break;
    default:
        return NULL;
    }
    return ct_str;
}

/* test if a constant matches the constraint */
static inline int tcg_target_const_match(tcg_target_long val, TCGType type,
                                         const TCGArgConstraint *arg_ct)
{
    int ct = arg_ct->ct;

    if (ct & TCG_CT_CONST) {
        return 1;
    }

    if (type == TCG_TYPE_I32) {
        val = (int32_t)val;
    }

    if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
        return 1;
    } else if ((ct & TCG_CT_CONST_S11) && check_fit_tl(val, 11)) {
        return 1;
    } else if ((ct & TCG_CT_CONST_S13) && check_fit_tl(val, 13)) {
        return 1;
    } else {
        return 0;
    }
}

static inline void tcg_out_arith(TCGContext *s, TCGReg rd, TCGReg rs1,
                                 TCGReg rs2, int op)
{
    tcg_out32(s, op | INSN_RD(rd) | INSN_RS1(rs1) | INSN_RS2(rs2));
}

static inline void tcg_out_arithi(TCGContext *s, TCGReg rd, TCGReg rs1,
                                  int32_t offset, int op)
{
    tcg_out32(s, op | INSN_RD(rd) | INSN_RS1(rs1) | INSN_IMM13(offset));
}

static void tcg_out_arithc(TCGContext *s, TCGReg rd, TCGReg rs1,
                           int32_t val2, int val2const, int op)
{
    tcg_out32(s, op | INSN_RD(rd) | INSN_RS1(rs1)
              | (val2const ? INSN_IMM13(val2) : INSN_RS2(val2)));
}

static inline void tcg_out_mov(TCGContext *s, TCGType type,
                               TCGReg ret, TCGReg arg)
{
    if (ret != arg) {
        tcg_out_arith(s, ret, arg, TCG_REG_G0, ARITH_OR);
    }
}

static inline void tcg_out_sethi(TCGContext *s, TCGReg ret, uint32_t arg)
{
    tcg_out32(s, SETHI | INSN_RD(ret) | ((arg & 0xfffffc00) >> 10));
}

static inline void tcg_out_movi_imm13(TCGContext *s, TCGReg ret, int32_t arg)
{
    tcg_out_arithi(s, ret, TCG_REG_G0, arg, ARITH_OR);
}

static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
                             tcg_target_long arg, bool in_prologue)
{
    tcg_target_long hi, lo = (int32_t)arg;
    tcg_target_long test, lsb;

    /* Make sure we test 32-bit constants for imm13 properly.  */
    if (type == TCG_TYPE_I32) {
        arg = lo;
    }

    /* A 13-bit constant sign-extended to 64-bits.  */
    if (check_fit_tl(arg, 13)) {
        tcg_out_movi_imm13(s, ret, arg);
        return;
    }

    /* A 32-bit constant, or 32-bit zero-extended to 64-bits.  */
    if (type == TCG_TYPE_I32 || arg == (uint32_t)arg) {
        tcg_out_sethi(s, ret, arg);
        if (arg & 0x3ff) {
            tcg_out_arithi(s, ret, ret, arg & 0x3ff, ARITH_OR);
        }
        return;
    }

    /* A 32-bit constant sign-extended to 64-bits.  */
    if (arg == lo) {
        tcg_out_sethi(s, ret, ~arg);
        tcg_out_arithi(s, ret, ret, (arg & 0x3ff) | -0x400, ARITH_XOR);
        return;
    }

    /* A 21-bit constant, shifted.  */
    lsb = ctz64(arg);
    test = (tcg_target_long)arg >> lsb;
    if (check_fit_tl(test, 13)) {
        tcg_out_movi_imm13(s, ret, test);
        tcg_out_arithi(s, ret, ret, lsb, SHIFT_SLLX);
        return;
    } else if (lsb > 10 && test == extract64(test, 0, 21)) {
        tcg_out_sethi(s, ret, test << 10);
        tcg_out_arithi(s, ret, ret, lsb - 10, SHIFT_SLLX);
        return;
    }

    if (USE_REG_TB && !in_prologue) {
        intptr_t diff = arg - (uintptr_t)s->code_gen_ptr;
        if (check_fit_ptr(diff, 13)) {
            tcg_out_arithi(s, ret, TCG_REG_TB, diff, ARITH_ADD);
            return;
        }
    }

    /* A 64-bit constant decomposed into 2 32-bit pieces.  */
    if (check_fit_i32(lo, 13)) {
        hi = (arg - lo) >> 32;
        tcg_out_movi(s, TCG_TYPE_I32, ret, hi);
        tcg_out_arithi(s, ret, ret, 32, SHIFT_SLLX);
        tcg_out_arithi(s, ret, ret, lo, ARITH_ADD);
    } else {
        hi = arg >> 32;
        tcg_out_movi(s, TCG_TYPE_I32, ret, hi);
        tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_T2, lo);
        tcg_out_arithi(s, ret, ret, 32, SHIFT_SLLX);
        tcg_out_arith(s, ret, ret, TCG_REG_T2, ARITH_OR);
    }
}

static inline void tcg_out_movi(TCGContext *s, TCGType type,
                                TCGReg ret, tcg_target_long arg)
{
    tcg_out_movi_int(s, type, ret, arg, false);
}

static inline void tcg_out_ldst_rr(TCGContext *s, TCGReg data, TCGReg a1,
                                   TCGReg a2, int op)
{
    tcg_out32(s, op | INSN_RD(data) | INSN_RS1(a1) | INSN_RS2(a2));
}

static void tcg_out_ldst(TCGContext *s, TCGReg ret, TCGReg addr,
                         intptr_t offset, int op)
{
    if (check_fit_ptr(offset, 13)) {
        tcg_out32(s, op | INSN_RD(ret) | INSN_RS1(addr) |
                  INSN_IMM13(offset));
    } else {
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_T1, offset);
        tcg_out_ldst_rr(s, ret, addr, TCG_REG_T1, op);
    }
}

static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
                              TCGReg arg1, intptr_t arg2)
{
    tcg_out_ldst(s, ret, arg1, arg2, (type == TCG_TYPE_I32 ? LDUW : LDX));
}

static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
                              TCGReg arg1, intptr_t arg2)
{
    tcg_out_ldst(s, arg, arg1, arg2, (type == TCG_TYPE_I32 ? STW : STX));
}

static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                               TCGReg base, intptr_t ofs)
{
    if (val == 0) {
        tcg_out_st(s, type, TCG_REG_G0, base, ofs);
        return true;
    }
    return false;
}

static void tcg_out_ld_ptr(TCGContext *s, TCGReg ret, uintptr_t arg)
{
    intptr_t diff = arg - (uintptr_t)s->code_gen_ptr;
    if (USE_REG_TB && check_fit_ptr(diff, 13)) {
        tcg_out_ld(s, TCG_TYPE_PTR, ret, TCG_REG_TB, diff);
        return;
    }
    tcg_out_movi(s, TCG_TYPE_PTR, ret, arg & ~0x3ff);
    tcg_out_ld(s, TCG_TYPE_PTR, ret, ret, arg & 0x3ff);
}

static inline void tcg_out_sety(TCGContext *s, TCGReg rs)
{
    tcg_out32(s, WRY | INSN_RS1(TCG_REG_G0) | INSN_RS2(rs));
}

static inline void tcg_out_rdy(TCGContext *s, TCGReg rd)
{
    tcg_out32(s, RDY | INSN_RD(rd));
}

static void tcg_out_div32(TCGContext *s, TCGReg rd, TCGReg rs1,
                          int32_t val2, int val2const, int uns)
{
    /* Load Y with the sign/zero extension of RS1 to 64-bits.  */
    if (uns) {
        tcg_out_sety(s, TCG_REG_G0);
    } else {
        tcg_out_arithi(s, TCG_REG_T1, rs1, 31, SHIFT_SRA);
        tcg_out_sety(s, TCG_REG_T1);
    }

    tcg_out_arithc(s, rd, rs1, val2, val2const,
                   uns ? ARITH_UDIV : ARITH_SDIV);
}

static inline void tcg_out_nop(TCGContext *s)
{
    tcg_out32(s, NOP);
}

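/* Map TCG comparison conditions to SPARC integer condition codes.  */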
static const uint8_t tcg_cond_to_bcond[] = {
    [TCG_COND_EQ] = COND_E,
    [TCG_COND_NE] = COND_NE,
    [TCG_COND_LT] = COND_L,
    [TCG_COND_GE] = COND_GE,
    [TCG_COND_LE] = COND_LE,
    [TCG_COND_GT] = COND_G,
    [TCG_COND_LTU] = COND_CS,
    [TCG_COND_GEU] = COND_CC,
    [TCG_COND_LEU] = COND_LEU,
    [TCG_COND_GTU] = COND_GU,
};

static const uint8_t tcg_cond_to_rcond[] = {
    [TCG_COND_EQ] = RCOND_Z,
    [TCG_COND_NE] = RCOND_NZ,
    [TCG_COND_LT] = RCOND_LZ,
    [TCG_COND_GT] = RCOND_GZ,
    [TCG_COND_LE] = RCOND_LEZ,
    [TCG_COND_GE] = RCOND_GEZ
};

static void tcg_out_bpcc0(TCGContext *s, int scond, int flags, int off19)
{
    tcg_out32(s, INSN_OP(0) | INSN_OP2(1) | INSN_COND(scond) | flags | off19);
}

static void tcg_out_bpcc(TCGContext *s, int scond, int flags, TCGLabel *l)
{
    int off19;

    if (l->has_value) {
        off19 = INSN_OFF19(tcg_pcrel_diff(s, l->u.value_ptr));
    } else {
        /* Make sure to preserve destinations during retranslation.  */
        off19 = *s->code_ptr & INSN_OFF19(-1);
        tcg_out_reloc(s, s->code_ptr, R_SPARC_WDISP19, l, 0);
    }
    tcg_out_bpcc0(s, scond, flags, off19);
}

static void tcg_out_cmp(TCGContext *s, TCGReg c1, int32_t c2, int c2const)
{
    tcg_out_arithc(s, TCG_REG_G0, c1, c2, c2const, ARITH_SUBCC);
}

static void tcg_out_brcond_i32(TCGContext *s, TCGCond cond, TCGReg arg1,
                               int32_t arg2, int const_arg2, TCGLabel *l)
{
    tcg_out_cmp(s, arg1, arg2, const_arg2);
    tcg_out_bpcc(s, tcg_cond_to_bcond[cond], BPCC_ICC | BPCC_PT, l);
    tcg_out_nop(s);
}

static void tcg_out_movcc(TCGContext *s, TCGCond cond, int cc, TCGReg ret,
                          int32_t v1, int v1const)
{
    tcg_out32(s, ARITH_MOVCC | cc | INSN_RD(ret)
              | INSN_RS1(tcg_cond_to_bcond[cond])
              | (v1const ? INSN_IMM11(v1) : INSN_RS2(v1)));
}

static void tcg_out_movcond_i32(TCGContext *s, TCGCond cond, TCGReg ret,
                                TCGReg c1, int32_t c2, int c2const,
                                int32_t v1, int v1const)
{
    tcg_out_cmp(s, c1, c2, c2const);
    tcg_out_movcc(s, cond, MOVCC_ICC, ret, v1, v1const);
}

static void tcg_out_brcond_i64(TCGContext *s, TCGCond cond, TCGReg arg1,
                               int32_t arg2, int const_arg2, TCGLabel *l)
{
    /* For 64-bit signed comparisons vs zero, we can avoid the compare.  */
    if (arg2 == 0 && !is_unsigned_cond(cond)) {
        int off16;

        if (l->has_value) {
            off16 = INSN_OFF16(tcg_pcrel_diff(s, l->u.value_ptr));
        } else {
            /* Make sure to preserve destinations during retranslation.  */
            off16 = *s->code_ptr & INSN_OFF16(-1);
            tcg_out_reloc(s, s->code_ptr, R_SPARC_WDISP16, l, 0);
        }
        tcg_out32(s, INSN_OP(0) | INSN_OP2(3) | BPR_PT | INSN_RS1(arg1)
                  | INSN_COND(tcg_cond_to_rcond[cond]) | off16);
    } else {
        tcg_out_cmp(s, arg1, arg2, const_arg2);
        tcg_out_bpcc(s, tcg_cond_to_bcond[cond], BPCC_XCC | BPCC_PT, l);
    }
    tcg_out_nop(s);
}

static void tcg_out_movr(TCGContext *s, TCGCond cond, TCGReg ret, TCGReg c1,
                         int32_t v1, int v1const)
{
    tcg_out32(s, ARITH_MOVR | INSN_RD(ret) | INSN_RS1(c1)
              | (tcg_cond_to_rcond[cond] << 10)
              | (v1const ? INSN_IMM10(v1) : INSN_RS2(v1)));
}

static void tcg_out_movcond_i64(TCGContext *s, TCGCond cond, TCGReg ret,
                                TCGReg c1, int32_t c2, int c2const,
                                int32_t v1, int v1const)
{
    /* For 64-bit signed comparisons vs zero, we can avoid the compare.
       Note that the immediate range is one bit smaller, so we must check
       for that as well.  */
    if (c2 == 0 && !is_unsigned_cond(cond)
        && (!v1const || check_fit_i32(v1, 10))) {
        tcg_out_movr(s, cond, ret, c1, v1, v1const);
    } else {
        tcg_out_cmp(s, c1, c2, c2const);
        tcg_out_movcc(s, cond, MOVCC_XCC, ret, v1, v1const);
    }
}

static void tcg_out_setcond_i32(TCGContext *s, TCGCond cond, TCGReg ret,
                                TCGReg c1, int32_t c2, int c2const)
{
    /* For 32-bit comparisons, we can play games with ADDC/SUBC.  */
    switch (cond) {
    case TCG_COND_LTU:
    case TCG_COND_GEU:
        /* The result of the comparison is in the carry bit.  */
        break;

    case TCG_COND_EQ:
    case TCG_COND_NE:
        /* For equality, we can transform to inequality vs zero.  */
        if (c2 != 0) {
            tcg_out_arithc(s, TCG_REG_T1, c1, c2, c2const, ARITH_XOR);
            c2 = TCG_REG_T1;
        } else {
            c2 = c1;
        }
        c1 = TCG_REG_G0, c2const = 0;
        cond = (cond == TCG_COND_EQ ? TCG_COND_GEU : TCG_COND_LTU);
        break;

    case TCG_COND_GTU:
    case TCG_COND_LEU:
        /* If we don't need to load a constant into a register, we can
           swap the operands on GTU/LEU.  There's no benefit to loading
           the constant into a temporary register.  */
        if (!c2const || c2 == 0) {
            TCGReg t = c1;
            c1 = c2;
            c2 = t;
            c2const = 0;
            cond = tcg_swap_cond(cond);
            break;
        }
        /* FALLTHRU */
741 742 743 744

    default:
        tcg_out_cmp(s, c1, c2, c2const);
        tcg_out_movi_imm13(s, ret, 0);
        tcg_out_movcc(s, cond, MOVCC_ICC, ret, 1, 1);
        return;
    }

    tcg_out_cmp(s, c1, c2, c2const);
    if (cond == TCG_COND_LTU) {
        tcg_out_arithi(s, ret, TCG_REG_G0, 0, ARITH_ADDC);
    } else {
        tcg_out_arithi(s, ret, TCG_REG_G0, -1, ARITH_SUBC);
    }
}

static void tcg_out_setcond_i64(TCGContext *s, TCGCond cond, TCGReg ret,
                                TCGReg c1, int32_t c2, int c2const)
{
    if (use_vis3_instructions) {
        switch (cond) {
        case TCG_COND_NE:
            if (c2 != 0) {
                break;
            }
            c2 = c1, c2const = 0, c1 = TCG_REG_G0;
            /* FALLTHRU */
        case TCG_COND_LTU:
            tcg_out_cmp(s, c1, c2, c2const);
            tcg_out_arith(s, ret, TCG_REG_G0, TCG_REG_G0, ARITH_ADDXC);
            return;
        default:
            break;
        }
    }

    /* For 64-bit signed comparisons vs zero, we can avoid the compare
       if the input does not overlap the output.  */
    if (c2 == 0 && !is_unsigned_cond(cond) && c1 != ret) {
        tcg_out_movi_imm13(s, ret, 0);
        tcg_out_movr(s, cond, ret, c1, 1, 1);
    } else {
        tcg_out_cmp(s, c1, c2, c2const);
        tcg_out_movi_imm13(s, ret, 0);
        tcg_out_movcc(s, cond, MOVCC_XCC, ret, 1, 1);
    }
}

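/* Emit a double-word add/sub: OPL sets the carry from the low halves,
   OPH consumes it for the high halves.  */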
static void tcg_out_addsub2_i32(TCGContext *s, TCGReg rl, TCGReg rh,
                                TCGReg al, TCGReg ah, int32_t bl, int blconst,
                                int32_t bh, int bhconst, int opl, int oph)
{
    TCGReg tmp = TCG_REG_T1;

    /* Note that the low parts are fully consumed before tmp is set.  */
    if (rl != ah && (bhconst || rl != bh)) {
        tmp = rl;
    }

    tcg_out_arithc(s, tmp, al, bl, blconst, opl);
    tcg_out_arithc(s, rh, ah, bh, bhconst, oph);
    tcg_out_mov(s, TCG_TYPE_I32, rl, tmp);
}

static void tcg_out_addsub2_i64(TCGContext *s, TCGReg rl, TCGReg rh,
                                TCGReg al, TCGReg ah, int32_t bl, int blconst,
                                int32_t bh, int bhconst, bool is_sub)
{
    TCGReg tmp = TCG_REG_T1;

    /* Note that the low parts are fully consumed before tmp is set.  */
    if (rl != ah && (bhconst || rl != bh)) {
        tmp = rl;
    }

    tcg_out_arithc(s, tmp, al, bl, blconst, is_sub ? ARITH_SUBCC : ARITH_ADDCC);

    if (use_vis3_instructions && !is_sub) {
        /* Note that ADDXC doesn't accept immediates.  */
        if (bhconst && bh != 0) {
           tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_T2, bh);
           bh = TCG_REG_T2;
        }
        tcg_out_arith(s, rh, ah, bh, ARITH_ADDXC);
    } else if (bh == TCG_REG_G0) {
        /* If we have a zero, we can perform the operation in two insns,
           with the arithmetic first, and a conditional move into place.  */
        if (rh == ah) {
            tcg_out_arithi(s, TCG_REG_T2, ah, 1,
                           is_sub ? ARITH_SUB : ARITH_ADD);
            tcg_out_movcc(s, TCG_COND_LTU, MOVCC_XCC, rh, TCG_REG_T2, 0);
        } else {
            tcg_out_arithi(s, rh, ah, 1, is_sub ? ARITH_SUB : ARITH_ADD);
            tcg_out_movcc(s, TCG_COND_GEU, MOVCC_XCC, rh, ah, 0);
        }
    } else {
        /* Otherwise adjust BH as if there is carry into T2 ... */
        if (bhconst) {
            tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_T2, bh + (is_sub ? -1 : 1));
        } else {
            tcg_out_arithi(s, TCG_REG_T2, bh, 1,
                           is_sub ? ARITH_SUB : ARITH_ADD);
        }
        /* ... smoosh T2 back to original BH if carry is clear ... */
        tcg_out_movcc(s, TCG_COND_GEU, MOVCC_XCC, TCG_REG_T2, bh, bhconst);
        /* ... and finally perform the arithmetic with the new operand.  */
        tcg_out_arith(s, rh, ah, TCG_REG_T2, is_sub ? ARITH_SUB : ARITH_ADD);
    }

    tcg_out_mov(s, TCG_TYPE_I64, rl, tmp);
}

static void tcg_out_call_nodelay(TCGContext *s, tcg_insn_unit *dest,
                                 bool in_prologue)
{
    ptrdiff_t disp = tcg_pcrel_diff(s, dest);

    if (disp == (int32_t)disp) {
        tcg_out32(s, CALL | (uint32_t)disp >> 2);
    } else {
        uintptr_t desti = (uintptr_t)dest;
        tcg_out_movi_int(s, TCG_TYPE_PTR, TCG_REG_T1,
                         desti & ~0xfff, in_prologue);
        tcg_out_arithi(s, TCG_REG_O7, TCG_REG_T1, desti & 0xfff, JMPL);
    }
}

static void tcg_out_call(TCGContext *s, tcg_insn_unit *dest)
{
    tcg_out_call_nodelay(s, dest, false);
    tcg_out_nop(s);
}

static void tcg_out_mb(TCGContext *s, TCGArg a0)
{
    /* Note that the TCG memory order constants mirror the Sparc MEMBAR.  */
    tcg_out32(s, MEMBAR | (a0 & TCG_MO_ALL));
}

#ifdef CONFIG_SOFTMMU
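/* Entry points of the per-memop helper trampolines built below.  */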
static tcg_insn_unit *qemu_ld_trampoline[16];
static tcg_insn_unit *qemu_st_trampoline[16];

static void emit_extend(TCGContext *s, TCGReg r, int op)
{
    /* Emit zero extend of 8, 16 or 32 bit data as
     * required by the MO_* value op; do nothing for 64 bit.
     */
    switch (op & MO_SIZE) {
    case MO_8:
        tcg_out_arithi(s, r, r, 0xff, ARITH_AND);
        break;
    case MO_16:
        tcg_out_arithi(s, r, r, 16, SHIFT_SLL);
        tcg_out_arithi(s, r, r, 16, SHIFT_SRL);
        break;
    case MO_32:
        if (SPARC64) {
            tcg_out_arith(s, r, r, 0, SHIFT_SRL);
        }
        break;
    case MO_64:
        break;
    }
}

static void build_trampolines(TCGContext *s)
{
    static void * const qemu_ld_helpers[16] = {
        [MO_UB]   = helper_ret_ldub_mmu,
        [MO_SB]   = helper_ret_ldsb_mmu,
        [MO_LEUW] = helper_le_lduw_mmu,
        [MO_LESW] = helper_le_ldsw_mmu,
        [MO_LEUL] = helper_le_ldul_mmu,
        [MO_LEQ]  = helper_le_ldq_mmu,
        [MO_BEUW] = helper_be_lduw_mmu,
        [MO_BESW] = helper_be_ldsw_mmu,
        [MO_BEUL] = helper_be_ldul_mmu,
        [MO_BEQ]  = helper_be_ldq_mmu,
    };
    static void * const qemu_st_helpers[16] = {
        [MO_UB]   = helper_ret_stb_mmu,
        [MO_LEUW] = helper_le_stw_mmu,
        [MO_LEUL] = helper_le_stl_mmu,
        [MO_LEQ]  = helper_le_stq_mmu,
        [MO_BEUW] = helper_be_stw_mmu,
        [MO_BEUL] = helper_be_stl_mmu,
        [MO_BEQ]  = helper_be_stq_mmu,
    };

    int i;
    TCGReg ra;

    for (i = 0; i < 16; ++i) {
        if (qemu_ld_helpers[i] == NULL) {
            continue;
        }

        /* May as well align the trampoline.  */
        while ((uintptr_t)s->code_ptr & 15) {
            tcg_out_nop(s);
        }
        qemu_ld_trampoline[i] = s->code_ptr;

        if (SPARC64 || TARGET_LONG_BITS == 32) {
            ra = TCG_REG_O3;
        } else {
            /* Install the high part of the address.  */
            tcg_out_arithi(s, TCG_REG_O1, TCG_REG_O2, 32, SHIFT_SRLX);
            ra = TCG_REG_O4;
        }

        /* Set the retaddr operand.  */
        tcg_out_mov(s, TCG_TYPE_PTR, ra, TCG_REG_O7);
        /* Set the env operand.  */
        tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_O0, TCG_AREG0);
        /* Tail call.  */
        tcg_out_call_nodelay(s, qemu_ld_helpers[i], true);
        tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_O7, ra);
    }

    for (i = 0; i < 16; ++i) {
        if (qemu_st_helpers[i] == NULL) {
            continue;
        }

        /* May as well align the trampoline.  */
        while ((uintptr_t)s->code_ptr & 15) {
            tcg_out_nop(s);
        }
        qemu_st_trampoline[i] = s->code_ptr;

        if (SPARC64) {
            emit_extend(s, TCG_REG_O2, i);
            ra = TCG_REG_O4;
        } else {
            ra = TCG_REG_O1;
            if (TARGET_LONG_BITS == 64) {
                /* Install the high part of the address.  */
                tcg_out_arithi(s, ra, ra + 1, 32, SHIFT_SRLX);
                ra += 2;
            } else {
                ra += 1;
            }
            if ((i & MO_SIZE) == MO_64) {
                /* Install the high part of the data.  */
                tcg_out_arithi(s, ra, ra + 1, 32, SHIFT_SRLX);
                ra += 2;
            } else {
                emit_extend(s, ra, i);
                ra += 1;
            }
            /* Skip the oi argument.  */
            ra += 1;
        }
                
        /* Set the retaddr operand.  */
        if (ra >= TCG_REG_O6) {
            tcg_out_st(s, TCG_TYPE_PTR, TCG_REG_O7, TCG_REG_CALL_STACK,
                       TCG_TARGET_CALL_STACK_OFFSET);
            ra = TCG_REG_G1;
        }
        tcg_out_mov(s, TCG_TYPE_PTR, ra, TCG_REG_O7);
        /* Set the env operand.  */
        tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_O0, TCG_AREG0);
        /* Tail call.  */
        tcg_out_call_nodelay(s, qemu_st_helpers[i], true);
        tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_O7, ra);
    }
}
#endif

/* Generate global QEMU prologue and epilogue code */
static void tcg_target_qemu_prologue(TCGContext *s)
{
    int tmp_buf_size, frame_size;

    /* The TCG temp buffer is at the top of the frame, immediately
       below the frame pointer.  */
    tmp_buf_size = CPU_TEMP_BUF_NLONGS * (int)sizeof(long);
    tcg_set_frame(s, TCG_REG_I6, TCG_TARGET_STACK_BIAS - tmp_buf_size,
                  tmp_buf_size);

    /* TCG_TARGET_CALL_STACK_OFFSET includes the stack bias, but is
       otherwise the minimal frame usable by callees.  */
    frame_size = TCG_TARGET_CALL_STACK_OFFSET - TCG_TARGET_STACK_BIAS;
    frame_size += TCG_STATIC_CALL_ARGS_SIZE + tmp_buf_size;
    frame_size += TCG_TARGET_STACK_ALIGN - 1;
    frame_size &= -TCG_TARGET_STACK_ALIGN;
    tcg_out32(s, SAVE | INSN_RD(TCG_REG_O6) | INSN_RS1(TCG_REG_O6) |
              INSN_IMM13(-frame_size));

#ifndef CONFIG_SOFTMMU
    if (guest_base != 0) {
        tcg_out_movi_int(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base, true);
        tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
    }
#endif

    /* We choose TCG_REG_TB such that no move is required.  */
    if (USE_REG_TB) {
        QEMU_BUILD_BUG_ON(TCG_REG_TB != TCG_REG_I1);
        tcg_regset_set_reg(s->reserved_regs, TCG_REG_TB);
    }

    tcg_out_arithi(s, TCG_REG_G0, TCG_REG_I1, 0, JMPL);
    /* delay slot */
    tcg_out_nop(s);

    /* Epilogue for goto_ptr.  */
    s->code_gen_epilogue = s->code_ptr;
    tcg_out_arithi(s, TCG_REG_G0, TCG_REG_I7, 8, RETURN);
    /* delay slot */
    tcg_out_movi_imm13(s, TCG_REG_O0, 0);

#ifdef CONFIG_SOFTMMU
    build_trampolines(s);
#endif
}

#if defined(CONFIG_SOFTMMU)
/* Perform the TLB load and compare.

   Inputs:
   ADDRLO and ADDRHI contain the possible two parts of the address.

   MEM_INDEX and S_BITS are the memory context and log2 size of the load.

   WHICH is the offset into the CPUTLBEntry structure of the slot to read.
   This should be offsetof addr_read or addr_write.

   The result of the TLB comparison is in %[ix]cc.  The sanitized address
   is in the returned register, maybe %o0.  The TLB addend is in %o1.  */

static TCGReg tcg_out_tlb_load(TCGContext *s, TCGReg addr, int mem_index,
                               TCGMemOp opc, int which)
{
    const TCGReg r0 = TCG_REG_O0;
    const TCGReg r1 = TCG_REG_O1;
    const TCGReg r2 = TCG_REG_O2;
    unsigned s_bits = opc & MO_SIZE;
    unsigned a_bits = get_alignment_bits(opc);
    int tlb_ofs;

    /* Shift the page number down.  */
    tcg_out_arithi(s, r1, addr, TARGET_PAGE_BITS, SHIFT_SRL);

    /* Mask out the page offset, except for the required alignment.
       We don't support unaligned accesses.  */
    if (a_bits < s_bits) {
        a_bits = s_bits;
    }
    tcg_out_movi(s, TCG_TYPE_TL, TCG_REG_T1,
                 TARGET_PAGE_MASK | ((1 << a_bits) - 1));

    /* Mask the tlb index.  */
    tcg_out_arithi(s, r1, r1, CPU_TLB_SIZE - 1, ARITH_AND);
    
    /* Mask page, part 2.  */
    tcg_out_arith(s, r0, addr, TCG_REG_T1, ARITH_AND);

    /* Shift the tlb index into place.  */
    tcg_out_arithi(s, r1, r1, CPU_TLB_ENTRY_BITS, SHIFT_SLL);

    /* Relative to the current ENV.  */
    tcg_out_arith(s, r1, TCG_AREG0, r1, ARITH_ADD);

    /* Find a base address that can load both tlb comparator and addend.  */
    tlb_ofs = offsetof(CPUArchState, tlb_table[mem_index][0]);
    if (!check_fit_ptr(tlb_ofs + sizeof(CPUTLBEntry), 13)) {
        if (tlb_ofs & ~0x3ff) {
            tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_T1, tlb_ofs & ~0x3ff);
            tcg_out_arith(s, r1, r1, TCG_REG_T1, ARITH_ADD);
        }
        tlb_ofs &= 0x3ff;
    }

    /* Load the tlb comparator and the addend.  */
    tcg_out_ld(s, TCG_TYPE_TL, r2, r1, tlb_ofs + which);
    tcg_out_ld(s, TCG_TYPE_PTR, r1, r1, tlb_ofs+offsetof(CPUTLBEntry, addend));

    /* subcc arg0, arg2, %g0 */
    tcg_out_cmp(s, r0, r2, 0);

    /* If the guest address must be zero-extended, do so now.  */
    if (SPARC64 && TARGET_LONG_BITS == 32) {
        tcg_out_arithi(s, r0, addr, 0, SHIFT_SRL);
        return r0;
    }
    return addr;
}
#endif /* CONFIG_SOFTMMU */

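/* Load opcodes, indexed by MO_BSWAP | MO_SSIZE.  */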
static const int qemu_ld_opc[16] = {
    [MO_UB]   = LDUB,
    [MO_SB]   = LDSB,

    [MO_BEUW] = LDUH,
    [MO_BESW] = LDSH,
    [MO_BEUL] = LDUW,
    [MO_BESL] = LDSW,
    [MO_BEQ]  = LDX,

    [MO_LEUW] = LDUH_LE,
    [MO_LESW] = LDSH_LE,
    [MO_LEUL] = LDUW_LE,
    [MO_LESL] = LDSW_LE,
    [MO_LEQ]  = LDX_LE,
};

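/* Store opcodes, indexed by MO_BSWAP | MO_SIZE.  */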
static const int qemu_st_opc[16] = {
    [MO_UB]   = STB,

    [MO_BEUW] = STH,
    [MO_BEUL] = STW,
    [MO_BEQ]  = STX,

    [MO_LEUW] = STH_LE,
    [MO_LEUL] = STW_LE,
    [MO_LEQ]  = STX_LE,
};

static void tcg_out_qemu_ld(TCGContext *s, TCGReg data, TCGReg addr,
                            TCGMemOpIdx oi, bool is_64)
{
    TCGMemOp memop = get_memop(oi);
#ifdef CONFIG_SOFTMMU
    unsigned memi = get_mmuidx(oi);
    TCGReg addrz, param;
    tcg_insn_unit *func;
    tcg_insn_unit *label_ptr;

    addrz = tcg_out_tlb_load(s, addr, memi, memop,
                             offsetof(CPUTLBEntry, addr_read));

    /* The fast path is exactly one insn.  Thus we can perform the
       entire TLB Hit in the (annulled) delay slot of the branch
       over the TLB Miss case.  */

    /* beq,a,pt %[xi]cc, label0 */
    label_ptr = s->code_ptr;
    tcg_out_bpcc0(s, COND_E, BPCC_A | BPCC_PT
                  | (TARGET_LONG_BITS == 64 ? BPCC_XCC : BPCC_ICC), 0);
    /* delay slot */
    tcg_out_ldst_rr(s, data, addrz, TCG_REG_O1,
                    qemu_ld_opc[memop & (MO_BSWAP | MO_SSIZE)]);

    /* TLB Miss.  */

    param = TCG_REG_O1;
    if (!SPARC64 && TARGET_LONG_BITS == 64) {
        /* Skip the high-part; we'll perform the extract in the trampoline.  */
        param++;
    }
    tcg_out_mov(s, TCG_TYPE_REG, param++, addrz);

    /* We use the helpers to extend SB and SW data, leaving the case
       of SL needing explicit extending below.  */
    if ((memop & MO_SSIZE) == MO_SL) {
        func = qemu_ld_trampoline[memop & (MO_BSWAP | MO_SIZE)];
    } else {
        func = qemu_ld_trampoline[memop & (MO_BSWAP | MO_SSIZE)];
    }
    tcg_debug_assert(func != NULL);
    tcg_out_call_nodelay(s, func, false);
    /* delay slot */
    tcg_out_movi(s, TCG_TYPE_I32, param, oi);

    /* Recall that all of the helpers return 64-bit results.
       Which complicates things for sparcv8plus.  */
    if (SPARC64) {
        /* We let the helper sign-extend SB and SW, but leave SL for here.  */
        if (is_64 && (memop & MO_SSIZE) == MO_SL) {
            tcg_out_arithi(s, data, TCG_REG_O0, 0, SHIFT_SRA);
        } else {
            tcg_out_mov(s, TCG_TYPE_REG, data, TCG_REG_O0);
        }
    } else {
        if ((memop & MO_SIZE) == MO_64) {
            tcg_out_arithi(s, TCG_REG_O0, TCG_REG_O0, 32, SHIFT_SLLX);
            tcg_out_arithi(s, TCG_REG_O1, TCG_REG_O1, 0, SHIFT_SRL);
            tcg_out_arith(s, data, TCG_REG_O0, TCG_REG_O1, ARITH_OR);
        } else if (is_64) {
            /* Re-extend from 32-bit rather than reassembling when we
               know the high register must be an extension.  */
            tcg_out_arithi(s, data, TCG_REG_O1, 0,
                           memop & MO_SIGN ? SHIFT_SRA : SHIFT_SRL);
        } else {
            tcg_out_mov(s, TCG_TYPE_I32, data, TCG_REG_O1);
        }
    }

    *label_ptr |= INSN_OFF19(tcg_ptr_byte_diff(s->code_ptr, label_ptr));
#else
    if (SPARC64 && TARGET_LONG_BITS == 32) {
        tcg_out_arithi(s, TCG_REG_T1, addr, 0, SHIFT_SRL);
        addr = TCG_REG_T1;
    }
    tcg_out_ldst_rr(s, data, addr,
                    (guest_base ? TCG_GUEST_BASE_REG : TCG_REG_G0),
                    qemu_ld_opc[memop & (MO_BSWAP | MO_SSIZE)]);
#endif /* CONFIG_SOFTMMU */
}

static void tcg_out_qemu_st(TCGContext *s, TCGReg data, TCGReg addr,
                            TCGMemOpIdx oi)
{
    TCGMemOp memop = get_memop(oi);
#ifdef CONFIG_SOFTMMU
    unsigned memi = get_mmuidx(oi);
    TCGReg addrz, param;
    tcg_insn_unit *func;
    tcg_insn_unit *label_ptr;

    addrz = tcg_out_tlb_load(s, addr, memi, memop,
                             offsetof(CPUTLBEntry, addr_write));

    /* The fast path is exactly one insn.  Thus we can perform the entire
       TLB Hit in the (annulled) delay slot of the branch over TLB Miss.  */
    /* beq,a,pt %[xi]cc, label0 */
    label_ptr = s->code_ptr;
    tcg_out_bpcc0(s, COND_E, BPCC_A | BPCC_PT
                  | (TARGET_LONG_BITS == 64 ? BPCC_XCC : BPCC_ICC), 0);
    /* delay slot */
    tcg_out_ldst_rr(s, data, addrz, TCG_REG_O1,
                    qemu_st_opc[memop & (MO_BSWAP | MO_SIZE)]);

    /* TLB Miss.  */

    param = TCG_REG_O1;
    if (!SPARC64 && TARGET_LONG_BITS == 64) {
        /* Skip the high-part; we'll perform the extract in the trampoline.  */
        param++;
    }
    tcg_out_mov(s, TCG_TYPE_REG, param++, addrz);
    if (!SPARC64 && (memop & MO_SIZE) == MO_64) {
        /* Skip the high-part; we'll perform the extract in the trampoline.  */
        param++;
    }
    tcg_out_mov(s, TCG_TYPE_REG, param++, data);

    func = qemu_st_trampoline[memop & (MO_BSWAP | MO_SIZE)];
    tcg_debug_assert(func != NULL);
    tcg_out_call_nodelay(s, func, false);
    /* delay slot */
    tcg_out_movi(s, TCG_TYPE_I32, param, oi);

    *label_ptr |= INSN_OFF19(tcg_ptr_byte_diff(s->code_ptr, label_ptr));
#else
    if (SPARC64 && TARGET_LONG_BITS == 32) {
        tcg_out_arithi(s, TCG_REG_T1, addr, 0, SHIFT_SRL);
        addr = TCG_REG_T1;
    }
    tcg_out_ldst_rr(s, data, addr,
                    (guest_base ? TCG_GUEST_BASE_REG : TCG_REG_G0),
                    qemu_st_opc[memop & (MO_BSWAP | MO_SIZE)]);
#endif /* CONFIG_SOFTMMU */
}

static void tcg_out_op(TCGContext *s, TCGOpcode opc,
                       const TCGArg args[TCG_MAX_OP_ARGS],
                       const int const_args[TCG_MAX_OP_ARGS])
{
    TCGArg a0, a1, a2;
    int c, c2;

    /* Hoist the loads of the most common arguments.  */
    a0 = args[0];
    a1 = args[1];
    a2 = args[2];
    c2 = const_args[2];

    switch (opc) {
    case INDEX_op_exit_tb:
        if (check_fit_ptr(a0, 13)) {
            tcg_out_arithi(s, TCG_REG_G0, TCG_REG_I7, 8, RETURN);
            tcg_out_movi_imm13(s, TCG_REG_O0, a0);
            break;
        } else if (USE_REG_TB) {
            intptr_t tb_diff = a0 - (uintptr_t)s->code_gen_ptr;
            if (check_fit_ptr(tb_diff, 13)) {
                tcg_out_arithi(s, TCG_REG_G0, TCG_REG_I7, 8, RETURN);
                /* Note that TCG_REG_TB has been unwound to O1.  */
                tcg_out_arithi(s, TCG_REG_O0, TCG_REG_O1, tb_diff, ARITH_ADD);
                break;
            }
1327
        }
1328 1329 1330
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_I0, a0 & ~0x3ff);
        tcg_out_arithi(s, TCG_REG_G0, TCG_REG_I7, 8, RETURN);
        tcg_out_arithi(s, TCG_REG_O0, TCG_REG_O0, a0 & 0x3ff, ARITH_OR);
B
blueswir1 已提交
1331 1332
        break;
    case INDEX_op_goto_tb:
1333
        if (s->tb_jmp_insn_offset) {
B
blueswir1 已提交
1334
            /* direct jump method */
1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349
            if (USE_REG_TB) {
                /* make sure the patch is 8-byte aligned.  */
                if ((intptr_t)s->code_ptr & 4) {
                    tcg_out_nop(s);
                }
                s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s);
                tcg_out_sethi(s, TCG_REG_T1, 0);
                tcg_out_arithi(s, TCG_REG_T1, TCG_REG_T1, 0, ARITH_OR);
                tcg_out_arith(s, TCG_REG_G0, TCG_REG_TB, TCG_REG_T1, JMPL);
                tcg_out_arith(s, TCG_REG_TB, TCG_REG_TB, TCG_REG_T1, ARITH_ADD);
            } else {
                s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s);
                tcg_out32(s, CALL);
                tcg_out_nop(s);
            }
        } else {
            /* indirect jump method */
            tcg_out_ld_ptr(s, TCG_REG_TB,
                           (uintptr_t)(s->tb_jmp_target_addr + a0));
            tcg_out_arithi(s, TCG_REG_G0, TCG_REG_TB, 0, JMPL);
            tcg_out_nop(s);
        }
        s->tb_jmp_reset_offset[a0] = c = tcg_current_code_size(s);

        /* For the unlinked path of goto_tb, we need to reset
           TCG_REG_TB to the beginning of this TB.  */
        if (USE_REG_TB) {
            c = -c;
            if (check_fit_i32(c, 13)) {
                tcg_out_arithi(s, TCG_REG_TB, TCG_REG_TB, c, ARITH_ADD);
            } else {
                tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_T1, c);
                tcg_out_arith(s, TCG_REG_TB, TCG_REG_TB,
                              TCG_REG_T1, ARITH_ADD);
            }
        }
        break;
    case INDEX_op_goto_ptr:
        tcg_out_arithi(s, TCG_REG_G0, a0, 0, JMPL);
        if (USE_REG_TB) {
            tcg_out_arith(s, TCG_REG_TB, a0, TCG_REG_G0, ARITH_OR);
        } else {
            tcg_out_nop(s);
        }
        break;
    case INDEX_op_br:
        tcg_out_bpcc(s, COND_A, BPCC_PT, arg_label(a0));
        tcg_out_nop(s);
        break;

#define OP_32_64(x)                             \
        glue(glue(case INDEX_op_, x), _i32):    \
        glue(glue(case INDEX_op_, x), _i64)

    OP_32_64(ld8u):
        tcg_out_ldst(s, a0, a1, a2, LDUB);
        break;
    OP_32_64(ld8s):
        tcg_out_ldst(s, a0, a1, a2, LDSB);
        break;
    OP_32_64(ld16u):
        tcg_out_ldst(s, a0, a1, a2, LDUH);
        break;
    OP_32_64(ld16s):
        tcg_out_ldst(s, a0, a1, a2, LDSH);
        break;
    case INDEX_op_ld_i32:
    case INDEX_op_ld32u_i64:
        tcg_out_ldst(s, a0, a1, a2, LDUW);
        break;
    OP_32_64(st8):
        tcg_out_ldst(s, a0, a1, a2, STB);
        break;
    OP_32_64(st16):
        tcg_out_ldst(s, a0, a1, a2, STH);
        break;
    case INDEX_op_st_i32:
    case INDEX_op_st32_i64:
        tcg_out_ldst(s, a0, a1, a2, STW);
        break;
    OP_32_64(add):
        c = ARITH_ADD;
        goto gen_arith;
    OP_32_64(sub):
        c = ARITH_SUB;
        goto gen_arith;
    OP_32_64(and):
        c = ARITH_AND;
        goto gen_arith;
    OP_32_64(andc):
        c = ARITH_ANDN;
        goto gen_arith;
    OP_32_64(or):
        c = ARITH_OR;
        goto gen_arith;
    OP_32_64(orc):
        c = ARITH_ORN;
        goto gen_arith;
    OP_32_64(xor):
        c = ARITH_XOR;
        goto gen_arith;
    case INDEX_op_shl_i32:
        c = SHIFT_SLL;
    do_shift32:
        /* Limit immediate shift count lest we create an illegal insn.  */
        tcg_out_arithc(s, a0, a1, a2 & 31, c2, c);
        break;
    case INDEX_op_shr_i32:
        c = SHIFT_SRL;
        goto do_shift32;
    case INDEX_op_sar_i32:
        c = SHIFT_SRA;
        goto do_shift32;
    case INDEX_op_mul_i32:
        c = ARITH_UMUL;
        goto gen_arith;

    OP_32_64(neg):
        c = ARITH_SUB;
        goto gen_arith1;
    OP_32_64(not):
        c = ARITH_ORN;
        goto gen_arith1;

    case INDEX_op_div_i32:
        tcg_out_div32(s, a0, a1, a2, c2, 0);
        break;
    case INDEX_op_divu_i32:
        tcg_out_div32(s, a0, a1, a2, c2, 1);
        break;

    case INDEX_op_brcond_i32:
        tcg_out_brcond_i32(s, a2, a0, a1, const_args[1], arg_label(args[3]));
        break;
    case INDEX_op_setcond_i32:
        tcg_out_setcond_i32(s, args[3], a0, a1, a2, c2);
        break;
    case INDEX_op_movcond_i32:
        tcg_out_movcond_i32(s, args[5], a0, a1, a2, c2, args[3], const_args[3]);
        break;

    case INDEX_op_add2_i32:
        tcg_out_addsub2_i32(s, args[0], args[1], args[2], args[3],
                            args[4], const_args[4], args[5], const_args[5],
                            ARITH_ADDCC, ARITH_ADDC);
        break;
    case INDEX_op_sub2_i32:
        tcg_out_addsub2_i32(s, args[0], args[1], args[2], args[3],
                            args[4], const_args[4], args[5], const_args[5],
                            ARITH_SUBCC, ARITH_SUBC);
        break;
    case INDEX_op_mulu2_i32:
        c = ARITH_UMUL;
        goto do_mul2;
    case INDEX_op_muls2_i32:
        c = ARITH_SMUL;
    do_mul2:
        /* The 32-bit multiply insns produce a full 64-bit result.  If the
           destination register can hold it, we can avoid the slower RDY.  */
        tcg_out_arithc(s, a0, a2, args[3], const_args[3], c);
        if (SPARC64 || a0 <= TCG_REG_O7) {
            tcg_out_arithi(s, a1, a0, 32, SHIFT_SRLX);
        } else {
            tcg_out_rdy(s, a1);
        }
        break;

    case INDEX_op_qemu_ld_i32:
        tcg_out_qemu_ld(s, a0, a1, a2, false);
        break;
    case INDEX_op_qemu_ld_i64:
        tcg_out_qemu_ld(s, a0, a1, a2, true);
        break;
    case INDEX_op_qemu_st_i32:
    case INDEX_op_qemu_st_i64:
        tcg_out_qemu_st(s, a0, a1, a2);
        break;

    case INDEX_op_ld32s_i64:
        tcg_out_ldst(s, a0, a1, a2, LDSW);
        break;
    case INDEX_op_ld_i64:
        tcg_out_ldst(s, a0, a1, a2, LDX);
        break;
    case INDEX_op_st_i64:
        tcg_out_ldst(s, a0, a1, a2, STX);
        break;
    case INDEX_op_shl_i64:
        c = SHIFT_SLLX;
    do_shift64:
        /* Limit immediate shift count lest we create an illegal insn.  */
        tcg_out_arithc(s, a0, a1, a2 & 63, c2, c);
        break;
    case INDEX_op_shr_i64:
        c = SHIFT_SRLX;
        goto do_shift64;
    case INDEX_op_sar_i64:
        c = SHIFT_SRAX;
        goto do_shift64;
    case INDEX_op_mul_i64:
        c = ARITH_MULX;
        goto gen_arith;
    case INDEX_op_div_i64:
        c = ARITH_SDIVX;
        goto gen_arith;
    case INDEX_op_divu_i64:
        c = ARITH_UDIVX;
        goto gen_arith;
    case INDEX_op_ext_i32_i64:
    case INDEX_op_ext32s_i64:
        tcg_out_arithi(s, a0, a1, 0, SHIFT_SRA);
        break;
    case INDEX_op_extu_i32_i64:
    case INDEX_op_ext32u_i64:
        tcg_out_arithi(s, a0, a1, 0, SHIFT_SRL);
        break;
    case INDEX_op_extrl_i64_i32:
        tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
        break;
    case INDEX_op_extrh_i64_i32:
        tcg_out_arithi(s, a0, a1, 32, SHIFT_SRLX);
        break;

    case INDEX_op_brcond_i64:
        tcg_out_brcond_i64(s, a2, a0, a1, const_args[1], arg_label(args[3]));
        break;
    case INDEX_op_setcond_i64:
        tcg_out_setcond_i64(s, args[3], a0, a1, a2, c2);
        break;
    case INDEX_op_movcond_i64:
        tcg_out_movcond_i64(s, args[5], a0, a1, a2, c2, args[3], const_args[3]);
        break;
    case INDEX_op_add2_i64:
        tcg_out_addsub2_i64(s, args[0], args[1], args[2], args[3], args[4],
                            const_args[4], args[5], const_args[5], false);
        break;
    case INDEX_op_sub2_i64:
        tcg_out_addsub2_i64(s, args[0], args[1], args[2], args[3], args[4],
                            const_args[4], args[5], const_args[5], true);
        break;
    case INDEX_op_muluh_i64:
        tcg_out_arith(s, args[0], args[1], args[2], ARITH_UMULXHI);
        break;

    gen_arith:
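        /* Binary ops funnel here: a single arithmetic insn, with the
           second source possibly a constant.  */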
        tcg_out_arithc(s, a0, a1, a2, c2, c);
        break;

    gen_arith1:
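        /* Unary ops funnel here, synthesized as "op %g0, a1 -> a0"
           (neg via SUB, not via ORN).  */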
        tcg_out_arithc(s, a0, TCG_REG_G0, a1, const_args[1], c);
        break;

    case INDEX_op_mb:
        tcg_out_mb(s, a0);
        break;

    case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
    case INDEX_op_mov_i64:
    case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi.  */
    case INDEX_op_movi_i64:
    case INDEX_op_call:     /* Always emitted via tcg_out_call.  */
    default:
        tcg_abort();
    }
}

static const TCGTargetOpDef sparc_op_defs[] = {
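    /* Constraint letters are decoded by target_parse_constraint earlier in
       this file: 'r' any register, 'R' a register safe for 64-bit values,
       'Z' additionally accepts constant zero (%g0), 'J'/'I' small signed
       immediates, 'A'/'s'/'S' qemu_ld/st operands that avoid the helper
       call-clobbered registers, and '0' ties the operand to output 0.  */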
    { INDEX_op_exit_tb, { } },
    { INDEX_op_goto_tb, { } },
    { INDEX_op_br, { } },
    { INDEX_op_goto_ptr, { "r" } },

    { INDEX_op_ld8u_i32, { "r", "r" } },
    { INDEX_op_ld8s_i32, { "r", "r" } },
    { INDEX_op_ld16u_i32, { "r", "r" } },
    { INDEX_op_ld16s_i32, { "r", "r" } },
    { INDEX_op_ld_i32, { "r", "r" } },
    { INDEX_op_st8_i32, { "rZ", "r" } },
    { INDEX_op_st16_i32, { "rZ", "r" } },
    { INDEX_op_st_i32, { "rZ", "r" } },

    { INDEX_op_add_i32, { "r", "rZ", "rJ" } },
    { INDEX_op_mul_i32, { "r", "rZ", "rJ" } },
    { INDEX_op_div_i32, { "r", "rZ", "rJ" } },
    { INDEX_op_divu_i32, { "r", "rZ", "rJ" } },
    { INDEX_op_sub_i32, { "r", "rZ", "rJ" } },
    { INDEX_op_and_i32, { "r", "rZ", "rJ" } },
    { INDEX_op_andc_i32, { "r", "rZ", "rJ" } },
    { INDEX_op_or_i32, { "r", "rZ", "rJ" } },
    { INDEX_op_orc_i32, { "r", "rZ", "rJ" } },
    { INDEX_op_xor_i32, { "r", "rZ", "rJ" } },

    { INDEX_op_shl_i32, { "r", "rZ", "rJ" } },
    { INDEX_op_shr_i32, { "r", "rZ", "rJ" } },
    { INDEX_op_sar_i32, { "r", "rZ", "rJ" } },

    { INDEX_op_neg_i32, { "r", "rJ" } },
    { INDEX_op_not_i32, { "r", "rJ" } },

    { INDEX_op_brcond_i32, { "rZ", "rJ" } },
    { INDEX_op_setcond_i32, { "r", "rZ", "rJ" } },
    { INDEX_op_movcond_i32, { "r", "rZ", "rJ", "rI", "0" } },

    { INDEX_op_add2_i32, { "r", "r", "rZ", "rZ", "rJ", "rJ" } },
    { INDEX_op_sub2_i32, { "r", "r", "rZ", "rZ", "rJ", "rJ" } },
    { INDEX_op_mulu2_i32, { "r", "r", "rZ", "rJ" } },
    { INDEX_op_muls2_i32, { "r", "r", "rZ", "rJ" } },

    { INDEX_op_ld8u_i64, { "R", "r" } },
    { INDEX_op_ld8s_i64, { "R", "r" } },
    { INDEX_op_ld16u_i64, { "R", "r" } },
    { INDEX_op_ld16s_i64, { "R", "r" } },
    { INDEX_op_ld32u_i64, { "R", "r" } },
    { INDEX_op_ld32s_i64, { "R", "r" } },
    { INDEX_op_ld_i64, { "R", "r" } },
    { INDEX_op_st8_i64, { "RZ", "r" } },
    { INDEX_op_st16_i64, { "RZ", "r" } },
    { INDEX_op_st32_i64, { "RZ", "r" } },
    { INDEX_op_st_i64, { "RZ", "r" } },

    { INDEX_op_add_i64, { "R", "RZ", "RJ" } },
    { INDEX_op_mul_i64, { "R", "RZ", "RJ" } },
    { INDEX_op_div_i64, { "R", "RZ", "RJ" } },
    { INDEX_op_divu_i64, { "R", "RZ", "RJ" } },
    { INDEX_op_sub_i64, { "R", "RZ", "RJ" } },
    { INDEX_op_and_i64, { "R", "RZ", "RJ" } },
    { INDEX_op_andc_i64, { "R", "RZ", "RJ" } },
    { INDEX_op_or_i64, { "R", "RZ", "RJ" } },
    { INDEX_op_orc_i64, { "R", "RZ", "RJ" } },
    { INDEX_op_xor_i64, { "R", "RZ", "RJ" } },

    { INDEX_op_shl_i64, { "R", "RZ", "RJ" } },
    { INDEX_op_shr_i64, { "R", "RZ", "RJ" } },
    { INDEX_op_sar_i64, { "R", "RZ", "RJ" } },

    { INDEX_op_neg_i64, { "R", "RJ" } },
    { INDEX_op_not_i64, { "R", "RJ" } },

    { INDEX_op_ext32s_i64, { "R", "R" } },
    { INDEX_op_ext32u_i64, { "R", "R" } },
    { INDEX_op_ext_i32_i64, { "R", "r" } },
    { INDEX_op_extu_i32_i64, { "R", "r" } },
    { INDEX_op_extrl_i64_i32,  { "r", "R" } },
    { INDEX_op_extrh_i64_i32,  { "r", "R" } },

    { INDEX_op_brcond_i64, { "RZ", "RJ" } },
    { INDEX_op_setcond_i64, { "R", "RZ", "RJ" } },
    { INDEX_op_movcond_i64, { "R", "RZ", "RJ", "RI", "0" } },

    { INDEX_op_add2_i64, { "R", "R", "RZ", "RZ", "RJ", "RI" } },
    { INDEX_op_sub2_i64, { "R", "R", "RZ", "RZ", "RJ", "RI" } },
    { INDEX_op_muluh_i64, { "R", "RZ", "RZ" } },

    { INDEX_op_qemu_ld_i32, { "r", "A" } },
    { INDEX_op_qemu_ld_i64, { "R", "A" } },
    { INDEX_op_qemu_st_i32, { "sZ", "A" } },
    { INDEX_op_qemu_st_i64, { "SZ", "A" } },

    { INDEX_op_mb, { } },
    { -1 },
};

static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
{
    int i, n = ARRAY_SIZE(sparc_op_defs);

    for (i = 0; i < n; ++i) {
        if (sparc_op_defs[i].op == op) {
            return &sparc_op_defs[i];
        }
    }
    return NULL;
}

static void tcg_target_init(TCGContext *s)
{
    /* Only probe for the platform and capabilities if we haven't already
       determined maximum values at compile time.  */
#ifndef use_vis3_instructions
    {
        unsigned long hwcap = qemu_getauxval(AT_HWCAP);
        use_vis3_instructions = (hwcap & HWCAP_SPARC_VIS3) != 0;
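        /* VIS 3.0 (SPARC T4 and later) provides UMULXHI, used by the
           muluh_i64 case above, among other instructions.  */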
    }
#endif

    tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffffffff);
    tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, ALL_64);

    tcg_regset_set32(tcg_target_call_clobber_regs, 0,
                     (1 << TCG_REG_G1) |
                     (1 << TCG_REG_G2) |
                     (1 << TCG_REG_G3) |
                     (1 << TCG_REG_G4) |
                     (1 << TCG_REG_G5) |
                     (1 << TCG_REG_G6) |
                     (1 << TCG_REG_G7) |
                     (1 << TCG_REG_O0) |
                     (1 << TCG_REG_O1) |
                     (1 << TCG_REG_O2) |
                     (1 << TCG_REG_O3) |
                     (1 << TCG_REG_O4) |
                     (1 << TCG_REG_O5) |
                     (1 << TCG_REG_O7));

    tcg_regset_clear(s->reserved_regs);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_G0); /* zero */
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_G6); /* reserved for os */
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_G7); /* thread pointer */
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_I6); /* frame pointer */
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_I7); /* return address */
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_O6); /* stack pointer */
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_T1); /* for internal use */
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_T2); /* for internal use */
}

#if SPARC64
# define ELF_HOST_MACHINE  EM_SPARCV9
#else
# define ELF_HOST_MACHINE  EM_SPARC32PLUS
# define ELF_HOST_FLAGS    EF_SPARC_32PLUS
#endif

typedef struct {
    DebugFrameHeader h;
    uint8_t fde_def_cfa[SPARC64 ? 4 : 2];
    uint8_t fde_win_save;
    uint8_t fde_ret_save[3];
} DebugFrame;

static const DebugFrame debug_frame = {
    .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
    .h.cie.id = -1,
    .h.cie.version = 1,
    .h.cie.code_align = 1,
    .h.cie.data_align = -sizeof(void *) & 0x7f,
    .h.cie.return_column = 15,            /* o7 */

    /* Total FDE size does not include the "len" member.  */
    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),

    .fde_def_cfa = {
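        /* 0x0c is DW_CFA_def_cfa (register, ULEB128 offset) and 0x0d is
           DW_CFA_def_cfa_register; DWARF register 30 is %i6.  The offset
           2047 is encoded as the two-byte ULEB128 0xff 0x0f.  */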
#if SPARC64
        12, 30,                         /* DW_CFA_def_cfa i6, 2047 */
        (2047 & 0x7f) | 0x80, (2047 >> 7)
#else
        13, 30                          /* DW_CFA_def_cfa_register i6 */
#endif
    },
    .fde_win_save = 0x2d,               /* DW_CFA_GNU_window_save */
    .fde_ret_save = { 9, 15, 31 },      /* DW_CFA_register o7, i7 */
};

void tcg_register_jit(void *buf, size_t buf_size)
{
    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
}

void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_addr,
                              uintptr_t addr)
{
    intptr_t tb_disp = addr - tc_ptr;
    intptr_t br_disp = addr - jmp_addr;
    tcg_insn_unit i1, i2;

    /* We can reach the entire address space for ILP32.
       For LP64, the code_gen_buffer can't be larger than 2GB.  */
    tcg_debug_assert(tb_disp == (int32_t)tb_disp);
    tcg_debug_assert(br_disp == (int32_t)br_disp);

    if (!USE_REG_TB) {
        atomic_set((uint32_t *)jmp_addr, deposit32(CALL, 0, 30, br_disp >> 2));
        flush_icache_range(jmp_addr, jmp_addr + 4);
        return;
    }

    /* A displacement this small does not exercise the full range of the
       branch, but we still need to load the new value of TCG_REG_TB,
       which here takes only a single add.  This case happens quite often.  */
    if (check_fit_ptr(tb_disp, 13)) {
        /* ba,pt %icc, addr */
        i1 = (INSN_OP(0) | INSN_OP2(1) | INSN_COND(COND_A)
              | BPCC_ICC | BPCC_PT | INSN_OFF19(br_disp));
        i2 = (ARITH_ADD | INSN_RD(TCG_REG_TB) | INSN_RS1(TCG_REG_TB)
              | INSN_IMM13(tb_disp));
    } else if (tb_disp >= 0) {
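        /* Positive displacement: sethi the high 22 bits into TCG_REG_T1,
           then or in the low 10 bits.  */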
        i1 = SETHI | INSN_RD(TCG_REG_T1) | ((tb_disp & 0xfffffc00) >> 10);
        i2 = (ARITH_OR | INSN_RD(TCG_REG_T1) | INSN_RS1(TCG_REG_T1)
              | INSN_IMM13(tb_disp & 0x3ff));
    } else {
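        /* Negative displacement: sethi the complemented high bits, then
           xor with the sign-extended low bits to recover both the value
           and its 64-bit sign extension.  */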
        i1 = SETHI | INSN_RD(TCG_REG_T1) | ((~tb_disp & 0xfffffc00) >> 10);
        i2 = (ARITH_XOR | INSN_RD(TCG_REG_T1) | INSN_RS1(TCG_REG_T1)
              | INSN_IMM13((tb_disp & 0x3ff) | -0x400));
    }

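    /* Write both instructions with one 64-bit store; the host is
       big-endian, so the high half (i1) is the first insn in memory.  */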
    atomic_set((uint64_t *)jmp_addr, deposit64(i2, 32, 32, i1));
    flush_icache_range(jmp_addr, jmp_addr + 8);
}