tcg-target.inc.c 78.8 KB
Newer Older
B
bellard 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
24

R
Richard Henderson 已提交
25 26
#include "tcg-be-ldst.h"

27
#ifdef CONFIG_DEBUG_TCG
28
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
29 30 31 32 33 34
#if TCG_TARGET_REG_BITS == 64
    "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi",
    "%r8",  "%r9",  "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
#else
    "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi",
#endif
B
bellard 已提交
35
};
36
#endif
B
bellard 已提交
37

38
/* Order in which the register allocator prefers to hand out registers.
   Call-saved registers are listed first so that values are more likely
   to survive helper calls.  */
static const int tcg_target_reg_alloc_order[] = {
#if TCG_TARGET_REG_BITS == 64
    TCG_REG_RBP,
    TCG_REG_RBX,
    TCG_REG_R12,
    TCG_REG_R13,
    TCG_REG_R14,
    TCG_REG_R15,
    TCG_REG_R10,
    TCG_REG_R11,
    TCG_REG_R9,
    TCG_REG_R8,
    TCG_REG_RCX,
    TCG_REG_RDX,
    TCG_REG_RSI,
    TCG_REG_RDI,
    TCG_REG_RAX,
#else
    TCG_REG_EBX,
    TCG_REG_ESI,
    TCG_REG_EDI,
    TCG_REG_EBP,
    TCG_REG_ECX,
    TCG_REG_EDX,
    TCG_REG_EAX,
#endif
};

66 67
/* Registers used to pass integer arguments to helper calls.  On x86_64
   this follows the Win64 or SysV ABI as appropriate; 32-bit i386 passes
   everything on the stack.  */
static const int tcg_target_call_iarg_regs[] = {
#if TCG_TARGET_REG_BITS == 64
#if defined(_WIN64)
    TCG_REG_RCX,
    TCG_REG_RDX,
#else
    TCG_REG_RDI,
    TCG_REG_RSI,
    TCG_REG_RDX,
    TCG_REG_RCX,
#endif
    TCG_REG_R8,
    TCG_REG_R9,
#else
    /* 32 bit mode uses stack based calling convention (GCC default). */
#endif
};

/* Registers in which helper calls return results: EAX, plus EDX for the
   high half of a 64-bit result on a 32-bit host.  */
static const int tcg_target_call_oarg_regs[] = {
    TCG_REG_EAX,
#if TCG_TARGET_REG_BITS == 32
    TCG_REG_EDX
#endif
};
B
bellard 已提交
90

91 92 93
/* Constants we accept.  */
#define TCG_CT_CONST_S32 0x100          /* sign-extended 32-bit immediate */
#define TCG_CT_CONST_U32 0x200          /* zero-extended 32-bit immediate */
#define TCG_CT_CONST_I32 0x400          /* inverted 32-bit immediate */

/* Registers used with L constraint, which are the first argument 
   registers on x86_64, and two random call clobbered registers on
   i386. */
#if TCG_TARGET_REG_BITS == 64
# define TCG_REG_L0 tcg_target_call_iarg_regs[0]
# define TCG_REG_L1 tcg_target_call_iarg_regs[1]
#else
# define TCG_REG_L0 TCG_REG_EAX
# define TCG_REG_L1 TCG_REG_EDX
#endif

/* The host compiler should supply <cpuid.h> to enable runtime features
   detection, as we're not going to go so far as our own inline assembly.
   If not available, default values will be assumed.  */
#if defined(CONFIG_CPUID_H)
#include <cpuid.h>
#endif

/* For 32-bit, we are going to attempt to determine at runtime whether cmov
   is available.  */
#if TCG_TARGET_REG_BITS == 64
# define have_cmov 1
#elif defined(CONFIG_CPUID_H) && defined(bit_CMOV)
static bool have_cmov;
#else
# define have_cmov 0
#endif

/* If bit_MOVBE is defined in cpuid.h (added in GCC version 4.6), we are
   going to attempt to determine at runtime whether movbe is available.  */
#if defined(CONFIG_CPUID_H) && defined(bit_MOVBE)
static bool have_movbe;
#else
# define have_movbe 0
#endif

/* We need this symbol in tcg-target.h, and we can't properly conditionalize
   it there.  Therefore we always define the variable.  */
bool have_bmi1;

#if defined(CONFIG_CPUID_H) && defined(bit_BMI2)
static bool have_bmi2;
#else
# define have_bmi2 0
#endif

/* Address of the epilogue's return sequence; exit_tb jumps here.  */
static tcg_insn_unit *tb_ret_addr;
143

144
/* Resolve a relocation recorded against generated code: patch the 8- or
   32-bit pc-relative displacement at CODE_PTR to reach VALUE+ADDEND.
   Aborts if the displacement does not fit in the relocation's field.  */
static void patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend)
{
    value += addend;
    switch(type) {
    case R_386_PC32:
        value -= (uintptr_t)code_ptr;
        if (value != (int32_t)value) {
            tcg_abort();
        }
        tcg_patch32(code_ptr, value);
        break;
    case R_386_PC8:
        value -= (uintptr_t)code_ptr;
        if (value != (int8_t)value) {
            tcg_abort();
        }
        tcg_patch8(code_ptr, value);
        break;
    default:
        tcg_abort();
    }
}

/* parse target specific constraints */
/* Translate one constraint letter from an opcode's constraint string
   into TCGArgConstraint flags / register sets.  Advances *pct_str past
   the consumed letter.  Returns 0 on success, -1 on an unknown letter.  */
static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
{
    const char *ct_str;

    ct_str = *pct_str;
    switch(ct_str[0]) {
    case 'a':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EAX);
        break;
    case 'b':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EBX);
        break;
    case 'c':
    case_c:
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_ECX);
        break;
    case 'd':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EDX);
        break;
    case 'S':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_ESI);
        break;
    case 'D':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EDI);
        break;
    case 'q':
        /* A register that can be used as a byte operand.  On i386 only
           EAX-EDX qualify (mask 0xf); in 64-bit mode any register does.  */
        ct->ct |= TCG_CT_REG;
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_regset_set32(ct->u.regs, 0, 0xffff);
        } else {
            tcg_regset_set32(ct->u.regs, 0, 0xf);
        }
        break;
    case 'Q':
        /* A register with an addressable second byte (e.g. %ah).  */
        ct->ct |= TCG_CT_REG;
        tcg_regset_set32(ct->u.regs, 0, 0xf);
        break;
    case 'r':
    case_r:
        /* Any general register.  */
        ct->ct |= TCG_CT_REG;
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_regset_set32(ct->u.regs, 0, 0xffff);
        } else {
            tcg_regset_set32(ct->u.regs, 0, 0xff);
        }
        break;
    case 'C':
        /* With SHRX et al, we need not use ECX as shift count register.  */
        if (have_bmi2) {
            goto case_r;
        } else {
            goto case_c;
        }

        /* qemu_ld/st address constraint */
    case 'L':
        /* Any general register except the two scratch registers reserved
           for the softmmu slow path (TCG_REG_L0/L1).  */
        ct->ct |= TCG_CT_REG;
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_regset_set32(ct->u.regs, 0, 0xffff);
        } else {
            tcg_regset_set32(ct->u.regs, 0, 0xff);
        }
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_L0);
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_L1);
        break;

    case 'e':
        ct->ct |= TCG_CT_CONST_S32;
        break;
    case 'Z':
        ct->ct |= TCG_CT_CONST_U32;
        break;
    case 'I':
        ct->ct |= TCG_CT_CONST_I32;
        break;

    default:
        return -1;
    }
    ct_str++;
    *pct_str = ct_str;
    return 0;
}

/* test if a constant matches the constraint */
260
static inline int tcg_target_const_match(tcg_target_long val, TCGType type,
B
bellard 已提交
261 262
                                         const TCGArgConstraint *arg_ct)
{
263 264
    int ct = arg_ct->ct;
    if (ct & TCG_CT_CONST) {
B
bellard 已提交
265
        return 1;
266 267 268 269 270 271 272
    }
    if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) {
        return 1;
    }
273 274 275
    if ((ct & TCG_CT_CONST_I32) && ~val == (int32_t)~val) {
        return 1;
    }
276
    return 0;
B
bellard 已提交
277 278
}

279 280 281 282 283 284
#if TCG_TARGET_REG_BITS == 64
# define LOWREGMASK(x)	((x) & 7)
#else
# define LOWREGMASK(x)	(x)
#endif

/* Instruction-prefix flags ORed into the opcode value passed to
   tcg_out_opc; decoded there and never emitted literally.  */
#define P_EXT		0x100		/* 0x0f opcode prefix */
#define P_EXT38         0x200           /* 0x0f 0x38 opcode prefix */
#define P_DATA16        0x400           /* 0x66 opcode prefix */
#if TCG_TARGET_REG_BITS == 64
# define P_ADDR32       0x800           /* 0x67 opcode prefix */
# define P_REXW         0x1000          /* Set REX.W = 1 */
# define P_REXB_R       0x2000          /* REG field as byte register */
# define P_REXB_RM      0x4000          /* R/M field as byte register */
# define P_GS           0x8000          /* gs segment override */
#else
# define P_ADDR32	0
# define P_REXW		0
# define P_REXB_R	0
# define P_REXB_RM	0
# define P_GS           0
#endif
#define P_SIMDF3        0x10000         /* 0xf3 opcode prefix */
#define P_SIMDF2        0x20000         /* 0xf2 opcode prefix */

/* Primary opcode bytes (with their required prefixes folded in).  */
#define OPC_ARITH_EvIz	(0x81)
#define OPC_ARITH_EvIb	(0x83)
#define OPC_ARITH_GvEv	(0x03)		/* ... plus (ARITH_FOO << 3) */
#define OPC_ANDN        (0xf2 | P_EXT38)
#define OPC_ADD_GvEv	(OPC_ARITH_GvEv | (ARITH_ADD << 3))
#define OPC_BSWAP	(0xc8 | P_EXT)
#define OPC_CALL_Jz	(0xe8)
#define OPC_CMOVCC      (0x40 | P_EXT)  /* ... plus condition code */
#define OPC_CMP_GvEv	(OPC_ARITH_GvEv | (ARITH_CMP << 3))
#define OPC_DEC_r32	(0x48)
#define OPC_IMUL_GvEv	(0xaf | P_EXT)
#define OPC_IMUL_GvEvIb	(0x6b)
#define OPC_IMUL_GvEvIz	(0x69)
#define OPC_INC_r32	(0x40)
#define OPC_JCC_long	(0x80 | P_EXT)	/* ... plus condition code */
#define OPC_JCC_short	(0x70)		/* ... plus condition code */
#define OPC_JMP_long	(0xe9)
#define OPC_JMP_short	(0xeb)
#define OPC_LEA         (0x8d)
#define OPC_MOVB_EvGv	(0x88)		/* stores, more or less */
#define OPC_MOVL_EvGv	(0x89)		/* stores, more or less */
#define OPC_MOVL_GvEv	(0x8b)		/* loads, more or less */
#define OPC_MOVB_EvIz   (0xc6)
#define OPC_MOVL_EvIz	(0xc7)
#define OPC_MOVL_Iv     (0xb8)
#define OPC_MOVBE_GyMy  (0xf0 | P_EXT38)
#define OPC_MOVBE_MyGy  (0xf1 | P_EXT38)
#define OPC_MOVSBL	(0xbe | P_EXT)
#define OPC_MOVSWL	(0xbf | P_EXT)
#define OPC_MOVSLQ	(0x63 | P_REXW)
#define OPC_MOVZBL	(0xb6 | P_EXT)
#define OPC_MOVZWL	(0xb7 | P_EXT)
#define OPC_POP_r32	(0x58)
#define OPC_PUSH_r32	(0x50)
#define OPC_PUSH_Iv	(0x68)
#define OPC_PUSH_Ib	(0x6a)
#define OPC_RET		(0xc3)
#define OPC_SETCC	(0x90 | P_EXT | P_REXB_RM) /* ... plus cc */
#define OPC_SHIFT_1	(0xd1)
#define OPC_SHIFT_Ib	(0xc1)
#define OPC_SHIFT_cl	(0xd3)
#define OPC_SARX        (0xf7 | P_EXT38 | P_SIMDF3)
#define OPC_SHLX        (0xf7 | P_EXT38 | P_DATA16)
#define OPC_SHRX        (0xf7 | P_EXT38 | P_SIMDF2)
#define OPC_TESTL	(0x85)
#define OPC_XCHG_ax_r32	(0x90)

#define OPC_GRP3_Ev	(0xf7)
#define OPC_GRP5	(0xff)

/* Group 1 opcode extensions for 0x80-0x83.
   These are also used as modifiers for OPC_ARITH.  */
#define ARITH_ADD 0
#define ARITH_OR  1
#define ARITH_ADC 2
#define ARITH_SBB 3
#define ARITH_AND 4
#define ARITH_SUB 5
#define ARITH_XOR 6
#define ARITH_CMP 7

/* Group 2 opcode extensions for 0xc0, 0xc1, 0xd0-0xd3.  */
#define SHIFT_ROL 0
#define SHIFT_ROR 1
#define SHIFT_SHL 4
#define SHIFT_SHR 5
#define SHIFT_SAR 7

/* Group 3 opcode extensions for 0xf6, 0xf7.  To be used with OPC_GRP3.  */
#define EXT3_NOT   2
#define EXT3_NEG   3
#define EXT3_MUL   4
#define EXT3_IMUL  5
#define EXT3_DIV   6
#define EXT3_IDIV  7

/* Group 5 opcode extensions for 0xff.  To be used with OPC_GRP5.  */
#define EXT5_INC_Ev	0
#define EXT5_DEC_Ev	1
#define EXT5_CALLN_Ev	2
#define EXT5_JMPN_Ev	4

/* Condition codes to be added to OPC_JCC_{long,short}.  */
#define JCC_JMP (-1)
#define JCC_JO  0x0
#define JCC_JNO 0x1
#define JCC_JB  0x2
#define JCC_JAE 0x3
#define JCC_JE  0x4
#define JCC_JNE 0x5
#define JCC_JBE 0x6
#define JCC_JA  0x7
#define JCC_JS  0x8
#define JCC_JNS 0x9
#define JCC_JP  0xa
#define JCC_JNP 0xb
#define JCC_JL  0xc
#define JCC_JGE 0xd
#define JCC_JLE 0xe
#define JCC_JG  0xf

/* Map TCG comparison conditions onto x86 condition codes.  */
static const uint8_t tcg_cond_to_jcc[] = {
    [TCG_COND_EQ] = JCC_JE,
    [TCG_COND_NE] = JCC_JNE,
    [TCG_COND_LT] = JCC_JL,
    [TCG_COND_GE] = JCC_JGE,
    [TCG_COND_LE] = JCC_JLE,
    [TCG_COND_GT] = JCC_JG,
    [TCG_COND_LTU] = JCC_JB,
    [TCG_COND_GEU] = JCC_JAE,
    [TCG_COND_LEU] = JCC_JBE,
    [TCG_COND_GTU] = JCC_JA,
};

418 419 420 421 422
#if TCG_TARGET_REG_BITS == 64
/* Emit the prefix bytes and opcode encoded in OPC.  R, RM and X are the
   register numbers destined for the ModRM reg, r/m and SIB index fields;
   they are consulted only to compute the REX prefix bits here — the
   ModRM/SIB bytes themselves are emitted by the caller.  */
static void tcg_out_opc(TCGContext *s, int opc, int r, int rm, int x)
{
    int rex;

    if (opc & P_GS) {
        tcg_out8(s, 0x65);
    }
    if (opc & P_DATA16) {
        /* We should never be asking for both 16 and 64-bit operation.  */
        tcg_debug_assert((opc & P_REXW) == 0);
        tcg_out8(s, 0x66);
    }
    if (opc & P_ADDR32) {
        tcg_out8(s, 0x67);
    }

    rex = 0;
    rex |= (opc & P_REXW) ? 0x8 : 0x0;  /* REX.W */
    rex |= (r & 8) >> 1;                /* REX.R */
    rex |= (x & 8) >> 2;                /* REX.X */
    rex |= (rm & 8) >> 3;               /* REX.B */

    /* P_REXB_{R,RM} indicates that the given register is the low byte.
       For %[abcd]l we need no REX prefix, but for %{si,di,bp,sp}l we do,
       as otherwise the encoding indicates %[abcd]h.  Note that the values
       that are ORed in merely indicate that the REX byte must be present;
       those bits get discarded in output.  */
    rex |= opc & (r >= 4 ? P_REXB_R : 0);
    rex |= opc & (rm >= 4 ? P_REXB_RM : 0);

    if (rex) {
        tcg_out8(s, (uint8_t)(rex | 0x40));
    }

    if (opc & (P_EXT | P_EXT38)) {
        tcg_out8(s, 0x0f);
        if (opc & P_EXT38) {
            tcg_out8(s, 0x38);
        }
    }

    tcg_out8(s, opc);
}
#else
/* 32-bit variant: no REX prefix exists, so the register arguments are
   irrelevant and only the escape/size prefixes are emitted.  */
static void tcg_out_opc(TCGContext *s, int opc)
{
    if (opc & P_DATA16) {
        tcg_out8(s, 0x66);
    }
    if (opc & (P_EXT | P_EXT38)) {
        tcg_out8(s, 0x0f);
        if (opc & P_EXT38) {
            tcg_out8(s, 0x38);
        }
    }
    tcg_out8(s, opc);
}
/* Discard the register arguments to tcg_out_opc early, so as not to penalize
   the 32-bit compilation paths.  This method works with all versions of gcc,
   whereas relying on optimization may not be able to exclude them.  */
#define tcg_out_opc(s, opc, r, rm, x)  (tcg_out_opc)(s, opc)
#endif
B
bellard 已提交
481

482
/* Emit OPC with a register-direct ModRM byte: reg field = R, r/m = RM.  */
static void tcg_out_modrm(TCGContext *s, int opc, int r, int rm)
{
    tcg_out_opc(s, opc, r, rm, 0);
    tcg_out8(s, 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
}

/* Emit OPC with a VEX prefix (2- or 3-byte form as required) and a
   register-direct ModRM byte.  V is the extra operand carried in
   VEX.vvvv (stored inverted per the encoding).  */
static void tcg_out_vex_modrm(TCGContext *s, int opc, int r, int v, int rm)
{
    int tmp;

    if ((opc & (P_REXW | P_EXT | P_EXT38)) || (rm & 8)) {
        /* Three byte VEX prefix.  */
        tcg_out8(s, 0xc4);

        /* VEX.m-mmmm */
        if (opc & P_EXT38) {
            tmp = 2;
        } else if (opc & P_EXT) {
            tmp = 1;
        } else {
            tcg_abort();
        }
        tmp |= 0x40;                       /* VEX.X */
        tmp |= (r & 8 ? 0 : 0x80);         /* VEX.R */
        tmp |= (rm & 8 ? 0 : 0x20);        /* VEX.B */
        tcg_out8(s, tmp);

        tmp = (opc & P_REXW ? 0x80 : 0);   /* VEX.W */
    } else {
        /* Two byte VEX prefix.  */
        tcg_out8(s, 0xc5);

        tmp = (r & 8 ? 0 : 0x80);          /* VEX.R */
    }
    /* VEX.pp */
    if (opc & P_DATA16) {
        tmp |= 1;                          /* 0x66 */
    } else if (opc & P_SIMDF3) {
        tmp |= 2;                          /* 0xf3 */
    } else if (opc & P_SIMDF2) {
        tmp |= 3;                          /* 0xf2 */
    }
    tmp |= (~v & 15) << 3;                 /* VEX.vvvv */
    tcg_out8(s, tmp);
    tcg_out8(s, opc);
    tcg_out8(s, 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
}

R
Richard Henderson 已提交
530
/* Output an opcode with a full "rm + (index<<shift) + offset" address mode.
   We handle either RM and INDEX missing with a negative value.  In 64-bit
   mode for absolute addresses, ~RM is the size of the immediate operand
   that will follow the instruction.  */

static void tcg_out_modrm_sib_offset(TCGContext *s, int opc, int r, int rm,
                                     int index, int shift, intptr_t offset)
{
    int mod, len;

    if (index < 0 && rm < 0) {
        if (TCG_TARGET_REG_BITS == 64) {
            /* Try for a rip-relative addressing mode.  This has replaced
               the 32-bit-mode absolute addressing encoding.  */
            intptr_t pc = (intptr_t)s->code_ptr + 5 + ~rm;
            intptr_t disp = offset - pc;
            if (disp == (int32_t)disp) {
                tcg_out_opc(s, opc, r, 0, 0);
                tcg_out8(s, (LOWREGMASK(r) << 3) | 5);
                tcg_out32(s, disp);
                return;
            }

            /* Try for an absolute address encoding.  This requires the
               use of the MODRM+SIB encoding and is therefore larger than
               rip-relative addressing.  */
            if (offset == (int32_t)offset) {
                tcg_out_opc(s, opc, r, 0, 0);
                tcg_out8(s, (LOWREGMASK(r) << 3) | 4);
                tcg_out8(s, (4 << 3) | 5);
                tcg_out32(s, offset);
                return;
            }

            /* ??? The memory isn't directly addressable.  */
            tcg_abort();
        } else {
            /* Absolute address.  */
            tcg_out_opc(s, opc, r, 0, 0);
            tcg_out8(s, (r << 3) | 5);
            tcg_out32(s, offset);
            return;
        }
    }

    /* Find the length of the immediate addend.  Note that the encoding
       that would be used for (%ebp) indicates absolute addressing.  */
    if (rm < 0) {
        mod = 0, len = 4, rm = 5;
    } else if (offset == 0 && LOWREGMASK(rm) != TCG_REG_EBP) {
        mod = 0, len = 0;
    } else if (offset == (int8_t)offset) {
        mod = 0x40, len = 1;
    } else {
        mod = 0x80, len = 4;
    }

    /* Use a single byte MODRM format if possible.  Note that the encoding
       that would be used for %esp is the escape to the two byte form.  */
    if (index < 0 && LOWREGMASK(rm) != TCG_REG_ESP) {
        /* Single byte MODRM format.  */
        tcg_out_opc(s, opc, r, rm, 0);
        tcg_out8(s, mod | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
    } else {
        /* Two byte MODRM+SIB format.  */

        /* Note that the encoding that would place %esp into the index
           field indicates no index register.  In 64-bit mode, the REX.X
           bit counts, so %r12 can be used as the index.  */
        if (index < 0) {
            index = 4;
        } else {
            tcg_debug_assert(index != TCG_REG_ESP);
        }

        tcg_out_opc(s, opc, r, rm, index);
        tcg_out8(s, mod | (LOWREGMASK(r) << 3) | 4);
        tcg_out8(s, (shift << 6) | (LOWREGMASK(index) << 3) | LOWREGMASK(rm));
    }

    if (len == 1) {
        tcg_out8(s, offset);
    } else if (len == 4) {
        tcg_out32(s, offset);
    }
}

/* A simplification of the above with no index or shift.  */
static inline void tcg_out_modrm_offset(TCGContext *s, int opc, int r,
                                        int rm, intptr_t offset)
{
    tcg_out_modrm_sib_offset(s, opc, r, rm, -1, 0, offset);
}

624 625 626
/* Generate dest op= src.  Uses the same ARITH_* codes as tgen_arithi.  */
static inline void tgen_arithr(TCGContext *s, int subop, int dest, int src)
{
    /* Propagate an opcode prefix, such as P_REXW.  */
    int ext = subop & ~0x7;
    subop &= 0x7;

    tcg_out_modrm(s, OPC_ARITH_GvEv + (subop << 3) + ext, dest, src);
}

634 635
/* Emit a register-to-register move of the given type; a no-op move
   (same source and destination) emits nothing.  */
static inline void tcg_out_mov(TCGContext *s, TCGType type,
                               TCGReg ret, TCGReg arg)
{
    if (arg == ret) {
        return;
    }
    /* Widen the load-form mov to 64 bits when moving an I64 value.  */
    tcg_out_modrm(s, OPC_MOVL_GvEv + (type == TCG_TYPE_I64 ? P_REXW : 0),
                  ret, arg);
}

643
/* Load the constant ARG into register RET, choosing the shortest
   available encoding: xor for zero, 5-byte movl for 32-bit values,
   sign-extended movq imm32, 7-byte rip-relative lea, or the full
   10-byte movq imm64 as a last resort.  */
static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg)
{
    tcg_target_long diff;

    if (arg == 0) {
        /* xor r,r is the shortest way to zero a register (clobbers flags).  */
        tgen_arithr(s, ARITH_XOR, ret, ret);
        return;
    }
    if (arg == (uint32_t)arg || type == TCG_TYPE_I32) {
        /* 32-bit mov zero-extends, so this also covers unsigned 64-bit
           values that fit in 32 bits.  */
        tcg_out_opc(s, OPC_MOVL_Iv + LOWREGMASK(ret), 0, ret, 0);
        tcg_out32(s, arg);
        return;
    }
    if (arg == (int32_t)arg) {
        /* Sign-extended 32-bit immediate move.  */
        tcg_out_modrm(s, OPC_MOVL_EvIz + P_REXW, 0, ret);
        tcg_out32(s, arg);
        return;
    }

    /* Try a 7 byte pc-relative lea before the 10 byte movq.  */
    diff = arg - ((uintptr_t)s->code_ptr + 7);
    if (diff == (int32_t)diff) {
        tcg_out_opc(s, OPC_LEA | P_REXW, ret, 0, 0);
        tcg_out8(s, (LOWREGMASK(ret) << 3) | 5);
        tcg_out32(s, diff);
        return;
    }

    tcg_out_opc(s, OPC_MOVL_Iv + P_REXW + LOWREGMASK(ret), 0, ret, 0);
    tcg_out64(s, arg);
}

R
Richard Henderson 已提交
676 677 678
/* Push an immediate, using the 8-bit form when the value fits.
   Aborts on values that do not fit in a sign-extended 32 bits.  */
static inline void tcg_out_pushi(TCGContext *s, tcg_target_long val)
{
    if (val == (int8_t)val) {
        tcg_out_opc(s, OPC_PUSH_Ib, 0, 0, 0);
        tcg_out8(s, val);
    } else if (val == (int32_t)val) {
        tcg_out_opc(s, OPC_PUSH_Iv, 0, 0, 0);
        tcg_out32(s, val);
    } else {
        tcg_abort();
    }
}

/* Push a register.  */
static inline void tcg_out_push(TCGContext *s, int reg)
{
    tcg_out_opc(s, OPC_PUSH_r32 + LOWREGMASK(reg), 0, reg, 0);
}

/* Pop into a register.  */
static inline void tcg_out_pop(TCGContext *s, int reg)
{
    tcg_out_opc(s, OPC_POP_r32 + LOWREGMASK(reg), 0, reg, 0);
}

699
/* Load RET from memory at ARG1+ARG2, 32 or 64 bits wide per TYPE.  */
static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
                              TCGReg arg1, intptr_t arg2)
{
    int rexw = (type == TCG_TYPE_I64 ? P_REXW : 0);
    tcg_out_modrm_offset(s, OPC_MOVL_GvEv + rexw, ret, arg1, arg2);
}

/* Store ARG to memory at ARG1+ARG2, 32 or 64 bits wide per TYPE.  */
static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
                              TCGReg arg1, intptr_t arg2)
{
    int rexw = (type == TCG_TYPE_I64 ? P_REXW : 0);
    tcg_out_modrm_offset(s, OPC_MOVL_EvGv + rexw, arg, arg1, arg2);
}

713 714
/* Store the constant VAL to memory at BASE+OFS.  Returns false when a
   64-bit value does not fit the sign-extended 32-bit immediate of the
   mov instruction, in which case the caller must materialize it in a
   register instead.  */
static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                        TCGReg base, intptr_t ofs)
{
    int rexw = 0;
    if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I64) {
        if (val != (int32_t)val) {
            return false;
        }
        rexw = P_REXW;
    }
    tcg_out_modrm_offset(s, OPC_MOVL_EvIz | rexw, 0, base, ofs);
    tcg_out32(s, val);
    return true;
}

728 729
/* Shift/rotate REG by the constant COUNT, using the dedicated
   shift-by-1 opcode when possible.  SUBOPC is a SHIFT_* code, possibly
   with a prefix flag ORed in.  */
static void tcg_out_shifti(TCGContext *s, int subopc, int reg, int count)
{
    /* Propagate an opcode prefix, such as P_DATA16.  */
    int ext = subopc & ~0x7;
    subopc &= 0x7;

    if (count == 1) {
        tcg_out_modrm(s, OPC_SHIFT_1 + ext, subopc, reg);
    } else {
        tcg_out_modrm(s, OPC_SHIFT_Ib + ext, subopc, reg);
        tcg_out8(s, count);
    }
}

/* Byte-swap the low 32 bits of REG.  */
static inline void tcg_out_bswap32(TCGContext *s, int reg)
{
    tcg_out_opc(s, OPC_BSWAP + LOWREGMASK(reg), 0, reg, 0);
}

/* Swap the two low bytes of REG via a 16-bit rotate by 8.  */
static inline void tcg_out_rolw_8(TCGContext *s, int reg)
{
    tcg_out_shifti(s, SHIFT_ROL + P_DATA16, reg, 8);
}

752 753 754
/* Zero-extend the low byte of SRC into DEST.  */
static inline void tcg_out_ext8u(TCGContext *s, int dest, int src)
{
    /* movzbl */
    /* On i386 only EAX-EDX have an addressable low byte.  */
    tcg_debug_assert(src < 4 || TCG_TARGET_REG_BITS == 64);
    tcg_out_modrm(s, OPC_MOVZBL + P_REXB_RM, dest, src);
}

/* Sign-extend the low byte of SRC into DEST; REXW widens to 64 bits.  */
static void tcg_out_ext8s(TCGContext *s, int dest, int src, int rexw)
{
    /* movsbl */
    tcg_debug_assert(src < 4 || TCG_TARGET_REG_BITS == 64);
    tcg_out_modrm(s, OPC_MOVSBL + P_REXB_RM + rexw, dest, src);
}

/* Zero-extend the low 16 bits of SRC into DEST.  */
static inline void tcg_out_ext16u(TCGContext *s, int dest, int src)
{
    /* movzwl */
    tcg_out_modrm(s, OPC_MOVZWL, dest, src);
}

/* Sign-extend the low 16 bits of SRC into DEST; REXW widens to 64 bits.  */
static inline void tcg_out_ext16s(TCGContext *s, int dest, int src, int rexw)
{
    /* movsw[lq] */
    tcg_out_modrm(s, OPC_MOVSWL + rexw, dest, src);
}

778
/* Zero-extend the low 32 bits of SRC into DEST.  */
static inline void tcg_out_ext32u(TCGContext *s, int dest, int src)
{
    /* 32-bit mov zero extends.  */
    tcg_out_modrm(s, OPC_MOVL_GvEv, dest, src);
}

/* Sign-extend the low 32 bits of SRC into DEST (movslq).  */
static inline void tcg_out_ext32s(TCGContext *s, int dest, int src)
{
    tcg_out_modrm(s, OPC_MOVSLQ, dest, src);
}

/* Byte-swap all 64 bits of REG.  */
static inline void tcg_out_bswap64(TCGContext *s, int reg)
{
    tcg_out_opc(s, OPC_BSWAP + P_REXW + LOWREGMASK(reg), 0, reg, 0);
}

/* Generate r0 op= val for ARITH_* code C (with an optional P_REXW
   folded into C).  CF != 0 means the carry flag is live and must be set
   correctly, which disallows the inc/dec and zero-extension tricks.  */
static void tgen_arithi(TCGContext *s, int c, int r0,
                        tcg_target_long val, int cf)
{
    int rexw = 0;

    if (TCG_TARGET_REG_BITS == 64) {
        rexw = c & -8;
        c &= 7;
    }

    /* ??? While INC is 2 bytes shorter than ADDL $1, they also induce
       partial flags update stalls on Pentium4 and are not recommended
       by current Intel optimization manuals.  */
    if (!cf && (c == ARITH_ADD || c == ARITH_SUB) && (val == 1 || val == -1)) {
        int is_inc = (c == ARITH_ADD) ^ (val < 0);
        if (TCG_TARGET_REG_BITS == 64) {
            /* The single-byte increment encodings are re-tasked as the
               REX prefixes.  Use the MODRM encoding.  */
            tcg_out_modrm(s, OPC_GRP5 + rexw,
                          (is_inc ? EXT5_INC_Ev : EXT5_DEC_Ev), r0);
        } else {
            tcg_out8(s, (is_inc ? OPC_INC_r32 : OPC_DEC_r32) + r0);
        }
        return;
    }

    if (c == ARITH_AND) {
        /* AND with a low-bit mask can be done as a zero-extension.  */
        if (TCG_TARGET_REG_BITS == 64) {
            if (val == 0xffffffffu) {
                tcg_out_ext32u(s, r0, r0);
                return;
            }
            if (val == (uint32_t)val) {
                /* AND with no high bits set can use a 32-bit operation.  */
                rexw = 0;
            }
        }
        if (val == 0xffu && (r0 < 4 || TCG_TARGET_REG_BITS == 64)) {
            tcg_out_ext8u(s, r0, r0);
            return;
        }
        if (val == 0xffffu) {
            tcg_out_ext16u(s, r0, r0);
            return;
        }
    }

    if (val == (int8_t)val) {
        tcg_out_modrm(s, OPC_ARITH_EvIb + rexw, c, r0);
        tcg_out8(s, val);
        return;
    }
    if (rexw == 0 || val == (int32_t)val) {
        tcg_out_modrm(s, OPC_ARITH_EvIz + rexw, c, r0);
        tcg_out32(s, val);
        return;
    }

    /* 64-bit immediates that do not sign-extend from 32 bits cannot be
       encoded; the caller must load them into a register first.  */
    tcg_abort();
}

A
aurel32 已提交
855
/* Add the constant VAL to REG (full host-word width); emits nothing
   when VAL is zero.  */
static void tcg_out_addi(TCGContext *s, int reg, tcg_target_long val)
{
    if (val == 0) {
        return;
    }
    tgen_arithi(s, ARITH_ADD + P_REXW, reg, val, 0);
}

862
/* Use SMALL != 0 to force a short forward branch.  */
/* Emit a jump (OPC == -1) or conditional jump (OPC == JCC code) to
   label L.  Resolved labels get a direct 8- or 32-bit displacement;
   unresolved labels get a relocation patched later by patch_reloc.  */
static void tcg_out_jxx(TCGContext *s, int opc, TCGLabel *l, int small)
{
    int32_t val, val1;

    if (l->has_value) {
        val = tcg_pcrel_diff(s, l->u.value_ptr);
        /* val1 is the displacement relative to the end of a 2-byte
           short-form branch.  */
        val1 = val - 2;
        if ((int8_t)val1 == val1) {
            if (opc == -1) {
                tcg_out8(s, OPC_JMP_short);
            } else {
                tcg_out8(s, OPC_JCC_short + opc);
            }
            tcg_out8(s, val1);
        } else {
            /* A forced-short branch that does not fit is a code bug.  */
            if (small) {
                tcg_abort();
            }
            if (opc == -1) {
                tcg_out8(s, OPC_JMP_long);
                tcg_out32(s, val - 5);
            } else {
                tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0);
                tcg_out32(s, val - 6);
            }
        }
    } else if (small) {
        if (opc == -1) {
            tcg_out8(s, OPC_JMP_short);
        } else {
            tcg_out8(s, OPC_JCC_short + opc);
        }
        tcg_out_reloc(s, s->code_ptr, R_386_PC8, l, -1);
        s->code_ptr += 1;
    } else {
        if (opc == -1) {
            tcg_out8(s, OPC_JMP_long);
        } else {
            tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0);
        }
        tcg_out_reloc(s, s->code_ptr, R_386_PC32, l, -4);
        s->code_ptr += 4;
    }
}

R
Richard Henderson 已提交
908
/* Emit a comparison of ARG1 against ARG2, setting the flags for a
   following jcc/setcc/cmovcc.  REXW is 0 for a 32-bit comparison or
   P_REXW for a 64-bit one.  CONST_ARG2 != 0 means ARG2 is an immediate.  */
static void tcg_out_cmp(TCGContext *s, TCGArg arg1, TCGArg arg2,
                        int const_arg2, int rexw)
{
    if (const_arg2) {
        if (arg2 == 0) {
            /* test r, r -- shorter encoding than cmp against $0 */
            tcg_out_modrm(s, OPC_TESTL + rexw, arg1, arg1);
        } else {
            tgen_arithi(s, ARITH_CMP + rexw, arg1, arg2, 0);
        }
    } else {
        tgen_arithr(s, ARITH_CMP + rexw, arg1, arg2);
    }
}

923 924
/* Compare 32-bit ARG1 against ARG2 and branch to LABEL on COND.
   SMALL forces a short forward branch (see tcg_out_jxx).  */
static void tcg_out_brcond32(TCGContext *s, TCGCond cond,
                             TCGArg arg1, TCGArg arg2, int const_arg2,
                             TCGLabel *label, int small)
{
    tcg_out_cmp(s, arg1, arg2, const_arg2, 0);
    tcg_out_jxx(s, tcg_cond_to_jcc[cond], label, small);
}

931 932 933
#if TCG_TARGET_REG_BITS == 64
/* Compare 64-bit ARG1 against ARG2 and branch to LABEL on COND.  */
static void tcg_out_brcond64(TCGContext *s, TCGCond cond,
                             TCGArg arg1, TCGArg arg2, int const_arg2,
                             TCGLabel *label, int small)
{
    tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW);
    tcg_out_jxx(s, tcg_cond_to_jcc[cond], label, small);
}
#else
/* XXX: we implement it at the target level to avoid having to
   handle cross basic blocks temporaries */
/* Double-word conditional branch for the 32-bit host:
   args[0]/args[1] are the low/high words of the first operand,
   args[2]/args[3] those of the second, args[4] is the condition and
   args[5] the target label.  Signed/unsigned conditions compare the
   high words first (signed on the high word, unsigned on the low).  */
static void tcg_out_brcond2(TCGContext *s, const TCGArg *args,
                            const int *const_args, int small)
{
    TCGLabel *label_next = gen_new_label();
    TCGLabel *label_this = arg_label(args[5]);

    switch(args[4]) {
    case TCG_COND_EQ:
        /* Fall through to the next insn if the low words differ,
           then test the high words.  */
        tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2],
                         label_next, 1);
        tcg_out_brcond32(s, TCG_COND_EQ, args[1], args[3], const_args[3],
                         label_this, small);
        break;
    case TCG_COND_NE:
        /* Unequal if either half is unequal.  */
        tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2],
                         label_this, small);
        tcg_out_brcond32(s, TCG_COND_NE, args[1], args[3], const_args[3],
                         label_this, small);
        break;
    case TCG_COND_LT:
        tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3],
                         label_this, small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2],
                         label_this, small);
        break;
    case TCG_COND_LE:
        tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3],
                         label_this, small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2],
                         label_this, small);
        break;
    case TCG_COND_GT:
        tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3],
                         label_this, small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2],
                         label_this, small);
        break;
    case TCG_COND_GE:
        tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3],
                         label_this, small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2],
                         label_this, small);
        break;
    case TCG_COND_LTU:
        tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3],
                         label_this, small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2],
                         label_this, small);
        break;
    case TCG_COND_LEU:
        tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3],
                         label_this, small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2],
                         label_this, small);
        break;
    case TCG_COND_GTU:
        tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3],
                         label_this, small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2],
                         label_this, small);
        break;
    case TCG_COND_GEU:
        tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3],
                         label_this, small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2],
                         label_this, small);
        break;
    default:
        tcg_abort();
    }
    tcg_out_label(s, label_next, s->code_ptr);
}
#endif
B
bellard 已提交
1023

1024 1025
/* Set DEST to the 0/1 result of comparing 32-bit ARG1 against ARG2.  */
static void tcg_out_setcond32(TCGContext *s, TCGCond cond, TCGArg dest,
                              TCGArg arg1, TCGArg arg2, int const_arg2)
{
    tcg_out_cmp(s, arg1, arg2, const_arg2, 0);
    /* setcc writes only the low byte; zero-extend to fill the register.  */
    tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest);
    tcg_out_ext8u(s, dest, dest);
}

1032 1033 1034 1035 1036 1037 1038 1039 1040
#if TCG_TARGET_REG_BITS == 64
/* Set DEST to the 0/1 result of comparing 64-bit ARG1 against ARG2.  */
static void tcg_out_setcond64(TCGContext *s, TCGCond cond, TCGArg dest,
                              TCGArg arg1, TCGArg arg2, int const_arg2)
{
    tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW);
    /* setcc writes only the low byte; zero-extend to fill the register.  */
    tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest);
    tcg_out_ext8u(s, dest, dest);
}
#else
/* Double-word setcond for the 32-bit host, implemented by branching
   with tcg_out_brcond2 and materializing 0 or 1 into args[0].
   args[1..4] describe the comparison as for brcond2.  */
static void tcg_out_setcond2(TCGContext *s, const TCGArg *args,
                             const int *const_args)
{
    TCGArg new_args[6];
    TCGLabel *label_true, *label_over;

    /* Build a brcond2-style argument vector from args[1..5].  */
    memcpy(new_args, args+1, 5*sizeof(TCGArg));

    if (args[0] == args[1] || args[0] == args[2]
        || (!const_args[3] && args[0] == args[3])
        || (!const_args[4] && args[0] == args[4])) {
        /* When the destination overlaps with one of the argument
           registers, don't do anything tricky.  */
        label_true = gen_new_label();
        label_over = gen_new_label();

        new_args[5] = label_arg(label_true);
        tcg_out_brcond2(s, new_args, const_args+1, 1);

        tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);
        tcg_out_jxx(s, JCC_JMP, label_over, 1);
        tcg_out_label(s, label_true, s->code_ptr);

        tcg_out_movi(s, TCG_TYPE_I32, args[0], 1);
        tcg_out_label(s, label_over, s->code_ptr);
    } else {
        /* When the destination does not overlap one of the arguments,
           clear the destination first, jump if cond false, and emit an
           increment in the true case.  This results in smaller code.  */
        tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);

        label_over = gen_new_label();
        new_args[4] = tcg_invert_cond(new_args[4]);
        new_args[5] = label_arg(label_over);
        tcg_out_brcond2(s, new_args, const_args+1, 1);

        tgen_arithi(s, ARITH_ADD, args[0], 1, 0);
        tcg_out_label(s, label_over, s->code_ptr);
    }
}
#endif

R
Richard Henderson 已提交
1084 1085 1086 1087 1088
/* Emit DEST = (C1 cond C2 ? V1 : DEST) for 32-bit operands.  */
static void tcg_out_movcond32(TCGContext *s, TCGCond cond, TCGArg dest,
                              TCGArg c1, TCGArg c2, int const_c2,
                              TCGArg v1)
{
    tcg_out_cmp(s, c1, c2, const_c2, 0);
    if (have_cmov) {
        tcg_out_modrm(s, OPC_CMOVCC | tcg_cond_to_jcc[cond], dest, v1);
    } else {
        /* No CMOV on this cpu: branch around a plain move instead.  */
        TCGLabel *over = gen_new_label();
        tcg_out_jxx(s, tcg_cond_to_jcc[tcg_invert_cond(cond)], over, 1);
        tcg_out_mov(s, TCG_TYPE_I32, dest, v1);
        tcg_out_label(s, over, s->code_ptr);
    }
}

#if TCG_TARGET_REG_BITS == 64
/* Emit DEST = (C1 cond C2 ? V1 : DEST) for 64-bit operands.
   x86-64 always has CMOV, so no fallback branch is needed.  */
static void tcg_out_movcond64(TCGContext *s, TCGCond cond, TCGArg dest,
                              TCGArg c1, TCGArg c2, int const_c2,
                              TCGArg v1)
{
    tcg_out_cmp(s, c1, c2, const_c2, P_REXW);
    tcg_out_modrm(s, OPC_CMOVCC | tcg_cond_to_jcc[cond] | P_REXW, dest, v1);
}
#endif

1109
/* Emit a call (CALL != 0) or jump (CALL == 0) to DEST.  Uses the 5-byte
   rel32 form when DEST is in range; otherwise an indirect call/jump
   through scratch register R10.  */
static void tcg_out_branch(TCGContext *s, int call, tcg_insn_unit *dest)
{
    intptr_t disp = tcg_pcrel_diff(s, dest) - 5;   /* rel32 insn is 5 bytes */

    if (disp == (int32_t)disp) {
        tcg_out_opc(s, call ? OPC_CALL_Jz : OPC_JMP_long, 0, 0, 0);
        tcg_out32(s, disp);
    } else {
        /* Out of 32-bit displacement range: materialize the address.  */
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R10, (uintptr_t)dest);
        tcg_out_modrm(s, OPC_GRP5,
                      call ? EXT5_CALLN_Ev : EXT5_JMPN_Ev, TCG_REG_R10);
    }
}

1123
/* Emit a call to DEST (direct when in rel32 range, else indirect).  */
static inline void tcg_out_call(TCGContext *s, tcg_insn_unit *dest)
{
    tcg_out_branch(s, 1, dest);
}
R
Richard Henderson 已提交
1127

1128
/* Emit a jump to DEST (direct when in rel32 range, else indirect).  */
static void tcg_out_jmp(TCGContext *s, tcg_insn_unit *dest)
{
    tcg_out_branch(s, 0, dest);
}

1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147
/* Emit an N-byte nop as a single instruction: N-1 operand-size prefixes
 * (0x66) in front of the one-byte "xchg %eax,%eax" nop, forming
 * "xchg %ax,%ax".  All cores accept the duplicate prefix, and all of the
 * interesting recent cores can decode and discard the duplicates in a
 * single cycle.
 */
static void tcg_out_nopn(TCGContext *s, int n)
{
    tcg_debug_assert(n >= 1);
    while (n-- > 1) {
        tcg_out8(s, 0x66);      /* operand-size prefix */
    }
    tcg_out8(s, 0x90);          /* nop */
}

B
bellard 已提交
1148
#if defined(CONFIG_SOFTMMU)
/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
 *                                     int mmu_idx, uintptr_t ra)
 */
/* Indexed by (opc & (MO_BSWAP | MO_SIZE)); unlisted slots remain NULL.  */
static void * const qemu_ld_helpers[16] = {
    [MO_UB]   = helper_ret_ldub_mmu,
    [MO_LEUW] = helper_le_lduw_mmu,
    [MO_LEUL] = helper_le_ldul_mmu,
    [MO_LEQ]  = helper_le_ldq_mmu,
    [MO_BEUW] = helper_be_lduw_mmu,
    [MO_BEUL] = helper_be_ldul_mmu,
    [MO_BEQ]  = helper_be_ldq_mmu,
};

/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
 *                                     uintxx_t val, int mmu_idx, uintptr_t ra)
 */
/* Indexed by (opc & (MO_BSWAP | MO_SIZE)); unlisted slots remain NULL.  */
static void * const qemu_st_helpers[16] = {
    [MO_UB]   = helper_ret_stb_mmu,
    [MO_LEUW] = helper_le_stw_mmu,
    [MO_LEUL] = helper_le_stl_mmu,
    [MO_LEQ]  = helper_le_stq_mmu,
    [MO_BEUW] = helper_be_stw_mmu,
    [MO_BEUL] = helper_be_stl_mmu,
    [MO_BEQ]  = helper_be_stq_mmu,
};
1174 1175 1176 1177

/* Perform the TLB load and compare.

   Inputs:
   ADDRLO and ADDRHI contain the low and high part of the address.

   MEM_INDEX and S_BITS are the memory context and log2 size of the load.

   WHICH is the offset into the CPUTLBEntry structure of the slot to read.
   This should be offsetof addr_read or addr_write.

   Outputs:
   LABEL_PTRS is filled with 1 (32-bit addresses) or 2 (64-bit addresses)
   positions of the displacements of forward jumps to the TLB miss case.

   Second argument register is loaded with the low part of the address.
   In the TLB hit case, it has been adjusted as indicated by the TLB
   and so is a host address.  In the TLB miss case, it continues to
   hold a guest address.

   First argument register is clobbered.  */

static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
                                    int mem_index, TCGMemOp opc,
                                    tcg_insn_unit **label_ptr, int which)
{
    const TCGReg r0 = TCG_REG_L0;
    const TCGReg r1 = TCG_REG_L1;
    TCGType ttype = TCG_TYPE_I32;
    TCGType tlbtype = TCG_TYPE_I32;
    /* REX.W selectors for target-address, host-address and tlb-index ops.  */
    int trexw = 0, hrexw = 0, tlbrexw = 0;
    unsigned a_bits = get_alignment_bits(opc);
    unsigned s_bits = opc & MO_SIZE;
    unsigned a_mask = (1 << a_bits) - 1;
    unsigned s_mask = (1 << s_bits) - 1;
    target_ulong tlb_mask;

    if (TCG_TARGET_REG_BITS == 64) {
        if (TARGET_LONG_BITS == 64) {
            ttype = TCG_TYPE_I64;
            trexw = P_REXW;
        }
        if (TCG_TYPE_PTR == TCG_TYPE_I64) {
            hrexw = P_REXW;
            if (TARGET_PAGE_BITS + CPU_TLB_BITS > 32) {
                /* The tlb index computation would overflow 32 bits.  */
                tlbtype = TCG_TYPE_I64;
                tlbrexw = P_REXW;
            }
        }
    }

    tcg_out_mov(s, tlbtype, r0, addrlo);
    /* If the required alignment is at least as large as the access, simply
       copy the address and mask.  For lesser alignments, check that we don't
       cross pages for the complete access.  */
    if (a_bits >= s_bits) {
        tcg_out_mov(s, ttype, r1, addrlo);
    } else {
        tcg_out_modrm_offset(s, OPC_LEA + trexw, r1, addrlo, s_mask - a_mask);
    }
    tlb_mask = TARGET_PAGE_MASK | a_mask;

    /* r0 = tlb index for this page.  */
    tcg_out_shifti(s, SHIFT_SHR + tlbrexw, r0,
                   TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);

    /* r1 = page-aligned address (plus alignment bits for the check).  */
    tgen_arithi(s, ARITH_AND + trexw, r1, tlb_mask, 0);
    tgen_arithi(s, ARITH_AND + tlbrexw, r0,
                (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS, 0);

    /* r0 = &env->tlb_table[mem_index][index].addr_{read,write}  */
    tcg_out_modrm_sib_offset(s, OPC_LEA + hrexw, r0, TCG_AREG0, r0, 0,
                             offsetof(CPUArchState, tlb_table[mem_index][0])
                             + which);

    /* cmp 0(r0), r1 */
    tcg_out_modrm_offset(s, OPC_CMP_GvEv + trexw, r1, r0, 0);

    /* Prepare for both the fast path add of the tlb addend, and the slow
       path function argument setup.  There are two cases worth note:
       For 32-bit guest and x86_64 host, MOVL zero-extends the guest address
       before the fastpath ADDQ below.  For 64-bit guest and x32 host, MOVQ
       copies the entire guest address for the slow path, while truncation
       for the 32-bit host happens with the fastpath ADDL below.  */
    tcg_out_mov(s, ttype, r1, addrlo);

    /* jne slow_path */
    tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
    label_ptr[0] = s->code_ptr;
    s->code_ptr += 4;

    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
        /* cmp 4(r0), addrhi */
        tcg_out_modrm_offset(s, OPC_CMP_GvEv, addrhi, r0, 4);

        /* jne slow_path */
        tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
        label_ptr[1] = s->code_ptr;
        s->code_ptr += 4;
    }

    /* TLB Hit.  */

    /* add addend(r0), r1 */
    tcg_out_modrm_offset(s, OPC_ADD_GvEv + hrexw, r1, r0,
                         offsetof(CPUTLBEntry, addend) - which);
}
1279 1280 1281 1282 1283

/*
 * Record the context of a call to the out of line helper code for the slow path
 * for a load or store, so that we can later generate the correct helper code
 */
static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
                                TCGReg datalo, TCGReg datahi,
                                TCGReg addrlo, TCGReg addrhi,
                                tcg_insn_unit *raddr,
                                tcg_insn_unit **label_ptr)
{
    TCGLabelQemuLdst *label = new_ldst_label(s);

    label->is_ld = is_ld;
    label->oi = oi;
    label->datalo_reg = datalo;
    label->datahi_reg = datahi;
    label->addrlo_reg = addrlo;
    label->addrhi_reg = addrhi;
    /* Address of the code to return to after the slow-path helper.  */
    label->raddr = raddr;
    /* Displacement fields of the jne insns emitted by tcg_out_tlb_load;
       a second one exists only when the guest address is split in two.  */
    label->label_ptr[0] = label_ptr[0];
    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
        label->label_ptr[1] = label_ptr[1];
    }
}

/*
 * Generate code for the slow path for a load at the end of block
 */
static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
{
    TCGMemOpIdx oi = l->oi;
    TCGMemOp opc = get_memop(oi);
    TCGReg data_reg;
    tcg_insn_unit **label_ptr = &l->label_ptr[0];

    /* resolve label address: patch the TLB-miss jne displacements to
       point here */
    tcg_patch32(label_ptr[0], s->code_ptr - label_ptr[0] - 4);
    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
        tcg_patch32(label_ptr[1], s->code_ptr - label_ptr[1] - 4);
    }

    if (TCG_TARGET_REG_BITS == 32) {
        /* 32-bit host: pass all helper arguments on the stack.  */
        int ofs = 0;

        tcg_out_st(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, ofs);
        ofs += 4;

        tcg_out_st(s, TCG_TYPE_I32, l->addrlo_reg, TCG_REG_ESP, ofs);
        ofs += 4;

        if (TARGET_LONG_BITS == 64) {
            tcg_out_st(s, TCG_TYPE_I32, l->addrhi_reg, TCG_REG_ESP, ofs);
            ofs += 4;
        }

        tcg_out_sti(s, TCG_TYPE_I32, oi, TCG_REG_ESP, ofs);
        ofs += 4;

        tcg_out_sti(s, TCG_TYPE_PTR, (uintptr_t)l->raddr, TCG_REG_ESP, ofs);
    } else {
        /* 64-bit host: use the integer argument registers.  */
        tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
        /* The second argument is already loaded with addrlo.  */
        tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2], oi);
        tcg_out_movi(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[3],
                     (uintptr_t)l->raddr);
    }

    tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);

    /* Move the helper's return value into the destination register(s),
       sign- or zero-extending as required by the memory op.  */
    data_reg = l->datalo_reg;
    switch (opc & MO_SSIZE) {
    case MO_SB:
        tcg_out_ext8s(s, data_reg, TCG_REG_EAX, P_REXW);
        break;
    case MO_SW:
        tcg_out_ext16s(s, data_reg, TCG_REG_EAX, P_REXW);
        break;
#if TCG_TARGET_REG_BITS == 64
    case MO_SL:
        tcg_out_ext32s(s, data_reg, TCG_REG_EAX);
        break;
#endif
    case MO_UB:
    case MO_UW:
        /* Note that the helpers have zero-extended to tcg_target_long.  */
    case MO_UL:
        tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
        break;
    case MO_Q:
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_RAX);
        } else if (data_reg == TCG_REG_EDX) {
            /* xchg %edx, %eax -- datalo wants EDX, which holds the high
               half of the helper result */
            tcg_out_opc(s, OPC_XCHG_ax_r32 + TCG_REG_EDX, 0, 0, 0);
            tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_EAX);
        } else {
            tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
            tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_EDX);
        }
        break;
    default:
        tcg_abort();
    }

    /* Jump to the code corresponding to next IR of qemu_ld */
    tcg_out_jmp(s, l->raddr);
}

/*
 * Generate code for the slow path for a store at the end of block
 */
static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
{
    TCGMemOpIdx oi = l->oi;
    TCGMemOp opc = get_memop(oi);
    TCGMemOp s_bits = opc & MO_SIZE;
    tcg_insn_unit **label_ptr = &l->label_ptr[0];
    TCGReg retaddr;

    /* resolve label address: patch the TLB-miss jne displacements to
       point here */
    tcg_patch32(label_ptr[0], s->code_ptr - label_ptr[0] - 4);
    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
        tcg_patch32(label_ptr[1], s->code_ptr - label_ptr[1] - 4);
    }

    if (TCG_TARGET_REG_BITS == 32) {
        /* 32-bit host: pass all helper arguments on the stack.  */
        int ofs = 0;

        tcg_out_st(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, ofs);
        ofs += 4;

        tcg_out_st(s, TCG_TYPE_I32, l->addrlo_reg, TCG_REG_ESP, ofs);
        ofs += 4;

        if (TARGET_LONG_BITS == 64) {
            tcg_out_st(s, TCG_TYPE_I32, l->addrhi_reg, TCG_REG_ESP, ofs);
            ofs += 4;
        }

        tcg_out_st(s, TCG_TYPE_I32, l->datalo_reg, TCG_REG_ESP, ofs);
        ofs += 4;

        if (s_bits == MO_64) {
            tcg_out_st(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_ESP, ofs);
            ofs += 4;
        }

        tcg_out_sti(s, TCG_TYPE_I32, oi, TCG_REG_ESP, ofs);
        ofs += 4;

        retaddr = TCG_REG_EAX;
        tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr);
        tcg_out_st(s, TCG_TYPE_PTR, retaddr, TCG_REG_ESP, ofs);
    } else {
        /* 64-bit host: use the integer argument registers.  */
        tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
        /* The second argument is already loaded with addrlo.  */
        tcg_out_mov(s, (s_bits == MO_64 ? TCG_TYPE_I64 : TCG_TYPE_I32),
                    tcg_target_call_iarg_regs[2], l->datalo_reg);
        tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3], oi);

        if (ARRAY_SIZE(tcg_target_call_iarg_regs) > 4) {
            /* Fifth argument register available (e.g. SysV ABI).  */
            retaddr = tcg_target_call_iarg_regs[4];
            tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr);
        } else {
            /* Otherwise the fifth argument goes on the stack.  */
            retaddr = TCG_REG_RAX;
            tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr);
            tcg_out_st(s, TCG_TYPE_PTR, retaddr, TCG_REG_ESP,
                       TCG_TARGET_CALL_STACK_OFFSET);
        }
    }

    /* "Tail call" to the helper, with the return address back inline.  */
    tcg_out_push(s, retaddr);
    tcg_out_jmp(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
}
1455 1456 1457 1458 1459 1460 1461 1462 1463
#elif defined(__x86_64__) && defined(__linux__)
# include <asm/prctl.h>
# include <sys/prctl.h>

int arch_prctl(int code, unsigned long addr);

/* Segment-override prefix to apply to guest accesses (P_GS when the
   guest base has been installed in %gs, otherwise 0).  */
static int guest_base_flags;
static inline void setup_guest_base_seg(void)
{
    /* Point the %gs segment base at guest_base so guest addresses can be
       dereferenced with a single gs-prefixed access.  Best effort: on
       failure guest_base_flags stays 0 and the add-offset path is used.  */
    if (arch_prctl(ARCH_SET_GS, guest_base) == 0) {
        guest_base_flags = P_GS;
    }
}
#else
# define guest_base_flags 0
static inline void setup_guest_base_seg(void) { }
#endif /* SOFTMMU */
B
bellard 已提交
1472

1473
/* Emit the actual memory load for MEMOP from BASE+INDEX+OFS into
   DATALO (and DATAHI for a 64-bit load on a 32-bit host), applying
   byte-swapping as required.  SEG carries optional segment/address-size
   prefixes.  INDEX may be -1 for no index register.  */
static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
                                   TCGReg base, int index, intptr_t ofs,
                                   int seg, TCGMemOp memop)
{
    const TCGMemOp real_bswap = memop & MO_BSWAP;
    TCGMemOp bswap = real_bswap;
    int movop = OPC_MOVL_GvEv;

    /* With MOVBE the swap is folded into the load itself.  */
    if (have_movbe && real_bswap) {
        bswap = 0;
        movop = OPC_MOVBE_GyMy;
    }

    switch (memop & MO_SSIZE) {
    case MO_UB:
        tcg_out_modrm_sib_offset(s, OPC_MOVZBL + seg, datalo,
                                 base, index, 0, ofs);
        break;
    case MO_SB:
        tcg_out_modrm_sib_offset(s, OPC_MOVSBL + P_REXW + seg, datalo,
                                 base, index, 0, ofs);
        break;
    case MO_UW:
        tcg_out_modrm_sib_offset(s, OPC_MOVZWL + seg, datalo,
                                 base, index, 0, ofs);
        if (real_bswap) {
            /* Swap the two bytes of the 16-bit value.  */
            tcg_out_rolw_8(s, datalo);
        }
        break;
    case MO_SW:
        if (real_bswap) {
            if (have_movbe) {
                tcg_out_modrm_sib_offset(s, OPC_MOVBE_GyMy + P_DATA16 + seg,
                                         datalo, base, index, 0, ofs);
            } else {
                tcg_out_modrm_sib_offset(s, OPC_MOVZWL + seg, datalo,
                                         base, index, 0, ofs);
                tcg_out_rolw_8(s, datalo);
            }
            /* Sign-extend the swapped 16-bit value.  */
            tcg_out_modrm(s, OPC_MOVSWL + P_REXW, datalo, datalo);
        } else {
            tcg_out_modrm_sib_offset(s, OPC_MOVSWL + P_REXW + seg,
                                     datalo, base, index, 0, ofs);
        }
        break;
    case MO_UL:
        tcg_out_modrm_sib_offset(s, movop + seg, datalo, base, index, 0, ofs);
        if (bswap) {
            tcg_out_bswap32(s, datalo);
        }
        break;
#if TCG_TARGET_REG_BITS == 64
    case MO_SL:
        if (real_bswap) {
            /* Load+swap 32 bits, then sign-extend to 64.  */
            tcg_out_modrm_sib_offset(s, movop + seg, datalo,
                                     base, index, 0, ofs);
            if (bswap) {
                tcg_out_bswap32(s, datalo);
            }
            tcg_out_ext32s(s, datalo, datalo);
        } else {
            tcg_out_modrm_sib_offset(s, OPC_MOVSLQ + seg, datalo,
                                     base, index, 0, ofs);
        }
        break;
#endif
    case MO_Q:
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_out_modrm_sib_offset(s, movop + P_REXW + seg, datalo,
                                     base, index, 0, ofs);
            if (bswap) {
                tcg_out_bswap64(s, datalo);
            }
        } else {
            /* 32-bit host: load the 64-bit value as two halves.  */
            if (real_bswap) {
                /* Byte-swapped order also swaps which half goes where.  */
                int t = datalo;
                datalo = datahi;
                datahi = t;
            }
            /* Load the half that does not overwrite BASE first, so the
               address stays valid for the second load.  */
            if (base != datalo) {
                tcg_out_modrm_sib_offset(s, movop + seg, datalo,
                                         base, index, 0, ofs);
                tcg_out_modrm_sib_offset(s, movop + seg, datahi,
                                         base, index, 0, ofs + 4);
            } else {
                tcg_out_modrm_sib_offset(s, movop + seg, datahi,
                                         base, index, 0, ofs + 4);
                tcg_out_modrm_sib_offset(s, movop + seg, datalo,
                                         base, index, 0, ofs);
            }
            if (bswap) {
                tcg_out_bswap32(s, datalo);
                tcg_out_bswap32(s, datahi);
            }
        }
        break;
    default:
        tcg_abort();
    }
}
P
Paul Brook 已提交
1573

B
bellard 已提交
1574 1575 1576
/* XXX: qemu_ld and qemu_st could be modified to clobber only EDX and
   EAX. It will be useful once fixed registers globals are less
   common. */
/* Emit a guest memory load.  ARGS holds, in order: data register(s),
   address register(s), and the TCGMemOpIdx.  IS64 selects a 64-bit
   data value (two registers on a 32-bit host).  */
static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
{
    TCGReg datalo, datahi, addrlo;
    TCGReg addrhi __attribute__((unused));
    TCGMemOpIdx oi;
    TCGMemOp opc;
#if defined(CONFIG_SOFTMMU)
    int mem_index;
    tcg_insn_unit *label_ptr[2];
#endif

    /* Unpack the variable-length argument list.  */
    datalo = *args++;
    datahi = (TCG_TARGET_REG_BITS == 32 && is64 ? *args++ : 0);
    addrlo = *args++;
    addrhi = (TARGET_LONG_BITS > TCG_TARGET_REG_BITS ? *args++ : 0);
    oi = *args++;
    opc = get_memop(oi);

#if defined(CONFIG_SOFTMMU)
    mem_index = get_mmuidx(oi);

    tcg_out_tlb_load(s, addrlo, addrhi, mem_index, opc,
                     label_ptr, offsetof(CPUTLBEntry, addr_read));

    /* TLB Hit.  L1 now holds the host address.  */
    tcg_out_qemu_ld_direct(s, datalo, datahi, TCG_REG_L1, -1, 0, 0, opc);

    /* Record the current context of a load into ldst label */
    add_qemu_ldst_label(s, true, oi, datalo, datahi, addrlo, addrhi,
                        s->code_ptr, label_ptr);
#else
    {
        int32_t offset = guest_base;
        TCGReg base = addrlo;
        int index = -1;
        int seg = 0;

        /* For a 32-bit guest, the high 32 bits may contain garbage.
           We can do this with the ADDR32 prefix if we're not using
           a guest base, or when using segmentation.  Otherwise we
           need to zero-extend manually.  */
        if (guest_base == 0 || guest_base_flags) {
            seg = guest_base_flags;
            offset = 0;
            if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
                seg |= P_ADDR32;
            }
        } else if (TCG_TARGET_REG_BITS == 64) {
            if (TARGET_LONG_BITS == 32) {
                tcg_out_ext32u(s, TCG_REG_L0, base);
                base = TCG_REG_L0;
            }
            if (offset != guest_base) {
                /* guest_base does not fit in a 32-bit displacement;
                   use an index register instead.  */
                tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, guest_base);
                index = TCG_REG_L1;
                offset = 0;
            }
        }

        tcg_out_qemu_ld_direct(s, datalo, datahi,
                               base, index, offset, seg, opc);
    }
#endif
}
B
bellard 已提交
1641

1642 1643 1644
/* Emit the actual memory store of DATALO (and DATAHI for a 64-bit store
   on a 32-bit host) to BASE+OFS for MEMOP, applying byte-swapping as
   required.  SEG carries optional segment/address-size prefixes.  */
static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
                                   TCGReg base, intptr_t ofs, int seg,
                                   TCGMemOp memop)
{
    /* ??? Ideally we wouldn't need a scratch register.  For user-only,
       we could perform the bswap twice to restore the original value
       instead of moving to the scratch.  But as it is, the L constraint
       means that TCG_REG_L0 is definitely free here.  */
    const TCGReg scratch = TCG_REG_L0;
    const TCGMemOp real_bswap = memop & MO_BSWAP;
    TCGMemOp bswap = real_bswap;
    int movop = OPC_MOVL_EvGv;

    /* With MOVBE the swap is folded into the store itself.  */
    if (have_movbe && real_bswap) {
        bswap = 0;
        movop = OPC_MOVBE_MyGy;
    }

    switch (memop & MO_SIZE) {
    case MO_8:
        /* In 32-bit mode, 8-bit stores can only happen from [abcd]x.
           Use the scratch register if necessary.  */
        if (TCG_TARGET_REG_BITS == 32 && datalo >= 4) {
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
            datalo = scratch;
        }
        tcg_out_modrm_offset(s, OPC_MOVB_EvGv + P_REXB_R + seg,
                             datalo, base, ofs);
        break;
    case MO_16:
        if (bswap) {
            /* Swap in the scratch register so DATALO is preserved.  */
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
            tcg_out_rolw_8(s, scratch);
            datalo = scratch;
        }
        tcg_out_modrm_offset(s, movop + P_DATA16 + seg, datalo, base, ofs);
        break;
    case MO_32:
        if (bswap) {
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
            tcg_out_bswap32(s, scratch);
            datalo = scratch;
        }
        tcg_out_modrm_offset(s, movop + seg, datalo, base, ofs);
        break;
    case MO_64:
        if (TCG_TARGET_REG_BITS == 64) {
            if (bswap) {
                tcg_out_mov(s, TCG_TYPE_I64, scratch, datalo);
                tcg_out_bswap64(s, scratch);
                datalo = scratch;
            }
            tcg_out_modrm_offset(s, movop + P_REXW + seg, datalo, base, ofs);
        } else if (bswap) {
            /* 32-bit host, swapped: store each half swapped, with the
               high half going to the low address.  */
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datahi);
            tcg_out_bswap32(s, scratch);
            tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, scratch, base, ofs);
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
            tcg_out_bswap32(s, scratch);
            tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, scratch, base, ofs+4);
        } else {
            if (real_bswap) {
                /* MOVBE path: halves trade places.  */
                int t = datalo;
                datalo = datahi;
                datahi = t;
            }
            tcg_out_modrm_offset(s, movop + seg, datalo, base, ofs);
            tcg_out_modrm_offset(s, movop + seg, datahi, base, ofs+4);
        }
        break;
    default:
        tcg_abort();
    }
}

1717
static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
B
bellard 已提交
1718
{
1719
    TCGReg datalo, datahi, addrlo;
1720
    TCGReg addrhi __attribute__((unused));
1721
    TCGMemOpIdx oi;
1722
    TCGMemOp opc;
B
bellard 已提交
1723
#if defined(CONFIG_SOFTMMU)
1724
    int mem_index;
1725
    tcg_insn_unit *label_ptr[2];
B
bellard 已提交
1726 1727
#endif

1728
    datalo = *args++;
1729
    datahi = (TCG_TARGET_REG_BITS == 32 && is64 ? *args++ : 0);
1730
    addrlo = *args++;
1731
    addrhi = (TARGET_LONG_BITS > TCG_TARGET_REG_BITS ? *args++ : 0);
1732 1733
    oi = *args++;
    opc = get_memop(oi);
B
bellard 已提交
1734 1735

#if defined(CONFIG_SOFTMMU)
1736
    mem_index = get_mmuidx(oi);
1737

1738
    tcg_out_tlb_load(s, addrlo, addrhi, mem_index, opc,
1739
                     label_ptr, offsetof(CPUTLBEntry, addr_write));
1740 1741

    /* TLB Hit.  */
1742
    tcg_out_qemu_st_direct(s, datalo, datahi, TCG_REG_L1, 0, 0, opc);
B
bellard 已提交
1743

1744
    /* Record the current context of a store into ldst label */
1745 1746
    add_qemu_ldst_label(s, false, oi, datalo, datahi, addrlo, addrhi,
                        s->code_ptr, label_ptr);
1747 1748
#else
    {
1749
        int32_t offset = guest_base;
1750
        TCGReg base = addrlo;
1751 1752
        int seg = 0;

1753
        /* See comment in tcg_out_qemu_ld re zero-extension of addrlo.  */
1754
        if (guest_base == 0 || guest_base_flags) {
1755 1756
            seg = guest_base_flags;
            offset = 0;
1757 1758 1759 1760 1761 1762
            if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
                seg |= P_ADDR32;
            }
        } else if (TCG_TARGET_REG_BITS == 64) {
            /* ??? Note that we can't use the same SIB addressing scheme
               as for loads, since we require L0 free for bswap.  */
1763
            if (offset != guest_base) {
1764 1765 1766 1767
                if (TARGET_LONG_BITS == 32) {
                    tcg_out_ext32u(s, TCG_REG_L0, base);
                    base = TCG_REG_L0;
                }
1768
                tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, guest_base);
1769 1770 1771 1772 1773 1774 1775
                tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L1, base);
                base = TCG_REG_L1;
                offset = 0;
            } else if (TARGET_LONG_BITS == 32) {
                tcg_out_ext32u(s, TCG_REG_L1, base);
                base = TCG_REG_L1;
            }
1776 1777
        }

1778
        tcg_out_qemu_st_direct(s, datalo, datahi, base, offset, seg, opc);
1779 1780 1781
    }
#endif
}
B
bellard 已提交
1782

1783
static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
B
bellard 已提交
1784 1785
                              const TCGArg *args, const int *const_args)
{
1786
    int c, vexop, rexw = 0;
1787 1788 1789 1790 1791 1792 1793 1794 1795 1796

#if TCG_TARGET_REG_BITS == 64
# define OP_32_64(x) \
        case glue(glue(INDEX_op_, x), _i64): \
            rexw = P_REXW; /* FALLTHRU */    \
        case glue(glue(INDEX_op_, x), _i32)
#else
# define OP_32_64(x) \
        case glue(glue(INDEX_op_, x), _i32)
#endif
1797

B
bellard 已提交
1798 1799
    switch(opc) {
    case INDEX_op_exit_tb:
1800
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_EAX, args[0]);
1801
        tcg_out_jmp(s, tb_ret_addr);
B
bellard 已提交
1802 1803
        break;
    case INDEX_op_goto_tb:
1804
        if (s->tb_jmp_insn_offset) {
B
bellard 已提交
1805
            /* direct jump method */
1806 1807 1808 1809 1810 1811 1812 1813
            int gap;
            /* jump displacement must be aligned for atomic patching;
             * see if we need to add extra nops before jump
             */
            gap = tcg_pcrel_diff(s, QEMU_ALIGN_PTR_UP(s->code_ptr + 1, 4));
            if (gap != 1) {
                tcg_out_nopn(s, gap - 1);
            }
R
Richard Henderson 已提交
1814
            tcg_out8(s, OPC_JMP_long); /* jmp im */
1815
            s->tb_jmp_insn_offset[args[0]] = tcg_current_code_size(s);
B
bellard 已提交
1816 1817 1818
            tcg_out32(s, 0);
        } else {
            /* indirect jump method */
1819
            tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, -1,
1820
                                 (intptr_t)(s->tb_jmp_target_addr + args[0]));
B
bellard 已提交
1821
        }
1822
        s->tb_jmp_reset_offset[args[0]] = tcg_current_code_size(s);
B
bellard 已提交
1823 1824
        break;
    case INDEX_op_br:
1825
        tcg_out_jxx(s, JCC_JMP, arg_label(args[0]), 0);
B
bellard 已提交
1826
        break;
1827 1828
    OP_32_64(ld8u):
        /* Note that we can ignore REXW for the zero-extend to 64-bit.  */
1829
        tcg_out_modrm_offset(s, OPC_MOVZBL, args[0], args[1], args[2]);
B
bellard 已提交
1830
        break;
1831 1832
    OP_32_64(ld8s):
        tcg_out_modrm_offset(s, OPC_MOVSBL + rexw, args[0], args[1], args[2]);
B
bellard 已提交
1833
        break;
1834 1835
    OP_32_64(ld16u):
        /* Note that we can ignore REXW for the zero-extend to 64-bit.  */
1836
        tcg_out_modrm_offset(s, OPC_MOVZWL, args[0], args[1], args[2]);
B
bellard 已提交
1837
        break;
1838 1839
    OP_32_64(ld16s):
        tcg_out_modrm_offset(s, OPC_MOVSWL + rexw, args[0], args[1], args[2]);
B
bellard 已提交
1840
        break;
1841 1842 1843
#if TCG_TARGET_REG_BITS == 64
    case INDEX_op_ld32u_i64:
#endif
B
bellard 已提交
1844
    case INDEX_op_ld_i32:
1845
        tcg_out_ld(s, TCG_TYPE_I32, args[0], args[1], args[2]);
B
bellard 已提交
1846
        break;
1847 1848

    OP_32_64(st8):
1849 1850 1851 1852 1853 1854 1855 1856
        if (const_args[0]) {
            tcg_out_modrm_offset(s, OPC_MOVB_EvIz,
                                 0, args[1], args[2]);
            tcg_out8(s, args[0]);
        } else {
            tcg_out_modrm_offset(s, OPC_MOVB_EvGv | P_REXB_R,
                                 args[0], args[1], args[2]);
        }
B
bellard 已提交
1857
        break;
1858
    OP_32_64(st16):
1859 1860 1861 1862 1863 1864 1865 1866
        if (const_args[0]) {
            tcg_out_modrm_offset(s, OPC_MOVL_EvIz | P_DATA16,
                                 0, args[1], args[2]);
            tcg_out16(s, args[0]);
        } else {
            tcg_out_modrm_offset(s, OPC_MOVL_EvGv | P_DATA16,
                                 args[0], args[1], args[2]);
        }
B
bellard 已提交
1867
        break;
1868 1869 1870
#if TCG_TARGET_REG_BITS == 64
    case INDEX_op_st32_i64:
#endif
B
bellard 已提交
1871
    case INDEX_op_st_i32:
1872 1873 1874 1875 1876 1877
        if (const_args[0]) {
            tcg_out_modrm_offset(s, OPC_MOVL_EvIz, 0, args[1], args[2]);
            tcg_out32(s, args[0]);
        } else {
            tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]);
        }
B
bellard 已提交
1878
        break;
1879 1880

    OP_32_64(add):
1881 1882 1883 1884 1885 1886 1887 1888 1889
        /* For 3-operand addition, use LEA.  */
        if (args[0] != args[1]) {
            TCGArg a0 = args[0], a1 = args[1], a2 = args[2], c3 = 0;

            if (const_args[2]) {
                c3 = a2, a2 = -1;
            } else if (a0 == a2) {
                /* Watch out for dest = src + dest, since we've removed
                   the matching constraint on the add.  */
1890
                tgen_arithr(s, ARITH_ADD + rexw, a0, a1);
1891 1892 1893
                break;
            }

1894
            tcg_out_modrm_sib_offset(s, OPC_LEA + rexw, a0, a1, a2, 0, c3);
1895 1896 1897 1898
            break;
        }
        c = ARITH_ADD;
        goto gen_arith;
1899
    OP_32_64(sub):
B
bellard 已提交
1900 1901
        c = ARITH_SUB;
        goto gen_arith;
1902
    OP_32_64(and):
B
bellard 已提交
1903 1904
        c = ARITH_AND;
        goto gen_arith;
1905
    OP_32_64(or):
B
bellard 已提交
1906 1907
        c = ARITH_OR;
        goto gen_arith;
1908
    OP_32_64(xor):
B
bellard 已提交
1909 1910 1911 1912
        c = ARITH_XOR;
        goto gen_arith;
    gen_arith:
        if (const_args[2]) {
1913
            tgen_arithi(s, c + rexw, args[0], args[2], 0);
B
bellard 已提交
1914
        } else {
1915
            tgen_arithr(s, c + rexw, args[0], args[2]);
B
bellard 已提交
1916 1917
        }
        break;
1918

1919 1920 1921 1922 1923 1924 1925 1926 1927 1928
    OP_32_64(andc):
        if (const_args[2]) {
            tcg_out_mov(s, rexw ? TCG_TYPE_I64 : TCG_TYPE_I32,
                        args[0], args[1]);
            tgen_arithi(s, ARITH_AND + rexw, args[0], ~args[2], 0);
        } else {
            tcg_out_vex_modrm(s, OPC_ANDN + rexw, args[0], args[2], args[1]);
        }
        break;

1929
    OP_32_64(mul):
B
bellard 已提交
1930 1931 1932 1933
        if (const_args[2]) {
            int32_t val;
            val = args[2];
            if (val == (int8_t)val) {
1934
                tcg_out_modrm(s, OPC_IMUL_GvEvIb + rexw, args[0], args[0]);
B
bellard 已提交
1935 1936
                tcg_out8(s, val);
            } else {
1937
                tcg_out_modrm(s, OPC_IMUL_GvEvIz + rexw, args[0], args[0]);
B
bellard 已提交
1938 1939 1940
                tcg_out32(s, val);
            }
        } else {
1941
            tcg_out_modrm(s, OPC_IMUL_GvEv + rexw, args[0], args[2]);
B
bellard 已提交
1942 1943
        }
        break;
1944 1945 1946

    OP_32_64(div2):
        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IDIV, args[4]);
B
bellard 已提交
1947
        break;
1948 1949
    OP_32_64(divu2):
        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_DIV, args[4]);
B
bellard 已提交
1950
        break;
1951 1952

    OP_32_64(shl):
B
bellard 已提交
1953
        c = SHIFT_SHL;
1954 1955
        vexop = OPC_SHLX;
        goto gen_shift_maybe_vex;
1956
    OP_32_64(shr):
B
bellard 已提交
1957
        c = SHIFT_SHR;
1958 1959
        vexop = OPC_SHRX;
        goto gen_shift_maybe_vex;
1960
    OP_32_64(sar):
B
bellard 已提交
1961
        c = SHIFT_SAR;
1962 1963
        vexop = OPC_SARX;
        goto gen_shift_maybe_vex;
1964
    OP_32_64(rotl):
1965
        c = SHIFT_ROL;
1966 1967
        goto gen_shift;
    OP_32_64(rotr):
1968
        c = SHIFT_ROR;
1969
        goto gen_shift;
1970 1971 1972 1973 1974 1975
    gen_shift_maybe_vex:
        if (have_bmi2 && !const_args[2]) {
            tcg_out_vex_modrm(s, vexop + rexw, args[0], args[2], args[1]);
            break;
        }
        /* FALLTHRU */
1976 1977 1978
    gen_shift:
        if (const_args[2]) {
            tcg_out_shifti(s, c + rexw, args[0], args[2]);
1979
        } else {
1980
            tcg_out_modrm(s, OPC_SHIFT_cl + rexw, c, args[0]);
1981
        }
B
bellard 已提交
1982
        break;
1983

B
bellard 已提交
1984
    case INDEX_op_brcond_i32:
1985
        tcg_out_brcond32(s, args[2], args[0], args[1], const_args[1],
1986
                         arg_label(args[3]), 0);
B
bellard 已提交
1987
        break;
1988 1989 1990
    case INDEX_op_setcond_i32:
        tcg_out_setcond32(s, args[3], args[0], args[1],
                          args[2], const_args[2]);
B
bellard 已提交
1991
        break;
R
Richard Henderson 已提交
1992 1993 1994 1995
    case INDEX_op_movcond_i32:
        tcg_out_movcond32(s, args[5], args[0], args[1],
                          args[2], const_args[2], args[3]);
        break;
B
bellard 已提交
1996

1997
    OP_32_64(bswap16):
1998
        tcg_out_rolw_8(s, args[0]);
A
aurel32 已提交
1999
        break;
2000
    OP_32_64(bswap32):
2001
        tcg_out_bswap32(s, args[0]);
2002 2003
        break;

2004 2005
    OP_32_64(neg):
        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NEG, args[0]);
2006
        break;
2007 2008
    OP_32_64(not):
        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NOT, args[0]);
2009 2010
        break;

2011 2012
    OP_32_64(ext8s):
        tcg_out_ext8s(s, args[0], args[1], rexw);
2013
        break;
2014 2015
    OP_32_64(ext16s):
        tcg_out_ext16s(s, args[0], args[1], rexw);
2016
        break;
2017
    OP_32_64(ext8u):
2018
        tcg_out_ext8u(s, args[0], args[1]);
2019
        break;
2020
    OP_32_64(ext16u):
2021
        tcg_out_ext16u(s, args[0], args[1]);
2022
        break;
2023

2024 2025
    case INDEX_op_qemu_ld_i32:
        tcg_out_qemu_ld(s, args, 0);
B
bellard 已提交
2026
        break;
2027 2028
    case INDEX_op_qemu_ld_i64:
        tcg_out_qemu_ld(s, args, 1);
B
bellard 已提交
2029
        break;
2030 2031
    case INDEX_op_qemu_st_i32:
        tcg_out_qemu_st(s, args, 0);
B
bellard 已提交
2032
        break;
2033 2034
    case INDEX_op_qemu_st_i64:
        tcg_out_qemu_st(s, args, 1);
B
bellard 已提交
2035 2036
        break;

2037 2038
    OP_32_64(mulu2):
        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_MUL, args[3]);
2039
        break;
2040 2041 2042 2043
    OP_32_64(muls2):
        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IMUL, args[3]);
        break;
    OP_32_64(add2):
2044
        if (const_args[4]) {
2045
            tgen_arithi(s, ARITH_ADD + rexw, args[0], args[4], 1);
2046
        } else {
2047
            tgen_arithr(s, ARITH_ADD + rexw, args[0], args[4]);
2048 2049
        }
        if (const_args[5]) {
2050
            tgen_arithi(s, ARITH_ADC + rexw, args[1], args[5], 1);
2051
        } else {
2052
            tgen_arithr(s, ARITH_ADC + rexw, args[1], args[5]);
2053 2054
        }
        break;
2055
    OP_32_64(sub2):
2056
        if (const_args[4]) {
2057
            tgen_arithi(s, ARITH_SUB + rexw, args[0], args[4], 1);
2058
        } else {
2059
            tgen_arithr(s, ARITH_SUB + rexw, args[0], args[4]);
2060 2061
        }
        if (const_args[5]) {
2062
            tgen_arithi(s, ARITH_SBB + rexw, args[1], args[5], 1);
2063
        } else {
2064
            tgen_arithr(s, ARITH_SBB + rexw, args[1], args[5]);
2065 2066
        }
        break;
2067 2068 2069 2070 2071 2072 2073 2074

#if TCG_TARGET_REG_BITS == 32
    case INDEX_op_brcond2_i32:
        tcg_out_brcond2(s, args, const_args, 0);
        break;
    case INDEX_op_setcond2_i32:
        tcg_out_setcond2(s, args, const_args);
        break;
2075 2076 2077 2078 2079 2080 2081 2082
#else /* TCG_TARGET_REG_BITS == 64 */
    case INDEX_op_ld32s_i64:
        tcg_out_modrm_offset(s, OPC_MOVSLQ, args[0], args[1], args[2]);
        break;
    case INDEX_op_ld_i64:
        tcg_out_ld(s, TCG_TYPE_I64, args[0], args[1], args[2]);
        break;
    case INDEX_op_st_i64:
2083 2084 2085 2086 2087 2088 2089
        if (const_args[0]) {
            tcg_out_modrm_offset(s, OPC_MOVL_EvIz | P_REXW,
                                 0, args[1], args[2]);
            tcg_out32(s, args[0]);
        } else {
            tcg_out_st(s, TCG_TYPE_I64, args[0], args[1], args[2]);
        }
2090 2091 2092 2093
        break;

    case INDEX_op_brcond_i64:
        tcg_out_brcond64(s, args[2], args[0], args[1], const_args[1],
2094
                         arg_label(args[3]), 0);
2095 2096 2097 2098 2099
        break;
    case INDEX_op_setcond_i64:
        tcg_out_setcond64(s, args[3], args[0], args[1],
                          args[2], const_args[2]);
        break;
R
Richard Henderson 已提交
2100 2101 2102 2103
    case INDEX_op_movcond_i64:
        tcg_out_movcond64(s, args[5], args[0], args[1],
                          args[2], const_args[2], args[3]);
        break;
2104 2105 2106 2107

    case INDEX_op_bswap64_i64:
        tcg_out_bswap64(s, args[0]);
        break;
2108
    case INDEX_op_extu_i32_i64:
2109 2110 2111
    case INDEX_op_ext32u_i64:
        tcg_out_ext32u(s, args[0], args[1]);
        break;
2112
    case INDEX_op_ext_i32_i64:
2113 2114 2115 2116 2117
    case INDEX_op_ext32s_i64:
        tcg_out_ext32s(s, args[0], args[1]);
        break;
#endif

2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133
    OP_32_64(deposit):
        if (args[3] == 0 && args[4] == 8) {
            /* load bits 0..7 */
            tcg_out_modrm(s, OPC_MOVB_EvGv | P_REXB_R | P_REXB_RM,
                          args[2], args[0]);
        } else if (args[3] == 8 && args[4] == 8) {
            /* load bits 8..15 */
            tcg_out_modrm(s, OPC_MOVB_EvGv, args[2], args[0] + 4);
        } else if (args[3] == 0 && args[4] == 16) {
            /* load bits 0..15 */
            tcg_out_modrm(s, OPC_MOVL_EvGv | P_DATA16, args[2], args[0]);
        } else {
            tcg_abort();
        }
        break;

2134 2135 2136 2137 2138
    case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
    case INDEX_op_mov_i64:
    case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi.  */
    case INDEX_op_movi_i64:
    case INDEX_op_call:     /* Always emitted via tcg_out_call.  */
B
bellard 已提交
2139 2140 2141
    default:
        tcg_abort();
    }
2142 2143

#undef OP_32_64
B
bellard 已提交
2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154
}

static const TCGTargetOpDef x86_op_defs[] = {
    { INDEX_op_exit_tb, { } },
    { INDEX_op_goto_tb, { } },
    { INDEX_op_br, { } },
    { INDEX_op_ld8u_i32, { "r", "r" } },
    { INDEX_op_ld8s_i32, { "r", "r" } },
    { INDEX_op_ld16u_i32, { "r", "r" } },
    { INDEX_op_ld16s_i32, { "r", "r" } },
    { INDEX_op_ld_i32, { "r", "r" } },
2155 2156 2157
    { INDEX_op_st8_i32, { "qi", "r" } },
    { INDEX_op_st16_i32, { "ri", "r" } },
    { INDEX_op_st_i32, { "ri", "r" } },
B
bellard 已提交
2158

2159
    { INDEX_op_add_i32, { "r", "r", "ri" } },
B
bellard 已提交
2160 2161 2162 2163 2164 2165 2166
    { INDEX_op_sub_i32, { "r", "0", "ri" } },
    { INDEX_op_mul_i32, { "r", "0", "ri" } },
    { INDEX_op_div2_i32, { "a", "d", "0", "1", "r" } },
    { INDEX_op_divu2_i32, { "a", "d", "0", "1", "r" } },
    { INDEX_op_and_i32, { "r", "0", "ri" } },
    { INDEX_op_or_i32, { "r", "0", "ri" } },
    { INDEX_op_xor_i32, { "r", "0", "ri" } },
2167
    { INDEX_op_andc_i32, { "r", "r", "ri" } },
B
bellard 已提交
2168

2169 2170 2171
    { INDEX_op_shl_i32, { "r", "0", "Ci" } },
    { INDEX_op_shr_i32, { "r", "0", "Ci" } },
    { INDEX_op_sar_i32, { "r", "0", "Ci" } },
2172 2173
    { INDEX_op_rotl_i32, { "r", "0", "ci" } },
    { INDEX_op_rotr_i32, { "r", "0", "ci" } },
B
bellard 已提交
2174 2175 2176

    { INDEX_op_brcond_i32, { "r", "ri" } },

A
aurel32 已提交
2177
    { INDEX_op_bswap16_i32, { "r", "0" } },
A
aurel32 已提交
2178
    { INDEX_op_bswap32_i32, { "r", "0" } },
2179 2180 2181 2182 2183 2184 2185

    { INDEX_op_neg_i32, { "r", "0" } },

    { INDEX_op_not_i32, { "r", "0" } },

    { INDEX_op_ext8s_i32, { "r", "q" } },
    { INDEX_op_ext16s_i32, { "r", "r" } },
2186 2187
    { INDEX_op_ext8u_i32, { "r", "q" } },
    { INDEX_op_ext16u_i32, { "r", "r" } },
2188

R
Richard Henderson 已提交
2189
    { INDEX_op_setcond_i32, { "q", "r", "ri" } },
2190

2191
    { INDEX_op_deposit_i32, { "Q", "0", "Q" } },
R
Richard Henderson 已提交
2192
    { INDEX_op_movcond_i32, { "r", "r", "ri", "r", "0" } },
2193

2194
    { INDEX_op_mulu2_i32, { "a", "d", "a", "r" } },
2195
    { INDEX_op_muls2_i32, { "a", "d", "a", "r" } },
2196 2197
    { INDEX_op_add2_i32, { "r", "r", "0", "1", "ri", "ri" } },
    { INDEX_op_sub2_i32, { "r", "r", "0", "1", "ri", "ri" } },
2198 2199

#if TCG_TARGET_REG_BITS == 32
2200
    { INDEX_op_brcond2_i32, { "r", "r", "ri", "ri" } },
R
Richard Henderson 已提交
2201
    { INDEX_op_setcond2_i32, { "r", "r", "r", "ri", "ri" } },
2202 2203 2204 2205 2206 2207 2208 2209
#else
    { INDEX_op_ld8u_i64, { "r", "r" } },
    { INDEX_op_ld8s_i64, { "r", "r" } },
    { INDEX_op_ld16u_i64, { "r", "r" } },
    { INDEX_op_ld16s_i64, { "r", "r" } },
    { INDEX_op_ld32u_i64, { "r", "r" } },
    { INDEX_op_ld32s_i64, { "r", "r" } },
    { INDEX_op_ld_i64, { "r", "r" } },
2210 2211 2212 2213
    { INDEX_op_st8_i64, { "ri", "r" } },
    { INDEX_op_st16_i64, { "ri", "r" } },
    { INDEX_op_st32_i64, { "ri", "r" } },
    { INDEX_op_st_i64, { "re", "r" } },
2214

2215
    { INDEX_op_add_i64, { "r", "r", "re" } },
2216 2217 2218 2219 2220 2221 2222
    { INDEX_op_mul_i64, { "r", "0", "re" } },
    { INDEX_op_div2_i64, { "a", "d", "0", "1", "r" } },
    { INDEX_op_divu2_i64, { "a", "d", "0", "1", "r" } },
    { INDEX_op_sub_i64, { "r", "0", "re" } },
    { INDEX_op_and_i64, { "r", "0", "reZ" } },
    { INDEX_op_or_i64, { "r", "0", "re" } },
    { INDEX_op_xor_i64, { "r", "0", "re" } },
2223
    { INDEX_op_andc_i64, { "r", "r", "rI" } },
2224

2225 2226 2227
    { INDEX_op_shl_i64, { "r", "0", "Ci" } },
    { INDEX_op_shr_i64, { "r", "0", "Ci" } },
    { INDEX_op_sar_i64, { "r", "0", "Ci" } },
2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245
    { INDEX_op_rotl_i64, { "r", "0", "ci" } },
    { INDEX_op_rotr_i64, { "r", "0", "ci" } },

    { INDEX_op_brcond_i64, { "r", "re" } },
    { INDEX_op_setcond_i64, { "r", "r", "re" } },

    { INDEX_op_bswap16_i64, { "r", "0" } },
    { INDEX_op_bswap32_i64, { "r", "0" } },
    { INDEX_op_bswap64_i64, { "r", "0" } },
    { INDEX_op_neg_i64, { "r", "0" } },
    { INDEX_op_not_i64, { "r", "0" } },

    { INDEX_op_ext8s_i64, { "r", "r" } },
    { INDEX_op_ext16s_i64, { "r", "r" } },
    { INDEX_op_ext32s_i64, { "r", "r" } },
    { INDEX_op_ext8u_i64, { "r", "r" } },
    { INDEX_op_ext16u_i64, { "r", "r" } },
    { INDEX_op_ext32u_i64, { "r", "r" } },
2246

2247 2248 2249
    { INDEX_op_ext_i32_i64, { "r", "r" } },
    { INDEX_op_extu_i32_i64, { "r", "r" } },

2250
    { INDEX_op_deposit_i64, { "Q", "0", "Q" } },
R
Richard Henderson 已提交
2251
    { INDEX_op_movcond_i64, { "r", "r", "re", "r", "0" } },
2252 2253 2254 2255 2256

    { INDEX_op_mulu2_i64, { "a", "d", "a", "r" } },
    { INDEX_op_muls2_i64, { "a", "d", "a", "r" } },
    { INDEX_op_add2_i64, { "r", "r", "0", "1", "re", "re" } },
    { INDEX_op_sub2_i64, { "r", "r", "0", "1", "re", "re" } },
2257
#endif
R
Richard Henderson 已提交
2258

2259
#if TCG_TARGET_REG_BITS == 64
2260 2261 2262 2263
    { INDEX_op_qemu_ld_i32, { "r", "L" } },
    { INDEX_op_qemu_st_i32, { "L", "L" } },
    { INDEX_op_qemu_ld_i64, { "r", "L" } },
    { INDEX_op_qemu_st_i64, { "L", "L" } },
2264
#elif TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
2265 2266 2267 2268
    { INDEX_op_qemu_ld_i32, { "r", "L" } },
    { INDEX_op_qemu_st_i32, { "L", "L" } },
    { INDEX_op_qemu_ld_i64, { "r", "r", "L" } },
    { INDEX_op_qemu_st_i64, { "L", "L", "L" } },
B
bellard 已提交
2269
#else
2270 2271 2272 2273
    { INDEX_op_qemu_ld_i32, { "r", "L", "L" } },
    { INDEX_op_qemu_st_i32, { "L", "L", "L" } },
    { INDEX_op_qemu_ld_i64, { "r", "r", "L", "L" } },
    { INDEX_op_qemu_st_i64, { "L", "L", "L", "L" } },
B
bellard 已提交
2274 2275 2276 2277
#endif
    { -1 },
};

2278
static int tcg_target_callee_save_regs[] = {
2279 2280 2281
#if TCG_TARGET_REG_BITS == 64
    TCG_REG_RBP,
    TCG_REG_RBX,
S
Stefan Weil 已提交
2282 2283 2284 2285
#if defined(_WIN64)
    TCG_REG_RDI,
    TCG_REG_RSI,
#endif
2286 2287
    TCG_REG_R12,
    TCG_REG_R13,
B
Blue Swirl 已提交
2288
    TCG_REG_R14, /* Currently used for the global env. */
2289 2290
    TCG_REG_R15,
#else
B
Blue Swirl 已提交
2291
    TCG_REG_EBP, /* Currently used for the global env. */
2292 2293 2294
    TCG_REG_EBX,
    TCG_REG_ESI,
    TCG_REG_EDI,
2295
#endif
2296 2297
};

2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311
/* Compute frame size via macros, to share between tcg_target_qemu_prologue
   and tcg_register_jit.  */

#define PUSH_SIZE \
    ((1 + ARRAY_SIZE(tcg_target_callee_save_regs)) \
     * (TCG_TARGET_REG_BITS / 8))

#define FRAME_SIZE \
    ((PUSH_SIZE \
      + TCG_STATIC_CALL_ARGS_SIZE \
      + CPU_TEMP_BUF_NLONGS * sizeof(long) \
      + TCG_TARGET_STACK_ALIGN - 1) \
     & ~(TCG_TARGET_STACK_ALIGN - 1))

2312
/* Generate global QEMU prologue and epilogue code */
2313
static void tcg_target_qemu_prologue(TCGContext *s)
2314
{
2315
    int i, stack_addend;
2316

2317
    /* TB prologue */
2318

B
Blue Swirl 已提交
2319
    /* Reserve some stack space, also for TCG temps.  */
2320
    stack_addend = FRAME_SIZE - PUSH_SIZE;
B
Blue Swirl 已提交
2321 2322 2323 2324 2325 2326 2327 2328
    tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE,
                  CPU_TEMP_BUF_NLONGS * sizeof(long));

    /* Save all callee saved registers.  */
    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
        tcg_out_push(s, tcg_target_callee_save_regs[i]);
    }

B
Blue Swirl 已提交
2329 2330 2331
#if TCG_TARGET_REG_BITS == 32
    tcg_out_ld(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP,
               (ARRAY_SIZE(tcg_target_callee_save_regs) + 1) * 4);
2332 2333 2334 2335 2336
    tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
    /* jmp *tb.  */
    tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, TCG_REG_ESP,
		         (ARRAY_SIZE(tcg_target_callee_save_regs) + 2) * 4
			 + stack_addend);
B
Blue Swirl 已提交
2337
#else
B
Blue Swirl 已提交
2338
    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
B
Blue Swirl 已提交
2339
    tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
2340
    /* jmp *tb.  */
B
Blue Swirl 已提交
2341
    tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, tcg_target_call_iarg_regs[1]);
2342
#endif
2343

2344 2345
    /* TB epilogue */
    tb_ret_addr = s->code_ptr;
2346

2347
    tcg_out_addi(s, TCG_REG_CALL_STACK, stack_addend);
2348 2349

    for (i = ARRAY_SIZE(tcg_target_callee_save_regs) - 1; i >= 0; i--) {
2350 2351
        tcg_out_pop(s, tcg_target_callee_save_regs[i]);
    }
2352
    tcg_out_opc(s, OPC_RET, 0, 0, 0);
2353 2354

#if !defined(CONFIG_SOFTMMU)
2355 2356
    /* Try to set up a segment register to point to guest_base.  */
    if (guest_base) {
2357 2358 2359
        setup_guest_base_seg();
    }
#endif
2360 2361
}

2362
static void tcg_target_init(TCGContext *s)
B
bellard 已提交
2363
{
2364
#ifdef CONFIG_CPUID_H
2365 2366
    unsigned a, b, c, d;
    int max = __get_cpuid_max(0, 0);
2367

2368 2369 2370
    if (max >= 1) {
        __cpuid(1, a, b, c, d);
#ifndef have_cmov
2371 2372 2373
        /* For 32-bit, 99% certainty that we're running on hardware that
           supports cmov, but we still need to check.  In case cmov is not
           available, we'll use a small forward branch.  */
2374 2375 2376
        have_cmov = (d & bit_CMOV) != 0;
#endif
#ifndef have_movbe
2377 2378
        /* MOVBE is only available on Intel Atom and Haswell CPUs, so we
           need to probe for it.  */
2379 2380
        have_movbe = (c & bit_MOVBE) != 0;
#endif
2381
    }
2382 2383 2384 2385 2386 2387

    if (max >= 7) {
        /* BMI1 is available on AMD Piledriver and Intel Haswell CPUs.  */
        __cpuid_count(7, 0, a, b, c, d);
#ifdef bit_BMI
        have_bmi1 = (b & bit_BMI) != 0;
2388 2389 2390
#endif
#ifndef have_bmi2
        have_bmi2 = (b & bit_BMI2) != 0;
2391
#endif
2392
    }
2393
#endif
2394

2395 2396 2397 2398 2399 2400
    if (TCG_TARGET_REG_BITS == 64) {
        tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffff);
        tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffff);
    } else {
        tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xff);
    }
2401 2402 2403 2404 2405

    tcg_regset_clear(tcg_target_call_clobber_regs);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EAX);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EDX);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_ECX);
2406
    if (TCG_TARGET_REG_BITS == 64) {
S
Stefan Weil 已提交
2407
#if !defined(_WIN64)
2408 2409
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RDI);
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RSI);
S
Stefan Weil 已提交
2410
#endif
2411 2412 2413 2414 2415
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R8);
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R9);
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R10);
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R11);
    }
2416

B
bellard 已提交
2417
    tcg_regset_clear(s->reserved_regs);
2418
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);
B
bellard 已提交
2419 2420 2421

    tcg_add_target_add_op_defs(x86_op_defs);
}
2422 2423

typedef struct {
2424
    DebugFrameHeader h;
2425 2426
    uint8_t fde_def_cfa[4];
    uint8_t fde_reg_ofs[14];
2427 2428
} DebugFrame;

2429 2430 2431
/* We're expecting a 2 byte uleb128 encoded value.  */
QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));

2432 2433 2434
#if !defined(__ELF__)
    /* Host machine without ELF. */
#elif TCG_TARGET_REG_BITS == 64
2435
#define ELF_HOST_MACHINE EM_X86_64
2436 2437 2438 2439 2440 2441 2442
static const DebugFrame debug_frame = {
    .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
    .h.cie.id = -1,
    .h.cie.version = 1,
    .h.cie.code_align = 1,
    .h.cie.data_align = 0x78,             /* sleb128 -8 */
    .h.cie.return_column = 16,
2443

2444
    /* Total FDE size does not include the "len" member.  */
2445
    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
2446 2447

    .fde_def_cfa = {
2448 2449 2450 2451
        12, 7,                          /* DW_CFA_def_cfa %rsp, ... */
        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
        (FRAME_SIZE >> 7)
    },
2452
    .fde_reg_ofs = {
2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464
        0x90, 1,                        /* DW_CFA_offset, %rip, -8 */
        /* The following ordering must match tcg_target_callee_save_regs.  */
        0x86, 2,                        /* DW_CFA_offset, %rbp, -16 */
        0x83, 3,                        /* DW_CFA_offset, %rbx, -24 */
        0x8c, 4,                        /* DW_CFA_offset, %r12, -32 */
        0x8d, 5,                        /* DW_CFA_offset, %r13, -40 */
        0x8e, 6,                        /* DW_CFA_offset, %r14, -48 */
        0x8f, 7,                        /* DW_CFA_offset, %r15, -56 */
    }
};
#else
#define ELF_HOST_MACHINE EM_386
2465 2466 2467 2468 2469 2470 2471
static const DebugFrame debug_frame = {
    .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
    .h.cie.id = -1,
    .h.cie.version = 1,
    .h.cie.code_align = 1,
    .h.cie.data_align = 0x7c,             /* sleb128 -4 */
    .h.cie.return_column = 8,
2472

2473
    /* Total FDE size does not include the "len" member.  */
2474
    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
2475 2476

    .fde_def_cfa = {
2477 2478 2479 2480
        12, 4,                          /* DW_CFA_def_cfa %esp, ... */
        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
        (FRAME_SIZE >> 7)
    },
2481
    .fde_reg_ofs = {
2482 2483 2484 2485 2486 2487 2488 2489 2490 2491
        0x88, 1,                        /* DW_CFA_offset, %eip, -4 */
        /* The following ordering must match tcg_target_callee_save_regs.  */
        0x85, 2,                        /* DW_CFA_offset, %ebp, -8 */
        0x83, 3,                        /* DW_CFA_offset, %ebx, -12 */
        0x86, 4,                        /* DW_CFA_offset, %esi, -16 */
        0x87, 5,                        /* DW_CFA_offset, %edi, -20 */
    }
};
#endif

2492
#if defined(ELF_HOST_MACHINE)
2493 2494 2495 2496
void tcg_register_jit(void *buf, size_t buf_size)
{
    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
}
2497
#endif