提交 dc08f851 编写于 作者: Peter Maydell

Merge remote-tracking branch 'rth/tcg-movbe' into staging

* rth/tcg-movbe:
  tcg/i386: cleanup useless #ifdef
  tcg/i386: use movbe instruction in qemu_ldst routines
  tcg/i386: add support for three-byte opcodes
  tcg/i386: remove hardcoded P_REXW value
  disas/i386.c: disassemble movbe instruction

Message-id: 1390692772-15282-1-git-send-email-rth@twiddle.net
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
...@@ -2632,17 +2632,17 @@ static const struct dis386 prefix_user_table[][4] = { ...@@ -2632,17 +2632,17 @@ static const struct dis386 prefix_user_table[][4] = {
/* PREGRP87 */ /* PREGRP87 */
{ {
{ "movbe", { Gv, Ev } },
{ "(bad)", { XX } }, { "(bad)", { XX } },
{ "(bad)", { XX } }, { "movbe", { Gv, Ev } },
{ "(bad)", { XX } },
{ "crc32", { Gdq, { CRC32_Fixup, b_mode } } }, { "crc32", { Gdq, { CRC32_Fixup, b_mode } } },
}, },
/* PREGRP88 */ /* PREGRP88 */
{ {
{ "movbe", { Ev, Gv } },
{ "(bad)", { XX } }, { "(bad)", { XX } },
{ "(bad)", { XX } }, { "movbe", { Ev, Gv } },
{ "(bad)", { XX } },
{ "crc32", { Gdq, { CRC32_Fixup, v_mode } } }, { "crc32", { Gdq, { CRC32_Fixup, v_mode } } },
}, },
......
...@@ -99,18 +99,31 @@ static const int tcg_target_call_oarg_regs[] = { ...@@ -99,18 +99,31 @@ static const int tcg_target_call_oarg_regs[] = {
# define TCG_REG_L1 TCG_REG_EDX # define TCG_REG_L1 TCG_REG_EDX
#endif #endif
/* The host compiler should supply <cpuid.h> to enable runtime features
detection, as we're not going to go so far as our own inline assembly.
If not available, default values will be assumed. */
#if defined(CONFIG_CPUID_H)
#include <cpuid.h>
#endif
/* For 32-bit, we are going to attempt to determine at runtime whether cmov /* For 32-bit, we are going to attempt to determine at runtime whether cmov
is available. However, the host compiler must supply <cpuid.h>, as we're is available. */
not going to go so far as our own inline assembly. */
#if TCG_TARGET_REG_BITS == 64 #if TCG_TARGET_REG_BITS == 64
# define have_cmov 1 # define have_cmov 1
#elif defined(CONFIG_CPUID_H) #elif defined(CONFIG_CPUID_H)
#include <cpuid.h>
static bool have_cmov; static bool have_cmov;
#else #else
# define have_cmov 0 # define have_cmov 0
#endif #endif
/* If bit_MOVBE is defined in cpuid.h (added in GCC version 4.6), we are
going to attempt to determine at runtime whether movbe is available. */
#if defined(CONFIG_CPUID_H) && defined(bit_MOVBE)
static bool have_movbe;
#else
# define have_movbe 0
#endif
static uint8_t *tb_ret_addr; static uint8_t *tb_ret_addr;
static void patch_reloc(uint8_t *code_ptr, int type, static void patch_reloc(uint8_t *code_ptr, int type,
...@@ -240,13 +253,14 @@ static inline int tcg_target_const_match(tcg_target_long val, ...@@ -240,13 +253,14 @@ static inline int tcg_target_const_match(tcg_target_long val,
#endif #endif
#define P_EXT 0x100 /* 0x0f opcode prefix */ #define P_EXT 0x100 /* 0x0f opcode prefix */
#define P_DATA16 0x200 /* 0x66 opcode prefix */ #define P_EXT38 0x200 /* 0x0f 0x38 opcode prefix */
#define P_DATA16 0x400 /* 0x66 opcode prefix */
#if TCG_TARGET_REG_BITS == 64 #if TCG_TARGET_REG_BITS == 64
# define P_ADDR32 0x400 /* 0x67 opcode prefix */ # define P_ADDR32 0x800 /* 0x67 opcode prefix */
# define P_REXW 0x800 /* Set REX.W = 1 */ # define P_REXW 0x1000 /* Set REX.W = 1 */
# define P_REXB_R 0x1000 /* REG field as byte register */ # define P_REXB_R 0x2000 /* REG field as byte register */
# define P_REXB_RM 0x2000 /* R/M field as byte register */ # define P_REXB_RM 0x4000 /* R/M field as byte register */
# define P_GS 0x4000 /* gs segment override */ # define P_GS 0x8000 /* gs segment override */
#else #else
# define P_ADDR32 0 # define P_ADDR32 0
# define P_REXW 0 # define P_REXW 0
...@@ -279,6 +293,8 @@ static inline int tcg_target_const_match(tcg_target_long val, ...@@ -279,6 +293,8 @@ static inline int tcg_target_const_match(tcg_target_long val,
#define OPC_MOVB_EvIz (0xc6) #define OPC_MOVB_EvIz (0xc6)
#define OPC_MOVL_EvIz (0xc7) #define OPC_MOVL_EvIz (0xc7)
#define OPC_MOVL_Iv (0xb8) #define OPC_MOVL_Iv (0xb8)
#define OPC_MOVBE_GyMy (0xf0 | P_EXT38)
#define OPC_MOVBE_MyGy (0xf1 | P_EXT38)
#define OPC_MOVSBL (0xbe | P_EXT) #define OPC_MOVSBL (0xbe | P_EXT)
#define OPC_MOVSWL (0xbf | P_EXT) #define OPC_MOVSWL (0xbf | P_EXT)
#define OPC_MOVSLQ (0x63 | P_REXW) #define OPC_MOVSLQ (0x63 | P_REXW)
...@@ -381,7 +397,7 @@ static void tcg_out_opc(TCGContext *s, int opc, int r, int rm, int x) ...@@ -381,7 +397,7 @@ static void tcg_out_opc(TCGContext *s, int opc, int r, int rm, int x)
} }
rex = 0; rex = 0;
rex |= (opc & P_REXW) >> 8; /* REX.W */ rex |= (opc & P_REXW) ? 0x8 : 0x0; /* REX.W */
rex |= (r & 8) >> 1; /* REX.R */ rex |= (r & 8) >> 1; /* REX.R */
rex |= (x & 8) >> 2; /* REX.X */ rex |= (x & 8) >> 2; /* REX.X */
rex |= (rm & 8) >> 3; /* REX.B */ rex |= (rm & 8) >> 3; /* REX.B */
...@@ -398,9 +414,13 @@ static void tcg_out_opc(TCGContext *s, int opc, int r, int rm, int x) ...@@ -398,9 +414,13 @@ static void tcg_out_opc(TCGContext *s, int opc, int r, int rm, int x)
tcg_out8(s, (uint8_t)(rex | 0x40)); tcg_out8(s, (uint8_t)(rex | 0x40));
} }
if (opc & P_EXT) { if (opc & (P_EXT | P_EXT38)) {
tcg_out8(s, 0x0f); tcg_out8(s, 0x0f);
if (opc & P_EXT38) {
tcg_out8(s, 0x38);
}
} }
tcg_out8(s, opc); tcg_out8(s, opc);
} }
#else #else
...@@ -409,8 +429,11 @@ static void tcg_out_opc(TCGContext *s, int opc) ...@@ -409,8 +429,11 @@ static void tcg_out_opc(TCGContext *s, int opc)
if (opc & P_DATA16) { if (opc & P_DATA16) {
tcg_out8(s, 0x66); tcg_out8(s, 0x66);
} }
if (opc & P_EXT) { if (opc & (P_EXT | P_EXT38)) {
tcg_out8(s, 0x0f); tcg_out8(s, 0x0f);
if (opc & P_EXT38) {
tcg_out8(s, 0x38);
}
} }
tcg_out8(s, opc); tcg_out8(s, opc);
} }
...@@ -1336,7 +1359,14 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, ...@@ -1336,7 +1359,14 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
TCGReg base, intptr_t ofs, int seg, TCGReg base, intptr_t ofs, int seg,
TCGMemOp memop) TCGMemOp memop)
{ {
const TCGMemOp bswap = memop & MO_BSWAP; const TCGMemOp real_bswap = memop & MO_BSWAP;
TCGMemOp bswap = real_bswap;
int movop = OPC_MOVL_GvEv;
if (have_movbe && real_bswap) {
bswap = 0;
movop = OPC_MOVBE_GyMy;
}
switch (memop & MO_SSIZE) { switch (memop & MO_SSIZE) {
case MO_UB: case MO_UB:
...@@ -1347,14 +1377,19 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, ...@@ -1347,14 +1377,19 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
break; break;
case MO_UW: case MO_UW:
tcg_out_modrm_offset(s, OPC_MOVZWL + seg, datalo, base, ofs); tcg_out_modrm_offset(s, OPC_MOVZWL + seg, datalo, base, ofs);
if (bswap) { if (real_bswap) {
tcg_out_rolw_8(s, datalo); tcg_out_rolw_8(s, datalo);
} }
break; break;
case MO_SW: case MO_SW:
if (bswap) { if (real_bswap) {
tcg_out_modrm_offset(s, OPC_MOVZWL + seg, datalo, base, ofs); if (have_movbe) {
tcg_out_rolw_8(s, datalo); tcg_out_modrm_offset(s, OPC_MOVBE_GyMy + P_DATA16 + seg,
datalo, base, ofs);
} else {
tcg_out_modrm_offset(s, OPC_MOVZWL + seg, datalo, base, ofs);
tcg_out_rolw_8(s, datalo);
}
tcg_out_modrm(s, OPC_MOVSWL + P_REXW, datalo, datalo); tcg_out_modrm(s, OPC_MOVSWL + P_REXW, datalo, datalo);
} else { } else {
tcg_out_modrm_offset(s, OPC_MOVSWL + P_REXW + seg, tcg_out_modrm_offset(s, OPC_MOVSWL + P_REXW + seg,
...@@ -1362,16 +1397,18 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, ...@@ -1362,16 +1397,18 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
} }
break; break;
case MO_UL: case MO_UL:
tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg, datalo, base, ofs); tcg_out_modrm_offset(s, movop + seg, datalo, base, ofs);
if (bswap) { if (bswap) {
tcg_out_bswap32(s, datalo); tcg_out_bswap32(s, datalo);
} }
break; break;
#if TCG_TARGET_REG_BITS == 64 #if TCG_TARGET_REG_BITS == 64
case MO_SL: case MO_SL:
if (bswap) { if (real_bswap) {
tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg, datalo, base, ofs); tcg_out_modrm_offset(s, movop + seg, datalo, base, ofs);
tcg_out_bswap32(s, datalo); if (bswap) {
tcg_out_bswap32(s, datalo);
}
tcg_out_ext32s(s, datalo, datalo); tcg_out_ext32s(s, datalo, datalo);
} else { } else {
tcg_out_modrm_offset(s, OPC_MOVSLQ + seg, datalo, base, ofs); tcg_out_modrm_offset(s, OPC_MOVSLQ + seg, datalo, base, ofs);
...@@ -1380,27 +1417,22 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, ...@@ -1380,27 +1417,22 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
#endif #endif
case MO_Q: case MO_Q:
if (TCG_TARGET_REG_BITS == 64) { if (TCG_TARGET_REG_BITS == 64) {
tcg_out_modrm_offset(s, OPC_MOVL_GvEv + P_REXW + seg, tcg_out_modrm_offset(s, movop + P_REXW + seg, datalo, base, ofs);
datalo, base, ofs);
if (bswap) { if (bswap) {
tcg_out_bswap64(s, datalo); tcg_out_bswap64(s, datalo);
} }
} else { } else {
if (bswap) { if (real_bswap) {
int t = datalo; int t = datalo;
datalo = datahi; datalo = datahi;
datahi = t; datahi = t;
} }
if (base != datalo) { if (base != datalo) {
tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg, tcg_out_modrm_offset(s, movop + seg, datalo, base, ofs);
datalo, base, ofs); tcg_out_modrm_offset(s, movop + seg, datahi, base, ofs + 4);
tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg,
datahi, base, ofs + 4);
} else { } else {
tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg, tcg_out_modrm_offset(s, movop + seg, datahi, base, ofs + 4);
datahi, base, ofs + 4); tcg_out_modrm_offset(s, movop + seg, datalo, base, ofs);
tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg,
datalo, base, ofs);
} }
if (bswap) { if (bswap) {
tcg_out_bswap32(s, datalo); tcg_out_bswap32(s, datalo);
...@@ -1476,13 +1508,19 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, ...@@ -1476,13 +1508,19 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
TCGReg base, intptr_t ofs, int seg, TCGReg base, intptr_t ofs, int seg,
TCGMemOp memop) TCGMemOp memop)
{ {
const TCGMemOp bswap = memop & MO_BSWAP;
/* ??? Ideally we wouldn't need a scratch register. For user-only, /* ??? Ideally we wouldn't need a scratch register. For user-only,
we could perform the bswap twice to restore the original value we could perform the bswap twice to restore the original value
instead of moving to the scratch. But as it is, the L constraint instead of moving to the scratch. But as it is, the L constraint
means that TCG_REG_L0 is definitely free here. */ means that TCG_REG_L0 is definitely free here. */
const TCGReg scratch = TCG_REG_L0; const TCGReg scratch = TCG_REG_L0;
const TCGMemOp real_bswap = memop & MO_BSWAP;
TCGMemOp bswap = real_bswap;
int movop = OPC_MOVL_EvGv;
if (have_movbe && real_bswap) {
bswap = 0;
movop = OPC_MOVBE_MyGy;
}
switch (memop & MO_SIZE) { switch (memop & MO_SIZE) {
case MO_8: case MO_8:
...@@ -1501,8 +1539,7 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, ...@@ -1501,8 +1539,7 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
tcg_out_rolw_8(s, scratch); tcg_out_rolw_8(s, scratch);
datalo = scratch; datalo = scratch;
} }
tcg_out_modrm_offset(s, OPC_MOVL_EvGv + P_DATA16 + seg, tcg_out_modrm_offset(s, movop + P_DATA16 + seg, datalo, base, ofs);
datalo, base, ofs);
break; break;
case MO_32: case MO_32:
if (bswap) { if (bswap) {
...@@ -1510,7 +1547,7 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, ...@@ -1510,7 +1547,7 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
tcg_out_bswap32(s, scratch); tcg_out_bswap32(s, scratch);
datalo = scratch; datalo = scratch;
} }
tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, datalo, base, ofs); tcg_out_modrm_offset(s, movop + seg, datalo, base, ofs);
break; break;
case MO_64: case MO_64:
if (TCG_TARGET_REG_BITS == 64) { if (TCG_TARGET_REG_BITS == 64) {
...@@ -1519,8 +1556,7 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, ...@@ -1519,8 +1556,7 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
tcg_out_bswap64(s, scratch); tcg_out_bswap64(s, scratch);
datalo = scratch; datalo = scratch;
} }
tcg_out_modrm_offset(s, OPC_MOVL_EvGv + P_REXW + seg, tcg_out_modrm_offset(s, movop + P_REXW + seg, datalo, base, ofs);
datalo, base, ofs);
} else if (bswap) { } else if (bswap) {
tcg_out_mov(s, TCG_TYPE_I32, scratch, datahi); tcg_out_mov(s, TCG_TYPE_I32, scratch, datahi);
tcg_out_bswap32(s, scratch); tcg_out_bswap32(s, scratch);
...@@ -1529,8 +1565,13 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, ...@@ -1529,8 +1565,13 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
tcg_out_bswap32(s, scratch); tcg_out_bswap32(s, scratch);
tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, scratch, base, ofs+4); tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, scratch, base, ofs+4);
} else { } else {
tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, datalo, base, ofs); if (real_bswap) {
tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, datahi, base, ofs+4); int t = datalo;
datalo = datahi;
datahi = t;
}
tcg_out_modrm_offset(s, movop + seg, datalo, base, ofs);
tcg_out_modrm_offset(s, movop + seg, datahi, base, ofs+4);
} }
break; break;
default: default:
...@@ -1985,9 +2026,7 @@ static const TCGTargetOpDef x86_op_defs[] = { ...@@ -1985,9 +2026,7 @@ static const TCGTargetOpDef x86_op_defs[] = {
{ INDEX_op_setcond_i32, { "q", "r", "ri" } }, { INDEX_op_setcond_i32, { "q", "r", "ri" } },
{ INDEX_op_deposit_i32, { "Q", "0", "Q" } }, { INDEX_op_deposit_i32, { "Q", "0", "Q" } },
#if TCG_TARGET_HAS_movcond_i32
{ INDEX_op_movcond_i32, { "r", "r", "ri", "r", "0" } }, { INDEX_op_movcond_i32, { "r", "r", "ri", "r", "0" } },
#endif
{ INDEX_op_mulu2_i32, { "a", "d", "a", "r" } }, { INDEX_op_mulu2_i32, { "a", "d", "a", "r" } },
{ INDEX_op_muls2_i32, { "a", "d", "a", "r" } }, { INDEX_op_muls2_i32, { "a", "d", "a", "r" } },
...@@ -2157,13 +2196,23 @@ static void tcg_target_qemu_prologue(TCGContext *s) ...@@ -2157,13 +2196,23 @@ static void tcg_target_qemu_prologue(TCGContext *s)
static void tcg_target_init(TCGContext *s) static void tcg_target_init(TCGContext *s)
{ {
/* For 32-bit, 99% certainty that we're running on hardware that supports #if !(defined(have_cmov) && defined(have_movbe))
cmov, but we still need to check. In case cmov is not available, we'll
use a small forward branch. */
#ifndef have_cmov
{ {
unsigned a, b, c, d; unsigned a, b, c, d;
have_cmov = (__get_cpuid(1, &a, &b, &c, &d) && (d & bit_CMOV)); int ret = __get_cpuid(1, &a, &b, &c, &d);
# ifndef have_cmov
/* For 32-bit, 99% certainty that we're running on hardware that
supports cmov, but we still need to check. In case cmov is not
available, we'll use a small forward branch. */
have_cmov = ret && (d & bit_CMOV);
# endif
# ifndef have_movbe
/* MOVBE is only available on Intel Atom and Haswell CPUs, so we
need to probe for it. */
have_movbe = ret && (c & bit_MOVBE);
# endif
} }
#endif #endif
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册