提交 e28bbd44 编写于 作者: A Avi Kivity 提交者: Marcelo Tosatti

KVM: x86 emulator: framework for streamlining arithmetic opcodes

We emulate arithmetic opcodes by executing a "similar" (same operation,
different operands) on the cpu.  This ensures accurate emulation, esp. wrt.
eflags.  However, the prologue and epilogue around the opcode is fairly long,
consisting of a switch (for the operand size) and code to load and save the
operands.  This is repeated for every opcode.

This patch introduces an alternative way to emulate arithmetic opcodes.
Instead of the above, we have four (three on i386) functions consisting
of just the opcode and a ret; one for each operand size.  For example:

   .align 8
   em_notb:
	not %al
	ret

   .align 8
   em_notw:
	not %ax
	ret

   .align 8
   em_notl:
	not %eax
	ret

   .align 8
   em_notq:
	not %rax
	ret

The prologue and epilogue are shared across all opcodes.  Note the functions
use a special calling convention; notably eflags is an input/output parameter
and is not clobbered.  Rather than dispatching the four functions through a
jump table, the functions are declared as a constant size (8) so their address
can be calculated.
Acked-by: NGleb Natapov <gleb@redhat.com>
Signed-off-by: NAvi Kivity <avi.kivity@gmail.com>
Signed-off-by: NMarcelo Tosatti <mtosatti@redhat.com>
上级 b09408d0
...@@ -149,6 +149,7 @@ ...@@ -149,6 +149,7 @@
#define Aligned ((u64)1 << 41) /* Explicitly aligned (e.g. MOVDQA) */ #define Aligned ((u64)1 << 41) /* Explicitly aligned (e.g. MOVDQA) */
#define Unaligned ((u64)1 << 42) /* Explicitly unaligned (e.g. MOVDQU) */ #define Unaligned ((u64)1 << 42) /* Explicitly unaligned (e.g. MOVDQU) */
#define Avx ((u64)1 << 43) /* Advanced Vector Extensions */ #define Avx ((u64)1 << 43) /* Advanced Vector Extensions */
#define Fastop ((u64)1 << 44) /* Use opcode::u.fastop */
#define X2(x...) x, x #define X2(x...) x, x
#define X3(x...) X2(x), x #define X3(x...) X2(x), x
...@@ -159,6 +160,27 @@ ...@@ -159,6 +160,27 @@
#define X8(x...) X4(x), X4(x) #define X8(x...) X4(x), X4(x)
#define X16(x...) X8(x), X8(x) #define X16(x...) X8(x), X8(x)
#define NR_FASTOP (ilog2(sizeof(ulong)) + 1)
#define FASTOP_SIZE 8
/*
* fastop functions have a special calling convention:
*
* dst: [rdx]:rax (in/out)
* src: rbx (in/out)
* src2: rcx (in)
* flags: rflags (in/out)
*
* Moreover, they are all exactly FASTOP_SIZE bytes long, so functions for
* different operand sizes can be reached by calculation, rather than a jump
* table (which would be bigger than the code).
*
* fastop functions are declared as taking a never-defined fastop parameter,
* so they can't be called from C directly.
*/
struct fastop;
struct opcode { struct opcode {
u64 flags : 56; u64 flags : 56;
u64 intercept : 8; u64 intercept : 8;
...@@ -168,6 +190,7 @@ struct opcode { ...@@ -168,6 +190,7 @@ struct opcode {
const struct group_dual *gdual; const struct group_dual *gdual;
const struct gprefix *gprefix; const struct gprefix *gprefix;
const struct escape *esc; const struct escape *esc;
void (*fastop)(struct fastop *fake);
} u; } u;
int (*check_perm)(struct x86_emulate_ctxt *ctxt); int (*check_perm)(struct x86_emulate_ctxt *ctxt);
}; };
...@@ -3646,6 +3669,7 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt) ...@@ -3646,6 +3669,7 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt)
#define GD(_f, _g) { .flags = ((_f) | GroupDual | ModRM), .u.gdual = (_g) } #define GD(_f, _g) { .flags = ((_f) | GroupDual | ModRM), .u.gdual = (_g) }
#define E(_f, _e) { .flags = ((_f) | Escape | ModRM), .u.esc = (_e) } #define E(_f, _e) { .flags = ((_f) | Escape | ModRM), .u.esc = (_e) }
#define I(_f, _e) { .flags = (_f), .u.execute = (_e) } #define I(_f, _e) { .flags = (_f), .u.execute = (_e) }
#define F(_f, _e) { .flags = (_f) | Fastop, .u.fastop = (_e) }
#define II(_f, _e, _i) \ #define II(_f, _e, _i) \
{ .flags = (_f), .u.execute = (_e), .intercept = x86_intercept_##_i } { .flags = (_f), .u.execute = (_e), .intercept = x86_intercept_##_i }
#define IIP(_f, _e, _i, _p) \ #define IIP(_f, _e, _i, _p) \
...@@ -4502,6 +4526,16 @@ static void fetch_possible_mmx_operand(struct x86_emulate_ctxt *ctxt, ...@@ -4502,6 +4526,16 @@ static void fetch_possible_mmx_operand(struct x86_emulate_ctxt *ctxt,
read_mmx_reg(ctxt, &op->mm_val, op->addr.mm); read_mmx_reg(ctxt, &op->mm_val, op->addr.mm);
} }
static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *))
{
ulong flags = (ctxt->eflags & EFLAGS_MASK) | X86_EFLAGS_IF;
fop += __ffs(ctxt->dst.bytes) * FASTOP_SIZE;
asm("push %[flags]; popf; call *%[fastop]; pushf; pop %[flags]\n"
: "+a"(ctxt->dst.val), "+b"(ctxt->src.val), [flags]"+D"(flags)
: "c"(ctxt->src2.val), [fastop]"S"(fop));
ctxt->eflags = (ctxt->eflags & ~EFLAGS_MASK) | (flags & EFLAGS_MASK);
return X86EMUL_CONTINUE;
}
int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
{ {
...@@ -4631,6 +4665,13 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) ...@@ -4631,6 +4665,13 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
} }
if (ctxt->execute) { if (ctxt->execute) {
if (ctxt->d & Fastop) {
void (*fop)(struct fastop *) = (void *)ctxt->execute;
rc = fastop(ctxt, fop);
if (rc != X86EMUL_CONTINUE)
goto done;
goto writeback;
}
rc = ctxt->execute(ctxt); rc = ctxt->execute(ctxt);
if (rc != X86EMUL_CONTINUE) if (rc != X86EMUL_CONTINUE)
goto done; goto done;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册