/******************************************************************************
 * emulate.c
 *
 * Generic x86 (32-bit and 64-bit) instruction decoder and emulator.
 *
 * Copyright (c) 2005 Keir Fraser
 *
 * Linux coding style, mod r/m decoder, segment base fixes, real-mode
 * privileged instructions:
 *
 * Copyright (C) 2006 Qumranet
 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
 *
 *   Avi Kivity <avi@qumranet.com>
 *   Yaniv Kamay <yaniv@qumranet.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 * From: xen-unstable 10676:af9809f51f81a3c43f276f00c81a52ef558afda4
 */

#include <linux/kvm_host.h>
#include "kvm_cache_regs.h"
#include <linux/module.h>
#include <asm/kvm_emulate.h>
#include <linux/stringify.h>

#include "x86.h"
#include "tss.h"

/*
 * Operand types
 */
#define OpNone             0ull
#define OpImplicit         1ull  /* No generic decode */
#define OpReg              2ull  /* Register */
#define OpMem              3ull  /* Memory */
#define OpAcc              4ull  /* Accumulator: AL/AX/EAX/RAX */
#define OpDI               5ull  /* ES:DI/EDI/RDI */
#define OpMem64            6ull  /* Memory, 64-bit */
#define OpImmUByte         7ull  /* Zero-extended 8-bit immediate */
#define OpDX               8ull  /* DX register */
#define OpCL               9ull  /* CL register (for shifts) */
#define OpImmByte         10ull  /* 8-bit sign extended immediate */
#define OpOne             11ull  /* Implied 1 */
#define OpImm             12ull  /* Sign extended up to 32-bit immediate */
#define OpMem16           13ull  /* Memory operand (16-bit). */
#define OpMem32           14ull  /* Memory operand (32-bit). */
#define OpImmU            15ull  /* Immediate operand, zero extended */
#define OpSI              16ull  /* SI/ESI/RSI */
#define OpImmFAddr        17ull  /* Immediate far address */
#define OpMemFAddr        18ull  /* Far address in memory */
#define OpImmU16          19ull  /* Immediate operand, 16 bits, zero extended */
#define OpES              20ull  /* ES */
#define OpCS              21ull  /* CS */
#define OpSS              22ull  /* SS */
#define OpDS              23ull  /* DS */
#define OpFS              24ull  /* FS */
#define OpGS              25ull  /* GS */
#define OpMem8            26ull  /* 8-bit zero extended memory operand */
#define OpImm64           27ull  /* Sign extended 16/32/64-bit immediate */
#define OpXLat            28ull  /* memory at BX/EBX/RBX + zero-extended AL */
#define OpAccLo           29ull  /* Low part of extended acc (AX/AX/EAX/RAX) */
#define OpAccHi           30ull  /* High part of extended acc (-/DX/EDX/RDX) */

#define OpBits             5  /* Width of operand field */
#define OpMask             ((1ull << OpBits) - 1)

/*
 * Opcode effective-address decode tables.
 * Note that we only emulate instructions that have at least one memory
 * operand (excluding implicit stack references). We assume that stack
 * references and instruction fetches will never occur in special memory
 * areas that require emulation. So, for example, 'mov <imm>,<reg>' need
 * not be handled.
 */

/* Operand sizes: 8-bit operands or specified/overridden size. */
#define ByteOp      (1<<0)	/* 8-bit operands. */
/* Destination operand type. */
#define DstShift    1
#define ImplicitOps (OpImplicit << DstShift)
#define DstReg      (OpReg << DstShift)
#define DstMem      (OpMem << DstShift)
#define DstAcc      (OpAcc << DstShift)
#define DstDI       (OpDI << DstShift)
#define DstMem64    (OpMem64 << DstShift)
#define DstImmUByte (OpImmUByte << DstShift)
#define DstDX       (OpDX << DstShift)
#define DstAccLo    (OpAccLo << DstShift)
#define DstMask     (OpMask << DstShift)
/* Source operand type. */
#define SrcShift    6
#define SrcNone     (OpNone << SrcShift)
#define SrcReg      (OpReg << SrcShift)
#define SrcMem      (OpMem << SrcShift)
#define SrcMem16    (OpMem16 << SrcShift)
#define SrcMem32    (OpMem32 << SrcShift)
#define SrcImm      (OpImm << SrcShift)
#define SrcImmByte  (OpImmByte << SrcShift)
#define SrcOne      (OpOne << SrcShift)
#define SrcImmUByte (OpImmUByte << SrcShift)
#define SrcImmU     (OpImmU << SrcShift)
#define SrcSI       (OpSI << SrcShift)
#define SrcXLat     (OpXLat << SrcShift)
#define SrcImmFAddr (OpImmFAddr << SrcShift)
#define SrcMemFAddr (OpMemFAddr << SrcShift)
#define SrcAcc      (OpAcc << SrcShift)
#define SrcImmU16   (OpImmU16 << SrcShift)
#define SrcImm64    (OpImm64 << SrcShift)
#define SrcDX       (OpDX << SrcShift)
#define SrcMem8     (OpMem8 << SrcShift)
#define SrcAccHi    (OpAccHi << SrcShift)
#define SrcMask     (OpMask << SrcShift)
#define BitOp       (1<<11)
#define MemAbs      (1<<12)      /* Memory operand is absolute displacement */
#define String      (1<<13)     /* String instruction (rep capable) */
#define Stack       (1<<14)     /* Stack instruction (push/pop) */
#define GroupMask   (7<<15)     /* Opcode uses one of the group mechanisms */
#define Group       (1<<15)     /* Bits 3:5 of modrm byte extend opcode */
#define GroupDual   (2<<15)     /* Alternate decoding of mod == 3 */
#define Prefix      (3<<15)     /* Instruction varies with 66/f2/f3 prefix */
#define RMExt       (4<<15)     /* Opcode extension in ModRM r/m if mod == 3 */
#define Escape      (5<<15)     /* Escape to coprocessor instruction */
#define Sse         (1<<18)     /* SSE Vector instruction */
/* Generic ModRM decode. */
#define ModRM       (1<<19)
/* Destination is only written; never read. */
#define Mov         (1<<20)
/* Misc flags */
#define Prot        (1<<21) /* instruction generates #UD if not in prot-mode */
#define EmulateOnUD (1<<22) /* Emulate if unsupported by the host */
#define NoAccess    (1<<23) /* Don't access memory (lea/invlpg/verr etc) */
#define Op3264      (1<<24) /* Operand is 64b in long mode, 32b otherwise */
#define Undefined   (1<<25) /* No Such Instruction */
#define Lock        (1<<26) /* lock prefix is allowed for the instruction */
#define Priv        (1<<27) /* instruction generates #GP if current CPL != 0 */
#define No64	    (1<<28)
#define PageTable   (1 << 29)   /* instruction used to write page table */
#define NotImpl     (1 << 30)   /* instruction is not implemented */
/* Source 2 operand type */
#define Src2Shift   (31)
#define Src2None    (OpNone << Src2Shift)
#define Src2Mem     (OpMem << Src2Shift)
#define Src2CL      (OpCL << Src2Shift)
#define Src2ImmByte (OpImmByte << Src2Shift)
#define Src2One     (OpOne << Src2Shift)
#define Src2Imm     (OpImm << Src2Shift)
#define Src2ES      (OpES << Src2Shift)
#define Src2CS      (OpCS << Src2Shift)
#define Src2SS      (OpSS << Src2Shift)
#define Src2DS      (OpDS << Src2Shift)
#define Src2FS      (OpFS << Src2Shift)
#define Src2GS      (OpGS << Src2Shift)
#define Src2Mask    (OpMask << Src2Shift)
#define Mmx         ((u64)1 << 40)  /* MMX Vector instruction */
#define Aligned     ((u64)1 << 41)  /* Explicitly aligned (e.g. MOVDQA) */
#define Unaligned   ((u64)1 << 42)  /* Explicitly unaligned (e.g. MOVDQU) */
#define Avx         ((u64)1 << 43)  /* Advanced Vector Extensions */
#define Fastop      ((u64)1 << 44)  /* Use opcode::u.fastop */
#define NoWrite     ((u64)1 << 45)  /* No writeback */
#define SrcWrite    ((u64)1 << 46)  /* Write back src operand */
#define NoMod	    ((u64)1 << 47)  /* Mod field is ignored */
#define Intercept   ((u64)1 << 48)  /* Has valid intercept field */
#define CheckPerm   ((u64)1 << 49)  /* Has valid check_perm field */
#define NoBigReal   ((u64)1 << 50)  /* No big real mode */
#define PrivUD      ((u64)1 << 51)  /* #UD instead of #GP on CPL > 0 */
#define NearBranch  ((u64)1 << 52)  /* Near branches */
#define No16	    ((u64)1 << 53)  /* No 16 bit operand */

#define DstXacc     (DstAccLo | SrcAccHi | SrcWrite)

#define X2(x...) x, x
#define X3(x...) X2(x), x
#define X4(x...) X2(x), X2(x)
#define X5(x...) X4(x), x
#define X6(x...) X4(x), X2(x)
#define X7(x...) X4(x), X3(x)
#define X8(x...) X4(x), X4(x)
#define X16(x...) X8(x), X8(x)

#define NR_FASTOP (ilog2(sizeof(ulong)) + 1)
#define FASTOP_SIZE 8

/*
 * fastop functions have a special calling convention:
 *
 * dst:    rax        (in/out)
 * src:    rdx        (in/out)
 * src2:   rcx        (in)
 * flags:  rflags     (in/out)
 * ex:     rsi        (in:fastop pointer, out:zero if exception)
 *
 * Moreover, they are all exactly FASTOP_SIZE bytes long, so functions for
 * different operand sizes can be reached by calculation, rather than a jump
 * table (which would be bigger than the code).
 *
 * fastop functions are declared as taking a never-defined fastop parameter,
 * so they can't be called from C directly.
 */

struct fastop;
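
/*
 * For illustration (simplified sketch): with the convention above,
 * FASTOP2(add) further down emits four FASTOP_SIZE-byte stubs, one per
 * operand size, roughly
 *
 *	em_add:
 *		.align 8; addb %dl,  %al;  ret
 *		.align 8; addw %dx,  %ax;  ret
 *		.align 8; addl %edx, %eax; ret
 *		.align 8; addq %rdx, %rax; ret		(64-bit only)
 *
 * so the fastop() dispatcher can reach the stub for a given operand size
 * by adding a multiple of FASTOP_SIZE to em_add instead of going through
 * a jump table.
 */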

struct opcode {
	u64 flags : 56;
	u64 intercept : 8;
	union {
		int (*execute)(struct x86_emulate_ctxt *ctxt);
		const struct opcode *group;
		const struct group_dual *gdual;
		const struct gprefix *gprefix;
		const struct escape *esc;
		void (*fastop)(struct fastop *fake);
	} u;
	int (*check_perm)(struct x86_emulate_ctxt *ctxt);
};

struct group_dual {
	struct opcode mod012[8];
	struct opcode mod3[8];
};

struct gprefix {
	struct opcode pfx_no;
	struct opcode pfx_66;
	struct opcode pfx_f2;
	struct opcode pfx_f3;
};
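
/*
 * Illustrative note: an opcode table entry flagged with Prefix points at
 * one of these, and the decoder then selects pfx_no/pfx_66/pfx_f2/pfx_f3
 * according to the last 0x66/0xf2/0xf3 prefix seen, which is how SSE
 * opcodes whose prefixed forms are distinct instructions get decoded.
 */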

struct escape {
	struct opcode op[8];
	struct opcode high[64];
};
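
/*
 * Illustrative note: for an Escape (x87) opcode the decoder uses
 * op[ModRM.reg] for the memory forms (ModRM byte below 0xc0) and
 * high[ModRM - 0xc0] for the register forms, hence the 8 + 64 entries.
 */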

/* EFLAGS bit definitions. */
#define EFLG_ID (1<<21)
#define EFLG_VIP (1<<20)
#define EFLG_VIF (1<<19)
#define EFLG_AC (1<<18)
#define EFLG_VM (1<<17)
#define EFLG_RF (1<<16)
#define EFLG_IOPL (3<<12)
#define EFLG_NT (1<<14)
#define EFLG_OF (1<<11)
#define EFLG_DF (1<<10)
#define EFLG_IF (1<<9)
#define EFLG_TF (1<<8)
#define EFLG_SF (1<<7)
#define EFLG_ZF (1<<6)
#define EFLG_AF (1<<4)
#define EFLG_PF (1<<2)
#define EFLG_CF (1<<0)

#define EFLG_RESERVED_ZEROS_MASK 0xffc0802a
#define EFLG_RESERVED_ONE_MASK 2

static ulong reg_read(struct x86_emulate_ctxt *ctxt, unsigned nr)
{
	if (!(ctxt->regs_valid & (1 << nr))) {
		ctxt->regs_valid |= 1 << nr;
		ctxt->_regs[nr] = ctxt->ops->read_gpr(ctxt, nr);
	}
	return ctxt->_regs[nr];
}

static ulong *reg_write(struct x86_emulate_ctxt *ctxt, unsigned nr)
{
	ctxt->regs_valid |= 1 << nr;
	ctxt->regs_dirty |= 1 << nr;
	return &ctxt->_regs[nr];
}

static ulong *reg_rmw(struct x86_emulate_ctxt *ctxt, unsigned nr)
{
	reg_read(ctxt, nr);
	return reg_write(ctxt, nr);
}

static void writeback_registers(struct x86_emulate_ctxt *ctxt)
{
	unsigned reg;

	for_each_set_bit(reg, (ulong *)&ctxt->regs_dirty, 16)
		ctxt->ops->write_gpr(ctxt, reg, ctxt->_regs[reg]);
}

static void invalidate_registers(struct x86_emulate_ctxt *ctxt)
{
	ctxt->regs_dirty = 0;
	ctxt->regs_valid = 0;
}
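
/*
 * Usage sketch (illustrative): these helpers form a lazy, write-back
 * cache of the guest GPRs, e.g.
 *
 *	ulong rax = reg_read(ctxt, VCPU_REGS_RAX);	/- fill on first use -/
 *	*reg_write(ctxt, VCPU_REGS_RCX) = 0;		/- mark RCX dirty -/
 *	writeback_registers(ctxt);			/- flush dirty regs only -/
 *
 * while invalidate_registers() simply discards the cached values.
 */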

/*
 * These EFLAGS bits are restored from saved value during emulation, and
 * any changes are written back to the saved value after emulation.
 */
#define EFLAGS_MASK (EFLG_OF|EFLG_SF|EFLG_ZF|EFLG_AF|EFLG_PF|EFLG_CF)

#ifdef CONFIG_X86_64
#define ON64(x) x
#else
#define ON64(x)
#endif

static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *));

#define FOP_ALIGN ".align " __stringify(FASTOP_SIZE) " \n\t"
#define FOP_RET   "ret \n\t"

#define FOP_START(op) \
	extern void em_##op(struct fastop *fake); \
	asm(".pushsection .text, \"ax\" \n\t" \
	    ".global em_" #op " \n\t" \
            FOP_ALIGN \
	    "em_" #op ": \n\t"

#define FOP_END \
	    ".popsection")

#define FOPNOP() FOP_ALIGN FOP_RET

#define FOP1E(op,  dst) \
	FOP_ALIGN "10: " #op " %" #dst " \n\t" FOP_RET

#define FOP1EEX(op,  dst) \
	FOP1E(op, dst) _ASM_EXTABLE(10b, kvm_fastop_exception)

#define FASTOP1(op) \
	FOP_START(op) \
	FOP1E(op##b, al) \
	FOP1E(op##w, ax) \
	FOP1E(op##l, eax) \
	ON64(FOP1E(op##q, rax))	\
	FOP_END

/* 1-operand, using src2 (for MUL/DIV r/m) */
#define FASTOP1SRC2(op, name) \
	FOP_START(name) \
	FOP1E(op, cl) \
	FOP1E(op, cx) \
	FOP1E(op, ecx) \
	ON64(FOP1E(op, rcx)) \
	FOP_END

/* 1-operand, using src2 (for MUL/DIV r/m), with exceptions */
#define FASTOP1SRC2EX(op, name) \
	FOP_START(name) \
	FOP1EEX(op, cl) \
	FOP1EEX(op, cx) \
	FOP1EEX(op, ecx) \
	ON64(FOP1EEX(op, rcx)) \
	FOP_END

#define FOP2E(op,  dst, src)	   \
	FOP_ALIGN #op " %" #src ", %" #dst " \n\t" FOP_RET

#define FASTOP2(op) \
	FOP_START(op) \
	FOP2E(op##b, al, dl) \
	FOP2E(op##w, ax, dx) \
	FOP2E(op##l, eax, edx) \
	ON64(FOP2E(op##q, rax, rdx)) \
	FOP_END

/* 2 operand, word only */
#define FASTOP2W(op) \
	FOP_START(op) \
	FOPNOP() \
	FOP2E(op##w, ax, dx) \
	FOP2E(op##l, eax, edx) \
	ON64(FOP2E(op##q, rax, rdx)) \
	FOP_END

/* 2 operand, src is CL */
#define FASTOP2CL(op) \
	FOP_START(op) \
	FOP2E(op##b, al, cl) \
	FOP2E(op##w, ax, cl) \
	FOP2E(op##l, eax, cl) \
	ON64(FOP2E(op##q, rax, cl)) \
	FOP_END

384 385 386 387 388 389 390 391 392
/* 2 operand, src and dest are reversed */
#define FASTOP2R(op, name) \
	FOP_START(name) \
	FOP2E(op##b, dl, al) \
	FOP2E(op##w, dx, ax) \
	FOP2E(op##l, edx, eax) \
	ON64(FOP2E(op##q, rdx, rax)) \
	FOP_END

#define FOP3E(op,  dst, src, src2) \
	FOP_ALIGN #op " %" #src2 ", %" #src ", %" #dst " \n\t" FOP_RET

/* 3-operand, word-only, src2=cl */
#define FASTOP3WCL(op) \
	FOP_START(op) \
	FOPNOP() \
	FOP3E(op##w, ax, dx, cl) \
	FOP3E(op##l, eax, edx, cl) \
	ON64(FOP3E(op##q, rax, rdx, cl)) \
	FOP_END

/* Special case for SETcc - 1 instruction per cc */
#define FOP_SETCC(op) ".align 4; " #op " %al; ret \n\t"

asm(".global kvm_fastop_exception \n"
    "kvm_fastop_exception: xor %esi, %esi; ret");

FOP_START(setcc)
FOP_SETCC(seto)
FOP_SETCC(setno)
FOP_SETCC(setc)
FOP_SETCC(setnc)
FOP_SETCC(setz)
FOP_SETCC(setnz)
FOP_SETCC(setbe)
FOP_SETCC(setnbe)
FOP_SETCC(sets)
FOP_SETCC(setns)
FOP_SETCC(setp)
FOP_SETCC(setnp)
FOP_SETCC(setl)
FOP_SETCC(setnl)
FOP_SETCC(setle)
FOP_SETCC(setnle)
FOP_END;
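
/*
 * Illustrative note: each stub above is padded to 4 bytes (".align 4") and
 * laid out in x86 condition-code order 0x0-0xf, so test_cc() below can
 * reach the right stub without a jump table:
 *
 *	void (*fop)(void) = (void *)em_setcc + 4 * (condition & 0xf);
 *
 * e.g. condition 0x4 lands on the "setz %al; ret" stub.
 */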

FOP_START(salc) "pushf; sbb %al, %al; popf \n\t" FOP_RET
FOP_END;

static int emulator_check_intercept(struct x86_emulate_ctxt *ctxt,
				    enum x86_intercept intercept,
				    enum x86_intercept_stage stage)
{
	struct x86_instruction_info info = {
		.intercept  = intercept,
		.rep_prefix = ctxt->rep_prefix,
		.modrm_mod  = ctxt->modrm_mod,
		.modrm_reg  = ctxt->modrm_reg,
		.modrm_rm   = ctxt->modrm_rm,
		.src_val    = ctxt->src.val64,
		.dst_val    = ctxt->dst.val64,
		.src_bytes  = ctxt->src.bytes,
		.dst_bytes  = ctxt->dst.bytes,
		.ad_bytes   = ctxt->ad_bytes,
		.next_rip   = ctxt->eip,
	};

	return ctxt->ops->intercept(ctxt, &info, stage);
}

static void assign_masked(ulong *dest, ulong src, ulong mask)
{
	*dest = (*dest & ~mask) | (src & mask);
}

static inline unsigned long ad_mask(struct x86_emulate_ctxt *ctxt)
{
	return (1UL << (ctxt->ad_bytes << 3)) - 1;
}

static ulong stack_mask(struct x86_emulate_ctxt *ctxt)
{
	u16 sel;
	struct desc_struct ss;

	if (ctxt->mode == X86EMUL_MODE_PROT64)
		return ~0UL;
	ctxt->ops->get_segment(ctxt, &sel, &ss, NULL, VCPU_SREG_SS);
	return ~0U >> ((ss.d ^ 1) * 16);  /* d=0: 0xffff; d=1: 0xffffffff */
}

static int stack_size(struct x86_emulate_ctxt *ctxt)
{
	return (__fls(stack_mask(ctxt)) + 1) >> 3;
}

/* Access/update address held in a register, based on addressing mode. */
static inline unsigned long
address_mask(struct x86_emulate_ctxt *ctxt, unsigned long reg)
{
	if (ctxt->ad_bytes == sizeof(unsigned long))
		return reg;
	else
		return reg & ad_mask(ctxt);
}

static inline unsigned long
register_address(struct x86_emulate_ctxt *ctxt, unsigned long reg)
{
	return address_mask(ctxt, reg);
}
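
/*
 * Example (illustrative): with a 16-bit address size (ctxt->ad_bytes == 2)
 * ad_mask() is 0xffff, so address_mask(ctxt, 0x12345678) yields 0x5678;
 * with ad_bytes == sizeof(unsigned long) the value is used unmasked.
 */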

static void masked_increment(ulong *reg, ulong mask, int inc)
{
	assign_masked(reg, *reg + inc, mask);
}

static inline void
register_address_increment(struct x86_emulate_ctxt *ctxt, unsigned long *reg, int inc)
{
	ulong mask;

	if (ctxt->ad_bytes == sizeof(unsigned long))
		mask = ~0UL;
	else
		mask = ad_mask(ctxt);
	masked_increment(reg, mask, inc);
}

static void rsp_increment(struct x86_emulate_ctxt *ctxt, int inc)
{
	masked_increment(reg_rmw(ctxt, VCPU_REGS_RSP), stack_mask(ctxt), inc);
}

static u32 desc_limit_scaled(struct desc_struct *desc)
{
	u32 limit = get_desc_limit(desc);

	return desc->g ? (limit << 12) | 0xfff : limit;
}

static unsigned long seg_base(struct x86_emulate_ctxt *ctxt, int seg)
{
	if (ctxt->mode == X86EMUL_MODE_PROT64 && seg < VCPU_SREG_FS)
		return 0;

	return ctxt->ops->get_cached_segment_base(ctxt, seg);
}

static int emulate_exception(struct x86_emulate_ctxt *ctxt, int vec,
			     u32 error, bool valid)
{
	WARN_ON(vec > 0x1f);
	ctxt->exception.vector = vec;
	ctxt->exception.error_code = error;
	ctxt->exception.error_code_valid = valid;
	return X86EMUL_PROPAGATE_FAULT;
}

static int emulate_db(struct x86_emulate_ctxt *ctxt)
{
	return emulate_exception(ctxt, DB_VECTOR, 0, false);
}

static int emulate_gp(struct x86_emulate_ctxt *ctxt, int err)
{
	return emulate_exception(ctxt, GP_VECTOR, err, true);
}

static int emulate_ss(struct x86_emulate_ctxt *ctxt, int err)
{
	return emulate_exception(ctxt, SS_VECTOR, err, true);
}

static int emulate_ud(struct x86_emulate_ctxt *ctxt)
{
	return emulate_exception(ctxt, UD_VECTOR, 0, false);
}

static int emulate_ts(struct x86_emulate_ctxt *ctxt, int err)
{
	return emulate_exception(ctxt, TS_VECTOR, err, true);
}

static int emulate_de(struct x86_emulate_ctxt *ctxt)
{
	return emulate_exception(ctxt, DE_VECTOR, 0, false);
}

static int emulate_nm(struct x86_emulate_ctxt *ctxt)
{
	return emulate_exception(ctxt, NM_VECTOR, 0, false);
}

static u16 get_segment_selector(struct x86_emulate_ctxt *ctxt, unsigned seg)
{
	u16 selector;
	struct desc_struct desc;

	ctxt->ops->get_segment(ctxt, &selector, &desc, NULL, seg);
	return selector;
}

static void set_segment_selector(struct x86_emulate_ctxt *ctxt, u16 selector,
				 unsigned seg)
{
	u16 dummy;
	u32 base3;
	struct desc_struct desc;

	ctxt->ops->get_segment(ctxt, &dummy, &desc, &base3, seg);
	ctxt->ops->set_segment(ctxt, selector, &desc, base3, seg);
}

/*
 * x86 defines three classes of vector instructions: explicitly
 * aligned, explicitly unaligned, and the rest, which change behaviour
 * depending on whether they're AVX encoded or not.
 *
 * Also included is CMPXCHG16B which is not a vector instruction, yet it is
 * subject to the same check.
 */
static bool insn_aligned(struct x86_emulate_ctxt *ctxt, unsigned size)
{
	if (likely(size < 16))
		return false;

	if (ctxt->d & Aligned)
		return true;
	else if (ctxt->d & Unaligned)
		return false;
	else if (ctxt->d & Avx)
		return false;
	else
		return true;
}
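
/*
 * Example (illustrative): for a 16-byte access at linear address la, an
 * Aligned instruction such as MOVDQA makes __linearize() below inject
 * #GP(0) whenever (la & 15) != 0, while MOVDQU (Unaligned) and
 * AVX-encoded forms skip the check.
 */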

static __always_inline int __linearize(struct x86_emulate_ctxt *ctxt,
				       struct segmented_address addr,
				       unsigned *max_size, unsigned size,
				       bool write, bool fetch,
625
				       enum x86emul_mode mode, ulong *linear)
626
{
627 628
	struct desc_struct desc;
	bool usable;
629
	ulong la;
630
	u32 lim;
631
	u16 sel;
632

633
	la = seg_base(ctxt, addr.seg) + addr.ea;
634
	*max_size = 0;
635
	switch (mode) {
636
	case X86EMUL_MODE_PROT64:
637
		if (is_noncanonical_address(la))
638
			goto bad;
639 640 641 642

		*max_size = min_t(u64, ~0u, (1ull << 48) - la);
		if (size > *max_size)
			goto bad;
643 644
		break;
	default:
645 646
		usable = ctxt->ops->get_segment(ctxt, &sel, &desc, NULL,
						addr.seg);
647 648
		if (!usable)
			goto bad;
649 650 651
		/* code segment in protected mode or read-only data segment */
		if ((((ctxt->mode != X86EMUL_MODE_REAL) && (desc.type & 8))
					|| !(desc.type & 2)) && write)
652 653
			goto bad;
		/* unreadable code segment */
654
		if (!fetch && (desc.type & 8) && !(desc.type & 2))
655 656
			goto bad;
		lim = desc_limit_scaled(&desc);
657
		if ((desc.type & 8) || !(desc.type & 4)) {
658
			/* expand-up segment */
659
			if (addr.ea > lim)
660
				goto bad;
661
			*max_size = min_t(u64, ~0u, (u64)lim + 1 - addr.ea);
662
		} else {
G
Guo Chao 已提交
663
			/* expand-down segment */
664
			if (addr.ea <= lim)
665 666
				goto bad;
			lim = desc.d ? 0xffffffff : 0xffff;
667
			if (addr.ea > lim)
668
				goto bad;
669
			*max_size = min_t(u64, ~0u, (u64)lim + 1 - addr.ea);
670
		}
671 672
		if (size > *max_size)
			goto bad;
673
		la &= (u32)-1;
674 675
		break;
	}
676 677
	if (insn_aligned(ctxt, size) && ((la & (size - 1)) != 0))
		return emulate_gp(ctxt, 0);
678 679
	*linear = la;
	return X86EMUL_CONTINUE;
680 681
bad:
	if (addr.seg == VCPU_SREG_SS)
682
		return emulate_ss(ctxt, 0);
683
	else
684
		return emulate_gp(ctxt, 0);
685 686
}

687 688 689 690 691
static int linearize(struct x86_emulate_ctxt *ctxt,
		     struct segmented_address addr,
		     unsigned size, bool write,
		     ulong *linear)
{
692
	unsigned max_size;
693 694
	return __linearize(ctxt, addr, &max_size, size, write, false,
			   ctxt->mode, linear);
695 696
}

697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741
static inline int assign_eip(struct x86_emulate_ctxt *ctxt, ulong dst,
			     enum x86emul_mode mode)
{
	ulong linear;
	int rc;
	unsigned max_size;
	struct segmented_address addr = { .seg = VCPU_SREG_CS,
					   .ea = dst };

	if (ctxt->op_bytes != sizeof(unsigned long))
		addr.ea = dst & ((1UL << (ctxt->op_bytes << 3)) - 1);
	rc = __linearize(ctxt, addr, &max_size, 1, false, true, mode, &linear);
	if (rc == X86EMUL_CONTINUE)
		ctxt->_eip = addr.ea;
	return rc;
}

static inline int assign_eip_near(struct x86_emulate_ctxt *ctxt, ulong dst)
{
	return assign_eip(ctxt, dst, ctxt->mode);
}

static int assign_eip_far(struct x86_emulate_ctxt *ctxt, ulong dst,
			  const struct desc_struct *cs_desc)
{
	enum x86emul_mode mode = ctxt->mode;

#ifdef CONFIG_X86_64
	if (ctxt->mode >= X86EMUL_MODE_PROT32 && cs_desc->l) {
		u64 efer = 0;

		ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
		if (efer & EFER_LMA)
			mode = X86EMUL_MODE_PROT64;
	}
#endif
	if (mode == X86EMUL_MODE_PROT16 || mode == X86EMUL_MODE_PROT32)
		mode = cs_desc->d ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16;
	return assign_eip(ctxt, dst, mode);
}

static inline int jmp_rel(struct x86_emulate_ctxt *ctxt, int rel)
{
	return assign_eip_near(ctxt, ctxt->_eip + rel);
}
742

743 744 745 746 747
static int segmented_read_std(struct x86_emulate_ctxt *ctxt,
			      struct segmented_address addr,
			      void *data,
			      unsigned size)
{
748 749 750
	int rc;
	ulong linear;

751
	rc = linearize(ctxt, addr, size, false, &linear);
752 753
	if (rc != X86EMUL_CONTINUE)
		return rc;
754
	return ctxt->ops->read_std(ctxt, linear, data, size, &ctxt->exception);
755 756
}

757
/*
758
 * Prefetch the remaining bytes of the instruction without crossing page
759 760
 * boundary if they are not in fetch_cache yet.
 */
761
static int __do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt, int op_size)
762 763
{
	int rc;
764
	unsigned size, max_size;
765
	unsigned long linear;
766
	int cur_size = ctxt->fetch.end - ctxt->fetch.data;
767
	struct segmented_address addr = { .seg = VCPU_SREG_CS,
768 769
					   .ea = ctxt->eip + cur_size };

770 771 772 773 774 775 776 777 778 779
	/*
	 * We do not know exactly how many bytes will be needed, and
	 * __linearize is expensive, so fetch as much as possible.  We
	 * just have to avoid going beyond the 15 byte limit, the end
	 * of the segment, or the end of the page.
	 *
	 * __linearize is called with size 0 so that it does not do any
	 * boundary check itself.  Instead, we use max_size to check
	 * against op_size.
	 */
780 781
	rc = __linearize(ctxt, addr, &max_size, 0, false, true, ctxt->mode,
			 &linear);
782 783 784
	if (unlikely(rc != X86EMUL_CONTINUE))
		return rc;

785
	size = min_t(unsigned, 15UL ^ cur_size, max_size);
786
	size = min_t(unsigned, size, PAGE_SIZE - offset_in_page(linear));
787 788 789 790 791 792 793 794

	/*
	 * One instruction can only straddle two pages,
	 * and one has been loaded at the beginning of
	 * x86_decode_insn.  So, if not enough bytes
	 * still, we must have hit the 15-byte boundary.
	 */
	if (unlikely(size < op_size))
795 796
		return emulate_gp(ctxt, 0);

797
	rc = ctxt->ops->fetch(ctxt, linear, ctxt->fetch.end,
798 799 800
			      size, &ctxt->exception);
	if (unlikely(rc != X86EMUL_CONTINUE))
		return rc;
801
	ctxt->fetch.end += size;
802
	return X86EMUL_CONTINUE;
803 804
}

805 806
static __always_inline int do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt,
					       unsigned size)
807
{
808 809 810 811
	unsigned done_size = ctxt->fetch.end - ctxt->fetch.ptr;

	if (unlikely(done_size < size))
		return __do_insn_fetch_bytes(ctxt, size - done_size);
812 813
	else
		return X86EMUL_CONTINUE;
814 815
}

816
/* Fetch next part of the instruction being emulated. */
817
#define insn_fetch(_type, _ctxt)					\
818 819 820
({	_type _x;							\
									\
	rc = do_insn_fetch_bytes(_ctxt, sizeof(_type));			\
821 822
	if (rc != X86EMUL_CONTINUE)					\
		goto done;						\
823
	ctxt->_eip += sizeof(_type);					\
824 825
	_x = *(_type __aligned(1) *) ctxt->fetch.ptr;			\
	ctxt->fetch.ptr += sizeof(_type);				\
826
	_x;								\
827 828
})

829
#define insn_fetch_arr(_arr, _size, _ctxt)				\
830 831
({									\
	rc = do_insn_fetch_bytes(_ctxt, _size);				\
832 833
	if (rc != X86EMUL_CONTINUE)					\
		goto done;						\
834
	ctxt->_eip += (_size);						\
835 836
	memcpy(_arr, ctxt->fetch.ptr, _size);				\
	ctxt->fetch.ptr += (_size);					\
837 838
})
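
/*
 * Usage sketch (illustrative): the decoder pulls instruction bytes through
 * these macros, relying on the enclosing function's "rc" variable and
 * "done" label, e.g.
 *
 *	ctxt->b = insn_fetch(u8, ctxt);		/- opcode byte -/
 *	disp = insn_fetch(s32, ctxt);		/- 32-bit displacement -/
 *
 * Each fetch bumps _eip and the fetch-cache pointer by sizeof(type).
 */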

/*
 * Given the 'reg' portion of a ModRM byte, and a register block, return a
 * pointer into the block that addresses the relevant register.
 * @byteop tells whether the operand is a byte operand; the high-byte
 * registers AH, CH, DH and BH are only addressable when there is no REX
 * prefix.
 */
static void *decode_register(struct x86_emulate_ctxt *ctxt, u8 modrm_reg,
			     int byteop)
{
	void *p;
848
	int highbyte_regs = (ctxt->rex_prefix == 0) && byteop;
A
Avi Kivity 已提交
849 850

	if (highbyte_regs && modrm_reg >= 4 && modrm_reg < 8)
851 852 853
		p = (unsigned char *)reg_rmw(ctxt, modrm_reg & 3) + 1;
	else
		p = reg_rmw(ctxt, modrm_reg);
A
Avi Kivity 已提交
854 855 856 857
	return p;
}

static int read_descriptor(struct x86_emulate_ctxt *ctxt,
858
			   struct segmented_address addr,
A
Avi Kivity 已提交
859 860 861 862 863 864 865
			   u16 *size, unsigned long *address, int op_bytes)
{
	int rc;

	if (op_bytes == 2)
		op_bytes = 3;
	*address = 0;
866
	rc = segmented_read_std(ctxt, addr, size, 2);
867
	if (rc != X86EMUL_CONTINUE)
A
Avi Kivity 已提交
868
		return rc;
869
	addr.ea += 2;
870
	rc = segmented_read_std(ctxt, addr, address, op_bytes);
A
Avi Kivity 已提交
871 872 873
	return rc;
}

874 875 876 877 878 879 880 881 882 883
FASTOP2(add);
FASTOP2(or);
FASTOP2(adc);
FASTOP2(sbb);
FASTOP2(and);
FASTOP2(sub);
FASTOP2(xor);
FASTOP2(cmp);
FASTOP2(test);

884 885
FASTOP1SRC2(mul, mul_ex);
FASTOP1SRC2(imul, imul_ex);
886 887
FASTOP1SRC2EX(div, div_ex);
FASTOP1SRC2EX(idiv, idiv_ex);
888

889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913
FASTOP3WCL(shld);
FASTOP3WCL(shrd);

FASTOP2W(imul);

FASTOP1(not);
FASTOP1(neg);
FASTOP1(inc);
FASTOP1(dec);

FASTOP2CL(rol);
FASTOP2CL(ror);
FASTOP2CL(rcl);
FASTOP2CL(rcr);
FASTOP2CL(shl);
FASTOP2CL(shr);
FASTOP2CL(sar);

FASTOP2W(bsf);
FASTOP2W(bsr);
FASTOP2W(bt);
FASTOP2W(bts);
FASTOP2W(btr);
FASTOP2W(btc);

914 915
FASTOP2(xadd);

916 917
FASTOP2R(cmp, cmp_r);

918
static u8 test_cc(unsigned int condition, unsigned long flags)
919
{
920 921
	u8 rc;
	void (*fop)(void) = (void *)em_setcc + 4 * (condition & 0xf);
922

923
	flags = (flags & EFLAGS_MASK) | X86_EFLAGS_IF;
924
	asm("push %[flags]; popf; call *%[fastop]"
925 926
	    : "=a"(rc) : [fastop]"r"(fop), [flags]"r"(flags));
	return rc;
927 928
}

929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946
static void fetch_register_operand(struct operand *op)
{
	switch (op->bytes) {
	case 1:
		op->val = *(u8 *)op->addr.reg;
		break;
	case 2:
		op->val = *(u16 *)op->addr.reg;
		break;
	case 4:
		op->val = *(u32 *)op->addr.reg;
		break;
	case 8:
		op->val = *(u64 *)op->addr.reg;
		break;
	}
}

A
Avi Kivity 已提交
947 948 949 950
static void read_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data, int reg)
{
	ctxt->ops->get_fpu(ctxt);
	switch (reg) {
951 952 953 954 955 956 957 958
	case 0: asm("movdqa %%xmm0, %0" : "=m"(*data)); break;
	case 1: asm("movdqa %%xmm1, %0" : "=m"(*data)); break;
	case 2: asm("movdqa %%xmm2, %0" : "=m"(*data)); break;
	case 3: asm("movdqa %%xmm3, %0" : "=m"(*data)); break;
	case 4: asm("movdqa %%xmm4, %0" : "=m"(*data)); break;
	case 5: asm("movdqa %%xmm5, %0" : "=m"(*data)); break;
	case 6: asm("movdqa %%xmm6, %0" : "=m"(*data)); break;
	case 7: asm("movdqa %%xmm7, %0" : "=m"(*data)); break;
A
Avi Kivity 已提交
959
#ifdef CONFIG_X86_64
960 961 962 963 964 965 966 967
	case 8: asm("movdqa %%xmm8, %0" : "=m"(*data)); break;
	case 9: asm("movdqa %%xmm9, %0" : "=m"(*data)); break;
	case 10: asm("movdqa %%xmm10, %0" : "=m"(*data)); break;
	case 11: asm("movdqa %%xmm11, %0" : "=m"(*data)); break;
	case 12: asm("movdqa %%xmm12, %0" : "=m"(*data)); break;
	case 13: asm("movdqa %%xmm13, %0" : "=m"(*data)); break;
	case 14: asm("movdqa %%xmm14, %0" : "=m"(*data)); break;
	case 15: asm("movdqa %%xmm15, %0" : "=m"(*data)); break;
A
Avi Kivity 已提交
968 969 970 971 972 973 974 975 976 977 978
#endif
	default: BUG();
	}
	ctxt->ops->put_fpu(ctxt);
}

static void write_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data,
			  int reg)
{
	ctxt->ops->get_fpu(ctxt);
	switch (reg) {
979 980 981 982 983 984 985 986
	case 0: asm("movdqa %0, %%xmm0" : : "m"(*data)); break;
	case 1: asm("movdqa %0, %%xmm1" : : "m"(*data)); break;
	case 2: asm("movdqa %0, %%xmm2" : : "m"(*data)); break;
	case 3: asm("movdqa %0, %%xmm3" : : "m"(*data)); break;
	case 4: asm("movdqa %0, %%xmm4" : : "m"(*data)); break;
	case 5: asm("movdqa %0, %%xmm5" : : "m"(*data)); break;
	case 6: asm("movdqa %0, %%xmm6" : : "m"(*data)); break;
	case 7: asm("movdqa %0, %%xmm7" : : "m"(*data)); break;
A
Avi Kivity 已提交
987
#ifdef CONFIG_X86_64
988 989 990 991 992 993 994 995
	case 8: asm("movdqa %0, %%xmm8" : : "m"(*data)); break;
	case 9: asm("movdqa %0, %%xmm9" : : "m"(*data)); break;
	case 10: asm("movdqa %0, %%xmm10" : : "m"(*data)); break;
	case 11: asm("movdqa %0, %%xmm11" : : "m"(*data)); break;
	case 12: asm("movdqa %0, %%xmm12" : : "m"(*data)); break;
	case 13: asm("movdqa %0, %%xmm13" : : "m"(*data)); break;
	case 14: asm("movdqa %0, %%xmm14" : : "m"(*data)); break;
	case 15: asm("movdqa %0, %%xmm15" : : "m"(*data)); break;
A
Avi Kivity 已提交
996 997 998 999 1000 1001
#endif
	default: BUG();
	}
	ctxt->ops->put_fpu(ctxt);
}

A
Avi Kivity 已提交
1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035
static void read_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg)
{
	ctxt->ops->get_fpu(ctxt);
	switch (reg) {
	case 0: asm("movq %%mm0, %0" : "=m"(*data)); break;
	case 1: asm("movq %%mm1, %0" : "=m"(*data)); break;
	case 2: asm("movq %%mm2, %0" : "=m"(*data)); break;
	case 3: asm("movq %%mm3, %0" : "=m"(*data)); break;
	case 4: asm("movq %%mm4, %0" : "=m"(*data)); break;
	case 5: asm("movq %%mm5, %0" : "=m"(*data)); break;
	case 6: asm("movq %%mm6, %0" : "=m"(*data)); break;
	case 7: asm("movq %%mm7, %0" : "=m"(*data)); break;
	default: BUG();
	}
	ctxt->ops->put_fpu(ctxt);
}

static void write_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg)
{
	ctxt->ops->get_fpu(ctxt);
	switch (reg) {
	case 0: asm("movq %0, %%mm0" : : "m"(*data)); break;
	case 1: asm("movq %0, %%mm1" : : "m"(*data)); break;
	case 2: asm("movq %0, %%mm2" : : "m"(*data)); break;
	case 3: asm("movq %0, %%mm3" : : "m"(*data)); break;
	case 4: asm("movq %0, %%mm4" : : "m"(*data)); break;
	case 5: asm("movq %0, %%mm5" : : "m"(*data)); break;
	case 6: asm("movq %0, %%mm6" : : "m"(*data)); break;
	case 7: asm("movq %0, %%mm7" : : "m"(*data)); break;
	default: BUG();
	}
	ctxt->ops->put_fpu(ctxt);
}

1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082
static int em_fninit(struct x86_emulate_ctxt *ctxt)
{
	if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
		return emulate_nm(ctxt);

	ctxt->ops->get_fpu(ctxt);
	asm volatile("fninit");
	ctxt->ops->put_fpu(ctxt);
	return X86EMUL_CONTINUE;
}

static int em_fnstcw(struct x86_emulate_ctxt *ctxt)
{
	u16 fcw;

	if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
		return emulate_nm(ctxt);

	ctxt->ops->get_fpu(ctxt);
	asm volatile("fnstcw %0": "+m"(fcw));
	ctxt->ops->put_fpu(ctxt);

	/* force 2 byte destination */
	ctxt->dst.bytes = 2;
	ctxt->dst.val = fcw;

	return X86EMUL_CONTINUE;
}

static int em_fnstsw(struct x86_emulate_ctxt *ctxt)
{
	u16 fsw;

	if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
		return emulate_nm(ctxt);

	ctxt->ops->get_fpu(ctxt);
	asm volatile("fnstsw %0": "+m"(fsw));
	ctxt->ops->put_fpu(ctxt);

	/* force 2 byte destination */
	ctxt->dst.bytes = 2;
	ctxt->dst.val = fsw;

	return X86EMUL_CONTINUE;
}

A
Avi Kivity 已提交
1083
static void decode_register_operand(struct x86_emulate_ctxt *ctxt,
1084
				    struct operand *op)
1085
{
1086
	unsigned reg = ctxt->modrm_reg;
1087

1088 1089
	if (!(ctxt->d & ModRM))
		reg = (ctxt->b & 7) | ((ctxt->rex_prefix & 1) << 3);
A
Avi Kivity 已提交
1090

1091
	if (ctxt->d & Sse) {
A
Avi Kivity 已提交
1092 1093 1094 1095 1096 1097
		op->type = OP_XMM;
		op->bytes = 16;
		op->addr.xmm = reg;
		read_sse_reg(ctxt, &op->vec_val, reg);
		return;
	}
A
Avi Kivity 已提交
1098 1099 1100 1101 1102 1103 1104
	if (ctxt->d & Mmx) {
		reg &= 7;
		op->type = OP_MM;
		op->bytes = 8;
		op->addr.mm = reg;
		return;
	}
A
Avi Kivity 已提交
1105

1106
	op->type = OP_REG;
1107 1108 1109
	op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
	op->addr.reg = decode_register(ctxt, reg, ctxt->d & ByteOp);

1110
	fetch_register_operand(op);
1111 1112 1113
	op->orig_val = op->val;
}

1114 1115 1116 1117 1118 1119
static void adjust_modrm_seg(struct x86_emulate_ctxt *ctxt, int base_reg)
{
	if (base_reg == VCPU_REGS_RSP || base_reg == VCPU_REGS_RBP)
		ctxt->modrm_seg = VCPU_SREG_SS;
}

1120
static int decode_modrm(struct x86_emulate_ctxt *ctxt,
1121
			struct operand *op)
1122 1123
{
	u8 sib;
B
Bandan Das 已提交
1124
	int index_reg, base_reg, scale;
1125
	int rc = X86EMUL_CONTINUE;
1126
	ulong modrm_ea = 0;
1127

B
Bandan Das 已提交
1128 1129 1130
	ctxt->modrm_reg = ((ctxt->rex_prefix << 1) & 8); /* REX.R */
	index_reg = (ctxt->rex_prefix << 2) & 8; /* REX.X */
	base_reg = (ctxt->rex_prefix << 3) & 8; /* REX.B */
1131

B
Bandan Das 已提交
1132
	ctxt->modrm_mod = (ctxt->modrm & 0xc0) >> 6;
1133
	ctxt->modrm_reg |= (ctxt->modrm & 0x38) >> 3;
B
Bandan Das 已提交
1134
	ctxt->modrm_rm = base_reg | (ctxt->modrm & 0x07);
1135
	ctxt->modrm_seg = VCPU_SREG_DS;
1136

1137
	if (ctxt->modrm_mod == 3 || (ctxt->d & NoMod)) {
1138
		op->type = OP_REG;
1139
		op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
1140
		op->addr.reg = decode_register(ctxt, ctxt->modrm_rm,
1141
				ctxt->d & ByteOp);
1142
		if (ctxt->d & Sse) {
A
Avi Kivity 已提交
1143 1144
			op->type = OP_XMM;
			op->bytes = 16;
1145 1146
			op->addr.xmm = ctxt->modrm_rm;
			read_sse_reg(ctxt, &op->vec_val, ctxt->modrm_rm);
A
Avi Kivity 已提交
1147 1148
			return rc;
		}
A
Avi Kivity 已提交
1149 1150 1151
		if (ctxt->d & Mmx) {
			op->type = OP_MM;
			op->bytes = 8;
1152
			op->addr.mm = ctxt->modrm_rm & 7;
A
Avi Kivity 已提交
1153 1154
			return rc;
		}
1155
		fetch_register_operand(op);
1156 1157 1158
		return rc;
	}

1159 1160
	op->type = OP_MEM;

1161
	if (ctxt->ad_bytes == 2) {
1162 1163 1164 1165
		unsigned bx = reg_read(ctxt, VCPU_REGS_RBX);
		unsigned bp = reg_read(ctxt, VCPU_REGS_RBP);
		unsigned si = reg_read(ctxt, VCPU_REGS_RSI);
		unsigned di = reg_read(ctxt, VCPU_REGS_RDI);
1166 1167

		/* 16-bit ModR/M decode. */
1168
		switch (ctxt->modrm_mod) {
1169
		case 0:
1170
			if (ctxt->modrm_rm == 6)
1171
				modrm_ea += insn_fetch(u16, ctxt);
1172 1173
			break;
		case 1:
1174
			modrm_ea += insn_fetch(s8, ctxt);
1175 1176
			break;
		case 2:
1177
			modrm_ea += insn_fetch(u16, ctxt);
1178 1179
			break;
		}
1180
		switch (ctxt->modrm_rm) {
1181
		case 0:
1182
			modrm_ea += bx + si;
1183 1184
			break;
		case 1:
1185
			modrm_ea += bx + di;
1186 1187
			break;
		case 2:
1188
			modrm_ea += bp + si;
1189 1190
			break;
		case 3:
1191
			modrm_ea += bp + di;
1192 1193
			break;
		case 4:
1194
			modrm_ea += si;
1195 1196
			break;
		case 5:
1197
			modrm_ea += di;
1198 1199
			break;
		case 6:
1200
			if (ctxt->modrm_mod != 0)
1201
				modrm_ea += bp;
1202 1203
			break;
		case 7:
1204
			modrm_ea += bx;
1205 1206
			break;
		}
1207 1208 1209
		if (ctxt->modrm_rm == 2 || ctxt->modrm_rm == 3 ||
		    (ctxt->modrm_rm == 6 && ctxt->modrm_mod != 0))
			ctxt->modrm_seg = VCPU_SREG_SS;
1210
		modrm_ea = (u16)modrm_ea;
1211 1212
	} else {
		/* 32/64-bit ModR/M decode. */
1213
		if ((ctxt->modrm_rm & 7) == 4) {
1214
			sib = insn_fetch(u8, ctxt);
1215 1216 1217 1218
			index_reg |= (sib >> 3) & 7;
			base_reg |= sib & 7;
			scale = sib >> 6;

1219
			if ((base_reg & 7) == 5 && ctxt->modrm_mod == 0)
1220
				modrm_ea += insn_fetch(s32, ctxt);
1221
			else {
1222
				modrm_ea += reg_read(ctxt, base_reg);
1223 1224
				adjust_modrm_seg(ctxt, base_reg);
			}
1225
			if (index_reg != 4)
1226
				modrm_ea += reg_read(ctxt, index_reg) << scale;
1227
		} else if ((ctxt->modrm_rm & 7) == 5 && ctxt->modrm_mod == 0) {
1228
			modrm_ea += insn_fetch(s32, ctxt);
1229
			if (ctxt->mode == X86EMUL_MODE_PROT64)
1230
				ctxt->rip_relative = 1;
1231 1232
		} else {
			base_reg = ctxt->modrm_rm;
1233
			modrm_ea += reg_read(ctxt, base_reg);
1234 1235
			adjust_modrm_seg(ctxt, base_reg);
		}
1236
		switch (ctxt->modrm_mod) {
1237
		case 1:
1238
			modrm_ea += insn_fetch(s8, ctxt);
1239 1240
			break;
		case 2:
1241
			modrm_ea += insn_fetch(s32, ctxt);
1242 1243 1244
			break;
		}
	}
1245
	op->addr.mem.ea = modrm_ea;
1246 1247 1248
	if (ctxt->ad_bytes != 8)
		ctxt->memop.addr.mem.ea = (u32)ctxt->memop.addr.mem.ea;

1249 1250 1251 1252 1253
done:
	return rc;
}

static int decode_abs(struct x86_emulate_ctxt *ctxt,
1254
		      struct operand *op)
1255
{
1256
	int rc = X86EMUL_CONTINUE;
1257

1258
	op->type = OP_MEM;
1259
	switch (ctxt->ad_bytes) {
1260
	case 2:
1261
		op->addr.mem.ea = insn_fetch(u16, ctxt);
1262 1263
		break;
	case 4:
1264
		op->addr.mem.ea = insn_fetch(u32, ctxt);
1265 1266
		break;
	case 8:
1267
		op->addr.mem.ea = insn_fetch(u64, ctxt);
1268 1269 1270 1271 1272 1273
		break;
	}
done:
	return rc;
}

1274
static void fetch_bit_operand(struct x86_emulate_ctxt *ctxt)
1275
{
1276
	long sv = 0, mask;
1277

1278
	if (ctxt->dst.type == OP_MEM && ctxt->src.type == OP_REG) {
1279
		mask = ~((long)ctxt->dst.bytes * 8 - 1);
1280

1281 1282 1283 1284
		if (ctxt->src.bytes == 2)
			sv = (s16)ctxt->src.val & (s16)mask;
		else if (ctxt->src.bytes == 4)
			sv = (s32)ctxt->src.val & (s32)mask;
1285 1286
		else
			sv = (s64)ctxt->src.val & (s64)mask;
1287

1288 1289
		ctxt->dst.addr.mem.ea = address_mask(ctxt,
					   ctxt->dst.addr.mem.ea + (sv >> 3));
1290
	}
1291 1292

	/* only subword offset */
1293
	ctxt->src.val &= (ctxt->dst.bytes << 3) - 1;
1294 1295
}

1296 1297
static int read_emulated(struct x86_emulate_ctxt *ctxt,
			 unsigned long addr, void *dest, unsigned size)
A
Avi Kivity 已提交
1298
{
1299
	int rc;
1300
	struct read_cache *mc = &ctxt->mem_read;
A
Avi Kivity 已提交
1301

1302 1303
	if (mc->pos < mc->end)
		goto read_cached;
A
Avi Kivity 已提交
1304

1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316
	WARN_ON((mc->end + size) >= sizeof(mc->data));

	rc = ctxt->ops->read_emulated(ctxt, addr, mc->data + mc->end, size,
				      &ctxt->exception);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	mc->end += size;

read_cached:
	memcpy(dest, mc->data + mc->pos, size);
	mc->pos += size;
1317 1318
	return X86EMUL_CONTINUE;
}
A
Avi Kivity 已提交
1319

1320 1321 1322 1323 1324
static int segmented_read(struct x86_emulate_ctxt *ctxt,
			  struct segmented_address addr,
			  void *data,
			  unsigned size)
{
1325 1326 1327
	int rc;
	ulong linear;

1328
	rc = linearize(ctxt, addr, size, false, &linear);
1329 1330
	if (rc != X86EMUL_CONTINUE)
		return rc;
1331
	return read_emulated(ctxt, linear, data, size);
1332 1333 1334 1335 1336 1337 1338
}

static int segmented_write(struct x86_emulate_ctxt *ctxt,
			   struct segmented_address addr,
			   const void *data,
			   unsigned size)
{
1339 1340 1341
	int rc;
	ulong linear;

1342
	rc = linearize(ctxt, addr, size, true, &linear);
1343 1344
	if (rc != X86EMUL_CONTINUE)
		return rc;
1345 1346
	return ctxt->ops->write_emulated(ctxt, linear, data, size,
					 &ctxt->exception);
1347 1348 1349 1350 1351 1352 1353
}

static int segmented_cmpxchg(struct x86_emulate_ctxt *ctxt,
			     struct segmented_address addr,
			     const void *orig_data, const void *data,
			     unsigned size)
{
1354 1355 1356
	int rc;
	ulong linear;

1357
	rc = linearize(ctxt, addr, size, true, &linear);
1358 1359
	if (rc != X86EMUL_CONTINUE)
		return rc;
1360 1361
	return ctxt->ops->cmpxchg_emulated(ctxt, linear, orig_data, data,
					   size, &ctxt->exception);
1362 1363
}

1364 1365 1366 1367
static int pio_in_emulated(struct x86_emulate_ctxt *ctxt,
			   unsigned int size, unsigned short port,
			   void *dest)
{
1368
	struct read_cache *rc = &ctxt->io_read;
1369

1370 1371
	if (rc->pos == rc->end) { /* refill pio read ahead */
		unsigned int in_page, n;
1372
		unsigned int count = ctxt->rep_prefix ?
1373
			address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) : 1;
1374
		in_page = (ctxt->eflags & EFLG_DF) ?
1375 1376
			offset_in_page(reg_read(ctxt, VCPU_REGS_RDI)) :
			PAGE_SIZE - offset_in_page(reg_read(ctxt, VCPU_REGS_RDI));
1377
		n = min3(in_page, (unsigned int)sizeof(rc->data) / size, count);
1378 1379 1380
		if (n == 0)
			n = 1;
		rc->pos = rc->end = 0;
1381
		if (!ctxt->ops->pio_in_emulated(ctxt, size, port, rc->data, n))
1382 1383
			return 0;
		rc->end = n * size;
A
Avi Kivity 已提交
1384 1385
	}

1386 1387
	if (ctxt->rep_prefix && (ctxt->d & String) &&
	    !(ctxt->eflags & EFLG_DF)) {
1388 1389 1390 1391 1392 1393 1394 1395
		ctxt->dst.data = rc->data + rc->pos;
		ctxt->dst.type = OP_MEM_STR;
		ctxt->dst.count = (rc->end - rc->pos) / size;
		rc->pos = rc->end;
	} else {
		memcpy(dest, rc->data + rc->pos, size);
		rc->pos += size;
	}
1396 1397
	return 1;
}
A
Avi Kivity 已提交
1398

1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414
static int read_interrupt_descriptor(struct x86_emulate_ctxt *ctxt,
				     u16 index, struct desc_struct *desc)
{
	struct desc_ptr dt;
	ulong addr;

	ctxt->ops->get_idt(ctxt, &dt);

	if (dt.size < index * 8 + 7)
		return emulate_gp(ctxt, index << 3 | 0x2);

	addr = dt.address + index * 8;
	return ctxt->ops->read_std(ctxt, addr, desc, sizeof *desc,
				   &ctxt->exception);
}

1415 1416 1417
static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt,
				     u16 selector, struct desc_ptr *dt)
{
1418
	const struct x86_emulate_ops *ops = ctxt->ops;
1419
	u32 base3 = 0;
1420

1421 1422
	if (selector & 1 << 2) {
		struct desc_struct desc;
1423 1424
		u16 sel;

1425
		memset (dt, 0, sizeof *dt);
1426 1427
		if (!ops->get_segment(ctxt, &sel, &desc, &base3,
				      VCPU_SREG_LDTR))
1428
			return;
1429

1430
		dt->size = desc_limit_scaled(&desc); /* what if limit > 65535? */
1431
		dt->address = get_desc_base(&desc) | ((u64)base3 << 32);
1432
	} else
1433
		ops->get_gdt(ctxt, dt);
1434
}
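
/*
 * Example (illustrative): selector 0x000f has TI set and index 1, so the
 * lookup above goes through the LDT; selector 0x0010 (TI clear, index 2)
 * is resolved against the GDT via ops->get_gdt() instead.
 */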
1435

1436 1437
/* allowed just for 8 bytes segments */
static int read_segment_descriptor(struct x86_emulate_ctxt *ctxt,
1438 1439
				   u16 selector, struct desc_struct *desc,
				   ulong *desc_addr_p)
1440 1441 1442 1443
{
	struct desc_ptr dt;
	u16 index = selector >> 3;
	ulong addr;
1444

1445
	get_descriptor_table_ptr(ctxt, selector, &dt);
1446

1447 1448
	if (dt.size < index * 8 + 7)
		return emulate_gp(ctxt, selector & 0xfffc);
1449

1450
	*desc_addr_p = addr = dt.address + index * 8;
1451 1452
	return ctxt->ops->read_std(ctxt, addr, desc, sizeof *desc,
				   &ctxt->exception);
1453
}
1454

1455 1456 1457 1458 1459 1460 1461
/* allowed just for 8 bytes segments */
static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt,
				    u16 selector, struct desc_struct *desc)
{
	struct desc_ptr dt;
	u16 index = selector >> 3;
	ulong addr;
A
Avi Kivity 已提交
1462

1463
	get_descriptor_table_ptr(ctxt, selector, &dt);
1464

1465 1466
	if (dt.size < index * 8 + 7)
		return emulate_gp(ctxt, selector & 0xfffc);
A
Avi Kivity 已提交
1467

1468
	addr = dt.address + index * 8;
1469 1470
	return ctxt->ops->write_std(ctxt, addr, desc, sizeof *desc,
				    &ctxt->exception);
1471
}
1472

1473
/* Does not support long mode */
1474
static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
1475 1476 1477
				     u16 selector, int seg, u8 cpl,
				     bool in_task_switch,
				     struct desc_struct *desc)
1478
{
1479
	struct desc_struct seg_desc, old_desc;
1480
	u8 dpl, rpl;
1481 1482 1483
	unsigned err_vec = GP_VECTOR;
	u32 err_code = 0;
	bool null_selector = !(selector & ~0x3); /* 0000-0003 are null */
1484
	ulong desc_addr;
1485
	int ret;
1486
	u16 dummy;
1487
	u32 base3 = 0;
1488

1489
	memset(&seg_desc, 0, sizeof seg_desc);
1490

1491 1492 1493
	if (ctxt->mode == X86EMUL_MODE_REAL) {
		/* set real mode segment descriptor (keep limit etc. for
		 * unreal mode) */
1494
		ctxt->ops->get_segment(ctxt, &dummy, &seg_desc, NULL, seg);
1495 1496
		set_desc_base(&seg_desc, selector << 4);
		goto load;
1497 1498 1499 1500 1501 1502 1503 1504 1505
	} else if (seg <= VCPU_SREG_GS && ctxt->mode == X86EMUL_MODE_VM86) {
		/* VM86 needs a clean new segment descriptor */
		set_desc_base(&seg_desc, selector << 4);
		set_desc_limit(&seg_desc, 0xffff);
		seg_desc.type = 3;
		seg_desc.p = 1;
		seg_desc.s = 1;
		seg_desc.dpl = 3;
		goto load;
1506 1507
	}

1508 1509 1510 1511 1512 1513 1514
	rpl = selector & 3;

	/* NULL selector is not valid for TR, CS and SS (except for long mode) */
	if ((seg == VCPU_SREG_CS
	     || (seg == VCPU_SREG_SS
		 && (ctxt->mode != X86EMUL_MODE_PROT64 || rpl != cpl))
	     || seg == VCPU_SREG_TR)
1515 1516 1517 1518 1519 1520 1521 1522 1523 1524
	    && null_selector)
		goto exception;

	/* TR should be in GDT only */
	if (seg == VCPU_SREG_TR && (selector & (1 << 2)))
		goto exception;

	if (null_selector) /* for NULL selector skip all following checks */
		goto load;

1525
	ret = read_segment_descriptor(ctxt, selector, &seg_desc, &desc_addr);
1526 1527 1528 1529
	if (ret != X86EMUL_CONTINUE)
		return ret;

	err_code = selector & 0xfffc;
1530
	err_vec = in_task_switch ? TS_VECTOR : GP_VECTOR;
1531

G
Guo Chao 已提交
1532
	/* can't load system descriptor into segment selector */
1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550
	if (seg <= VCPU_SREG_GS && !seg_desc.s)
		goto exception;

	if (!seg_desc.p) {
		err_vec = (seg == VCPU_SREG_SS) ? SS_VECTOR : NP_VECTOR;
		goto exception;
	}

	dpl = seg_desc.dpl;

	switch (seg) {
	case VCPU_SREG_SS:
		/*
		 * segment is not a writable data segment or segment
		 * selector's RPL != CPL or segment selector's RPL != CPL
		 */
		if (rpl != cpl || (seg_desc.type & 0xa) != 0x2 || dpl != cpl)
			goto exception;
A
Avi Kivity 已提交
1551
		break;
1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564
	case VCPU_SREG_CS:
		if (!(seg_desc.type & 8))
			goto exception;

		if (seg_desc.type & 4) {
			/* conforming */
			if (dpl > cpl)
				goto exception;
		} else {
			/* nonconforming */
			if (rpl > cpl || dpl != cpl)
				goto exception;
		}
1565 1566 1567 1568 1569 1570 1571 1572 1573
		/* in long-mode d/b must be clear if l is set */
		if (seg_desc.d && seg_desc.l) {
			u64 efer = 0;

			ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
			if (efer & EFER_LMA)
				goto exception;
		}

1574 1575
		/* CS(RPL) <- CPL */
		selector = (selector & 0xfffc) | cpl;
A
Avi Kivity 已提交
1576
		break;
1577 1578 1579
	case VCPU_SREG_TR:
		if (seg_desc.s || (seg_desc.type != 1 && seg_desc.type != 9))
			goto exception;
1580 1581 1582 1583 1584 1585
		old_desc = seg_desc;
		seg_desc.type |= 2; /* busy */
		ret = ctxt->ops->cmpxchg_emulated(ctxt, desc_addr, &old_desc, &seg_desc,
						  sizeof(seg_desc), &ctxt->exception);
		if (ret != X86EMUL_CONTINUE)
			return ret;
1586 1587 1588 1589 1590 1591
		break;
	case VCPU_SREG_LDTR:
		if (seg_desc.s || seg_desc.type != 2)
			goto exception;
		break;
	default: /*  DS, ES, FS, or GS */
1592
		/*
1593 1594 1595
		 * segment is not a data or readable code segment or
		 * ((segment is a data or nonconforming code segment)
		 * and (both RPL and CPL > DPL))
1596
		 */
1597 1598 1599 1600
		if ((seg_desc.type & 0xa) == 0x8 ||
		    (((seg_desc.type & 0xc) != 0xc) &&
		     (rpl > dpl && cpl > dpl)))
			goto exception;
A
Avi Kivity 已提交
1601
		break;
1602 1603 1604 1605 1606
	}

	if (seg_desc.s) {
		/* mark segment as accessed */
		seg_desc.type |= 1;
1607
		ret = write_segment_descriptor(ctxt, selector, &seg_desc);
1608 1609
		if (ret != X86EMUL_CONTINUE)
			return ret;
1610 1611 1612 1613 1614
	} else if (ctxt->mode == X86EMUL_MODE_PROT64) {
		ret = ctxt->ops->read_std(ctxt, desc_addr+8, &base3,
				sizeof(base3), &ctxt->exception);
		if (ret != X86EMUL_CONTINUE)
			return ret;
1615 1616 1617
		if (is_noncanonical_address(get_desc_base(&seg_desc) |
					     ((u64)base3 << 32)))
			return emulate_gp(ctxt, 0);
1618 1619
	}
load:
1620
	ctxt->ops->set_segment(ctxt, selector, &seg_desc, base3, seg);
1621 1622
	if (desc)
		*desc = seg_desc;
1623 1624
	return X86EMUL_CONTINUE;
exception:
1625
	return emulate_exception(ctxt, err_vec, err_code, true);
1626 1627
}

1628 1629 1630 1631
static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
				   u16 selector, int seg)
{
	u8 cpl = ctxt->ops->cpl(ctxt);
1632
	return __load_segment_descriptor(ctxt, selector, seg, cpl, false, NULL);
1633 1634
}

1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653
static void write_register_operand(struct operand *op)
{
	/* The 4-byte case *is* correct: in 64-bit mode we zero-extend. */
	switch (op->bytes) {
	case 1:
		*(u8 *)op->addr.reg = (u8)op->val;
		break;
	case 2:
		*(u16 *)op->addr.reg = (u16)op->val;
		break;
	case 4:
		*op->addr.reg = (u32)op->val;
		break;	/* 64b: zero-extend */
	case 8:
		*op->addr.reg = op->val;
		break;
	}
}

1654
static int writeback(struct x86_emulate_ctxt *ctxt, struct operand *op)
1655
{
1656
	switch (op->type) {
1657
	case OP_REG:
1658
		write_register_operand(op);
A
Avi Kivity 已提交
1659
		break;
1660
	case OP_MEM:
1661
		if (ctxt->lock_prefix)
P
Paolo Bonzini 已提交
1662 1663 1664 1665 1666 1667 1668
			return segmented_cmpxchg(ctxt,
						 op->addr.mem,
						 &op->orig_val,
						 &op->val,
						 op->bytes);
		else
			return segmented_write(ctxt,
1669 1670 1671
					       op->addr.mem,
					       &op->val,
					       op->bytes);
1672
		break;
1673
	case OP_MEM_STR:
P
Paolo Bonzini 已提交
1674 1675 1676 1677
		return segmented_write(ctxt,
				       op->addr.mem,
				       op->data,
				       op->bytes * op->count);
1678
		break;
A
Avi Kivity 已提交
1679
	case OP_XMM:
1680
		write_sse_reg(ctxt, &op->vec_val, op->addr.xmm);
A
Avi Kivity 已提交
1681
		break;
A
Avi Kivity 已提交
1682
	case OP_MM:
1683
		write_mmx_reg(ctxt, &op->mm_val, op->addr.mm);
A
Avi Kivity 已提交
1684
		break;
1685 1686
	case OP_NONE:
		/* no writeback */
1687
		break;
1688
	default:
1689
		break;
A
Avi Kivity 已提交
1690
	}
1691 1692
	return X86EMUL_CONTINUE;
}
A
Avi Kivity 已提交
1693

1694
static int push(struct x86_emulate_ctxt *ctxt, void *data, int bytes)
1695
{
1696
	struct segmented_address addr;
1697

1698
	rsp_increment(ctxt, -bytes);
1699
	addr.ea = reg_read(ctxt, VCPU_REGS_RSP) & stack_mask(ctxt);
1700 1701
	addr.seg = VCPU_SREG_SS;

1702 1703 1704 1705 1706
	return segmented_write(ctxt, addr, data, bytes);
}

static int em_push(struct x86_emulate_ctxt *ctxt)
{
1707
	/* Disable writeback. */
1708
	ctxt->dst.type = OP_NONE;
1709
	return push(ctxt, &ctxt->src.val, ctxt->op_bytes);
1710
}
1711

1712 1713 1714 1715
static int emulate_pop(struct x86_emulate_ctxt *ctxt,
		       void *dest, int len)
{
	int rc;
1716
	struct segmented_address addr;
1717

1718
	addr.ea = reg_read(ctxt, VCPU_REGS_RSP) & stack_mask(ctxt);
1719
	addr.seg = VCPU_SREG_SS;
1720
	rc = segmented_read(ctxt, addr, dest, len);
1721 1722 1723
	if (rc != X86EMUL_CONTINUE)
		return rc;

1724
	rsp_increment(ctxt, len);
1725
	return rc;
1726 1727
}

1728 1729
static int em_pop(struct x86_emulate_ctxt *ctxt)
{
1730
	return emulate_pop(ctxt, &ctxt->dst.val, ctxt->op_bytes);
1731 1732
}

1733
static int emulate_popf(struct x86_emulate_ctxt *ctxt,
1734
			void *dest, int len)
1735 1736
{
	int rc;
1737 1738
	unsigned long val, change_mask;
	int iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT;
1739
	int cpl = ctxt->ops->cpl(ctxt);
1740

1741
	rc = emulate_pop(ctxt, &val, len);
1742 1743
	if (rc != X86EMUL_CONTINUE)
		return rc;
1744

1745
	change_mask = EFLG_CF | EFLG_PF | EFLG_AF | EFLG_ZF | EFLG_SF | EFLG_OF
1746
		| EFLG_TF | EFLG_DF | EFLG_NT | EFLG_AC | EFLG_ID;
1747

1748 1749 1750 1751 1752 1753 1754 1755 1756 1757
	switch(ctxt->mode) {
	case X86EMUL_MODE_PROT64:
	case X86EMUL_MODE_PROT32:
	case X86EMUL_MODE_PROT16:
		if (cpl == 0)
			change_mask |= EFLG_IOPL;
		if (cpl <= iopl)
			change_mask |= EFLG_IF;
		break;
	case X86EMUL_MODE_VM86:
1758 1759
		if (iopl < 3)
			return emulate_gp(ctxt, 0);
1760 1761 1762 1763 1764
		change_mask |= EFLG_IF;
		break;
	default: /* real mode */
		change_mask |= (EFLG_IOPL | EFLG_IF);
		break;
1765
	}
1766 1767 1768 1769 1770

	*(unsigned long *)dest =
		(ctxt->eflags & ~change_mask) | (val & change_mask);

	return rc;
1771 1772
}

1773 1774
static int em_popf(struct x86_emulate_ctxt *ctxt)
{
1775 1776 1777 1778
	ctxt->dst.type = OP_REG;
	ctxt->dst.addr.reg = &ctxt->eflags;
	ctxt->dst.bytes = ctxt->op_bytes;
	return emulate_popf(ctxt, &ctxt->dst.val, ctxt->op_bytes);
1779 1780
}

A
Avi Kivity 已提交
1781 1782 1783 1784 1785
static int em_enter(struct x86_emulate_ctxt *ctxt)
{
	int rc;
	unsigned frame_size = ctxt->src.val;
	unsigned nesting_level = ctxt->src2.val & 31;
	ulong rbp;

	if (nesting_level)
		return X86EMUL_UNHANDLEABLE;

	rbp = reg_read(ctxt, VCPU_REGS_RBP);
	rc = push(ctxt, &rbp, stack_size(ctxt));
	if (rc != X86EMUL_CONTINUE)
		return rc;
	assign_masked(reg_rmw(ctxt, VCPU_REGS_RBP), reg_read(ctxt, VCPU_REGS_RSP),
		      stack_mask(ctxt));
	assign_masked(reg_rmw(ctxt, VCPU_REGS_RSP),
		      reg_read(ctxt, VCPU_REGS_RSP) - frame_size,
		      stack_mask(ctxt));
	return X86EMUL_CONTINUE;
}
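
/* LEAVE: undo ENTER by restoring RSP from RBP and popping the saved RBP. */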

static int em_leave(struct x86_emulate_ctxt *ctxt)
{
	assign_masked(reg_rmw(ctxt, VCPU_REGS_RSP), reg_read(ctxt, VCPU_REGS_RBP),
		      stack_mask(ctxt));
	return emulate_pop(ctxt, reg_rmw(ctxt, VCPU_REGS_RBP), ctxt->op_bytes);
}

static int em_push_sreg(struct x86_emulate_ctxt *ctxt)
{
	int seg = ctxt->src2.val;

	ctxt->src.val = get_segment_selector(ctxt, seg);
	if (ctxt->op_bytes == 4) {
		rsp_increment(ctxt, -2);
		ctxt->op_bytes = 2;
	}

	return em_push(ctxt);
}

static int em_pop_sreg(struct x86_emulate_ctxt *ctxt)
{
	int seg = ctxt->src2.val;
	unsigned long selector;
	int rc;

	rc = emulate_pop(ctxt, &selector, ctxt->op_bytes);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	if (ctxt->modrm_reg == VCPU_SREG_SS)
		ctxt->interruptibility = KVM_X86_SHADOW_INT_MOV_SS;

	rc = load_segment_descriptor(ctxt, (u16)selector, seg);
	return rc;
}

static int em_pusha(struct x86_emulate_ctxt *ctxt)
{
	unsigned long old_esp = reg_read(ctxt, VCPU_REGS_RSP);
	int rc = X86EMUL_CONTINUE;
	int reg = VCPU_REGS_RAX;

	while (reg <= VCPU_REGS_RDI) {
		(reg == VCPU_REGS_RSP) ?
		(ctxt->src.val = old_esp) : (ctxt->src.val = reg_read(ctxt, reg));

		rc = em_push(ctxt);
		if (rc != X86EMUL_CONTINUE)
			return rc;

		++reg;
	}

	return rc;
}

static int em_pushf(struct x86_emulate_ctxt *ctxt)
{
	ctxt->src.val =  (unsigned long)ctxt->eflags;
	return em_push(ctxt);
}

static int em_popa(struct x86_emulate_ctxt *ctxt)
{
	int rc = X86EMUL_CONTINUE;
	int reg = VCPU_REGS_RDI;

	while (reg >= VCPU_REGS_RAX) {
		if (reg == VCPU_REGS_RSP) {
			rsp_increment(ctxt, ctxt->op_bytes);
			--reg;
		}

		rc = emulate_pop(ctxt, reg_rmw(ctxt, reg), ctxt->op_bytes);
		if (rc != X86EMUL_CONTINUE)
			break;
		--reg;
	}
	return rc;
}
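
/*
 * Real-mode interrupt entry: push FLAGS, CS and IP, clear IF/TF/AC, then
 * load the new CS:IP from the IVT slot for 'irq'.  Limit checks are still
 * a TODO (see below).
 */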

static int __emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq)
{
	const struct x86_emulate_ops *ops = ctxt->ops;
	int rc;
	struct desc_ptr dt;
	gva_t cs_addr;
	gva_t eip_addr;
	u16 cs, eip;

	/* TODO: Add limit checks */
	ctxt->src.val = ctxt->eflags;
	rc = em_push(ctxt);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	ctxt->eflags &= ~(EFLG_IF | EFLG_TF | EFLG_AC);

	ctxt->src.val = get_segment_selector(ctxt, VCPU_SREG_CS);
	rc = em_push(ctxt);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	ctxt->src.val = ctxt->_eip;
	rc = em_push(ctxt);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	ops->get_idt(ctxt, &dt);

	eip_addr = dt.address + (irq << 2);
	cs_addr = dt.address + (irq << 2) + 2;

	rc = ops->read_std(ctxt, cs_addr, &cs, 2, &ctxt->exception);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	rc = ops->read_std(ctxt, eip_addr, &eip, 2, &ctxt->exception);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	rc = load_segment_descriptor(ctxt, cs, VCPU_SREG_CS);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	ctxt->_eip = eip;

	return rc;
}

int emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq)
{
	int rc;

	invalidate_registers(ctxt);
	rc = __emulate_int_real(ctxt, irq);
	if (rc == X86EMUL_CONTINUE)
		writeback_registers(ctxt);
	return rc;
}

static int emulate_int(struct x86_emulate_ctxt *ctxt, int irq)
{
	switch(ctxt->mode) {
	case X86EMUL_MODE_REAL:
		return __emulate_int_real(ctxt, irq);
	case X86EMUL_MODE_VM86:
	case X86EMUL_MODE_PROT16:
	case X86EMUL_MODE_PROT32:
	case X86EMUL_MODE_PROT64:
	default:
		/* Protected mode interrupts are not implemented yet */
		return X86EMUL_UNHANDLEABLE;
	}
}

1960
static int emulate_iret_real(struct x86_emulate_ctxt *ctxt)
1961
{
	int rc = X86EMUL_CONTINUE;
	unsigned long temp_eip = 0;
	unsigned long temp_eflags = 0;
	unsigned long cs = 0;
	unsigned long mask = EFLG_CF | EFLG_PF | EFLG_AF | EFLG_ZF | EFLG_SF | EFLG_TF |
			     EFLG_IF | EFLG_DF | EFLG_OF | EFLG_IOPL | EFLG_NT | EFLG_RF |
			     EFLG_AC | EFLG_ID | (1 << 1); /* Last one is the reserved bit */
	unsigned long vm86_mask = EFLG_VM | EFLG_VIF | EFLG_VIP;
1970

1971
	/* TODO: Add stack limit check */
1972

1973
	rc = emulate_pop(ctxt, &temp_eip, ctxt->op_bytes);
1974

1975 1976
	if (rc != X86EMUL_CONTINUE)
		return rc;
1977

1978 1979
	if (temp_eip & ~0xffff)
		return emulate_gp(ctxt, 0);
1980

1981
	rc = emulate_pop(ctxt, &cs, ctxt->op_bytes);
1982

1983 1984
	if (rc != X86EMUL_CONTINUE)
		return rc;
1985

1986
	rc = emulate_pop(ctxt, &temp_eflags, ctxt->op_bytes);
1987

1988 1989
	if (rc != X86EMUL_CONTINUE)
		return rc;
1990

1991
	rc = load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS);
1992

1993 1994
	if (rc != X86EMUL_CONTINUE)
		return rc;
1995

1996
	ctxt->_eip = temp_eip;
1997 1998


1999
	if (ctxt->op_bytes == 4)
2000
		ctxt->eflags = ((temp_eflags & mask) | (ctxt->eflags & vm86_mask));
2001
	else if (ctxt->op_bytes == 2) {
2002 2003
		ctxt->eflags &= ~0xffff;
		ctxt->eflags |= temp_eflags;
2004
	}
2005 2006 2007 2008 2009

	ctxt->eflags &= ~EFLG_RESERVED_ZEROS_MASK; /* Clear reserved zeros */
	ctxt->eflags |= EFLG_RESERVED_ONE_MASK;

	return rc;
2010 2011
}

2012
static int em_iret(struct x86_emulate_ctxt *ctxt)
2013
{
2014 2015
	switch(ctxt->mode) {
	case X86EMUL_MODE_REAL:
2016
		return emulate_iret_real(ctxt);
2017 2018 2019 2020
	case X86EMUL_MODE_VM86:
	case X86EMUL_MODE_PROT16:
	case X86EMUL_MODE_PROT32:
	case X86EMUL_MODE_PROT64:
2021
	default:
2022 2023
		/* iret from protected mode is not implemented yet */
		return X86EMUL_UNHANDLEABLE;
2024 2025 2026
	}
}

2027 2028 2029
static int em_jmp_far(struct x86_emulate_ctxt *ctxt)
{
	int rc;
	unsigned short sel, old_sel;
	struct desc_struct old_desc, new_desc;
	const struct x86_emulate_ops *ops = ctxt->ops;
	u8 cpl = ctxt->ops->cpl(ctxt);

	/* Assignment of RIP may only fail in 64-bit mode */
	if (ctxt->mode == X86EMUL_MODE_PROT64)
		ops->get_segment(ctxt, &old_sel, &old_desc, NULL,
				 VCPU_SREG_CS);
2039

2040
	memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
2041

2042 2043
	rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl, false,
				       &new_desc);
2044 2045 2046
	if (rc != X86EMUL_CONTINUE)
		return rc;

2047
	rc = assign_eip_far(ctxt, ctxt->src.val, &new_desc);
2048
	if (rc != X86EMUL_CONTINUE) {
2049
		WARN_ON(ctxt->mode != X86EMUL_MODE_PROT64);
2050 2051 2052 2053 2054
		/* assigning eip failed; restore the old cs */
		ops->set_segment(ctxt, old_sel, &old_desc, 0, VCPU_SREG_CS);
		return rc;
	}
	return rc;
2055 2056
}

2057
static int em_jmp_abs(struct x86_emulate_ctxt *ctxt)
2058
{
2059 2060
	return assign_eip_near(ctxt, ctxt->src.val);
}
2061

static int em_call_near_abs(struct x86_emulate_ctxt *ctxt)
{
	int rc;
	long int old_eip;

	old_eip = ctxt->_eip;
	rc = assign_eip_near(ctxt, ctxt->src.val);
	if (rc != X86EMUL_CONTINUE)
		return rc;
	ctxt->src.val = old_eip;
	rc = em_push(ctxt);
2073
	return rc;
2074 2075
}
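
/*
 * CMPXCHG8B: compare EDX:EAX with the 64-bit destination.  On mismatch the
 * old value is loaded into EDX:EAX and ZF is cleared; on match ECX:EBX is
 * written back and ZF is set.
 */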

static int em_cmpxchg8b(struct x86_emulate_ctxt *ctxt)
2077
{
2078
	u64 old = ctxt->dst.orig_val64;
2079

2080 2081 2082
	if (ctxt->dst.bytes == 16)
		return X86EMUL_UNHANDLEABLE;

2083 2084 2085 2086
	if (((u32) (old >> 0) != (u32) reg_read(ctxt, VCPU_REGS_RAX)) ||
	    ((u32) (old >> 32) != (u32) reg_read(ctxt, VCPU_REGS_RDX))) {
		*reg_write(ctxt, VCPU_REGS_RAX) = (u32) (old >> 0);
		*reg_write(ctxt, VCPU_REGS_RDX) = (u32) (old >> 32);
2087
		ctxt->eflags &= ~EFLG_ZF;
2088
	} else {
2089 2090
		ctxt->dst.val64 = ((u64)reg_read(ctxt, VCPU_REGS_RCX) << 32) |
			(u32) reg_read(ctxt, VCPU_REGS_RBX);
2091

2092
		ctxt->eflags |= EFLG_ZF;
2093
	}
2094
	return X86EMUL_CONTINUE;
2095 2096
}

2097 2098
static int em_ret(struct x86_emulate_ctxt *ctxt)
{
	int rc;
	unsigned long eip;

	rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	return assign_eip_near(ctxt, eip);
2107 2108
}

2109
static int em_ret_far(struct x86_emulate_ctxt *ctxt)
2110 2111
{
	int rc;
2112 2113
	unsigned long eip, cs;
	u16 old_cs;
2114
	int cpl = ctxt->ops->cpl(ctxt);
	struct desc_struct old_desc, new_desc;
	const struct x86_emulate_ops *ops = ctxt->ops;

	if (ctxt->mode == X86EMUL_MODE_PROT64)
		ops->get_segment(ctxt, &old_cs, &old_desc, NULL,
				 VCPU_SREG_CS);
2121

2122
	rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
2123
	if (rc != X86EMUL_CONTINUE)
2124
		return rc;
2125
	rc = emulate_pop(ctxt, &cs, ctxt->op_bytes);
2126
	if (rc != X86EMUL_CONTINUE)
2127
		return rc;
2128 2129 2130
	/* Outer-privilege level return is not implemented */
	if (ctxt->mode >= X86EMUL_MODE_PROT16 && (cs & 3) > cpl)
		return X86EMUL_UNHANDLEABLE;
2131 2132 2133 2134
	rc = __load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS, 0, false,
				       &new_desc);
	if (rc != X86EMUL_CONTINUE)
		return rc;
2135
	rc = assign_eip_far(ctxt, eip, &new_desc);
2136
	if (rc != X86EMUL_CONTINUE) {
2137
		WARN_ON(ctxt->mode != X86EMUL_MODE_PROT64);
2138 2139
		ops->set_segment(ctxt, old_cs, &old_desc, 0, VCPU_SREG_CS);
	}
2140 2141 2142
	return rc;
}

static int em_ret_far_imm(struct x86_emulate_ctxt *ctxt)
{
	int rc;

	rc = em_ret_far(ctxt);
	if (rc != X86EMUL_CONTINUE)
		return rc;
	rsp_increment(ctxt, ctxt->src.val);
	return X86EMUL_CONTINUE;
}

2154 2155 2156
static int em_cmpxchg(struct x86_emulate_ctxt *ctxt)
{
	/* Save real source value, then compare EAX against destination. */
2157 2158
	ctxt->dst.orig_val = ctxt->dst.val;
	ctxt->dst.val = reg_read(ctxt, VCPU_REGS_RAX);
2159
	ctxt->src.orig_val = ctxt->src.val;
2160
	ctxt->src.val = ctxt->dst.orig_val;
2161
	fastop(ctxt, em_cmp);

	if (ctxt->eflags & EFLG_ZF) {
		/* Success: write back to memory. */
		ctxt->dst.val = ctxt->src.orig_val;
	} else {
		/* Failure: write the value we saw to EAX. */
		ctxt->dst.type = OP_REG;
2169
		ctxt->dst.addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX);
2170
		ctxt->dst.val = ctxt->dst.orig_val;
2171 2172 2173 2174
	}
	return X86EMUL_CONTINUE;
}

2175
static int em_lseg(struct x86_emulate_ctxt *ctxt)
2176
{
2177
	int seg = ctxt->src2.val;
2178 2179 2180
	unsigned short sel;
	int rc;

2181
	memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
2182

2183
	rc = load_segment_descriptor(ctxt, sel, seg);
2184 2185 2186
	if (rc != X86EMUL_CONTINUE)
		return rc;

2187
	ctxt->dst.val = ctxt->src.val;
2188 2189 2190
	return rc;
}
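
/*
 * Build the flat 4GB code and stack segments that SYSCALL/SYSENTER and
 * their return paths expect; the caller adjusts DPL and the L/D bits as
 * needed for the target mode.
 */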

static void
setup_syscalls_segments(struct x86_emulate_ctxt *ctxt,
			struct desc_struct *cs, struct desc_struct *ss)
{
	cs->l = 0;		/* will be adjusted later */
2196
	set_desc_base(cs, 0);	/* flat segment */
2197
	cs->g = 1;		/* 4kb granularity */
2198
	set_desc_limit(cs, 0xfffff);	/* 4GB limit */
2199 2200 2201
	cs->type = 0x0b;	/* Read, Execute, Accessed */
	cs->s = 1;
	cs->dpl = 0;		/* will be adjusted later */
2202 2203
	cs->p = 1;
	cs->d = 1;
2204
	cs->avl = 0;
2205

2206 2207
	set_desc_base(ss, 0);	/* flat segment */
	set_desc_limit(ss, 0xfffff);	/* 4GB limit */
2208 2209 2210
	ss->g = 1;		/* 4kb granularity */
	ss->s = 1;
	ss->type = 0x03;	/* Read/Write, Accessed */
2211
	ss->d = 1;		/* 32bit stack segment */
2212
	ss->dpl = 0;
2213
	ss->p = 1;
2214 2215
	ss->l = 0;
	ss->avl = 0;
2216 2217
}

2218 2219 2220 2221 2222
static bool vendor_intel(struct x86_emulate_ctxt *ctxt)
{
	u32 eax, ebx, ecx, edx;

	eax = ecx = 0;
2223 2224
	ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx);
	return ebx == X86EMUL_CPUID_VENDOR_GenuineIntel_ebx
2225 2226 2227 2228
		&& ecx == X86EMUL_CPUID_VENDOR_GenuineIntel_ecx
		&& edx == X86EMUL_CPUID_VENDOR_GenuineIntel_edx;
}

2229 2230
static bool em_syscall_is_enabled(struct x86_emulate_ctxt *ctxt)
{
2231
	const struct x86_emulate_ops *ops = ctxt->ops;
	u32 eax, ebx, ecx, edx;

	/*
	 * syscall should always be enabled in longmode - so only become
	 * vendor specific (cpuid) if other modes are active...
	 */
	if (ctxt->mode == X86EMUL_MODE_PROT64)
		return true;

	eax = 0x00000000;
	ecx = 0x00000000;
	ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx);
	/*
	 * Intel ("GenuineIntel")
	 * remark: Intel CPUs only support "syscall" in 64-bit long mode.
	 * A 64-bit guest running a 32-bit compat application will #UD.
	 * While this behaviour could be emulated to match the AMD
	 * response, AMD CPUs cannot behave like Intel.
	 */
	if (ebx == X86EMUL_CPUID_VENDOR_GenuineIntel_ebx &&
	    ecx == X86EMUL_CPUID_VENDOR_GenuineIntel_ecx &&
	    edx == X86EMUL_CPUID_VENDOR_GenuineIntel_edx)
		return false;

	/* AMD ("AuthenticAMD") */
	if (ebx == X86EMUL_CPUID_VENDOR_AuthenticAMD_ebx &&
	    ecx == X86EMUL_CPUID_VENDOR_AuthenticAMD_ecx &&
	    edx == X86EMUL_CPUID_VENDOR_AuthenticAMD_edx)
		return true;

	/* AMD ("AMDisbetter!") */
	if (ebx == X86EMUL_CPUID_VENDOR_AMDisbetterI_ebx &&
	    ecx == X86EMUL_CPUID_VENDOR_AMDisbetterI_ecx &&
	    edx == X86EMUL_CPUID_VENDOR_AMDisbetterI_edx)
		return true;
2268 2269 2270 2271 2272

	/* default: (not Intel, not AMD), apply Intel's stricter rules... */
	return false;
}

2273
static int em_syscall(struct x86_emulate_ctxt *ctxt)
2274
{
2275
	const struct x86_emulate_ops *ops = ctxt->ops;
2276
	struct desc_struct cs, ss;
2277
	u64 msr_data;
2278
	u16 cs_sel, ss_sel;
2279
	u64 efer = 0;
2280 2281

	/* syscall is not available in real mode */
2282
	if (ctxt->mode == X86EMUL_MODE_REAL ||
2283 2284
	    ctxt->mode == X86EMUL_MODE_VM86)
		return emulate_ud(ctxt);
2285

2286 2287 2288
	if (!(em_syscall_is_enabled(ctxt)))
		return emulate_ud(ctxt);

2289
	ops->get_msr(ctxt, MSR_EFER, &efer);
2290
	setup_syscalls_segments(ctxt, &cs, &ss);
2291

2292 2293 2294
	if (!(efer & EFER_SCE))
		return emulate_ud(ctxt);

2295
	ops->get_msr(ctxt, MSR_STAR, &msr_data);
2296
	msr_data >>= 32;
2297 2298
	cs_sel = (u16)(msr_data & 0xfffc);
	ss_sel = (u16)(msr_data + 8);
2299

2300
	if (efer & EFER_LMA) {
2301
		cs.d = 0;
2302 2303
		cs.l = 1;
	}
2304 2305
	ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
	ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);
2306

2307
	*reg_write(ctxt, VCPU_REGS_RCX) = ctxt->_eip;
2308
	if (efer & EFER_LMA) {
2309
#ifdef CONFIG_X86_64
2310
		*reg_write(ctxt, VCPU_REGS_R11) = ctxt->eflags;
2311

2312
		ops->get_msr(ctxt,
2313 2314
			     ctxt->mode == X86EMUL_MODE_PROT64 ?
			     MSR_LSTAR : MSR_CSTAR, &msr_data);
2315
		ctxt->_eip = msr_data;
2316

2317
		ops->get_msr(ctxt, MSR_SYSCALL_MASK, &msr_data);
2318
		ctxt->eflags &= ~msr_data;
2319
		ctxt->eflags |= EFLG_RESERVED_ONE_MASK;
2320 2321 2322
#endif
	} else {
		/* legacy mode */
2323
		ops->get_msr(ctxt, MSR_STAR, &msr_data);
2324
		ctxt->_eip = (u32)msr_data;
2325

2326
		ctxt->eflags &= ~(EFLG_VM | EFLG_IF);
2327 2328
	}

2329
	return X86EMUL_CONTINUE;
2330 2331
}

2332
static int em_sysenter(struct x86_emulate_ctxt *ctxt)
2333
{
2334
	const struct x86_emulate_ops *ops = ctxt->ops;
2335
	struct desc_struct cs, ss;
2336
	u64 msr_data;
2337
	u16 cs_sel, ss_sel;
2338
	u64 efer = 0;
2339

2340
	ops->get_msr(ctxt, MSR_EFER, &efer);
2341
	/* inject #GP if in real mode */
2342 2343
	if (ctxt->mode == X86EMUL_MODE_REAL)
		return emulate_gp(ctxt, 0);
2344

	/*
	 * Not recognized on AMD in compat mode (but is recognized in legacy
	 * mode).
	 */
	if ((ctxt->mode == X86EMUL_MODE_PROT32) && (efer & EFER_LMA)
	    && !vendor_intel(ctxt))
		return emulate_ud(ctxt);

2353
	/* sysenter/sysexit have not been tested in 64bit mode. */
2354
	if (ctxt->mode == X86EMUL_MODE_PROT64)
2355
		return X86EMUL_UNHANDLEABLE;
2356

2357
	setup_syscalls_segments(ctxt, &cs, &ss);
2358

2359
	ops->get_msr(ctxt, MSR_IA32_SYSENTER_CS, &msr_data);
2360 2361
	switch (ctxt->mode) {
	case X86EMUL_MODE_PROT32:
2362 2363
		if ((msr_data & 0xfffc) == 0x0)
			return emulate_gp(ctxt, 0);
2364 2365
		break;
	case X86EMUL_MODE_PROT64:
2366 2367
		if (msr_data == 0x0)
			return emulate_gp(ctxt, 0);
2368
		break;
2369 2370
	default:
		break;
2371 2372
	}

2373
	ctxt->eflags &= ~(EFLG_VM | EFLG_IF);
2374 2375 2376 2377
	cs_sel = (u16)msr_data;
	cs_sel &= ~SELECTOR_RPL_MASK;
	ss_sel = cs_sel + 8;
	ss_sel &= ~SELECTOR_RPL_MASK;
2378
	if (ctxt->mode == X86EMUL_MODE_PROT64 || (efer & EFER_LMA)) {
2379
		cs.d = 0;
2380 2381 2382
		cs.l = 1;
	}

2383 2384
	ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
	ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);
2385

2386
	ops->get_msr(ctxt, MSR_IA32_SYSENTER_EIP, &msr_data);
2387
	ctxt->_eip = msr_data;
2388

2389
	ops->get_msr(ctxt, MSR_IA32_SYSENTER_ESP, &msr_data);
2390
	*reg_write(ctxt, VCPU_REGS_RSP) = msr_data;
2391

2392
	return X86EMUL_CONTINUE;
2393 2394
}
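
/*
 * SYSEXIT: return to user mode.  The target CS/SS come from
 * MSR_IA32_SYSENTER_CS plus a fixed offset, while the new RIP and RSP are
 * taken from RDX and RCX respectively.
 */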

static int em_sysexit(struct x86_emulate_ctxt *ctxt)
{
2397
	const struct x86_emulate_ops *ops = ctxt->ops;
2398
	struct desc_struct cs, ss;
2399
	u64 msr_data, rcx, rdx;
2400
	int usermode;
	u16 cs_sel = 0, ss_sel = 0;
2402

2403 2404
	/* inject #GP if in real mode or Virtual 8086 mode */
	if (ctxt->mode == X86EMUL_MODE_REAL ||
2405 2406
	    ctxt->mode == X86EMUL_MODE_VM86)
		return emulate_gp(ctxt, 0);
2407

2408
	setup_syscalls_segments(ctxt, &cs, &ss);
2409

2410
	if ((ctxt->rex_prefix & 0x8) != 0x0)
2411 2412 2413 2414
		usermode = X86EMUL_MODE_PROT64;
	else
		usermode = X86EMUL_MODE_PROT32;

2415 2416 2417
	rcx = reg_read(ctxt, VCPU_REGS_RCX);
	rdx = reg_read(ctxt, VCPU_REGS_RDX);

2418 2419
	cs.dpl = 3;
	ss.dpl = 3;
2420
	ops->get_msr(ctxt, MSR_IA32_SYSENTER_CS, &msr_data);
2421 2422
	switch (usermode) {
	case X86EMUL_MODE_PROT32:
2423
		cs_sel = (u16)(msr_data + 16);
2424 2425
		if ((msr_data & 0xfffc) == 0x0)
			return emulate_gp(ctxt, 0);
2426
		ss_sel = (u16)(msr_data + 24);
2427 2428
		rcx = (u32)rcx;
		rdx = (u32)rdx;
2429 2430
		break;
	case X86EMUL_MODE_PROT64:
2431
		cs_sel = (u16)(msr_data + 32);
2432 2433
		if (msr_data == 0x0)
			return emulate_gp(ctxt, 0);
2434 2435
		ss_sel = cs_sel + 8;
		cs.d = 0;
2436
		cs.l = 1;
2437 2438 2439
		if (is_noncanonical_address(rcx) ||
		    is_noncanonical_address(rdx))
			return emulate_gp(ctxt, 0);
2440 2441
		break;
	}
2442 2443
	cs_sel |= SELECTOR_RPL_MASK;
	ss_sel |= SELECTOR_RPL_MASK;
2444

2445 2446
	ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
	ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);
2447

2448 2449
	ctxt->_eip = rdx;
	*reg_write(ctxt, VCPU_REGS_RSP) = rcx;
2450

2451
	return X86EMUL_CONTINUE;
2452 2453
}

2454
static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt)
{
	int iopl;
	if (ctxt->mode == X86EMUL_MODE_REAL)
		return false;
	if (ctxt->mode == X86EMUL_MODE_VM86)
		return true;
	iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT;
2462
	return ctxt->ops->cpl(ctxt) > iopl;
2463 2464 2465 2466 2467
}
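
/*
 * Consult the I/O permission bitmap in the TSS: access is allowed only if
 * all 'len' permission bits starting at 'port' are clear.
 */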

static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt,
					    u16 port, u16 len)
{
2468
	const struct x86_emulate_ops *ops = ctxt->ops;
2469
	struct desc_struct tr_seg;
2470
	u32 base3;
2471
	int r;
2472
	u16 tr, io_bitmap_ptr, perm, bit_idx = port & 0x7;
2473
	unsigned mask = (1 << len) - 1;
2474
	unsigned long base;
2475

2476
	ops->get_segment(ctxt, &tr, &tr_seg, &base3, VCPU_SREG_TR);
2477
	if (!tr_seg.p)
2478
		return false;
2479
	if (desc_limit_scaled(&tr_seg) < 103)
2480
		return false;
2481 2482 2483 2484
	base = get_desc_base(&tr_seg);
#ifdef CONFIG_X86_64
	base |= ((u64)base3) << 32;
#endif
2485
	r = ops->read_std(ctxt, base + 102, &io_bitmap_ptr, 2, NULL);
2486 2487
	if (r != X86EMUL_CONTINUE)
		return false;
2488
	if (io_bitmap_ptr + port/8 > desc_limit_scaled(&tr_seg))
2489
		return false;
2490
	r = ops->read_std(ctxt, base + io_bitmap_ptr + port/8, &perm, 2, NULL);
	if (r != X86EMUL_CONTINUE)
		return false;
	if ((perm >> bit_idx) & mask)
		return false;
	return true;
}

static bool emulator_io_permited(struct x86_emulate_ctxt *ctxt,
				 u16 port, u16 len)
{
2501 2502 2503
	if (ctxt->perm_ok)
		return true;

2504 2505
	if (emulator_bad_iopl(ctxt))
		if (!emulator_io_port_access_allowed(ctxt, port, len))
2506
			return false;
2507 2508 2509

	ctxt->perm_ok = true;

2510 2511 2512
	return true;
}

2513 2514 2515
static void save_state_to_tss16(struct x86_emulate_ctxt *ctxt,
				struct tss_segment_16 *tss)
{
2516
	tss->ip = ctxt->_eip;
2517
	tss->flag = ctxt->eflags;
	tss->ax = reg_read(ctxt, VCPU_REGS_RAX);
	tss->cx = reg_read(ctxt, VCPU_REGS_RCX);
	tss->dx = reg_read(ctxt, VCPU_REGS_RDX);
	tss->bx = reg_read(ctxt, VCPU_REGS_RBX);
	tss->sp = reg_read(ctxt, VCPU_REGS_RSP);
	tss->bp = reg_read(ctxt, VCPU_REGS_RBP);
	tss->si = reg_read(ctxt, VCPU_REGS_RSI);
	tss->di = reg_read(ctxt, VCPU_REGS_RDI);
2526

2527 2528 2529 2530 2531
	tss->es = get_segment_selector(ctxt, VCPU_SREG_ES);
	tss->cs = get_segment_selector(ctxt, VCPU_SREG_CS);
	tss->ss = get_segment_selector(ctxt, VCPU_SREG_SS);
	tss->ds = get_segment_selector(ctxt, VCPU_SREG_DS);
	tss->ldt = get_segment_selector(ctxt, VCPU_SREG_LDTR);
}

static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt,
				 struct tss_segment_16 *tss)
{
	int ret;
2538
	u8 cpl;
2539

2540
	ctxt->_eip = tss->ip;
2541
	ctxt->eflags = tss->flag | 2;
	*reg_write(ctxt, VCPU_REGS_RAX) = tss->ax;
	*reg_write(ctxt, VCPU_REGS_RCX) = tss->cx;
	*reg_write(ctxt, VCPU_REGS_RDX) = tss->dx;
	*reg_write(ctxt, VCPU_REGS_RBX) = tss->bx;
	*reg_write(ctxt, VCPU_REGS_RSP) = tss->sp;
	*reg_write(ctxt, VCPU_REGS_RBP) = tss->bp;
	*reg_write(ctxt, VCPU_REGS_RSI) = tss->si;
	*reg_write(ctxt, VCPU_REGS_RDI) = tss->di;
2550 2551 2552 2553 2554

	/*
	 * SDM says that segment selectors are loaded before segment
	 * descriptors
	 */
2555 2556 2557 2558 2559
	set_segment_selector(ctxt, tss->ldt, VCPU_SREG_LDTR);
	set_segment_selector(ctxt, tss->es, VCPU_SREG_ES);
	set_segment_selector(ctxt, tss->cs, VCPU_SREG_CS);
	set_segment_selector(ctxt, tss->ss, VCPU_SREG_SS);
	set_segment_selector(ctxt, tss->ds, VCPU_SREG_DS);
2560

2561 2562
	cpl = tss->cs & 3;

2563
	/*
	 * Now load segment descriptors. If a fault happens at this stage
	 * it is handled in the context of the new task
	 */
2567 2568
	ret = __load_segment_descriptor(ctxt, tss->ldt, VCPU_SREG_LDTR, cpl,
					true, NULL);
2569 2570
	if (ret != X86EMUL_CONTINUE)
		return ret;
2571 2572
	ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl,
					true, NULL);
2573 2574
	if (ret != X86EMUL_CONTINUE)
		return ret;
2575 2576
	ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl,
					true, NULL);
2577 2578
	if (ret != X86EMUL_CONTINUE)
		return ret;
2579 2580
	ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl,
					true, NULL);
2581 2582
	if (ret != X86EMUL_CONTINUE)
		return ret;
2583 2584
	ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl,
					true, NULL);
	if (ret != X86EMUL_CONTINUE)
		return ret;

	return X86EMUL_CONTINUE;
}

static int task_switch_16(struct x86_emulate_ctxt *ctxt,
			  u16 tss_selector, u16 old_tss_sel,
			  ulong old_tss_base, struct desc_struct *new_desc)
{
2595
	const struct x86_emulate_ops *ops = ctxt->ops;
2596 2597
	struct tss_segment_16 tss_seg;
	int ret;
2598
	u32 new_tss_base = get_desc_base(new_desc);
2599

2600
	ret = ops->read_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg,
2601
			    &ctxt->exception);
2602
	if (ret != X86EMUL_CONTINUE)
2603 2604 2605
		/* FIXME: need to provide precise fault address */
		return ret;

2606
	save_state_to_tss16(ctxt, &tss_seg);
2607

2608
	ret = ops->write_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg,
2609
			     &ctxt->exception);
2610
	if (ret != X86EMUL_CONTINUE)
2611 2612 2613
		/* FIXME: need to provide precise fault address */
		return ret;

2614
	ret = ops->read_std(ctxt, new_tss_base, &tss_seg, sizeof tss_seg,
2615
			    &ctxt->exception);
2616
	if (ret != X86EMUL_CONTINUE)
		/* FIXME: need to provide precise fault address */
		return ret;

	if (old_tss_sel != 0xffff) {
		tss_seg.prev_task_link = old_tss_sel;

2623
		ret = ops->write_std(ctxt, new_tss_base,
2624 2625
				     &tss_seg.prev_task_link,
				     sizeof tss_seg.prev_task_link,
2626
				     &ctxt->exception);
2627
		if (ret != X86EMUL_CONTINUE)
2628 2629 2630 2631
			/* FIXME: need to provide precise fault address */
			return ret;
	}

2632
	return load_state_from_tss16(ctxt, &tss_seg);
2633 2634 2635 2636 2637
}

static void save_state_to_tss32(struct x86_emulate_ctxt *ctxt,
				struct tss_segment_32 *tss)
{
2638
	/* CR3 and ldt selector are not saved intentionally */
2639
	tss->eip = ctxt->_eip;
2640
	tss->eflags = ctxt->eflags;
	tss->eax = reg_read(ctxt, VCPU_REGS_RAX);
	tss->ecx = reg_read(ctxt, VCPU_REGS_RCX);
	tss->edx = reg_read(ctxt, VCPU_REGS_RDX);
	tss->ebx = reg_read(ctxt, VCPU_REGS_RBX);
	tss->esp = reg_read(ctxt, VCPU_REGS_RSP);
	tss->ebp = reg_read(ctxt, VCPU_REGS_RBP);
	tss->esi = reg_read(ctxt, VCPU_REGS_RSI);
	tss->edi = reg_read(ctxt, VCPU_REGS_RDI);
2649

	tss->es = get_segment_selector(ctxt, VCPU_SREG_ES);
	tss->cs = get_segment_selector(ctxt, VCPU_SREG_CS);
	tss->ss = get_segment_selector(ctxt, VCPU_SREG_SS);
	tss->ds = get_segment_selector(ctxt, VCPU_SREG_DS);
	tss->fs = get_segment_selector(ctxt, VCPU_SREG_FS);
	tss->gs = get_segment_selector(ctxt, VCPU_SREG_GS);
}

static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt,
				 struct tss_segment_32 *tss)
{
	int ret;
2662
	u8 cpl;
2663

2664
	if (ctxt->ops->set_cr(ctxt, 3, tss->cr3))
2665
		return emulate_gp(ctxt, 0);
2666
	ctxt->_eip = tss->eip;
2667
	ctxt->eflags = tss->eflags | 2;
2668 2669

	/* General purpose registers */
	*reg_write(ctxt, VCPU_REGS_RAX) = tss->eax;
	*reg_write(ctxt, VCPU_REGS_RCX) = tss->ecx;
	*reg_write(ctxt, VCPU_REGS_RDX) = tss->edx;
	*reg_write(ctxt, VCPU_REGS_RBX) = tss->ebx;
	*reg_write(ctxt, VCPU_REGS_RSP) = tss->esp;
	*reg_write(ctxt, VCPU_REGS_RBP) = tss->ebp;
	*reg_write(ctxt, VCPU_REGS_RSI) = tss->esi;
	*reg_write(ctxt, VCPU_REGS_RDI) = tss->edi;
2678 2679 2680

	/*
	 * SDM says that segment selectors are loaded before segment
2681 2682
	 * descriptors.  This is important because CPL checks will
	 * use CS.RPL.
2683
	 */
	set_segment_selector(ctxt, tss->ldt_selector, VCPU_SREG_LDTR);
	set_segment_selector(ctxt, tss->es, VCPU_SREG_ES);
	set_segment_selector(ctxt, tss->cs, VCPU_SREG_CS);
	set_segment_selector(ctxt, tss->ss, VCPU_SREG_SS);
	set_segment_selector(ctxt, tss->ds, VCPU_SREG_DS);
	set_segment_selector(ctxt, tss->fs, VCPU_SREG_FS);
	set_segment_selector(ctxt, tss->gs, VCPU_SREG_GS);
2691

2692 2693 2694 2695 2696
	/*
	 * If we're switching between Protected Mode and VM86, we need to make
	 * sure to update the mode before loading the segment descriptors so
	 * that the selectors are interpreted correctly.
	 */
2697
	if (ctxt->eflags & X86_EFLAGS_VM) {
2698
		ctxt->mode = X86EMUL_MODE_VM86;
2699 2700
		cpl = 3;
	} else {
2701
		ctxt->mode = X86EMUL_MODE_PROT32;
2702 2703
		cpl = tss->cs & 3;
	}
2704

2705 2706 2707 2708
	/*
	 * Now load segment descriptors. If a fault happens at this stage
	 * it is handled in the context of the new task
	 */
2709 2710
	ret = __load_segment_descriptor(ctxt, tss->ldt_selector, VCPU_SREG_LDTR,
					cpl, true, NULL);
2711 2712
	if (ret != X86EMUL_CONTINUE)
		return ret;
2713 2714
	ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl,
					true, NULL);
2715 2716
	if (ret != X86EMUL_CONTINUE)
		return ret;
2717 2718
	ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl,
					true, NULL);
2719 2720
	if (ret != X86EMUL_CONTINUE)
		return ret;
2721 2722
	ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl,
					true, NULL);
2723 2724
	if (ret != X86EMUL_CONTINUE)
		return ret;
2725 2726
	ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl,
					true, NULL);
2727 2728
	if (ret != X86EMUL_CONTINUE)
		return ret;
2729 2730
	ret = __load_segment_descriptor(ctxt, tss->fs, VCPU_SREG_FS, cpl,
					true, NULL);
2731 2732
	if (ret != X86EMUL_CONTINUE)
		return ret;
2733 2734
	ret = __load_segment_descriptor(ctxt, tss->gs, VCPU_SREG_GS, cpl,
					true, NULL);
	if (ret != X86EMUL_CONTINUE)
		return ret;

	return X86EMUL_CONTINUE;
}

static int task_switch_32(struct x86_emulate_ctxt *ctxt,
			  u16 tss_selector, u16 old_tss_sel,
			  ulong old_tss_base, struct desc_struct *new_desc)
{
2745
	const struct x86_emulate_ops *ops = ctxt->ops;
2746 2747
	struct tss_segment_32 tss_seg;
	int ret;
2748
	u32 new_tss_base = get_desc_base(new_desc);
2749 2750
	u32 eip_offset = offsetof(struct tss_segment_32, eip);
	u32 ldt_sel_offset = offsetof(struct tss_segment_32, ldt_selector);
2751

2752
	ret = ops->read_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg,
2753
			    &ctxt->exception);
2754
	if (ret != X86EMUL_CONTINUE)
2755 2756 2757
		/* FIXME: need to provide precise fault address */
		return ret;

2758
	save_state_to_tss32(ctxt, &tss_seg);
2759

2760 2761 2762
	/* Only GP registers and segment selectors are saved */
	ret = ops->write_std(ctxt, old_tss_base + eip_offset, &tss_seg.eip,
			     ldt_sel_offset - eip_offset, &ctxt->exception);
2763
	if (ret != X86EMUL_CONTINUE)
2764 2765 2766
		/* FIXME: need to provide precise fault address */
		return ret;

2767
	ret = ops->read_std(ctxt, new_tss_base, &tss_seg, sizeof tss_seg,
2768
			    &ctxt->exception);
2769
	if (ret != X86EMUL_CONTINUE)
		/* FIXME: need to provide precise fault address */
		return ret;

	if (old_tss_sel != 0xffff) {
		tss_seg.prev_task_link = old_tss_sel;

2776
		ret = ops->write_std(ctxt, new_tss_base,
2777 2778
				     &tss_seg.prev_task_link,
				     sizeof tss_seg.prev_task_link,
2779
				     &ctxt->exception);
2780
		if (ret != X86EMUL_CONTINUE)
2781 2782 2783 2784
			/* FIXME: need to provide precise fault address */
			return ret;
	}

2785
	return load_state_from_tss32(ctxt, &tss_seg);
2786 2787 2788
}

static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt,
2789
				   u16 tss_selector, int idt_index, int reason,
2790
				   bool has_error_code, u32 error_code)
2791
{
2792
	const struct x86_emulate_ops *ops = ctxt->ops;
2793 2794
	struct desc_struct curr_tss_desc, next_tss_desc;
	int ret;
2795
	u16 old_tss_sel = get_segment_selector(ctxt, VCPU_SREG_TR);
2796
	ulong old_tss_base =
2797
		ops->get_cached_segment_base(ctxt, VCPU_SREG_TR);
2798
	u32 desc_limit;
2799
	ulong desc_addr;
2800 2801 2802

	/* FIXME: old_tss_base == ~0 ? */

2803
	ret = read_segment_descriptor(ctxt, tss_selector, &next_tss_desc, &desc_addr);
2804 2805
	if (ret != X86EMUL_CONTINUE)
		return ret;
2806
	ret = read_segment_descriptor(ctxt, old_tss_sel, &curr_tss_desc, &desc_addr);
2807 2808 2809 2810 2811
	if (ret != X86EMUL_CONTINUE)
		return ret;

	/* FIXME: check that next_tss_desc is tss */

2812 2813 2814 2815 2816
	/*
	 * Check privileges. The three cases are task switch caused by...
	 *
	 * 1. jmp/call/int to task gate: Check against DPL of the task gate
	 * 2. Exception/IRQ/iret: No check is performed
2817 2818
	 * 3. jmp/call to TSS/task-gate: No check is performed since the
	 *    hardware checks it before exiting.
	 */
	if (reason == TASK_SWITCH_GATE) {
		if (idt_index != -1) {
			/* Software interrupts */
			struct desc_struct task_gate_desc;
			int dpl;

			ret = read_interrupt_descriptor(ctxt, idt_index,
							&task_gate_desc);
			if (ret != X86EMUL_CONTINUE)
				return ret;

			dpl = task_gate_desc.dpl;
			if ((tss_selector & 3) > dpl || ops->cpl(ctxt) > dpl)
				return emulate_gp(ctxt, (idt_index << 3) | 0x2);
		}
2835 2836
	}

2837 2838 2839 2840
	desc_limit = desc_limit_scaled(&next_tss_desc);
	if (!next_tss_desc.p ||
	    ((desc_limit < 0x67 && (next_tss_desc.type & 8)) ||
	     desc_limit < 0x2b)) {
2841
		return emulate_ts(ctxt, tss_selector & 0xfffc);
2842 2843 2844 2845
	}

	if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) {
		curr_tss_desc.type &= ~(1 << 1); /* clear busy flag */
2846
		write_segment_descriptor(ctxt, old_tss_sel, &curr_tss_desc);
	}

	if (reason == TASK_SWITCH_IRET)
		ctxt->eflags = ctxt->eflags & ~X86_EFLAGS_NT;

	/* set back link to prev task only if NT bit is set in eflags
	   note that old_tss_sel is not used after this point */
2854 2855 2856 2857
	if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE)
		old_tss_sel = 0xffff;

	if (next_tss_desc.type & 8)
2858
		ret = task_switch_32(ctxt, tss_selector, old_tss_sel,
2859 2860
				     old_tss_base, &next_tss_desc);
	else
2861
		ret = task_switch_16(ctxt, tss_selector, old_tss_sel,
2862
				     old_tss_base, &next_tss_desc);
2863 2864
	if (ret != X86EMUL_CONTINUE)
		return ret;

	if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE)
		ctxt->eflags = ctxt->eflags | X86_EFLAGS_NT;

	if (reason != TASK_SWITCH_IRET) {
		next_tss_desc.type |= (1 << 1); /* set busy flag */
2871
		write_segment_descriptor(ctxt, tss_selector, &next_tss_desc);
2872 2873
	}

2874
	ops->set_cr(ctxt, 0,  ops->get_cr(ctxt, 0) | X86_CR0_TS);
2875
	ops->set_segment(ctxt, tss_selector, &next_tss_desc, 0, VCPU_SREG_TR);
2876

2877
	if (has_error_code) {
2878 2879 2880
		ctxt->op_bytes = ctxt->ad_bytes = (next_tss_desc.type & 8) ? 4 : 2;
		ctxt->lock_prefix = 0;
		ctxt->src.val = (unsigned long) error_code;
2881
		ret = em_push(ctxt);
2882 2883
	}

2884 2885 2886 2887
	return ret;
}

int emulator_task_switch(struct x86_emulate_ctxt *ctxt,
2888
			 u16 tss_selector, int idt_index, int reason,
2889
			 bool has_error_code, u32 error_code)
2890 2891 2892
{
	int rc;

2893
	invalidate_registers(ctxt);
2894 2895
	ctxt->_eip = ctxt->eip;
	ctxt->dst.type = OP_NONE;
2896

2897
	rc = emulator_do_task_switch(ctxt, tss_selector, idt_index, reason,
2898
				     has_error_code, error_code);
2899

2900
	if (rc == X86EMUL_CONTINUE) {
2901
		ctxt->eip = ctxt->_eip;
2902 2903
		writeback_registers(ctxt);
	}
2904

2905
	return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK;
2906 2907
}
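
/*
 * Advance a string-instruction index register (SI/DI) by one element in
 * the direction given by EFLAGS.DF and refresh the operand's effective
 * address.
 */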

static void string_addr_inc(struct x86_emulate_ctxt *ctxt, int reg,
		struct operand *op)
2910
{
2911
	int df = (ctxt->eflags & EFLG_DF) ? -op->count : op->count;
2912

2913 2914
	register_address_increment(ctxt, reg_rmw(ctxt, reg), df * op->bytes);
	op->addr.mem.ea = register_address(ctxt, reg_read(ctxt, reg));
2915 2916
}
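
/*
 * DAS: decimal adjust AL after subtraction.  AF and CF are recomputed and
 * a dummy OR against zero is used to refresh PF, ZF and SF.
 */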

static int em_das(struct x86_emulate_ctxt *ctxt)
{
	u8 al, old_al;
	bool af, cf, old_cf;

	cf = ctxt->eflags & X86_EFLAGS_CF;
2923
	al = ctxt->dst.val;

	old_al = al;
	old_cf = cf;
	cf = false;
	af = ctxt->eflags & X86_EFLAGS_AF;
	if ((al & 0x0f) > 9 || af) {
		al -= 6;
		cf = old_cf | (al >= 250);
		af = true;
	} else {
		af = false;
	}
	if (old_al > 0x99 || old_cf) {
		al -= 0x60;
		cf = true;
	}

2941
	ctxt->dst.val = al;
2942
	/* Set PF, ZF, SF */
2943 2944 2945
	ctxt->src.type = OP_IMM;
	ctxt->src.val = 0;
	ctxt->src.bytes = 1;
2946
	fastop(ctxt, em_or);
	ctxt->eflags &= ~(X86_EFLAGS_AF | X86_EFLAGS_CF);
	if (cf)
		ctxt->eflags |= X86_EFLAGS_CF;
	if (af)
		ctxt->eflags |= X86_EFLAGS_AF;
	return X86EMUL_CONTINUE;
}

static int em_aam(struct x86_emulate_ctxt *ctxt)
{
	u8 al, ah;

	if (ctxt->src.val == 0)
		return emulate_de(ctxt);

	al = ctxt->dst.val & 0xff;
	ah = al / ctxt->src.val;
	al %= ctxt->src.val;

	ctxt->dst.val = (ctxt->dst.val & 0xffff0000) | al | (ah << 8);

	/* Set PF, ZF, SF */
	ctxt->src.type = OP_IMM;
	ctxt->src.val = 0;
	ctxt->src.bytes = 1;
	fastop(ctxt, em_or);

	return X86EMUL_CONTINUE;
}

static int em_aad(struct x86_emulate_ctxt *ctxt)
{
	u8 al = ctxt->dst.val & 0xff;
	u8 ah = (ctxt->dst.val >> 8) & 0xff;

	al = (al + (ah * ctxt->src.val)) & 0xff;

	ctxt->dst.val = (ctxt->dst.val & 0xffff0000) | al;

2986 2987 2988 2989 2990
	/* Set PF, ZF, SF */
	ctxt->src.type = OP_IMM;
	ctxt->src.val = 0;
	ctxt->src.bytes = 1;
	fastop(ctxt, em_or);
2991 2992 2993 2994

	return X86EMUL_CONTINUE;
}

2995 2996
static int em_call(struct x86_emulate_ctxt *ctxt)
{
2997
	int rc;
2998 2999 3000
	long rel = ctxt->src.val;

	ctxt->src.val = (unsigned long)ctxt->_eip;
3001 3002 3003
	rc = jmp_rel(ctxt, rel);
	if (rc != X86EMUL_CONTINUE)
		return rc;
3004 3005 3006
	return em_push(ctxt);
}

3007 3008 3009 3010 3011
static int em_call_far(struct x86_emulate_ctxt *ctxt)
{
	u16 sel, old_cs;
	ulong old_eip;
	int rc;
3012 3013 3014
	struct desc_struct old_desc, new_desc;
	const struct x86_emulate_ops *ops = ctxt->ops;
	int cpl = ctxt->ops->cpl(ctxt);
3015

3016
	old_eip = ctxt->_eip;
3017
	ops->get_segment(ctxt, &old_cs, &old_desc, NULL, VCPU_SREG_CS);
3018

3019
	memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
3020 3021 3022
	rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl, false,
				       &new_desc);
	if (rc != X86EMUL_CONTINUE)
3023 3024
		return X86EMUL_CONTINUE;

3025
	rc = assign_eip_far(ctxt, ctxt->src.val, &new_desc);
3026 3027
	if (rc != X86EMUL_CONTINUE)
		goto fail;
3028

3029
	ctxt->src.val = old_cs;
3030
	rc = em_push(ctxt);
3031
	if (rc != X86EMUL_CONTINUE)
3032
		goto fail;
3033

3034
	ctxt->src.val = old_eip;
	rc = em_push(ctxt);
	/* If we failed, we tainted the memory, but at the very least we
	   should restore cs */
	if (rc != X86EMUL_CONTINUE)
		goto fail;
	return rc;
fail:
	ops->set_segment(ctxt, old_cs, &old_desc, 0, VCPU_SREG_CS);
	return rc;

3045 3046
}

3047 3048 3049
static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt)
{
	int rc;
3050
	unsigned long eip;
3051

3052 3053 3054 3055
	rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
	if (rc != X86EMUL_CONTINUE)
		return rc;
	rc = assign_eip_near(ctxt, eip);
3056 3057
	if (rc != X86EMUL_CONTINUE)
		return rc;
3058
	rsp_increment(ctxt, ctxt->src.val);
3059 3060 3061
	return X86EMUL_CONTINUE;
}

3062 3063 3064
static int em_xchg(struct x86_emulate_ctxt *ctxt)
{
	/* Write back the register source. */
3065 3066
	ctxt->src.val = ctxt->dst.val;
	write_register_operand(&ctxt->src);
3067 3068

	/* Write back the memory destination with implicit LOCK prefix. */
3069 3070
	ctxt->dst.val = ctxt->src.orig_val;
	ctxt->lock_prefix = 1;
3071 3072 3073
	return X86EMUL_CONTINUE;
}

3074 3075
static int em_imul_3op(struct x86_emulate_ctxt *ctxt)
{
3076
	ctxt->dst.val = ctxt->src2.val;
3077
	return fastop(ctxt, em_imul);
3078 3079
}

3080 3081
static int em_cwd(struct x86_emulate_ctxt *ctxt)
{
3082 3083
	ctxt->dst.type = OP_REG;
	ctxt->dst.bytes = ctxt->src.bytes;
3084
	ctxt->dst.addr.reg = reg_rmw(ctxt, VCPU_REGS_RDX);
3085
	ctxt->dst.val = ~((ctxt->src.val >> (ctxt->src.bytes * 8 - 1)) - 1);
3086 3087 3088 3089

	return X86EMUL_CONTINUE;
}

3090 3091 3092 3093
static int em_rdtsc(struct x86_emulate_ctxt *ctxt)
{
	u64 tsc = 0;

3094
	ctxt->ops->get_msr(ctxt, MSR_IA32_TSC, &tsc);
3095 3096
	*reg_write(ctxt, VCPU_REGS_RAX) = (u32)tsc;
	*reg_write(ctxt, VCPU_REGS_RDX) = tsc >> 32;
3097 3098 3099
	return X86EMUL_CONTINUE;
}

3100 3101 3102 3103
static int em_rdpmc(struct x86_emulate_ctxt *ctxt)
{
	u64 pmc;

3104
	if (ctxt->ops->read_pmc(ctxt, reg_read(ctxt, VCPU_REGS_RCX), &pmc))
3105
		return emulate_gp(ctxt, 0);
3106 3107
	*reg_write(ctxt, VCPU_REGS_RAX) = (u32)pmc;
	*reg_write(ctxt, VCPU_REGS_RDX) = pmc >> 32;
3108 3109 3110
	return X86EMUL_CONTINUE;
}

3111 3112
static int em_mov(struct x86_emulate_ctxt *ctxt)
{
3113
	memcpy(ctxt->dst.valptr, ctxt->src.valptr, sizeof(ctxt->src.valptr));
3114 3115 3116
	return X86EMUL_CONTINUE;
}

#define FFL(x) bit(X86_FEATURE_##x)

static int em_movbe(struct x86_emulate_ctxt *ctxt)
{
	u32 ebx, ecx, edx, eax = 1;
	u16 tmp;

	/*
	 * Check MOVBE is set in the guest-visible CPUID leaf.
	 */
	ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx);
	if (!(ecx & FFL(MOVBE)))
		return emulate_ud(ctxt);

	switch (ctxt->op_bytes) {
	case 2:
		/*
		 * From MOVBE definition: "...When the operand size is 16 bits,
		 * the upper word of the destination register remains unchanged
		 * ..."
		 *
		 * Both casting ->valptr and ->val to u16 breaks strict aliasing
		 * rules so we have to do the operation almost per hand.
		 */
		tmp = (u16)ctxt->src.val;
		ctxt->dst.val &= ~0xffffUL;
		ctxt->dst.val |= (unsigned long)swab16(tmp);
		break;
	case 4:
		ctxt->dst.val = swab32((u32)ctxt->src.val);
		break;
	case 8:
		ctxt->dst.val = swab64(ctxt->src.val);
		break;
	default:
3152
		BUG();
	}
	return X86EMUL_CONTINUE;
}

static int em_cr_write(struct x86_emulate_ctxt *ctxt)
{
	if (ctxt->ops->set_cr(ctxt, ctxt->modrm_reg, ctxt->src.val))
		return emulate_gp(ctxt, 0);

	/* Disable writeback. */
	ctxt->dst.type = OP_NONE;
	return X86EMUL_CONTINUE;
}

static int em_dr_write(struct x86_emulate_ctxt *ctxt)
{
	unsigned long val;

	if (ctxt->mode == X86EMUL_MODE_PROT64)
		val = ctxt->src.val & ~0ULL;
	else
		val = ctxt->src.val & ~0U;

	/* #UD condition is already handled. */
	if (ctxt->ops->set_dr(ctxt, ctxt->modrm_reg, val) < 0)
		return emulate_gp(ctxt, 0);

	/* Disable writeback. */
	ctxt->dst.type = OP_NONE;
	return X86EMUL_CONTINUE;
}

3185 3186 3187 3188
static int em_wrmsr(struct x86_emulate_ctxt *ctxt)
{
	u64 msr_data;

3189 3190 3191
	msr_data = (u32)reg_read(ctxt, VCPU_REGS_RAX)
		| ((u64)reg_read(ctxt, VCPU_REGS_RDX) << 32);
	if (ctxt->ops->set_msr(ctxt, reg_read(ctxt, VCPU_REGS_RCX), msr_data))
		return emulate_gp(ctxt, 0);

	return X86EMUL_CONTINUE;
}

static int em_rdmsr(struct x86_emulate_ctxt *ctxt)
{
	u64 msr_data;

3201
	if (ctxt->ops->get_msr(ctxt, reg_read(ctxt, VCPU_REGS_RCX), &msr_data))
3202 3203
		return emulate_gp(ctxt, 0);

3204 3205
	*reg_write(ctxt, VCPU_REGS_RAX) = (u32)msr_data;
	*reg_write(ctxt, VCPU_REGS_RDX) = msr_data >> 32;
3206 3207 3208
	return X86EMUL_CONTINUE;
}

3209 3210
static int em_mov_rm_sreg(struct x86_emulate_ctxt *ctxt)
{
3211
	if (ctxt->modrm_reg > VCPU_SREG_GS)
3212 3213
		return emulate_ud(ctxt);

3214
	ctxt->dst.val = get_segment_selector(ctxt, ctxt->modrm_reg);
3215 3216
	if (ctxt->dst.bytes == 4 && ctxt->dst.type == OP_MEM)
		ctxt->dst.bytes = 2;
3217 3218 3219 3220 3221
	return X86EMUL_CONTINUE;
}

static int em_mov_sreg_rm(struct x86_emulate_ctxt *ctxt)
{
3222
	u16 sel = ctxt->src.val;
3223

3224
	if (ctxt->modrm_reg == VCPU_SREG_CS || ctxt->modrm_reg > VCPU_SREG_GS)
3225 3226
		return emulate_ud(ctxt);

3227
	if (ctxt->modrm_reg == VCPU_SREG_SS)
3228 3229 3230
		ctxt->interruptibility = KVM_X86_SHADOW_INT_MOV_SS;

	/* Disable writeback. */
3231 3232
	ctxt->dst.type = OP_NONE;
	return load_segment_descriptor(ctxt, sel, ctxt->modrm_reg);
3233 3234
}

static int em_lldt(struct x86_emulate_ctxt *ctxt)
{
	u16 sel = ctxt->src.val;

	/* Disable writeback. */
	ctxt->dst.type = OP_NONE;
	return load_segment_descriptor(ctxt, sel, VCPU_SREG_LDTR);
}

static int em_ltr(struct x86_emulate_ctxt *ctxt)
{
	u16 sel = ctxt->src.val;

	/* Disable writeback. */
	ctxt->dst.type = OP_NONE;
	return load_segment_descriptor(ctxt, sel, VCPU_SREG_TR);
}

3253 3254
static int em_invlpg(struct x86_emulate_ctxt *ctxt)
{
3255 3256 3257
	int rc;
	ulong linear;

3258
	rc = linearize(ctxt, ctxt->src.addr.mem, 1, false, &linear);
3259
	if (rc == X86EMUL_CONTINUE)
3260
		ctxt->ops->invlpg(ctxt, linear);
3261
	/* Disable writeback. */
3262
	ctxt->dst.type = OP_NONE;
3263 3264 3265
	return X86EMUL_CONTINUE;
}

static int em_clts(struct x86_emulate_ctxt *ctxt)
{
	ulong cr0;

	cr0 = ctxt->ops->get_cr(ctxt, 0);
	cr0 &= ~X86_CR0_TS;
	ctxt->ops->set_cr(ctxt, 0, cr0);
	return X86EMUL_CONTINUE;
}

3276 3277
static int em_vmcall(struct x86_emulate_ctxt *ctxt)
{
3278
	int rc = ctxt->ops->fix_hypercall(ctxt);
3279 3280 3281 3282 3283

	if (rc != X86EMUL_CONTINUE)
		return rc;

	/* Let the processor re-execute the fixed hypercall */
3284
	ctxt->_eip = ctxt->eip;
3285
	/* Disable writeback. */
3286
	ctxt->dst.type = OP_NONE;
3287 3288 3289
	return X86EMUL_CONTINUE;
}

static int emulate_store_desc_ptr(struct x86_emulate_ctxt *ctxt,
				  void (*get)(struct x86_emulate_ctxt *ctxt,
					      struct desc_ptr *ptr))
{
	struct desc_ptr desc_ptr;

	if (ctxt->mode == X86EMUL_MODE_PROT64)
		ctxt->op_bytes = 8;
	get(ctxt, &desc_ptr);
	if (ctxt->op_bytes == 2) {
		ctxt->op_bytes = 4;
		desc_ptr.address &= 0x00ffffff;
	}
	/* Disable writeback. */
	ctxt->dst.type = OP_NONE;
	return segmented_write(ctxt, ctxt->dst.addr.mem,
			       &desc_ptr, 2 + ctxt->op_bytes);
}

static int em_sgdt(struct x86_emulate_ctxt *ctxt)
{
	return emulate_store_desc_ptr(ctxt, ctxt->ops->get_gdt);
}

static int em_sidt(struct x86_emulate_ctxt *ctxt)
{
	return emulate_store_desc_ptr(ctxt, ctxt->ops->get_idt);
}
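
/*
 * Shared body for LGDT and LIDT: read the limit/base pseudo-descriptor
 * from memory, reject a non-canonical base in 64-bit mode, and install it
 * via the set_gdt/set_idt callbacks.
 */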

static int em_lgdt_lidt(struct x86_emulate_ctxt *ctxt, bool lgdt)
3320 3321 3322 3323
{
	struct desc_ptr desc_ptr;
	int rc;

3324 3325
	if (ctxt->mode == X86EMUL_MODE_PROT64)
		ctxt->op_bytes = 8;
3326
	rc = read_descriptor(ctxt, ctxt->src.addr.mem,
3327
			     &desc_ptr.size, &desc_ptr.address,
3328
			     ctxt->op_bytes);
3329 3330
	if (rc != X86EMUL_CONTINUE)
		return rc;
3331 3332 3333
	if (ctxt->mode == X86EMUL_MODE_PROT64 &&
	    is_noncanonical_address(desc_ptr.address))
		return emulate_gp(ctxt, 0);
3334 3335 3336 3337
	if (lgdt)
		ctxt->ops->set_gdt(ctxt, &desc_ptr);
	else
		ctxt->ops->set_idt(ctxt, &desc_ptr);
3338
	/* Disable writeback. */
3339
	ctxt->dst.type = OP_NONE;
3340 3341 3342
	return X86EMUL_CONTINUE;
}

3343 3344 3345 3346 3347
static int em_lgdt(struct x86_emulate_ctxt *ctxt)
{
	return em_lgdt_lidt(ctxt, true);
}

3348
static int em_vmmcall(struct x86_emulate_ctxt *ctxt)
3349 3350 3351
{
	int rc;

3352 3353
	rc = ctxt->ops->fix_hypercall(ctxt);

3354
	/* Disable writeback. */
3355
	ctxt->dst.type = OP_NONE;
3356 3357 3358 3359 3360
	return rc;
}

static int em_lidt(struct x86_emulate_ctxt *ctxt)
{
3361
	return em_lgdt_lidt(ctxt, false);
3362 3363 3364 3365
}

static int em_smsw(struct x86_emulate_ctxt *ctxt)
{
3366 3367
	if (ctxt->dst.type == OP_MEM)
		ctxt->dst.bytes = 2;
3368
	ctxt->dst.val = ctxt->ops->get_cr(ctxt, 0);
3369 3370 3371 3372 3373 3374
	return X86EMUL_CONTINUE;
}

static int em_lmsw(struct x86_emulate_ctxt *ctxt)
{
	ctxt->ops->set_cr(ctxt, 0, (ctxt->ops->get_cr(ctxt, 0) & ~0x0eul)
3375 3376
			  | (ctxt->src.val & 0x0f));
	ctxt->dst.type = OP_NONE;
3377 3378 3379
	return X86EMUL_CONTINUE;
}

3380 3381
static int em_loop(struct x86_emulate_ctxt *ctxt)
{
3382 3383
	int rc = X86EMUL_CONTINUE;

3384 3385
	register_address_increment(ctxt, reg_rmw(ctxt, VCPU_REGS_RCX), -1);
	if ((address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) != 0) &&
3386
	    (ctxt->b == 0xe2 || test_cc(ctxt->b ^ 0x5, ctxt->eflags)))
3387
		rc = jmp_rel(ctxt, ctxt->src.val);
3388

3389
	return rc;
3390 3391 3392 3393
}

static int em_jcxz(struct x86_emulate_ctxt *ctxt)
{
3394 3395
	int rc = X86EMUL_CONTINUE;

3396
	if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0)
3397
		rc = jmp_rel(ctxt, ctxt->src.val);
3398

3399
	return rc;
3400 3401
}

static int em_in(struct x86_emulate_ctxt *ctxt)
{
	if (!pio_in_emulated(ctxt, ctxt->dst.bytes, ctxt->src.val,
			     &ctxt->dst.val))
		return X86EMUL_IO_NEEDED;

	return X86EMUL_CONTINUE;
}

static int em_out(struct x86_emulate_ctxt *ctxt)
{
	ctxt->ops->pio_out_emulated(ctxt, ctxt->src.bytes, ctxt->dst.val,
				    &ctxt->src.val, 1);
	/* Disable writeback. */
	ctxt->dst.type = OP_NONE;
	return X86EMUL_CONTINUE;
}

static int em_cli(struct x86_emulate_ctxt *ctxt)
{
	if (emulator_bad_iopl(ctxt))
		return emulate_gp(ctxt, 0);

	ctxt->eflags &= ~X86_EFLAGS_IF;
	return X86EMUL_CONTINUE;
}

static int em_sti(struct x86_emulate_ctxt *ctxt)
{
	if (emulator_bad_iopl(ctxt))
		return emulate_gp(ctxt, 0);

	ctxt->interruptibility = KVM_X86_SHADOW_INT_STI;
	ctxt->eflags |= X86_EFLAGS_IF;
	return X86EMUL_CONTINUE;
}

static int em_cpuid(struct x86_emulate_ctxt *ctxt)
{
	u32 eax, ebx, ecx, edx;

3443 3444
	eax = reg_read(ctxt, VCPU_REGS_RAX);
	ecx = reg_read(ctxt, VCPU_REGS_RCX);
	ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx);
3446 3447 3448 3449
	*reg_write(ctxt, VCPU_REGS_RAX) = eax;
	*reg_write(ctxt, VCPU_REGS_RBX) = ebx;
	*reg_write(ctxt, VCPU_REGS_RCX) = ecx;
	*reg_write(ctxt, VCPU_REGS_RDX) = edx;
	return X86EMUL_CONTINUE;
}

static int em_sahf(struct x86_emulate_ctxt *ctxt)
{
	u32 flags;

	flags = EFLG_CF | EFLG_PF | EFLG_AF | EFLG_ZF | EFLG_SF;
	flags &= *reg_rmw(ctxt, VCPU_REGS_RAX) >> 8;

	ctxt->eflags &= ~0xffUL;
	ctxt->eflags |= flags | X86_EFLAGS_FIXED;
	return X86EMUL_CONTINUE;
}

static int em_lahf(struct x86_emulate_ctxt *ctxt)
{
3467 3468
	*reg_rmw(ctxt, VCPU_REGS_RAX) &= ~0xff00UL;
	*reg_rmw(ctxt, VCPU_REGS_RAX) |= (ctxt->eflags & 0xff) << 8;
	return X86EMUL_CONTINUE;
}

static int em_bswap(struct x86_emulate_ctxt *ctxt)
{
	switch (ctxt->op_bytes) {
#ifdef CONFIG_X86_64
	case 8:
		asm("bswap %0" : "+r"(ctxt->dst.val));
		break;
#endif
	default:
		asm("bswap %0" : "+r"(*(u32 *)&ctxt->dst.val));
		break;
	}
	return X86EMUL_CONTINUE;
}

static int em_clflush(struct x86_emulate_ctxt *ctxt)
{
	/* emulating clflush regardless of cpuid */
	return X86EMUL_CONTINUE;
}

static bool valid_cr(int nr)
{
	switch (nr) {
	case 0:
	case 2 ... 4:
	case 8:
		return true;
	default:
		return false;
	}
}

static int check_cr_read(struct x86_emulate_ctxt *ctxt)
{
3507
	if (!valid_cr(ctxt->modrm_reg))
		return emulate_ud(ctxt);

	return X86EMUL_CONTINUE;
}

static int check_cr_write(struct x86_emulate_ctxt *ctxt)
{
3515 3516
	u64 new_val = ctxt->src.val64;
	int cr = ctxt->modrm_reg;
3517
	u64 efer = 0;

	static u64 cr_reserved_bits[] = {
		0xffffffff00000000ULL,
		0, 0, 0, /* CR3 checked later */
		CR4_RESERVED_BITS,
		0, 0, 0,
		CR8_RESERVED_BITS,
	};

	if (!valid_cr(cr))
		return emulate_ud(ctxt);

	if (new_val & cr_reserved_bits[cr])
		return emulate_gp(ctxt, 0);

	switch (cr) {
	case 0: {
3535
		u64 cr4;
3536 3537 3538 3539
		if (((new_val & X86_CR0_PG) && !(new_val & X86_CR0_PE)) ||
		    ((new_val & X86_CR0_NW) && !(new_val & X86_CR0_CD)))
			return emulate_gp(ctxt, 0);

3540 3541
		cr4 = ctxt->ops->get_cr(ctxt, 4);
		ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);

		if ((new_val & X86_CR0_PG) && (efer & EFER_LME) &&
		    !(cr4 & X86_CR4_PAE))
			return emulate_gp(ctxt, 0);

		break;
		}
	case 3: {
		u64 rsvd = 0;

3552 3553
		ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
		if (efer & EFER_LMA)
			rsvd = CR3_L_MODE_RESERVED_BITS & ~CR3_PCID_INVD;

		if (new_val & rsvd)
			return emulate_gp(ctxt, 0);

		break;
		}
	case 4: {
3562
		ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);

		if ((efer & EFER_LMA) && !(new_val & X86_CR4_PAE))
			return emulate_gp(ctxt, 0);

		break;
		}
	}

	return X86EMUL_CONTINUE;
}

3574 3575 3576 3577
static int check_dr7_gd(struct x86_emulate_ctxt *ctxt)
{
	unsigned long dr7;

3578
	ctxt->ops->get_dr(ctxt, 7, &dr7);

	/* Check if DR7.Global_Enable is set */
	return dr7 & (1 << 13);
}

static int check_dr_read(struct x86_emulate_ctxt *ctxt)
{
3586
	int dr = ctxt->modrm_reg;
3587 3588 3589 3590 3591
	u64 cr4;

	if (dr > 7)
		return emulate_ud(ctxt);

3592
	cr4 = ctxt->ops->get_cr(ctxt, 4);
3593 3594 3595
	if ((cr4 & X86_CR4_DE) && (dr == 4 || dr == 5))
		return emulate_ud(ctxt);

	if (check_dr7_gd(ctxt)) {
		ulong dr6;

		ctxt->ops->get_dr(ctxt, 6, &dr6);
		dr6 &= ~15;
		dr6 |= DR6_BD | DR6_RTM;
		ctxt->ops->set_dr(ctxt, 6, dr6);
3603
		return emulate_db(ctxt);
3604
	}

	return X86EMUL_CONTINUE;
}

static int check_dr_write(struct x86_emulate_ctxt *ctxt)
{
3611 3612
	u64 new_val = ctxt->src.val64;
	int dr = ctxt->modrm_reg;

	if ((dr == 6 || dr == 7) && (new_val & 0xffffffff00000000ULL))
		return emulate_gp(ctxt, 0);

	return check_dr_read(ctxt);
}

3620 3621 3622 3623
static int check_svme(struct x86_emulate_ctxt *ctxt)
{
	u64 efer;

3624
	ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);

	if (!(efer & EFER_SVME))
		return emulate_ud(ctxt);

	return X86EMUL_CONTINUE;
}

static int check_svme_pa(struct x86_emulate_ctxt *ctxt)
{
3634
	u64 rax = reg_read(ctxt, VCPU_REGS_RAX);
3635 3636

	/* Valid physical address? */
3637
	if (rax & 0xffff000000000000ULL)
3638 3639 3640 3641 3642
		return emulate_gp(ctxt, 0);

	return check_svme(ctxt);
}

3643 3644
static int check_rdtsc(struct x86_emulate_ctxt *ctxt)
{
3645
	u64 cr4 = ctxt->ops->get_cr(ctxt, 4);
3646

3647
	if (cr4 & X86_CR4_TSD && ctxt->ops->cpl(ctxt))
3648 3649 3650 3651 3652
		return emulate_ud(ctxt);

	return X86EMUL_CONTINUE;
}

3653 3654
static int check_rdpmc(struct x86_emulate_ctxt *ctxt)
{
3655
	u64 cr4 = ctxt->ops->get_cr(ctxt, 4);
3656
	u64 rcx = reg_read(ctxt, VCPU_REGS_RCX);
3657

3658
	if ((!(cr4 & X86_CR4_PCE) && ctxt->ops->cpl(ctxt)) ||
3659
	    ctxt->ops->check_pmc(ctxt, rcx))
3660 3661 3662 3663 3664
		return emulate_gp(ctxt, 0);

	return X86EMUL_CONTINUE;
}

3665 3666
static int check_perm_in(struct x86_emulate_ctxt *ctxt)
{
3667 3668
	ctxt->dst.bytes = min(ctxt->dst.bytes, 4u);
	if (!emulator_io_permited(ctxt, ctxt->src.val, ctxt->dst.bytes))
		return emulate_gp(ctxt, 0);

	return X86EMUL_CONTINUE;
}

static int check_perm_out(struct x86_emulate_ctxt *ctxt)
{
3676 3677
	ctxt->src.bytes = min(ctxt->src.bytes, 4u);
	if (!emulator_io_permited(ctxt, ctxt->dst.val, ctxt->src.bytes))
3678 3679 3680 3681 3682
		return emulate_gp(ctxt, 0);

	return X86EMUL_CONTINUE;
}

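/*
 * Shorthand for building the opcode tables below: D/I/F describe a plain
 * decode entry, an entry with an execute callback, or a fastop; G/GD/E/GP
 * redirect decode through a group, a mod-dependent group pair, an x87
 * escape table, or a mandatory-prefix table; the *2bv and F6ALU variants
 * expand into the usual byte/word-or-larger and ALU encoding patterns.
 */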
#define D(_y) { .flags = (_y) }
#define DI(_y, _i) { .flags = (_y)|Intercept, .intercept = x86_intercept_##_i }
#define DIP(_y, _i, _p) { .flags = (_y)|Intercept|CheckPerm, \
		      .intercept = x86_intercept_##_i, .check_perm = (_p) }
#define N    D(NotImpl)
#define EXT(_f, _e) { .flags = ((_f) | RMExt), .u.group = (_e) }
#define G(_f, _g) { .flags = ((_f) | Group | ModRM), .u.group = (_g) }
#define GD(_f, _g) { .flags = ((_f) | GroupDual | ModRM), .u.gdual = (_g) }
#define E(_f, _e) { .flags = ((_f) | Escape | ModRM), .u.esc = (_e) }
#define I(_f, _e) { .flags = (_f), .u.execute = (_e) }
#define F(_f, _e) { .flags = (_f) | Fastop, .u.fastop = (_e) }
#define II(_f, _e, _i) \
	{ .flags = (_f)|Intercept, .u.execute = (_e), .intercept = x86_intercept_##_i }
#define IIP(_f, _e, _i, _p) \
	{ .flags = (_f)|Intercept|CheckPerm, .u.execute = (_e), \
	  .intercept = x86_intercept_##_i, .check_perm = (_p) }
#define GP(_f, _g) { .flags = ((_f) | Prefix), .u.gprefix = (_g) }

#define D2bv(_f)      D((_f) | ByteOp), D(_f)
#define D2bvIP(_f, _i, _p) DIP((_f) | ByteOp, _i, _p), DIP(_f, _i, _p)
#define I2bv(_f, _e)  I((_f) | ByteOp, _e), I(_f, _e)
#define F2bv(_f, _e)  F((_f) | ByteOp, _e), F(_f, _e)
#define I2bvIP(_f, _e, _i, _p) \
	IIP((_f) | ByteOp, _e, _i, _p), IIP(_f, _e, _i, _p)

#define F6ALU(_f, _e) F2bv((_f) | DstMem | SrcReg | ModRM, _e),		\
		F2bv(((_f) | DstReg | SrcMem | ModRM) & ~Lock, _e),	\
		F2bv(((_f) & ~Lock) | DstAcc | SrcImm, _e)

static const struct opcode group7_rm0[] = {
	N,
	I(SrcNone | Priv | EmulateOnUD,	em_vmcall),
	N, N, N, N, N, N,
};

static const struct opcode group7_rm1[] = {
	DI(SrcNone | Priv, monitor),
	DI(SrcNone | Priv, mwait),
	N, N, N, N, N, N,
};

static const struct opcode group7_rm3[] = {
	DIP(SrcNone | Prot | Priv,		vmrun,		check_svme_pa),
	II(SrcNone  | Prot | EmulateOnUD,	em_vmmcall,	vmmcall),
	DIP(SrcNone | Prot | Priv,		vmload,		check_svme_pa),
	DIP(SrcNone | Prot | Priv,		vmsave,		check_svme_pa),
	DIP(SrcNone | Prot | Priv,		stgi,		check_svme),
	DIP(SrcNone | Prot | Priv,		clgi,		check_svme),
	DIP(SrcNone | Prot | Priv,		skinit,		check_svme),
	DIP(SrcNone | Prot | Priv,		invlpga,	check_svme),
};

static const struct opcode group7_rm7[] = {
	N,
	DIP(SrcNone, rdtscp, check_rdtsc),
	N, N, N, N, N, N,
};

static const struct opcode group1[] = {
	F(Lock, em_add),
	F(Lock | PageTable, em_or),
	F(Lock, em_adc),
	F(Lock, em_sbb),
	F(Lock | PageTable, em_and),
	F(Lock, em_sub),
	F(Lock, em_xor),
	F(NoWrite, em_cmp),
};

static const struct opcode group1A[] = {
	I(DstMem | SrcNone | Mov | Stack, em_pop), N, N, N, N, N, N, N,
};

static const struct opcode group2[] = {
	F(DstMem | ModRM, em_rol),
	F(DstMem | ModRM, em_ror),
	F(DstMem | ModRM, em_rcl),
	F(DstMem | ModRM, em_rcr),
	F(DstMem | ModRM, em_shl),
	F(DstMem | ModRM, em_shr),
	F(DstMem | ModRM, em_shl),
	F(DstMem | ModRM, em_sar),
};

static const struct opcode group3[] = {
	F(DstMem | SrcImm | NoWrite, em_test),
	F(DstMem | SrcImm | NoWrite, em_test),
	F(DstMem | SrcNone | Lock, em_not),
	F(DstMem | SrcNone | Lock, em_neg),
	F(DstXacc | Src2Mem, em_mul_ex),
	F(DstXacc | Src2Mem, em_imul_ex),
	F(DstXacc | Src2Mem, em_div_ex),
	F(DstXacc | Src2Mem, em_idiv_ex),
};

static const struct opcode group4[] = {
	F(ByteOp | DstMem | SrcNone | Lock, em_inc),
	F(ByteOp | DstMem | SrcNone | Lock, em_dec),
	N, N, N, N, N, N,
};

static const struct opcode group5[] = {
	F(DstMem | SrcNone | Lock,		em_inc),
	F(DstMem | SrcNone | Lock,		em_dec),
	I(SrcMem | NearBranch,			em_call_near_abs),
	I(SrcMemFAddr | ImplicitOps | Stack,	em_call_far),
	I(SrcMem | NearBranch,			em_jmp_abs),
	I(SrcMemFAddr | ImplicitOps,		em_jmp_far),
	I(SrcMem | Stack,			em_push), D(Undefined),
};

static const struct opcode group6[] = {
	DI(Prot,	sldt),
	DI(Prot,	str),
	II(Prot | Priv | SrcMem16, em_lldt, lldt),
	II(Prot | Priv | SrcMem16, em_ltr, ltr),
	N, N, N, N,
};

static const struct group_dual group7 = { {
	II(Mov | DstMem,			em_sgdt, sgdt),
	II(Mov | DstMem,			em_sidt, sidt),
	II(SrcMem | Priv,			em_lgdt, lgdt),
	II(SrcMem | Priv,			em_lidt, lidt),
	II(SrcNone | DstMem | Mov,		em_smsw, smsw), N,
	II(SrcMem16 | Mov | Priv,		em_lmsw, lmsw),
	II(SrcMem | ByteOp | Priv | NoAccess,	em_invlpg, invlpg),
}, {
	EXT(0, group7_rm0),
	EXT(0, group7_rm1),
	N, EXT(0, group7_rm3),
	II(SrcNone | DstMem | Mov,		em_smsw, smsw), N,
	II(SrcMem16 | Mov | Priv,		em_lmsw, lmsw),
	EXT(0, group7_rm7),
} };

static const struct opcode group8[] = {
	N, N, N, N,
	F(DstMem | SrcImmByte | NoWrite,		em_bt),
	F(DstMem | SrcImmByte | Lock | PageTable,	em_bts),
	F(DstMem | SrcImmByte | Lock,			em_btr),
	F(DstMem | SrcImmByte | Lock | PageTable,	em_btc),
};

static const struct group_dual group9 = { {
	N, I(DstMem64 | Lock | PageTable, em_cmpxchg8b), N, N, N, N, N, N,
}, {
	N, N, N, N, N, N, N, N,
} };

static const struct opcode group11[] = {
	I(DstMem | SrcImm | Mov | PageTable, em_mov),
	X7(D(Undefined)),
};

static const struct gprefix pfx_0f_ae_7 = {
	I(SrcMem | ByteOp, em_clflush), N, N, N,
};

static const struct group_dual group15 = { {
	N, N, N, N, N, N, N, GP(0, &pfx_0f_ae_7),
}, {
	N, N, N, N, N, N, N, N,
} };

static const struct gprefix pfx_0f_6f_0f_7f = {
	I(Mmx, em_mov), I(Sse | Aligned, em_mov), N, I(Sse | Unaligned, em_mov),
};

static const struct gprefix pfx_0f_2b = {
	I(0, em_mov), I(0, em_mov), N, N,
};

static const struct gprefix pfx_0f_28_0f_29 = {
	I(Aligned, em_mov), I(Aligned, em_mov), N, N,
};

static const struct gprefix pfx_0f_e7 = {
	N, I(Sse, em_mov), N, N,
};

static const struct escape escape_d9 = { {
	N, N, N, N, N, N, N, I(DstMem, em_fnstcw),
}, {
	/* 0xC0 - 0xC7 */
	N, N, N, N, N, N, N, N,
	/* 0xC8 - 0xCF */
	N, N, N, N, N, N, N, N,
	/* 0xD0 - 0xD7 */
	N, N, N, N, N, N, N, N,
	/* 0xD8 - 0xDF */
	N, N, N, N, N, N, N, N,
	/* 0xE0 - 0xE7 */
	N, N, N, N, N, N, N, N,
	/* 0xE8 - 0xEF */
	N, N, N, N, N, N, N, N,
	/* 0xF0 - 0xF7 */
	N, N, N, N, N, N, N, N,
	/* 0xF8 - 0xFF */
	N, N, N, N, N, N, N, N,
} };

static const struct escape escape_db = { {
	N, N, N, N, N, N, N, N,
}, {
	/* 0xC0 - 0xC7 */
	N, N, N, N, N, N, N, N,
	/* 0xC8 - 0xCF */
	N, N, N, N, N, N, N, N,
	/* 0xD0 - 0xD7 */
	N, N, N, N, N, N, N, N,
	/* 0xD8 - 0xDF */
	N, N, N, N, N, N, N, N,
	/* 0xE0 - 0xE7 */
	N, N, N, I(ImplicitOps, em_fninit), N, N, N, N,
	/* 0xE8 - 0xEF */
	N, N, N, N, N, N, N, N,
	/* 0xF0 - 0xF7 */
	N, N, N, N, N, N, N, N,
	/* 0xF8 - 0xFF */
	N, N, N, N, N, N, N, N,
} };

static const struct escape escape_dd = { {
	N, N, N, N, N, N, N, I(DstMem, em_fnstsw),
}, {
	/* 0xC0 - 0xC7 */
	N, N, N, N, N, N, N, N,
	/* 0xC8 - 0xCF */
	N, N, N, N, N, N, N, N,
	/* 0xD0 - 0xD7 */
	N, N, N, N, N, N, N, N,
	/* 0xD8 - 0xDF */
	N, N, N, N, N, N, N, N,
	/* 0xE0 - 0xE7 */
	N, N, N, N, N, N, N, N,
	/* 0xE8 - 0xEF */
	N, N, N, N, N, N, N, N,
	/* 0xF0 - 0xF7 */
	N, N, N, N, N, N, N, N,
	/* 0xF8 - 0xFF */
	N, N, N, N, N, N, N, N,
} };

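/*
 * One entry per one-byte primary opcode (0x00-0xff), built from the
 * macros above; undefined or unemulated opcodes are marked N.
 */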
static const struct opcode opcode_table[256] = {
	/* 0x00 - 0x07 */
	F6ALU(Lock, em_add),
	I(ImplicitOps | Stack | No64 | Src2ES, em_push_sreg),
	I(ImplicitOps | Stack | No64 | Src2ES, em_pop_sreg),
	/* 0x08 - 0x0F */
	F6ALU(Lock | PageTable, em_or),
	I(ImplicitOps | Stack | No64 | Src2CS, em_push_sreg),
	N,
	/* 0x10 - 0x17 */
	F6ALU(Lock, em_adc),
	I(ImplicitOps | Stack | No64 | Src2SS, em_push_sreg),
	I(ImplicitOps | Stack | No64 | Src2SS, em_pop_sreg),
	/* 0x18 - 0x1F */
	F6ALU(Lock, em_sbb),
	I(ImplicitOps | Stack | No64 | Src2DS, em_push_sreg),
	I(ImplicitOps | Stack | No64 | Src2DS, em_pop_sreg),
	/* 0x20 - 0x27 */
	F6ALU(Lock | PageTable, em_and), N, N,
	/* 0x28 - 0x2F */
	F6ALU(Lock, em_sub), N, I(ByteOp | DstAcc | No64, em_das),
	/* 0x30 - 0x37 */
	F6ALU(Lock, em_xor), N, N,
	/* 0x38 - 0x3F */
	F6ALU(NoWrite, em_cmp), N, N,
	/* 0x40 - 0x4F */
	X8(F(DstReg, em_inc)), X8(F(DstReg, em_dec)),
	/* 0x50 - 0x57 */
	X8(I(SrcReg | Stack, em_push)),
	/* 0x58 - 0x5F */
	X8(I(DstReg | Stack, em_pop)),
	/* 0x60 - 0x67 */
	I(ImplicitOps | Stack | No64, em_pusha),
	I(ImplicitOps | Stack | No64, em_popa),
	N, D(DstReg | SrcMem32 | ModRM | Mov) /* movsxd (x86/64) */ ,
	N, N, N, N,
	/* 0x68 - 0x6F */
	I(SrcImm | Mov | Stack, em_push),
	I(DstReg | SrcMem | ModRM | Src2Imm, em_imul_3op),
	I(SrcImmByte | Mov | Stack, em_push),
	I(DstReg | SrcMem | ModRM | Src2ImmByte, em_imul_3op),
	I2bvIP(DstDI | SrcDX | Mov | String | Unaligned, em_in, ins, check_perm_in), /* insb, insw/insd */
	I2bvIP(SrcSI | DstDX | String, em_out, outs, check_perm_out), /* outsb, outsw/outsd */
	/* 0x70 - 0x7F */
	X16(D(SrcImmByte | NearBranch)),
	/* 0x80 - 0x87 */
	G(ByteOp | DstMem | SrcImm, group1),
	G(DstMem | SrcImm, group1),
	G(ByteOp | DstMem | SrcImm | No64, group1),
	G(DstMem | SrcImmByte, group1),
	F2bv(DstMem | SrcReg | ModRM | NoWrite, em_test),
	I2bv(DstMem | SrcReg | ModRM | Lock | PageTable, em_xchg),
	/* 0x88 - 0x8F */
	I2bv(DstMem | SrcReg | ModRM | Mov | PageTable, em_mov),
	I2bv(DstReg | SrcMem | ModRM | Mov, em_mov),
	I(DstMem | SrcNone | ModRM | Mov | PageTable, em_mov_rm_sreg),
	D(ModRM | SrcMem | NoAccess | DstReg),
	I(ImplicitOps | SrcMem16 | ModRM, em_mov_sreg_rm),
	G(0, group1A),
	/* 0x90 - 0x97 */
	DI(SrcAcc | DstReg, pause), X7(D(SrcAcc | DstReg)),
	/* 0x98 - 0x9F */
	D(DstAcc | SrcNone), I(ImplicitOps | SrcAcc, em_cwd),
	I(SrcImmFAddr | No64, em_call_far), N,
	II(ImplicitOps | Stack, em_pushf, pushf),
	II(ImplicitOps | Stack, em_popf, popf),
	I(ImplicitOps, em_sahf), I(ImplicitOps, em_lahf),
	/* 0xA0 - 0xA7 */
	I2bv(DstAcc | SrcMem | Mov | MemAbs, em_mov),
	I2bv(DstMem | SrcAcc | Mov | MemAbs | PageTable, em_mov),
	I2bv(SrcSI | DstDI | Mov | String, em_mov),
	F2bv(SrcSI | DstDI | String | NoWrite, em_cmp_r),
	/* 0xA8 - 0xAF */
	F2bv(DstAcc | SrcImm | NoWrite, em_test),
	I2bv(SrcAcc | DstDI | Mov | String, em_mov),
	I2bv(SrcSI | DstAcc | Mov | String, em_mov),
	F2bv(SrcAcc | DstDI | String | NoWrite, em_cmp_r),
	/* 0xB0 - 0xB7 */
	X8(I(ByteOp | DstReg | SrcImm | Mov, em_mov)),
	/* 0xB8 - 0xBF */
	X8(I(DstReg | SrcImm64 | Mov, em_mov)),
	/* 0xC0 - 0xC7 */
	G(ByteOp | Src2ImmByte, group2), G(Src2ImmByte, group2),
	I(ImplicitOps | NearBranch | SrcImmU16, em_ret_near_imm),
	I(ImplicitOps | NearBranch, em_ret),
	I(DstReg | SrcMemFAddr | ModRM | No64 | Src2ES, em_lseg),
	I(DstReg | SrcMemFAddr | ModRM | No64 | Src2DS, em_lseg),
	G(ByteOp, group11), G(0, group11),
	/* 0xC8 - 0xCF */
	I(Stack | SrcImmU16 | Src2ImmByte, em_enter), I(Stack, em_leave),
	I(ImplicitOps | Stack | SrcImmU16, em_ret_far_imm),
	I(ImplicitOps | Stack, em_ret_far),
	D(ImplicitOps), DI(SrcImmByte, intn),
	D(ImplicitOps | No64), II(ImplicitOps, em_iret, iret),
	/* 0xD0 - 0xD7 */
	G(Src2One | ByteOp, group2), G(Src2One, group2),
	G(Src2CL | ByteOp, group2), G(Src2CL, group2),
	I(DstAcc | SrcImmUByte | No64, em_aam),
	I(DstAcc | SrcImmUByte | No64, em_aad),
	F(DstAcc | ByteOp | No64, em_salc),
	I(DstAcc | SrcXLat | ByteOp, em_mov),
	/* 0xD8 - 0xDF */
	N, E(0, &escape_d9), N, E(0, &escape_db), N, E(0, &escape_dd), N, N,
	/* 0xE0 - 0xE7 */
	X3(I(SrcImmByte | NearBranch, em_loop)),
	I(SrcImmByte | NearBranch, em_jcxz),
	I2bvIP(SrcImmUByte | DstAcc, em_in,  in,  check_perm_in),
	I2bvIP(SrcAcc | DstImmUByte, em_out, out, check_perm_out),
	/* 0xE8 - 0xEF */
	I(SrcImm | NearBranch, em_call), D(SrcImm | ImplicitOps | NearBranch),
	I(SrcImmFAddr | No64, em_jmp_far),
	D(SrcImmByte | ImplicitOps | NearBranch),
	I2bvIP(SrcDX | DstAcc, em_in,  in,  check_perm_in),
	I2bvIP(SrcAcc | DstDX, em_out, out, check_perm_out),
	/* 0xF0 - 0xF7 */
	N, DI(ImplicitOps, icebp), N, N,
	DI(ImplicitOps | Priv, hlt), D(ImplicitOps),
	G(ByteOp, group3), G(0, group3),
	/* 0xF8 - 0xFF */
	D(ImplicitOps), D(ImplicitOps),
	I(ImplicitOps, em_cli), I(ImplicitOps, em_sti),
	D(ImplicitOps), D(ImplicitOps), G(0, group4), G(0, group5),
};

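/* Same layout for the 0x0f-prefixed (two-byte) opcode space. */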
static const struct opcode twobyte_table[256] = {
	/* 0x00 - 0x0F */
	G(0, group6), GD(0, &group7), N, N,
	N, I(ImplicitOps | EmulateOnUD, em_syscall),
	II(ImplicitOps | Priv, em_clts, clts), N,
	DI(ImplicitOps | Priv, invd), DI(ImplicitOps | Priv, wbinvd), N, N,
	N, D(ImplicitOps | ModRM | SrcMem | NoAccess), N, N,
	/* 0x10 - 0x1F */
	N, N, N, N, N, N, N, N,
	D(ImplicitOps | ModRM | SrcMem | NoAccess),
	N, N, N, N, N, N, D(ImplicitOps | ModRM | SrcMem | NoAccess),
	/* 0x20 - 0x2F */
	DIP(ModRM | DstMem | Priv | Op3264 | NoMod, cr_read, check_cr_read),
	DIP(ModRM | DstMem | Priv | Op3264 | NoMod, dr_read, check_dr_read),
	IIP(ModRM | SrcMem | Priv | Op3264 | NoMod, em_cr_write, cr_write,
						check_cr_write),
	IIP(ModRM | SrcMem | Priv | Op3264 | NoMod, em_dr_write, dr_write,
						check_dr_write),
	N, N, N, N,
	GP(ModRM | DstReg | SrcMem | Mov | Sse, &pfx_0f_28_0f_29),
	GP(ModRM | DstMem | SrcReg | Mov | Sse, &pfx_0f_28_0f_29),
	N, GP(ModRM | DstMem | SrcReg | Mov | Sse, &pfx_0f_2b),
	N, N, N, N,
	/* 0x30 - 0x3F */
	II(ImplicitOps | Priv, em_wrmsr, wrmsr),
	IIP(ImplicitOps, em_rdtsc, rdtsc, check_rdtsc),
	II(ImplicitOps | Priv, em_rdmsr, rdmsr),
	IIP(ImplicitOps, em_rdpmc, rdpmc, check_rdpmc),
	I(ImplicitOps | EmulateOnUD, em_sysenter),
	I(ImplicitOps | Priv | EmulateOnUD, em_sysexit),
	N, N,
	N, N, N, N, N, N, N, N,
	/* 0x40 - 0x4F */
	X16(D(DstReg | SrcMem | ModRM)),
	/* 0x50 - 0x5F */
	N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
	/* 0x60 - 0x6F */
	N, N, N, N,
	N, N, N, N,
	N, N, N, N,
	N, N, N, GP(SrcMem | DstReg | ModRM | Mov, &pfx_0f_6f_0f_7f),
	/* 0x70 - 0x7F */
	N, N, N, N,
	N, N, N, N,
	N, N, N, N,
	N, N, N, GP(SrcReg | DstMem | ModRM | Mov, &pfx_0f_6f_0f_7f),
	/* 0x80 - 0x8F */
	X16(D(SrcImm | NearBranch)),
	/* 0x90 - 0x9F */
	X16(D(ByteOp | DstMem | SrcNone | ModRM| Mov)),
	/* 0xA0 - 0xA7 */
	I(Stack | Src2FS, em_push_sreg), I(Stack | Src2FS, em_pop_sreg),
	II(ImplicitOps, em_cpuid, cpuid),
	F(DstMem | SrcReg | ModRM | BitOp | NoWrite, em_bt),
	F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shld),
	F(DstMem | SrcReg | Src2CL | ModRM, em_shld), N, N,
	/* 0xA8 - 0xAF */
	I(Stack | Src2GS, em_push_sreg), I(Stack | Src2GS, em_pop_sreg),
	DI(ImplicitOps, rsm),
	F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_bts),
	F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shrd),
	F(DstMem | SrcReg | Src2CL | ModRM, em_shrd),
	GD(0, &group15), F(DstReg | SrcMem | ModRM, em_imul),
	/* 0xB0 - 0xB7 */
	I2bv(DstMem | SrcReg | ModRM | Lock | PageTable, em_cmpxchg),
	I(DstReg | SrcMemFAddr | ModRM | Src2SS, em_lseg),
	F(DstMem | SrcReg | ModRM | BitOp | Lock, em_btr),
	I(DstReg | SrcMemFAddr | ModRM | Src2FS, em_lseg),
	I(DstReg | SrcMemFAddr | ModRM | Src2GS, em_lseg),
	D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov),
	/* 0xB8 - 0xBF */
	N, N,
	G(BitOp, group8),
	F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_btc),
	F(DstReg | SrcMem | ModRM, em_bsf), F(DstReg | SrcMem | ModRM, em_bsr),
	D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov),
	/* 0xC0 - 0xC7 */
	F2bv(DstMem | SrcReg | ModRM | SrcWrite | Lock, em_xadd),
	N, I(DstMem | SrcReg | ModRM | No16 | Mov, em_mov),
	N, N, N, GD(0, &group9),
	/* 0xC8 - 0xCF */
	X8(I(DstReg, em_bswap)),
	/* 0xD0 - 0xDF */
	N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
	/* 0xE0 - 0xEF */
	N, N, N, N, N, N, N, GP(SrcReg | DstMem | ModRM | Mov, &pfx_0f_e7),
	N, N, N, N, N, N, N, N,
	/* 0xF0 - 0xFF */
	N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N
};

static const struct gprefix three_byte_0f_38_f0 = {
	I(DstReg | SrcMem | Mov, em_movbe), N, N, N
};

static const struct gprefix three_byte_0f_38_f1 = {
	I(DstMem | SrcReg | Mov, em_movbe), N, N, N
};

/*
 * Insns below are selected by the prefix and indexed by the third opcode
 * byte.
 */
static const struct opcode opcode_map_0f_38[256] = {
	/* 0x00 - 0x7f */
	X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), X16(N),
	/* 0x80 - 0xef */
	X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), X16(N),
	/* 0xf0 - 0xf1 */
	GP(EmulateOnUD | ModRM | Prefix, &three_byte_0f_38_f0),
	GP(EmulateOnUD | ModRM | Prefix, &three_byte_0f_38_f1),
	/* 0xf2 - 0xff */
	N, N, X4(N), X8(N)
};

#undef D
#undef N
#undef G
#undef GD
#undef I
#undef GP
#undef EXT

#undef D2bv
#undef D2bvIP
#undef I2bv
#undef I2bvIP
#undef F6ALU

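/*
 * Immediate operands never exceed four bytes even with a 64-bit operand
 * size; the one real exception (mov reg, imm64) is decoded via OpImm64
 * rather than through imm_size().
 */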
static unsigned imm_size(struct x86_emulate_ctxt *ctxt)
{
	unsigned size;

	size = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
	if (size == 8)
		size = 4;
	return size;
}

static int decode_imm(struct x86_emulate_ctxt *ctxt, struct operand *op,
		      unsigned size, bool sign_extension)
{
	int rc = X86EMUL_CONTINUE;

	op->type = OP_IMM;
	op->bytes = size;
	op->addr.mem.ea = ctxt->_eip;
	/* NB. Immediates are sign-extended as necessary. */
	switch (op->bytes) {
	case 1:
		op->val = insn_fetch(s8, ctxt);
		break;
	case 2:
		op->val = insn_fetch(s16, ctxt);
		break;
	case 4:
		op->val = insn_fetch(s32, ctxt);
		break;
	case 8:
		op->val = insn_fetch(s64, ctxt);
		break;
	}
	if (!sign_extension) {
		switch (op->bytes) {
		case 1:
			op->val &= 0xff;
			break;
		case 2:
			op->val &= 0xffff;
			break;
		case 4:
			op->val &= 0xffffffff;
			break;
		}
	}
done:
	return rc;
}

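/*
 * Translate one OpXxx operand descriptor (taken from the decode flags)
 * into a fully populated struct operand: register, memory, immediate or
 * implied value.
 */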
static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,
			  unsigned d)
{
	int rc = X86EMUL_CONTINUE;

	switch (d) {
	case OpReg:
		decode_register_operand(ctxt, op);
		break;
	case OpImmUByte:
		rc = decode_imm(ctxt, op, 1, false);
		break;
	case OpMem:
		ctxt->memop.bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
	mem_common:
		*op = ctxt->memop;
		ctxt->memopp = op;
		if (ctxt->d & BitOp)
			fetch_bit_operand(ctxt);
		op->orig_val = op->val;
		break;
	case OpMem64:
		ctxt->memop.bytes = (ctxt->op_bytes == 8) ? 16 : 8;
		goto mem_common;
	case OpAcc:
		op->type = OP_REG;
		op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
		op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX);
		fetch_register_operand(op);
		op->orig_val = op->val;
		break;
	case OpAccLo:
		op->type = OP_REG;
		op->bytes = (ctxt->d & ByteOp) ? 2 : ctxt->op_bytes;
		op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX);
		fetch_register_operand(op);
		op->orig_val = op->val;
		break;
	case OpAccHi:
		if (ctxt->d & ByteOp) {
			op->type = OP_NONE;
			break;
		}
		op->type = OP_REG;
		op->bytes = ctxt->op_bytes;
		op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RDX);
		fetch_register_operand(op);
		op->orig_val = op->val;
		break;
	case OpDI:
		op->type = OP_MEM;
		op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
		op->addr.mem.ea =
			register_address(ctxt, reg_read(ctxt, VCPU_REGS_RDI));
		op->addr.mem.seg = VCPU_SREG_ES;
		op->val = 0;
		op->count = 1;
		break;
	case OpDX:
		op->type = OP_REG;
		op->bytes = 2;
		op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RDX);
		fetch_register_operand(op);
		break;
	case OpCL:
		op->bytes = 1;
		op->val = reg_read(ctxt, VCPU_REGS_RCX) & 0xff;
		break;
	case OpImmByte:
		rc = decode_imm(ctxt, op, 1, true);
		break;
	case OpOne:
		op->bytes = 1;
		op->val = 1;
		break;
	case OpImm:
		rc = decode_imm(ctxt, op, imm_size(ctxt), true);
		break;
	case OpImm64:
		rc = decode_imm(ctxt, op, ctxt->op_bytes, true);
		break;
	case OpMem8:
		ctxt->memop.bytes = 1;
		if (ctxt->memop.type == OP_REG) {
			ctxt->memop.addr.reg = decode_register(ctxt,
					ctxt->modrm_rm, true);
			fetch_register_operand(&ctxt->memop);
		}
		goto mem_common;
	case OpMem16:
		ctxt->memop.bytes = 2;
		goto mem_common;
	case OpMem32:
		ctxt->memop.bytes = 4;
		goto mem_common;
	case OpImmU16:
		rc = decode_imm(ctxt, op, 2, false);
		break;
	case OpImmU:
		rc = decode_imm(ctxt, op, imm_size(ctxt), false);
		break;
	case OpSI:
		op->type = OP_MEM;
		op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
		op->addr.mem.ea =
			register_address(ctxt, reg_read(ctxt, VCPU_REGS_RSI));
		op->addr.mem.seg = ctxt->seg_override;
		op->val = 0;
		op->count = 1;
		break;
	case OpXLat:
		op->type = OP_MEM;
		op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
		op->addr.mem.ea =
			register_address(ctxt,
				reg_read(ctxt, VCPU_REGS_RBX) +
				(reg_read(ctxt, VCPU_REGS_RAX) & 0xff));
		op->addr.mem.seg = ctxt->seg_override;
		op->val = 0;
		break;
	case OpImmFAddr:
		op->type = OP_IMM;
		op->addr.mem.ea = ctxt->_eip;
		op->bytes = ctxt->op_bytes + 2;
		insn_fetch_arr(op->valptr, op->bytes, ctxt);
		break;
	case OpMemFAddr:
		ctxt->memop.bytes = ctxt->op_bytes + 2;
		goto mem_common;
	case OpES:
		op->val = VCPU_SREG_ES;
		break;
	case OpCS:
		op->val = VCPU_SREG_CS;
		break;
	case OpSS:
		op->val = VCPU_SREG_SS;
		break;
	case OpDS:
		op->val = VCPU_SREG_DS;
		break;
	case OpFS:
		op->val = VCPU_SREG_FS;
		break;
	case OpGS:
		op->val = VCPU_SREG_GS;
		break;
	case OpImplicit:
		/* Special instructions do their own operand decoding. */
	default:
		op->type = OP_NONE; /* Disable writeback. */
		break;
	}

done:
	return rc;
}

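/*
 * Top-level decoder: consume legacy and REX prefixes, look up the opcode
 * (including the two- and three-byte maps and group tables), then decode
 * the ModRM/SIB bytes and the source/destination operands.
 */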
int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len)
{
	int rc = X86EMUL_CONTINUE;
	int mode = ctxt->mode;
	int def_op_bytes, def_ad_bytes, goffset, simd_prefix;
	bool op_prefix = false;
	bool has_seg_override = false;
	struct opcode opcode;

	ctxt->memop.type = OP_NONE;
	ctxt->memopp = NULL;
	ctxt->_eip = ctxt->eip;
	ctxt->fetch.ptr = ctxt->fetch.data;
	ctxt->fetch.end = ctxt->fetch.data + insn_len;
	ctxt->opcode_len = 1;
	if (insn_len > 0)
		memcpy(ctxt->fetch.data, insn, insn_len);
	else {
		rc = __do_insn_fetch_bytes(ctxt, 1);
		if (rc != X86EMUL_CONTINUE)
			return rc;
	}

	switch (mode) {
	case X86EMUL_MODE_REAL:
	case X86EMUL_MODE_VM86:
	case X86EMUL_MODE_PROT16:
		def_op_bytes = def_ad_bytes = 2;
		break;
	case X86EMUL_MODE_PROT32:
		def_op_bytes = def_ad_bytes = 4;
		break;
#ifdef CONFIG_X86_64
	case X86EMUL_MODE_PROT64:
		def_op_bytes = 4;
		def_ad_bytes = 8;
		break;
#endif
	default:
		return EMULATION_FAILED;
	}

	ctxt->op_bytes = def_op_bytes;
	ctxt->ad_bytes = def_ad_bytes;

	/* Legacy prefixes. */
	for (;;) {
		switch (ctxt->b = insn_fetch(u8, ctxt)) {
		case 0x66:	/* operand-size override */
			op_prefix = true;
			/* switch between 2/4 bytes */
			ctxt->op_bytes = def_op_bytes ^ 6;
			break;
		case 0x67:	/* address-size override */
			if (mode == X86EMUL_MODE_PROT64)
				/* switch between 4/8 bytes */
				ctxt->ad_bytes = def_ad_bytes ^ 12;
			else
				/* switch between 2/4 bytes */
				ctxt->ad_bytes = def_ad_bytes ^ 6;
			break;
		case 0x26:	/* ES override */
		case 0x2e:	/* CS override */
		case 0x36:	/* SS override */
		case 0x3e:	/* DS override */
			has_seg_override = true;
			ctxt->seg_override = (ctxt->b >> 3) & 3;
			break;
		case 0x64:	/* FS override */
		case 0x65:	/* GS override */
			has_seg_override = true;
			ctxt->seg_override = ctxt->b & 7;
			break;
		case 0x40 ... 0x4f: /* REX */
			if (mode != X86EMUL_MODE_PROT64)
				goto done_prefixes;
			ctxt->rex_prefix = ctxt->b;
			continue;
		case 0xf0:	/* LOCK */
			ctxt->lock_prefix = 1;
			break;
		case 0xf2:	/* REPNE/REPNZ */
		case 0xf3:	/* REP/REPE/REPZ */
			ctxt->rep_prefix = ctxt->b;
			break;
		default:
			goto done_prefixes;
		}

		/* Any legacy prefix after a REX prefix nullifies its effect. */

		ctxt->rex_prefix = 0;
	}

done_prefixes:

	/* REX prefix. */
	if (ctxt->rex_prefix & 8)
		ctxt->op_bytes = 8;	/* REX.W */

	/* Opcode byte(s). */
	opcode = opcode_table[ctxt->b];
	/* Two-byte opcode? */
	if (ctxt->b == 0x0f) {
		ctxt->opcode_len = 2;
		ctxt->b = insn_fetch(u8, ctxt);
		opcode = twobyte_table[ctxt->b];

		/* 0F_38 opcode map */
		if (ctxt->b == 0x38) {
			ctxt->opcode_len = 3;
			ctxt->b = insn_fetch(u8, ctxt);
			opcode = opcode_map_0f_38[ctxt->b];
		}
	}
	ctxt->d = opcode.flags;

	if (ctxt->d & ModRM)
		ctxt->modrm = insn_fetch(u8, ctxt);

	/* vex-prefix instructions are not implemented */
	if (ctxt->opcode_len == 1 && (ctxt->b == 0xc5 || ctxt->b == 0xc4) &&
	    (mode == X86EMUL_MODE_PROT64 || (ctxt->modrm & 0xc0) == 0xc0)) {
		ctxt->d = NotImpl;
	}

	while (ctxt->d & GroupMask) {
		switch (ctxt->d & GroupMask) {
		case Group:
			goffset = (ctxt->modrm >> 3) & 7;
			opcode = opcode.u.group[goffset];
			break;
		case GroupDual:
			goffset = (ctxt->modrm >> 3) & 7;
			if ((ctxt->modrm >> 6) == 3)
				opcode = opcode.u.gdual->mod3[goffset];
			else
				opcode = opcode.u.gdual->mod012[goffset];
			break;
		case RMExt:
			goffset = ctxt->modrm & 7;
			opcode = opcode.u.group[goffset];
			break;
		case Prefix:
			if (ctxt->rep_prefix && op_prefix)
				return EMULATION_FAILED;
			simd_prefix = op_prefix ? 0x66 : ctxt->rep_prefix;
			switch (simd_prefix) {
			case 0x00: opcode = opcode.u.gprefix->pfx_no; break;
			case 0x66: opcode = opcode.u.gprefix->pfx_66; break;
			case 0xf2: opcode = opcode.u.gprefix->pfx_f2; break;
			case 0xf3: opcode = opcode.u.gprefix->pfx_f3; break;
			}
			break;
		case Escape:
			if (ctxt->modrm > 0xbf)
				opcode = opcode.u.esc->high[ctxt->modrm - 0xc0];
			else
				opcode = opcode.u.esc->op[(ctxt->modrm >> 3) & 7];
			break;
		default:
			return EMULATION_FAILED;
		}

		ctxt->d &= ~(u64)GroupMask;
		ctxt->d |= opcode.flags;
	}

	/* Unrecognised? */
	if (ctxt->d == 0)
		return EMULATION_FAILED;

	ctxt->execute = opcode.u.execute;

	if (unlikely(ctxt->ud) && likely(!(ctxt->d & EmulateOnUD)))
		return EMULATION_FAILED;

	if (unlikely(ctxt->d &
	    (NotImpl|Stack|Op3264|Sse|Mmx|Intercept|CheckPerm|NearBranch|
	     No16))) {
		/*
		 * These are copied unconditionally here, and checked unconditionally
		 * in x86_emulate_insn.
		 */
		ctxt->check_perm = opcode.check_perm;
		ctxt->intercept = opcode.intercept;

		if (ctxt->d & NotImpl)
			return EMULATION_FAILED;

		if (mode == X86EMUL_MODE_PROT64) {
			if (ctxt->op_bytes == 4 && (ctxt->d & Stack))
				ctxt->op_bytes = 8;
			else if (ctxt->d & NearBranch)
				ctxt->op_bytes = 8;
		}

		if (ctxt->d & Op3264) {
			if (mode == X86EMUL_MODE_PROT64)
				ctxt->op_bytes = 8;
			else
				ctxt->op_bytes = 4;
		}

		if ((ctxt->d & No16) && ctxt->op_bytes == 2)
			ctxt->op_bytes = 4;

		if (ctxt->d & Sse)
			ctxt->op_bytes = 16;
		else if (ctxt->d & Mmx)
			ctxt->op_bytes = 8;
	}

	/* ModRM and SIB bytes. */
	if (ctxt->d & ModRM) {
		rc = decode_modrm(ctxt, &ctxt->memop);
		if (!has_seg_override) {
			has_seg_override = true;
			ctxt->seg_override = ctxt->modrm_seg;
		}
	} else if (ctxt->d & MemAbs)
		rc = decode_abs(ctxt, &ctxt->memop);
	if (rc != X86EMUL_CONTINUE)
		goto done;

	if (!has_seg_override)
		ctxt->seg_override = VCPU_SREG_DS;

	ctxt->memop.addr.mem.seg = ctxt->seg_override;

	/*
	 * Decode and fetch the source operand: register, memory
	 * or immediate.
	 */
	rc = decode_operand(ctxt, &ctxt->src, (ctxt->d >> SrcShift) & OpMask);
	if (rc != X86EMUL_CONTINUE)
		goto done;

	/*
	 * Decode and fetch the second source operand: register, memory
	 * or immediate.
	 */
	rc = decode_operand(ctxt, &ctxt->src2, (ctxt->d >> Src2Shift) & OpMask);
	if (rc != X86EMUL_CONTINUE)
		goto done;

	/* Decode and fetch the destination operand: register or memory. */
	rc = decode_operand(ctxt, &ctxt->dst, (ctxt->d >> DstShift) & OpMask);

	if (ctxt->rip_relative)
		ctxt->memopp->addr.mem.ea = address_mask(ctxt,
					ctxt->memopp->addr.mem.ea + ctxt->_eip);

done:
	return (rc != X86EMUL_CONTINUE) ? EMULATION_FAILED : EMULATION_OK;
}

bool x86_page_table_writing_insn(struct x86_emulate_ctxt *ctxt)
{
	return ctxt->d & PageTable;
}

static bool string_insn_completed(struct x86_emulate_ctxt *ctxt)
{
	/* The second termination condition only applies for REPE
	 * and REPNE. Test whether the repeat string operation prefix is
	 * REPE/REPZ or REPNE/REPNZ and, if so, check the corresponding
	 * termination condition:
	 * 	- if REPE/REPZ and ZF = 0 then done
	 * 	- if REPNE/REPNZ and ZF = 1 then done
	 */
	if (((ctxt->b == 0xa6) || (ctxt->b == 0xa7) ||
	     (ctxt->b == 0xae) || (ctxt->b == 0xaf))
	    && (((ctxt->rep_prefix == REPE_PREFIX) &&
		 ((ctxt->eflags & EFLG_ZF) == 0))
		|| ((ctxt->rep_prefix == REPNE_PREFIX) &&
		    ((ctxt->eflags & EFLG_ZF) == EFLG_ZF))))
		return true;

	return false;
}

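/*
 * Execute fwait with an exception fixup so that a pending x87 fault is
 * surfaced here (and re-injected as #MF) instead of firing later in an
 * unexpected context.
 */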
static int flush_pending_x87_faults(struct x86_emulate_ctxt *ctxt)
{
	bool fault = false;

	ctxt->ops->get_fpu(ctxt);
	asm volatile("1: fwait \n\t"
		     "2: \n\t"
		     ".pushsection .fixup,\"ax\" \n\t"
		     "3: \n\t"
		     "movb $1, %[fault] \n\t"
		     "jmp 2b \n\t"
		     ".popsection \n\t"
		     _ASM_EXTABLE(1b, 3b)
		     : [fault]"+qm"(fault));
	ctxt->ops->put_fpu(ctxt);

	if (unlikely(fault))
		return emulate_exception(ctxt, MF_VECTOR, 0, false);

	return X86EMUL_CONTINUE;
}

static void fetch_possible_mmx_operand(struct x86_emulate_ctxt *ctxt,
				       struct operand *op)
{
	if (op->type == OP_MM)
		read_mmx_reg(ctxt, &op->mm_val, op->addr.mm);
}

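/*
 * Dispatch to a fastop stub: the stubs for the 2/4/8-byte variants are
 * laid out at fixed FASTOP_SIZE strides after the 1-byte one, so the
 * entry point is picked by offsetting the function pointer; flags are
 * passed in and out through pushf/popf around the call.
 */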
static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *))
{
	ulong flags = (ctxt->eflags & EFLAGS_MASK) | X86_EFLAGS_IF;
	if (!(ctxt->d & ByteOp))
		fop += __ffs(ctxt->dst.bytes) * FASTOP_SIZE;
	asm("push %[flags]; popf; call *%[fastop]; pushf; pop %[flags]\n"
	    : "+a"(ctxt->dst.val), "+d"(ctxt->src.val), [flags]"+D"(flags),
	      [fastop]"+S"(fop)
	    : "c"(ctxt->src2.val));
	ctxt->eflags = (ctxt->eflags & ~EFLAGS_MASK) | (flags & EFLAGS_MASK);
	if (!fop) /* exception is returned in fop variable */
		return emulate_de(ctxt);
	return X86EMUL_CONTINUE;
}

void init_decode_cache(struct x86_emulate_ctxt *ctxt)
{
	memset(&ctxt->rip_relative, 0,
	       (void *)&ctxt->modrm - (void *)&ctxt->rip_relative);

	ctxt->io_read.pos = 0;
	ctxt->io_read.end = 0;
	ctxt->mem_read.end = 0;
}

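/*
 * Execute one previously decoded instruction: run the per-instruction
 * permission and intercept checks, fetch memory operands, invoke the
 * handler (or fastop), then write back results and advance RIP.
 */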
int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
{
	const struct x86_emulate_ops *ops = ctxt->ops;
	int rc = X86EMUL_CONTINUE;
	int saved_dst_type = ctxt->dst.type;

	ctxt->mem_read.pos = 0;

	/* LOCK prefix is allowed only with some instructions */
	if (ctxt->lock_prefix && (!(ctxt->d & Lock) || ctxt->dst.type != OP_MEM)) {
		rc = emulate_ud(ctxt);
		goto done;
	}

	if ((ctxt->d & SrcMask) == SrcMemFAddr && ctxt->src.type != OP_MEM) {
		rc = emulate_ud(ctxt);
		goto done;
	}

	if (unlikely(ctxt->d &
		     (No64|Undefined|Sse|Mmx|Intercept|CheckPerm|Priv|Prot|String))) {
		if ((ctxt->mode == X86EMUL_MODE_PROT64 && (ctxt->d & No64)) ||
				(ctxt->d & Undefined)) {
			rc = emulate_ud(ctxt);
			goto done;
		}

		if (((ctxt->d & (Sse|Mmx)) && ((ops->get_cr(ctxt, 0) & X86_CR0_EM)))
		    || ((ctxt->d & Sse) && !(ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR))) {
			rc = emulate_ud(ctxt);
			goto done;
		}

		if ((ctxt->d & (Sse|Mmx)) && (ops->get_cr(ctxt, 0) & X86_CR0_TS)) {
			rc = emulate_nm(ctxt);
			goto done;
		}

		if (ctxt->d & Mmx) {
			rc = flush_pending_x87_faults(ctxt);
			if (rc != X86EMUL_CONTINUE)
				goto done;
			/*
			 * Now that we know the fpu is exception safe, we can fetch
			 * operands from it.
			 */
			fetch_possible_mmx_operand(ctxt, &ctxt->src);
			fetch_possible_mmx_operand(ctxt, &ctxt->src2);
			if (!(ctxt->d & Mov))
				fetch_possible_mmx_operand(ctxt, &ctxt->dst);
		}

		if (unlikely(ctxt->guest_mode) && (ctxt->d & Intercept)) {
			rc = emulator_check_intercept(ctxt, ctxt->intercept,
						      X86_ICPT_PRE_EXCEPT);
			if (rc != X86EMUL_CONTINUE)
				goto done;
		}

		/* Privileged instruction can be executed only in CPL=0 */
		if ((ctxt->d & Priv) && ops->cpl(ctxt)) {
			if (ctxt->d & PrivUD)
				rc = emulate_ud(ctxt);
			else
				rc = emulate_gp(ctxt, 0);
			goto done;
		}

		/* Instruction can only be executed in protected mode */
		if ((ctxt->d & Prot) && ctxt->mode < X86EMUL_MODE_PROT16) {
			rc = emulate_ud(ctxt);
			goto done;
		}

		/* Do instruction specific permission checks */
		if (ctxt->d & CheckPerm) {
			rc = ctxt->check_perm(ctxt);
			if (rc != X86EMUL_CONTINUE)
				goto done;
		}

		if (unlikely(ctxt->guest_mode) && (ctxt->d & Intercept)) {
			rc = emulator_check_intercept(ctxt, ctxt->intercept,
						      X86_ICPT_POST_EXCEPT);
			if (rc != X86EMUL_CONTINUE)
				goto done;
		}

		if (ctxt->rep_prefix && (ctxt->d & String)) {
			/* All REP prefixes have the same first termination condition */
			if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0) {
				ctxt->eip = ctxt->_eip;
				ctxt->eflags &= ~EFLG_RF;
				goto done;
			}
		}
	}

	if ((ctxt->src.type == OP_MEM) && !(ctxt->d & NoAccess)) {
		rc = segmented_read(ctxt, ctxt->src.addr.mem,
				    ctxt->src.valptr, ctxt->src.bytes);
		if (rc != X86EMUL_CONTINUE)
			goto done;
		ctxt->src.orig_val64 = ctxt->src.val64;
	}

	if (ctxt->src2.type == OP_MEM) {
		rc = segmented_read(ctxt, ctxt->src2.addr.mem,
				    &ctxt->src2.val, ctxt->src2.bytes);
		if (rc != X86EMUL_CONTINUE)
			goto done;
	}

	if ((ctxt->d & DstMask) == ImplicitOps)
		goto special_insn;


	if ((ctxt->dst.type == OP_MEM) && !(ctxt->d & Mov)) {
		/* optimisation - avoid slow emulated read if Mov */
		rc = segmented_read(ctxt, ctxt->dst.addr.mem,
				   &ctxt->dst.val, ctxt->dst.bytes);
		if (rc != X86EMUL_CONTINUE)
			goto done;
	}
	ctxt->dst.orig_val = ctxt->dst.val;

special_insn:

	if (unlikely(ctxt->guest_mode) && (ctxt->d & Intercept)) {
		rc = emulator_check_intercept(ctxt, ctxt->intercept,
					      X86_ICPT_POST_MEMACCESS);
		if (rc != X86EMUL_CONTINUE)
			goto done;
	}

	if (ctxt->rep_prefix && (ctxt->d & String))
		ctxt->eflags |= EFLG_RF;
	else
		ctxt->eflags &= ~EFLG_RF;

	if (ctxt->execute) {
		if (ctxt->d & Fastop) {
			void (*fop)(struct fastop *) = (void *)ctxt->execute;
			rc = fastop(ctxt, fop);
			if (rc != X86EMUL_CONTINUE)
				goto done;
			goto writeback;
		}
		rc = ctxt->execute(ctxt);
		if (rc != X86EMUL_CONTINUE)
			goto done;
		goto writeback;
	}

	if (ctxt->opcode_len == 2)
		goto twobyte_insn;
	else if (ctxt->opcode_len == 3)
		goto threebyte_insn;

	switch (ctxt->b) {
	case 0x63:		/* movsxd */
		if (ctxt->mode != X86EMUL_MODE_PROT64)
			goto cannot_emulate;
		ctxt->dst.val = (s32) ctxt->src.val;
		break;
	case 0x70 ... 0x7f: /* jcc (short) */
		if (test_cc(ctxt->b, ctxt->eflags))
			rc = jmp_rel(ctxt, ctxt->src.val);
		break;
	case 0x8d: /* lea r16/r32, m */
		ctxt->dst.val = ctxt->src.addr.mem.ea;
		break;
	case 0x90 ... 0x97: /* nop / xchg reg, rax */
		if (ctxt->dst.addr.reg == reg_rmw(ctxt, VCPU_REGS_RAX))
			ctxt->dst.type = OP_NONE;
		else
			rc = em_xchg(ctxt);
		break;
	case 0x98: /* cbw/cwde/cdqe */
		switch (ctxt->op_bytes) {
		case 2: ctxt->dst.val = (s8)ctxt->dst.val; break;
		case 4: ctxt->dst.val = (s16)ctxt->dst.val; break;
		case 8: ctxt->dst.val = (s32)ctxt->dst.val; break;
		}
		break;
	case 0xcc:		/* int3 */
		rc = emulate_int(ctxt, 3);
		break;
	case 0xcd:		/* int n */
		rc = emulate_int(ctxt, ctxt->src.val);
		break;
	case 0xce:		/* into */
		if (ctxt->eflags & EFLG_OF)
			rc = emulate_int(ctxt, 4);
		break;
	case 0xe9: /* jmp rel */
	case 0xeb: /* jmp rel short */
		rc = jmp_rel(ctxt, ctxt->src.val);
		ctxt->dst.type = OP_NONE; /* Disable writeback. */
		break;
	case 0xf4:              /* hlt */
		ctxt->ops->halt(ctxt);
		break;
	case 0xf5:	/* cmc */
		/* complement carry flag from eflags reg */
		ctxt->eflags ^= EFLG_CF;
		break;
	case 0xf8: /* clc */
		ctxt->eflags &= ~EFLG_CF;
		break;
	case 0xf9: /* stc */
		ctxt->eflags |= EFLG_CF;
		break;
	case 0xfc: /* cld */
		ctxt->eflags &= ~EFLG_DF;
		break;
	case 0xfd: /* std */
		ctxt->eflags |= EFLG_DF;
		break;
	default:
		goto cannot_emulate;
	}

	if (rc != X86EMUL_CONTINUE)
		goto done;

writeback:
	if (ctxt->d & SrcWrite) {
		BUG_ON(ctxt->src.type == OP_MEM || ctxt->src.type == OP_MEM_STR);
		rc = writeback(ctxt, &ctxt->src);
		if (rc != X86EMUL_CONTINUE)
			goto done;
	}
	if (!(ctxt->d & NoWrite)) {
		rc = writeback(ctxt, &ctxt->dst);
		if (rc != X86EMUL_CONTINUE)
			goto done;
	}

	/*
	 * restore dst type in case the decoding will be reused
	 * (happens for string instructions)
	 */
	ctxt->dst.type = saved_dst_type;

	if ((ctxt->d & SrcMask) == SrcSI)
		string_addr_inc(ctxt, VCPU_REGS_RSI, &ctxt->src);

	if ((ctxt->d & DstMask) == DstDI)
		string_addr_inc(ctxt, VCPU_REGS_RDI, &ctxt->dst);

	if (ctxt->rep_prefix && (ctxt->d & String)) {
		unsigned int count;
		struct read_cache *r = &ctxt->io_read;
		if ((ctxt->d & SrcMask) == SrcSI)
			count = ctxt->src.count;
		else
			count = ctxt->dst.count;
		register_address_increment(ctxt, reg_rmw(ctxt, VCPU_REGS_RCX),
				-count);

		if (!string_insn_completed(ctxt)) {
			/*
			 * Re-enter guest when pio read ahead buffer is empty
			 * or, if it is not used, after every 1024 iterations.
			 */
			if ((r->end != 0 || reg_read(ctxt, VCPU_REGS_RCX) & 0x3ff) &&
			    (r->end == 0 || r->end != r->pos)) {
				/*
				 * Reset read cache. Usually happens before
				 * decode, but since instruction is restarted
				 * we have to do it here.
				 */
				ctxt->mem_read.end = 0;
				writeback_registers(ctxt);
				return EMULATION_RESTART;
			}
			goto done; /* skip rip writeback */
		}
		ctxt->eflags &= ~EFLG_RF;
	}

	ctxt->eip = ctxt->_eip;

done:
	if (rc == X86EMUL_PROPAGATE_FAULT) {
		WARN_ON(ctxt->exception.vector > 0x1f);
		ctxt->have_exception = true;
	}
	if (rc == X86EMUL_INTERCEPTED)
		return EMULATION_INTERCEPTED;

	if (rc == X86EMUL_CONTINUE)
		writeback_registers(ctxt);

	return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK;

twobyte_insn:
	switch (ctxt->b) {
	case 0x09:		/* wbinvd */
		(ctxt->ops->wbinvd)(ctxt);
		break;
	case 0x08:		/* invd */
	case 0x0d:		/* GrpP (prefetch) */
	case 0x18:		/* Grp16 (prefetch/nop) */
	case 0x1f:		/* nop */
		break;
	case 0x20: /* mov cr, reg */
		ctxt->dst.val = ops->get_cr(ctxt, ctxt->modrm_reg);
		break;
	case 0x21: /* mov from dr to reg */
		ops->get_dr(ctxt, ctxt->modrm_reg, &ctxt->dst.val);
		break;
	case 0x40 ... 0x4f:	/* cmov */
		if (test_cc(ctxt->b, ctxt->eflags))
			ctxt->dst.val = ctxt->src.val;
		else if (ctxt->mode != X86EMUL_MODE_PROT64 ||
			 ctxt->op_bytes != 4)
			ctxt->dst.type = OP_NONE; /* no writeback */
		break;
	case 0x80 ... 0x8f: /* jnz rel, etc*/
		if (test_cc(ctxt->b, ctxt->eflags))
			rc = jmp_rel(ctxt, ctxt->src.val);
		break;
	case 0x90 ... 0x9f:     /* setcc r/m8 */
		ctxt->dst.val = test_cc(ctxt->b, ctxt->eflags);
		break;
	case 0xb6 ... 0xb7:	/* movzx */
		ctxt->dst.bytes = ctxt->op_bytes;
		ctxt->dst.val = (ctxt->src.bytes == 1) ? (u8) ctxt->src.val
						       : (u16) ctxt->src.val;
		break;
	case 0xbe ... 0xbf:	/* movsx */
		ctxt->dst.bytes = ctxt->op_bytes;
		ctxt->dst.val = (ctxt->src.bytes == 1) ? (s8) ctxt->src.val :
							(s16) ctxt->src.val;
		break;
	default:
		goto cannot_emulate;
	}

threebyte_insn:

	if (rc != X86EMUL_CONTINUE)
		goto done;

	goto writeback;

cannot_emulate:
	return EMULATION_FAILED;
}

void emulator_invalidate_register_cache(struct x86_emulate_ctxt *ctxt)
{
	invalidate_registers(ctxt);
}

void emulator_writeback_register_cache(struct x86_emulate_ctxt *ctxt)
{
	writeback_registers(ctxt);
}