/******************************************************************************
 * emulate.c
 *
 * Generic x86 (32-bit and 64-bit) instruction decoder and emulator.
 *
 * Copyright (c) 2005 Keir Fraser
 *
 * Linux coding style, mod r/m decoder, segment base fixes, real-mode
 * privileged instructions:
 *
 * Copyright (C) 2006 Qumranet
 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
 *
 *   Avi Kivity <avi@qumranet.com>
 *   Yaniv Kamay <yaniv@qumranet.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 * From: xen-unstable 10676:af9809f51f81a3c43f276f00c81a52ef558afda4
 */

#include <linux/kvm_host.h>
#include "kvm_cache_regs.h"
#include <asm/kvm_emulate.h>
#include <linux/stringify.h>
#include <asm/debugreg.h>
#include <asm/nospec-branch.h>

#include "x86.h"
#include "tss.h"
#include "mmu.h"

/*
 * Operand types
 */
#define OpNone             0ull
#define OpImplicit         1ull  /* No generic decode */
#define OpReg              2ull  /* Register */
#define OpMem              3ull  /* Memory */
#define OpAcc              4ull  /* Accumulator: AL/AX/EAX/RAX */
#define OpDI               5ull  /* ES:DI/EDI/RDI */
#define OpMem64            6ull  /* Memory, 64-bit */
#define OpImmUByte         7ull  /* Zero-extended 8-bit immediate */
#define OpDX               8ull  /* DX register */
#define OpCL               9ull  /* CL register (for shifts) */
#define OpImmByte         10ull  /* 8-bit sign extended immediate */
#define OpOne             11ull  /* Implied 1 */
#define OpImm             12ull  /* Sign extended up to 32-bit immediate */
#define OpMem16           13ull  /* Memory operand (16-bit). */
#define OpMem32           14ull  /* Memory operand (32-bit). */
#define OpImmU            15ull  /* Immediate operand, zero extended */
#define OpSI              16ull  /* SI/ESI/RSI */
#define OpImmFAddr        17ull  /* Immediate far address */
#define OpMemFAddr        18ull  /* Far address in memory */
#define OpImmU16          19ull  /* Immediate operand, 16 bits, zero extended */
#define OpES              20ull  /* ES */
#define OpCS              21ull  /* CS */
#define OpSS              22ull  /* SS */
#define OpDS              23ull  /* DS */
#define OpFS              24ull  /* FS */
#define OpGS              25ull  /* GS */
#define OpMem8            26ull  /* 8-bit zero extended memory operand */
#define OpImm64           27ull  /* Sign extended 16/32/64-bit immediate */
#define OpXLat            28ull  /* memory at BX/EBX/RBX + zero-extended AL */
#define OpAccLo           29ull  /* Low part of extended acc (AX/AX/EAX/RAX) */
#define OpAccHi           30ull  /* High part of extended acc (-/DX/EDX/RDX) */

#define OpBits             5  /* Width of operand field */
#define OpMask             ((1ull << OpBits) - 1)

/*
 * Opcode effective-address decode tables.
 * Note that we only emulate instructions that have at least one memory
 * operand (excluding implicit stack references). We assume that stack
 * references and instruction fetches will never occur in special memory
 * areas that require emulation. So, for example, 'mov <imm>,<reg>' need
 * not be handled.
 */

/* Operand sizes: 8-bit operands or specified/overridden size. */
#define ByteOp      (1<<0)	/* 8-bit operands. */
/* Destination operand type. */
#define DstShift    1
#define ImplicitOps (OpImplicit << DstShift)
#define DstReg      (OpReg << DstShift)
#define DstMem      (OpMem << DstShift)
#define DstAcc      (OpAcc << DstShift)
#define DstDI       (OpDI << DstShift)
#define DstMem64    (OpMem64 << DstShift)
#define DstMem16    (OpMem16 << DstShift)
#define DstImmUByte (OpImmUByte << DstShift)
#define DstDX       (OpDX << DstShift)
#define DstAccLo    (OpAccLo << DstShift)
#define DstMask     (OpMask << DstShift)
/* Source operand type. */
#define SrcShift    6
#define SrcNone     (OpNone << SrcShift)
#define SrcReg      (OpReg << SrcShift)
#define SrcMem      (OpMem << SrcShift)
#define SrcMem16    (OpMem16 << SrcShift)
#define SrcMem32    (OpMem32 << SrcShift)
#define SrcImm      (OpImm << SrcShift)
#define SrcImmByte  (OpImmByte << SrcShift)
#define SrcOne      (OpOne << SrcShift)
#define SrcImmUByte (OpImmUByte << SrcShift)
#define SrcImmU     (OpImmU << SrcShift)
#define SrcSI       (OpSI << SrcShift)
#define SrcXLat     (OpXLat << SrcShift)
#define SrcImmFAddr (OpImmFAddr << SrcShift)
#define SrcMemFAddr (OpMemFAddr << SrcShift)
#define SrcAcc      (OpAcc << SrcShift)
#define SrcImmU16   (OpImmU16 << SrcShift)
#define SrcImm64    (OpImm64 << SrcShift)
#define SrcDX       (OpDX << SrcShift)
#define SrcMem8     (OpMem8 << SrcShift)
#define SrcAccHi    (OpAccHi << SrcShift)
#define SrcMask     (OpMask << SrcShift)
#define BitOp       (1<<11)
#define MemAbs      (1<<12)      /* Memory operand is absolute displacement */
#define String      (1<<13)     /* String instruction (rep capable) */
#define Stack       (1<<14)     /* Stack instruction (push/pop) */
#define GroupMask   (7<<15)     /* Opcode uses one of the group mechanisms */
#define Group       (1<<15)     /* Bits 3:5 of modrm byte extend opcode */
#define GroupDual   (2<<15)     /* Alternate decoding of mod == 3 */
#define Prefix      (3<<15)     /* Instruction varies with 66/f2/f3 prefix */
#define RMExt       (4<<15)     /* Opcode extension in ModRM r/m if mod == 3 */
#define Escape      (5<<15)     /* Escape to coprocessor instruction */
#define InstrDual   (6<<15)     /* Alternate instruction decoding of mod == 3 */
#define ModeDual    (7<<15)     /* Different instruction for 32/64 bit */
#define Sse         (1<<18)     /* SSE Vector instruction */
/* Generic ModRM decode. */
#define ModRM       (1<<19)
/* Destination is only written; never read. */
#define Mov         (1<<20)
/* Misc flags */
#define Prot        (1<<21) /* instruction generates #UD if not in prot-mode */
#define EmulateOnUD (1<<22) /* Emulate if unsupported by the host */
#define NoAccess    (1<<23) /* Don't access memory (lea/invlpg/verr etc) */
#define Op3264      (1<<24) /* Operand is 64b in long mode, 32b otherwise */
#define Undefined   (1<<25) /* No Such Instruction */
#define Lock        (1<<26) /* lock prefix is allowed for the instruction */
#define Priv        (1<<27) /* instruction generates #GP if current CPL != 0 */
#define No64	    (1<<28)
#define PageTable   (1 << 29)   /* instruction used to write page table */
#define NotImpl     (1 << 30)   /* instruction is not implemented */
/* Source 2 operand type */
#define Src2Shift   (31)
#define Src2None    (OpNone << Src2Shift)
#define Src2Mem     (OpMem << Src2Shift)
#define Src2CL      (OpCL << Src2Shift)
#define Src2ImmByte (OpImmByte << Src2Shift)
#define Src2One     (OpOne << Src2Shift)
#define Src2Imm     (OpImm << Src2Shift)
#define Src2ES      (OpES << Src2Shift)
#define Src2CS      (OpCS << Src2Shift)
#define Src2SS      (OpSS << Src2Shift)
#define Src2DS      (OpDS << Src2Shift)
#define Src2FS      (OpFS << Src2Shift)
#define Src2GS      (OpGS << Src2Shift)
#define Src2Mask    (OpMask << Src2Shift)
#define Mmx         ((u64)1 << 40)  /* MMX Vector instruction */
#define AlignMask   ((u64)7 << 41)
#define Aligned     ((u64)1 << 41)  /* Explicitly aligned (e.g. MOVDQA) */
#define Unaligned   ((u64)2 << 41)  /* Explicitly unaligned (e.g. MOVDQU) */
#define Avx         ((u64)3 << 41)  /* Advanced Vector Extensions */
#define Aligned16   ((u64)4 << 41)  /* Aligned to 16 byte boundary (e.g. FXSAVE) */
#define Fastop      ((u64)1 << 44)  /* Use opcode::u.fastop */
#define NoWrite     ((u64)1 << 45)  /* No writeback */
#define SrcWrite    ((u64)1 << 46)  /* Write back src operand */
#define NoMod	    ((u64)1 << 47)  /* Mod field is ignored */
#define Intercept   ((u64)1 << 48)  /* Has valid intercept field */
#define CheckPerm   ((u64)1 << 49)  /* Has valid check_perm field */
#define PrivUD      ((u64)1 << 51)  /* #UD instead of #GP on CPL > 0 */
#define NearBranch  ((u64)1 << 52)  /* Near branches */
#define No16	    ((u64)1 << 53)  /* No 16 bit operand */
#define IncSP       ((u64)1 << 54)  /* SP is incremented before ModRM calc */
#define TwoMemOp    ((u64)1 << 55)  /* Instruction has two memory operands */

#define DstXacc     (DstAccLo | SrcAccHi | SrcWrite)
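
/*
 * Illustrative sketch (not part of the decode tables themselves): the operand
 * kinds above are packed into the 56-bit opcode::flags field in 5-bit slots,
 * so an entry such as DstReg | SrcMem | ModRM is interpreted as
 *
 *	((flags >> DstShift) & OpMask) == OpReg	-> destination is a register
 *	((flags >> SrcShift) & OpMask) == OpMem	-> source comes from memory
 *	(flags & ModRM) != 0			-> a ModRM byte follows
 *
 * Src2Shift places the third operand at bit 31, which is why the remaining
 * single-bit flags resume at bit 40 and up as (u64) constants.
 */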

#define X2(x...) x, x
#define X3(x...) X2(x), x
#define X4(x...) X2(x), X2(x)
#define X5(x...) X4(x), x
#define X6(x...) X4(x), X2(x)
#define X7(x...) X4(x), X3(x)
#define X8(x...) X4(x), X4(x)
#define X16(x...) X8(x), X8(x)

#define NR_FASTOP (ilog2(sizeof(ulong)) + 1)
#define FASTOP_SIZE 8

/*
 * fastop functions have a special calling convention:
 *
 * dst:    rax        (in/out)
 * src:    rdx        (in/out)
 * src2:   rcx        (in)
 * flags:  rflags     (in/out)
 * ex:     rsi        (in:fastop pointer, out:zero if exception)
 *
 * Moreover, they are all exactly FASTOP_SIZE bytes long, so functions for
 * different operand sizes can be reached by calculation, rather than a jump
 * table (which would be bigger than the code).
 *
 * fastop functions are declared as taking a never-defined fastop parameter,
 * so they can't be called from C directly.
 */

struct fastop;
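
/*
 * A sketch of how the layout above is exploited (the real dispatch helper,
 * fastop(), appears further down in this file): every size variant emitted
 * by the FASTOP* macros below is exactly FASTOP_SIZE bytes long, so the
 * 16/32/64-bit entry points of e.g. em_add can be reached by arithmetic
 * instead of a jump table:
 *
 *	void (*fop)(struct fastop *) = em_add;
 *	if (!(ctxt->d & ByteOp))
 *		fop = (void *)fop + __ffs(ctxt->dst.bytes) * FASTOP_SIZE;
 *	rc = fastop(ctxt, fop);
 */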

struct opcode {
	u64 flags : 56;
	u64 intercept : 8;
	union {
		int (*execute)(struct x86_emulate_ctxt *ctxt);
		const struct opcode *group;
		const struct group_dual *gdual;
		const struct gprefix *gprefix;
		const struct escape *esc;
		const struct instr_dual *idual;
		const struct mode_dual *mdual;
		void (*fastop)(struct fastop *fake);
	} u;
	int (*check_perm)(struct x86_emulate_ctxt *ctxt);
};

struct group_dual {
	struct opcode mod012[8];
	struct opcode mod3[8];
};

struct gprefix {
	struct opcode pfx_no;
	struct opcode pfx_66;
	struct opcode pfx_f2;
	struct opcode pfx_f3;
};

struct escape {
	struct opcode op[8];
	struct opcode high[64];
};

struct instr_dual {
	struct opcode mod012;
	struct opcode mod3;
};

struct mode_dual {
	struct opcode mode32;
	struct opcode mode64;
};

#define EFLG_RESERVED_ZEROS_MASK 0xffc0802a

enum x86_transfer_type {
	X86_TRANSFER_NONE,
	X86_TRANSFER_CALL_JMP,
	X86_TRANSFER_RET,
	X86_TRANSFER_TASK_SWITCH,
};

static ulong reg_read(struct x86_emulate_ctxt *ctxt, unsigned nr)
{
	if (!(ctxt->regs_valid & (1 << nr))) {
		ctxt->regs_valid |= 1 << nr;
		ctxt->_regs[nr] = ctxt->ops->read_gpr(ctxt, nr);
	}
	return ctxt->_regs[nr];
}

static ulong *reg_write(struct x86_emulate_ctxt *ctxt, unsigned nr)
{
	ctxt->regs_valid |= 1 << nr;
	ctxt->regs_dirty |= 1 << nr;
	return &ctxt->_regs[nr];
}

static ulong *reg_rmw(struct x86_emulate_ctxt *ctxt, unsigned nr)
{
	reg_read(ctxt, nr);
	return reg_write(ctxt, nr);
}

static void writeback_registers(struct x86_emulate_ctxt *ctxt)
{
	unsigned reg;

	for_each_set_bit(reg, (ulong *)&ctxt->regs_dirty, 16)
		ctxt->ops->write_gpr(ctxt, reg, ctxt->_regs[reg]);
}

static void invalidate_registers(struct x86_emulate_ctxt *ctxt)
{
	ctxt->regs_dirty = 0;
	ctxt->regs_valid = 0;
}

/*
 * These EFLAGS bits are restored from saved value during emulation, and
 * any changes are written back to the saved value after emulation.
 */
#define EFLAGS_MASK (X86_EFLAGS_OF|X86_EFLAGS_SF|X86_EFLAGS_ZF|X86_EFLAGS_AF|\
		     X86_EFLAGS_PF|X86_EFLAGS_CF)

#ifdef CONFIG_X86_64
#define ON64(x) x
#else
#define ON64(x)
#endif

static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *));

#define FOP_FUNC(name) \
	".align " __stringify(FASTOP_SIZE) " \n\t" \
	".type " name ", @function \n\t" \
	name ":\n\t"

#define FOP_RET   "ret \n\t"

#define FOP_START(op) \
	extern void em_##op(struct fastop *fake); \
	asm(".pushsection .text, \"ax\" \n\t" \
	    ".global em_" #op " \n\t" \
	    FOP_FUNC("em_" #op)

#define FOP_END \
	    ".popsection")

#define FOPNOP() \
	FOP_FUNC(__stringify(__UNIQUE_ID(nop))) \
	FOP_RET

#define FOP1E(op,  dst) \
	FOP_FUNC(#op "_" #dst) \
	"10: " #op " %" #dst " \n\t" FOP_RET

#define FOP1EEX(op,  dst) \
	FOP1E(op, dst) _ASM_EXTABLE(10b, kvm_fastop_exception)

#define FASTOP1(op) \
	FOP_START(op) \
	FOP1E(op##b, al) \
	FOP1E(op##w, ax) \
	FOP1E(op##l, eax) \
	ON64(FOP1E(op##q, rax))	\
	FOP_END

/* 1-operand, using src2 (for MUL/DIV r/m) */
#define FASTOP1SRC2(op, name) \
	FOP_START(name) \
	FOP1E(op, cl) \
	FOP1E(op, cx) \
	FOP1E(op, ecx) \
	ON64(FOP1E(op, rcx)) \
	FOP_END

/* 1-operand, using src2 (for MUL/DIV r/m), with exceptions */
#define FASTOP1SRC2EX(op, name) \
	FOP_START(name) \
	FOP1EEX(op, cl) \
	FOP1EEX(op, cx) \
	FOP1EEX(op, ecx) \
	ON64(FOP1EEX(op, rcx)) \
	FOP_END

#define FOP2E(op,  dst, src)	   \
	FOP_FUNC(#op "_" #dst "_" #src) \
	#op " %" #src ", %" #dst " \n\t" FOP_RET

#define FASTOP2(op) \
	FOP_START(op) \
	FOP2E(op##b, al, dl) \
	FOP2E(op##w, ax, dx) \
	FOP2E(op##l, eax, edx) \
	ON64(FOP2E(op##q, rax, rdx)) \
	FOP_END

/* 2 operand, word only */
#define FASTOP2W(op) \
	FOP_START(op) \
	FOPNOP() \
	FOP2E(op##w, ax, dx) \
	FOP2E(op##l, eax, edx) \
	ON64(FOP2E(op##q, rax, rdx)) \
	FOP_END

/* 2 operand, src is CL */
#define FASTOP2CL(op) \
	FOP_START(op) \
	FOP2E(op##b, al, cl) \
	FOP2E(op##w, ax, cl) \
	FOP2E(op##l, eax, cl) \
	ON64(FOP2E(op##q, rax, cl)) \
	FOP_END

/* 2 operand, src and dest are reversed */
#define FASTOP2R(op, name) \
	FOP_START(name) \
	FOP2E(op##b, dl, al) \
	FOP2E(op##w, dx, ax) \
	FOP2E(op##l, edx, eax) \
	ON64(FOP2E(op##q, rdx, rax)) \
	FOP_END

#define FOP3E(op,  dst, src, src2) \
	FOP_FUNC(#op "_" #dst "_" #src "_" #src2) \
	#op " %" #src2 ", %" #src ", %" #dst " \n\t" FOP_RET

/* 3-operand, word-only, src2=cl */
#define FASTOP3WCL(op) \
	FOP_START(op) \
	FOPNOP() \
	FOP3E(op##w, ax, dx, cl) \
	FOP3E(op##l, eax, edx, cl) \
	ON64(FOP3E(op##q, rax, rdx, cl)) \
	FOP_END

/* Special case for SETcc - 1 instruction per cc */
#define FOP_SETCC(op) \
	".align 4 \n\t" \
	".type " #op ", @function \n\t" \
	#op ": \n\t" \
	#op " %al \n\t" \
	FOP_RET

asm(".pushsection .fixup, \"ax\"\n"
    ".global kvm_fastop_exception \n"
    "kvm_fastop_exception: xor %esi, %esi; ret\n"
    ".popsection");

FOP_START(setcc)
FOP_SETCC(seto)
FOP_SETCC(setno)
FOP_SETCC(setc)
FOP_SETCC(setnc)
FOP_SETCC(setz)
FOP_SETCC(setnz)
FOP_SETCC(setbe)
FOP_SETCC(setnbe)
FOP_SETCC(sets)
FOP_SETCC(setns)
FOP_SETCC(setp)
FOP_SETCC(setnp)
FOP_SETCC(setl)
FOP_SETCC(setnl)
FOP_SETCC(setle)
FOP_SETCC(setnle)
FOP_END;

FOP_START(salc) "pushf; sbb %al, %al; popf \n\t" FOP_RET
FOP_END;

/*
 * XXX: inoutclob user must know where the argument is being expanded.
 *      Relying on CC_HAVE_ASM_GOTO would allow us to remove _fault.
 */
#define asm_safe(insn, inoutclob...) \
({ \
	int _fault = 0; \
 \
	asm volatile("1:" insn "\n" \
	             "2:\n" \
	             ".pushsection .fixup, \"ax\"\n" \
	             "3: movl $1, %[_fault]\n" \
	             "   jmp  2b\n" \
	             ".popsection\n" \
	             _ASM_EXTABLE(1b, 3b) \
	             : [_fault] "+qm"(_fault) inoutclob ); \
 \
	_fault ? X86EMUL_UNHANDLEABLE : X86EMUL_CONTINUE; \
})

static int emulator_check_intercept(struct x86_emulate_ctxt *ctxt,
				    enum x86_intercept intercept,
				    enum x86_intercept_stage stage)
{
	struct x86_instruction_info info = {
		.intercept  = intercept,
		.rep_prefix = ctxt->rep_prefix,
		.modrm_mod  = ctxt->modrm_mod,
		.modrm_reg  = ctxt->modrm_reg,
		.modrm_rm   = ctxt->modrm_rm,
		.src_val    = ctxt->src.val64,
		.dst_val    = ctxt->dst.val64,
		.src_bytes  = ctxt->src.bytes,
		.dst_bytes  = ctxt->dst.bytes,
		.ad_bytes   = ctxt->ad_bytes,
		.next_rip   = ctxt->eip,
	};

	return ctxt->ops->intercept(ctxt, &info, stage);
}

static void assign_masked(ulong *dest, ulong src, ulong mask)
{
	*dest = (*dest & ~mask) | (src & mask);
}

static void assign_register(unsigned long *reg, u64 val, int bytes)
{
	/* The 4-byte case *is* correct: in 64-bit mode we zero-extend. */
	switch (bytes) {
	case 1:
		*(u8 *)reg = (u8)val;
		break;
	case 2:
		*(u16 *)reg = (u16)val;
		break;
	case 4:
		*reg = (u32)val;
		break;	/* 64b: zero-extend */
	case 8:
		*reg = val;
		break;
	}
}
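
/*
 * Example of the zero-extension rule above (illustrative only): starting
 * from *reg == 0xffffffffffffffffUL,
 *
 *	assign_register(reg, 0x1234, 2) yields 0xffffffffffff1234 (high bits kept),
 *	assign_register(reg, 0x1234, 4) yields 0x0000000000001234 (zero-extended),
 *
 * which matches how 16-bit and 32-bit register writes behave on real
 * hardware in 64-bit mode.
 */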

static inline unsigned long ad_mask(struct x86_emulate_ctxt *ctxt)
{
	return (1UL << (ctxt->ad_bytes << 3)) - 1;
}

static ulong stack_mask(struct x86_emulate_ctxt *ctxt)
{
	u16 sel;
	struct desc_struct ss;

	if (ctxt->mode == X86EMUL_MODE_PROT64)
		return ~0UL;
	ctxt->ops->get_segment(ctxt, &sel, &ss, NULL, VCPU_SREG_SS);
	return ~0U >> ((ss.d ^ 1) * 16);  /* d=0: 0xffff; d=1: 0xffffffff */
}

static int stack_size(struct x86_emulate_ctxt *ctxt)
{
	return (__fls(stack_mask(ctxt)) + 1) >> 3;
}

/* Access/update address held in a register, based on addressing mode. */
static inline unsigned long
address_mask(struct x86_emulate_ctxt *ctxt, unsigned long reg)
{
	if (ctxt->ad_bytes == sizeof(unsigned long))
		return reg;
	else
		return reg & ad_mask(ctxt);
}

static inline unsigned long
register_address(struct x86_emulate_ctxt *ctxt, int reg)
{
	return address_mask(ctxt, reg_read(ctxt, reg));
}

static void masked_increment(ulong *reg, ulong mask, int inc)
{
	assign_masked(reg, *reg + inc, mask);
}

static inline void
register_address_increment(struct x86_emulate_ctxt *ctxt, int reg, int inc)
{
	ulong *preg = reg_rmw(ctxt, reg);

	assign_register(preg, *preg + inc, ctxt->ad_bytes);
}

static void rsp_increment(struct x86_emulate_ctxt *ctxt, int inc)
{
	masked_increment(reg_rmw(ctxt, VCPU_REGS_RSP), stack_mask(ctxt), inc);
}

static u32 desc_limit_scaled(struct desc_struct *desc)
{
	u32 limit = get_desc_limit(desc);

	return desc->g ? (limit << 12) | 0xfff : limit;
}

static unsigned long seg_base(struct x86_emulate_ctxt *ctxt, int seg)
{
	if (ctxt->mode == X86EMUL_MODE_PROT64 && seg < VCPU_SREG_FS)
		return 0;

	return ctxt->ops->get_cached_segment_base(ctxt, seg);
}

static int emulate_exception(struct x86_emulate_ctxt *ctxt, int vec,
			     u32 error, bool valid)
{
	WARN_ON(vec > 0x1f);
	ctxt->exception.vector = vec;
	ctxt->exception.error_code = error;
	ctxt->exception.error_code_valid = valid;
	return X86EMUL_PROPAGATE_FAULT;
}

static int emulate_db(struct x86_emulate_ctxt *ctxt)
{
	return emulate_exception(ctxt, DB_VECTOR, 0, false);
}

static int emulate_gp(struct x86_emulate_ctxt *ctxt, int err)
{
	return emulate_exception(ctxt, GP_VECTOR, err, true);
}

static int emulate_ss(struct x86_emulate_ctxt *ctxt, int err)
{
	return emulate_exception(ctxt, SS_VECTOR, err, true);
}

static int emulate_ud(struct x86_emulate_ctxt *ctxt)
{
	return emulate_exception(ctxt, UD_VECTOR, 0, false);
}

static int emulate_ts(struct x86_emulate_ctxt *ctxt, int err)
{
	return emulate_exception(ctxt, TS_VECTOR, err, true);
}

static int emulate_de(struct x86_emulate_ctxt *ctxt)
{
	return emulate_exception(ctxt, DE_VECTOR, 0, false);
}

static int emulate_nm(struct x86_emulate_ctxt *ctxt)
{
	return emulate_exception(ctxt, NM_VECTOR, 0, false);
}

static u16 get_segment_selector(struct x86_emulate_ctxt *ctxt, unsigned seg)
{
	u16 selector;
	struct desc_struct desc;

	ctxt->ops->get_segment(ctxt, &selector, &desc, NULL, seg);
	return selector;
}

static void set_segment_selector(struct x86_emulate_ctxt *ctxt, u16 selector,
				 unsigned seg)
{
	u16 dummy;
	u32 base3;
	struct desc_struct desc;

	ctxt->ops->get_segment(ctxt, &dummy, &desc, &base3, seg);
	ctxt->ops->set_segment(ctxt, selector, &desc, base3, seg);
}

/*
 * x86 defines three classes of vector instructions: explicitly
 * aligned, explicitly unaligned, and the rest, which change behaviour
 * depending on whether they're AVX encoded or not.
 *
 * Also included is CMPXCHG16B which is not a vector instruction, yet it is
 * subject to the same check.  FXSAVE and FXRSTOR are checked here too as their
 * 512 bytes of data must be aligned to a 16 byte boundary.
 */
static unsigned insn_alignment(struct x86_emulate_ctxt *ctxt, unsigned size)
{
	u64 alignment = ctxt->d & AlignMask;

	if (likely(size < 16))
		return 1;

	switch (alignment) {
	case Unaligned:
	case Avx:
		return 1;
	case Aligned16:
		return 16;
	case Aligned:
	default:
		return size;
	}
}
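
/*
 * Illustrative outcomes of the check above (sketch): with a 16-byte operand,
 * an Aligned instruction such as MOVDQA requires a 16-byte-aligned address
 * (insn_alignment() returns the operand size), Unaligned/Avx encodings are
 * never rejected here (it returns 1), and Aligned16 instructions such as
 * FXSAVE only need 16-byte alignment even though they touch 512 bytes
 * (it returns 16).  __linearize() below applies the result as
 * "la & (insn_alignment(ctxt, size) - 1)".
 */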

static __always_inline int __linearize(struct x86_emulate_ctxt *ctxt,
				       struct segmented_address addr,
				       unsigned *max_size, unsigned size,
				       bool write, bool fetch,
				       enum x86emul_mode mode, ulong *linear)
{
	struct desc_struct desc;
	bool usable;
	ulong la;
	u32 lim;
	u16 sel;
	u8  va_bits;

	la = seg_base(ctxt, addr.seg) + addr.ea;
	*max_size = 0;
	switch (mode) {
	case X86EMUL_MODE_PROT64:
		*linear = la;
		va_bits = ctxt_virt_addr_bits(ctxt);
		if (get_canonical(la, va_bits) != la)
			goto bad;

		*max_size = min_t(u64, ~0u, (1ull << va_bits) - la);
		if (size > *max_size)
			goto bad;
		break;
	default:
		*linear = la = (u32)la;
		usable = ctxt->ops->get_segment(ctxt, &sel, &desc, NULL,
						addr.seg);
		if (!usable)
			goto bad;
		/* code segment in protected mode or read-only data segment */
		if ((((ctxt->mode != X86EMUL_MODE_REAL) && (desc.type & 8))
					|| !(desc.type & 2)) && write)
			goto bad;
		/* unreadable code segment */
		if (!fetch && (desc.type & 8) && !(desc.type & 2))
			goto bad;
		lim = desc_limit_scaled(&desc);
		if (!(desc.type & 8) && (desc.type & 4)) {
			/* expand-down segment */
			if (addr.ea <= lim)
				goto bad;
			lim = desc.d ? 0xffffffff : 0xffff;
		}
		if (addr.ea > lim)
			goto bad;
		if (lim == 0xffffffff)
			*max_size = ~0u;
		else {
			*max_size = (u64)lim + 1 - addr.ea;
			if (size > *max_size)
				goto bad;
		}
		break;
	}
	if (la & (insn_alignment(ctxt, size) - 1))
		return emulate_gp(ctxt, 0);
	return X86EMUL_CONTINUE;
bad:
	if (addr.seg == VCPU_SREG_SS)
		return emulate_ss(ctxt, 0);
	else
		return emulate_gp(ctxt, 0);
}

static int linearize(struct x86_emulate_ctxt *ctxt,
		     struct segmented_address addr,
		     unsigned size, bool write,
		     ulong *linear)
{
	unsigned max_size;
	return __linearize(ctxt, addr, &max_size, size, write, false,
			   ctxt->mode, linear);
}

static inline int assign_eip(struct x86_emulate_ctxt *ctxt, ulong dst,
			     enum x86emul_mode mode)
{
	ulong linear;
	int rc;
	unsigned max_size;
	struct segmented_address addr = { .seg = VCPU_SREG_CS,
					   .ea = dst };

	if (ctxt->op_bytes != sizeof(unsigned long))
		addr.ea = dst & ((1UL << (ctxt->op_bytes << 3)) - 1);
	rc = __linearize(ctxt, addr, &max_size, 1, false, true, mode, &linear);
	if (rc == X86EMUL_CONTINUE)
		ctxt->_eip = addr.ea;
	return rc;
}

static inline int assign_eip_near(struct x86_emulate_ctxt *ctxt, ulong dst)
{
	return assign_eip(ctxt, dst, ctxt->mode);
}

static int assign_eip_far(struct x86_emulate_ctxt *ctxt, ulong dst,
			  const struct desc_struct *cs_desc)
{
	enum x86emul_mode mode = ctxt->mode;
	int rc;

#ifdef CONFIG_X86_64
	if (ctxt->mode >= X86EMUL_MODE_PROT16) {
		if (cs_desc->l) {
			u64 efer = 0;

			ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
			if (efer & EFER_LMA)
				mode = X86EMUL_MODE_PROT64;
		} else
			mode = X86EMUL_MODE_PROT32; /* temporary value */
	}
#endif
	if (mode == X86EMUL_MODE_PROT16 || mode == X86EMUL_MODE_PROT32)
		mode = cs_desc->d ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16;
	rc = assign_eip(ctxt, dst, mode);
	if (rc == X86EMUL_CONTINUE)
		ctxt->mode = mode;
	return rc;
}

static inline int jmp_rel(struct x86_emulate_ctxt *ctxt, int rel)
{
	return assign_eip_near(ctxt, ctxt->_eip + rel);
}

static int segmented_read_std(struct x86_emulate_ctxt *ctxt,
			      struct segmented_address addr,
			      void *data,
			      unsigned size)
{
	int rc;
	ulong linear;

	rc = linearize(ctxt, addr, size, false, &linear);
	if (rc != X86EMUL_CONTINUE)
		return rc;
	return ctxt->ops->read_std(ctxt, linear, data, size, &ctxt->exception);
}

static int segmented_write_std(struct x86_emulate_ctxt *ctxt,
			       struct segmented_address addr,
			       void *data,
			       unsigned int size)
{
	int rc;
	ulong linear;

	rc = linearize(ctxt, addr, size, true, &linear);
	if (rc != X86EMUL_CONTINUE)
		return rc;
	return ctxt->ops->write_std(ctxt, linear, data, size, &ctxt->exception);
}

/*
 * Prefetch the remaining bytes of the instruction without crossing a page
 * boundary if they are not in fetch_cache yet.
 */
static int __do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt, int op_size)
{
	int rc;
	unsigned size, max_size;
	unsigned long linear;
	int cur_size = ctxt->fetch.end - ctxt->fetch.data;
	struct segmented_address addr = { .seg = VCPU_SREG_CS,
					   .ea = ctxt->eip + cur_size };

	/*
	 * We do not know exactly how many bytes will be needed, and
	 * __linearize is expensive, so fetch as much as possible.  We
	 * just have to avoid going beyond the 15 byte limit, the end
	 * of the segment, or the end of the page.
	 *
	 * __linearize is called with size 0 so that it does not do any
	 * boundary check itself.  Instead, we use max_size to check
	 * against op_size.
	 */
	rc = __linearize(ctxt, addr, &max_size, 0, false, true, ctxt->mode,
			 &linear);
	if (unlikely(rc != X86EMUL_CONTINUE))
		return rc;

	size = min_t(unsigned, 15UL ^ cur_size, max_size);
	size = min_t(unsigned, size, PAGE_SIZE - offset_in_page(linear));

	/*
	 * One instruction can only straddle two pages,
	 * and one has been loaded at the beginning of
	 * x86_decode_insn.  So, if not enough bytes
	 * still, we must have hit the 15-byte boundary.
	 */
	if (unlikely(size < op_size))
		return emulate_gp(ctxt, 0);

	rc = ctxt->ops->fetch(ctxt, linear, ctxt->fetch.end,
			      size, &ctxt->exception);
	if (unlikely(rc != X86EMUL_CONTINUE))
		return rc;
	ctxt->fetch.end += size;
	return X86EMUL_CONTINUE;
}

static __always_inline int do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt,
					       unsigned size)
{
	unsigned done_size = ctxt->fetch.end - ctxt->fetch.ptr;

	if (unlikely(done_size < size))
		return __do_insn_fetch_bytes(ctxt, size - done_size);
	else
		return X86EMUL_CONTINUE;
}

/* Fetch next part of the instruction being emulated. */
#define insn_fetch(_type, _ctxt)					\
({	_type _x;							\
									\
	rc = do_insn_fetch_bytes(_ctxt, sizeof(_type));			\
	if (rc != X86EMUL_CONTINUE)					\
		goto done;						\
	ctxt->_eip += sizeof(_type);					\
	memcpy(&_x, ctxt->fetch.ptr, sizeof(_type));			\
	ctxt->fetch.ptr += sizeof(_type);				\
	_x;								\
})

#define insn_fetch_arr(_arr, _size, _ctxt)				\
({									\
	rc = do_insn_fetch_bytes(_ctxt, _size);				\
	if (rc != X86EMUL_CONTINUE)					\
		goto done;						\
	ctxt->_eip += (_size);						\
	memcpy(_arr, ctxt->fetch.ptr, _size);				\
	ctxt->fetch.ptr += (_size);					\
})
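
/*
 * Usage sketch: both macros rely on a local "int rc" and a "done" label in
 * the caller, which is how the decode helpers below use them, e.g. when
 * pulling a 32-bit ModRM displacement:
 *
 *	int rc = X86EMUL_CONTINUE;
 *	...
 *	modrm_ea += insn_fetch(s32, ctxt);	(jumps to "done" on failure)
 *	...
 *	done:
 *		return rc;
 */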

/*
 * Given the 'reg' portion of a ModRM byte, and a register block, return a
 * pointer into the block that addresses the relevant register.
 * @highbyte_regs specifies whether to decode AH,CH,DH,BH.
 */
static void *decode_register(struct x86_emulate_ctxt *ctxt, u8 modrm_reg,
			     int byteop)
{
	void *p;
	int highbyte_regs = (ctxt->rex_prefix == 0) && byteop;

	if (highbyte_regs && modrm_reg >= 4 && modrm_reg < 8)
		p = (unsigned char *)reg_rmw(ctxt, modrm_reg & 3) + 1;
	else
		p = reg_rmw(ctxt, modrm_reg);
	return p;
}

static int read_descriptor(struct x86_emulate_ctxt *ctxt,
			   struct segmented_address addr,
			   u16 *size, unsigned long *address, int op_bytes)
{
	int rc;

	if (op_bytes == 2)
		op_bytes = 3;
	*address = 0;
	rc = segmented_read_std(ctxt, addr, size, 2);
	if (rc != X86EMUL_CONTINUE)
		return rc;
	addr.ea += 2;
	rc = segmented_read_std(ctxt, addr, address, op_bytes);
	return rc;
}

FASTOP2(add);
FASTOP2(or);
FASTOP2(adc);
FASTOP2(sbb);
FASTOP2(and);
FASTOP2(sub);
FASTOP2(xor);
FASTOP2(cmp);
FASTOP2(test);

FASTOP1SRC2(mul, mul_ex);
FASTOP1SRC2(imul, imul_ex);
FASTOP1SRC2EX(div, div_ex);
FASTOP1SRC2EX(idiv, idiv_ex);

FASTOP3WCL(shld);
FASTOP3WCL(shrd);

FASTOP2W(imul);

FASTOP1(not);
FASTOP1(neg);
FASTOP1(inc);
FASTOP1(dec);

FASTOP2CL(rol);
FASTOP2CL(ror);
FASTOP2CL(rcl);
FASTOP2CL(rcr);
FASTOP2CL(shl);
FASTOP2CL(shr);
FASTOP2CL(sar);

FASTOP2W(bsf);
FASTOP2W(bsr);
FASTOP2W(bt);
FASTOP2W(bts);
FASTOP2W(btr);
FASTOP2W(btc);

FASTOP2(xadd);

FASTOP2R(cmp, cmp_r);

static int em_bsf_c(struct x86_emulate_ctxt *ctxt)
{
	/* If src is zero, do not writeback, but update flags */
	if (ctxt->src.val == 0)
		ctxt->dst.type = OP_NONE;
	return fastop(ctxt, em_bsf);
}

static int em_bsr_c(struct x86_emulate_ctxt *ctxt)
{
	/* If src is zero, do not writeback, but update flags */
	if (ctxt->src.val == 0)
		ctxt->dst.type = OP_NONE;
	return fastop(ctxt, em_bsr);
}

static __always_inline u8 test_cc(unsigned int condition, unsigned long flags)
{
	u8 rc;
	void (*fop)(void) = (void *)em_setcc + 4 * (condition & 0xf);

	flags = (flags & EFLAGS_MASK) | X86_EFLAGS_IF;
	asm("push %[flags]; popf; " CALL_NOSPEC
	    : "=a"(rc) : [thunk_target]"r"(fop), [flags]"r"(flags));
	return rc;
}
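
/*
 * Sketch of how the SETcc stubs and test_cc() fit together: each FOP_SETCC()
 * stub is aligned to 4 bytes, so condition code "cc" selects the stub at
 * em_setcc + 4 * cc.  For JE/SETE (cc == 4) that is the "setz %al" stub, so
 * test_cc(4, flags) is non-zero exactly when ZF is set in the supplied
 * flags, e.g. for a conditional branch:
 *
 *	if (test_cc(ctxt->b & 0xf, ctxt->eflags))
 *		rc = jmp_rel(ctxt, ctxt->src.val);
 */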

static void fetch_register_operand(struct operand *op)
{
	switch (op->bytes) {
	case 1:
		op->val = *(u8 *)op->addr.reg;
		break;
	case 2:
		op->val = *(u16 *)op->addr.reg;
		break;
	case 4:
		op->val = *(u32 *)op->addr.reg;
		break;
	case 8:
		op->val = *(u64 *)op->addr.reg;
		break;
	}
}

static void read_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data, int reg)
{
	switch (reg) {
	case 0: asm("movdqa %%xmm0, %0" : "=m"(*data)); break;
	case 1: asm("movdqa %%xmm1, %0" : "=m"(*data)); break;
	case 2: asm("movdqa %%xmm2, %0" : "=m"(*data)); break;
	case 3: asm("movdqa %%xmm3, %0" : "=m"(*data)); break;
	case 4: asm("movdqa %%xmm4, %0" : "=m"(*data)); break;
	case 5: asm("movdqa %%xmm5, %0" : "=m"(*data)); break;
	case 6: asm("movdqa %%xmm6, %0" : "=m"(*data)); break;
	case 7: asm("movdqa %%xmm7, %0" : "=m"(*data)); break;
#ifdef CONFIG_X86_64
	case 8: asm("movdqa %%xmm8, %0" : "=m"(*data)); break;
	case 9: asm("movdqa %%xmm9, %0" : "=m"(*data)); break;
	case 10: asm("movdqa %%xmm10, %0" : "=m"(*data)); break;
	case 11: asm("movdqa %%xmm11, %0" : "=m"(*data)); break;
	case 12: asm("movdqa %%xmm12, %0" : "=m"(*data)); break;
	case 13: asm("movdqa %%xmm13, %0" : "=m"(*data)); break;
	case 14: asm("movdqa %%xmm14, %0" : "=m"(*data)); break;
	case 15: asm("movdqa %%xmm15, %0" : "=m"(*data)); break;
#endif
	default: BUG();
	}
}

static void write_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data,
			  int reg)
{
	switch (reg) {
	case 0: asm("movdqa %0, %%xmm0" : : "m"(*data)); break;
	case 1: asm("movdqa %0, %%xmm1" : : "m"(*data)); break;
	case 2: asm("movdqa %0, %%xmm2" : : "m"(*data)); break;
	case 3: asm("movdqa %0, %%xmm3" : : "m"(*data)); break;
	case 4: asm("movdqa %0, %%xmm4" : : "m"(*data)); break;
	case 5: asm("movdqa %0, %%xmm5" : : "m"(*data)); break;
	case 6: asm("movdqa %0, %%xmm6" : : "m"(*data)); break;
	case 7: asm("movdqa %0, %%xmm7" : : "m"(*data)); break;
#ifdef CONFIG_X86_64
	case 8: asm("movdqa %0, %%xmm8" : : "m"(*data)); break;
	case 9: asm("movdqa %0, %%xmm9" : : "m"(*data)); break;
	case 10: asm("movdqa %0, %%xmm10" : : "m"(*data)); break;
	case 11: asm("movdqa %0, %%xmm11" : : "m"(*data)); break;
	case 12: asm("movdqa %0, %%xmm12" : : "m"(*data)); break;
	case 13: asm("movdqa %0, %%xmm13" : : "m"(*data)); break;
	case 14: asm("movdqa %0, %%xmm14" : : "m"(*data)); break;
	case 15: asm("movdqa %0, %%xmm15" : : "m"(*data)); break;
#endif
	default: BUG();
	}
}

static void read_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg)
{
	switch (reg) {
	case 0: asm("movq %%mm0, %0" : "=m"(*data)); break;
	case 1: asm("movq %%mm1, %0" : "=m"(*data)); break;
	case 2: asm("movq %%mm2, %0" : "=m"(*data)); break;
	case 3: asm("movq %%mm3, %0" : "=m"(*data)); break;
	case 4: asm("movq %%mm4, %0" : "=m"(*data)); break;
	case 5: asm("movq %%mm5, %0" : "=m"(*data)); break;
	case 6: asm("movq %%mm6, %0" : "=m"(*data)); break;
	case 7: asm("movq %%mm7, %0" : "=m"(*data)); break;
	default: BUG();
	}
}

static void write_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg)
{
	switch (reg) {
	case 0: asm("movq %0, %%mm0" : : "m"(*data)); break;
	case 1: asm("movq %0, %%mm1" : : "m"(*data)); break;
	case 2: asm("movq %0, %%mm2" : : "m"(*data)); break;
	case 3: asm("movq %0, %%mm3" : : "m"(*data)); break;
	case 4: asm("movq %0, %%mm4" : : "m"(*data)); break;
	case 5: asm("movq %0, %%mm5" : : "m"(*data)); break;
	case 6: asm("movq %0, %%mm6" : : "m"(*data)); break;
	case 7: asm("movq %0, %%mm7" : : "m"(*data)); break;
	default: BUG();
	}
}

static int em_fninit(struct x86_emulate_ctxt *ctxt)
{
	if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
		return emulate_nm(ctxt);

	asm volatile("fninit");
	return X86EMUL_CONTINUE;
}

static int em_fnstcw(struct x86_emulate_ctxt *ctxt)
{
	u16 fcw;

	if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
		return emulate_nm(ctxt);

	asm volatile("fnstcw %0": "+m"(fcw));

	ctxt->dst.val = fcw;

	return X86EMUL_CONTINUE;
}

static int em_fnstsw(struct x86_emulate_ctxt *ctxt)
{
	u16 fsw;

	if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
		return emulate_nm(ctxt);

	asm volatile("fnstsw %0": "+m"(fsw));

	ctxt->dst.val = fsw;

	return X86EMUL_CONTINUE;
}

static void decode_register_operand(struct x86_emulate_ctxt *ctxt,
				    struct operand *op)
{
	unsigned reg = ctxt->modrm_reg;

	if (!(ctxt->d & ModRM))
		reg = (ctxt->b & 7) | ((ctxt->rex_prefix & 1) << 3);

	if (ctxt->d & Sse) {
		op->type = OP_XMM;
		op->bytes = 16;
		op->addr.xmm = reg;
		read_sse_reg(ctxt, &op->vec_val, reg);
		return;
	}
	if (ctxt->d & Mmx) {
		reg &= 7;
		op->type = OP_MM;
		op->bytes = 8;
		op->addr.mm = reg;
		return;
	}

	op->type = OP_REG;
	op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
	op->addr.reg = decode_register(ctxt, reg, ctxt->d & ByteOp);

	fetch_register_operand(op);
	op->orig_val = op->val;
}

static void adjust_modrm_seg(struct x86_emulate_ctxt *ctxt, int base_reg)
{
	if (base_reg == VCPU_REGS_RSP || base_reg == VCPU_REGS_RBP)
		ctxt->modrm_seg = VCPU_SREG_SS;
}

static int decode_modrm(struct x86_emulate_ctxt *ctxt,
			struct operand *op)
{
	u8 sib;
	int index_reg, base_reg, scale;
	int rc = X86EMUL_CONTINUE;
	ulong modrm_ea = 0;

	ctxt->modrm_reg = ((ctxt->rex_prefix << 1) & 8); /* REX.R */
	index_reg = (ctxt->rex_prefix << 2) & 8; /* REX.X */
	base_reg = (ctxt->rex_prefix << 3) & 8; /* REX.B */

	ctxt->modrm_mod = (ctxt->modrm & 0xc0) >> 6;
	ctxt->modrm_reg |= (ctxt->modrm & 0x38) >> 3;
	ctxt->modrm_rm = base_reg | (ctxt->modrm & 0x07);
	ctxt->modrm_seg = VCPU_SREG_DS;

	if (ctxt->modrm_mod == 3 || (ctxt->d & NoMod)) {
		op->type = OP_REG;
		op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
		op->addr.reg = decode_register(ctxt, ctxt->modrm_rm,
				ctxt->d & ByteOp);
		if (ctxt->d & Sse) {
			op->type = OP_XMM;
			op->bytes = 16;
			op->addr.xmm = ctxt->modrm_rm;
			read_sse_reg(ctxt, &op->vec_val, ctxt->modrm_rm);
			return rc;
		}
		if (ctxt->d & Mmx) {
			op->type = OP_MM;
			op->bytes = 8;
			op->addr.mm = ctxt->modrm_rm & 7;
			return rc;
		}
		fetch_register_operand(op);
		return rc;
	}

	op->type = OP_MEM;

	if (ctxt->ad_bytes == 2) {
		unsigned bx = reg_read(ctxt, VCPU_REGS_RBX);
		unsigned bp = reg_read(ctxt, VCPU_REGS_RBP);
		unsigned si = reg_read(ctxt, VCPU_REGS_RSI);
		unsigned di = reg_read(ctxt, VCPU_REGS_RDI);

		/* 16-bit ModR/M decode. */
		switch (ctxt->modrm_mod) {
		case 0:
			if (ctxt->modrm_rm == 6)
				modrm_ea += insn_fetch(u16, ctxt);
			break;
		case 1:
			modrm_ea += insn_fetch(s8, ctxt);
			break;
		case 2:
			modrm_ea += insn_fetch(u16, ctxt);
			break;
		}
		switch (ctxt->modrm_rm) {
		case 0:
			modrm_ea += bx + si;
			break;
		case 1:
			modrm_ea += bx + di;
			break;
		case 2:
			modrm_ea += bp + si;
			break;
		case 3:
			modrm_ea += bp + di;
			break;
		case 4:
			modrm_ea += si;
			break;
		case 5:
			modrm_ea += di;
			break;
		case 6:
			if (ctxt->modrm_mod != 0)
				modrm_ea += bp;
			break;
		case 7:
			modrm_ea += bx;
			break;
		}
		if (ctxt->modrm_rm == 2 || ctxt->modrm_rm == 3 ||
		    (ctxt->modrm_rm == 6 && ctxt->modrm_mod != 0))
			ctxt->modrm_seg = VCPU_SREG_SS;
		modrm_ea = (u16)modrm_ea;
	} else {
		/* 32/64-bit ModR/M decode. */
		if ((ctxt->modrm_rm & 7) == 4) {
			sib = insn_fetch(u8, ctxt);
			index_reg |= (sib >> 3) & 7;
			base_reg |= sib & 7;
			scale = sib >> 6;

			if ((base_reg & 7) == 5 && ctxt->modrm_mod == 0)
				modrm_ea += insn_fetch(s32, ctxt);
			else {
				modrm_ea += reg_read(ctxt, base_reg);
				adjust_modrm_seg(ctxt, base_reg);
				/* Increment ESP on POP [ESP] */
				if ((ctxt->d & IncSP) &&
				    base_reg == VCPU_REGS_RSP)
					modrm_ea += ctxt->op_bytes;
			}
			if (index_reg != 4)
				modrm_ea += reg_read(ctxt, index_reg) << scale;
		} else if ((ctxt->modrm_rm & 7) == 5 && ctxt->modrm_mod == 0) {
			modrm_ea += insn_fetch(s32, ctxt);
			if (ctxt->mode == X86EMUL_MODE_PROT64)
				ctxt->rip_relative = 1;
		} else {
			base_reg = ctxt->modrm_rm;
			modrm_ea += reg_read(ctxt, base_reg);
			adjust_modrm_seg(ctxt, base_reg);
		}
		switch (ctxt->modrm_mod) {
		case 1:
			modrm_ea += insn_fetch(s8, ctxt);
			break;
		case 2:
			modrm_ea += insn_fetch(s32, ctxt);
			break;
		}
	}
	op->addr.mem.ea = modrm_ea;
	if (ctxt->ad_bytes != 8)
		ctxt->memop.addr.mem.ea = (u32)ctxt->memop.addr.mem.ea;

done:
	return rc;
}

static int decode_abs(struct x86_emulate_ctxt *ctxt,
		      struct operand *op)
{
	int rc = X86EMUL_CONTINUE;

	op->type = OP_MEM;
	switch (ctxt->ad_bytes) {
	case 2:
		op->addr.mem.ea = insn_fetch(u16, ctxt);
		break;
	case 4:
		op->addr.mem.ea = insn_fetch(u32, ctxt);
		break;
	case 8:
		op->addr.mem.ea = insn_fetch(u64, ctxt);
		break;
	}
done:
	return rc;
}

static void fetch_bit_operand(struct x86_emulate_ctxt *ctxt)
{
	long sv = 0, mask;

	if (ctxt->dst.type == OP_MEM && ctxt->src.type == OP_REG) {
		mask = ~((long)ctxt->dst.bytes * 8 - 1);

		if (ctxt->src.bytes == 2)
			sv = (s16)ctxt->src.val & (s16)mask;
		else if (ctxt->src.bytes == 4)
			sv = (s32)ctxt->src.val & (s32)mask;
		else
			sv = (s64)ctxt->src.val & (s64)mask;

		ctxt->dst.addr.mem.ea = address_mask(ctxt,
					   ctxt->dst.addr.mem.ea + (sv >> 3));
	}

	/* only subword offset */
	ctxt->src.val &= (ctxt->dst.bytes << 3) - 1;
}

static int read_emulated(struct x86_emulate_ctxt *ctxt,
			 unsigned long addr, void *dest, unsigned size)
{
	int rc;
	struct read_cache *mc = &ctxt->mem_read;

	if (mc->pos < mc->end)
		goto read_cached;

	WARN_ON((mc->end + size) >= sizeof(mc->data));

	rc = ctxt->ops->read_emulated(ctxt, addr, mc->data + mc->end, size,
				      &ctxt->exception);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	mc->end += size;

read_cached:
	memcpy(dest, mc->data + mc->pos, size);
	mc->pos += size;
	return X86EMUL_CONTINUE;
}

static int segmented_read(struct x86_emulate_ctxt *ctxt,
			  struct segmented_address addr,
			  void *data,
			  unsigned size)
{
	int rc;
	ulong linear;

	rc = linearize(ctxt, addr, size, false, &linear);
	if (rc != X86EMUL_CONTINUE)
		return rc;
	return read_emulated(ctxt, linear, data, size);
}

static int segmented_write(struct x86_emulate_ctxt *ctxt,
			   struct segmented_address addr,
			   const void *data,
			   unsigned size)
{
	int rc;
	ulong linear;

	rc = linearize(ctxt, addr, size, true, &linear);
	if (rc != X86EMUL_CONTINUE)
		return rc;
	return ctxt->ops->write_emulated(ctxt, linear, data, size,
					 &ctxt->exception);
}

static int segmented_cmpxchg(struct x86_emulate_ctxt *ctxt,
			     struct segmented_address addr,
			     const void *orig_data, const void *data,
			     unsigned size)
{
	int rc;
	ulong linear;

	rc = linearize(ctxt, addr, size, true, &linear);
	if (rc != X86EMUL_CONTINUE)
		return rc;
	return ctxt->ops->cmpxchg_emulated(ctxt, linear, orig_data, data,
					   size, &ctxt->exception);
}

static int pio_in_emulated(struct x86_emulate_ctxt *ctxt,
			   unsigned int size, unsigned short port,
			   void *dest)
{
	struct read_cache *rc = &ctxt->io_read;

	if (rc->pos == rc->end) { /* refill pio read ahead */
		unsigned int in_page, n;
		unsigned int count = ctxt->rep_prefix ?
			address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) : 1;
		in_page = (ctxt->eflags & X86_EFLAGS_DF) ?
			offset_in_page(reg_read(ctxt, VCPU_REGS_RDI)) :
			PAGE_SIZE - offset_in_page(reg_read(ctxt, VCPU_REGS_RDI));
		n = min3(in_page, (unsigned int)sizeof(rc->data) / size, count);
		if (n == 0)
			n = 1;
		rc->pos = rc->end = 0;
		if (!ctxt->ops->pio_in_emulated(ctxt, size, port, rc->data, n))
			return 0;
		rc->end = n * size;
	}

	if (ctxt->rep_prefix && (ctxt->d & String) &&
	    !(ctxt->eflags & X86_EFLAGS_DF)) {
		ctxt->dst.data = rc->data + rc->pos;
		ctxt->dst.type = OP_MEM_STR;
		ctxt->dst.count = (rc->end - rc->pos) / size;
		rc->pos = rc->end;
	} else {
		memcpy(dest, rc->data + rc->pos, size);
		rc->pos += size;
	}
	return 1;
}

static int read_interrupt_descriptor(struct x86_emulate_ctxt *ctxt,
				     u16 index, struct desc_struct *desc)
{
	struct desc_ptr dt;
	ulong addr;

	ctxt->ops->get_idt(ctxt, &dt);

	if (dt.size < index * 8 + 7)
		return emulate_gp(ctxt, index << 3 | 0x2);

	addr = dt.address + index * 8;
	return ctxt->ops->read_std(ctxt, addr, desc, sizeof *desc,
				   &ctxt->exception);
}

static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt,
				     u16 selector, struct desc_ptr *dt)
{
	const struct x86_emulate_ops *ops = ctxt->ops;
	u32 base3 = 0;

	if (selector & 1 << 2) {
		struct desc_struct desc;
		u16 sel;

		memset (dt, 0, sizeof *dt);
		if (!ops->get_segment(ctxt, &sel, &desc, &base3,
				      VCPU_SREG_LDTR))
			return;

		dt->size = desc_limit_scaled(&desc); /* what if limit > 65535? */
		dt->address = get_desc_base(&desc) | ((u64)base3 << 32);
	} else
		ops->get_gdt(ctxt, dt);
}

static int get_descriptor_ptr(struct x86_emulate_ctxt *ctxt,
			      u16 selector, ulong *desc_addr_p)
{
	struct desc_ptr dt;
	u16 index = selector >> 3;
	ulong addr;

	get_descriptor_table_ptr(ctxt, selector, &dt);

	if (dt.size < index * 8 + 7)
		return emulate_gp(ctxt, selector & 0xfffc);

	addr = dt.address + index * 8;

#ifdef CONFIG_X86_64
	if (addr >> 32 != 0) {
		u64 efer = 0;

		ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
		if (!(efer & EFER_LMA))
			addr &= (u32)-1;
	}
#endif

	*desc_addr_p = addr;
	return X86EMUL_CONTINUE;
}

/* allowed just for 8 bytes segments */
static int read_segment_descriptor(struct x86_emulate_ctxt *ctxt,
				   u16 selector, struct desc_struct *desc,
				   ulong *desc_addr_p)
{
	int rc;

	rc = get_descriptor_ptr(ctxt, selector, desc_addr_p);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	return ctxt->ops->read_std(ctxt, *desc_addr_p, desc, sizeof(*desc),
				   &ctxt->exception);
}

/* allowed just for 8 bytes segments */
static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt,
				    u16 selector, struct desc_struct *desc)
{
	int rc;
	ulong addr;

	rc = get_descriptor_ptr(ctxt, selector, &addr);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	return ctxt->ops->write_std(ctxt, addr, desc, sizeof *desc,
				    &ctxt->exception);
}

static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
				     u16 selector, int seg, u8 cpl,
				     enum x86_transfer_type transfer,
				     struct desc_struct *desc)
{
	struct desc_struct seg_desc, old_desc;
	u8 dpl, rpl;
	unsigned err_vec = GP_VECTOR;
	u32 err_code = 0;
	bool null_selector = !(selector & ~0x3); /* 0000-0003 are null */
	ulong desc_addr;
	int ret;
	u16 dummy;
	u32 base3 = 0;

	memset(&seg_desc, 0, sizeof seg_desc);

	if (ctxt->mode == X86EMUL_MODE_REAL) {
		/* set real mode segment descriptor (keep limit etc. for
		 * unreal mode) */
		ctxt->ops->get_segment(ctxt, &dummy, &seg_desc, NULL, seg);
		set_desc_base(&seg_desc, selector << 4);
		goto load;
	} else if (seg <= VCPU_SREG_GS && ctxt->mode == X86EMUL_MODE_VM86) {
		/* VM86 needs a clean new segment descriptor */
		set_desc_base(&seg_desc, selector << 4);
		set_desc_limit(&seg_desc, 0xffff);
		seg_desc.type = 3;
		seg_desc.p = 1;
		seg_desc.s = 1;
		seg_desc.dpl = 3;
		goto load;
	}

	rpl = selector & 3;

	/* TR should be in GDT only */
	if (seg == VCPU_SREG_TR && (selector & (1 << 2)))
		goto exception;

	/* NULL selector is not valid for TR, CS and (except for long mode) SS */
	if (null_selector) {
		if (seg == VCPU_SREG_CS || seg == VCPU_SREG_TR)
			goto exception;

		if (seg == VCPU_SREG_SS) {
			if (ctxt->mode != X86EMUL_MODE_PROT64 || rpl != cpl)
				goto exception;

			/*
			 * ctxt->ops->set_segment expects the CPL to be in
			 * SS.DPL, so fake an expand-up 32-bit data segment.
			 */
			seg_desc.type = 3;
			seg_desc.p = 1;
			seg_desc.s = 1;
			seg_desc.dpl = cpl;
			seg_desc.d = 1;
			seg_desc.g = 1;
		}

		/* Skip all following checks */
		goto load;
	}

	ret = read_segment_descriptor(ctxt, selector, &seg_desc, &desc_addr);
	if (ret != X86EMUL_CONTINUE)
		return ret;

	err_code = selector & 0xfffc;
	err_vec = (transfer == X86_TRANSFER_TASK_SWITCH) ? TS_VECTOR :
							   GP_VECTOR;

	/* can't load system descriptor into segment selector */
	if (seg <= VCPU_SREG_GS && !seg_desc.s) {
		if (transfer == X86_TRANSFER_CALL_JMP)
			return X86EMUL_UNHANDLEABLE;
		goto exception;
	}

	if (!seg_desc.p) {
		err_vec = (seg == VCPU_SREG_SS) ? SS_VECTOR : NP_VECTOR;
		goto exception;
	}

	dpl = seg_desc.dpl;

	switch (seg) {
	case VCPU_SREG_SS:
		/*
		 * segment is not a writable data segment, or segment
		 * selector's RPL != CPL, or the descriptor's DPL != CPL
		 */
		if (rpl != cpl || (seg_desc.type & 0xa) != 0x2 || dpl != cpl)
			goto exception;
		break;
	case VCPU_SREG_CS:
		if (!(seg_desc.type & 8))
			goto exception;

		if (seg_desc.type & 4) {
			/* conforming */
			if (dpl > cpl)
				goto exception;
		} else {
			/* nonconforming */
			if (rpl > cpl || dpl != cpl)
				goto exception;
		}
		/* in long-mode d/b must be clear if l is set */
		if (seg_desc.d && seg_desc.l) {
			u64 efer = 0;

			ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
			if (efer & EFER_LMA)
				goto exception;
		}

		/* CS(RPL) <- CPL */
		selector = (selector & 0xfffc) | cpl;
		break;
	case VCPU_SREG_TR:
		if (seg_desc.s || (seg_desc.type != 1 && seg_desc.type != 9))
			goto exception;
		old_desc = seg_desc;
		seg_desc.type |= 2; /* busy */
		ret = ctxt->ops->cmpxchg_emulated(ctxt, desc_addr, &old_desc, &seg_desc,
						  sizeof(seg_desc), &ctxt->exception);
		if (ret != X86EMUL_CONTINUE)
			return ret;
		break;
	case VCPU_SREG_LDTR:
		if (seg_desc.s || seg_desc.type != 2)
			goto exception;
		break;
	default: /*  DS, ES, FS, or GS */
		/*
		 * segment is not a data or readable code segment or
		 * ((segment is a data or nonconforming code segment)
		 * and (both RPL and CPL > DPL))
		 */
		if ((seg_desc.type & 0xa) == 0x8 ||
		    (((seg_desc.type & 0xc) != 0xc) &&
		     (rpl > dpl && cpl > dpl)))
			goto exception;
		break;
	}

	if (seg_desc.s) {
		/* mark segment as accessed */
		if (!(seg_desc.type & 1)) {
			seg_desc.type |= 1;
			ret = write_segment_descriptor(ctxt, selector,
						       &seg_desc);
			if (ret != X86EMUL_CONTINUE)
				return ret;
		}
	} else if (ctxt->mode == X86EMUL_MODE_PROT64) {
		ret = ctxt->ops->read_std(ctxt, desc_addr+8, &base3,
				sizeof(base3), &ctxt->exception);
		if (ret != X86EMUL_CONTINUE)
			return ret;
		if (emul_is_noncanonical_address(get_desc_base(&seg_desc) |
				((u64)base3 << 32), ctxt))
			return emulate_gp(ctxt, 0);
	}
load:
	ctxt->ops->set_segment(ctxt, selector, &seg_desc, base3, seg);
	if (desc)
		*desc = seg_desc;
	return X86EMUL_CONTINUE;
exception:
	return emulate_exception(ctxt, err_vec, err_code, true);
}

1756 1757 1758 1759
static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
				   u16 selector, int seg)
{
	u8 cpl = ctxt->ops->cpl(ctxt);
1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774

	/*
	 * None of MOV, POP and LSS can load a NULL selector in CPL=3, but
	 * they can load it at CPL<3 (Intel's manual says only LSS can,
	 * but it's wrong).
	 *
	 * However, the Intel manual says that putting IST=1/DPL=3 in
	 * an interrupt gate will result in SS=3 (the AMD manual instead
	 * says it doesn't), so allow SS=3 in __load_segment_descriptor
	 * and only forbid it here.
	 */
	if (seg == VCPU_SREG_SS && selector == 3 &&
	    ctxt->mode == X86EMUL_MODE_PROT64)
		return emulate_exception(ctxt, GP_VECTOR, 0, true);

1775 1776
	return __load_segment_descriptor(ctxt, selector, seg, cpl,
					 X86_TRANSFER_NONE, NULL);
1777 1778
}

1779 1780
static void write_register_operand(struct operand *op)
{
1781
	return assign_register(op->addr.reg, op->val, op->bytes);
1782 1783
}

static int writeback(struct x86_emulate_ctxt *ctxt, struct operand *op)
{
	switch (op->type) {
	case OP_REG:
		write_register_operand(op);
		break;
	case OP_MEM:
		if (ctxt->lock_prefix)
			return segmented_cmpxchg(ctxt,
						 op->addr.mem,
						 &op->orig_val,
						 &op->val,
						 op->bytes);
		else
			return segmented_write(ctxt,
					       op->addr.mem,
					       &op->val,
					       op->bytes);
		break;
	case OP_MEM_STR:
		return segmented_write(ctxt,
				       op->addr.mem,
				       op->data,
				       op->bytes * op->count);
		break;
	case OP_XMM:
		write_sse_reg(ctxt, &op->vec_val, op->addr.xmm);
		break;
	case OP_MM:
		write_mmx_reg(ctxt, &op->mm_val, op->addr.mm);
		break;
	case OP_NONE:
		/* no writeback */
		break;
	default:
		break;
	}
	return X86EMUL_CONTINUE;
}

static int push(struct x86_emulate_ctxt *ctxt, void *data, int bytes)
{
	struct segmented_address addr;

	rsp_increment(ctxt, -bytes);
	addr.ea = reg_read(ctxt, VCPU_REGS_RSP) & stack_mask(ctxt);
	addr.seg = VCPU_SREG_SS;

	return segmented_write(ctxt, addr, data, bytes);
}

static int em_push(struct x86_emulate_ctxt *ctxt)
{
	/* Disable writeback. */
	ctxt->dst.type = OP_NONE;
	return push(ctxt, &ctxt->src.val, ctxt->op_bytes);
}

static int emulate_pop(struct x86_emulate_ctxt *ctxt,
		       void *dest, int len)
{
	int rc;
	struct segmented_address addr;

	addr.ea = reg_read(ctxt, VCPU_REGS_RSP) & stack_mask(ctxt);
	addr.seg = VCPU_SREG_SS;
	rc = segmented_read(ctxt, addr, dest, len);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	rsp_increment(ctxt, len);
	return rc;
}

static int em_pop(struct x86_emulate_ctxt *ctxt)
{
	return emulate_pop(ctxt, &ctxt->dst.val, ctxt->op_bytes);
}

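/*
 * POPF helper: only the flag bits the current privilege level may
 * modify are taken from the popped value.  IOPL is writable only at
 * CPL 0, IF only when CPL <= IOPL (or unconditionally in real mode);
 * in VM86 mode with IOPL < 3 the instruction raises #GP.
 */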
static int emulate_popf(struct x86_emulate_ctxt *ctxt,
			void *dest, int len)
{
	int rc;
	unsigned long val, change_mask;
	int iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> X86_EFLAGS_IOPL_BIT;
	int cpl = ctxt->ops->cpl(ctxt);

	rc = emulate_pop(ctxt, &val, len);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	change_mask = X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
		      X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_OF |
		      X86_EFLAGS_TF | X86_EFLAGS_DF | X86_EFLAGS_NT |
		      X86_EFLAGS_AC | X86_EFLAGS_ID;

	switch(ctxt->mode) {
	case X86EMUL_MODE_PROT64:
	case X86EMUL_MODE_PROT32:
	case X86EMUL_MODE_PROT16:
		if (cpl == 0)
			change_mask |= X86_EFLAGS_IOPL;
		if (cpl <= iopl)
			change_mask |= X86_EFLAGS_IF;
		break;
	case X86EMUL_MODE_VM86:
		if (iopl < 3)
			return emulate_gp(ctxt, 0);
		change_mask |= X86_EFLAGS_IF;
		break;
	default: /* real mode */
		change_mask |= (X86_EFLAGS_IOPL | X86_EFLAGS_IF);
		break;
	}

	*(unsigned long *)dest =
		(ctxt->eflags & ~change_mask) | (val & change_mask);

	return rc;
}

static int em_popf(struct x86_emulate_ctxt *ctxt)
{
	ctxt->dst.type = OP_REG;
	ctxt->dst.addr.reg = &ctxt->eflags;
	ctxt->dst.bytes = ctxt->op_bytes;
	return emulate_popf(ctxt, &ctxt->dst.val, ctxt->op_bytes);
}

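/*
 * ENTER: only the common nesting-level-0 form is emulated; a non-zero
 * nesting level is punted back to the caller as X86EMUL_UNHANDLEABLE.
 */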
static int em_enter(struct x86_emulate_ctxt *ctxt)
{
	int rc;
	unsigned frame_size = ctxt->src.val;
	unsigned nesting_level = ctxt->src2.val & 31;
	ulong rbp;

	if (nesting_level)
		return X86EMUL_UNHANDLEABLE;

	rbp = reg_read(ctxt, VCPU_REGS_RBP);
	rc = push(ctxt, &rbp, stack_size(ctxt));
	if (rc != X86EMUL_CONTINUE)
		return rc;
	assign_masked(reg_rmw(ctxt, VCPU_REGS_RBP), reg_read(ctxt, VCPU_REGS_RSP),
		      stack_mask(ctxt));
	assign_masked(reg_rmw(ctxt, VCPU_REGS_RSP),
		      reg_read(ctxt, VCPU_REGS_RSP) - frame_size,
		      stack_mask(ctxt));
	return X86EMUL_CONTINUE;
}

static int em_leave(struct x86_emulate_ctxt *ctxt)
{
	assign_masked(reg_rmw(ctxt, VCPU_REGS_RSP), reg_read(ctxt, VCPU_REGS_RBP),
		      stack_mask(ctxt));
	return emulate_pop(ctxt, reg_rmw(ctxt, VCPU_REGS_RBP), ctxt->op_bytes);
}

static int em_push_sreg(struct x86_emulate_ctxt *ctxt)
{
	int seg = ctxt->src2.val;

	ctxt->src.val = get_segment_selector(ctxt, seg);
	if (ctxt->op_bytes == 4) {
		rsp_increment(ctxt, -2);
		ctxt->op_bytes = 2;
	}

	return em_push(ctxt);
}

static int em_pop_sreg(struct x86_emulate_ctxt *ctxt)
{
	int seg = ctxt->src2.val;
	unsigned long selector;
	int rc;

	rc = emulate_pop(ctxt, &selector, 2);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	if (ctxt->modrm_reg == VCPU_SREG_SS)
		ctxt->interruptibility = KVM_X86_SHADOW_INT_MOV_SS;
	if (ctxt->op_bytes > 2)
		rsp_increment(ctxt, ctxt->op_bytes - 2);

	rc = load_segment_descriptor(ctxt, (u16)selector, seg);
	return rc;
}

static int em_pusha(struct x86_emulate_ctxt *ctxt)
{
	unsigned long old_esp = reg_read(ctxt, VCPU_REGS_RSP);
	int rc = X86EMUL_CONTINUE;
	int reg = VCPU_REGS_RAX;

	while (reg <= VCPU_REGS_RDI) {
		(reg == VCPU_REGS_RSP) ?
		(ctxt->src.val = old_esp) : (ctxt->src.val = reg_read(ctxt, reg));

		rc = em_push(ctxt);
		if (rc != X86EMUL_CONTINUE)
			return rc;

		++reg;
	}

	return rc;
}

static int em_pushf(struct x86_emulate_ctxt *ctxt)
{
	ctxt->src.val = (unsigned long)ctxt->eflags & ~X86_EFLAGS_VM;
	return em_push(ctxt);
}

static int em_popa(struct x86_emulate_ctxt *ctxt)
{
	int rc = X86EMUL_CONTINUE;
	int reg = VCPU_REGS_RDI;
	u32 val;

	while (reg >= VCPU_REGS_RAX) {
		if (reg == VCPU_REGS_RSP) {
			rsp_increment(ctxt, ctxt->op_bytes);
			--reg;
		}

		rc = emulate_pop(ctxt, &val, ctxt->op_bytes);
		if (rc != X86EMUL_CONTINUE)
			break;
		assign_register(reg_rmw(ctxt, reg), val, ctxt->op_bytes);
		--reg;
	}
	return rc;
}

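/*
 * Real-mode interrupt/INT n injection: push FLAGS, CS and IP, clear
 * IF/TF/AC, then fetch the new CS:IP from the 4-byte IVT entry at
 * IDT base + vector * 4.  Limit checks are still missing (see TODO).
 */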
static int __emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq)
{
	const struct x86_emulate_ops *ops = ctxt->ops;
	int rc;
	struct desc_ptr dt;
	gva_t cs_addr;
	gva_t eip_addr;
	u16 cs, eip;

	/* TODO: Add limit checks */
	ctxt->src.val = ctxt->eflags;
	rc = em_push(ctxt);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	ctxt->eflags &= ~(X86_EFLAGS_IF | X86_EFLAGS_TF | X86_EFLAGS_AC);

	ctxt->src.val = get_segment_selector(ctxt, VCPU_SREG_CS);
	rc = em_push(ctxt);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	ctxt->src.val = ctxt->_eip;
	rc = em_push(ctxt);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	ops->get_idt(ctxt, &dt);

	eip_addr = dt.address + (irq << 2);
	cs_addr = dt.address + (irq << 2) + 2;

	rc = ops->read_std(ctxt, cs_addr, &cs, 2, &ctxt->exception);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	rc = ops->read_std(ctxt, eip_addr, &eip, 2, &ctxt->exception);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	rc = load_segment_descriptor(ctxt, cs, VCPU_SREG_CS);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	ctxt->_eip = eip;

	return rc;
}

int emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq)
{
	int rc;

	invalidate_registers(ctxt);
	rc = __emulate_int_real(ctxt, irq);
	if (rc == X86EMUL_CONTINUE)
		writeback_registers(ctxt);
	return rc;
}

static int emulate_int(struct x86_emulate_ctxt *ctxt, int irq)
{
	switch(ctxt->mode) {
	case X86EMUL_MODE_REAL:
		return __emulate_int_real(ctxt, irq);
	case X86EMUL_MODE_VM86:
	case X86EMUL_MODE_PROT16:
	case X86EMUL_MODE_PROT32:
	case X86EMUL_MODE_PROT64:
	default:
		/* Protected mode interrupts unimplemented yet */
		return X86EMUL_UNHANDLEABLE;
	}
}

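/*
 * IRET in real mode: pop IP, CS and FLAGS.  The popped IP must fit in
 * 16 bits, and only the architecturally writable flag bits are taken
 * from the popped value; the fixed/reserved bits keep their values.
 */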
2096
static int emulate_iret_real(struct x86_emulate_ctxt *ctxt)
2097
{
2098 2099 2100 2101
	int rc = X86EMUL_CONTINUE;
	unsigned long temp_eip = 0;
	unsigned long temp_eflags = 0;
	unsigned long cs = 0;
2102 2103 2104 2105 2106
	unsigned long mask = X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
			     X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_TF |
			     X86_EFLAGS_IF | X86_EFLAGS_DF | X86_EFLAGS_OF |
			     X86_EFLAGS_IOPL | X86_EFLAGS_NT | X86_EFLAGS_RF |
			     X86_EFLAGS_AC | X86_EFLAGS_ID |
			     X86_EFLAGS_FIXED;
2108 2109
	unsigned long vm86_mask = X86_EFLAGS_VM | X86_EFLAGS_VIF |
				  X86_EFLAGS_VIP;
2110

2111
	/* TODO: Add stack limit check */
2112

2113
	rc = emulate_pop(ctxt, &temp_eip, ctxt->op_bytes);
2114

2115 2116
	if (rc != X86EMUL_CONTINUE)
		return rc;
2117

2118 2119
	if (temp_eip & ~0xffff)
		return emulate_gp(ctxt, 0);
2120

2121
	rc = emulate_pop(ctxt, &cs, ctxt->op_bytes);
2122

2123 2124
	if (rc != X86EMUL_CONTINUE)
		return rc;
2125

2126
	rc = emulate_pop(ctxt, &temp_eflags, ctxt->op_bytes);
2127

2128 2129
	if (rc != X86EMUL_CONTINUE)
		return rc;
2130

2131
	rc = load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS);
2132

2133 2134
	if (rc != X86EMUL_CONTINUE)
		return rc;
2135

2136
	ctxt->_eip = temp_eip;
2137

2138
	if (ctxt->op_bytes == 4)
2139
		ctxt->eflags = ((temp_eflags & mask) | (ctxt->eflags & vm86_mask));
2140
	else if (ctxt->op_bytes == 2) {
2141 2142
		ctxt->eflags &= ~0xffff;
		ctxt->eflags |= temp_eflags;
2143
	}
2144 2145

	ctxt->eflags &= ~EFLG_RESERVED_ZEROS_MASK; /* Clear reserved zeros */
	ctxt->eflags |= X86_EFLAGS_FIXED;
2147
	ctxt->ops->set_nmi_mask(ctxt, false);
2148 2149

	return rc;
2150 2151
}

2152
static int em_iret(struct x86_emulate_ctxt *ctxt)
2153
{
2154 2155
	switch(ctxt->mode) {
	case X86EMUL_MODE_REAL:
2156
		return emulate_iret_real(ctxt);
2157 2158 2159 2160
	case X86EMUL_MODE_VM86:
	case X86EMUL_MODE_PROT16:
	case X86EMUL_MODE_PROT32:
	case X86EMUL_MODE_PROT64:
2161
	default:
2162 2163
		/* iret from protected mode unimplemented yet */
		return X86EMUL_UNHANDLEABLE;
2164 2165 2166
	}
}

2167 2168 2169
static int em_jmp_far(struct x86_emulate_ctxt *ctxt)
{
	int rc;
2170 2171
	unsigned short sel;
	struct desc_struct new_desc;
2172 2173
	u8 cpl = ctxt->ops->cpl(ctxt);

2174
	memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
2175

2176 2177
	rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl,
				       X86_TRANSFER_CALL_JMP,
2178
				       &new_desc);
2179 2180 2181
	if (rc != X86EMUL_CONTINUE)
		return rc;

2182
	rc = assign_eip_far(ctxt, ctxt->src.val, &new_desc);
2183 2184 2185 2186
	/* Error handling is not implemented. */
	if (rc != X86EMUL_CONTINUE)
		return X86EMUL_UNHANDLEABLE;

2187
	return rc;
2188 2189
}

2190
static int em_jmp_abs(struct x86_emulate_ctxt *ctxt)
2191
{
2192 2193
	return assign_eip_near(ctxt, ctxt->src.val);
}
2194

2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205
static int em_call_near_abs(struct x86_emulate_ctxt *ctxt)
{
	int rc;
	long int old_eip;

	old_eip = ctxt->_eip;
	rc = assign_eip_near(ctxt, ctxt->src.val);
	if (rc != X86EMUL_CONTINUE)
		return rc;
	ctxt->src.val = old_eip;
	rc = em_push(ctxt);
2206
	return rc;
2207 2208
}

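/*
 * CMPXCHG8B: compare EDX:EAX with the 64-bit destination.  On a match
 * ZF is set and ECX:EBX is written back; on a mismatch ZF is cleared
 * and the old value is loaded into EDX:EAX.  The 16-byte CMPXCHG16B
 * form is not handled here.
 */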
2209
static int em_cmpxchg8b(struct x86_emulate_ctxt *ctxt)
2210
{
2211
	u64 old = ctxt->dst.orig_val64;
2212

2213 2214 2215
	if (ctxt->dst.bytes == 16)
		return X86EMUL_UNHANDLEABLE;

2216 2217 2218 2219
	if (((u32) (old >> 0) != (u32) reg_read(ctxt, VCPU_REGS_RAX)) ||
	    ((u32) (old >> 32) != (u32) reg_read(ctxt, VCPU_REGS_RDX))) {
		*reg_write(ctxt, VCPU_REGS_RAX) = (u32) (old >> 0);
		*reg_write(ctxt, VCPU_REGS_RDX) = (u32) (old >> 32);
2220
		ctxt->eflags &= ~X86_EFLAGS_ZF;
2221
	} else {
2222 2223
		ctxt->dst.val64 = ((u64)reg_read(ctxt, VCPU_REGS_RCX) << 32) |
			(u32) reg_read(ctxt, VCPU_REGS_RBX);
2224

2225
		ctxt->eflags |= X86_EFLAGS_ZF;
2226
	}
2227
	return X86EMUL_CONTINUE;
2228 2229
}

2230 2231
static int em_ret(struct x86_emulate_ctxt *ctxt)
{
2232 2233 2234 2235 2236 2237 2238 2239
	int rc;
	unsigned long eip;

	rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	return assign_eip_near(ctxt, eip);
2240 2241
}

2242
static int em_ret_far(struct x86_emulate_ctxt *ctxt)
2243 2244
{
	int rc;
2245
	unsigned long eip, cs;
2246
	int cpl = ctxt->ops->cpl(ctxt);
2247
	struct desc_struct new_desc;
2248

2249
	rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
2250
	if (rc != X86EMUL_CONTINUE)
2251
		return rc;
2252
	rc = emulate_pop(ctxt, &cs, ctxt->op_bytes);
2253
	if (rc != X86EMUL_CONTINUE)
2254
		return rc;
2255 2256 2257
	/* Outer-privilege level return is not implemented */
	if (ctxt->mode >= X86EMUL_MODE_PROT16 && (cs & 3) > cpl)
		return X86EMUL_UNHANDLEABLE;
2258 2259
	rc = __load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS, cpl,
				       X86_TRANSFER_RET,
2260 2261 2262
				       &new_desc);
	if (rc != X86EMUL_CONTINUE)
		return rc;
2263
	rc = assign_eip_far(ctxt, eip, &new_desc);
2264 2265 2266 2267
	/* Error handling is not implemented. */
	if (rc != X86EMUL_CONTINUE)
		return X86EMUL_UNHANDLEABLE;

2268 2269 2270
	return rc;
}

static int em_ret_far_imm(struct x86_emulate_ctxt *ctxt)
{
	int rc;

	rc = em_ret_far(ctxt);
	if (rc != X86EMUL_CONTINUE)
		return rc;
	rsp_increment(ctxt, ctxt->src.val);
	return X86EMUL_CONTINUE;
}

2282 2283 2284
static int em_cmpxchg(struct x86_emulate_ctxt *ctxt)
{
	/* Save real source value, then compare EAX against destination. */
2285 2286
	ctxt->dst.orig_val = ctxt->dst.val;
	ctxt->dst.val = reg_read(ctxt, VCPU_REGS_RAX);
2287
	ctxt->src.orig_val = ctxt->src.val;
2288
	ctxt->src.val = ctxt->dst.orig_val;
2289
	fastop(ctxt, em_cmp);
2290

2291
	if (ctxt->eflags & X86_EFLAGS_ZF) {
2292 2293
		/* Success: write back to memory; no update of EAX */
		ctxt->src.type = OP_NONE;
2294 2295 2296
		ctxt->dst.val = ctxt->src.orig_val;
	} else {
		/* Failure: write the value we saw to EAX. */
2297 2298 2299 2300
		ctxt->src.type = OP_REG;
		ctxt->src.addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX);
		ctxt->src.val = ctxt->dst.orig_val;
		/* Create write-cycle to dest by writing the same value */
2301
		ctxt->dst.val = ctxt->dst.orig_val;
2302 2303 2304 2305
	}
	return X86EMUL_CONTINUE;
}

2306
static int em_lseg(struct x86_emulate_ctxt *ctxt)
2307
{
2308
	int seg = ctxt->src2.val;
2309 2310 2311
	unsigned short sel;
	int rc;

2312
	memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
2313

2314
	rc = load_segment_descriptor(ctxt, sel, seg);
2315 2316 2317
	if (rc != X86EMUL_CONTINUE)
		return rc;

2318
	ctxt->dst.val = ctxt->src.val;
2319 2320 2321
	return rc;
}

2322 2323 2324 2325 2326 2327
static int emulator_has_longmode(struct x86_emulate_ctxt *ctxt)
{
	u32 eax, ebx, ecx, edx;

	eax = 0x80000001;
	ecx = 0;
2328
	ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, false);
2329 2330 2331 2332 2333 2334
	return edx & bit(X86_FEATURE_LM);
}

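/*
 * Read one field of the SMM state-save area at smbase + offset, e.g.
 *	cr0 = GET_SMSTATE(u32, smbase, 0x7ffc);
 * The macro yields the value read and bails out of the calling
 * function with X86EMUL_UNHANDLEABLE if the physical read fails.
 */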
#define GET_SMSTATE(type, smbase, offset)				  \
	({								  \
	 type __val;							  \
2335 2336
	 int r = ctxt->ops->read_phys(ctxt, smbase + offset, &__val,      \
				      sizeof(__val));			  \
	 if (r != X86EMUL_CONTINUE)					  \
		 return X86EMUL_UNHANDLEABLE;				  \
	 __val;								  \
	})

static void rsm_set_desc_flags(struct desc_struct *desc, u32 flags)
{
	desc->g    = (flags >> 23) & 1;
	desc->d    = (flags >> 22) & 1;
	desc->l    = (flags >> 21) & 1;
	desc->avl  = (flags >> 20) & 1;
	desc->p    = (flags >> 15) & 1;
	desc->dpl  = (flags >> 13) & 3;
	desc->s    = (flags >> 12) & 1;
	desc->type = (flags >>  8) & 15;
}

static int rsm_load_seg_32(struct x86_emulate_ctxt *ctxt, u64 smbase, int n)
{
	struct desc_struct desc;
	int offset;
	u16 selector;

	selector = GET_SMSTATE(u32, smbase, 0x7fa8 + n * 4);

	if (n < 3)
		offset = 0x7f84 + n * 12;
	else
		offset = 0x7f2c + (n - 3) * 12;

	set_desc_base(&desc,      GET_SMSTATE(u32, smbase, offset + 8));
	set_desc_limit(&desc,     GET_SMSTATE(u32, smbase, offset + 4));
	rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smbase, offset));
	ctxt->ops->set_segment(ctxt, selector, &desc, 0, n);
	return X86EMUL_CONTINUE;
}

static int rsm_load_seg_64(struct x86_emulate_ctxt *ctxt, u64 smbase, int n)
{
	struct desc_struct desc;
	int offset;
	u16 selector;
	u32 base3;

	offset = 0x7e00 + n * 16;

	selector =                GET_SMSTATE(u16, smbase, offset);
	rsm_set_desc_flags(&desc, GET_SMSTATE(u16, smbase, offset + 2) << 8);
	set_desc_limit(&desc,     GET_SMSTATE(u32, smbase, offset + 4));
	set_desc_base(&desc,      GET_SMSTATE(u32, smbase, offset + 8));
	base3 =                   GET_SMSTATE(u32, smbase, offset + 12);

	ctxt->ops->set_segment(ctxt, selector, &desc, base3, n);
	return X86EMUL_CONTINUE;
}

static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt,
2394
				    u64 cr0, u64 cr3, u64 cr4)
2395 2396
{
	int bad;
2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408
	u64 pcid;

	/* In order to later set CR4.PCIDE, CR3[11:0] must be zero.  */
	pcid = 0;
	if (cr4 & X86_CR4_PCIDE) {
		pcid = cr3 & 0xfff;
		cr3 &= ~0xfff;
	}

	bad = ctxt->ops->set_cr(ctxt, 3, cr3);
	if (bad)
		return X86EMUL_UNHANDLEABLE;

	/*
	 * First enable PAE, long mode needs it before CR0.PG = 1 is set.
	 * Then enable protected mode.	However, PCID cannot be enabled
	 * if EFER.LMA=0, so set it separately.
	 */
	bad = ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PCIDE);
	if (bad)
		return X86EMUL_UNHANDLEABLE;

	bad = ctxt->ops->set_cr(ctxt, 0, cr0);
	if (bad)
		return X86EMUL_UNHANDLEABLE;

	if (cr4 & X86_CR4_PCIDE) {
		bad = ctxt->ops->set_cr(ctxt, 4, cr4);
		if (bad)
			return X86EMUL_UNHANDLEABLE;
2427 2428 2429 2430 2431 2432
		if (pcid) {
			bad = ctxt->ops->set_cr(ctxt, 3, cr3 | pcid);
			if (bad)
				return X86EMUL_UNHANDLEABLE;
		}

2433 2434 2435 2436 2437 2438 2439 2440 2441 2442
	}

	return X86EMUL_CONTINUE;
}

static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt, u64 smbase)
{
	struct desc_struct desc;
	struct desc_ptr dt;
	u16 selector;
2443
	u32 val, cr0, cr3, cr4;
2444 2445 2446
	int i;

	cr0 =                      GET_SMSTATE(u32, smbase, 0x7ffc);
2447
	cr3 =                      GET_SMSTATE(u32, smbase, 0x7ff8);
	ctxt->eflags =             GET_SMSTATE(u32, smbase, 0x7ff4) | X86_EFLAGS_FIXED;
	ctxt->_eip =               GET_SMSTATE(u32, smbase, 0x7ff0);

	for (i = 0; i < 8; i++)
		*reg_write(ctxt, i) = GET_SMSTATE(u32, smbase, 0x7fd0 + i * 4);

	val = GET_SMSTATE(u32, smbase, 0x7fcc);
	ctxt->ops->set_dr(ctxt, 6, (val & DR6_VOLATILE) | DR6_FIXED_1);
	val = GET_SMSTATE(u32, smbase, 0x7fc8);
	ctxt->ops->set_dr(ctxt, 7, (val & DR7_VOLATILE) | DR7_FIXED_1);

	selector =                 GET_SMSTATE(u32, smbase, 0x7fc4);
	set_desc_base(&desc,       GET_SMSTATE(u32, smbase, 0x7f64));
	set_desc_limit(&desc,      GET_SMSTATE(u32, smbase, 0x7f60));
	rsm_set_desc_flags(&desc,  GET_SMSTATE(u32, smbase, 0x7f5c));
	ctxt->ops->set_segment(ctxt, selector, &desc, 0, VCPU_SREG_TR);

	selector =                 GET_SMSTATE(u32, smbase, 0x7fc0);
	set_desc_base(&desc,       GET_SMSTATE(u32, smbase, 0x7f80));
	set_desc_limit(&desc,      GET_SMSTATE(u32, smbase, 0x7f7c));
	rsm_set_desc_flags(&desc,  GET_SMSTATE(u32, smbase, 0x7f78));
	ctxt->ops->set_segment(ctxt, selector, &desc, 0, VCPU_SREG_LDTR);

	dt.address =               GET_SMSTATE(u32, smbase, 0x7f74);
	dt.size =                  GET_SMSTATE(u32, smbase, 0x7f70);
	ctxt->ops->set_gdt(ctxt, &dt);

	dt.address =               GET_SMSTATE(u32, smbase, 0x7f58);
	dt.size =                  GET_SMSTATE(u32, smbase, 0x7f54);
	ctxt->ops->set_idt(ctxt, &dt);

	for (i = 0; i < 6; i++) {
		int r = rsm_load_seg_32(ctxt, smbase, i);
		if (r != X86EMUL_CONTINUE)
			return r;
	}

	cr4 = GET_SMSTATE(u32, smbase, 0x7f14);

	ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smbase, 0x7ef8));

2489
	return rsm_enter_protected_mode(ctxt, cr0, cr3, cr4);
2490 2491 2492 2493 2494 2495
}

static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase)
{
	struct desc_struct desc;
	struct desc_ptr dt;
2496
	u64 val, cr0, cr3, cr4;
2497 2498
	u32 base3;
	u16 selector;
2499
	int i, r;
2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512

	for (i = 0; i < 16; i++)
		*reg_write(ctxt, i) = GET_SMSTATE(u64, smbase, 0x7ff8 - i * 8);

	ctxt->_eip   = GET_SMSTATE(u64, smbase, 0x7f78);
	ctxt->eflags = GET_SMSTATE(u32, smbase, 0x7f70) | X86_EFLAGS_FIXED;

	val = GET_SMSTATE(u32, smbase, 0x7f68);
	ctxt->ops->set_dr(ctxt, 6, (val & DR6_VOLATILE) | DR6_FIXED_1);
	val = GET_SMSTATE(u32, smbase, 0x7f60);
	ctxt->ops->set_dr(ctxt, 7, (val & DR7_VOLATILE) | DR7_FIXED_1);

	cr0 =                       GET_SMSTATE(u64, smbase, 0x7f58);
2513
	cr3 =                       GET_SMSTATE(u64, smbase, 0x7f50);
	cr4 =                       GET_SMSTATE(u64, smbase, 0x7f48);
	ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smbase, 0x7f00));
	val =                       GET_SMSTATE(u64, smbase, 0x7ed0);
	ctxt->ops->set_msr(ctxt, MSR_EFER, val & ~EFER_LMA);

	selector =                  GET_SMSTATE(u32, smbase, 0x7e90);
	rsm_set_desc_flags(&desc,   GET_SMSTATE(u32, smbase, 0x7e92) << 8);
	set_desc_limit(&desc,       GET_SMSTATE(u32, smbase, 0x7e94));
	set_desc_base(&desc,        GET_SMSTATE(u32, smbase, 0x7e98));
	base3 =                     GET_SMSTATE(u32, smbase, 0x7e9c);
	ctxt->ops->set_segment(ctxt, selector, &desc, base3, VCPU_SREG_TR);

	dt.size =                   GET_SMSTATE(u32, smbase, 0x7e84);
	dt.address =                GET_SMSTATE(u64, smbase, 0x7e88);
	ctxt->ops->set_idt(ctxt, &dt);

	selector =                  GET_SMSTATE(u32, smbase, 0x7e70);
	rsm_set_desc_flags(&desc,   GET_SMSTATE(u32, smbase, 0x7e72) << 8);
	set_desc_limit(&desc,       GET_SMSTATE(u32, smbase, 0x7e74));
	set_desc_base(&desc,        GET_SMSTATE(u32, smbase, 0x7e78));
	base3 =                     GET_SMSTATE(u32, smbase, 0x7e7c);
	ctxt->ops->set_segment(ctxt, selector, &desc, base3, VCPU_SREG_LDTR);

	dt.size =                   GET_SMSTATE(u32, smbase, 0x7e64);
	dt.address =                GET_SMSTATE(u64, smbase, 0x7e68);
	ctxt->ops->set_gdt(ctxt, &dt);

2541
	r = rsm_enter_protected_mode(ctxt, cr0, cr3, cr4);
2542 2543 2544
	if (r != X86EMUL_CONTINUE)
		return r;

2545
	for (i = 0; i < 6; i++) {
2546
		r = rsm_load_seg_64(ctxt, smbase, i);
2547 2548 2549 2550
		if (r != X86EMUL_CONTINUE)
			return r;
	}

2551
	return X86EMUL_CONTINUE;
2552 2553
}

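/*
 * RSM: leave System Management Mode.  The vCPU is first forced into a
 * clean 32-bit flat state (paging, PCID, PAE and EFER cleared), then
 * the saved context is reloaded from the state-save area at
 * SMBASE + 0x8000, using the 64-bit layout when the vCPU advertises
 * long mode and the 32-bit layout otherwise.
 */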
static int em_rsm(struct x86_emulate_ctxt *ctxt)
{
2556 2557 2558 2559
	unsigned long cr0, cr4, efer;
	u64 smbase;
	int ret;

2560
	if ((ctxt->ops->get_hflags(ctxt) & X86EMUL_SMM_MASK) == 0)
		return emulate_ud(ctxt);

2563 2564
	/*
	 * Get back to real mode, to prepare a safe state in which to load
2565 2566
	 * CR0/CR3/CR4/EFER.  It's all a bit more complicated if the vCPU
	 * supports long mode.
2567
	 */
2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585
	cr4 = ctxt->ops->get_cr(ctxt, 4);
	if (emulator_has_longmode(ctxt)) {
		struct desc_struct cs_desc;

		/* Zero CR4.PCIDE before CR0.PG.  */
		if (cr4 & X86_CR4_PCIDE) {
			ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PCIDE);
			cr4 &= ~X86_CR4_PCIDE;
		}

		/* A 32-bit code segment is required to clear EFER.LMA.  */
		memset(&cs_desc, 0, sizeof(cs_desc));
		cs_desc.type = 0xb;
		cs_desc.s = cs_desc.g = cs_desc.p = 1;
		ctxt->ops->set_segment(ctxt, 0, &cs_desc, 0, VCPU_SREG_CS);
	}

	/* For the 64-bit case, this will clear EFER.LMA.  */
2586 2587 2588
	cr0 = ctxt->ops->get_cr(ctxt, 0);
	if (cr0 & X86_CR0_PE)
		ctxt->ops->set_cr(ctxt, 0, cr0 & ~(X86_CR0_PG | X86_CR0_PE));
2589 2590

	/* Now clear CR4.PAE (which must be done before clearing EFER.LME).  */
2591 2592
	if (cr4 & X86_CR4_PAE)
		ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PAE);
2593 2594

	/* And finally go back to 32-bit mode.  */
2595 2596 2597 2598
	efer = 0;
	ctxt->ops->set_msr(ctxt, MSR_EFER, efer);

	smbase = ctxt->ops->get_smbase(ctxt);
2599 2600 2601 2602 2603 2604 2605 2606 2607

	/*
	 * Give pre_leave_smm() a chance to make ISA-specific changes to the
	 * vCPU state (e.g. enter guest mode) before loading state from the SMM
	 * state-save area.
	 */
	if (ctxt->ops->pre_leave_smm(ctxt, smbase))
		return X86EMUL_UNHANDLEABLE;

2608 2609 2610 2611 2612 2613 2614 2615 2616 2617
	if (emulator_has_longmode(ctxt))
		ret = rsm_load_state_64(ctxt, smbase + 0x8000);
	else
		ret = rsm_load_state_32(ctxt, smbase + 0x8000);

	if (ret != X86EMUL_CONTINUE) {
		/* FIXME: should triple fault */
		return X86EMUL_UNHANDLEABLE;
	}

2618
	if ((ctxt->ops->get_hflags(ctxt) & X86EMUL_SMM_INSIDE_NMI_MASK) == 0)
2619 2620
		ctxt->ops->set_nmi_mask(ctxt, false);

2621 2622
	ctxt->ops->set_hflags(ctxt, ctxt->ops->get_hflags(ctxt) &
		~(X86EMUL_SMM_INSIDE_NMI_MASK | X86EMUL_SMM_MASK));
2623
	return X86EMUL_CONTINUE;
}

2626
static void
2627
setup_syscalls_segments(struct x86_emulate_ctxt *ctxt,
2628
			struct desc_struct *cs, struct desc_struct *ss)
2629 2630
{
	cs->l = 0;		/* will be adjusted later */
2631
	set_desc_base(cs, 0);	/* flat segment */
2632
	cs->g = 1;		/* 4kb granularity */
2633
	set_desc_limit(cs, 0xfffff);	/* 4GB limit */
2634 2635 2636
	cs->type = 0x0b;	/* Read, Execute, Accessed */
	cs->s = 1;
	cs->dpl = 0;		/* will be adjusted later */
2637 2638
	cs->p = 1;
	cs->d = 1;
2639
	cs->avl = 0;
2640

2641 2642
	set_desc_base(ss, 0);	/* flat segment */
	set_desc_limit(ss, 0xfffff);	/* 4GB limit */
2643 2644 2645
	ss->g = 1;		/* 4kb granularity */
	ss->s = 1;
	ss->type = 0x03;	/* Read/Write, Accessed */
2646
	ss->d = 1;		/* 32bit stack segment */
2647
	ss->dpl = 0;
2648
	ss->p = 1;
2649 2650
	ss->l = 0;
	ss->avl = 0;
2651 2652
}

2653 2654 2655 2656 2657
static bool vendor_intel(struct x86_emulate_ctxt *ctxt)
{
	u32 eax, ebx, ecx, edx;

	eax = ecx = 0;
2658
	ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, false);
2659
	return ebx == X86EMUL_CPUID_VENDOR_GenuineIntel_ebx
2660 2661 2662 2663
		&& ecx == X86EMUL_CPUID_VENDOR_GenuineIntel_ecx
		&& edx == X86EMUL_CPUID_VENDOR_GenuineIntel_edx;
}

2664 2665
static bool em_syscall_is_enabled(struct x86_emulate_ctxt *ctxt)
{
2666
	const struct x86_emulate_ops *ops = ctxt->ops;
2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677
	u32 eax, ebx, ecx, edx;

	/*
	 * syscall should always be enabled in longmode - so only become
	 * vendor specific (cpuid) if other modes are active...
	 */
	if (ctxt->mode == X86EMUL_MODE_PROT64)
		return true;

	eax = 0x00000000;
	ecx = 0x00000000;
2678
	ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, false);
2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702
	/*
	 * Intel ("GenuineIntel")
	 * remark: Intel CPUs only support "syscall" in 64bit
	 * longmode. Also an 64bit guest with a
	 * 32bit compat-app running will #UD !! While this
	 * behaviour can be fixed (by emulating) into AMD
	 * response - CPUs of AMD can't behave like Intel.
	 */
	if (ebx == X86EMUL_CPUID_VENDOR_GenuineIntel_ebx &&
	    ecx == X86EMUL_CPUID_VENDOR_GenuineIntel_ecx &&
	    edx == X86EMUL_CPUID_VENDOR_GenuineIntel_edx)
		return false;

	/* AMD ("AuthenticAMD") */
	if (ebx == X86EMUL_CPUID_VENDOR_AuthenticAMD_ebx &&
	    ecx == X86EMUL_CPUID_VENDOR_AuthenticAMD_ecx &&
	    edx == X86EMUL_CPUID_VENDOR_AuthenticAMD_edx)
		return true;

	/* AMD ("AMDisbetter!") */
	if (ebx == X86EMUL_CPUID_VENDOR_AMDisbetterI_ebx &&
	    ecx == X86EMUL_CPUID_VENDOR_AMDisbetterI_ecx &&
	    edx == X86EMUL_CPUID_VENDOR_AMDisbetterI_edx)
		return true;
2703 2704 2705 2706 2707

	/* default: (not Intel, not AMD), apply Intel's stricter rules... */
	return false;
}

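/*
 * SYSCALL: CS comes from MSR_STAR[47:32] and SS from that value + 8;
 * the return RIP is saved in RCX and, in long mode, RFLAGS in R11
 * with the bits named in MSR_SYSCALL_MASK cleared.  The target RIP is
 * read from MSR_LSTAR (64-bit callers) or MSR_CSTAR (compat mode).
 */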
2708
static int em_syscall(struct x86_emulate_ctxt *ctxt)
2709
{
2710
	const struct x86_emulate_ops *ops = ctxt->ops;
2711
	struct desc_struct cs, ss;
2712
	u64 msr_data;
2713
	u16 cs_sel, ss_sel;
2714
	u64 efer = 0;
2715 2716

	/* syscall is not available in real mode */
2717
	if (ctxt->mode == X86EMUL_MODE_REAL ||
2718 2719
	    ctxt->mode == X86EMUL_MODE_VM86)
		return emulate_ud(ctxt);
2720

2721 2722 2723
	if (!(em_syscall_is_enabled(ctxt)))
		return emulate_ud(ctxt);

2724
	ops->get_msr(ctxt, MSR_EFER, &efer);
2725
	setup_syscalls_segments(ctxt, &cs, &ss);
2726

2727 2728 2729
	if (!(efer & EFER_SCE))
		return emulate_ud(ctxt);

2730
	ops->get_msr(ctxt, MSR_STAR, &msr_data);
2731
	msr_data >>= 32;
2732 2733
	cs_sel = (u16)(msr_data & 0xfffc);
	ss_sel = (u16)(msr_data + 8);
2734

2735
	if (efer & EFER_LMA) {
2736
		cs.d = 0;
2737 2738
		cs.l = 1;
	}
2739 2740
	ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
	ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);
2741

2742
	*reg_write(ctxt, VCPU_REGS_RCX) = ctxt->_eip;
2743
	if (efer & EFER_LMA) {
2744
#ifdef CONFIG_X86_64
2745
		*reg_write(ctxt, VCPU_REGS_R11) = ctxt->eflags;
2746

2747
		ops->get_msr(ctxt,
2748 2749
			     ctxt->mode == X86EMUL_MODE_PROT64 ?
			     MSR_LSTAR : MSR_CSTAR, &msr_data);
2750
		ctxt->_eip = msr_data;
2751

2752
		ops->get_msr(ctxt, MSR_SYSCALL_MASK, &msr_data);
2753
		ctxt->eflags &= ~msr_data;
		ctxt->eflags |= X86_EFLAGS_FIXED;
2755 2756 2757
#endif
	} else {
		/* legacy mode */
2758
		ops->get_msr(ctxt, MSR_STAR, &msr_data);
2759
		ctxt->_eip = (u32)msr_data;
2760

2761
		ctxt->eflags &= ~(X86_EFLAGS_VM | X86_EFLAGS_IF);
2762 2763
	}

2764
	ctxt->tf = (ctxt->eflags & X86_EFLAGS_TF) != 0;
2765
	return X86EMUL_CONTINUE;
2766 2767
}

2768
static int em_sysenter(struct x86_emulate_ctxt *ctxt)
2769
{
2770
	const struct x86_emulate_ops *ops = ctxt->ops;
2771
	struct desc_struct cs, ss;
2772
	u64 msr_data;
2773
	u16 cs_sel, ss_sel;
2774
	u64 efer = 0;
2775

2776
	ops->get_msr(ctxt, MSR_EFER, &efer);
2777
	/* inject #GP if in real mode */
2778 2779
	if (ctxt->mode == X86EMUL_MODE_REAL)
		return emulate_gp(ctxt, 0);
2780

2781 2782 2783 2784
	/*
	 * Not recognized on AMD in compat mode (but is recognized in legacy
	 * mode).
	 */
2785
	if ((ctxt->mode != X86EMUL_MODE_PROT64) && (efer & EFER_LMA)
2786 2787 2788
	    && !vendor_intel(ctxt))
		return emulate_ud(ctxt);

2789
	/* sysenter/sysexit have not been tested in 64bit mode. */
2790
	if (ctxt->mode == X86EMUL_MODE_PROT64)
2791
		return X86EMUL_UNHANDLEABLE;
2792

2793
	setup_syscalls_segments(ctxt, &cs, &ss);
2794

2795
	ops->get_msr(ctxt, MSR_IA32_SYSENTER_CS, &msr_data);
2796 2797
	if ((msr_data & 0xfffc) == 0x0)
		return emulate_gp(ctxt, 0);
2798

2799
	ctxt->eflags &= ~(X86_EFLAGS_VM | X86_EFLAGS_IF);
2800
	cs_sel = (u16)msr_data & ~SEGMENT_RPL_MASK;
2801
	ss_sel = cs_sel + 8;
2802
	if (efer & EFER_LMA) {
2803
		cs.d = 0;
2804 2805 2806
		cs.l = 1;
	}

2807 2808
	ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
	ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);
2809

2810
	ops->get_msr(ctxt, MSR_IA32_SYSENTER_EIP, &msr_data);
2811
	ctxt->_eip = (efer & EFER_LMA) ? msr_data : (u32)msr_data;
2812

2813
	ops->get_msr(ctxt, MSR_IA32_SYSENTER_ESP, &msr_data);
2814 2815
	*reg_write(ctxt, VCPU_REGS_RSP) = (efer & EFER_LMA) ? msr_data :
							      (u32)msr_data;
2816

2817
	return X86EMUL_CONTINUE;
2818 2819
}

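/*
 * SYSEXIT: the new CS/SS pair is derived from MSR_IA32_SYSENTER_CS
 * (+16/+24 for a 32-bit return, +32/+40 for a 64-bit return), RIP is
 * taken from RDX, RSP from RCX, and both selectors get RPL 3.
 */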
2820
static int em_sysexit(struct x86_emulate_ctxt *ctxt)
2821
{
2822
	const struct x86_emulate_ops *ops = ctxt->ops;
2823
	struct desc_struct cs, ss;
2824
	u64 msr_data, rcx, rdx;
2825
	int usermode;
	u16 cs_sel = 0, ss_sel = 0;
2827

2828 2829
	/* inject #GP if in real mode or Virtual 8086 mode */
	if (ctxt->mode == X86EMUL_MODE_REAL ||
2830 2831
	    ctxt->mode == X86EMUL_MODE_VM86)
		return emulate_gp(ctxt, 0);
2832

2833
	setup_syscalls_segments(ctxt, &cs, &ss);
2834

2835
	if ((ctxt->rex_prefix & 0x8) != 0x0)
2836 2837 2838 2839
		usermode = X86EMUL_MODE_PROT64;
	else
		usermode = X86EMUL_MODE_PROT32;

2840 2841 2842
	rcx = reg_read(ctxt, VCPU_REGS_RCX);
	rdx = reg_read(ctxt, VCPU_REGS_RDX);

2843 2844
	cs.dpl = 3;
	ss.dpl = 3;
2845
	ops->get_msr(ctxt, MSR_IA32_SYSENTER_CS, &msr_data);
2846 2847
	switch (usermode) {
	case X86EMUL_MODE_PROT32:
2848
		cs_sel = (u16)(msr_data + 16);
2849 2850
		if ((msr_data & 0xfffc) == 0x0)
			return emulate_gp(ctxt, 0);
2851
		ss_sel = (u16)(msr_data + 24);
2852 2853
		rcx = (u32)rcx;
		rdx = (u32)rdx;
2854 2855
		break;
	case X86EMUL_MODE_PROT64:
2856
		cs_sel = (u16)(msr_data + 32);
2857 2858
		if (msr_data == 0x0)
			return emulate_gp(ctxt, 0);
2859 2860
		ss_sel = cs_sel + 8;
		cs.d = 0;
2861
		cs.l = 1;
2862 2863
		if (emul_is_noncanonical_address(rcx, ctxt) ||
		    emul_is_noncanonical_address(rdx, ctxt))
2864
			return emulate_gp(ctxt, 0);
2865 2866
		break;
	}
2867 2868
	cs_sel |= SEGMENT_RPL_MASK;
	ss_sel |= SEGMENT_RPL_MASK;
2869

2870 2871
	ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
	ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);
2872

2873 2874
	ctxt->_eip = rdx;
	*reg_write(ctxt, VCPU_REGS_RSP) = rcx;
2875

2876
	return X86EMUL_CONTINUE;
2877 2878
}

2879
static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt)
2880 2881 2882 2883 2884 2885
{
	int iopl;
	if (ctxt->mode == X86EMUL_MODE_REAL)
		return false;
	if (ctxt->mode == X86EMUL_MODE_VM86)
		return true;
2886
	iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> X86_EFLAGS_IOPL_BIT;
2887
	return ctxt->ops->cpl(ctxt) > iopl;
2888 2889
}

2890 2891 2892
#define VMWARE_PORT_VMPORT	(0x5658)
#define VMWARE_PORT_VMRPC	(0x5659)

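/*
 * Check the I/O permission bitmap in the current TSS for 'len'
 * consecutive ports starting at 'port'.  The VMware backdoor ports
 * above are always allowed when the backdoor is enabled.
 */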
2893 2894 2895
static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt,
					    u16 port, u16 len)
{
2896
	const struct x86_emulate_ops *ops = ctxt->ops;
2897
	struct desc_struct tr_seg;
2898
	u32 base3;
2899
	int r;
2900
	u16 tr, io_bitmap_ptr, perm, bit_idx = port & 0x7;
2901
	unsigned mask = (1 << len) - 1;
2902
	unsigned long base;
2903

2904 2905 2906 2907 2908 2909 2910 2911
	/*
	 * VMware allows access to these ports even if denied
	 * by TSS I/O permission bitmap. Mimic behavior.
	 */
	if (enable_vmware_backdoor &&
	    ((port == VMWARE_PORT_VMPORT) || (port == VMWARE_PORT_VMRPC)))
		return true;

2912
	ops->get_segment(ctxt, &tr, &tr_seg, &base3, VCPU_SREG_TR);
2913
	if (!tr_seg.p)
2914
		return false;
2915
	if (desc_limit_scaled(&tr_seg) < 103)
2916
		return false;
2917 2918 2919 2920
	base = get_desc_base(&tr_seg);
#ifdef CONFIG_X86_64
	base |= ((u64)base3) << 32;
#endif
2921
	r = ops->read_std(ctxt, base + 102, &io_bitmap_ptr, 2, NULL);
2922 2923
	if (r != X86EMUL_CONTINUE)
		return false;
2924
	if (io_bitmap_ptr + port/8 > desc_limit_scaled(&tr_seg))
2925
		return false;
2926
	r = ops->read_std(ctxt, base + io_bitmap_ptr + port/8, &perm, 2, NULL);
2927 2928 2929 2930 2931 2932 2933 2934 2935 2936
	if (r != X86EMUL_CONTINUE)
		return false;
	if ((perm >> bit_idx) & mask)
		return false;
	return true;
}

static bool emulator_io_permited(struct x86_emulate_ctxt *ctxt,
				 u16 port, u16 len)
{
2937 2938 2939
	if (ctxt->perm_ok)
		return true;

2940 2941
	if (emulator_bad_iopl(ctxt))
		if (!emulator_io_port_access_allowed(ctxt, port, len))
2942
			return false;
2943 2944 2945

	ctxt->perm_ok = true;

2946 2947 2948
	return true;
}

static void string_registers_quirk(struct x86_emulate_ctxt *ctxt)
{
	/*
	 * Intel CPUs mask the counter and pointers in quite strange
	 * manner when ECX is zero due to REP-string optimizations.
	 */
#ifdef CONFIG_X86_64
	if (ctxt->ad_bytes != 4 || !vendor_intel(ctxt))
		return;

	*reg_write(ctxt, VCPU_REGS_RCX) = 0;

	switch (ctxt->b) {
	case 0xa4:	/* movsb */
	case 0xa5:	/* movsd/w */
		*reg_rmw(ctxt, VCPU_REGS_RSI) &= (u32)-1;
		/* fall through */
	case 0xaa:	/* stosb */
	case 0xab:	/* stosd/w */
		*reg_rmw(ctxt, VCPU_REGS_RDI) &= (u32)-1;
	}
#endif
}

2973 2974 2975
static void save_state_to_tss16(struct x86_emulate_ctxt *ctxt,
				struct tss_segment_16 *tss)
{
2976
	tss->ip = ctxt->_eip;
2977
	tss->flag = ctxt->eflags;
2978 2979 2980 2981 2982 2983 2984 2985
	tss->ax = reg_read(ctxt, VCPU_REGS_RAX);
	tss->cx = reg_read(ctxt, VCPU_REGS_RCX);
	tss->dx = reg_read(ctxt, VCPU_REGS_RDX);
	tss->bx = reg_read(ctxt, VCPU_REGS_RBX);
	tss->sp = reg_read(ctxt, VCPU_REGS_RSP);
	tss->bp = reg_read(ctxt, VCPU_REGS_RBP);
	tss->si = reg_read(ctxt, VCPU_REGS_RSI);
	tss->di = reg_read(ctxt, VCPU_REGS_RDI);
2986

2987 2988 2989 2990 2991
	tss->es = get_segment_selector(ctxt, VCPU_SREG_ES);
	tss->cs = get_segment_selector(ctxt, VCPU_SREG_CS);
	tss->ss = get_segment_selector(ctxt, VCPU_SREG_SS);
	tss->ds = get_segment_selector(ctxt, VCPU_SREG_DS);
	tss->ldt = get_segment_selector(ctxt, VCPU_SREG_LDTR);
2992 2993 2994 2995 2996 2997
}

static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt,
				 struct tss_segment_16 *tss)
{
	int ret;
2998
	u8 cpl;
2999

3000
	ctxt->_eip = tss->ip;
3001
	ctxt->eflags = tss->flag | 2;
3002 3003 3004 3005 3006 3007 3008 3009
	*reg_write(ctxt, VCPU_REGS_RAX) = tss->ax;
	*reg_write(ctxt, VCPU_REGS_RCX) = tss->cx;
	*reg_write(ctxt, VCPU_REGS_RDX) = tss->dx;
	*reg_write(ctxt, VCPU_REGS_RBX) = tss->bx;
	*reg_write(ctxt, VCPU_REGS_RSP) = tss->sp;
	*reg_write(ctxt, VCPU_REGS_RBP) = tss->bp;
	*reg_write(ctxt, VCPU_REGS_RSI) = tss->si;
	*reg_write(ctxt, VCPU_REGS_RDI) = tss->di;
3010 3011 3012 3013 3014

	/*
	 * SDM says that segment selectors are loaded before segment
	 * descriptors
	 */
3015 3016 3017 3018 3019
	set_segment_selector(ctxt, tss->ldt, VCPU_SREG_LDTR);
	set_segment_selector(ctxt, tss->es, VCPU_SREG_ES);
	set_segment_selector(ctxt, tss->cs, VCPU_SREG_CS);
	set_segment_selector(ctxt, tss->ss, VCPU_SREG_SS);
	set_segment_selector(ctxt, tss->ds, VCPU_SREG_DS);
3020

3021 3022
	cpl = tss->cs & 3;

3023
	/*
	 * Now load segment descriptors. If fault happens at this stage
3025 3026
	 * it is handled in a context of new task
	 */
3027
	ret = __load_segment_descriptor(ctxt, tss->ldt, VCPU_SREG_LDTR, cpl,
3028
					X86_TRANSFER_TASK_SWITCH, NULL);
3029 3030
	if (ret != X86EMUL_CONTINUE)
		return ret;
3031
	ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl,
3032
					X86_TRANSFER_TASK_SWITCH, NULL);
3033 3034
	if (ret != X86EMUL_CONTINUE)
		return ret;
3035
	ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl,
3036
					X86_TRANSFER_TASK_SWITCH, NULL);
3037 3038
	if (ret != X86EMUL_CONTINUE)
		return ret;
3039
	ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl,
3040
					X86_TRANSFER_TASK_SWITCH, NULL);
3041 3042
	if (ret != X86EMUL_CONTINUE)
		return ret;
3043
	ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl,
3044
					X86_TRANSFER_TASK_SWITCH, NULL);
3045 3046 3047 3048 3049 3050 3051 3052 3053 3054
	if (ret != X86EMUL_CONTINUE)
		return ret;

	return X86EMUL_CONTINUE;
}

static int task_switch_16(struct x86_emulate_ctxt *ctxt,
			  u16 tss_selector, u16 old_tss_sel,
			  ulong old_tss_base, struct desc_struct *new_desc)
{
3055
	const struct x86_emulate_ops *ops = ctxt->ops;
3056 3057
	struct tss_segment_16 tss_seg;
	int ret;
3058
	u32 new_tss_base = get_desc_base(new_desc);
3059

3060
	ret = ops->read_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg,
3061
			    &ctxt->exception);
3062
	if (ret != X86EMUL_CONTINUE)
3063 3064
		return ret;

3065
	save_state_to_tss16(ctxt, &tss_seg);
3066

3067
	ret = ops->write_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg,
3068
			     &ctxt->exception);
3069
	if (ret != X86EMUL_CONTINUE)
3070 3071
		return ret;

3072
	ret = ops->read_std(ctxt, new_tss_base, &tss_seg, sizeof tss_seg,
3073
			    &ctxt->exception);
3074
	if (ret != X86EMUL_CONTINUE)
3075 3076 3077 3078 3079
		return ret;

	if (old_tss_sel != 0xffff) {
		tss_seg.prev_task_link = old_tss_sel;

3080
		ret = ops->write_std(ctxt, new_tss_base,
3081 3082
				     &tss_seg.prev_task_link,
				     sizeof tss_seg.prev_task_link,
3083
				     &ctxt->exception);
3084
		if (ret != X86EMUL_CONTINUE)
3085 3086 3087
			return ret;
	}

3088
	return load_state_from_tss16(ctxt, &tss_seg);
3089 3090 3091 3092 3093
}

static void save_state_to_tss32(struct x86_emulate_ctxt *ctxt,
				struct tss_segment_32 *tss)
{
3094
	/* CR3 and ldt selector are not saved intentionally */
3095
	tss->eip = ctxt->_eip;
3096
	tss->eflags = ctxt->eflags;
3097 3098 3099 3100 3101 3102 3103 3104
	tss->eax = reg_read(ctxt, VCPU_REGS_RAX);
	tss->ecx = reg_read(ctxt, VCPU_REGS_RCX);
	tss->edx = reg_read(ctxt, VCPU_REGS_RDX);
	tss->ebx = reg_read(ctxt, VCPU_REGS_RBX);
	tss->esp = reg_read(ctxt, VCPU_REGS_RSP);
	tss->ebp = reg_read(ctxt, VCPU_REGS_RBP);
	tss->esi = reg_read(ctxt, VCPU_REGS_RSI);
	tss->edi = reg_read(ctxt, VCPU_REGS_RDI);
3105

3106 3107 3108 3109 3110 3111
	tss->es = get_segment_selector(ctxt, VCPU_SREG_ES);
	tss->cs = get_segment_selector(ctxt, VCPU_SREG_CS);
	tss->ss = get_segment_selector(ctxt, VCPU_SREG_SS);
	tss->ds = get_segment_selector(ctxt, VCPU_SREG_DS);
	tss->fs = get_segment_selector(ctxt, VCPU_SREG_FS);
	tss->gs = get_segment_selector(ctxt, VCPU_SREG_GS);
3112 3113 3114 3115 3116 3117
}

static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt,
				 struct tss_segment_32 *tss)
{
	int ret;
3118
	u8 cpl;
3119

3120
	if (ctxt->ops->set_cr(ctxt, 3, tss->cr3))
3121
		return emulate_gp(ctxt, 0);
3122
	ctxt->_eip = tss->eip;
3123
	ctxt->eflags = tss->eflags | 2;
3124 3125

	/* General purpose registers */
3126 3127 3128 3129 3130 3131 3132 3133
	*reg_write(ctxt, VCPU_REGS_RAX) = tss->eax;
	*reg_write(ctxt, VCPU_REGS_RCX) = tss->ecx;
	*reg_write(ctxt, VCPU_REGS_RDX) = tss->edx;
	*reg_write(ctxt, VCPU_REGS_RBX) = tss->ebx;
	*reg_write(ctxt, VCPU_REGS_RSP) = tss->esp;
	*reg_write(ctxt, VCPU_REGS_RBP) = tss->ebp;
	*reg_write(ctxt, VCPU_REGS_RSI) = tss->esi;
	*reg_write(ctxt, VCPU_REGS_RDI) = tss->edi;
3134 3135 3136

	/*
	 * SDM says that segment selectors are loaded before segment
3137 3138
	 * descriptors.  This is important because CPL checks will
	 * use CS.RPL.
3139
	 */
3140 3141 3142 3143 3144 3145 3146
	set_segment_selector(ctxt, tss->ldt_selector, VCPU_SREG_LDTR);
	set_segment_selector(ctxt, tss->es, VCPU_SREG_ES);
	set_segment_selector(ctxt, tss->cs, VCPU_SREG_CS);
	set_segment_selector(ctxt, tss->ss, VCPU_SREG_SS);
	set_segment_selector(ctxt, tss->ds, VCPU_SREG_DS);
	set_segment_selector(ctxt, tss->fs, VCPU_SREG_FS);
	set_segment_selector(ctxt, tss->gs, VCPU_SREG_GS);
3147

3148 3149 3150 3151 3152
	/*
	 * If we're switching between Protected Mode and VM86, we need to make
	 * sure to update the mode before loading the segment descriptors so
	 * that the selectors are interpreted correctly.
	 */
3153
	if (ctxt->eflags & X86_EFLAGS_VM) {
3154
		ctxt->mode = X86EMUL_MODE_VM86;
3155 3156
		cpl = 3;
	} else {
3157
		ctxt->mode = X86EMUL_MODE_PROT32;
3158 3159
		cpl = tss->cs & 3;
	}
3160

3161 3162 3163 3164
	/*
	 * Now load segment descriptors. If fault happens at this stage
	 * it is handled in a context of new task
	 */
3165
	ret = __load_segment_descriptor(ctxt, tss->ldt_selector, VCPU_SREG_LDTR,
3166
					cpl, X86_TRANSFER_TASK_SWITCH, NULL);
3167 3168
	if (ret != X86EMUL_CONTINUE)
		return ret;
3169
	ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl,
3170
					X86_TRANSFER_TASK_SWITCH, NULL);
3171 3172
	if (ret != X86EMUL_CONTINUE)
		return ret;
3173
	ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl,
3174
					X86_TRANSFER_TASK_SWITCH, NULL);
3175 3176
	if (ret != X86EMUL_CONTINUE)
		return ret;
3177
	ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl,
3178
					X86_TRANSFER_TASK_SWITCH, NULL);
3179 3180
	if (ret != X86EMUL_CONTINUE)
		return ret;
3181
	ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl,
3182
					X86_TRANSFER_TASK_SWITCH, NULL);
3183 3184
	if (ret != X86EMUL_CONTINUE)
		return ret;
3185
	ret = __load_segment_descriptor(ctxt, tss->fs, VCPU_SREG_FS, cpl,
3186
					X86_TRANSFER_TASK_SWITCH, NULL);
3187 3188
	if (ret != X86EMUL_CONTINUE)
		return ret;
3189
	ret = __load_segment_descriptor(ctxt, tss->gs, VCPU_SREG_GS, cpl,
3190
					X86_TRANSFER_TASK_SWITCH, NULL);
3191

3192
	return ret;
3193 3194 3195 3196 3197 3198
}

static int task_switch_32(struct x86_emulate_ctxt *ctxt,
			  u16 tss_selector, u16 old_tss_sel,
			  ulong old_tss_base, struct desc_struct *new_desc)
{
3199
	const struct x86_emulate_ops *ops = ctxt->ops;
3200 3201
	struct tss_segment_32 tss_seg;
	int ret;
3202
	u32 new_tss_base = get_desc_base(new_desc);
3203 3204
	u32 eip_offset = offsetof(struct tss_segment_32, eip);
	u32 ldt_sel_offset = offsetof(struct tss_segment_32, ldt_selector);
3205

3206
	ret = ops->read_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg,
3207
			    &ctxt->exception);
3208
	if (ret != X86EMUL_CONTINUE)
3209 3210
		return ret;

3211
	save_state_to_tss32(ctxt, &tss_seg);
3212

3213 3214 3215
	/* Only GP registers and segment selectors are saved */
	ret = ops->write_std(ctxt, old_tss_base + eip_offset, &tss_seg.eip,
			     ldt_sel_offset - eip_offset, &ctxt->exception);
3216
	if (ret != X86EMUL_CONTINUE)
3217 3218
		return ret;

3219
	ret = ops->read_std(ctxt, new_tss_base, &tss_seg, sizeof tss_seg,
3220
			    &ctxt->exception);
3221
	if (ret != X86EMUL_CONTINUE)
3222 3223 3224 3225 3226
		return ret;

	if (old_tss_sel != 0xffff) {
		tss_seg.prev_task_link = old_tss_sel;

3227
		ret = ops->write_std(ctxt, new_tss_base,
3228 3229
				     &tss_seg.prev_task_link,
				     sizeof tss_seg.prev_task_link,
3230
				     &ctxt->exception);
3231
		if (ret != X86EMUL_CONTINUE)
3232 3233 3234
			return ret;
	}

3235
	return load_state_from_tss32(ctxt, &tss_seg);
3236 3237 3238
}

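/*
 * Common task-switch path for CALL/JMP/INT/IRET through a TSS or task
 * gate: validate the new TSS descriptor, save the outgoing context to
 * the old TSS, load the incoming context (16-bit or 32-bit format),
 * update the busy/NT bits and CR0.TS, and push the error code on the
 * new stack when one is required.
 */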
static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt,
3239
				   u16 tss_selector, int idt_index, int reason,
3240
				   bool has_error_code, u32 error_code)
3241
{
3242
	const struct x86_emulate_ops *ops = ctxt->ops;
3243 3244
	struct desc_struct curr_tss_desc, next_tss_desc;
	int ret;
3245
	u16 old_tss_sel = get_segment_selector(ctxt, VCPU_SREG_TR);
3246
	ulong old_tss_base =
3247
		ops->get_cached_segment_base(ctxt, VCPU_SREG_TR);
3248
	u32 desc_limit;
3249
	ulong desc_addr, dr7;
3250 3251 3252

	/* FIXME: old_tss_base == ~0 ? */

3253
	ret = read_segment_descriptor(ctxt, tss_selector, &next_tss_desc, &desc_addr);
3254 3255
	if (ret != X86EMUL_CONTINUE)
		return ret;
3256
	ret = read_segment_descriptor(ctxt, old_tss_sel, &curr_tss_desc, &desc_addr);
3257 3258 3259 3260 3261
	if (ret != X86EMUL_CONTINUE)
		return ret;

	/* FIXME: check that next_tss_desc is tss */

3262 3263 3264 3265 3266
	/*
	 * Check privileges. The three cases are task switch caused by...
	 *
	 * 1. jmp/call/int to task gate: Check against DPL of the task gate
	 * 2. Exception/IRQ/iret: No check is performed
3267 3268
	 * 3. jmp/call to TSS/task-gate: No check is performed since the
	 *    hardware checks it before exiting.
3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284
	 */
	if (reason == TASK_SWITCH_GATE) {
		if (idt_index != -1) {
			/* Software interrupts */
			struct desc_struct task_gate_desc;
			int dpl;

			ret = read_interrupt_descriptor(ctxt, idt_index,
							&task_gate_desc);
			if (ret != X86EMUL_CONTINUE)
				return ret;

			dpl = task_gate_desc.dpl;
			if ((tss_selector & 3) > dpl || ops->cpl(ctxt) > dpl)
				return emulate_gp(ctxt, (idt_index << 3) | 0x2);
		}
3285 3286
	}

3287 3288 3289 3290
	desc_limit = desc_limit_scaled(&next_tss_desc);
	if (!next_tss_desc.p ||
	    ((desc_limit < 0x67 && (next_tss_desc.type & 8)) ||
	     desc_limit < 0x2b)) {
3291
		return emulate_ts(ctxt, tss_selector & 0xfffc);
3292 3293 3294 3295
	}

	if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) {
		curr_tss_desc.type &= ~(1 << 1); /* clear busy flag */
3296
		write_segment_descriptor(ctxt, old_tss_sel, &curr_tss_desc);
3297 3298 3299 3300 3301 3302
	}

	if (reason == TASK_SWITCH_IRET)
		ctxt->eflags = ctxt->eflags & ~X86_EFLAGS_NT;

	/* set back link to prev task only if NT bit is set in eflags
	   note that old_tss_sel is not used after this point */
3304 3305 3306 3307
	if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE)
		old_tss_sel = 0xffff;

	if (next_tss_desc.type & 8)
3308
		ret = task_switch_32(ctxt, tss_selector, old_tss_sel,
3309 3310
				     old_tss_base, &next_tss_desc);
	else
3311
		ret = task_switch_16(ctxt, tss_selector, old_tss_sel,
3312
				     old_tss_base, &next_tss_desc);
3313 3314
	if (ret != X86EMUL_CONTINUE)
		return ret;
3315 3316 3317 3318 3319 3320

	if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE)
		ctxt->eflags = ctxt->eflags | X86_EFLAGS_NT;

	if (reason != TASK_SWITCH_IRET) {
		next_tss_desc.type |= (1 << 1); /* set busy flag */
3321
		write_segment_descriptor(ctxt, tss_selector, &next_tss_desc);
3322 3323
	}

3324
	ops->set_cr(ctxt, 0,  ops->get_cr(ctxt, 0) | X86_CR0_TS);
3325
	ops->set_segment(ctxt, tss_selector, &next_tss_desc, 0, VCPU_SREG_TR);
3326

3327
	if (has_error_code) {
3328 3329 3330
		ctxt->op_bytes = ctxt->ad_bytes = (next_tss_desc.type & 8) ? 4 : 2;
		ctxt->lock_prefix = 0;
		ctxt->src.val = (unsigned long) error_code;
3331
		ret = em_push(ctxt);
3332 3333
	}

3334 3335 3336
	ops->get_dr(ctxt, 7, &dr7);
	ops->set_dr(ctxt, 7, dr7 & ~(DR_LOCAL_ENABLE_MASK | DR_LOCAL_SLOWDOWN));

3337 3338 3339 3340
	return ret;
}

int emulator_task_switch(struct x86_emulate_ctxt *ctxt,
3341
			 u16 tss_selector, int idt_index, int reason,
3342
			 bool has_error_code, u32 error_code)
3343 3344 3345
{
	int rc;

3346
	invalidate_registers(ctxt);
3347 3348
	ctxt->_eip = ctxt->eip;
	ctxt->dst.type = OP_NONE;
3349

3350
	rc = emulator_do_task_switch(ctxt, tss_selector, idt_index, reason,
3351
				     has_error_code, error_code);
3352

3353
	if (rc == X86EMUL_CONTINUE) {
3354
		ctxt->eip = ctxt->_eip;
3355 3356
		writeback_registers(ctxt);
	}
3357

3358
	return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK;
3359 3360
}

3361 3362
static void string_addr_inc(struct x86_emulate_ctxt *ctxt, int reg,
		struct operand *op)
3363
{
3364
	int df = (ctxt->eflags & X86_EFLAGS_DF) ? -op->count : op->count;
3365

3366 3367
	register_address_increment(ctxt, reg, df * op->bytes);
	op->addr.mem.ea = register_address(ctxt, reg);
3368 3369
}

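/*
 * DAS: decimal-adjust AL after a packed-BCD subtraction, updating
 * AF/CF from the per-nibble borrows and recomputing PF/ZF/SF via a
 * dummy OR.
 */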
3370 3371 3372 3373 3374 3375
static int em_das(struct x86_emulate_ctxt *ctxt)
{
	u8 al, old_al;
	bool af, cf, old_cf;

	cf = ctxt->eflags & X86_EFLAGS_CF;
3376
	al = ctxt->dst.val;
3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393

	old_al = al;
	old_cf = cf;
	cf = false;
	af = ctxt->eflags & X86_EFLAGS_AF;
	if ((al & 0x0f) > 9 || af) {
		al -= 6;
		cf = old_cf | (al >= 250);
		af = true;
	} else {
		af = false;
	}
	if (old_al > 0x99 || old_cf) {
		al -= 0x60;
		cf = true;
	}

3394
	ctxt->dst.val = al;
3395
	/* Set PF, ZF, SF */
3396 3397 3398
	ctxt->src.type = OP_IMM;
	ctxt->src.val = 0;
	ctxt->src.bytes = 1;
3399
	fastop(ctxt, em_or);
3400 3401 3402 3403 3404 3405 3406 3407
	ctxt->eflags &= ~(X86_EFLAGS_AF | X86_EFLAGS_CF);
	if (cf)
		ctxt->eflags |= X86_EFLAGS_CF;
	if (af)
		ctxt->eflags |= X86_EFLAGS_AF;
	return X86EMUL_CONTINUE;
}

static int em_aam(struct x86_emulate_ctxt *ctxt)
{
	u8 al, ah;

	if (ctxt->src.val == 0)
		return emulate_de(ctxt);

	al = ctxt->dst.val & 0xff;
	ah = al / ctxt->src.val;
	al %= ctxt->src.val;

	ctxt->dst.val = (ctxt->dst.val & 0xffff0000) | al | (ah << 8);

	/* Set PF, ZF, SF */
	ctxt->src.type = OP_IMM;
	ctxt->src.val = 0;
	ctxt->src.bytes = 1;
	fastop(ctxt, em_or);

	return X86EMUL_CONTINUE;
}

3430 3431 3432 3433 3434 3435 3436 3437 3438
static int em_aad(struct x86_emulate_ctxt *ctxt)
{
	u8 al = ctxt->dst.val & 0xff;
	u8 ah = (ctxt->dst.val >> 8) & 0xff;

	al = (al + (ah * ctxt->src.val)) & 0xff;

	ctxt->dst.val = (ctxt->dst.val & 0xffff0000) | al;

3439 3440 3441 3442 3443
	/* Set PF, ZF, SF */
	ctxt->src.type = OP_IMM;
	ctxt->src.val = 0;
	ctxt->src.bytes = 1;
	fastop(ctxt, em_or);
3444 3445 3446 3447

	return X86EMUL_CONTINUE;
}

3448 3449
static int em_call(struct x86_emulate_ctxt *ctxt)
{
3450
	int rc;
3451 3452 3453
	long rel = ctxt->src.val;

	ctxt->src.val = (unsigned long)ctxt->_eip;
3454 3455 3456
	rc = jmp_rel(ctxt, rel);
	if (rc != X86EMUL_CONTINUE)
		return rc;
3457 3458 3459
	return em_push(ctxt);
}

3460 3461 3462 3463 3464
static int em_call_far(struct x86_emulate_ctxt *ctxt)
{
	u16 sel, old_cs;
	ulong old_eip;
	int rc;
3465 3466 3467
	struct desc_struct old_desc, new_desc;
	const struct x86_emulate_ops *ops = ctxt->ops;
	int cpl = ctxt->ops->cpl(ctxt);
3468
	enum x86emul_mode prev_mode = ctxt->mode;
3469

3470
	old_eip = ctxt->_eip;
3471
	ops->get_segment(ctxt, &old_cs, &old_desc, NULL, VCPU_SREG_CS);
3472

3473
	memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
3474 3475
	rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl,
				       X86_TRANSFER_CALL_JMP, &new_desc);
3476
	if (rc != X86EMUL_CONTINUE)
3477
		return rc;
3478

3479
	rc = assign_eip_far(ctxt, ctxt->src.val, &new_desc);
3480 3481
	if (rc != X86EMUL_CONTINUE)
		goto fail;
3482

3483
	ctxt->src.val = old_cs;
3484
	rc = em_push(ctxt);
3485
	if (rc != X86EMUL_CONTINUE)
3486
		goto fail;
3487

3488
	ctxt->src.val = old_eip;
3489 3490 3491
	rc = em_push(ctxt);
	/* If we failed, we tainted the memory, but the very least we should
	   restore cs */
3492 3493
	if (rc != X86EMUL_CONTINUE) {
		pr_warn_once("faulting far call emulation tainted memory\n");
3494
		goto fail;
3495
	}
3496 3497 3498
	return rc;
fail:
	ops->set_segment(ctxt, old_cs, &old_desc, 0, VCPU_SREG_CS);
3499
	ctxt->mode = prev_mode;
3500 3501
	return rc;

3502 3503
}

3504 3505 3506
static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt)
{
	int rc;
3507
	unsigned long eip;
3508

3509 3510 3511 3512
	rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
	if (rc != X86EMUL_CONTINUE)
		return rc;
	rc = assign_eip_near(ctxt, eip);
3513 3514
	if (rc != X86EMUL_CONTINUE)
		return rc;
3515
	rsp_increment(ctxt, ctxt->src.val);
3516 3517 3518
	return X86EMUL_CONTINUE;
}

3519 3520 3521
static int em_xchg(struct x86_emulate_ctxt *ctxt)
{
	/* Write back the register source. */
3522 3523
	ctxt->src.val = ctxt->dst.val;
	write_register_operand(&ctxt->src);
3524 3525

	/* Write back the memory destination with implicit LOCK prefix. */
3526 3527
	ctxt->dst.val = ctxt->src.orig_val;
	ctxt->lock_prefix = 1;
3528 3529 3530
	return X86EMUL_CONTINUE;
}

3531 3532
static int em_imul_3op(struct x86_emulate_ctxt *ctxt)
{
3533
	ctxt->dst.val = ctxt->src2.val;
3534
	return fastop(ctxt, em_imul);
3535 3536
}

3537 3538
static int em_cwd(struct x86_emulate_ctxt *ctxt)
{
3539 3540
	ctxt->dst.type = OP_REG;
	ctxt->dst.bytes = ctxt->src.bytes;
3541
	ctxt->dst.addr.reg = reg_rmw(ctxt, VCPU_REGS_RDX);
3542
	ctxt->dst.val = ~((ctxt->src.val >> (ctxt->src.bytes * 8 - 1)) - 1);
3543 3544 3545 3546

	return X86EMUL_CONTINUE;
}

static int em_rdpid(struct x86_emulate_ctxt *ctxt)
{
	u64 tsc_aux = 0;

	if (ctxt->ops->get_msr(ctxt, MSR_TSC_AUX, &tsc_aux))
		return emulate_gp(ctxt, 0);
	ctxt->dst.val = tsc_aux;
	return X86EMUL_CONTINUE;
}

3557 3558 3559 3560
static int em_rdtsc(struct x86_emulate_ctxt *ctxt)
{
	u64 tsc = 0;

3561
	ctxt->ops->get_msr(ctxt, MSR_IA32_TSC, &tsc);
3562 3563
	*reg_write(ctxt, VCPU_REGS_RAX) = (u32)tsc;
	*reg_write(ctxt, VCPU_REGS_RDX) = tsc >> 32;
3564 3565 3566
	return X86EMUL_CONTINUE;
}

3567 3568 3569 3570
static int em_rdpmc(struct x86_emulate_ctxt *ctxt)
{
	u64 pmc;

3571
	if (ctxt->ops->read_pmc(ctxt, reg_read(ctxt, VCPU_REGS_RCX), &pmc))
3572
		return emulate_gp(ctxt, 0);
3573 3574
	*reg_write(ctxt, VCPU_REGS_RAX) = (u32)pmc;
	*reg_write(ctxt, VCPU_REGS_RDX) = pmc >> 32;
3575 3576 3577
	return X86EMUL_CONTINUE;
}

3578 3579
static int em_mov(struct x86_emulate_ctxt *ctxt)
{
3580
	memcpy(ctxt->dst.valptr, ctxt->src.valptr, sizeof(ctxt->src.valptr));
3581 3582 3583
	return X86EMUL_CONTINUE;
}

#define FFL(x) bit(X86_FEATURE_##x)

static int em_movbe(struct x86_emulate_ctxt *ctxt)
{
	u32 ebx, ecx, edx, eax = 1;
	u16 tmp;

	/*
	 * Check MOVBE is set in the guest-visible CPUID leaf.
	 */
3594
	ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, false);
	if (!(ecx & FFL(MOVBE)))
		return emulate_ud(ctxt);

	switch (ctxt->op_bytes) {
	case 2:
		/*
		 * From MOVBE definition: "...When the operand size is 16 bits,
		 * the upper word of the destination register remains unchanged
		 * ..."
		 *
		 * Casting either ->valptr or ->val to u16 breaks strict aliasing
		 * rules, so we have to do the operation almost by hand.
		 */
		tmp = (u16)ctxt->src.val;
		ctxt->dst.val &= ~0xffffUL;
		ctxt->dst.val |= (unsigned long)swab16(tmp);
		break;
	case 4:
		ctxt->dst.val = swab32((u32)ctxt->src.val);
		break;
	case 8:
		ctxt->dst.val = swab64(ctxt->src.val);
		break;
	default:
3619
		BUG();
	}
	return X86EMUL_CONTINUE;
}
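/*
 * Illustrative example (editor's addition, not part of the original file):
 * for the 16-bit case above, if the destination register held 0xaabbccdd
 * and the source's low word is 0x1234, then
 *
 *	tmp = 0x1234;			swab16(tmp) == 0x3412
 *	dst = (0xaabbccdd & ~0xffffUL) | 0x3412 == 0xaabb3412
 *
 * i.e. only the low word is byte-swapped and written back; the 32/64-bit
 * cases simply swab32()/swab64() the whole value.
 */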

3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651
static int em_cr_write(struct x86_emulate_ctxt *ctxt)
{
	if (ctxt->ops->set_cr(ctxt, ctxt->modrm_reg, ctxt->src.val))
		return emulate_gp(ctxt, 0);

	/* Disable writeback. */
	ctxt->dst.type = OP_NONE;
	return X86EMUL_CONTINUE;
}

static int em_dr_write(struct x86_emulate_ctxt *ctxt)
{
	unsigned long val;

	if (ctxt->mode == X86EMUL_MODE_PROT64)
		val = ctxt->src.val & ~0ULL;
	else
		val = ctxt->src.val & ~0U;

	/* #UD condition is already handled. */
	if (ctxt->ops->set_dr(ctxt, ctxt->modrm_reg, val) < 0)
		return emulate_gp(ctxt, 0);

	/* Disable writeback. */
	ctxt->dst.type = OP_NONE;
	return X86EMUL_CONTINUE;
}

3652 3653 3654 3655
static int em_wrmsr(struct x86_emulate_ctxt *ctxt)
{
	u64 msr_data;

3656 3657 3658
	msr_data = (u32)reg_read(ctxt, VCPU_REGS_RAX)
		| ((u64)reg_read(ctxt, VCPU_REGS_RDX) << 32);
	if (ctxt->ops->set_msr(ctxt, reg_read(ctxt, VCPU_REGS_RCX), msr_data))
3659 3660 3661 3662 3663 3664 3665 3666 3667
		return emulate_gp(ctxt, 0);

	return X86EMUL_CONTINUE;
}

static int em_rdmsr(struct x86_emulate_ctxt *ctxt)
{
	u64 msr_data;

3668
	if (ctxt->ops->get_msr(ctxt, reg_read(ctxt, VCPU_REGS_RCX), &msr_data))
3669 3670
		return emulate_gp(ctxt, 0);

3671 3672
	*reg_write(ctxt, VCPU_REGS_RAX) = (u32)msr_data;
	*reg_write(ctxt, VCPU_REGS_RDX) = msr_data >> 32;
3673 3674 3675
	return X86EMUL_CONTINUE;
}

static int em_store_sreg(struct x86_emulate_ctxt *ctxt, int segment)
3677
{
	if (segment > VCPU_SREG_GS &&
	    (ctxt->ops->get_cr(ctxt, 4) & X86_CR4_UMIP) &&
	    ctxt->ops->cpl(ctxt) > 0)
		return emulate_gp(ctxt, 0);
3682

	ctxt->dst.val = get_segment_selector(ctxt, segment);
3684 3685
	if (ctxt->dst.bytes == 4 && ctxt->dst.type == OP_MEM)
		ctxt->dst.bytes = 2;
3686 3687 3688
	return X86EMUL_CONTINUE;
}

static int em_mov_rm_sreg(struct x86_emulate_ctxt *ctxt)
{
	if (ctxt->modrm_reg > VCPU_SREG_GS)
		return emulate_ud(ctxt);

	return em_store_sreg(ctxt, ctxt->modrm_reg);
}

3697 3698
static int em_mov_sreg_rm(struct x86_emulate_ctxt *ctxt)
{
3699
	u16 sel = ctxt->src.val;
3700

3701
	if (ctxt->modrm_reg == VCPU_SREG_CS || ctxt->modrm_reg > VCPU_SREG_GS)
3702 3703
		return emulate_ud(ctxt);

3704
	if (ctxt->modrm_reg == VCPU_SREG_SS)
3705 3706 3707
		ctxt->interruptibility = KVM_X86_SHADOW_INT_MOV_SS;

	/* Disable writeback. */
3708 3709
	ctxt->dst.type = OP_NONE;
	return load_segment_descriptor(ctxt, sel, ctxt->modrm_reg);
3710 3711
}

static int em_sldt(struct x86_emulate_ctxt *ctxt)
{
	return em_store_sreg(ctxt, VCPU_SREG_LDTR);
}

static int em_lldt(struct x86_emulate_ctxt *ctxt)
{
	u16 sel = ctxt->src.val;

	/* Disable writeback. */
	ctxt->dst.type = OP_NONE;
	return load_segment_descriptor(ctxt, sel, VCPU_SREG_LDTR);
}

static int em_str(struct x86_emulate_ctxt *ctxt)
{
	return em_store_sreg(ctxt, VCPU_SREG_TR);
}

static int em_ltr(struct x86_emulate_ctxt *ctxt)
{
	u16 sel = ctxt->src.val;

	/* Disable writeback. */
	ctxt->dst.type = OP_NONE;
	return load_segment_descriptor(ctxt, sel, VCPU_SREG_TR);
}

3740 3741
static int em_invlpg(struct x86_emulate_ctxt *ctxt)
{
3742 3743 3744
	int rc;
	ulong linear;

3745
	rc = linearize(ctxt, ctxt->src.addr.mem, 1, false, &linear);
3746
	if (rc == X86EMUL_CONTINUE)
3747
		ctxt->ops->invlpg(ctxt, linear);
3748
	/* Disable writeback. */
3749
	ctxt->dst.type = OP_NONE;
3750 3751 3752
	return X86EMUL_CONTINUE;
}

3753 3754 3755 3756 3757 3758 3759 3760 3761 3762
static int em_clts(struct x86_emulate_ctxt *ctxt)
{
	ulong cr0;

	cr0 = ctxt->ops->get_cr(ctxt, 0);
	cr0 &= ~X86_CR0_TS;
	ctxt->ops->set_cr(ctxt, 0, cr0);
	return X86EMUL_CONTINUE;
}

3763
static int em_hypercall(struct x86_emulate_ctxt *ctxt)
3764
{
3765
	int rc = ctxt->ops->fix_hypercall(ctxt);
3766 3767 3768 3769 3770

	if (rc != X86EMUL_CONTINUE)
		return rc;

	/* Let the processor re-execute the fixed hypercall */
3771
	ctxt->_eip = ctxt->eip;
3772
	/* Disable writeback. */
3773
	ctxt->dst.type = OP_NONE;
3774 3775 3776
	return X86EMUL_CONTINUE;
}

3777 3778 3779 3780 3781 3782
static int emulate_store_desc_ptr(struct x86_emulate_ctxt *ctxt,
				  void (*get)(struct x86_emulate_ctxt *ctxt,
					      struct desc_ptr *ptr))
{
	struct desc_ptr desc_ptr;

	if ((ctxt->ops->get_cr(ctxt, 4) & X86_CR4_UMIP) &&
	    ctxt->ops->cpl(ctxt) > 0)
		return emulate_gp(ctxt, 0);

3787 3788 3789 3790 3791 3792 3793 3794 3795
	if (ctxt->mode == X86EMUL_MODE_PROT64)
		ctxt->op_bytes = 8;
	get(ctxt, &desc_ptr);
	if (ctxt->op_bytes == 2) {
		ctxt->op_bytes = 4;
		desc_ptr.address &= 0x00ffffff;
	}
	/* Disable writeback. */
	ctxt->dst.type = OP_NONE;
3796 3797
	return segmented_write_std(ctxt, ctxt->dst.addr.mem,
				   &desc_ptr, 2 + ctxt->op_bytes);
3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809
}

static int em_sgdt(struct x86_emulate_ctxt *ctxt)
{
	return emulate_store_desc_ptr(ctxt, ctxt->ops->get_gdt);
}

static int em_sidt(struct x86_emulate_ctxt *ctxt)
{
	return emulate_store_desc_ptr(ctxt, ctxt->ops->get_idt);
}

3810
static int em_lgdt_lidt(struct x86_emulate_ctxt *ctxt, bool lgdt)
3811 3812 3813 3814
{
	struct desc_ptr desc_ptr;
	int rc;

3815 3816
	if (ctxt->mode == X86EMUL_MODE_PROT64)
		ctxt->op_bytes = 8;
3817
	rc = read_descriptor(ctxt, ctxt->src.addr.mem,
3818
			     &desc_ptr.size, &desc_ptr.address,
3819
			     ctxt->op_bytes);
3820 3821
	if (rc != X86EMUL_CONTINUE)
		return rc;
3822
	if (ctxt->mode == X86EMUL_MODE_PROT64 &&
3823
	    emul_is_noncanonical_address(desc_ptr.address, ctxt))
3824
		return emulate_gp(ctxt, 0);
3825 3826 3827 3828
	if (lgdt)
		ctxt->ops->set_gdt(ctxt, &desc_ptr);
	else
		ctxt->ops->set_idt(ctxt, &desc_ptr);
3829
	/* Disable writeback. */
3830
	ctxt->dst.type = OP_NONE;
3831 3832 3833
	return X86EMUL_CONTINUE;
}

3834 3835 3836 3837 3838
static int em_lgdt(struct x86_emulate_ctxt *ctxt)
{
	return em_lgdt_lidt(ctxt, true);
}

3839 3840
static int em_lidt(struct x86_emulate_ctxt *ctxt)
{
3841
	return em_lgdt_lidt(ctxt, false);
3842 3843 3844 3845
}

static int em_smsw(struct x86_emulate_ctxt *ctxt)
{
	if ((ctxt->ops->get_cr(ctxt, 4) & X86_CR4_UMIP) &&
	    ctxt->ops->cpl(ctxt) > 0)
		return emulate_gp(ctxt, 0);

3850 3851
	if (ctxt->dst.type == OP_MEM)
		ctxt->dst.bytes = 2;
3852
	ctxt->dst.val = ctxt->ops->get_cr(ctxt, 0);
3853 3854 3855 3856 3857 3858
	return X86EMUL_CONTINUE;
}

static int em_lmsw(struct x86_emulate_ctxt *ctxt)
{
	ctxt->ops->set_cr(ctxt, 0, (ctxt->ops->get_cr(ctxt, 0) & ~0x0eul)
3859 3860
			  | (ctxt->src.val & 0x0f));
	ctxt->dst.type = OP_NONE;
3861 3862 3863
	return X86EMUL_CONTINUE;
}

3864 3865
static int em_loop(struct x86_emulate_ctxt *ctxt)
{
3866 3867
	int rc = X86EMUL_CONTINUE;

3868
	register_address_increment(ctxt, VCPU_REGS_RCX, -1);
3869
	if ((address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) != 0) &&
3870
	    (ctxt->b == 0xe2 || test_cc(ctxt->b ^ 0x5, ctxt->eflags)))
3871
		rc = jmp_rel(ctxt, ctxt->src.val);
3872

3873
	return rc;
3874 3875 3876 3877
}

static int em_jcxz(struct x86_emulate_ctxt *ctxt)
{
3878 3879
	int rc = X86EMUL_CONTINUE;

3880
	if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0)
3881
		rc = jmp_rel(ctxt, ctxt->src.val);
3882

3883
	return rc;
3884 3885
}

3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903
static int em_in(struct x86_emulate_ctxt *ctxt)
{
	if (!pio_in_emulated(ctxt, ctxt->dst.bytes, ctxt->src.val,
			     &ctxt->dst.val))
		return X86EMUL_IO_NEEDED;

	return X86EMUL_CONTINUE;
}

static int em_out(struct x86_emulate_ctxt *ctxt)
{
	ctxt->ops->pio_out_emulated(ctxt, ctxt->src.bytes, ctxt->dst.val,
				    &ctxt->src.val, 1);
	/* Disable writeback. */
	ctxt->dst.type = OP_NONE;
	return X86EMUL_CONTINUE;
}

3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922
static int em_cli(struct x86_emulate_ctxt *ctxt)
{
	if (emulator_bad_iopl(ctxt))
		return emulate_gp(ctxt, 0);

	ctxt->eflags &= ~X86_EFLAGS_IF;
	return X86EMUL_CONTINUE;
}

static int em_sti(struct x86_emulate_ctxt *ctxt)
{
	if (emulator_bad_iopl(ctxt))
		return emulate_gp(ctxt, 0);

	ctxt->interruptibility = KVM_X86_SHADOW_INT_STI;
	ctxt->eflags |= X86_EFLAGS_IF;
	return X86EMUL_CONTINUE;
}

static int em_cpuid(struct x86_emulate_ctxt *ctxt)
{
	u32 eax, ebx, ecx, edx;
	u64 msr = 0;

	ctxt->ops->get_msr(ctxt, MSR_MISC_FEATURES_ENABLES, &msr);
	if (msr & MSR_MISC_FEATURES_ENABLES_CPUID_FAULT &&
	    ctxt->ops->cpl(ctxt)) {
		return emulate_gp(ctxt, 0);
	}

3934 3935
	eax = reg_read(ctxt, VCPU_REGS_RAX);
	ecx = reg_read(ctxt, VCPU_REGS_RCX);
3936
	ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, true);
3937 3938 3939 3940
	*reg_write(ctxt, VCPU_REGS_RAX) = eax;
	*reg_write(ctxt, VCPU_REGS_RBX) = ebx;
	*reg_write(ctxt, VCPU_REGS_RCX) = ecx;
	*reg_write(ctxt, VCPU_REGS_RDX) = edx;
A
Avi Kivity 已提交
3941 3942 3943
	return X86EMUL_CONTINUE;
}

static int em_sahf(struct x86_emulate_ctxt *ctxt)
{
	u32 flags;

3948 3949
	flags = X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF | X86_EFLAGS_ZF |
		X86_EFLAGS_SF;
	flags &= *reg_rmw(ctxt, VCPU_REGS_RAX) >> 8;

	ctxt->eflags &= ~0xffUL;
	ctxt->eflags |= flags | X86_EFLAGS_FIXED;
	return X86EMUL_CONTINUE;
}

static int em_lahf(struct x86_emulate_ctxt *ctxt)
{
3959 3960
	*reg_rmw(ctxt, VCPU_REGS_RAX) &= ~0xff00UL;
	*reg_rmw(ctxt, VCPU_REGS_RAX) |= (ctxt->eflags & 0xff) << 8;
	return X86EMUL_CONTINUE;
}

static int em_bswap(struct x86_emulate_ctxt *ctxt)
{
	switch (ctxt->op_bytes) {
#ifdef CONFIG_X86_64
	case 8:
		asm("bswap %0" : "+r"(ctxt->dst.val));
		break;
#endif
	default:
		asm("bswap %0" : "+r"(*(u32 *)&ctxt->dst.val));
		break;
	}
	return X86EMUL_CONTINUE;
}

3979 3980 3981 3982 3983 3984
static int em_clflush(struct x86_emulate_ctxt *ctxt)
{
	/* emulating clflush regardless of cpuid */
	return X86EMUL_CONTINUE;
}

3985 3986 3987 3988 3989 3990
static int em_movsxd(struct x86_emulate_ctxt *ctxt)
{
	ctxt->dst.val = (s32) ctxt->src.val;
	return X86EMUL_CONTINUE;
}

3991 3992 3993 3994
static int check_fxsr(struct x86_emulate_ctxt *ctxt)
{
	u32 eax = 1, ebx, ecx = 0, edx;

3995
	ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, false);
3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011
	if (!(edx & FFL(FXSR)))
		return emulate_ud(ctxt);

	if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
		return emulate_nm(ctxt);

	/*
	 * Don't emulate a case that should never be hit, instead of working
	 * around a lack of fxsave64/fxrstor64 on old compilers.
	 */
	if (ctxt->mode >= X86EMUL_MODE_PROT64)
		return X86EMUL_UNHANDLEABLE;

	return X86EMUL_CONTINUE;
}

4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030
/*
 * Hardware doesn't save and restore XMM 0-7 without CR4.OSFXSR, but does save
 * and restore MXCSR.
 */
static size_t __fxstate_size(int nregs)
{
	return offsetof(struct fxregs_state, xmm_space[0]) + nregs * 16;
}

static inline size_t fxstate_size(struct x86_emulate_ctxt *ctxt)
{
	bool cr4_osfxsr;
	if (ctxt->mode == X86EMUL_MODE_PROT64)
		return __fxstate_size(16);

	cr4_osfxsr = ctxt->ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR;
	return __fxstate_size(cr4_osfxsr ? 8 : 0);
}
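/*
 * Worked example (editor's sketch, assuming the usual fxregs_state layout
 * with xmm_space starting at byte offset 160):
 *
 *	__fxstate_size(0)  == 160			(no XMM registers)
 *	__fxstate_size(8)  == 160 +  8 * 16 == 288	(XMM0-7)
 *	__fxstate_size(16) == 160 + 16 * 16 == 416	(XMM0-15)
 *
 * So a 64-bit guest always transfers 416 bytes, while any other mode
 * transfers 288 bytes with CR4.OSFXSR set and 160 bytes without it.
 */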

4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062
/*
 * FXSAVE and FXRSTOR have 4 different formats depending on execution mode,
 *  1) 16 bit mode
 *  2) 32 bit mode
 *     - like (1), but FIP and FDP (foo) are only 16 bit.  At least Intel CPUs
 *       preserve whole 32 bit values, though, so (1) and (2) are the same wrt.
 *       save and restore
 *  3) 64-bit mode with REX.W prefix
 *     - like (2), but XMM 8-15 are being saved and restored
 *  4) 64-bit mode without REX.W prefix
 *     - like (3), but FIP and FDP are 64 bit
 *
 * Emulation uses (3) for (1) and (2) and preserves XMM 8-15 to reach the
 * desired result.  (4) is not emulated.
 *
 * Note: Guest and host CPUID.(EAX=07H,ECX=0H):EBX[bit 13] (deprecate FPU CS
 * and FPU DS) should match.
 */
static int em_fxsave(struct x86_emulate_ctxt *ctxt)
{
	struct fxregs_state fx_state;
	int rc;

	rc = check_fxsr(ctxt);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	rc = asm_safe("fxsave %[fx]", , [fx] "+m"(fx_state));

	if (rc != X86EMUL_CONTINUE)
		return rc;

4063 4064
	return segmented_write_std(ctxt, ctxt->memop.addr.mem, &fx_state,
				   fxstate_size(ctxt));
4065 4066
}

4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086
/*
 * FXRSTOR might restore XMM registers not provided by the guest. Fill
 * in the host registers (via FXSAVE) instead, so they won't be modified.
 * (preemption has to stay disabled until FXRSTOR).
 *
 * Use noinline to keep the stack for other functions called by callers small.
 */
static noinline int fxregs_fixup(struct fxregs_state *fx_state,
				 const size_t used_size)
{
	struct fxregs_state fx_tmp;
	int rc;

	rc = asm_safe("fxsave %[fx]", , [fx] "+m"(fx_tmp));
	memcpy((void *)fx_state + used_size, (void *)&fx_tmp + used_size,
	       __fxstate_size(16) - used_size);

	return rc;
}
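/*
 * Editor's note, illustrating the helper above: if the guest image only
 * covered XMM0-7 (used_size of 288 bytes with the layout assumed earlier),
 * bytes 288..415 of fx_state are refilled from a fresh host FXSAVE, so the
 * FXRSTOR done by the caller leaves XMM8-15 with their current host values.
 */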

4087 4088 4089 4090
static int em_fxrstor(struct x86_emulate_ctxt *ctxt)
{
	struct fxregs_state fx_state;
	int rc;
4091
	size_t size;
4092 4093 4094 4095 4096

	rc = check_fxsr(ctxt);
	if (rc != X86EMUL_CONTINUE)
		return rc;

4097 4098 4099 4100 4101
	size = fxstate_size(ctxt);
	rc = segmented_read_std(ctxt, ctxt->memop.addr.mem, &fx_state, size);
	if (rc != X86EMUL_CONTINUE)
		return rc;

4102
	if (size < __fxstate_size(16)) {
4103
		rc = fxregs_fixup(&fx_state, size);
4104 4105 4106
		if (rc != X86EMUL_CONTINUE)
			goto out;
	}
4107

4108 4109 4110 4111
	if (fx_state.mxcsr >> 16) {
		rc = emulate_gp(ctxt, 0);
		goto out;
	}
4112 4113 4114 4115

	if (rc == X86EMUL_CONTINUE)
		rc = asm_safe("fxrstor %[fx]", : [fx] "m"(fx_state));

4116
out:
4117 4118 4119
	return rc;
}

4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132 4133
static bool valid_cr(int nr)
{
	switch (nr) {
	case 0:
	case 2 ... 4:
	case 8:
		return true;
	default:
		return false;
	}
}

static int check_cr_read(struct x86_emulate_ctxt *ctxt)
{
4134
	if (!valid_cr(ctxt->modrm_reg))
4135 4136 4137 4138 4139 4140 4141
		return emulate_ud(ctxt);

	return X86EMUL_CONTINUE;
}

static int check_cr_write(struct x86_emulate_ctxt *ctxt)
{
4142 4143
	u64 new_val = ctxt->src.val64;
	int cr = ctxt->modrm_reg;
4144
	u64 efer = 0;
4145 4146 4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 4159 4160 4161

	static u64 cr_reserved_bits[] = {
		0xffffffff00000000ULL,
		0, 0, 0, /* CR3 checked later */
		CR4_RESERVED_BITS,
		0, 0, 0,
		CR8_RESERVED_BITS,
	};

	if (!valid_cr(cr))
		return emulate_ud(ctxt);

	if (new_val & cr_reserved_bits[cr])
		return emulate_gp(ctxt, 0);

	switch (cr) {
	case 0: {
4162
		u64 cr4;
4163 4164 4165 4166
		if (((new_val & X86_CR0_PG) && !(new_val & X86_CR0_PE)) ||
		    ((new_val & X86_CR0_NW) && !(new_val & X86_CR0_CD)))
			return emulate_gp(ctxt, 0);

4167 4168
		cr4 = ctxt->ops->get_cr(ctxt, 4);
		ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
4169 4170 4171 4172 4173 4174 4175 4176 4177 4178

		if ((new_val & X86_CR0_PG) && (efer & EFER_LME) &&
		    !(cr4 & X86_CR4_PAE))
			return emulate_gp(ctxt, 0);

		break;
		}
	case 3: {
		u64 rsvd = 0;

4179
		ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
4180 4181
		if (efer & EFER_LMA) {
			u64 maxphyaddr;
4182
			u32 eax, ebx, ecx, edx;
4183

4184 4185 4186 4187
			eax = 0x80000008;
			ecx = 0;
			if (ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx,
						 &edx, false))
4188 4189 4190 4191 4192
				maxphyaddr = eax & 0xff;
			else
				maxphyaddr = 36;
			rsvd = rsvd_bits(maxphyaddr, 62);
		}
4193 4194 4195 4196 4197 4198 4199

		if (new_val & rsvd)
			return emulate_gp(ctxt, 0);

		break;
		}
	case 4: {
4200
		ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
4201 4202 4203 4204 4205 4206 4207 4208 4209 4210 4211

		if ((efer & EFER_LMA) && !(new_val & X86_CR4_PAE))
			return emulate_gp(ctxt, 0);

		break;
		}
	}

	return X86EMUL_CONTINUE;
}

4212 4213 4214 4215
static int check_dr7_gd(struct x86_emulate_ctxt *ctxt)
{
	unsigned long dr7;

4216
	ctxt->ops->get_dr(ctxt, 7, &dr7);
4217 4218 4219 4220 4221 4222 4223

	/* Check if DR7.GD (general detect, bit 13) is set */
	return dr7 & (1 << 13);
}

static int check_dr_read(struct x86_emulate_ctxt *ctxt)
{
4224
	int dr = ctxt->modrm_reg;
4225 4226 4227 4228 4229
	u64 cr4;

	if (dr > 7)
		return emulate_ud(ctxt);

4230
	cr4 = ctxt->ops->get_cr(ctxt, 4);
4231 4232 4233
	if ((cr4 & X86_CR4_DE) && (dr == 4 || dr == 5))
		return emulate_ud(ctxt);

4234 4235 4236 4237 4238 4239 4240
	if (check_dr7_gd(ctxt)) {
		ulong dr6;

		ctxt->ops->get_dr(ctxt, 6, &dr6);
		dr6 &= ~15;
		dr6 |= DR6_BD | DR6_RTM;
		ctxt->ops->set_dr(ctxt, 6, dr6);
4241
		return emulate_db(ctxt);
4242
	}
4243 4244 4245 4246 4247 4248

	return X86EMUL_CONTINUE;
}

static int check_dr_write(struct x86_emulate_ctxt *ctxt)
{
4249 4250
	u64 new_val = ctxt->src.val64;
	int dr = ctxt->modrm_reg;
4251 4252 4253 4254 4255 4256 4257

	if ((dr == 6 || dr == 7) && (new_val & 0xffffffff00000000ULL))
		return emulate_gp(ctxt, 0);

	return check_dr_read(ctxt);
}

4258 4259
static int check_svme(struct x86_emulate_ctxt *ctxt)
{
4260
	u64 efer = 0;
4261

4262
	ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
4263 4264 4265 4266 4267 4268 4269 4270 4271

	if (!(efer & EFER_SVME))
		return emulate_ud(ctxt);

	return X86EMUL_CONTINUE;
}

static int check_svme_pa(struct x86_emulate_ctxt *ctxt)
{
4272
	u64 rax = reg_read(ctxt, VCPU_REGS_RAX);
4273 4274

	/* Valid physical address? */
4275
	if (rax & 0xffff000000000000ULL)
4276 4277 4278 4279 4280
		return emulate_gp(ctxt, 0);

	return check_svme(ctxt);
}

4281 4282
static int check_rdtsc(struct x86_emulate_ctxt *ctxt)
{
4283
	u64 cr4 = ctxt->ops->get_cr(ctxt, 4);
4284

4285
	if (cr4 & X86_CR4_TSD && ctxt->ops->cpl(ctxt))
4286 4287 4288 4289 4290
		return emulate_ud(ctxt);

	return X86EMUL_CONTINUE;
}

4291 4292
static int check_rdpmc(struct x86_emulate_ctxt *ctxt)
{
4293
	u64 cr4 = ctxt->ops->get_cr(ctxt, 4);
4294
	u64 rcx = reg_read(ctxt, VCPU_REGS_RCX);
4295

4296
	if ((!(cr4 & X86_CR4_PCE) && ctxt->ops->cpl(ctxt)) ||
4297
	    ctxt->ops->check_pmc(ctxt, rcx))
4298 4299 4300 4301 4302
		return emulate_gp(ctxt, 0);

	return X86EMUL_CONTINUE;
}

4303 4304
static int check_perm_in(struct x86_emulate_ctxt *ctxt)
{
4305 4306
	ctxt->dst.bytes = min(ctxt->dst.bytes, 4u);
	if (!emulator_io_permited(ctxt, ctxt->src.val, ctxt->dst.bytes))
4307 4308 4309 4310 4311 4312 4313
		return emulate_gp(ctxt, 0);

	return X86EMUL_CONTINUE;
}

static int check_perm_out(struct x86_emulate_ctxt *ctxt)
{
4314 4315
	ctxt->src.bytes = min(ctxt->src.bytes, 4u);
	if (!emulator_io_permited(ctxt, ctxt->dst.val, ctxt->src.bytes))
4316 4317 4318 4319 4320
		return emulate_gp(ctxt, 0);

	return X86EMUL_CONTINUE;
}

4321
#define D(_y) { .flags = (_y) }
4322 4323 4324
#define DI(_y, _i) { .flags = (_y)|Intercept, .intercept = x86_intercept_##_i }
#define DIP(_y, _i, _p) { .flags = (_y)|Intercept|CheckPerm, \
		      .intercept = x86_intercept_##_i, .check_perm = (_p) }
4325
#define N    D(NotImpl)
4326
#define EXT(_f, _e) { .flags = ((_f) | RMExt), .u.group = (_e) }
4327 4328
#define G(_f, _g) { .flags = ((_f) | Group | ModRM), .u.group = (_g) }
#define GD(_f, _g) { .flags = ((_f) | GroupDual | ModRM), .u.gdual = (_g) }
4329
#define ID(_f, _i) { .flags = ((_f) | InstrDual | ModRM), .u.idual = (_i) }
4330
#define MD(_f, _m) { .flags = ((_f) | ModeDual), .u.mdual = (_m) }
4331
#define E(_f, _e) { .flags = ((_f) | Escape | ModRM), .u.esc = (_e) }
4332
#define I(_f, _e) { .flags = (_f), .u.execute = (_e) }
4333
#define F(_f, _e) { .flags = (_f) | Fastop, .u.fastop = (_e) }
4334
#define II(_f, _e, _i) \
4335
	{ .flags = (_f)|Intercept, .u.execute = (_e), .intercept = x86_intercept_##_i }
4336
#define IIP(_f, _e, _i, _p) \
4337 4338
	{ .flags = (_f)|Intercept|CheckPerm, .u.execute = (_e), \
	  .intercept = x86_intercept_##_i, .check_perm = (_p) }
4339
#define GP(_f, _g) { .flags = ((_f) | Prefix), .u.gprefix = (_g) }
4340

4341
#define D2bv(_f)      D((_f) | ByteOp), D(_f)
4342
#define D2bvIP(_f, _i, _p) DIP((_f) | ByteOp, _i, _p), DIP(_f, _i, _p)
4343
#define I2bv(_f, _e)  I((_f) | ByteOp, _e), I(_f, _e)
4344
#define F2bv(_f, _e)  F((_f) | ByteOp, _e), F(_f, _e)
4345 4346
#define I2bvIP(_f, _e, _i, _p) \
	IIP((_f) | ByteOp, _e, _i, _p), IIP(_f, _e, _i, _p)
4347

4348 4349 4350
#define F6ALU(_f, _e) F2bv((_f) | DstMem | SrcReg | ModRM, _e),		\
		F2bv(((_f) | DstReg | SrcMem | ModRM) & ~Lock, _e),	\
		F2bv(((_f) & ~Lock) | DstAcc | SrcImm, _e)
4351

4352 4353
static const struct opcode group7_rm0[] = {
	N,
4354
	I(SrcNone | Priv | EmulateOnUD,	em_hypercall),
4355 4356 4357
	N, N, N, N, N, N,
};

4358
static const struct opcode group7_rm1[] = {
4359 4360
	DI(SrcNone | Priv, monitor),
	DI(SrcNone | Priv, mwait),
4361 4362 4363
	N, N, N, N, N, N,
};

4364
static const struct opcode group7_rm3[] = {
4365
	DIP(SrcNone | Prot | Priv,		vmrun,		check_svme_pa),
4366
	II(SrcNone  | Prot | EmulateOnUD,	em_hypercall,	vmmcall),
4367 4368 4369 4370 4371 4372
	DIP(SrcNone | Prot | Priv,		vmload,		check_svme_pa),
	DIP(SrcNone | Prot | Priv,		vmsave,		check_svme_pa),
	DIP(SrcNone | Prot | Priv,		stgi,		check_svme),
	DIP(SrcNone | Prot | Priv,		clgi,		check_svme),
	DIP(SrcNone | Prot | Priv,		skinit,		check_svme),
	DIP(SrcNone | Prot | Priv,		invlpga,	check_svme),
4373
};
4374

4375
static const struct opcode group7_rm7[] = {
4376
	N,
4377
	DIP(SrcNone, rdtscp, check_rdtsc),
4378 4379
	N, N, N, N, N, N,
};
4380

4381
static const struct opcode group1[] = {
4382 4383 4384 4385 4386 4387 4388 4389
	F(Lock, em_add),
	F(Lock | PageTable, em_or),
	F(Lock, em_adc),
	F(Lock, em_sbb),
	F(Lock | PageTable, em_and),
	F(Lock, em_sub),
	F(Lock, em_xor),
	F(NoWrite, em_cmp),
4390 4391
};

4392
static const struct opcode group1A[] = {
4393
	I(DstMem | SrcNone | Mov | Stack | IncSP | TwoMemOp, em_pop), N, N, N, N, N, N, N,
4394 4395
};

4396 4397 4398 4399 4400 4401 4402 4403 4404 4405 4406
static const struct opcode group2[] = {
	F(DstMem | ModRM, em_rol),
	F(DstMem | ModRM, em_ror),
	F(DstMem | ModRM, em_rcl),
	F(DstMem | ModRM, em_rcr),
	F(DstMem | ModRM, em_shl),
	F(DstMem | ModRM, em_shr),
	F(DstMem | ModRM, em_shl),
	F(DstMem | ModRM, em_sar),
};

4407
static const struct opcode group3[] = {
4408 4409
	F(DstMem | SrcImm | NoWrite, em_test),
	F(DstMem | SrcImm | NoWrite, em_test),
4410 4411
	F(DstMem | SrcNone | Lock, em_not),
	F(DstMem | SrcNone | Lock, em_neg),
4412 4413
	F(DstXacc | Src2Mem, em_mul_ex),
	F(DstXacc | Src2Mem, em_imul_ex),
4414 4415
	F(DstXacc | Src2Mem, em_div_ex),
	F(DstXacc | Src2Mem, em_idiv_ex),
4416 4417
};

4418
static const struct opcode group4[] = {
4419 4420
	F(ByteOp | DstMem | SrcNone | Lock, em_inc),
	F(ByteOp | DstMem | SrcNone | Lock, em_dec),
4421 4422 4423
	N, N, N, N, N, N,
};

4424
static const struct opcode group5[] = {
4425 4426
	F(DstMem | SrcNone | Lock,		em_inc),
	F(DstMem | SrcNone | Lock,		em_dec),
4427
	I(SrcMem | NearBranch,			em_call_near_abs),
4428
	I(SrcMemFAddr | ImplicitOps,		em_call_far),
4429
	I(SrcMem | NearBranch,			em_jmp_abs),
4430
	I(SrcMemFAddr | ImplicitOps,		em_jmp_far),
4431
	I(SrcMem | Stack | TwoMemOp,		em_push), D(Undefined),
4432 4433
};

4434
static const struct opcode group6[] = {
	II(Prot | DstMem,	   em_sldt, sldt),
	II(Prot | DstMem,	   em_str, str),
	II(Prot | Priv | SrcMem16, em_lldt, lldt),
	II(Prot | Priv | SrcMem16, em_ltr, ltr),
4439 4440 4441
	N, N, N, N,
};

4442
static const struct group_dual group7 = { {
4443 4444
	II(Mov | DstMem,			em_sgdt, sgdt),
	II(Mov | DstMem,			em_sidt, sidt),
4445 4446 4447 4448 4449
	II(SrcMem | Priv,			em_lgdt, lgdt),
	II(SrcMem | Priv,			em_lidt, lidt),
	II(SrcNone | DstMem | Mov,		em_smsw, smsw), N,
	II(SrcMem16 | Mov | Priv,		em_lmsw, lmsw),
	II(SrcMem | ByteOp | Priv | NoAccess,	em_invlpg, invlpg),
4450
}, {
4451
	EXT(0, group7_rm0),
4452
	EXT(0, group7_rm1),
4453
	N, EXT(0, group7_rm3),
4454 4455 4456
	II(SrcNone | DstMem | Mov,		em_smsw, smsw), N,
	II(SrcMem16 | Mov | Priv,		em_lmsw, lmsw),
	EXT(0, group7_rm7),
4457 4458
} };

4459
static const struct opcode group8[] = {
4460
	N, N, N, N,
4461 4462 4463 4464
	F(DstMem | SrcImmByte | NoWrite,		em_bt),
	F(DstMem | SrcImmByte | Lock | PageTable,	em_bts),
	F(DstMem | SrcImmByte | Lock,			em_btr),
	F(DstMem | SrcImmByte | Lock | PageTable,	em_btc),
4465 4466
};

/*
 * The "memory" destination is actually always a register, since we come
 * from the register case of group9.
 */
static const struct gprefix pfx_0f_c7_7 = {
	N, N, N, II(DstMem | ModRM | Op3264 | EmulateOnUD, em_rdpid, rdtscp),
};


4476
static const struct group_dual group9 = { {
4477
	N, I(DstMem64 | Lock | PageTable, em_cmpxchg8b), N, N, N, N, N, N,
4478
}, {
	N, N, N, N, N, N, N,
	GP(0, &pfx_0f_c7_7),
4481 4482
} };

4483
static const struct opcode group11[] = {
4484
	I(DstMem | SrcImm | Mov | PageTable, em_mov),
4485
	X7(D(Undefined)),
4486 4487
};

4488
static const struct gprefix pfx_0f_ae_7 = {
4489
	I(SrcMem | ByteOp, em_clflush), N, N, N,
4490 4491 4492
};

static const struct group_dual group15 = { {
4493 4494 4495
	I(ModRM | Aligned16, em_fxsave),
	I(ModRM | Aligned16, em_fxrstor),
	N, N, N, N, N, GP(0, &pfx_0f_ae_7),
4496 4497 4498 4499
}, {
	N, N, N, N, N, N, N, N,
} };

4500
static const struct gprefix pfx_0f_6f_0f_7f = {
4501
	I(Mmx, em_mov), I(Sse | Aligned, em_mov), N, I(Sse | Unaligned, em_mov),
4502 4503
};

4504 4505 4506 4507
static const struct instr_dual instr_dual_0f_2b = {
	I(0, em_mov), N
};

4508
static const struct gprefix pfx_0f_2b = {
4509
	ID(0, &instr_dual_0f_2b), ID(0, &instr_dual_0f_2b), N, N,
4510 4511
};

4512
static const struct gprefix pfx_0f_28_0f_29 = {
4513
	I(Aligned, em_mov), I(Aligned, em_mov), N, N,
4514 4515
};

4516 4517 4518 4519
static const struct gprefix pfx_0f_e7 = {
	N, I(Sse, em_mov), N, N,
};

4520
static const struct escape escape_d9 = { {
4521
	N, N, N, N, N, N, N, I(DstMem16 | Mov, em_fnstcw),
4522 4523 4524 4525 4526 4527 4528 4529 4530 4531 4532 4533 4534 4535 4536 4537 4538 4539 4540 4541 4542 4543 4544 4545 4546 4547 4548 4549 4550 4551 4552 4553 4554 4555 4556 4557 4558 4559 4560 4561 4562
}, {
	/* 0xC0 - 0xC7 */
	N, N, N, N, N, N, N, N,
	/* 0xC8 - 0xCF */
	N, N, N, N, N, N, N, N,
	/* 0xD0 - 0xC7 */
	N, N, N, N, N, N, N, N,
	/* 0xD8 - 0xDF */
	N, N, N, N, N, N, N, N,
	/* 0xE0 - 0xE7 */
	N, N, N, N, N, N, N, N,
	/* 0xE8 - 0xEF */
	N, N, N, N, N, N, N, N,
	/* 0xF0 - 0xF7 */
	N, N, N, N, N, N, N, N,
	/* 0xF8 - 0xFF */
	N, N, N, N, N, N, N, N,
} };

static const struct escape escape_db = { {
	N, N, N, N, N, N, N, N,
}, {
	/* 0xC0 - 0xC7 */
	N, N, N, N, N, N, N, N,
	/* 0xC8 - 0xCF */
	N, N, N, N, N, N, N, N,
	/* 0xD0 - 0xC7 */
	N, N, N, N, N, N, N, N,
	/* 0xD8 - 0xDF */
	N, N, N, N, N, N, N, N,
	/* 0xE0 - 0xE7 */
	N, N, N, I(ImplicitOps, em_fninit), N, N, N, N,
	/* 0xE8 - 0xEF */
	N, N, N, N, N, N, N, N,
	/* 0xF0 - 0xF7 */
	N, N, N, N, N, N, N, N,
	/* 0xF8 - 0xFF */
	N, N, N, N, N, N, N, N,
} };

static const struct escape escape_dd = { {
4563
	N, N, N, N, N, N, N, I(DstMem16 | Mov, em_fnstsw),
4564 4565 4566 4567 4568 4569 4570 4571 4572 4573 4574 4575 4576 4577 4578 4579 4580 4581 4582
}, {
	/* 0xC0 - 0xC7 */
	N, N, N, N, N, N, N, N,
	/* 0xC8 - 0xCF */
	N, N, N, N, N, N, N, N,
	/* 0xD0 - 0xC7 */
	N, N, N, N, N, N, N, N,
	/* 0xD8 - 0xDF */
	N, N, N, N, N, N, N, N,
	/* 0xE0 - 0xE7 */
	N, N, N, N, N, N, N, N,
	/* 0xE8 - 0xEF */
	N, N, N, N, N, N, N, N,
	/* 0xF0 - 0xF7 */
	N, N, N, N, N, N, N, N,
	/* 0xF8 - 0xFF */
	N, N, N, N, N, N, N, N,
} };

4583 4584 4585 4586
static const struct instr_dual instr_dual_0f_c3 = {
	I(DstMem | SrcReg | ModRM | No16 | Mov, em_mov), N
};

4587 4588 4589 4590
static const struct mode_dual mode_dual_63 = {
	N, I(DstReg | SrcMem32 | ModRM | Mov, em_movsxd)
};

4591
static const struct opcode opcode_table[256] = {
4592
	/* 0x00 - 0x07 */
4593
	F6ALU(Lock, em_add),
4594 4595
	I(ImplicitOps | Stack | No64 | Src2ES, em_push_sreg),
	I(ImplicitOps | Stack | No64 | Src2ES, em_pop_sreg),
4596
	/* 0x08 - 0x0F */
4597
	F6ALU(Lock | PageTable, em_or),
4598 4599
	I(ImplicitOps | Stack | No64 | Src2CS, em_push_sreg),
	N,
4600
	/* 0x10 - 0x17 */
4601
	F6ALU(Lock, em_adc),
4602 4603
	I(ImplicitOps | Stack | No64 | Src2SS, em_push_sreg),
	I(ImplicitOps | Stack | No64 | Src2SS, em_pop_sreg),
4604
	/* 0x18 - 0x1F */
4605
	F6ALU(Lock, em_sbb),
4606 4607
	I(ImplicitOps | Stack | No64 | Src2DS, em_push_sreg),
	I(ImplicitOps | Stack | No64 | Src2DS, em_pop_sreg),
4608
	/* 0x20 - 0x27 */
4609
	F6ALU(Lock | PageTable, em_and), N, N,
4610
	/* 0x28 - 0x2F */
4611
	F6ALU(Lock, em_sub), N, I(ByteOp | DstAcc | No64, em_das),
4612
	/* 0x30 - 0x37 */
4613
	F6ALU(Lock, em_xor), N, N,
4614
	/* 0x38 - 0x3F */
4615
	F6ALU(NoWrite, em_cmp), N, N,
4616
	/* 0x40 - 0x4F */
4617
	X8(F(DstReg, em_inc)), X8(F(DstReg, em_dec)),
4618
	/* 0x50 - 0x57 */
4619
	X8(I(SrcReg | Stack, em_push)),
4620
	/* 0x58 - 0x5F */
4621
	X8(I(DstReg | Stack, em_pop)),
4622
	/* 0x60 - 0x67 */
4623 4624
	I(ImplicitOps | Stack | No64, em_pusha),
	I(ImplicitOps | Stack | No64, em_popa),
4625
	N, MD(ModRM, &mode_dual_63),
4626 4627
	N, N, N, N,
	/* 0x68 - 0x6F */
4628 4629
	I(SrcImm | Mov | Stack, em_push),
	I(DstReg | SrcMem | ModRM | Src2Imm, em_imul_3op),
4630 4631
	I(SrcImmByte | Mov | Stack, em_push),
	I(DstReg | SrcMem | ModRM | Src2ImmByte, em_imul_3op),
4632
	I2bvIP(DstDI | SrcDX | Mov | String | Unaligned, em_in, ins, check_perm_in), /* insb, insw/insd */
4633
	I2bvIP(SrcSI | DstDX | String, em_out, outs, check_perm_out), /* outsb, outsw/outsd */
4634
	/* 0x70 - 0x7F */
4635
	X16(D(SrcImmByte | NearBranch)),
4636
	/* 0x80 - 0x87 */
4637 4638 4639 4640
	G(ByteOp | DstMem | SrcImm, group1),
	G(DstMem | SrcImm, group1),
	G(ByteOp | DstMem | SrcImm | No64, group1),
	G(DstMem | SrcImmByte, group1),
4641
	F2bv(DstMem | SrcReg | ModRM | NoWrite, em_test),
4642
	I2bv(DstMem | SrcReg | ModRM | Lock | PageTable, em_xchg),
4643
	/* 0x88 - 0x8F */
4644
	I2bv(DstMem | SrcReg | ModRM | Mov | PageTable, em_mov),
4645
	I2bv(DstReg | SrcMem | ModRM | Mov, em_mov),
4646
	I(DstMem | SrcNone | ModRM | Mov | PageTable, em_mov_rm_sreg),
4647 4648 4649
	D(ModRM | SrcMem | NoAccess | DstReg),
	I(ImplicitOps | SrcMem16 | ModRM, em_mov_sreg_rm),
	G(0, group1A),
4650
	/* 0x90 - 0x97 */
4651
	DI(SrcAcc | DstReg, pause), X7(D(SrcAcc | DstReg)),
4652
	/* 0x98 - 0x9F */
4653
	D(DstAcc | SrcNone), I(ImplicitOps | SrcAcc, em_cwd),
4654
	I(SrcImmFAddr | No64, em_call_far), N,
4655
	II(ImplicitOps | Stack, em_pushf, pushf),
	II(ImplicitOps | Stack, em_popf, popf),
	I(ImplicitOps, em_sahf), I(ImplicitOps, em_lahf),
4658
	/* 0xA0 - 0xA7 */
4659
	I2bv(DstAcc | SrcMem | Mov | MemAbs, em_mov),
4660
	I2bv(DstMem | SrcAcc | Mov | MemAbs | PageTable, em_mov),
4661 4662
	I2bv(SrcSI | DstDI | Mov | String | TwoMemOp, em_mov),
	F2bv(SrcSI | DstDI | String | NoWrite | TwoMemOp, em_cmp_r),
4663
	/* 0xA8 - 0xAF */
4664
	F2bv(DstAcc | SrcImm | NoWrite, em_test),
4665 4666
	I2bv(SrcAcc | DstDI | Mov | String, em_mov),
	I2bv(SrcSI | DstAcc | Mov | String, em_mov),
4667
	F2bv(SrcAcc | DstDI | String | NoWrite, em_cmp_r),
4668
	/* 0xB0 - 0xB7 */
4669
	X8(I(ByteOp | DstReg | SrcImm | Mov, em_mov)),
4670
	/* 0xB8 - 0xBF */
4671
	X8(I(DstReg | SrcImm64 | Mov, em_mov)),
4672
	/* 0xC0 - 0xC7 */
4673
	G(ByteOp | Src2ImmByte, group2), G(Src2ImmByte, group2),
4674 4675
	I(ImplicitOps | NearBranch | SrcImmU16, em_ret_near_imm),
	I(ImplicitOps | NearBranch, em_ret),
4676 4677
	I(DstReg | SrcMemFAddr | ModRM | No64 | Src2ES, em_lseg),
	I(DstReg | SrcMemFAddr | ModRM | No64 | Src2DS, em_lseg),
4678
	G(ByteOp, group11), G(0, group11),
4679
	/* 0xC8 - 0xCF */
	I(Stack | SrcImmU16 | Src2ImmByte, em_enter), I(Stack, em_leave),
4681 4682
	I(ImplicitOps | SrcImmU16, em_ret_far_imm),
	I(ImplicitOps, em_ret_far),
4683
	D(ImplicitOps), DI(SrcImmByte, intn),
4684
	D(ImplicitOps | No64), II(ImplicitOps, em_iret, iret),
4685
	/* 0xD0 - 0xD7 */
4686 4687
	G(Src2One | ByteOp, group2), G(Src2One, group2),
	G(Src2CL | ByteOp, group2), G(Src2CL, group2),
	I(DstAcc | SrcImmUByte | No64, em_aam),
	I(DstAcc | SrcImmUByte | No64, em_aad),
	F(DstAcc | ByteOp | No64, em_salc),
	I(DstAcc | SrcXLat | ByteOp, em_mov),
4692
	/* 0xD8 - 0xDF */
4693
	N, E(0, &escape_d9), N, E(0, &escape_db), N, E(0, &escape_dd), N, N,
4694
	/* 0xE0 - 0xE7 */
4695 4696
	X3(I(SrcImmByte | NearBranch, em_loop)),
	I(SrcImmByte | NearBranch, em_jcxz),
4697 4698
	I2bvIP(SrcImmUByte | DstAcc, em_in,  in,  check_perm_in),
	I2bvIP(SrcAcc | DstImmUByte, em_out, out, check_perm_out),
4699
	/* 0xE8 - 0xEF */
4700 4701 4702
	I(SrcImm | NearBranch, em_call), D(SrcImm | ImplicitOps | NearBranch),
	I(SrcImmFAddr | No64, em_jmp_far),
	D(SrcImmByte | ImplicitOps | NearBranch),
4703 4704
	I2bvIP(SrcDX | DstAcc, em_in,  in,  check_perm_in),
	I2bvIP(SrcAcc | DstDX, em_out, out, check_perm_out),
4705
	/* 0xF0 - 0xF7 */
4706
	N, DI(ImplicitOps, icebp), N, N,
4707 4708
	DI(ImplicitOps | Priv, hlt), D(ImplicitOps),
	G(ByteOp, group3), G(0, group3),
4709
	/* 0xF8 - 0xFF */
4710 4711
	D(ImplicitOps), D(ImplicitOps),
	I(ImplicitOps, em_cli), I(ImplicitOps, em_sti),
4712 4713 4714
	D(ImplicitOps), D(ImplicitOps), G(0, group4), G(0, group5),
};

4715
static const struct opcode twobyte_table[256] = {
4716
	/* 0x00 - 0x0F */
4717
	G(0, group6), GD(0, &group7), N, N,
4718
	N, I(ImplicitOps | EmulateOnUD, em_syscall),
4719
	II(ImplicitOps | Priv, em_clts, clts), N,
4720
	DI(ImplicitOps | Priv, invd), DI(ImplicitOps | Priv, wbinvd), N, N,
4721
	N, D(ImplicitOps | ModRM | SrcMem | NoAccess), N, N,
4722
	/* 0x10 - 0x1F */
P
Paolo Bonzini 已提交
4723
	N, N, N, N, N, N, N, N,
4724 4725
	D(ImplicitOps | ModRM | SrcMem | NoAccess),
	N, N, N, N, N, N, D(ImplicitOps | ModRM | SrcMem | NoAccess),
4726
	/* 0x20 - 0x2F */
4727 4728 4729 4730 4731 4732
	DIP(ModRM | DstMem | Priv | Op3264 | NoMod, cr_read, check_cr_read),
	DIP(ModRM | DstMem | Priv | Op3264 | NoMod, dr_read, check_dr_read),
	IIP(ModRM | SrcMem | Priv | Op3264 | NoMod, em_cr_write, cr_write,
						check_cr_write),
	IIP(ModRM | SrcMem | Priv | Op3264 | NoMod, em_dr_write, dr_write,
						check_dr_write),
4733
	N, N, N, N,
4734 4735
	GP(ModRM | DstReg | SrcMem | Mov | Sse, &pfx_0f_28_0f_29),
	GP(ModRM | DstMem | SrcReg | Mov | Sse, &pfx_0f_28_0f_29),
4736
	N, GP(ModRM | DstMem | SrcReg | Mov | Sse, &pfx_0f_2b),
4737
	N, N, N, N,
4738
	/* 0x30 - 0x3F */
4739
	II(ImplicitOps | Priv, em_wrmsr, wrmsr),
4740
	IIP(ImplicitOps, em_rdtsc, rdtsc, check_rdtsc),
4741
	II(ImplicitOps | Priv, em_rdmsr, rdmsr),
4742
	IIP(ImplicitOps, em_rdpmc, rdpmc, check_rdpmc),
4743 4744
	I(ImplicitOps | EmulateOnUD, em_sysenter),
	I(ImplicitOps | Priv | EmulateOnUD, em_sysexit),
4745
	N, N,
4746 4747
	N, N, N, N, N, N, N, N,
	/* 0x40 - 0x4F */
4748
	X16(D(DstReg | SrcMem | ModRM)),
4749 4750 4751
	/* 0x50 - 0x5F */
	N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
	/* 0x60 - 0x6F */
4752 4753 4754 4755
	N, N, N, N,
	N, N, N, N,
	N, N, N, N,
	N, N, N, GP(SrcMem | DstReg | ModRM | Mov, &pfx_0f_6f_0f_7f),
4756
	/* 0x70 - 0x7F */
4757 4758 4759 4760
	N, N, N, N,
	N, N, N, N,
	N, N, N, N,
	N, N, N, GP(SrcReg | DstMem | ModRM | Mov, &pfx_0f_6f_0f_7f),
4761
	/* 0x80 - 0x8F */
4762
	X16(D(SrcImm | NearBranch)),
4763
	/* 0x90 - 0x9F */
4764
	X16(D(ByteOp | DstMem | SrcNone | ModRM| Mov)),
4765
	/* 0xA0 - 0xA7 */
4766
	I(Stack | Src2FS, em_push_sreg), I(Stack | Src2FS, em_pop_sreg),
4767 4768
	II(ImplicitOps, em_cpuid, cpuid),
	F(DstMem | SrcReg | ModRM | BitOp | NoWrite, em_bt),
4769 4770
	F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shld),
	F(DstMem | SrcReg | Src2CL | ModRM, em_shld), N, N,
4771
	/* 0xA8 - 0xAF */
4772
	I(Stack | Src2GS, em_push_sreg), I(Stack | Src2GS, em_pop_sreg),
4773
	II(EmulateOnUD | ImplicitOps, em_rsm, rsm),
4774
	F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_bts),
4775 4776
	F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shrd),
	F(DstMem | SrcReg | Src2CL | ModRM, em_shrd),
4777
	GD(0, &group15), F(DstReg | SrcMem | ModRM, em_imul),
4778
	/* 0xB0 - 0xB7 */
4779
	I2bv(DstMem | SrcReg | ModRM | Lock | PageTable | SrcWrite, em_cmpxchg),
4780
	I(DstReg | SrcMemFAddr | ModRM | Src2SS, em_lseg),
4781
	F(DstMem | SrcReg | ModRM | BitOp | Lock, em_btr),
4782 4783
	I(DstReg | SrcMemFAddr | ModRM | Src2FS, em_lseg),
	I(DstReg | SrcMemFAddr | ModRM | Src2GS, em_lseg),
4784
	D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov),
4785 4786
	/* 0xB8 - 0xBF */
	N, N,
4787
	G(BitOp, group8),
4788
	F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_btc),
4789 4790
	I(DstReg | SrcMem | ModRM, em_bsf_c),
	I(DstReg | SrcMem | ModRM, em_bsr_c),
4791
	D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov),
	/* 0xC0 - 0xC7 */
4793
	F2bv(DstMem | SrcReg | ModRM | SrcWrite | Lock, em_xadd),
4794
	N, ID(0, &instr_dual_0f_c3),
4795
	N, N, N, GD(0, &group9),
	/* 0xC8 - 0xCF */
	X8(I(DstReg, em_bswap)),
4798 4799 4800
	/* 0xD0 - 0xDF */
	N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
	/* 0xE0 - 0xEF */
4801 4802
	N, N, N, N, N, N, N, GP(SrcReg | DstMem | ModRM | Mov, &pfx_0f_e7),
	N, N, N, N, N, N, N, N,
4803 4804 4805 4806
	/* 0xF0 - 0xFF */
	N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N
};

4807 4808 4809 4810 4811 4812 4813 4814
static const struct instr_dual instr_dual_0f_38_f0 = {
	I(DstReg | SrcMem | Mov, em_movbe), N
};

static const struct instr_dual instr_dual_0f_38_f1 = {
	I(DstMem | SrcReg | Mov, em_movbe), N
};

4815
static const struct gprefix three_byte_0f_38_f0 = {
4816
	ID(0, &instr_dual_0f_38_f0), N, N, N
4817 4818 4819
};

static const struct gprefix three_byte_0f_38_f1 = {
4820
	ID(0, &instr_dual_0f_38_f1), N, N, N
4821 4822 4823 4824 4825 4826 4827 4828 4829
};

/*
 * Insns below are indexed by the third opcode byte and then selected by
 * the mandatory prefix, if any.
 */
static const struct opcode opcode_map_0f_38[256] = {
	/* 0x00 - 0x7f */
	X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), X16(N),
	/* 0x80 - 0xef */
	X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), X16(N),
	/* 0xf0 - 0xf1 */
4833 4834
	GP(EmulateOnUD | ModRM, &three_byte_0f_38_f0),
	GP(EmulateOnUD | ModRM, &three_byte_0f_38_f1),
	/* 0xf2 - 0xff */
	N, N, X4(N), X8(N)
4837 4838
};
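/*
 * Decode sketch (editor's illustration, not part of the original file):
 * for "0f 38 f0 /r" the table above yields GP(..., &three_byte_0f_38_f0);
 * the Prefix case in x86_decode_insn then picks pfx_no for the bare opcode
 * (the MOVBE entry), while a 66, f2 or f3 mandatory prefix selects an N
 * slot, so those encodings (e.g. f2-prefixed CRC32) are not emulated.
 */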

4839 4840 4841 4842 4843
#undef D
#undef N
#undef G
#undef GD
#undef I
4844
#undef GP
4845
#undef EXT
4846
#undef MD
#undef ID
4848

4849
#undef D2bv
4850
#undef D2bvIP
4851
#undef I2bv
4852
#undef I2bvIP
4853
#undef I6ALU
4854

4855
static unsigned imm_size(struct x86_emulate_ctxt *ctxt)
4856 4857 4858
{
	unsigned size;

4859
	size = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
4860 4861 4862 4863 4864 4865 4866 4867 4868 4869 4870 4871
	if (size == 8)
		size = 4;
	return size;
}

static int decode_imm(struct x86_emulate_ctxt *ctxt, struct operand *op,
		      unsigned size, bool sign_extension)
{
	int rc = X86EMUL_CONTINUE;

	op->type = OP_IMM;
	op->bytes = size;
4872
	op->addr.mem.ea = ctxt->_eip;
4873 4874 4875
	/* NB. Immediates are sign-extended as necessary. */
	switch (op->bytes) {
	case 1:
4876
		op->val = insn_fetch(s8, ctxt);
4877 4878
		break;
	case 2:
4879
		op->val = insn_fetch(s16, ctxt);
4880 4881
		break;
	case 4:
4882
		op->val = insn_fetch(s32, ctxt);
4883
		break;
4884 4885 4886
	case 8:
		op->val = insn_fetch(s64, ctxt);
		break;
4887 4888 4889 4890 4891 4892 4893 4894 4895 4896 4897 4898 4899 4900 4901 4902 4903 4904
	}
	if (!sign_extension) {
		switch (op->bytes) {
		case 1:
			op->val &= 0xff;
			break;
		case 2:
			op->val &= 0xffff;
			break;
		case 4:
			op->val &= 0xffffffff;
			break;
		}
	}
done:
	return rc;
}

4905 4906 4907 4908 4909 4910 4911
static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,
			  unsigned d)
{
	int rc = X86EMUL_CONTINUE;

	switch (d) {
	case OpReg:
4912
		decode_register_operand(ctxt, op);
4913 4914
		break;
	case OpImmUByte:
4915
		rc = decode_imm(ctxt, op, 1, false);
4916 4917
		break;
	case OpMem:
4918
		ctxt->memop.bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
4919 4920 4921
	mem_common:
		*op = ctxt->memop;
		ctxt->memopp = op;
4922
		if (ctxt->d & BitOp)
4923 4924 4925
			fetch_bit_operand(ctxt);
		op->orig_val = op->val;
		break;
4926
	case OpMem64:
4927
		ctxt->memop.bytes = (ctxt->op_bytes == 8) ? 16 : 8;
4928
		goto mem_common;
4929 4930 4931
	case OpAcc:
		op->type = OP_REG;
		op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
4932
		op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX);
4933 4934 4935
		fetch_register_operand(op);
		op->orig_val = op->val;
		break;
4936 4937 4938 4939 4940 4941 4942 4943 4944 4945 4946 4947 4948 4949 4950 4951 4952 4953
	case OpAccLo:
		op->type = OP_REG;
		op->bytes = (ctxt->d & ByteOp) ? 2 : ctxt->op_bytes;
		op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX);
		fetch_register_operand(op);
		op->orig_val = op->val;
		break;
	case OpAccHi:
		if (ctxt->d & ByteOp) {
			op->type = OP_NONE;
			break;
		}
		op->type = OP_REG;
		op->bytes = ctxt->op_bytes;
		op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RDX);
		fetch_register_operand(op);
		op->orig_val = op->val;
		break;
4954 4955 4956 4957
	case OpDI:
		op->type = OP_MEM;
		op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
		op->addr.mem.ea =
4958
			register_address(ctxt, VCPU_REGS_RDI);
4959 4960
		op->addr.mem.seg = VCPU_SREG_ES;
		op->val = 0;
4961
		op->count = 1;
4962 4963 4964 4965
		break;
	case OpDX:
		op->type = OP_REG;
		op->bytes = 2;
4966
		op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RDX);
4967 4968
		fetch_register_operand(op);
		break;
4969
	case OpCL:
4970
		op->type = OP_IMM;
4971
		op->bytes = 1;
4972
		op->val = reg_read(ctxt, VCPU_REGS_RCX) & 0xff;
4973 4974 4975 4976 4977
		break;
	case OpImmByte:
		rc = decode_imm(ctxt, op, 1, true);
		break;
	case OpOne:
4978
		op->type = OP_IMM;
4979 4980 4981 4982 4983 4984
		op->bytes = 1;
		op->val = 1;
		break;
	case OpImm:
		rc = decode_imm(ctxt, op, imm_size(ctxt), true);
		break;
4985 4986 4987
	case OpImm64:
		rc = decode_imm(ctxt, op, ctxt->op_bytes, true);
		break;
4988 4989
	case OpMem8:
		ctxt->memop.bytes = 1;
4990
		if (ctxt->memop.type == OP_REG) {
4991 4992
			ctxt->memop.addr.reg = decode_register(ctxt,
					ctxt->modrm_rm, true);
4993 4994
			fetch_register_operand(&ctxt->memop);
		}
4995
		goto mem_common;
4996 4997 4998 4999 5000 5001 5002 5003 5004 5005 5006 5007 5008 5009 5010 5011
	case OpMem16:
		ctxt->memop.bytes = 2;
		goto mem_common;
	case OpMem32:
		ctxt->memop.bytes = 4;
		goto mem_common;
	case OpImmU16:
		rc = decode_imm(ctxt, op, 2, false);
		break;
	case OpImmU:
		rc = decode_imm(ctxt, op, imm_size(ctxt), false);
		break;
	case OpSI:
		op->type = OP_MEM;
		op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
		op->addr.mem.ea =
5012
			register_address(ctxt, VCPU_REGS_RSI);
		op->addr.mem.seg = ctxt->seg_override;
5014
		op->val = 0;
5015
		op->count = 1;
5016
		break;
	case OpXLat:
		op->type = OP_MEM;
		op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
		op->addr.mem.ea =
5021
			address_mask(ctxt,
				reg_read(ctxt, VCPU_REGS_RBX) +
				(reg_read(ctxt, VCPU_REGS_RAX) & 0xff));
		op->addr.mem.seg = ctxt->seg_override;
		op->val = 0;
		break;
5027 5028 5029 5030 5031 5032 5033 5034 5035
	case OpImmFAddr:
		op->type = OP_IMM;
		op->addr.mem.ea = ctxt->_eip;
		op->bytes = ctxt->op_bytes + 2;
		insn_fetch_arr(op->valptr, op->bytes, ctxt);
		break;
	case OpMemFAddr:
		ctxt->memop.bytes = ctxt->op_bytes + 2;
		goto mem_common;
5036
	case OpES:
5037
		op->type = OP_IMM;
5038 5039 5040
		op->val = VCPU_SREG_ES;
		break;
	case OpCS:
5041
		op->type = OP_IMM;
5042 5043 5044
		op->val = VCPU_SREG_CS;
		break;
	case OpSS:
5045
		op->type = OP_IMM;
5046 5047 5048
		op->val = VCPU_SREG_SS;
		break;
	case OpDS:
5049
		op->type = OP_IMM;
5050 5051 5052
		op->val = VCPU_SREG_DS;
		break;
	case OpFS:
5053
		op->type = OP_IMM;
5054 5055 5056
		op->val = VCPU_SREG_FS;
		break;
	case OpGS:
5057
		op->type = OP_IMM;
5058 5059
		op->val = VCPU_SREG_GS;
		break;
5060 5061 5062 5063 5064 5065 5066 5067 5068 5069 5070
	case OpImplicit:
		/* Special instructions do their own operand decoding. */
	default:
		op->type = OP_NONE; /* Disable writeback. */
		break;
	}

done:
	return rc;
}

5071
int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len)
5072 5073 5074
{
	int rc = X86EMUL_CONTINUE;
	int mode = ctxt->mode;
5075
	int def_op_bytes, def_ad_bytes, goffset, simd_prefix;
5076
	bool op_prefix = false;
	bool has_seg_override = false;
5078
	struct opcode opcode;
5079 5080
	u16 dummy;
	struct desc_struct desc;
5081

5082 5083
	ctxt->memop.type = OP_NONE;
	ctxt->memopp = NULL;
5084
	ctxt->_eip = ctxt->eip;
5085 5086
	ctxt->fetch.ptr = ctxt->fetch.data;
	ctxt->fetch.end = ctxt->fetch.data + insn_len;
	ctxt->opcode_len = 1;
5088
	if (insn_len > 0)
5089
		memcpy(ctxt->fetch.data, insn, insn_len);
5090
	else {
5091
		rc = __do_insn_fetch_bytes(ctxt, 1);
5092 5093 5094
		if (rc != X86EMUL_CONTINUE)
			return rc;
	}
5095 5096 5097 5098

	switch (mode) {
	case X86EMUL_MODE_REAL:
	case X86EMUL_MODE_VM86:
5099 5100 5101 5102 5103
		def_op_bytes = def_ad_bytes = 2;
		ctxt->ops->get_segment(ctxt, &dummy, &desc, NULL, VCPU_SREG_CS);
		if (desc.d)
			def_op_bytes = def_ad_bytes = 4;
		break;
5104 5105 5106 5107 5108 5109 5110 5111 5112 5113 5114 5115 5116
	case X86EMUL_MODE_PROT16:
		def_op_bytes = def_ad_bytes = 2;
		break;
	case X86EMUL_MODE_PROT32:
		def_op_bytes = def_ad_bytes = 4;
		break;
#ifdef CONFIG_X86_64
	case X86EMUL_MODE_PROT64:
		def_op_bytes = 4;
		def_ad_bytes = 8;
		break;
#endif
	default:
5117
		return EMULATION_FAILED;
5118 5119
	}

5120 5121
	ctxt->op_bytes = def_op_bytes;
	ctxt->ad_bytes = def_ad_bytes;
5122 5123 5124

	/* Legacy prefixes. */
	for (;;) {
5125
		switch (ctxt->b = insn_fetch(u8, ctxt)) {
5126
		case 0x66:	/* operand-size override */
5127
			op_prefix = true;
5128
			/* switch between 2/4 bytes */
5129
			ctxt->op_bytes = def_op_bytes ^ 6;
5130 5131 5132 5133
			break;
		case 0x67:	/* address-size override */
			if (mode == X86EMUL_MODE_PROT64)
				/* switch between 4/8 bytes */
5134
				ctxt->ad_bytes = def_ad_bytes ^ 12;
5135 5136
			else
				/* switch between 2/4 bytes */
5137
				ctxt->ad_bytes = def_ad_bytes ^ 6;
5138 5139 5140 5141 5142
			break;
		case 0x26:	/* ES override */
		case 0x2e:	/* CS override */
		case 0x36:	/* SS override */
		case 0x3e:	/* DS override */
B
			ctxt->seg_override = (ctxt->b >> 3) & 3;
5145 5146 5147
			break;
		case 0x64:	/* FS override */
		case 0x65:	/* GS override */
			has_seg_override = true;
			ctxt->seg_override = ctxt->b & 7;
5150 5151 5152 5153
			break;
		case 0x40 ... 0x4f: /* REX */
			if (mode != X86EMUL_MODE_PROT64)
				goto done_prefixes;
5154
			ctxt->rex_prefix = ctxt->b;
5155 5156
			continue;
		case 0xf0:	/* LOCK */
5157
			ctxt->lock_prefix = 1;
5158 5159 5160
			break;
		case 0xf2:	/* REPNE/REPNZ */
		case 0xf3:	/* REP/REPE/REPZ */
5161
			ctxt->rep_prefix = ctxt->b;
5162 5163 5164 5165 5166 5167 5168
			break;
		default:
			goto done_prefixes;
		}

		/* Any legacy prefix after a REX prefix nullifies its effect. */

5169
		ctxt->rex_prefix = 0;
5170 5171 5172 5173 5174
	}

done_prefixes:

	/* REX prefix. */
5175 5176
	if (ctxt->rex_prefix & 8)
		ctxt->op_bytes = 8;	/* REX.W */
5177 5178

	/* Opcode byte(s). */
5179
	opcode = opcode_table[ctxt->b];
5180
	/* Two-byte opcode? */
5181
	if (ctxt->b == 0x0f) {
		ctxt->opcode_len = 2;
5183
		ctxt->b = insn_fetch(u8, ctxt);
5184
		opcode = twobyte_table[ctxt->b];
5185 5186 5187 5188 5189 5190 5191

		/* 0F_38 opcode map */
		if (ctxt->b == 0x38) {
			ctxt->opcode_len = 3;
			ctxt->b = insn_fetch(u8, ctxt);
			opcode = opcode_map_0f_38[ctxt->b];
		}
5192
	}
5193
	ctxt->d = opcode.flags;
5194

5195 5196 5197
	if (ctxt->d & ModRM)
		ctxt->modrm = insn_fetch(u8, ctxt);

5198 5199
	/* vex-prefix instructions are not implemented */
	if (ctxt->opcode_len == 1 && (ctxt->b == 0xc5 || ctxt->b == 0xc4) &&
5200
	    (mode == X86EMUL_MODE_PROT64 || (ctxt->modrm & 0xc0) == 0xc0)) {
5201 5202 5203
		ctxt->d = NotImpl;
	}

5204 5205
	while (ctxt->d & GroupMask) {
		switch (ctxt->d & GroupMask) {
5206
		case Group:
5207
			goffset = (ctxt->modrm >> 3) & 7;
5208 5209 5210
			opcode = opcode.u.group[goffset];
			break;
		case GroupDual:
5211 5212
			goffset = (ctxt->modrm >> 3) & 7;
			if ((ctxt->modrm >> 6) == 3)
5213 5214 5215 5216 5217
				opcode = opcode.u.gdual->mod3[goffset];
			else
				opcode = opcode.u.gdual->mod012[goffset];
			break;
		case RMExt:
5218
			goffset = ctxt->modrm & 7;
5219
			opcode = opcode.u.group[goffset];
5220 5221
			break;
		case Prefix:
5222
			if (ctxt->rep_prefix && op_prefix)
5223
				return EMULATION_FAILED;
5224
			simd_prefix = op_prefix ? 0x66 : ctxt->rep_prefix;
5225 5226 5227 5228 5229 5230 5231
			switch (simd_prefix) {
			case 0x00: opcode = opcode.u.gprefix->pfx_no; break;
			case 0x66: opcode = opcode.u.gprefix->pfx_66; break;
			case 0xf2: opcode = opcode.u.gprefix->pfx_f2; break;
			case 0xf3: opcode = opcode.u.gprefix->pfx_f3; break;
			}
			break;
5232 5233 5234 5235 5236 5237
		case Escape:
			if (ctxt->modrm > 0xbf)
				opcode = opcode.u.esc->high[ctxt->modrm - 0xc0];
			else
				opcode = opcode.u.esc->op[(ctxt->modrm >> 3) & 7];
			break;
5238 5239 5240 5241 5242 5243
		case InstrDual:
			if ((ctxt->modrm >> 6) == 3)
				opcode = opcode.u.idual->mod3;
			else
				opcode = opcode.u.idual->mod012;
			break;
5244 5245 5246 5247 5248 5249
		case ModeDual:
			if (ctxt->mode == X86EMUL_MODE_PROT64)
				opcode = opcode.u.mdual->mode64;
			else
				opcode = opcode.u.mdual->mode32;
			break;
5250
		default:
5251
			return EMULATION_FAILED;
5252
		}
5253

5254
		ctxt->d &= ~(u64)GroupMask;
5255
		ctxt->d |= opcode.flags;
5256 5257
	}

5258 5259 5260 5261
	/* Unrecognised? */
	if (ctxt->d == 0)
		return EMULATION_FAILED;

5262
	ctxt->execute = opcode.u.execute;
5263

5264 5265 5266
	if (unlikely(ctxt->ud) && likely(!(ctxt->d & EmulateOnUD)))
		return EMULATION_FAILED;

5267
	if (unlikely(ctxt->d &
5268 5269
	    (NotImpl|Stack|Op3264|Sse|Mmx|Intercept|CheckPerm|NearBranch|
	     No16))) {
5270 5271 5272 5273 5274 5275
		/*
		 * These are copied unconditionally here, and checked unconditionally
		 * in x86_emulate_insn.
		 */
		ctxt->check_perm = opcode.check_perm;
		ctxt->intercept = opcode.intercept;
5276

5277 5278
		if (ctxt->d & NotImpl)
			return EMULATION_FAILED;
5279

5280 5281 5282 5283 5284 5285
		if (mode == X86EMUL_MODE_PROT64) {
			if (ctxt->op_bytes == 4 && (ctxt->d & Stack))
				ctxt->op_bytes = 8;
			else if (ctxt->d & NearBranch)
				ctxt->op_bytes = 8;
		}
5286

5287 5288 5289 5290 5291 5292 5293
		if (ctxt->d & Op3264) {
			if (mode == X86EMUL_MODE_PROT64)
				ctxt->op_bytes = 8;
			else
				ctxt->op_bytes = 4;
		}

5294 5295 5296
		if ((ctxt->d & No16) && ctxt->op_bytes == 2)
			ctxt->op_bytes = 4;

5297 5298 5299 5300 5301
		if (ctxt->d & Sse)
			ctxt->op_bytes = 16;
		else if (ctxt->d & Mmx)
			ctxt->op_bytes = 8;
	}

5303
	/* ModRM and SIB bytes. */
5304
	if (ctxt->d & ModRM) {
5305
		rc = decode_modrm(ctxt, &ctxt->memop);
		if (!has_seg_override) {
			has_seg_override = true;
			ctxt->seg_override = ctxt->modrm_seg;
		}
5310
	} else if (ctxt->d & MemAbs)
5311
		rc = decode_abs(ctxt, &ctxt->memop);
5312 5313 5314
	if (rc != X86EMUL_CONTINUE)
		goto done;

	if (!has_seg_override)
		ctxt->seg_override = VCPU_SREG_DS;
5317

	ctxt->memop.addr.mem.seg = ctxt->seg_override;
5319 5320 5321 5322 5323

	/*
	 * Decode and fetch the source operand: register, memory
	 * or immediate.
	 */
5324
	rc = decode_operand(ctxt, &ctxt->src, (ctxt->d >> SrcShift) & OpMask);
5325 5326 5327
	if (rc != X86EMUL_CONTINUE)
		goto done;

5328 5329 5330 5331
	/*
	 * Decode and fetch the second source operand: register, memory
	 * or immediate.
	 */
5332
	rc = decode_operand(ctxt, &ctxt->src2, (ctxt->d >> Src2Shift) & OpMask);
5333 5334 5335
	if (rc != X86EMUL_CONTINUE)
		goto done;

5336
	/* Decode and fetch the destination operand: register or memory. */
5337
	rc = decode_operand(ctxt, &ctxt->dst, (ctxt->d >> DstShift) & OpMask);
5338

5339
	if (ctxt->rip_relative && likely(ctxt->memopp))
5340 5341
		ctxt->memopp->addr.mem.ea = address_mask(ctxt,
					ctxt->memopp->addr.mem.ea + ctxt->_eip);
5342

5343
done:
5344
	return (rc != X86EMUL_CONTINUE) ? EMULATION_FAILED : EMULATION_OK;
5345 5346
}

5347 5348 5349 5350 5351
bool x86_page_table_writing_insn(struct x86_emulate_ctxt *ctxt)
{
	return ctxt->d & PageTable;
}

5352 5353 5354 5355 5356 5357 5358 5359 5360
static bool string_insn_completed(struct x86_emulate_ctxt *ctxt)
{
	/*
	 * The second termination condition only applies to REPE and REPNE.
	 * If the repeat string operation prefix is REPE/REPZ or REPNE/REPNZ,
	 * check the corresponding termination condition:
	 * 	- if REPE/REPZ and ZF = 0 then done
	 * 	- if REPNE/REPNZ and ZF = 1 then done
	 */
5361 5362 5363
	if (((ctxt->b == 0xa6) || (ctxt->b == 0xa7) ||
	     (ctxt->b == 0xae) || (ctxt->b == 0xaf))
	    && (((ctxt->rep_prefix == REPE_PREFIX) &&
5364
		 ((ctxt->eflags & X86_EFLAGS_ZF) == 0))
5365
		|| ((ctxt->rep_prefix == REPNE_PREFIX) &&
5366
		    ((ctxt->eflags & X86_EFLAGS_ZF) == X86_EFLAGS_ZF))))
5367 5368 5369 5370 5371
		return true;

	return false;
}
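/*
 * Example (editor's note): "repe cmpsb" is opcode 0xa6 with the REPE
 * prefix, so the check above reports completion as soon as a comparison
 * clears ZF, matching the "if REPE/REPZ and ZF = 0 then done" rule.
 */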

static int flush_pending_x87_faults(struct x86_emulate_ctxt *ctxt)
{
	int rc;

	rc = asm_safe("fwait");

	if (unlikely(rc != X86EMUL_CONTINUE))
A
Avi Kivity 已提交
5379 5380 5381 5382 5383 5384 5385 5386 5387 5388 5389 5390
		return emulate_exception(ctxt, MF_VECTOR, 0, false);

	return X86EMUL_CONTINUE;
}

static void fetch_possible_mmx_operand(struct x86_emulate_ctxt *ctxt,
				       struct operand *op)
{
	if (op->type == OP_MM)
		read_mmx_reg(ctxt, &op->mm_val, op->addr.mm);
}

5391 5392 5393
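/*
 * Dispatch to a fastop stub: load the guest's arithmetic flags, call the
 * stub (a small asm routine that runs the real instruction on dst/src/src2
 * held in registers) via CALL_NOSPEC, then fold the resulting flags back
 * into ctxt->eflags.  Non-byte forms select the stub variant for the
 * operand size, as the variants are laid out FASTOP_SIZE bytes apart.
 */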
static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *))
{
	ulong flags = (ctxt->eflags & EFLAGS_MASK) | X86_EFLAGS_IF;

	if (!(ctxt->d & ByteOp))
		fop += __ffs(ctxt->dst.bytes) * FASTOP_SIZE;

	asm("push %[flags]; popf; " CALL_NOSPEC " ; pushf; pop %[flags]\n"
	    : "+a"(ctxt->dst.val), "+d"(ctxt->src.val), [flags]"+D"(flags),
	      [thunk_target]"+S"(fop), ASM_CALL_CONSTRAINT
	    : "c"(ctxt->src2.val));

	ctxt->eflags = (ctxt->eflags & ~EFLAGS_MASK) | (flags & EFLAGS_MASK);
	if (!fop) /* exception is returned in fop variable */
		return emulate_de(ctxt);
	return X86EMUL_CONTINUE;
}

void init_decode_cache(struct x86_emulate_ctxt *ctxt)
{
	memset(&ctxt->rip_relative, 0,
	       (void *)&ctxt->modrm - (void *)&ctxt->rip_relative);

	ctxt->io_read.pos = 0;
	ctxt->io_read.end = 0;
	ctxt->mem_read.end = 0;
}

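/*
 * Emulate the already-decoded instruction.  Returns EMULATION_OK when the
 * instruction retired, EMULATION_RESTART when a string iteration must be
 * re-entered, EMULATION_INTERCEPTED when a nested-guest intercept fired,
 * and EMULATION_FAILED when the instruction cannot be emulated.
 */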
int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
{
	const struct x86_emulate_ops *ops = ctxt->ops;
	int rc = X86EMUL_CONTINUE;
	int saved_dst_type = ctxt->dst.type;
	unsigned emul_flags;

	ctxt->mem_read.pos = 0;

	/* LOCK prefix is allowed only with some instructions */
	if (ctxt->lock_prefix && (!(ctxt->d & Lock) || ctxt->dst.type != OP_MEM)) {
		rc = emulate_ud(ctxt);
		goto done;
	}

	if ((ctxt->d & SrcMask) == SrcMemFAddr && ctxt->src.type != OP_MEM) {
		rc = emulate_ud(ctxt);
		goto done;
	}

	emul_flags = ctxt->ops->get_hflags(ctxt);
	if (unlikely(ctxt->d &
		     (No64|Undefined|Sse|Mmx|Intercept|CheckPerm|Priv|Prot|String))) {
		if ((ctxt->mode == X86EMUL_MODE_PROT64 && (ctxt->d & No64)) ||
				(ctxt->d & Undefined)) {
			rc = emulate_ud(ctxt);
			goto done;
		}

		if (((ctxt->d & (Sse|Mmx)) && ((ops->get_cr(ctxt, 0) & X86_CR0_EM)))
		    || ((ctxt->d & Sse) && !(ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR))) {
			rc = emulate_ud(ctxt);
			goto done;
		}

		if ((ctxt->d & (Sse|Mmx)) && (ops->get_cr(ctxt, 0) & X86_CR0_TS)) {
			rc = emulate_nm(ctxt);
			goto done;
		}

		if (ctxt->d & Mmx) {
			rc = flush_pending_x87_faults(ctxt);
			if (rc != X86EMUL_CONTINUE)
				goto done;
			/*
			 * Now that we know the fpu is exception safe, we can fetch
			 * operands from it.
			 */
			fetch_possible_mmx_operand(ctxt, &ctxt->src);
			fetch_possible_mmx_operand(ctxt, &ctxt->src2);
			if (!(ctxt->d & Mov))
				fetch_possible_mmx_operand(ctxt, &ctxt->dst);
		}

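		/*
		 * Intercept checks are performed at three points
		 * (PRE_EXCEPT, POST_EXCEPT, POST_MEMACCESS) so that a nested
		 * hypervisor observes the intercept at the stage the
		 * architecture defines for this instruction.
		 */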
		if (unlikely(emul_flags & X86EMUL_GUEST_MASK) && ctxt->intercept) {
			rc = emulator_check_intercept(ctxt, ctxt->intercept,
						      X86_ICPT_PRE_EXCEPT);
			if (rc != X86EMUL_CONTINUE)
				goto done;
		}

		/* Instruction can only be executed in protected mode */
		if ((ctxt->d & Prot) && ctxt->mode < X86EMUL_MODE_PROT16) {
			rc = emulate_ud(ctxt);
			goto done;
		}

		/* Privileged instruction can be executed only in CPL=0 */
		if ((ctxt->d & Priv) && ops->cpl(ctxt)) {
			if (ctxt->d & PrivUD)
				rc = emulate_ud(ctxt);
			else
				rc = emulate_gp(ctxt, 0);
			goto done;
		}

		/* Do instruction specific permission checks */
		if (ctxt->d & CheckPerm) {
			rc = ctxt->check_perm(ctxt);
			if (rc != X86EMUL_CONTINUE)
				goto done;
		}

		if (unlikely(emul_flags & X86EMUL_GUEST_MASK) && (ctxt->d & Intercept)) {
			rc = emulator_check_intercept(ctxt, ctxt->intercept,
						      X86_ICPT_POST_EXCEPT);
			if (rc != X86EMUL_CONTINUE)
				goto done;
		}

		if (ctxt->rep_prefix && (ctxt->d & String)) {
			/* All REP prefixes have the same first termination condition */
			if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0) {
				string_registers_quirk(ctxt);
				ctxt->eip = ctxt->_eip;
				ctxt->eflags &= ~X86_EFLAGS_RF;
				goto done;
			}
		}
	}

	if ((ctxt->src.type == OP_MEM) && !(ctxt->d & NoAccess)) {
		rc = segmented_read(ctxt, ctxt->src.addr.mem,
				    ctxt->src.valptr, ctxt->src.bytes);
		if (rc != X86EMUL_CONTINUE)
			goto done;
		ctxt->src.orig_val64 = ctxt->src.val64;
	}

	if (ctxt->src2.type == OP_MEM) {
		rc = segmented_read(ctxt, ctxt->src2.addr.mem,
				    &ctxt->src2.val, ctxt->src2.bytes);
		if (rc != X86EMUL_CONTINUE)
			goto done;
	}

	if ((ctxt->d & DstMask) == ImplicitOps)
		goto special_insn;


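	/*
	 * For a read-modify-write destination (anything but a plain Mov),
	 * fetch the original memory value first.  A fault on this read is
	 * reported as a write fault, since the instruction as a whole
	 * intends to write the location.
	 */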
	if ((ctxt->dst.type == OP_MEM) && !(ctxt->d & Mov)) {
		/* optimisation - avoid slow emulated read if Mov */
		rc = segmented_read(ctxt, ctxt->dst.addr.mem,
				   &ctxt->dst.val, ctxt->dst.bytes);
		if (rc != X86EMUL_CONTINUE) {
			if (!(ctxt->d & NoWrite) &&
			    rc == X86EMUL_PROPAGATE_FAULT &&
			    ctxt->exception.vector == PF_VECTOR)
				ctxt->exception.error_code |= PFERR_WRITE_MASK;
			goto done;
		}
	}
	/* Copy full 64-bit value for CMPXCHG8B.  */
	ctxt->dst.orig_val64 = ctxt->dst.val64;

special_insn:

	if (unlikely(emul_flags & X86EMUL_GUEST_MASK) && (ctxt->d & Intercept)) {
		rc = emulator_check_intercept(ctxt, ctxt->intercept,
					      X86_ICPT_POST_MEMACCESS);
		if (rc != X86EMUL_CONTINUE)
			goto done;
	}

	if (ctxt->rep_prefix && (ctxt->d & String))
		ctxt->eflags |= X86_EFLAGS_RF;
	else
		ctxt->eflags &= ~X86_EFLAGS_RF;

	if (ctxt->execute) {
		if (ctxt->d & Fastop) {
			void (*fop)(struct fastop *) = (void *)ctxt->execute;
			rc = fastop(ctxt, fop);
			if (rc != X86EMUL_CONTINUE)
				goto done;
			goto writeback;
		}
		rc = ctxt->execute(ctxt);
		if (rc != X86EMUL_CONTINUE)
			goto done;
		goto writeback;
	}

	if (ctxt->opcode_len == 2)
		goto twobyte_insn;
	else if (ctxt->opcode_len == 3)
		goto threebyte_insn;

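	/*
	 * Opcodes with an ->execute or fastop handler were dispatched above;
	 * the switch below emulates the remaining one-byte opcodes inline.
	 */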
	switch (ctxt->b) {
	case 0x70 ... 0x7f: /* jcc (short) */
		if (test_cc(ctxt->b, ctxt->eflags))
			rc = jmp_rel(ctxt, ctxt->src.val);
		break;
	case 0x8d: /* lea r16/r32, m */
		ctxt->dst.val = ctxt->src.addr.mem.ea;
		break;
	case 0x90 ... 0x97: /* nop / xchg reg, rax */
		if (ctxt->dst.addr.reg == reg_rmw(ctxt, VCPU_REGS_RAX))
			ctxt->dst.type = OP_NONE;
		else
			rc = em_xchg(ctxt);
		break;
	case 0x98: /* cbw/cwde/cdqe */
		switch (ctxt->op_bytes) {
		case 2: ctxt->dst.val = (s8)ctxt->dst.val; break;
		case 4: ctxt->dst.val = (s16)ctxt->dst.val; break;
		case 8: ctxt->dst.val = (s32)ctxt->dst.val; break;
		}
		break;
	case 0xcc:		/* int3 */
		rc = emulate_int(ctxt, 3);
		break;
	case 0xcd:		/* int n */
		rc = emulate_int(ctxt, ctxt->src.val);
		break;
	case 0xce:		/* into */
		if (ctxt->eflags & X86_EFLAGS_OF)
			rc = emulate_int(ctxt, 4);
		break;
	case 0xe9: /* jmp rel */
	case 0xeb: /* jmp rel short */
		rc = jmp_rel(ctxt, ctxt->src.val);
		ctxt->dst.type = OP_NONE; /* Disable writeback. */
		break;
	case 0xf4:              /* hlt */
		ctxt->ops->halt(ctxt);
		break;
	case 0xf5:	/* cmc */
		/* complement carry flag from eflags reg */
		ctxt->eflags ^= X86_EFLAGS_CF;
		break;
	case 0xf8: /* clc */
		ctxt->eflags &= ~X86_EFLAGS_CF;
		break;
	case 0xf9: /* stc */
		ctxt->eflags |= X86_EFLAGS_CF;
		break;
	case 0xfc: /* cld */
		ctxt->eflags &= ~X86_EFLAGS_DF;
		break;
	case 0xfd: /* std */
		ctxt->eflags |= X86_EFLAGS_DF;
		break;
	default:
		goto cannot_emulate;
	}

	if (rc != X86EMUL_CONTINUE)
		goto done;

writeback:
	if (ctxt->d & SrcWrite) {
		BUG_ON(ctxt->src.type == OP_MEM || ctxt->src.type == OP_MEM_STR);
		rc = writeback(ctxt, &ctxt->src);
		if (rc != X86EMUL_CONTINUE)
			goto done;
	}
	if (!(ctxt->d & NoWrite)) {
		rc = writeback(ctxt, &ctxt->dst);
		if (rc != X86EMUL_CONTINUE)
			goto done;
	}

	/*
	 * Restore the destination type in case the decode is reused
	 * (this happens for string instructions).
	 */
	ctxt->dst.type = saved_dst_type;

	if ((ctxt->d & SrcMask) == SrcSI)
		string_addr_inc(ctxt, VCPU_REGS_RSI, &ctxt->src);

	if ((ctxt->d & DstMask) == DstDI)
		string_addr_inc(ctxt, VCPU_REGS_RDI, &ctxt->dst);

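	/*
	 * A REP-prefixed string instruction is emulated one chunk at a time:
	 * RCX is decremented by the number of iterations completed in this
	 * pass and, unless a termination condition was hit, emulation resumes
	 * at the same RIP, periodically re-entering the guest so that pending
	 * events can still be injected.
	 */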
	if (ctxt->rep_prefix && (ctxt->d & String)) {
		unsigned int count;
		struct read_cache *r = &ctxt->io_read;
		if ((ctxt->d & SrcMask) == SrcSI)
			count = ctxt->src.count;
		else
			count = ctxt->dst.count;
		register_address_increment(ctxt, VCPU_REGS_RCX, -count);

		if (!string_insn_completed(ctxt)) {
			/*
			 * Re-enter the guest when the pio read-ahead buffer is
			 * empty or, if it is not used, after every 1024
			 * iterations.
			 */
			if ((r->end != 0 || reg_read(ctxt, VCPU_REGS_RCX) & 0x3ff) &&
			    (r->end == 0 || r->end != r->pos)) {
				/*
				 * Reset read cache. Usually happens before
				 * decode, but since instruction is restarted
				 * we have to do it here.
				 */
				ctxt->mem_read.end = 0;
				writeback_registers(ctxt);
				return EMULATION_RESTART;
			}
			goto done; /* skip rip writeback */
		}
		ctxt->eflags &= ~X86_EFLAGS_RF;
	}

	ctxt->eip = ctxt->_eip;

done:
	if (rc == X86EMUL_PROPAGATE_FAULT) {
		WARN_ON(ctxt->exception.vector > 0x1f);
		ctxt->have_exception = true;
	}
	if (rc == X86EMUL_INTERCEPTED)
		return EMULATION_INTERCEPTED;

	if (rc == X86EMUL_CONTINUE)
		writeback_registers(ctxt);

	return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK;

twobyte_insn:
	switch (ctxt->b) {
	case 0x09:		/* wbinvd */
		(ctxt->ops->wbinvd)(ctxt);
		break;
	case 0x08:		/* invd */
	case 0x0d:		/* GrpP (prefetch) */
	case 0x18:		/* Grp16 (prefetch/nop) */
	case 0x1f:		/* nop */
		break;
	case 0x20: /* mov cr, reg */
		ctxt->dst.val = ops->get_cr(ctxt, ctxt->modrm_reg);
		break;
	case 0x21: /* mov from dr to reg */
		ops->get_dr(ctxt, ctxt->modrm_reg, &ctxt->dst.val);
		break;
	case 0x40 ... 0x4f:	/* cmov */
		if (test_cc(ctxt->b, ctxt->eflags))
			ctxt->dst.val = ctxt->src.val;
		else if (ctxt->op_bytes != 4)
			ctxt->dst.type = OP_NONE; /* no writeback */
		break;
	case 0x80 ... 0x8f: /* jcc rel */
		if (test_cc(ctxt->b, ctxt->eflags))
			rc = jmp_rel(ctxt, ctxt->src.val);
		break;
	case 0x90 ... 0x9f:     /* setcc r/m8 */
		ctxt->dst.val = test_cc(ctxt->b, ctxt->eflags);
		break;
	case 0xb6 ... 0xb7:	/* movzx */
		ctxt->dst.bytes = ctxt->op_bytes;
		ctxt->dst.val = (ctxt->src.bytes == 1) ? (u8) ctxt->src.val
						       : (u16) ctxt->src.val;
		break;
	case 0xbe ... 0xbf:	/* movsx */
		ctxt->dst.bytes = ctxt->op_bytes;
		ctxt->dst.val = (ctxt->src.bytes == 1) ? (s8) ctxt->src.val :
							(s16) ctxt->src.val;
		break;
	default:
		goto cannot_emulate;
	}

threebyte_insn:

	if (rc != X86EMUL_CONTINUE)
		goto done;

	goto writeback;

cannot_emulate:
	return EMULATION_FAILED;
}

void emulator_invalidate_register_cache(struct x86_emulate_ctxt *ctxt)
{
	invalidate_registers(ctxt);
}

void emulator_writeback_register_cache(struct x86_emulate_ctxt *ctxt)
{
	writeback_registers(ctxt);
}

bool emulator_can_use_gpa(struct x86_emulate_ctxt *ctxt)
{
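	/*
	 * A cached guest physical address describes only the single access
	 * that produced it; REP string instructions and instructions with
	 * two memory operands touch other locations as well, so they must
	 * take the full translation path.
	 */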
	if (ctxt->rep_prefix && (ctxt->d & String))
		return false;

	if (ctxt->d & TwoMemOp)
		return false;

	return true;
}