// SPDX-License-Identifier: GPL-2.0-only
/******************************************************************************
 * emulate.c
 *
 * Generic x86 (32-bit and 64-bit) instruction decoder and emulator.
 *
 * Copyright (c) 2005 Keir Fraser
 *
 * Linux coding style, mod r/m decoder, segment base fixes, real-mode
 * privileged instructions:
 *
 * Copyright (C) 2006 Qumranet
 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
 *
 *   Avi Kivity <avi@qumranet.com>
 *   Yaniv Kamay <yaniv@qumranet.com>
 *
 * From: xen-unstable 10676:af9809f51f81a3c43f276f00c81a52ef558afda4
 */

#include <linux/kvm_host.h>
#include "kvm_cache_regs.h"
#include "kvm_emulate.h"
#include <linux/stringify.h>
#include <asm/fpu/api.h>
#include <asm/debugreg.h>
#include <asm/nospec-branch.h>

#include "x86.h"
#include "tss.h"
#include "mmu.h"
#include "pmu.h"

/*
 * Operand types
 */
#define OpNone             0ull
#define OpImplicit         1ull  /* No generic decode */
#define OpReg              2ull  /* Register */
#define OpMem              3ull  /* Memory */
#define OpAcc              4ull  /* Accumulator: AL/AX/EAX/RAX */
#define OpDI               5ull  /* ES:DI/EDI/RDI */
#define OpMem64            6ull  /* Memory, 64-bit */
#define OpImmUByte         7ull  /* Zero-extended 8-bit immediate */
#define OpDX               8ull  /* DX register */
#define OpCL               9ull  /* CL register (for shifts) */
#define OpImmByte         10ull  /* 8-bit sign extended immediate */
#define OpOne             11ull  /* Implied 1 */
#define OpImm             12ull  /* Sign extended up to 32-bit immediate */
#define OpMem16           13ull  /* Memory operand (16-bit). */
#define OpMem32           14ull  /* Memory operand (32-bit). */
#define OpImmU            15ull  /* Immediate operand, zero extended */
#define OpSI              16ull  /* SI/ESI/RSI */
#define OpImmFAddr        17ull  /* Immediate far address */
#define OpMemFAddr        18ull  /* Far address in memory */
#define OpImmU16          19ull  /* Immediate operand, 16 bits, zero extended */
#define OpES              20ull  /* ES */
#define OpCS              21ull  /* CS */
#define OpSS              22ull  /* SS */
#define OpDS              23ull  /* DS */
#define OpFS              24ull  /* FS */
#define OpGS              25ull  /* GS */
#define OpMem8            26ull  /* 8-bit zero extended memory operand */
#define OpImm64           27ull  /* Sign extended 16/32/64-bit immediate */
#define OpXLat            28ull  /* memory at BX/EBX/RBX + zero-extended AL */
#define OpAccLo           29ull  /* Low part of extended acc (AX/AX/EAX/RAX) */
#define OpAccHi           30ull  /* High part of extended acc (-/DX/EDX/RDX) */

#define OpBits             5  /* Width of operand field */
#define OpMask             ((1ull << OpBits) - 1)
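
/*
 * Each operand slot in an opcode's flags (destination, source, second
 * source) holds one of the Op* values above in an OpBits-wide field; the
 * DstShift/SrcShift/Src2Shift definitions below place those fields, and
 * (OpMask << shift) extracts them again during decode.
 */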

/*
 * Opcode effective-address decode tables.
 * Note that we only emulate instructions that have at least one memory
 * operand (excluding implicit stack references). We assume that stack
 * references and instruction fetches will never occur in special memory
 * areas that require emulation. So, for example, 'mov <imm>,<reg>' need
 * not be handled.
 */

/* Operand sizes: 8-bit operands or specified/overridden size. */
#define ByteOp      (1<<0)	/* 8-bit operands. */
/* Destination operand type. */
#define DstShift    1
#define ImplicitOps (OpImplicit << DstShift)
#define DstReg      (OpReg << DstShift)
#define DstMem      (OpMem << DstShift)
#define DstAcc      (OpAcc << DstShift)
#define DstDI       (OpDI << DstShift)
#define DstMem64    (OpMem64 << DstShift)
#define DstMem16    (OpMem16 << DstShift)
#define DstImmUByte (OpImmUByte << DstShift)
#define DstDX       (OpDX << DstShift)
#define DstAccLo    (OpAccLo << DstShift)
#define DstMask     (OpMask << DstShift)
/* Source operand type. */
#define SrcShift    6
#define SrcNone     (OpNone << SrcShift)
#define SrcReg      (OpReg << SrcShift)
#define SrcMem      (OpMem << SrcShift)
#define SrcMem16    (OpMem16 << SrcShift)
#define SrcMem32    (OpMem32 << SrcShift)
#define SrcImm      (OpImm << SrcShift)
#define SrcImmByte  (OpImmByte << SrcShift)
#define SrcOne      (OpOne << SrcShift)
#define SrcImmUByte (OpImmUByte << SrcShift)
#define SrcImmU     (OpImmU << SrcShift)
#define SrcSI       (OpSI << SrcShift)
#define SrcXLat     (OpXLat << SrcShift)
#define SrcImmFAddr (OpImmFAddr << SrcShift)
#define SrcMemFAddr (OpMemFAddr << SrcShift)
#define SrcAcc      (OpAcc << SrcShift)
#define SrcImmU16   (OpImmU16 << SrcShift)
#define SrcImm64    (OpImm64 << SrcShift)
#define SrcDX       (OpDX << SrcShift)
#define SrcMem8     (OpMem8 << SrcShift)
#define SrcAccHi    (OpAccHi << SrcShift)
#define SrcMask     (OpMask << SrcShift)
#define BitOp       (1<<11)
#define MemAbs      (1<<12)      /* Memory operand is absolute displacement */
#define String      (1<<13)     /* String instruction (rep capable) */
#define Stack       (1<<14)     /* Stack instruction (push/pop) */
#define GroupMask   (7<<15)     /* Opcode uses one of the group mechanisms */
#define Group       (1<<15)     /* Bits 3:5 of modrm byte extend opcode */
#define GroupDual   (2<<15)     /* Alternate decoding of mod == 3 */
#define Prefix      (3<<15)     /* Instruction varies with 66/f2/f3 prefix */
#define RMExt       (4<<15)     /* Opcode extension in ModRM r/m if mod == 3 */
#define Escape      (5<<15)     /* Escape to coprocessor instruction */
#define InstrDual   (6<<15)     /* Alternate instruction decoding of mod == 3 */
#define ModeDual    (7<<15)     /* Different instruction for 32/64 bit */
#define Sse         (1<<18)     /* SSE Vector instruction */
/* Generic ModRM decode. */
#define ModRM       (1<<19)
/* Destination is only written; never read. */
#define Mov         (1<<20)
/* Misc flags */
#define Prot        (1<<21) /* instruction generates #UD if not in prot-mode */
#define EmulateOnUD (1<<22) /* Emulate if unsupported by the host */
#define NoAccess    (1<<23) /* Don't access memory (lea/invlpg/verr etc) */
#define Op3264      (1<<24) /* Operand is 64b in long mode, 32b otherwise */
#define Undefined   (1<<25) /* No Such Instruction */
#define Lock        (1<<26) /* lock prefix is allowed for the instruction */
#define Priv        (1<<27) /* instruction generates #GP if current CPL != 0 */
#define No64	    (1<<28)
#define PageTable   (1 << 29)   /* instruction used to write page table */
#define NotImpl     (1 << 30)   /* instruction is not implemented */
/* Source 2 operand type */
#define Src2Shift   (31)
#define Src2None    (OpNone << Src2Shift)
#define Src2Mem     (OpMem << Src2Shift)
#define Src2CL      (OpCL << Src2Shift)
#define Src2ImmByte (OpImmByte << Src2Shift)
#define Src2One     (OpOne << Src2Shift)
#define Src2Imm     (OpImm << Src2Shift)
#define Src2ES      (OpES << Src2Shift)
#define Src2CS      (OpCS << Src2Shift)
#define Src2SS      (OpSS << Src2Shift)
#define Src2DS      (OpDS << Src2Shift)
#define Src2FS      (OpFS << Src2Shift)
#define Src2GS      (OpGS << Src2Shift)
#define Src2Mask    (OpMask << Src2Shift)
#define Mmx         ((u64)1 << 40)  /* MMX Vector instruction */
#define AlignMask   ((u64)7 << 41)
#define Aligned     ((u64)1 << 41)  /* Explicitly aligned (e.g. MOVDQA) */
#define Unaligned   ((u64)2 << 41)  /* Explicitly unaligned (e.g. MOVDQU) */
#define Avx         ((u64)3 << 41)  /* Advanced Vector Extensions */
#define Aligned16   ((u64)4 << 41)  /* Aligned to 16 byte boundary (e.g. FXSAVE) */
#define Fastop      ((u64)1 << 44)  /* Use opcode::u.fastop */
#define NoWrite     ((u64)1 << 45)  /* No writeback */
#define SrcWrite    ((u64)1 << 46)  /* Write back src operand */
#define NoMod	    ((u64)1 << 47)  /* Mod field is ignored */
#define Intercept   ((u64)1 << 48)  /* Has valid intercept field */
#define CheckPerm   ((u64)1 << 49)  /* Has valid check_perm field */
#define PrivUD      ((u64)1 << 51)  /* #UD instead of #GP on CPL > 0 */
#define NearBranch  ((u64)1 << 52)  /* Near branches */
#define No16	    ((u64)1 << 53)  /* No 16 bit operand */
#define IncSP       ((u64)1 << 54)  /* SP is incremented before ModRM calc */
#define TwoMemOp    ((u64)1 << 55)  /* Instruction has two memory operands */

#define DstXacc     (DstAccLo | SrcAccHi | SrcWrite)

#define X2(x...) x, x
#define X3(x...) X2(x), x
#define X4(x...) X2(x), X2(x)
#define X5(x...) X4(x), x
#define X6(x...) X4(x), X2(x)
#define X7(x...) X4(x), X3(x)
#define X8(x...) X4(x), X4(x)
#define X16(x...) X8(x), X8(x)

#define NR_FASTOP (ilog2(sizeof(ulong)) + 1)
#define FASTOP_SIZE 8

struct opcode {
	u64 flags : 56;
	u64 intercept : 8;
	union {
		int (*execute)(struct x86_emulate_ctxt *ctxt);
		const struct opcode *group;
		const struct group_dual *gdual;
		const struct gprefix *gprefix;
		const struct escape *esc;
		const struct instr_dual *idual;
		const struct mode_dual *mdual;
		void (*fastop)(struct fastop *fake);
	} u;
	int (*check_perm)(struct x86_emulate_ctxt *ctxt);
};

struct group_dual {
	struct opcode mod012[8];
	struct opcode mod3[8];
};

struct gprefix {
	struct opcode pfx_no;
	struct opcode pfx_66;
	struct opcode pfx_f2;
	struct opcode pfx_f3;
};

struct escape {
	struct opcode op[8];
	struct opcode high[64];
};

struct instr_dual {
	struct opcode mod012;
	struct opcode mod3;
};

struct mode_dual {
	struct opcode mode32;
	struct opcode mode64;
};

#define EFLG_RESERVED_ZEROS_MASK 0xffc0802a

enum x86_transfer_type {
	X86_TRANSFER_NONE,
	X86_TRANSFER_CALL_JMP,
	X86_TRANSFER_RET,
	X86_TRANSFER_TASK_SWITCH,
};
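
/*
 * GPRs are read from the vcpu lazily and cached in ctxt->_regs; writers go
 * through reg_write()/reg_rmw(), which also mark the register dirty so that
 * writeback_registers() flushes only what the emulated instruction touched.
 */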

static ulong reg_read(struct x86_emulate_ctxt *ctxt, unsigned nr)
{
	if (!(ctxt->regs_valid & (1 << nr))) {
		ctxt->regs_valid |= 1 << nr;
		ctxt->_regs[nr] = ctxt->ops->read_gpr(ctxt, nr);
	}
	return ctxt->_regs[nr];
}

static ulong *reg_write(struct x86_emulate_ctxt *ctxt, unsigned nr)
{
	ctxt->regs_valid |= 1 << nr;
	ctxt->regs_dirty |= 1 << nr;
	return &ctxt->_regs[nr];
}

static ulong *reg_rmw(struct x86_emulate_ctxt *ctxt, unsigned nr)
{
	reg_read(ctxt, nr);
	return reg_write(ctxt, nr);
}

static void writeback_registers(struct x86_emulate_ctxt *ctxt)
{
	unsigned reg;

	for_each_set_bit(reg, (ulong *)&ctxt->regs_dirty, 16)
		ctxt->ops->write_gpr(ctxt, reg, ctxt->_regs[reg]);
}

static void invalidate_registers(struct x86_emulate_ctxt *ctxt)
{
	ctxt->regs_dirty = 0;
	ctxt->regs_valid = 0;
}

/*
 * These EFLAGS bits are restored from saved value during emulation, and
 * any changes are written back to the saved value after emulation.
 */
#define EFLAGS_MASK (X86_EFLAGS_OF|X86_EFLAGS_SF|X86_EFLAGS_ZF|X86_EFLAGS_AF|\
		     X86_EFLAGS_PF|X86_EFLAGS_CF)

#ifdef CONFIG_X86_64
#define ON64(x) x
#else
#define ON64(x)
#endif

/*
 * fastop functions have a special calling convention:
 *
 * dst:    rax        (in/out)
 * src:    rdx        (in/out)
 * src2:   rcx        (in)
 * flags:  rflags     (in/out)
 * ex:     rsi        (in:fastop pointer, out:zero if exception)
 *
 * Moreover, they are all exactly FASTOP_SIZE bytes long, so functions for
 * different operand sizes can be reached by calculation, rather than a jump
 * table (which would be bigger than the code).
 */
static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop);

#define __FOP_FUNC(name) \
	".align " __stringify(FASTOP_SIZE) " \n\t" \
	".type " name ", @function \n\t" \
	name ":\n\t"

#define FOP_FUNC(name) \
	__FOP_FUNC(#name)

#define __FOP_RET(name) \
	ASM_RET \
	".size " name ", .-" name "\n\t"

#define FOP_RET(name) \
	__FOP_RET(#name)

#define __FOP_START(op, align) \
	extern void em_##op(struct fastop *fake); \
	asm(".pushsection .text, \"ax\" \n\t" \
	    ".global em_" #op " \n\t" \
	    ".align " __stringify(align) " \n\t" \
	    "em_" #op ":\n\t"

#define FOP_START(op) __FOP_START(op, FASTOP_SIZE)

#define FOP_END \
	    ".popsection")

#define __FOPNOP(name) \
	__FOP_FUNC(name) \
	__FOP_RET(name)

#define FOPNOP() \
	__FOPNOP(__stringify(__UNIQUE_ID(nop)))

#define FOP1E(op,  dst) \
	__FOP_FUNC(#op "_" #dst) \
	"10: " #op " %" #dst " \n\t" \
	__FOP_RET(#op "_" #dst)

#define FOP1EEX(op,  dst) \
	FOP1E(op, dst) _ASM_EXTABLE(10b, kvm_fastop_exception)

#define FASTOP1(op) \
	FOP_START(op) \
	FOP1E(op##b, al) \
	FOP1E(op##w, ax) \
	FOP1E(op##l, eax) \
	ON64(FOP1E(op##q, rax))	\
	FOP_END
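
/*
 * For illustration: FASTOP1(not) emits em_not as a series of
 * FASTOP_SIZE-aligned stubs ("notb %al", "notw %ax", "notl %eax" and, on
 * 64-bit, "notq %rax"), so the variant for a given operand size can be
 * reached by adding a multiple of FASTOP_SIZE to em_not, as described in
 * the calling-convention comment above.
 */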

/* 1-operand, using src2 (for MUL/DIV r/m) */
#define FASTOP1SRC2(op, name) \
	FOP_START(name) \
	FOP1E(op, cl) \
	FOP1E(op, cx) \
	FOP1E(op, ecx) \
	ON64(FOP1E(op, rcx)) \
	FOP_END

/* 1-operand, using src2 (for MUL/DIV r/m), with exceptions */
#define FASTOP1SRC2EX(op, name) \
	FOP_START(name) \
	FOP1EEX(op, cl) \
	FOP1EEX(op, cx) \
	FOP1EEX(op, ecx) \
	ON64(FOP1EEX(op, rcx)) \
	FOP_END

#define FOP2E(op,  dst, src)	   \
	__FOP_FUNC(#op "_" #dst "_" #src) \
	#op " %" #src ", %" #dst " \n\t" \
	__FOP_RET(#op "_" #dst "_" #src)

#define FASTOP2(op) \
	FOP_START(op) \
	FOP2E(op##b, al, dl) \
	FOP2E(op##w, ax, dx) \
	FOP2E(op##l, eax, edx) \
	ON64(FOP2E(op##q, rax, rdx)) \
	FOP_END

/* 2 operand, word only */
#define FASTOP2W(op) \
	FOP_START(op) \
	FOPNOP() \
	FOP2E(op##w, ax, dx) \
	FOP2E(op##l, eax, edx) \
	ON64(FOP2E(op##q, rax, rdx)) \
	FOP_END

/* 2 operand, src is CL */
#define FASTOP2CL(op) \
	FOP_START(op) \
	FOP2E(op##b, al, cl) \
	FOP2E(op##w, ax, cl) \
	FOP2E(op##l, eax, cl) \
	ON64(FOP2E(op##q, rax, cl)) \
	FOP_END

/* 2 operand, src and dest are reversed */
#define FASTOP2R(op, name) \
	FOP_START(name) \
	FOP2E(op##b, dl, al) \
	FOP2E(op##w, dx, ax) \
	FOP2E(op##l, edx, eax) \
	ON64(FOP2E(op##q, rdx, rax)) \
	FOP_END

#define FOP3E(op,  dst, src, src2) \
	__FOP_FUNC(#op "_" #dst "_" #src "_" #src2) \
	#op " %" #src2 ", %" #src ", %" #dst " \n\t"\
	__FOP_RET(#op "_" #dst "_" #src "_" #src2)

/* 3-operand, word-only, src2=cl */
#define FASTOP3WCL(op) \
	FOP_START(op) \
	FOPNOP() \
	FOP3E(op##w, ax, dx, cl) \
	FOP3E(op##l, eax, edx, cl) \
	ON64(FOP3E(op##q, rax, rdx, cl)) \
	FOP_END

/* Special case for SETcc - 1 instruction per cc */

/*
 * Depending on .config the SETcc functions look like:
 *
 * SETcc %al			[3 bytes]
 * RET | JMP __x86_return_thunk	[1,5 bytes; CONFIG_RETPOLINE]
 * INT3				[1 byte; CONFIG_SLS]
 */
#define RET_LENGTH	(1 + (4 * IS_ENABLED(CONFIG_RETPOLINE)) + \
			 IS_ENABLED(CONFIG_SLS))
#define SETCC_LENGTH	(3 + RET_LENGTH)
#define SETCC_ALIGN	(4 << ((SETCC_LENGTH > 4) & 1) << ((SETCC_LENGTH > 8) & 1))
static_assert(SETCC_LENGTH <= SETCC_ALIGN);
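
/*
 * SETCC_ALIGN rounds the stub length up to the next power of two (4, 8 or
 * 16 bytes), and FOP_SETCC() pads each stub to that size with 0xcc, so the
 * handler for condition code cc sits at em_setcc + cc * SETCC_ALIGN.
 */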

#define FOP_SETCC(op) \
	".align " __stringify(SETCC_ALIGN) " \n\t" \
	".type " #op ", @function \n\t" \
	#op ": \n\t" \
	#op " %al \n\t" \
	__FOP_RET(#op) \
	".skip " __stringify(SETCC_ALIGN) " - (.-" #op "), 0xcc \n\t"

asm(".pushsection .fixup, \"ax\"\n"
    ".global kvm_fastop_exception \n"
    "kvm_fastop_exception: xor %esi, %esi; " ASM_RET
    ".popsection");

__FOP_START(setcc, SETCC_ALIGN)
FOP_SETCC(seto)
FOP_SETCC(setno)
FOP_SETCC(setc)
FOP_SETCC(setnc)
FOP_SETCC(setz)
FOP_SETCC(setnz)
FOP_SETCC(setbe)
FOP_SETCC(setnbe)
FOP_SETCC(sets)
FOP_SETCC(setns)
FOP_SETCC(setp)
FOP_SETCC(setnp)
FOP_SETCC(setl)
FOP_SETCC(setnl)
FOP_SETCC(setle)
FOP_SETCC(setnle)
FOP_END;

FOP_START(salc)
FOP_FUNC(salc)
"pushf; sbb %al, %al; popf \n\t"
FOP_RET(salc)
FOP_END;

/*
 * XXX: inoutclob user must know where the argument is being expanded.
 *      Relying on CONFIG_CC_HAS_ASM_GOTO would allow us to remove _fault.
 */
#define asm_safe(insn, inoutclob...) \
({ \
	int _fault = 0; \
 \
	asm volatile("1:" insn "\n" \
	             "2:\n" \
	             ".pushsection .fixup, \"ax\"\n" \
	             "3: movl $1, %[_fault]\n" \
	             "   jmp  2b\n" \
	             ".popsection\n" \
	             _ASM_EXTABLE(1b, 3b) \
	             : [_fault] "+qm"(_fault) inoutclob ); \
 \
	_fault ? X86EMUL_UNHANDLEABLE : X86EMUL_CONTINUE; \
})

static int emulator_check_intercept(struct x86_emulate_ctxt *ctxt,
				    enum x86_intercept intercept,
				    enum x86_intercept_stage stage)
{
	struct x86_instruction_info info = {
		.intercept  = intercept,
		.rep_prefix = ctxt->rep_prefix,
		.modrm_mod  = ctxt->modrm_mod,
		.modrm_reg  = ctxt->modrm_reg,
		.modrm_rm   = ctxt->modrm_rm,
		.src_val    = ctxt->src.val64,
		.dst_val    = ctxt->dst.val64,
		.src_bytes  = ctxt->src.bytes,
		.dst_bytes  = ctxt->dst.bytes,
		.ad_bytes   = ctxt->ad_bytes,
		.next_rip   = ctxt->eip,
	};

	return ctxt->ops->intercept(ctxt, &info, stage);
}

static void assign_masked(ulong *dest, ulong src, ulong mask)
{
	*dest = (*dest & ~mask) | (src & mask);
}

static void assign_register(unsigned long *reg, u64 val, int bytes)
{
	/* The 4-byte case *is* correct: in 64-bit mode we zero-extend. */
	switch (bytes) {
	case 1:
		*(u8 *)reg = (u8)val;
		break;
	case 2:
		*(u16 *)reg = (u16)val;
		break;
	case 4:
		*reg = (u32)val;
		break;	/* 64b: zero-extend */
	case 8:
		*reg = val;
		break;
	}
}

static inline unsigned long ad_mask(struct x86_emulate_ctxt *ctxt)
{
	return (1UL << (ctxt->ad_bytes << 3)) - 1;
}

static ulong stack_mask(struct x86_emulate_ctxt *ctxt)
{
	u16 sel;
	struct desc_struct ss;

	if (ctxt->mode == X86EMUL_MODE_PROT64)
		return ~0UL;
	ctxt->ops->get_segment(ctxt, &sel, &ss, NULL, VCPU_SREG_SS);
	return ~0U >> ((ss.d ^ 1) * 16);  /* d=0: 0xffff; d=1: 0xffffffff */
}

static int stack_size(struct x86_emulate_ctxt *ctxt)
{
	return (__fls(stack_mask(ctxt)) + 1) >> 3;
}

/* Access/update address held in a register, based on addressing mode. */
static inline unsigned long
address_mask(struct x86_emulate_ctxt *ctxt, unsigned long reg)
{
	if (ctxt->ad_bytes == sizeof(unsigned long))
		return reg;
	else
		return reg & ad_mask(ctxt);
}

static inline unsigned long
register_address(struct x86_emulate_ctxt *ctxt, int reg)
{
	return address_mask(ctxt, reg_read(ctxt, reg));
}

static void masked_increment(ulong *reg, ulong mask, int inc)
{
	assign_masked(reg, *reg + inc, mask);
}

static inline void
register_address_increment(struct x86_emulate_ctxt *ctxt, int reg, int inc)
{
	ulong *preg = reg_rmw(ctxt, reg);

	assign_register(preg, *preg + inc, ctxt->ad_bytes);
}

static void rsp_increment(struct x86_emulate_ctxt *ctxt, int inc)
{
	masked_increment(reg_rmw(ctxt, VCPU_REGS_RSP), stack_mask(ctxt), inc);
}

static u32 desc_limit_scaled(struct desc_struct *desc)
{
	u32 limit = get_desc_limit(desc);

	return desc->g ? (limit << 12) | 0xfff : limit;
}

static unsigned long seg_base(struct x86_emulate_ctxt *ctxt, int seg)
{
	if (ctxt->mode == X86EMUL_MODE_PROT64 && seg < VCPU_SREG_FS)
		return 0;

	return ctxt->ops->get_cached_segment_base(ctxt, seg);
}

static int emulate_exception(struct x86_emulate_ctxt *ctxt, int vec,
			     u32 error, bool valid)
{
	WARN_ON(vec > 0x1f);
	ctxt->exception.vector = vec;
	ctxt->exception.error_code = error;
	ctxt->exception.error_code_valid = valid;
	return X86EMUL_PROPAGATE_FAULT;
}

static int emulate_db(struct x86_emulate_ctxt *ctxt)
{
	return emulate_exception(ctxt, DB_VECTOR, 0, false);
}

static int emulate_gp(struct x86_emulate_ctxt *ctxt, int err)
{
	return emulate_exception(ctxt, GP_VECTOR, err, true);
}

static int emulate_ss(struct x86_emulate_ctxt *ctxt, int err)
{
	return emulate_exception(ctxt, SS_VECTOR, err, true);
}

static int emulate_ud(struct x86_emulate_ctxt *ctxt)
{
	return emulate_exception(ctxt, UD_VECTOR, 0, false);
}

static int emulate_ts(struct x86_emulate_ctxt *ctxt, int err)
{
	return emulate_exception(ctxt, TS_VECTOR, err, true);
}

static int emulate_de(struct x86_emulate_ctxt *ctxt)
{
	return emulate_exception(ctxt, DE_VECTOR, 0, false);
}

static int emulate_nm(struct x86_emulate_ctxt *ctxt)
{
	return emulate_exception(ctxt, NM_VECTOR, 0, false);
}

static u16 get_segment_selector(struct x86_emulate_ctxt *ctxt, unsigned seg)
{
	u16 selector;
	struct desc_struct desc;

	ctxt->ops->get_segment(ctxt, &selector, &desc, NULL, seg);
	return selector;
}

static void set_segment_selector(struct x86_emulate_ctxt *ctxt, u16 selector,
				 unsigned seg)
{
	u16 dummy;
	u32 base3;
	struct desc_struct desc;

	ctxt->ops->get_segment(ctxt, &dummy, &desc, &base3, seg);
	ctxt->ops->set_segment(ctxt, selector, &desc, base3, seg);
}

static inline u8 ctxt_virt_addr_bits(struct x86_emulate_ctxt *ctxt)
{
	return (ctxt->ops->get_cr(ctxt, 4) & X86_CR4_LA57) ? 57 : 48;
}

static inline bool emul_is_noncanonical_address(u64 la,
						struct x86_emulate_ctxt *ctxt)
{
	return get_canonical(la, ctxt_virt_addr_bits(ctxt)) != la;
}

/*
 * x86 defines three classes of vector instructions: explicitly
 * aligned, explicitly unaligned, and the rest, which change behaviour
 * depending on whether they're AVX encoded or not.
 *
 * Also included is CMPXCHG16B which is not a vector instruction, yet it is
 * subject to the same check.  FXSAVE and FXRSTOR are checked here too as their
 * 512 bytes of data must be aligned to a 16 byte boundary.
 */
static unsigned insn_alignment(struct x86_emulate_ctxt *ctxt, unsigned size)
{
	u64 alignment = ctxt->d & AlignMask;

	if (likely(size < 16))
		return 1;

	switch (alignment) {
	case Unaligned:
	case Avx:
		return 1;
	case Aligned16:
		return 16;
	case Aligned:
	default:
		return size;
	}
}
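
/*
 * __linearize() turns a segment:offset pair into a linear address.  In
 * 64-bit mode only canonicality is checked; otherwise the segment's type,
 * writability and limit (including expand-down segments) are validated.
 * *max_size reports how many bytes remain addressable past addr.ea, and
 * misaligned accesses fail per insn_alignment().  Violations raise #SS(0)
 * for SS-relative addresses and #GP(0) for everything else.
 */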

static __always_inline int __linearize(struct x86_emulate_ctxt *ctxt,
				       struct segmented_address addr,
				       unsigned *max_size, unsigned size,
				       bool write, bool fetch,
				       enum x86emul_mode mode, ulong *linear)
{
	struct desc_struct desc;
	bool usable;
	ulong la;
	u32 lim;
	u16 sel;
	u8  va_bits;

	la = seg_base(ctxt, addr.seg) + addr.ea;
	*max_size = 0;
	switch (mode) {
	case X86EMUL_MODE_PROT64:
		*linear = la;
		va_bits = ctxt_virt_addr_bits(ctxt);
		if (get_canonical(la, va_bits) != la)
			goto bad;

		*max_size = min_t(u64, ~0u, (1ull << va_bits) - la);
		if (size > *max_size)
			goto bad;
		break;
	default:
		*linear = la = (u32)la;
		usable = ctxt->ops->get_segment(ctxt, &sel, &desc, NULL,
						addr.seg);
		if (!usable)
			goto bad;
		/* code segment in protected mode or read-only data segment */
		if ((((ctxt->mode != X86EMUL_MODE_REAL) && (desc.type & 8))
					|| !(desc.type & 2)) && write)
			goto bad;
		/* unreadable code segment */
		if (!fetch && (desc.type & 8) && !(desc.type & 2))
			goto bad;
		lim = desc_limit_scaled(&desc);
		if (!(desc.type & 8) && (desc.type & 4)) {
			/* expand-down segment */
			if (addr.ea <= lim)
				goto bad;
			lim = desc.d ? 0xffffffff : 0xffff;
		}
		if (addr.ea > lim)
			goto bad;
		if (lim == 0xffffffff)
			*max_size = ~0u;
		else {
			*max_size = (u64)lim + 1 - addr.ea;
			if (size > *max_size)
				goto bad;
		}
		break;
	}
	if (la & (insn_alignment(ctxt, size) - 1))
		return emulate_gp(ctxt, 0);
	return X86EMUL_CONTINUE;
bad:
	if (addr.seg == VCPU_SREG_SS)
		return emulate_ss(ctxt, 0);
	else
		return emulate_gp(ctxt, 0);
}
791 792 793 794 795
static int linearize(struct x86_emulate_ctxt *ctxt,
		     struct segmented_address addr,
		     unsigned size, bool write,
		     ulong *linear)
{
	unsigned max_size;
	return __linearize(ctxt, addr, &max_size, size, write, false,
			   ctxt->mode, linear);
}

static inline int assign_eip(struct x86_emulate_ctxt *ctxt, ulong dst,
			     enum x86emul_mode mode)
{
	ulong linear;
	int rc;
	unsigned max_size;
	struct segmented_address addr = { .seg = VCPU_SREG_CS,
					   .ea = dst };

	if (ctxt->op_bytes != sizeof(unsigned long))
		addr.ea = dst & ((1UL << (ctxt->op_bytes << 3)) - 1);
	rc = __linearize(ctxt, addr, &max_size, 1, false, true, mode, &linear);
	if (rc == X86EMUL_CONTINUE)
		ctxt->_eip = addr.ea;
	return rc;
}

static inline int assign_eip_near(struct x86_emulate_ctxt *ctxt, ulong dst)
{
	return assign_eip(ctxt, dst, ctxt->mode);
}

static int assign_eip_far(struct x86_emulate_ctxt *ctxt, ulong dst,
			  const struct desc_struct *cs_desc)
{
	enum x86emul_mode mode = ctxt->mode;
	int rc;

#ifdef CONFIG_X86_64
	if (ctxt->mode >= X86EMUL_MODE_PROT16) {
		if (cs_desc->l) {
			u64 efer = 0;

			ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
			if (efer & EFER_LMA)
				mode = X86EMUL_MODE_PROT64;
		} else
			mode = X86EMUL_MODE_PROT32; /* temporary value */
	}
#endif
	if (mode == X86EMUL_MODE_PROT16 || mode == X86EMUL_MODE_PROT32)
		mode = cs_desc->d ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16;
	rc = assign_eip(ctxt, dst, mode);
	if (rc == X86EMUL_CONTINUE)
		ctxt->mode = mode;
	return rc;
}

static inline int jmp_rel(struct x86_emulate_ctxt *ctxt, int rel)
{
	return assign_eip_near(ctxt, ctxt->_eip + rel);
}

static int linear_read_system(struct x86_emulate_ctxt *ctxt, ulong linear,
			      void *data, unsigned size)
{
	return ctxt->ops->read_std(ctxt, linear, data, size, &ctxt->exception, true);
}

static int linear_write_system(struct x86_emulate_ctxt *ctxt,
			       ulong linear, void *data,
			       unsigned int size)
{
	return ctxt->ops->write_std(ctxt, linear, data, size, &ctxt->exception, true);
}

static int segmented_read_std(struct x86_emulate_ctxt *ctxt,
			      struct segmented_address addr,
			      void *data,
			      unsigned size)
{
	int rc;
	ulong linear;

	rc = linearize(ctxt, addr, size, false, &linear);
	if (rc != X86EMUL_CONTINUE)
		return rc;
	return ctxt->ops->read_std(ctxt, linear, data, size, &ctxt->exception, false);
}

static int segmented_write_std(struct x86_emulate_ctxt *ctxt,
			       struct segmented_address addr,
			       void *data,
			       unsigned int size)
{
	int rc;
	ulong linear;

	rc = linearize(ctxt, addr, size, true, &linear);
	if (rc != X86EMUL_CONTINUE)
		return rc;
	return ctxt->ops->write_std(ctxt, linear, data, size, &ctxt->exception, false);
}

/*
 * Prefetch the remaining bytes of the instruction without crossing page
 * boundary if they are not in fetch_cache yet.
 */
static int __do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt, int op_size)
{
	int rc;
	unsigned size, max_size;
	unsigned long linear;
	int cur_size = ctxt->fetch.end - ctxt->fetch.data;
	struct segmented_address addr = { .seg = VCPU_SREG_CS,
					   .ea = ctxt->eip + cur_size };

	/*
	 * We do not know exactly how many bytes will be needed, and
	 * __linearize is expensive, so fetch as much as possible.  We
	 * just have to avoid going beyond the 15 byte limit, the end
	 * of the segment, or the end of the page.
	 *
	 * __linearize is called with size 0 so that it does not do any
	 * boundary check itself.  Instead, we use max_size to check
	 * against op_size.
	 */
	rc = __linearize(ctxt, addr, &max_size, 0, false, true, ctxt->mode,
			 &linear);
	if (unlikely(rc != X86EMUL_CONTINUE))
		return rc;

	size = min_t(unsigned, 15UL ^ cur_size, max_size);
	size = min_t(unsigned, size, PAGE_SIZE - offset_in_page(linear));

	/*
	 * One instruction can only straddle two pages,
	 * and one has been loaded at the beginning of
	 * x86_decode_insn.  So, if not enough bytes
	 * still, we must have hit the 15-byte boundary.
	 */
	if (unlikely(size < op_size))
		return emulate_gp(ctxt, 0);

	rc = ctxt->ops->fetch(ctxt, linear, ctxt->fetch.end,
			      size, &ctxt->exception);
	if (unlikely(rc != X86EMUL_CONTINUE))
		return rc;
	ctxt->fetch.end += size;
	return X86EMUL_CONTINUE;
}

static __always_inline int do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt,
					       unsigned size)
{
	unsigned done_size = ctxt->fetch.end - ctxt->fetch.ptr;

	if (unlikely(done_size < size))
		return __do_insn_fetch_bytes(ctxt, size - done_size);
	else
		return X86EMUL_CONTINUE;
}

/* Fetch next part of the instruction being emulated. */
#define insn_fetch(_type, _ctxt)					\
({	_type _x;							\
									\
	rc = do_insn_fetch_bytes(_ctxt, sizeof(_type));			\
	if (rc != X86EMUL_CONTINUE)					\
		goto done;						\
	ctxt->_eip += sizeof(_type);					\
	memcpy(&_x, ctxt->fetch.ptr, sizeof(_type));			\
	ctxt->fetch.ptr += sizeof(_type);				\
	_x;								\
})

#define insn_fetch_arr(_arr, _size, _ctxt)				\
({									\
	rc = do_insn_fetch_bytes(_ctxt, _size);				\
	if (rc != X86EMUL_CONTINUE)					\
		goto done;						\
	ctxt->_eip += (_size);						\
	memcpy(_arr, ctxt->fetch.ptr, _size);				\
	ctxt->fetch.ptr += (_size);					\
})
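
/*
 * Note that both fetch macros expand inside the decoder: they rely on a
 * local "int rc" and a "done:" label in the enclosing function and bail
 * out through it on failure.  An illustrative use would be something like
 * "ctxt->b = insn_fetch(u8, ctxt);".
 */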

/*
 * Given the 'reg' portion of a ModRM byte, and a register block, return a
 * pointer into the block that addresses the relevant register.
 * @byteop specifies whether the operand is a byte; AH,CH,DH,BH are decoded
 * only when there is no REX prefix.
 */
static void *decode_register(struct x86_emulate_ctxt *ctxt, u8 modrm_reg,
			     int byteop)
{
	void *p;
	int highbyte_regs = (ctxt->rex_prefix == 0) && byteop;

	if (highbyte_regs && modrm_reg >= 4 && modrm_reg < 8)
		p = (unsigned char *)reg_rmw(ctxt, modrm_reg & 3) + 1;
	else
		p = reg_rmw(ctxt, modrm_reg);
	return p;
}

static int read_descriptor(struct x86_emulate_ctxt *ctxt,
			   struct segmented_address addr,
			   u16 *size, unsigned long *address, int op_bytes)
{
	int rc;

	if (op_bytes == 2)
		op_bytes = 3;
	*address = 0;
	rc = segmented_read_std(ctxt, addr, size, 2);
	if (rc != X86EMUL_CONTINUE)
		return rc;
	addr.ea += 2;
	rc = segmented_read_std(ctxt, addr, address, op_bytes);
	return rc;
}

FASTOP2(add);
FASTOP2(or);
FASTOP2(adc);
FASTOP2(sbb);
FASTOP2(and);
FASTOP2(sub);
FASTOP2(xor);
FASTOP2(cmp);
FASTOP2(test);

FASTOP1SRC2(mul, mul_ex);
FASTOP1SRC2(imul, imul_ex);
FASTOP1SRC2EX(div, div_ex);
FASTOP1SRC2EX(idiv, idiv_ex);

FASTOP3WCL(shld);
FASTOP3WCL(shrd);

FASTOP2W(imul);

FASTOP1(not);
FASTOP1(neg);
FASTOP1(inc);
FASTOP1(dec);

FASTOP2CL(rol);
FASTOP2CL(ror);
FASTOP2CL(rcl);
FASTOP2CL(rcr);
FASTOP2CL(shl);
FASTOP2CL(shr);
FASTOP2CL(sar);

FASTOP2W(bsf);
FASTOP2W(bsr);
FASTOP2W(bt);
FASTOP2W(bts);
FASTOP2W(btr);
FASTOP2W(btc);

FASTOP2(xadd);

FASTOP2R(cmp, cmp_r);

static int em_bsf_c(struct x86_emulate_ctxt *ctxt)
{
	/* If src is zero, do not writeback, but update flags */
	if (ctxt->src.val == 0)
		ctxt->dst.type = OP_NONE;
	return fastop(ctxt, em_bsf);
}

static int em_bsr_c(struct x86_emulate_ctxt *ctxt)
{
	/* If src is zero, do not writeback, but update flags */
	if (ctxt->src.val == 0)
		ctxt->dst.type = OP_NONE;
	return fastop(ctxt, em_bsr);
}
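
/*
 * test_cc() materializes the saved guest flags with push/popf and then
 * calls (via CALL_NOSPEC) the em_setcc stub selected by the condition
 * code's low nibble; the stub's SETcc instruction leaves 0 or 1 in %al,
 * which comes back in rc.
 */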

static __always_inline u8 test_cc(unsigned int condition, unsigned long flags)
{
	u8 rc;
	void (*fop)(void) = (void *)em_setcc + SETCC_ALIGN * (condition & 0xf);

	flags = (flags & EFLAGS_MASK) | X86_EFLAGS_IF;
	asm("push %[flags]; popf; " CALL_NOSPEC
	    : "=a"(rc) : [thunk_target]"r"(fop), [flags]"r"(flags));
	return rc;
}

static void fetch_register_operand(struct operand *op)
{
	switch (op->bytes) {
	case 1:
		op->val = *(u8 *)op->addr.reg;
		break;
	case 2:
		op->val = *(u16 *)op->addr.reg;
		break;
	case 4:
		op->val = *(u32 *)op->addr.reg;
		break;
	case 8:
		op->val = *(u64 *)op->addr.reg;
		break;
	}
}

static void emulator_get_fpu(void)
{
	fpregs_lock();

	fpregs_assert_state_consistent();
	if (test_thread_flag(TIF_NEED_FPU_LOAD))
		switch_fpu_return();
}

static void emulator_put_fpu(void)
{
	fpregs_unlock();
}

static void read_sse_reg(sse128_t *data, int reg)
{
	emulator_get_fpu();
	switch (reg) {
	case 0: asm("movdqa %%xmm0, %0" : "=m"(*data)); break;
	case 1: asm("movdqa %%xmm1, %0" : "=m"(*data)); break;
	case 2: asm("movdqa %%xmm2, %0" : "=m"(*data)); break;
	case 3: asm("movdqa %%xmm3, %0" : "=m"(*data)); break;
	case 4: asm("movdqa %%xmm4, %0" : "=m"(*data)); break;
	case 5: asm("movdqa %%xmm5, %0" : "=m"(*data)); break;
	case 6: asm("movdqa %%xmm6, %0" : "=m"(*data)); break;
	case 7: asm("movdqa %%xmm7, %0" : "=m"(*data)); break;
#ifdef CONFIG_X86_64
	case 8: asm("movdqa %%xmm8, %0" : "=m"(*data)); break;
	case 9: asm("movdqa %%xmm9, %0" : "=m"(*data)); break;
	case 10: asm("movdqa %%xmm10, %0" : "=m"(*data)); break;
	case 11: asm("movdqa %%xmm11, %0" : "=m"(*data)); break;
	case 12: asm("movdqa %%xmm12, %0" : "=m"(*data)); break;
	case 13: asm("movdqa %%xmm13, %0" : "=m"(*data)); break;
	case 14: asm("movdqa %%xmm14, %0" : "=m"(*data)); break;
	case 15: asm("movdqa %%xmm15, %0" : "=m"(*data)); break;
#endif
	default: BUG();
	}
	emulator_put_fpu();
}

static void write_sse_reg(sse128_t *data, int reg)
{
	emulator_get_fpu();
	switch (reg) {
	case 0: asm("movdqa %0, %%xmm0" : : "m"(*data)); break;
	case 1: asm("movdqa %0, %%xmm1" : : "m"(*data)); break;
	case 2: asm("movdqa %0, %%xmm2" : : "m"(*data)); break;
	case 3: asm("movdqa %0, %%xmm3" : : "m"(*data)); break;
	case 4: asm("movdqa %0, %%xmm4" : : "m"(*data)); break;
	case 5: asm("movdqa %0, %%xmm5" : : "m"(*data)); break;
	case 6: asm("movdqa %0, %%xmm6" : : "m"(*data)); break;
	case 7: asm("movdqa %0, %%xmm7" : : "m"(*data)); break;
#ifdef CONFIG_X86_64
	case 8: asm("movdqa %0, %%xmm8" : : "m"(*data)); break;
	case 9: asm("movdqa %0, %%xmm9" : : "m"(*data)); break;
	case 10: asm("movdqa %0, %%xmm10" : : "m"(*data)); break;
	case 11: asm("movdqa %0, %%xmm11" : : "m"(*data)); break;
	case 12: asm("movdqa %0, %%xmm12" : : "m"(*data)); break;
	case 13: asm("movdqa %0, %%xmm13" : : "m"(*data)); break;
	case 14: asm("movdqa %0, %%xmm14" : : "m"(*data)); break;
	case 15: asm("movdqa %0, %%xmm15" : : "m"(*data)); break;
#endif
	default: BUG();
	}
	emulator_put_fpu();
}

static void read_mmx_reg(u64 *data, int reg)
{
	emulator_get_fpu();
	switch (reg) {
	case 0: asm("movq %%mm0, %0" : "=m"(*data)); break;
	case 1: asm("movq %%mm1, %0" : "=m"(*data)); break;
	case 2: asm("movq %%mm2, %0" : "=m"(*data)); break;
	case 3: asm("movq %%mm3, %0" : "=m"(*data)); break;
	case 4: asm("movq %%mm4, %0" : "=m"(*data)); break;
	case 5: asm("movq %%mm5, %0" : "=m"(*data)); break;
	case 6: asm("movq %%mm6, %0" : "=m"(*data)); break;
	case 7: asm("movq %%mm7, %0" : "=m"(*data)); break;
	default: BUG();
	}
	emulator_put_fpu();
}

static void write_mmx_reg(u64 *data, int reg)
{
	emulator_get_fpu();
	switch (reg) {
	case 0: asm("movq %0, %%mm0" : : "m"(*data)); break;
	case 1: asm("movq %0, %%mm1" : : "m"(*data)); break;
	case 2: asm("movq %0, %%mm2" : : "m"(*data)); break;
	case 3: asm("movq %0, %%mm3" : : "m"(*data)); break;
	case 4: asm("movq %0, %%mm4" : : "m"(*data)); break;
	case 5: asm("movq %0, %%mm5" : : "m"(*data)); break;
	case 6: asm("movq %0, %%mm6" : : "m"(*data)); break;
	case 7: asm("movq %0, %%mm7" : : "m"(*data)); break;
	default: BUG();
	}
	emulator_put_fpu();
}

static int em_fninit(struct x86_emulate_ctxt *ctxt)
{
	if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
		return emulate_nm(ctxt);

	emulator_get_fpu();
	asm volatile("fninit");
	emulator_put_fpu();
	return X86EMUL_CONTINUE;
}

static int em_fnstcw(struct x86_emulate_ctxt *ctxt)
{
	u16 fcw;

	if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
		return emulate_nm(ctxt);

	emulator_get_fpu();
	asm volatile("fnstcw %0": "+m"(fcw));
	emulator_put_fpu();

	ctxt->dst.val = fcw;

	return X86EMUL_CONTINUE;
}

static int em_fnstsw(struct x86_emulate_ctxt *ctxt)
{
	u16 fsw;

	if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
		return emulate_nm(ctxt);

	emulator_get_fpu();
	asm volatile("fnstsw %0": "+m"(fsw));
	emulator_put_fpu();

	ctxt->dst.val = fsw;

	return X86EMUL_CONTINUE;
}

static void decode_register_operand(struct x86_emulate_ctxt *ctxt,
				    struct operand *op)
{
	unsigned reg = ctxt->modrm_reg;

	if (!(ctxt->d & ModRM))
		reg = (ctxt->b & 7) | ((ctxt->rex_prefix & 1) << 3);

	if (ctxt->d & Sse) {
		op->type = OP_XMM;
		op->bytes = 16;
		op->addr.xmm = reg;
		read_sse_reg(&op->vec_val, reg);
		return;
	}
	if (ctxt->d & Mmx) {
		reg &= 7;
		op->type = OP_MM;
		op->bytes = 8;
		op->addr.mm = reg;
		return;
	}

	op->type = OP_REG;
	op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
	op->addr.reg = decode_register(ctxt, reg, ctxt->d & ByteOp);

	fetch_register_operand(op);
	op->orig_val = op->val;
}

static void adjust_modrm_seg(struct x86_emulate_ctxt *ctxt, int base_reg)
{
	if (base_reg == VCPU_REGS_RSP || base_reg == VCPU_REGS_RBP)
		ctxt->modrm_seg = VCPU_SREG_SS;
}
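
/*
 * decode_modrm() consumes the ModRM byte already fetched into ctxt->modrm,
 * plus any SIB byte and displacement, folding in the REX prefix bits.  A
 * register-direct encoding (mod == 3, or NoMod) yields a register/XMM/MMX
 * operand; everything else produces an effective address in
 * op->addr.mem.ea with the default segment recorded in ctxt->modrm_seg.
 */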

static int decode_modrm(struct x86_emulate_ctxt *ctxt,
			struct operand *op)
{
	u8 sib;
	int index_reg, base_reg, scale;
	int rc = X86EMUL_CONTINUE;
	ulong modrm_ea = 0;

	ctxt->modrm_reg = ((ctxt->rex_prefix << 1) & 8); /* REX.R */
	index_reg = (ctxt->rex_prefix << 2) & 8; /* REX.X */
	base_reg = (ctxt->rex_prefix << 3) & 8; /* REX.B */

	ctxt->modrm_mod = (ctxt->modrm & 0xc0) >> 6;
	ctxt->modrm_reg |= (ctxt->modrm & 0x38) >> 3;
	ctxt->modrm_rm = base_reg | (ctxt->modrm & 0x07);
	ctxt->modrm_seg = VCPU_SREG_DS;

	if (ctxt->modrm_mod == 3 || (ctxt->d & NoMod)) {
		op->type = OP_REG;
		op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
		op->addr.reg = decode_register(ctxt, ctxt->modrm_rm,
				ctxt->d & ByteOp);
		if (ctxt->d & Sse) {
			op->type = OP_XMM;
			op->bytes = 16;
			op->addr.xmm = ctxt->modrm_rm;
			read_sse_reg(&op->vec_val, ctxt->modrm_rm);
			return rc;
		}
		if (ctxt->d & Mmx) {
			op->type = OP_MM;
			op->bytes = 8;
			op->addr.mm = ctxt->modrm_rm & 7;
			return rc;
		}
		fetch_register_operand(op);
		return rc;
	}

	op->type = OP_MEM;

	if (ctxt->ad_bytes == 2) {
		unsigned bx = reg_read(ctxt, VCPU_REGS_RBX);
		unsigned bp = reg_read(ctxt, VCPU_REGS_RBP);
		unsigned si = reg_read(ctxt, VCPU_REGS_RSI);
		unsigned di = reg_read(ctxt, VCPU_REGS_RDI);

		/* 16-bit ModR/M decode. */
		switch (ctxt->modrm_mod) {
		case 0:
			if (ctxt->modrm_rm == 6)
				modrm_ea += insn_fetch(u16, ctxt);
			break;
		case 1:
			modrm_ea += insn_fetch(s8, ctxt);
			break;
		case 2:
			modrm_ea += insn_fetch(u16, ctxt);
			break;
		}
		switch (ctxt->modrm_rm) {
		case 0:
			modrm_ea += bx + si;
			break;
		case 1:
			modrm_ea += bx + di;
			break;
		case 2:
			modrm_ea += bp + si;
			break;
		case 3:
			modrm_ea += bp + di;
			break;
		case 4:
			modrm_ea += si;
			break;
		case 5:
			modrm_ea += di;
			break;
		case 6:
			if (ctxt->modrm_mod != 0)
				modrm_ea += bp;
			break;
		case 7:
			modrm_ea += bx;
			break;
		}
		if (ctxt->modrm_rm == 2 || ctxt->modrm_rm == 3 ||
		    (ctxt->modrm_rm == 6 && ctxt->modrm_mod != 0))
			ctxt->modrm_seg = VCPU_SREG_SS;
		modrm_ea = (u16)modrm_ea;
	} else {
		/* 32/64-bit ModR/M decode. */
		if ((ctxt->modrm_rm & 7) == 4) {
			sib = insn_fetch(u8, ctxt);
			index_reg |= (sib >> 3) & 7;
			base_reg |= sib & 7;
			scale = sib >> 6;

			if ((base_reg & 7) == 5 && ctxt->modrm_mod == 0)
				modrm_ea += insn_fetch(s32, ctxt);
			else {
				modrm_ea += reg_read(ctxt, base_reg);
				adjust_modrm_seg(ctxt, base_reg);
				/* Increment ESP on POP [ESP] */
				if ((ctxt->d & IncSP) &&
				    base_reg == VCPU_REGS_RSP)
					modrm_ea += ctxt->op_bytes;
			}
			if (index_reg != 4)
				modrm_ea += reg_read(ctxt, index_reg) << scale;
		} else if ((ctxt->modrm_rm & 7) == 5 && ctxt->modrm_mod == 0) {
			modrm_ea += insn_fetch(s32, ctxt);
			if (ctxt->mode == X86EMUL_MODE_PROT64)
				ctxt->rip_relative = 1;
		} else {
			base_reg = ctxt->modrm_rm;
			modrm_ea += reg_read(ctxt, base_reg);
			adjust_modrm_seg(ctxt, base_reg);
		}
		switch (ctxt->modrm_mod) {
		case 1:
			modrm_ea += insn_fetch(s8, ctxt);
			break;
		case 2:
			modrm_ea += insn_fetch(s32, ctxt);
			break;
		}
	}
	op->addr.mem.ea = modrm_ea;
	if (ctxt->ad_bytes != 8)
		ctxt->memop.addr.mem.ea = (u32)ctxt->memop.addr.mem.ea;

done:
	return rc;
}

static int decode_abs(struct x86_emulate_ctxt *ctxt,
		      struct operand *op)
{
	int rc = X86EMUL_CONTINUE;

	op->type = OP_MEM;
	switch (ctxt->ad_bytes) {
	case 2:
		op->addr.mem.ea = insn_fetch(u16, ctxt);
		break;
	case 4:
		op->addr.mem.ea = insn_fetch(u32, ctxt);
		break;
	case 8:
		op->addr.mem.ea = insn_fetch(u64, ctxt);
		break;
	}
done:
	return rc;
}

static void fetch_bit_operand(struct x86_emulate_ctxt *ctxt)
{
	long sv = 0, mask;

	if (ctxt->dst.type == OP_MEM && ctxt->src.type == OP_REG) {
		mask = ~((long)ctxt->dst.bytes * 8 - 1);

		if (ctxt->src.bytes == 2)
			sv = (s16)ctxt->src.val & (s16)mask;
		else if (ctxt->src.bytes == 4)
			sv = (s32)ctxt->src.val & (s32)mask;
		else
			sv = (s64)ctxt->src.val & (s64)mask;

		ctxt->dst.addr.mem.ea = address_mask(ctxt,
					   ctxt->dst.addr.mem.ea + (sv >> 3));
	}

	/* only subword offset */
	ctxt->src.val &= (ctxt->dst.bytes << 3) - 1;
}

static int read_emulated(struct x86_emulate_ctxt *ctxt,
			 unsigned long addr, void *dest, unsigned size)
{
	int rc;
	struct read_cache *mc = &ctxt->mem_read;

	if (mc->pos < mc->end)
		goto read_cached;

	WARN_ON((mc->end + size) >= sizeof(mc->data));

	rc = ctxt->ops->read_emulated(ctxt, addr, mc->data + mc->end, size,
				      &ctxt->exception);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	mc->end += size;

read_cached:
	memcpy(dest, mc->data + mc->pos, size);
	mc->pos += size;
	return X86EMUL_CONTINUE;
}

static int segmented_read(struct x86_emulate_ctxt *ctxt,
			  struct segmented_address addr,
			  void *data,
			  unsigned size)
{
	int rc;
	ulong linear;

	rc = linearize(ctxt, addr, size, false, &linear);
	if (rc != X86EMUL_CONTINUE)
		return rc;
	return read_emulated(ctxt, linear, data, size);
}

static int segmented_write(struct x86_emulate_ctxt *ctxt,
			   struct segmented_address addr,
			   const void *data,
			   unsigned size)
{
	int rc;
	ulong linear;

	rc = linearize(ctxt, addr, size, true, &linear);
	if (rc != X86EMUL_CONTINUE)
		return rc;
	return ctxt->ops->write_emulated(ctxt, linear, data, size,
					 &ctxt->exception);
}

static int segmented_cmpxchg(struct x86_emulate_ctxt *ctxt,
			     struct segmented_address addr,
			     const void *orig_data, const void *data,
			     unsigned size)
{
	int rc;
	ulong linear;

	rc = linearize(ctxt, addr, size, true, &linear);
	if (rc != X86EMUL_CONTINUE)
		return rc;
	return ctxt->ops->cmpxchg_emulated(ctxt, linear, orig_data, data,
					   size, &ctxt->exception);
}

static int pio_in_emulated(struct x86_emulate_ctxt *ctxt,
			   unsigned int size, unsigned short port,
			   void *dest)
{
	struct read_cache *rc = &ctxt->io_read;

	if (rc->pos == rc->end) { /* refill pio read ahead */
		unsigned int in_page, n;
		unsigned int count = ctxt->rep_prefix ?
			address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) : 1;
		in_page = (ctxt->eflags & X86_EFLAGS_DF) ?
			offset_in_page(reg_read(ctxt, VCPU_REGS_RDI)) :
			PAGE_SIZE - offset_in_page(reg_read(ctxt, VCPU_REGS_RDI));
		n = min3(in_page, (unsigned int)sizeof(rc->data) / size, count);
		if (n == 0)
			n = 1;
		rc->pos = rc->end = 0;
		if (!ctxt->ops->pio_in_emulated(ctxt, size, port, rc->data, n))
			return 0;
		rc->end = n * size;
	}

	if (ctxt->rep_prefix && (ctxt->d & String) &&
	    !(ctxt->eflags & X86_EFLAGS_DF)) {
		ctxt->dst.data = rc->data + rc->pos;
		ctxt->dst.type = OP_MEM_STR;
		ctxt->dst.count = (rc->end - rc->pos) / size;
		rc->pos = rc->end;
	} else {
		memcpy(dest, rc->data + rc->pos, size);
		rc->pos += size;
	}
	return 1;
}

static int read_interrupt_descriptor(struct x86_emulate_ctxt *ctxt,
				     u16 index, struct desc_struct *desc)
{
	struct desc_ptr dt;
	ulong addr;

	ctxt->ops->get_idt(ctxt, &dt);

	if (dt.size < index * 8 + 7)
		return emulate_gp(ctxt, index << 3 | 0x2);

	addr = dt.address + index * 8;
	return linear_read_system(ctxt, addr, desc, sizeof(*desc));
}

static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt,
				     u16 selector, struct desc_ptr *dt)
{
	const struct x86_emulate_ops *ops = ctxt->ops;
	u32 base3 = 0;

	if (selector & 1 << 2) {
		struct desc_struct desc;
		u16 sel;

		memset(dt, 0, sizeof(*dt));
		if (!ops->get_segment(ctxt, &sel, &desc, &base3,
				      VCPU_SREG_LDTR))
			return;

		dt->size = desc_limit_scaled(&desc); /* what if limit > 65535? */
		dt->address = get_desc_base(&desc) | ((u64)base3 << 32);
	} else
		ops->get_gdt(ctxt, dt);
}

static int get_descriptor_ptr(struct x86_emulate_ctxt *ctxt,
			      u16 selector, ulong *desc_addr_p)
{
	struct desc_ptr dt;
	u16 index = selector >> 3;
	ulong addr;

	get_descriptor_table_ptr(ctxt, selector, &dt);

	if (dt.size < index * 8 + 7)
		return emulate_gp(ctxt, selector & 0xfffc);

	addr = dt.address + index * 8;

#ifdef CONFIG_X86_64
	if (addr >> 32 != 0) {
		u64 efer = 0;

		ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
		if (!(efer & EFER_LMA))
			addr &= (u32)-1;
	}
#endif

	*desc_addr_p = addr;
	return X86EMUL_CONTINUE;
}

/* allowed just for 8-byte segment descriptors */
static int read_segment_descriptor(struct x86_emulate_ctxt *ctxt,
				   u16 selector, struct desc_struct *desc,
				   ulong *desc_addr_p)
{
	int rc;

	rc = get_descriptor_ptr(ctxt, selector, desc_addr_p);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	return linear_read_system(ctxt, *desc_addr_p, desc, sizeof(*desc));
}

/* allowed just for 8-byte segment descriptors */
static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt,
				    u16 selector, struct desc_struct *desc)
{
	int rc;
	ulong addr;

	rc = get_descriptor_ptr(ctxt, selector, &addr);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	return linear_write_system(ctxt, addr, desc, sizeof(*desc));
}
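
/*
 * __load_segment_descriptor() implements the protected-mode checks for
 * loading @selector into segment register @seg: it fetches the descriptor,
 * validates it against the CPL/RPL/DPL rules for SS, CS, TR, LDTR and data
 * segments, sets the accessed (or TR busy) bit where required, and finally
 * installs it via ->set_segment().  Real mode, VM86 and NULL selectors
 * bypass the checks through the early "goto load" paths.
 */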

1658
static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
1659
				     u16 selector, int seg, u8 cpl,
1660
				     enum x86_transfer_type transfer,
1661
				     struct desc_struct *desc)
1662
{
1663
	struct desc_struct seg_desc, old_desc;
1664
	u8 dpl, rpl;
1665 1666 1667
	unsigned err_vec = GP_VECTOR;
	u32 err_code = 0;
	bool null_selector = !(selector & ~0x3); /* 0000-0003 are null */
1668
	ulong desc_addr;
1669
	int ret;
1670
	u16 dummy;
1671
	u32 base3 = 0;
1672

1673
	memset(&seg_desc, 0, sizeof(seg_desc));
1674

1675 1676 1677
	if (ctxt->mode == X86EMUL_MODE_REAL) {
		/* set real mode segment descriptor (keep limit etc. for
		 * unreal mode) */
1678
		ctxt->ops->get_segment(ctxt, &dummy, &seg_desc, NULL, seg);
1679 1680
		set_desc_base(&seg_desc, selector << 4);
		goto load;
1681 1682 1683 1684 1685 1686 1687 1688 1689
	} else if (seg <= VCPU_SREG_GS && ctxt->mode == X86EMUL_MODE_VM86) {
		/* VM86 needs a clean new segment descriptor */
		set_desc_base(&seg_desc, selector << 4);
		set_desc_limit(&seg_desc, 0xffff);
		seg_desc.type = 3;
		seg_desc.p = 1;
		seg_desc.s = 1;
		seg_desc.dpl = 3;
		goto load;
1690 1691
	}

1692 1693
	rpl = selector & 3;

1694 1695 1696 1697
	/* TR should be in GDT only */
	if (seg == VCPU_SREG_TR && (selector & (1 << 2)))
		goto exception;

1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719
	/* NULL selector is not valid for TR, CS and (except for long mode) SS */
	if (null_selector) {
		if (seg == VCPU_SREG_CS || seg == VCPU_SREG_TR)
			goto exception;

		if (seg == VCPU_SREG_SS) {
			if (ctxt->mode != X86EMUL_MODE_PROT64 || rpl != cpl)
				goto exception;

			/*
			 * ctxt->ops->set_segment expects the CPL to be in
			 * SS.DPL, so fake an expand-up 32-bit data segment.
			 */
			seg_desc.type = 3;
			seg_desc.p = 1;
			seg_desc.s = 1;
			seg_desc.dpl = cpl;
			seg_desc.d = 1;
			seg_desc.g = 1;
		}

		/* Skip all following checks */
1720
		goto load;
1721
	}
1722

1723
	ret = read_segment_descriptor(ctxt, selector, &seg_desc, &desc_addr);
1724 1725 1726 1727
	if (ret != X86EMUL_CONTINUE)
		return ret;

	err_code = selector & 0xfffc;
1728 1729
	err_vec = (transfer == X86_TRANSFER_TASK_SWITCH) ? TS_VECTOR :
							   GP_VECTOR;
1730

G
Guo Chao 已提交
1731
	/* can't load system descriptor into segment selector */
1732 1733 1734
	if (seg <= VCPU_SREG_GS && !seg_desc.s) {
		if (transfer == X86_TRANSFER_CALL_JMP)
			return X86EMUL_UNHANDLEABLE;
1735
		goto exception;
1736
	}
1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747

	dpl = seg_desc.dpl;

	switch (seg) {
	case VCPU_SREG_SS:
		/*
		 * segment is not a writable data segment or segment
		 * selector's RPL != CPL or segment selector's RPL != CPL
		 */
		if (rpl != cpl || (seg_desc.type & 0xa) != 0x2 || dpl != cpl)
			goto exception;
A
Avi Kivity 已提交
1748
		break;
1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761
	case VCPU_SREG_CS:
		if (!(seg_desc.type & 8))
			goto exception;

		if (seg_desc.type & 4) {
			/* conforming */
			if (dpl > cpl)
				goto exception;
		} else {
			/* nonconforming */
			if (rpl > cpl || dpl != cpl)
				goto exception;
		}
1762 1763 1764 1765 1766 1767 1768 1769 1770
		/* in long-mode d/b must be clear if l is set */
		if (seg_desc.d && seg_desc.l) {
			u64 efer = 0;

			ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
			if (efer & EFER_LMA)
				goto exception;
		}

1771 1772
		/* CS(RPL) <- CPL */
		selector = (selector & 0xfffc) | cpl;
A
Avi Kivity 已提交
1773
		break;
1774 1775 1776
	case VCPU_SREG_TR:
		if (seg_desc.s || (seg_desc.type != 1 && seg_desc.type != 9))
			goto exception;
1777 1778 1779 1780
		if (!seg_desc.p) {
			err_vec = NP_VECTOR;
			goto exception;
		}
1781 1782 1783 1784 1785 1786
		old_desc = seg_desc;
		seg_desc.type |= 2; /* busy */
		ret = ctxt->ops->cmpxchg_emulated(ctxt, desc_addr, &old_desc, &seg_desc,
						  sizeof(seg_desc), &ctxt->exception);
		if (ret != X86EMUL_CONTINUE)
			return ret;
1787 1788 1789 1790 1791 1792
		break;
	case VCPU_SREG_LDTR:
		if (seg_desc.s || seg_desc.type != 2)
			goto exception;
		break;
	default: /*  DS, ES, FS, or GS */
		/*
		 * segment is not a data or readable code segment or
		 * ((segment is a data or nonconforming code segment)
		 * and (both RPL and CPL > DPL))
		 */
		if ((seg_desc.type & 0xa) == 0x8 ||
		    (((seg_desc.type & 0xc) != 0xc) &&
		     (rpl > dpl && cpl > dpl)))
			goto exception;
		break;
	}

	if (!seg_desc.p) {
		err_vec = (seg == VCPU_SREG_SS) ? SS_VECTOR : NP_VECTOR;
		goto exception;
	}

	if (seg_desc.s) {
		/* mark segment as accessed */
		if (!(seg_desc.type & 1)) {
			seg_desc.type |= 1;
			ret = write_segment_descriptor(ctxt, selector,
						       &seg_desc);
			if (ret != X86EMUL_CONTINUE)
				return ret;
		}
	} else if (ctxt->mode == X86EMUL_MODE_PROT64) {
		ret = linear_read_system(ctxt, desc_addr+8, &base3, sizeof(base3));
		if (ret != X86EMUL_CONTINUE)
			return ret;
		if (emul_is_noncanonical_address(get_desc_base(&seg_desc) |
				((u64)base3 << 32), ctxt))
			return emulate_gp(ctxt, 0);
	}
load:
	ctxt->ops->set_segment(ctxt, selector, &seg_desc, base3, seg);
	if (desc)
		*desc = seg_desc;
	return X86EMUL_CONTINUE;
exception:
	return emulate_exception(ctxt, err_vec, err_code, true);
}

static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
				   u16 selector, int seg)
{
	u8 cpl = ctxt->ops->cpl(ctxt);

	/*
	 * None of MOV, POP and LSS can load a NULL selector in CPL=3, but
	 * they can load it at CPL<3 (Intel's manual says only LSS can,
	 * but it's wrong).
	 *
	 * However, the Intel manual says that putting IST=1/DPL=3 in
	 * an interrupt gate will result in SS=3 (the AMD manual instead
	 * says it doesn't), so allow SS=3 in __load_segment_descriptor
	 * and only forbid it here.
	 */
	if (seg == VCPU_SREG_SS && selector == 3 &&
	    ctxt->mode == X86EMUL_MODE_PROT64)
		return emulate_exception(ctxt, GP_VECTOR, 0, true);

	return __load_segment_descriptor(ctxt, selector, seg, cpl,
					 X86_TRANSFER_NONE, NULL);
}

static void write_register_operand(struct operand *op)
{
	return assign_register(op->addr.reg, op->val, op->bytes);
}

static int writeback(struct x86_emulate_ctxt *ctxt, struct operand *op)
{
	switch (op->type) {
	case OP_REG:
		write_register_operand(op);
		break;
	case OP_MEM:
		if (ctxt->lock_prefix)
			return segmented_cmpxchg(ctxt,
						 op->addr.mem,
						 &op->orig_val,
						 &op->val,
						 op->bytes);
		else
			return segmented_write(ctxt,
					       op->addr.mem,
					       &op->val,
					       op->bytes);
		break;
	case OP_MEM_STR:
		return segmented_write(ctxt,
				       op->addr.mem,
				       op->data,
				       op->bytes * op->count);
		break;
	case OP_XMM:
		write_sse_reg(&op->vec_val, op->addr.xmm);
		break;
	case OP_MM:
		write_mmx_reg(&op->mm_val, op->addr.mm);
		break;
	case OP_NONE:
		/* no writeback */
		break;
	default:
		break;
	}
	return X86EMUL_CONTINUE;
}

static int push(struct x86_emulate_ctxt *ctxt, void *data, int bytes)
{
	struct segmented_address addr;

	rsp_increment(ctxt, -bytes);
	addr.ea = reg_read(ctxt, VCPU_REGS_RSP) & stack_mask(ctxt);
	addr.seg = VCPU_SREG_SS;

	return segmented_write(ctxt, addr, data, bytes);
}

static int em_push(struct x86_emulate_ctxt *ctxt)
{
	/* Disable writeback. */
	ctxt->dst.type = OP_NONE;
	return push(ctxt, &ctxt->src.val, ctxt->op_bytes);
}

static int emulate_pop(struct x86_emulate_ctxt *ctxt,
		       void *dest, int len)
{
	int rc;
	struct segmented_address addr;

	addr.ea = reg_read(ctxt, VCPU_REGS_RSP) & stack_mask(ctxt);
	addr.seg = VCPU_SREG_SS;
	rc = segmented_read(ctxt, addr, dest, len);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	rsp_increment(ctxt, len);
	return rc;
}

static int em_pop(struct x86_emulate_ctxt *ctxt)
{
	return emulate_pop(ctxt, &ctxt->dst.val, ctxt->op_bytes);
}

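/*
 * POPF: only a subset of EFLAGS may be modified, depending on the current
 * mode and privilege level.  IOPL is writable only at CPL 0, IF only when
 * CPL <= IOPL (or unconditionally in real mode), and in VM86 mode with
 * IOPL < 3 the instruction faults with #GP.
 */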
static int emulate_popf(struct x86_emulate_ctxt *ctxt,
			void *dest, int len)
{
	int rc;
	unsigned long val, change_mask;
	int iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> X86_EFLAGS_IOPL_BIT;
	int cpl = ctxt->ops->cpl(ctxt);

	rc = emulate_pop(ctxt, &val, len);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	change_mask = X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
		      X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_OF |
		      X86_EFLAGS_TF | X86_EFLAGS_DF | X86_EFLAGS_NT |
		      X86_EFLAGS_AC | X86_EFLAGS_ID;

	switch(ctxt->mode) {
	case X86EMUL_MODE_PROT64:
	case X86EMUL_MODE_PROT32:
	case X86EMUL_MODE_PROT16:
		if (cpl == 0)
			change_mask |= X86_EFLAGS_IOPL;
		if (cpl <= iopl)
			change_mask |= X86_EFLAGS_IF;
		break;
	case X86EMUL_MODE_VM86:
		if (iopl < 3)
			return emulate_gp(ctxt, 0);
		change_mask |= X86_EFLAGS_IF;
		break;
	default: /* real mode */
		change_mask |= (X86_EFLAGS_IOPL | X86_EFLAGS_IF);
		break;
	}

	*(unsigned long *)dest =
		(ctxt->eflags & ~change_mask) | (val & change_mask);

	return rc;
}

static int em_popf(struct x86_emulate_ctxt *ctxt)
{
	ctxt->dst.type = OP_REG;
	ctxt->dst.addr.reg = &ctxt->eflags;
	ctxt->dst.bytes = ctxt->op_bytes;
	return emulate_popf(ctxt, &ctxt->dst.val, ctxt->op_bytes);
}

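/*
 * ENTER: only nesting level 0 is handled; a non-zero level is reported as
 * unhandleable.  The emulation pushes the old RBP, copies RSP into RBP and
 * then reserves frame_size bytes on the stack.
 */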
static int em_enter(struct x86_emulate_ctxt *ctxt)
{
	int rc;
	unsigned frame_size = ctxt->src.val;
	unsigned nesting_level = ctxt->src2.val & 31;
	ulong rbp;

	if (nesting_level)
		return X86EMUL_UNHANDLEABLE;

	rbp = reg_read(ctxt, VCPU_REGS_RBP);
	rc = push(ctxt, &rbp, stack_size(ctxt));
	if (rc != X86EMUL_CONTINUE)
		return rc;
	assign_masked(reg_rmw(ctxt, VCPU_REGS_RBP), reg_read(ctxt, VCPU_REGS_RSP),
		      stack_mask(ctxt));
	assign_masked(reg_rmw(ctxt, VCPU_REGS_RSP),
		      reg_read(ctxt, VCPU_REGS_RSP) - frame_size,
		      stack_mask(ctxt));
	return X86EMUL_CONTINUE;
}

static int em_leave(struct x86_emulate_ctxt *ctxt)
{
	assign_masked(reg_rmw(ctxt, VCPU_REGS_RSP), reg_read(ctxt, VCPU_REGS_RBP),
		      stack_mask(ctxt));
	return emulate_pop(ctxt, reg_rmw(ctxt, VCPU_REGS_RBP), ctxt->op_bytes);
}

static int em_push_sreg(struct x86_emulate_ctxt *ctxt)
{
	int seg = ctxt->src2.val;

	ctxt->src.val = get_segment_selector(ctxt, seg);
	if (ctxt->op_bytes == 4) {
		rsp_increment(ctxt, -2);
		ctxt->op_bytes = 2;
	}

	return em_push(ctxt);
}

static int em_pop_sreg(struct x86_emulate_ctxt *ctxt)
{
	int seg = ctxt->src2.val;
	unsigned long selector;
	int rc;

	rc = emulate_pop(ctxt, &selector, 2);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	if (ctxt->modrm_reg == VCPU_SREG_SS)
		ctxt->interruptibility = KVM_X86_SHADOW_INT_MOV_SS;
	if (ctxt->op_bytes > 2)
		rsp_increment(ctxt, ctxt->op_bytes - 2);

	rc = load_segment_descriptor(ctxt, (u16)selector, seg);
	return rc;
}

static int em_pusha(struct x86_emulate_ctxt *ctxt)
{
	unsigned long old_esp = reg_read(ctxt, VCPU_REGS_RSP);
	int rc = X86EMUL_CONTINUE;
	int reg = VCPU_REGS_RAX;

	while (reg <= VCPU_REGS_RDI) {
		(reg == VCPU_REGS_RSP) ?
		(ctxt->src.val = old_esp) : (ctxt->src.val = reg_read(ctxt, reg));

		rc = em_push(ctxt);
		if (rc != X86EMUL_CONTINUE)
			return rc;

		++reg;
	}

	return rc;
}

static int em_pushf(struct x86_emulate_ctxt *ctxt)
{
	ctxt->src.val = (unsigned long)ctxt->eflags & ~X86_EFLAGS_VM;
	return em_push(ctxt);
}

static int em_popa(struct x86_emulate_ctxt *ctxt)
{
	int rc = X86EMUL_CONTINUE;
	int reg = VCPU_REGS_RDI;
	u32 val;

	while (reg >= VCPU_REGS_RAX) {
		if (reg == VCPU_REGS_RSP) {
			rsp_increment(ctxt, ctxt->op_bytes);
			--reg;
		}

		rc = emulate_pop(ctxt, &val, ctxt->op_bytes);
		if (rc != X86EMUL_CONTINUE)
			break;
		assign_register(reg_rmw(ctxt, reg), val, ctxt->op_bytes);
		--reg;
	}
	return rc;
}

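/*
 * Real-mode interrupt/exception delivery: push FLAGS, CS and IP, clear
 * IF/TF/AC, then fetch the new CS:IP from the IVT entry at irq * 4.
 */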
static int __emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq)
{
	const struct x86_emulate_ops *ops = ctxt->ops;
	int rc;
	struct desc_ptr dt;
	gva_t cs_addr;
	gva_t eip_addr;
	u16 cs, eip;

	/* TODO: Add limit checks */
	ctxt->src.val = ctxt->eflags;
	rc = em_push(ctxt);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	ctxt->eflags &= ~(X86_EFLAGS_IF | X86_EFLAGS_TF | X86_EFLAGS_AC);

	ctxt->src.val = get_segment_selector(ctxt, VCPU_SREG_CS);
	rc = em_push(ctxt);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	ctxt->src.val = ctxt->_eip;
	rc = em_push(ctxt);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	ops->get_idt(ctxt, &dt);

	eip_addr = dt.address + (irq << 2);
	cs_addr = dt.address + (irq << 2) + 2;

	rc = linear_read_system(ctxt, cs_addr, &cs, 2);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	rc = linear_read_system(ctxt, eip_addr, &eip, 2);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	rc = load_segment_descriptor(ctxt, cs, VCPU_SREG_CS);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	ctxt->_eip = eip;

	return rc;
}

int emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq)
{
	int rc;

	invalidate_registers(ctxt);
	rc = __emulate_int_real(ctxt, irq);
	if (rc == X86EMUL_CONTINUE)
		writeback_registers(ctxt);
	return rc;
}

2161
static int emulate_int(struct x86_emulate_ctxt *ctxt, int irq)
2162 2163 2164
{
	switch(ctxt->mode) {
	case X86EMUL_MODE_REAL:
2165
		return __emulate_int_real(ctxt, irq);
2166 2167 2168 2169 2170 2171 2172 2173 2174 2175
	case X86EMUL_MODE_VM86:
	case X86EMUL_MODE_PROT16:
	case X86EMUL_MODE_PROT32:
	case X86EMUL_MODE_PROT64:
	default:
		/* Protected mode interrupts unimplemented yet */
		return X86EMUL_UNHANDLEABLE;
	}
}

2176
static int emulate_iret_real(struct x86_emulate_ctxt *ctxt)
2177
{
2178 2179 2180 2181
	int rc = X86EMUL_CONTINUE;
	unsigned long temp_eip = 0;
	unsigned long temp_eflags = 0;
	unsigned long cs = 0;
2182 2183 2184 2185 2186
	unsigned long mask = X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
			     X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_TF |
			     X86_EFLAGS_IF | X86_EFLAGS_DF | X86_EFLAGS_OF |
			     X86_EFLAGS_IOPL | X86_EFLAGS_NT | X86_EFLAGS_RF |
			     X86_EFLAGS_AC | X86_EFLAGS_ID |
			     X86_EFLAGS_FIXED;
2188 2189
	unsigned long vm86_mask = X86_EFLAGS_VM | X86_EFLAGS_VIF |
				  X86_EFLAGS_VIP;
2190

2191
	/* TODO: Add stack limit check */
2192

2193
	rc = emulate_pop(ctxt, &temp_eip, ctxt->op_bytes);
2194

2195 2196
	if (rc != X86EMUL_CONTINUE)
		return rc;
2197

2198 2199
	if (temp_eip & ~0xffff)
		return emulate_gp(ctxt, 0);
2200

2201
	rc = emulate_pop(ctxt, &cs, ctxt->op_bytes);
2202

2203 2204
	if (rc != X86EMUL_CONTINUE)
		return rc;
2205

2206
	rc = emulate_pop(ctxt, &temp_eflags, ctxt->op_bytes);
2207

2208 2209
	if (rc != X86EMUL_CONTINUE)
		return rc;
2210

2211
	rc = load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS);
2212

2213 2214
	if (rc != X86EMUL_CONTINUE)
		return rc;
2215

2216
	ctxt->_eip = temp_eip;
2217

2218
	if (ctxt->op_bytes == 4)
2219
		ctxt->eflags = ((temp_eflags & mask) | (ctxt->eflags & vm86_mask));
2220
	else if (ctxt->op_bytes == 2) {
2221 2222
		ctxt->eflags &= ~0xffff;
		ctxt->eflags |= temp_eflags;
2223
	}
2224 2225

	ctxt->eflags &= ~EFLG_RESERVED_ZEROS_MASK; /* Clear reserved zeros */
	ctxt->eflags |= X86_EFLAGS_FIXED;
2227
	ctxt->ops->set_nmi_mask(ctxt, false);
2228 2229

	return rc;
2230 2231
}

2232
static int em_iret(struct x86_emulate_ctxt *ctxt)
2233
{
2234 2235
	switch(ctxt->mode) {
	case X86EMUL_MODE_REAL:
2236
		return emulate_iret_real(ctxt);
2237 2238 2239 2240
	case X86EMUL_MODE_VM86:
	case X86EMUL_MODE_PROT16:
	case X86EMUL_MODE_PROT32:
	case X86EMUL_MODE_PROT64:
2241
	default:
2242 2243
		/* iret from protected mode unimplemented yet */
		return X86EMUL_UNHANDLEABLE;
2244 2245 2246
	}
}

2247 2248 2249
static int em_jmp_far(struct x86_emulate_ctxt *ctxt)
{
	int rc;
2250 2251
	unsigned short sel;
	struct desc_struct new_desc;
2252 2253
	u8 cpl = ctxt->ops->cpl(ctxt);

2254
	memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
2255

2256 2257
	rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl,
				       X86_TRANSFER_CALL_JMP,
2258
				       &new_desc);
2259 2260 2261
	if (rc != X86EMUL_CONTINUE)
		return rc;

2262
	rc = assign_eip_far(ctxt, ctxt->src.val, &new_desc);
2263 2264 2265 2266
	/* Error handling is not implemented. */
	if (rc != X86EMUL_CONTINUE)
		return X86EMUL_UNHANDLEABLE;

2267
	return rc;
2268 2269
}

2270
static int em_jmp_abs(struct x86_emulate_ctxt *ctxt)
2271
{
2272 2273
	return assign_eip_near(ctxt, ctxt->src.val);
}
2274

2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285
static int em_call_near_abs(struct x86_emulate_ctxt *ctxt)
{
	int rc;
	long int old_eip;

	old_eip = ctxt->_eip;
	rc = assign_eip_near(ctxt, ctxt->src.val);
	if (rc != X86EMUL_CONTINUE)
		return rc;
	ctxt->src.val = old_eip;
	rc = em_push(ctxt);
2286
	return rc;
2287 2288
}

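/*
 * CMPXCHG8B: compare EDX:EAX with the 64-bit destination.  On a match, ZF
 * is set and ECX:EBX is written back; otherwise ZF is cleared and the
 * destination value is loaded into EDX:EAX.  The 16-byte form
 * (CMPXCHG16B) is not handled here.
 */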
2289
static int em_cmpxchg8b(struct x86_emulate_ctxt *ctxt)
2290
{
2291
	u64 old = ctxt->dst.orig_val64;
2292

2293 2294 2295
	if (ctxt->dst.bytes == 16)
		return X86EMUL_UNHANDLEABLE;

2296 2297 2298 2299
	if (((u32) (old >> 0) != (u32) reg_read(ctxt, VCPU_REGS_RAX)) ||
	    ((u32) (old >> 32) != (u32) reg_read(ctxt, VCPU_REGS_RDX))) {
		*reg_write(ctxt, VCPU_REGS_RAX) = (u32) (old >> 0);
		*reg_write(ctxt, VCPU_REGS_RDX) = (u32) (old >> 32);
2300
		ctxt->eflags &= ~X86_EFLAGS_ZF;
2301
	} else {
2302 2303
		ctxt->dst.val64 = ((u64)reg_read(ctxt, VCPU_REGS_RCX) << 32) |
			(u32) reg_read(ctxt, VCPU_REGS_RBX);
2304

2305
		ctxt->eflags |= X86_EFLAGS_ZF;
2306
	}
2307
	return X86EMUL_CONTINUE;
2308 2309
}

2310 2311
static int em_ret(struct x86_emulate_ctxt *ctxt)
{
2312 2313 2314 2315 2316 2317 2318 2319
	int rc;
	unsigned long eip;

	rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	return assign_eip_near(ctxt, eip);
2320 2321
}

2322
static int em_ret_far(struct x86_emulate_ctxt *ctxt)
2323 2324
{
	int rc;
2325
	unsigned long eip, cs;
2326
	int cpl = ctxt->ops->cpl(ctxt);
2327
	struct desc_struct new_desc;
2328

2329
	rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
2330
	if (rc != X86EMUL_CONTINUE)
2331
		return rc;
2332
	rc = emulate_pop(ctxt, &cs, ctxt->op_bytes);
2333
	if (rc != X86EMUL_CONTINUE)
2334
		return rc;
2335 2336 2337
	/* Outer-privilege level return is not implemented */
	if (ctxt->mode >= X86EMUL_MODE_PROT16 && (cs & 3) > cpl)
		return X86EMUL_UNHANDLEABLE;
2338 2339
	rc = __load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS, cpl,
				       X86_TRANSFER_RET,
2340 2341 2342
				       &new_desc);
	if (rc != X86EMUL_CONTINUE)
		return rc;
2343
	rc = assign_eip_far(ctxt, eip, &new_desc);
2344 2345 2346 2347
	/* Error handling is not implemented. */
	if (rc != X86EMUL_CONTINUE)
		return X86EMUL_UNHANDLEABLE;

2348 2349 2350
	return rc;
}

static int em_ret_far_imm(struct x86_emulate_ctxt *ctxt)
{
	int rc;

	rc = em_ret_far(ctxt);
	if (rc != X86EMUL_CONTINUE)
		return rc;
	rsp_increment(ctxt, ctxt->src.val);
	return X86EMUL_CONTINUE;
}

2362 2363 2364
static int em_cmpxchg(struct x86_emulate_ctxt *ctxt)
{
	/* Save real source value, then compare EAX against destination. */
2365 2366
	ctxt->dst.orig_val = ctxt->dst.val;
	ctxt->dst.val = reg_read(ctxt, VCPU_REGS_RAX);
2367
	ctxt->src.orig_val = ctxt->src.val;
2368
	ctxt->src.val = ctxt->dst.orig_val;
2369
	fastop(ctxt, em_cmp);
2370

2371
	if (ctxt->eflags & X86_EFLAGS_ZF) {
2372 2373
		/* Success: write back to memory; no update of EAX */
		ctxt->src.type = OP_NONE;
2374 2375 2376
		ctxt->dst.val = ctxt->src.orig_val;
	} else {
		/* Failure: write the value we saw to EAX. */
2377 2378 2379 2380
		ctxt->src.type = OP_REG;
		ctxt->src.addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX);
		ctxt->src.val = ctxt->dst.orig_val;
		/* Create write-cycle to dest by writing the same value */
2381
		ctxt->dst.val = ctxt->dst.orig_val;
2382 2383 2384 2385
	}
	return X86EMUL_CONTINUE;
}

2386
static int em_lseg(struct x86_emulate_ctxt *ctxt)
2387
{
2388
	int seg = ctxt->src2.val;
2389 2390 2391
	unsigned short sel;
	int rc;

2392
	memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
2393

2394
	rc = load_segment_descriptor(ctxt, sel, seg);
2395 2396 2397
	if (rc != X86EMUL_CONTINUE)
		return rc;

2398
	ctxt->dst.val = ctxt->src.val;
2399 2400 2401
	return rc;
}

2402 2403
static int emulator_has_longmode(struct x86_emulate_ctxt *ctxt)
{
2404
#ifdef CONFIG_X86_64
2405
	return ctxt->ops->guest_has_long_mode(ctxt);
2406 2407 2408
#else
	return false;
#endif
2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422
}

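/*
 * Unpack the segment attribute bits as laid out in the SMM state-save
 * area: G at bit 23, D/B at 22, L at 21, AVL at 20, P at 15, DPL at bits
 * 14:13, S at 12 and the type field at bits 11:8.
 */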
static void rsm_set_desc_flags(struct desc_struct *desc, u32 flags)
{
	desc->g    = (flags >> 23) & 1;
	desc->d    = (flags >> 22) & 1;
	desc->l    = (flags >> 21) & 1;
	desc->avl  = (flags >> 20) & 1;
	desc->p    = (flags >> 15) & 1;
	desc->dpl  = (flags >> 13) & 3;
	desc->s    = (flags >> 12) & 1;
	desc->type = (flags >>  8) & 15;
}

2423 2424
static int rsm_load_seg_32(struct x86_emulate_ctxt *ctxt, const char *smstate,
			   int n)
2425 2426 2427 2428 2429
{
	struct desc_struct desc;
	int offset;
	u16 selector;

2430
	selector = GET_SMSTATE(u32, smstate, 0x7fa8 + n * 4);
2431 2432 2433 2434 2435 2436

	if (n < 3)
		offset = 0x7f84 + n * 12;
	else
		offset = 0x7f2c + (n - 3) * 12;

2437 2438 2439
	set_desc_base(&desc,      GET_SMSTATE(u32, smstate, offset + 8));
	set_desc_limit(&desc,     GET_SMSTATE(u32, smstate, offset + 4));
	rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, offset));
2440 2441 2442 2443
	ctxt->ops->set_segment(ctxt, selector, &desc, 0, n);
	return X86EMUL_CONTINUE;
}

2444
#ifdef CONFIG_X86_64
2445 2446
static int rsm_load_seg_64(struct x86_emulate_ctxt *ctxt, const char *smstate,
			   int n)
2447 2448 2449 2450 2451 2452 2453 2454
{
	struct desc_struct desc;
	int offset;
	u16 selector;
	u32 base3;

	offset = 0x7e00 + n * 16;

2455 2456 2457 2458 2459
	selector =                GET_SMSTATE(u16, smstate, offset);
	rsm_set_desc_flags(&desc, GET_SMSTATE(u16, smstate, offset + 2) << 8);
	set_desc_limit(&desc,     GET_SMSTATE(u32, smstate, offset + 4));
	set_desc_base(&desc,      GET_SMSTATE(u32, smstate, offset + 8));
	base3 =                   GET_SMSTATE(u32, smstate, offset + 12);
2460 2461 2462 2463

	ctxt->ops->set_segment(ctxt, selector, &desc, base3, n);
	return X86EMUL_CONTINUE;
}
2464
#endif
2465 2466

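/*
 * Re-enter protected (and possibly long) mode on behalf of RSM.  CR3 is
 * loaded first with the PCID bits clear, then CR4 without PCIDE, then CR0;
 * only afterwards is CR4.PCIDE set and the saved PCID restored into CR3.
 */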
static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt,
2467
				    u64 cr0, u64 cr3, u64 cr4)
2468 2469
{
	int bad;
2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481
	u64 pcid;

	/* In order to later set CR4.PCIDE, CR3[11:0] must be zero.  */
	pcid = 0;
	if (cr4 & X86_CR4_PCIDE) {
		pcid = cr3 & 0xfff;
		cr3 &= ~0xfff;
	}

	bad = ctxt->ops->set_cr(ctxt, 3, cr3);
	if (bad)
		return X86EMUL_UNHANDLEABLE;
2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499

	/*
	 * First enable PAE, long mode needs it before CR0.PG = 1 is set.
	 * Then enable protected mode.	However, PCID cannot be enabled
	 * if EFER.LMA=0, so set it separately.
	 */
	bad = ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PCIDE);
	if (bad)
		return X86EMUL_UNHANDLEABLE;

	bad = ctxt->ops->set_cr(ctxt, 0, cr0);
	if (bad)
		return X86EMUL_UNHANDLEABLE;

	if (cr4 & X86_CR4_PCIDE) {
		bad = ctxt->ops->set_cr(ctxt, 4, cr4);
		if (bad)
			return X86EMUL_UNHANDLEABLE;
2500 2501 2502 2503 2504 2505
		if (pcid) {
			bad = ctxt->ops->set_cr(ctxt, 3, cr3 | pcid);
			if (bad)
				return X86EMUL_UNHANDLEABLE;
		}

2506 2507 2508 2509 2510
	}

	return X86EMUL_CONTINUE;
}

2511 2512
static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt,
			     const char *smstate)
2513 2514 2515 2516
{
	struct desc_struct desc;
	struct desc_ptr dt;
	u16 selector;
2517
	u32 val, cr0, cr3, cr4;
2518 2519
	int i;

2520 2521 2522 2523
	cr0 =                      GET_SMSTATE(u32, smstate, 0x7ffc);
	cr3 =                      GET_SMSTATE(u32, smstate, 0x7ff8);
	ctxt->eflags =             GET_SMSTATE(u32, smstate, 0x7ff4) | X86_EFLAGS_FIXED;
	ctxt->_eip =               GET_SMSTATE(u32, smstate, 0x7ff0);
2524 2525

	for (i = 0; i < 8; i++)
2526
		*reg_write(ctxt, i) = GET_SMSTATE(u32, smstate, 0x7fd0 + i * 4);
2527

2528
	val = GET_SMSTATE(u32, smstate, 0x7fcc);
2529 2530 2531 2532

	if (ctxt->ops->set_dr(ctxt, 6, (val & DR6_VOLATILE) | DR6_FIXED_1))
		return X86EMUL_UNHANDLEABLE;

2533
	val = GET_SMSTATE(u32, smstate, 0x7fc8);
2534 2535 2536

	if (ctxt->ops->set_dr(ctxt, 7, (val & DR7_VOLATILE) | DR7_FIXED_1))
		return X86EMUL_UNHANDLEABLE;
2537

2538 2539 2540 2541
	selector =                 GET_SMSTATE(u32, smstate, 0x7fc4);
	set_desc_base(&desc,       GET_SMSTATE(u32, smstate, 0x7f64));
	set_desc_limit(&desc,      GET_SMSTATE(u32, smstate, 0x7f60));
	rsm_set_desc_flags(&desc,  GET_SMSTATE(u32, smstate, 0x7f5c));
2542 2543
	ctxt->ops->set_segment(ctxt, selector, &desc, 0, VCPU_SREG_TR);

2544 2545 2546 2547
	selector =                 GET_SMSTATE(u32, smstate, 0x7fc0);
	set_desc_base(&desc,       GET_SMSTATE(u32, smstate, 0x7f80));
	set_desc_limit(&desc,      GET_SMSTATE(u32, smstate, 0x7f7c));
	rsm_set_desc_flags(&desc,  GET_SMSTATE(u32, smstate, 0x7f78));
2548 2549
	ctxt->ops->set_segment(ctxt, selector, &desc, 0, VCPU_SREG_LDTR);

2550 2551
	dt.address =               GET_SMSTATE(u32, smstate, 0x7f74);
	dt.size =                  GET_SMSTATE(u32, smstate, 0x7f70);
2552 2553
	ctxt->ops->set_gdt(ctxt, &dt);

2554 2555
	dt.address =               GET_SMSTATE(u32, smstate, 0x7f58);
	dt.size =                  GET_SMSTATE(u32, smstate, 0x7f54);
2556 2557 2558
	ctxt->ops->set_idt(ctxt, &dt);

	for (i = 0; i < 6; i++) {
2559
		int r = rsm_load_seg_32(ctxt, smstate, i);
2560 2561 2562 2563
		if (r != X86EMUL_CONTINUE)
			return r;
	}

2564
	cr4 = GET_SMSTATE(u32, smstate, 0x7f14);
2565

2566
	ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smstate, 0x7ef8));
2567

2568
	return rsm_enter_protected_mode(ctxt, cr0, cr3, cr4);
2569 2570
}

2571
#ifdef CONFIG_X86_64
2572 2573
static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt,
			     const char *smstate)
2574 2575 2576
{
	struct desc_struct desc;
	struct desc_ptr dt;
2577
	u64 val, cr0, cr3, cr4;
2578 2579
	u32 base3;
	u16 selector;
2580
	int i, r;
2581 2582

	for (i = 0; i < 16; i++)
2583
		*reg_write(ctxt, i) = GET_SMSTATE(u64, smstate, 0x7ff8 - i * 8);
2584

2585 2586
	ctxt->_eip   = GET_SMSTATE(u64, smstate, 0x7f78);
	ctxt->eflags = GET_SMSTATE(u32, smstate, 0x7f70) | X86_EFLAGS_FIXED;
2587

2588
	val = GET_SMSTATE(u64, smstate, 0x7f68);
2589 2590 2591 2592

	if (ctxt->ops->set_dr(ctxt, 6, (val & DR6_VOLATILE) | DR6_FIXED_1))
		return X86EMUL_UNHANDLEABLE;

2593
	val = GET_SMSTATE(u64, smstate, 0x7f60);
2594 2595 2596

	if (ctxt->ops->set_dr(ctxt, 7, (val & DR7_VOLATILE) | DR7_FIXED_1))
		return X86EMUL_UNHANDLEABLE;
2597

2598 2599 2600 2601 2602
	cr0 =                       GET_SMSTATE(u64, smstate, 0x7f58);
	cr3 =                       GET_SMSTATE(u64, smstate, 0x7f50);
	cr4 =                       GET_SMSTATE(u64, smstate, 0x7f48);
	ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smstate, 0x7f00));
	val =                       GET_SMSTATE(u64, smstate, 0x7ed0);
2603 2604 2605

	if (ctxt->ops->set_msr(ctxt, MSR_EFER, val & ~EFER_LMA))
		return X86EMUL_UNHANDLEABLE;
2606

2607 2608 2609 2610 2611
	selector =                  GET_SMSTATE(u32, smstate, 0x7e90);
	rsm_set_desc_flags(&desc,   GET_SMSTATE(u32, smstate, 0x7e92) << 8);
	set_desc_limit(&desc,       GET_SMSTATE(u32, smstate, 0x7e94));
	set_desc_base(&desc,        GET_SMSTATE(u32, smstate, 0x7e98));
	base3 =                     GET_SMSTATE(u32, smstate, 0x7e9c);
2612 2613
	ctxt->ops->set_segment(ctxt, selector, &desc, base3, VCPU_SREG_TR);

2614 2615
	dt.size =                   GET_SMSTATE(u32, smstate, 0x7e84);
	dt.address =                GET_SMSTATE(u64, smstate, 0x7e88);
2616 2617
	ctxt->ops->set_idt(ctxt, &dt);

2618 2619 2620 2621 2622
	selector =                  GET_SMSTATE(u32, smstate, 0x7e70);
	rsm_set_desc_flags(&desc,   GET_SMSTATE(u32, smstate, 0x7e72) << 8);
	set_desc_limit(&desc,       GET_SMSTATE(u32, smstate, 0x7e74));
	set_desc_base(&desc,        GET_SMSTATE(u32, smstate, 0x7e78));
	base3 =                     GET_SMSTATE(u32, smstate, 0x7e7c);
2623 2624
	ctxt->ops->set_segment(ctxt, selector, &desc, base3, VCPU_SREG_LDTR);

2625 2626
	dt.size =                   GET_SMSTATE(u32, smstate, 0x7e64);
	dt.address =                GET_SMSTATE(u64, smstate, 0x7e68);
2627 2628
	ctxt->ops->set_gdt(ctxt, &dt);

2629
	r = rsm_enter_protected_mode(ctxt, cr0, cr3, cr4);
2630 2631 2632
	if (r != X86EMUL_CONTINUE)
		return r;

2633
	for (i = 0; i < 6; i++) {
2634
		r = rsm_load_seg_64(ctxt, smstate, i);
2635 2636 2637 2638
		if (r != X86EMUL_CONTINUE)
			return r;
	}

2639
	return X86EMUL_CONTINUE;
2640
}
2641
#endif
2642

static int em_rsm(struct x86_emulate_ctxt *ctxt)
{
2645
	unsigned long cr0, cr4, efer;
2646
	char buf[512];
2647 2648 2649
	u64 smbase;
	int ret;

2650
	if ((ctxt->ops->get_hflags(ctxt) & X86EMUL_SMM_MASK) == 0)
		return emulate_ud(ctxt);

2653 2654 2655 2656 2657 2658
	smbase = ctxt->ops->get_smbase(ctxt);

	ret = ctxt->ops->read_phys(ctxt, smbase + 0xfe00, buf, sizeof(buf));
	if (ret != X86EMUL_CONTINUE)
		return X86EMUL_UNHANDLEABLE;

2659 2660 2661 2662 2663 2664
	if ((ctxt->ops->get_hflags(ctxt) & X86EMUL_SMM_INSIDE_NMI_MASK) == 0)
		ctxt->ops->set_nmi_mask(ctxt, false);

	ctxt->ops->set_hflags(ctxt, ctxt->ops->get_hflags(ctxt) &
		~(X86EMUL_SMM_INSIDE_NMI_MASK | X86EMUL_SMM_MASK));

2665 2666
	/*
	 * Get back to real mode, to prepare a safe state in which to load
2667 2668
	 * CR0/CR3/CR4/EFER.  It's all a bit more complicated if the vCPU
	 * supports long mode.
2669
	 */
2670 2671 2672 2673
	if (emulator_has_longmode(ctxt)) {
		struct desc_struct cs_desc;

		/* Zero CR4.PCIDE before CR0.PG.  */
2674 2675
		cr4 = ctxt->ops->get_cr(ctxt, 4);
		if (cr4 & X86_CR4_PCIDE)
2676 2677 2678 2679 2680 2681 2682 2683 2684 2685
			ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PCIDE);

		/* A 32-bit code segment is required to clear EFER.LMA.  */
		memset(&cs_desc, 0, sizeof(cs_desc));
		cs_desc.type = 0xb;
		cs_desc.s = cs_desc.g = cs_desc.p = 1;
		ctxt->ops->set_segment(ctxt, 0, &cs_desc, 0, VCPU_SREG_CS);
	}

	/* For the 64-bit case, this will clear EFER.LMA.  */
2686 2687 2688
	cr0 = ctxt->ops->get_cr(ctxt, 0);
	if (cr0 & X86_CR0_PE)
		ctxt->ops->set_cr(ctxt, 0, cr0 & ~(X86_CR0_PG | X86_CR0_PE));
2689

2690 2691 2692 2693 2694 2695 2696 2697 2698 2699
	if (emulator_has_longmode(ctxt)) {
		/* Clear CR4.PAE before clearing EFER.LME. */
		cr4 = ctxt->ops->get_cr(ctxt, 4);
		if (cr4 & X86_CR4_PAE)
			ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PAE);

		/* And finally go back to 32-bit mode.  */
		efer = 0;
		ctxt->ops->set_msr(ctxt, MSR_EFER, efer);
	}
2700

2701 2702 2703 2704 2705
	/*
	 * Give pre_leave_smm() a chance to make ISA-specific changes to the
	 * vCPU state (e.g. enter guest mode) before loading state from the SMM
	 * state-save area.
	 */
2706
	if (ctxt->ops->pre_leave_smm(ctxt, buf))
2707 2708
		return X86EMUL_UNHANDLEABLE;

2709
#ifdef CONFIG_X86_64
2710
	if (emulator_has_longmode(ctxt))
2711
		ret = rsm_load_state_64(ctxt, buf);
2712
	else
2713
#endif
2714
		ret = rsm_load_state_32(ctxt, buf);
2715 2716 2717 2718 2719 2720

	if (ret != X86EMUL_CONTINUE) {
		/* FIXME: should triple fault */
		return X86EMUL_UNHANDLEABLE;
	}

2721 2722
	ctxt->ops->post_leave_smm(ctxt);

2723
	return X86EMUL_CONTINUE;
}

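/*
 * Build the flat 4GB code and stack segments that SYSCALL/SYSENTER and
 * their return paths expect; the selectors, DPL and the CS.L bit are
 * adjusted by the callers.
 */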
2726
static void
2727
setup_syscalls_segments(struct x86_emulate_ctxt *ctxt,
2728
			struct desc_struct *cs, struct desc_struct *ss)
2729 2730
{
	cs->l = 0;		/* will be adjusted later */
2731
	set_desc_base(cs, 0);	/* flat segment */
2732
	cs->g = 1;		/* 4kb granularity */
2733
	set_desc_limit(cs, 0xfffff);	/* 4GB limit */
2734 2735 2736
	cs->type = 0x0b;	/* Read, Execute, Accessed */
	cs->s = 1;
	cs->dpl = 0;		/* will be adjusted later */
2737 2738
	cs->p = 1;
	cs->d = 1;
2739
	cs->avl = 0;
2740

2741 2742
	set_desc_base(ss, 0);	/* flat segment */
	set_desc_limit(ss, 0xfffff);	/* 4GB limit */
2743 2744 2745
	ss->g = 1;		/* 4kb granularity */
	ss->s = 1;
	ss->type = 0x03;	/* Read/Write, Accessed */
2746
	ss->d = 1;		/* 32bit stack segment */
2747
	ss->dpl = 0;
2748
	ss->p = 1;
2749 2750
	ss->l = 0;
	ss->avl = 0;
2751 2752
}

2753 2754 2755 2756 2757
static bool vendor_intel(struct x86_emulate_ctxt *ctxt)
{
	u32 eax, ebx, ecx, edx;

	eax = ecx = 0;
2758
	ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, true);
2759
	return is_guest_vendor_intel(ebx, ecx, edx);
2760 2761
}

2762 2763
static bool em_syscall_is_enabled(struct x86_emulate_ctxt *ctxt)
{
2764
	const struct x86_emulate_ops *ops = ctxt->ops;
2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775
	u32 eax, ebx, ecx, edx;

	/*
	 * syscall should always be enabled in longmode - so only become
	 * vendor specific (cpuid) if other modes are active...
	 */
	if (ctxt->mode == X86EMUL_MODE_PROT64)
		return true;

	eax = 0x00000000;
	ecx = 0x00000000;
2776
	ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, true);
2777
	/*
	 * remark: Intel CPUs only support "syscall" in 64-bit long mode. A
	 * 64-bit guest running a 32-bit compat application will #UD! While
	 * this behaviour could be fixed (by emulating the AMD response),
	 * AMD CPUs can't be made to behave like Intel, so honour the guest
	 * CPU vendor here.
	 */
2783
	if (is_guest_vendor_intel(ebx, ecx, edx))
2784 2785
		return false;

2786 2787
	if (is_guest_vendor_amd(ebx, ecx, edx) ||
	    is_guest_vendor_hygon(ebx, ecx, edx))
2788 2789 2790 2791 2792 2793
		return true;

	/*
	 * default: (not Intel, not AMD, not Hygon), apply Intel's
	 * stricter rules...
	 */
2794 2795 2796
	return false;
}

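/*
 * SYSCALL: the new CS selector comes from MSR_STAR[47:32] and SS is that
 * value + 8.  RCX receives the return RIP; in long mode R11 receives
 * RFLAGS and the target RIP comes from LSTAR/CSTAR, while legacy mode
 * jumps to the EIP held in the low 32 bits of MSR_STAR.
 */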
2797
static int em_syscall(struct x86_emulate_ctxt *ctxt)
2798
{
2799
	const struct x86_emulate_ops *ops = ctxt->ops;
2800
	struct desc_struct cs, ss;
2801
	u64 msr_data;
2802
	u16 cs_sel, ss_sel;
2803
	u64 efer = 0;
2804 2805

	/* syscall is not available in real mode */
2806
	if (ctxt->mode == X86EMUL_MODE_REAL ||
2807 2808
	    ctxt->mode == X86EMUL_MODE_VM86)
		return emulate_ud(ctxt);
2809

2810 2811 2812
	if (!(em_syscall_is_enabled(ctxt)))
		return emulate_ud(ctxt);

2813
	ops->get_msr(ctxt, MSR_EFER, &efer);
2814 2815 2816
	if (!(efer & EFER_SCE))
		return emulate_ud(ctxt);

2817
	setup_syscalls_segments(ctxt, &cs, &ss);
2818
	ops->get_msr(ctxt, MSR_STAR, &msr_data);
2819
	msr_data >>= 32;
2820 2821
	cs_sel = (u16)(msr_data & 0xfffc);
	ss_sel = (u16)(msr_data + 8);
2822

2823
	if (efer & EFER_LMA) {
2824
		cs.d = 0;
2825 2826
		cs.l = 1;
	}
2827 2828
	ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
	ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);
2829

2830
	*reg_write(ctxt, VCPU_REGS_RCX) = ctxt->_eip;
2831
	if (efer & EFER_LMA) {
2832
#ifdef CONFIG_X86_64
2833
		*reg_write(ctxt, VCPU_REGS_R11) = ctxt->eflags;
2834

2835
		ops->get_msr(ctxt,
2836 2837
			     ctxt->mode == X86EMUL_MODE_PROT64 ?
			     MSR_LSTAR : MSR_CSTAR, &msr_data);
2838
		ctxt->_eip = msr_data;
2839

2840
		ops->get_msr(ctxt, MSR_SYSCALL_MASK, &msr_data);
2841
		ctxt->eflags &= ~msr_data;
		ctxt->eflags |= X86_EFLAGS_FIXED;
2843 2844 2845
#endif
	} else {
		/* legacy mode */
2846
		ops->get_msr(ctxt, MSR_STAR, &msr_data);
2847
		ctxt->_eip = (u32)msr_data;
2848

2849
		ctxt->eflags &= ~(X86_EFLAGS_VM | X86_EFLAGS_IF);
2850 2851
	}

2852
	ctxt->tf = (ctxt->eflags & X86_EFLAGS_TF) != 0;
2853
	return X86EMUL_CONTINUE;
2854 2855
}

2856
static int em_sysenter(struct x86_emulate_ctxt *ctxt)
2857
{
2858
	const struct x86_emulate_ops *ops = ctxt->ops;
2859
	struct desc_struct cs, ss;
2860
	u64 msr_data;
2861
	u16 cs_sel, ss_sel;
2862
	u64 efer = 0;
2863

2864
	ops->get_msr(ctxt, MSR_EFER, &efer);
2865
	/* inject #GP if in real mode */
2866 2867
	if (ctxt->mode == X86EMUL_MODE_REAL)
		return emulate_gp(ctxt, 0);
2868

2869 2870 2871 2872
	/*
	 * Not recognized on AMD in compat mode (but is recognized in legacy
	 * mode).
	 */
2873
	if ((ctxt->mode != X86EMUL_MODE_PROT64) && (efer & EFER_LMA)
2874 2875 2876
	    && !vendor_intel(ctxt))
		return emulate_ud(ctxt);

2877
	/* sysenter/sysexit have not been tested in 64bit mode. */
2878
	if (ctxt->mode == X86EMUL_MODE_PROT64)
2879
		return X86EMUL_UNHANDLEABLE;
2880

2881
	ops->get_msr(ctxt, MSR_IA32_SYSENTER_CS, &msr_data);
2882 2883
	if ((msr_data & 0xfffc) == 0x0)
		return emulate_gp(ctxt, 0);
2884

2885
	setup_syscalls_segments(ctxt, &cs, &ss);
2886
	ctxt->eflags &= ~(X86_EFLAGS_VM | X86_EFLAGS_IF);
2887
	cs_sel = (u16)msr_data & ~SEGMENT_RPL_MASK;
2888
	ss_sel = cs_sel + 8;
2889
	if (efer & EFER_LMA) {
2890
		cs.d = 0;
2891 2892 2893
		cs.l = 1;
	}

2894 2895
	ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
	ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);
2896

2897
	ops->get_msr(ctxt, MSR_IA32_SYSENTER_EIP, &msr_data);
2898
	ctxt->_eip = (efer & EFER_LMA) ? msr_data : (u32)msr_data;
2899

2900
	ops->get_msr(ctxt, MSR_IA32_SYSENTER_ESP, &msr_data);
2901 2902
	*reg_write(ctxt, VCPU_REGS_RSP) = (efer & EFER_LMA) ? msr_data :
							      (u32)msr_data;
2903 2904
	if (efer & EFER_LMA)
		ctxt->mode = X86EMUL_MODE_PROT64;
2905

2906
	return X86EMUL_CONTINUE;
2907 2908
}

2909
static int em_sysexit(struct x86_emulate_ctxt *ctxt)
2910
{
2911
	const struct x86_emulate_ops *ops = ctxt->ops;
2912
	struct desc_struct cs, ss;
2913
	u64 msr_data, rcx, rdx;
2914
	int usermode;
	u16 cs_sel = 0, ss_sel = 0;
2916

2917 2918
	/* inject #GP if in real mode or Virtual 8086 mode */
	if (ctxt->mode == X86EMUL_MODE_REAL ||
2919 2920
	    ctxt->mode == X86EMUL_MODE_VM86)
		return emulate_gp(ctxt, 0);
2921

2922
	setup_syscalls_segments(ctxt, &cs, &ss);
2923

2924
	if ((ctxt->rex_prefix & 0x8) != 0x0)
2925 2926 2927 2928
		usermode = X86EMUL_MODE_PROT64;
	else
		usermode = X86EMUL_MODE_PROT32;

2929 2930 2931
	rcx = reg_read(ctxt, VCPU_REGS_RCX);
	rdx = reg_read(ctxt, VCPU_REGS_RDX);

2932 2933
	cs.dpl = 3;
	ss.dpl = 3;
2934
	ops->get_msr(ctxt, MSR_IA32_SYSENTER_CS, &msr_data);
2935 2936
	switch (usermode) {
	case X86EMUL_MODE_PROT32:
2937
		cs_sel = (u16)(msr_data + 16);
2938 2939
		if ((msr_data & 0xfffc) == 0x0)
			return emulate_gp(ctxt, 0);
2940
		ss_sel = (u16)(msr_data + 24);
2941 2942
		rcx = (u32)rcx;
		rdx = (u32)rdx;
2943 2944
		break;
	case X86EMUL_MODE_PROT64:
2945
		cs_sel = (u16)(msr_data + 32);
2946 2947
		if (msr_data == 0x0)
			return emulate_gp(ctxt, 0);
2948 2949
		ss_sel = cs_sel + 8;
		cs.d = 0;
2950
		cs.l = 1;
2951 2952
		if (emul_is_noncanonical_address(rcx, ctxt) ||
		    emul_is_noncanonical_address(rdx, ctxt))
2953
			return emulate_gp(ctxt, 0);
2954 2955
		break;
	}
2956 2957
	cs_sel |= SEGMENT_RPL_MASK;
	ss_sel |= SEGMENT_RPL_MASK;
2958

2959 2960
	ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
	ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);
2961

2962 2963
	ctxt->_eip = rdx;
	*reg_write(ctxt, VCPU_REGS_RSP) = rcx;
2964

2965
	return X86EMUL_CONTINUE;
2966 2967
}

2968
static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt)
2969 2970 2971 2972 2973 2974
{
	int iopl;
	if (ctxt->mode == X86EMUL_MODE_REAL)
		return false;
	if (ctxt->mode == X86EMUL_MODE_VM86)
		return true;
2975
	iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> X86_EFLAGS_IOPL_BIT;
2976
	return ctxt->ops->cpl(ctxt) > iopl;
2977 2978
}

2979 2980 2981
#define VMWARE_PORT_VMPORT	(0x5658)
#define VMWARE_PORT_VMRPC	(0x5659)

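/*
 * Check the TSS I/O permission bitmap: the bitmap offset is read from the
 * TSS at byte 102, and an I/O access is allowed only if every bit covering
 * the accessed port range is clear.
 */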
2982 2983 2984
static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt,
					    u16 port, u16 len)
{
2985
	const struct x86_emulate_ops *ops = ctxt->ops;
2986
	struct desc_struct tr_seg;
2987
	u32 base3;
2988
	int r;
2989
	u16 tr, io_bitmap_ptr, perm, bit_idx = port & 0x7;
2990
	unsigned mask = (1 << len) - 1;
2991
	unsigned long base;
2992

2993 2994 2995 2996 2997 2998 2999 3000
	/*
	 * VMware allows access to these ports even if denied
	 * by TSS I/O permission bitmap. Mimic behavior.
	 */
	if (enable_vmware_backdoor &&
	    ((port == VMWARE_PORT_VMPORT) || (port == VMWARE_PORT_VMRPC)))
		return true;

3001
	ops->get_segment(ctxt, &tr, &tr_seg, &base3, VCPU_SREG_TR);
3002
	if (!tr_seg.p)
3003
		return false;
3004
	if (desc_limit_scaled(&tr_seg) < 103)
3005
		return false;
3006 3007 3008 3009
	base = get_desc_base(&tr_seg);
#ifdef CONFIG_X86_64
	base |= ((u64)base3) << 32;
#endif
3010
	r = ops->read_std(ctxt, base + 102, &io_bitmap_ptr, 2, NULL, true);
3011 3012
	if (r != X86EMUL_CONTINUE)
		return false;
3013
	if (io_bitmap_ptr + port/8 > desc_limit_scaled(&tr_seg))
3014
		return false;
3015
	r = ops->read_std(ctxt, base + io_bitmap_ptr + port/8, &perm, 2, NULL, true);
3016 3017 3018 3019 3020 3021 3022 3023 3024 3025
	if (r != X86EMUL_CONTINUE)
		return false;
	if ((perm >> bit_idx) & mask)
		return false;
	return true;
}

static bool emulator_io_permited(struct x86_emulate_ctxt *ctxt,
				 u16 port, u16 len)
{
3026 3027 3028
	if (ctxt->perm_ok)
		return true;

3029 3030
	if (emulator_bad_iopl(ctxt))
		if (!emulator_io_port_access_allowed(ctxt, port, len))
3031
			return false;
3032 3033 3034

	ctxt->perm_ok = true;

3035 3036 3037
	return true;
}

3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053
static void string_registers_quirk(struct x86_emulate_ctxt *ctxt)
{
	/*
	 * Intel CPUs mask the counter and pointers in a rather strange
	 * manner when ECX is zero due to REP-string optimizations.
	 */
#ifdef CONFIG_X86_64
	if (ctxt->ad_bytes != 4 || !vendor_intel(ctxt))
		return;

	*reg_write(ctxt, VCPU_REGS_RCX) = 0;

	switch (ctxt->b) {
	case 0xa4:	/* movsb */
	case 0xa5:	/* movsd/w */
		*reg_rmw(ctxt, VCPU_REGS_RSI) &= (u32)-1;
3054
		fallthrough;
3055 3056 3057 3058 3059 3060 3061
	case 0xaa:	/* stosb */
	case 0xab:	/* stosd/w */
		*reg_rmw(ctxt, VCPU_REGS_RDI) &= (u32)-1;
	}
#endif
}

3062 3063 3064
static void save_state_to_tss16(struct x86_emulate_ctxt *ctxt,
				struct tss_segment_16 *tss)
{
3065
	tss->ip = ctxt->_eip;
3066
	tss->flag = ctxt->eflags;
3067 3068 3069 3070 3071 3072 3073 3074
	tss->ax = reg_read(ctxt, VCPU_REGS_RAX);
	tss->cx = reg_read(ctxt, VCPU_REGS_RCX);
	tss->dx = reg_read(ctxt, VCPU_REGS_RDX);
	tss->bx = reg_read(ctxt, VCPU_REGS_RBX);
	tss->sp = reg_read(ctxt, VCPU_REGS_RSP);
	tss->bp = reg_read(ctxt, VCPU_REGS_RBP);
	tss->si = reg_read(ctxt, VCPU_REGS_RSI);
	tss->di = reg_read(ctxt, VCPU_REGS_RDI);
3075

3076 3077 3078 3079 3080
	tss->es = get_segment_selector(ctxt, VCPU_SREG_ES);
	tss->cs = get_segment_selector(ctxt, VCPU_SREG_CS);
	tss->ss = get_segment_selector(ctxt, VCPU_SREG_SS);
	tss->ds = get_segment_selector(ctxt, VCPU_SREG_DS);
	tss->ldt = get_segment_selector(ctxt, VCPU_SREG_LDTR);
3081 3082 3083 3084 3085 3086
}

static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt,
				 struct tss_segment_16 *tss)
{
	int ret;
3087
	u8 cpl;
3088

3089
	ctxt->_eip = tss->ip;
3090
	ctxt->eflags = tss->flag | 2;
3091 3092 3093 3094 3095 3096 3097 3098
	*reg_write(ctxt, VCPU_REGS_RAX) = tss->ax;
	*reg_write(ctxt, VCPU_REGS_RCX) = tss->cx;
	*reg_write(ctxt, VCPU_REGS_RDX) = tss->dx;
	*reg_write(ctxt, VCPU_REGS_RBX) = tss->bx;
	*reg_write(ctxt, VCPU_REGS_RSP) = tss->sp;
	*reg_write(ctxt, VCPU_REGS_RBP) = tss->bp;
	*reg_write(ctxt, VCPU_REGS_RSI) = tss->si;
	*reg_write(ctxt, VCPU_REGS_RDI) = tss->di;
3099 3100 3101 3102 3103

	/*
	 * SDM says that segment selectors are loaded before segment
	 * descriptors
	 */
3104 3105 3106 3107 3108
	set_segment_selector(ctxt, tss->ldt, VCPU_SREG_LDTR);
	set_segment_selector(ctxt, tss->es, VCPU_SREG_ES);
	set_segment_selector(ctxt, tss->cs, VCPU_SREG_CS);
	set_segment_selector(ctxt, tss->ss, VCPU_SREG_SS);
	set_segment_selector(ctxt, tss->ds, VCPU_SREG_DS);
3109

3110 3111
	cpl = tss->cs & 3;

3112
	/*
	 * Now load segment descriptors. If a fault happens at this stage
	 * it is handled in the context of the new task.
	 */
3116
	ret = __load_segment_descriptor(ctxt, tss->ldt, VCPU_SREG_LDTR, cpl,
3117
					X86_TRANSFER_TASK_SWITCH, NULL);
3118 3119
	if (ret != X86EMUL_CONTINUE)
		return ret;
3120
	ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl,
3121
					X86_TRANSFER_TASK_SWITCH, NULL);
3122 3123
	if (ret != X86EMUL_CONTINUE)
		return ret;
3124
	ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl,
3125
					X86_TRANSFER_TASK_SWITCH, NULL);
3126 3127
	if (ret != X86EMUL_CONTINUE)
		return ret;
3128
	ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl,
3129
					X86_TRANSFER_TASK_SWITCH, NULL);
3130 3131
	if (ret != X86EMUL_CONTINUE)
		return ret;
3132
	ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl,
3133
					X86_TRANSFER_TASK_SWITCH, NULL);
3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145
	if (ret != X86EMUL_CONTINUE)
		return ret;

	return X86EMUL_CONTINUE;
}

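/*
 * 16-bit hardware task switch: save the current state into the old TSS,
 * read the new TSS, optionally store the back link to the old task, and
 * finally load the register and segment state from the new TSS.
 */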
static int task_switch_16(struct x86_emulate_ctxt *ctxt,
			  u16 tss_selector, u16 old_tss_sel,
			  ulong old_tss_base, struct desc_struct *new_desc)
{
	struct tss_segment_16 tss_seg;
	int ret;
3146
	u32 new_tss_base = get_desc_base(new_desc);
3147

3148
	ret = linear_read_system(ctxt, old_tss_base, &tss_seg, sizeof(tss_seg));
3149
	if (ret != X86EMUL_CONTINUE)
3150 3151
		return ret;

3152
	save_state_to_tss16(ctxt, &tss_seg);
3153

3154
	ret = linear_write_system(ctxt, old_tss_base, &tss_seg, sizeof(tss_seg));
3155
	if (ret != X86EMUL_CONTINUE)
3156 3157
		return ret;

3158
	ret = linear_read_system(ctxt, new_tss_base, &tss_seg, sizeof(tss_seg));
3159
	if (ret != X86EMUL_CONTINUE)
3160 3161 3162 3163 3164
		return ret;

	if (old_tss_sel != 0xffff) {
		tss_seg.prev_task_link = old_tss_sel;

3165 3166
		ret = linear_write_system(ctxt, new_tss_base,
					  &tss_seg.prev_task_link,
3167
					  sizeof(tss_seg.prev_task_link));
3168
		if (ret != X86EMUL_CONTINUE)
3169 3170 3171
			return ret;
	}

3172
	return load_state_from_tss16(ctxt, &tss_seg);
3173 3174 3175 3176 3177
}

static void save_state_to_tss32(struct x86_emulate_ctxt *ctxt,
				struct tss_segment_32 *tss)
{
3178
	/* CR3 and ldt selector are not saved intentionally */
3179
	tss->eip = ctxt->_eip;
3180
	tss->eflags = ctxt->eflags;
3181 3182 3183 3184 3185 3186 3187 3188
	tss->eax = reg_read(ctxt, VCPU_REGS_RAX);
	tss->ecx = reg_read(ctxt, VCPU_REGS_RCX);
	tss->edx = reg_read(ctxt, VCPU_REGS_RDX);
	tss->ebx = reg_read(ctxt, VCPU_REGS_RBX);
	tss->esp = reg_read(ctxt, VCPU_REGS_RSP);
	tss->ebp = reg_read(ctxt, VCPU_REGS_RBP);
	tss->esi = reg_read(ctxt, VCPU_REGS_RSI);
	tss->edi = reg_read(ctxt, VCPU_REGS_RDI);
3189

3190 3191 3192 3193 3194 3195
	tss->es = get_segment_selector(ctxt, VCPU_SREG_ES);
	tss->cs = get_segment_selector(ctxt, VCPU_SREG_CS);
	tss->ss = get_segment_selector(ctxt, VCPU_SREG_SS);
	tss->ds = get_segment_selector(ctxt, VCPU_SREG_DS);
	tss->fs = get_segment_selector(ctxt, VCPU_SREG_FS);
	tss->gs = get_segment_selector(ctxt, VCPU_SREG_GS);
3196 3197 3198 3199 3200 3201
}

static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt,
				 struct tss_segment_32 *tss)
{
	int ret;
3202
	u8 cpl;
3203

3204
	if (ctxt->ops->set_cr(ctxt, 3, tss->cr3))
3205
		return emulate_gp(ctxt, 0);
3206
	ctxt->_eip = tss->eip;
3207
	ctxt->eflags = tss->eflags | 2;
3208 3209

	/* General purpose registers */
3210 3211 3212 3213 3214 3215 3216 3217
	*reg_write(ctxt, VCPU_REGS_RAX) = tss->eax;
	*reg_write(ctxt, VCPU_REGS_RCX) = tss->ecx;
	*reg_write(ctxt, VCPU_REGS_RDX) = tss->edx;
	*reg_write(ctxt, VCPU_REGS_RBX) = tss->ebx;
	*reg_write(ctxt, VCPU_REGS_RSP) = tss->esp;
	*reg_write(ctxt, VCPU_REGS_RBP) = tss->ebp;
	*reg_write(ctxt, VCPU_REGS_RSI) = tss->esi;
	*reg_write(ctxt, VCPU_REGS_RDI) = tss->edi;
3218 3219 3220

	/*
	 * SDM says that segment selectors are loaded before segment
3221 3222
	 * descriptors.  This is important because CPL checks will
	 * use CS.RPL.
3223
	 */
3224 3225 3226 3227 3228 3229 3230
	set_segment_selector(ctxt, tss->ldt_selector, VCPU_SREG_LDTR);
	set_segment_selector(ctxt, tss->es, VCPU_SREG_ES);
	set_segment_selector(ctxt, tss->cs, VCPU_SREG_CS);
	set_segment_selector(ctxt, tss->ss, VCPU_SREG_SS);
	set_segment_selector(ctxt, tss->ds, VCPU_SREG_DS);
	set_segment_selector(ctxt, tss->fs, VCPU_SREG_FS);
	set_segment_selector(ctxt, tss->gs, VCPU_SREG_GS);
3231

3232 3233 3234 3235 3236
	/*
	 * If we're switching between Protected Mode and VM86, we need to make
	 * sure to update the mode before loading the segment descriptors so
	 * that the selectors are interpreted correctly.
	 */
3237
	if (ctxt->eflags & X86_EFLAGS_VM) {
3238
		ctxt->mode = X86EMUL_MODE_VM86;
3239 3240
		cpl = 3;
	} else {
3241
		ctxt->mode = X86EMUL_MODE_PROT32;
3242 3243
		cpl = tss->cs & 3;
	}
3244

3245 3246 3247 3248
	/*
	 * Now load segment descriptors. If a fault happens at this stage
	 * it is handled in the context of the new task.
	 */
3249
	ret = __load_segment_descriptor(ctxt, tss->ldt_selector, VCPU_SREG_LDTR,
3250
					cpl, X86_TRANSFER_TASK_SWITCH, NULL);
3251 3252
	if (ret != X86EMUL_CONTINUE)
		return ret;
3253
	ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl,
3254
					X86_TRANSFER_TASK_SWITCH, NULL);
3255 3256
	if (ret != X86EMUL_CONTINUE)
		return ret;
3257
	ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl,
3258
					X86_TRANSFER_TASK_SWITCH, NULL);
3259 3260
	if (ret != X86EMUL_CONTINUE)
		return ret;
3261
	ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl,
3262
					X86_TRANSFER_TASK_SWITCH, NULL);
3263 3264
	if (ret != X86EMUL_CONTINUE)
		return ret;
3265
	ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl,
3266
					X86_TRANSFER_TASK_SWITCH, NULL);
3267 3268
	if (ret != X86EMUL_CONTINUE)
		return ret;
3269
	ret = __load_segment_descriptor(ctxt, tss->fs, VCPU_SREG_FS, cpl,
3270
					X86_TRANSFER_TASK_SWITCH, NULL);
3271 3272
	if (ret != X86EMUL_CONTINUE)
		return ret;
3273
	ret = __load_segment_descriptor(ctxt, tss->gs, VCPU_SREG_GS, cpl,
3274
					X86_TRANSFER_TASK_SWITCH, NULL);
3275

3276
	return ret;
3277 3278 3279 3280 3281 3282 3283 3284
}

static int task_switch_32(struct x86_emulate_ctxt *ctxt,
			  u16 tss_selector, u16 old_tss_sel,
			  ulong old_tss_base, struct desc_struct *new_desc)
{
	struct tss_segment_32 tss_seg;
	int ret;
3285
	u32 new_tss_base = get_desc_base(new_desc);
3286 3287
	u32 eip_offset = offsetof(struct tss_segment_32, eip);
	u32 ldt_sel_offset = offsetof(struct tss_segment_32, ldt_selector);
3288

3289
	ret = linear_read_system(ctxt, old_tss_base, &tss_seg, sizeof(tss_seg));
3290
	if (ret != X86EMUL_CONTINUE)
3291 3292
		return ret;

3293
	save_state_to_tss32(ctxt, &tss_seg);
3294

3295
	/* Only GP registers and segment selectors are saved */
3296 3297
	ret = linear_write_system(ctxt, old_tss_base + eip_offset, &tss_seg.eip,
				  ldt_sel_offset - eip_offset);
3298
	if (ret != X86EMUL_CONTINUE)
3299 3300
		return ret;

3301
	ret = linear_read_system(ctxt, new_tss_base, &tss_seg, sizeof(tss_seg));
3302
	if (ret != X86EMUL_CONTINUE)
3303 3304 3305 3306 3307
		return ret;

	if (old_tss_sel != 0xffff) {
		tss_seg.prev_task_link = old_tss_sel;

3308 3309
		ret = linear_write_system(ctxt, new_tss_base,
					  &tss_seg.prev_task_link,
3310
					  sizeof(tss_seg.prev_task_link));
3311
		if (ret != X86EMUL_CONTINUE)
3312 3313 3314
			return ret;
	}

3315
	return load_state_from_tss32(ctxt, &tss_seg);
3316 3317 3318
}

static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt,
3319
				   u16 tss_selector, int idt_index, int reason,
3320
				   bool has_error_code, u32 error_code)
3321
{
3322
	const struct x86_emulate_ops *ops = ctxt->ops;
3323 3324
	struct desc_struct curr_tss_desc, next_tss_desc;
	int ret;
3325
	u16 old_tss_sel = get_segment_selector(ctxt, VCPU_SREG_TR);
3326
	ulong old_tss_base =
3327
		ops->get_cached_segment_base(ctxt, VCPU_SREG_TR);
3328
	u32 desc_limit;
3329
	ulong desc_addr, dr7;
3330 3331 3332

	/* FIXME: old_tss_base == ~0 ? */

3333
	ret = read_segment_descriptor(ctxt, tss_selector, &next_tss_desc, &desc_addr);
3334 3335
	if (ret != X86EMUL_CONTINUE)
		return ret;
3336
	ret = read_segment_descriptor(ctxt, old_tss_sel, &curr_tss_desc, &desc_addr);
3337 3338 3339 3340 3341
	if (ret != X86EMUL_CONTINUE)
		return ret;

	/* FIXME: check that next_tss_desc is tss */

3342 3343 3344 3345 3346
	/*
	 * Check privileges. The three cases are task switch caused by...
	 *
	 * 1. jmp/call/int to task gate: Check against DPL of the task gate
	 * 2. Exception/IRQ/iret: No check is performed
3347 3348
	 * 3. jmp/call to TSS/task-gate: No check is performed since the
	 *    hardware checks it before exiting.
3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364
	 */
	if (reason == TASK_SWITCH_GATE) {
		if (idt_index != -1) {
			/* Software interrupts */
			struct desc_struct task_gate_desc;
			int dpl;

			ret = read_interrupt_descriptor(ctxt, idt_index,
							&task_gate_desc);
			if (ret != X86EMUL_CONTINUE)
				return ret;

			dpl = task_gate_desc.dpl;
			if ((tss_selector & 3) > dpl || ops->cpl(ctxt) > dpl)
				return emulate_gp(ctxt, (idt_index << 3) | 0x2);
		}
3365 3366
	}

3367 3368 3369 3370
	desc_limit = desc_limit_scaled(&next_tss_desc);
	if (!next_tss_desc.p ||
	    ((desc_limit < 0x67 && (next_tss_desc.type & 8)) ||
	     desc_limit < 0x2b)) {
3371
		return emulate_ts(ctxt, tss_selector & 0xfffc);
3372 3373 3374 3375
	}

	if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) {
		curr_tss_desc.type &= ~(1 << 1); /* clear busy flag */
3376
		write_segment_descriptor(ctxt, old_tss_sel, &curr_tss_desc);
3377 3378 3379 3380 3381 3382
	}

	if (reason == TASK_SWITCH_IRET)
		ctxt->eflags = ctxt->eflags & ~X86_EFLAGS_NT;

	/*
	 * Set back link to prev task only if NT bit is set in eflags;
	 * note that old_tss_sel is not used after this point.
	 */
3384 3385 3386 3387
	if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE)
		old_tss_sel = 0xffff;

	if (next_tss_desc.type & 8)
3388
		ret = task_switch_32(ctxt, tss_selector, old_tss_sel,
3389 3390
				     old_tss_base, &next_tss_desc);
	else
3391
		ret = task_switch_16(ctxt, tss_selector, old_tss_sel,
3392
				     old_tss_base, &next_tss_desc);
3393 3394
	if (ret != X86EMUL_CONTINUE)
		return ret;
3395 3396 3397 3398 3399 3400

	if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE)
		ctxt->eflags = ctxt->eflags | X86_EFLAGS_NT;

	if (reason != TASK_SWITCH_IRET) {
		next_tss_desc.type |= (1 << 1); /* set busy flag */
3401
		write_segment_descriptor(ctxt, tss_selector, &next_tss_desc);
3402 3403
	}

3404
	ops->set_cr(ctxt, 0,  ops->get_cr(ctxt, 0) | X86_CR0_TS);
3405
	ops->set_segment(ctxt, tss_selector, &next_tss_desc, 0, VCPU_SREG_TR);
3406

3407
	if (has_error_code) {
3408 3409 3410
		ctxt->op_bytes = ctxt->ad_bytes = (next_tss_desc.type & 8) ? 4 : 2;
		ctxt->lock_prefix = 0;
		ctxt->src.val = (unsigned long) error_code;
3411
		ret = em_push(ctxt);
3412 3413
	}

3414 3415 3416
	ops->get_dr(ctxt, 7, &dr7);
	ops->set_dr(ctxt, 7, dr7 & ~(DR_LOCAL_ENABLE_MASK | DR_LOCAL_SLOWDOWN));

3417 3418 3419 3420
	return ret;
}

int emulator_task_switch(struct x86_emulate_ctxt *ctxt,
3421
			 u16 tss_selector, int idt_index, int reason,
3422
			 bool has_error_code, u32 error_code)
3423 3424 3425
{
	int rc;

3426
	invalidate_registers(ctxt);
3427 3428
	ctxt->_eip = ctxt->eip;
	ctxt->dst.type = OP_NONE;
3429

3430
	rc = emulator_do_task_switch(ctxt, tss_selector, idt_index, reason,
3431
				     has_error_code, error_code);
3432

3433
	if (rc == X86EMUL_CONTINUE) {
3434
		ctxt->eip = ctxt->_eip;
3435 3436
		writeback_registers(ctxt);
	}
3437

3438
	return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK;
3439 3440
}

3441 3442
static void string_addr_inc(struct x86_emulate_ctxt *ctxt, int reg,
		struct operand *op)
3443
{
3444
	int df = (ctxt->eflags & X86_EFLAGS_DF) ? -op->count : op->count;
3445

3446 3447
	register_address_increment(ctxt, reg, df * op->bytes);
	op->addr.mem.ea = register_address(ctxt, reg);
3448 3449
}

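/*
 * DAS: decimal adjust AL after subtraction.  The low nibble is fixed up
 * first (subtract 6, set AF), then the high nibble (subtract 0x60, set
 * CF); PF, ZF and SF are recomputed by OR-ing AL with zero.
 */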
3450 3451 3452 3453 3454 3455
static int em_das(struct x86_emulate_ctxt *ctxt)
{
	u8 al, old_al;
	bool af, cf, old_cf;

	cf = ctxt->eflags & X86_EFLAGS_CF;
3456
	al = ctxt->dst.val;
3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473

	old_al = al;
	old_cf = cf;
	cf = false;
	af = ctxt->eflags & X86_EFLAGS_AF;
	if ((al & 0x0f) > 9 || af) {
		al -= 6;
		cf = old_cf | (al >= 250);
		af = true;
	} else {
		af = false;
	}
	if (old_al > 0x99 || old_cf) {
		al -= 0x60;
		cf = true;
	}

3474
	ctxt->dst.val = al;
3475
	/* Set PF, ZF, SF */
3476 3477 3478
	ctxt->src.type = OP_IMM;
	ctxt->src.val = 0;
	ctxt->src.bytes = 1;
3479
	fastop(ctxt, em_or);
3480 3481 3482 3483 3484 3485 3486 3487
	ctxt->eflags &= ~(X86_EFLAGS_AF | X86_EFLAGS_CF);
	if (cf)
		ctxt->eflags |= X86_EFLAGS_CF;
	if (af)
		ctxt->eflags |= X86_EFLAGS_AF;
	return X86EMUL_CONTINUE;
}

static int em_aam(struct x86_emulate_ctxt *ctxt)
{
	u8 al, ah;

	if (ctxt->src.val == 0)
		return emulate_de(ctxt);

	al = ctxt->dst.val & 0xff;
	ah = al / ctxt->src.val;
	al %= ctxt->src.val;

	ctxt->dst.val = (ctxt->dst.val & 0xffff0000) | al | (ah << 8);

	/* Set PF, ZF, SF */
	ctxt->src.type = OP_IMM;
	ctxt->src.val = 0;
	ctxt->src.bytes = 1;
	fastop(ctxt, em_or);

	return X86EMUL_CONTINUE;
}

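/*
 * AAD: AL = AL + AH * imm8 and AH = 0, i.e. convert two unpacked BCD
 * digits into a binary value; PF, ZF and SF are recomputed from the
 * resulting AL.
 */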
3510 3511 3512 3513 3514 3515 3516 3517 3518
static int em_aad(struct x86_emulate_ctxt *ctxt)
{
	u8 al = ctxt->dst.val & 0xff;
	u8 ah = (ctxt->dst.val >> 8) & 0xff;

	al = (al + (ah * ctxt->src.val)) & 0xff;

	ctxt->dst.val = (ctxt->dst.val & 0xffff0000) | al;

3519 3520 3521 3522 3523
	/* Set PF, ZF, SF */
	ctxt->src.type = OP_IMM;
	ctxt->src.val = 0;
	ctxt->src.bytes = 1;
	fastop(ctxt, em_or);
3524 3525 3526 3527

	return X86EMUL_CONTINUE;
}

3528 3529
static int em_call(struct x86_emulate_ctxt *ctxt)
{
3530
	int rc;
3531 3532 3533
	long rel = ctxt->src.val;

	ctxt->src.val = (unsigned long)ctxt->_eip;
3534 3535 3536
	rc = jmp_rel(ctxt, rel);
	if (rc != X86EMUL_CONTINUE)
		return rc;
3537 3538 3539
	return em_push(ctxt);
}

3540 3541 3542 3543 3544
static int em_call_far(struct x86_emulate_ctxt *ctxt)
{
	u16 sel, old_cs;
	ulong old_eip;
	int rc;
3545 3546 3547
	struct desc_struct old_desc, new_desc;
	const struct x86_emulate_ops *ops = ctxt->ops;
	int cpl = ctxt->ops->cpl(ctxt);
3548
	enum x86emul_mode prev_mode = ctxt->mode;
3549

3550
	old_eip = ctxt->_eip;
3551
	ops->get_segment(ctxt, &old_cs, &old_desc, NULL, VCPU_SREG_CS);
3552

3553
	memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
3554 3555
	rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl,
				       X86_TRANSFER_CALL_JMP, &new_desc);
3556
	if (rc != X86EMUL_CONTINUE)
3557
		return rc;
3558

3559
	rc = assign_eip_far(ctxt, ctxt->src.val, &new_desc);
3560 3561
	if (rc != X86EMUL_CONTINUE)
		goto fail;
3562

3563
	ctxt->src.val = old_cs;
3564
	rc = em_push(ctxt);
3565
	if (rc != X86EMUL_CONTINUE)
3566
		goto fail;
3567

3568
	ctxt->src.val = old_eip;
3569 3570 3571
	rc = em_push(ctxt);
	/*
	 * If we failed, we tainted the memory, but at the very least we
	 * should restore CS.
	 */
	if (rc != X86EMUL_CONTINUE) {
		pr_warn_once("faulting far call emulation tainted memory\n");
		goto fail;
	}
3576 3577 3578
	return rc;
fail:
	ops->set_segment(ctxt, old_cs, &old_desc, 0, VCPU_SREG_CS);
	ctxt->mode = prev_mode;
	return rc;

}

static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt)
{
	int rc;
	unsigned long eip;

	rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
	if (rc != X86EMUL_CONTINUE)
		return rc;
	rc = assign_eip_near(ctxt, eip);
	if (rc != X86EMUL_CONTINUE)
		return rc;
	rsp_increment(ctxt, ctxt->src.val);
	return X86EMUL_CONTINUE;
}

static int em_xchg(struct x86_emulate_ctxt *ctxt)
{
	/* Write back the register source. */
	ctxt->src.val = ctxt->dst.val;
	write_register_operand(&ctxt->src);

	/* Write back the memory destination with implicit LOCK prefix. */
	ctxt->dst.val = ctxt->src.orig_val;
	ctxt->lock_prefix = 1;
	return X86EMUL_CONTINUE;
}

static int em_imul_3op(struct x86_emulate_ctxt *ctxt)
{
	ctxt->dst.val = ctxt->src2.val;
	return fastop(ctxt, em_imul);
}

static int em_cwd(struct x86_emulate_ctxt *ctxt)
{
	ctxt->dst.type = OP_REG;
	ctxt->dst.bytes = ctxt->src.bytes;
	ctxt->dst.addr.reg = reg_rmw(ctxt, VCPU_REGS_RDX);
	ctxt->dst.val = ~((ctxt->src.val >> (ctxt->src.bytes * 8 - 1)) - 1);

	return X86EMUL_CONTINUE;
}
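
/*
 * Illustrative note (not in the original source): the expression above
 * replicates the sign bit of the source, e.g. CWD with AX = 0x8000 writes
 * DX = 0xffff, while AX = 0x1234 writes DX = 0x0000.
 */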

static int em_rdpid(struct x86_emulate_ctxt *ctxt)
{
	u64 tsc_aux = 0;

	if (!ctxt->ops->guest_has_rdpid(ctxt))
		return emulate_ud(ctxt);

	ctxt->ops->get_msr(ctxt, MSR_TSC_AUX, &tsc_aux);
	ctxt->dst.val = tsc_aux;
	return X86EMUL_CONTINUE;
}

static int em_rdtsc(struct x86_emulate_ctxt *ctxt)
{
	u64 tsc = 0;

	ctxt->ops->get_msr(ctxt, MSR_IA32_TSC, &tsc);
	*reg_write(ctxt, VCPU_REGS_RAX) = (u32)tsc;
	*reg_write(ctxt, VCPU_REGS_RDX) = tsc >> 32;
	return X86EMUL_CONTINUE;
}
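
/*
 * Illustrative note (not in the original source): the 64-bit counter is
 * split across EDX:EAX, e.g. a TSC of 0x0000000123456789 is returned as
 * EAX = 0x23456789 and EDX = 0x00000001.
 */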

static int em_rdpmc(struct x86_emulate_ctxt *ctxt)
{
	u64 pmc;

	if (ctxt->ops->read_pmc(ctxt, reg_read(ctxt, VCPU_REGS_RCX), &pmc))
		return emulate_gp(ctxt, 0);
	*reg_write(ctxt, VCPU_REGS_RAX) = (u32)pmc;
	*reg_write(ctxt, VCPU_REGS_RDX) = pmc >> 32;
	return X86EMUL_CONTINUE;
}

static int em_mov(struct x86_emulate_ctxt *ctxt)
{
	memcpy(ctxt->dst.valptr, ctxt->src.valptr, sizeof(ctxt->src.valptr));
	return X86EMUL_CONTINUE;
}

static int em_movbe(struct x86_emulate_ctxt *ctxt)
{
	u16 tmp;

	if (!ctxt->ops->guest_has_movbe(ctxt))
		return emulate_ud(ctxt);

	switch (ctxt->op_bytes) {
	case 2:
		/*
		 * From MOVBE definition: "...When the operand size is 16 bits,
		 * the upper word of the destination register remains unchanged
		 * ..."
		 *
		 * Both casting ->valptr and ->val to u16 breaks strict aliasing
		 * rules so we have to do the operation almost per hand.
		 */
		tmp = (u16)ctxt->src.val;
		ctxt->dst.val &= ~0xffffUL;
		ctxt->dst.val |= (unsigned long)swab16(tmp);
		break;
	case 4:
		ctxt->dst.val = swab32((u32)ctxt->src.val);
		break;
	case 8:
		ctxt->dst.val = swab64(ctxt->src.val);
		break;
	default:
		BUG();
	}
	return X86EMUL_CONTINUE;
}
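
/*
 * Illustrative note (not in the original source): a 16-bit MOVBE with a
 * source value of 0x1234 stores 0x3412 in the low word of the destination
 * and leaves the upper bits of the register unchanged.
 */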

static int em_cr_write(struct x86_emulate_ctxt *ctxt)
{
	if (ctxt->ops->set_cr(ctxt, ctxt->modrm_reg, ctxt->src.val))
		return emulate_gp(ctxt, 0);

	/* Disable writeback. */
	ctxt->dst.type = OP_NONE;
	return X86EMUL_CONTINUE;
}

static int em_dr_write(struct x86_emulate_ctxt *ctxt)
{
	unsigned long val;

	if (ctxt->mode == X86EMUL_MODE_PROT64)
		val = ctxt->src.val & ~0ULL;
	else
		val = ctxt->src.val & ~0U;

	/* #UD condition is already handled. */
	if (ctxt->ops->set_dr(ctxt, ctxt->modrm_reg, val) < 0)
		return emulate_gp(ctxt, 0);

	/* Disable writeback. */
	ctxt->dst.type = OP_NONE;
	return X86EMUL_CONTINUE;
}

static int em_wrmsr(struct x86_emulate_ctxt *ctxt)
{
	u64 msr_index = reg_read(ctxt, VCPU_REGS_RCX);
	u64 msr_data;
	int r;

	msr_data = (u32)reg_read(ctxt, VCPU_REGS_RAX)
		| ((u64)reg_read(ctxt, VCPU_REGS_RDX) << 32);
	r = ctxt->ops->set_msr(ctxt, msr_index, msr_data);

	if (r == X86EMUL_IO_NEEDED)
		return r;

	if (r > 0)
		return emulate_gp(ctxt, 0);

	return r < 0 ? X86EMUL_UNHANDLEABLE : X86EMUL_CONTINUE;
}

static int em_rdmsr(struct x86_emulate_ctxt *ctxt)
{
	u64 msr_index = reg_read(ctxt, VCPU_REGS_RCX);
	u64 msr_data;
	int r;

	r = ctxt->ops->get_msr(ctxt, msr_index, &msr_data);

	if (r == X86EMUL_IO_NEEDED)
		return r;

	if (r)
		return emulate_gp(ctxt, 0);

	*reg_write(ctxt, VCPU_REGS_RAX) = (u32)msr_data;
	*reg_write(ctxt, VCPU_REGS_RDX) = msr_data >> 32;
	return X86EMUL_CONTINUE;
}

static int em_store_sreg(struct x86_emulate_ctxt *ctxt, int segment)
{
	if (segment > VCPU_SREG_GS &&
	    (ctxt->ops->get_cr(ctxt, 4) & X86_CR4_UMIP) &&
	    ctxt->ops->cpl(ctxt) > 0)
		return emulate_gp(ctxt, 0);

	ctxt->dst.val = get_segment_selector(ctxt, segment);
	if (ctxt->dst.bytes == 4 && ctxt->dst.type == OP_MEM)
		ctxt->dst.bytes = 2;
	return X86EMUL_CONTINUE;
}

static int em_mov_rm_sreg(struct x86_emulate_ctxt *ctxt)
{
	if (ctxt->modrm_reg > VCPU_SREG_GS)
		return emulate_ud(ctxt);

	return em_store_sreg(ctxt, ctxt->modrm_reg);
}

static int em_mov_sreg_rm(struct x86_emulate_ctxt *ctxt)
{
	u16 sel = ctxt->src.val;

	if (ctxt->modrm_reg == VCPU_SREG_CS || ctxt->modrm_reg > VCPU_SREG_GS)
		return emulate_ud(ctxt);

	if (ctxt->modrm_reg == VCPU_SREG_SS)
		ctxt->interruptibility = KVM_X86_SHADOW_INT_MOV_SS;

	/* Disable writeback. */
	ctxt->dst.type = OP_NONE;
	return load_segment_descriptor(ctxt, sel, ctxt->modrm_reg);
}

static int em_sldt(struct x86_emulate_ctxt *ctxt)
{
	return em_store_sreg(ctxt, VCPU_SREG_LDTR);
}

static int em_lldt(struct x86_emulate_ctxt *ctxt)
{
	u16 sel = ctxt->src.val;

	/* Disable writeback. */
	ctxt->dst.type = OP_NONE;
	return load_segment_descriptor(ctxt, sel, VCPU_SREG_LDTR);
}

static int em_str(struct x86_emulate_ctxt *ctxt)
{
	return em_store_sreg(ctxt, VCPU_SREG_TR);
}

static int em_ltr(struct x86_emulate_ctxt *ctxt)
{
	u16 sel = ctxt->src.val;

	/* Disable writeback. */
	ctxt->dst.type = OP_NONE;
	return load_segment_descriptor(ctxt, sel, VCPU_SREG_TR);
}

static int em_invlpg(struct x86_emulate_ctxt *ctxt)
{
	int rc;
	ulong linear;

	rc = linearize(ctxt, ctxt->src.addr.mem, 1, false, &linear);
	if (rc == X86EMUL_CONTINUE)
		ctxt->ops->invlpg(ctxt, linear);
	/* Disable writeback. */
	ctxt->dst.type = OP_NONE;
	return X86EMUL_CONTINUE;
}

static int em_clts(struct x86_emulate_ctxt *ctxt)
{
	ulong cr0;

	cr0 = ctxt->ops->get_cr(ctxt, 0);
	cr0 &= ~X86_CR0_TS;
	ctxt->ops->set_cr(ctxt, 0, cr0);
	return X86EMUL_CONTINUE;
}

static int em_hypercall(struct x86_emulate_ctxt *ctxt)
{
	int rc = ctxt->ops->fix_hypercall(ctxt);

	if (rc != X86EMUL_CONTINUE)
		return rc;

	/* Let the processor re-execute the fixed hypercall */
	ctxt->_eip = ctxt->eip;
	/* Disable writeback. */
	ctxt->dst.type = OP_NONE;
	return X86EMUL_CONTINUE;
}

static int emulate_store_desc_ptr(struct x86_emulate_ctxt *ctxt,
				  void (*get)(struct x86_emulate_ctxt *ctxt,
					      struct desc_ptr *ptr))
{
	struct desc_ptr desc_ptr;

	if ((ctxt->ops->get_cr(ctxt, 4) & X86_CR4_UMIP) &&
	    ctxt->ops->cpl(ctxt) > 0)
		return emulate_gp(ctxt, 0);

	if (ctxt->mode == X86EMUL_MODE_PROT64)
		ctxt->op_bytes = 8;
	get(ctxt, &desc_ptr);
	if (ctxt->op_bytes == 2) {
		ctxt->op_bytes = 4;
		desc_ptr.address &= 0x00ffffff;
	}
	/* Disable writeback. */
	ctxt->dst.type = OP_NONE;
	return segmented_write_std(ctxt, ctxt->dst.addr.mem,
				   &desc_ptr, 2 + ctxt->op_bytes);
}

static int em_sgdt(struct x86_emulate_ctxt *ctxt)
{
	return emulate_store_desc_ptr(ctxt, ctxt->ops->get_gdt);
}

static int em_sidt(struct x86_emulate_ctxt *ctxt)
{
	return emulate_store_desc_ptr(ctxt, ctxt->ops->get_idt);
}

static int em_lgdt_lidt(struct x86_emulate_ctxt *ctxt, bool lgdt)
{
	struct desc_ptr desc_ptr;
	int rc;

	if (ctxt->mode == X86EMUL_MODE_PROT64)
		ctxt->op_bytes = 8;
	rc = read_descriptor(ctxt, ctxt->src.addr.mem,
			     &desc_ptr.size, &desc_ptr.address,
			     ctxt->op_bytes);
	if (rc != X86EMUL_CONTINUE)
		return rc;
	if (ctxt->mode == X86EMUL_MODE_PROT64 &&
	    emul_is_noncanonical_address(desc_ptr.address, ctxt))
		return emulate_gp(ctxt, 0);
	if (lgdt)
		ctxt->ops->set_gdt(ctxt, &desc_ptr);
	else
		ctxt->ops->set_idt(ctxt, &desc_ptr);
	/* Disable writeback. */
	ctxt->dst.type = OP_NONE;
	return X86EMUL_CONTINUE;
}

static int em_lgdt(struct x86_emulate_ctxt *ctxt)
{
	return em_lgdt_lidt(ctxt, true);
}

static int em_lidt(struct x86_emulate_ctxt *ctxt)
{
	return em_lgdt_lidt(ctxt, false);
}

static int em_smsw(struct x86_emulate_ctxt *ctxt)
{
	if ((ctxt->ops->get_cr(ctxt, 4) & X86_CR4_UMIP) &&
	    ctxt->ops->cpl(ctxt) > 0)
		return emulate_gp(ctxt, 0);

	if (ctxt->dst.type == OP_MEM)
		ctxt->dst.bytes = 2;
	ctxt->dst.val = ctxt->ops->get_cr(ctxt, 0);
	return X86EMUL_CONTINUE;
}

static int em_lmsw(struct x86_emulate_ctxt *ctxt)
{
	ctxt->ops->set_cr(ctxt, 0, (ctxt->ops->get_cr(ctxt, 0) & ~0x0eul)
			  | (ctxt->src.val & 0x0f));
	ctxt->dst.type = OP_NONE;
	return X86EMUL_CONTINUE;
}

static int em_loop(struct x86_emulate_ctxt *ctxt)
{
	int rc = X86EMUL_CONTINUE;

	register_address_increment(ctxt, VCPU_REGS_RCX, -1);
	if ((address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) != 0) &&
	    (ctxt->b == 0xe2 || test_cc(ctxt->b ^ 0x5, ctxt->eflags)))
		rc = jmp_rel(ctxt, ctxt->src.val);

	return rc;
}

static int em_jcxz(struct x86_emulate_ctxt *ctxt)
{
	int rc = X86EMUL_CONTINUE;

	if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0)
		rc = jmp_rel(ctxt, ctxt->src.val);

	return rc;
}

static int em_in(struct x86_emulate_ctxt *ctxt)
{
	if (!pio_in_emulated(ctxt, ctxt->dst.bytes, ctxt->src.val,
			     &ctxt->dst.val))
		return X86EMUL_IO_NEEDED;

	return X86EMUL_CONTINUE;
}

static int em_out(struct x86_emulate_ctxt *ctxt)
{
	ctxt->ops->pio_out_emulated(ctxt, ctxt->src.bytes, ctxt->dst.val,
				    &ctxt->src.val, 1);
	/* Disable writeback. */
	ctxt->dst.type = OP_NONE;
	return X86EMUL_CONTINUE;
}

static int em_cli(struct x86_emulate_ctxt *ctxt)
{
	if (emulator_bad_iopl(ctxt))
		return emulate_gp(ctxt, 0);

	ctxt->eflags &= ~X86_EFLAGS_IF;
	return X86EMUL_CONTINUE;
}

static int em_sti(struct x86_emulate_ctxt *ctxt)
{
	if (emulator_bad_iopl(ctxt))
		return emulate_gp(ctxt, 0);

	ctxt->interruptibility = KVM_X86_SHADOW_INT_STI;
	ctxt->eflags |= X86_EFLAGS_IF;
	return X86EMUL_CONTINUE;
}

static int em_cpuid(struct x86_emulate_ctxt *ctxt)
{
	u32 eax, ebx, ecx, edx;
	u64 msr = 0;

	ctxt->ops->get_msr(ctxt, MSR_MISC_FEATURES_ENABLES, &msr);
	if (msr & MSR_MISC_FEATURES_ENABLES_CPUID_FAULT &&
	    ctxt->ops->cpl(ctxt)) {
		return emulate_gp(ctxt, 0);
	}

	eax = reg_read(ctxt, VCPU_REGS_RAX);
	ecx = reg_read(ctxt, VCPU_REGS_RCX);
	ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, false);
	*reg_write(ctxt, VCPU_REGS_RAX) = eax;
	*reg_write(ctxt, VCPU_REGS_RBX) = ebx;
	*reg_write(ctxt, VCPU_REGS_RCX) = ecx;
	*reg_write(ctxt, VCPU_REGS_RDX) = edx;
	return X86EMUL_CONTINUE;
}
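
/*
 * Illustrative note (not in the original source): with CPUID faulting
 * enabled in MSR_MISC_FEATURES_ENABLES, a CPUID executed at CPL > 0 is
 * turned into #GP(0) above instead of being emulated.
 */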

static int em_sahf(struct x86_emulate_ctxt *ctxt)
{
	u32 flags;

	flags = X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF | X86_EFLAGS_ZF |
		X86_EFLAGS_SF;
	flags &= *reg_rmw(ctxt, VCPU_REGS_RAX) >> 8;

	ctxt->eflags &= ~0xffUL;
	ctxt->eflags |= flags | X86_EFLAGS_FIXED;
	return X86EMUL_CONTINUE;
}

static int em_lahf(struct x86_emulate_ctxt *ctxt)
{
	*reg_rmw(ctxt, VCPU_REGS_RAX) &= ~0xff00UL;
	*reg_rmw(ctxt, VCPU_REGS_RAX) |= (ctxt->eflags & 0xff) << 8;
	return X86EMUL_CONTINUE;
}

static int em_bswap(struct x86_emulate_ctxt *ctxt)
{
	switch (ctxt->op_bytes) {
#ifdef CONFIG_X86_64
	case 8:
		asm("bswap %0" : "+r"(ctxt->dst.val));
		break;
#endif
	default:
		asm("bswap %0" : "+r"(*(u32 *)&ctxt->dst.val));
		break;
	}
	return X86EMUL_CONTINUE;
}
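
/*
 * Illustrative note (not in the original source): a 32-bit BSWAP of
 * 0x12345678 yields 0x78563412; in 64-bit mode with REX.W the full 8-byte
 * operand is byte-reversed.
 */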

static int em_clflush(struct x86_emulate_ctxt *ctxt)
{
	/* emulating clflush regardless of cpuid */
	return X86EMUL_CONTINUE;
}

static int em_clflushopt(struct x86_emulate_ctxt *ctxt)
{
	/* emulating clflushopt regardless of cpuid */
	return X86EMUL_CONTINUE;
}

static int em_movsxd(struct x86_emulate_ctxt *ctxt)
{
	ctxt->dst.val = (s32) ctxt->src.val;
	return X86EMUL_CONTINUE;
}

static int check_fxsr(struct x86_emulate_ctxt *ctxt)
{
	if (!ctxt->ops->guest_has_fxsr(ctxt))
		return emulate_ud(ctxt);

	if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
		return emulate_nm(ctxt);

	/*
	 * Don't emulate a case that should never be hit, instead of working
	 * around a lack of fxsave64/fxrstor64 on old compilers.
	 */
	if (ctxt->mode >= X86EMUL_MODE_PROT64)
		return X86EMUL_UNHANDLEABLE;

	return X86EMUL_CONTINUE;
}

/*
 * Hardware doesn't save and restore XMM 0-7 without CR4.OSFXSR, but does save
 * and restore MXCSR.
 */
static size_t __fxstate_size(int nregs)
{
	return offsetof(struct fxregs_state, xmm_space[0]) + nregs * 16;
}

static inline size_t fxstate_size(struct x86_emulate_ctxt *ctxt)
{
	bool cr4_osfxsr;
	if (ctxt->mode == X86EMUL_MODE_PROT64)
		return __fxstate_size(16);

	cr4_osfxsr = ctxt->ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR;
	return __fxstate_size(cr4_osfxsr ? 8 : 0);
}
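
/*
 * Illustrative note (not in the original source, sizes assume the legacy
 * FXSAVE layout where xmm_space starts 160 bytes into struct fxregs_state):
 * fxstate_size() then evaluates to 416 bytes in 64-bit mode (16 XMM
 * registers), 288 bytes with CR4.OSFXSR set in 32-bit mode, and 160 bytes
 * otherwise.
 */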

/*
 * FXSAVE and FXRSTOR have 4 different formats depending on execution mode,
 *  1) 16 bit mode
 *  2) 32 bit mode
 *     - like (1), but FIP and FDP (foo) are only 16 bit.  At least Intel CPUs
 *       preserve whole 32 bit values, though, so (1) and (2) are the same wrt.
 *       save and restore
 *  3) 64-bit mode with REX.W prefix
 *     - like (2), but XMM 8-15 are being saved and restored
 *  4) 64-bit mode without REX.W prefix
 *     - like (3), but FIP and FDP are 64 bit
 *
 * Emulation uses (3) for (1) and (2) and preserves XMM 8-15 to reach the
 * desired result.  (4) is not emulated.
 *
 * Note: Guest and host CPUID.(EAX=07H,ECX=0H):EBX[bit 13] (deprecate FPU CS
 * and FPU DS) should match.
 */
static int em_fxsave(struct x86_emulate_ctxt *ctxt)
{
	struct fxregs_state fx_state;
	int rc;

	rc = check_fxsr(ctxt);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	emulator_get_fpu();

	rc = asm_safe("fxsave %[fx]", , [fx] "+m"(fx_state));

	emulator_put_fpu();

	if (rc != X86EMUL_CONTINUE)
		return rc;

	return segmented_write_std(ctxt, ctxt->memop.addr.mem, &fx_state,
		                   fxstate_size(ctxt));
}

/*
 * FXRSTOR might restore XMM registers not provided by the guest. Fill
 * in the host registers (via FXSAVE) instead, so they won't be modified.
 * (preemption has to stay disabled until FXRSTOR).
 *
 * Use noinline to keep the stack for other functions called by callers small.
 */
static noinline int fxregs_fixup(struct fxregs_state *fx_state,
				 const size_t used_size)
{
	struct fxregs_state fx_tmp;
	int rc;

	rc = asm_safe("fxsave %[fx]", , [fx] "+m"(fx_tmp));
	memcpy((void *)fx_state + used_size, (void *)&fx_tmp + used_size,
	       __fxstate_size(16) - used_size);

	return rc;
}

static int em_fxrstor(struct x86_emulate_ctxt *ctxt)
{
	struct fxregs_state fx_state;
	int rc;
	size_t size;

	rc = check_fxsr(ctxt);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	size = fxstate_size(ctxt);
	rc = segmented_read_std(ctxt, ctxt->memop.addr.mem, &fx_state, size);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	emulator_get_fpu();

	if (size < __fxstate_size(16)) {
		rc = fxregs_fixup(&fx_state, size);
		if (rc != X86EMUL_CONTINUE)
			goto out;
	}

	if (fx_state.mxcsr >> 16) {
		rc = emulate_gp(ctxt, 0);
		goto out;
	}

	if (rc == X86EMUL_CONTINUE)
		rc = asm_safe("fxrstor %[fx]", : [fx] "m"(fx_state));

out:
	emulator_put_fpu();

	return rc;
}

static int em_xsetbv(struct x86_emulate_ctxt *ctxt)
{
	u32 eax, ecx, edx;

	eax = reg_read(ctxt, VCPU_REGS_RAX);
	edx = reg_read(ctxt, VCPU_REGS_RDX);
	ecx = reg_read(ctxt, VCPU_REGS_RCX);

	if (ctxt->ops->set_xcr(ctxt, ecx, ((u64)edx << 32) | eax))
		return emulate_gp(ctxt, 0);

	return X86EMUL_CONTINUE;
}

static bool valid_cr(int nr)
{
	switch (nr) {
	case 0:
	case 2 ... 4:
	case 8:
		return true;
	default:
		return false;
	}
}

static int check_cr_access(struct x86_emulate_ctxt *ctxt)
{
	if (!valid_cr(ctxt->modrm_reg))
		return emulate_ud(ctxt);

	return X86EMUL_CONTINUE;
}

static int check_dr7_gd(struct x86_emulate_ctxt *ctxt)
{
	unsigned long dr7;

	ctxt->ops->get_dr(ctxt, 7, &dr7);

	/* Check if DR7.Global_Enable is set */
	return dr7 & (1 << 13);
}

static int check_dr_read(struct x86_emulate_ctxt *ctxt)
{
	int dr = ctxt->modrm_reg;
	u64 cr4;

	if (dr > 7)
		return emulate_ud(ctxt);

	cr4 = ctxt->ops->get_cr(ctxt, 4);
	if ((cr4 & X86_CR4_DE) && (dr == 4 || dr == 5))
		return emulate_ud(ctxt);

	if (check_dr7_gd(ctxt)) {
		ulong dr6;

		ctxt->ops->get_dr(ctxt, 6, &dr6);
		dr6 &= ~DR_TRAP_BITS;
		dr6 |= DR6_BD | DR6_RTM;
		ctxt->ops->set_dr(ctxt, 6, dr6);
		return emulate_db(ctxt);
	}

	return X86EMUL_CONTINUE;
}

static int check_dr_write(struct x86_emulate_ctxt *ctxt)
{
	u64 new_val = ctxt->src.val64;
	int dr = ctxt->modrm_reg;

	if ((dr == 6 || dr == 7) && (new_val & 0xffffffff00000000ULL))
		return emulate_gp(ctxt, 0);

	return check_dr_read(ctxt);
}

static int check_svme(struct x86_emulate_ctxt *ctxt)
{
	u64 efer = 0;

	ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);

	if (!(efer & EFER_SVME))
		return emulate_ud(ctxt);

	return X86EMUL_CONTINUE;
}

static int check_svme_pa(struct x86_emulate_ctxt *ctxt)
{
	u64 rax = reg_read(ctxt, VCPU_REGS_RAX);

	/* Valid physical address? */
	if (rax & 0xffff000000000000ULL)
		return emulate_gp(ctxt, 0);

	return check_svme(ctxt);
}

static int check_rdtsc(struct x86_emulate_ctxt *ctxt)
{
	u64 cr4 = ctxt->ops->get_cr(ctxt, 4);

	if (cr4 & X86_CR4_TSD && ctxt->ops->cpl(ctxt))
		return emulate_ud(ctxt);

	return X86EMUL_CONTINUE;
}

static int check_rdpmc(struct x86_emulate_ctxt *ctxt)
{
	u64 cr4 = ctxt->ops->get_cr(ctxt, 4);
	u64 rcx = reg_read(ctxt, VCPU_REGS_RCX);

	/*
	 * VMware allows access to these Pseudo-PMCs even when read via RDPMC
	 * in Ring3 when CR4.PCE=0.
	 */
	if (enable_vmware_backdoor && is_vmware_backdoor_pmc(rcx))
		return X86EMUL_CONTINUE;

	if ((!(cr4 & X86_CR4_PCE) && ctxt->ops->cpl(ctxt)) ||
	    ctxt->ops->check_pmc(ctxt, rcx))
		return emulate_gp(ctxt, 0);

	return X86EMUL_CONTINUE;
}

static int check_perm_in(struct x86_emulate_ctxt *ctxt)
{
	ctxt->dst.bytes = min(ctxt->dst.bytes, 4u);
	if (!emulator_io_permited(ctxt, ctxt->src.val, ctxt->dst.bytes))
		return emulate_gp(ctxt, 0);

	return X86EMUL_CONTINUE;
}

static int check_perm_out(struct x86_emulate_ctxt *ctxt)
{
	ctxt->src.bytes = min(ctxt->src.bytes, 4u);
	if (!emulator_io_permited(ctxt, ctxt->dst.val, ctxt->src.bytes))
		return emulate_gp(ctxt, 0);

	return X86EMUL_CONTINUE;
}

#define D(_y) { .flags = (_y) }
#define DI(_y, _i) { .flags = (_y)|Intercept, .intercept = x86_intercept_##_i }
#define DIP(_y, _i, _p) { .flags = (_y)|Intercept|CheckPerm, \
		      .intercept = x86_intercept_##_i, .check_perm = (_p) }
#define N    D(NotImpl)
#define EXT(_f, _e) { .flags = ((_f) | RMExt), .u.group = (_e) }
#define G(_f, _g) { .flags = ((_f) | Group | ModRM), .u.group = (_g) }
#define GD(_f, _g) { .flags = ((_f) | GroupDual | ModRM), .u.gdual = (_g) }
#define ID(_f, _i) { .flags = ((_f) | InstrDual | ModRM), .u.idual = (_i) }
#define MD(_f, _m) { .flags = ((_f) | ModeDual), .u.mdual = (_m) }
#define E(_f, _e) { .flags = ((_f) | Escape | ModRM), .u.esc = (_e) }
#define I(_f, _e) { .flags = (_f), .u.execute = (_e) }
#define F(_f, _e) { .flags = (_f) | Fastop, .u.fastop = (_e) }
#define II(_f, _e, _i) \
	{ .flags = (_f)|Intercept, .u.execute = (_e), .intercept = x86_intercept_##_i }
#define IIP(_f, _e, _i, _p) \
	{ .flags = (_f)|Intercept|CheckPerm, .u.execute = (_e), \
	  .intercept = x86_intercept_##_i, .check_perm = (_p) }
#define GP(_f, _g) { .flags = ((_f) | Prefix), .u.gprefix = (_g) }

#define D2bv(_f)      D((_f) | ByteOp), D(_f)
#define D2bvIP(_f, _i, _p) DIP((_f) | ByteOp, _i, _p), DIP(_f, _i, _p)
#define I2bv(_f, _e)  I((_f) | ByteOp, _e), I(_f, _e)
#define F2bv(_f, _e)  F((_f) | ByteOp, _e), F(_f, _e)
#define I2bvIP(_f, _e, _i, _p) \
	IIP((_f) | ByteOp, _e, _i, _p), IIP(_f, _e, _i, _p)

#define F6ALU(_f, _e) F2bv((_f) | DstMem | SrcReg | ModRM, _e),		\
		F2bv(((_f) | DstReg | SrcMem | ModRM) & ~Lock, _e),	\
		F2bv(((_f) & ~Lock) | DstAcc | SrcImm, _e)

static const struct opcode group7_rm0[] = {
	N,
	I(SrcNone | Priv | EmulateOnUD,	em_hypercall),
	N, N, N, N, N, N,
};

static const struct opcode group7_rm1[] = {
	DI(SrcNone | Priv, monitor),
	DI(SrcNone | Priv, mwait),
	N, N, N, N, N, N,
};

static const struct opcode group7_rm2[] = {
	N,
	II(ImplicitOps | Priv,			em_xsetbv,	xsetbv),
	N, N, N, N, N, N,
};

static const struct opcode group7_rm3[] = {
	DIP(SrcNone | Prot | Priv,		vmrun,		check_svme_pa),
	II(SrcNone  | Prot | EmulateOnUD,	em_hypercall,	vmmcall),
	DIP(SrcNone | Prot | Priv,		vmload,		check_svme_pa),
	DIP(SrcNone | Prot | Priv,		vmsave,		check_svme_pa),
	DIP(SrcNone | Prot | Priv,		stgi,		check_svme),
	DIP(SrcNone | Prot | Priv,		clgi,		check_svme),
	DIP(SrcNone | Prot | Priv,		skinit,		check_svme),
	DIP(SrcNone | Prot | Priv,		invlpga,	check_svme),
};

static const struct opcode group7_rm7[] = {
	N,
	DIP(SrcNone, rdtscp, check_rdtsc),
	N, N, N, N, N, N,
};

static const struct opcode group1[] = {
	F(Lock, em_add),
	F(Lock | PageTable, em_or),
	F(Lock, em_adc),
	F(Lock, em_sbb),
	F(Lock | PageTable, em_and),
	F(Lock, em_sub),
	F(Lock, em_xor),
	F(NoWrite, em_cmp),
};

static const struct opcode group1A[] = {
	I(DstMem | SrcNone | Mov | Stack | IncSP | TwoMemOp, em_pop), N, N, N, N, N, N, N,
};

static const struct opcode group2[] = {
	F(DstMem | ModRM, em_rol),
	F(DstMem | ModRM, em_ror),
	F(DstMem | ModRM, em_rcl),
	F(DstMem | ModRM, em_rcr),
	F(DstMem | ModRM, em_shl),
	F(DstMem | ModRM, em_shr),
	F(DstMem | ModRM, em_shl),
	F(DstMem | ModRM, em_sar),
};

static const struct opcode group3[] = {
	F(DstMem | SrcImm | NoWrite, em_test),
	F(DstMem | SrcImm | NoWrite, em_test),
	F(DstMem | SrcNone | Lock, em_not),
	F(DstMem | SrcNone | Lock, em_neg),
	F(DstXacc | Src2Mem, em_mul_ex),
	F(DstXacc | Src2Mem, em_imul_ex),
	F(DstXacc | Src2Mem, em_div_ex),
	F(DstXacc | Src2Mem, em_idiv_ex),
};

static const struct opcode group4[] = {
	F(ByteOp | DstMem | SrcNone | Lock, em_inc),
	F(ByteOp | DstMem | SrcNone | Lock, em_dec),
	N, N, N, N, N, N,
};

static const struct opcode group5[] = {
	F(DstMem | SrcNone | Lock,		em_inc),
	F(DstMem | SrcNone | Lock,		em_dec),
	I(SrcMem | NearBranch,			em_call_near_abs),
	I(SrcMemFAddr | ImplicitOps,		em_call_far),
	I(SrcMem | NearBranch,			em_jmp_abs),
	I(SrcMemFAddr | ImplicitOps,		em_jmp_far),
	I(SrcMem | Stack | TwoMemOp,		em_push), D(Undefined),
};

static const struct opcode group6[] = {
	II(Prot | DstMem,	   em_sldt, sldt),
	II(Prot | DstMem,	   em_str, str),
	II(Prot | Priv | SrcMem16, em_lldt, lldt),
	II(Prot | Priv | SrcMem16, em_ltr, ltr),
	N, N, N, N,
};

static const struct group_dual group7 = { {
	II(Mov | DstMem,			em_sgdt, sgdt),
	II(Mov | DstMem,			em_sidt, sidt),
	II(SrcMem | Priv,			em_lgdt, lgdt),
	II(SrcMem | Priv,			em_lidt, lidt),
	II(SrcNone | DstMem | Mov,		em_smsw, smsw), N,
	II(SrcMem16 | Mov | Priv,		em_lmsw, lmsw),
	II(SrcMem | ByteOp | Priv | NoAccess,	em_invlpg, invlpg),
}, {
	EXT(0, group7_rm0),
	EXT(0, group7_rm1),
	EXT(0, group7_rm2),
	EXT(0, group7_rm3),
	II(SrcNone | DstMem | Mov,		em_smsw, smsw), N,
	II(SrcMem16 | Mov | Priv,		em_lmsw, lmsw),
	EXT(0, group7_rm7),
} };

static const struct opcode group8[] = {
	N, N, N, N,
	F(DstMem | SrcImmByte | NoWrite,		em_bt),
	F(DstMem | SrcImmByte | Lock | PageTable,	em_bts),
	F(DstMem | SrcImmByte | Lock,			em_btr),
	F(DstMem | SrcImmByte | Lock | PageTable,	em_btc),
};

/*
 * The "memory" destination is actually always a register, since we come
 * from the register case of group9.
 */
static const struct gprefix pfx_0f_c7_7 = {
	N, N, N, II(DstMem | ModRM | Op3264 | EmulateOnUD, em_rdpid, rdpid),
};


static const struct group_dual group9 = { {
	N, I(DstMem64 | Lock | PageTable, em_cmpxchg8b), N, N, N, N, N, N,
}, {
	N, N, N, N, N, N, N,
	GP(0, &pfx_0f_c7_7),
} };

static const struct opcode group11[] = {
	I(DstMem | SrcImm | Mov | PageTable, em_mov),
	X7(D(Undefined)),
};

static const struct gprefix pfx_0f_ae_7 = {
	I(SrcMem | ByteOp, em_clflush), I(SrcMem | ByteOp, em_clflushopt), N, N,
};

static const struct group_dual group15 = { {
	I(ModRM | Aligned16, em_fxsave),
	I(ModRM | Aligned16, em_fxrstor),
	N, N, N, N, N, GP(0, &pfx_0f_ae_7),
}, {
	N, N, N, N, N, N, N, N,
} };

static const struct gprefix pfx_0f_6f_0f_7f = {
	I(Mmx, em_mov), I(Sse | Aligned, em_mov), N, I(Sse | Unaligned, em_mov),
};

static const struct instr_dual instr_dual_0f_2b = {
	I(0, em_mov), N
};

static const struct gprefix pfx_0f_2b = {
	ID(0, &instr_dual_0f_2b), ID(0, &instr_dual_0f_2b), N, N,
};

static const struct gprefix pfx_0f_10_0f_11 = {
	I(Unaligned, em_mov), I(Unaligned, em_mov), N, N,
};

static const struct gprefix pfx_0f_28_0f_29 = {
	I(Aligned, em_mov), I(Aligned, em_mov), N, N,
};

static const struct gprefix pfx_0f_e7 = {
	N, I(Sse, em_mov), N, N,
};

static const struct escape escape_d9 = { {
	N, N, N, N, N, N, N, I(DstMem16 | Mov, em_fnstcw),
}, {
	/* 0xC0 - 0xC7 */
	N, N, N, N, N, N, N, N,
	/* 0xC8 - 0xCF */
	N, N, N, N, N, N, N, N,
	/* 0xD0 - 0xD7 */
	N, N, N, N, N, N, N, N,
	/* 0xD8 - 0xDF */
	N, N, N, N, N, N, N, N,
	/* 0xE0 - 0xE7 */
	N, N, N, N, N, N, N, N,
	/* 0xE8 - 0xEF */
	N, N, N, N, N, N, N, N,
	/* 0xF0 - 0xF7 */
	N, N, N, N, N, N, N, N,
	/* 0xF8 - 0xFF */
	N, N, N, N, N, N, N, N,
} };

static const struct escape escape_db = { {
	N, N, N, N, N, N, N, N,
}, {
	/* 0xC0 - 0xC7 */
	N, N, N, N, N, N, N, N,
	/* 0xC8 - 0xCF */
	N, N, N, N, N, N, N, N,
	/* 0xD0 - 0xD7 */
	N, N, N, N, N, N, N, N,
	/* 0xD8 - 0xDF */
	N, N, N, N, N, N, N, N,
	/* 0xE0 - 0xE7 */
	N, N, N, I(ImplicitOps, em_fninit), N, N, N, N,
	/* 0xE8 - 0xEF */
	N, N, N, N, N, N, N, N,
	/* 0xF0 - 0xF7 */
	N, N, N, N, N, N, N, N,
	/* 0xF8 - 0xFF */
	N, N, N, N, N, N, N, N,
} };

static const struct escape escape_dd = { {
	N, N, N, N, N, N, N, I(DstMem16 | Mov, em_fnstsw),
}, {
	/* 0xC0 - 0xC7 */
	N, N, N, N, N, N, N, N,
	/* 0xC8 - 0xCF */
	N, N, N, N, N, N, N, N,
	/* 0xD0 - 0xD7 */
	N, N, N, N, N, N, N, N,
	/* 0xD8 - 0xDF */
	N, N, N, N, N, N, N, N,
	/* 0xE0 - 0xE7 */
	N, N, N, N, N, N, N, N,
	/* 0xE8 - 0xEF */
	N, N, N, N, N, N, N, N,
	/* 0xF0 - 0xF7 */
	N, N, N, N, N, N, N, N,
	/* 0xF8 - 0xFF */
	N, N, N, N, N, N, N, N,
} };

static const struct instr_dual instr_dual_0f_c3 = {
	I(DstMem | SrcReg | ModRM | No16 | Mov, em_mov), N
};

static const struct mode_dual mode_dual_63 = {
	N, I(DstReg | SrcMem32 | ModRM | Mov, em_movsxd)
};

static const struct opcode opcode_table[256] = {
	/* 0x00 - 0x07 */
	F6ALU(Lock, em_add),
	I(ImplicitOps | Stack | No64 | Src2ES, em_push_sreg),
	I(ImplicitOps | Stack | No64 | Src2ES, em_pop_sreg),
	/* 0x08 - 0x0F */
	F6ALU(Lock | PageTable, em_or),
	I(ImplicitOps | Stack | No64 | Src2CS, em_push_sreg),
	N,
	/* 0x10 - 0x17 */
	F6ALU(Lock, em_adc),
	I(ImplicitOps | Stack | No64 | Src2SS, em_push_sreg),
	I(ImplicitOps | Stack | No64 | Src2SS, em_pop_sreg),
	/* 0x18 - 0x1F */
	F6ALU(Lock, em_sbb),
	I(ImplicitOps | Stack | No64 | Src2DS, em_push_sreg),
	I(ImplicitOps | Stack | No64 | Src2DS, em_pop_sreg),
	/* 0x20 - 0x27 */
	F6ALU(Lock | PageTable, em_and), N, N,
	/* 0x28 - 0x2F */
	F6ALU(Lock, em_sub), N, I(ByteOp | DstAcc | No64, em_das),
	/* 0x30 - 0x37 */
	F6ALU(Lock, em_xor), N, N,
	/* 0x38 - 0x3F */
	F6ALU(NoWrite, em_cmp), N, N,
	/* 0x40 - 0x4F */
	X8(F(DstReg, em_inc)), X8(F(DstReg, em_dec)),
	/* 0x50 - 0x57 */
	X8(I(SrcReg | Stack, em_push)),
	/* 0x58 - 0x5F */
	X8(I(DstReg | Stack, em_pop)),
	/* 0x60 - 0x67 */
	I(ImplicitOps | Stack | No64, em_pusha),
	I(ImplicitOps | Stack | No64, em_popa),
	N, MD(ModRM, &mode_dual_63),
	N, N, N, N,
	/* 0x68 - 0x6F */
	I(SrcImm | Mov | Stack, em_push),
	I(DstReg | SrcMem | ModRM | Src2Imm, em_imul_3op),
	I(SrcImmByte | Mov | Stack, em_push),
	I(DstReg | SrcMem | ModRM | Src2ImmByte, em_imul_3op),
	I2bvIP(DstDI | SrcDX | Mov | String | Unaligned, em_in, ins, check_perm_in), /* insb, insw/insd */
	I2bvIP(SrcSI | DstDX | String, em_out, outs, check_perm_out), /* outsb, outsw/outsd */
	/* 0x70 - 0x7F */
	X16(D(SrcImmByte | NearBranch)),
	/* 0x80 - 0x87 */
	G(ByteOp | DstMem | SrcImm, group1),
	G(DstMem | SrcImm, group1),
	G(ByteOp | DstMem | SrcImm | No64, group1),
	G(DstMem | SrcImmByte, group1),
	F2bv(DstMem | SrcReg | ModRM | NoWrite, em_test),
	I2bv(DstMem | SrcReg | ModRM | Lock | PageTable, em_xchg),
	/* 0x88 - 0x8F */
	I2bv(DstMem | SrcReg | ModRM | Mov | PageTable, em_mov),
	I2bv(DstReg | SrcMem | ModRM | Mov, em_mov),
	I(DstMem | SrcNone | ModRM | Mov | PageTable, em_mov_rm_sreg),
	D(ModRM | SrcMem | NoAccess | DstReg),
	I(ImplicitOps | SrcMem16 | ModRM, em_mov_sreg_rm),
	G(0, group1A),
	/* 0x90 - 0x97 */
	DI(SrcAcc | DstReg, pause), X7(D(SrcAcc | DstReg)),
	/* 0x98 - 0x9F */
	D(DstAcc | SrcNone), I(ImplicitOps | SrcAcc, em_cwd),
	I(SrcImmFAddr | No64, em_call_far), N,
	II(ImplicitOps | Stack, em_pushf, pushf),
	II(ImplicitOps | Stack, em_popf, popf),
	I(ImplicitOps, em_sahf), I(ImplicitOps, em_lahf),
	/* 0xA0 - 0xA7 */
	I2bv(DstAcc | SrcMem | Mov | MemAbs, em_mov),
	I2bv(DstMem | SrcAcc | Mov | MemAbs | PageTable, em_mov),
	I2bv(SrcSI | DstDI | Mov | String | TwoMemOp, em_mov),
	F2bv(SrcSI | DstDI | String | NoWrite | TwoMemOp, em_cmp_r),
	/* 0xA8 - 0xAF */
	F2bv(DstAcc | SrcImm | NoWrite, em_test),
	I2bv(SrcAcc | DstDI | Mov | String, em_mov),
	I2bv(SrcSI | DstAcc | Mov | String, em_mov),
	F2bv(SrcAcc | DstDI | String | NoWrite, em_cmp_r),
	/* 0xB0 - 0xB7 */
	X8(I(ByteOp | DstReg | SrcImm | Mov, em_mov)),
	/* 0xB8 - 0xBF */
	X8(I(DstReg | SrcImm64 | Mov, em_mov)),
	/* 0xC0 - 0xC7 */
	G(ByteOp | Src2ImmByte, group2), G(Src2ImmByte, group2),
	I(ImplicitOps | NearBranch | SrcImmU16, em_ret_near_imm),
	I(ImplicitOps | NearBranch, em_ret),
	I(DstReg | SrcMemFAddr | ModRM | No64 | Src2ES, em_lseg),
	I(DstReg | SrcMemFAddr | ModRM | No64 | Src2DS, em_lseg),
	G(ByteOp, group11), G(0, group11),
	/* 0xC8 - 0xCF */
	I(Stack | SrcImmU16 | Src2ImmByte, em_enter), I(Stack, em_leave),
	I(ImplicitOps | SrcImmU16, em_ret_far_imm),
	I(ImplicitOps, em_ret_far),
	D(ImplicitOps), DI(SrcImmByte, intn),
	D(ImplicitOps | No64), II(ImplicitOps, em_iret, iret),
	/* 0xD0 - 0xD7 */
	G(Src2One | ByteOp, group2), G(Src2One, group2),
	G(Src2CL | ByteOp, group2), G(Src2CL, group2),
	I(DstAcc | SrcImmUByte | No64, em_aam),
	I(DstAcc | SrcImmUByte | No64, em_aad),
	F(DstAcc | ByteOp | No64, em_salc),
	I(DstAcc | SrcXLat | ByteOp, em_mov),
	/* 0xD8 - 0xDF */
	N, E(0, &escape_d9), N, E(0, &escape_db), N, E(0, &escape_dd), N, N,
	/* 0xE0 - 0xE7 */
	X3(I(SrcImmByte | NearBranch, em_loop)),
	I(SrcImmByte | NearBranch, em_jcxz),
	I2bvIP(SrcImmUByte | DstAcc, em_in,  in,  check_perm_in),
	I2bvIP(SrcAcc | DstImmUByte, em_out, out, check_perm_out),
	/* 0xE8 - 0xEF */
	I(SrcImm | NearBranch, em_call), D(SrcImm | ImplicitOps | NearBranch),
	I(SrcImmFAddr | No64, em_jmp_far),
	D(SrcImmByte | ImplicitOps | NearBranch),
	I2bvIP(SrcDX | DstAcc, em_in,  in,  check_perm_in),
	I2bvIP(SrcAcc | DstDX, em_out, out, check_perm_out),
	/* 0xF0 - 0xF7 */
	N, DI(ImplicitOps, icebp), N, N,
	DI(ImplicitOps | Priv, hlt), D(ImplicitOps),
	G(ByteOp, group3), G(0, group3),
	/* 0xF8 - 0xFF */
	D(ImplicitOps), D(ImplicitOps),
	I(ImplicitOps, em_cli), I(ImplicitOps, em_sti),
	D(ImplicitOps), D(ImplicitOps), G(0, group4), G(0, group5),
};

static const struct opcode twobyte_table[256] = {
	/* 0x00 - 0x0F */
	G(0, group6), GD(0, &group7), N, N,
	N, I(ImplicitOps | EmulateOnUD, em_syscall),
	II(ImplicitOps | Priv, em_clts, clts), N,
	DI(ImplicitOps | Priv, invd), DI(ImplicitOps | Priv, wbinvd), N, N,
	N, D(ImplicitOps | ModRM | SrcMem | NoAccess), N, N,
	/* 0x10 - 0x1F */
	GP(ModRM | DstReg | SrcMem | Mov | Sse, &pfx_0f_10_0f_11),
	GP(ModRM | DstMem | SrcReg | Mov | Sse, &pfx_0f_10_0f_11),
	N, N, N, N, N, N,
	D(ImplicitOps | ModRM | SrcMem | NoAccess), /* 4 * prefetch + 4 * reserved NOP */
	D(ImplicitOps | ModRM | SrcMem | NoAccess), N, N,
	D(ImplicitOps | ModRM | SrcMem | NoAccess), /* 8 * reserved NOP */
	D(ImplicitOps | ModRM | SrcMem | NoAccess), /* 8 * reserved NOP */
	D(ImplicitOps | ModRM | SrcMem | NoAccess), /* 8 * reserved NOP */
	D(ImplicitOps | ModRM | SrcMem | NoAccess), /* NOP + 7 * reserved NOP */
	/* 0x20 - 0x2F */
	DIP(ModRM | DstMem | Priv | Op3264 | NoMod, cr_read, check_cr_access),
	DIP(ModRM | DstMem | Priv | Op3264 | NoMod, dr_read, check_dr_read),
	IIP(ModRM | SrcMem | Priv | Op3264 | NoMod, em_cr_write, cr_write,
						check_cr_access),
	IIP(ModRM | SrcMem | Priv | Op3264 | NoMod, em_dr_write, dr_write,
						check_dr_write),
	N, N, N, N,
	GP(ModRM | DstReg | SrcMem | Mov | Sse, &pfx_0f_28_0f_29),
	GP(ModRM | DstMem | SrcReg | Mov | Sse, &pfx_0f_28_0f_29),
	N, GP(ModRM | DstMem | SrcReg | Mov | Sse, &pfx_0f_2b),
	N, N, N, N,
	/* 0x30 - 0x3F */
	II(ImplicitOps | Priv, em_wrmsr, wrmsr),
	IIP(ImplicitOps, em_rdtsc, rdtsc, check_rdtsc),
	II(ImplicitOps | Priv, em_rdmsr, rdmsr),
	IIP(ImplicitOps, em_rdpmc, rdpmc, check_rdpmc),
	I(ImplicitOps | EmulateOnUD, em_sysenter),
	I(ImplicitOps | Priv | EmulateOnUD, em_sysexit),
	N, N,
	N, N, N, N, N, N, N, N,
	/* 0x40 - 0x4F */
	X16(D(DstReg | SrcMem | ModRM)),
	/* 0x50 - 0x5F */
	N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
	/* 0x60 - 0x6F */
	N, N, N, N,
	N, N, N, N,
	N, N, N, N,
	N, N, N, GP(SrcMem | DstReg | ModRM | Mov, &pfx_0f_6f_0f_7f),
	/* 0x70 - 0x7F */
	N, N, N, N,
	N, N, N, N,
	N, N, N, N,
	N, N, N, GP(SrcReg | DstMem | ModRM | Mov, &pfx_0f_6f_0f_7f),
	/* 0x80 - 0x8F */
	X16(D(SrcImm | NearBranch)),
	/* 0x90 - 0x9F */
	X16(D(ByteOp | DstMem | SrcNone | ModRM| Mov)),
	/* 0xA0 - 0xA7 */
	I(Stack | Src2FS, em_push_sreg), I(Stack | Src2FS, em_pop_sreg),
	II(ImplicitOps, em_cpuid, cpuid),
	F(DstMem | SrcReg | ModRM | BitOp | NoWrite, em_bt),
	F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shld),
	F(DstMem | SrcReg | Src2CL | ModRM, em_shld), N, N,
	/* 0xA8 - 0xAF */
	I(Stack | Src2GS, em_push_sreg), I(Stack | Src2GS, em_pop_sreg),
	II(EmulateOnUD | ImplicitOps, em_rsm, rsm),
	F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_bts),
	F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shrd),
	F(DstMem | SrcReg | Src2CL | ModRM, em_shrd),
	GD(0, &group15), F(DstReg | SrcMem | ModRM, em_imul),
	/* 0xB0 - 0xB7 */
	I2bv(DstMem | SrcReg | ModRM | Lock | PageTable | SrcWrite, em_cmpxchg),
	I(DstReg | SrcMemFAddr | ModRM | Src2SS, em_lseg),
	F(DstMem | SrcReg | ModRM | BitOp | Lock, em_btr),
	I(DstReg | SrcMemFAddr | ModRM | Src2FS, em_lseg),
	I(DstReg | SrcMemFAddr | ModRM | Src2GS, em_lseg),
	D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov),
	/* 0xB8 - 0xBF */
	N, N,
	G(BitOp, group8),
	F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_btc),
	I(DstReg | SrcMem | ModRM, em_bsf_c),
	I(DstReg | SrcMem | ModRM, em_bsr_c),
	D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov),
	/* 0xC0 - 0xC7 */
	F2bv(DstMem | SrcReg | ModRM | SrcWrite | Lock, em_xadd),
	N, ID(0, &instr_dual_0f_c3),
	N, N, N, GD(0, &group9),
	/* 0xC8 - 0xCF */
	X8(I(DstReg, em_bswap)),
	/* 0xD0 - 0xDF */
	N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
	/* 0xE0 - 0xEF */
	N, N, N, N, N, N, N, GP(SrcReg | DstMem | ModRM | Mov, &pfx_0f_e7),
	N, N, N, N, N, N, N, N,
	/* 0xF0 - 0xFF */
	N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N
};

static const struct instr_dual instr_dual_0f_38_f0 = {
	I(DstReg | SrcMem | Mov, em_movbe), N
};

static const struct instr_dual instr_dual_0f_38_f1 = {
	I(DstMem | SrcReg | Mov, em_movbe), N
};

static const struct gprefix three_byte_0f_38_f0 = {
	ID(0, &instr_dual_0f_38_f0), N, N, N
};

static const struct gprefix three_byte_0f_38_f1 = {
	ID(0, &instr_dual_0f_38_f1), N, N, N
};

/*
 * Insns below are selected by the prefix which indexed by the third opcode
 * byte.
 */
static const struct opcode opcode_map_0f_38[256] = {
	/* 0x00 - 0x7f */
	X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), X16(N),
	/* 0x80 - 0xef */
	X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), X16(N),
	/* 0xf0 - 0xf1 */
	GP(EmulateOnUD | ModRM, &three_byte_0f_38_f0),
	GP(EmulateOnUD | ModRM, &three_byte_0f_38_f1),
	/* 0xf2 - 0xff */
	N, N, X4(N), X8(N)
};

#undef D
#undef N
#undef G
#undef GD
#undef I
#undef GP
#undef EXT
#undef MD
#undef ID

#undef D2bv
#undef D2bvIP
#undef I2bv
#undef I2bvIP
#undef I6ALU

static unsigned imm_size(struct x86_emulate_ctxt *ctxt)
{
	unsigned size;

	size = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
	if (size == 8)
		size = 4;
	return size;
}

static int decode_imm(struct x86_emulate_ctxt *ctxt, struct operand *op,
		      unsigned size, bool sign_extension)
{
	int rc = X86EMUL_CONTINUE;

	op->type = OP_IMM;
	op->bytes = size;
	op->addr.mem.ea = ctxt->_eip;
	/* NB. Immediates are sign-extended as necessary. */
	switch (op->bytes) {
	case 1:
		op->val = insn_fetch(s8, ctxt);
		break;
	case 2:
		op->val = insn_fetch(s16, ctxt);
		break;
	case 4:
		op->val = insn_fetch(s32, ctxt);
		break;
	case 8:
		op->val = insn_fetch(s64, ctxt);
		break;
	}
	if (!sign_extension) {
		switch (op->bytes) {
		case 1:
			op->val &= 0xff;
			break;
		case 2:
			op->val &= 0xffff;
			break;
		case 4:
			op->val &= 0xffffffff;
			break;
		}
	}
done:
	return rc;
}

static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,
			  unsigned d)
{
	int rc = X86EMUL_CONTINUE;

	switch (d) {
	case OpReg:
		decode_register_operand(ctxt, op);
		break;
	case OpImmUByte:
		rc = decode_imm(ctxt, op, 1, false);
		break;
	case OpMem:
		ctxt->memop.bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
	mem_common:
		*op = ctxt->memop;
		ctxt->memopp = op;
		if (ctxt->d & BitOp)
			fetch_bit_operand(ctxt);
		op->orig_val = op->val;
		break;
	case OpMem64:
		ctxt->memop.bytes = (ctxt->op_bytes == 8) ? 16 : 8;
		goto mem_common;
	case OpAcc:
		op->type = OP_REG;
		op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
		op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX);
		fetch_register_operand(op);
		op->orig_val = op->val;
		break;
	case OpAccLo:
		op->type = OP_REG;
		op->bytes = (ctxt->d & ByteOp) ? 2 : ctxt->op_bytes;
		op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX);
		fetch_register_operand(op);
		op->orig_val = op->val;
		break;
	case OpAccHi:
		if (ctxt->d & ByteOp) {
			op->type = OP_NONE;
			break;
		}
		op->type = OP_REG;
		op->bytes = ctxt->op_bytes;
		op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RDX);
		fetch_register_operand(op);
		op->orig_val = op->val;
		break;
	case OpDI:
		op->type = OP_MEM;
		op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
		op->addr.mem.ea =
			register_address(ctxt, VCPU_REGS_RDI);
		op->addr.mem.seg = VCPU_SREG_ES;
		op->val = 0;
		op->count = 1;
		break;
	case OpDX:
		op->type = OP_REG;
		op->bytes = 2;
		op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RDX);
		fetch_register_operand(op);
		break;
	case OpCL:
		op->type = OP_IMM;
		op->bytes = 1;
		op->val = reg_read(ctxt, VCPU_REGS_RCX) & 0xff;
		break;
	case OpImmByte:
		rc = decode_imm(ctxt, op, 1, true);
		break;
	case OpOne:
		op->type = OP_IMM;
		op->bytes = 1;
		op->val = 1;
		break;
	case OpImm:
		rc = decode_imm(ctxt, op, imm_size(ctxt), true);
		break;
	case OpImm64:
		rc = decode_imm(ctxt, op, ctxt->op_bytes, true);
		break;
	case OpMem8:
		ctxt->memop.bytes = 1;
		if (ctxt->memop.type == OP_REG) {
			ctxt->memop.addr.reg = decode_register(ctxt,
					ctxt->modrm_rm, true);
			fetch_register_operand(&ctxt->memop);
		}
		goto mem_common;
	case OpMem16:
		ctxt->memop.bytes = 2;
		goto mem_common;
	case OpMem32:
		ctxt->memop.bytes = 4;
		goto mem_common;
	case OpImmU16:
		rc = decode_imm(ctxt, op, 2, false);
		break;
	case OpImmU:
		rc = decode_imm(ctxt, op, imm_size(ctxt), false);
		break;
	case OpSI:
		op->type = OP_MEM;
		op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
		op->addr.mem.ea =
			register_address(ctxt, VCPU_REGS_RSI);
		op->addr.mem.seg = ctxt->seg_override;
		op->val = 0;
		op->count = 1;
		break;
	case OpXLat:
		op->type = OP_MEM;
		op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
		op->addr.mem.ea =
			address_mask(ctxt,
				reg_read(ctxt, VCPU_REGS_RBX) +
				(reg_read(ctxt, VCPU_REGS_RAX) & 0xff));
		op->addr.mem.seg = ctxt->seg_override;
		op->val = 0;
		break;
	case OpImmFAddr:
		op->type = OP_IMM;
		op->addr.mem.ea = ctxt->_eip;
		op->bytes = ctxt->op_bytes + 2;
		insn_fetch_arr(op->valptr, op->bytes, ctxt);
		break;
	case OpMemFAddr:
		ctxt->memop.bytes = ctxt->op_bytes + 2;
		goto mem_common;
	case OpES:
		op->type = OP_IMM;
		op->val = VCPU_SREG_ES;
		break;
	case OpCS:
		op->type = OP_IMM;
		op->val = VCPU_SREG_CS;
		break;
	case OpSS:
		op->type = OP_IMM;
		op->val = VCPU_SREG_SS;
		break;
	case OpDS:
		op->type = OP_IMM;
		op->val = VCPU_SREG_DS;
		break;
	case OpFS:
		op->type = OP_IMM;
		op->val = VCPU_SREG_FS;
		break;
	case OpGS:
		op->type = OP_IMM;
		op->val = VCPU_SREG_GS;
		break;
	case OpImplicit:
		/* Special instructions do their own operand decoding. */
	default:
		op->type = OP_NONE; /* Disable writeback. */
		break;
	}

done:
	return rc;
}

int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len)
{
	int rc = X86EMUL_CONTINUE;
	int mode = ctxt->mode;
	int def_op_bytes, def_ad_bytes, goffset, simd_prefix;
	bool op_prefix = false;
	bool has_seg_override = false;
	struct opcode opcode;
	u16 dummy;
	struct desc_struct desc;

	ctxt->memop.type = OP_NONE;
	ctxt->memopp = NULL;
	ctxt->_eip = ctxt->eip;
	ctxt->fetch.ptr = ctxt->fetch.data;
	ctxt->fetch.end = ctxt->fetch.data + insn_len;
	ctxt->opcode_len = 1;
	ctxt->intercept = x86_intercept_none;
	if (insn_len > 0)
		memcpy(ctxt->fetch.data, insn, insn_len);
	else {
		rc = __do_insn_fetch_bytes(ctxt, 1);
		if (rc != X86EMUL_CONTINUE)
			goto done;
	}

	switch (mode) {
	case X86EMUL_MODE_REAL:
	case X86EMUL_MODE_VM86:
		def_op_bytes = def_ad_bytes = 2;
		ctxt->ops->get_segment(ctxt, &dummy, &desc, NULL, VCPU_SREG_CS);
		if (desc.d)
			def_op_bytes = def_ad_bytes = 4;
		break;
	case X86EMUL_MODE_PROT16:
		def_op_bytes = def_ad_bytes = 2;
		break;
	case X86EMUL_MODE_PROT32:
		def_op_bytes = def_ad_bytes = 4;
		break;
#ifdef CONFIG_X86_64
	case X86EMUL_MODE_PROT64:
		def_op_bytes = 4;
		def_ad_bytes = 8;
		break;
#endif
	default:
		return EMULATION_FAILED;
	}

	ctxt->op_bytes = def_op_bytes;
	ctxt->ad_bytes = def_ad_bytes;

	/* Legacy prefixes. */
	for (;;) {
		switch (ctxt->b = insn_fetch(u8, ctxt)) {
		case 0x66:	/* operand-size override */
			op_prefix = true;
			/* switch between 2/4 bytes */
			ctxt->op_bytes = def_op_bytes ^ 6;
			break;
		case 0x67:	/* address-size override */
			if (mode == X86EMUL_MODE_PROT64)
				/* switch between 4/8 bytes */
				ctxt->ad_bytes = def_ad_bytes ^ 12;
			else
				/* switch between 2/4 bytes */
				ctxt->ad_bytes = def_ad_bytes ^ 6;
			break;
		case 0x26:	/* ES override */
			has_seg_override = true;
			ctxt->seg_override = VCPU_SREG_ES;
			break;
		case 0x2e:	/* CS override */
			has_seg_override = true;
			ctxt->seg_override = VCPU_SREG_CS;
			break;
		case 0x36:	/* SS override */
			has_seg_override = true;
			ctxt->seg_override = VCPU_SREG_SS;
			break;
		case 0x3e:	/* DS override */
			has_seg_override = true;
			ctxt->seg_override = VCPU_SREG_DS;
			break;
		case 0x64:	/* FS override */
			has_seg_override = true;
			ctxt->seg_override = VCPU_SREG_FS;
			break;
		case 0x65:	/* GS override */
			has_seg_override = true;
			ctxt->seg_override = VCPU_SREG_GS;
			break;
		case 0x40 ... 0x4f: /* REX */
			if (mode != X86EMUL_MODE_PROT64)
				goto done_prefixes;
			ctxt->rex_prefix = ctxt->b;
			continue;
		case 0xf0:	/* LOCK */
			ctxt->lock_prefix = 1;
			break;
		case 0xf2:	/* REPNE/REPNZ */
		case 0xf3:	/* REP/REPE/REPZ */
			ctxt->rep_prefix = ctxt->b;
			break;
		default:
			goto done_prefixes;
		}

		/* Any legacy prefix after a REX prefix nullifies its effect. */

		ctxt->rex_prefix = 0;
	}

done_prefixes:

	/* REX prefix. */
	if (ctxt->rex_prefix & 8)
		ctxt->op_bytes = 8;	/* REX.W */

	/* Opcode byte(s). */
	opcode = opcode_table[ctxt->b];
	/* Two-byte opcode? */
	if (ctxt->b == 0x0f) {
		ctxt->opcode_len = 2;
		ctxt->b = insn_fetch(u8, ctxt);
		opcode = twobyte_table[ctxt->b];

		/* 0F_38 opcode map */
		if (ctxt->b == 0x38) {
			ctxt->opcode_len = 3;
			ctxt->b = insn_fetch(u8, ctxt);
			opcode = opcode_map_0f_38[ctxt->b];
		}
	}
	ctxt->d = opcode.flags;

	if (ctxt->d & ModRM)
		ctxt->modrm = insn_fetch(u8, ctxt);

	/* vex-prefix instructions are not implemented */
	if (ctxt->opcode_len == 1 && (ctxt->b == 0xc5 || ctxt->b == 0xc4) &&
	    (mode == X86EMUL_MODE_PROT64 || (ctxt->modrm & 0xc0) == 0xc0)) {
		ctxt->d = NotImpl;
	}

	while (ctxt->d & GroupMask) {
		switch (ctxt->d & GroupMask) {
		case Group:
			goffset = (ctxt->modrm >> 3) & 7;
			opcode = opcode.u.group[goffset];
			break;
		case GroupDual:
			goffset = (ctxt->modrm >> 3) & 7;
			if ((ctxt->modrm >> 6) == 3)
				opcode = opcode.u.gdual->mod3[goffset];
			else
				opcode = opcode.u.gdual->mod012[goffset];
			break;
		case RMExt:
			goffset = ctxt->modrm & 7;
			opcode = opcode.u.group[goffset];
			break;
		case Prefix:
			if (ctxt->rep_prefix && op_prefix)
				return EMULATION_FAILED;
			simd_prefix = op_prefix ? 0x66 : ctxt->rep_prefix;
			switch (simd_prefix) {
			case 0x00: opcode = opcode.u.gprefix->pfx_no; break;
			case 0x66: opcode = opcode.u.gprefix->pfx_66; break;
			case 0xf2: opcode = opcode.u.gprefix->pfx_f2; break;
			case 0xf3: opcode = opcode.u.gprefix->pfx_f3; break;
			}
			break;
		case Escape:
			if (ctxt->modrm > 0xbf) {
				size_t size = ARRAY_SIZE(opcode.u.esc->high);
				u32 index = array_index_nospec(
					ctxt->modrm - 0xc0, size);

				opcode = opcode.u.esc->high[index];
			} else {
				opcode = opcode.u.esc->op[(ctxt->modrm >> 3) & 7];
			}
			break;
		case InstrDual:
			if ((ctxt->modrm >> 6) == 3)
				opcode = opcode.u.idual->mod3;
			else
				opcode = opcode.u.idual->mod012;
			break;
		case ModeDual:
			if (ctxt->mode == X86EMUL_MODE_PROT64)
				opcode = opcode.u.mdual->mode64;
			else
				opcode = opcode.u.mdual->mode32;
			break;
		default:
			return EMULATION_FAILED;
		}

		ctxt->d &= ~(u64)GroupMask;
		ctxt->d |= opcode.flags;
	}

	/* Unrecognised? */
	if (ctxt->d == 0)
		return EMULATION_FAILED;

	ctxt->execute = opcode.u.execute;

	if (unlikely(ctxt->ud) && likely(!(ctxt->d & EmulateOnUD)))
		return EMULATION_FAILED;

	if (unlikely(ctxt->d &
	    (NotImpl|Stack|Op3264|Sse|Mmx|Intercept|CheckPerm|NearBranch|
	     No16))) {
		/*
		 * These are copied unconditionally here, and checked unconditionally
		 * in x86_emulate_insn.
		 */
		ctxt->check_perm = opcode.check_perm;
		ctxt->intercept = opcode.intercept;

		if (ctxt->d & NotImpl)
			return EMULATION_FAILED;

		if (mode == X86EMUL_MODE_PROT64) {
			if (ctxt->op_bytes == 4 && (ctxt->d & Stack))
				ctxt->op_bytes = 8;
			else if (ctxt->d & NearBranch)
				ctxt->op_bytes = 8;
		}

		if (ctxt->d & Op3264) {
			if (mode == X86EMUL_MODE_PROT64)
				ctxt->op_bytes = 8;
			else
				ctxt->op_bytes = 4;
		}

		if ((ctxt->d & No16) && ctxt->op_bytes == 2)
			ctxt->op_bytes = 4;

		if (ctxt->d & Sse)
			ctxt->op_bytes = 16;
		else if (ctxt->d & Mmx)
			ctxt->op_bytes = 8;
	}

	/* ModRM and SIB bytes. */
	if (ctxt->d & ModRM) {
		rc = decode_modrm(ctxt, &ctxt->memop);
		if (!has_seg_override) {
			has_seg_override = true;
			ctxt->seg_override = ctxt->modrm_seg;
		}
	} else if (ctxt->d & MemAbs)
5395
		rc = decode_abs(ctxt, &ctxt->memop);
5396 5397 5398
	if (rc != X86EMUL_CONTINUE)
		goto done;

B
Bandan Das 已提交
5399 5400
	if (!has_seg_override)
		ctxt->seg_override = VCPU_SREG_DS;
5401

B
Bandan Das 已提交
5402
	ctxt->memop.addr.mem.seg = ctxt->seg_override;
5403 5404 5405 5406 5407

	/*
	 * Decode and fetch the source operand: register, memory
	 * or immediate.
	 */
5408
	rc = decode_operand(ctxt, &ctxt->src, (ctxt->d >> SrcShift) & OpMask);
5409 5410 5411
	if (rc != X86EMUL_CONTINUE)
		goto done;

5412 5413 5414 5415
	/*
	 * Decode and fetch the second source operand: register, memory
	 * or immediate.
	 */
5416
	rc = decode_operand(ctxt, &ctxt->src2, (ctxt->d >> Src2Shift) & OpMask);
5417 5418 5419
	if (rc != X86EMUL_CONTINUE)
		goto done;

5420
	/* Decode and fetch the destination operand: register or memory. */
5421
	rc = decode_operand(ctxt, &ctxt->dst, (ctxt->d >> DstShift) & OpMask);
5422

5423
	if (ctxt->rip_relative && likely(ctxt->memopp))
5424 5425
		ctxt->memopp->addr.mem.ea = address_mask(ctxt,
					ctxt->memopp->addr.mem.ea + ctxt->_eip);
5426

5427
done:
5428 5429
	if (rc == X86EMUL_PROPAGATE_FAULT)
		ctxt->have_exception = true;
5430
	return (rc != X86EMUL_CONTINUE) ? EMULATION_FAILED : EMULATION_OK;
5431 5432
}

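/*
 * Report whether the decoded instruction is flagged (PageTable) as one
 * that may write guest page tables.
 */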
bool x86_page_table_writing_insn(struct x86_emulate_ctxt *ctxt)
{
	return ctxt->d & PageTable;
}

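/*
 * Check the flag-based (second) termination condition of a REP-prefixed
 * string instruction; the RCX == 0 condition is handled by the caller.
 */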
static bool string_insn_completed(struct x86_emulate_ctxt *ctxt)
{
	/*
	 * The second termination condition only applies to REPE
	 * and REPNE. Test whether the repeat string operation prefix is
	 * REPE/REPZ or REPNE/REPNZ and, if so, check the corresponding
	 * termination condition:
	 * 	- if REPE/REPZ and ZF = 0 then done
	 * 	- if REPNE/REPNZ and ZF = 1 then done
	 */
	if (((ctxt->b == 0xa6) || (ctxt->b == 0xa7) ||
	     (ctxt->b == 0xae) || (ctxt->b == 0xaf))
	    && (((ctxt->rep_prefix == REPE_PREFIX) &&
		 ((ctxt->eflags & X86_EFLAGS_ZF) == 0))
		|| ((ctxt->rep_prefix == REPNE_PREFIX) &&
		    ((ctxt->eflags & X86_EFLAGS_ZF) == X86_EFLAGS_ZF))))
		return true;

	return false;
}

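/*
 * Force any pending x87 exception to be delivered now (via fwait) so that
 * MMX operands can be accessed safely; a pending fault is reported back to
 * the guest as #MF.
 */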
static int flush_pending_x87_faults(struct x86_emulate_ctxt *ctxt)
{
	int rc;

	emulator_get_fpu();
	rc = asm_safe("fwait");
	emulator_put_fpu();

	if (unlikely(rc != X86EMUL_CONTINUE))
		return emulate_exception(ctxt, MF_VECTOR, 0, false);

	return X86EMUL_CONTINUE;
}

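/* Load the current MMX register value for an OP_MM operand. */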
static void fetch_possible_mmx_operand(struct operand *op)
{
	if (op->type == OP_MM)
		read_mmx_reg(&op->mm_val, op->addr.mm);
}

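/*
 * Invoke a flag-computing fastop stub: the stub for the operand size is
 * selected by offsetting 'fop', guest EFLAGS are loaded around the call,
 * and the resulting arithmetic flags are merged back into ctxt->eflags.
 * A stub reports an exception by returning with 'fop' cleared, which is
 * surfaced to the guest as #DE.
 */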
static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop)
{
	ulong flags = (ctxt->eflags & EFLAGS_MASK) | X86_EFLAGS_IF;

	if (!(ctxt->d & ByteOp))
		fop += __ffs(ctxt->dst.bytes) * FASTOP_SIZE;

	asm("push %[flags]; popf; " CALL_NOSPEC " ; pushf; pop %[flags]\n"
	    : "+a"(ctxt->dst.val), "+d"(ctxt->src.val), [flags]"+D"(flags),
	      [thunk_target]"+S"(fop), ASM_CALL_CONSTRAINT
	    : "c"(ctxt->src2.val));

	ctxt->eflags = (ctxt->eflags & ~EFLAGS_MASK) | (flags & EFLAGS_MASK);
	if (!fop) /* exception is returned in fop variable */
		return emulate_de(ctxt);
	return X86EMUL_CONTINUE;
}

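/*
 * Clear the per-instruction decode state (everything between rip_relative
 * and modrm in the context) together with the read-ahead caches.
 */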
void init_decode_cache(struct x86_emulate_ctxt *ctxt)
{
	memset(&ctxt->rip_relative, 0,
	       (void *)&ctxt->modrm - (void *)&ctxt->rip_relative);

	ctxt->io_read.pos = 0;
	ctxt->io_read.end = 0;
	ctxt->mem_read.end = 0;
}

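/*
 * Execute a previously decoded instruction: run the pre-execution checks
 * (LOCK/privilege/mode/intercepts), fetch memory operands, dispatch to the
 * opcode handler or fastop stub, then write back results and advance RIP.
 * Returns EMULATION_OK, EMULATION_RESTART (string ops), EMULATION_INTERCEPTED
 * or EMULATION_FAILED.
 */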
int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
{
	const struct x86_emulate_ops *ops = ctxt->ops;
	int rc = X86EMUL_CONTINUE;
	int saved_dst_type = ctxt->dst.type;
	unsigned emul_flags;

	ctxt->mem_read.pos = 0;

	/* LOCK prefix is allowed only with some instructions */
	if (ctxt->lock_prefix && (!(ctxt->d & Lock) || ctxt->dst.type != OP_MEM)) {
		rc = emulate_ud(ctxt);
		goto done;
	}

	if ((ctxt->d & SrcMask) == SrcMemFAddr && ctxt->src.type != OP_MEM) {
		rc = emulate_ud(ctxt);
		goto done;
	}

	emul_flags = ctxt->ops->get_hflags(ctxt);
	if (unlikely(ctxt->d &
		     (No64|Undefined|Sse|Mmx|Intercept|CheckPerm|Priv|Prot|String))) {
		if ((ctxt->mode == X86EMUL_MODE_PROT64 && (ctxt->d & No64)) ||
				(ctxt->d & Undefined)) {
			rc = emulate_ud(ctxt);
			goto done;
		}

		if (((ctxt->d & (Sse|Mmx)) && ((ops->get_cr(ctxt, 0) & X86_CR0_EM)))
		    || ((ctxt->d & Sse) && !(ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR))) {
			rc = emulate_ud(ctxt);
			goto done;
		}

		if ((ctxt->d & (Sse|Mmx)) && (ops->get_cr(ctxt, 0) & X86_CR0_TS)) {
			rc = emulate_nm(ctxt);
			goto done;
		}

		if (ctxt->d & Mmx) {
			rc = flush_pending_x87_faults(ctxt);
			if (rc != X86EMUL_CONTINUE)
				goto done;
			/*
			 * Now that we know the fpu is exception safe, we can fetch
			 * operands from it.
			 */
			fetch_possible_mmx_operand(&ctxt->src);
			fetch_possible_mmx_operand(&ctxt->src2);
			if (!(ctxt->d & Mov))
				fetch_possible_mmx_operand(&ctxt->dst);
		}

		if (unlikely(emul_flags & X86EMUL_GUEST_MASK) && ctxt->intercept) {
			rc = emulator_check_intercept(ctxt, ctxt->intercept,
						      X86_ICPT_PRE_EXCEPT);
			if (rc != X86EMUL_CONTINUE)
				goto done;
		}

		/* Instruction can only be executed in protected mode */
		if ((ctxt->d & Prot) && ctxt->mode < X86EMUL_MODE_PROT16) {
			rc = emulate_ud(ctxt);
			goto done;
		}

		/* Privileged instruction can be executed only in CPL=0 */
		if ((ctxt->d & Priv) && ops->cpl(ctxt)) {
			if (ctxt->d & PrivUD)
				rc = emulate_ud(ctxt);
			else
				rc = emulate_gp(ctxt, 0);
			goto done;
		}

		/* Do instruction specific permission checks */
		if (ctxt->d & CheckPerm) {
			rc = ctxt->check_perm(ctxt);
			if (rc != X86EMUL_CONTINUE)
				goto done;
		}

		if (unlikely(emul_flags & X86EMUL_GUEST_MASK) && (ctxt->d & Intercept)) {
			rc = emulator_check_intercept(ctxt, ctxt->intercept,
						      X86_ICPT_POST_EXCEPT);
			if (rc != X86EMUL_CONTINUE)
				goto done;
		}

		if (ctxt->rep_prefix && (ctxt->d & String)) {
			/* All REP prefixes have the same first termination condition */
			if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0) {
				string_registers_quirk(ctxt);
				ctxt->eip = ctxt->_eip;
				ctxt->eflags &= ~X86_EFLAGS_RF;
				goto done;
			}
		}
	}

	if ((ctxt->src.type == OP_MEM) && !(ctxt->d & NoAccess)) {
		rc = segmented_read(ctxt, ctxt->src.addr.mem,
				    ctxt->src.valptr, ctxt->src.bytes);
		if (rc != X86EMUL_CONTINUE)
			goto done;
		ctxt->src.orig_val64 = ctxt->src.val64;
	}

	if (ctxt->src2.type == OP_MEM) {
		rc = segmented_read(ctxt, ctxt->src2.addr.mem,
				    &ctxt->src2.val, ctxt->src2.bytes);
		if (rc != X86EMUL_CONTINUE)
			goto done;
	}

	if ((ctxt->d & DstMask) == ImplicitOps)
		goto special_insn;


	if ((ctxt->dst.type == OP_MEM) && !(ctxt->d & Mov)) {
		/* optimisation - avoid slow emulated read if Mov */
		rc = segmented_read(ctxt, ctxt->dst.addr.mem,
				   &ctxt->dst.val, ctxt->dst.bytes);
		if (rc != X86EMUL_CONTINUE) {
			if (!(ctxt->d & NoWrite) &&
			    rc == X86EMUL_PROPAGATE_FAULT &&
			    ctxt->exception.vector == PF_VECTOR)
				ctxt->exception.error_code |= PFERR_WRITE_MASK;
			goto done;
		}
	}
	/* Copy full 64-bit value for CMPXCHG8B.  */
	ctxt->dst.orig_val64 = ctxt->dst.val64;

special_insn:

	if (unlikely(emul_flags & X86EMUL_GUEST_MASK) && (ctxt->d & Intercept)) {
		rc = emulator_check_intercept(ctxt, ctxt->intercept,
					      X86_ICPT_POST_MEMACCESS);
		if (rc != X86EMUL_CONTINUE)
			goto done;
	}

	if (ctxt->rep_prefix && (ctxt->d & String))
		ctxt->eflags |= X86_EFLAGS_RF;
	else
		ctxt->eflags &= ~X86_EFLAGS_RF;

	if (ctxt->execute) {
		if (ctxt->d & Fastop)
			rc = fastop(ctxt, ctxt->fop);
		else
			rc = ctxt->execute(ctxt);
		if (rc != X86EMUL_CONTINUE)
			goto done;
		goto writeback;
	}

	if (ctxt->opcode_len == 2)
		goto twobyte_insn;
	else if (ctxt->opcode_len == 3)
		goto threebyte_insn;

	switch (ctxt->b) {
	case 0x70 ... 0x7f: /* jcc (short) */
		if (test_cc(ctxt->b, ctxt->eflags))
			rc = jmp_rel(ctxt, ctxt->src.val);
		break;
	case 0x8d: /* lea r16/r32, m */
		ctxt->dst.val = ctxt->src.addr.mem.ea;
		break;
	case 0x90 ... 0x97: /* nop / xchg reg, rax */
		if (ctxt->dst.addr.reg == reg_rmw(ctxt, VCPU_REGS_RAX))
			ctxt->dst.type = OP_NONE;
		else
			rc = em_xchg(ctxt);
		break;
	case 0x98: /* cbw/cwde/cdqe */
		switch (ctxt->op_bytes) {
		case 2: ctxt->dst.val = (s8)ctxt->dst.val; break;
		case 4: ctxt->dst.val = (s16)ctxt->dst.val; break;
		case 8: ctxt->dst.val = (s32)ctxt->dst.val; break;
		}
		break;
	case 0xcc:		/* int3 */
		rc = emulate_int(ctxt, 3);
		break;
	case 0xcd:		/* int n */
		rc = emulate_int(ctxt, ctxt->src.val);
		break;
	case 0xce:		/* into */
		if (ctxt->eflags & X86_EFLAGS_OF)
			rc = emulate_int(ctxt, 4);
		break;
	case 0xe9: /* jmp rel */
	case 0xeb: /* jmp rel short */
		rc = jmp_rel(ctxt, ctxt->src.val);
		ctxt->dst.type = OP_NONE; /* Disable writeback. */
		break;
	case 0xf4:              /* hlt */
		ctxt->ops->halt(ctxt);
		break;
	case 0xf5:	/* cmc */
		/* complement carry flag from eflags reg */
		ctxt->eflags ^= X86_EFLAGS_CF;
		break;
	case 0xf8: /* clc */
		ctxt->eflags &= ~X86_EFLAGS_CF;
		break;
	case 0xf9: /* stc */
		ctxt->eflags |= X86_EFLAGS_CF;
		break;
	case 0xfc: /* cld */
		ctxt->eflags &= ~X86_EFLAGS_DF;
		break;
	case 0xfd: /* std */
		ctxt->eflags |= X86_EFLAGS_DF;
		break;
	default:
		goto cannot_emulate;
	}

	if (rc != X86EMUL_CONTINUE)
		goto done;

writeback:
	if (ctxt->d & SrcWrite) {
		BUG_ON(ctxt->src.type == OP_MEM || ctxt->src.type == OP_MEM_STR);
		rc = writeback(ctxt, &ctxt->src);
		if (rc != X86EMUL_CONTINUE)
			goto done;
	}
	if (!(ctxt->d & NoWrite)) {
		rc = writeback(ctxt, &ctxt->dst);
		if (rc != X86EMUL_CONTINUE)
			goto done;
	}

	/*
	 * Restore dst type in case the decoding will be reused
	 * (happens for string instructions).
	 */
	ctxt->dst.type = saved_dst_type;

	if ((ctxt->d & SrcMask) == SrcSI)
		string_addr_inc(ctxt, VCPU_REGS_RSI, &ctxt->src);

	if ((ctxt->d & DstMask) == DstDI)
		string_addr_inc(ctxt, VCPU_REGS_RDI, &ctxt->dst);

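	/*
	 * For REP-prefixed string instructions: decrement RCX by the number
	 * of iterations actually emulated and either restart the instruction
	 * (leaving RIP untouched) or fall through to the final RIP update
	 * once the termination conditions are met.
	 */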
	if (ctxt->rep_prefix && (ctxt->d & String)) {
		unsigned int count;
		struct read_cache *r = &ctxt->io_read;
		if ((ctxt->d & SrcMask) == SrcSI)
			count = ctxt->src.count;
		else
			count = ctxt->dst.count;
		register_address_increment(ctxt, VCPU_REGS_RCX, -count);

		if (!string_insn_completed(ctxt)) {
			/*
			 * Re-enter guest when pio read ahead buffer is empty
			 * or, if it is not used, after each 1024 iterations.
			 */
			if ((r->end != 0 || reg_read(ctxt, VCPU_REGS_RCX) & 0x3ff) &&
			    (r->end == 0 || r->end != r->pos)) {
				/*
				 * Reset read cache. Usually happens before
				 * decode, but since instruction is restarted
				 * we have to do it here.
				 */
				ctxt->mem_read.end = 0;
				writeback_registers(ctxt);
				return EMULATION_RESTART;
			}
			goto done; /* skip rip writeback */
		}
		ctxt->eflags &= ~X86_EFLAGS_RF;
	}

	ctxt->eip = ctxt->_eip;
	if (ctxt->mode != X86EMUL_MODE_PROT64)
		ctxt->eip = (u32)ctxt->_eip;

done:
	if (rc == X86EMUL_PROPAGATE_FAULT) {
		WARN_ON(ctxt->exception.vector > 0x1f);
		ctxt->have_exception = true;
	}
	if (rc == X86EMUL_INTERCEPTED)
		return EMULATION_INTERCEPTED;

	if (rc == X86EMUL_CONTINUE)
		writeback_registers(ctxt);

	return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK;

twobyte_insn:
	switch (ctxt->b) {
	case 0x09:		/* wbinvd */
		(ctxt->ops->wbinvd)(ctxt);
		break;
	case 0x08:		/* invd */
	case 0x0d:		/* GrpP (prefetch) */
	case 0x18:		/* Grp16 (prefetch/nop) */
	case 0x1f:		/* nop */
		break;
	case 0x20: /* mov cr, reg */
		ctxt->dst.val = ops->get_cr(ctxt, ctxt->modrm_reg);
		break;
	case 0x21: /* mov from dr to reg */
		ops->get_dr(ctxt, ctxt->modrm_reg, &ctxt->dst.val);
		break;
	case 0x40 ... 0x4f:	/* cmov */
		if (test_cc(ctxt->b, ctxt->eflags))
			ctxt->dst.val = ctxt->src.val;
		else if (ctxt->op_bytes != 4)
			ctxt->dst.type = OP_NONE; /* no writeback */
		break;
	case 0x80 ... 0x8f: /* jnz rel, etc*/
		if (test_cc(ctxt->b, ctxt->eflags))
			rc = jmp_rel(ctxt, ctxt->src.val);
		break;
	case 0x90 ... 0x9f:     /* setcc r/m8 */
		ctxt->dst.val = test_cc(ctxt->b, ctxt->eflags);
		break;
	case 0xb6 ... 0xb7:	/* movzx */
		ctxt->dst.bytes = ctxt->op_bytes;
		ctxt->dst.val = (ctxt->src.bytes == 1) ? (u8) ctxt->src.val
						       : (u16) ctxt->src.val;
		break;
	case 0xbe ... 0xbf:	/* movsx */
		ctxt->dst.bytes = ctxt->op_bytes;
		ctxt->dst.val = (ctxt->src.bytes == 1) ? (s8) ctxt->src.val :
							(s16) ctxt->src.val;
		break;
	default:
		goto cannot_emulate;
	}

threebyte_insn:

	if (rc != X86EMUL_CONTINUE)
		goto done;

	goto writeback;

cannot_emulate:
	return EMULATION_FAILED;
}

void emulator_invalidate_register_cache(struct x86_emulate_ctxt *ctxt)
{
	invalidate_registers(ctxt);
}

void emulator_writeback_register_cache(struct x86_emulate_ctxt *ctxt)
{
	writeback_registers(ctxt);
}

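/*
 * A cached guest physical address can only be reused when the instruction
 * touches a single memory location: REP string instructions and
 * two-memory-operand instructions may access multiple addresses.
 */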
bool emulator_can_use_gpa(struct x86_emulate_ctxt *ctxt)
{
	if (ctxt->rep_prefix && (ctxt->d & String))
		return false;

	if (ctxt->d & TwoMemOp)
		return false;

	return true;
}