emulate.c 107.3 KB
Newer Older
A
Avi Kivity 已提交
1
/******************************************************************************
2
 * emulate.c
A
Avi Kivity 已提交
3 4 5 6 7 8
 *
 * Generic x86 (32-bit and 64-bit) instruction decoder and emulator.
 *
 * Copyright (c) 2005 Keir Fraser
 *
 * Linux coding style, mod r/m decoder, segment base fixes, real-mode
9
 * privileged instructions:
A
Avi Kivity 已提交
10 11
 *
 * Copyright (C) 2006 Qumranet
N
Nicolas Kaiser 已提交
12
 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
A
Avi Kivity 已提交
13 14 15 16 17 18 19 20 21 22
 *
 *   Avi Kivity <avi@qumranet.com>
 *   Yaniv Kamay <yaniv@qumranet.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 * From: xen-unstable 10676:af9809f51f81a3c43f276f00c81a52ef558afda4
 */

23
#include <linux/kvm_host.h>
24
#include "kvm_cache_regs.h"
A
Avi Kivity 已提交
25
#include <linux/module.h>
26
#include <asm/kvm_emulate.h>
A
Avi Kivity 已提交
27

28
#include "x86.h"
29
#include "tss.h"
30

31 32 33
/*
 * Operand types
 */
34 35 36 37 38 39 40 41 42
#define OpNone             0ull
#define OpImplicit         1ull  /* No generic decode */
#define OpReg              2ull  /* Register */
#define OpMem              3ull  /* Memory */
#define OpAcc              4ull  /* Accumulator: AL/AX/EAX/RAX */
#define OpDI               5ull  /* ES:DI/EDI/RDI */
#define OpMem64            6ull  /* Memory, 64-bit */
#define OpImmUByte         7ull  /* Zero-extended 8-bit immediate */
#define OpDX               8ull  /* DX register */
43 44 45 46
#define OpCL               9ull  /* CL register (for shifts) */
#define OpImmByte         10ull  /* 8-bit sign extended immediate */
#define OpOne             11ull  /* Implied 1 */
#define OpImm             12ull  /* Sign extended immediate */
47 48 49 50 51 52 53
#define OpMem16           13ull  /* Memory operand (16-bit). */
#define OpMem32           14ull  /* Memory operand (32-bit). */
#define OpImmU            15ull  /* Immediate operand, zero extended */
#define OpSI              16ull  /* SI/ESI/RSI */
#define OpImmFAddr        17ull  /* Immediate far address */
#define OpMemFAddr        18ull  /* Far address in memory */
#define OpImmU16          19ull  /* Immediate operand, 16 bits, zero extended */
54 55 56 57 58 59
#define OpES              20ull  /* ES */
#define OpCS              21ull  /* CS */
#define OpSS              22ull  /* SS */
#define OpDS              23ull  /* DS */
#define OpFS              24ull  /* FS */
#define OpGS              25ull  /* GS */
60 61

#define OpBits             5  /* Width of operand field */
62
#define OpMask             ((1ull << OpBits) - 1)
63

A
Avi Kivity 已提交
64 65 66 67 68 69 70 71 72 73
/*
 * Opcode effective-address decode tables.
 * Note that we only emulate instructions that have at least one memory
 * operand (excluding implicit stack references). We assume that stack
 * references and instruction fetches will never occur in special memory
 * areas that require emulation. So, for example, 'mov <imm>,<reg>' need
 * not be handled.
 */

/* Operand sizes: 8-bit operands or specified/overridden size. */
74
#define ByteOp      (1<<0)	/* 8-bit operands. */
A
Avi Kivity 已提交
75
/* Destination operand type. */
76 77 78 79 80 81 82 83 84 85
#define DstShift    1
#define ImplicitOps (OpImplicit << DstShift)
#define DstReg      (OpReg << DstShift)
#define DstMem      (OpMem << DstShift)
#define DstAcc      (OpAcc << DstShift)
#define DstDI       (OpDI << DstShift)
#define DstMem64    (OpMem64 << DstShift)
#define DstImmUByte (OpImmUByte << DstShift)
#define DstDX       (OpDX << DstShift)
#define DstMask     (OpMask << DstShift)
A
Avi Kivity 已提交
86
/* Source operand type. */
87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104
#define SrcShift    6
#define SrcNone     (OpNone << SrcShift)
#define SrcReg      (OpReg << SrcShift)
#define SrcMem      (OpMem << SrcShift)
#define SrcMem16    (OpMem16 << SrcShift)
#define SrcMem32    (OpMem32 << SrcShift)
#define SrcImm      (OpImm << SrcShift)
#define SrcImmByte  (OpImmByte << SrcShift)
#define SrcOne      (OpOne << SrcShift)
#define SrcImmUByte (OpImmUByte << SrcShift)
#define SrcImmU     (OpImmU << SrcShift)
#define SrcSI       (OpSI << SrcShift)
#define SrcImmFAddr (OpImmFAddr << SrcShift)
#define SrcMemFAddr (OpMemFAddr << SrcShift)
#define SrcAcc      (OpAcc << SrcShift)
#define SrcImmU16   (OpImmU16 << SrcShift)
#define SrcDX       (OpDX << SrcShift)
#define SrcMask     (OpMask << SrcShift)
105 106 107 108 109 110 111 112 113 114
#define BitOp       (1<<11)
#define MemAbs      (1<<12)      /* Memory operand is absolute displacement */
#define String      (1<<13)     /* String instruction (rep capable) */
#define Stack       (1<<14)     /* Stack instruction (push/pop) */
#define GroupMask   (7<<15)     /* Opcode uses one of the group mechanisms */
#define Group       (1<<15)     /* Bits 3:5 of modrm byte extend opcode */
#define GroupDual   (2<<15)     /* Alternate decoding of mod == 3 */
#define Prefix      (3<<15)     /* Instruction varies with 66/f2/f3 prefix */
#define RMExt       (4<<15)     /* Opcode extension in ModRM r/m if mod == 3 */
#define Sse         (1<<18)     /* SSE Vector instruction */
115 116 117 118
/* Generic ModRM decode. */
#define ModRM       (1<<19)
/* Destination is only written; never read. */
#define Mov         (1<<20)
119
/* Misc flags */
120
#define Prot        (1<<21) /* instruction generates #UD if not in prot-mode */
121
#define VendorSpecific (1<<22) /* Vendor specific instruction */
122
#define NoAccess    (1<<23) /* Don't access memory (lea/invlpg/verr etc) */
123
#define Op3264      (1<<24) /* Operand is 64b in long mode, 32b otherwise */
124
#define Undefined   (1<<25) /* No Such Instruction */
125
#define Lock        (1<<26) /* lock prefix is allowed for the instruction */
126
#define Priv        (1<<27) /* instruction generates #GP if current CPL != 0 */
127
#define No64	    (1<<28)
128
/* Source 2 operand type */
129 130 131 132 133 134
#define Src2Shift   (29)
#define Src2None    (OpNone << Src2Shift)
#define Src2CL      (OpCL << Src2Shift)
#define Src2ImmByte (OpImmByte << Src2Shift)
#define Src2One     (OpOne << Src2Shift)
#define Src2Imm     (OpImm << Src2Shift)
135 136 137 138 139 140
#define Src2ES      (OpES << Src2Shift)
#define Src2CS      (OpCS << Src2Shift)
#define Src2SS      (OpSS << Src2Shift)
#define Src2DS      (OpDS << Src2Shift)
#define Src2FS      (OpFS << Src2Shift)
#define Src2GS      (OpGS << Src2Shift)
141
#define Src2Mask    (OpMask << Src2Shift)
A
Avi Kivity 已提交
142

143 144 145 146 147 148 149 150
#define X2(x...) x, x
#define X3(x...) X2(x), x
#define X4(x...) X2(x), X2(x)
#define X5(x...) X4(x), x
#define X6(x...) X4(x), X2(x)
#define X7(x...) X4(x), X3(x)
#define X8(x...) X4(x), X4(x)
#define X16(x...) X8(x), X8(x)
151

152
struct opcode {
153 154
	u64 flags : 56;
	u64 intercept : 8;
155
	union {
156
		int (*execute)(struct x86_emulate_ctxt *ctxt);
157 158
		struct opcode *group;
		struct group_dual *gdual;
159
		struct gprefix *gprefix;
160
	} u;
161
	int (*check_perm)(struct x86_emulate_ctxt *ctxt);
162 163 164 165 166
};

struct group_dual {
	struct opcode mod012[8];
	struct opcode mod3[8];
167 168
};

169 170 171 172 173 174 175
struct gprefix {
	struct opcode pfx_no;
	struct opcode pfx_66;
	struct opcode pfx_f2;
	struct opcode pfx_f3;
};

A
Avi Kivity 已提交
176
/* EFLAGS bit definitions. */
177 178 179 180
#define EFLG_ID (1<<21)
#define EFLG_VIP (1<<20)
#define EFLG_VIF (1<<19)
#define EFLG_AC (1<<18)
181 182
#define EFLG_VM (1<<17)
#define EFLG_RF (1<<16)
183 184
#define EFLG_IOPL (3<<12)
#define EFLG_NT (1<<14)
A
Avi Kivity 已提交
185 186
#define EFLG_OF (1<<11)
#define EFLG_DF (1<<10)
187
#define EFLG_IF (1<<9)
188
#define EFLG_TF (1<<8)
A
Avi Kivity 已提交
189 190 191 192 193 194
#define EFLG_SF (1<<7)
#define EFLG_ZF (1<<6)
#define EFLG_AF (1<<4)
#define EFLG_PF (1<<2)
#define EFLG_CF (1<<0)

195 196 197
#define EFLG_RESERVED_ZEROS_MASK 0xffc0802a
#define EFLG_RESERVED_ONE_MASK 2

A
Avi Kivity 已提交
198 199 200 201 202 203 204
/*
 * Instruction emulation:
 * Most instructions are emulated directly via a fragment of inline assembly
 * code. This allows us to save/restore EFLAGS and thus very easily pick up
 * any modified flags.
 */

205
#if defined(CONFIG_X86_64)
A
Avi Kivity 已提交
206 207 208 209 210 211 212 213 214 215 216 217 218 219
#define _LO32 "k"		/* force 32-bit operand */
#define _STK  "%%rsp"		/* stack pointer */
#elif defined(__i386__)
#define _LO32 ""		/* force 32-bit operand */
#define _STK  "%%esp"		/* stack pointer */
#endif

/*
 * These EFLAGS bits are restored from saved value during emulation, and
 * any changes are written back to the saved value after emulation.
 */
#define EFLAGS_MASK (EFLG_OF|EFLG_SF|EFLG_ZF|EFLG_AF|EFLG_PF|EFLG_CF)

/* Before executing instruction: restore necessary bits in EFLAGS. */
220 221 222 223 224 225 226 227 228 229 230 231 232 233 234
#define _PRE_EFLAGS(_sav, _msk, _tmp)					\
	/* EFLAGS = (_sav & _msk) | (EFLAGS & ~_msk); _sav &= ~_msk; */ \
	"movl %"_sav",%"_LO32 _tmp"; "                                  \
	"push %"_tmp"; "                                                \
	"push %"_tmp"; "                                                \
	"movl %"_msk",%"_LO32 _tmp"; "                                  \
	"andl %"_LO32 _tmp",("_STK"); "                                 \
	"pushf; "                                                       \
	"notl %"_LO32 _tmp"; "                                          \
	"andl %"_LO32 _tmp",("_STK"); "                                 \
	"andl %"_LO32 _tmp","__stringify(BITS_PER_LONG/4)"("_STK"); "	\
	"pop  %"_tmp"; "                                                \
	"orl  %"_LO32 _tmp",("_STK"); "                                 \
	"popf; "                                                        \
	"pop  %"_sav"; "
A
Avi Kivity 已提交
235 236 237 238 239 240 241 242 243

/* After executing instruction: write-back necessary bits in EFLAGS. */
#define _POST_EFLAGS(_sav, _msk, _tmp) \
	/* _sav |= EFLAGS & _msk; */		\
	"pushf; "				\
	"pop  %"_tmp"; "			\
	"andl %"_msk",%"_LO32 _tmp"; "		\
	"orl  %"_LO32 _tmp",%"_sav"; "

244 245 246 247 248 249
#ifdef CONFIG_X86_64
#define ON64(x) x
#else
#define ON64(x)
#endif

250
#define ____emulate_2op(ctxt, _op, _x, _y, _suffix, _dsttype)	\
251 252 253 254 255
	do {								\
		__asm__ __volatile__ (					\
			_PRE_EFLAGS("0", "4", "2")			\
			_op _suffix " %"_x"3,%1; "			\
			_POST_EFLAGS("0", "4", "2")			\
256 257
			: "=m" ((ctxt)->eflags),			\
			  "+q" (*(_dsttype*)&(ctxt)->dst.val),		\
258
			  "=&r" (_tmp)					\
259
			: _y ((ctxt)->src.val), "i" (EFLAGS_MASK));	\
260
	} while (0)
261 262


A
Avi Kivity 已提交
263
/* Raw emulation: instruction has two explicit operands. */
264
#define __emulate_2op_nobyte(ctxt,_op,_wx,_wy,_lx,_ly,_qx,_qy)		\
265 266 267
	do {								\
		unsigned long _tmp;					\
									\
268
		switch ((ctxt)->dst.bytes) {				\
269
		case 2:							\
270
			____emulate_2op(ctxt,_op,_wx,_wy,"w",u16);	\
271 272
			break;						\
		case 4:							\
273
			____emulate_2op(ctxt,_op,_lx,_ly,"l",u32);	\
274 275
			break;						\
		case 8:							\
276
			ON64(____emulate_2op(ctxt,_op,_qx,_qy,"q",u64)); \
277 278
			break;						\
		}							\
A
Avi Kivity 已提交
279 280
	} while (0)

281
#define __emulate_2op(ctxt,_op,_bx,_by,_wx,_wy,_lx,_ly,_qx,_qy)		     \
A
Avi Kivity 已提交
282
	do {								     \
283
		unsigned long _tmp;					     \
284
		switch ((ctxt)->dst.bytes) {				     \
A
Avi Kivity 已提交
285
		case 1:							     \
286
			____emulate_2op(ctxt,_op,_bx,_by,"b",u8);	     \
A
Avi Kivity 已提交
287 288
			break;						     \
		default:						     \
289
			__emulate_2op_nobyte(ctxt, _op,			     \
A
Avi Kivity 已提交
290 291 292 293 294 295
					     _wx, _wy, _lx, _ly, _qx, _qy);  \
			break;						     \
		}							     \
	} while (0)

/* Source operand is byte-sized and may be restricted to just %cl. */
296 297
#define emulate_2op_SrcB(ctxt, _op)					\
	__emulate_2op(ctxt, _op, "b", "c", "b", "c", "b", "c", "b", "c")
A
Avi Kivity 已提交
298 299

/* Source operand is byte, word, long or quad sized. */
300 301
#define emulate_2op_SrcV(ctxt, _op)					\
	__emulate_2op(ctxt, _op, "b", "q", "w", "r", _LO32, "r", "", "r")
A
Avi Kivity 已提交
302 303

/* Source operand is word, long or quad sized. */
304 305
#define emulate_2op_SrcV_nobyte(ctxt, _op)				\
	__emulate_2op_nobyte(ctxt, _op, "w", "r", _LO32, "r", "", "r")
A
Avi Kivity 已提交
306

307
/* Instruction has three operands and one operand is stored in ECX register */
308
#define __emulate_2op_cl(ctxt, _op, _suffix, _type)		\
309 310
	do {								\
		unsigned long _tmp;					\
311 312 313
		_type _clv  = (ctxt)->src2.val;				\
		_type _srcv = (ctxt)->src.val;				\
		_type _dstv = (ctxt)->dst.val;				\
314 315 316 317 318
									\
		__asm__ __volatile__ (					\
			_PRE_EFLAGS("0", "5", "2")			\
			_op _suffix " %4,%1 \n"				\
			_POST_EFLAGS("0", "5", "2")			\
319
			: "=m" ((ctxt)->eflags), "+r" (_dstv), "=&r" (_tmp) \
320 321 322
			: "c" (_clv) , "r" (_srcv), "i" (EFLAGS_MASK)	\
			);						\
									\
323 324 325
		(ctxt)->src2.val  = (unsigned long) _clv;		\
		(ctxt)->src2.val = (unsigned long) _srcv;		\
		(ctxt)->dst.val = (unsigned long) _dstv;		\
326 327
	} while (0)

328
#define emulate_2op_cl(ctxt, _op)					\
329
	do {								\
330
		switch ((ctxt)->dst.bytes) {				\
331
		case 2:							\
332
			__emulate_2op_cl(ctxt, _op, "w", u16);		\
333 334
			break;						\
		case 4:							\
335
			__emulate_2op_cl(ctxt, _op, "l", u32);		\
336 337
			break;						\
		case 8:							\
338
			ON64(__emulate_2op_cl(ctxt, _op, "q", ulong));	\
339 340
			break;						\
		}							\
341 342
	} while (0)

343
#define __emulate_1op(ctxt, _op, _suffix)				\
A
Avi Kivity 已提交
344 345 346
	do {								\
		unsigned long _tmp;					\
									\
347 348 349 350
		__asm__ __volatile__ (					\
			_PRE_EFLAGS("0", "3", "2")			\
			_op _suffix " %1; "				\
			_POST_EFLAGS("0", "3", "2")			\
351
			: "=m" ((ctxt)->eflags), "+m" ((ctxt)->dst.val), \
352 353 354 355 356
			  "=&r" (_tmp)					\
			: "i" (EFLAGS_MASK));				\
	} while (0)

/* Instruction has only one explicit operand (no source operand). */
357
#define emulate_1op(ctxt, _op)						\
358
	do {								\
359 360 361 362 363
		switch ((ctxt)->dst.bytes) {				\
		case 1:	__emulate_1op(ctxt, _op, "b"); break;		\
		case 2:	__emulate_1op(ctxt, _op, "w"); break;		\
		case 4:	__emulate_1op(ctxt, _op, "l"); break;		\
		case 8:	ON64(__emulate_1op(ctxt, _op, "q")); break;	\
A
Avi Kivity 已提交
364 365 366
		}							\
	} while (0)

367
#define __emulate_1op_rax_rdx(ctxt, _op, _suffix, _ex)			\
368 369
	do {								\
		unsigned long _tmp;					\
370 371
		ulong *rax = &(ctxt)->regs[VCPU_REGS_RAX];		\
		ulong *rdx = &(ctxt)->regs[VCPU_REGS_RDX];		\
372 373 374 375 376 377 378 379 380 381 382 383
									\
		__asm__ __volatile__ (					\
			_PRE_EFLAGS("0", "5", "1")			\
			"1: \n\t"					\
			_op _suffix " %6; "				\
			"2: \n\t"					\
			_POST_EFLAGS("0", "5", "1")			\
			".pushsection .fixup,\"ax\" \n\t"		\
			"3: movb $1, %4 \n\t"				\
			"jmp 2b \n\t"					\
			".popsection \n\t"				\
			_ASM_EXTABLE(1b, 3b)				\
384 385 386 387
			: "=m" ((ctxt)->eflags), "=&r" (_tmp),		\
			  "+a" (*rax), "+d" (*rdx), "+qm"(_ex)		\
			: "i" (EFLAGS_MASK), "m" ((ctxt)->src.val),	\
			  "a" (*rax), "d" (*rdx));			\
388 389
	} while (0)

390
/* instruction has only one source operand, destination is implicit (e.g. mul, div, imul, idiv) */
391
#define emulate_1op_rax_rdx(ctxt, _op, _ex)	\
392
	do {								\
393
		switch((ctxt)->src.bytes) {				\
394
		case 1:							\
395
			__emulate_1op_rax_rdx(ctxt, _op, "b", _ex);	\
396 397
			break;						\
		case 2:							\
398
			__emulate_1op_rax_rdx(ctxt, _op, "w", _ex);	\
399 400
			break;						\
		case 4:							\
401
			__emulate_1op_rax_rdx(ctxt, _op, "l", _ex);	\
402 403
			break;						\
		case 8: ON64(						\
404
			__emulate_1op_rax_rdx(ctxt, _op, "q", _ex));	\
405 406 407 408
			break;						\
		}							\
	} while (0)

409 410 411 412 413 414
static int emulator_check_intercept(struct x86_emulate_ctxt *ctxt,
				    enum x86_intercept intercept,
				    enum x86_intercept_stage stage)
{
	struct x86_instruction_info info = {
		.intercept  = intercept,
415 416 417 418 419 420 421 422
		.rep_prefix = ctxt->rep_prefix,
		.modrm_mod  = ctxt->modrm_mod,
		.modrm_reg  = ctxt->modrm_reg,
		.modrm_rm   = ctxt->modrm_rm,
		.src_val    = ctxt->src.val64,
		.src_bytes  = ctxt->src.bytes,
		.dst_bytes  = ctxt->dst.bytes,
		.ad_bytes   = ctxt->ad_bytes,
423 424 425
		.next_rip   = ctxt->eip,
	};

426
	return ctxt->ops->intercept(ctxt, &info, stage);
427 428
}

429
static inline unsigned long ad_mask(struct x86_emulate_ctxt *ctxt)
430
{
431
	return (1UL << (ctxt->ad_bytes << 3)) - 1;
432 433
}

A
Avi Kivity 已提交
434
/* Access/update address held in a register, based on addressing mode. */
435
static inline unsigned long
436
address_mask(struct x86_emulate_ctxt *ctxt, unsigned long reg)
437
{
438
	if (ctxt->ad_bytes == sizeof(unsigned long))
439 440
		return reg;
	else
441
		return reg & ad_mask(ctxt);
442 443 444
}

static inline unsigned long
445
register_address(struct x86_emulate_ctxt *ctxt, unsigned long reg)
446
{
447
	return address_mask(ctxt, reg);
448 449
}

450
static inline void
451
register_address_increment(struct x86_emulate_ctxt *ctxt, unsigned long *reg, int inc)
452
{
453
	if (ctxt->ad_bytes == sizeof(unsigned long))
454 455
		*reg += inc;
	else
456
		*reg = (*reg & ~ad_mask(ctxt)) | ((*reg + inc) & ad_mask(ctxt));
457
}
A
Avi Kivity 已提交
458

459
static inline void jmp_rel(struct x86_emulate_ctxt *ctxt, int rel)
460
{
461
	register_address_increment(ctxt, &ctxt->_eip, rel);
462
}
463

464 465 466 467 468 469 470
static u32 desc_limit_scaled(struct desc_struct *desc)
{
	u32 limit = get_desc_limit(desc);

	return desc->g ? (limit << 12) | 0xfff : limit;
}

471
static void set_seg_override(struct x86_emulate_ctxt *ctxt, int seg)
472
{
473 474
	ctxt->has_seg_override = true;
	ctxt->seg_override = seg;
475 476
}

477
static unsigned long seg_base(struct x86_emulate_ctxt *ctxt, int seg)
478 479 480 481
{
	if (ctxt->mode == X86EMUL_MODE_PROT64 && seg < VCPU_SREG_FS)
		return 0;

482
	return ctxt->ops->get_cached_segment_base(ctxt, seg);
483 484
}

485
static unsigned seg_override(struct x86_emulate_ctxt *ctxt)
486
{
487
	if (!ctxt->has_seg_override)
488 489
		return 0;

490
	return ctxt->seg_override;
491 492
}

493 494
static int emulate_exception(struct x86_emulate_ctxt *ctxt, int vec,
			     u32 error, bool valid)
495
{
496 497 498
	ctxt->exception.vector = vec;
	ctxt->exception.error_code = error;
	ctxt->exception.error_code_valid = valid;
499
	return X86EMUL_PROPAGATE_FAULT;
500 501
}

502 503 504 505 506
static int emulate_db(struct x86_emulate_ctxt *ctxt)
{
	return emulate_exception(ctxt, DB_VECTOR, 0, false);
}

507
static int emulate_gp(struct x86_emulate_ctxt *ctxt, int err)
508
{
509
	return emulate_exception(ctxt, GP_VECTOR, err, true);
510 511
}

512 513 514 515 516
static int emulate_ss(struct x86_emulate_ctxt *ctxt, int err)
{
	return emulate_exception(ctxt, SS_VECTOR, err, true);
}

517
static int emulate_ud(struct x86_emulate_ctxt *ctxt)
518
{
519
	return emulate_exception(ctxt, UD_VECTOR, 0, false);
520 521
}

522
static int emulate_ts(struct x86_emulate_ctxt *ctxt, int err)
523
{
524
	return emulate_exception(ctxt, TS_VECTOR, err, true);
525 526
}

527 528
static int emulate_de(struct x86_emulate_ctxt *ctxt)
{
529
	return emulate_exception(ctxt, DE_VECTOR, 0, false);
530 531
}

A
Avi Kivity 已提交
532 533 534 535 536
static int emulate_nm(struct x86_emulate_ctxt *ctxt)
{
	return emulate_exception(ctxt, NM_VECTOR, 0, false);
}

537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556
static u16 get_segment_selector(struct x86_emulate_ctxt *ctxt, unsigned seg)
{
	u16 selector;
	struct desc_struct desc;

	ctxt->ops->get_segment(ctxt, &selector, &desc, NULL, seg);
	return selector;
}

static void set_segment_selector(struct x86_emulate_ctxt *ctxt, u16 selector,
				 unsigned seg)
{
	u16 dummy;
	u32 base3;
	struct desc_struct desc;

	ctxt->ops->get_segment(ctxt, &dummy, &desc, &base3, seg);
	ctxt->ops->set_segment(ctxt, selector, &desc, base3, seg);
}

557
static int __linearize(struct x86_emulate_ctxt *ctxt,
558
		     struct segmented_address addr,
559
		     unsigned size, bool write, bool fetch,
560 561
		     ulong *linear)
{
562 563
	struct desc_struct desc;
	bool usable;
564
	ulong la;
565
	u32 lim;
566
	u16 sel;
567
	unsigned cpl, rpl;
568

569
	la = seg_base(ctxt, addr.seg) + addr.ea;
570 571 572 573 574 575 576 577
	switch (ctxt->mode) {
	case X86EMUL_MODE_REAL:
		break;
	case X86EMUL_MODE_PROT64:
		if (((signed long)la << 16) >> 16 != la)
			return emulate_gp(ctxt, 0);
		break;
	default:
578 579
		usable = ctxt->ops->get_segment(ctxt, &sel, &desc, NULL,
						addr.seg);
580 581 582 583 584 585
		if (!usable)
			goto bad;
		/* code segment or read-only data segment */
		if (((desc.type & 8) || !(desc.type & 2)) && write)
			goto bad;
		/* unreadable code segment */
586
		if (!fetch && (desc.type & 8) && !(desc.type & 2))
587 588 589 590 591 592 593 594 595 596 597 598 599 600
			goto bad;
		lim = desc_limit_scaled(&desc);
		if ((desc.type & 8) || !(desc.type & 4)) {
			/* expand-up segment */
			if (addr.ea > lim || (u32)(addr.ea + size - 1) > lim)
				goto bad;
		} else {
			/* exapand-down segment */
			if (addr.ea <= lim || (u32)(addr.ea + size - 1) <= lim)
				goto bad;
			lim = desc.d ? 0xffffffff : 0xffff;
			if (addr.ea > lim || (u32)(addr.ea + size - 1) > lim)
				goto bad;
		}
601
		cpl = ctxt->ops->cpl(ctxt);
602
		rpl = sel & 3;
603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618
		cpl = max(cpl, rpl);
		if (!(desc.type & 8)) {
			/* data segment */
			if (cpl > desc.dpl)
				goto bad;
		} else if ((desc.type & 8) && !(desc.type & 4)) {
			/* nonconforming code segment */
			if (cpl != desc.dpl)
				goto bad;
		} else if ((desc.type & 8) && (desc.type & 4)) {
			/* conforming code segment */
			if (cpl < desc.dpl)
				goto bad;
		}
		break;
	}
619
	if (fetch ? ctxt->mode != X86EMUL_MODE_PROT64 : ctxt->ad_bytes != 8)
620 621 622
		la &= (u32)-1;
	*linear = la;
	return X86EMUL_CONTINUE;
623 624 625 626 627
bad:
	if (addr.seg == VCPU_SREG_SS)
		return emulate_ss(ctxt, addr.seg);
	else
		return emulate_gp(ctxt, addr.seg);
628 629
}

630 631 632 633 634 635 636 637 638
static int linearize(struct x86_emulate_ctxt *ctxt,
		     struct segmented_address addr,
		     unsigned size, bool write,
		     ulong *linear)
{
	return __linearize(ctxt, addr, size, write, false, linear);
}


639 640 641 642 643
static int segmented_read_std(struct x86_emulate_ctxt *ctxt,
			      struct segmented_address addr,
			      void *data,
			      unsigned size)
{
644 645 646
	int rc;
	ulong linear;

647
	rc = linearize(ctxt, addr, size, false, &linear);
648 649
	if (rc != X86EMUL_CONTINUE)
		return rc;
650
	return ctxt->ops->read_std(ctxt, linear, data, size, &ctxt->exception);
651 652
}

653 654 655 656 657 658 659 660
/*
 * Fetch the next byte of the instruction being emulated which is pointed to
 * by ctxt->_eip, then increment ctxt->_eip.
 *
 * Also prefetch the remaining bytes of the instruction without crossing page
 * boundary if they are not in fetch_cache yet.
 */
static int do_insn_fetch_byte(struct x86_emulate_ctxt *ctxt, u8 *dest)
661
{
662
	struct fetch_cache *fc = &ctxt->fetch;
663
	int rc;
664
	int size, cur_size;
665

666
	if (ctxt->_eip == fc->end) {
667
		unsigned long linear;
668 669
		struct segmented_address addr = { .seg = VCPU_SREG_CS,
						  .ea  = ctxt->_eip };
670
		cur_size = fc->end - fc->start;
671 672
		size = min(15UL - cur_size,
			   PAGE_SIZE - offset_in_page(ctxt->_eip));
673
		rc = __linearize(ctxt, addr, size, false, true, &linear);
674
		if (unlikely(rc != X86EMUL_CONTINUE))
675
			return rc;
676 677
		rc = ctxt->ops->fetch(ctxt, linear, fc->data + cur_size,
				      size, &ctxt->exception);
678
		if (unlikely(rc != X86EMUL_CONTINUE))
679
			return rc;
680
		fc->end += size;
681
	}
682 683
	*dest = fc->data[ctxt->_eip - fc->start];
	ctxt->_eip++;
684
	return X86EMUL_CONTINUE;
685 686 687
}

static int do_insn_fetch(struct x86_emulate_ctxt *ctxt,
688
			 void *dest, unsigned size)
689
{
690
	int rc;
691

692
	/* x86 instructions are limited to 15 bytes. */
693
	if (unlikely(ctxt->_eip + size - ctxt->eip > 15))
694
		return X86EMUL_UNHANDLEABLE;
695
	while (size--) {
696
		rc = do_insn_fetch_byte(ctxt, dest++);
697
		if (rc != X86EMUL_CONTINUE)
698 699
			return rc;
	}
700
	return X86EMUL_CONTINUE;
701 702
}

703
/* Fetch next part of the instruction being emulated. */
704
#define insn_fetch(_type, _ctxt)					\
705
({	unsigned long _x;						\
706
	rc = do_insn_fetch(_ctxt, &_x, sizeof(_type));			\
707 708 709 710 711
	if (rc != X86EMUL_CONTINUE)					\
		goto done;						\
	(_type)_x;							\
})

712 713
#define insn_fetch_arr(_arr, _size, _ctxt)				\
({	rc = do_insn_fetch(_ctxt, _arr, (_size));			\
714 715 716 717
	if (rc != X86EMUL_CONTINUE)					\
		goto done;						\
})

718 719 720 721 722 723 724
/*
 * Given the 'reg' portion of a ModRM byte, and a register block, return a
 * pointer into the block that addresses the relevant register.
 * @highbyte_regs specifies whether to decode AH,CH,DH,BH.
 */
static void *decode_register(u8 modrm_reg, unsigned long *regs,
			     int highbyte_regs)
A
Avi Kivity 已提交
725 726 727 728 729 730 731 732 733 734
{
	void *p;

	p = &regs[modrm_reg];
	if (highbyte_regs && modrm_reg >= 4 && modrm_reg < 8)
		p = (unsigned char *)&regs[modrm_reg & 3] + 1;
	return p;
}

static int read_descriptor(struct x86_emulate_ctxt *ctxt,
735
			   struct segmented_address addr,
A
Avi Kivity 已提交
736 737 738 739 740 741 742
			   u16 *size, unsigned long *address, int op_bytes)
{
	int rc;

	if (op_bytes == 2)
		op_bytes = 3;
	*address = 0;
743
	rc = segmented_read_std(ctxt, addr, size, 2);
744
	if (rc != X86EMUL_CONTINUE)
A
Avi Kivity 已提交
745
		return rc;
746
	addr.ea += 2;
747
	rc = segmented_read_std(ctxt, addr, address, op_bytes);
A
Avi Kivity 已提交
748 749 750
	return rc;
}

751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785
static int test_cc(unsigned int condition, unsigned int flags)
{
	int rc = 0;

	switch ((condition & 15) >> 1) {
	case 0: /* o */
		rc |= (flags & EFLG_OF);
		break;
	case 1: /* b/c/nae */
		rc |= (flags & EFLG_CF);
		break;
	case 2: /* z/e */
		rc |= (flags & EFLG_ZF);
		break;
	case 3: /* be/na */
		rc |= (flags & (EFLG_CF|EFLG_ZF));
		break;
	case 4: /* s */
		rc |= (flags & EFLG_SF);
		break;
	case 5: /* p/pe */
		rc |= (flags & EFLG_PF);
		break;
	case 7: /* le/ng */
		rc |= (flags & EFLG_ZF);
		/* fall through */
	case 6: /* l/nge */
		rc |= (!(flags & EFLG_SF) != !(flags & EFLG_OF));
		break;
	}

	/* Odd condition identifiers (lsb == 1) have inverted sense. */
	return (!!rc ^ (condition & 1));
}

786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803
static void fetch_register_operand(struct operand *op)
{
	switch (op->bytes) {
	case 1:
		op->val = *(u8 *)op->addr.reg;
		break;
	case 2:
		op->val = *(u16 *)op->addr.reg;
		break;
	case 4:
		op->val = *(u32 *)op->addr.reg;
		break;
	case 8:
		op->val = *(u64 *)op->addr.reg;
		break;
	}
}

A
Avi Kivity 已提交
804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860
static void read_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data, int reg)
{
	ctxt->ops->get_fpu(ctxt);
	switch (reg) {
	case 0: asm("movdqu %%xmm0, %0" : "=m"(*data)); break;
	case 1: asm("movdqu %%xmm1, %0" : "=m"(*data)); break;
	case 2: asm("movdqu %%xmm2, %0" : "=m"(*data)); break;
	case 3: asm("movdqu %%xmm3, %0" : "=m"(*data)); break;
	case 4: asm("movdqu %%xmm4, %0" : "=m"(*data)); break;
	case 5: asm("movdqu %%xmm5, %0" : "=m"(*data)); break;
	case 6: asm("movdqu %%xmm6, %0" : "=m"(*data)); break;
	case 7: asm("movdqu %%xmm7, %0" : "=m"(*data)); break;
#ifdef CONFIG_X86_64
	case 8: asm("movdqu %%xmm8, %0" : "=m"(*data)); break;
	case 9: asm("movdqu %%xmm9, %0" : "=m"(*data)); break;
	case 10: asm("movdqu %%xmm10, %0" : "=m"(*data)); break;
	case 11: asm("movdqu %%xmm11, %0" : "=m"(*data)); break;
	case 12: asm("movdqu %%xmm12, %0" : "=m"(*data)); break;
	case 13: asm("movdqu %%xmm13, %0" : "=m"(*data)); break;
	case 14: asm("movdqu %%xmm14, %0" : "=m"(*data)); break;
	case 15: asm("movdqu %%xmm15, %0" : "=m"(*data)); break;
#endif
	default: BUG();
	}
	ctxt->ops->put_fpu(ctxt);
}

static void write_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data,
			  int reg)
{
	ctxt->ops->get_fpu(ctxt);
	switch (reg) {
	case 0: asm("movdqu %0, %%xmm0" : : "m"(*data)); break;
	case 1: asm("movdqu %0, %%xmm1" : : "m"(*data)); break;
	case 2: asm("movdqu %0, %%xmm2" : : "m"(*data)); break;
	case 3: asm("movdqu %0, %%xmm3" : : "m"(*data)); break;
	case 4: asm("movdqu %0, %%xmm4" : : "m"(*data)); break;
	case 5: asm("movdqu %0, %%xmm5" : : "m"(*data)); break;
	case 6: asm("movdqu %0, %%xmm6" : : "m"(*data)); break;
	case 7: asm("movdqu %0, %%xmm7" : : "m"(*data)); break;
#ifdef CONFIG_X86_64
	case 8: asm("movdqu %0, %%xmm8" : : "m"(*data)); break;
	case 9: asm("movdqu %0, %%xmm9" : : "m"(*data)); break;
	case 10: asm("movdqu %0, %%xmm10" : : "m"(*data)); break;
	case 11: asm("movdqu %0, %%xmm11" : : "m"(*data)); break;
	case 12: asm("movdqu %0, %%xmm12" : : "m"(*data)); break;
	case 13: asm("movdqu %0, %%xmm13" : : "m"(*data)); break;
	case 14: asm("movdqu %0, %%xmm14" : : "m"(*data)); break;
	case 15: asm("movdqu %0, %%xmm15" : : "m"(*data)); break;
#endif
	default: BUG();
	}
	ctxt->ops->put_fpu(ctxt);
}

static void decode_register_operand(struct x86_emulate_ctxt *ctxt,
				    struct operand *op,
861 862
				    int inhibit_bytereg)
{
863 864
	unsigned reg = ctxt->modrm_reg;
	int highbyte_regs = ctxt->rex_prefix == 0;
865

866 867
	if (!(ctxt->d & ModRM))
		reg = (ctxt->b & 7) | ((ctxt->rex_prefix & 1) << 3);
A
Avi Kivity 已提交
868

869
	if (ctxt->d & Sse) {
A
Avi Kivity 已提交
870 871 872 873 874 875 876
		op->type = OP_XMM;
		op->bytes = 16;
		op->addr.xmm = reg;
		read_sse_reg(ctxt, &op->vec_val, reg);
		return;
	}

877
	op->type = OP_REG;
878 879
	if ((ctxt->d & ByteOp) && !inhibit_bytereg) {
		op->addr.reg = decode_register(reg, ctxt->regs, highbyte_regs);
880 881
		op->bytes = 1;
	} else {
882 883
		op->addr.reg = decode_register(reg, ctxt->regs, 0);
		op->bytes = ctxt->op_bytes;
884
	}
885
	fetch_register_operand(op);
886 887 888
	op->orig_val = op->val;
}

889
static int decode_modrm(struct x86_emulate_ctxt *ctxt,
890
			struct operand *op)
891 892
{
	u8 sib;
893
	int index_reg = 0, base_reg = 0, scale;
894
	int rc = X86EMUL_CONTINUE;
895
	ulong modrm_ea = 0;
896

897 898 899 900
	if (ctxt->rex_prefix) {
		ctxt->modrm_reg = (ctxt->rex_prefix & 4) << 1;	/* REX.R */
		index_reg = (ctxt->rex_prefix & 2) << 2; /* REX.X */
		ctxt->modrm_rm = base_reg = (ctxt->rex_prefix & 1) << 3; /* REG.B */
901 902
	}

903
	ctxt->modrm = insn_fetch(u8, ctxt);
904 905 906 907
	ctxt->modrm_mod |= (ctxt->modrm & 0xc0) >> 6;
	ctxt->modrm_reg |= (ctxt->modrm & 0x38) >> 3;
	ctxt->modrm_rm |= (ctxt->modrm & 0x07);
	ctxt->modrm_seg = VCPU_SREG_DS;
908

909
	if (ctxt->modrm_mod == 3) {
910
		op->type = OP_REG;
911 912 913 914
		op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
		op->addr.reg = decode_register(ctxt->modrm_rm,
					       ctxt->regs, ctxt->d & ByteOp);
		if (ctxt->d & Sse) {
A
Avi Kivity 已提交
915 916
			op->type = OP_XMM;
			op->bytes = 16;
917 918
			op->addr.xmm = ctxt->modrm_rm;
			read_sse_reg(ctxt, &op->vec_val, ctxt->modrm_rm);
A
Avi Kivity 已提交
919 920
			return rc;
		}
921
		fetch_register_operand(op);
922 923 924
		return rc;
	}

925 926
	op->type = OP_MEM;

927 928 929 930 931
	if (ctxt->ad_bytes == 2) {
		unsigned bx = ctxt->regs[VCPU_REGS_RBX];
		unsigned bp = ctxt->regs[VCPU_REGS_RBP];
		unsigned si = ctxt->regs[VCPU_REGS_RSI];
		unsigned di = ctxt->regs[VCPU_REGS_RDI];
932 933

		/* 16-bit ModR/M decode. */
934
		switch (ctxt->modrm_mod) {
935
		case 0:
936
			if (ctxt->modrm_rm == 6)
937
				modrm_ea += insn_fetch(u16, ctxt);
938 939
			break;
		case 1:
940
			modrm_ea += insn_fetch(s8, ctxt);
941 942
			break;
		case 2:
943
			modrm_ea += insn_fetch(u16, ctxt);
944 945
			break;
		}
946
		switch (ctxt->modrm_rm) {
947
		case 0:
948
			modrm_ea += bx + si;
949 950
			break;
		case 1:
951
			modrm_ea += bx + di;
952 953
			break;
		case 2:
954
			modrm_ea += bp + si;
955 956
			break;
		case 3:
957
			modrm_ea += bp + di;
958 959
			break;
		case 4:
960
			modrm_ea += si;
961 962
			break;
		case 5:
963
			modrm_ea += di;
964 965
			break;
		case 6:
966
			if (ctxt->modrm_mod != 0)
967
				modrm_ea += bp;
968 969
			break;
		case 7:
970
			modrm_ea += bx;
971 972
			break;
		}
973 974 975
		if (ctxt->modrm_rm == 2 || ctxt->modrm_rm == 3 ||
		    (ctxt->modrm_rm == 6 && ctxt->modrm_mod != 0))
			ctxt->modrm_seg = VCPU_SREG_SS;
976
		modrm_ea = (u16)modrm_ea;
977 978
	} else {
		/* 32/64-bit ModR/M decode. */
979
		if ((ctxt->modrm_rm & 7) == 4) {
980
			sib = insn_fetch(u8, ctxt);
981 982 983 984
			index_reg |= (sib >> 3) & 7;
			base_reg |= sib & 7;
			scale = sib >> 6;

985
			if ((base_reg & 7) == 5 && ctxt->modrm_mod == 0)
986
				modrm_ea += insn_fetch(s32, ctxt);
987
			else
988
				modrm_ea += ctxt->regs[base_reg];
989
			if (index_reg != 4)
990 991
				modrm_ea += ctxt->regs[index_reg] << scale;
		} else if ((ctxt->modrm_rm & 7) == 5 && ctxt->modrm_mod == 0) {
992
			if (ctxt->mode == X86EMUL_MODE_PROT64)
993
				ctxt->rip_relative = 1;
994
		} else
995 996
			modrm_ea += ctxt->regs[ctxt->modrm_rm];
		switch (ctxt->modrm_mod) {
997
		case 0:
998
			if (ctxt->modrm_rm == 5)
999
				modrm_ea += insn_fetch(s32, ctxt);
1000 1001
			break;
		case 1:
1002
			modrm_ea += insn_fetch(s8, ctxt);
1003 1004
			break;
		case 2:
1005
			modrm_ea += insn_fetch(s32, ctxt);
1006 1007 1008
			break;
		}
	}
1009
	op->addr.mem.ea = modrm_ea;
1010 1011 1012 1013 1014
done:
	return rc;
}

static int decode_abs(struct x86_emulate_ctxt *ctxt,
1015
		      struct operand *op)
1016
{
1017
	int rc = X86EMUL_CONTINUE;
1018

1019
	op->type = OP_MEM;
1020
	switch (ctxt->ad_bytes) {
1021
	case 2:
1022
		op->addr.mem.ea = insn_fetch(u16, ctxt);
1023 1024
		break;
	case 4:
1025
		op->addr.mem.ea = insn_fetch(u32, ctxt);
1026 1027
		break;
	case 8:
1028
		op->addr.mem.ea = insn_fetch(u64, ctxt);
1029 1030 1031 1032 1033 1034
		break;
	}
done:
	return rc;
}

1035
static void fetch_bit_operand(struct x86_emulate_ctxt *ctxt)
1036
{
1037
	long sv = 0, mask;
1038

1039 1040
	if (ctxt->dst.type == OP_MEM && ctxt->src.type == OP_REG) {
		mask = ~(ctxt->dst.bytes * 8 - 1);
1041

1042 1043 1044 1045
		if (ctxt->src.bytes == 2)
			sv = (s16)ctxt->src.val & (s16)mask;
		else if (ctxt->src.bytes == 4)
			sv = (s32)ctxt->src.val & (s32)mask;
1046

1047
		ctxt->dst.addr.mem.ea += (sv >> 3);
1048
	}
1049 1050

	/* only subword offset */
1051
	ctxt->src.val &= (ctxt->dst.bytes << 3) - 1;
1052 1053
}

1054 1055
static int read_emulated(struct x86_emulate_ctxt *ctxt,
			 unsigned long addr, void *dest, unsigned size)
A
Avi Kivity 已提交
1056
{
1057
	int rc;
1058
	struct read_cache *mc = &ctxt->mem_read;
A
Avi Kivity 已提交
1059

1060 1061 1062 1063 1064
	while (size) {
		int n = min(size, 8u);
		size -= n;
		if (mc->pos < mc->end)
			goto read_cached;
1065

1066 1067
		rc = ctxt->ops->read_emulated(ctxt, addr, mc->data + mc->end, n,
					      &ctxt->exception);
1068 1069 1070
		if (rc != X86EMUL_CONTINUE)
			return rc;
		mc->end += n;
A
Avi Kivity 已提交
1071

1072 1073 1074 1075 1076
	read_cached:
		memcpy(dest, mc->data + mc->pos, n);
		mc->pos += n;
		dest += n;
		addr += n;
A
Avi Kivity 已提交
1077
	}
1078 1079
	return X86EMUL_CONTINUE;
}
A
Avi Kivity 已提交
1080

1081 1082 1083 1084 1085
static int segmented_read(struct x86_emulate_ctxt *ctxt,
			  struct segmented_address addr,
			  void *data,
			  unsigned size)
{
1086 1087 1088
	int rc;
	ulong linear;

1089
	rc = linearize(ctxt, addr, size, false, &linear);
1090 1091
	if (rc != X86EMUL_CONTINUE)
		return rc;
1092
	return read_emulated(ctxt, linear, data, size);
1093 1094 1095 1096 1097 1098 1099
}

static int segmented_write(struct x86_emulate_ctxt *ctxt,
			   struct segmented_address addr,
			   const void *data,
			   unsigned size)
{
1100 1101 1102
	int rc;
	ulong linear;

1103
	rc = linearize(ctxt, addr, size, true, &linear);
1104 1105
	if (rc != X86EMUL_CONTINUE)
		return rc;
1106 1107
	return ctxt->ops->write_emulated(ctxt, linear, data, size,
					 &ctxt->exception);
1108 1109 1110 1111 1112 1113 1114
}

static int segmented_cmpxchg(struct x86_emulate_ctxt *ctxt,
			     struct segmented_address addr,
			     const void *orig_data, const void *data,
			     unsigned size)
{
1115 1116 1117
	int rc;
	ulong linear;

1118
	rc = linearize(ctxt, addr, size, true, &linear);
1119 1120
	if (rc != X86EMUL_CONTINUE)
		return rc;
1121 1122
	return ctxt->ops->cmpxchg_emulated(ctxt, linear, orig_data, data,
					   size, &ctxt->exception);
1123 1124
}

1125 1126 1127 1128
static int pio_in_emulated(struct x86_emulate_ctxt *ctxt,
			   unsigned int size, unsigned short port,
			   void *dest)
{
1129
	struct read_cache *rc = &ctxt->io_read;
1130

1131 1132
	if (rc->pos == rc->end) { /* refill pio read ahead */
		unsigned int in_page, n;
1133 1134
		unsigned int count = ctxt->rep_prefix ?
			address_mask(ctxt, ctxt->regs[VCPU_REGS_RCX]) : 1;
1135
		in_page = (ctxt->eflags & EFLG_DF) ?
1136 1137
			offset_in_page(ctxt->regs[VCPU_REGS_RDI]) :
			PAGE_SIZE - offset_in_page(ctxt->regs[VCPU_REGS_RDI]);
1138 1139 1140 1141 1142
		n = min(min(in_page, (unsigned int)sizeof(rc->data)) / size,
			count);
		if (n == 0)
			n = 1;
		rc->pos = rc->end = 0;
1143
		if (!ctxt->ops->pio_in_emulated(ctxt, size, port, rc->data, n))
1144 1145
			return 0;
		rc->end = n * size;
A
Avi Kivity 已提交
1146 1147
	}

1148 1149 1150 1151
	memcpy(dest, rc->data + rc->pos, size);
	rc->pos += size;
	return 1;
}
A
Avi Kivity 已提交
1152

1153 1154 1155
static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt,
				     u16 selector, struct desc_ptr *dt)
{
1156 1157
	struct x86_emulate_ops *ops = ctxt->ops;

1158 1159
	if (selector & 1 << 2) {
		struct desc_struct desc;
1160 1161
		u16 sel;

1162
		memset (dt, 0, sizeof *dt);
1163
		if (!ops->get_segment(ctxt, &sel, &desc, NULL, VCPU_SREG_LDTR))
1164
			return;
1165

1166 1167 1168
		dt->size = desc_limit_scaled(&desc); /* what if limit > 65535? */
		dt->address = get_desc_base(&desc);
	} else
1169
		ops->get_gdt(ctxt, dt);
1170
}
1171

1172 1173 1174 1175 1176 1177 1178
/* allowed just for 8 bytes segments */
static int read_segment_descriptor(struct x86_emulate_ctxt *ctxt,
				   u16 selector, struct desc_struct *desc)
{
	struct desc_ptr dt;
	u16 index = selector >> 3;
	ulong addr;
1179

1180
	get_descriptor_table_ptr(ctxt, selector, &dt);
1181

1182 1183
	if (dt.size < index * 8 + 7)
		return emulate_gp(ctxt, selector & 0xfffc);
1184

1185 1186 1187
	addr = dt.address + index * 8;
	return ctxt->ops->read_std(ctxt, addr, desc, sizeof *desc,
				   &ctxt->exception);
1188
}
1189

1190 1191 1192 1193 1194 1195 1196
/* allowed just for 8 bytes segments */
static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt,
				    u16 selector, struct desc_struct *desc)
{
	struct desc_ptr dt;
	u16 index = selector >> 3;
	ulong addr;
A
Avi Kivity 已提交
1197

1198
	get_descriptor_table_ptr(ctxt, selector, &dt);
1199

1200 1201
	if (dt.size < index * 8 + 7)
		return emulate_gp(ctxt, selector & 0xfffc);
A
Avi Kivity 已提交
1202

1203
	addr = dt.address + index * 8;
1204 1205
	return ctxt->ops->write_std(ctxt, addr, desc, sizeof *desc,
				    &ctxt->exception);
1206
}
1207

1208
/* Does not support long mode */
1209 1210 1211 1212 1213 1214 1215 1216 1217
static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
				   u16 selector, int seg)
{
	struct desc_struct seg_desc;
	u8 dpl, rpl, cpl;
	unsigned err_vec = GP_VECTOR;
	u32 err_code = 0;
	bool null_selector = !(selector & ~0x3); /* 0000-0003 are null */
	int ret;
1218

1219
	memset(&seg_desc, 0, sizeof seg_desc);
1220

1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243
	if ((seg <= VCPU_SREG_GS && ctxt->mode == X86EMUL_MODE_VM86)
	    || ctxt->mode == X86EMUL_MODE_REAL) {
		/* set real mode segment descriptor */
		set_desc_base(&seg_desc, selector << 4);
		set_desc_limit(&seg_desc, 0xffff);
		seg_desc.type = 3;
		seg_desc.p = 1;
		seg_desc.s = 1;
		goto load;
	}

	/* NULL selector is not valid for TR, CS and SS */
	if ((seg == VCPU_SREG_CS || seg == VCPU_SREG_SS || seg == VCPU_SREG_TR)
	    && null_selector)
		goto exception;

	/* TR should be in GDT only */
	if (seg == VCPU_SREG_TR && (selector & (1 << 2)))
		goto exception;

	if (null_selector) /* for NULL selector skip all following checks */
		goto load;

1244
	ret = read_segment_descriptor(ctxt, selector, &seg_desc);
1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261
	if (ret != X86EMUL_CONTINUE)
		return ret;

	err_code = selector & 0xfffc;
	err_vec = GP_VECTOR;

	/* can't load system descriptor into segment selecor */
	if (seg <= VCPU_SREG_GS && !seg_desc.s)
		goto exception;

	if (!seg_desc.p) {
		err_vec = (seg == VCPU_SREG_SS) ? SS_VECTOR : NP_VECTOR;
		goto exception;
	}

	rpl = selector & 3;
	dpl = seg_desc.dpl;
1262
	cpl = ctxt->ops->cpl(ctxt);
1263 1264 1265 1266 1267 1268 1269 1270 1271

	switch (seg) {
	case VCPU_SREG_SS:
		/*
		 * segment is not a writable data segment or segment
		 * selector's RPL != CPL or segment selector's RPL != CPL
		 */
		if (rpl != cpl || (seg_desc.type & 0xa) != 0x2 || dpl != cpl)
			goto exception;
A
Avi Kivity 已提交
1272
		break;
1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287
	case VCPU_SREG_CS:
		if (!(seg_desc.type & 8))
			goto exception;

		if (seg_desc.type & 4) {
			/* conforming */
			if (dpl > cpl)
				goto exception;
		} else {
			/* nonconforming */
			if (rpl > cpl || dpl != cpl)
				goto exception;
		}
		/* CS(RPL) <- CPL */
		selector = (selector & 0xfffc) | cpl;
A
Avi Kivity 已提交
1288
		break;
1289 1290 1291 1292 1293 1294 1295 1296 1297
	case VCPU_SREG_TR:
		if (seg_desc.s || (seg_desc.type != 1 && seg_desc.type != 9))
			goto exception;
		break;
	case VCPU_SREG_LDTR:
		if (seg_desc.s || seg_desc.type != 2)
			goto exception;
		break;
	default: /*  DS, ES, FS, or GS */
1298
		/*
1299 1300 1301
		 * segment is not a data or readable code segment or
		 * ((segment is a data or nonconforming code segment)
		 * and (both RPL and CPL > DPL))
1302
		 */
1303 1304 1305 1306
		if ((seg_desc.type & 0xa) == 0x8 ||
		    (((seg_desc.type & 0xc) != 0xc) &&
		     (rpl > dpl && cpl > dpl)))
			goto exception;
A
Avi Kivity 已提交
1307
		break;
1308 1309 1310 1311 1312
	}

	if (seg_desc.s) {
		/* mark segment as accessed */
		seg_desc.type |= 1;
1313
		ret = write_segment_descriptor(ctxt, selector, &seg_desc);
1314 1315 1316 1317
		if (ret != X86EMUL_CONTINUE)
			return ret;
	}
load:
1318
	ctxt->ops->set_segment(ctxt, selector, &seg_desc, 0, seg);
1319 1320 1321 1322 1323 1324
	return X86EMUL_CONTINUE;
exception:
	emulate_exception(ctxt, err_vec, err_code, true);
	return X86EMUL_PROPAGATE_FAULT;
}

1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343
static void write_register_operand(struct operand *op)
{
	/* The 4-byte case *is* correct: in 64-bit mode we zero-extend. */
	switch (op->bytes) {
	case 1:
		*(u8 *)op->addr.reg = (u8)op->val;
		break;
	case 2:
		*(u16 *)op->addr.reg = (u16)op->val;
		break;
	case 4:
		*op->addr.reg = (u32)op->val;
		break;	/* 64b: zero-extend */
	case 8:
		*op->addr.reg = op->val;
		break;
	}
}

1344
static int writeback(struct x86_emulate_ctxt *ctxt)
1345 1346 1347
{
	int rc;

1348
	switch (ctxt->dst.type) {
1349
	case OP_REG:
1350
		write_register_operand(&ctxt->dst);
A
Avi Kivity 已提交
1351
		break;
1352
	case OP_MEM:
1353
		if (ctxt->lock_prefix)
1354
			rc = segmented_cmpxchg(ctxt,
1355 1356 1357 1358
					       ctxt->dst.addr.mem,
					       &ctxt->dst.orig_val,
					       &ctxt->dst.val,
					       ctxt->dst.bytes);
1359
		else
1360
			rc = segmented_write(ctxt,
1361 1362 1363
					     ctxt->dst.addr.mem,
					     &ctxt->dst.val,
					     ctxt->dst.bytes);
1364 1365
		if (rc != X86EMUL_CONTINUE)
			return rc;
1366
		break;
A
Avi Kivity 已提交
1367
	case OP_XMM:
1368
		write_sse_reg(ctxt, &ctxt->dst.vec_val, ctxt->dst.addr.xmm);
A
Avi Kivity 已提交
1369
		break;
1370 1371
	case OP_NONE:
		/* no writeback */
1372
		break;
1373
	default:
1374
		break;
A
Avi Kivity 已提交
1375
	}
1376 1377
	return X86EMUL_CONTINUE;
}
A
Avi Kivity 已提交
1378

1379
static int em_push(struct x86_emulate_ctxt *ctxt)
1380
{
1381
	struct segmented_address addr;
1382

1383 1384
	register_address_increment(ctxt, &ctxt->regs[VCPU_REGS_RSP], -ctxt->op_bytes);
	addr.ea = register_address(ctxt, ctxt->regs[VCPU_REGS_RSP]);
1385 1386 1387
	addr.seg = VCPU_SREG_SS;

	/* Disable writeback. */
1388 1389
	ctxt->dst.type = OP_NONE;
	return segmented_write(ctxt, addr, &ctxt->src.val, ctxt->op_bytes);
1390
}
1391

1392 1393 1394 1395
static int emulate_pop(struct x86_emulate_ctxt *ctxt,
		       void *dest, int len)
{
	int rc;
1396
	struct segmented_address addr;
1397

1398
	addr.ea = register_address(ctxt, ctxt->regs[VCPU_REGS_RSP]);
1399
	addr.seg = VCPU_SREG_SS;
1400
	rc = segmented_read(ctxt, addr, dest, len);
1401 1402 1403
	if (rc != X86EMUL_CONTINUE)
		return rc;

1404
	register_address_increment(ctxt, &ctxt->regs[VCPU_REGS_RSP], len);
1405
	return rc;
1406 1407
}

1408 1409
static int em_pop(struct x86_emulate_ctxt *ctxt)
{
1410
	return emulate_pop(ctxt, &ctxt->dst.val, ctxt->op_bytes);
1411 1412
}

1413
static int emulate_popf(struct x86_emulate_ctxt *ctxt,
1414
			void *dest, int len)
1415 1416
{
	int rc;
1417 1418
	unsigned long val, change_mask;
	int iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT;
1419
	int cpl = ctxt->ops->cpl(ctxt);
1420

1421
	rc = emulate_pop(ctxt, &val, len);
1422 1423
	if (rc != X86EMUL_CONTINUE)
		return rc;
1424

1425 1426
	change_mask = EFLG_CF | EFLG_PF | EFLG_AF | EFLG_ZF | EFLG_SF | EFLG_OF
		| EFLG_TF | EFLG_DF | EFLG_NT | EFLG_RF | EFLG_AC | EFLG_ID;
1427

1428 1429 1430 1431 1432 1433 1434 1435 1436 1437
	switch(ctxt->mode) {
	case X86EMUL_MODE_PROT64:
	case X86EMUL_MODE_PROT32:
	case X86EMUL_MODE_PROT16:
		if (cpl == 0)
			change_mask |= EFLG_IOPL;
		if (cpl <= iopl)
			change_mask |= EFLG_IF;
		break;
	case X86EMUL_MODE_VM86:
1438 1439
		if (iopl < 3)
			return emulate_gp(ctxt, 0);
1440 1441 1442 1443 1444
		change_mask |= EFLG_IF;
		break;
	default: /* real mode */
		change_mask |= (EFLG_IOPL | EFLG_IF);
		break;
1445
	}
1446 1447 1448 1449 1450

	*(unsigned long *)dest =
		(ctxt->eflags & ~change_mask) | (val & change_mask);

	return rc;
1451 1452
}

1453 1454
static int em_popf(struct x86_emulate_ctxt *ctxt)
{
1455 1456 1457 1458
	ctxt->dst.type = OP_REG;
	ctxt->dst.addr.reg = &ctxt->eflags;
	ctxt->dst.bytes = ctxt->op_bytes;
	return emulate_popf(ctxt, &ctxt->dst.val, ctxt->op_bytes);
1459 1460
}

1461
static int em_push_sreg(struct x86_emulate_ctxt *ctxt)
1462
{
1463 1464
	int seg = ctxt->src2.val;

1465
	ctxt->src.val = get_segment_selector(ctxt, seg);
1466

1467
	return em_push(ctxt);
1468 1469
}

1470
static int em_pop_sreg(struct x86_emulate_ctxt *ctxt)
1471
{
1472
	int seg = ctxt->src2.val;
1473 1474
	unsigned long selector;
	int rc;
1475

1476
	rc = emulate_pop(ctxt, &selector, ctxt->op_bytes);
1477 1478 1479
	if (rc != X86EMUL_CONTINUE)
		return rc;

1480
	rc = load_segment_descriptor(ctxt, (u16)selector, seg);
1481
	return rc;
1482 1483
}

1484
static int em_pusha(struct x86_emulate_ctxt *ctxt)
1485
{
1486
	unsigned long old_esp = ctxt->regs[VCPU_REGS_RSP];
1487 1488
	int rc = X86EMUL_CONTINUE;
	int reg = VCPU_REGS_RAX;
1489

1490 1491
	while (reg <= VCPU_REGS_RDI) {
		(reg == VCPU_REGS_RSP) ?
1492
		(ctxt->src.val = old_esp) : (ctxt->src.val = ctxt->regs[reg]);
1493

1494
		rc = em_push(ctxt);
1495 1496
		if (rc != X86EMUL_CONTINUE)
			return rc;
1497

1498
		++reg;
1499 1500
	}

1501
	return rc;
1502 1503
}

1504 1505
static int em_pushf(struct x86_emulate_ctxt *ctxt)
{
1506
	ctxt->src.val =  (unsigned long)ctxt->eflags;
1507 1508 1509
	return em_push(ctxt);
}

1510
static int em_popa(struct x86_emulate_ctxt *ctxt)
1511
{
1512 1513
	int rc = X86EMUL_CONTINUE;
	int reg = VCPU_REGS_RDI;
1514

1515 1516
	while (reg >= VCPU_REGS_RAX) {
		if (reg == VCPU_REGS_RSP) {
1517 1518
			register_address_increment(ctxt, &ctxt->regs[VCPU_REGS_RSP],
							ctxt->op_bytes);
1519 1520
			--reg;
		}
1521

1522
		rc = emulate_pop(ctxt, &ctxt->regs[reg], ctxt->op_bytes);
1523 1524 1525
		if (rc != X86EMUL_CONTINUE)
			break;
		--reg;
1526
	}
1527
	return rc;
1528 1529
}

1530
int emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq)
1531
{
1532
	struct x86_emulate_ops *ops = ctxt->ops;
1533
	int rc;
1534 1535 1536 1537 1538 1539
	struct desc_ptr dt;
	gva_t cs_addr;
	gva_t eip_addr;
	u16 cs, eip;

	/* TODO: Add limit checks */
1540
	ctxt->src.val = ctxt->eflags;
1541
	rc = em_push(ctxt);
1542 1543
	if (rc != X86EMUL_CONTINUE)
		return rc;
1544 1545 1546

	ctxt->eflags &= ~(EFLG_IF | EFLG_TF | EFLG_AC);

1547
	ctxt->src.val = get_segment_selector(ctxt, VCPU_SREG_CS);
1548
	rc = em_push(ctxt);
1549 1550
	if (rc != X86EMUL_CONTINUE)
		return rc;
1551

1552
	ctxt->src.val = ctxt->_eip;
1553
	rc = em_push(ctxt);
1554 1555 1556
	if (rc != X86EMUL_CONTINUE)
		return rc;

1557
	ops->get_idt(ctxt, &dt);
1558 1559 1560 1561

	eip_addr = dt.address + (irq << 2);
	cs_addr = dt.address + (irq << 2) + 2;

1562
	rc = ops->read_std(ctxt, cs_addr, &cs, 2, &ctxt->exception);
1563 1564 1565
	if (rc != X86EMUL_CONTINUE)
		return rc;

1566
	rc = ops->read_std(ctxt, eip_addr, &eip, 2, &ctxt->exception);
1567 1568 1569
	if (rc != X86EMUL_CONTINUE)
		return rc;

1570
	rc = load_segment_descriptor(ctxt, cs, VCPU_SREG_CS);
1571 1572 1573
	if (rc != X86EMUL_CONTINUE)
		return rc;

1574
	ctxt->_eip = eip;
1575 1576 1577 1578

	return rc;
}

1579
static int emulate_int(struct x86_emulate_ctxt *ctxt, int irq)
1580 1581 1582
{
	switch(ctxt->mode) {
	case X86EMUL_MODE_REAL:
1583
		return emulate_int_real(ctxt, irq);
1584 1585 1586 1587 1588 1589 1590 1591 1592 1593
	case X86EMUL_MODE_VM86:
	case X86EMUL_MODE_PROT16:
	case X86EMUL_MODE_PROT32:
	case X86EMUL_MODE_PROT64:
	default:
		/* Protected mode interrupts unimplemented yet */
		return X86EMUL_UNHANDLEABLE;
	}
}

1594
static int emulate_iret_real(struct x86_emulate_ctxt *ctxt)
1595
{
1596 1597 1598 1599 1600 1601 1602 1603
	int rc = X86EMUL_CONTINUE;
	unsigned long temp_eip = 0;
	unsigned long temp_eflags = 0;
	unsigned long cs = 0;
	unsigned long mask = EFLG_CF | EFLG_PF | EFLG_AF | EFLG_ZF | EFLG_SF | EFLG_TF |
			     EFLG_IF | EFLG_DF | EFLG_OF | EFLG_IOPL | EFLG_NT | EFLG_RF |
			     EFLG_AC | EFLG_ID | (1 << 1); /* Last one is the reserved bit */
	unsigned long vm86_mask = EFLG_VM | EFLG_VIF | EFLG_VIP;
1604

1605
	/* TODO: Add stack limit check */
1606

1607
	rc = emulate_pop(ctxt, &temp_eip, ctxt->op_bytes);
1608

1609 1610
	if (rc != X86EMUL_CONTINUE)
		return rc;
1611

1612 1613
	if (temp_eip & ~0xffff)
		return emulate_gp(ctxt, 0);
1614

1615
	rc = emulate_pop(ctxt, &cs, ctxt->op_bytes);
1616

1617 1618
	if (rc != X86EMUL_CONTINUE)
		return rc;
1619

1620
	rc = emulate_pop(ctxt, &temp_eflags, ctxt->op_bytes);
1621

1622 1623
	if (rc != X86EMUL_CONTINUE)
		return rc;
1624

1625
	rc = load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS);
1626

1627 1628
	if (rc != X86EMUL_CONTINUE)
		return rc;
1629

1630
	ctxt->_eip = temp_eip;
1631 1632


1633
	if (ctxt->op_bytes == 4)
1634
		ctxt->eflags = ((temp_eflags & mask) | (ctxt->eflags & vm86_mask));
1635
	else if (ctxt->op_bytes == 2) {
1636 1637
		ctxt->eflags &= ~0xffff;
		ctxt->eflags |= temp_eflags;
1638
	}
1639 1640 1641 1642 1643

	ctxt->eflags &= ~EFLG_RESERVED_ZEROS_MASK; /* Clear reserved zeros */
	ctxt->eflags |= EFLG_RESERVED_ONE_MASK;

	return rc;
1644 1645
}

1646
static int em_iret(struct x86_emulate_ctxt *ctxt)
1647
{
1648 1649
	switch(ctxt->mode) {
	case X86EMUL_MODE_REAL:
1650
		return emulate_iret_real(ctxt);
1651 1652 1653 1654
	case X86EMUL_MODE_VM86:
	case X86EMUL_MODE_PROT16:
	case X86EMUL_MODE_PROT32:
	case X86EMUL_MODE_PROT64:
1655
	default:
1656 1657
		/* iret from protected mode unimplemented yet */
		return X86EMUL_UNHANDLEABLE;
1658 1659 1660
	}
}

1661 1662 1663 1664 1665
static int em_jmp_far(struct x86_emulate_ctxt *ctxt)
{
	int rc;
	unsigned short sel;

1666
	memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
1667

1668
	rc = load_segment_descriptor(ctxt, sel, VCPU_SREG_CS);
1669 1670 1671
	if (rc != X86EMUL_CONTINUE)
		return rc;

1672 1673
	ctxt->_eip = 0;
	memcpy(&ctxt->_eip, ctxt->src.valptr, ctxt->op_bytes);
1674 1675 1676
	return X86EMUL_CONTINUE;
}

1677
static int em_grp1a(struct x86_emulate_ctxt *ctxt)
1678
{
1679
	return emulate_pop(ctxt, &ctxt->dst.val, ctxt->dst.bytes);
1680 1681
}

1682
static int em_grp2(struct x86_emulate_ctxt *ctxt)
1683
{
1684
	switch (ctxt->modrm_reg) {
1685
	case 0:	/* rol */
1686
		emulate_2op_SrcB(ctxt, "rol");
1687 1688
		break;
	case 1:	/* ror */
1689
		emulate_2op_SrcB(ctxt, "ror");
1690 1691
		break;
	case 2:	/* rcl */
1692
		emulate_2op_SrcB(ctxt, "rcl");
1693 1694
		break;
	case 3:	/* rcr */
1695
		emulate_2op_SrcB(ctxt, "rcr");
1696 1697 1698
		break;
	case 4:	/* sal/shl */
	case 6:	/* sal/shl */
1699
		emulate_2op_SrcB(ctxt, "sal");
1700 1701
		break;
	case 5:	/* shr */
1702
		emulate_2op_SrcB(ctxt, "shr");
1703 1704
		break;
	case 7:	/* sar */
1705
		emulate_2op_SrcB(ctxt, "sar");
1706 1707
		break;
	}
1708
	return X86EMUL_CONTINUE;
1709 1710
}

1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739
static int em_not(struct x86_emulate_ctxt *ctxt)
{
	ctxt->dst.val = ~ctxt->dst.val;
	return X86EMUL_CONTINUE;
}

static int em_neg(struct x86_emulate_ctxt *ctxt)
{
	emulate_1op(ctxt, "neg");
	return X86EMUL_CONTINUE;
}

static int em_mul_ex(struct x86_emulate_ctxt *ctxt)
{
	u8 ex = 0;

	emulate_1op_rax_rdx(ctxt, "mul", ex);
	return X86EMUL_CONTINUE;
}

static int em_imul_ex(struct x86_emulate_ctxt *ctxt)
{
	u8 ex = 0;

	emulate_1op_rax_rdx(ctxt, "imul", ex);
	return X86EMUL_CONTINUE;
}

static int em_div_ex(struct x86_emulate_ctxt *ctxt)
1740
{
1741
	u8 de = 0;
1742

1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753
	emulate_1op_rax_rdx(ctxt, "div", de);
	if (de)
		return emulate_de(ctxt);
	return X86EMUL_CONTINUE;
}

static int em_idiv_ex(struct x86_emulate_ctxt *ctxt)
{
	u8 de = 0;

	emulate_1op_rax_rdx(ctxt, "idiv", de);
1754 1755
	if (de)
		return emulate_de(ctxt);
1756
	return X86EMUL_CONTINUE;
1757 1758
}

1759
static int em_grp45(struct x86_emulate_ctxt *ctxt)
1760
{
1761
	int rc = X86EMUL_CONTINUE;
1762

1763
	switch (ctxt->modrm_reg) {
1764
	case 0:	/* inc */
1765
		emulate_1op(ctxt, "inc");
1766 1767
		break;
	case 1:	/* dec */
1768
		emulate_1op(ctxt, "dec");
1769
		break;
1770 1771
	case 2: /* call near abs */ {
		long int old_eip;
1772 1773 1774
		old_eip = ctxt->_eip;
		ctxt->_eip = ctxt->src.val;
		ctxt->src.val = old_eip;
1775
		rc = em_push(ctxt);
1776 1777
		break;
	}
1778
	case 4: /* jmp abs */
1779
		ctxt->_eip = ctxt->src.val;
1780
		break;
1781 1782 1783
	case 5: /* jmp far */
		rc = em_jmp_far(ctxt);
		break;
1784
	case 6:	/* push */
1785
		rc = em_push(ctxt);
1786 1787
		break;
	}
1788
	return rc;
1789 1790
}

1791
static int em_grp9(struct x86_emulate_ctxt *ctxt)
1792
{
1793
	u64 old = ctxt->dst.orig_val64;
1794

1795 1796 1797 1798
	if (((u32) (old >> 0) != (u32) ctxt->regs[VCPU_REGS_RAX]) ||
	    ((u32) (old >> 32) != (u32) ctxt->regs[VCPU_REGS_RDX])) {
		ctxt->regs[VCPU_REGS_RAX] = (u32) (old >> 0);
		ctxt->regs[VCPU_REGS_RDX] = (u32) (old >> 32);
1799
		ctxt->eflags &= ~EFLG_ZF;
1800
	} else {
1801 1802
		ctxt->dst.val64 = ((u64)ctxt->regs[VCPU_REGS_RCX] << 32) |
			(u32) ctxt->regs[VCPU_REGS_RBX];
1803

1804
		ctxt->eflags |= EFLG_ZF;
1805
	}
1806
	return X86EMUL_CONTINUE;
1807 1808
}

1809 1810
static int em_ret(struct x86_emulate_ctxt *ctxt)
{
1811 1812 1813
	ctxt->dst.type = OP_REG;
	ctxt->dst.addr.reg = &ctxt->_eip;
	ctxt->dst.bytes = ctxt->op_bytes;
1814 1815 1816
	return em_pop(ctxt);
}

static int em_ret_far(struct x86_emulate_ctxt *ctxt)
{
	int rc;
	unsigned long cs;

	rc = emulate_pop(ctxt, &ctxt->_eip, ctxt->op_bytes);
	if (rc != X86EMUL_CONTINUE)
		return rc;
	if (ctxt->op_bytes == 4)
		ctxt->_eip = (u32)ctxt->_eip;
	rc = emulate_pop(ctxt, &cs, ctxt->op_bytes);
	if (rc != X86EMUL_CONTINUE)
		return rc;
	rc = load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS);
	return rc;
}

static int em_lseg(struct x86_emulate_ctxt *ctxt)
{
	int seg = ctxt->src2.val;
	unsigned short sel;
	int rc;

	memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);

	rc = load_segment_descriptor(ctxt, sel, seg);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	ctxt->dst.val = ctxt->src.val;
	return rc;
}

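/*
 * Build flat code and stack segment descriptors for the SYSCALL/SYSENTER
 * family; the callers adjust the L bit and DPL as needed.
 */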
static void
setup_syscalls_segments(struct x86_emulate_ctxt *ctxt,
			struct desc_struct *cs, struct desc_struct *ss)
{
	u16 selector;

	memset(cs, 0, sizeof(struct desc_struct));
	ctxt->ops->get_segment(ctxt, &selector, cs, NULL, VCPU_SREG_CS);
	memset(ss, 0, sizeof(struct desc_struct));

	cs->l = 0;		/* will be adjusted later */
	set_desc_base(cs, 0);	/* flat segment */
	cs->g = 1;		/* 4kb granularity */
	set_desc_limit(cs, 0xfffff);	/* 4GB limit */
	cs->type = 0x0b;	/* Read, Execute, Accessed */
	cs->s = 1;
	cs->dpl = 0;		/* will be adjusted later */
	cs->p = 1;
	cs->d = 1;

	set_desc_base(ss, 0);	/* flat segment */
	set_desc_limit(ss, 0xfffff);	/* 4GB limit */
	ss->g = 1;		/* 4kb granularity */
	ss->s = 1;
	ss->type = 0x03;	/* Read/Write, Accessed */
	ss->d = 1;		/* 32bit stack segment */
	ss->dpl = 0;
	ss->p = 1;
}

static int em_syscall(struct x86_emulate_ctxt *ctxt)
{
	struct x86_emulate_ops *ops = ctxt->ops;
	struct desc_struct cs, ss;
	u64 msr_data;
	u16 cs_sel, ss_sel;
	u64 efer = 0;

	/* syscall is not available in real mode */
	if (ctxt->mode == X86EMUL_MODE_REAL ||
	    ctxt->mode == X86EMUL_MODE_VM86)
		return emulate_ud(ctxt);

	ops->get_msr(ctxt, MSR_EFER, &efer);
	setup_syscalls_segments(ctxt, &cs, &ss);

	ops->get_msr(ctxt, MSR_STAR, &msr_data);
	msr_data >>= 32;
	cs_sel = (u16)(msr_data & 0xfffc);
	ss_sel = (u16)(msr_data + 8);

	if (efer & EFER_LMA) {
		cs.d = 0;
		cs.l = 1;
	}
	ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
	ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);

	ctxt->regs[VCPU_REGS_RCX] = ctxt->_eip;
	if (efer & EFER_LMA) {
#ifdef CONFIG_X86_64
		ctxt->regs[VCPU_REGS_R11] = ctxt->eflags & ~EFLG_RF;

		ops->get_msr(ctxt,
			     ctxt->mode == X86EMUL_MODE_PROT64 ?
			     MSR_LSTAR : MSR_CSTAR, &msr_data);
		ctxt->_eip = msr_data;

		ops->get_msr(ctxt, MSR_SYSCALL_MASK, &msr_data);
		ctxt->eflags &= ~(msr_data | EFLG_RF);
#endif
	} else {
		/* legacy mode */
		ops->get_msr(ctxt, MSR_STAR, &msr_data);
		ctxt->_eip = (u32)msr_data;

		ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF);
	}

	return X86EMUL_CONTINUE;
}

static int em_sysenter(struct x86_emulate_ctxt *ctxt)
{
	struct x86_emulate_ops *ops = ctxt->ops;
	struct desc_struct cs, ss;
	u64 msr_data;
	u16 cs_sel, ss_sel;
	u64 efer = 0;

	ops->get_msr(ctxt, MSR_EFER, &efer);
	/* inject #GP if in real mode */
	if (ctxt->mode == X86EMUL_MODE_REAL)
		return emulate_gp(ctxt, 0);

	/* XXX sysenter/sysexit have not been tested in 64bit mode.
	* Therefore, we inject an #UD.
	*/
	if (ctxt->mode == X86EMUL_MODE_PROT64)
		return emulate_ud(ctxt);

	setup_syscalls_segments(ctxt, &cs, &ss);

	ops->get_msr(ctxt, MSR_IA32_SYSENTER_CS, &msr_data);
	switch (ctxt->mode) {
	case X86EMUL_MODE_PROT32:
		if ((msr_data & 0xfffc) == 0x0)
			return emulate_gp(ctxt, 0);
		break;
	case X86EMUL_MODE_PROT64:
		if (msr_data == 0x0)
			return emulate_gp(ctxt, 0);
		break;
	}

	ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF);
	cs_sel = (u16)msr_data;
	cs_sel &= ~SELECTOR_RPL_MASK;
	ss_sel = cs_sel + 8;
	ss_sel &= ~SELECTOR_RPL_MASK;
	if (ctxt->mode == X86EMUL_MODE_PROT64 || (efer & EFER_LMA)) {
		cs.d = 0;
		cs.l = 1;
	}

	ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
	ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);

	ops->get_msr(ctxt, MSR_IA32_SYSENTER_EIP, &msr_data);
	ctxt->_eip = msr_data;

	ops->get_msr(ctxt, MSR_IA32_SYSENTER_ESP, &msr_data);
	ctxt->regs[VCPU_REGS_RSP] = msr_data;

	return X86EMUL_CONTINUE;
}

static int em_sysexit(struct x86_emulate_ctxt *ctxt)
{
	struct x86_emulate_ops *ops = ctxt->ops;
	struct desc_struct cs, ss;
	u64 msr_data;
	int usermode;
	u16 cs_sel = 0, ss_sel = 0;

	/* inject #GP if in real mode or Virtual 8086 mode */
	if (ctxt->mode == X86EMUL_MODE_REAL ||
	    ctxt->mode == X86EMUL_MODE_VM86)
		return emulate_gp(ctxt, 0);

	setup_syscalls_segments(ctxt, &cs, &ss);

	if ((ctxt->rex_prefix & 0x8) != 0x0)
		usermode = X86EMUL_MODE_PROT64;
	else
		usermode = X86EMUL_MODE_PROT32;

	cs.dpl = 3;
	ss.dpl = 3;
	ops->get_msr(ctxt, MSR_IA32_SYSENTER_CS, &msr_data);
	switch (usermode) {
	case X86EMUL_MODE_PROT32:
		cs_sel = (u16)(msr_data + 16);
		if ((msr_data & 0xfffc) == 0x0)
			return emulate_gp(ctxt, 0);
		ss_sel = (u16)(msr_data + 24);
		break;
	case X86EMUL_MODE_PROT64:
		cs_sel = (u16)(msr_data + 32);
		if (msr_data == 0x0)
			return emulate_gp(ctxt, 0);
		ss_sel = cs_sel + 8;
		cs.d = 0;
		cs.l = 1;
		break;
	}
	cs_sel |= SELECTOR_RPL_MASK;
	ss_sel |= SELECTOR_RPL_MASK;

	ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
	ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);

	ctxt->_eip = ctxt->regs[VCPU_REGS_RDX];
	ctxt->regs[VCPU_REGS_RSP] = ctxt->regs[VCPU_REGS_RCX];

	return X86EMUL_CONTINUE;
}

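/*
 * IOPL check: true when the current privilege level may not use
 * IOPL-sensitive instructions (always true in VM86, never in real mode).
 */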
static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt)
{
	int iopl;
	if (ctxt->mode == X86EMUL_MODE_REAL)
		return false;
	if (ctxt->mode == X86EMUL_MODE_VM86)
		return true;
	iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT;
	return ctxt->ops->cpl(ctxt) > iopl;
}

static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt,
					    u16 port, u16 len)
{
	struct x86_emulate_ops *ops = ctxt->ops;
	struct desc_struct tr_seg;
	u32 base3;
	int r;
	u16 tr, io_bitmap_ptr, perm, bit_idx = port & 0x7;
	unsigned mask = (1 << len) - 1;
	unsigned long base;

	ops->get_segment(ctxt, &tr, &tr_seg, &base3, VCPU_SREG_TR);
	if (!tr_seg.p)
		return false;
	if (desc_limit_scaled(&tr_seg) < 103)
		return false;
	base = get_desc_base(&tr_seg);
#ifdef CONFIG_X86_64
	base |= ((u64)base3) << 32;
#endif
	r = ops->read_std(ctxt, base + 102, &io_bitmap_ptr, 2, NULL);
	if (r != X86EMUL_CONTINUE)
		return false;
	if (io_bitmap_ptr + port/8 > desc_limit_scaled(&tr_seg))
		return false;
	r = ops->read_std(ctxt, base + io_bitmap_ptr + port/8, &perm, 2, NULL);
	if (r != X86EMUL_CONTINUE)
		return false;
	if ((perm >> bit_idx) & mask)
		return false;
	return true;
}

static bool emulator_io_permited(struct x86_emulate_ctxt *ctxt,
				 u16 port, u16 len)
{
	if (ctxt->perm_ok)
		return true;

	if (emulator_bad_iopl(ctxt))
		if (!emulator_io_port_access_allowed(ctxt, port, len))
			return false;

	ctxt->perm_ok = true;

	return true;
}

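/* Hardware task switch helpers: save/restore CPU state in 16-bit TSS format. */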
static void save_state_to_tss16(struct x86_emulate_ctxt *ctxt,
				struct tss_segment_16 *tss)
{
	tss->ip = ctxt->_eip;
	tss->flag = ctxt->eflags;
	tss->ax = ctxt->regs[VCPU_REGS_RAX];
	tss->cx = ctxt->regs[VCPU_REGS_RCX];
	tss->dx = ctxt->regs[VCPU_REGS_RDX];
	tss->bx = ctxt->regs[VCPU_REGS_RBX];
	tss->sp = ctxt->regs[VCPU_REGS_RSP];
	tss->bp = ctxt->regs[VCPU_REGS_RBP];
	tss->si = ctxt->regs[VCPU_REGS_RSI];
	tss->di = ctxt->regs[VCPU_REGS_RDI];

	tss->es = get_segment_selector(ctxt, VCPU_SREG_ES);
	tss->cs = get_segment_selector(ctxt, VCPU_SREG_CS);
	tss->ss = get_segment_selector(ctxt, VCPU_SREG_SS);
	tss->ds = get_segment_selector(ctxt, VCPU_SREG_DS);
	tss->ldt = get_segment_selector(ctxt, VCPU_SREG_LDTR);
}

static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt,
				 struct tss_segment_16 *tss)
{
	int ret;

	ctxt->_eip = tss->ip;
	ctxt->eflags = tss->flag | 2;
	ctxt->regs[VCPU_REGS_RAX] = tss->ax;
	ctxt->regs[VCPU_REGS_RCX] = tss->cx;
	ctxt->regs[VCPU_REGS_RDX] = tss->dx;
	ctxt->regs[VCPU_REGS_RBX] = tss->bx;
	ctxt->regs[VCPU_REGS_RSP] = tss->sp;
	ctxt->regs[VCPU_REGS_RBP] = tss->bp;
	ctxt->regs[VCPU_REGS_RSI] = tss->si;
	ctxt->regs[VCPU_REGS_RDI] = tss->di;

	/*
	 * SDM says that segment selectors are loaded before segment
	 * descriptors
	 */
	set_segment_selector(ctxt, tss->ldt, VCPU_SREG_LDTR);
	set_segment_selector(ctxt, tss->es, VCPU_SREG_ES);
	set_segment_selector(ctxt, tss->cs, VCPU_SREG_CS);
	set_segment_selector(ctxt, tss->ss, VCPU_SREG_SS);
	set_segment_selector(ctxt, tss->ds, VCPU_SREG_DS);

	/*
	 * Now load segment descriptors. If a fault happens at this stage
	 * it is handled in the context of the new task
	 */
	ret = load_segment_descriptor(ctxt, tss->ldt, VCPU_SREG_LDTR);
	if (ret != X86EMUL_CONTINUE)
		return ret;
	ret = load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES);
	if (ret != X86EMUL_CONTINUE)
		return ret;
	ret = load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS);
	if (ret != X86EMUL_CONTINUE)
		return ret;
	ret = load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS);
	if (ret != X86EMUL_CONTINUE)
		return ret;
	ret = load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS);
	if (ret != X86EMUL_CONTINUE)
		return ret;

	return X86EMUL_CONTINUE;
}

static int task_switch_16(struct x86_emulate_ctxt *ctxt,
			  u16 tss_selector, u16 old_tss_sel,
			  ulong old_tss_base, struct desc_struct *new_desc)
{
	struct x86_emulate_ops *ops = ctxt->ops;
	struct tss_segment_16 tss_seg;
	int ret;
	u32 new_tss_base = get_desc_base(new_desc);

	ret = ops->read_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg,
			    &ctxt->exception);
	if (ret != X86EMUL_CONTINUE)
		/* FIXME: need to provide precise fault address */
		return ret;

	save_state_to_tss16(ctxt, &tss_seg);

	ret = ops->write_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg,
			     &ctxt->exception);
	if (ret != X86EMUL_CONTINUE)
		/* FIXME: need to provide precise fault address */
		return ret;

	ret = ops->read_std(ctxt, new_tss_base, &tss_seg, sizeof tss_seg,
			    &ctxt->exception);
	if (ret != X86EMUL_CONTINUE)
		/* FIXME: need to provide precise fault address */
		return ret;

	if (old_tss_sel != 0xffff) {
		tss_seg.prev_task_link = old_tss_sel;

		ret = ops->write_std(ctxt, new_tss_base,
				     &tss_seg.prev_task_link,
				     sizeof tss_seg.prev_task_link,
				     &ctxt->exception);
		if (ret != X86EMUL_CONTINUE)
			/* FIXME: need to provide precise fault address */
			return ret;
	}

	return load_state_from_tss16(ctxt, &tss_seg);
}

static void save_state_to_tss32(struct x86_emulate_ctxt *ctxt,
				struct tss_segment_32 *tss)
{
	tss->cr3 = ctxt->ops->get_cr(ctxt, 3);
	tss->eip = ctxt->_eip;
	tss->eflags = ctxt->eflags;
	tss->eax = ctxt->regs[VCPU_REGS_RAX];
	tss->ecx = ctxt->regs[VCPU_REGS_RCX];
	tss->edx = ctxt->regs[VCPU_REGS_RDX];
	tss->ebx = ctxt->regs[VCPU_REGS_RBX];
	tss->esp = ctxt->regs[VCPU_REGS_RSP];
	tss->ebp = ctxt->regs[VCPU_REGS_RBP];
	tss->esi = ctxt->regs[VCPU_REGS_RSI];
	tss->edi = ctxt->regs[VCPU_REGS_RDI];

	tss->es = get_segment_selector(ctxt, VCPU_SREG_ES);
	tss->cs = get_segment_selector(ctxt, VCPU_SREG_CS);
	tss->ss = get_segment_selector(ctxt, VCPU_SREG_SS);
	tss->ds = get_segment_selector(ctxt, VCPU_SREG_DS);
	tss->fs = get_segment_selector(ctxt, VCPU_SREG_FS);
	tss->gs = get_segment_selector(ctxt, VCPU_SREG_GS);
	tss->ldt_selector = get_segment_selector(ctxt, VCPU_SREG_LDTR);
}

static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt,
				 struct tss_segment_32 *tss)
{
	int ret;

	if (ctxt->ops->set_cr(ctxt, 3, tss->cr3))
		return emulate_gp(ctxt, 0);
	ctxt->_eip = tss->eip;
	ctxt->eflags = tss->eflags | 2;
	ctxt->regs[VCPU_REGS_RAX] = tss->eax;
	ctxt->regs[VCPU_REGS_RCX] = tss->ecx;
	ctxt->regs[VCPU_REGS_RDX] = tss->edx;
	ctxt->regs[VCPU_REGS_RBX] = tss->ebx;
	ctxt->regs[VCPU_REGS_RSP] = tss->esp;
	ctxt->regs[VCPU_REGS_RBP] = tss->ebp;
	ctxt->regs[VCPU_REGS_RSI] = tss->esi;
	ctxt->regs[VCPU_REGS_RDI] = tss->edi;

	/*
	 * SDM says that segment selectors are loaded before segment
	 * descriptors
	 */
	set_segment_selector(ctxt, tss->ldt_selector, VCPU_SREG_LDTR);
	set_segment_selector(ctxt, tss->es, VCPU_SREG_ES);
	set_segment_selector(ctxt, tss->cs, VCPU_SREG_CS);
	set_segment_selector(ctxt, tss->ss, VCPU_SREG_SS);
	set_segment_selector(ctxt, tss->ds, VCPU_SREG_DS);
	set_segment_selector(ctxt, tss->fs, VCPU_SREG_FS);
	set_segment_selector(ctxt, tss->gs, VCPU_SREG_GS);

	/*
	 * Now load segment descriptors. If a fault happens at this stage
	 * it is handled in the context of the new task
	 */
	ret = load_segment_descriptor(ctxt, tss->ldt_selector, VCPU_SREG_LDTR);
	if (ret != X86EMUL_CONTINUE)
		return ret;
	ret = load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES);
	if (ret != X86EMUL_CONTINUE)
		return ret;
	ret = load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS);
	if (ret != X86EMUL_CONTINUE)
		return ret;
	ret = load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS);
	if (ret != X86EMUL_CONTINUE)
		return ret;
	ret = load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS);
	if (ret != X86EMUL_CONTINUE)
		return ret;
	ret = load_segment_descriptor(ctxt, tss->fs, VCPU_SREG_FS);
	if (ret != X86EMUL_CONTINUE)
		return ret;
	ret = load_segment_descriptor(ctxt, tss->gs, VCPU_SREG_GS);
	if (ret != X86EMUL_CONTINUE)
		return ret;

	return X86EMUL_CONTINUE;
}

static int task_switch_32(struct x86_emulate_ctxt *ctxt,
			  u16 tss_selector, u16 old_tss_sel,
			  ulong old_tss_base, struct desc_struct *new_desc)
{
	struct x86_emulate_ops *ops = ctxt->ops;
	struct tss_segment_32 tss_seg;
	int ret;
	u32 new_tss_base = get_desc_base(new_desc);

	ret = ops->read_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg,
			    &ctxt->exception);
	if (ret != X86EMUL_CONTINUE)
		/* FIXME: need to provide precise fault address */
		return ret;

	save_state_to_tss32(ctxt, &tss_seg);

	ret = ops->write_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg,
			     &ctxt->exception);
	if (ret != X86EMUL_CONTINUE)
		/* FIXME: need to provide precise fault address */
		return ret;

	ret = ops->read_std(ctxt, new_tss_base, &tss_seg, sizeof tss_seg,
			    &ctxt->exception);
	if (ret != X86EMUL_CONTINUE)
		/* FIXME: need to provide precise fault address */
		return ret;

	if (old_tss_sel != 0xffff) {
		tss_seg.prev_task_link = old_tss_sel;

		ret = ops->write_std(ctxt, new_tss_base,
				     &tss_seg.prev_task_link,
				     sizeof tss_seg.prev_task_link,
				     &ctxt->exception);
		if (ret != X86EMUL_CONTINUE)
			/* FIXME: need to provide precise fault address */
			return ret;
	}

	return load_state_from_tss32(ctxt, &tss_seg);
}

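/*
 * Common task-switch path: validate the new TSS descriptor, save state into
 * the old TSS and load state from the new one (16- or 32-bit format).
 */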
static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt,
				   u16 tss_selector, int reason,
				   bool has_error_code, u32 error_code)
{
	struct x86_emulate_ops *ops = ctxt->ops;
	struct desc_struct curr_tss_desc, next_tss_desc;
	int ret;
	u16 old_tss_sel = get_segment_selector(ctxt, VCPU_SREG_TR);
	ulong old_tss_base =
		ops->get_cached_segment_base(ctxt, VCPU_SREG_TR);
	u32 desc_limit;

	/* FIXME: old_tss_base == ~0 ? */

	ret = read_segment_descriptor(ctxt, tss_selector, &next_tss_desc);
	if (ret != X86EMUL_CONTINUE)
		return ret;
	ret = read_segment_descriptor(ctxt, old_tss_sel, &curr_tss_desc);
	if (ret != X86EMUL_CONTINUE)
		return ret;

	/* FIXME: check that next_tss_desc is tss */

	if (reason != TASK_SWITCH_IRET) {
		if ((tss_selector & 3) > next_tss_desc.dpl ||
		    ops->cpl(ctxt) > next_tss_desc.dpl)
			return emulate_gp(ctxt, 0);
	}

	desc_limit = desc_limit_scaled(&next_tss_desc);
	if (!next_tss_desc.p ||
	    ((desc_limit < 0x67 && (next_tss_desc.type & 8)) ||
	     desc_limit < 0x2b)) {
		emulate_ts(ctxt, tss_selector & 0xfffc);
		return X86EMUL_PROPAGATE_FAULT;
	}

	if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) {
		curr_tss_desc.type &= ~(1 << 1); /* clear busy flag */
		write_segment_descriptor(ctxt, old_tss_sel, &curr_tss_desc);
	}

	if (reason == TASK_SWITCH_IRET)
		ctxt->eflags = ctxt->eflags & ~X86_EFLAGS_NT;

	/* set back link to prev task only if NT bit is set in eflags
	   note that old_tss_sel is not used after this point */
	if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE)
		old_tss_sel = 0xffff;

	if (next_tss_desc.type & 8)
		ret = task_switch_32(ctxt, tss_selector, old_tss_sel,
				     old_tss_base, &next_tss_desc);
	else
		ret = task_switch_16(ctxt, tss_selector, old_tss_sel,
				     old_tss_base, &next_tss_desc);
	if (ret != X86EMUL_CONTINUE)
		return ret;

	if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE)
		ctxt->eflags = ctxt->eflags | X86_EFLAGS_NT;

	if (reason != TASK_SWITCH_IRET) {
		next_tss_desc.type |= (1 << 1); /* set busy flag */
		write_segment_descriptor(ctxt, tss_selector, &next_tss_desc);
	}

	ops->set_cr(ctxt, 0,  ops->get_cr(ctxt, 0) | X86_CR0_TS);
	ops->set_segment(ctxt, tss_selector, &next_tss_desc, 0, VCPU_SREG_TR);

	if (has_error_code) {
		ctxt->op_bytes = ctxt->ad_bytes = (next_tss_desc.type & 8) ? 4 : 2;
		ctxt->lock_prefix = 0;
		ctxt->src.val = (unsigned long) error_code;
		ret = em_push(ctxt);
	}

	return ret;
}

int emulator_task_switch(struct x86_emulate_ctxt *ctxt,
			 u16 tss_selector, int reason,
			 bool has_error_code, u32 error_code)
{
	int rc;

	ctxt->_eip = ctxt->eip;
	ctxt->dst.type = OP_NONE;

	rc = emulator_do_task_switch(ctxt, tss_selector, reason,
				     has_error_code, error_code);

	if (rc == X86EMUL_CONTINUE)
		ctxt->eip = ctxt->_eip;

	return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK;
}

static void string_addr_inc(struct x86_emulate_ctxt *ctxt, unsigned seg,
			    int reg, struct operand *op)
{
	int df = (ctxt->eflags & EFLG_DF) ? -1 : 1;

	register_address_increment(ctxt, &ctxt->regs[reg], df * op->bytes);
	op->addr.mem.ea = register_address(ctxt, ctxt->regs[reg]);
	op->addr.mem.seg = seg;
}

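/* DAS: decimal adjust AL after subtraction. */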
static int em_das(struct x86_emulate_ctxt *ctxt)
{
	u8 al, old_al;
	bool af, cf, old_cf;

	cf = ctxt->eflags & X86_EFLAGS_CF;
	al = ctxt->dst.val;

	old_al = al;
	old_cf = cf;
	cf = false;
	af = ctxt->eflags & X86_EFLAGS_AF;
	if ((al & 0x0f) > 9 || af) {
		al -= 6;
		cf = old_cf | (al >= 250);
		af = true;
	} else {
		af = false;
	}
	if (old_al > 0x99 || old_cf) {
		al -= 0x60;
		cf = true;
	}

	ctxt->dst.val = al;
	/* Set PF, ZF, SF */
	ctxt->src.type = OP_IMM;
	ctxt->src.val = 0;
	ctxt->src.bytes = 1;
	emulate_2op_SrcV(ctxt, "or");
	ctxt->eflags &= ~(X86_EFLAGS_AF | X86_EFLAGS_CF);
	if (cf)
		ctxt->eflags |= X86_EFLAGS_CF;
	if (af)
		ctxt->eflags |= X86_EFLAGS_AF;
	return X86EMUL_CONTINUE;
}

static int em_call_far(struct x86_emulate_ctxt *ctxt)
{
	u16 sel, old_cs;
	ulong old_eip;
	int rc;

	old_cs = get_segment_selector(ctxt, VCPU_SREG_CS);
	old_eip = ctxt->_eip;

	memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
	if (load_segment_descriptor(ctxt, sel, VCPU_SREG_CS))
		return X86EMUL_CONTINUE;

	ctxt->_eip = 0;
	memcpy(&ctxt->_eip, ctxt->src.valptr, ctxt->op_bytes);

	ctxt->src.val = old_cs;
	rc = em_push(ctxt);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	ctxt->src.val = old_eip;
	return em_push(ctxt);
}

static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt)
{
	int rc;

	ctxt->dst.type = OP_REG;
	ctxt->dst.addr.reg = &ctxt->_eip;
	ctxt->dst.bytes = ctxt->op_bytes;
	rc = emulate_pop(ctxt, &ctxt->dst.val, ctxt->op_bytes);
	if (rc != X86EMUL_CONTINUE)
		return rc;
	register_address_increment(ctxt, &ctxt->regs[VCPU_REGS_RSP], ctxt->src.val);
	return X86EMUL_CONTINUE;
}

static int em_add(struct x86_emulate_ctxt *ctxt)
{
	emulate_2op_SrcV(ctxt, "add");
	return X86EMUL_CONTINUE;
}

static int em_or(struct x86_emulate_ctxt *ctxt)
{
	emulate_2op_SrcV(ctxt, "or");
	return X86EMUL_CONTINUE;
}

static int em_adc(struct x86_emulate_ctxt *ctxt)
{
	emulate_2op_SrcV(ctxt, "adc");
	return X86EMUL_CONTINUE;
}

static int em_sbb(struct x86_emulate_ctxt *ctxt)
{
	emulate_2op_SrcV(ctxt, "sbb");
	return X86EMUL_CONTINUE;
}

static int em_and(struct x86_emulate_ctxt *ctxt)
{
	emulate_2op_SrcV(ctxt, "and");
	return X86EMUL_CONTINUE;
}

static int em_sub(struct x86_emulate_ctxt *ctxt)
{
	emulate_2op_SrcV(ctxt, "sub");
	return X86EMUL_CONTINUE;
}

static int em_xor(struct x86_emulate_ctxt *ctxt)
{
	emulate_2op_SrcV(ctxt, "xor");
	return X86EMUL_CONTINUE;
}

static int em_cmp(struct x86_emulate_ctxt *ctxt)
{
	emulate_2op_SrcV(ctxt, "cmp");
	/* Disable writeback. */
	ctxt->dst.type = OP_NONE;
	return X86EMUL_CONTINUE;
}

static int em_test(struct x86_emulate_ctxt *ctxt)
{
	emulate_2op_SrcV(ctxt, "test");
	/* Disable writeback. */
	ctxt->dst.type = OP_NONE;
	return X86EMUL_CONTINUE;
}

static int em_xchg(struct x86_emulate_ctxt *ctxt)
{
	/* Write back the register source. */
	ctxt->src.val = ctxt->dst.val;
	write_register_operand(&ctxt->src);

	/* Write back the memory destination with implicit LOCK prefix. */
	ctxt->dst.val = ctxt->src.orig_val;
	ctxt->lock_prefix = 1;
	return X86EMUL_CONTINUE;
}

static int em_imul(struct x86_emulate_ctxt *ctxt)
{
	emulate_2op_SrcV_nobyte(ctxt, "imul");
	return X86EMUL_CONTINUE;
}

static int em_imul_3op(struct x86_emulate_ctxt *ctxt)
{
	ctxt->dst.val = ctxt->src2.val;
	return em_imul(ctxt);
}

static int em_cwd(struct x86_emulate_ctxt *ctxt)
{
	ctxt->dst.type = OP_REG;
	ctxt->dst.bytes = ctxt->src.bytes;
	ctxt->dst.addr.reg = &ctxt->regs[VCPU_REGS_RDX];
	ctxt->dst.val = ~((ctxt->src.val >> (ctxt->src.bytes * 8 - 1)) - 1);

	return X86EMUL_CONTINUE;
}

static int em_rdtsc(struct x86_emulate_ctxt *ctxt)
{
	u64 tsc = 0;

	ctxt->ops->get_msr(ctxt, MSR_IA32_TSC, &tsc);
	ctxt->regs[VCPU_REGS_RAX] = (u32)tsc;
	ctxt->regs[VCPU_REGS_RDX] = tsc >> 32;
	return X86EMUL_CONTINUE;
}

static int em_mov(struct x86_emulate_ctxt *ctxt)
{
	ctxt->dst.val = ctxt->src.val;
	return X86EMUL_CONTINUE;
}

static int em_mov_rm_sreg(struct x86_emulate_ctxt *ctxt)
{
	if (ctxt->modrm_reg > VCPU_SREG_GS)
		return emulate_ud(ctxt);

	ctxt->dst.val = get_segment_selector(ctxt, ctxt->modrm_reg);
	return X86EMUL_CONTINUE;
}

static int em_mov_sreg_rm(struct x86_emulate_ctxt *ctxt)
{
	u16 sel = ctxt->src.val;

	if (ctxt->modrm_reg == VCPU_SREG_CS || ctxt->modrm_reg > VCPU_SREG_GS)
		return emulate_ud(ctxt);

	if (ctxt->modrm_reg == VCPU_SREG_SS)
		ctxt->interruptibility = KVM_X86_SHADOW_INT_MOV_SS;

	/* Disable writeback. */
	ctxt->dst.type = OP_NONE;
	return load_segment_descriptor(ctxt, sel, ctxt->modrm_reg);
}

static int em_movdqu(struct x86_emulate_ctxt *ctxt)
{
	memcpy(&ctxt->dst.vec_val, &ctxt->src.vec_val, ctxt->op_bytes);
	return X86EMUL_CONTINUE;
}

static int em_invlpg(struct x86_emulate_ctxt *ctxt)
{
	int rc;
	ulong linear;

	rc = linearize(ctxt, ctxt->src.addr.mem, 1, false, &linear);
	if (rc == X86EMUL_CONTINUE)
		ctxt->ops->invlpg(ctxt, linear);
	/* Disable writeback. */
	ctxt->dst.type = OP_NONE;
	return X86EMUL_CONTINUE;
}

static int em_clts(struct x86_emulate_ctxt *ctxt)
{
	ulong cr0;

	cr0 = ctxt->ops->get_cr(ctxt, 0);
	cr0 &= ~X86_CR0_TS;
	ctxt->ops->set_cr(ctxt, 0, cr0);
	return X86EMUL_CONTINUE;
}

static int em_vmcall(struct x86_emulate_ctxt *ctxt)
{
	int rc;

	if (ctxt->modrm_mod != 3 || ctxt->modrm_rm != 1)
		return X86EMUL_UNHANDLEABLE;

	rc = ctxt->ops->fix_hypercall(ctxt);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	/* Let the processor re-execute the fixed hypercall */
	ctxt->_eip = ctxt->eip;
	/* Disable writeback. */
	ctxt->dst.type = OP_NONE;
	return X86EMUL_CONTINUE;
}

static int em_lgdt(struct x86_emulate_ctxt *ctxt)
{
	struct desc_ptr desc_ptr;
	int rc;

	rc = read_descriptor(ctxt, ctxt->src.addr.mem,
			     &desc_ptr.size, &desc_ptr.address,
			     ctxt->op_bytes);
	if (rc != X86EMUL_CONTINUE)
		return rc;
	ctxt->ops->set_gdt(ctxt, &desc_ptr);
	/* Disable writeback. */
	ctxt->dst.type = OP_NONE;
	return X86EMUL_CONTINUE;
}

static int em_vmmcall(struct x86_emulate_ctxt *ctxt)
{
	int rc;

	rc = ctxt->ops->fix_hypercall(ctxt);

	/* Disable writeback. */
	ctxt->dst.type = OP_NONE;
	return rc;
}

static int em_lidt(struct x86_emulate_ctxt *ctxt)
{
	struct desc_ptr desc_ptr;
	int rc;

	rc = read_descriptor(ctxt, ctxt->src.addr.mem,
			     &desc_ptr.size, &desc_ptr.address,
			     ctxt->op_bytes);
	if (rc != X86EMUL_CONTINUE)
		return rc;
	ctxt->ops->set_idt(ctxt, &desc_ptr);
	/* Disable writeback. */
	ctxt->dst.type = OP_NONE;
	return X86EMUL_CONTINUE;
}

static int em_smsw(struct x86_emulate_ctxt *ctxt)
{
	ctxt->dst.bytes = 2;
	ctxt->dst.val = ctxt->ops->get_cr(ctxt, 0);
	return X86EMUL_CONTINUE;
}

static int em_lmsw(struct x86_emulate_ctxt *ctxt)
{
	ctxt->ops->set_cr(ctxt, 0, (ctxt->ops->get_cr(ctxt, 0) & ~0x0eul)
			  | (ctxt->src.val & 0x0f));
	ctxt->dst.type = OP_NONE;
	return X86EMUL_CONTINUE;
}

static int em_loop(struct x86_emulate_ctxt *ctxt)
{
	register_address_increment(ctxt, &ctxt->regs[VCPU_REGS_RCX], -1);
	if ((address_mask(ctxt, ctxt->regs[VCPU_REGS_RCX]) != 0) &&
	    (ctxt->b == 0xe2 || test_cc(ctxt->b ^ 0x5, ctxt->eflags)))
		jmp_rel(ctxt, ctxt->src.val);

	return X86EMUL_CONTINUE;
}

static int em_jcxz(struct x86_emulate_ctxt *ctxt)
{
	if (address_mask(ctxt, ctxt->regs[VCPU_REGS_RCX]) == 0)
		jmp_rel(ctxt, ctxt->src.val);

	return X86EMUL_CONTINUE;
}

static int em_cli(struct x86_emulate_ctxt *ctxt)
{
	if (emulator_bad_iopl(ctxt))
		return emulate_gp(ctxt, 0);

	ctxt->eflags &= ~X86_EFLAGS_IF;
	return X86EMUL_CONTINUE;
}

static int em_sti(struct x86_emulate_ctxt *ctxt)
{
	if (emulator_bad_iopl(ctxt))
		return emulate_gp(ctxt, 0);

	ctxt->interruptibility = KVM_X86_SHADOW_INT_STI;
	ctxt->eflags |= X86_EFLAGS_IF;
	return X86EMUL_CONTINUE;
}

static bool valid_cr(int nr)
{
	switch (nr) {
	case 0:
	case 2 ... 4:
	case 8:
		return true;
	default:
		return false;
	}
}

static int check_cr_read(struct x86_emulate_ctxt *ctxt)
{
	if (!valid_cr(ctxt->modrm_reg))
		return emulate_ud(ctxt);

	return X86EMUL_CONTINUE;
}

static int check_cr_write(struct x86_emulate_ctxt *ctxt)
{
	u64 new_val = ctxt->src.val64;
	int cr = ctxt->modrm_reg;
	u64 efer = 0;

	static u64 cr_reserved_bits[] = {
		0xffffffff00000000ULL,
		0, 0, 0, /* CR3 checked later */
		CR4_RESERVED_BITS,
		0, 0, 0,
		CR8_RESERVED_BITS,
	};

	if (!valid_cr(cr))
		return emulate_ud(ctxt);

	if (new_val & cr_reserved_bits[cr])
		return emulate_gp(ctxt, 0);

	switch (cr) {
	case 0: {
		u64 cr4;
		if (((new_val & X86_CR0_PG) && !(new_val & X86_CR0_PE)) ||
		    ((new_val & X86_CR0_NW) && !(new_val & X86_CR0_CD)))
			return emulate_gp(ctxt, 0);

		cr4 = ctxt->ops->get_cr(ctxt, 4);
		ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);

		if ((new_val & X86_CR0_PG) && (efer & EFER_LME) &&
		    !(cr4 & X86_CR4_PAE))
			return emulate_gp(ctxt, 0);

		break;
		}
	case 3: {
		u64 rsvd = 0;

		ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
		if (efer & EFER_LMA)
			rsvd = CR3_L_MODE_RESERVED_BITS;
		else if (ctxt->ops->get_cr(ctxt, 4) & X86_CR4_PAE)
			rsvd = CR3_PAE_RESERVED_BITS;
		else if (ctxt->ops->get_cr(ctxt, 0) & X86_CR0_PG)
			rsvd = CR3_NONPAE_RESERVED_BITS;

		if (new_val & rsvd)
			return emulate_gp(ctxt, 0);

		break;
		}
	case 4: {
		u64 cr4;

		cr4 = ctxt->ops->get_cr(ctxt, 4);
		ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);

		if ((efer & EFER_LMA) && !(new_val & X86_CR4_PAE))
			return emulate_gp(ctxt, 0);

		break;
		}
	}

	return X86EMUL_CONTINUE;
}

static int check_dr7_gd(struct x86_emulate_ctxt *ctxt)
{
	unsigned long dr7;

	ctxt->ops->get_dr(ctxt, 7, &dr7);

	/* Check if DR7.Global_Enable is set */
	return dr7 & (1 << 13);
}

static int check_dr_read(struct x86_emulate_ctxt *ctxt)
{
	int dr = ctxt->modrm_reg;
	u64 cr4;

	if (dr > 7)
		return emulate_ud(ctxt);

	cr4 = ctxt->ops->get_cr(ctxt, 4);
	if ((cr4 & X86_CR4_DE) && (dr == 4 || dr == 5))
		return emulate_ud(ctxt);

	if (check_dr7_gd(ctxt))
		return emulate_db(ctxt);

	return X86EMUL_CONTINUE;
}

static int check_dr_write(struct x86_emulate_ctxt *ctxt)
{
	u64 new_val = ctxt->src.val64;
	int dr = ctxt->modrm_reg;

	if ((dr == 6 || dr == 7) && (new_val & 0xffffffff00000000ULL))
		return emulate_gp(ctxt, 0);

	return check_dr_read(ctxt);
}

static int check_svme(struct x86_emulate_ctxt *ctxt)
{
	u64 efer;

	ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);

	if (!(efer & EFER_SVME))
		return emulate_ud(ctxt);

	return X86EMUL_CONTINUE;
}

static int check_svme_pa(struct x86_emulate_ctxt *ctxt)
{
	u64 rax = ctxt->regs[VCPU_REGS_RAX];

	/* Valid physical address? */
	if (rax & 0xffff000000000000ULL)
		return emulate_gp(ctxt, 0);

	return check_svme(ctxt);
}

static int check_rdtsc(struct x86_emulate_ctxt *ctxt)
{
	u64 cr4 = ctxt->ops->get_cr(ctxt, 4);

	if (cr4 & X86_CR4_TSD && ctxt->ops->cpl(ctxt))
		return emulate_ud(ctxt);

	return X86EMUL_CONTINUE;
}

static int check_rdpmc(struct x86_emulate_ctxt *ctxt)
{
	u64 cr4 = ctxt->ops->get_cr(ctxt, 4);
	u64 rcx = ctxt->regs[VCPU_REGS_RCX];

	if ((!(cr4 & X86_CR4_PCE) && ctxt->ops->cpl(ctxt)) ||
	    (rcx > 3))
		return emulate_gp(ctxt, 0);

	return X86EMUL_CONTINUE;
}

static int check_perm_in(struct x86_emulate_ctxt *ctxt)
{
	ctxt->dst.bytes = min(ctxt->dst.bytes, 4u);
	if (!emulator_io_permited(ctxt, ctxt->src.val, ctxt->dst.bytes))
		return emulate_gp(ctxt, 0);

	return X86EMUL_CONTINUE;
}

static int check_perm_out(struct x86_emulate_ctxt *ctxt)
{
	ctxt->src.bytes = min(ctxt->src.bytes, 4u);
	if (!emulator_io_permited(ctxt, ctxt->dst.val, ctxt->src.bytes))
		return emulate_gp(ctxt, 0);

	return X86EMUL_CONTINUE;
}

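/*
 * Opcode table construction macros: D/I declare decode-only or executable
 * entries, G/GD/GP attach group, dual-group and prefix tables, and the
 * DI/II/DIP/IIP variants add intercept and permission-check information.
 */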
#define D(_y) { .flags = (_y) }
#define DI(_y, _i) { .flags = (_y), .intercept = x86_intercept_##_i }
#define DIP(_y, _i, _p) { .flags = (_y), .intercept = x86_intercept_##_i, \
		      .check_perm = (_p) }
#define N    D(0)
#define EXT(_f, _e) { .flags = ((_f) | RMExt), .u.group = (_e) }
#define G(_f, _g) { .flags = ((_f) | Group), .u.group = (_g) }
#define GD(_f, _g) { .flags = ((_f) | GroupDual), .u.gdual = (_g) }
#define I(_f, _e) { .flags = (_f), .u.execute = (_e) }
#define II(_f, _e, _i) \
	{ .flags = (_f), .u.execute = (_e), .intercept = x86_intercept_##_i }
#define IIP(_f, _e, _i, _p) \
	{ .flags = (_f), .u.execute = (_e), .intercept = x86_intercept_##_i, \
	  .check_perm = (_p) }
#define GP(_f, _g) { .flags = ((_f) | Prefix), .u.gprefix = (_g) }

#define D2bv(_f)      D((_f) | ByteOp), D(_f)
#define D2bvIP(_f, _i, _p) DIP((_f) | ByteOp, _i, _p), DIP(_f, _i, _p)
#define I2bv(_f, _e)  I((_f) | ByteOp, _e), I(_f, _e)

#define I6ALU(_f, _e) I2bv((_f) | DstMem | SrcReg | ModRM, _e),		\
		I2bv(((_f) | DstReg | SrcMem | ModRM) & ~Lock, _e),	\
		I2bv(((_f) & ~Lock) | DstAcc | SrcImm, _e)

static struct opcode group7_rm1[] = {
	DI(SrcNone | ModRM | Priv, monitor),
	DI(SrcNone | ModRM | Priv, mwait),
	N, N, N, N, N, N,
};

static struct opcode group7_rm3[] = {
	DIP(SrcNone | ModRM | Prot | Priv, vmrun,   check_svme_pa),
	II(SrcNone | ModRM | Prot | VendorSpecific, em_vmmcall, vmmcall),
	DIP(SrcNone | ModRM | Prot | Priv, vmload,  check_svme_pa),
	DIP(SrcNone | ModRM | Prot | Priv, vmsave,  check_svme_pa),
	DIP(SrcNone | ModRM | Prot | Priv, stgi,    check_svme),
	DIP(SrcNone | ModRM | Prot | Priv, clgi,    check_svme),
	DIP(SrcNone | ModRM | Prot | Priv, skinit,  check_svme),
	DIP(SrcNone | ModRM | Prot | Priv, invlpga, check_svme),
};

static struct opcode group7_rm7[] = {
	N,
	DIP(SrcNone | ModRM, rdtscp, check_rdtsc),
	N, N, N, N, N, N,
};

static struct opcode group1[] = {
	I(Lock, em_add),
	I(Lock, em_or),
	I(Lock, em_adc),
	I(Lock, em_sbb),
	I(Lock, em_and),
	I(Lock, em_sub),
	I(Lock, em_xor),
	I(0, em_cmp),
};

static struct opcode group1A[] = {
	D(DstMem | SrcNone | ModRM | Mov | Stack), N, N, N, N, N, N, N,
};

static struct opcode group3[] = {
	I(DstMem | SrcImm | ModRM, em_test),
	I(DstMem | SrcImm | ModRM, em_test),
	I(DstMem | SrcNone | ModRM | Lock, em_not),
	I(DstMem | SrcNone | ModRM | Lock, em_neg),
	I(SrcMem | ModRM, em_mul_ex),
	I(SrcMem | ModRM, em_imul_ex),
	I(SrcMem | ModRM, em_div_ex),
	I(SrcMem | ModRM, em_idiv_ex),
};

static struct opcode group4[] = {
	D(ByteOp | DstMem | SrcNone | ModRM | Lock), D(ByteOp | DstMem | SrcNone | ModRM | Lock),
	N, N, N, N, N, N,
};

static struct opcode group5[] = {
	D(DstMem | SrcNone | ModRM | Lock), D(DstMem | SrcNone | ModRM | Lock),
	D(SrcMem | ModRM | Stack),
	I(SrcMemFAddr | ModRM | ImplicitOps | Stack, em_call_far),
	D(SrcMem | ModRM | Stack), D(SrcMemFAddr | ModRM | ImplicitOps),
	D(SrcMem | ModRM | Stack), N,
};

static struct opcode group6[] = {
	DI(ModRM | Prot,        sldt),
	DI(ModRM | Prot,        str),
	DI(ModRM | Prot | Priv, lldt),
	DI(ModRM | Prot | Priv, ltr),
	N, N, N, N,
};

static struct group_dual group7 = { {
	DI(ModRM | Mov | DstMem | Priv, sgdt),
	DI(ModRM | Mov | DstMem | Priv, sidt),
	II(ModRM | SrcMem | Priv, em_lgdt, lgdt),
	II(ModRM | SrcMem | Priv, em_lidt, lidt),
	II(SrcNone | ModRM | DstMem | Mov, em_smsw, smsw), N,
	II(SrcMem16 | ModRM | Mov | Priv, em_lmsw, lmsw),
	II(SrcMem | ModRM | ByteOp | Priv | NoAccess, em_invlpg, invlpg),
}, {
	I(SrcNone | ModRM | Priv | VendorSpecific, em_vmcall),
	EXT(0, group7_rm1),
	N, EXT(0, group7_rm3),
	II(SrcNone | ModRM | DstMem | Mov, em_smsw, smsw), N,
	II(SrcMem16 | ModRM | Mov | Priv, em_lmsw, lmsw), EXT(0, group7_rm7),
} };

static struct opcode group8[] = {
	N, N, N, N,
	D(DstMem | SrcImmByte | ModRM), D(DstMem | SrcImmByte | ModRM | Lock),
	D(DstMem | SrcImmByte | ModRM | Lock), D(DstMem | SrcImmByte | ModRM | Lock),
};

static struct group_dual group9 = { {
	N, D(DstMem64 | ModRM | Lock), N, N, N, N, N, N,
}, {
	N, N, N, N, N, N, N, N,
} };

static struct opcode group11[] = {
	I(DstMem | SrcImm | ModRM | Mov, em_mov), X7(D(Undefined)),
};

static struct gprefix pfx_0f_6f_0f_7f = {
	N, N, N, I(Sse, em_movdqu),
};

static struct opcode opcode_table[256] = {
	/* 0x00 - 0x07 */
	I6ALU(Lock, em_add),
	I(ImplicitOps | Stack | No64 | Src2ES, em_push_sreg),
	I(ImplicitOps | Stack | No64 | Src2ES, em_pop_sreg),
	/* 0x08 - 0x0F */
	I6ALU(Lock, em_or),
	I(ImplicitOps | Stack | No64 | Src2CS, em_push_sreg),
	N,
	/* 0x10 - 0x17 */
	I6ALU(Lock, em_adc),
	I(ImplicitOps | Stack | No64 | Src2SS, em_push_sreg),
	I(ImplicitOps | Stack | No64 | Src2SS, em_pop_sreg),
	/* 0x18 - 0x1F */
	I6ALU(Lock, em_sbb),
	I(ImplicitOps | Stack | No64 | Src2DS, em_push_sreg),
	I(ImplicitOps | Stack | No64 | Src2DS, em_pop_sreg),
	/* 0x20 - 0x27 */
	I6ALU(Lock, em_and), N, N,
	/* 0x28 - 0x2F */
	I6ALU(Lock, em_sub), N, I(ByteOp | DstAcc | No64, em_das),
	/* 0x30 - 0x37 */
	I6ALU(Lock, em_xor), N, N,
	/* 0x38 - 0x3F */
	I6ALU(0, em_cmp), N, N,
	/* 0x40 - 0x4F */
	X16(D(DstReg)),
	/* 0x50 - 0x57 */
	X8(I(SrcReg | Stack, em_push)),
	/* 0x58 - 0x5F */
	X8(I(DstReg | Stack, em_pop)),
	/* 0x60 - 0x67 */
	I(ImplicitOps | Stack | No64, em_pusha),
	I(ImplicitOps | Stack | No64, em_popa),
	N, D(DstReg | SrcMem32 | ModRM | Mov) /* movsxd (x86/64) */ ,
	N, N, N, N,
	/* 0x68 - 0x6F */
	I(SrcImm | Mov | Stack, em_push),
	I(DstReg | SrcMem | ModRM | Src2Imm, em_imul_3op),
	I(SrcImmByte | Mov | Stack, em_push),
	I(DstReg | SrcMem | ModRM | Src2ImmByte, em_imul_3op),
	D2bvIP(DstDI | SrcDX | Mov | String, ins, check_perm_in), /* insb, insw/insd */
	D2bvIP(SrcSI | DstDX | String, outs, check_perm_out), /* outsb, outsw/outsd */
	/* 0x70 - 0x7F */
	X16(D(SrcImmByte)),
	/* 0x80 - 0x87 */
	G(ByteOp | DstMem | SrcImm | ModRM | Group, group1),
	G(DstMem | SrcImm | ModRM | Group, group1),
	G(ByteOp | DstMem | SrcImm | ModRM | No64 | Group, group1),
	G(DstMem | SrcImmByte | ModRM | Group, group1),
	I2bv(DstMem | SrcReg | ModRM, em_test),
	I2bv(DstMem | SrcReg | ModRM | Lock, em_xchg),
	/* 0x88 - 0x8F */
	I2bv(DstMem | SrcReg | ModRM | Mov, em_mov),
	I2bv(DstReg | SrcMem | ModRM | Mov, em_mov),
	I(DstMem | SrcNone | ModRM | Mov, em_mov_rm_sreg),
	D(ModRM | SrcMem | NoAccess | DstReg),
	I(ImplicitOps | SrcMem16 | ModRM, em_mov_sreg_rm),
	G(0, group1A),
	/* 0x90 - 0x97 */
	DI(SrcAcc | DstReg, pause), X7(D(SrcAcc | DstReg)),
	/* 0x98 - 0x9F */
	D(DstAcc | SrcNone), I(ImplicitOps | SrcAcc, em_cwd),
	I(SrcImmFAddr | No64, em_call_far), N,
	II(ImplicitOps | Stack, em_pushf, pushf),
	II(ImplicitOps | Stack, em_popf, popf), N, N,
	/* 0xA0 - 0xA7 */
	I2bv(DstAcc | SrcMem | Mov | MemAbs, em_mov),
	I2bv(DstMem | SrcAcc | Mov | MemAbs, em_mov),
	I2bv(SrcSI | DstDI | Mov | String, em_mov),
	I2bv(SrcSI | DstDI | String, em_cmp),
	/* 0xA8 - 0xAF */
	I2bv(DstAcc | SrcImm, em_test),
	I2bv(SrcAcc | DstDI | Mov | String, em_mov),
	I2bv(SrcSI | DstAcc | Mov | String, em_mov),
	I2bv(SrcAcc | DstDI | String, em_cmp),
	/* 0xB0 - 0xB7 */
	X8(I(ByteOp | DstReg | SrcImm | Mov, em_mov)),
	/* 0xB8 - 0xBF */
	X8(I(DstReg | SrcImm | Mov, em_mov)),
	/* 0xC0 - 0xC7 */
	D2bv(DstMem | SrcImmByte | ModRM),
	I(ImplicitOps | Stack | SrcImmU16, em_ret_near_imm),
	I(ImplicitOps | Stack, em_ret),
	I(DstReg | SrcMemFAddr | ModRM | No64 | Src2ES, em_lseg),
	I(DstReg | SrcMemFAddr | ModRM | No64 | Src2DS, em_lseg),
	G(ByteOp, group11), G(0, group11),
	/* 0xC8 - 0xCF */
	N, N, N, I(ImplicitOps | Stack, em_ret_far),
	D(ImplicitOps), DI(SrcImmByte, intn),
	D(ImplicitOps | No64), II(ImplicitOps, em_iret, iret),
	/* 0xD0 - 0xD7 */
	D2bv(DstMem | SrcOne | ModRM), D2bv(DstMem | ModRM),
	N, N, N, N,
	/* 0xD8 - 0xDF */
	N, N, N, N, N, N, N, N,
	/* 0xE0 - 0xE7 */
	X3(I(SrcImmByte, em_loop)),
	I(SrcImmByte, em_jcxz),
	D2bvIP(SrcImmUByte | DstAcc, in,  check_perm_in),
	D2bvIP(SrcAcc | DstImmUByte, out, check_perm_out),
	/* 0xE8 - 0xEF */
	D(SrcImm | Stack), D(SrcImm | ImplicitOps),
	I(SrcImmFAddr | No64, em_jmp_far), D(SrcImmByte | ImplicitOps),
	D2bvIP(SrcDX | DstAcc, in,  check_perm_in),
	D2bvIP(SrcAcc | DstDX, out, check_perm_out),
	/* 0xF0 - 0xF7 */
	N, DI(ImplicitOps, icebp), N, N,
	DI(ImplicitOps | Priv, hlt), D(ImplicitOps),
	G(ByteOp, group3), G(0, group3),
	/* 0xF8 - 0xFF */
	D(ImplicitOps), D(ImplicitOps),
	I(ImplicitOps, em_cli), I(ImplicitOps, em_sti),
	D(ImplicitOps), D(ImplicitOps), G(0, group4), G(0, group5),
};

static struct opcode twobyte_table[256] = {
	/* 0x00 - 0x0F */
	G(0, group6), GD(0, &group7), N, N,
	N, I(ImplicitOps | VendorSpecific, em_syscall),
	II(ImplicitOps | Priv, em_clts, clts), N,
	DI(ImplicitOps | Priv, invd), DI(ImplicitOps | Priv, wbinvd), N, N,
	N, D(ImplicitOps | ModRM), N, N,
	/* 0x10 - 0x1F */
	N, N, N, N, N, N, N, N, D(ImplicitOps | ModRM), N, N, N, N, N, N, N,
	/* 0x20 - 0x2F */
	DIP(ModRM | DstMem | Priv | Op3264, cr_read, check_cr_read),
	DIP(ModRM | DstMem | Priv | Op3264, dr_read, check_dr_read),
	DIP(ModRM | SrcMem | Priv | Op3264, cr_write, check_cr_write),
	DIP(ModRM | SrcMem | Priv | Op3264, dr_write, check_dr_write),
	N, N, N, N,
	N, N, N, N, N, N, N, N,
	/* 0x30 - 0x3F */
	DI(ImplicitOps | Priv, wrmsr),
	IIP(ImplicitOps, em_rdtsc, rdtsc, check_rdtsc),
	DI(ImplicitOps | Priv, rdmsr),
	DIP(ImplicitOps | Priv, rdpmc, check_rdpmc),
	I(ImplicitOps | VendorSpecific, em_sysenter),
	I(ImplicitOps | Priv | VendorSpecific, em_sysexit),
	N, N,
	N, N, N, N, N, N, N, N,
	/* 0x40 - 0x4F */
	X16(D(DstReg | SrcMem | ModRM | Mov)),
	/* 0x50 - 0x5F */
	N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
	/* 0x60 - 0x6F */
	N, N, N, N,
	N, N, N, N,
	N, N, N, N,
	N, N, N, GP(SrcMem | DstReg | ModRM | Mov, &pfx_0f_6f_0f_7f),
	/* 0x70 - 0x7F */
	N, N, N, N,
	N, N, N, N,
	N, N, N, N,
	N, N, N, GP(SrcReg | DstMem | ModRM | Mov, &pfx_0f_6f_0f_7f),
	/* 0x80 - 0x8F */
	X16(D(SrcImm)),
	/* 0x90 - 0x9F */
	X16(D(ByteOp | DstMem | SrcNone | ModRM| Mov)),
	/* 0xA0 - 0xA7 */
	I(Stack | Src2FS, em_push_sreg), I(Stack | Src2FS, em_pop_sreg),
	DI(ImplicitOps, cpuid), D(DstMem | SrcReg | ModRM | BitOp),
	D(DstMem | SrcReg | Src2ImmByte | ModRM),
	D(DstMem | SrcReg | Src2CL | ModRM), N, N,
	/* 0xA8 - 0xAF */
	I(Stack | Src2GS, em_push_sreg), I(Stack | Src2GS, em_pop_sreg),
	DI(ImplicitOps, rsm), D(DstMem | SrcReg | ModRM | BitOp | Lock),
	D(DstMem | SrcReg | Src2ImmByte | ModRM),
	D(DstMem | SrcReg | Src2CL | ModRM),
	D(ModRM), I(DstReg | SrcMem | ModRM, em_imul),
	/* 0xB0 - 0xB7 */
	D2bv(DstMem | SrcReg | ModRM | Lock),
	I(DstReg | SrcMemFAddr | ModRM | Src2SS, em_lseg),
	D(DstMem | SrcReg | ModRM | BitOp | Lock),
	I(DstReg | SrcMemFAddr | ModRM | Src2FS, em_lseg),
	I(DstReg | SrcMemFAddr | ModRM | Src2GS, em_lseg),
	D(ByteOp | DstReg | SrcMem | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov),
	/* 0xB8 - 0xBF */
	N, N,
	G(BitOp, group8), D(DstMem | SrcReg | ModRM | BitOp | Lock),
	D(DstReg | SrcMem | ModRM), D(DstReg | SrcMem | ModRM),
	D(ByteOp | DstReg | SrcMem | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov),
	/* 0xC0 - 0xCF */
	D2bv(DstMem | SrcReg | ModRM | Lock),
	N, D(DstMem | SrcReg | ModRM | Mov),
	N, N, N, GD(0, &group9),
	N, N, N, N, N, N, N, N,
	/* 0xD0 - 0xDF */
	N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
	/* 0xE0 - 0xEF */
	N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
	/* 0xF0 - 0xFF */
	N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N
};

#undef D
#undef N
#undef G
#undef GD
#undef I
#undef GP
#undef EXT

#undef D2bv
#undef D2bvIP
#undef I2bv
#undef I6ALU

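/* Immediate operands are at most 4 bytes even with a 64-bit operand size. */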
static unsigned imm_size(struct x86_emulate_ctxt *ctxt)
{
	unsigned size;

	size = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
	if (size == 8)
		size = 4;
	return size;
}

static int decode_imm(struct x86_emulate_ctxt *ctxt, struct operand *op,
		      unsigned size, bool sign_extension)
{
	int rc = X86EMUL_CONTINUE;

	op->type = OP_IMM;
	op->bytes = size;
	op->addr.mem.ea = ctxt->_eip;
	/* NB. Immediates are sign-extended as necessary. */
	switch (op->bytes) {
	case 1:
		op->val = insn_fetch(s8, ctxt);
		break;
	case 2:
		op->val = insn_fetch(s16, ctxt);
		break;
	case 4:
		op->val = insn_fetch(s32, ctxt);
		break;
	}
	if (!sign_extension) {
		switch (op->bytes) {
		case 1:
			op->val &= 0xff;
			break;
		case 2:
			op->val &= 0xffff;
			break;
		case 4:
			op->val &= 0xffffffff;
			break;
		}
	}
done:
	return rc;
}

static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,
			  unsigned d)
{
	int rc = X86EMUL_CONTINUE;

	switch (d) {
	case OpReg:
		decode_register_operand(ctxt, op,
			 op == &ctxt->dst &&
			 ctxt->twobyte && (ctxt->b == 0xb6 || ctxt->b == 0xb7));
		break;
	case OpImmUByte:
		rc = decode_imm(ctxt, op, 1, false);
		break;
	case OpMem:
		ctxt->memop.bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
	mem_common:
		*op = ctxt->memop;
		ctxt->memopp = op;
		if ((ctxt->d & BitOp) && op == &ctxt->dst)
			fetch_bit_operand(ctxt);
		op->orig_val = op->val;
		break;
	case OpMem64:
		ctxt->memop.bytes = 8;
		goto mem_common;
	case OpAcc:
		op->type = OP_REG;
		op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
		op->addr.reg = &ctxt->regs[VCPU_REGS_RAX];
		fetch_register_operand(op);
		op->orig_val = op->val;
		break;
	case OpDI:
		op->type = OP_MEM;
		op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
		op->addr.mem.ea =
			register_address(ctxt, ctxt->regs[VCPU_REGS_RDI]);
		op->addr.mem.seg = VCPU_SREG_ES;
		op->val = 0;
		break;
	case OpDX:
		op->type = OP_REG;
		op->bytes = 2;
		op->addr.reg = &ctxt->regs[VCPU_REGS_RDX];
		fetch_register_operand(op);
		break;
	case OpCL:
		op->bytes = 1;
		op->val = ctxt->regs[VCPU_REGS_RCX] & 0xff;
		break;
	case OpImmByte:
		rc = decode_imm(ctxt, op, 1, true);
		break;
	case OpOne:
		op->bytes = 1;
		op->val = 1;
		break;
	case OpImm:
		rc = decode_imm(ctxt, op, imm_size(ctxt), true);
		break;
	case OpMem16:
		ctxt->memop.bytes = 2;
		goto mem_common;
	case OpMem32:
		ctxt->memop.bytes = 4;
		goto mem_common;
	case OpImmU16:
		rc = decode_imm(ctxt, op, 2, false);
		break;
	case OpImmU:
		rc = decode_imm(ctxt, op, imm_size(ctxt), false);
		break;
	case OpSI:
		op->type = OP_MEM;
		op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
		op->addr.mem.ea =
			register_address(ctxt, ctxt->regs[VCPU_REGS_RSI]);
		op->addr.mem.seg = seg_override(ctxt);
		op->val = 0;
		break;
	case OpImmFAddr:
		op->type = OP_IMM;
		op->addr.mem.ea = ctxt->_eip;
		op->bytes = ctxt->op_bytes + 2;
		insn_fetch_arr(op->valptr, op->bytes, ctxt);
		break;
	case OpMemFAddr:
		ctxt->memop.bytes = ctxt->op_bytes + 2;
		goto mem_common;
	case OpES:
		op->val = VCPU_SREG_ES;
		break;
	case OpCS:
		op->val = VCPU_SREG_CS;
		break;
	case OpSS:
		op->val = VCPU_SREG_SS;
		break;
	case OpDS:
		op->val = VCPU_SREG_DS;
		break;
	case OpFS:
		op->val = VCPU_SREG_FS;
		break;
	case OpGS:
		op->val = VCPU_SREG_GS;
		break;
	case OpImplicit:
		/* Special instructions do their own operand decoding. */
	default:
		op->type = OP_NONE; /* Disable writeback. */
		break;
	}

done:
	return rc;
}

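/*
 * Decode entry point: parse prefixes, opcode bytes, ModRM/SIB and operands
 * into ctxt without touching guest state.
 */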
3491
int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len)
3492 3493 3494
{
	int rc = X86EMUL_CONTINUE;
	int mode = ctxt->mode;
3495
	int def_op_bytes, def_ad_bytes, goffset, simd_prefix;
3496
	bool op_prefix = false;
3497
	struct opcode opcode;
3498

3499 3500
	ctxt->memop.type = OP_NONE;
	ctxt->memopp = NULL;
3501 3502 3503
	ctxt->_eip = ctxt->eip;
	ctxt->fetch.start = ctxt->_eip;
	ctxt->fetch.end = ctxt->fetch.start + insn_len;
3504
	if (insn_len > 0)
3505
		memcpy(ctxt->fetch.data, insn, insn_len);
3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522

	switch (mode) {
	case X86EMUL_MODE_REAL:
	case X86EMUL_MODE_VM86:
	case X86EMUL_MODE_PROT16:
		def_op_bytes = def_ad_bytes = 2;
		break;
	case X86EMUL_MODE_PROT32:
		def_op_bytes = def_ad_bytes = 4;
		break;
#ifdef CONFIG_X86_64
	case X86EMUL_MODE_PROT64:
		def_op_bytes = 4;
		def_ad_bytes = 8;
		break;
#endif
	default:
3523
		return EMULATION_FAILED;
3524 3525
	}

3526 3527
	ctxt->op_bytes = def_op_bytes;
	ctxt->ad_bytes = def_ad_bytes;
3528 3529 3530

	/* Legacy prefixes. */
	for (;;) {
3531
		switch (ctxt->b = insn_fetch(u8, ctxt)) {
3532
		case 0x66:	/* operand-size override */
3533
			op_prefix = true;
3534
			/* switch between 2/4 bytes */
3535
			ctxt->op_bytes = def_op_bytes ^ 6;
3536 3537 3538 3539
			break;
		case 0x67:	/* address-size override */
			if (mode == X86EMUL_MODE_PROT64)
				/* switch between 4/8 bytes */
3540
				ctxt->ad_bytes = def_ad_bytes ^ 12;
3541 3542
			else
				/* switch between 2/4 bytes */
3543
				ctxt->ad_bytes = def_ad_bytes ^ 6;
3544 3545 3546 3547 3548
			break;
		case 0x26:	/* ES override */
		case 0x2e:	/* CS override */
		case 0x36:	/* SS override */
		case 0x3e:	/* DS override */
3549
			set_seg_override(ctxt, (ctxt->b >> 3) & 3);
3550 3551 3552
			break;
		case 0x64:	/* FS override */
		case 0x65:	/* GS override */
3553
			set_seg_override(ctxt, ctxt->b & 7);
3554 3555 3556 3557
			break;
		case 0x40 ... 0x4f: /* REX */
			if (mode != X86EMUL_MODE_PROT64)
				goto done_prefixes;
3558
			ctxt->rex_prefix = ctxt->b;
3559 3560
			continue;
		case 0xf0:	/* LOCK */
3561
			ctxt->lock_prefix = 1;
3562 3563 3564
			break;
		case 0xf2:	/* REPNE/REPNZ */
		case 0xf3:	/* REP/REPE/REPZ */
3565
			ctxt->rep_prefix = ctxt->b;
3566 3567 3568 3569 3570 3571 3572
			break;
		default:
			goto done_prefixes;
		}

		/* Any legacy prefix after a REX prefix nullifies its effect. */

3573
		ctxt->rex_prefix = 0;
3574 3575 3576 3577 3578
	}

done_prefixes:

	/* REX prefix. */
3579 3580
	if (ctxt->rex_prefix & 8)
		ctxt->op_bytes = 8;	/* REX.W */
3581 3582

	/* Opcode byte(s). */
3583
	opcode = opcode_table[ctxt->b];
3584
	/* Two-byte opcode? */
3585 3586
	if (ctxt->b == 0x0f) {
		ctxt->twobyte = 1;
3587
		ctxt->b = insn_fetch(u8, ctxt);
3588
		opcode = twobyte_table[ctxt->b];
3589
	}
3590
	ctxt->d = opcode.flags;
3591

3592 3593
	while (ctxt->d & GroupMask) {
		switch (ctxt->d & GroupMask) {
3594
		case Group:
3595
			ctxt->modrm = insn_fetch(u8, ctxt);
3596 3597
			--ctxt->_eip;
			goffset = (ctxt->modrm >> 3) & 7;
3598 3599 3600
			opcode = opcode.u.group[goffset];
			break;
		case GroupDual:
3601
			ctxt->modrm = insn_fetch(u8, ctxt);
3602 3603 3604
			--ctxt->_eip;
			goffset = (ctxt->modrm >> 3) & 7;
			if ((ctxt->modrm >> 6) == 3)
3605 3606 3607 3608 3609
				opcode = opcode.u.gdual->mod3[goffset];
			else
				opcode = opcode.u.gdual->mod012[goffset];
			break;
		case RMExt:
3610
			goffset = ctxt->modrm & 7;
3611
			opcode = opcode.u.group[goffset];
3612 3613
			break;
		case Prefix:
3614
			if (ctxt->rep_prefix && op_prefix)
3615
				return EMULATION_FAILED;
3616
			simd_prefix = op_prefix ? 0x66 : ctxt->rep_prefix;
3617 3618 3619 3620 3621 3622 3623 3624
			switch (simd_prefix) {
			case 0x00: opcode = opcode.u.gprefix->pfx_no; break;
			case 0x66: opcode = opcode.u.gprefix->pfx_66; break;
			case 0xf2: opcode = opcode.u.gprefix->pfx_f2; break;
			case 0xf3: opcode = opcode.u.gprefix->pfx_f3; break;
			}
			break;
		default:
3625
			return EMULATION_FAILED;
3626
		}
3627

3628
		ctxt->d &= ~(u64)GroupMask;
3629
		ctxt->d |= opcode.flags;
3630 3631
	}

	ctxt->execute = opcode.u.execute;
	ctxt->check_perm = opcode.check_perm;
	ctxt->intercept = opcode.intercept;

	/* Unrecognised? */
	if (ctxt->d == 0 || (ctxt->d & Undefined))
		return EMULATION_FAILED;

	if (!(ctxt->d & VendorSpecific) && ctxt->only_vendor_specific_insn)
		return EMULATION_FAILED;

	if (mode == X86EMUL_MODE_PROT64 && (ctxt->d & Stack))
		ctxt->op_bytes = 8;

	if (ctxt->d & Op3264) {
		if (mode == X86EMUL_MODE_PROT64)
			ctxt->op_bytes = 8;
		else
			ctxt->op_bytes = 4;
	}

	if (ctxt->d & Sse)
		ctxt->op_bytes = 16;

	/* ModRM and SIB bytes. */
	if (ctxt->d & ModRM) {
		rc = decode_modrm(ctxt, &ctxt->memop);
		if (!ctxt->has_seg_override)
			set_seg_override(ctxt, ctxt->modrm_seg);
	} else if (ctxt->d & MemAbs)
		rc = decode_abs(ctxt, &ctxt->memop);
	if (rc != X86EMUL_CONTINUE)
		goto done;

	if (!ctxt->has_seg_override)
		set_seg_override(ctxt, VCPU_SREG_DS);

	ctxt->memop.addr.mem.seg = seg_override(ctxt);

	if (ctxt->memop.type == OP_MEM && ctxt->ad_bytes != 8)
		ctxt->memop.addr.mem.ea = (u32)ctxt->memop.addr.mem.ea;

	/*
	 * Decode and fetch the source operand: register, memory
	 * or immediate.
	 */
	rc = decode_operand(ctxt, &ctxt->src, (ctxt->d >> SrcShift) & OpMask);
	if (rc != X86EMUL_CONTINUE)
		goto done;

	/*
	 * Decode and fetch the second source operand: register, memory
	 * or immediate.
	 */
	rc = decode_operand(ctxt, &ctxt->src2, (ctxt->d >> Src2Shift) & OpMask);
	if (rc != X86EMUL_CONTINUE)
		goto done;

	/* Decode and fetch the destination operand: register or memory. */
	rc = decode_operand(ctxt, &ctxt->dst, (ctxt->d >> DstShift) & OpMask);

done:
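	/*
	 * RIP-relative displacements are relative to the end of the
	 * instruction; ctxt->_eip only points there once the whole
	 * instruction has been decoded, which is why the fixup below is
	 * not done in decode_modrm().
	 */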
	if (ctxt->memopp && ctxt->memopp->type == OP_MEM && ctxt->rip_relative)
		ctxt->memopp->addr.mem.ea += ctxt->_eip;

	return (rc != X86EMUL_CONTINUE) ? EMULATION_FAILED : EMULATION_OK;
}

static bool string_insn_completed(struct x86_emulate_ctxt *ctxt)
{
	/* The second termination condition only applies to REPE
	 * and REPNE. Test whether the repeat string operation prefix is
	 * REPE/REPZ or REPNE/REPNZ and, if it is, check the corresponding
	 * termination condition:
	 * 	- if REPE/REPZ and ZF = 0 then done
	 * 	- if REPNE/REPNZ and ZF = 1 then done
	 */
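	/* e.g. REPE CMPSB stops on the first mismatching byte (ZF cleared),
	 * REPNE SCASB on the first matching byte (ZF set).
	 */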
	if (((ctxt->b == 0xa6) || (ctxt->b == 0xa7) ||
	     (ctxt->b == 0xae) || (ctxt->b == 0xaf))
	    && (((ctxt->rep_prefix == REPE_PREFIX) &&
		 ((ctxt->eflags & EFLG_ZF) == 0))
		|| ((ctxt->rep_prefix == REPNE_PREFIX) &&
		    ((ctxt->eflags & EFLG_ZF) == EFLG_ZF))))
		return true;

	return false;
}

int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
{
	struct x86_emulate_ops *ops = ctxt->ops;
	u64 msr_data;
	int rc = X86EMUL_CONTINUE;
	int saved_dst_type = ctxt->dst.type;

	ctxt->mem_read.pos = 0;

	if (ctxt->mode == X86EMUL_MODE_PROT64 && (ctxt->d & No64)) {
		rc = emulate_ud(ctxt);
		goto done;
	}

	/* LOCK prefix is allowed only with some instructions */
	if (ctxt->lock_prefix && (!(ctxt->d & Lock) || ctxt->dst.type != OP_MEM)) {
		rc = emulate_ud(ctxt);
		goto done;
	}

	if ((ctxt->d & SrcMask) == SrcMemFAddr && ctxt->src.type != OP_MEM) {
		rc = emulate_ud(ctxt);
		goto done;
	}

	if ((ctxt->d & Sse)
	    && ((ops->get_cr(ctxt, 0) & X86_CR0_EM)
		|| !(ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR))) {
		rc = emulate_ud(ctxt);
		goto done;
	}

	if ((ctxt->d & Sse) && (ops->get_cr(ctxt, 0) & X86_CR0_TS)) {
		rc = emulate_nm(ctxt);
		goto done;
	}

	if (unlikely(ctxt->guest_mode) && ctxt->intercept) {
		rc = emulator_check_intercept(ctxt, ctxt->intercept,
					      X86_ICPT_PRE_EXCEPT);
		if (rc != X86EMUL_CONTINUE)
			goto done;
	}

	/* Privileged instruction can be executed only in CPL=0 */
	if ((ctxt->d & Priv) && ops->cpl(ctxt)) {
		rc = emulate_gp(ctxt, 0);
		goto done;
	}

	/* Instruction can only be executed in protected mode */
	if ((ctxt->d & Prot) && !(ctxt->mode & X86EMUL_MODE_PROT)) {
		rc = emulate_ud(ctxt);
		goto done;
	}

	/* Do instruction specific permission checks */
	if (ctxt->check_perm) {
		rc = ctxt->check_perm(ctxt);
		if (rc != X86EMUL_CONTINUE)
			goto done;
	}

	if (unlikely(ctxt->guest_mode) && ctxt->intercept) {
		rc = emulator_check_intercept(ctxt, ctxt->intercept,
					      X86_ICPT_POST_EXCEPT);
		if (rc != X86EMUL_CONTINUE)
			goto done;
	}

	if (ctxt->rep_prefix && (ctxt->d & String)) {
		/* All REP prefixes have the same first termination condition */
		if (address_mask(ctxt, ctxt->regs[VCPU_REGS_RCX]) == 0) {
			ctxt->eip = ctxt->_eip;
			goto done;
		}
	}
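	/*
	 * Note: a REP-prefixed string instruction with a zero count is a
	 * no-op; the check above simply steps the instruction pointer
	 * past it.
	 */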

	if ((ctxt->src.type == OP_MEM) && !(ctxt->d & NoAccess)) {
		rc = segmented_read(ctxt, ctxt->src.addr.mem,
				    ctxt->src.valptr, ctxt->src.bytes);
		if (rc != X86EMUL_CONTINUE)
			goto done;
		ctxt->src.orig_val64 = ctxt->src.val64;
	}

	if (ctxt->src2.type == OP_MEM) {
		rc = segmented_read(ctxt, ctxt->src2.addr.mem,
				    &ctxt->src2.val, ctxt->src2.bytes);
		if (rc != X86EMUL_CONTINUE)
			goto done;
	}

	if ((ctxt->d & DstMask) == ImplicitOps)
		goto special_insn;

	if ((ctxt->dst.type == OP_MEM) && !(ctxt->d & Mov)) {
		/* optimisation - avoid slow emulated read if Mov */
		rc = segmented_read(ctxt, ctxt->dst.addr.mem,
				   &ctxt->dst.val, ctxt->dst.bytes);
		if (rc != X86EMUL_CONTINUE)
			goto done;
	}
	ctxt->dst.orig_val = ctxt->dst.val;

special_insn:

	if (unlikely(ctxt->guest_mode) && ctxt->intercept) {
		rc = emulator_check_intercept(ctxt, ctxt->intercept,
					      X86_ICPT_POST_MEMACCESS);
		if (rc != X86EMUL_CONTINUE)
			goto done;
	}

	if (ctxt->execute) {
		rc = ctxt->execute(ctxt);
		if (rc != X86EMUL_CONTINUE)
			goto done;
		goto writeback;
	}
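	/*
	 * Opcodes that have been converted to an ->execute callback are
	 * handled above; only the remaining ones fall through to the
	 * opcode switches below.
	 */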

	if (ctxt->twobyte)
		goto twobyte_insn;

	switch (ctxt->b) {
	case 0x40 ... 0x47: /* inc r16/r32 */
		emulate_1op(ctxt, "inc");
		break;
	case 0x48 ... 0x4f: /* dec r16/r32 */
		emulate_1op(ctxt, "dec");
		break;
	case 0x63:		/* movsxd */
		if (ctxt->mode != X86EMUL_MODE_PROT64)
			goto cannot_emulate;
		ctxt->dst.val = (s32) ctxt->src.val;
		break;
	case 0x6c:		/* insb */
	case 0x6d:		/* insw/insd */
		ctxt->src.val = ctxt->regs[VCPU_REGS_RDX];
		goto do_io_in;
	case 0x6e:		/* outsb */
	case 0x6f:		/* outsw/outsd */
		ctxt->dst.val = ctxt->regs[VCPU_REGS_RDX];
		goto do_io_out;
		break;
	case 0x70 ... 0x7f: /* jcc (short) */
		if (test_cc(ctxt->b, ctxt->eflags))
			jmp_rel(ctxt, ctxt->src.val);
		break;
	case 0x8d: /* lea r16/r32, m */
		ctxt->dst.val = ctxt->src.addr.mem.ea;
		break;
	case 0x8f:		/* pop (sole member of Grp1a) */
		rc = em_grp1a(ctxt);
		break;
	case 0x90 ... 0x97: /* nop / xchg reg, rax */
		if (ctxt->dst.addr.reg == &ctxt->regs[VCPU_REGS_RAX])
			break;
		rc = em_xchg(ctxt);
		break;
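	/* 0x98 sign-extends the low half of the accumulator in place:
	 * AL->AX (cbw), AX->EAX (cwde) or EAX->RAX (cdqe), depending on
	 * the effective operand size.
	 */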
	case 0x98: /* cbw/cwde/cdqe */
		switch (ctxt->op_bytes) {
		case 2: ctxt->dst.val = (s8)ctxt->dst.val; break;
		case 4: ctxt->dst.val = (s16)ctxt->dst.val; break;
		case 8: ctxt->dst.val = (s32)ctxt->dst.val; break;
		}
		break;
	case 0xc0 ... 0xc1:
		rc = em_grp2(ctxt);
		break;
	case 0xcc:		/* int3 */
		rc = emulate_int(ctxt, 3);
		break;
	case 0xcd:		/* int n */
		rc = emulate_int(ctxt, ctxt->src.val);
		break;
	case 0xce:		/* into */
		if (ctxt->eflags & EFLG_OF)
			rc = emulate_int(ctxt, 4);
		break;
	case 0xd0 ... 0xd1:	/* Grp2 */
		rc = em_grp2(ctxt);
		break;
	case 0xd2 ... 0xd3:	/* Grp2 */
		ctxt->src.val = ctxt->regs[VCPU_REGS_RCX];
		rc = em_grp2(ctxt);
		break;
	case 0xe4: 	/* inb */
	case 0xe5: 	/* in */
		goto do_io_in;
	case 0xe6: /* outb */
	case 0xe7: /* out */
		goto do_io_out;
	case 0xe8: /* call (near) */ {
		long int rel = ctxt->src.val;
		ctxt->src.val = (unsigned long) ctxt->_eip;
		jmp_rel(ctxt, rel);
		rc = em_push(ctxt);
		break;
	}
	case 0xe9: /* jmp rel */
	case 0xeb: /* jmp rel short */
		jmp_rel(ctxt, ctxt->src.val);
		ctxt->dst.type = OP_NONE; /* Disable writeback. */
		break;
	case 0xec: /* in al,dx */
	case 0xed: /* in (e/r)ax,dx */
	do_io_in:
		if (!pio_in_emulated(ctxt, ctxt->dst.bytes, ctxt->src.val,
				     &ctxt->dst.val))
			goto done; /* IO is needed */
		break;
	case 0xee: /* out dx,al */
	case 0xef: /* out dx,(e/r)ax */
	do_io_out:
		ops->pio_out_emulated(ctxt, ctxt->src.bytes, ctxt->dst.val,
				      &ctxt->src.val, 1);
		ctxt->dst.type = OP_NONE;	/* Disable writeback. */
		break;
	case 0xf4:              /* hlt */
		ctxt->ops->halt(ctxt);
		break;
	case 0xf5:	/* cmc */
		/* complement carry flag from eflags reg */
		ctxt->eflags ^= EFLG_CF;
		break;
	case 0xf8: /* clc */
		ctxt->eflags &= ~EFLG_CF;
		break;
	case 0xf9: /* stc */
		ctxt->eflags |= EFLG_CF;
		break;
	case 0xfc: /* cld */
		ctxt->eflags &= ~EFLG_DF;
		break;
	case 0xfd: /* std */
		ctxt->eflags |= EFLG_DF;
		break;
	case 0xfe: /* Grp4 */
		rc = em_grp45(ctxt);
		break;
	case 0xff: /* Grp5 */
		rc = em_grp45(ctxt);
		break;
	default:
		goto cannot_emulate;
	}

	if (rc != X86EMUL_CONTINUE)
		goto done;

writeback:
	rc = writeback(ctxt);
	if (rc != X86EMUL_CONTINUE)
		goto done;

	/*
	 * restore dst type in case the decoding will be reused
	 * (happens for string instructions)
	 */
	ctxt->dst.type = saved_dst_type;

	if ((ctxt->d & SrcMask) == SrcSI)
		string_addr_inc(ctxt, seg_override(ctxt),
				VCPU_REGS_RSI, &ctxt->src);

	if ((ctxt->d & DstMask) == DstDI)
		string_addr_inc(ctxt, VCPU_SREG_ES, VCPU_REGS_RDI,
				&ctxt->dst);

	if (ctxt->rep_prefix && (ctxt->d & String)) {
		struct read_cache *r = &ctxt->io_read;
		register_address_increment(ctxt, &ctxt->regs[VCPU_REGS_RCX], -1);

		if (!string_insn_completed(ctxt)) {
			/*
			 * Re-enter the guest when the pio read-ahead buffer
			 * is empty or, if it is not used, after every 1024
			 * iterations.
			 */
			if ((r->end != 0 || ctxt->regs[VCPU_REGS_RCX] & 0x3ff) &&
			    (r->end == 0 || r->end != r->pos)) {
				/*
				 * Reset read cache. Usually happens before
				 * decode, but since the instruction is
				 * restarted we have to do it here.
				 */
				ctxt->mem_read.end = 0;
				return EMULATION_RESTART;
			}
			goto done; /* skip rip writeback */
		}
	}

	ctxt->eip = ctxt->_eip;

done:
	if (rc == X86EMUL_PROPAGATE_FAULT)
		ctxt->have_exception = true;
	if (rc == X86EMUL_INTERCEPTED)
		return EMULATION_INTERCEPTED;

	return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK;

twobyte_insn:
	switch (ctxt->b) {
	case 0x09:		/* wbinvd */
		(ctxt->ops->wbinvd)(ctxt);
		break;
	case 0x08:		/* invd */
	case 0x0d:		/* GrpP (prefetch) */
	case 0x18:		/* Grp16 (prefetch/nop) */
		break;
	case 0x20: /* mov cr, reg */
		ctxt->dst.val = ops->get_cr(ctxt, ctxt->modrm_reg);
		break;
	case 0x21: /* mov from dr to reg */
		ops->get_dr(ctxt, ctxt->modrm_reg, &ctxt->dst.val);
		break;
	case 0x22: /* mov reg, cr */
		if (ops->set_cr(ctxt, ctxt->modrm_reg, ctxt->src.val)) {
			emulate_gp(ctxt, 0);
			rc = X86EMUL_PROPAGATE_FAULT;
			goto done;
		}
		ctxt->dst.type = OP_NONE;
		break;
	case 0x23: /* mov from reg to dr */
		if (ops->set_dr(ctxt, ctxt->modrm_reg, ctxt->src.val &
				((ctxt->mode == X86EMUL_MODE_PROT64) ?
				 ~0ULL : ~0U)) < 0) {
			/* #UD condition is already handled by the code above */
			emulate_gp(ctxt, 0);
			rc = X86EMUL_PROPAGATE_FAULT;
			goto done;
		}

		ctxt->dst.type = OP_NONE;	/* no writeback */
		break;
	case 0x30:
		/* wrmsr */
		msr_data = (u32)ctxt->regs[VCPU_REGS_RAX]
			| ((u64)ctxt->regs[VCPU_REGS_RDX] << 32);
		if (ops->set_msr(ctxt, ctxt->regs[VCPU_REGS_RCX], msr_data)) {
			emulate_gp(ctxt, 0);
			rc = X86EMUL_PROPAGATE_FAULT;
			goto done;
		}
		rc = X86EMUL_CONTINUE;
		break;
	case 0x32:
		/* rdmsr */
		if (ops->get_msr(ctxt, ctxt->regs[VCPU_REGS_RCX], &msr_data)) {
			emulate_gp(ctxt, 0);
			rc = X86EMUL_PROPAGATE_FAULT;
			goto done;
		} else {
			ctxt->regs[VCPU_REGS_RAX] = (u32)msr_data;
			ctxt->regs[VCPU_REGS_RDX] = msr_data >> 32;
		}
		rc = X86EMUL_CONTINUE;
		break;
	case 0x40 ... 0x4f:	/* cmov */
		ctxt->dst.val = ctxt->dst.orig_val = ctxt->src.val;
		if (!test_cc(ctxt->b, ctxt->eflags))
			ctxt->dst.type = OP_NONE; /* no writeback */
		break;
	case 0x80 ... 0x8f: /* jnz rel, etc */
		if (test_cc(ctxt->b, ctxt->eflags))
			jmp_rel(ctxt, ctxt->src.val);
		break;
	case 0x90 ... 0x9f:     /* setcc r/m8 */
		ctxt->dst.val = test_cc(ctxt->b, ctxt->eflags);
		break;
	case 0xa3:
	      bt:		/* bt */
		ctxt->dst.type = OP_NONE;
		/* only subword offset */
		ctxt->src.val &= (ctxt->dst.bytes << 3) - 1;
		emulate_2op_SrcV_nobyte(ctxt, "bt");
		break;
	case 0xa4: /* shld imm8, r, r/m */
	case 0xa5: /* shld cl, r, r/m */
		emulate_2op_cl(ctxt, "shld");
		break;
	case 0xab:
	      bts:		/* bts */
		emulate_2op_SrcV_nobyte(ctxt, "bts");
		break;
	case 0xac: /* shrd imm8, r, r/m */
	case 0xad: /* shrd cl, r, r/m */
		emulate_2op_cl(ctxt, "shrd");
		break;
	case 0xae:              /* clflush */
		break;
	case 0xb0 ... 0xb1:	/* cmpxchg */
		/*
		 * Save real source value, then compare EAX against
		 * destination.
		 */
		ctxt->src.orig_val = ctxt->src.val;
		ctxt->src.val = ctxt->regs[VCPU_REGS_RAX];
		emulate_2op_SrcV(ctxt, "cmp");
		if (ctxt->eflags & EFLG_ZF) {
			/* Success: write back to memory. */
			ctxt->dst.val = ctxt->src.orig_val;
		} else {
			/* Failure: write the value we saw to EAX. */
			ctxt->dst.type = OP_REG;
			ctxt->dst.addr.reg = (unsigned long *)&ctxt->regs[VCPU_REGS_RAX];
		}
		break;
	case 0xb3:
	      btr:		/* btr */
		emulate_2op_SrcV_nobyte(ctxt, "btr");
		break;
	case 0xb6 ... 0xb7:	/* movzx */
		ctxt->dst.bytes = ctxt->op_bytes;
		ctxt->dst.val = (ctxt->d & ByteOp) ? (u8) ctxt->src.val
						       : (u16) ctxt->src.val;
		break;
	case 0xba:		/* Grp8 */
		switch (ctxt->modrm_reg & 3) {
		case 0:
			goto bt;
		case 1:
			goto bts;
		case 2:
			goto btr;
		case 3:
			goto btc;
		}
		break;
	case 0xbb:
	      btc:		/* btc */
		emulate_2op_SrcV_nobyte(ctxt, "btc");
		break;
	case 0xbc: {		/* bsf */
		u8 zf;
		__asm__ ("bsf %2, %0; setz %1"
			 : "=r"(ctxt->dst.val), "=q"(zf)
			 : "r"(ctxt->src.val));
		ctxt->eflags &= ~X86_EFLAGS_ZF;
		if (zf) {
			ctxt->eflags |= X86_EFLAGS_ZF;
			ctxt->dst.type = OP_NONE;	/* Disable writeback. */
		}
		break;
	}
	case 0xbd: {		/* bsr */
		u8 zf;
		__asm__ ("bsr %2, %0; setz %1"
			 : "=r"(ctxt->dst.val), "=q"(zf)
			 : "r"(ctxt->src.val));
		ctxt->eflags &= ~X86_EFLAGS_ZF;
		if (zf) {
			ctxt->eflags |= X86_EFLAGS_ZF;
			ctxt->dst.type = OP_NONE;	/* Disable writeback. */
		}
		break;
	}
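	/* For both bsf and bsr, a zero source sets ZF and leaves the
	 * destination architecturally undefined, hence writeback is
	 * suppressed in that case.
	 */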
	case 0xbe ... 0xbf:	/* movsx */
		ctxt->dst.bytes = ctxt->op_bytes;
		ctxt->dst.val = (ctxt->d & ByteOp) ? (s8) ctxt->src.val :
							(s16) ctxt->src.val;
		break;
	case 0xc0 ... 0xc1:	/* xadd */
		emulate_2op_SrcV(ctxt, "add");
		/* Write back the register source. */
		ctxt->src.val = ctxt->dst.orig_val;
		write_register_operand(&ctxt->src);
		break;
	case 0xc3:		/* movnti */
		ctxt->dst.bytes = ctxt->op_bytes;
		ctxt->dst.val = (ctxt->op_bytes == 4) ? (u32) ctxt->src.val :
							(u64) ctxt->src.val;
		break;
	case 0xc7:		/* Grp9 (cmpxchg8b) */
		rc = em_grp9(ctxt);
		break;
	default:
		goto cannot_emulate;
	}

	if (rc != X86EMUL_CONTINUE)
		goto done;

	goto writeback;

cannot_emulate:
	return EMULATION_FAILED;
}