/******************************************************************************
 * emulate.c
 *
 * Generic x86 (32-bit and 64-bit) instruction decoder and emulator.
 *
 * Copyright (c) 2005 Keir Fraser
 *
 * Linux coding style, mod r/m decoder, segment base fixes, real-mode
 * privileged instructions:
 *
 * Copyright (C) 2006 Qumranet
 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
 *
 *   Avi Kivity <avi@qumranet.com>
 *   Yaniv Kamay <yaniv@qumranet.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 * From: xen-unstable 10676:af9809f51f81a3c43f276f00c81a52ef558afda4
 */

#ifndef __KERNEL__
#include <stdio.h>
#include <stdint.h>
#include <public/xen.h>
#define DPRINTF(_f, _a ...) printf(_f , ## _a)
#else
#include <linux/kvm_host.h>
#include "kvm_cache_regs.h"
#define DPRINTF(x...) do {} while (0)
#endif
#include <linux/module.h>
#include <asm/kvm_emulate.h>

#include "x86.h"
#include "tss.h"

/*
 * Opcode effective-address decode tables.
 * Note that we only emulate instructions that have at least one memory
 * operand (excluding implicit stack references). We assume that stack
 * references and instruction fetches will never occur in special memory
 * areas that require emulation. So, for example, 'mov <imm>,<reg>' need
 * not be handled.
 */

/* Operand sizes: 8-bit operands or specified/overridden size. */
#define ByteOp      (1<<0)	/* 8-bit operands. */
/* Destination operand type. */
#define ImplicitOps (1<<1)	/* Implicit in opcode. No generic decode. */
#define DstReg      (2<<1)	/* Register operand. */
#define DstMem      (3<<1)	/* Memory operand. */
#define DstAcc      (4<<1)	/* Destination Accumulator */
#define DstDI       (5<<1)	/* Destination is in ES:(E)DI */
#define DstMem64    (6<<1)	/* 64bit memory operand */
#define DstMask     (7<<1)
/* Source operand type. */
#define SrcNone     (0<<4)	/* No source operand. */
#define SrcImplicit (0<<4)	/* Source operand is implicit in the opcode. */
#define SrcReg      (1<<4)	/* Register operand. */
#define SrcMem      (2<<4)	/* Memory operand. */
#define SrcMem16    (3<<4)	/* Memory operand (16-bit). */
#define SrcMem32    (4<<4)	/* Memory operand (32-bit). */
#define SrcImm      (5<<4)	/* Immediate operand. */
#define SrcImmByte  (6<<4)	/* 8-bit sign-extended immediate operand. */
#define SrcOne      (7<<4)	/* Implied '1' */
#define SrcImmUByte (8<<4)      /* 8-bit unsigned immediate operand. */
#define SrcImmU     (9<<4)      /* Immediate operand, unsigned */
#define SrcSI       (0xa<<4)	/* Source is in the DS:RSI */
#define SrcImmFAddr (0xb<<4)	/* Source is immediate far address */
#define SrcMemFAddr (0xc<<4)	/* Source is far address in memory */
#define SrcAcc      (0xd<<4)	/* Source Accumulator */
#define SrcMask     (0xf<<4)
/* Generic ModRM decode. */
#define ModRM       (1<<8)
/* Destination is only written; never read. */
#define Mov         (1<<9)
#define BitOp       (1<<10)
#define MemAbs      (1<<11)      /* Memory operand is absolute displacement */
#define String      (1<<12)     /* String instruction (rep capable) */
#define Stack       (1<<13)     /* Stack instruction (push/pop) */
#define Group       (1<<14)     /* Bits 3:5 of modrm byte extend opcode */
#define GroupDual   (1<<15)     /* Alternate decoding of mod == 3 */
/* Misc flags */
#define Op3264      (1<<24) /* Operand is 64b in long mode, 32b otherwise */
#define Undefined   (1<<25) /* No Such Instruction */
#define Lock        (1<<26) /* lock prefix is allowed for the instruction */
#define Priv        (1<<27) /* instruction generates #GP if current CPL != 0 */
#define No64	    (1<<28)
/* Source 2 operand type */
#define Src2None    (0<<29)
#define Src2CL      (1<<29)
#define Src2ImmByte (2<<29)
#define Src2One     (3<<29)
#define Src2Mask    (7<<29)

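/*
 * Repeat an initializer 2, 3, ... 16 times; used to fill runs of identical
 * entries in the opcode tables.
 */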
#define X2(x...) x, x
#define X3(x...) X2(x), x
#define X4(x...) X2(x), X2(x)
#define X5(x...) X4(x), x
#define X6(x...) X4(x), X2(x)
#define X7(x...) X4(x), X3(x)
#define X8(x...) X4(x), X4(x)
#define X16(x...) X8(x), X8(x)

struct opcode {
	u32 flags;
	union {
		int (*execute)(struct x86_emulate_ctxt *ctxt);
		struct opcode *group;
		struct group_dual *gdual;
	} u;
};

struct group_dual {
	struct opcode mod012[8];
	struct opcode mod3[8];
};

/* EFLAGS bit definitions. */
#define EFLG_ID (1<<21)
#define EFLG_VIP (1<<20)
#define EFLG_VIF (1<<19)
#define EFLG_AC (1<<18)
#define EFLG_VM (1<<17)
#define EFLG_RF (1<<16)
#define EFLG_IOPL (3<<12)
#define EFLG_NT (1<<14)
#define EFLG_OF (1<<11)
#define EFLG_DF (1<<10)
#define EFLG_IF (1<<9)
#define EFLG_TF (1<<8)
#define EFLG_SF (1<<7)
#define EFLG_ZF (1<<6)
#define EFLG_AF (1<<4)
#define EFLG_PF (1<<2)
#define EFLG_CF (1<<0)

#define EFLG_RESERVED_ZEROS_MASK 0xffc0802a
#define EFLG_RESERVED_ONE_MASK 2

/*
 * Instruction emulation:
 * Most instructions are emulated directly via a fragment of inline assembly
 * code. This allows us to save/restore EFLAGS and thus very easily pick up
 * any modified flags.
 */

#if defined(CONFIG_X86_64)
#define _LO32 "k"		/* force 32-bit operand */
#define _STK  "%%rsp"		/* stack pointer */
#elif defined(__i386__)
#define _LO32 ""		/* force 32-bit operand */
#define _STK  "%%esp"		/* stack pointer */
#endif

/*
 * These EFLAGS bits are restored from saved value during emulation, and
 * any changes are written back to the saved value after emulation.
 */
#define EFLAGS_MASK (EFLG_OF|EFLG_SF|EFLG_ZF|EFLG_AF|EFLG_PF|EFLG_CF)

/* Before executing instruction: restore necessary bits in EFLAGS. */
#define _PRE_EFLAGS(_sav, _msk, _tmp)					\
	/* EFLAGS = (_sav & _msk) | (EFLAGS & ~_msk); _sav &= ~_msk; */ \
	"movl %"_sav",%"_LO32 _tmp"; "                                  \
	"push %"_tmp"; "                                                \
	"push %"_tmp"; "                                                \
	"movl %"_msk",%"_LO32 _tmp"; "                                  \
	"andl %"_LO32 _tmp",("_STK"); "                                 \
	"pushf; "                                                       \
	"notl %"_LO32 _tmp"; "                                          \
	"andl %"_LO32 _tmp",("_STK"); "                                 \
	"andl %"_LO32 _tmp","__stringify(BITS_PER_LONG/4)"("_STK"); "	\
	"pop  %"_tmp"; "                                                \
	"orl  %"_LO32 _tmp",("_STK"); "                                 \
	"popf; "                                                        \
	"pop  %"_sav"; "

/* After executing instruction: write-back necessary bits in EFLAGS. */
#define _POST_EFLAGS(_sav, _msk, _tmp) \
	/* _sav |= EFLAGS & _msk; */		\
	"pushf; "				\
	"pop  %"_tmp"; "			\
	"andl %"_msk",%"_LO32 _tmp"; "		\
	"orl  %"_LO32 _tmp",%"_sav"; "

#ifdef CONFIG_X86_64
#define ON64(x) x
#else
#define ON64(x)
#endif

#define ____emulate_2op(_op, _src, _dst, _eflags, _x, _y, _suffix)	\
	do {								\
		__asm__ __volatile__ (					\
			_PRE_EFLAGS("0", "4", "2")			\
			_op _suffix " %"_x"3,%1; "			\
			_POST_EFLAGS("0", "4", "2")			\
			: "=m" (_eflags), "=m" ((_dst).val),		\
			  "=&r" (_tmp)					\
			: _y ((_src).val), "i" (EFLAGS_MASK));		\
	} while (0)


/* Raw emulation: instruction has two explicit operands. */
#define __emulate_2op_nobyte(_op,_src,_dst,_eflags,_wx,_wy,_lx,_ly,_qx,_qy) \
	do {								\
		unsigned long _tmp;					\
									\
		switch ((_dst).bytes) {					\
		case 2:							\
			____emulate_2op(_op,_src,_dst,_eflags,_wx,_wy,"w"); \
			break;						\
		case 4:							\
			____emulate_2op(_op,_src,_dst,_eflags,_lx,_ly,"l"); \
			break;						\
		case 8:							\
			ON64(____emulate_2op(_op,_src,_dst,_eflags,_qx,_qy,"q")); \
			break;						\
		}							\
	} while (0)

#define __emulate_2op(_op,_src,_dst,_eflags,_bx,_by,_wx,_wy,_lx,_ly,_qx,_qy) \
	do {								     \
		unsigned long _tmp;					     \
		switch ((_dst).bytes) {				             \
		case 1:							     \
			____emulate_2op(_op,_src,_dst,_eflags,_bx,_by,"b");  \
			break;						     \
		default:						     \
			__emulate_2op_nobyte(_op, _src, _dst, _eflags,	     \
					     _wx, _wy, _lx, _ly, _qx, _qy);  \
			break;						     \
		}							     \
	} while (0)

/* Source operand is byte-sized and may be restricted to just %cl. */
#define emulate_2op_SrcB(_op, _src, _dst, _eflags)                      \
	__emulate_2op(_op, _src, _dst, _eflags,				\
		      "b", "c", "b", "c", "b", "c", "b", "c")

/* Source operand is byte, word, long or quad sized. */
#define emulate_2op_SrcV(_op, _src, _dst, _eflags)                      \
	__emulate_2op(_op, _src, _dst, _eflags,				\
		      "b", "q", "w", "r", _LO32, "r", "", "r")

/* Source operand is word, long or quad sized. */
#define emulate_2op_SrcV_nobyte(_op, _src, _dst, _eflags)               \
	__emulate_2op_nobyte(_op, _src, _dst, _eflags,			\
			     "w", "r", _LO32, "r", "", "r")

/* Instruction has three operands and one operand is stored in ECX register */
#define __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, _suffix, _type) 	\
	do {									\
		unsigned long _tmp;						\
		_type _clv  = (_cl).val;  					\
		_type _srcv = (_src).val;    					\
		_type _dstv = (_dst).val;					\
										\
		__asm__ __volatile__ (						\
			_PRE_EFLAGS("0", "5", "2")				\
			_op _suffix " %4,%1 \n"					\
			_POST_EFLAGS("0", "5", "2")				\
			: "=m" (_eflags), "+r" (_dstv), "=&r" (_tmp)		\
			: "c" (_clv) , "r" (_srcv), "i" (EFLAGS_MASK)		\
			); 							\
										\
		(_cl).val  = (unsigned long) _clv;				\
		(_src).val = (unsigned long) _srcv;				\
		(_dst).val = (unsigned long) _dstv;				\
	} while (0)

#define emulate_2op_cl(_op, _cl, _src, _dst, _eflags)				\
	do {									\
		switch ((_dst).bytes) {						\
		case 2:								\
			__emulate_2op_cl(_op, _cl, _src, _dst, _eflags,  	\
						"w", unsigned short);         	\
			break;							\
		case 4: 							\
			__emulate_2op_cl(_op, _cl, _src, _dst, _eflags,  	\
						"l", unsigned int);           	\
			break;							\
		case 8:								\
			ON64(__emulate_2op_cl(_op, _cl, _src, _dst, _eflags,	\
						"q", unsigned long));  		\
			break;							\
		}								\
	} while (0)

#define __emulate_1op(_op, _dst, _eflags, _suffix)			\
	do {								\
		unsigned long _tmp;					\
									\
		__asm__ __volatile__ (					\
			_PRE_EFLAGS("0", "3", "2")			\
			_op _suffix " %1; "				\
			_POST_EFLAGS("0", "3", "2")			\
			: "=m" (_eflags), "+m" ((_dst).val),		\
			  "=&r" (_tmp)					\
			: "i" (EFLAGS_MASK));				\
	} while (0)

/* Instruction has only one explicit operand (no source operand). */
#define emulate_1op(_op, _dst, _eflags)                                    \
	do {								\
		switch ((_dst).bytes) {				        \
		case 1:	__emulate_1op(_op, _dst, _eflags, "b"); break;	\
		case 2:	__emulate_1op(_op, _dst, _eflags, "w"); break;	\
		case 4:	__emulate_1op(_op, _dst, _eflags, "l"); break;	\
		case 8:	ON64(__emulate_1op(_op, _dst, _eflags, "q")); break; \
		}							\
	} while (0)

/* Fetch next part of the instruction being emulated. */
#define insn_fetch(_type, _size, _eip)                                  \
({	unsigned long _x;						\
	rc = do_insn_fetch(ctxt, ops, (_eip), &_x, (_size));		\
	if (rc != X86EMUL_CONTINUE)					\
		goto done;						\
	(_eip) += (_size);						\
	(_type)_x;							\
})

#define insn_fetch_arr(_arr, _size, _eip)                                \
({	rc = do_insn_fetch(ctxt, ops, (_eip), _arr, (_size));		\
	if (rc != X86EMUL_CONTINUE)					\
		goto done;						\
	(_eip) += (_size);						\
})

static inline unsigned long ad_mask(struct decode_cache *c)
{
	return (1UL << (c->ad_bytes << 3)) - 1;
}

/* Access/update address held in a register, based on addressing mode. */
static inline unsigned long
address_mask(struct decode_cache *c, unsigned long reg)
{
	if (c->ad_bytes == sizeof(unsigned long))
		return reg;
	else
		return reg & ad_mask(c);
}

static inline unsigned long
register_address(struct decode_cache *c, unsigned long base, unsigned long reg)
{
	return base + address_mask(c, reg);
}

static inline void
register_address_increment(struct decode_cache *c, unsigned long *reg, int inc)
{
	if (c->ad_bytes == sizeof(unsigned long))
		*reg += inc;
	else
		*reg = (*reg & ~ad_mask(c)) | ((*reg + inc) & ad_mask(c));
}

static inline void jmp_rel(struct decode_cache *c, int rel)
{
	register_address_increment(c, &c->eip, rel);
}

static void set_seg_override(struct decode_cache *c, int seg)
{
	c->has_seg_override = true;
	c->seg_override = seg;
}

static unsigned long seg_base(struct x86_emulate_ctxt *ctxt,
			      struct x86_emulate_ops *ops, int seg)
{
	if (ctxt->mode == X86EMUL_MODE_PROT64 && seg < VCPU_SREG_FS)
		return 0;

	return ops->get_cached_segment_base(seg, ctxt->vcpu);
}

static unsigned long seg_override_base(struct x86_emulate_ctxt *ctxt,
				       struct x86_emulate_ops *ops,
				       struct decode_cache *c)
{
	if (!c->has_seg_override)
		return 0;

	return seg_base(ctxt, ops, c->seg_override);
}

static unsigned long es_base(struct x86_emulate_ctxt *ctxt,
			     struct x86_emulate_ops *ops)
{
	return seg_base(ctxt, ops, VCPU_SREG_ES);
}

static unsigned long ss_base(struct x86_emulate_ctxt *ctxt,
			     struct x86_emulate_ops *ops)
{
	return seg_base(ctxt, ops, VCPU_SREG_SS);
}

static void emulate_exception(struct x86_emulate_ctxt *ctxt, int vec,
				      u32 error, bool valid)
{
	ctxt->exception = vec;
	ctxt->error_code = error;
	ctxt->error_code_valid = valid;
	ctxt->restart = false;
}

static void emulate_gp(struct x86_emulate_ctxt *ctxt, int err)
{
	emulate_exception(ctxt, GP_VECTOR, err, true);
}

static void emulate_pf(struct x86_emulate_ctxt *ctxt, unsigned long addr,
		       int err)
{
	ctxt->cr2 = addr;
	emulate_exception(ctxt, PF_VECTOR, err, true);
}

static void emulate_ud(struct x86_emulate_ctxt *ctxt)
{
	emulate_exception(ctxt, UD_VECTOR, 0, false);
}

static void emulate_ts(struct x86_emulate_ctxt *ctxt, int err)
{
	emulate_exception(ctxt, TS_VECTOR, err, true);
}

static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt,
			      struct x86_emulate_ops *ops,
			      unsigned long eip, u8 *dest)
{
	struct fetch_cache *fc = &ctxt->decode.fetch;
	int rc;
	int size, cur_size;

	if (eip == fc->end) {
		cur_size = fc->end - fc->start;
		size = min(15UL - cur_size, PAGE_SIZE - offset_in_page(eip));
		rc = ops->fetch(ctxt->cs_base + eip, fc->data + cur_size,
				size, ctxt->vcpu, NULL);
		if (rc != X86EMUL_CONTINUE)
			return rc;
		fc->end += size;
	}
	*dest = fc->data[eip - fc->start];
	return X86EMUL_CONTINUE;
}

static int do_insn_fetch(struct x86_emulate_ctxt *ctxt,
			 struct x86_emulate_ops *ops,
			 unsigned long eip, void *dest, unsigned size)
{
	int rc;

	/* x86 instructions are limited to 15 bytes. */
	if (eip + size - ctxt->eip > 15)
		return X86EMUL_UNHANDLEABLE;
	while (size--) {
		rc = do_fetch_insn_byte(ctxt, ops, eip++, dest++);
		if (rc != X86EMUL_CONTINUE)
			return rc;
	}
	return X86EMUL_CONTINUE;
}

/*
 * Given the 'reg' portion of a ModRM byte, and a register block, return a
 * pointer into the block that addresses the relevant register.
 * @highbyte_regs specifies whether to decode AH,CH,DH,BH.
 */
static void *decode_register(u8 modrm_reg, unsigned long *regs,
			     int highbyte_regs)
{
	void *p;

	p = &regs[modrm_reg];
	if (highbyte_regs && modrm_reg >= 4 && modrm_reg < 8)
		p = (unsigned char *)&regs[modrm_reg & 3] + 1;
	return p;
}

static int read_descriptor(struct x86_emulate_ctxt *ctxt,
			   struct x86_emulate_ops *ops,
			   ulong addr,
			   u16 *size, unsigned long *address, int op_bytes)
{
	int rc;

	if (op_bytes == 2)
		op_bytes = 3;
	*address = 0;
	rc = ops->read_std(addr, (unsigned long *)size, 2, ctxt->vcpu, NULL);
	if (rc != X86EMUL_CONTINUE)
		return rc;
	rc = ops->read_std(addr + 2, address, op_bytes, ctxt->vcpu, NULL);
	return rc;
}

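/*
 * Evaluate a conditional-instruction condition code (the low nibble of a
 * Jcc/SETcc/CMOVcc opcode) against the given EFLAGS value.
 */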
static int test_cc(unsigned int condition, unsigned int flags)
{
	int rc = 0;

	switch ((condition & 15) >> 1) {
	case 0: /* o */
		rc |= (flags & EFLG_OF);
		break;
	case 1: /* b/c/nae */
		rc |= (flags & EFLG_CF);
		break;
	case 2: /* z/e */
		rc |= (flags & EFLG_ZF);
		break;
	case 3: /* be/na */
		rc |= (flags & (EFLG_CF|EFLG_ZF));
		break;
	case 4: /* s */
		rc |= (flags & EFLG_SF);
		break;
	case 5: /* p/pe */
		rc |= (flags & EFLG_PF);
		break;
	case 7: /* le/ng */
		rc |= (flags & EFLG_ZF);
		/* fall through */
	case 6: /* l/nge */
		rc |= (!(flags & EFLG_SF) != !(flags & EFLG_OF));
		break;
	}

	/* Odd condition identifiers (lsb == 1) have inverted sense. */
	return (!!rc ^ (condition & 1));
}

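/* Load op->val from the register an operand points at, honouring its size. */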
static void fetch_register_operand(struct operand *op)
{
	switch (op->bytes) {
	case 1:
		op->val = *(u8 *)op->addr.reg;
		break;
	case 2:
		op->val = *(u16 *)op->addr.reg;
		break;
	case 4:
		op->val = *(u32 *)op->addr.reg;
		break;
	case 8:
		op->val = *(u64 *)op->addr.reg;
		break;
	}
}

static void decode_register_operand(struct operand *op,
				    struct decode_cache *c,
				    int inhibit_bytereg)
{
	unsigned reg = c->modrm_reg;
	int highbyte_regs = c->rex_prefix == 0;

	if (!(c->d & ModRM))
		reg = (c->b & 7) | ((c->rex_prefix & 1) << 3);
	op->type = OP_REG;
	if ((c->d & ByteOp) && !inhibit_bytereg) {
		op->addr.reg = decode_register(reg, c->regs, highbyte_regs);
		op->bytes = 1;
	} else {
		op->addr.reg = decode_register(reg, c->regs, 0);
		op->bytes = c->op_bytes;
	}
	fetch_register_operand(op);
	op->orig_val = op->val;
}

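/*
 * Decode the ModRM byte, plus any SIB byte and displacement, leaving either a
 * register operand (mod == 3) or an effective address in c->modrm_ea.
 */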
static int decode_modrm(struct x86_emulate_ctxt *ctxt,
			struct x86_emulate_ops *ops)
{
	struct decode_cache *c = &ctxt->decode;
	u8 sib;
	int index_reg = 0, base_reg = 0, scale;
	int rc = X86EMUL_CONTINUE;

	if (c->rex_prefix) {
		c->modrm_reg = (c->rex_prefix & 4) << 1;	/* REX.R */
		index_reg = (c->rex_prefix & 2) << 2; /* REX.X */
		c->modrm_rm = base_reg = (c->rex_prefix & 1) << 3; /* REX.B */
	}

	c->modrm = insn_fetch(u8, 1, c->eip);
	c->modrm_mod |= (c->modrm & 0xc0) >> 6;
	c->modrm_reg |= (c->modrm & 0x38) >> 3;
	c->modrm_rm |= (c->modrm & 0x07);
	c->modrm_ea = 0;
	c->modrm_seg = VCPU_SREG_DS;

	if (c->modrm_mod == 3) {
		c->modrm_ptr = decode_register(c->modrm_rm,
					       c->regs, c->d & ByteOp);
		c->modrm_val = *(unsigned long *)c->modrm_ptr;
		return rc;
	}

	if (c->ad_bytes == 2) {
		unsigned bx = c->regs[VCPU_REGS_RBX];
		unsigned bp = c->regs[VCPU_REGS_RBP];
		unsigned si = c->regs[VCPU_REGS_RSI];
		unsigned di = c->regs[VCPU_REGS_RDI];

		/* 16-bit ModR/M decode. */
		switch (c->modrm_mod) {
		case 0:
			if (c->modrm_rm == 6)
				c->modrm_ea += insn_fetch(u16, 2, c->eip);
			break;
		case 1:
			c->modrm_ea += insn_fetch(s8, 1, c->eip);
			break;
		case 2:
			c->modrm_ea += insn_fetch(u16, 2, c->eip);
			break;
		}
		switch (c->modrm_rm) {
		case 0:
			c->modrm_ea += bx + si;
			break;
		case 1:
			c->modrm_ea += bx + di;
			break;
		case 2:
			c->modrm_ea += bp + si;
			break;
		case 3:
			c->modrm_ea += bp + di;
			break;
		case 4:
			c->modrm_ea += si;
			break;
		case 5:
			c->modrm_ea += di;
			break;
		case 6:
			if (c->modrm_mod != 0)
				c->modrm_ea += bp;
			break;
		case 7:
			c->modrm_ea += bx;
			break;
		}
		if (c->modrm_rm == 2 || c->modrm_rm == 3 ||
		    (c->modrm_rm == 6 && c->modrm_mod != 0))
			c->modrm_seg = VCPU_SREG_SS;
		c->modrm_ea = (u16)c->modrm_ea;
	} else {
		/* 32/64-bit ModR/M decode. */
		if ((c->modrm_rm & 7) == 4) {
			sib = insn_fetch(u8, 1, c->eip);
			index_reg |= (sib >> 3) & 7;
			base_reg |= sib & 7;
			scale = sib >> 6;

			if ((base_reg & 7) == 5 && c->modrm_mod == 0)
				c->modrm_ea += insn_fetch(s32, 4, c->eip);
			else
				c->modrm_ea += c->regs[base_reg];
			if (index_reg != 4)
				c->modrm_ea += c->regs[index_reg] << scale;
		} else if ((c->modrm_rm & 7) == 5 && c->modrm_mod == 0) {
			if (ctxt->mode == X86EMUL_MODE_PROT64)
				c->rip_relative = 1;
		} else
			c->modrm_ea += c->regs[c->modrm_rm];
		switch (c->modrm_mod) {
		case 0:
			if (c->modrm_rm == 5)
				c->modrm_ea += insn_fetch(s32, 4, c->eip);
			break;
		case 1:
			c->modrm_ea += insn_fetch(s8, 1, c->eip);
			break;
		case 2:
			c->modrm_ea += insn_fetch(s32, 4, c->eip);
			break;
		}
	}
done:
	return rc;
}

static int decode_abs(struct x86_emulate_ctxt *ctxt,
		      struct x86_emulate_ops *ops)
{
	struct decode_cache *c = &ctxt->decode;
	int rc = X86EMUL_CONTINUE;

	switch (c->ad_bytes) {
	case 2:
		c->modrm_ea = insn_fetch(u16, 2, c->eip);
		break;
	case 4:
		c->modrm_ea = insn_fetch(u32, 4, c->eip);
		break;
	case 8:
		c->modrm_ea = insn_fetch(u64, 8, c->eip);
		break;
	}
done:
	return rc;
}

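/*
 * Read guest memory in chunks of up to 8 bytes, caching the data in
 * ctxt->decode.mem_read so a restarted instruction rereads the same bytes.
 */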
static int read_emulated(struct x86_emulate_ctxt *ctxt,
			 struct x86_emulate_ops *ops,
			 unsigned long addr, void *dest, unsigned size)
{
	int rc;
	struct read_cache *mc = &ctxt->decode.mem_read;
	u32 err;

	while (size) {
		int n = min(size, 8u);
		size -= n;
		if (mc->pos < mc->end)
			goto read_cached;

		rc = ops->read_emulated(addr, mc->data + mc->end, n, &err,
					ctxt->vcpu);
		if (rc == X86EMUL_PROPAGATE_FAULT)
			emulate_pf(ctxt, addr, err);
		if (rc != X86EMUL_CONTINUE)
			return rc;
		mc->end += n;

	read_cached:
		memcpy(dest, mc->data + mc->pos, n);
		mc->pos += n;
		dest += n;
		addr += n;
	}
	return X86EMUL_CONTINUE;
}

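/*
 * Fetch data for IN/INS through a read-ahead buffer: for a repeated INS as
 * many elements as fit in the buffer and the current page are read in one go,
 * so the loop does not need a separate exit for every element.
 */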
static int pio_in_emulated(struct x86_emulate_ctxt *ctxt,
			   struct x86_emulate_ops *ops,
			   unsigned int size, unsigned short port,
			   void *dest)
{
	struct read_cache *rc = &ctxt->decode.io_read;

	if (rc->pos == rc->end) { /* refill pio read ahead */
		struct decode_cache *c = &ctxt->decode;
		unsigned int in_page, n;
		unsigned int count = c->rep_prefix ?
			address_mask(c, c->regs[VCPU_REGS_RCX]) : 1;
		in_page = (ctxt->eflags & EFLG_DF) ?
			offset_in_page(c->regs[VCPU_REGS_RDI]) :
			PAGE_SIZE - offset_in_page(c->regs[VCPU_REGS_RDI]);
		n = min(min(in_page, (unsigned int)sizeof(rc->data)) / size,
			count);
		if (n == 0)
			n = 1;
		rc->pos = rc->end = 0;
		if (!ops->pio_in_emulated(size, port, rc->data, n, ctxt->vcpu))
			return 0;
		rc->end = n * size;
	}

	memcpy(dest, rc->data + rc->pos, size);
	rc->pos += size;
	return 1;
}

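/* Segment limit in bytes; page-granular limits are scaled by 4K. */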
static u32 desc_limit_scaled(struct desc_struct *desc)
{
	u32 limit = get_desc_limit(desc);

	return desc->g ? (limit << 12) | 0xfff : limit;
}

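/* Fetch the base/limit of the GDT, or of the LDT if the selector's TI bit is set. */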
static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt,
				     struct x86_emulate_ops *ops,
				     u16 selector, struct desc_ptr *dt)
{
	if (selector & 1 << 2) {
		struct desc_struct desc;
		memset (dt, 0, sizeof *dt);
		if (!ops->get_cached_descriptor(&desc, VCPU_SREG_LDTR, ctxt->vcpu))
			return;

		dt->size = desc_limit_scaled(&desc); /* what if limit > 65535? */
		dt->address = get_desc_base(&desc);
	} else
		ops->get_gdt(dt, ctxt->vcpu);
}

/* allowed just for 8-byte segment descriptors */
static int read_segment_descriptor(struct x86_emulate_ctxt *ctxt,
				   struct x86_emulate_ops *ops,
				   u16 selector, struct desc_struct *desc)
{
	struct desc_ptr dt;
	u16 index = selector >> 3;
	int ret;
	u32 err;
	ulong addr;

	get_descriptor_table_ptr(ctxt, ops, selector, &dt);

	if (dt.size < index * 8 + 7) {
		emulate_gp(ctxt, selector & 0xfffc);
		return X86EMUL_PROPAGATE_FAULT;
	}
	addr = dt.address + index * 8;
	ret = ops->read_std(addr, desc, sizeof *desc, ctxt->vcpu,  &err);
	if (ret == X86EMUL_PROPAGATE_FAULT)
		emulate_pf(ctxt, addr, err);

	return ret;
}

/* allowed just for 8-byte segment descriptors */
static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt,
				    struct x86_emulate_ops *ops,
				    u16 selector, struct desc_struct *desc)
{
	struct desc_ptr dt;
	u16 index = selector >> 3;
	u32 err;
	ulong addr;
	int ret;

	get_descriptor_table_ptr(ctxt, ops, selector, &dt);

	if (dt.size < index * 8 + 7) {
		emulate_gp(ctxt, selector & 0xfffc);
		return X86EMUL_PROPAGATE_FAULT;
	}

	addr = dt.address + index * 8;
	ret = ops->write_std(addr, desc, sizeof *desc, ctxt->vcpu, &err);
	if (ret == X86EMUL_PROPAGATE_FAULT)
		emulate_pf(ctxt, addr, err);

	return ret;
}

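/*
 * Load a segment register: validate the selector and descriptor according to
 * the target register and privilege rules, mark the descriptor accessed, then
 * install both the selector and the cached descriptor.
 */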
static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
				   struct x86_emulate_ops *ops,
				   u16 selector, int seg)
{
	struct desc_struct seg_desc;
	u8 dpl, rpl, cpl;
	unsigned err_vec = GP_VECTOR;
	u32 err_code = 0;
	bool null_selector = !(selector & ~0x3); /* 0000-0003 are null */
	int ret;

	memset(&seg_desc, 0, sizeof seg_desc);

	if ((seg <= VCPU_SREG_GS && ctxt->mode == X86EMUL_MODE_VM86)
	    || ctxt->mode == X86EMUL_MODE_REAL) {
		/* set real mode segment descriptor */
		set_desc_base(&seg_desc, selector << 4);
		set_desc_limit(&seg_desc, 0xffff);
		seg_desc.type = 3;
		seg_desc.p = 1;
		seg_desc.s = 1;
		goto load;
	}

	/* NULL selector is not valid for TR, CS and SS */
	if ((seg == VCPU_SREG_CS || seg == VCPU_SREG_SS || seg == VCPU_SREG_TR)
	    && null_selector)
		goto exception;

	/* TR should be in GDT only */
	if (seg == VCPU_SREG_TR && (selector & (1 << 2)))
		goto exception;

	if (null_selector) /* for NULL selector skip all following checks */
		goto load;

	ret = read_segment_descriptor(ctxt, ops, selector, &seg_desc);
	if (ret != X86EMUL_CONTINUE)
		return ret;

	err_code = selector & 0xfffc;
	err_vec = GP_VECTOR;

	/* can't load system descriptor into segment selector */
	if (seg <= VCPU_SREG_GS && !seg_desc.s)
		goto exception;

	if (!seg_desc.p) {
		err_vec = (seg == VCPU_SREG_SS) ? SS_VECTOR : NP_VECTOR;
		goto exception;
	}

	rpl = selector & 3;
	dpl = seg_desc.dpl;
	cpl = ops->cpl(ctxt->vcpu);

	switch (seg) {
	case VCPU_SREG_SS:
		/*
		 * segment is not a writable data segment or segment
		 * selector's RPL != CPL or DPL != CPL
		 */
		if (rpl != cpl || (seg_desc.type & 0xa) != 0x2 || dpl != cpl)
			goto exception;
		break;
	case VCPU_SREG_CS:
		if (!(seg_desc.type & 8))
			goto exception;

		if (seg_desc.type & 4) {
			/* conforming */
			if (dpl > cpl)
				goto exception;
		} else {
			/* nonconforming */
			if (rpl > cpl || dpl != cpl)
				goto exception;
		}
		/* CS(RPL) <- CPL */
		selector = (selector & 0xfffc) | cpl;
		break;
	case VCPU_SREG_TR:
		if (seg_desc.s || (seg_desc.type != 1 && seg_desc.type != 9))
			goto exception;
		break;
	case VCPU_SREG_LDTR:
		if (seg_desc.s || seg_desc.type != 2)
			goto exception;
		break;
	default: /*  DS, ES, FS, or GS */
942
		/*
943 944 945
		 * segment is not a data or readable code segment or
		 * ((segment is a data or nonconforming code segment)
		 * and (both RPL and CPL > DPL))
946
		 */
947 948 949 950
		if ((seg_desc.type & 0xa) == 0x8 ||
		    (((seg_desc.type & 0xc) != 0xc) &&
		     (rpl > dpl && cpl > dpl)))
			goto exception;
A
951
		break;
952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982
	}

	if (seg_desc.s) {
		/* mark segment as accessed */
		seg_desc.type |= 1;
		ret = write_segment_descriptor(ctxt, ops, selector, &seg_desc);
		if (ret != X86EMUL_CONTINUE)
			return ret;
	}
load:
	ops->set_segment_selector(selector, seg, ctxt->vcpu);
	ops->set_cached_descriptor(&seg_desc, seg, ctxt->vcpu);
	return X86EMUL_CONTINUE;
exception:
	emulate_exception(ctxt, err_vec, err_code, true);
	return X86EMUL_PROPAGATE_FAULT;
}

static inline int writeback(struct x86_emulate_ctxt *ctxt,
			    struct x86_emulate_ops *ops)
{
	int rc;
	struct decode_cache *c = &ctxt->decode;
	u32 err;

	switch (c->dst.type) {
	case OP_REG:
		/* The 4-byte case *is* correct:
		 * in 64-bit mode we zero-extend.
		 */
		switch (c->dst.bytes) {
A
983
		case 1:
984
			*(u8 *)c->dst.addr.reg = (u8)c->dst.val;
A
985 986
			break;
		case 2:
987
			*(u16 *)c->dst.addr.reg = (u16)c->dst.val;
A
988 989
			break;
		case 4:
990
			*c->dst.addr.reg = (u32)c->dst.val;
991 992
			break;	/* 64b: zero-ext */
		case 8:
993
			*c->dst.addr.reg = c->dst.val;
A
994 995 996
			break;
		}
		break;
997 998 999
	case OP_MEM:
		if (c->lock_prefix)
			rc = ops->cmpxchg_emulated(
1000
					c->dst.addr.mem,
1001 1002 1003 1004 1005
					&c->dst.orig_val,
					&c->dst.val,
					c->dst.bytes,
					&err,
					ctxt->vcpu);
1006
		else
1007
			rc = ops->write_emulated(
1008
					c->dst.addr.mem,
1009 1010 1011 1012 1013
					&c->dst.val,
					c->dst.bytes,
					&err,
					ctxt->vcpu);
		if (rc == X86EMUL_PROPAGATE_FAULT)
1014
			emulate_pf(ctxt, c->dst.addr.mem, err);
1015 1016
		if (rc != X86EMUL_CONTINUE)
			return rc;
1017
		break;
1018 1019
	case OP_NONE:
		/* no writeback */
1020
		break;
1021
	default:
1022
		break;
A
1023
	}
1024 1025
	return X86EMUL_CONTINUE;
}
A
1026

1027 1028 1029 1030
static inline void emulate_push(struct x86_emulate_ctxt *ctxt,
				struct x86_emulate_ops *ops)
{
	struct decode_cache *c = &ctxt->decode;
1031

1032 1033 1034 1035
	c->dst.type  = OP_MEM;
	c->dst.bytes = c->op_bytes;
	c->dst.val = c->src.val;
	register_address_increment(c, &c->regs[VCPU_REGS_RSP], -c->op_bytes);
1036 1037
	c->dst.addr.mem = register_address(c, ss_base(ctxt, ops),
					   c->regs[VCPU_REGS_RSP]);
1038
}
1039

1040 1041 1042 1043 1044 1045
static int emulate_pop(struct x86_emulate_ctxt *ctxt,
		       struct x86_emulate_ops *ops,
		       void *dest, int len)
{
	struct decode_cache *c = &ctxt->decode;
	int rc;
1046

1047 1048 1049 1050 1051 1052 1053 1054
	rc = read_emulated(ctxt, ops, register_address(c, ss_base(ctxt, ops),
						       c->regs[VCPU_REGS_RSP]),
			   dest, len);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	register_address_increment(c, &c->regs[VCPU_REGS_RSP], len);
	return rc;
1055 1056
}

1057 1058 1059
static int emulate_popf(struct x86_emulate_ctxt *ctxt,
		       struct x86_emulate_ops *ops,
		       void *dest, int len)
1060 1061
{
	int rc;
1062 1063 1064
	unsigned long val, change_mask;
	int iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT;
	int cpl = ops->cpl(ctxt->vcpu);
1065

1066 1067 1068
	rc = emulate_pop(ctxt, ops, &val, len);
	if (rc != X86EMUL_CONTINUE)
		return rc;
1069

1070 1071
	change_mask = EFLG_CF | EFLG_PF | EFLG_AF | EFLG_ZF | EFLG_SF | EFLG_OF
		| EFLG_TF | EFLG_DF | EFLG_NT | EFLG_RF | EFLG_AC | EFLG_ID;
1072

1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091
	switch(ctxt->mode) {
	case X86EMUL_MODE_PROT64:
	case X86EMUL_MODE_PROT32:
	case X86EMUL_MODE_PROT16:
		if (cpl == 0)
			change_mask |= EFLG_IOPL;
		if (cpl <= iopl)
			change_mask |= EFLG_IF;
		break;
	case X86EMUL_MODE_VM86:
		if (iopl < 3) {
			emulate_gp(ctxt, 0);
			return X86EMUL_PROPAGATE_FAULT;
		}
		change_mask |= EFLG_IF;
		break;
	default: /* real mode */
		change_mask |= (EFLG_IOPL | EFLG_IF);
		break;
1092
	}
1093 1094 1095 1096 1097

	*(unsigned long *)dest =
		(ctxt->eflags & ~change_mask) | (val & change_mask);

	return rc;
1098 1099
}

1100 1101
static void emulate_push_sreg(struct x86_emulate_ctxt *ctxt,
			      struct x86_emulate_ops *ops, int seg)
1102
{
1103
	struct decode_cache *c = &ctxt->decode;
1104

1105
	c->src.val = ops->get_segment_selector(seg, ctxt->vcpu);
1106

1107
	emulate_push(ctxt, ops);
1108 1109
}

1110 1111
static int emulate_pop_sreg(struct x86_emulate_ctxt *ctxt,
			     struct x86_emulate_ops *ops, int seg)
1112
{
1113 1114 1115
	struct decode_cache *c = &ctxt->decode;
	unsigned long selector;
	int rc;
1116

1117 1118 1119 1120 1121 1122
	rc = emulate_pop(ctxt, ops, &selector, c->op_bytes);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	rc = load_segment_descriptor(ctxt, ops, (u16)selector, seg);
	return rc;
1123 1124
}

1125 1126
static int emulate_pusha(struct x86_emulate_ctxt *ctxt,
			  struct x86_emulate_ops *ops)
1127
{
1128 1129 1130 1131
	struct decode_cache *c = &ctxt->decode;
	unsigned long old_esp = c->regs[VCPU_REGS_RSP];
	int rc = X86EMUL_CONTINUE;
	int reg = VCPU_REGS_RAX;
1132

1133 1134 1135
	while (reg <= VCPU_REGS_RDI) {
		(reg == VCPU_REGS_RSP) ?
		(c->src.val = old_esp) : (c->src.val = c->regs[reg]);
1136

1137
		emulate_push(ctxt, ops);
1138

1139 1140 1141
		rc = writeback(ctxt, ops);
		if (rc != X86EMUL_CONTINUE)
			return rc;
1142

1143
		++reg;
1144 1145
	}

1146 1147 1148 1149
	/* Disable writeback. */
	c->dst.type = OP_NONE;

	return rc;
1150 1151
}

1152 1153
static int emulate_popa(struct x86_emulate_ctxt *ctxt,
			struct x86_emulate_ops *ops)
1154
{
1155 1156 1157
	struct decode_cache *c = &ctxt->decode;
	int rc = X86EMUL_CONTINUE;
	int reg = VCPU_REGS_RDI;
1158

1159 1160 1161 1162 1163 1164
	while (reg >= VCPU_REGS_RAX) {
		if (reg == VCPU_REGS_RSP) {
			register_address_increment(c, &c->regs[VCPU_REGS_RSP],
							c->op_bytes);
			--reg;
		}
1165

1166 1167 1168 1169
		rc = emulate_pop(ctxt, ops, &c->regs[reg], c->op_bytes);
		if (rc != X86EMUL_CONTINUE)
			break;
		--reg;
1170
	}
1171
	return rc;
1172 1173
}

1174 1175
static int emulate_iret_real(struct x86_emulate_ctxt *ctxt,
			     struct x86_emulate_ops *ops)
1176
{
1177 1178 1179 1180 1181 1182 1183 1184 1185
	struct decode_cache *c = &ctxt->decode;
	int rc = X86EMUL_CONTINUE;
	unsigned long temp_eip = 0;
	unsigned long temp_eflags = 0;
	unsigned long cs = 0;
	unsigned long mask = EFLG_CF | EFLG_PF | EFLG_AF | EFLG_ZF | EFLG_SF | EFLG_TF |
			     EFLG_IF | EFLG_DF | EFLG_OF | EFLG_IOPL | EFLG_NT | EFLG_RF |
			     EFLG_AC | EFLG_ID | (1 << 1); /* Last one is the reserved bit */
	unsigned long vm86_mask = EFLG_VM | EFLG_VIF | EFLG_VIP;
1186

1187
	/* TODO: Add stack limit check */
1188

1189
	rc = emulate_pop(ctxt, ops, &temp_eip, c->op_bytes);
1190

1191 1192
	if (rc != X86EMUL_CONTINUE)
		return rc;
1193

1194 1195 1196 1197
	if (temp_eip & ~0xffff) {
		emulate_gp(ctxt, 0);
		return X86EMUL_PROPAGATE_FAULT;
	}
1198

1199
	rc = emulate_pop(ctxt, ops, &cs, c->op_bytes);
1200

1201 1202
	if (rc != X86EMUL_CONTINUE)
		return rc;
1203

1204
	rc = emulate_pop(ctxt, ops, &temp_eflags, c->op_bytes);
1205

1206 1207
	if (rc != X86EMUL_CONTINUE)
		return rc;
1208

1209
	rc = load_segment_descriptor(ctxt, ops, (u16)cs, VCPU_SREG_CS);
1210

1211 1212
	if (rc != X86EMUL_CONTINUE)
		return rc;
1213

1214
	c->eip = temp_eip;
1215 1216


1217 1218 1219 1220 1221
	if (c->op_bytes == 4)
		ctxt->eflags = ((temp_eflags & mask) | (ctxt->eflags & vm86_mask));
	else if (c->op_bytes == 2) {
		ctxt->eflags &= ~0xffff;
		ctxt->eflags |= temp_eflags;
1222
	}
1223 1224 1225 1226 1227

	ctxt->eflags &= ~EFLG_RESERVED_ZEROS_MASK; /* Clear reserved zeros */
	ctxt->eflags |= EFLG_RESERVED_ONE_MASK;

	return rc;
1228 1229
}

1230 1231
static inline int emulate_iret(struct x86_emulate_ctxt *ctxt,
				    struct x86_emulate_ops* ops)
1232
{
1233 1234 1235 1236 1237 1238 1239
	switch(ctxt->mode) {
	case X86EMUL_MODE_REAL:
		return emulate_iret_real(ctxt, ops);
	case X86EMUL_MODE_VM86:
	case X86EMUL_MODE_PROT16:
	case X86EMUL_MODE_PROT32:
	case X86EMUL_MODE_PROT64:
1240
	default:
1241 1242
		/* iret from protected mode unimplemented yet */
		return X86EMUL_UNHANDLEABLE;
1243 1244 1245
	}
}

static inline int emulate_grp1a(struct x86_emulate_ctxt *ctxt,
				struct x86_emulate_ops *ops)
{
	struct decode_cache *c = &ctxt->decode;

	return emulate_pop(ctxt, ops, &c->dst.val, c->dst.bytes);
}

static inline void emulate_grp2(struct x86_emulate_ctxt *ctxt)
{
	struct decode_cache *c = &ctxt->decode;
	switch (c->modrm_reg) {
	case 0:	/* rol */
		emulate_2op_SrcB("rol", c->src, c->dst, ctxt->eflags);
		break;
	case 1:	/* ror */
		emulate_2op_SrcB("ror", c->src, c->dst, ctxt->eflags);
		break;
	case 2:	/* rcl */
		emulate_2op_SrcB("rcl", c->src, c->dst, ctxt->eflags);
		break;
	case 3:	/* rcr */
		emulate_2op_SrcB("rcr", c->src, c->dst, ctxt->eflags);
		break;
	case 4:	/* sal/shl */
	case 6:	/* sal/shl */
		emulate_2op_SrcB("sal", c->src, c->dst, ctxt->eflags);
		break;
	case 5:	/* shr */
		emulate_2op_SrcB("shr", c->src, c->dst, ctxt->eflags);
		break;
	case 7:	/* sar */
		emulate_2op_SrcB("sar", c->src, c->dst, ctxt->eflags);
		break;
	}
}

static inline int emulate_grp3(struct x86_emulate_ctxt *ctxt,
			       struct x86_emulate_ops *ops)
{
	struct decode_cache *c = &ctxt->decode;

	switch (c->modrm_reg) {
	case 0 ... 1:	/* test */
		emulate_2op_SrcV("test", c->src, c->dst, ctxt->eflags);
		break;
	case 2:	/* not */
		c->dst.val = ~c->dst.val;
		break;
	case 3:	/* neg */
		emulate_1op("neg", c->dst, ctxt->eflags);
		break;
	default:
		return 0;
	}
	return 1;
}

static inline int emulate_grp45(struct x86_emulate_ctxt *ctxt,
			       struct x86_emulate_ops *ops)
{
	struct decode_cache *c = &ctxt->decode;

	switch (c->modrm_reg) {
	case 0:	/* inc */
		emulate_1op("inc", c->dst, ctxt->eflags);
		break;
	case 1:	/* dec */
		emulate_1op("dec", c->dst, ctxt->eflags);
		break;
	case 2: /* call near abs */ {
		long int old_eip;
		old_eip = c->eip;
		c->eip = c->src.val;
		c->src.val = old_eip;
		emulate_push(ctxt, ops);
		break;
	}
	case 4: /* jmp abs */
		c->eip = c->src.val;
		break;
	case 6:	/* push */
		emulate_push(ctxt, ops);
		break;
	}
	return X86EMUL_CONTINUE;
}
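/*
 * Group 9: CMPXCHG8B.  If EDX:EAX differs from the 64-bit destination, load
 * the destination into EDX:EAX and clear ZF; otherwise store ECX:EBX into the
 * destination and set ZF.
 */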

static inline int emulate_grp9(struct x86_emulate_ctxt *ctxt,
			       struct x86_emulate_ops *ops)
{
	struct decode_cache *c = &ctxt->decode;
	u64 old = c->dst.orig_val64;

	if (((u32) (old >> 0) != (u32) c->regs[VCPU_REGS_RAX]) ||
	    ((u32) (old >> 32) != (u32) c->regs[VCPU_REGS_RDX])) {
		c->regs[VCPU_REGS_RAX] = (u32) (old >> 0);
		c->regs[VCPU_REGS_RDX] = (u32) (old >> 32);
		ctxt->eflags &= ~EFLG_ZF;
	} else {
		c->dst.val64 = ((u64)c->regs[VCPU_REGS_RCX] << 32) |
			(u32) c->regs[VCPU_REGS_RBX];

		ctxt->eflags |= EFLG_ZF;
	}
	return X86EMUL_CONTINUE;
}

static int emulate_ret_far(struct x86_emulate_ctxt *ctxt,
			   struct x86_emulate_ops *ops)
{
	struct decode_cache *c = &ctxt->decode;
	int rc;
	unsigned long cs;

	rc = emulate_pop(ctxt, ops, &c->eip, c->op_bytes);
	if (rc != X86EMUL_CONTINUE)
		return rc;
	if (c->op_bytes == 4)
		c->eip = (u32)c->eip;
	rc = emulate_pop(ctxt, ops, &cs, c->op_bytes);
	if (rc != X86EMUL_CONTINUE)
		return rc;
	rc = load_segment_descriptor(ctxt, ops, (u16)cs, VCPU_SREG_CS);
	return rc;
}
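/*
 * Build the flat code and stack segment descriptors (base 0, 4GB limit) used
 * by the SYSCALL/SYSENTER/SYSEXIT emulation; DPL and the L bit are adjusted
 * by the callers.
 */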

static inline void
setup_syscalls_segments(struct x86_emulate_ctxt *ctxt,
			struct x86_emulate_ops *ops, struct desc_struct *cs,
			struct desc_struct *ss)
{
	memset(cs, 0, sizeof(struct desc_struct));
	ops->get_cached_descriptor(cs, VCPU_SREG_CS, ctxt->vcpu);
	memset(ss, 0, sizeof(struct desc_struct));

	cs->l = 0;		/* will be adjusted later */
	set_desc_base(cs, 0);	/* flat segment */
	cs->g = 1;		/* 4kb granularity */
	set_desc_limit(cs, 0xfffff);	/* 4GB limit */
	cs->type = 0x0b;	/* Read, Execute, Accessed */
	cs->s = 1;
	cs->dpl = 0;		/* will be adjusted later */
	cs->p = 1;
	cs->d = 1;

	set_desc_base(ss, 0);	/* flat segment */
	set_desc_limit(ss, 0xfffff);	/* 4GB limit */
	ss->g = 1;		/* 4kb granularity */
	ss->s = 1;
	ss->type = 0x03;	/* Read/Write, Accessed */
	ss->d = 1;		/* 32bit stack segment */
	ss->dpl = 0;
	ss->p = 1;
}

static int
emulate_syscall(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
{
	struct decode_cache *c = &ctxt->decode;
	struct desc_struct cs, ss;
	u64 msr_data;
	u16 cs_sel, ss_sel;

	/* syscall is not available in real mode */
	if (ctxt->mode == X86EMUL_MODE_REAL ||
	    ctxt->mode == X86EMUL_MODE_VM86) {
		emulate_ud(ctxt);
		return X86EMUL_PROPAGATE_FAULT;
	}

	setup_syscalls_segments(ctxt, ops, &cs, &ss);

	ops->get_msr(ctxt->vcpu, MSR_STAR, &msr_data);
	msr_data >>= 32;
	cs_sel = (u16)(msr_data & 0xfffc);
	ss_sel = (u16)(msr_data + 8);

	if (is_long_mode(ctxt->vcpu)) {
		cs.d = 0;
		cs.l = 1;
	}
	ops->set_cached_descriptor(&cs, VCPU_SREG_CS, ctxt->vcpu);
	ops->set_segment_selector(cs_sel, VCPU_SREG_CS, ctxt->vcpu);
	ops->set_cached_descriptor(&ss, VCPU_SREG_SS, ctxt->vcpu);
	ops->set_segment_selector(ss_sel, VCPU_SREG_SS, ctxt->vcpu);

	c->regs[VCPU_REGS_RCX] = c->eip;
	if (is_long_mode(ctxt->vcpu)) {
#ifdef CONFIG_X86_64
		c->regs[VCPU_REGS_R11] = ctxt->eflags & ~EFLG_RF;

		ops->get_msr(ctxt->vcpu,
			     ctxt->mode == X86EMUL_MODE_PROT64 ?
			     MSR_LSTAR : MSR_CSTAR, &msr_data);
		c->eip = msr_data;

		ops->get_msr(ctxt->vcpu, MSR_SYSCALL_MASK, &msr_data);
		ctxt->eflags &= ~(msr_data | EFLG_RF);
#endif
	} else {
		/* legacy mode */
		ops->get_msr(ctxt->vcpu, MSR_STAR, &msr_data);
		c->eip = (u32)msr_data;

		ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF);
	}

	return X86EMUL_CONTINUE;
}

static int
emulate_sysenter(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
{
	struct decode_cache *c = &ctxt->decode;
	struct desc_struct cs, ss;
	u64 msr_data;
	u16 cs_sel, ss_sel;

	/* inject #GP if in real mode */
	if (ctxt->mode == X86EMUL_MODE_REAL) {
		emulate_gp(ctxt, 0);
		return X86EMUL_PROPAGATE_FAULT;
	}

	/* XXX sysenter/sysexit have not been tested in 64bit mode.
	 * Therefore, we inject an #UD.
	 */
	if (ctxt->mode == X86EMUL_MODE_PROT64) {
		emulate_ud(ctxt);
		return X86EMUL_PROPAGATE_FAULT;
	}

	setup_syscalls_segments(ctxt, ops, &cs, &ss);

	ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_CS, &msr_data);
	switch (ctxt->mode) {
	case X86EMUL_MODE_PROT32:
		if ((msr_data & 0xfffc) == 0x0) {
			emulate_gp(ctxt, 0);
			return X86EMUL_PROPAGATE_FAULT;
		}
		break;
	case X86EMUL_MODE_PROT64:
		if (msr_data == 0x0) {
			emulate_gp(ctxt, 0);
			return X86EMUL_PROPAGATE_FAULT;
		}
		break;
	}

	ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF);
	cs_sel = (u16)msr_data;
	cs_sel &= ~SELECTOR_RPL_MASK;
	ss_sel = cs_sel + 8;
	ss_sel &= ~SELECTOR_RPL_MASK;
	if (ctxt->mode == X86EMUL_MODE_PROT64
		|| is_long_mode(ctxt->vcpu)) {
		cs.d = 0;
		cs.l = 1;
	}

	ops->set_cached_descriptor(&cs, VCPU_SREG_CS, ctxt->vcpu);
	ops->set_segment_selector(cs_sel, VCPU_SREG_CS, ctxt->vcpu);
	ops->set_cached_descriptor(&ss, VCPU_SREG_SS, ctxt->vcpu);
	ops->set_segment_selector(ss_sel, VCPU_SREG_SS, ctxt->vcpu);

	ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_EIP, &msr_data);
	c->eip = msr_data;

	ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_ESP, &msr_data);
	c->regs[VCPU_REGS_RSP] = msr_data;

	return X86EMUL_CONTINUE;
}

static int
emulate_sysexit(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
{
	struct decode_cache *c = &ctxt->decode;
	struct desc_struct cs, ss;
	u64 msr_data;
	int usermode;
	u16 cs_sel, ss_sel;

	/* inject #GP if in real mode or Virtual 8086 mode */
	if (ctxt->mode == X86EMUL_MODE_REAL ||
	    ctxt->mode == X86EMUL_MODE_VM86) {
		emulate_gp(ctxt, 0);
		return X86EMUL_PROPAGATE_FAULT;
	}

	setup_syscalls_segments(ctxt, ops, &cs, &ss);

	if ((c->rex_prefix & 0x8) != 0x0)
		usermode = X86EMUL_MODE_PROT64;
	else
		usermode = X86EMUL_MODE_PROT32;

	cs.dpl = 3;
	ss.dpl = 3;
	ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_CS, &msr_data);
	switch (usermode) {
	case X86EMUL_MODE_PROT32:
		cs_sel = (u16)(msr_data + 16);
		if ((msr_data & 0xfffc) == 0x0) {
			emulate_gp(ctxt, 0);
			return X86EMUL_PROPAGATE_FAULT;
		}
		ss_sel = (u16)(msr_data + 24);
		break;
	case X86EMUL_MODE_PROT64:
		cs_sel = (u16)(msr_data + 32);
		if (msr_data == 0x0) {
			emulate_gp(ctxt, 0);
			return X86EMUL_PROPAGATE_FAULT;
		}
		ss_sel = cs_sel + 8;
		cs.d = 0;
		cs.l = 1;
		break;
	}
	cs_sel |= SELECTOR_RPL_MASK;
	ss_sel |= SELECTOR_RPL_MASK;

	ops->set_cached_descriptor(&cs, VCPU_SREG_CS, ctxt->vcpu);
	ops->set_segment_selector(cs_sel, VCPU_SREG_CS, ctxt->vcpu);
	ops->set_cached_descriptor(&ss, VCPU_SREG_SS, ctxt->vcpu);
	ops->set_segment_selector(ss_sel, VCPU_SREG_SS, ctxt->vcpu);

	c->eip = c->regs[VCPU_REGS_RDX];
	c->regs[VCPU_REGS_RSP] = c->regs[VCPU_REGS_RCX];

	return X86EMUL_CONTINUE;
}
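/*
 * True when IOPL does not permit direct I/O, i.e. CPL > IOPL or VM86 mode, in
 * which case the TSS I/O permission bitmap must be consulted.
 */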

static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt,
			      struct x86_emulate_ops *ops)
{
	int iopl;
	if (ctxt->mode == X86EMUL_MODE_REAL)
		return false;
	if (ctxt->mode == X86EMUL_MODE_VM86)
		return true;
	iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT;
	return ops->cpl(ctxt->vcpu) > iopl;
}

static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt,
					    struct x86_emulate_ops *ops,
					    u16 port, u16 len)
{
	struct desc_struct tr_seg;
	int r;
	u16 io_bitmap_ptr;
	u8 perm, bit_idx = port & 0x7;
	unsigned mask = (1 << len) - 1;

	ops->get_cached_descriptor(&tr_seg, VCPU_SREG_TR, ctxt->vcpu);
	if (!tr_seg.p)
		return false;
	if (desc_limit_scaled(&tr_seg) < 103)
		return false;
	r = ops->read_std(get_desc_base(&tr_seg) + 102, &io_bitmap_ptr, 2,
			  ctxt->vcpu, NULL);
	if (r != X86EMUL_CONTINUE)
		return false;
	if (io_bitmap_ptr + port/8 > desc_limit_scaled(&tr_seg))
		return false;
	r = ops->read_std(get_desc_base(&tr_seg) + io_bitmap_ptr + port/8,
			  &perm, 1, ctxt->vcpu, NULL);
	if (r != X86EMUL_CONTINUE)
		return false;
	if ((perm >> bit_idx) & mask)
		return false;
	return true;
}

static bool emulator_io_permited(struct x86_emulate_ctxt *ctxt,
				 struct x86_emulate_ops *ops,
				 u16 port, u16 len)
{
	if (ctxt->perm_ok)
		return true;

	if (emulator_bad_iopl(ctxt, ops))
		if (!emulator_io_port_access_allowed(ctxt, ops, port, len))
			return false;

	ctxt->perm_ok = true;

	return true;
}
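/*
 * 16-bit hardware task switch helpers: dump the outgoing task's registers and
 * segment selectors into its TSS, then load the incoming task's state and
 * reload the segment descriptors.
 */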

static void save_state_to_tss16(struct x86_emulate_ctxt *ctxt,
				struct x86_emulate_ops *ops,
				struct tss_segment_16 *tss)
{
	struct decode_cache *c = &ctxt->decode;

	tss->ip = c->eip;
	tss->flag = ctxt->eflags;
	tss->ax = c->regs[VCPU_REGS_RAX];
	tss->cx = c->regs[VCPU_REGS_RCX];
	tss->dx = c->regs[VCPU_REGS_RDX];
	tss->bx = c->regs[VCPU_REGS_RBX];
	tss->sp = c->regs[VCPU_REGS_RSP];
	tss->bp = c->regs[VCPU_REGS_RBP];
	tss->si = c->regs[VCPU_REGS_RSI];
	tss->di = c->regs[VCPU_REGS_RDI];

	tss->es = ops->get_segment_selector(VCPU_SREG_ES, ctxt->vcpu);
	tss->cs = ops->get_segment_selector(VCPU_SREG_CS, ctxt->vcpu);
	tss->ss = ops->get_segment_selector(VCPU_SREG_SS, ctxt->vcpu);
	tss->ds = ops->get_segment_selector(VCPU_SREG_DS, ctxt->vcpu);
	tss->ldt = ops->get_segment_selector(VCPU_SREG_LDTR, ctxt->vcpu);
}

static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt,
				 struct x86_emulate_ops *ops,
				 struct tss_segment_16 *tss)
{
	struct decode_cache *c = &ctxt->decode;
	int ret;

	c->eip = tss->ip;
	ctxt->eflags = tss->flag | 2;
	c->regs[VCPU_REGS_RAX] = tss->ax;
	c->regs[VCPU_REGS_RCX] = tss->cx;
	c->regs[VCPU_REGS_RDX] = tss->dx;
	c->regs[VCPU_REGS_RBX] = tss->bx;
	c->regs[VCPU_REGS_RSP] = tss->sp;
	c->regs[VCPU_REGS_RBP] = tss->bp;
	c->regs[VCPU_REGS_RSI] = tss->si;
	c->regs[VCPU_REGS_RDI] = tss->di;

	/*
	 * SDM says that segment selectors are loaded before segment
	 * descriptors
	 */
	ops->set_segment_selector(tss->ldt, VCPU_SREG_LDTR, ctxt->vcpu);
	ops->set_segment_selector(tss->es, VCPU_SREG_ES, ctxt->vcpu);
	ops->set_segment_selector(tss->cs, VCPU_SREG_CS, ctxt->vcpu);
	ops->set_segment_selector(tss->ss, VCPU_SREG_SS, ctxt->vcpu);
	ops->set_segment_selector(tss->ds, VCPU_SREG_DS, ctxt->vcpu);

	/*
	 * Now load segment descriptors. If a fault happens at this stage
	 * it is handled in the context of the new task
	 */
	ret = load_segment_descriptor(ctxt, ops, tss->ldt, VCPU_SREG_LDTR);
	if (ret != X86EMUL_CONTINUE)
		return ret;
	ret = load_segment_descriptor(ctxt, ops, tss->es, VCPU_SREG_ES);
	if (ret != X86EMUL_CONTINUE)
		return ret;
	ret = load_segment_descriptor(ctxt, ops, tss->cs, VCPU_SREG_CS);
	if (ret != X86EMUL_CONTINUE)
		return ret;
	ret = load_segment_descriptor(ctxt, ops, tss->ss, VCPU_SREG_SS);
	if (ret != X86EMUL_CONTINUE)
		return ret;
	ret = load_segment_descriptor(ctxt, ops, tss->ds, VCPU_SREG_DS);
	if (ret != X86EMUL_CONTINUE)
		return ret;

	return X86EMUL_CONTINUE;
}

static int task_switch_16(struct x86_emulate_ctxt *ctxt,
			  struct x86_emulate_ops *ops,
			  u16 tss_selector, u16 old_tss_sel,
			  ulong old_tss_base, struct desc_struct *new_desc)
{
	struct tss_segment_16 tss_seg;
	int ret;
	u32 err, new_tss_base = get_desc_base(new_desc);

	ret = ops->read_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu,
			    &err);
	if (ret == X86EMUL_PROPAGATE_FAULT) {
		/* FIXME: need to provide precise fault address */
		emulate_pf(ctxt, old_tss_base, err);
		return ret;
	}

	save_state_to_tss16(ctxt, ops, &tss_seg);

	ret = ops->write_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu,
			     &err);
	if (ret == X86EMUL_PROPAGATE_FAULT) {
		/* FIXME: need to provide precise fault address */
		emulate_pf(ctxt, old_tss_base, err);
		return ret;
	}

	ret = ops->read_std(new_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu,
			    &err);
	if (ret == X86EMUL_PROPAGATE_FAULT) {
		/* FIXME: need to provide precise fault address */
		emulate_pf(ctxt, new_tss_base, err);
		return ret;
	}

	if (old_tss_sel != 0xffff) {
		tss_seg.prev_task_link = old_tss_sel;

		ret = ops->write_std(new_tss_base,
				     &tss_seg.prev_task_link,
				     sizeof tss_seg.prev_task_link,
				     ctxt->vcpu, &err);
		if (ret == X86EMUL_PROPAGATE_FAULT) {
			/* FIXME: need to provide precise fault address */
			emulate_pf(ctxt, new_tss_base, err);
			return ret;
		}
	}

	return load_state_from_tss16(ctxt, ops, &tss_seg);
}
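/*
 * 32-bit counterparts of the TSS helpers above; the 32-bit TSS additionally
 * carries CR3 and the FS/GS selectors.
 */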

static void save_state_to_tss32(struct x86_emulate_ctxt *ctxt,
				struct x86_emulate_ops *ops,
				struct tss_segment_32 *tss)
{
	struct decode_cache *c = &ctxt->decode;

	tss->cr3 = ops->get_cr(3, ctxt->vcpu);
	tss->eip = c->eip;
	tss->eflags = ctxt->eflags;
	tss->eax = c->regs[VCPU_REGS_RAX];
	tss->ecx = c->regs[VCPU_REGS_RCX];
	tss->edx = c->regs[VCPU_REGS_RDX];
	tss->ebx = c->regs[VCPU_REGS_RBX];
	tss->esp = c->regs[VCPU_REGS_RSP];
	tss->ebp = c->regs[VCPU_REGS_RBP];
	tss->esi = c->regs[VCPU_REGS_RSI];
	tss->edi = c->regs[VCPU_REGS_RDI];

	tss->es = ops->get_segment_selector(VCPU_SREG_ES, ctxt->vcpu);
	tss->cs = ops->get_segment_selector(VCPU_SREG_CS, ctxt->vcpu);
	tss->ss = ops->get_segment_selector(VCPU_SREG_SS, ctxt->vcpu);
	tss->ds = ops->get_segment_selector(VCPU_SREG_DS, ctxt->vcpu);
	tss->fs = ops->get_segment_selector(VCPU_SREG_FS, ctxt->vcpu);
	tss->gs = ops->get_segment_selector(VCPU_SREG_GS, ctxt->vcpu);
	tss->ldt_selector = ops->get_segment_selector(VCPU_SREG_LDTR, ctxt->vcpu);
}

static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt,
				 struct x86_emulate_ops *ops,
				 struct tss_segment_32 *tss)
{
	struct decode_cache *c = &ctxt->decode;
	int ret;

	if (ops->set_cr(3, tss->cr3, ctxt->vcpu)) {
		emulate_gp(ctxt, 0);
		return X86EMUL_PROPAGATE_FAULT;
	}
	c->eip = tss->eip;
	ctxt->eflags = tss->eflags | 2;
	c->regs[VCPU_REGS_RAX] = tss->eax;
	c->regs[VCPU_REGS_RCX] = tss->ecx;
	c->regs[VCPU_REGS_RDX] = tss->edx;
	c->regs[VCPU_REGS_RBX] = tss->ebx;
	c->regs[VCPU_REGS_RSP] = tss->esp;
	c->regs[VCPU_REGS_RBP] = tss->ebp;
	c->regs[VCPU_REGS_RSI] = tss->esi;
	c->regs[VCPU_REGS_RDI] = tss->edi;

	/*
	 * SDM says that segment selectors are loaded before segment
	 * descriptors
	 */
	ops->set_segment_selector(tss->ldt_selector, VCPU_SREG_LDTR, ctxt->vcpu);
	ops->set_segment_selector(tss->es, VCPU_SREG_ES, ctxt->vcpu);
	ops->set_segment_selector(tss->cs, VCPU_SREG_CS, ctxt->vcpu);
	ops->set_segment_selector(tss->ss, VCPU_SREG_SS, ctxt->vcpu);
	ops->set_segment_selector(tss->ds, VCPU_SREG_DS, ctxt->vcpu);
	ops->set_segment_selector(tss->fs, VCPU_SREG_FS, ctxt->vcpu);
	ops->set_segment_selector(tss->gs, VCPU_SREG_GS, ctxt->vcpu);

	/*
	 * Now load the segment descriptors. If a fault happens at this
	 * stage, it is handled in the context of the new task.
	 */
	ret = load_segment_descriptor(ctxt, ops, tss->ldt_selector, VCPU_SREG_LDTR);
	if (ret != X86EMUL_CONTINUE)
		return ret;
	ret = load_segment_descriptor(ctxt, ops, tss->es, VCPU_SREG_ES);
	if (ret != X86EMUL_CONTINUE)
		return ret;
	ret = load_segment_descriptor(ctxt, ops, tss->cs, VCPU_SREG_CS);
	if (ret != X86EMUL_CONTINUE)
		return ret;
	ret = load_segment_descriptor(ctxt, ops, tss->ss, VCPU_SREG_SS);
	if (ret != X86EMUL_CONTINUE)
		return ret;
	ret = load_segment_descriptor(ctxt, ops, tss->ds, VCPU_SREG_DS);
	if (ret != X86EMUL_CONTINUE)
		return ret;
	ret = load_segment_descriptor(ctxt, ops, tss->fs, VCPU_SREG_FS);
	if (ret != X86EMUL_CONTINUE)
		return ret;
	ret = load_segment_descriptor(ctxt, ops, tss->gs, VCPU_SREG_GS);
	if (ret != X86EMUL_CONTINUE)
		return ret;

	return X86EMUL_CONTINUE;
}

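/* 32-bit counterpart of task_switch_16(), using struct tss_segment_32. */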
static int task_switch_32(struct x86_emulate_ctxt *ctxt,
			  struct x86_emulate_ops *ops,
			  u16 tss_selector, u16 old_tss_sel,
			  ulong old_tss_base, struct desc_struct *new_desc)
{
	struct tss_segment_32 tss_seg;
	int ret;
	u32 err, new_tss_base = get_desc_base(new_desc);

	ret = ops->read_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu,
			    &err);
	if (ret == X86EMUL_PROPAGATE_FAULT) {
		/* FIXME: need to provide precise fault address */
		emulate_pf(ctxt, old_tss_base, err);
		return ret;
	}

	save_state_to_tss32(ctxt, ops, &tss_seg);

	ret = ops->write_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu,
			     &err);
	if (ret == X86EMUL_PROPAGATE_FAULT) {
		/* FIXME: need to provide precise fault address */
		emulate_pf(ctxt, old_tss_base, err);
		return ret;
	}

	ret = ops->read_std(new_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu,
			    &err);
	if (ret == X86EMUL_PROPAGATE_FAULT) {
		/* FIXME: need to provide precise fault address */
		emulate_pf(ctxt, new_tss_base, err);
		return ret;
	}

	if (old_tss_sel != 0xffff) {
		tss_seg.prev_task_link = old_tss_sel;

		ret = ops->write_std(new_tss_base,
				     &tss_seg.prev_task_link,
				     sizeof tss_seg.prev_task_link,
				     ctxt->vcpu, &err);
		if (ret == X86EMUL_PROPAGATE_FAULT) {
			/* FIXME: need to provide precise fault address */
			emulate_pf(ctxt, new_tss_base, err);
			return ret;
		}
	}

	return load_state_from_tss32(ctxt, ops, &tss_seg);
}

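/*
 * Common task-switch worker: validate the new TSS descriptor, clear or
 * set the TSS busy bits and EFLAGS.NT as the switch reason requires,
 * set CR0.TS, load TR, and optionally push the error code on the new
 * task's stack.
 */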
static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt,
				   struct x86_emulate_ops *ops,
				   u16 tss_selector, int reason,
				   bool has_error_code, u32 error_code)
{
	struct desc_struct curr_tss_desc, next_tss_desc;
	int ret;
	u16 old_tss_sel = ops->get_segment_selector(VCPU_SREG_TR, ctxt->vcpu);
	ulong old_tss_base =
		ops->get_cached_segment_base(VCPU_SREG_TR, ctxt->vcpu);
	u32 desc_limit;

	/* FIXME: old_tss_base == ~0 ? */

	ret = read_segment_descriptor(ctxt, ops, tss_selector, &next_tss_desc);
	if (ret != X86EMUL_CONTINUE)
		return ret;
	ret = read_segment_descriptor(ctxt, ops, old_tss_sel, &curr_tss_desc);
	if (ret != X86EMUL_CONTINUE)
		return ret;

	/* FIXME: check that next_tss_desc is tss */

	if (reason != TASK_SWITCH_IRET) {
		if ((tss_selector & 3) > next_tss_desc.dpl ||
		    ops->cpl(ctxt->vcpu) > next_tss_desc.dpl) {
			emulate_gp(ctxt, 0);
			return X86EMUL_PROPAGATE_FAULT;
		}
	}

	desc_limit = desc_limit_scaled(&next_tss_desc);
	if (!next_tss_desc.p ||
	    ((desc_limit < 0x67 && (next_tss_desc.type & 8)) ||
	     desc_limit < 0x2b)) {
		emulate_ts(ctxt, tss_selector & 0xfffc);
		return X86EMUL_PROPAGATE_FAULT;
	}

	if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) {
		curr_tss_desc.type &= ~(1 << 1); /* clear busy flag */
		write_segment_descriptor(ctxt, ops, old_tss_sel,
					 &curr_tss_desc);
	}

	if (reason == TASK_SWITCH_IRET)
		ctxt->eflags = ctxt->eflags & ~X86_EFLAGS_NT;

	/* set back link to prev task only if NT bit is set in eflags
	   note that old_tss_sel is not used after this point */
	if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE)
		old_tss_sel = 0xffff;

	if (next_tss_desc.type & 8)
		ret = task_switch_32(ctxt, ops, tss_selector, old_tss_sel,
				     old_tss_base, &next_tss_desc);
	else
		ret = task_switch_16(ctxt, ops, tss_selector, old_tss_sel,
				     old_tss_base, &next_tss_desc);
	if (ret != X86EMUL_CONTINUE)
		return ret;

	if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE)
		ctxt->eflags = ctxt->eflags | X86_EFLAGS_NT;

	if (reason != TASK_SWITCH_IRET) {
		next_tss_desc.type |= (1 << 1); /* set busy flag */
		write_segment_descriptor(ctxt, ops, tss_selector,
					 &next_tss_desc);
	}

	ops->set_cr(0,  ops->get_cr(0, ctxt->vcpu) | X86_CR0_TS, ctxt->vcpu);
	ops->set_cached_descriptor(&next_tss_desc, VCPU_SREG_TR, ctxt->vcpu);
	ops->set_segment_selector(tss_selector, VCPU_SREG_TR, ctxt->vcpu);

	if (has_error_code) {
		struct decode_cache *c = &ctxt->decode;

		c->op_bytes = c->ad_bytes = (next_tss_desc.type & 8) ? 4 : 2;
		c->lock_prefix = 0;
		c->src.val = (unsigned long) error_code;
		emulate_push(ctxt, ops);
	}

	return ret;
}

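/*
 * Externally visible wrapper: run the task switch and, on success,
 * write back the decode cache and commit the new EIP.
 */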
int emulator_task_switch(struct x86_emulate_ctxt *ctxt,
			 u16 tss_selector, int reason,
			 bool has_error_code, u32 error_code)
{
	struct x86_emulate_ops *ops = ctxt->ops;
	struct decode_cache *c = &ctxt->decode;
	int rc;

	c->eip = ctxt->eip;
	c->dst.type = OP_NONE;

	rc = emulator_do_task_switch(ctxt, ops, tss_selector, reason,
				     has_error_code, error_code);

	if (rc == X86EMUL_CONTINUE) {
		rc = writeback(ctxt, ops);
		if (rc == X86EMUL_CONTINUE)
			ctxt->eip = c->eip;
	}

	return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0;
}

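/*
 * Advance (E)SI/(E)DI for string instructions: step by the operand size,
 * backwards if EFLAGS.DF is set, and recompute the operand's address.
 */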
static void string_addr_inc(struct x86_emulate_ctxt *ctxt, unsigned long base,
			    int reg, struct operand *op)
{
	struct decode_cache *c = &ctxt->decode;
	int df = (ctxt->eflags & EFLG_DF) ? -1 : 1;

	register_address_increment(c, &c->regs[reg], df * op->bytes);
	op->addr.mem = register_address(c,  base, c->regs[reg]);
}

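/* Opcode ->execute callback wrapping emulate_push() for the decode table. */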
static int em_push(struct x86_emulate_ctxt *ctxt)
{
	emulate_push(ctxt, ctxt->ops);
	return X86EMUL_CONTINUE;
}

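/*
 * Shorthand for building opcode table entries: D() carries decode flags
 * only, N marks an undefined slot, G()/GD() redirect to a group table
 * (or a mod==3 dual group), and I() attaches an ->execute callback.
 */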
#define D(_y) { .flags = (_y) }
#define N    D(0)
#define G(_f, _g) { .flags = ((_f) | Group), .u.group = (_g) }
#define GD(_f, _g) { .flags = ((_f) | Group | GroupDual), .u.gdual = (_g) }
#define I(_f, _e) { .flags = (_f), .u.execute = (_e) }

static struct opcode group1[] = {
	X7(D(Lock)), N
};

static struct opcode group1A[] = {
	D(DstMem | SrcNone | ModRM | Mov | Stack), N, N, N, N, N, N, N,
};

static struct opcode group3[] = {
	D(DstMem | SrcImm | ModRM), D(DstMem | SrcImm | ModRM),
	D(DstMem | SrcNone | ModRM | Lock), D(DstMem | SrcNone | ModRM | Lock),
	X4(D(Undefined)),
};

static struct opcode group4[] = {
	D(ByteOp | DstMem | SrcNone | ModRM | Lock), D(ByteOp | DstMem | SrcNone | ModRM | Lock),
	N, N, N, N, N, N,
};

static struct opcode group5[] = {
	D(DstMem | SrcNone | ModRM | Lock), D(DstMem | SrcNone | ModRM | Lock),
	D(SrcMem | ModRM | Stack), N,
	D(SrcMem | ModRM | Stack), D(SrcMemFAddr | ModRM | ImplicitOps),
	D(SrcMem | ModRM | Stack), N,
};

static struct group_dual group7 = { {
	N, N, D(ModRM | SrcMem | Priv), D(ModRM | SrcMem | Priv),
	D(SrcNone | ModRM | DstMem | Mov), N,
	D(SrcMem16 | ModRM | Mov | Priv), D(SrcMem | ModRM | ByteOp | Priv),
}, {
	D(SrcNone | ModRM | Priv), N, N, D(SrcNone | ModRM | Priv),
	D(SrcNone | ModRM | DstMem | Mov), N,
	D(SrcMem16 | ModRM | Mov | Priv), N,
} };

static struct opcode group8[] = {
	N, N, N, N,
	D(DstMem | SrcImmByte | ModRM), D(DstMem | SrcImmByte | ModRM | Lock),
	D(DstMem | SrcImmByte | ModRM | Lock), D(DstMem | SrcImmByte | ModRM | Lock),
};

static struct group_dual group9 = { {
	N, D(DstMem64 | ModRM | Lock), N, N, N, N, N, N,
}, {
	N, N, N, N, N, N, N, N,
} };

static struct opcode opcode_table[256] = {
	/* 0x00 - 0x07 */
	D(ByteOp | DstMem | SrcReg | ModRM | Lock), D(DstMem | SrcReg | ModRM | Lock),
	D(ByteOp | DstReg | SrcMem | ModRM), D(DstReg | SrcMem | ModRM),
	D(ByteOp | DstAcc | SrcImm), D(DstAcc | SrcImm),
	D(ImplicitOps | Stack | No64), D(ImplicitOps | Stack | No64),
	/* 0x08 - 0x0F */
	D(ByteOp | DstMem | SrcReg | ModRM | Lock), D(DstMem | SrcReg | ModRM | Lock),
	D(ByteOp | DstReg | SrcMem | ModRM), D(DstReg | SrcMem | ModRM),
	D(ByteOp | DstAcc | SrcImm), D(DstAcc | SrcImm),
	D(ImplicitOps | Stack | No64), N,
	/* 0x10 - 0x17 */
	D(ByteOp | DstMem | SrcReg | ModRM | Lock), D(DstMem | SrcReg | ModRM | Lock),
	D(ByteOp | DstReg | SrcMem | ModRM), D(DstReg | SrcMem | ModRM),
	D(ByteOp | DstAcc | SrcImm), D(DstAcc | SrcImm),
	D(ImplicitOps | Stack | No64), D(ImplicitOps | Stack | No64),
	/* 0x18 - 0x1F */
	D(ByteOp | DstMem | SrcReg | ModRM | Lock), D(DstMem | SrcReg | ModRM | Lock),
	D(ByteOp | DstReg | SrcMem | ModRM), D(DstReg | SrcMem | ModRM),
	D(ByteOp | DstAcc | SrcImm), D(DstAcc | SrcImm),
	D(ImplicitOps | Stack | No64), D(ImplicitOps | Stack | No64),
	/* 0x20 - 0x27 */
	D(ByteOp | DstMem | SrcReg | ModRM | Lock), D(DstMem | SrcReg | ModRM | Lock),
	D(ByteOp | DstReg | SrcMem | ModRM), D(DstReg | SrcMem | ModRM),
	D(ByteOp | DstAcc | SrcImmByte), D(DstAcc | SrcImm), N, N,
	/* 0x28 - 0x2F */
	D(ByteOp | DstMem | SrcReg | ModRM | Lock), D(DstMem | SrcReg | ModRM | Lock),
	D(ByteOp | DstReg | SrcMem | ModRM), D(DstReg | SrcMem | ModRM),
	D(ByteOp | DstAcc | SrcImmByte), D(DstAcc | SrcImm), N, N,
	/* 0x30 - 0x37 */
	D(ByteOp | DstMem | SrcReg | ModRM | Lock), D(DstMem | SrcReg | ModRM | Lock),
	D(ByteOp | DstReg | SrcMem | ModRM), D(DstReg | SrcMem | ModRM),
	D(ByteOp | DstAcc | SrcImmByte), D(DstAcc | SrcImm), N, N,
	/* 0x38 - 0x3F */
	D(ByteOp | DstMem | SrcReg | ModRM), D(DstMem | SrcReg | ModRM),
	D(ByteOp | DstReg | SrcMem | ModRM), D(DstReg | SrcMem | ModRM),
	D(ByteOp | DstAcc | SrcImm), D(DstAcc | SrcImm),
	N, N,
	/* 0x40 - 0x4F */
	X16(D(DstReg)),
	/* 0x50 - 0x57 */
	X8(I(SrcReg | Stack, em_push)),
	/* 0x58 - 0x5F */
	X8(D(DstReg | Stack)),
	/* 0x60 - 0x67 */
	D(ImplicitOps | Stack | No64), D(ImplicitOps | Stack | No64),
	N, D(DstReg | SrcMem32 | ModRM | Mov) /* movsxd (x86/64) */ ,
	N, N, N, N,
	/* 0x68 - 0x6F */
	I(SrcImm | Mov | Stack, em_push), N,
	I(SrcImmByte | Mov | Stack, em_push), N,
	D(DstDI | ByteOp | Mov | String), D(DstDI | Mov | String), /* insb, insw/insd */
	D(SrcSI | ByteOp | ImplicitOps | String), D(SrcSI | ImplicitOps | String), /* outsb, outsw/outsd */
	/* 0x70 - 0x7F */
	X16(D(SrcImmByte)),
	/* 0x80 - 0x87 */
	G(ByteOp | DstMem | SrcImm | ModRM | Group, group1),
	G(DstMem | SrcImm | ModRM | Group, group1),
	G(ByteOp | DstMem | SrcImm | ModRM | No64 | Group, group1),
	G(DstMem | SrcImmByte | ModRM | Group, group1),
	D(ByteOp | DstMem | SrcReg | ModRM), D(DstMem | SrcReg | ModRM),
	D(ByteOp | DstMem | SrcReg | ModRM | Lock), D(DstMem | SrcReg | ModRM | Lock),
	/* 0x88 - 0x8F */
	D(ByteOp | DstMem | SrcReg | ModRM | Mov), D(DstMem | SrcReg | ModRM | Mov),
	D(ByteOp | DstReg | SrcMem | ModRM | Mov), D(DstReg | SrcMem | ModRM | Mov),
	D(DstMem | SrcNone | ModRM | Mov), D(ModRM | DstReg),
	D(ImplicitOps | SrcMem16 | ModRM), G(0, group1A),
	/* 0x90 - 0x97 */
	X8(D(SrcAcc | DstReg)),
	/* 0x98 - 0x9F */
	N, N, D(SrcImmFAddr | No64), N,
	D(ImplicitOps | Stack), D(ImplicitOps | Stack), N, N,
	/* 0xA0 - 0xA7 */
	D(ByteOp | DstAcc | SrcMem | Mov | MemAbs), D(DstAcc | SrcMem | Mov | MemAbs),
	D(ByteOp | DstMem | SrcAcc | Mov | MemAbs), D(DstMem | SrcAcc | Mov | MemAbs),
	D(ByteOp | SrcSI | DstDI | Mov | String), D(SrcSI | DstDI | Mov | String),
	D(ByteOp | SrcSI | DstDI | String), D(SrcSI | DstDI | String),
	/* 0xA8 - 0xAF */
	D(DstAcc | SrcImmByte | ByteOp), D(DstAcc | SrcImm), D(ByteOp | DstDI | Mov | String), D(DstDI | Mov | String),
	D(ByteOp | SrcSI | DstAcc | Mov | String), D(SrcSI | DstAcc | Mov | String),
	D(ByteOp | DstDI | String), D(DstDI | String),
	/* 0xB0 - 0xB7 */
	X8(D(ByteOp | DstReg | SrcImm | Mov)),
	/* 0xB8 - 0xBF */
	X8(D(DstReg | SrcImm | Mov)),
	/* 0xC0 - 0xC7 */
	D(ByteOp | DstMem | SrcImm | ModRM), D(DstMem | SrcImmByte | ModRM),
	N, D(ImplicitOps | Stack), N, N,
	D(ByteOp | DstMem | SrcImm | ModRM | Mov), D(DstMem | SrcImm | ModRM | Mov),
	/* 0xC8 - 0xCF */
	N, N, N, D(ImplicitOps | Stack),
	D(ImplicitOps), D(SrcImmByte), D(ImplicitOps | No64), D(ImplicitOps),
	/* 0xD0 - 0xD7 */
	D(ByteOp | DstMem | SrcImplicit | ModRM), D(DstMem | SrcImplicit | ModRM),
	D(ByteOp | DstMem | SrcImplicit | ModRM), D(DstMem | SrcImplicit | ModRM),
	N, N, N, N,
	/* 0xD8 - 0xDF */
	N, N, N, N, N, N, N, N,
	/* 0xE0 - 0xE7 */
	N, N, N, N,
	D(ByteOp | SrcImmUByte | DstAcc), D(SrcImmUByte | DstAcc),
	D(ByteOp | SrcImmUByte | DstAcc), D(SrcImmUByte | DstAcc),
	/* 0xE8 - 0xEF */
	D(SrcImm | Stack), D(SrcImm | ImplicitOps),
	D(SrcImmFAddr | No64), D(SrcImmByte | ImplicitOps),
	D(SrcNone | ByteOp | DstAcc), D(SrcNone | DstAcc),
	D(SrcNone | ByteOp | DstAcc), D(SrcNone | DstAcc),
	/* 0xF0 - 0xF7 */
	N, N, N, N,
	D(ImplicitOps | Priv), D(ImplicitOps), G(ByteOp, group3), G(0, group3),
	/* 0xF8 - 0xFF */
	D(ImplicitOps), N, D(ImplicitOps), D(ImplicitOps),
	D(ImplicitOps), D(ImplicitOps), G(0, group4), G(0, group5),
};

static struct opcode twobyte_table[256] = {
	/* 0x00 - 0x0F */
	N, GD(0, &group7), N, N,
	N, D(ImplicitOps), D(ImplicitOps | Priv), N,
	D(ImplicitOps | Priv), D(ImplicitOps | Priv), N, N,
	N, D(ImplicitOps | ModRM), N, N,
	/* 0x10 - 0x1F */
	N, N, N, N, N, N, N, N, D(ImplicitOps | ModRM), N, N, N, N, N, N, N,
	/* 0x20 - 0x2F */
	D(ModRM | DstMem | Priv | Op3264), D(ModRM | Priv | Op3264),
	D(ModRM | SrcMem | Priv | Op3264), D(ModRM | Priv | Op3264),
	N, N, N, N,
	N, N, N, N, N, N, N, N,
	/* 0x30 - 0x3F */
	D(ImplicitOps | Priv), N, D(ImplicitOps | Priv), N,
	D(ImplicitOps), D(ImplicitOps | Priv), N, N,
	N, N, N, N, N, N, N, N,
	/* 0x40 - 0x4F */
	X16(D(DstReg | SrcMem | ModRM | Mov)),
	/* 0x50 - 0x5F */
	N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
	/* 0x60 - 0x6F */
	N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
	/* 0x70 - 0x7F */
	N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
	/* 0x80 - 0x8F */
	X16(D(SrcImm)),
	/* 0x90 - 0x9F */
	N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
	/* 0xA0 - 0xA7 */
	D(ImplicitOps | Stack), D(ImplicitOps | Stack),
	N, D(DstMem | SrcReg | ModRM | BitOp),
	D(DstMem | SrcReg | Src2ImmByte | ModRM),
	D(DstMem | SrcReg | Src2CL | ModRM), N, N,
	/* 0xA8 - 0xAF */
	D(ImplicitOps | Stack), D(ImplicitOps | Stack),
	N, D(DstMem | SrcReg | ModRM | BitOp | Lock),
	D(DstMem | SrcReg | Src2ImmByte | ModRM),
	D(DstMem | SrcReg | Src2CL | ModRM),
	D(ModRM), N,
	/* 0xB0 - 0xB7 */
	D(ByteOp | DstMem | SrcReg | ModRM | Lock), D(DstMem | SrcReg | ModRM | Lock),
	N, D(DstMem | SrcReg | ModRM | BitOp | Lock),
	N, N, D(ByteOp | DstReg | SrcMem | ModRM | Mov),
	    D(DstReg | SrcMem16 | ModRM | Mov),
	/* 0xB8 - 0xBF */
	N, N,
	G(0, group8), D(DstMem | SrcReg | ModRM | BitOp | Lock),
	N, N, D(ByteOp | DstReg | SrcMem | ModRM | Mov),
	    D(DstReg | SrcMem16 | ModRM | Mov),
	/* 0xC0 - 0xCF */
	N, N, N, D(DstMem | SrcReg | ModRM | Mov),
	N, N, N, GD(0, &group9),
	N, N, N, N, N, N, N, N,
	/* 0xD0 - 0xDF */
	N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
	/* 0xE0 - 0xEF */
	N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
	/* 0xF0 - 0xFF */
	N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N
};

#undef D
#undef N
#undef G
#undef GD
#undef I

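/*
 * Decode one guest instruction into ctxt->decode: consume prefixes and
 * the opcode byte(s), look up the decode flags, then fetch the ModRM/SIB
 * bytes and the source, second source and destination operands.
 */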
int
x86_decode_insn(struct x86_emulate_ctxt *ctxt)
{
	struct x86_emulate_ops *ops = ctxt->ops;
	struct decode_cache *c = &ctxt->decode;
	int rc = X86EMUL_CONTINUE;
	int mode = ctxt->mode;
	int def_op_bytes, def_ad_bytes, dual, goffset;
	struct opcode opcode, *g_mod012, *g_mod3;

	/* we cannot decode insn before we complete previous rep insn */
	WARN_ON(ctxt->restart);

	c->eip = ctxt->eip;
	c->fetch.start = c->fetch.end = c->eip;
	ctxt->cs_base = seg_base(ctxt, ops, VCPU_SREG_CS);

	switch (mode) {
	case X86EMUL_MODE_REAL:
	case X86EMUL_MODE_VM86:
	case X86EMUL_MODE_PROT16:
		def_op_bytes = def_ad_bytes = 2;
		break;
	case X86EMUL_MODE_PROT32:
		def_op_bytes = def_ad_bytes = 4;
		break;
#ifdef CONFIG_X86_64
	case X86EMUL_MODE_PROT64:
		def_op_bytes = 4;
		def_ad_bytes = 8;
		break;
#endif
	default:
		return -1;
	}

	c->op_bytes = def_op_bytes;
	c->ad_bytes = def_ad_bytes;

	/* Legacy prefixes. */
	for (;;) {
		switch (c->b = insn_fetch(u8, 1, c->eip)) {
		case 0x66:	/* operand-size override */
			/* switch between 2/4 bytes */
			c->op_bytes = def_op_bytes ^ 6;
			break;
		case 0x67:	/* address-size override */
			if (mode == X86EMUL_MODE_PROT64)
				/* switch between 4/8 bytes */
				c->ad_bytes = def_ad_bytes ^ 12;
			else
				/* switch between 2/4 bytes */
				c->ad_bytes = def_ad_bytes ^ 6;
			break;
		case 0x26:	/* ES override */
		case 0x2e:	/* CS override */
		case 0x36:	/* SS override */
		case 0x3e:	/* DS override */
			set_seg_override(c, (c->b >> 3) & 3);
			break;
		case 0x64:	/* FS override */
		case 0x65:	/* GS override */
			set_seg_override(c, c->b & 7);
			break;
		case 0x40 ... 0x4f: /* REX */
			if (mode != X86EMUL_MODE_PROT64)
				goto done_prefixes;
			c->rex_prefix = c->b;
			continue;
		case 0xf0:	/* LOCK */
			c->lock_prefix = 1;
			break;
		case 0xf2:	/* REPNE/REPNZ */
			c->rep_prefix = REPNE_PREFIX;
			break;
		case 0xf3:	/* REP/REPE/REPZ */
			c->rep_prefix = REPE_PREFIX;
			break;
		default:
			goto done_prefixes;
		}

		/* Any legacy prefix after a REX prefix nullifies its effect. */

		c->rex_prefix = 0;
	}

done_prefixes:

	/* REX prefix. */
	if (c->rex_prefix & 8)
		c->op_bytes = 8;	/* REX.W */

	/* Opcode byte(s). */
	opcode = opcode_table[c->b];
	if (opcode.flags == 0) {
		/* Two-byte opcode? */
		if (c->b == 0x0f) {
			c->twobyte = 1;
			c->b = insn_fetch(u8, 1, c->eip);
			opcode = twobyte_table[c->b];
		}
	}
	c->d = opcode.flags;

	if (c->d & Group) {
		dual = c->d & GroupDual;
		c->modrm = insn_fetch(u8, 1, c->eip);
		--c->eip;

		if (c->d & GroupDual) {
			g_mod012 = opcode.u.gdual->mod012;
			g_mod3 = opcode.u.gdual->mod3;
		} else
			g_mod012 = g_mod3 = opcode.u.group;

		c->d &= ~(Group | GroupDual);

		goffset = (c->modrm >> 3) & 7;

		if ((c->modrm >> 6) == 3)
			opcode = g_mod3[goffset];
		else
			opcode = g_mod012[goffset];
		c->d |= opcode.flags;
	}

	c->execute = opcode.u.execute;

	/* Unrecognised? */
	if (c->d == 0 || (c->d & Undefined)) {
		DPRINTF("Cannot emulate %02x\n", c->b);
		return -1;
	}

	if (mode == X86EMUL_MODE_PROT64 && (c->d & Stack))
		c->op_bytes = 8;

	if (c->d & Op3264) {
		if (mode == X86EMUL_MODE_PROT64)
			c->op_bytes = 8;
		else
			c->op_bytes = 4;
	}

	/* ModRM and SIB bytes. */
	if (c->d & ModRM) {
		rc = decode_modrm(ctxt, ops);
		if (!c->has_seg_override)
			set_seg_override(c, c->modrm_seg);
	} else if (c->d & MemAbs)
		rc = decode_abs(ctxt, ops);
	if (rc != X86EMUL_CONTINUE)
		goto done;

	if (!c->has_seg_override)
		set_seg_override(c, VCPU_SREG_DS);

	if (!(!c->twobyte && c->b == 0x8d))
		c->modrm_ea += seg_override_base(ctxt, ops, c);

	if (c->ad_bytes != 8)
		c->modrm_ea = (u32)c->modrm_ea;

	if (c->rip_relative)
		c->modrm_ea += c->eip;

	/*
	 * Decode and fetch the source operand: register, memory
	 * or immediate.
	 */
	switch (c->d & SrcMask) {
	case SrcNone:
		break;
	case SrcReg:
		decode_register_operand(&c->src, c, 0);
		break;
	case SrcMem16:
		c->src.bytes = 2;
		goto srcmem_common;
	case SrcMem32:
		c->src.bytes = 4;
		goto srcmem_common;
	case SrcMem:
		c->src.bytes = (c->d & ByteOp) ? 1 :
							   c->op_bytes;
		/* Don't fetch the address for invlpg: it could be unmapped. */
		if (c->twobyte && c->b == 0x01 && c->modrm_reg == 7)
			break;
	srcmem_common:
		/*
		 * For instructions with a ModR/M byte, switch to register
		 * access if Mod = 3.
		 */
		if ((c->d & ModRM) && c->modrm_mod == 3) {
			c->src.type = OP_REG;
			c->src.val = c->modrm_val;
			c->src.addr.reg = c->modrm_ptr;
			break;
		}
		c->src.type = OP_MEM;
		c->src.addr.mem = c->modrm_ea;
		c->src.val = 0;
		break;
	case SrcImm:
	case SrcImmU:
		c->src.type = OP_IMM;
		c->src.addr.mem = c->eip;
		c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
		if (c->src.bytes == 8)
			c->src.bytes = 4;
		/* NB. Immediates are sign-extended as necessary. */
		switch (c->src.bytes) {
		case 1:
			c->src.val = insn_fetch(s8, 1, c->eip);
			break;
		case 2:
			c->src.val = insn_fetch(s16, 2, c->eip);
			break;
		case 4:
			c->src.val = insn_fetch(s32, 4, c->eip);
			break;
		}
		if ((c->d & SrcMask) == SrcImmU) {
			switch (c->src.bytes) {
			case 1:
				c->src.val &= 0xff;
				break;
			case 2:
				c->src.val &= 0xffff;
				break;
			case 4:
				c->src.val &= 0xffffffff;
				break;
			}
		}
		break;
	case SrcImmByte:
	case SrcImmUByte:
		c->src.type = OP_IMM;
		c->src.addr.mem = c->eip;
		c->src.bytes = 1;
		if ((c->d & SrcMask) == SrcImmByte)
			c->src.val = insn_fetch(s8, 1, c->eip);
		else
			c->src.val = insn_fetch(u8, 1, c->eip);
		break;
	case SrcAcc:
		c->src.type = OP_REG;
		c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
		c->src.addr.reg = &c->regs[VCPU_REGS_RAX];
		fetch_register_operand(&c->src);
		break;
	case SrcOne:
		c->src.bytes = 1;
		c->src.val = 1;
		break;
	case SrcSI:
		c->src.type = OP_MEM;
		c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
		c->src.addr.mem =
			register_address(c,  seg_override_base(ctxt, ops, c),
					 c->regs[VCPU_REGS_RSI]);
		c->src.val = 0;
		break;
	case SrcImmFAddr:
		c->src.type = OP_IMM;
		c->src.addr.mem = c->eip;
		c->src.bytes = c->op_bytes + 2;
		insn_fetch_arr(c->src.valptr, c->src.bytes, c->eip);
		break;
	case SrcMemFAddr:
		c->src.type = OP_MEM;
		c->src.addr.mem = c->modrm_ea;
		c->src.bytes = c->op_bytes + 2;
		break;
	}

	/*
	 * Decode and fetch the second source operand: register, memory
	 * or immediate.
	 */
	switch (c->d & Src2Mask) {
	case Src2None:
		break;
	case Src2CL:
		c->src2.bytes = 1;
		c->src2.val = c->regs[VCPU_REGS_RCX] & 0x8;
		break;
	case Src2ImmByte:
		c->src2.type = OP_IMM;
		c->src2.addr.mem = c->eip;
		c->src2.bytes = 1;
		c->src2.val = insn_fetch(u8, 1, c->eip);
		break;
	case Src2One:
		c->src2.bytes = 1;
		c->src2.val = 1;
		break;
	}

	/* Decode and fetch the destination operand: register or memory. */
	switch (c->d & DstMask) {
	case ImplicitOps:
		/* Special instructions do their own operand decoding. */
		return 0;
	case DstReg:
		decode_register_operand(&c->dst, c,
			 c->twobyte && (c->b == 0xb6 || c->b == 0xb7));
		break;
	case DstMem:
	case DstMem64:
		if ((c->d & ModRM) && c->modrm_mod == 3) {
			c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
			c->dst.type = OP_REG;
			c->dst.val = c->dst.orig_val = c->modrm_val;
			c->dst.addr.reg = c->modrm_ptr;
			break;
		}
		c->dst.type = OP_MEM;
		c->dst.addr.mem = c->modrm_ea;
		if ((c->d & DstMask) == DstMem64)
			c->dst.bytes = 8;
		else
			c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
		c->dst.val = 0;
		if (c->d & BitOp) {
			unsigned long mask = ~(c->dst.bytes * 8 - 1);

			c->dst.addr.mem = c->dst.addr.mem +
						   (c->src.val & mask) / 8;
		}
		break;
	case DstAcc:
		c->dst.type = OP_REG;
		c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
		c->dst.addr.reg = &c->regs[VCPU_REGS_RAX];
		fetch_register_operand(&c->dst);
		c->dst.orig_val = c->dst.val;
		break;
	case DstDI:
		c->dst.type = OP_MEM;
		c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
		c->dst.addr.mem =
			register_address(c, es_base(ctxt, ops),
					 c->regs[VCPU_REGS_RDI]);
		c->dst.val = 0;
		break;
	}

done:
	return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0;
}

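/*
 * Execute a previously decoded instruction.  Table-driven opcodes run
 * through c->execute(); everything else falls into the one-byte and
 * two-byte opcode switches below.
 */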
int
x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
{
	struct x86_emulate_ops *ops = ctxt->ops;
	u64 msr_data;
	struct decode_cache *c = &ctxt->decode;
	int rc = X86EMUL_CONTINUE;
	int saved_dst_type = c->dst.type;

	ctxt->decode.mem_read.pos = 0;

	if (ctxt->mode == X86EMUL_MODE_PROT64 && (c->d & No64)) {
		emulate_ud(ctxt);
		goto done;
	}

	/* LOCK prefix is allowed only with some instructions */
	if (c->lock_prefix && (!(c->d & Lock) || c->dst.type != OP_MEM)) {
		emulate_ud(ctxt);
		goto done;
	}

	/* Privileged instruction can be executed only in CPL=0 */
	if ((c->d & Priv) && ops->cpl(ctxt->vcpu)) {
		emulate_gp(ctxt, 0);
		goto done;
	}

	if (c->rep_prefix && (c->d & String)) {
		ctxt->restart = true;
		/* All REP prefixes have the same first termination condition */
		if (address_mask(c, c->regs[VCPU_REGS_RCX]) == 0) {
		string_done:
			ctxt->restart = false;
			ctxt->eip = c->eip;
			goto done;
		}
		/* The second termination condition only applies for REPE
		 * and REPNE. Test if the repeat string operation prefix is
		 * REPE/REPZ or REPNE/REPNZ and if it's the case it tests the
		 * corresponding termination condition according to:
		 * 	- if REPE/REPZ and ZF = 0 then done
		 * 	- if REPNE/REPNZ and ZF = 1 then done
		 */
		if ((c->b == 0xa6) || (c->b == 0xa7) ||
		    (c->b == 0xae) || (c->b == 0xaf)) {
			if ((c->rep_prefix == REPE_PREFIX) &&
			    ((ctxt->eflags & EFLG_ZF) == 0))
				goto string_done;
			if ((c->rep_prefix == REPNE_PREFIX) &&
			    ((ctxt->eflags & EFLG_ZF) == EFLG_ZF))
				goto string_done;
		}
		c->eip = ctxt->eip;
	}

	if (c->src.type == OP_MEM) {
		rc = read_emulated(ctxt, ops, c->src.addr.mem,
					c->src.valptr, c->src.bytes);
		if (rc != X86EMUL_CONTINUE)
			goto done;
		c->src.orig_val64 = c->src.val64;
	}

	if (c->src2.type == OP_MEM) {
		rc = read_emulated(ctxt, ops, c->src2.addr.mem,
					&c->src2.val, c->src2.bytes);
		if (rc != X86EMUL_CONTINUE)
			goto done;
	}

	if ((c->d & DstMask) == ImplicitOps)
		goto special_insn;


	if ((c->dst.type == OP_MEM) && !(c->d & Mov)) {
		/* optimisation - avoid slow emulated read if Mov */
		rc = read_emulated(ctxt, ops, c->dst.addr.mem,
				   &c->dst.val, c->dst.bytes);
		if (rc != X86EMUL_CONTINUE)
			goto done;
	}
	c->dst.orig_val = c->dst.val;

special_insn:

	if (c->execute) {
		rc = c->execute(ctxt);
		if (rc != X86EMUL_CONTINUE)
			goto done;
		goto writeback;
	}

	if (c->twobyte)
		goto twobyte_insn;

	switch (c->b) {
	case 0x00 ... 0x05:
	      add:		/* add */
		emulate_2op_SrcV("add", c->src, c->dst, ctxt->eflags);
		break;
	case 0x06:		/* push es */
		emulate_push_sreg(ctxt, ops, VCPU_SREG_ES);
		break;
	case 0x07:		/* pop es */
		rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_ES);
		if (rc != X86EMUL_CONTINUE)
			goto done;
		break;
	case 0x08 ... 0x0d:
	      or:		/* or */
		emulate_2op_SrcV("or", c->src, c->dst, ctxt->eflags);
		break;
	case 0x0e:		/* push cs */
		emulate_push_sreg(ctxt, ops, VCPU_SREG_CS);
		break;
	case 0x10 ... 0x15:
	      adc:		/* adc */
		emulate_2op_SrcV("adc", c->src, c->dst, ctxt->eflags);
		break;
	case 0x16:		/* push ss */
		emulate_push_sreg(ctxt, ops, VCPU_SREG_SS);
		break;
	case 0x17:		/* pop ss */
		rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_SS);
		if (rc != X86EMUL_CONTINUE)
			goto done;
		break;
	case 0x18 ... 0x1d:
	      sbb:		/* sbb */
		emulate_2op_SrcV("sbb", c->src, c->dst, ctxt->eflags);
		break;
	case 0x1e:		/* push ds */
		emulate_push_sreg(ctxt, ops, VCPU_SREG_DS);
		break;
	case 0x1f:		/* pop ds */
		rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_DS);
		if (rc != X86EMUL_CONTINUE)
			goto done;
		break;
	case 0x20 ... 0x25:
	      and:		/* and */
		emulate_2op_SrcV("and", c->src, c->dst, ctxt->eflags);
		break;
	case 0x28 ... 0x2d:
	      sub:		/* sub */
		emulate_2op_SrcV("sub", c->src, c->dst, ctxt->eflags);
		break;
	case 0x30 ... 0x35:
	      xor:		/* xor */
		emulate_2op_SrcV("xor", c->src, c->dst, ctxt->eflags);
		break;
	case 0x38 ... 0x3d:
	      cmp:		/* cmp */
		emulate_2op_SrcV("cmp", c->src, c->dst, ctxt->eflags);
		break;
	case 0x40 ... 0x47: /* inc r16/r32 */
		emulate_1op("inc", c->dst, ctxt->eflags);
		break;
	case 0x48 ... 0x4f: /* dec r16/r32 */
		emulate_1op("dec", c->dst, ctxt->eflags);
		break;
	case 0x58 ... 0x5f: /* pop reg */
	pop_instruction:
		rc = emulate_pop(ctxt, ops, &c->dst.val, c->op_bytes);
		if (rc != X86EMUL_CONTINUE)
			goto done;
		break;
	case 0x60:	/* pusha */
		rc = emulate_pusha(ctxt, ops);
		if (rc != X86EMUL_CONTINUE)
			goto done;
		break;
	case 0x61:	/* popa */
		rc = emulate_popa(ctxt, ops);
		if (rc != X86EMUL_CONTINUE)
			goto done;
		break;
	case 0x63:		/* movsxd */
		if (ctxt->mode != X86EMUL_MODE_PROT64)
			goto cannot_emulate;
		c->dst.val = (s32) c->src.val;
		break;
	case 0x6c:		/* insb */
	case 0x6d:		/* insw/insd */
		c->dst.bytes = min(c->dst.bytes, 4u);
		if (!emulator_io_permited(ctxt, ops, c->regs[VCPU_REGS_RDX],
					  c->dst.bytes)) {
			emulate_gp(ctxt, 0);
			goto done;
		}
		if (!pio_in_emulated(ctxt, ops, c->dst.bytes,
				     c->regs[VCPU_REGS_RDX], &c->dst.val))
			goto done; /* IO is needed, skip writeback */
		break;
	case 0x6e:		/* outsb */
	case 0x6f:		/* outsw/outsd */
		c->src.bytes = min(c->src.bytes, 4u);
		if (!emulator_io_permited(ctxt, ops, c->regs[VCPU_REGS_RDX],
					  c->src.bytes)) {
			emulate_gp(ctxt, 0);
			goto done;
		}
		ops->pio_out_emulated(c->src.bytes, c->regs[VCPU_REGS_RDX],
				      &c->src.val, 1, ctxt->vcpu);

		c->dst.type = OP_NONE; /* nothing to writeback */
		break;
	case 0x70 ... 0x7f: /* jcc (short) */
		if (test_cc(c->b, ctxt->eflags))
			jmp_rel(c, c->src.val);
		break;
	case 0x80 ... 0x83:	/* Grp1 */
		switch (c->modrm_reg) {
		case 0:
			goto add;
		case 1:
			goto or;
		case 2:
			goto adc;
		case 3:
			goto sbb;
		case 4:
			goto and;
		case 5:
			goto sub;
		case 6:
			goto xor;
		case 7:
			goto cmp;
		}
		break;
	case 0x84 ... 0x85:
	test:
		emulate_2op_SrcV("test", c->src, c->dst, ctxt->eflags);
		break;
	case 0x86 ... 0x87:	/* xchg */
	xchg:
		/* Write back the register source. */
		switch (c->dst.bytes) {
		case 1:
			*(u8 *) c->src.addr.reg = (u8) c->dst.val;
			break;
		case 2:
			*(u16 *) c->src.addr.reg = (u16) c->dst.val;
			break;
		case 4:
			*c->src.addr.reg = (u32) c->dst.val;
			break;	/* 64b reg: zero-extend */
		case 8:
			*c->src.addr.reg = c->dst.val;
			break;
		}
		/*
		 * Write back the memory destination with implicit LOCK
		 * prefix.
		 */
		c->dst.val = c->src.val;
		c->lock_prefix = 1;
		break;
	case 0x88 ... 0x8b:	/* mov */
		goto mov;
	case 0x8c:  /* mov r/m, sreg */
		if (c->modrm_reg > VCPU_SREG_GS) {
			emulate_ud(ctxt);
			goto done;
		}
		c->dst.val = ops->get_segment_selector(c->modrm_reg, ctxt->vcpu);
		break;
	case 0x8d: /* lea r16/r32, m */
		c->dst.val = c->modrm_ea;
		break;
	case 0x8e: { /* mov seg, r/m16 */
		uint16_t sel;

		sel = c->src.val;

		if (c->modrm_reg == VCPU_SREG_CS ||
		    c->modrm_reg > VCPU_SREG_GS) {
			emulate_ud(ctxt);
			goto done;
		}

		if (c->modrm_reg == VCPU_SREG_SS)
			ctxt->interruptibility = KVM_X86_SHADOW_INT_MOV_SS;

		rc = load_segment_descriptor(ctxt, ops, sel, c->modrm_reg);

		c->dst.type = OP_NONE;  /* Disable writeback. */
		break;
	}
	case 0x8f:		/* pop (sole member of Grp1a) */
		rc = emulate_grp1a(ctxt, ops);
		if (rc != X86EMUL_CONTINUE)
			goto done;
		break;
	case 0x90 ... 0x97: /* nop / xchg reg, rax */
		if (c->dst.addr.reg == &c->regs[VCPU_REGS_RAX])
			goto done;
		goto xchg;
	case 0x9c: /* pushf */
		c->src.val =  (unsigned long) ctxt->eflags;
		emulate_push(ctxt, ops);
		break;
	case 0x9d: /* popf */
		c->dst.type = OP_REG;
		c->dst.addr.reg = &ctxt->eflags;
		c->dst.bytes = c->op_bytes;
		rc = emulate_popf(ctxt, ops, &c->dst.val, c->op_bytes);
		if (rc != X86EMUL_CONTINUE)
			goto done;
		break;
	case 0xa0 ... 0xa3:	/* mov */
	case 0xa4 ... 0xa5:	/* movs */
		goto mov;
	case 0xa6 ... 0xa7:	/* cmps */
		c->dst.type = OP_NONE; /* Disable writeback. */
		DPRINTF("cmps: mem1=0x%p mem2=0x%p\n", c->src.addr.mem, c->dst.addr.mem);
		goto cmp;
	case 0xa8 ... 0xa9:	/* test ax, imm */
		goto test;
	case 0xaa ... 0xab:	/* stos */
		c->dst.val = c->regs[VCPU_REGS_RAX];
		break;
	case 0xac ... 0xad:	/* lods */
		goto mov;
	case 0xae ... 0xaf:	/* scas */
		DPRINTF("Urk! I don't handle SCAS.\n");
		goto cannot_emulate;
	case 0xb0 ... 0xbf: /* mov r, imm */
		goto mov;
	case 0xc0 ... 0xc1:
		emulate_grp2(ctxt);
		break;
	case 0xc3: /* ret */
		c->dst.type = OP_REG;
		c->dst.addr.reg = &c->eip;
		c->dst.bytes = c->op_bytes;
		goto pop_instruction;
	case 0xc6 ... 0xc7:	/* mov (sole member of Grp11) */
	mov:
		c->dst.val = c->src.val;
		break;
	case 0xcb:		/* ret far */
		rc = emulate_ret_far(ctxt, ops);
		if (rc != X86EMUL_CONTINUE)
			goto done;
		break;
	case 0xcf:		/* iret */
		rc = emulate_iret(ctxt, ops);

		if (rc != X86EMUL_CONTINUE)
			goto done;
		break;
	case 0xd0 ... 0xd1:	/* Grp2 */
		c->src.val = 1;
		emulate_grp2(ctxt);
		break;
	case 0xd2 ... 0xd3:	/* Grp2 */
		c->src.val = c->regs[VCPU_REGS_RCX];
		emulate_grp2(ctxt);
		break;
	case 0xe4: 	/* inb */
	case 0xe5: 	/* in */
		goto do_io_in;
	case 0xe6: /* outb */
	case 0xe7: /* out */
		goto do_io_out;
	case 0xe8: /* call (near) */ {
		long int rel = c->src.val;
		c->src.val = (unsigned long) c->eip;
		jmp_rel(c, rel);
		emulate_push(ctxt, ops);
		break;
	}
	case 0xe9: /* jmp rel */
		goto jmp;
	case 0xea: { /* jmp far */
		unsigned short sel;
	jump_far:
		memcpy(&sel, c->src.valptr + c->op_bytes, 2);

		if (load_segment_descriptor(ctxt, ops, sel, VCPU_SREG_CS))
			goto done;

		c->eip = 0;
		memcpy(&c->eip, c->src.valptr, c->op_bytes);
		break;
	}
	case 0xeb:
	      jmp:		/* jmp rel short */
		jmp_rel(c, c->src.val);
		c->dst.type = OP_NONE; /* Disable writeback. */
		break;
	case 0xec: /* in al,dx */
	case 0xed: /* in (e/r)ax,dx */
		c->src.val = c->regs[VCPU_REGS_RDX];
	do_io_in:
		c->dst.bytes = min(c->dst.bytes, 4u);
		if (!emulator_io_permited(ctxt, ops, c->src.val, c->dst.bytes)) {
			emulate_gp(ctxt, 0);
			goto done;
		}
		if (!pio_in_emulated(ctxt, ops, c->dst.bytes, c->src.val,
				     &c->dst.val))
			goto done; /* IO is needed */
		break;
	case 0xee: /* out dx,al */
	case 0xef: /* out dx,(e/r)ax */
		c->src.val = c->regs[VCPU_REGS_RDX];
	do_io_out:
		c->dst.bytes = min(c->dst.bytes, 4u);
		if (!emulator_io_permited(ctxt, ops, c->src.val, c->dst.bytes)) {
			emulate_gp(ctxt, 0);
			goto done;
		}
		ops->pio_out_emulated(c->dst.bytes, c->src.val, &c->dst.val, 1,
				      ctxt->vcpu);
		c->dst.type = OP_NONE;	/* Disable writeback. */
		break;
	case 0xf4:              /* hlt */
		ctxt->vcpu->arch.halt_request = 1;
		break;
	case 0xf5:	/* cmc */
		/* complement carry flag from eflags reg */
		ctxt->eflags ^= EFLG_CF;
		c->dst.type = OP_NONE;	/* Disable writeback. */
		break;
	case 0xf6 ... 0xf7:	/* Grp3 */
		if (!emulate_grp3(ctxt, ops))
			goto cannot_emulate;
		break;
	case 0xf8: /* clc */
		ctxt->eflags &= ~EFLG_CF;
		c->dst.type = OP_NONE;	/* Disable writeback. */
		break;
	case 0xfa: /* cli */
		if (emulator_bad_iopl(ctxt, ops)) {
			emulate_gp(ctxt, 0);
			goto done;
		} else {
			ctxt->eflags &= ~X86_EFLAGS_IF;
			c->dst.type = OP_NONE;	/* Disable writeback. */
		}
		break;
	case 0xfb: /* sti */
		if (emulator_bad_iopl(ctxt, ops)) {
			emulate_gp(ctxt, 0);
			goto done;
		} else {
			ctxt->interruptibility = KVM_X86_SHADOW_INT_STI;
			ctxt->eflags |= X86_EFLAGS_IF;
			c->dst.type = OP_NONE;	/* Disable writeback. */
		}
		break;
	case 0xfc: /* cld */
		ctxt->eflags &= ~EFLG_DF;
		c->dst.type = OP_NONE;	/* Disable writeback. */
		break;
	case 0xfd: /* std */
		ctxt->eflags |= EFLG_DF;
		c->dst.type = OP_NONE;	/* Disable writeback. */
		break;
	case 0xfe: /* Grp4 */
	grp45:
		rc = emulate_grp45(ctxt, ops);
		if (rc != X86EMUL_CONTINUE)
			goto done;
		break;
	case 0xff: /* Grp5 */
		if (c->modrm_reg == 5)
			goto jump_far;
		goto grp45;
	default:
		goto cannot_emulate;
	}

writeback:
	rc = writeback(ctxt, ops);
	if (rc != X86EMUL_CONTINUE)
		goto done;

	/*
	 * restore dst type in case the decoding will be reused
	 * (happens for string instructions)
	 */
	c->dst.type = saved_dst_type;

	if ((c->d & SrcMask) == SrcSI)
		string_addr_inc(ctxt, seg_override_base(ctxt, ops, c),
				VCPU_REGS_RSI, &c->src);

	if ((c->d & DstMask) == DstDI)
		string_addr_inc(ctxt, es_base(ctxt, ops), VCPU_REGS_RDI,
				&c->dst);

	if (c->rep_prefix && (c->d & String)) {
		struct read_cache *rc = &ctxt->decode.io_read;
		register_address_increment(c, &c->regs[VCPU_REGS_RCX], -1);
		/*
		 * Re-enter guest when pio read ahead buffer is empty or,
		 * if it is not used, after each 1024 iteration.
		 */
		if ((rc->end == 0 && !(c->regs[VCPU_REGS_RCX] & 0x3ff)) ||
		    (rc->end != 0 && rc->end == rc->pos))
			ctxt->restart = false;
	}
	/*
	 * reset read cache here in case string instruction is restarted
	 * without decoding
	 */
	ctxt->decode.mem_read.end = 0;
	ctxt->eip = c->eip;

done:
	return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0;

twobyte_insn:
	switch (c->b) {
	case 0x01: /* lgdt, lidt, lmsw */
		switch (c->modrm_reg) {
			u16 size;
			unsigned long address;

		case 0: /* vmcall */
			if (c->modrm_mod != 3 || c->modrm_rm != 1)
				goto cannot_emulate;

			rc = kvm_fix_hypercall(ctxt->vcpu);
			if (rc != X86EMUL_CONTINUE)
				goto done;

			/* Let the processor re-execute the fixed hypercall */
			c->eip = ctxt->eip;
			/* Disable writeback. */
			c->dst.type = OP_NONE;
			break;
		case 2: /* lgdt */
			rc = read_descriptor(ctxt, ops, c->src.addr.mem,
					     &size, &address, c->op_bytes);
			if (rc != X86EMUL_CONTINUE)
				goto done;
			realmode_lgdt(ctxt->vcpu, size, address);
			/* Disable writeback. */
			c->dst.type = OP_NONE;
			break;
		case 3: /* lidt/vmmcall */
			if (c->modrm_mod == 3) {
				switch (c->modrm_rm) {
				case 1:
					rc = kvm_fix_hypercall(ctxt->vcpu);
					if (rc != X86EMUL_CONTINUE)
						goto done;
					break;
				default:
					goto cannot_emulate;
				}
			} else {
				rc = read_descriptor(ctxt, ops, c->src.addr.mem,
						     &size, &address,
						     c->op_bytes);
				if (rc != X86EMUL_CONTINUE)
					goto done;
				realmode_lidt(ctxt->vcpu, size, address);
			}
			/* Disable writeback. */
			c->dst.type = OP_NONE;
			break;
		case 4: /* smsw */
			c->dst.bytes = 2;
			c->dst.val = ops->get_cr(0, ctxt->vcpu);
			break;
		case 6: /* lmsw */
			ops->set_cr(0, (ops->get_cr(0, ctxt->vcpu) & ~0x0eul) |
				    (c->src.val & 0x0f), ctxt->vcpu);
			c->dst.type = OP_NONE;
			break;
		case 5: /* not defined */
			emulate_ud(ctxt);
			goto done;
		case 7: /* invlpg */
			emulate_invlpg(ctxt->vcpu, c->modrm_ea);
			/* Disable writeback. */
			c->dst.type = OP_NONE;
			break;
		default:
			goto cannot_emulate;
		}
		break;
	case 0x05: 		/* syscall */
		rc = emulate_syscall(ctxt, ops);
		if (rc != X86EMUL_CONTINUE)
			goto done;
		else
			goto writeback;
		break;
	case 0x06:
		emulate_clts(ctxt->vcpu);
		c->dst.type = OP_NONE;
		break;
	case 0x09:		/* wbinvd */
		kvm_emulate_wbinvd(ctxt->vcpu);
		c->dst.type = OP_NONE;
		break;
	case 0x08:		/* invd */
	case 0x0d:		/* GrpP (prefetch) */
	case 0x18:		/* Grp16 (prefetch/nop) */
		c->dst.type = OP_NONE;
		break;
	case 0x20: /* mov cr, reg */
		switch (c->modrm_reg) {
		case 1:
		case 5 ... 7:
		case 9 ... 15:
			emulate_ud(ctxt);
			goto done;
		}
		c->dst.val = ops->get_cr(c->modrm_reg, ctxt->vcpu);
		break;
	case 0x21: /* mov from dr to reg */
		if ((ops->get_cr(4, ctxt->vcpu) & X86_CR4_DE) &&
		    (c->modrm_reg == 4 || c->modrm_reg == 5)) {
			emulate_ud(ctxt);
			goto done;
		}
		ops->get_dr(c->modrm_reg, &c->regs[c->modrm_rm], ctxt->vcpu);
		c->dst.type = OP_NONE;	/* no writeback */
		break;
	case 0x22: /* mov reg, cr */
		if (ops->set_cr(c->modrm_reg, c->src.val, ctxt->vcpu)) {
			emulate_gp(ctxt, 0);
			goto done;
		}
		c->dst.type = OP_NONE;
		break;
	case 0x23: /* mov from reg to dr */
		if ((ops->get_cr(4, ctxt->vcpu) & X86_CR4_DE) &&
		    (c->modrm_reg == 4 || c->modrm_reg == 5)) {
			emulate_ud(ctxt);
			goto done;
		}

		if (ops->set_dr(c->modrm_reg, c->regs[c->modrm_rm] &
				((ctxt->mode == X86EMUL_MODE_PROT64) ?
				 ~0ULL : ~0U), ctxt->vcpu) < 0) {
			/* #UD condition is already handled by the code above */
			emulate_gp(ctxt, 0);
			goto done;
		}

		c->dst.type = OP_NONE;	/* no writeback */
		break;
	case 0x30:
		/* wrmsr */
		msr_data = (u32)c->regs[VCPU_REGS_RAX]
			| ((u64)c->regs[VCPU_REGS_RDX] << 32);
		if (ops->set_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], msr_data)) {
			emulate_gp(ctxt, 0);
			goto done;
		}
		rc = X86EMUL_CONTINUE;
		c->dst.type = OP_NONE;
		break;
	case 0x32:
		/* rdmsr */
		if (ops->get_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], &msr_data)) {
			emulate_gp(ctxt, 0);
			goto done;
		} else {
			c->regs[VCPU_REGS_RAX] = (u32)msr_data;
			c->regs[VCPU_REGS_RDX] = msr_data >> 32;
		}
		rc = X86EMUL_CONTINUE;
		c->dst.type = OP_NONE;
		break;
	case 0x34:		/* sysenter */
		rc = emulate_sysenter(ctxt, ops);
		if (rc != X86EMUL_CONTINUE)
			goto done;
		else
			goto writeback;
		break;
	case 0x35:		/* sysexit */
		rc = emulate_sysexit(ctxt, ops);
		if (rc != X86EMUL_CONTINUE)
			goto done;
		else
			goto writeback;
		break;
	case 0x40 ... 0x4f:	/* cmov */
		c->dst.val = c->dst.orig_val = c->src.val;
		if (!test_cc(c->b, ctxt->eflags))
			c->dst.type = OP_NONE; /* no writeback */
		break;
	case 0x80 ... 0x8f: /* jnz rel, etc*/
		if (test_cc(c->b, ctxt->eflags))
			jmp_rel(c, c->src.val);
		c->dst.type = OP_NONE;
		break;
	case 0xa0:	  /* push fs */
		emulate_push_sreg(ctxt, ops, VCPU_SREG_FS);
		break;
	case 0xa1:	 /* pop fs */
		rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_FS);
		if (rc != X86EMUL_CONTINUE)
			goto done;
		break;
	case 0xa3:
	      bt:		/* bt */
		c->dst.type = OP_NONE;
		/* only subword offset */
		c->src.val &= (c->dst.bytes << 3) - 1;
		emulate_2op_SrcV_nobyte("bt", c->src, c->dst, ctxt->eflags);
		break;
	case 0xa4: /* shld imm8, r, r/m */
	case 0xa5: /* shld cl, r, r/m */
		emulate_2op_cl("shld", c->src2, c->src, c->dst, ctxt->eflags);
		break;
	case 0xa8:	/* push gs */
		emulate_push_sreg(ctxt, ops, VCPU_SREG_GS);
		break;
	case 0xa9:	/* pop gs */
		rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_GS);
		if (rc != X86EMUL_CONTINUE)
			goto done;
		break;
	case 0xab:
	      bts:		/* bts */
		/* only subword offset */
		c->src.val &= (c->dst.bytes << 3) - 1;
		emulate_2op_SrcV_nobyte("bts", c->src, c->dst, ctxt->eflags);
		break;
	case 0xac: /* shrd imm8, r, r/m */
	case 0xad: /* shrd cl, r, r/m */
		emulate_2op_cl("shrd", c->src2, c->src, c->dst, ctxt->eflags);
		break;
	case 0xae:              /* clflush */
		break;
	case 0xb0 ... 0xb1:	/* cmpxchg */
		/*
		 * Save real source value, then compare EAX against
		 * destination.
		 */
		c->src.orig_val = c->src.val;
		c->src.val = c->regs[VCPU_REGS_RAX];
		emulate_2op_SrcV("cmp", c->src, c->dst, ctxt->eflags);
		if (ctxt->eflags & EFLG_ZF) {
			/* Success: write back to memory. */
			c->dst.val = c->src.orig_val;
		} else {
			/* Failure: write the value we saw to EAX. */
			c->dst.type = OP_REG;
			c->dst.addr.reg = (unsigned long *)&c->regs[VCPU_REGS_RAX];
		}
		break;
	case 0xb3:
	      btr:		/* btr */
		/* only subword offset */
		c->src.val &= (c->dst.bytes << 3) - 1;
		emulate_2op_SrcV_nobyte("btr", c->src, c->dst, ctxt->eflags);
		break;
	case 0xb6 ... 0xb7:	/* movzx */
		c->dst.bytes = c->op_bytes;
		c->dst.val = (c->d & ByteOp) ? (u8) c->src.val
						       : (u16) c->src.val;
		break;
	case 0xba:		/* Grp8 */
		switch (c->modrm_reg & 3) {
		case 0:
			goto bt;
		case 1:
			goto bts;
		case 2:
			goto btr;
		case 3:
			goto btc;
		}
		break;
	case 0xbb:
	      btc:		/* btc */
		/* only subword offset */
		c->src.val &= (c->dst.bytes << 3) - 1;
		emulate_2op_SrcV_nobyte("btc", c->src, c->dst, ctxt->eflags);
		break;
	case 0xbe ... 0xbf:	/* movsx */
		c->dst.bytes = c->op_bytes;
		c->dst.val = (c->d & ByteOp) ? (s8) c->src.val :
							(s16) c->src.val;
		break;
	case 0xc3:		/* movnti */
		c->dst.bytes = c->op_bytes;
		c->dst.val = (c->op_bytes == 4) ? (u32) c->src.val :
							(u64) c->src.val;
		break;
	case 0xc7:		/* Grp9 (cmpxchg8b) */
		rc = emulate_grp9(ctxt, ops);
		if (rc != X86EMUL_CONTINUE)
			goto done;
		break;
	default:
		goto cannot_emulate;
	}
	goto writeback;

cannot_emulate:
	DPRINTF("Cannot emulate %02x\n", c->b);
	return -1;
}