提交 96971e9a 编写于 作者: L Linus Torvalds

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull kvm fixes from Paolo Bonzini:
 "This is a pretty large update.  I think it is roughly as big as what I
  usually had for the _whole_ rc period.

  There are a few bad bugs where the guest can OOPS or crash the host.
  We have also started looking at attack models for nested
  virtualization; bugs that usually result in the guest ring 0 crashing
  itself become more worrisome if you have nested virtualization,
  because the nested guest might bring down the non-nested guest as
  well.  For current uses of nested virtualization these do not really
  have a security impact, but you never know and bugs are bugs
  nevertheless.

  A lot of these bugs are in 3.17 too, resulting in a large number of
  stable@ Ccs.  I checked that all the patches apply there with no
  conflicts"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  kvm: vfio: fix unregister kvm_device_ops of vfio
  KVM: x86: Wrong assertion on paging_tmpl.h
  kvm: fix excessive pages un-pinning in kvm_iommu_map error path.
  KVM: x86: PREFETCH and HINT_NOP should have SrcMem flag
  KVM: x86: Emulator does not decode clflush well
  KVM: emulate: avoid accessing NULL ctxt->memopp
  KVM: x86: Decoding guest instructions which cross page boundary may fail
  kvm: x86: don't kill guest on unknown exit reason
  kvm: vmx: handle invvpid vm exit gracefully
  KVM: x86: Handle errors when RIP is set during far jumps
  KVM: x86: Emulator fixes for eip canonical checks on near branches
  KVM: x86: Fix wrong masking on relative jump/call
  KVM: x86: Improve thread safety in pit
  KVM: x86: Prevent host from panicking on shared MSR writes.
  KVM: x86: Check non-canonical addresses upon WRMSR
......@@ -989,6 +989,20 @@ static inline void kvm_inject_gp(struct kvm_vcpu *vcpu, u32 error_code)
kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
}
static inline u64 get_canonical(u64 la)
{
return ((int64_t)la << 16) >> 16;
}
static inline bool is_noncanonical_address(u64 la)
{
#ifdef CONFIG_X86_64
return get_canonical(la) != la;
#else
return false;
#endif
}
#define TSS_IOPB_BASE_OFFSET 0x66
#define TSS_BASE_SIZE 0x68
#define TSS_IOPB_SIZE (65536 / 8)
......@@ -1050,7 +1064,7 @@ void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
unsigned long address);
void kvm_define_shared_msr(unsigned index, u32 msr);
void kvm_set_shared_msr(unsigned index, u64 val, u64 mask);
int kvm_set_shared_msr(unsigned index, u64 val, u64 mask);
bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip);
......
......@@ -67,6 +67,7 @@
#define EXIT_REASON_EPT_MISCONFIG 49
#define EXIT_REASON_INVEPT 50
#define EXIT_REASON_PREEMPTION_TIMER 52
#define EXIT_REASON_INVVPID 53
#define EXIT_REASON_WBINVD 54
#define EXIT_REASON_XSETBV 55
#define EXIT_REASON_APIC_WRITE 56
......@@ -114,6 +115,7 @@
{ EXIT_REASON_EOI_INDUCED, "EOI_INDUCED" }, \
{ EXIT_REASON_INVALID_STATE, "INVALID_STATE" }, \
{ EXIT_REASON_INVD, "INVD" }, \
{ EXIT_REASON_INVVPID, "INVVPID" }, \
{ EXIT_REASON_INVPCID, "INVPCID" }
#endif /* _UAPIVMX_H */
......@@ -504,11 +504,6 @@ static void rsp_increment(struct x86_emulate_ctxt *ctxt, int inc)
masked_increment(reg_rmw(ctxt, VCPU_REGS_RSP), stack_mask(ctxt), inc);
}
static inline void jmp_rel(struct x86_emulate_ctxt *ctxt, int rel)
{
register_address_increment(ctxt, &ctxt->_eip, rel);
}
static u32 desc_limit_scaled(struct desc_struct *desc)
{
u32 limit = get_desc_limit(desc);
......@@ -569,6 +564,38 @@ static int emulate_nm(struct x86_emulate_ctxt *ctxt)
return emulate_exception(ctxt, NM_VECTOR, 0, false);
}
static inline int assign_eip_far(struct x86_emulate_ctxt *ctxt, ulong dst,
int cs_l)
{
switch (ctxt->op_bytes) {
case 2:
ctxt->_eip = (u16)dst;
break;
case 4:
ctxt->_eip = (u32)dst;
break;
case 8:
if ((cs_l && is_noncanonical_address(dst)) ||
(!cs_l && (dst & ~(u32)-1)))
return emulate_gp(ctxt, 0);
ctxt->_eip = dst;
break;
default:
WARN(1, "unsupported eip assignment size\n");
}
return X86EMUL_CONTINUE;
}
static inline int assign_eip_near(struct x86_emulate_ctxt *ctxt, ulong dst)
{
return assign_eip_far(ctxt, dst, ctxt->mode == X86EMUL_MODE_PROT64);
}
static inline int jmp_rel(struct x86_emulate_ctxt *ctxt, int rel)
{
return assign_eip_near(ctxt, ctxt->_eip + rel);
}
static u16 get_segment_selector(struct x86_emulate_ctxt *ctxt, unsigned seg)
{
u16 selector;
......@@ -751,8 +778,10 @@ static int __do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt, int op_size)
static __always_inline int do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt,
unsigned size)
{
if (unlikely(ctxt->fetch.end - ctxt->fetch.ptr < size))
return __do_insn_fetch_bytes(ctxt, size);
unsigned done_size = ctxt->fetch.end - ctxt->fetch.ptr;
if (unlikely(done_size < size))
return __do_insn_fetch_bytes(ctxt, size - done_size);
else
return X86EMUL_CONTINUE;
}
......@@ -1416,7 +1445,9 @@ static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt,
/* Does not support long mode */
static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
u16 selector, int seg, u8 cpl, bool in_task_switch)
u16 selector, int seg, u8 cpl,
bool in_task_switch,
struct desc_struct *desc)
{
struct desc_struct seg_desc, old_desc;
u8 dpl, rpl;
......@@ -1557,6 +1588,8 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
}
load:
ctxt->ops->set_segment(ctxt, selector, &seg_desc, base3, seg);
if (desc)
*desc = seg_desc;
return X86EMUL_CONTINUE;
exception:
return emulate_exception(ctxt, err_vec, err_code, true);
......@@ -1566,7 +1599,7 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
u16 selector, int seg)
{
u8 cpl = ctxt->ops->cpl(ctxt);
return __load_segment_descriptor(ctxt, selector, seg, cpl, false);
return __load_segment_descriptor(ctxt, selector, seg, cpl, false, NULL);
}
static void write_register_operand(struct operand *op)
......@@ -1960,17 +1993,31 @@ static int em_iret(struct x86_emulate_ctxt *ctxt)
static int em_jmp_far(struct x86_emulate_ctxt *ctxt)
{
int rc;
unsigned short sel;
unsigned short sel, old_sel;
struct desc_struct old_desc, new_desc;
const struct x86_emulate_ops *ops = ctxt->ops;
u8 cpl = ctxt->ops->cpl(ctxt);
/* Assignment of RIP may only fail in 64-bit mode */
if (ctxt->mode == X86EMUL_MODE_PROT64)
ops->get_segment(ctxt, &old_sel, &old_desc, NULL,
VCPU_SREG_CS);
memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
rc = load_segment_descriptor(ctxt, sel, VCPU_SREG_CS);
rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl, false,
&new_desc);
if (rc != X86EMUL_CONTINUE)
return rc;
ctxt->_eip = 0;
memcpy(&ctxt->_eip, ctxt->src.valptr, ctxt->op_bytes);
return X86EMUL_CONTINUE;
rc = assign_eip_far(ctxt, ctxt->src.val, new_desc.l);
if (rc != X86EMUL_CONTINUE) {
WARN_ON(!ctxt->mode != X86EMUL_MODE_PROT64);
/* assigning eip failed; restore the old cs */
ops->set_segment(ctxt, old_sel, &old_desc, 0, VCPU_SREG_CS);
return rc;
}
return rc;
}
static int em_grp45(struct x86_emulate_ctxt *ctxt)
......@@ -1981,13 +2028,15 @@ static int em_grp45(struct x86_emulate_ctxt *ctxt)
case 2: /* call near abs */ {
long int old_eip;
old_eip = ctxt->_eip;
ctxt->_eip = ctxt->src.val;
rc = assign_eip_near(ctxt, ctxt->src.val);
if (rc != X86EMUL_CONTINUE)
break;
ctxt->src.val = old_eip;
rc = em_push(ctxt);
break;
}
case 4: /* jmp abs */
ctxt->_eip = ctxt->src.val;
rc = assign_eip_near(ctxt, ctxt->src.val);
break;
case 5: /* jmp far */
rc = em_jmp_far(ctxt);
......@@ -2022,30 +2071,47 @@ static int em_cmpxchg8b(struct x86_emulate_ctxt *ctxt)
static int em_ret(struct x86_emulate_ctxt *ctxt)
{
ctxt->dst.type = OP_REG;
ctxt->dst.addr.reg = &ctxt->_eip;
ctxt->dst.bytes = ctxt->op_bytes;
return em_pop(ctxt);
int rc;
unsigned long eip;
rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
if (rc != X86EMUL_CONTINUE)
return rc;
return assign_eip_near(ctxt, eip);
}
static int em_ret_far(struct x86_emulate_ctxt *ctxt)
{
int rc;
unsigned long cs;
unsigned long eip, cs;
u16 old_cs;
int cpl = ctxt->ops->cpl(ctxt);
struct desc_struct old_desc, new_desc;
const struct x86_emulate_ops *ops = ctxt->ops;
rc = emulate_pop(ctxt, &ctxt->_eip, ctxt->op_bytes);
if (ctxt->mode == X86EMUL_MODE_PROT64)
ops->get_segment(ctxt, &old_cs, &old_desc, NULL,
VCPU_SREG_CS);
rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
if (rc != X86EMUL_CONTINUE)
return rc;
if (ctxt->op_bytes == 4)
ctxt->_eip = (u32)ctxt->_eip;
rc = emulate_pop(ctxt, &cs, ctxt->op_bytes);
if (rc != X86EMUL_CONTINUE)
return rc;
/* Outer-privilege level return is not implemented */
if (ctxt->mode >= X86EMUL_MODE_PROT16 && (cs & 3) > cpl)
return X86EMUL_UNHANDLEABLE;
rc = load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS);
rc = __load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS, 0, false,
&new_desc);
if (rc != X86EMUL_CONTINUE)
return rc;
rc = assign_eip_far(ctxt, eip, new_desc.l);
if (rc != X86EMUL_CONTINUE) {
WARN_ON(!ctxt->mode != X86EMUL_MODE_PROT64);
ops->set_segment(ctxt, old_cs, &old_desc, 0, VCPU_SREG_CS);
}
return rc;
}
......@@ -2306,7 +2372,7 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
{
const struct x86_emulate_ops *ops = ctxt->ops;
struct desc_struct cs, ss;
u64 msr_data;
u64 msr_data, rcx, rdx;
int usermode;
u16 cs_sel = 0, ss_sel = 0;
......@@ -2322,6 +2388,9 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
else
usermode = X86EMUL_MODE_PROT32;
rcx = reg_read(ctxt, VCPU_REGS_RCX);
rdx = reg_read(ctxt, VCPU_REGS_RDX);
cs.dpl = 3;
ss.dpl = 3;
ops->get_msr(ctxt, MSR_IA32_SYSENTER_CS, &msr_data);
......@@ -2339,6 +2408,9 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
ss_sel = cs_sel + 8;
cs.d = 0;
cs.l = 1;
if (is_noncanonical_address(rcx) ||
is_noncanonical_address(rdx))
return emulate_gp(ctxt, 0);
break;
}
cs_sel |= SELECTOR_RPL_MASK;
......@@ -2347,8 +2419,8 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);
ctxt->_eip = reg_read(ctxt, VCPU_REGS_RDX);
*reg_write(ctxt, VCPU_REGS_RSP) = reg_read(ctxt, VCPU_REGS_RCX);
ctxt->_eip = rdx;
*reg_write(ctxt, VCPU_REGS_RSP) = rcx;
return X86EMUL_CONTINUE;
}
......@@ -2466,19 +2538,24 @@ static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt,
* Now load segment descriptors. If fault happens at this stage
* it is handled in a context of new task
*/
ret = __load_segment_descriptor(ctxt, tss->ldt, VCPU_SREG_LDTR, cpl, true);
ret = __load_segment_descriptor(ctxt, tss->ldt, VCPU_SREG_LDTR, cpl,
true, NULL);
if (ret != X86EMUL_CONTINUE)
return ret;
ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl, true);
ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl,
true, NULL);
if (ret != X86EMUL_CONTINUE)
return ret;
ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl, true);
ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl,
true, NULL);
if (ret != X86EMUL_CONTINUE)
return ret;
ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl, true);
ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl,
true, NULL);
if (ret != X86EMUL_CONTINUE)
return ret;
ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl, true);
ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl,
true, NULL);
if (ret != X86EMUL_CONTINUE)
return ret;
......@@ -2603,25 +2680,32 @@ static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt,
* Now load segment descriptors. If fault happenes at this stage
* it is handled in a context of new task
*/
ret = __load_segment_descriptor(ctxt, tss->ldt_selector, VCPU_SREG_LDTR, cpl, true);
ret = __load_segment_descriptor(ctxt, tss->ldt_selector, VCPU_SREG_LDTR,
cpl, true, NULL);
if (ret != X86EMUL_CONTINUE)
return ret;
ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl, true);
ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl,
true, NULL);
if (ret != X86EMUL_CONTINUE)
return ret;
ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl, true);
ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl,
true, NULL);
if (ret != X86EMUL_CONTINUE)
return ret;
ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl, true);
ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl,
true, NULL);
if (ret != X86EMUL_CONTINUE)
return ret;
ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl, true);
ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl,
true, NULL);
if (ret != X86EMUL_CONTINUE)
return ret;
ret = __load_segment_descriptor(ctxt, tss->fs, VCPU_SREG_FS, cpl, true);
ret = __load_segment_descriptor(ctxt, tss->fs, VCPU_SREG_FS, cpl,
true, NULL);
if (ret != X86EMUL_CONTINUE)
return ret;
ret = __load_segment_descriptor(ctxt, tss->gs, VCPU_SREG_GS, cpl, true);
ret = __load_segment_descriptor(ctxt, tss->gs, VCPU_SREG_GS, cpl,
true, NULL);
if (ret != X86EMUL_CONTINUE)
return ret;
......@@ -2888,10 +2972,13 @@ static int em_aad(struct x86_emulate_ctxt *ctxt)
static int em_call(struct x86_emulate_ctxt *ctxt)
{
int rc;
long rel = ctxt->src.val;
ctxt->src.val = (unsigned long)ctxt->_eip;
jmp_rel(ctxt, rel);
rc = jmp_rel(ctxt, rel);
if (rc != X86EMUL_CONTINUE)
return rc;
return em_push(ctxt);
}
......@@ -2900,34 +2987,50 @@ static int em_call_far(struct x86_emulate_ctxt *ctxt)
u16 sel, old_cs;
ulong old_eip;
int rc;
struct desc_struct old_desc, new_desc;
const struct x86_emulate_ops *ops = ctxt->ops;
int cpl = ctxt->ops->cpl(ctxt);
old_cs = get_segment_selector(ctxt, VCPU_SREG_CS);
old_eip = ctxt->_eip;
ops->get_segment(ctxt, &old_cs, &old_desc, NULL, VCPU_SREG_CS);
memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
if (load_segment_descriptor(ctxt, sel, VCPU_SREG_CS))
rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl, false,
&new_desc);
if (rc != X86EMUL_CONTINUE)
return X86EMUL_CONTINUE;
ctxt->_eip = 0;
memcpy(&ctxt->_eip, ctxt->src.valptr, ctxt->op_bytes);
rc = assign_eip_far(ctxt, ctxt->src.val, new_desc.l);
if (rc != X86EMUL_CONTINUE)
goto fail;
ctxt->src.val = old_cs;
rc = em_push(ctxt);
if (rc != X86EMUL_CONTINUE)
return rc;
goto fail;
ctxt->src.val = old_eip;
return em_push(ctxt);
rc = em_push(ctxt);
/* If we failed, we tainted the memory, but the very least we should
restore cs */
if (rc != X86EMUL_CONTINUE)
goto fail;
return rc;
fail:
ops->set_segment(ctxt, old_cs, &old_desc, 0, VCPU_SREG_CS);
return rc;
}
static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt)
{
int rc;
unsigned long eip;
ctxt->dst.type = OP_REG;
ctxt->dst.addr.reg = &ctxt->_eip;
ctxt->dst.bytes = ctxt->op_bytes;
rc = emulate_pop(ctxt, &ctxt->dst.val, ctxt->op_bytes);
rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
if (rc != X86EMUL_CONTINUE)
return rc;
rc = assign_eip_near(ctxt, eip);
if (rc != X86EMUL_CONTINUE)
return rc;
rsp_increment(ctxt, ctxt->src.val);
......@@ -3254,20 +3357,24 @@ static int em_lmsw(struct x86_emulate_ctxt *ctxt)
static int em_loop(struct x86_emulate_ctxt *ctxt)
{
int rc = X86EMUL_CONTINUE;
register_address_increment(ctxt, reg_rmw(ctxt, VCPU_REGS_RCX), -1);
if ((address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) != 0) &&
(ctxt->b == 0xe2 || test_cc(ctxt->b ^ 0x5, ctxt->eflags)))
jmp_rel(ctxt, ctxt->src.val);
rc = jmp_rel(ctxt, ctxt->src.val);
return X86EMUL_CONTINUE;
return rc;
}
static int em_jcxz(struct x86_emulate_ctxt *ctxt)
{
int rc = X86EMUL_CONTINUE;
if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0)
jmp_rel(ctxt, ctxt->src.val);
rc = jmp_rel(ctxt, ctxt->src.val);
return X86EMUL_CONTINUE;
return rc;
}
static int em_in(struct x86_emulate_ctxt *ctxt)
......@@ -3355,6 +3462,12 @@ static int em_bswap(struct x86_emulate_ctxt *ctxt)
return X86EMUL_CONTINUE;
}
static int em_clflush(struct x86_emulate_ctxt *ctxt)
{
/* emulating clflush regardless of cpuid */
return X86EMUL_CONTINUE;
}
static bool valid_cr(int nr)
{
switch (nr) {
......@@ -3693,6 +3806,16 @@ static const struct opcode group11[] = {
X7(D(Undefined)),
};
static const struct gprefix pfx_0f_ae_7 = {
I(SrcMem | ByteOp, em_clflush), N, N, N,
};
static const struct group_dual group15 = { {
N, N, N, N, N, N, N, GP(0, &pfx_0f_ae_7),
}, {
N, N, N, N, N, N, N, N,
} };
static const struct gprefix pfx_0f_6f_0f_7f = {
I(Mmx, em_mov), I(Sse | Aligned, em_mov), N, I(Sse | Unaligned, em_mov),
};
......@@ -3901,10 +4024,11 @@ static const struct opcode twobyte_table[256] = {
N, I(ImplicitOps | EmulateOnUD, em_syscall),
II(ImplicitOps | Priv, em_clts, clts), N,
DI(ImplicitOps | Priv, invd), DI(ImplicitOps | Priv, wbinvd), N, N,
N, D(ImplicitOps | ModRM), N, N,
N, D(ImplicitOps | ModRM | SrcMem | NoAccess), N, N,
/* 0x10 - 0x1F */
N, N, N, N, N, N, N, N,
D(ImplicitOps | ModRM), N, N, N, N, N, N, D(ImplicitOps | ModRM),
D(ImplicitOps | ModRM | SrcMem | NoAccess),
N, N, N, N, N, N, D(ImplicitOps | ModRM | SrcMem | NoAccess),
/* 0x20 - 0x2F */
DIP(ModRM | DstMem | Priv | Op3264 | NoMod, cr_read, check_cr_read),
DIP(ModRM | DstMem | Priv | Op3264 | NoMod, dr_read, check_dr_read),
......@@ -3956,7 +4080,7 @@ static const struct opcode twobyte_table[256] = {
F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_bts),
F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shrd),
F(DstMem | SrcReg | Src2CL | ModRM, em_shrd),
D(ModRM), F(DstReg | SrcMem | ModRM, em_imul),
GD(0, &group15), F(DstReg | SrcMem | ModRM, em_imul),
/* 0xB0 - 0xB7 */
I2bv(DstMem | SrcReg | ModRM | Lock | PageTable, em_cmpxchg),
I(DstReg | SrcMemFAddr | ModRM | Src2SS, em_lseg),
......@@ -4473,10 +4597,10 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len)
/* Decode and fetch the destination operand: register or memory. */
rc = decode_operand(ctxt, &ctxt->dst, (ctxt->d >> DstShift) & OpMask);
done:
if (ctxt->rip_relative)
ctxt->memopp->addr.mem.ea += ctxt->_eip;
done:
return (rc != X86EMUL_CONTINUE) ? EMULATION_FAILED : EMULATION_OK;
}
......@@ -4726,7 +4850,7 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
break;
case 0x70 ... 0x7f: /* jcc (short) */
if (test_cc(ctxt->b, ctxt->eflags))
jmp_rel(ctxt, ctxt->src.val);
rc = jmp_rel(ctxt, ctxt->src.val);
break;
case 0x8d: /* lea r16/r32, m */
ctxt->dst.val = ctxt->src.addr.mem.ea;
......@@ -4756,7 +4880,7 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
break;
case 0xe9: /* jmp rel */
case 0xeb: /* jmp rel short */
jmp_rel(ctxt, ctxt->src.val);
rc = jmp_rel(ctxt, ctxt->src.val);
ctxt->dst.type = OP_NONE; /* Disable writeback. */
break;
case 0xf4: /* hlt */
......@@ -4881,13 +5005,11 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
break;
case 0x80 ... 0x8f: /* jnz rel, etc*/
if (test_cc(ctxt->b, ctxt->eflags))
jmp_rel(ctxt, ctxt->src.val);
rc = jmp_rel(ctxt, ctxt->src.val);
break;
case 0x90 ... 0x9f: /* setcc r/m8 */
ctxt->dst.val = test_cc(ctxt->b, ctxt->eflags);
break;
case 0xae: /* clflush */
break;
case 0xb6 ... 0xb7: /* movzx */
ctxt->dst.bytes = ctxt->op_bytes;
ctxt->dst.val = (ctxt->src.bytes == 1) ? (u8) ctxt->src.val
......
......@@ -262,8 +262,10 @@ void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu)
return;
timer = &pit->pit_state.timer;
mutex_lock(&pit->pit_state.lock);
if (hrtimer_cancel(timer))
hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
mutex_unlock(&pit->pit_state.lock);
}
static void destroy_pit_timer(struct kvm_pit *pit)
......
......@@ -298,7 +298,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
}
#endif
walker->max_level = walker->level;
ASSERT(!is_long_mode(vcpu) && is_pae(vcpu));
ASSERT(!(is_long_mode(vcpu) && !is_pae(vcpu)));
accessed_dirty = PT_GUEST_ACCESSED_MASK;
pt_access = pte_access = ACC_ALL;
......
......@@ -3251,7 +3251,7 @@ static int wrmsr_interception(struct vcpu_svm *svm)
msr.host_initiated = false;
svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
if (svm_set_msr(&svm->vcpu, &msr)) {
if (kvm_set_msr(&svm->vcpu, &msr)) {
trace_kvm_msr_write_ex(ecx, data);
kvm_inject_gp(&svm->vcpu, 0);
} else {
......@@ -3551,9 +3551,9 @@ static int handle_exit(struct kvm_vcpu *vcpu)
if (exit_code >= ARRAY_SIZE(svm_exit_handlers)
|| !svm_exit_handlers[exit_code]) {
kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
kvm_run->hw.hardware_exit_reason = exit_code;
return 0;
WARN_ONCE(1, "vmx: unexpected exit reason 0x%x\n", exit_code);
kvm_queue_exception(vcpu, UD_VECTOR);
return 1;
}
return svm_exit_handlers[exit_code](svm);
......
......@@ -2659,12 +2659,15 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
default:
msr = find_msr_entry(vmx, msr_index);
if (msr) {
u64 old_msr_data = msr->data;
msr->data = data;
if (msr - vmx->guest_msrs < vmx->save_nmsrs) {
preempt_disable();
kvm_set_shared_msr(msr->index, msr->data,
msr->mask);
ret = kvm_set_shared_msr(msr->index, msr->data,
msr->mask);
preempt_enable();
if (ret)
msr->data = old_msr_data;
}
break;
}
......@@ -5291,7 +5294,7 @@ static int handle_wrmsr(struct kvm_vcpu *vcpu)
msr.data = data;
msr.index = ecx;
msr.host_initiated = false;
if (vmx_set_msr(vcpu, &msr) != 0) {
if (kvm_set_msr(vcpu, &msr) != 0) {
trace_kvm_msr_write_ex(ecx, data);
kvm_inject_gp(vcpu, 0);
return 1;
......@@ -6743,6 +6746,12 @@ static int handle_invept(struct kvm_vcpu *vcpu)
return 1;
}
static int handle_invvpid(struct kvm_vcpu *vcpu)
{
kvm_queue_exception(vcpu, UD_VECTOR);
return 1;
}
/*
* The exit handlers return 1 if the exit was handled fully and guest execution
* may resume. Otherwise they set the kvm_run parameter to indicate what needs
......@@ -6788,6 +6797,7 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
[EXIT_REASON_MWAIT_INSTRUCTION] = handle_mwait,
[EXIT_REASON_MONITOR_INSTRUCTION] = handle_monitor,
[EXIT_REASON_INVEPT] = handle_invept,
[EXIT_REASON_INVVPID] = handle_invvpid,
};
static const int kvm_vmx_max_exit_handlers =
......@@ -7023,7 +7033,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
case EXIT_REASON_VMPTRST: case EXIT_REASON_VMREAD:
case EXIT_REASON_VMRESUME: case EXIT_REASON_VMWRITE:
case EXIT_REASON_VMOFF: case EXIT_REASON_VMON:
case EXIT_REASON_INVEPT:
case EXIT_REASON_INVEPT: case EXIT_REASON_INVVPID:
/*
* VMX instructions trap unconditionally. This allows L1 to
* emulate them for its L2 guest, i.e., allows 3-level nesting!
......@@ -7164,10 +7174,10 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
&& kvm_vmx_exit_handlers[exit_reason])
return kvm_vmx_exit_handlers[exit_reason](vcpu);
else {
vcpu->run->exit_reason = KVM_EXIT_UNKNOWN;
vcpu->run->hw.hardware_exit_reason = exit_reason;
WARN_ONCE(1, "vmx: unexpected exit reason 0x%x\n", exit_reason);
kvm_queue_exception(vcpu, UD_VECTOR);
return 1;
}
return 0;
}
static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
......
......@@ -229,20 +229,25 @@ static void kvm_shared_msr_cpu_online(void)
shared_msr_update(i, shared_msrs_global.msrs[i]);
}
void kvm_set_shared_msr(unsigned slot, u64 value, u64 mask)
int kvm_set_shared_msr(unsigned slot, u64 value, u64 mask)
{
unsigned int cpu = smp_processor_id();
struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu);
int err;
if (((value ^ smsr->values[slot].curr) & mask) == 0)
return;
return 0;
smsr->values[slot].curr = value;
wrmsrl(shared_msrs_global.msrs[slot], value);
err = wrmsrl_safe(shared_msrs_global.msrs[slot], value);
if (err)
return 1;
if (!smsr->registered) {
smsr->urn.on_user_return = kvm_on_user_return;
user_return_notifier_register(&smsr->urn);
smsr->registered = true;
}
return 0;
}
EXPORT_SYMBOL_GPL(kvm_set_shared_msr);
......@@ -987,7 +992,6 @@ void kvm_enable_efer_bits(u64 mask)
}
EXPORT_SYMBOL_GPL(kvm_enable_efer_bits);
/*
* Writes msr value into into the appropriate "register".
* Returns 0 on success, non-0 otherwise.
......@@ -995,8 +999,34 @@ EXPORT_SYMBOL_GPL(kvm_enable_efer_bits);
*/
int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
{
switch (msr->index) {
case MSR_FS_BASE:
case MSR_GS_BASE:
case MSR_KERNEL_GS_BASE:
case MSR_CSTAR:
case MSR_LSTAR:
if (is_noncanonical_address(msr->data))
return 1;
break;
case MSR_IA32_SYSENTER_EIP:
case MSR_IA32_SYSENTER_ESP:
/*
* IA32_SYSENTER_ESP and IA32_SYSENTER_EIP cause #GP if
* non-canonical address is written on Intel but not on
* AMD (which ignores the top 32-bits, because it does
* not implement 64-bit SYSENTER).
*
* 64-bit code should hence be able to write a non-canonical
* value on AMD. Making the address canonical ensures that
* vmentry does not fail on Intel after writing a non-canonical
* value, and that something deterministic happens if the guest
* invokes 64-bit SYSENTER.
*/
msr->data = get_canonical(msr->data);
}
return kvm_x86_ops->set_msr(vcpu, msr);
}
EXPORT_SYMBOL_GPL(kvm_set_msr);
/*
* Adapt set_msr() to msr_io()'s calling convention
......
......@@ -1080,6 +1080,7 @@ void kvm_device_get(struct kvm_device *dev);
void kvm_device_put(struct kvm_device *dev);
struct kvm_device *kvm_device_from_filp(struct file *filp);
int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type);
void kvm_unregister_device_ops(u32 type);
extern struct kvm_device_ops kvm_mpic_ops;
extern struct kvm_device_ops kvm_xics_ops;
......
......@@ -43,13 +43,13 @@ static void kvm_iommu_put_pages(struct kvm *kvm,
gfn_t base_gfn, unsigned long npages);
static pfn_t kvm_pin_pages(struct kvm_memory_slot *slot, gfn_t gfn,
unsigned long size)
unsigned long npages)
{
gfn_t end_gfn;
pfn_t pfn;
pfn = gfn_to_pfn_memslot(slot, gfn);
end_gfn = gfn + (size >> PAGE_SHIFT);
end_gfn = gfn + npages;
gfn += 1;
if (is_error_noslot_pfn(pfn))
......@@ -119,7 +119,7 @@ int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot)
* Pin all pages we are about to map in memory. This is
* important because we unmap and unpin in 4kb steps later.
*/
pfn = kvm_pin_pages(slot, gfn, page_size);
pfn = kvm_pin_pages(slot, gfn, page_size >> PAGE_SHIFT);
if (is_error_noslot_pfn(pfn)) {
gfn += 1;
continue;
......@@ -131,7 +131,7 @@ int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot)
if (r) {
printk(KERN_ERR "kvm_iommu_map_address:"
"iommu failed to map pfn=%llx\n", pfn);
kvm_unpin_pages(kvm, pfn, page_size);
kvm_unpin_pages(kvm, pfn, page_size >> PAGE_SHIFT);
goto unmap_pages;
}
......
......@@ -2354,6 +2354,12 @@ int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type)
return 0;
}
void kvm_unregister_device_ops(u32 type)
{
if (kvm_device_ops_table[type] != NULL)
kvm_device_ops_table[type] = NULL;
}
static int kvm_ioctl_create_device(struct kvm *kvm,
struct kvm_create_device *cd)
{
......@@ -3328,5 +3334,6 @@ void kvm_exit(void)
kvm_arch_exit();
kvm_irqfd_exit();
free_cpumask_var(cpus_hardware_enabled);
kvm_vfio_ops_exit();
}
EXPORT_SYMBOL_GPL(kvm_exit);
......@@ -283,3 +283,8 @@ int kvm_vfio_ops_init(void)
{
return kvm_register_device_ops(&kvm_vfio_ops, KVM_DEV_TYPE_VFIO);
}
void kvm_vfio_ops_exit(void)
{
kvm_unregister_device_ops(KVM_DEV_TYPE_VFIO);
}
......@@ -3,11 +3,15 @@
#ifdef CONFIG_KVM_VFIO
int kvm_vfio_ops_init(void);
void kvm_vfio_ops_exit(void);
#else
static inline int kvm_vfio_ops_init(void)
{
return 0;
}
static inline void kvm_vfio_ops_exit(void)
{
}
#endif
#endif
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册