提交 08d19f51 编写于 作者: L Linus Torvalds

Merge branch 'kvm-updates/2.6.28' of git://git.kernel.org/pub/scm/linux/kernel/git/avi/kvm

* 'kvm-updates/2.6.28' of git://git.kernel.org/pub/scm/linux/kernel/git/avi/kvm: (134 commits)
  KVM: ia64: Add intel iommu support for guests.
  KVM: ia64: add directed mmio range support for kvm guests
  KVM: ia64: Make pmt table be able to hold physical mmio entries.
  KVM: Move irqchip_in_kernel() from ioapic.h to irq.h
  KVM: Separate irq ack notification out of arch/x86/kvm/irq.c
  KVM: Change is_mmio_pfn to kvm_is_mmio_pfn, and make it common for all archs
  KVM: Move device assignment logic to common code
  KVM: Device Assignment: Move vtd.c from arch/x86/kvm/ to virt/kvm/
  KVM: VMX: enable invlpg exiting if EPT is disabled
  KVM: x86: Silence various LAPIC-related host kernel messages
  KVM: Device Assignment: Map mmio pages into VT-d page table
  KVM: PIC: enhance IPI avoidance
  KVM: MMU: add "oos_shadow" parameter to disable oos
  KVM: MMU: speed up mmu_unsync_walk
  KVM: MMU: out of sync shadow core
  KVM: MMU: mmu_convert_notrap helper
  KVM: MMU: awareness of new kvm_mmu_zap_page behaviour
  KVM: MMU: mmu_parent_walk
  KVM: x86: trap invlpg
  KVM: MMU: sync roots on mmu reload
  ...
......@@ -2448,7 +2448,14 @@ S: Supported
KERNEL VIRTUAL MACHINE (KVM)
P: Avi Kivity
M: avi@qumranet.com
M: avi@redhat.com
L: kvm@vger.kernel.org
W: http://kvm.qumranet.com
S: Supported
KERNEL VIRTUAL MACHINE (KVM) FOR AMD-V
P: Joerg Roedel
M: joerg.roedel@amd.com
L: kvm@vger.kernel.org
W: http://kvm.qumranet.com
S: Supported
......
......@@ -132,7 +132,7 @@
#define GPFN_IOSAPIC (4UL << 60) /* IOSAPIC base */
#define GPFN_LEGACY_IO (5UL << 60) /* Legacy I/O base */
#define GPFN_GFW (6UL << 60) /* Guest Firmware */
#define GPFN_HIGH_MMIO (7UL << 60) /* High MMIO range */
#define GPFN_PHYS_MMIO (7UL << 60) /* Directed MMIO Range */
#define GPFN_IO_MASK (7UL << 60) /* Guest pfn is I/O type */
#define GPFN_INV_MASK (1UL << 63) /* Guest pfn is invalid */
......@@ -413,6 +413,10 @@ struct kvm_arch {
struct kvm_ioapic *vioapic;
struct kvm_vm_stat stat;
struct kvm_sal_data rdv_sal_data;
struct list_head assigned_dev_head;
struct dmar_domain *intel_iommu_domain;
struct hlist_head irq_ack_notifier_list;
};
union cpuid3_t {
......
......@@ -46,4 +46,6 @@ config KVM_INTEL
config KVM_TRACE
bool
source drivers/virtio/Kconfig
endif # VIRTUALIZATION
......@@ -44,7 +44,11 @@ EXTRA_CFLAGS += -Ivirt/kvm -Iarch/ia64/kvm/
EXTRA_AFLAGS += -Ivirt/kvm -Iarch/ia64/kvm/
common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \
coalesced_mmio.o)
coalesced_mmio.o irq_comm.o)
ifeq ($(CONFIG_DMAR),y)
common-objs += $(addprefix ../../../virt/kvm/, vtd.o)
endif
kvm-objs := $(common-objs) kvm-ia64.o kvm_fw.o
obj-$(CONFIG_KVM) += kvm.o
......
/*
* irq.h: In-kernel interrupt controller related definitions
* Copyright (c) 2008, Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
* this program; if not, write to the Free Software Foundation, Inc., 59 Temple
* Place - Suite 330, Boston, MA 02111-1307 USA.
*
* Authors:
* Xiantao Zhang <xiantao.zhang@intel.com>
*
*/
#ifndef __IRQ_H
#define __IRQ_H
/*
 * Report whether the interrupt controller is handled in the kernel.
 * This implementation ignores @kvm and unconditionally returns 1.
 */
static inline int irqchip_in_kernel(struct kvm *kvm)
{
return 1;
}
#endif
......@@ -31,6 +31,7 @@
#include <linux/bitops.h>
#include <linux/hrtimer.h>
#include <linux/uaccess.h>
#include <linux/intel-iommu.h>
#include <asm/pgtable.h>
#include <asm/gcc_intrin.h>
......@@ -45,6 +46,7 @@
#include "iodev.h"
#include "ioapic.h"
#include "lapic.h"
#include "irq.h"
static unsigned long kvm_vmm_base;
static unsigned long kvm_vsa_base;
......@@ -179,12 +181,16 @@ int kvm_dev_ioctl_check_extension(long ext)
switch (ext) {
case KVM_CAP_IRQCHIP:
case KVM_CAP_USER_MEMORY:
case KVM_CAP_MP_STATE:
r = 1;
break;
case KVM_CAP_COALESCED_MMIO:
r = KVM_COALESCED_MMIO_PAGE_OFFSET;
break;
case KVM_CAP_IOMMU:
r = intel_iommu_found();
break;
default:
r = 0;
}
......@@ -771,6 +777,7 @@ static void kvm_init_vm(struct kvm *kvm)
*/
kvm_build_io_pmt(kvm);
INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
}
struct kvm *kvm_arch_create_vm(void)
......@@ -1334,6 +1341,10 @@ static void kvm_release_vm_pages(struct kvm *kvm)
void kvm_arch_destroy_vm(struct kvm *kvm)
{
kvm_iommu_unmap_guest(kvm);
#ifdef KVM_CAP_DEVICE_ASSIGNMENT
kvm_free_all_assigned_devices(kvm);
#endif
kfree(kvm->arch.vioapic);
kvm_release_vm_pages(kvm);
kvm_free_physmem(kvm);
......@@ -1435,17 +1446,24 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
int user_alloc)
{
unsigned long i;
struct page *page;
unsigned long pfn;
int npages = mem->memory_size >> PAGE_SHIFT;
struct kvm_memory_slot *memslot = &kvm->memslots[mem->slot];
unsigned long base_gfn = memslot->base_gfn;
for (i = 0; i < npages; i++) {
page = gfn_to_page(kvm, base_gfn + i);
kvm_set_pmt_entry(kvm, base_gfn + i,
page_to_pfn(page) << PAGE_SHIFT,
_PAGE_AR_RWX|_PAGE_MA_WB);
memslot->rmap[i] = (unsigned long)page;
pfn = gfn_to_pfn(kvm, base_gfn + i);
if (!kvm_is_mmio_pfn(pfn)) {
kvm_set_pmt_entry(kvm, base_gfn + i,
pfn << PAGE_SHIFT,
_PAGE_AR_RWX | _PAGE_MA_WB);
memslot->rmap[i] = (unsigned long)pfn_to_page(pfn);
} else {
kvm_set_pmt_entry(kvm, base_gfn + i,
GPFN_PHYS_MMIO | (pfn << PAGE_SHIFT),
_PAGE_MA_UC);
memslot->rmap[i] = 0;
}
}
return 0;
......@@ -1789,11 +1807,43 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
struct kvm_mp_state *mp_state)
{
return -EINVAL;
vcpu_load(vcpu);
mp_state->mp_state = vcpu->arch.mp_state;
vcpu_put(vcpu);
return 0;
}
/*
 * Reset a vcpu by tearing down and re-running its arch-level init.
 *
 * Runs with local interrupts disabled.  The VMM mapping is inserted, the
 * vcpu is marked as not launched, and kvm_arch_vcpu_uninit() followed by
 * kvm_arch_vcpu_init() rebuilds the arch state.  On success the VMM
 * mapping is purged again.
 *
 * Returns 0 on success, or the non-zero result of kvm_insert_vmm_mapping()
 * or kvm_arch_vcpu_init() on failure.
 *
 * NOTE(review): when kvm_arch_vcpu_init() fails, the mapping inserted by
 * kvm_insert_vmm_mapping() is not purged before returning -- confirm that
 * this is intended.
 */
static int vcpu_reset(struct kvm_vcpu *vcpu)
{
	/* local_irq_save()/local_irq_restore() operate on an unsigned long. */
	unsigned long psr;
	int r;

	local_irq_save(psr);

	r = kvm_insert_vmm_mapping(vcpu);
	if (r)
		goto fail;

	vcpu->arch.launched = 0;
	kvm_arch_vcpu_uninit(vcpu);
	r = kvm_arch_vcpu_init(vcpu);
	if (r)
		goto fail;

	kvm_purge_vmm_mapping(vcpu);
	r = 0;
fail:
	local_irq_restore(psr);
	return r;
}
int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
struct kvm_mp_state *mp_state)
{
return -EINVAL;
int r = 0;
vcpu_load(vcpu);
vcpu->arch.mp_state = mp_state->mp_state;
if (vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)
r = vcpu_reset(vcpu);
vcpu_put(vcpu);
return r;
}
......@@ -50,27 +50,18 @@
#define PAL_VSA_SYNC_READ \
/* begin to call pal vps sync_read */ \
{.mii; \
add r25 = VMM_VPD_BASE_OFFSET, r21; \
adds r20 = VMM_VCPU_VSA_BASE_OFFSET, r21; /* entry point */ \
nop 0x0; \
mov r24=ip; \
;; \
} \
{.mmb \
add r24=0x20, r24; \
ld8 r25 = [r25]; /* read vpd base */ \
ld8 r20 = [r20]; \
;; \
add r20 = PAL_VPS_SYNC_READ,r20; \
;; \
{ .mii; \
nop 0x0; \
mov r24 = ip; \
mov b0 = r20; \
br.cond.sptk kvm_vps_sync_read; /*call the service*/ \
;; \
}; \
{ .mmb; \
add r24 = 0x20, r24; \
nop 0x0; \
br.cond.sptk b0; /* call the service */ \
;; \
};
#define KVM_MINSTATE_GET_CURRENT(reg) mov reg=r21
......
/*
* arch/ia64/vmx/optvfault.S
* arch/ia64/kvm/optvfault.S
* optimize virtualization fault handler
*
* Copyright (C) 2006 Intel Co
* Xuefei Xu (Anthony Xu) <anthony.xu@intel.com>
* Copyright (C) 2008 Intel Co
* Add the support for Tukwila processors.
* Xiantao Zhang <xiantao.zhang@intel.com>
*/
#include <asm/asmmacro.h>
......@@ -20,6 +23,98 @@
#define ACCE_MOV_TO_PSR
#define ACCE_THASH
/*
 * Call kvm_vps_sync_read and continue with the code that follows the macro.
 *
 * b0, r24, r25 and r31 are stashed in r17, r18, r19 and r20 before the call
 * and restored from them afterwards.  For the call itself, r25 is set to
 * the VPD pointer loaded from [r21 + VMM_VPD_BASE_OFFSET] and r24 to the
 * return address (ip of the first bundle + 0x20).
 *
 * r16 is loaded with the VPD pointer and is not restored by this macro.
 */
#define VMX_VPS_SYNC_READ \
add r16=VMM_VPD_BASE_OFFSET,r21; \
mov r17 = b0; \
mov r18 = r24; \
mov r19 = r25; \
mov r20 = r31; \
;; \
{.mii; \
ld8 r16 = [r16]; \
nop 0x0; \
mov r24 = ip; \
;; \
}; \
{.mmb; \
add r24=0x20, r24; \
mov r25 =r16; \
br.sptk.many kvm_vps_sync_read; \
}; \
mov b0 = r17; \
mov r24 = r18; \
mov r25 = r19; \
mov r31 = r20
/*
 * Common tail used by the kvm_vps_* stubs: branch to the address obtained
 * by adding r30 to the value stored at [r21 + VMM_VCPU_VSA_BASE_OFFSET].
 *
 * Inputs:
 * r21 : base used to locate the VSA base field
 * r30 : offset of the requested service from the VSA base
 *
 * r29 and b0 are overwritten.
 */
ENTRY(kvm_vps_entry)
adds r29 = VMM_VCPU_VSA_BASE_OFFSET,r21
;;
ld8 r29 = [r29]
;;
add r29 = r29, r30
;;
mov b0 = r29
br.sptk.many b0
END(kvm_vps_entry)
/*
 * Dispatch to the PAL_VPS_SYNC_READ service through kvm_vps_entry.
 *
 * Inputs:
 * r24 : return address
 * r25 : vpd
 * r29 : scratch
 *
 * r30 is overwritten with PAL_VPS_SYNC_READ.
 */
GLOBAL_ENTRY(kvm_vps_sync_read)
movl r30 = PAL_VPS_SYNC_READ
;;
br.sptk.many kvm_vps_entry
END(kvm_vps_sync_read)
/*
 * Dispatch to the PAL_VPS_SYNC_WRITE service through kvm_vps_entry.
 *
 * Inputs:
 * r24 : return address
 * r25 : vpd
 * r29 : scratch
 *
 * r30 is overwritten with PAL_VPS_SYNC_WRITE.
 */
GLOBAL_ENTRY(kvm_vps_sync_write)
movl r30 = PAL_VPS_SYNC_WRITE
;;
br.sptk.many kvm_vps_entry
END(kvm_vps_sync_write)
/*
 * Dispatch to the PAL_VPS_RESUME_NORMAL service through kvm_vps_entry,
 * after loading the predicate registers from r23.
 *
 * Inputs:
 * r23 : pr
 * r24 : guest b0
 * r25 : vpd
 *
 * r30 is overwritten with PAL_VPS_RESUME_NORMAL.
 */
GLOBAL_ENTRY(kvm_vps_resume_normal)
movl r30 = PAL_VPS_RESUME_NORMAL
;;
mov pr=r23,-2
br.sptk.many kvm_vps_entry
END(kvm_vps_resume_normal)
/*
 * Dispatch to the PAL_VPS_RESUME_HANDLER service through kvm_vps_entry.
 * Before branching, r27 is loaded from [r25] and its bit 63 is replaced
 * with the IA64_ISR_IR_BIT bit of r17; the predicate registers are then
 * loaded from r23.
 *
 * Inputs:
 * r23 : pr
 * r24 : guest b0
 * r25 : vpd
 * r17 : isr
 *
 * r17, r27 and r30 are overwritten.
 */
GLOBAL_ENTRY(kvm_vps_resume_handler)
movl r30 = PAL_VPS_RESUME_HANDLER
;;
ld8 r27=[r25]
shr r17=r17,IA64_ISR_IR_BIT
;;
dep r27=r17,r27,63,1 // bit 63 of r27 indicates whether to enable CFLE
mov pr=r23,-2
br.sptk.many kvm_vps_entry
END(kvm_vps_resume_handler)
//mov r1=ar3
GLOBAL_ENTRY(kvm_asm_mov_from_ar)
#ifndef ACCE_MOV_FROM_AR
......@@ -157,11 +252,11 @@ GLOBAL_ENTRY(kvm_asm_rsm)
#ifndef ACCE_RSM
br.many kvm_virtualization_fault_back
#endif
add r16=VMM_VPD_BASE_OFFSET,r21
VMX_VPS_SYNC_READ
;;
extr.u r26=r25,6,21
extr.u r27=r25,31,2
;;
ld8 r16=[r16]
extr.u r28=r25,36,1
dep r26=r27,r26,21,2
;;
......@@ -196,7 +291,7 @@ GLOBAL_ENTRY(kvm_asm_rsm)
tbit.nz p6,p0=r23,0
;;
tbit.z.or p6,p0=r26,IA64_PSR_DT_BIT
(p6) br.dptk kvm_resume_to_guest
(p6) br.dptk kvm_resume_to_guest_with_sync
;;
add r26=VMM_VCPU_META_RR0_OFFSET,r21
add r27=VMM_VCPU_META_RR0_OFFSET+8,r21
......@@ -212,7 +307,7 @@ GLOBAL_ENTRY(kvm_asm_rsm)
mov rr[r28]=r27
;;
srlz.d
br.many kvm_resume_to_guest
br.many kvm_resume_to_guest_with_sync
END(kvm_asm_rsm)
......@@ -221,11 +316,11 @@ GLOBAL_ENTRY(kvm_asm_ssm)
#ifndef ACCE_SSM
br.many kvm_virtualization_fault_back
#endif
add r16=VMM_VPD_BASE_OFFSET,r21
VMX_VPS_SYNC_READ
;;
extr.u r26=r25,6,21
extr.u r27=r25,31,2
;;
ld8 r16=[r16]
extr.u r28=r25,36,1
dep r26=r27,r26,21,2
;; //r26 is imm24
......@@ -271,7 +366,7 @@ kvm_asm_ssm_1:
tbit.nz p6,p0=r29,IA64_PSR_I_BIT
;;
tbit.z.or p6,p0=r19,IA64_PSR_I_BIT
(p6) br.dptk kvm_resume_to_guest
(p6) br.dptk kvm_resume_to_guest_with_sync
;;
add r29=VPD_VTPR_START_OFFSET,r16
add r30=VPD_VHPI_START_OFFSET,r16
......@@ -286,7 +381,7 @@ kvm_asm_ssm_1:
;;
cmp.gt p6,p0=r30,r17
(p6) br.dpnt.few kvm_asm_dispatch_vexirq
br.many kvm_resume_to_guest
br.many kvm_resume_to_guest_with_sync
END(kvm_asm_ssm)
......@@ -295,10 +390,9 @@ GLOBAL_ENTRY(kvm_asm_mov_to_psr)
#ifndef ACCE_MOV_TO_PSR
br.many kvm_virtualization_fault_back
#endif
add r16=VMM_VPD_BASE_OFFSET,r21
extr.u r26=r25,13,7 //r2
VMX_VPS_SYNC_READ
;;
ld8 r16=[r16]
extr.u r26=r25,13,7 //r2
addl r20=@gprel(asm_mov_from_reg),gp
;;
adds r30=kvm_asm_mov_to_psr_back-asm_mov_from_reg,r20
......@@ -374,7 +468,7 @@ kvm_asm_mov_to_psr_1:
;;
tbit.nz.or p6,p0=r17,IA64_PSR_I_BIT
tbit.z.or p6,p0=r30,IA64_PSR_I_BIT
(p6) br.dpnt.few kvm_resume_to_guest
(p6) br.dpnt.few kvm_resume_to_guest_with_sync
;;
add r29=VPD_VTPR_START_OFFSET,r16
add r30=VPD_VHPI_START_OFFSET,r16
......@@ -389,13 +483,29 @@ kvm_asm_mov_to_psr_1:
;;
cmp.gt p6,p0=r30,r17
(p6) br.dpnt.few kvm_asm_dispatch_vexirq
br.many kvm_resume_to_guest
br.many kvm_resume_to_guest_with_sync
END(kvm_asm_mov_to_psr)
ENTRY(kvm_asm_dispatch_vexirq)
//increment iip
mov r17 = b0
mov r18 = r31
{.mii
add r25=VMM_VPD_BASE_OFFSET,r21
nop 0x0
mov r24 = ip
;;
}
{.mmb
add r24 = 0x20, r24
ld8 r25 = [r25]
br.sptk.many kvm_vps_sync_write
}
mov b0 =r17
mov r16=cr.ipsr
mov r31 = r18
mov r19 = 37
;;
extr.u r17=r16,IA64_PSR_RI_BIT,2
tbit.nz p6,p7=r16,IA64_PSR_RI_BIT+1
......@@ -435,25 +545,31 @@ GLOBAL_ENTRY(kvm_asm_thash)
;;
kvm_asm_thash_back1:
shr.u r23=r19,61 // get RR number
adds r25=VMM_VCPU_VRR0_OFFSET,r21 // get vcpu->arch.vrr[0]'s addr
adds r28=VMM_VCPU_VRR0_OFFSET,r21 // get vcpu->arch.vrr[0]'s addr
adds r16=VMM_VPD_VPTA_OFFSET,r16 // get vpta
;;
shladd r27=r23,3,r25 // get vcpu->arch.vrr[r23]'s addr
shladd r27=r23,3,r28 // get vcpu->arch.vrr[r23]'s addr
ld8 r17=[r16] // get PTA
mov r26=1
;;
extr.u r29=r17,2,6 // get pta.size
ld8 r25=[r27] // get vcpu->arch.vrr[r23]'s value
extr.u r29=r17,2,6 // get pta.size
ld8 r28=[r27] // get vcpu->arch.vrr[r23]'s value
;;
extr.u r25=r25,2,6 // get rr.ps
mov b0=r24
//Fallback to C if pta.vf is set
tbit.nz p6,p0=r17, 8
;;
(p6) mov r24=EVENT_THASH
(p6) br.cond.dpnt.many kvm_virtualization_fault_back
extr.u r28=r28,2,6 // get rr.ps
shl r22=r26,r29 // 1UL << pta.size
;;
shr.u r23=r19,r25 // vaddr >> rr.ps
shr.u r23=r19,r28 // vaddr >> rr.ps
adds r26=3,r29 // pta.size + 3
shl r27=r17,3 // pta << 3
;;
shl r23=r23,3 // (vaddr >> rr.ps) << 3
shr.u r27=r27,r26 // (pta << 3) >> (pta.size+3)
shr.u r27=r27,r26 // (pta << 3) >> (pta.size+3)
movl r16=7<<61
;;
adds r22=-1,r22 // (1UL << pta.size) - 1
......@@ -724,6 +840,29 @@ END(asm_mov_from_reg)
* r31: pr
* r24: b0
*/
/*
 * Call kvm_vps_sync_write and then continue at kvm_resume_to_guest.
 *
 * r31 and r24 are stashed in r16 and r17 before the call and restored
 * afterwards.  For the call, r25 is loaded with the VPD pointer from
 * [r21 + VMM_VPD_BASE_OFFSET] and r24 is set to the return address
 * (ip of the first bundle + 0x20).  r19 and r25 are overwritten.
 */
ENTRY(kvm_resume_to_guest_with_sync)
adds r19=VMM_VPD_BASE_OFFSET,r21
mov r16 = r31
mov r17 = r24
;;
{.mii
ld8 r25 =[r19]
nop 0x0
mov r24 = ip
;;
}
{.mmb
add r24 =0x20, r24
nop 0x0
br.sptk.many kvm_vps_sync_write
}
mov r31 = r16
mov r24 =r17
;;
br.sptk.many kvm_resume_to_guest
END(kvm_resume_to_guest_with_sync)
ENTRY(kvm_resume_to_guest)
adds r16 = VMM_VCPU_SAVED_GP_OFFSET,r21
;;
......
......@@ -962,9 +962,9 @@ static void kvm_do_resume_op(struct kvm_vcpu *vcpu)
void vmm_transition(struct kvm_vcpu *vcpu)
{
ia64_call_vsa(PAL_VPS_SAVE, (unsigned long)vcpu->arch.vpd,
0, 0, 0, 0, 0, 0);
1, 0, 0, 0, 0, 0);
vmm_trampoline(&vcpu->arch.guest, &vcpu->arch.host);
ia64_call_vsa(PAL_VPS_RESTORE, (unsigned long)vcpu->arch.vpd,
0, 0, 0, 0, 0, 0);
1, 0, 0, 0, 0, 0);
kvm_do_resume_op(vcpu);
}
......@@ -313,21 +313,21 @@ static inline void vcpu_set_tr(struct thash_data *trp, u64 pte, u64 itir,
trp->rid = rid;
}
extern u64 kvm_lookup_mpa(u64 gpfn);
extern u64 kvm_gpa_to_mpa(u64 gpa);
/* Return I/O type if true */
#define __gpfn_is_io(gpfn) \
({ \
u64 pte, ret = 0; \
pte = kvm_lookup_mpa(gpfn); \
if (!(pte & GPFN_INV_MASK)) \
ret = pte & GPFN_IO_MASK; \
ret; \
})
extern u64 kvm_get_mpt_entry(u64 gpfn);
/* Return the I/O type of gpfn; 0 if the entry is invalid, not I/O, or GPFN_PHYS_MMIO */
/*
 * Classify a guest pfn by the I/O type bits of its p2m entry.
 *
 * Returns the GPFN_IO_MASK bits of the entry, except that 0 is returned
 * when the entry has GPFN_INV_MASK set or when the type is GPFN_PHYS_MMIO.
 */
static inline u64 __gpfn_is_io(u64 gpfn)
{
	u64 entry = kvm_get_mpt_entry(gpfn);
	u64 io_type;

	if (entry & GPFN_INV_MASK)
		return 0;

	io_type = entry & GPFN_IO_MASK;
	return (io_type == GPFN_PHYS_MMIO) ? 0 : io_type;
}
#endif
#define IA64_NO_FAULT 0
#define IA64_FAULT 1
......
......@@ -1261,11 +1261,6 @@ kvm_rse_clear_invalid:
adds r19=VMM_VPD_VPSR_OFFSET,r18
;;
ld8 r19=[r19] //vpsr
adds r20=VMM_VCPU_VSA_BASE_OFFSET,r21
;;
ld8 r20=[r20]
;;
//vsa_sync_write_start
mov r25=r18
adds r16= VMM_VCPU_GP_OFFSET,r21
;;
......@@ -1274,10 +1269,7 @@ kvm_rse_clear_invalid:
;;
add r24=r24,r16
;;
add r16=PAL_VPS_SYNC_WRITE,r20
;;
mov b0=r16
br.cond.sptk b0 // call the service
br.sptk.many kvm_vps_sync_write // call the service
;;
END(ia64_leave_hypervisor)
// fall through
......@@ -1288,28 +1280,15 @@ GLOBAL_ENTRY(ia64_vmm_entry)
* r17:cr.isr
* r18:vpd
* r19:vpsr
* r20:__vsa_base
* r22:b0
* r23:predicate
*/
mov r24=r22
mov r25=r18
tbit.nz p1,p2 = r19,IA64_PSR_IC_BIT // p1=vpsr.ic
(p1) br.cond.sptk.few kvm_vps_resume_normal
(p2) br.cond.sptk.many kvm_vps_resume_handler
;;
(p1) add r29=PAL_VPS_RESUME_NORMAL,r20
(p1) br.sptk.many ia64_vmm_entry_out
;;
tbit.nz p1,p2 = r17,IA64_ISR_IR_BIT //p1=cr.isr.ir
;;
(p1) add r29=PAL_VPS_RESUME_NORMAL,r20
(p2) add r29=PAL_VPS_RESUME_HANDLER,r20
(p2) ld8 r26=[r25]
;;
ia64_vmm_entry_out:
mov pr=r23,-2
mov b0=r29
;;
br.cond.sptk b0 // call pal service
END(ia64_vmm_entry)
......@@ -1376,6 +1355,9 @@ GLOBAL_ENTRY(vmm_reset_entry)
//set up ipsr, iip, vpd.vpsr, dcr
// For IPSR: it/dt/rt=1, i/ic=1, si=1, vm/bn=1
// For DCR: all bits 0
bsw.0
;;
mov r21 =r13
adds r14=-VMM_PT_REGS_SIZE, r12
;;
movl r6=0x501008826000 // IPSR dt/rt/it:1;i/ic:1, si:1, vm/bn:1
......@@ -1387,12 +1369,6 @@ GLOBAL_ENTRY(vmm_reset_entry)
;;
srlz.i
;;
bsw.0
;;
mov r21 =r13
;;
bsw.1
;;
mov ar.rsc = 0
;;
flushrs
......@@ -1406,12 +1382,9 @@ GLOBAL_ENTRY(vmm_reset_entry)
ld8 r1 = [r20]
;;
mov cr.iip=r4
;;
adds r16=VMM_VPD_BASE_OFFSET,r13
adds r20=VMM_VCPU_VSA_BASE_OFFSET,r13
;;
ld8 r18=[r16]
ld8 r20=[r20]
;;
adds r19=VMM_VPD_VPSR_OFFSET,r18
;;
......
......@@ -390,7 +390,7 @@ void thash_purge_entries_remote(struct kvm_vcpu *v, u64 va, u64 ps)
u64 translate_phy_pte(u64 *pte, u64 itir, u64 va)
{
u64 ps, ps_mask, paddr, maddr;
u64 ps, ps_mask, paddr, maddr, io_mask;
union pte_flags phy_pte;
ps = itir_ps(itir);
......@@ -398,8 +398,9 @@ u64 translate_phy_pte(u64 *pte, u64 itir, u64 va)
phy_pte.val = *pte;
paddr = *pte;
paddr = ((paddr & _PAGE_PPN_MASK) & ps_mask) | (va & ~ps_mask);
maddr = kvm_lookup_mpa(paddr >> PAGE_SHIFT);
if (maddr & GPFN_IO_MASK) {
maddr = kvm_get_mpt_entry(paddr >> PAGE_SHIFT);
io_mask = maddr & GPFN_IO_MASK;
if (io_mask && (io_mask != GPFN_PHYS_MMIO)) {
*pte |= VTLB_PTE_IO;
return -1;
}
......@@ -418,7 +419,7 @@ int thash_purge_and_insert(struct kvm_vcpu *v, u64 pte, u64 itir,
u64 ifa, int type)
{
u64 ps;
u64 phy_pte;
u64 phy_pte, io_mask, index;
union ia64_rr vrr, mrr;
int ret = 0;
......@@ -426,13 +427,16 @@ int thash_purge_and_insert(struct kvm_vcpu *v, u64 pte, u64 itir,
vrr.val = vcpu_get_rr(v, ifa);
mrr.val = ia64_get_rr(ifa);
index = (pte & _PAGE_PPN_MASK) >> PAGE_SHIFT;
io_mask = kvm_get_mpt_entry(index) & GPFN_IO_MASK;
phy_pte = translate_phy_pte(&pte, itir, ifa);
/* Ensure WB attribute if pte is related to a normal mem page,
* which is required by vga acceleration since qemu maps shared
* vram buffer with WB.
*/
if (!(pte & VTLB_PTE_IO) && ((pte & _PAGE_MA_MASK) != _PAGE_MA_NAT)) {
if (!(pte & VTLB_PTE_IO) && ((pte & _PAGE_MA_MASK) != _PAGE_MA_NAT) &&
io_mask != GPFN_PHYS_MMIO) {
pte &= ~_PAGE_MA_MASK;
phy_pte &= ~_PAGE_MA_MASK;
}
......@@ -566,12 +570,19 @@ void thash_init(struct thash_cb *hcb, u64 sz)
}
}
u64 kvm_lookup_mpa(u64 gpfn)
/*
 * Return the raw 64-bit table entry for @gpfn, read from the u64 array
 * located at KVM_P2M_BASE.  No bounds check is performed on @gpfn.
 */
u64 kvm_get_mpt_entry(u64 gpfn)
{
	u64 *p2m = (u64 *)KVM_P2M_BASE;

	return p2m[gpfn];
}
/*
 * Look up @gpfn with kvm_get_mpt_entry() and return only the bits of the
 * entry selected by _PAGE_PPN_MASK.
 */
u64 kvm_lookup_mpa(u64 gpfn)
{
	return kvm_get_mpt_entry(gpfn) & _PAGE_PPN_MASK;
}
u64 kvm_gpa_to_mpa(u64 gpa)
{
u64 pte = kvm_lookup_mpa(gpa >> PAGE_SHIFT);
......
......@@ -81,11 +81,17 @@ struct kvm_vcpu_arch {
struct tlbe shadow_tlb[PPC44x_TLB_SIZE];
/* Pages which are referenced in the shadow TLB. */
struct page *shadow_pages[PPC44x_TLB_SIZE];
/* Copy of the host's TLB. */
struct tlbe host_tlb[PPC44x_TLB_SIZE];
/* Track which TLB entries we've modified in the current exit. */
u8 shadow_tlb_mod[PPC44x_TLB_SIZE];
u32 host_stack;
u32 host_pid;
u32 host_dbcr0;
u32 host_dbcr1;
u32 host_dbcr2;
u32 host_iac[4];
u32 host_msr;
u64 fpr[32];
u32 gpr[32];
......@@ -123,7 +129,11 @@ struct kvm_vcpu_arch {
u32 ivor[16];
u32 ivpr;
u32 pir;
u32 shadow_pid;
u32 pid;
u32 swap_pid;
u32 pvr;
u32 ccr0;
u32 ccr1;
......
......@@ -64,6 +64,10 @@ extern void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gfn_t gfn,
extern void kvmppc_mmu_invalidate(struct kvm_vcpu *vcpu, gva_t eaddr,
gva_t eend, u32 asid);
extern void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode);
extern void kvmppc_mmu_switch_pid(struct kvm_vcpu *vcpu, u32 pid);
/* XXX Book E specific */
extern void kvmppc_tlbe_set_modified(struct kvm_vcpu *vcpu, unsigned int i);
extern void kvmppc_check_and_deliver_interrupts(struct kvm_vcpu *vcpu);
......@@ -92,4 +96,12 @@ static inline void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr)
kvm_vcpu_block(vcpu);
}
/*
 * Record a new guest PID value.  When the value actually changes,
 * arch.swap_pid is set to 1; writing the current value again is a no-op.
 */
static inline void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 new_pid)
{
	if (vcpu->arch.pid == new_pid)
		return;

	vcpu->arch.pid = new_pid;
	vcpu->arch.swap_pid = 1;
}
#endif /* __POWERPC_KVM_PPC_H__ */
......@@ -359,8 +359,8 @@ int main(void)
DEFINE(VCPU_HOST_STACK, offsetof(struct kvm_vcpu, arch.host_stack));
DEFINE(VCPU_HOST_PID, offsetof(struct kvm_vcpu, arch.host_pid));
DEFINE(VCPU_HOST_TLB, offsetof(struct kvm_vcpu, arch.host_tlb));
DEFINE(VCPU_SHADOW_TLB, offsetof(struct kvm_vcpu, arch.shadow_tlb));
DEFINE(VCPU_SHADOW_MOD, offsetof(struct kvm_vcpu, arch.shadow_tlb_mod));
DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr));
DEFINE(VCPU_LR, offsetof(struct kvm_vcpu, arch.lr));
DEFINE(VCPU_CR, offsetof(struct kvm_vcpu, arch.cr));
......@@ -372,7 +372,7 @@ int main(void)
DEFINE(VCPU_SPRG5, offsetof(struct kvm_vcpu, arch.sprg5));
DEFINE(VCPU_SPRG6, offsetof(struct kvm_vcpu, arch.sprg6));
DEFINE(VCPU_SPRG7, offsetof(struct kvm_vcpu, arch.sprg7));
DEFINE(VCPU_PID, offsetof(struct kvm_vcpu, arch.pid));
DEFINE(VCPU_SHADOW_PID, offsetof(struct kvm_vcpu, arch.shadow_pid));
DEFINE(VCPU_LAST_INST, offsetof(struct kvm_vcpu, arch.last_inst));
DEFINE(VCPU_FAULT_DEAR, offsetof(struct kvm_vcpu, arch.fault_dear));
......
......@@ -19,6 +19,7 @@
#include <linux/types.h>
#include <linux/string.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/highmem.h>
#include <asm/mmu-44x.h>
......@@ -109,7 +110,6 @@ static int kvmppc_44x_tlbe_is_writable(struct tlbe *tlbe)
return tlbe->word2 & (PPC44x_TLB_SW|PPC44x_TLB_UW);
}
/* Must be called with mmap_sem locked for writing. */
static void kvmppc_44x_shadow_release(struct kvm_vcpu *vcpu,
unsigned int index)
{
......@@ -124,6 +124,11 @@ static void kvmppc_44x_shadow_release(struct kvm_vcpu *vcpu,
}
}
/*
 * Flag shadow TLB entry @i as modified by setting shadow_tlb_mod[i] to 1.
 * @i is used as an index without a range check.
 */
void kvmppc_tlbe_set_modified(struct kvm_vcpu *vcpu, unsigned int i)
{
vcpu->arch.shadow_tlb_mod[i] = 1;
}
/* Caller must ensure that the specified guest TLB entry is safe to insert into
* the shadow TLB. */
void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gfn_t gfn, u64 asid,
......@@ -142,19 +147,16 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gfn_t gfn, u64 asid,
stlbe = &vcpu->arch.shadow_tlb[victim];
/* Get reference to new page. */
down_read(&current->mm->mmap_sem);
new_page = gfn_to_page(vcpu->kvm, gfn);
if (is_error_page(new_page)) {
printk(KERN_ERR "Couldn't get guest page for gfn %lx!\n", gfn);
kvm_release_page_clean(new_page);
up_read(&current->mm->mmap_sem);
return;
}
hpaddr = page_to_phys(new_page);
/* Drop reference to old page. */
kvmppc_44x_shadow_release(vcpu, victim);
up_read(&current->mm->mmap_sem);
vcpu->arch.shadow_pages[victim] = new_page;
......@@ -164,27 +166,30 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gfn_t gfn, u64 asid,
/* XXX what about AS? */
stlbe->tid = asid & 0xff;
stlbe->tid = !(asid & 0xff);
/* Force TS=1 for all guest mappings. */
/* For now we hardcode 4KB mappings, but it will be important to
* use host large pages in the future. */
stlbe->word0 = (gvaddr & PAGE_MASK) | PPC44x_TLB_VALID | PPC44x_TLB_TS
| PPC44x_TLB_4K;
stlbe->word1 = (hpaddr & 0xfffffc00) | ((hpaddr >> 32) & 0xf);
stlbe->word2 = kvmppc_44x_tlb_shadow_attrib(flags,
vcpu->arch.msr & MSR_PR);
kvmppc_tlbe_set_modified(vcpu, victim);
KVMTRACE_5D(STLB_WRITE, vcpu, victim,
stlbe->tid, stlbe->word0, stlbe->word1, stlbe->word2,
handler);
}
void kvmppc_mmu_invalidate(struct kvm_vcpu *vcpu, gva_t eaddr,
gva_t eend, u32 asid)
{
unsigned int pid = asid & 0xff;
unsigned int pid = !(asid & 0xff);
int i;
/* XXX Replace loop with fancy data structures. */
down_write(&current->mm->mmap_sem);
for (i = 0; i <= tlb_44x_hwater; i++) {
struct tlbe *stlbe = &vcpu->arch.shadow_tlb[i];
unsigned int tid;
......@@ -204,21 +209,35 @@ void kvmppc_mmu_invalidate(struct kvm_vcpu *vcpu, gva_t eaddr,
kvmppc_44x_shadow_release(vcpu, i);
stlbe->word0 = 0;
kvmppc_tlbe_set_modified(vcpu, i);
KVMTRACE_5D(STLB_INVAL, vcpu, i,
stlbe->tid, stlbe->word0, stlbe->word1,
stlbe->word2, handler);
}
up_write(&current->mm->mmap_sem);
}
/* Invalidate all mappings, so that when they fault back in they will get the
* proper permission bits. */
/* Invalidate all mappings on the privilege switch after PID has been changed.
* The guest always runs with PID=1, so we must clear the entire TLB when
* switching address spaces. */
void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode)
{
int i;
/* XXX Replace loop with fancy data structures. */
down_write(&current->mm->mmap_sem);
for (i = 0; i <= tlb_44x_hwater; i++) {
kvmppc_44x_shadow_release(vcpu, i);
vcpu->arch.shadow_tlb[i].word0 = 0;
if (vcpu->arch.swap_pid) {
/* XXX Replace loop with fancy data structures. */
for (i = 0; i <= tlb_44x_hwater; i++) {
struct tlbe *stlbe = &vcpu->arch.shadow_tlb[i];
/* Future optimization: clear only userspace mappings. */
kvmppc_44x_shadow_release(vcpu, i);
stlbe->word0 = 0;
kvmppc_tlbe_set_modified(vcpu, i);
KVMTRACE_5D(STLB_INVAL, vcpu, i,
stlbe->tid, stlbe->word0, stlbe->word1,
stlbe->word2, handler);
}
vcpu->arch.swap_pid = 0;
}
up_write(&current->mm->mmap_sem);
vcpu->arch.shadow_pid = !usermode;
}
......@@ -37,6 +37,17 @@ config KVM_BOOKE_HOST
Provides host support for KVM on Book E PowerPC processors. Currently
this works on 440 processors only.
config KVM_TRACE
bool "KVM trace support"
depends on KVM && MARKERS && SYSFS
select RELAY
select DEBUG_FS
default n
---help---
This option allows reading a trace of kvm-related events through
relayfs. Note the ABI is not considered stable and will be
modified in future updates.
source drivers/virtio/Kconfig
endif # VIRTUALIZATION
......@@ -4,9 +4,11 @@
EXTRA_CFLAGS += -Ivirt/kvm -Iarch/powerpc/kvm
common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o)
common-objs-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o)
kvm-objs := $(common-objs) powerpc.o emulate.o booke_guest.o
common-objs-$(CONFIG_KVM_TRACE) += $(addprefix ../../../virt/kvm/, kvm_trace.o)
kvm-objs := $(common-objs-y) powerpc.o emulate.o booke_guest.o
obj-$(CONFIG_KVM) += kvm.o
AFLAGS_booke_interrupts.o := -I$(obj)
......
......@@ -410,6 +410,21 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
break;
}
case BOOKE_INTERRUPT_DEBUG: {
u32 dbsr;
vcpu->arch.pc = mfspr(SPRN_CSRR0);
/* clear IAC events in DBSR register */
dbsr = mfspr(SPRN_DBSR);
dbsr &= DBSR_IAC1 | DBSR_IAC2 | DBSR_IAC3 | DBSR_IAC4;
mtspr(SPRN_DBSR, dbsr);
run->exit_reason = KVM_EXIT_DEBUG;
r = RESUME_HOST;
break;
}
default:
printk(KERN_EMERG "exit_nr %d\n", exit_nr);
BUG();
......@@ -471,6 +486,8 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
vcpu->arch.msr = 0;
vcpu->arch.gpr[1] = (16<<20) - 8; /* -8 for the callee-save LR slot */
vcpu->arch.shadow_pid = 1;
/* Eye-catching number so we know if the guest takes an interrupt
* before it's programmed its own IVPR. */
vcpu->arch.ivpr = 0x55550000;
......
......@@ -42,7 +42,8 @@
#define HOST_STACK_LR (HOST_STACK_SIZE + 4) /* In caller stack frame. */
#define NEED_INST_MASK ((1<<BOOKE_INTERRUPT_PROGRAM) | \
(1<<BOOKE_INTERRUPT_DTLB_MISS))
(1<<BOOKE_INTERRUPT_DTLB_MISS) | \
(1<<BOOKE_INTERRUPT_DEBUG))
#define NEED_DEAR_MASK ((1<<BOOKE_INTERRUPT_DATA_STORAGE) | \
(1<<BOOKE_INTERRUPT_DTLB_MISS))
......@@ -331,51 +332,57 @@ lightweight_exit:
mfspr r3, SPRN_PID
stw r3, VCPU_HOST_PID(r4)
lwz r3, VCPU_PID(r4)
lwz r3, VCPU_SHADOW_PID(r4)
mtspr SPRN_PID, r3
/* Prevent all TLB updates. */
/* Prevent all asynchronous TLB updates. */
mfmsr r5
lis r6, (MSR_EE|MSR_CE|MSR_ME|MSR_DE)@h
ori r6, r6, (MSR_EE|MSR_CE|MSR_ME|MSR_DE)@l
andc r6, r5, r6
mtmsr r6
/* Save the host's non-pinned TLB mappings, and load the guest mappings
* over them. Leave the host's "pinned" kernel mappings in place. */
/* XXX optimization: use generation count to avoid swapping unmodified
* entries. */
/* Load the guest mappings, leaving the host's "pinned" kernel mappings
* in place. */
mfspr r10, SPRN_MMUCR /* Save host MMUCR. */
lis r8, tlb_44x_hwater@ha
lwz r8, tlb_44x_hwater@l(r8)
addi r3, r4, VCPU_HOST_TLB - 4
addi r9, r4, VCPU_SHADOW_TLB - 4
li r6, 0
li r5, PPC44x_TLB_SIZE
lis r5, tlb_44x_hwater@ha
lwz r5, tlb_44x_hwater@l(r5)
mtctr r5
addi r9, r4, VCPU_SHADOW_TLB
addi r5, r4, VCPU_SHADOW_MOD
li r3, 0
1:
/* Save host entry. */
tlbre r7, r6, PPC44x_TLB_PAGEID
mfspr r5, SPRN_MMUCR
stwu r5, 4(r3)
stwu r7, 4(r3)
tlbre r7, r6, PPC44x_TLB_XLAT
stwu r7, 4(r3)
tlbre r7, r6, PPC44x_TLB_ATTRIB
stwu r7, 4(r3)
lbzx r7, r3, r5
cmpwi r7, 0
beq 3f
/* Load guest entry. */
lwzu r7, 4(r9)
mulli r11, r3, TLBE_BYTES
add r11, r11, r9
lwz r7, 0(r11)
mtspr SPRN_MMUCR, r7
lwzu r7, 4(r9)
tlbwe r7, r6, PPC44x_TLB_PAGEID
lwzu r7, 4(r9)
tlbwe r7, r6, PPC44x_TLB_XLAT
lwzu r7, 4(r9)
tlbwe r7, r6, PPC44x_TLB_ATTRIB
/* Increment index. */
addi r6, r6, 1
cmpw r6, r8
blt 1b
lwz r7, 4(r11)
tlbwe r7, r3, PPC44x_TLB_PAGEID
lwz r7, 8(r11)
tlbwe r7, r3, PPC44x_TLB_XLAT
lwz r7, 12(r11)
tlbwe r7, r3, PPC44x_TLB_ATTRIB
3:
addi r3, r3, 1 /* Increment index. */
bdnz 1b
mtspr SPRN_MMUCR, r10 /* Restore host MMUCR. */
/* Clear bitmap of modified TLB entries */
li r5, PPC44x_TLB_SIZE>>2
mtctr r5
addi r5, r4, VCPU_SHADOW_MOD - 4
li r6, 0
1:
stwu r6, 4(r5)
bdnz 1b
iccci 0, 0 /* XXX hack */
/* Load some guest volatiles. */
......@@ -431,6 +438,14 @@ lightweight_exit:
oris r3, r3, KVMPPC_MSR_MASK@h
ori r3, r3, KVMPPC_MSR_MASK@l
mtsrr1 r3
/* Clear any debug events which occurred since we disabled MSR[DE].
* XXX This gives us a 3-instruction window in which a breakpoint
* intended for guest context could fire in the host instead. */
lis r3, 0xffff
ori r3, r3, 0xffff
mtspr SPRN_DBSR, r3
lwz r3, VCPU_GPR(r3)(r4)
lwz r4, VCPU_GPR(r4)(r4)
rfi
......@@ -170,6 +170,10 @@ static int kvmppc_emul_tlbwe(struct kvm_vcpu *vcpu, u32 inst)
kvmppc_mmu_map(vcpu, eaddr, raddr >> PAGE_SHIFT, asid, flags);
}
KVMTRACE_5D(GTLB_WRITE, vcpu, index,
tlbe->tid, tlbe->word0, tlbe->word1, tlbe->word2,
handler);
return EMULATE_DONE;
}
......@@ -504,7 +508,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
case SPRN_MMUCR:
vcpu->arch.mmucr = vcpu->arch.gpr[rs]; break;
case SPRN_PID:
vcpu->arch.pid = vcpu->arch.gpr[rs]; break;
kvmppc_set_pid(vcpu, vcpu->arch.gpr[rs]); break;
case SPRN_CCR0:
vcpu->arch.ccr0 = vcpu->arch.gpr[rs]; break;
case SPRN_CCR1:
......@@ -765,6 +769,8 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
break;
}
KVMTRACE_3D(PPC_INSTR, vcpu, inst, vcpu->arch.pc, emulated, entryexit);
if (advance)
vcpu->arch.pc += 4; /* Advance past emulated instruction. */
......
......@@ -27,6 +27,7 @@
#include <asm/cputable.h>
#include <asm/uaccess.h>
#include <asm/kvm_ppc.h>
#include <asm/tlbflush.h>
gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
......@@ -239,18 +240,114 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
{
}
/* Note: clearing MSR[DE] just means that the debug interrupt will not be
* delivered *immediately*. Instead, it simply sets the appropriate DBSR bits.
* If those DBSR bits are still set when MSR[DE] is re-enabled, the interrupt
* will be delivered as an "imprecise debug event" (which is indicated by
* DBSR[IDE]).
*/
/* Clear MSR[DE] in the current MSR, leaving every other MSR bit unchanged. */
static void kvmppc_disable_debug_interrupts(void)
{
	unsigned long msr = mfmsr();

	msr &= ~MSR_DE;
	mtmsr(msr);
}
/*
 * Put back the host debug state that kvmppc_load_guest_debug_registers()
 * saved in vcpu->arch.
 *
 * Debug interrupts are disabled first; IAC1-IAC4, DBCR1, DBCR2 and then
 * DBCR0 are rewritten from the saved copies, and the saved host MSR is
 * restored last.
 */
static void kvmppc_restore_host_debug_state(struct kvm_vcpu *vcpu)
{
kvmppc_disable_debug_interrupts();
mtspr(SPRN_IAC1, vcpu->arch.host_iac[0]);
mtspr(SPRN_IAC2, vcpu->arch.host_iac[1]);
mtspr(SPRN_IAC3, vcpu->arch.host_iac[2]);
mtspr(SPRN_IAC4, vcpu->arch.host_iac[3]);
mtspr(SPRN_DBCR1, vcpu->arch.host_dbcr1);
mtspr(SPRN_DBCR2, vcpu->arch.host_dbcr2);
mtspr(SPRN_DBCR0, vcpu->arch.host_dbcr0);
/* host_msr was captured before MSR[DE] was cleared, so this also
 * restores the host's MSR[DE] setting. */
mtmsr(vcpu->arch.host_msr);
}
/*
 * Save the host's debug registers into vcpu->arch and program the
 * instruction address compare registers for the guest's breakpoints.
 *
 * The host MSR is recorded before debug interrupts are disabled.  Each
 * non-zero vcpu->guest_debug.bp[n] is written to IAC(n+1) and enables the
 * matching DBCR0_IACn bit together with DBCR0_IDM; a slot holding 0 is
 * treated as unused.  DBCR1 and DBCR2 are cleared.
 */
static void kvmppc_load_guest_debug_registers(struct kvm_vcpu *vcpu)
{
struct kvm_guest_debug *dbg = &vcpu->guest_debug;
u32 dbcr0 = 0;
vcpu->arch.host_msr = mfmsr();
kvmppc_disable_debug_interrupts();
/* Save host debug register state. */
vcpu->arch.host_iac[0] = mfspr(SPRN_IAC1);
vcpu->arch.host_iac[1] = mfspr(SPRN_IAC2);
vcpu->arch.host_iac[2] = mfspr(SPRN_IAC3);
vcpu->arch.host_iac[3] = mfspr(SPRN_IAC4);
vcpu->arch.host_dbcr0 = mfspr(SPRN_DBCR0);
vcpu->arch.host_dbcr1 = mfspr(SPRN_DBCR1);
vcpu->arch.host_dbcr2 = mfspr(SPRN_DBCR2);
/* set registers up for guest */
if (dbg->bp[0]) {
mtspr(SPRN_IAC1, dbg->bp[0]);
dbcr0 |= DBCR0_IAC1 | DBCR0_IDM;
}
if (dbg->bp[1]) {
mtspr(SPRN_IAC2, dbg->bp[1]);
dbcr0 |= DBCR0_IAC2 | DBCR0_IDM;
}
if (dbg->bp[2]) {
mtspr(SPRN_IAC3, dbg->bp[2]);
dbcr0 |= DBCR0_IAC3 | DBCR0_IDM;
}
if (dbg->bp[3]) {
mtspr(SPRN_IAC4, dbg->bp[3]);
dbcr0 |= DBCR0_IAC4 | DBCR0_IDM;
}
/* DBCR0 is written even when no breakpoint is set, in which case it
 * is cleared to 0. */
mtspr(SPRN_DBCR0, dbcr0);
mtspr(SPRN_DBCR1, 0);
mtspr(SPRN_DBCR2, 0);
}
/*
 * Prepare the hardware for running @vcpu.
 *
 * @vcpu: vcpu being loaded.
 * @cpu:  not used by this implementation.
 */
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
	int slot = 0;

	/* Swap in the guest's breakpoint setup if guest debugging is on. */
	if (vcpu->guest_debug.enabled)
		kvmppc_load_guest_debug_registers(vcpu);

	/*
	 * Flag every shadow TLB entry from 0 through tlb_44x_hwater as
	 * modified, so that they are all reloaded on the next vcpu run
	 * rather than being demand-faulted one by one.
	 */
	while (slot <= tlb_44x_hwater) {
		kvmppc_tlbe_set_modified(vcpu, slot);
		slot++;
	}
}
/*
 * Undo kvm_arch_vcpu_load(): restore the host debug registers (only if guest
 * debugging is enabled for this vcpu) and then invalidate the TLB.
 */
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
if (vcpu->guest_debug.enabled)
kvmppc_restore_host_debug_state(vcpu);
/* Don't leave guest TLB entries resident when being de-scheduled. */
/* XXX It would be nice to differentiate between heavyweight exit and
 * sched_out here, since we could avoid the TLB flush for heavyweight
 * exits. */
_tlbia();
}
int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
struct kvm_debug_guest *dbg)
{
return -ENOTSUPP;
int i;
vcpu->guest_debug.enabled = dbg->enabled;
if (vcpu->guest_debug.enabled) {
for (i=0; i < ARRAY_SIZE(vcpu->guest_debug.bp); i++) {
if (dbg->breakpoints[i].enabled)
vcpu->guest_debug.bp[i] = dbg->breakpoints[i].address;
else
vcpu->guest_debug.bp[i] = 0;
}
}
return 0;
}
static void kvmppc_complete_dcr_load(struct kvm_vcpu *vcpu,
......
......@@ -565,13 +565,16 @@ config ZFCPDUMP
Refer to <file:Documentation/s390/zfcpdump.txt> for more details on this.
config S390_GUEST
bool "s390 guest support (EXPERIMENTAL)"
bool "s390 guest support for KVM (EXPERIMENTAL)"
depends on 64BIT && EXPERIMENTAL
select VIRTIO
select VIRTIO_RING
select VIRTIO_CONSOLE
help
Select this option if you want to run the kernel under s390 linux
Select this option if you want to run the kernel as a guest under
the KVM hypervisor. This will add detection for KVM as well as a
virtio transport. If KVM is detected, the virtio console will be
the default console.
endmenu
source "net/Kconfig"
......
......@@ -157,8 +157,8 @@ static int handle_stfl(struct kvm_vcpu *vcpu)
int rc;
vcpu->stat.instruction_stfl++;
facility_list &= ~(1UL<<24); /* no stfle */
facility_list &= ~(1UL<<23); /* no large pages */
/* only pass the facility bits, which we can handle */
facility_list &= 0xfe00fff3;
rc = copy_to_guest(vcpu, offsetof(struct _lowcore, stfl_fac_list),
&facility_list, sizeof(facility_list));
......
......@@ -78,6 +78,34 @@ static cycle_t kvm_clock_read(void)
return ret;
}
/*
* If we don't do that, there is the possibility that the guest
* will calibrate under heavy load - thus, getting a lower lpj -
* and execute the delays themselves without load. This is wrong,
* because no delay loop can finish beforehand.
* Any heuristics is subject to fail, because ultimately, a large
* poll of guests can be running and trouble each other. So we preset
* lpj here
*/
static unsigned long kvm_get_tsc_khz(void)
{
return preset_lpj;
}
static void kvm_get_preset_lpj(void)
{
struct pvclock_vcpu_time_info *src;
unsigned long khz;
u64 lpj;
src = &per_cpu(hv_clock, 0);
khz = pvclock_tsc_khz(src);
lpj = ((u64)khz * 1000);
do_div(lpj, HZ);
preset_lpj = lpj;
}
static struct clocksource kvm_clock = {
.name = "kvm-clock",
.read = kvm_clock_read,
......@@ -153,6 +181,7 @@ void __init kvmclock_init(void)
pv_time_ops.get_wallclock = kvm_get_wallclock;
pv_time_ops.set_wallclock = kvm_set_wallclock;
pv_time_ops.sched_clock = kvm_clock_read;
pv_time_ops.get_tsc_khz = kvm_get_tsc_khz;
#ifdef CONFIG_X86_LOCAL_APIC
pv_apic_ops.setup_secondary_clock = kvm_setup_secondary_clock;
#endif
......@@ -163,6 +192,7 @@ void __init kvmclock_init(void)
#ifdef CONFIG_KEXEC
machine_ops.crash_shutdown = kvm_crash_shutdown;
#endif
kvm_get_preset_lpj();
clocksource_register(&kvm_clock);
}
}
......@@ -97,6 +97,18 @@ static unsigned pvclock_get_time_values(struct pvclock_shadow_time *dst,
return dst->version;
}
/*
 * Compute a TSC frequency in kHz from pvclock scaling data.
 *
 * Divides (1000000 << 32) by src->tsc_to_system_mul, then applies the
 * opposite of src->tsc_shift: a right shift when tsc_shift is non-negative,
 * a left shift by -tsc_shift otherwise.
 *
 * @src: pvclock time info providing tsc_to_system_mul and tsc_shift.
 *
 * Returns the result converted to unsigned long.
 */
unsigned long pvclock_tsc_khz(struct pvclock_vcpu_time_info *src)
{
	u64 khz = 1000000ULL << 32;

	/* do_div() stores the quotient back into khz. */
	do_div(khz, src->tsc_to_system_mul);

	if (src->tsc_shift >= 0)
		khz >>= src->tsc_shift;
	else
		khz <<= -src->tsc_shift;

	return khz;
}
cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src)
{
struct pvclock_shadow_time shadow;
......
......@@ -3,10 +3,13 @@
#
common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \
coalesced_mmio.o)
coalesced_mmio.o irq_comm.o)
ifeq ($(CONFIG_KVM_TRACE),y)
common-objs += $(addprefix ../../../virt/kvm/, kvm_trace.o)
endif
ifeq ($(CONFIG_DMAR),y)
common-objs += $(addprefix ../../../virt/kvm/, vtd.o)
endif
EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm
......
......@@ -200,13 +200,14 @@ static int __pit_timer_fn(struct kvm_kpit_state *ps)
if (!atomic_inc_and_test(&pt->pending))
set_bit(KVM_REQ_PENDING_TIMER, &vcpu0->requests);
if (vcpu0 && waitqueue_active(&vcpu0->wq)) {
vcpu0->arch.mp_state = KVM_MP_STATE_RUNNABLE;
if (vcpu0 && waitqueue_active(&vcpu0->wq))
wake_up_interruptible(&vcpu0->wq);
}
pt->timer.expires = ktime_add_ns(pt->timer.expires, pt->period);
pt->scheduled = ktime_to_ns(pt->timer.expires);
if (pt->period)
ps->channels[0].count_load_time = pt->timer.expires;
return (pt->period == 0 ? 0 : 1);
}
......@@ -215,12 +216,22 @@ int pit_has_pending_timer(struct kvm_vcpu *vcpu)
{
struct kvm_pit *pit = vcpu->kvm->arch.vpit;
if (pit && vcpu->vcpu_id == 0 && pit->pit_state.inject_pending)
if (pit && vcpu->vcpu_id == 0 && pit->pit_state.irq_ack)
return atomic_read(&pit->pit_state.pit_timer.pending);
return 0;
}
/*
 * Ack-notifier callback for the PIT interrupt.
 *
 * Under inject_lock, consumes one pending timer tick (never letting the
 * counter drop below zero) and sets irq_ack, which is the condition
 * kvm_inject_pit_timer_irqs() checks before injecting the next tick.
 *
 * @kian: notifier embedded in struct kvm_kpit_state as irq_ack_notifier.
 */
static void kvm_pit_ack_irq(struct kvm_irq_ack_notifier *kian)
{
	struct kvm_kpit_state *state;

	state = container_of(kian, struct kvm_kpit_state, irq_ack_notifier);

	spin_lock(&state->inject_lock);

	/* Undo the decrement if it took the counter below zero. */
	if (atomic_dec_return(&state->pit_timer.pending) < 0)
		atomic_inc(&state->pit_timer.pending);

	state->irq_ack = 1;

	spin_unlock(&state->inject_lock);
}
static enum hrtimer_restart pit_timer_fn(struct hrtimer *data)
{
struct kvm_kpit_state *ps;
......@@ -255,8 +266,9 @@ static void destroy_pit_timer(struct kvm_kpit_timer *pt)
hrtimer_cancel(&pt->timer);
}
static void create_pit_timer(struct kvm_kpit_timer *pt, u32 val, int is_period)
static void create_pit_timer(struct kvm_kpit_state *ps, u32 val, int is_period)
{
struct kvm_kpit_timer *pt = &ps->pit_timer;
s64 interval;
interval = muldiv64(val, NSEC_PER_SEC, KVM_PIT_FREQ);
......@@ -268,6 +280,7 @@ static void create_pit_timer(struct kvm_kpit_timer *pt, u32 val, int is_period)
pt->period = (is_period == 0) ? 0 : interval;
pt->timer.function = pit_timer_fn;
atomic_set(&pt->pending, 0);
ps->irq_ack = 1;
hrtimer_start(&pt->timer, ktime_add_ns(ktime_get(), interval),
HRTIMER_MODE_ABS);
......@@ -302,11 +315,11 @@ static void pit_load_count(struct kvm *kvm, int channel, u32 val)
case 1:
/* FIXME: enhance mode 4 precision */
case 4:
create_pit_timer(&ps->pit_timer, val, 0);
create_pit_timer(ps, val, 0);
break;
case 2:
case 3:
create_pit_timer(&ps->pit_timer, val, 1);
create_pit_timer(ps, val, 1);
break;
default:
destroy_pit_timer(&ps->pit_timer);
......@@ -520,7 +533,7 @@ void kvm_pit_reset(struct kvm_pit *pit)
mutex_unlock(&pit->pit_state.lock);
atomic_set(&pit->pit_state.pit_timer.pending, 0);
pit->pit_state.inject_pending = 1;
pit->pit_state.irq_ack = 1;
}
struct kvm_pit *kvm_create_pit(struct kvm *kvm)
......@@ -534,6 +547,7 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm)
mutex_init(&pit->pit_state.lock);
mutex_lock(&pit->pit_state.lock);
spin_lock_init(&pit->pit_state.inject_lock);
/* Initialize PIO device */
pit->dev.read = pit_ioport_read;
......@@ -555,6 +569,9 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm)
pit_state->pit = pit;
hrtimer_init(&pit_state->pit_timer.timer,
CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
pit_state->irq_ack_notifier.gsi = 0;
pit_state->irq_ack_notifier.irq_acked = kvm_pit_ack_irq;
kvm_register_irq_ack_notifier(kvm, &pit_state->irq_ack_notifier);
mutex_unlock(&pit->pit_state.lock);
kvm_pit_reset(pit);
......@@ -578,10 +595,8 @@ void kvm_free_pit(struct kvm *kvm)
static void __inject_pit_timer_intr(struct kvm *kvm)
{
mutex_lock(&kvm->lock);
kvm_ioapic_set_irq(kvm->arch.vioapic, 0, 1);
kvm_ioapic_set_irq(kvm->arch.vioapic, 0, 0);
kvm_pic_set_irq(pic_irqchip(kvm), 0, 1);
kvm_pic_set_irq(pic_irqchip(kvm), 0, 0);
kvm_set_irq(kvm, 0, 1);
kvm_set_irq(kvm, 0, 0);
mutex_unlock(&kvm->lock);
}
......@@ -592,37 +607,19 @@ void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu)
struct kvm_kpit_state *ps;
if (vcpu && pit) {
int inject = 0;
ps = &pit->pit_state;
/* Try to inject pending interrupts when:
* 1. Pending exists
* 2. Last interrupt was accepted or waited for too long time*/
if (atomic_read(&ps->pit_timer.pending) &&
(ps->inject_pending ||
(jiffies - ps->last_injected_time
>= KVM_MAX_PIT_INTR_INTERVAL))) {
ps->inject_pending = 0;
__inject_pit_timer_intr(kvm);
ps->last_injected_time = jiffies;
}
}
}
void kvm_pit_timer_intr_post(struct kvm_vcpu *vcpu, int vec)
{
struct kvm_arch *arch = &vcpu->kvm->arch;
struct kvm_kpit_state *ps;
if (vcpu && arch->vpit) {
ps = &arch->vpit->pit_state;
if (atomic_read(&ps->pit_timer.pending) &&
(((arch->vpic->pics[0].imr & 1) == 0 &&
arch->vpic->pics[0].irq_base == vec) ||
(arch->vioapic->redirtbl[0].fields.vector == vec &&
arch->vioapic->redirtbl[0].fields.mask != 1))) {
ps->inject_pending = 1;
atomic_dec(&ps->pit_timer.pending);
ps->channels[0].count_load_time = ktime_get();
/* Try to inject pending interrupts when
* last one has been acked.
*/
spin_lock(&ps->inject_lock);
if (atomic_read(&ps->pit_timer.pending) && ps->irq_ack) {
ps->irq_ack = 0;
inject = 1;
}
spin_unlock(&ps->inject_lock);
if (inject)
__inject_pit_timer_intr(kvm);
}
}
......@@ -8,7 +8,6 @@ struct kvm_kpit_timer {
int irq;
s64 period; /* unit: ns */
s64 scheduled;
ktime_t last_update;
atomic_t pending;
};
......@@ -34,8 +33,9 @@ struct kvm_kpit_state {
u32 speaker_data_on;
struct mutex lock;
struct kvm_pit *pit;
bool inject_pending; /* if inject pending interrupts */
unsigned long last_injected_time;
spinlock_t inject_lock;
unsigned long irq_ack;
struct kvm_irq_ack_notifier irq_ack_notifier;
};
struct kvm_pit {
......@@ -54,7 +54,6 @@ struct kvm_pit {
#define KVM_PIT_CHANNEL_MASK 0x3
void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu);
void kvm_pit_timer_intr_post(struct kvm_vcpu *vcpu, int vec);
void kvm_pit_load_count(struct kvm *kvm, int channel, u32 val);
struct kvm_pit *kvm_create_pit(struct kvm *kvm);
void kvm_free_pit(struct kvm *kvm);
......
......@@ -30,6 +30,19 @@
#include <linux/kvm_host.h>
/*
 * Clear the in-service bit for @irq on PIC @s and, at the same time, set the
 * matching bit in isr_ack.
 */
static void pic_clear_isr(struct kvm_kpic_state *s, int irq)
{
	int mask = 1 << irq;

	s->isr &= ~mask;
	s->isr_ack |= mask;
}
/*
 * Set every isr_ack bit (0xff) on both PICs, pics[0] and pics[1], of @kvm.
 */
void kvm_pic_clear_isr_ack(struct kvm *kvm)
{
	struct kvm_pic *pic = pic_irqchip(kvm);
	int i;

	for (i = 0; i < 2; i++)
		pic->pics[i].isr_ack = 0xff;
}
/*
* set irq level. If an edge is detected, then the IRR is set to 1
*/
......@@ -141,11 +154,12 @@ void kvm_pic_set_irq(void *opaque, int irq, int level)
*/
static inline void pic_intack(struct kvm_kpic_state *s, int irq)
{
s->isr |= 1 << irq;
if (s->auto_eoi) {
if (s->rotate_on_auto_eoi)
s->priority_add = (irq + 1) & 7;
} else
s->isr |= (1 << irq);
pic_clear_isr(s, irq);
}
/*
* We don't clear a level sensitive interrupt here
*/
......@@ -153,9 +167,10 @@ static inline void pic_intack(struct kvm_kpic_state *s, int irq)
s->irr &= ~(1 << irq);
}
int kvm_pic_read_irq(struct kvm_pic *s)
int kvm_pic_read_irq(struct kvm *kvm)
{
int irq, irq2, intno;
struct kvm_pic *s = pic_irqchip(kvm);
irq = pic_get_irq(&s->pics[0]);
if (irq >= 0) {
......@@ -181,16 +196,32 @@ int kvm_pic_read_irq(struct kvm_pic *s)
intno = s->pics[0].irq_base + irq;
}
pic_update_irq(s);
kvm_notify_acked_irq(kvm, irq);
return intno;
}
void kvm_pic_reset(struct kvm_kpic_state *s)
{
int irq, irqbase;
struct kvm *kvm = s->pics_state->irq_request_opaque;
struct kvm_vcpu *vcpu0 = kvm->vcpus[0];
if (s == &s->pics_state->pics[0])
irqbase = 0;
else
irqbase = 8;
for (irq = 0; irq < PIC_NUM_PINS/2; irq++) {
if (vcpu0 && kvm_apic_accept_pic_intr(vcpu0))
if (s->irr & (1 << irq) || s->isr & (1 << irq))
kvm_notify_acked_irq(kvm, irq+irqbase);
}
s->last_irr = 0;
s->irr = 0;
s->imr = 0;
s->isr = 0;
s->isr_ack = 0xff;
s->priority_add = 0;
s->irq_base = 0;
s->read_reg_select = 0;
......@@ -243,7 +274,7 @@ static void pic_ioport_write(void *opaque, u32 addr, u32 val)
priority = get_priority(s, s->isr);
if (priority != 8) {
irq = (priority + s->priority_add) & 7;
s->isr &= ~(1 << irq);
pic_clear_isr(s, irq);
if (cmd == 5)
s->priority_add = (irq + 1) & 7;
pic_update_irq(s->pics_state);
......@@ -251,7 +282,7 @@ static void pic_ioport_write(void *opaque, u32 addr, u32 val)
break;
case 3:
irq = val & 7;
s->isr &= ~(1 << irq);
pic_clear_isr(s, irq);
pic_update_irq(s->pics_state);
break;
case 6:
......@@ -260,8 +291,8 @@ static void pic_ioport_write(void *opaque, u32 addr, u32 val)
break;
case 7:
irq = val & 7;
s->isr &= ~(1 << irq);
s->priority_add = (irq + 1) & 7;
pic_clear_isr(s, irq);
pic_update_irq(s->pics_state);
break;
default:
......@@ -303,7 +334,7 @@ static u32 pic_poll_read(struct kvm_kpic_state *s, u32 addr1)
s->pics_state->pics[0].irr &= ~(1 << 2);
}
s->irr &= ~(1 << ret);
s->isr &= ~(1 << ret);
pic_clear_isr(s, ret);
if (addr1 >> 7 || ret != 2)
pic_update_irq(s->pics_state);
} else {
......@@ -422,10 +453,14 @@ static void pic_irq_request(void *opaque, int level)
{
struct kvm *kvm = opaque;
struct kvm_vcpu *vcpu = kvm->vcpus[0];
struct kvm_pic *s = pic_irqchip(kvm);
int irq = pic_get_irq(&s->pics[0]);
pic_irqchip(kvm)->output = level;
if (vcpu)
s->output = level;
if (vcpu && level && (s->pics[0].isr_ack & (1 << irq))) {
s->pics[0].isr_ack &= ~(1 << irq);
kvm_vcpu_kick(vcpu);
}
}
struct kvm_pic *kvm_create_pic(struct kvm *kvm)
......
......@@ -72,7 +72,7 @@ int kvm_cpu_get_interrupt(struct kvm_vcpu *v)
if (kvm_apic_accept_pic_intr(v)) {
s = pic_irqchip(v->kvm);
s->output = 0; /* PIC */
vector = kvm_pic_read_irq(s);
vector = kvm_pic_read_irq(v->kvm);
}
}
return vector;
......@@ -90,7 +90,6 @@ EXPORT_SYMBOL_GPL(kvm_inject_pending_timer_irqs);
void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec)
{
kvm_apic_timer_intr_post(vcpu, vec);
kvm_pit_timer_intr_post(vcpu, vec);
/* TODO: PIT, RTC etc. */
}
EXPORT_SYMBOL_GPL(kvm_timer_intr_post);
......
......@@ -42,6 +42,7 @@ struct kvm_kpic_state {
u8 irr; /* interrupt request register */
u8 imr; /* interrupt mask register */
u8 isr; /* interrupt service register */
u8 isr_ack; /* interrupt ack detection */
u8 priority_add; /* highest irq priority */
u8 irq_base;
u8 read_reg_select;
......@@ -63,12 +64,13 @@ struct kvm_pic {
void *irq_request_opaque;
int output; /* intr from master PIC */
struct kvm_io_device dev;
void (*ack_notifier)(void *opaque, int irq);
};
struct kvm_pic *kvm_create_pic(struct kvm *kvm);
void kvm_pic_set_irq(void *opaque, int irq, int level);
int kvm_pic_read_irq(struct kvm_pic *s);
int kvm_pic_read_irq(struct kvm *kvm);
void kvm_pic_update_irq(struct kvm_pic *s);
void kvm_pic_clear_isr_ack(struct kvm *kvm);
static inline struct kvm_pic *pic_irqchip(struct kvm *kvm)
{
......
#ifndef ASM_KVM_CACHE_REGS_H
#define ASM_KVM_CACHE_REGS_H
/*
 * Read guest register @reg through the register cache.
 *
 * If the register's bit is not set in vcpu->arch.regs_avail, the
 * vendor-specific cache_reg() hook is called first; the value is then
 * returned from vcpu->arch.regs[].
 */
static inline unsigned long kvm_register_read(struct kvm_vcpu *vcpu,
					      enum kvm_reg reg)
{
	unsigned long *avail = (unsigned long *)&vcpu->arch.regs_avail;

	if (test_bit(reg, avail))
		return vcpu->arch.regs[reg];

	kvm_x86_ops->cache_reg(vcpu, reg);
	return vcpu->arch.regs[reg];
}
/*
 * Store @val as the cached value of guest register @reg and mark the
 * register both dirty and available in the vcpu's bitmaps.
 */
static inline void kvm_register_write(struct kvm_vcpu *vcpu,
				      enum kvm_reg reg,
				      unsigned long val)
{
	unsigned long *dirty = (unsigned long *)&vcpu->arch.regs_dirty;
	unsigned long *avail = (unsigned long *)&vcpu->arch.regs_avail;

	vcpu->arch.regs[reg] = val;

	__set_bit(reg, dirty);
	__set_bit(reg, avail);
}
/* Convenience wrapper: read the guest RIP via kvm_register_read(). */
static inline unsigned long kvm_rip_read(struct kvm_vcpu *vcpu)
{
return kvm_register_read(vcpu, VCPU_REGS_RIP);
}
/* Convenience wrapper: set the guest RIP to @val via kvm_register_write(). */
static inline void kvm_rip_write(struct kvm_vcpu *vcpu, unsigned long val)
{
kvm_register_write(vcpu, VCPU_REGS_RIP, val);
}
#endif
......@@ -32,6 +32,7 @@
#include <asm/current.h>
#include <asm/apicdef.h>
#include <asm/atomic.h>
#include "kvm_cache_regs.h"
#include "irq.h"
#define PRId64 "d"
......@@ -338,13 +339,7 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
} else
apic_clear_vector(vector, apic->regs + APIC_TMR);
if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE)
kvm_vcpu_kick(vcpu);
else if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED) {
vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
if (waitqueue_active(&vcpu->wq))
wake_up_interruptible(&vcpu->wq);
}
kvm_vcpu_kick(vcpu);
result = (orig_irr == 0);
break;
......@@ -370,21 +365,18 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
kvm_vcpu_kick(vcpu);
} else {
printk(KERN_DEBUG
"Ignoring de-assert INIT to vcpu %d\n",
vcpu->vcpu_id);
apic_debug("Ignoring de-assert INIT to vcpu %d\n",
vcpu->vcpu_id);
}
break;
case APIC_DM_STARTUP:
printk(KERN_DEBUG "SIPI to vcpu %d vector 0x%02x\n",
vcpu->vcpu_id, vector);
apic_debug("SIPI to vcpu %d vector 0x%02x\n",
vcpu->vcpu_id, vector);
if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
vcpu->arch.sipi_vector = vector;
vcpu->arch.mp_state = KVM_MP_STATE_SIPI_RECEIVED;
if (waitqueue_active(&vcpu->wq))
wake_up_interruptible(&vcpu->wq);
kvm_vcpu_kick(vcpu);
}
break;
......@@ -438,7 +430,7 @@ struct kvm_vcpu *kvm_get_lowest_prio_vcpu(struct kvm *kvm, u8 vector,
static void apic_set_eoi(struct kvm_lapic *apic)
{
int vector = apic_find_highest_isr(apic);
int trigger_mode;
/*
* Not every write EOI will has corresponding ISR,
* one example is when Kernel check timer on setup_IO_APIC
......@@ -450,7 +442,10 @@ static void apic_set_eoi(struct kvm_lapic *apic)
apic_update_ppr(apic);
if (apic_test_and_clear_vector(vector, apic->regs + APIC_TMR))
kvm_ioapic_update_eoi(apic->vcpu->kvm, vector);
trigger_mode = IOAPIC_LEVEL_TRIG;
else
trigger_mode = IOAPIC_EDGE_TRIG;
kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode);
}
static void apic_send_ipi(struct kvm_lapic *apic)
......@@ -558,8 +553,7 @@ static void __report_tpr_access(struct kvm_lapic *apic, bool write)
struct kvm_run *run = vcpu->run;
set_bit(KVM_REQ_REPORT_TPR_ACCESS, &vcpu->requests);
kvm_x86_ops->cache_regs(vcpu);
run->tpr_access.rip = vcpu->arch.rip;
run->tpr_access.rip = kvm_rip_read(vcpu);
run->tpr_access.is_write = write;
}
......@@ -683,9 +677,9 @@ static void apic_mmio_write(struct kvm_io_device *this,
* Refer SDM 8.4.1
*/
if (len != 4 || alignment) {
if (printk_ratelimit())
printk(KERN_ERR "apic write: bad size=%d %lx\n",
len, (long)address);
/* Don't shout loud, $infamous_os would cause only noise. */
apic_debug("apic write: bad size=%d %lx\n",
len, (long)address);
return;
}
......@@ -947,10 +941,9 @@ static int __apic_timer_fn(struct kvm_lapic *apic)
if(!atomic_inc_and_test(&apic->timer.pending))
set_bit(KVM_REQ_PENDING_TIMER, &apic->vcpu->requests);
if (waitqueue_active(q)) {
apic->vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
if (waitqueue_active(q))
wake_up_interruptible(q);
}
if (apic_lvtt_period(apic)) {
result = 1;
apic->timer.dev.expires = ktime_add_ns(
......
此差异已折叠。
......@@ -25,11 +25,11 @@
#if PTTYPE == 64
#define pt_element_t u64
#define guest_walker guest_walker64
#define shadow_walker shadow_walker64
#define FNAME(name) paging##64_##name
#define PT_BASE_ADDR_MASK PT64_BASE_ADDR_MASK
#define PT_DIR_BASE_ADDR_MASK PT64_DIR_BASE_ADDR_MASK
#define PT_INDEX(addr, level) PT64_INDEX(addr, level)
#define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level)
#define PT_LEVEL_MASK(level) PT64_LEVEL_MASK(level)
#define PT_LEVEL_BITS PT64_LEVEL_BITS
#ifdef CONFIG_X86_64
......@@ -42,11 +42,11 @@
#elif PTTYPE == 32
#define pt_element_t u32
#define guest_walker guest_walker32
#define shadow_walker shadow_walker32
#define FNAME(name) paging##32_##name
#define PT_BASE_ADDR_MASK PT32_BASE_ADDR_MASK
#define PT_DIR_BASE_ADDR_MASK PT32_DIR_BASE_ADDR_MASK
#define PT_INDEX(addr, level) PT32_INDEX(addr, level)
#define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level)
#define PT_LEVEL_MASK(level) PT32_LEVEL_MASK(level)
#define PT_LEVEL_BITS PT32_LEVEL_BITS
#define PT_MAX_FULL_LEVELS 2
......@@ -73,6 +73,17 @@ struct guest_walker {
u32 error_code;
};
/*
 * Per-walk context handed to the shadow page table walk callbacks.
 * The callbacks receive a pointer to the embedded 'walker' member and recover
 * this structure with container_of().
 */
struct shadow_walker {
/* generic walker; its .entry callback is invoked by walk_shadow() */
struct kvm_shadow_walk walker;
/* result of the guest page table walk this shadow walk is based on */
struct guest_walker *guest_walker;
/* fault attributes, passed through to mmu_set_spte() */
int user_fault;
int write_fault;
/* non-zero: install the final spte at the directory level */
int largepage;
/* passed through to mmu_set_spte() */
int *ptwrite;
/* host pfn to map; released by the callback if the guest pte changed */
pfn_t pfn;
/* output: the spte that was set, or NULL if the walk was abandoned */
u64 *sptep;
};
static gfn_t gpte_to_gfn(pt_element_t gpte)
{
return (gpte & PT_BASE_ADDR_MASK) >> PAGE_SHIFT;
......@@ -91,14 +102,10 @@ static bool FNAME(cmpxchg_gpte)(struct kvm *kvm,
pt_element_t *table;
struct page *page;
down_read(&current->mm->mmap_sem);
page = gfn_to_page(kvm, table_gfn);
up_read(&current->mm->mmap_sem);
table = kmap_atomic(page, KM_USER0);
ret = CMPXCHG(&table[index], orig_pte, new_pte);
kunmap_atomic(table, KM_USER0);
kvm_release_page_dirty(page);
......@@ -274,86 +281,89 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
/*
* Fetch a shadow pte for a specific level in the paging hierarchy.
*/
static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
struct guest_walker *walker,
int user_fault, int write_fault, int largepage,
int *ptwrite, pfn_t pfn)
static int FNAME(shadow_walk_entry)(struct kvm_shadow_walk *_sw,
struct kvm_vcpu *vcpu, u64 addr,
u64 *sptep, int level)
{
hpa_t shadow_addr;
int level;
u64 *shadow_ent;
unsigned access = walker->pt_access;
if (!is_present_pte(walker->ptes[walker->level - 1]))
return NULL;
shadow_addr = vcpu->arch.mmu.root_hpa;
level = vcpu->arch.mmu.shadow_root_level;
if (level == PT32E_ROOT_LEVEL) {
shadow_addr = vcpu->arch.mmu.pae_root[(addr >> 30) & 3];
shadow_addr &= PT64_BASE_ADDR_MASK;
--level;
struct shadow_walker *sw =
container_of(_sw, struct shadow_walker, walker);
struct guest_walker *gw = sw->guest_walker;
unsigned access = gw->pt_access;
struct kvm_mmu_page *shadow_page;
u64 spte;
int metaphysical;
gfn_t table_gfn;
int r;
pt_element_t curr_pte;
if (level == PT_PAGE_TABLE_LEVEL
|| (sw->largepage && level == PT_DIRECTORY_LEVEL)) {
mmu_set_spte(vcpu, sptep, access, gw->pte_access & access,
sw->user_fault, sw->write_fault,
gw->ptes[gw->level-1] & PT_DIRTY_MASK,
sw->ptwrite, sw->largepage, gw->gfn, sw->pfn,
false);
sw->sptep = sptep;
return 1;
}
for (; ; level--) {
u32 index = SHADOW_PT_INDEX(addr, level);
struct kvm_mmu_page *shadow_page;
u64 shadow_pte;
int metaphysical;
gfn_t table_gfn;
shadow_ent = ((u64 *)__va(shadow_addr)) + index;
if (level == PT_PAGE_TABLE_LEVEL)
break;
if (largepage && level == PT_DIRECTORY_LEVEL)
break;
if (is_shadow_present_pte(*sptep) && !is_large_pte(*sptep))
return 0;
if (is_shadow_present_pte(*shadow_ent)
&& !is_large_pte(*shadow_ent)) {
shadow_addr = *shadow_ent & PT64_BASE_ADDR_MASK;
continue;
}
if (is_large_pte(*sptep)) {
set_shadow_pte(sptep, shadow_trap_nonpresent_pte);
kvm_flush_remote_tlbs(vcpu->kvm);
rmap_remove(vcpu->kvm, sptep);
}
if (is_large_pte(*shadow_ent))
rmap_remove(vcpu->kvm, shadow_ent);
if (level - 1 == PT_PAGE_TABLE_LEVEL
&& walker->level == PT_DIRECTORY_LEVEL) {
metaphysical = 1;
if (!is_dirty_pte(walker->ptes[level - 1]))
access &= ~ACC_WRITE_MASK;
table_gfn = gpte_to_gfn(walker->ptes[level - 1]);
} else {
metaphysical = 0;
table_gfn = walker->table_gfn[level - 2];
}
shadow_page = kvm_mmu_get_page(vcpu, table_gfn, addr, level-1,
metaphysical, access,
shadow_ent);
if (!metaphysical) {
int r;
pt_element_t curr_pte;
r = kvm_read_guest_atomic(vcpu->kvm,
walker->pte_gpa[level - 2],
&curr_pte, sizeof(curr_pte));
if (r || curr_pte != walker->ptes[level - 2]) {
kvm_release_pfn_clean(pfn);
return NULL;
}
if (level == PT_DIRECTORY_LEVEL && gw->level == PT_DIRECTORY_LEVEL) {
metaphysical = 1;
if (!is_dirty_pte(gw->ptes[level - 1]))
access &= ~ACC_WRITE_MASK;
table_gfn = gpte_to_gfn(gw->ptes[level - 1]);
} else {
metaphysical = 0;
table_gfn = gw->table_gfn[level - 2];
}
shadow_page = kvm_mmu_get_page(vcpu, table_gfn, (gva_t)addr, level-1,
metaphysical, access, sptep);
if (!metaphysical) {
r = kvm_read_guest_atomic(vcpu->kvm, gw->pte_gpa[level - 2],
&curr_pte, sizeof(curr_pte));
if (r || curr_pte != gw->ptes[level - 2]) {
kvm_release_pfn_clean(sw->pfn);
sw->sptep = NULL;
return 1;
}
shadow_addr = __pa(shadow_page->spt);
shadow_pte = shadow_addr | PT_PRESENT_MASK | PT_ACCESSED_MASK
| PT_WRITABLE_MASK | PT_USER_MASK;
set_shadow_pte(shadow_ent, shadow_pte);
}
mmu_set_spte(vcpu, shadow_ent, access, walker->pte_access & access,
user_fault, write_fault,
walker->ptes[walker->level-1] & PT_DIRTY_MASK,
ptwrite, largepage, walker->gfn, pfn, false);
spte = __pa(shadow_page->spt) | PT_PRESENT_MASK | PT_ACCESSED_MASK
| PT_WRITABLE_MASK | PT_USER_MASK;
*sptep = spte;
return 0;
}
static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
struct guest_walker *guest_walker,
int user_fault, int write_fault, int largepage,
int *ptwrite, pfn_t pfn)
{
struct shadow_walker walker = {
.walker = { .entry = FNAME(shadow_walk_entry), },
.guest_walker = guest_walker,
.user_fault = user_fault,
.write_fault = write_fault,
.largepage = largepage,
.ptwrite = ptwrite,
.pfn = pfn,
};
if (!is_present_pte(guest_walker->ptes[guest_walker->level - 1]))
return NULL;
walk_shadow(&walker.walker, vcpu, addr);
return shadow_ent;
return walker.sptep;
}
/*
......@@ -407,7 +417,6 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
return 0;
}
down_read(&current->mm->mmap_sem);
if (walker.level == PT_DIRECTORY_LEVEL) {
gfn_t large_gfn;
large_gfn = walker.gfn & ~(KVM_PAGES_PER_HPAGE-1);
......@@ -417,9 +426,8 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
}
}
mmu_seq = vcpu->kvm->mmu_notifier_seq;
/* implicit mb(), we'll read before PT lock is unlocked */
smp_rmb();
pfn = gfn_to_pfn(vcpu->kvm, walker.gfn);
up_read(&current->mm->mmap_sem);
/* mmio */
if (is_error_pfn(pfn)) {
......@@ -453,6 +461,31 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
return 0;
}
/*
 * Shadow walk callback used by FNAME(invlpg).
 *
 * At the last (page table) level the spte is removed from the rmap if it was
 * present and is replaced with shadow_trap_nonpresent_pte. At higher levels
 * nothing is modified.
 *
 * Returns 1 at the page table level or when the current spte is not present,
 * 0 otherwise.
 * NOTE(review): presumably a non-zero return tells walk_shadow() to stop
 * descending -- confirm against walk_shadow().
 */
static int FNAME(shadow_invlpg_entry)(struct kvm_shadow_walk *_sw,
				      struct kvm_vcpu *vcpu, u64 addr,
				      u64 *sptep, int level)
{
	if (level != PT_PAGE_TABLE_LEVEL) {
		if (is_shadow_present_pte(*sptep))
			return 0;
		return 1;
	}

	if (is_shadow_present_pte(*sptep))
		rmap_remove(vcpu->kvm, sptep);
	set_shadow_pte(sptep, shadow_trap_nonpresent_pte);

	return 1;
}
/*
 * Handle a guest invlpg of @gva: walk the shadow page tables for that address
 * with FNAME(shadow_invlpg_entry) as the per-entry callback. Only the .entry
 * callback of the walker is initialised; the other shadow_walker fields are
 * not used by that callback.
 */
static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
{
struct shadow_walker walker = {
.walker = { .entry = FNAME(shadow_invlpg_entry), },
};
walk_shadow(&walker.walker, vcpu, gva);
}
static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr)
{
struct guest_walker walker;
......@@ -499,12 +532,66 @@ static void FNAME(prefetch_page)(struct kvm_vcpu *vcpu,
}
}
/*
 * Re-synchronise the sptes of shadow page @sp with the guest page table it
 * shadows.
 *
 * For every present spte the corresponding guest pte is re-read. If the guest
 * pte now maps a different gfn, is not present, or lacks the accessed bit,
 * the spte is dropped; otherwise the spte is rebuilt with set_spte() using
 * the current guest access bits and the pfn already stored in the spte.
 *
 * Returns -EINVAL if a guest pte cannot be read, 1 if no spte was kept, and
 * 0 if at least one spte was kept.
 *
 * Using the cached information from sp->gfns is safe because:
 * - The spte has a reference to the struct page, so the pfn for a given gfn
 * can't change unless all sptes pointing to it are nuked first.
 * - Alias changes zap the entire shadow cache.
 */
static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
{
int i, offset, nr_present;
offset = nr_present = 0;
/* For 32-bit guest page tables, the quadrant selects the starting guest
 * pte index that this shadow page covers. */
if (PTTYPE == 32)
offset = sp->role.quadrant << PT64_LEVEL_BITS;
for (i = 0; i < PT64_ENT_PER_PAGE; i++) {
unsigned pte_access;
pt_element_t gpte;
gpa_t pte_gpa;
gfn_t gfn = sp->gfns[i];
/* Nothing to sync for sptes that are not present. */
if (!is_shadow_present_pte(sp->spt[i]))
continue;
/* Guest physical address of the guest pte backing spt[i]. */
pte_gpa = gfn_to_gpa(sp->gfn);
pte_gpa += (i+offset) * sizeof(pt_element_t);
if (kvm_read_guest_atomic(vcpu->kvm, pte_gpa, &gpte,
sizeof(pt_element_t)))
return -EINVAL;
/* Guest pte no longer matches what was shadowed: drop the spte. */
if (gpte_to_gfn(gpte) != gfn || !is_present_pte(gpte) ||
!(gpte & PT_ACCESSED_MASK)) {
u64 nonpresent;
rmap_remove(vcpu->kvm, &sp->spt[i]);
/* A still-present guest pte gets the "trap" non-present value,
 * a non-present guest pte gets the "notrap" one. */
if (is_present_pte(gpte))
nonpresent = shadow_trap_nonpresent_pte;
else
nonpresent = shadow_notrap_nonpresent_pte;
set_shadow_pte(&sp->spt[i], nonpresent);
continue;
}
nr_present++;
/* Effective access: shadow page role access ANDed with guest pte access. */
pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte);
set_spte(vcpu, &sp->spt[i], pte_access, 0, 0,
is_dirty_pte(gpte), 0, gfn,
spte_to_pfn(sp->spt[i]), true, false);
}
return !nr_present;
}
#undef pt_element_t
#undef guest_walker
#undef shadow_walker
#undef FNAME
#undef PT_BASE_ADDR_MASK
#undef PT_INDEX
#undef SHADOW_PT_INDEX
#undef PT_LEVEL_MASK
#undef PT_DIR_BASE_ADDR_MASK
#undef PT_LEVEL_BITS
......
......@@ -18,6 +18,7 @@
#include "kvm_svm.h"
#include "irq.h"
#include "mmu.h"
#include "kvm_cache_regs.h"
#include <linux/module.h>
#include <linux/kernel.h>
......@@ -35,10 +36,6 @@ MODULE_LICENSE("GPL");
#define IOPM_ALLOC_ORDER 2
#define MSRPM_ALLOC_ORDER 1
#define DB_VECTOR 1
#define UD_VECTOR 6
#define GP_VECTOR 13
#define DR7_GD_MASK (1 << 13)
#define DR6_BD_MASK (1 << 13)
......@@ -47,7 +44,7 @@ MODULE_LICENSE("GPL");
#define SVM_FEATURE_NPT (1 << 0)
#define SVM_FEATURE_LBRV (1 << 1)
#define SVM_DEATURE_SVML (1 << 2)
#define SVM_FEATURE_SVML (1 << 2)
#define DEBUGCTL_RESERVED_BITS (~(0x3fULL))
......@@ -236,13 +233,11 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
printk(KERN_DEBUG "%s: NOP\n", __func__);
return;
}
if (svm->next_rip - svm->vmcb->save.rip > MAX_INST_SIZE)
printk(KERN_ERR "%s: ip 0x%llx next 0x%llx\n",
__func__,
svm->vmcb->save.rip,
svm->next_rip);
if (svm->next_rip - kvm_rip_read(vcpu) > MAX_INST_SIZE)
printk(KERN_ERR "%s: ip 0x%lx next 0x%llx\n",
__func__, kvm_rip_read(vcpu), svm->next_rip);
vcpu->arch.rip = svm->vmcb->save.rip = svm->next_rip;
kvm_rip_write(vcpu, svm->next_rip);
svm->vmcb->control.int_state &= ~SVM_INTERRUPT_SHADOW_MASK;
vcpu->arch.interrupt_window_open = 1;
......@@ -530,6 +525,7 @@ static void init_vmcb(struct vcpu_svm *svm)
(1ULL << INTERCEPT_CPUID) |
(1ULL << INTERCEPT_INVD) |
(1ULL << INTERCEPT_HLT) |
(1ULL << INTERCEPT_INVLPG) |
(1ULL << INTERCEPT_INVLPGA) |
(1ULL << INTERCEPT_IOIO_PROT) |
(1ULL << INTERCEPT_MSR_PROT) |
......@@ -581,6 +577,7 @@ static void init_vmcb(struct vcpu_svm *svm)
save->dr7 = 0x400;
save->rflags = 2;
save->rip = 0x0000fff0;
svm->vcpu.arch.regs[VCPU_REGS_RIP] = save->rip;
/*
* cr0 val on cpu init should be 0x60000010, we enable cpu
......@@ -593,7 +590,8 @@ static void init_vmcb(struct vcpu_svm *svm)
if (npt_enabled) {
/* Setup VMCB for Nested Paging */
control->nested_ctl = 1;
control->intercept &= ~(1ULL << INTERCEPT_TASK_SWITCH);
control->intercept &= ~((1ULL << INTERCEPT_TASK_SWITCH) |
(1ULL << INTERCEPT_INVLPG));
control->intercept_exceptions &= ~(1 << PF_VECTOR);
control->intercept_cr_read &= ~(INTERCEPT_CR0_MASK|
INTERCEPT_CR3_MASK);
......@@ -615,10 +613,12 @@ static int svm_vcpu_reset(struct kvm_vcpu *vcpu)
init_vmcb(svm);
if (vcpu->vcpu_id != 0) {
svm->vmcb->save.rip = 0;
kvm_rip_write(vcpu, 0);
svm->vmcb->save.cs.base = svm->vcpu.arch.sipi_vector << 12;
svm->vmcb->save.cs.selector = svm->vcpu.arch.sipi_vector << 8;
}
vcpu->arch.regs_avail = ~0;
vcpu->arch.regs_dirty = ~0;
return 0;
}
......@@ -721,23 +721,6 @@ static void svm_vcpu_put(struct kvm_vcpu *vcpu)
rdtscll(vcpu->arch.host_tsc);
}
static void svm_cache_regs(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
vcpu->arch.regs[VCPU_REGS_RAX] = svm->vmcb->save.rax;
vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp;
vcpu->arch.rip = svm->vmcb->save.rip;
}
static void svm_decache_regs(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX];
svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP];
svm->vmcb->save.rip = vcpu->arch.rip;
}
static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu)
{
return to_svm(vcpu)->vmcb->save.rflags;
......@@ -1040,7 +1023,7 @@ static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
if (npt_enabled)
svm_flush_tlb(&svm->vcpu);
if (event_injection)
if (!npt_enabled && event_injection)
kvm_mmu_unprotect_page_virt(&svm->vcpu, fault_address);
return kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code);
}
......@@ -1139,14 +1122,14 @@ static int nop_on_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
static int halt_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
{
svm->next_rip = svm->vmcb->save.rip + 1;
svm->next_rip = kvm_rip_read(&svm->vcpu) + 1;
skip_emulated_instruction(&svm->vcpu);
return kvm_emulate_halt(&svm->vcpu);
}
static int vmmcall_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
{
svm->next_rip = svm->vmcb->save.rip + 3;
svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
skip_emulated_instruction(&svm->vcpu);
kvm_emulate_hypercall(&svm->vcpu);
return 1;
......@@ -1178,11 +1161,18 @@ static int task_switch_interception(struct vcpu_svm *svm,
/*
 * Exit handler for an intercepted CPUID.
 *
 * Records the address two bytes past the current RIP in svm->next_rip and
 * lets kvm_emulate_cpuid() process the instruction.  Always returns 1.
 *
 * @kvm_run is not used by this handler.
 */
static int cpuid_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
{
	/*
	 * Derive next_rip from the register accessor only.  An earlier store
	 * computed from svm->vmcb->save.rip was overwritten immediately and
	 * therefore dead; it has been dropped.
	 */
	svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
	kvm_emulate_cpuid(&svm->vcpu);
	return 1;
}
/*
 * Exit handler for an intercepted INVLPG.
 *
 * The instruction is passed to emulate_instruction(); if that reports
 * anything other than EMULATE_DONE a "failed" message is emitted through
 * pr_unimpl().  The handler returns 1 in either case.
 */
static int invlpg_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
{
	if (emulate_instruction(&svm->vcpu, kvm_run, 0, 0, 0) == EMULATE_DONE)
		return 1;

	/* Emulation did not complete: log it, but still report success. */
	pr_unimpl(&svm->vcpu, "%s: failed\n", __func__);
	return 1;
}
static int emulate_on_interception(struct vcpu_svm *svm,
struct kvm_run *kvm_run)
{
......@@ -1273,9 +1263,9 @@ static int rdmsr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
KVMTRACE_3D(MSR_READ, &svm->vcpu, ecx, (u32)data,
(u32)(data >> 32), handler);
svm->vmcb->save.rax = data & 0xffffffff;
svm->vcpu.arch.regs[VCPU_REGS_RAX] = data & 0xffffffff;
svm->vcpu.arch.regs[VCPU_REGS_RDX] = data >> 32;
svm->next_rip = svm->vmcb->save.rip + 2;
svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
skip_emulated_instruction(&svm->vcpu);
}
return 1;
......@@ -1359,13 +1349,13 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data)
static int wrmsr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
{
u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX];
u64 data = (svm->vmcb->save.rax & -1u)
u64 data = (svm->vcpu.arch.regs[VCPU_REGS_RAX] & -1u)
| ((u64)(svm->vcpu.arch.regs[VCPU_REGS_RDX] & -1u) << 32);
KVMTRACE_3D(MSR_WRITE, &svm->vcpu, ecx, (u32)data, (u32)(data >> 32),
handler);
svm->next_rip = svm->vmcb->save.rip + 2;
svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
if (svm_set_msr(&svm->vcpu, ecx, data))
kvm_inject_gp(&svm->vcpu, 0);
else
......@@ -1436,7 +1426,7 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm,
[SVM_EXIT_CPUID] = cpuid_interception,
[SVM_EXIT_INVD] = emulate_on_interception,
[SVM_EXIT_HLT] = halt_interception,
[SVM_EXIT_INVLPG] = emulate_on_interception,
[SVM_EXIT_INVLPG] = invlpg_interception,
[SVM_EXIT_INVLPGA] = invalid_op_interception,
[SVM_EXIT_IOIO] = io_interception,
[SVM_EXIT_MSR] = msr_interception,
......@@ -1538,6 +1528,7 @@ static inline void svm_inject_irq(struct vcpu_svm *svm, int irq)
KVMTRACE_1D(INJ_VIRQ, &svm->vcpu, (u32)irq, handler);
++svm->vcpu.stat.irq_injections;
control = &svm->vmcb->control;
control->int_vector = irq;
control->int_ctl &= ~V_INTR_PRIO_MASK;
......@@ -1716,6 +1707,12 @@ static inline void sync_lapic_to_cr8(struct kvm_vcpu *vcpu)
svm->vmcb->control.int_ctl |= cr8 & V_TPR_MASK;
}
/*
 * R is the register-name prefix that gets pasted into the inline-assembly
 * strings that use it (e.g. "push %%"R"bp"): "r" when CONFIG_X86_64 is
 * defined, "e" otherwise.
 */
#ifdef CONFIG_X86_64
#define R "r"
#else
#define R "e"
#endif
static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
struct vcpu_svm *svm = to_svm(vcpu);
......@@ -1723,6 +1720,10 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
u16 gs_selector;
u16 ldt_selector;
svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX];
svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP];
svm->vmcb->save.rip = vcpu->arch.regs[VCPU_REGS_RIP];
pre_svm_run(svm);
sync_lapic_to_cr8(vcpu);
......@@ -1750,19 +1751,14 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
local_irq_enable();
asm volatile (
"push %%"R"bp; \n\t"
"mov %c[rbx](%[svm]), %%"R"bx \n\t"
"mov %c[rcx](%[svm]), %%"R"cx \n\t"
"mov %c[rdx](%[svm]), %%"R"dx \n\t"
"mov %c[rsi](%[svm]), %%"R"si \n\t"
"mov %c[rdi](%[svm]), %%"R"di \n\t"
"mov %c[rbp](%[svm]), %%"R"bp \n\t"
#ifdef CONFIG_X86_64
"push %%rbp; \n\t"
#else
"push %%ebp; \n\t"
#endif
#ifdef CONFIG_X86_64
"mov %c[rbx](%[svm]), %%rbx \n\t"
"mov %c[rcx](%[svm]), %%rcx \n\t"
"mov %c[rdx](%[svm]), %%rdx \n\t"
"mov %c[rsi](%[svm]), %%rsi \n\t"
"mov %c[rdi](%[svm]), %%rdi \n\t"
"mov %c[rbp](%[svm]), %%rbp \n\t"
"mov %c[r8](%[svm]), %%r8 \n\t"
"mov %c[r9](%[svm]), %%r9 \n\t"
"mov %c[r10](%[svm]), %%r10 \n\t"
......@@ -1771,41 +1767,24 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
"mov %c[r13](%[svm]), %%r13 \n\t"
"mov %c[r14](%[svm]), %%r14 \n\t"
"mov %c[r15](%[svm]), %%r15 \n\t"
#else
"mov %c[rbx](%[svm]), %%ebx \n\t"
"mov %c[rcx](%[svm]), %%ecx \n\t"
"mov %c[rdx](%[svm]), %%edx \n\t"
"mov %c[rsi](%[svm]), %%esi \n\t"
"mov %c[rdi](%[svm]), %%edi \n\t"
"mov %c[rbp](%[svm]), %%ebp \n\t"
#endif
#ifdef CONFIG_X86_64
/* Enter guest mode */
"push %%rax \n\t"
"mov %c[vmcb](%[svm]), %%rax \n\t"
__ex(SVM_VMLOAD) "\n\t"
__ex(SVM_VMRUN) "\n\t"
__ex(SVM_VMSAVE) "\n\t"
"pop %%rax \n\t"
#else
/* Enter guest mode */
"push %%eax \n\t"
"mov %c[vmcb](%[svm]), %%eax \n\t"
"push %%"R"ax \n\t"
"mov %c[vmcb](%[svm]), %%"R"ax \n\t"
__ex(SVM_VMLOAD) "\n\t"
__ex(SVM_VMRUN) "\n\t"
__ex(SVM_VMSAVE) "\n\t"
"pop %%eax \n\t"
#endif
"pop %%"R"ax \n\t"
/* Save guest registers, load host registers */
"mov %%"R"bx, %c[rbx](%[svm]) \n\t"
"mov %%"R"cx, %c[rcx](%[svm]) \n\t"
"mov %%"R"dx, %c[rdx](%[svm]) \n\t"
"mov %%"R"si, %c[rsi](%[svm]) \n\t"
"mov %%"R"di, %c[rdi](%[svm]) \n\t"
"mov %%"R"bp, %c[rbp](%[svm]) \n\t"
#ifdef CONFIG_X86_64
"mov %%rbx, %c[rbx](%[svm]) \n\t"
"mov %%rcx, %c[rcx](%[svm]) \n\t"
"mov %%rdx, %c[rdx](%[svm]) \n\t"
"mov %%rsi, %c[rsi](%[svm]) \n\t"
"mov %%rdi, %c[rdi](%[svm]) \n\t"
"mov %%rbp, %c[rbp](%[svm]) \n\t"
"mov %%r8, %c[r8](%[svm]) \n\t"
"mov %%r9, %c[r9](%[svm]) \n\t"
"mov %%r10, %c[r10](%[svm]) \n\t"
......@@ -1814,18 +1793,8 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
"mov %%r13, %c[r13](%[svm]) \n\t"
"mov %%r14, %c[r14](%[svm]) \n\t"
"mov %%r15, %c[r15](%[svm]) \n\t"
"pop %%rbp; \n\t"
#else
"mov %%ebx, %c[rbx](%[svm]) \n\t"
"mov %%ecx, %c[rcx](%[svm]) \n\t"
"mov %%edx, %c[rdx](%[svm]) \n\t"
"mov %%esi, %c[rsi](%[svm]) \n\t"
"mov %%edi, %c[rdi](%[svm]) \n\t"
"mov %%ebp, %c[rbp](%[svm]) \n\t"
"pop %%ebp; \n\t"
#endif
"pop %%"R"bp"
:
: [svm]"a"(svm),
[vmcb]"i"(offsetof(struct vcpu_svm, vmcb_pa)),
......@@ -1846,11 +1815,9 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
[r15]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R15]))
#endif
: "cc", "memory"
, R"bx", R"cx", R"dx", R"si", R"di"
#ifdef CONFIG_X86_64
, "rbx", "rcx", "rdx", "rsi", "rdi"
, "r8", "r9", "r10", "r11" , "r12", "r13", "r14", "r15"
#else
, "ebx", "ecx", "edx" , "esi", "edi"
#endif
);
......@@ -1858,6 +1825,9 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
load_db_regs(svm->host_db_regs);
vcpu->arch.cr2 = svm->vmcb->save.cr2;
vcpu->arch.regs[VCPU_REGS_RAX] = svm->vmcb->save.rax;
vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp;
vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip;
write_dr6(svm->host_dr6);
write_dr7(svm->host_dr7);
......@@ -1879,6 +1849,8 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
svm->next_rip = 0;
}
#undef R
static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root)
{
struct vcpu_svm *svm = to_svm(vcpu);
......@@ -1977,8 +1949,6 @@ static struct kvm_x86_ops svm_x86_ops = {
.set_gdt = svm_set_gdt,
.get_dr = svm_get_dr,
.set_dr = svm_set_dr,
.cache_regs = svm_cache_regs,
.decache_regs = svm_decache_regs,
.get_rflags = svm_get_rflags,
.set_rflags = svm_set_rflags,
......
此差异已折叠。
......@@ -331,9 +331,6 @@ enum vmcs_field {
#define AR_RESERVD_MASK 0xfffe0f00
#define MSR_IA32_FEATURE_CONTROL_LOCKED 0x1
#define MSR_IA32_FEATURE_CONTROL_VMXON_ENABLED 0x4
#define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT 9
#define IDENTITY_PAGETABLE_PRIVATE_MEMSLOT 10
......
此差异已折叠。
#ifndef ARCH_X86_KVM_X86_H
#define ARCH_X86_KVM_X86_H
#include <linux/kvm_host.h>
/* Clear the vcpu's pending-exception flag (arch.exception.pending). */
static inline void kvm_clear_exception_queue(struct kvm_vcpu *vcpu)
{
vcpu->arch.exception.pending = false;
}
/*
 * Record @vector as the vcpu's pending interrupt: sets
 * arch.interrupt.pending and stores the vector in arch.interrupt.nr.
 */
static inline void kvm_queue_interrupt(struct kvm_vcpu *vcpu, u8 vector)
{
vcpu->arch.interrupt.pending = true;
vcpu->arch.interrupt.nr = vector;
}
/*
 * Clear the vcpu's pending-interrupt flag (arch.interrupt.pending); the
 * stored vector number is left untouched.
 */
static inline void kvm_clear_interrupt_queue(struct kvm_vcpu *vcpu)
{
vcpu->arch.interrupt.pending = false;
}
#endif
......@@ -26,6 +26,7 @@
#define DPRINTF(_f, _a ...) printf(_f , ## _a)
#else
#include <linux/kvm_host.h>
#include "kvm_cache_regs.h"
#define DPRINTF(x...) do {} while (0)
#endif
#include <linux/module.h>
......@@ -46,25 +47,26 @@
#define ImplicitOps (1<<1)	/* Implicit in opcode. No generic decode. */
#define DstReg      (2<<1)	/* Register operand. */
#define DstMem      (3<<1)	/* Memory operand. */
#define DstAcc      (4<<1)	/* Destination Accumulator */
#define DstMask     (7<<1)	/* Destination type occupies bits 1:3. */
/* Source operand type (bits 4:6, directly above the destination field). */
#define SrcNone     (0<<4)	/* No source operand. */
#define SrcImplicit (0<<4)	/* Source operand is implicit in the opcode. */
#define SrcReg      (1<<4)	/* Register operand. */
#define SrcMem      (2<<4)	/* Memory operand. */
#define SrcMem16    (3<<4)	/* Memory operand (16-bit). */
#define SrcMem32    (4<<4)	/* Memory operand (32-bit). */
#define SrcImm      (5<<4)	/* Immediate operand. */
#define SrcImmByte  (6<<4)	/* 8-bit sign-extended immediate operand. */
#define SrcMask     (7<<4)
/* Generic ModRM decode. */
#define ModRM       (1<<7)
/* Destination is only written; never read. */
#define Mov         (1<<8)
#define BitOp       (1<<9)
#define MemAbs      (1<<10)	/* Memory operand is absolute displacement */
#define String      (1<<12)	/* String instruction (rep capable) */
#define Stack       (1<<13)	/* Stack instruction (push/pop) */
#define Group       (1<<14)	/* Bits 3:5 of modrm byte extend opcode */
#define GroupDual   (1<<15)	/* Alternate decoding of mod == 3 */
#define GroupMask   0xff	/* Group number stored in bits 0:7 */
......@@ -94,7 +96,7 @@ static u16 opcode_table[256] = {
/* 0x20 - 0x27 */
ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
SrcImmByte, SrcImm, 0, 0,
DstAcc | SrcImmByte, DstAcc | SrcImm, 0, 0,
/* 0x28 - 0x2F */
ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
......@@ -106,7 +108,8 @@ static u16 opcode_table[256] = {
/* 0x38 - 0x3F */
ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
0, 0, 0, 0,
ByteOp | DstAcc | SrcImm, DstAcc | SrcImm,
0, 0,
/* 0x40 - 0x47 */
DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg,
/* 0x48 - 0x4F */
......@@ -153,9 +156,16 @@ static u16 opcode_table[256] = {
0, 0, ByteOp | ImplicitOps | Mov | String, ImplicitOps | Mov | String,
ByteOp | ImplicitOps | Mov | String, ImplicitOps | Mov | String,
ByteOp | ImplicitOps | String, ImplicitOps | String,
/* 0xB0 - 0xBF */
0, 0, 0, 0, 0, 0, 0, 0,
DstReg | SrcImm | Mov, 0, 0, 0, 0, 0, 0, 0,
/* 0xB0 - 0xB7 */
ByteOp | DstReg | SrcImm | Mov, ByteOp | DstReg | SrcImm | Mov,
ByteOp | DstReg | SrcImm | Mov, ByteOp | DstReg | SrcImm | Mov,
ByteOp | DstReg | SrcImm | Mov, ByteOp | DstReg | SrcImm | Mov,
ByteOp | DstReg | SrcImm | Mov, ByteOp | DstReg | SrcImm | Mov,
/* 0xB8 - 0xBF */
DstReg | SrcImm | Mov, DstReg | SrcImm | Mov,
DstReg | SrcImm | Mov, DstReg | SrcImm | Mov,
DstReg | SrcImm | Mov, DstReg | SrcImm | Mov,
DstReg | SrcImm | Mov, DstReg | SrcImm | Mov,
/* 0xC0 - 0xC7 */
ByteOp | DstMem | SrcImm | ModRM, DstMem | SrcImmByte | ModRM,
0, ImplicitOps | Stack, 0, 0,
......@@ -169,17 +179,20 @@ static u16 opcode_table[256] = {
/* 0xD8 - 0xDF */
0, 0, 0, 0, 0, 0, 0, 0,
/* 0xE0 - 0xE7 */
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0,
SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps,
SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps,
/* 0xE8 - 0xEF */
ImplicitOps | Stack, SrcImm | ImplicitOps,
ImplicitOps, SrcImmByte | ImplicitOps,
0, 0, 0, 0,
SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps,
SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps,
/* 0xF0 - 0xF7 */
0, 0, 0, 0,
ImplicitOps, ImplicitOps, Group | Group3_Byte, Group | Group3,
/* 0xF8 - 0xFF */
ImplicitOps, 0, ImplicitOps, ImplicitOps,
0, 0, Group | Group4, Group | Group5,
ImplicitOps, ImplicitOps, Group | Group4, Group | Group5,
};
static u16 twobyte_table[256] = {
......@@ -268,15 +281,16 @@ static u16 group_table[] = {
ByteOp | DstMem | SrcNone | ModRM, ByteOp | DstMem | SrcNone | ModRM,
0, 0, 0, 0,
[Group3*8] =
DstMem | SrcImm | ModRM | SrcImm, 0,
DstMem | SrcNone | ModRM, ByteOp | DstMem | SrcNone | ModRM,
DstMem | SrcImm | ModRM, 0,
DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM,
0, 0, 0, 0,
[Group4*8] =
ByteOp | DstMem | SrcNone | ModRM, ByteOp | DstMem | SrcNone | ModRM,
0, 0, 0, 0, 0, 0,
[Group5*8] =
DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM, 0, 0,
SrcMem | ModRM, 0, SrcMem | ModRM | Stack, 0,
DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM,
SrcMem | ModRM | Stack, 0,
SrcMem | ModRM | Stack, 0, SrcMem | ModRM | Stack, 0,
[Group7*8] =
0, 0, ModRM | SrcMem, ModRM | SrcMem,
SrcNone | ModRM | DstMem | Mov, 0,
......@@ -839,7 +853,7 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
/* Shadow copy of register state. Committed on successful emulation. */
memset(c, 0, sizeof(struct decode_cache));
c->eip = ctxt->vcpu->arch.rip;
c->eip = kvm_rip_read(ctxt->vcpu);
ctxt->cs_base = seg_base(ctxt, VCPU_SREG_CS);
memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs);
......@@ -1048,6 +1062,23 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
}
c->dst.type = OP_MEM;
break;
case DstAcc:
c->dst.type = OP_REG;
c->dst.bytes = c->op_bytes;
c->dst.ptr = &c->regs[VCPU_REGS_RAX];
switch (c->op_bytes) {
case 1:
c->dst.val = *(u8 *)c->dst.ptr;
break;
case 2:
c->dst.val = *(u16 *)c->dst.ptr;
break;
case 4:
c->dst.val = *(u32 *)c->dst.ptr;
break;
}
c->dst.orig_val = c->dst.val;
break;
}
if (c->rip_relative)
......@@ -1151,6 +1182,14 @@ static inline int emulate_grp45(struct x86_emulate_ctxt *ctxt,
case 1: /* dec */
emulate_1op("dec", c->dst, ctxt->eflags);
break;
case 2: /* call near abs */ {
long int old_eip;
old_eip = c->eip;
c->eip = c->src.val;
c->src.val = old_eip;
emulate_push(ctxt);
break;
}
case 4: /* jmp abs */
c->eip = c->src.val;
break;
......@@ -1251,6 +1290,8 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
u64 msr_data;
unsigned long saved_eip = 0;
struct decode_cache *c = &ctxt->decode;
unsigned int port;
int io_dir_in;
int rc = 0;
/* Shadow copy of register state. Committed on successful emulation.
......@@ -1267,7 +1308,7 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
if (c->rep_prefix && (c->d & String)) {
/* All REP prefixes have the same first termination condition */
if (c->regs[VCPU_REGS_RCX] == 0) {
ctxt->vcpu->arch.rip = c->eip;
kvm_rip_write(ctxt->vcpu, c->eip);
goto done;
}
/* The second termination condition only applies for REPE
......@@ -1281,17 +1322,17 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
(c->b == 0xae) || (c->b == 0xaf)) {
if ((c->rep_prefix == REPE_PREFIX) &&
((ctxt->eflags & EFLG_ZF) == 0)) {
ctxt->vcpu->arch.rip = c->eip;
kvm_rip_write(ctxt->vcpu, c->eip);
goto done;
}
if ((c->rep_prefix == REPNE_PREFIX) &&
((ctxt->eflags & EFLG_ZF) == EFLG_ZF)) {
ctxt->vcpu->arch.rip = c->eip;
kvm_rip_write(ctxt->vcpu, c->eip);
goto done;
}
}
c->regs[VCPU_REGS_RCX]--;
c->eip = ctxt->vcpu->arch.rip;
c->eip = kvm_rip_read(ctxt->vcpu);
}
if (c->src.type == OP_MEM) {
......@@ -1351,27 +1392,10 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
sbb: /* sbb */
emulate_2op_SrcV("sbb", c->src, c->dst, ctxt->eflags);
break;
case 0x20 ... 0x23:
case 0x20 ... 0x25:
and: /* and */
emulate_2op_SrcV("and", c->src, c->dst, ctxt->eflags);
break;
case 0x24: /* and al imm8 */
c->dst.type = OP_REG;
c->dst.ptr = &c->regs[VCPU_REGS_RAX];
c->dst.val = *(u8 *)c->dst.ptr;
c->dst.bytes = 1;
c->dst.orig_val = c->dst.val;
goto and;
case 0x25: /* and ax imm16, or eax imm32 */
c->dst.type = OP_REG;
c->dst.bytes = c->op_bytes;
c->dst.ptr = &c->regs[VCPU_REGS_RAX];
if (c->op_bytes == 2)
c->dst.val = *(u16 *)c->dst.ptr;
else
c->dst.val = *(u32 *)c->dst.ptr;
c->dst.orig_val = c->dst.val;
goto and;
case 0x28 ... 0x2d:
sub: /* sub */
emulate_2op_SrcV("sub", c->src, c->dst, ctxt->eflags);
......@@ -1659,7 +1683,7 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
case 0xae ... 0xaf: /* scas */
DPRINTF("Urk! I don't handle SCAS.\n");
goto cannot_emulate;
case 0xb8: /* mov r, imm */
case 0xb0 ... 0xbf: /* mov r, imm */
goto mov;
case 0xc0 ... 0xc1:
emulate_grp2(ctxt);
......@@ -1679,6 +1703,16 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
c->src.val = c->regs[VCPU_REGS_RCX];
emulate_grp2(ctxt);
break;
case 0xe4: /* inb */
case 0xe5: /* in */
port = insn_fetch(u8, 1, c->eip);
io_dir_in = 1;
goto do_io;
case 0xe6: /* outb */
case 0xe7: /* out */
port = insn_fetch(u8, 1, c->eip);
io_dir_in = 0;
goto do_io;
case 0xe8: /* call (near) */ {
long int rel;
switch (c->op_bytes) {
......@@ -1729,6 +1763,22 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
jmp_rel(c, c->src.val);
c->dst.type = OP_NONE; /* Disable writeback. */
break;
case 0xec: /* in al,dx */
case 0xed: /* in (e/r)ax,dx */
port = c->regs[VCPU_REGS_RDX];
io_dir_in = 1;
goto do_io;
case 0xee: /* out al,dx */
case 0xef: /* out (e/r)ax,dx */
port = c->regs[VCPU_REGS_RDX];
io_dir_in = 0;
do_io: if (kvm_emulate_pio(ctxt->vcpu, NULL, io_dir_in,
(c->d & ByteOp) ? 1 : c->op_bytes,
port) != 0) {
c->eip = saved_eip;
goto cannot_emulate;
}
return 0;
case 0xf4: /* hlt */
ctxt->vcpu->arch.halt_request = 1;
break;
......@@ -1754,6 +1804,14 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
ctxt->eflags |= X86_EFLAGS_IF;
c->dst.type = OP_NONE; /* Disable writeback. */
break;
case 0xfc: /* cld */
ctxt->eflags &= ~EFLG_DF;
c->dst.type = OP_NONE; /* Disable writeback. */
break;
case 0xfd: /* std */
ctxt->eflags |= EFLG_DF;
c->dst.type = OP_NONE; /* Disable writeback. */
break;
case 0xfe ... 0xff: /* Grp4/Grp5 */
rc = emulate_grp45(ctxt, ops);
if (rc != 0)
......@@ -1768,7 +1826,7 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
/* Commit shadow register state. */
memcpy(ctxt->vcpu->arch.regs, c->regs, sizeof c->regs);
ctxt->vcpu->arch.rip = c->eip;
kvm_rip_write(ctxt->vcpu, c->eip);
done:
if (rc == X86EMUL_UNHANDLEABLE) {
......@@ -1793,7 +1851,7 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
goto done;
/* Let the processor re-execute the fixed hypercall */
c->eip = ctxt->vcpu->arch.rip;
c->eip = kvm_rip_read(ctxt->vcpu);
/* Disable writeback. */
c->dst.type = OP_NONE;
break;
......@@ -1889,7 +1947,7 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
rc = kvm_set_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], msr_data);
if (rc) {
kvm_inject_gp(ctxt->vcpu, 0);
c->eip = ctxt->vcpu->arch.rip;
c->eip = kvm_rip_read(ctxt->vcpu);
}
rc = X86EMUL_CONTINUE;
c->dst.type = OP_NONE;
......@@ -1899,7 +1957,7 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
rc = kvm_get_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], &msr_data);
if (rc) {
kvm_inject_gp(ctxt->vcpu, 0);
c->eip = ctxt->vcpu->arch.rip;
c->eip = kvm_rip_read(ctxt->vcpu);
} else {
c->regs[VCPU_REGS_RAX] = (u32)msr_data;
c->regs[VCPU_REGS_RDX] = msr_data >> 32;
......
......@@ -198,17 +198,10 @@ unsigned long long xen_sched_clock(void)
/* Get the TSC speed from Xen */
unsigned long xen_tsc_khz(void)
{
u64 xen_khz = 1000000ULL << 32;
const struct pvclock_vcpu_time_info *info =
struct pvclock_vcpu_time_info *info =
&HYPERVISOR_shared_info->vcpu_info[0].time;
do_div(xen_khz, info->tsc_to_system_mul);
if (info->tsc_shift < 0)
xen_khz <<= -info->tsc_shift;
else
xen_khz >>= info->tsc_shift;
return xen_khz;
return pvclock_tsc_khz(info);
}
cycle_t xen_clocksource_read(void)
......
此差异已折叠。
此差异已折叠。
此差异已折叠。
#include "intel-iommu.h"
#include <linux/intel-iommu.h>
struct ioapic_scope {
struct intel_iommu *iommu;
......
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册