Commit e61cf2e3 authored by Linus Torvalds

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull first set of KVM updates from Paolo Bonzini:
 "PPC:
   - minor code cleanups

  x86:
   - PCID emulation and CR3 caching for shadow page tables
   - nested VMX live migration
   - nested VMCS shadowing
   - optimized IPI hypercall
   - some optimizations

  ARM will come next week"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (85 commits)
  kvm: x86: Set highest physical address bits in non-present/reserved SPTEs
  KVM/x86: Use CC_SET()/CC_OUT in arch/x86/kvm/vmx.c
  KVM: X86: Implement PV IPIs in linux guest
  KVM: X86: Add kvm hypervisor init time platform setup callback
  KVM: X86: Implement "send IPI" hypercall
  KVM/x86: Move X86_CR4_OSXSAVE check into kvm_valid_sregs()
  KVM: x86: Skip pae_root shadow allocation if tdp enabled
  KVM/MMU: Combine flushing remote tlb in mmu_set_spte()
  KVM: vmx: skip VMWRITE of HOST_{FS,GS}_BASE when possible
  KVM: vmx: skip VMWRITE of HOST_{FS,GS}_SEL when possible
  KVM: vmx: always initialize HOST_{FS,GS}_BASE to zero during setup
  KVM: vmx: move struct host_state usage to struct loaded_vmcs
  KVM: vmx: compute need to reload FS/GS/LDT on demand
  KVM: nVMX: remove a misleading comment regarding vmcs02 fields
  KVM: vmx: rename __vmx_load_host_state() and vmx_save_host_state()
  KVM: vmx: add dedicated utility to access guest's kernel_gs_base
  KVM: vmx: track host_state.loaded using a loaded_vmcs pointer
  KVM: vmx: refactor segmentation code in vmx_save_host_state()
  kvm: nVMX: Fix fault priority for VMX operations
  kvm: nVMX: Fix fault vector for VMX operation at CPL > 0
  ...
@@ -3561,6 +3561,62 @@ Returns: 0 on success,
-ENOENT on deassign if the conn_id isn't registered
-EEXIST on assign if the conn_id is already registered
4.114 KVM_GET_NESTED_STATE
Capability: KVM_CAP_NESTED_STATE
Architectures: x86
Type: vcpu ioctl
Parameters: struct kvm_nested_state (in/out)
Returns: 0 on success, -1 on error
Errors:
E2BIG: the total state size (including the fixed-size part of struct
kvm_nested_state) exceeds the value of 'size' specified by
the user; the size required will be written into size.
struct kvm_nested_state {
__u16 flags;
__u16 format;
__u32 size;
union {
struct kvm_vmx_nested_state vmx;
struct kvm_svm_nested_state svm;
__u8 pad[120];
};
__u8 data[0];
};
#define KVM_STATE_NESTED_GUEST_MODE 0x00000001
#define KVM_STATE_NESTED_RUN_PENDING 0x00000002
#define KVM_STATE_NESTED_SMM_GUEST_MODE 0x00000001
#define KVM_STATE_NESTED_SMM_VMXON 0x00000002
struct kvm_vmx_nested_state {
__u64 vmxon_pa;
__u64 vmcs_pa;
struct {
__u16 flags;
} smm;
};
This ioctl copies the vcpu's nested virtualization state from the kernel to
userspace.
The maximum size of the state, including the fixed-size part of struct
kvm_nested_state, can be retrieved by passing KVM_CAP_NESTED_STATE to
the KVM_CHECK_EXTENSION ioctl().
4.115 KVM_SET_NESTED_STATE
Capability: KVM_CAP_NESTED_STATE
Architectures: x86
Type: vcpu ioctl
Parameters: struct kvm_nested_state (in)
Returns: 0 on success, -1 on error
This copies the vcpu's kvm_nested_state struct from userspace to the kernel. For
the definition of struct kvm_nested_state, see KVM_GET_NESTED_STATE.
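
The following sketch is illustrative only and not part of the ABI text; vm_fd
and vcpu_fd are assumed to be the usual VM and vcpu file descriptors, and
error handling is omitted:

  int max_size = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_NESTED_STATE);
  struct kvm_nested_state *state = calloc(1, max_size);

  /* source side: advertise the buffer size, then read the nested state */
  state->size = max_size;
  if (ioctl(vcpu_fd, KVM_GET_NESTED_STATE, state) == 0) {
          /* the header plus state->size bytes can now be migrated */
  }

  /* destination side: restore the previously saved blob */
  if (ioctl(vcpu_fd, KVM_SET_NESTED_STATE, state) < 0) {
          /* e.g. the saved state is inconsistent with the vcpu setup */
  }

Allocating the maximum size reported by KVM_CHECK_EXTENSION avoids the E2BIG
retry path described above.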
5. The kvm_run structure
------------------------
......
@@ -62,6 +62,10 @@ KVM_FEATURE_ASYNC_PF_VMEXIT || 10 || paravirtualized async PF VM exit
|| || can be enabled by setting bit 2
|| || when writing to msr 0x4b564d02
------------------------------------------------------------------------------
KVM_FEATURE_PV_SEND_IPI || 11 || guest checks this feature bit
|| || before using paravirtualized
|| || send IPIs.
------------------------------------------------------------------------------
KVM_FEATURE_CLOCKSOURCE_STABLE_BIT || 24 || host will warn if no guest-side
|| || per-cpu warps are expected in
|| || kvmclock.
......
@@ -121,3 +121,23 @@ compute the CLOCK_REALTIME for its clock, at the same instant.
Returns KVM_EOPNOTSUPP if the host does not use TSC clocksource,
or if clock type is different than KVM_CLOCK_PAIRING_WALLCLOCK.
6. KVM_HC_SEND_IPI
------------------------
Architecture: x86
Status: active
Purpose: Send IPIs to multiple vCPUs.
a0: lower part of the bitmap of destination APIC IDs
a1: higher part of the bitmap of destination APIC IDs
a2: the lowest APIC ID in bitmap
a3: APIC ICR
The hypercall lets a guest send multicast IPIs, with at most 128
destinations per hypercall in 64-bit mode and 64 vCPUs per
hypercall in 32-bit mode. The destinations are represented by a
bitmap contained in the first two arguments (a0 and a1). Bit 0 of
a0 corresponds to the APIC ID in the third argument (a2), bit 1
corresponds to the APIC ID a2+1, and so on.
Returns the number of CPUs to which the IPIs were delivered successfully.
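
As a guest-side illustration (the values below are hypothetical, and a guest
is expected to check the KVM_FEATURE_PV_SEND_IPI CPUID bit first), sending a
fixed IPI with vector 0xfb to the vCPUs with APIC IDs 64 and 66 could look
roughly like this:

  if (kvm_para_has_feature(KVM_FEATURE_PV_SEND_IPI)) {
          unsigned long low  = (1UL << (64 - 64)) | (1UL << (66 - 64));
          unsigned long high = 0;
          long delivered;

          /* a0/a1: destination bitmap, a2: lowest APIC ID, a3: ICR value */
          delivered = kvm_hypercall4(KVM_HC_SEND_IPI, low, high, 64,
                                     APIC_DM_FIXED | 0xfb);
  }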
@@ -390,4 +390,51 @@ extern int kvmppc_h_logical_ci_store(struct kvm_vcpu *vcpu);
#define SPLIT_HACK_MASK 0xff000000
#define SPLIT_HACK_OFFS 0xfb000000
/*
* This packs a VCPU ID from the [0..KVM_MAX_VCPU_ID) space down to the
* [0..KVM_MAX_VCPUS) space, using knowledge of the guest's core stride
* (but not its actual threading mode, which is not available) to avoid
* collisions.
*
* The implementation leaves VCPU IDs from the range [0..KVM_MAX_VCPUS) (block
* 0) unchanged: if the guest is filling each VCORE completely then it will be
* using consecutive IDs and it will fill the space without any packing.
*
* For higher VCPU IDs, the packed ID is based on the VCPU ID modulo
* KVM_MAX_VCPUS (effectively masking off the top bits) and then an offset is
* added to avoid collisions.
*
* VCPU IDs in the range [KVM_MAX_VCPUS..(KVM_MAX_VCPUS*2)) (block 1) are only
* possible if the guest is leaving at least 1/2 of each VCORE empty, so IDs
* can be safely packed into the second half of each VCORE by adding an offset
* of (stride / 2).
*
* Similarly, if VCPU IDs in the range [(KVM_MAX_VCPUS*2)..(KVM_MAX_VCPUS*4))
* (blocks 2 and 3) are seen, the guest must be leaving at least 3/4 of each
* VCORE empty so packed IDs can be offset by (stride / 4) and (stride * 3 / 4).
*
* Finally, VCPU IDs from blocks 5..7 will only be seen if the guest is using a
* stride of 8 and 1 thread per core so the remaining offsets of 1, 5, 3 and 7
* must be free to use.
*
* (The offsets for each block are stored in block_offsets[], indexed by the
* block number if the stride is 8. For cases where the guest's stride is less
* than 8, we can re-use the block_offsets array by multiplying the block
* number by (MAX_SMT_THREADS / stride) to reach the correct entry.)
*/
static inline u32 kvmppc_pack_vcpu_id(struct kvm *kvm, u32 id)
{
const int block_offsets[MAX_SMT_THREADS] = {0, 4, 2, 6, 1, 5, 3, 7};
int stride = kvm->arch.emul_smt_mode;
int block = (id / KVM_MAX_VCPUS) * (MAX_SMT_THREADS / stride);
u32 packed_id;
if (WARN_ONCE(block >= MAX_SMT_THREADS, "VCPU ID too large to pack"))
return 0;
packed_id = (id % KVM_MAX_VCPUS) + block_offsets[block];
if (WARN_ONCE(packed_id >= KVM_MAX_VCPUS, "VCPU ID packing failed"))
return 0;
return packed_id;
}
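/*
 * Worked example (illustrative; assumes KVM_MAX_VCPUS == 2048): with a guest
 * core stride of 8, VCPU ID 2051 falls in block (2051 / 2048) * (8 / 8) = 1,
 * so the packed ID is (2051 % 2048) + block_offsets[1] = 3 + 4 = 7, i.e. the
 * ID is folded into the otherwise-unused second half of the first VCORE.
 */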
#endif /* __ASM_KVM_BOOK3S_H__ */
@@ -42,7 +42,14 @@
 #define KVM_USER_MEM_SLOTS 512
 #include <asm/cputhreads.h>
-#define KVM_MAX_VCPU_ID (threads_per_subcore * KVM_MAX_VCORES)
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+#include <asm/kvm_book3s_asm.h> /* for MAX_SMT_THREADS */
+#define KVM_MAX_VCPU_ID (MAX_SMT_THREADS * KVM_MAX_VCORES)
+#else
+#define KVM_MAX_VCPU_ID KVM_MAX_VCPUS
+#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
 #define __KVM_HAVE_ARCH_INTC_INITIALIZED
@@ -672,7 +679,7 @@ struct kvm_vcpu_arch {
 gva_t vaddr_accessed;
 pgd_t *pgdir;
-u8 io_gpr; /* GPR used as IO source/target */
+u16 io_gpr; /* GPR used as IO source/target */
 u8 mmio_host_swabbed;
 u8 mmio_sign_extend;
 /* conversion between single and double precision */
@@ -688,7 +695,6 @@ struct kvm_vcpu_arch {
  */
 u8 mmio_vsx_copy_nums;
 u8 mmio_vsx_offset;
-u8 mmio_vsx_tx_sx_enabled;
 u8 mmio_vmx_copy_nums;
 u8 mmio_vmx_offset;
 u8 mmio_copy_type;
@@ -801,14 +807,14 @@ struct kvm_vcpu_arch {
 #define KVMPPC_VCPU_BUSY_IN_HOST 2
 /* Values for vcpu->arch.io_gpr */
-#define KVM_MMIO_REG_MASK 0x001f
-#define KVM_MMIO_REG_EXT_MASK 0xffe0
+#define KVM_MMIO_REG_MASK 0x003f
+#define KVM_MMIO_REG_EXT_MASK 0xffc0
 #define KVM_MMIO_REG_GPR 0x0000
-#define KVM_MMIO_REG_FPR 0x0020
-#define KVM_MMIO_REG_QPR 0x0040
-#define KVM_MMIO_REG_FQPR 0x0060
-#define KVM_MMIO_REG_VSX 0x0080
-#define KVM_MMIO_REG_VMX 0x00c0
+#define KVM_MMIO_REG_FPR 0x0040
+#define KVM_MMIO_REG_QPR 0x0080
+#define KVM_MMIO_REG_FQPR 0x00c0
+#define KVM_MMIO_REG_VSX 0x0100
+#define KVM_MMIO_REG_VMX 0x0180
 #define __KVM_HAVE_ARCH_WQP
 #define __KVM_HAVE_CREATE_DEVICE
......
@@ -163,7 +163,7 @@
 #define PSSCR_ESL 0x00200000 /* Enable State Loss */
 #define PSSCR_SD 0x00400000 /* Status Disable */
 #define PSSCR_PLS 0xf000000000000000 /* Power-saving Level Status */
-#define PSSCR_GUEST_VIS 0xf0000000000003ff /* Guest-visible PSSCR fields */
+#define PSSCR_GUEST_VIS 0xf0000000000003ffUL /* Guest-visible PSSCR fields */
 #define PSSCR_FAKE_SUSPEND 0x00000400 /* Fake-suspend bit (P9 DD2.2) */
 #define PSSCR_FAKE_SUSPEND_LG 10 /* Fake-suspend bit position */
......
...@@ -179,7 +179,7 @@ extern long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd, ...@@ -179,7 +179,7 @@ extern long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd,
if ((tbltmp->it_page_shift <= stt->page_shift) && if ((tbltmp->it_page_shift <= stt->page_shift) &&
(tbltmp->it_offset << tbltmp->it_page_shift == (tbltmp->it_offset << tbltmp->it_page_shift ==
stt->offset << stt->page_shift) && stt->offset << stt->page_shift) &&
(tbltmp->it_size << tbltmp->it_page_shift == (tbltmp->it_size << tbltmp->it_page_shift >=
stt->size << stt->page_shift)) { stt->size << stt->page_shift)) {
/* /*
* Reference the table to avoid races with * Reference the table to avoid races with
...@@ -295,7 +295,7 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, ...@@ -295,7 +295,7 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
{ {
struct kvmppc_spapr_tce_table *stt = NULL; struct kvmppc_spapr_tce_table *stt = NULL;
struct kvmppc_spapr_tce_table *siter; struct kvmppc_spapr_tce_table *siter;
unsigned long npages, size; unsigned long npages, size = args->size;
int ret = -ENOMEM; int ret = -ENOMEM;
int i; int i;
...@@ -303,7 +303,6 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, ...@@ -303,7 +303,6 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
(args->offset + args->size > (ULLONG_MAX >> args->page_shift))) (args->offset + args->size > (ULLONG_MAX >> args->page_shift)))
return -EINVAL; return -EINVAL;
size = _ALIGN_UP(args->size, PAGE_SIZE >> 3);
npages = kvmppc_tce_pages(size); npages = kvmppc_tce_pages(size);
ret = kvmppc_account_memlimit(kvmppc_stt_pages(npages), true); ret = kvmppc_account_memlimit(kvmppc_stt_pages(npages), true);
if (ret) if (ret)
......
...@@ -127,14 +127,14 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu); ...@@ -127,14 +127,14 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
* and SPURR count and should be set according to the number of * and SPURR count and should be set according to the number of
* online threads in the vcore being run. * online threads in the vcore being run.
*/ */
#define RWMR_RPA_P8_1THREAD 0x164520C62609AECA #define RWMR_RPA_P8_1THREAD 0x164520C62609AECAUL
#define RWMR_RPA_P8_2THREAD 0x7FFF2908450D8DA9 #define RWMR_RPA_P8_2THREAD 0x7FFF2908450D8DA9UL
#define RWMR_RPA_P8_3THREAD 0x164520C62609AECA #define RWMR_RPA_P8_3THREAD 0x164520C62609AECAUL
#define RWMR_RPA_P8_4THREAD 0x199A421245058DA9 #define RWMR_RPA_P8_4THREAD 0x199A421245058DA9UL
#define RWMR_RPA_P8_5THREAD 0x164520C62609AECA #define RWMR_RPA_P8_5THREAD 0x164520C62609AECAUL
#define RWMR_RPA_P8_6THREAD 0x164520C62609AECA #define RWMR_RPA_P8_6THREAD 0x164520C62609AECAUL
#define RWMR_RPA_P8_7THREAD 0x164520C62609AECA #define RWMR_RPA_P8_7THREAD 0x164520C62609AECAUL
#define RWMR_RPA_P8_8THREAD 0x164520C62609AECA #define RWMR_RPA_P8_8THREAD 0x164520C62609AECAUL
static unsigned long p8_rwmr_values[MAX_SMT_THREADS + 1] = { static unsigned long p8_rwmr_values[MAX_SMT_THREADS + 1] = {
RWMR_RPA_P8_1THREAD, RWMR_RPA_P8_1THREAD,
...@@ -1807,7 +1807,7 @@ static int threads_per_vcore(struct kvm *kvm) ...@@ -1807,7 +1807,7 @@ static int threads_per_vcore(struct kvm *kvm)
return threads_per_subcore; return threads_per_subcore;
} }
static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int core) static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int id)
{ {
struct kvmppc_vcore *vcore; struct kvmppc_vcore *vcore;
...@@ -1821,7 +1821,7 @@ static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int core) ...@@ -1821,7 +1821,7 @@ static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int core)
init_swait_queue_head(&vcore->wq); init_swait_queue_head(&vcore->wq);
vcore->preempt_tb = TB_NIL; vcore->preempt_tb = TB_NIL;
vcore->lpcr = kvm->arch.lpcr; vcore->lpcr = kvm->arch.lpcr;
vcore->first_vcpuid = core * kvm->arch.smt_mode; vcore->first_vcpuid = id;
vcore->kvm = kvm; vcore->kvm = kvm;
INIT_LIST_HEAD(&vcore->preempt_list); INIT_LIST_HEAD(&vcore->preempt_list);
...@@ -2037,12 +2037,26 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm, ...@@ -2037,12 +2037,26 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm,
mutex_lock(&kvm->lock); mutex_lock(&kvm->lock);
vcore = NULL; vcore = NULL;
err = -EINVAL; err = -EINVAL;
core = id / kvm->arch.smt_mode; if (cpu_has_feature(CPU_FTR_ARCH_300)) {
if (id >= (KVM_MAX_VCPUS * kvm->arch.emul_smt_mode)) {
pr_devel("KVM: VCPU ID too high\n");
core = KVM_MAX_VCORES;
} else {
BUG_ON(kvm->arch.smt_mode != 1);
core = kvmppc_pack_vcpu_id(kvm, id);
}
} else {
core = id / kvm->arch.smt_mode;
}
if (core < KVM_MAX_VCORES) { if (core < KVM_MAX_VCORES) {
vcore = kvm->arch.vcores[core]; vcore = kvm->arch.vcores[core];
if (!vcore) { if (vcore && cpu_has_feature(CPU_FTR_ARCH_300)) {
pr_devel("KVM: collision on id %u", id);
vcore = NULL;
} else if (!vcore) {
err = -ENOMEM; err = -ENOMEM;
vcore = kvmppc_vcore_create(kvm, core); vcore = kvmppc_vcore_create(kvm,
id & ~(kvm->arch.smt_mode - 1));
kvm->arch.vcores[core] = vcore; kvm->arch.vcores[core] = vcore;
kvm->arch.online_vcores++; kvm->arch.online_vcores++;
} }
...@@ -4550,6 +4564,8 @@ static int kvmppc_book3s_init_hv(void) ...@@ -4550,6 +4564,8 @@ static int kvmppc_book3s_init_hv(void)
pr_err("KVM-HV: Cannot determine method for accessing XICS\n"); pr_err("KVM-HV: Cannot determine method for accessing XICS\n");
return -ENODEV; return -ENODEV;
} }
/* presence of intc confirmed - node can be dropped again */
of_node_put(np);
} }
#endif #endif
......
...@@ -317,6 +317,11 @@ static int xive_select_target(struct kvm *kvm, u32 *server, u8 prio) ...@@ -317,6 +317,11 @@ static int xive_select_target(struct kvm *kvm, u32 *server, u8 prio)
return -EBUSY; return -EBUSY;
} }
static u32 xive_vp(struct kvmppc_xive *xive, u32 server)
{
return xive->vp_base + kvmppc_pack_vcpu_id(xive->kvm, server);
}
static u8 xive_lock_and_mask(struct kvmppc_xive *xive, static u8 xive_lock_and_mask(struct kvmppc_xive *xive,
struct kvmppc_xive_src_block *sb, struct kvmppc_xive_src_block *sb,
struct kvmppc_xive_irq_state *state) struct kvmppc_xive_irq_state *state)
...@@ -362,7 +367,7 @@ static u8 xive_lock_and_mask(struct kvmppc_xive *xive, ...@@ -362,7 +367,7 @@ static u8 xive_lock_and_mask(struct kvmppc_xive *xive,
*/ */
if (xd->flags & OPAL_XIVE_IRQ_MASK_VIA_FW) { if (xd->flags & OPAL_XIVE_IRQ_MASK_VIA_FW) {
xive_native_configure_irq(hw_num, xive_native_configure_irq(hw_num,
xive->vp_base + state->act_server, xive_vp(xive, state->act_server),
MASKED, state->number); MASKED, state->number);
/* set old_p so we can track if an H_EOI was done */ /* set old_p so we can track if an H_EOI was done */
state->old_p = true; state->old_p = true;
...@@ -418,7 +423,7 @@ static void xive_finish_unmask(struct kvmppc_xive *xive, ...@@ -418,7 +423,7 @@ static void xive_finish_unmask(struct kvmppc_xive *xive,
*/ */
if (xd->flags & OPAL_XIVE_IRQ_MASK_VIA_FW) { if (xd->flags & OPAL_XIVE_IRQ_MASK_VIA_FW) {
xive_native_configure_irq(hw_num, xive_native_configure_irq(hw_num,
xive->vp_base + state->act_server, xive_vp(xive, state->act_server),
state->act_priority, state->number); state->act_priority, state->number);
/* If an EOI is needed, do it here */ /* If an EOI is needed, do it here */
if (!state->old_p) if (!state->old_p)
...@@ -495,7 +500,7 @@ static int xive_target_interrupt(struct kvm *kvm, ...@@ -495,7 +500,7 @@ static int xive_target_interrupt(struct kvm *kvm,
kvmppc_xive_select_irq(state, &hw_num, NULL); kvmppc_xive_select_irq(state, &hw_num, NULL);
return xive_native_configure_irq(hw_num, return xive_native_configure_irq(hw_num,
xive->vp_base + server, xive_vp(xive, server),
prio, state->number); prio, state->number);
} }
...@@ -883,7 +888,7 @@ int kvmppc_xive_set_mapped(struct kvm *kvm, unsigned long guest_irq, ...@@ -883,7 +888,7 @@ int kvmppc_xive_set_mapped(struct kvm *kvm, unsigned long guest_irq,
* which is fine for a never started interrupt. * which is fine for a never started interrupt.
*/ */
xive_native_configure_irq(hw_irq, xive_native_configure_irq(hw_irq,
xive->vp_base + state->act_server, xive_vp(xive, state->act_server),
state->act_priority, state->number); state->act_priority, state->number);
/* /*
...@@ -959,7 +964,7 @@ int kvmppc_xive_clr_mapped(struct kvm *kvm, unsigned long guest_irq, ...@@ -959,7 +964,7 @@ int kvmppc_xive_clr_mapped(struct kvm *kvm, unsigned long guest_irq,
/* Reconfigure the IPI */ /* Reconfigure the IPI */
xive_native_configure_irq(state->ipi_number, xive_native_configure_irq(state->ipi_number,
xive->vp_base + state->act_server, xive_vp(xive, state->act_server),
state->act_priority, state->number); state->act_priority, state->number);
/* /*
...@@ -1084,7 +1089,7 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev, ...@@ -1084,7 +1089,7 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
pr_devel("Duplicate !\n"); pr_devel("Duplicate !\n");
return -EEXIST; return -EEXIST;
} }
if (cpu >= KVM_MAX_VCPUS) { if (cpu >= (KVM_MAX_VCPUS * vcpu->kvm->arch.emul_smt_mode)) {
pr_devel("Out of bounds !\n"); pr_devel("Out of bounds !\n");
return -EINVAL; return -EINVAL;
} }
...@@ -1098,7 +1103,7 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev, ...@@ -1098,7 +1103,7 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
xc->xive = xive; xc->xive = xive;
xc->vcpu = vcpu; xc->vcpu = vcpu;
xc->server_num = cpu; xc->server_num = cpu;
xc->vp_id = xive->vp_base + cpu; xc->vp_id = xive_vp(xive, cpu);
xc->mfrr = 0xff; xc->mfrr = 0xff;
xc->valid = true; xc->valid = true;
......
...@@ -106,7 +106,6 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) ...@@ -106,7 +106,6 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
* if mmio_vsx_tx_sx_enabled == 1, copy data between * if mmio_vsx_tx_sx_enabled == 1, copy data between
* VSR[32..63] and memory * VSR[32..63] and memory
*/ */
vcpu->arch.mmio_vsx_tx_sx_enabled = get_tx_or_sx(inst);
vcpu->arch.mmio_vsx_copy_nums = 0; vcpu->arch.mmio_vsx_copy_nums = 0;
vcpu->arch.mmio_vsx_offset = 0; vcpu->arch.mmio_vsx_offset = 0;
vcpu->arch.mmio_copy_type = KVMPPC_VSX_COPY_NONE; vcpu->arch.mmio_copy_type = KVMPPC_VSX_COPY_NONE;
...@@ -242,8 +241,8 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) ...@@ -242,8 +241,8 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
} }
emulated = kvmppc_handle_vsx_load(run, vcpu, emulated = kvmppc_handle_vsx_load(run, vcpu,
KVM_MMIO_REG_VSX | (op.reg & 0x1f), KVM_MMIO_REG_VSX|op.reg, io_size_each,
io_size_each, 1, op.type & SIGNEXT); 1, op.type & SIGNEXT);
break; break;
} }
#endif #endif
...@@ -363,7 +362,7 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) ...@@ -363,7 +362,7 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
} }
emulated = kvmppc_handle_vsx_store(run, vcpu, emulated = kvmppc_handle_vsx_store(run, vcpu,
op.reg & 0x1f, io_size_each, 1); op.reg, io_size_each, 1);
break; break;
} }
#endif #endif
......
...@@ -879,10 +879,10 @@ static inline void kvmppc_set_vsr_dword(struct kvm_vcpu *vcpu, ...@@ -879,10 +879,10 @@ static inline void kvmppc_set_vsr_dword(struct kvm_vcpu *vcpu,
if (offset == -1) if (offset == -1)
return; return;
if (vcpu->arch.mmio_vsx_tx_sx_enabled) { if (index >= 32) {
val.vval = VCPU_VSX_VR(vcpu, index); val.vval = VCPU_VSX_VR(vcpu, index - 32);
val.vsxval[offset] = gpr; val.vsxval[offset] = gpr;
VCPU_VSX_VR(vcpu, index) = val.vval; VCPU_VSX_VR(vcpu, index - 32) = val.vval;
} else { } else {
VCPU_VSX_FPR(vcpu, index, offset) = gpr; VCPU_VSX_FPR(vcpu, index, offset) = gpr;
} }
...@@ -894,11 +894,11 @@ static inline void kvmppc_set_vsr_dword_dump(struct kvm_vcpu *vcpu, ...@@ -894,11 +894,11 @@ static inline void kvmppc_set_vsr_dword_dump(struct kvm_vcpu *vcpu,
union kvmppc_one_reg val; union kvmppc_one_reg val;
int index = vcpu->arch.io_gpr & KVM_MMIO_REG_MASK; int index = vcpu->arch.io_gpr & KVM_MMIO_REG_MASK;
if (vcpu->arch.mmio_vsx_tx_sx_enabled) { if (index >= 32) {
val.vval = VCPU_VSX_VR(vcpu, index); val.vval = VCPU_VSX_VR(vcpu, index - 32);
val.vsxval[0] = gpr; val.vsxval[0] = gpr;
val.vsxval[1] = gpr; val.vsxval[1] = gpr;
VCPU_VSX_VR(vcpu, index) = val.vval; VCPU_VSX_VR(vcpu, index - 32) = val.vval;
} else { } else {
VCPU_VSX_FPR(vcpu, index, 0) = gpr; VCPU_VSX_FPR(vcpu, index, 0) = gpr;
VCPU_VSX_FPR(vcpu, index, 1) = gpr; VCPU_VSX_FPR(vcpu, index, 1) = gpr;
...@@ -911,12 +911,12 @@ static inline void kvmppc_set_vsr_word_dump(struct kvm_vcpu *vcpu, ...@@ -911,12 +911,12 @@ static inline void kvmppc_set_vsr_word_dump(struct kvm_vcpu *vcpu,
union kvmppc_one_reg val; union kvmppc_one_reg val;
int index = vcpu->arch.io_gpr & KVM_MMIO_REG_MASK; int index = vcpu->arch.io_gpr & KVM_MMIO_REG_MASK;
if (vcpu->arch.mmio_vsx_tx_sx_enabled) { if (index >= 32) {
val.vsx32val[0] = gpr; val.vsx32val[0] = gpr;
val.vsx32val[1] = gpr; val.vsx32val[1] = gpr;
val.vsx32val[2] = gpr; val.vsx32val[2] = gpr;
val.vsx32val[3] = gpr; val.vsx32val[3] = gpr;
VCPU_VSX_VR(vcpu, index) = val.vval; VCPU_VSX_VR(vcpu, index - 32) = val.vval;
} else { } else {
val.vsx32val[0] = gpr; val.vsx32val[0] = gpr;
val.vsx32val[1] = gpr; val.vsx32val[1] = gpr;
...@@ -936,10 +936,10 @@ static inline void kvmppc_set_vsr_word(struct kvm_vcpu *vcpu, ...@@ -936,10 +936,10 @@ static inline void kvmppc_set_vsr_word(struct kvm_vcpu *vcpu,
if (offset == -1) if (offset == -1)
return; return;
if (vcpu->arch.mmio_vsx_tx_sx_enabled) { if (index >= 32) {
val.vval = VCPU_VSX_VR(vcpu, index); val.vval = VCPU_VSX_VR(vcpu, index - 32);
val.vsx32val[offset] = gpr32; val.vsx32val[offset] = gpr32;
VCPU_VSX_VR(vcpu, index) = val.vval; VCPU_VSX_VR(vcpu, index - 32) = val.vval;
} else { } else {
dword_offset = offset / 2; dword_offset = offset / 2;
word_offset = offset % 2; word_offset = offset % 2;
...@@ -1360,10 +1360,10 @@ static inline int kvmppc_get_vsr_data(struct kvm_vcpu *vcpu, int rs, u64 *val) ...@@ -1360,10 +1360,10 @@ static inline int kvmppc_get_vsr_data(struct kvm_vcpu *vcpu, int rs, u64 *val)
break; break;
} }
if (!vcpu->arch.mmio_vsx_tx_sx_enabled) { if (rs < 32) {
*val = VCPU_VSX_FPR(vcpu, rs, vsx_offset); *val = VCPU_VSX_FPR(vcpu, rs, vsx_offset);
} else { } else {
reg.vval = VCPU_VSX_VR(vcpu, rs); reg.vval = VCPU_VSX_VR(vcpu, rs - 32);
*val = reg.vsxval[vsx_offset]; *val = reg.vsxval[vsx_offset];
} }
break; break;
...@@ -1377,13 +1377,13 @@ static inline int kvmppc_get_vsr_data(struct kvm_vcpu *vcpu, int rs, u64 *val) ...@@ -1377,13 +1377,13 @@ static inline int kvmppc_get_vsr_data(struct kvm_vcpu *vcpu, int rs, u64 *val)
break; break;
} }
if (!vcpu->arch.mmio_vsx_tx_sx_enabled) { if (rs < 32) {
dword_offset = vsx_offset / 2; dword_offset = vsx_offset / 2;
word_offset = vsx_offset % 2; word_offset = vsx_offset % 2;
reg.vsxval[0] = VCPU_VSX_FPR(vcpu, rs, dword_offset); reg.vsxval[0] = VCPU_VSX_FPR(vcpu, rs, dword_offset);
*val = reg.vsx32val[word_offset]; *val = reg.vsx32val[word_offset];
} else { } else {
reg.vval = VCPU_VSX_VR(vcpu, rs); reg.vval = VCPU_VSX_VR(vcpu, rs - 32);
*val = reg.vsx32val[vsx_offset]; *val = reg.vsx32val[vsx_offset];
} }
break; break;
......
...@@ -269,6 +269,7 @@ struct kvm_s390_sie_block { ...@@ -269,6 +269,7 @@ struct kvm_s390_sie_block {
__u8 reserved1c0[8]; /* 0x01c0 */ __u8 reserved1c0[8]; /* 0x01c0 */
#define ECD_HOSTREGMGMT 0x20000000 #define ECD_HOSTREGMGMT 0x20000000
#define ECD_MEF 0x08000000 #define ECD_MEF 0x08000000
#define ECD_ETOKENF 0x02000000
__u32 ecd; /* 0x01c8 */ __u32 ecd; /* 0x01c8 */
__u8 reserved1cc[18]; /* 0x01cc */ __u8 reserved1cc[18]; /* 0x01cc */
__u64 pp; /* 0x01de */ __u64 pp; /* 0x01de */
...@@ -655,6 +656,7 @@ struct kvm_vcpu_arch { ...@@ -655,6 +656,7 @@ struct kvm_vcpu_arch {
seqcount_t cputm_seqcount; seqcount_t cputm_seqcount;
__u64 cputm_start; __u64 cputm_start;
bool gs_enabled; bool gs_enabled;
bool skey_enabled;
}; };
struct kvm_vm_stat { struct kvm_vm_stat {
...@@ -793,12 +795,6 @@ struct kvm_s390_vsie { ...@@ -793,12 +795,6 @@ struct kvm_s390_vsie {
struct page *pages[KVM_MAX_VCPUS]; struct page *pages[KVM_MAX_VCPUS];
}; };
struct kvm_s390_migration_state {
unsigned long bitmap_size; /* in bits (number of guest pages) */
atomic64_t dirty_pages; /* number of dirty pages */
unsigned long *pgste_bitmap;
};
struct kvm_arch{ struct kvm_arch{
void *sca; void *sca;
int use_esca; int use_esca;
...@@ -828,7 +824,8 @@ struct kvm_arch{ ...@@ -828,7 +824,8 @@ struct kvm_arch{
struct kvm_s390_vsie vsie; struct kvm_s390_vsie vsie;
u8 epdx; u8 epdx;
u64 epoch; u64 epoch;
struct kvm_s390_migration_state *migration_state; int migration_mode;
atomic64_t cmma_dirty_pages;
/* subset of available cpu features enabled by user space */ /* subset of available cpu features enabled by user space */
DECLARE_BITMAP(cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS); DECLARE_BITMAP(cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
struct kvm_s390_gisa *gisa; struct kvm_s390_gisa *gisa;
......
...@@ -4,7 +4,7 @@ ...@@ -4,7 +4,7 @@
/* /*
* KVM s390 specific structures and definitions * KVM s390 specific structures and definitions
* *
* Copyright IBM Corp. 2008 * Copyright IBM Corp. 2008, 2018
* *
* Author(s): Carsten Otte <cotte@de.ibm.com> * Author(s): Carsten Otte <cotte@de.ibm.com>
* Christian Borntraeger <borntraeger@de.ibm.com> * Christian Borntraeger <borntraeger@de.ibm.com>
...@@ -225,6 +225,7 @@ struct kvm_guest_debug_arch { ...@@ -225,6 +225,7 @@ struct kvm_guest_debug_arch {
#define KVM_SYNC_FPRS (1UL << 8) #define KVM_SYNC_FPRS (1UL << 8)
#define KVM_SYNC_GSCB (1UL << 9) #define KVM_SYNC_GSCB (1UL << 9)
#define KVM_SYNC_BPBC (1UL << 10) #define KVM_SYNC_BPBC (1UL << 10)
#define KVM_SYNC_ETOKEN (1UL << 11)
/* length and alignment of the sdnx as a power of two */ /* length and alignment of the sdnx as a power of two */
#define SDNXC 8 #define SDNXC 8
#define SDNXL (1UL << SDNXC) #define SDNXL (1UL << SDNXC)
...@@ -258,6 +259,8 @@ struct kvm_sync_regs { ...@@ -258,6 +259,8 @@ struct kvm_sync_regs {
struct { struct {
__u64 reserved1[2]; __u64 reserved1[2];
__u64 gscb[4]; __u64 gscb[4];
__u64 etoken;
__u64 etoken_extension;
}; };
}; };
}; };
......
...@@ -906,54 +906,37 @@ static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req) ...@@ -906,54 +906,37 @@ static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
*/ */
static int kvm_s390_vm_start_migration(struct kvm *kvm) static int kvm_s390_vm_start_migration(struct kvm *kvm)
{ {
struct kvm_s390_migration_state *mgs;
struct kvm_memory_slot *ms; struct kvm_memory_slot *ms;
/* should be the only one */
struct kvm_memslots *slots; struct kvm_memslots *slots;
unsigned long ram_pages; unsigned long ram_pages = 0;
int slotnr; int slotnr;
/* migration mode already enabled */ /* migration mode already enabled */
if (kvm->arch.migration_state) if (kvm->arch.migration_mode)
return 0; return 0;
slots = kvm_memslots(kvm); slots = kvm_memslots(kvm);
if (!slots || !slots->used_slots) if (!slots || !slots->used_slots)
return -EINVAL; return -EINVAL;
mgs = kzalloc(sizeof(*mgs), GFP_KERNEL); if (!kvm->arch.use_cmma) {
if (!mgs) kvm->arch.migration_mode = 1;
return -ENOMEM; return 0;
kvm->arch.migration_state = mgs; }
/* mark all the pages in active slots as dirty */
if (kvm->arch.use_cmma) { for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
ms = slots->memslots + slotnr;
/* /*
* Get the first slot. They are reverse sorted by base_gfn, so * The second half of the bitmap is only used on x86,
* the first slot is also the one at the end of the address * and would be wasted otherwise, so we put it to good
* space. We have verified above that at least one slot is * use here to keep track of the state of the storage
* present. * attributes.
*/ */
ms = slots->memslots; memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
/* round up so we only use full longs */ ram_pages += ms->npages;
ram_pages = roundup(ms->base_gfn + ms->npages, BITS_PER_LONG);
/* allocate enough bytes to store all the bits */
mgs->pgste_bitmap = vmalloc(ram_pages / 8);
if (!mgs->pgste_bitmap) {
kfree(mgs);
kvm->arch.migration_state = NULL;
return -ENOMEM;
}
mgs->bitmap_size = ram_pages;
atomic64_set(&mgs->dirty_pages, ram_pages);
/* mark all the pages in active slots as dirty */
for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
ms = slots->memslots + slotnr;
bitmap_set(mgs->pgste_bitmap, ms->base_gfn, ms->npages);
}
kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
} }
atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
kvm->arch.migration_mode = 1;
kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
return 0; return 0;
} }
...@@ -963,21 +946,12 @@ static int kvm_s390_vm_start_migration(struct kvm *kvm) ...@@ -963,21 +946,12 @@ static int kvm_s390_vm_start_migration(struct kvm *kvm)
*/ */
static int kvm_s390_vm_stop_migration(struct kvm *kvm) static int kvm_s390_vm_stop_migration(struct kvm *kvm)
{ {
struct kvm_s390_migration_state *mgs;
/* migration mode already disabled */ /* migration mode already disabled */
if (!kvm->arch.migration_state) if (!kvm->arch.migration_mode)
return 0; return 0;
mgs = kvm->arch.migration_state; kvm->arch.migration_mode = 0;
kvm->arch.migration_state = NULL; if (kvm->arch.use_cmma)
if (kvm->arch.use_cmma) {
kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION); kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
/* We have to wait for the essa emulation to finish */
synchronize_srcu(&kvm->srcu);
vfree(mgs->pgste_bitmap);
}
kfree(mgs);
return 0; return 0;
} }
...@@ -1005,7 +979,7 @@ static int kvm_s390_vm_set_migration(struct kvm *kvm, ...@@ -1005,7 +979,7 @@ static int kvm_s390_vm_set_migration(struct kvm *kvm,
static int kvm_s390_vm_get_migration(struct kvm *kvm, static int kvm_s390_vm_get_migration(struct kvm *kvm,
struct kvm_device_attr *attr) struct kvm_device_attr *attr)
{ {
u64 mig = (kvm->arch.migration_state != NULL); u64 mig = kvm->arch.migration_mode;
if (attr->attr != KVM_S390_VM_MIGRATION_STATUS) if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
return -ENXIO; return -ENXIO;
...@@ -1652,6 +1626,134 @@ static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args) ...@@ -1652,6 +1626,134 @@ static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
/* for consistency */
#define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
/*
* Similar to gfn_to_memslot, but returns the index of a memslot also when the
* address falls in a hole. In that case the index of one of the memslots
* bordering the hole is returned.
*/
static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn)
{
int start = 0, end = slots->used_slots;
int slot = atomic_read(&slots->lru_slot);
struct kvm_memory_slot *memslots = slots->memslots;
if (gfn >= memslots[slot].base_gfn &&
gfn < memslots[slot].base_gfn + memslots[slot].npages)
return slot;
while (start < end) {
slot = start + (end - start) / 2;
if (gfn >= memslots[slot].base_gfn)
end = slot;
else
start = slot + 1;
}
if (gfn >= memslots[start].base_gfn &&
gfn < memslots[start].base_gfn + memslots[start].npages) {
atomic_set(&slots->lru_slot, start);
}
return start;
}
static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
u8 *res, unsigned long bufsize)
{
unsigned long pgstev, hva, cur_gfn = args->start_gfn;
args->count = 0;
while (args->count < bufsize) {
hva = gfn_to_hva(kvm, cur_gfn);
/*
* We return an error if the first value was invalid, but we
* return successfully if at least one value was copied.
*/
if (kvm_is_error_hva(hva))
return args->count ? 0 : -EFAULT;
if (get_pgste(kvm->mm, hva, &pgstev) < 0)
pgstev = 0;
res[args->count++] = (pgstev >> 24) & 0x43;
cur_gfn++;
}
return 0;
}
static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
unsigned long cur_gfn)
{
int slotidx = gfn_to_memslot_approx(slots, cur_gfn);
struct kvm_memory_slot *ms = slots->memslots + slotidx;
unsigned long ofs = cur_gfn - ms->base_gfn;
if (ms->base_gfn + ms->npages <= cur_gfn) {
slotidx--;
/* If we are above the highest slot, wrap around */
if (slotidx < 0)
slotidx = slots->used_slots - 1;
ms = slots->memslots + slotidx;
ofs = 0;
}
ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
while ((slotidx > 0) && (ofs >= ms->npages)) {
slotidx--;
ms = slots->memslots + slotidx;
ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0);
}
return ms->base_gfn + ofs;
}
static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
u8 *res, unsigned long bufsize)
{
unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
struct kvm_memslots *slots = kvm_memslots(kvm);
struct kvm_memory_slot *ms;
cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
ms = gfn_to_memslot(kvm, cur_gfn);
args->count = 0;
args->start_gfn = cur_gfn;
if (!ms)
return 0;
next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
mem_end = slots->memslots[0].base_gfn + slots->memslots[0].npages;
while (args->count < bufsize) {
hva = gfn_to_hva(kvm, cur_gfn);
if (kvm_is_error_hva(hva))
return 0;
/* Decrement only if we actually flipped the bit to 0 */
if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
atomic64_dec(&kvm->arch.cmma_dirty_pages);
if (get_pgste(kvm->mm, hva, &pgstev) < 0)
pgstev = 0;
/* Save the value */
res[args->count++] = (pgstev >> 24) & 0x43;
/* If the next bit is too far away, stop. */
if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
return 0;
/* If we reached the previous "next", find the next one */
if (cur_gfn == next_gfn)
next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
/* Reached the end of memory or of the buffer, stop */
if ((next_gfn >= mem_end) ||
(next_gfn - args->start_gfn >= bufsize))
return 0;
cur_gfn++;
/* Reached the end of the current memslot, take the next one. */
if (cur_gfn - ms->base_gfn >= ms->npages) {
ms = gfn_to_memslot(kvm, cur_gfn);
if (!ms)
return 0;
}
}
return 0;
}
/* /*
* This function searches for the next page with dirty CMMA attributes, and * This function searches for the next page with dirty CMMA attributes, and
* saves the attributes in the buffer up to either the end of the buffer or * saves the attributes in the buffer up to either the end of the buffer or
...@@ -1663,22 +1765,18 @@ static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args) ...@@ -1663,22 +1765,18 @@ static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
static int kvm_s390_get_cmma_bits(struct kvm *kvm, static int kvm_s390_get_cmma_bits(struct kvm *kvm,
struct kvm_s390_cmma_log *args) struct kvm_s390_cmma_log *args)
{ {
struct kvm_s390_migration_state *s = kvm->arch.migration_state; unsigned long bufsize;
unsigned long bufsize, hva, pgstev, i, next, cur; int srcu_idx, peek, ret;
int srcu_idx, peek, r = 0, rr; u8 *values;
u8 *res;
cur = args->start_gfn;
i = next = pgstev = 0;
if (unlikely(!kvm->arch.use_cmma)) if (!kvm->arch.use_cmma)
return -ENXIO; return -ENXIO;
/* Invalid/unsupported flags were specified */ /* Invalid/unsupported flags were specified */
if (args->flags & ~KVM_S390_CMMA_PEEK) if (args->flags & ~KVM_S390_CMMA_PEEK)
return -EINVAL; return -EINVAL;
/* Migration mode query, and we are not doing a migration */ /* Migration mode query, and we are not doing a migration */
peek = !!(args->flags & KVM_S390_CMMA_PEEK); peek = !!(args->flags & KVM_S390_CMMA_PEEK);
if (!peek && !s) if (!peek && !kvm->arch.migration_mode)
return -EINVAL; return -EINVAL;
/* CMMA is disabled or was not used, or the buffer has length zero */ /* CMMA is disabled or was not used, or the buffer has length zero */
bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX); bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
...@@ -1686,74 +1784,35 @@ static int kvm_s390_get_cmma_bits(struct kvm *kvm, ...@@ -1686,74 +1784,35 @@ static int kvm_s390_get_cmma_bits(struct kvm *kvm,
memset(args, 0, sizeof(*args)); memset(args, 0, sizeof(*args));
return 0; return 0;
} }
/* We are not peeking, and there are no dirty pages */
if (!peek) { if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
/* We are not peeking, and there are no dirty pages */ memset(args, 0, sizeof(*args));
if (!atomic64_read(&s->dirty_pages)) { return 0;
memset(args, 0, sizeof(*args));
return 0;
}
cur = find_next_bit(s->pgste_bitmap, s->bitmap_size,
args->start_gfn);
if (cur >= s->bitmap_size) /* nothing found, loop back */
cur = find_next_bit(s->pgste_bitmap, s->bitmap_size, 0);
if (cur >= s->bitmap_size) { /* again! (very unlikely) */
memset(args, 0, sizeof(*args));
return 0;
}
next = find_next_bit(s->pgste_bitmap, s->bitmap_size, cur + 1);
} }
res = vmalloc(bufsize); values = vmalloc(bufsize);
if (!res) if (!values)
return -ENOMEM; return -ENOMEM;
args->start_gfn = cur;
down_read(&kvm->mm->mmap_sem); down_read(&kvm->mm->mmap_sem);
srcu_idx = srcu_read_lock(&kvm->srcu); srcu_idx = srcu_read_lock(&kvm->srcu);
while (i < bufsize) { if (peek)
hva = gfn_to_hva(kvm, cur); ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
if (kvm_is_error_hva(hva)) { else
r = -EFAULT; ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
break;
}
/* decrement only if we actually flipped the bit to 0 */
if (!peek && test_and_clear_bit(cur, s->pgste_bitmap))
atomic64_dec(&s->dirty_pages);
r = get_pgste(kvm->mm, hva, &pgstev);
if (r < 0)
pgstev = 0;
/* save the value */
res[i++] = (pgstev >> 24) & 0x43;
/*
* if the next bit is too far away, stop.
* if we reached the previous "next", find the next one
*/
if (!peek) {
if (next > cur + KVM_S390_MAX_BIT_DISTANCE)
break;
if (cur == next)
next = find_next_bit(s->pgste_bitmap,
s->bitmap_size, cur + 1);
/* reached the end of the bitmap or of the buffer, stop */
if ((next >= s->bitmap_size) ||
(next >= args->start_gfn + bufsize))
break;
}
cur++;
}
srcu_read_unlock(&kvm->srcu, srcu_idx); srcu_read_unlock(&kvm->srcu, srcu_idx);
up_read(&kvm->mm->mmap_sem); up_read(&kvm->mm->mmap_sem);
args->count = i;
args->remaining = s ? atomic64_read(&s->dirty_pages) : 0;
rr = copy_to_user((void __user *)args->values, res, args->count); if (kvm->arch.migration_mode)
if (rr) args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
r = -EFAULT; else
args->remaining = 0;
vfree(res); if (copy_to_user((void __user *)args->values, values, args->count))
return r; ret = -EFAULT;
vfree(values);
return ret;
} }
/* /*
...@@ -2192,10 +2251,6 @@ void kvm_arch_destroy_vm(struct kvm *kvm) ...@@ -2192,10 +2251,6 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
kvm_s390_destroy_adapters(kvm); kvm_s390_destroy_adapters(kvm);
kvm_s390_clear_float_irqs(kvm); kvm_s390_clear_float_irqs(kvm);
kvm_s390_vsie_destroy(kvm); kvm_s390_vsie_destroy(kvm);
if (kvm->arch.migration_state) {
vfree(kvm->arch.migration_state->pgste_bitmap);
kfree(kvm->arch.migration_state);
}
KVM_EVENT(3, "vm 0x%pK destroyed", kvm); KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
} }
...@@ -2353,6 +2408,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) ...@@ -2353,6 +2408,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC; vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
if (test_kvm_facility(vcpu->kvm, 133)) if (test_kvm_facility(vcpu->kvm, 133))
vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB; vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
if (test_kvm_facility(vcpu->kvm, 156))
vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
/* fprs can be synchronized via vrs, even if the guest has no vx. With /* fprs can be synchronized via vrs, even if the guest has no vx. With
* MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format. * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
*/ */
...@@ -2602,7 +2659,8 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) ...@@ -2602,7 +2659,8 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
} }
if (test_kvm_facility(vcpu->kvm, 139)) if (test_kvm_facility(vcpu->kvm, 139))
vcpu->arch.sie_block->ecd |= ECD_MEF; vcpu->arch.sie_block->ecd |= ECD_MEF;
if (test_kvm_facility(vcpu->kvm, 156))
vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
if (vcpu->arch.sie_block->gd) { if (vcpu->arch.sie_block->gd) {
vcpu->arch.sie_block->eca |= ECA_AIV; vcpu->arch.sie_block->eca |= ECA_AIV;
VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u", VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
...@@ -3520,6 +3578,7 @@ static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) ...@@ -3520,6 +3578,7 @@ static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
} }
preempt_enable(); preempt_enable();
} }
/* SIE will load etoken directly from SDNX and therefore kvm_run */
kvm_run->kvm_dirty_regs = 0; kvm_run->kvm_dirty_regs = 0;
} }
...@@ -3559,7 +3618,7 @@ static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) ...@@ -3559,7 +3618,7 @@ static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
__ctl_clear_bit(2, 4); __ctl_clear_bit(2, 4);
vcpu->arch.host_gscb = NULL; vcpu->arch.host_gscb = NULL;
} }
/* SIE will save etoken directly into SDNX and therefore kvm_run */
} }
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
......
...@@ -205,13 +205,10 @@ static int handle_store_cpu_address(struct kvm_vcpu *vcpu) ...@@ -205,13 +205,10 @@ static int handle_store_cpu_address(struct kvm_vcpu *vcpu)
int kvm_s390_skey_check_enable(struct kvm_vcpu *vcpu) int kvm_s390_skey_check_enable(struct kvm_vcpu *vcpu)
{ {
int rc; int rc;
struct kvm_s390_sie_block *sie_block = vcpu->arch.sie_block;
trace_kvm_s390_skey_related_inst(vcpu); trace_kvm_s390_skey_related_inst(vcpu);
/* Already enabled? */ /* Already enabled? */
if (vcpu->kvm->arch.use_skf && if (vcpu->arch.skey_enabled)
!(sie_block->ictl & (ICTL_ISKE | ICTL_SSKE | ICTL_RRBE)) &&
!kvm_s390_test_cpuflags(vcpu, CPUSTAT_KSS))
return 0; return 0;
rc = s390_enable_skey(); rc = s390_enable_skey();
...@@ -222,9 +219,10 @@ int kvm_s390_skey_check_enable(struct kvm_vcpu *vcpu) ...@@ -222,9 +219,10 @@ int kvm_s390_skey_check_enable(struct kvm_vcpu *vcpu)
if (kvm_s390_test_cpuflags(vcpu, CPUSTAT_KSS)) if (kvm_s390_test_cpuflags(vcpu, CPUSTAT_KSS))
kvm_s390_clear_cpuflags(vcpu, CPUSTAT_KSS); kvm_s390_clear_cpuflags(vcpu, CPUSTAT_KSS);
if (!vcpu->kvm->arch.use_skf) if (!vcpu->kvm->arch.use_skf)
sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE; vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
else else
sie_block->ictl &= ~(ICTL_ISKE | ICTL_SSKE | ICTL_RRBE); vcpu->arch.sie_block->ictl &= ~(ICTL_ISKE | ICTL_SSKE | ICTL_RRBE);
vcpu->arch.skey_enabled = true;
return 0; return 0;
} }
...@@ -987,7 +985,7 @@ static int handle_pfmf(struct kvm_vcpu *vcpu) ...@@ -987,7 +985,7 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
if (vcpu->run->s.regs.gprs[reg1] & PFMF_CF) { if (vcpu->run->s.regs.gprs[reg1] & PFMF_CF) {
if (clear_user((void __user *)vmaddr, PAGE_SIZE)) if (kvm_clear_guest(vcpu->kvm, start, PAGE_SIZE))
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
} }
...@@ -1024,9 +1022,11 @@ static int handle_pfmf(struct kvm_vcpu *vcpu) ...@@ -1024,9 +1022,11 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
return 0; return 0;
} }
static inline int do_essa(struct kvm_vcpu *vcpu, const int orc) /*
* Must be called with relevant read locks held (kvm->mm->mmap_sem, kvm->srcu)
*/
static inline int __do_essa(struct kvm_vcpu *vcpu, const int orc)
{ {
struct kvm_s390_migration_state *ms = vcpu->kvm->arch.migration_state;
int r1, r2, nappended, entries; int r1, r2, nappended, entries;
unsigned long gfn, hva, res, pgstev, ptev; unsigned long gfn, hva, res, pgstev, ptev;
unsigned long *cbrlo; unsigned long *cbrlo;
...@@ -1076,10 +1076,12 @@ static inline int do_essa(struct kvm_vcpu *vcpu, const int orc) ...@@ -1076,10 +1076,12 @@ static inline int do_essa(struct kvm_vcpu *vcpu, const int orc)
cbrlo[entries] = gfn << PAGE_SHIFT; cbrlo[entries] = gfn << PAGE_SHIFT;
} }
if (orc && gfn < ms->bitmap_size) { if (orc) {
/* increment only if we are really flipping the bit to 1 */ struct kvm_memory_slot *ms = gfn_to_memslot(vcpu->kvm, gfn);
if (!test_and_set_bit(gfn, ms->pgste_bitmap))
atomic64_inc(&ms->dirty_pages); /* Increment only if we are really flipping the bit */
if (ms && !test_and_set_bit(gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
atomic64_inc(&vcpu->kvm->arch.cmma_dirty_pages);
} }
return nappended; return nappended;
...@@ -1108,7 +1110,7 @@ static int handle_essa(struct kvm_vcpu *vcpu) ...@@ -1108,7 +1110,7 @@ static int handle_essa(struct kvm_vcpu *vcpu)
: ESSA_SET_STABLE_IF_RESIDENT)) : ESSA_SET_STABLE_IF_RESIDENT))
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
if (likely(!vcpu->kvm->arch.migration_state)) { if (!vcpu->kvm->arch.migration_mode) {
/* /*
* CMMA is enabled in the KVM settings, but is disabled in * CMMA is enabled in the KVM settings, but is disabled in
* the SIE block and in the mm_context, and we are not doing * the SIE block and in the mm_context, and we are not doing
...@@ -1136,10 +1138,16 @@ static int handle_essa(struct kvm_vcpu *vcpu) ...@@ -1136,10 +1138,16 @@ static int handle_essa(struct kvm_vcpu *vcpu)
/* Retry the ESSA instruction */ /* Retry the ESSA instruction */
kvm_s390_retry_instr(vcpu); kvm_s390_retry_instr(vcpu);
} else { } else {
/* Account for the possible extra cbrl entry */ int srcu_idx;
i = do_essa(vcpu, orc);
down_read(&vcpu->kvm->mm->mmap_sem);
srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
i = __do_essa(vcpu, orc);
srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
up_read(&vcpu->kvm->mm->mmap_sem);
if (i < 0) if (i < 0)
return i; return i;
/* Account for the possible extra cbrl entry */
entries += i; entries += i;
} }
vcpu->arch.sie_block->cbrlo &= PAGE_MASK; /* reset nceo */ vcpu->arch.sie_block->cbrlo &= PAGE_MASK; /* reset nceo */
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
/* /*
* kvm nested virtualization support for s390x * kvm nested virtualization support for s390x
* *
* Copyright IBM Corp. 2016 * Copyright IBM Corp. 2016, 2018
* *
* Author(s): David Hildenbrand <dahi@linux.vnet.ibm.com> * Author(s): David Hildenbrand <dahi@linux.vnet.ibm.com>
*/ */
...@@ -378,6 +378,10 @@ static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) ...@@ -378,6 +378,10 @@ static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
if (test_kvm_facility(vcpu->kvm, 139)) if (test_kvm_facility(vcpu->kvm, 139))
scb_s->ecd |= scb_o->ecd & ECD_MEF; scb_s->ecd |= scb_o->ecd & ECD_MEF;
/* etoken */
if (test_kvm_facility(vcpu->kvm, 156))
scb_s->ecd |= scb_o->ecd & ECD_ETOKENF;
prepare_ibc(vcpu, vsie_page); prepare_ibc(vcpu, vsie_page);
rc = shadow_crycb(vcpu, vsie_page); rc = shadow_crycb(vcpu, vsie_page);
out: out:
...@@ -627,7 +631,8 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) ...@@ -627,7 +631,8 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
vsie_page->riccbd_gpa = gpa; vsie_page->riccbd_gpa = gpa;
scb_s->riccbd = hpa; scb_s->riccbd = hpa;
} }
if ((scb_s->ecb & ECB_GS) && !(scb_s->ecd & ECD_HOSTREGMGMT)) { if (((scb_s->ecb & ECB_GS) && !(scb_s->ecd & ECD_HOSTREGMGMT)) ||
(scb_s->ecd & ECD_ETOKENF)) {
unsigned long sdnxc; unsigned long sdnxc;
gpa = READ_ONCE(scb_o->sdnxo) & ~0xfUL; gpa = READ_ONCE(scb_o->sdnxo) & ~0xfUL;
...@@ -818,6 +823,8 @@ static int handle_stfle(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) ...@@ -818,6 +823,8 @@ static int handle_stfle(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
* - < 0 if an error occurred * - < 0 if an error occurred
*/ */
static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
__releases(vcpu->kvm->srcu)
__acquires(vcpu->kvm->srcu)
{ {
struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s; struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
struct kvm_s390_sie_block *scb_o = vsie_page->scb_o; struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
......
...@@ -4,7 +4,7 @@ ...@@ -4,7 +4,7 @@
* numbering scheme from the Princples of Operations: most significant bit * numbering scheme from the Princples of Operations: most significant bit
* has bit number 0. * has bit number 0.
* *
* Copyright IBM Corp. 2015 * Copyright IBM Corp. 2015, 2018
* *
*/ */
...@@ -106,6 +106,7 @@ static struct facility_def facility_defs[] = { ...@@ -106,6 +106,7 @@ static struct facility_def facility_defs[] = {
.name = "FACILITIES_KVM_CPUMODEL", .name = "FACILITIES_KVM_CPUMODEL",
.bits = (int[]){ .bits = (int[]){
156, /* etoken facility */
-1 /* END */ -1 /* END */
} }
}, },
......
-obj-y := hv_init.o mmu.o
+obj-y := hv_init.o mmu.o nested.o
 obj-$(CONFIG_X86_64) += hv_apic.o
// SPDX-License-Identifier: GPL-2.0
/*
* Hyper-V nested virtualization code.
*
* Copyright (C) 2018, Microsoft, Inc.
*
* Author : Lan Tianyu <Tianyu.Lan@microsoft.com>
*/
#include <linux/types.h>
#include <asm/hyperv-tlfs.h>
#include <asm/mshyperv.h>
#include <asm/tlbflush.h>
#include <asm/trace/hyperv.h>
int hyperv_flush_guest_mapping(u64 as)
{
struct hv_guest_mapping_flush **flush_pcpu;
struct hv_guest_mapping_flush *flush;
u64 status;
unsigned long flags;
int ret = -ENOTSUPP;
if (!hv_hypercall_pg)
goto fault;
local_irq_save(flags);
flush_pcpu = (struct hv_guest_mapping_flush **)
this_cpu_ptr(hyperv_pcpu_input_arg);
flush = *flush_pcpu;
if (unlikely(!flush)) {
local_irq_restore(flags);
goto fault;
}
flush->address_space = as;
flush->flags = 0;
status = hv_do_hypercall(HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE,
flush, NULL);
local_irq_restore(flags);
if (!(status & HV_HYPERCALL_RESULT_MASK))
ret = 0;
fault:
trace_hyperv_nested_flush_guest_mapping(as, ret);
return ret;
}
EXPORT_SYMBOL_GPL(hyperv_flush_guest_mapping);
...@@ -310,6 +310,7 @@ struct ms_hyperv_tsc_page { ...@@ -310,6 +310,7 @@ struct ms_hyperv_tsc_page {
#define HV_X64_MSR_REENLIGHTENMENT_CONTROL 0x40000106 #define HV_X64_MSR_REENLIGHTENMENT_CONTROL 0x40000106
/* Nested features (CPUID 0x4000000A) EAX */ /* Nested features (CPUID 0x4000000A) EAX */
#define HV_X64_NESTED_GUEST_MAPPING_FLUSH BIT(18)
#define HV_X64_NESTED_MSR_BITMAP BIT(19) #define HV_X64_NESTED_MSR_BITMAP BIT(19)
struct hv_reenlightenment_control { struct hv_reenlightenment_control {
...@@ -351,6 +352,7 @@ struct hv_tsc_emulation_status { ...@@ -351,6 +352,7 @@ struct hv_tsc_emulation_status {
#define HVCALL_SEND_IPI_EX 0x0015 #define HVCALL_SEND_IPI_EX 0x0015
#define HVCALL_POST_MESSAGE 0x005c #define HVCALL_POST_MESSAGE 0x005c
#define HVCALL_SIGNAL_EVENT 0x005d #define HVCALL_SIGNAL_EVENT 0x005d
#define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE 0x00af
#define HV_X64_MSR_VP_ASSIST_PAGE_ENABLE 0x00000001 #define HV_X64_MSR_VP_ASSIST_PAGE_ENABLE 0x00000001
#define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT 12 #define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT 12
...@@ -742,6 +744,12 @@ struct ipi_arg_ex { ...@@ -742,6 +744,12 @@ struct ipi_arg_ex {
struct hv_vpset vp_set; struct hv_vpset vp_set;
}; };
/* HvFlushGuestPhysicalAddressSpace hypercalls */
struct hv_guest_mapping_flush {
u64 address_space;
u64 flags;
};
/* HvFlushVirtualAddressSpace, HvFlushVirtualAddressList hypercalls */ /* HvFlushVirtualAddressSpace, HvFlushVirtualAddressList hypercalls */
struct hv_tlb_flush { struct hv_tlb_flush {
u64 address_space; u64 address_space;
......
...@@ -55,6 +55,7 @@ ...@@ -55,6 +55,7 @@
#define KVM_REQ_TRIPLE_FAULT KVM_ARCH_REQ(2) #define KVM_REQ_TRIPLE_FAULT KVM_ARCH_REQ(2)
#define KVM_REQ_MMU_SYNC KVM_ARCH_REQ(3) #define KVM_REQ_MMU_SYNC KVM_ARCH_REQ(3)
#define KVM_REQ_CLOCK_UPDATE KVM_ARCH_REQ(4) #define KVM_REQ_CLOCK_UPDATE KVM_ARCH_REQ(4)
#define KVM_REQ_LOAD_CR3 KVM_ARCH_REQ(5)
#define KVM_REQ_EVENT KVM_ARCH_REQ(6) #define KVM_REQ_EVENT KVM_ARCH_REQ(6)
#define KVM_REQ_APF_HALT KVM_ARCH_REQ(7) #define KVM_REQ_APF_HALT KVM_ARCH_REQ(7)
#define KVM_REQ_STEAL_UPDATE KVM_ARCH_REQ(8) #define KVM_REQ_STEAL_UPDATE KVM_ARCH_REQ(8)
...@@ -76,13 +77,13 @@ ...@@ -76,13 +77,13 @@
#define KVM_REQ_HV_EXIT KVM_ARCH_REQ(21) #define KVM_REQ_HV_EXIT KVM_ARCH_REQ(21)
#define KVM_REQ_HV_STIMER KVM_ARCH_REQ(22) #define KVM_REQ_HV_STIMER KVM_ARCH_REQ(22)
#define KVM_REQ_LOAD_EOI_EXITMAP KVM_ARCH_REQ(23) #define KVM_REQ_LOAD_EOI_EXITMAP KVM_ARCH_REQ(23)
#define KVM_REQ_GET_VMCS12_PAGES KVM_ARCH_REQ(24)
#define CR0_RESERVED_BITS \ #define CR0_RESERVED_BITS \
(~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \ (~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \
| X86_CR0_ET | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM \ | X86_CR0_ET | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM \
| X86_CR0_NW | X86_CR0_CD | X86_CR0_PG)) | X86_CR0_NW | X86_CR0_CD | X86_CR0_PG))
#define CR3_PCID_INVD BIT_64(63)
#define CR4_RESERVED_BITS \ #define CR4_RESERVED_BITS \
(~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\ (~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\
| X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \ | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \
...@@ -326,6 +327,16 @@ struct rsvd_bits_validate { ...@@ -326,6 +327,16 @@ struct rsvd_bits_validate {
u64 bad_mt_xwr; u64 bad_mt_xwr;
}; };
struct kvm_mmu_root_info {
gpa_t cr3;
hpa_t hpa;
};
#define KVM_MMU_ROOT_INFO_INVALID \
((struct kvm_mmu_root_info) { .cr3 = INVALID_PAGE, .hpa = INVALID_PAGE })
#define KVM_MMU_NUM_PREV_ROOTS 3
/* /*
* x86 supports 4 paging modes (5-level 64-bit, 4-level 64-bit, 3-level 32-bit, * x86 supports 4 paging modes (5-level 64-bit, 4-level 64-bit, 3-level 32-bit,
* and 2-level 32-bit). The kvm_mmu structure abstracts the details of the * and 2-level 32-bit). The kvm_mmu structure abstracts the details of the
...@@ -345,7 +356,7 @@ struct kvm_mmu { ...@@ -345,7 +356,7 @@ struct kvm_mmu {
struct x86_exception *exception); struct x86_exception *exception);
int (*sync_page)(struct kvm_vcpu *vcpu, int (*sync_page)(struct kvm_vcpu *vcpu,
struct kvm_mmu_page *sp); struct kvm_mmu_page *sp);
void (*invlpg)(struct kvm_vcpu *vcpu, gva_t gva); void (*invlpg)(struct kvm_vcpu *vcpu, gva_t gva, hpa_t root_hpa);
void (*update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, void (*update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
u64 *spte, const void *pte); u64 *spte, const void *pte);
hpa_t root_hpa; hpa_t root_hpa;
...@@ -354,6 +365,7 @@ struct kvm_mmu { ...@@ -354,6 +365,7 @@ struct kvm_mmu {
u8 shadow_root_level; u8 shadow_root_level;
u8 ept_ad; u8 ept_ad;
bool direct_map; bool direct_map;
struct kvm_mmu_root_info prev_roots[KVM_MMU_NUM_PREV_ROOTS];
/* /*
* Bitmap; bit set = permission fault * Bitmap; bit set = permission fault
...@@ -978,6 +990,15 @@ struct kvm_x86_ops { ...@@ -978,6 +990,15 @@ struct kvm_x86_ops {
void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags); void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags);
void (*tlb_flush)(struct kvm_vcpu *vcpu, bool invalidate_gpa); void (*tlb_flush)(struct kvm_vcpu *vcpu, bool invalidate_gpa);
int (*tlb_remote_flush)(struct kvm *kvm);
/*
* Flush any TLB entries associated with the given GVA.
* Does not need to flush GPA->HPA mappings.
* Can potentially get non-canonical addresses through INVLPGs, which
* the implementation may choose to ignore if appropriate.
*/
void (*tlb_flush_gva)(struct kvm_vcpu *vcpu, gva_t addr);
void (*run)(struct kvm_vcpu *vcpu); void (*run)(struct kvm_vcpu *vcpu);
int (*handle_exit)(struct kvm_vcpu *vcpu); int (*handle_exit)(struct kvm_vcpu *vcpu);
...@@ -1090,6 +1111,14 @@ struct kvm_x86_ops { ...@@ -1090,6 +1111,14 @@ struct kvm_x86_ops {
void (*setup_mce)(struct kvm_vcpu *vcpu); void (*setup_mce)(struct kvm_vcpu *vcpu);
int (*get_nested_state)(struct kvm_vcpu *vcpu,
struct kvm_nested_state __user *user_kvm_nested_state,
unsigned user_data_size);
int (*set_nested_state)(struct kvm_vcpu *vcpu,
struct kvm_nested_state __user *user_kvm_nested_state,
struct kvm_nested_state *kvm_state);
void (*get_vmcs12_pages)(struct kvm_vcpu *vcpu);
int (*smi_allowed)(struct kvm_vcpu *vcpu); int (*smi_allowed)(struct kvm_vcpu *vcpu);
int (*pre_enter_smm)(struct kvm_vcpu *vcpu, char *smstate); int (*pre_enter_smm)(struct kvm_vcpu *vcpu, char *smstate);
int (*pre_leave_smm)(struct kvm_vcpu *vcpu, u64 smbase); int (*pre_leave_smm)(struct kvm_vcpu *vcpu, u64 smbase);
...@@ -1122,6 +1151,16 @@ static inline void kvm_arch_free_vm(struct kvm *kvm) ...@@ -1122,6 +1151,16 @@ static inline void kvm_arch_free_vm(struct kvm *kvm)
return kvm_x86_ops->vm_free(kvm); return kvm_x86_ops->vm_free(kvm);
} }
#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLB
static inline int kvm_arch_flush_remote_tlb(struct kvm *kvm)
{
if (kvm_x86_ops->tlb_remote_flush &&
!kvm_x86_ops->tlb_remote_flush(kvm))
return 0;
else
return -ENOTSUPP;
}
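Note the fallback here: if the vendor module does not provide tlb_remote_flush, or the hypercall-based flush fails, kvm_arch_flush_remote_tlb() returns -ENOTSUPP and the generic kvm_flush_remote_tlbs() path falls back to the usual KVM_REQ_TLB_FLUSH broadcast to all vCPUs, so the Hyper-V enlightened remote flush is purely an optimization.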
int kvm_mmu_module_init(void); int kvm_mmu_module_init(void);
void kvm_mmu_module_exit(void); void kvm_mmu_module_exit(void);
...@@ -1273,6 +1312,10 @@ static inline int __kvm_irq_line_state(unsigned long *irq_state, ...@@ -1273,6 +1312,10 @@ static inline int __kvm_irq_line_state(unsigned long *irq_state,
return !!(*irq_state); return !!(*irq_state);
} }
#define KVM_MMU_ROOT_CURRENT BIT(0)
#define KVM_MMU_ROOT_PREVIOUS(i) BIT(1+i)
#define KVM_MMU_ROOTS_ALL (~0UL)
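These masks pick which roots the reworked kvm_mmu_free_roots() (declared below) should drop: bit 0 selects the active root and bit 1+i the i-th cached previous root. Illustrative calls, not taken from this patch:

	/* Drop the current root and the first cached previous root only. */
	kvm_mmu_free_roots(vcpu, KVM_MMU_ROOT_CURRENT | KVM_MMU_ROOT_PREVIOUS(0));

	/* Drop everything, e.g. when the MMU context is torn down. */
	kvm_mmu_free_roots(vcpu, KVM_MMU_ROOTS_ALL);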
int kvm_pic_set_irq(struct kvm_pic *pic, int irq, int irq_source_id, int level); int kvm_pic_set_irq(struct kvm_pic *pic, int irq, int irq_source_id, int level);
void kvm_pic_clear_all(struct kvm_pic *pic, int irq_source_id); void kvm_pic_clear_all(struct kvm_pic *pic, int irq_source_id);
...@@ -1284,7 +1327,7 @@ void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu); ...@@ -1284,7 +1327,7 @@ void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu);
int kvm_mmu_load(struct kvm_vcpu *vcpu); int kvm_mmu_load(struct kvm_vcpu *vcpu);
void kvm_mmu_unload(struct kvm_vcpu *vcpu); void kvm_mmu_unload(struct kvm_vcpu *vcpu);
void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu); void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu);
void kvm_mmu_free_roots(struct kvm_vcpu *vcpu); void kvm_mmu_free_roots(struct kvm_vcpu *vcpu, ulong roots_to_free);
gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access, gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access,
struct x86_exception *exception); struct x86_exception *exception);
gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva, gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva,
...@@ -1303,7 +1346,8 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu); ...@@ -1303,7 +1346,8 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu);
int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u64 error_code, int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u64 error_code,
void *insn, int insn_len); void *insn, int insn_len);
void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva); void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva);
void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu); void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid);
void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu, gpa_t new_cr3, bool skip_tlb_flush);
void kvm_enable_tdp(void); void kvm_enable_tdp(void);
void kvm_disable_tdp(void); void kvm_disable_tdp(void);
...@@ -1418,6 +1462,10 @@ int kvm_cpu_get_interrupt(struct kvm_vcpu *v); ...@@ -1418,6 +1462,10 @@ int kvm_cpu_get_interrupt(struct kvm_vcpu *v);
void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event); void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event);
void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu); void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu);
int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low,
unsigned long ipi_bitmap_high, int min,
unsigned long icr, int op_64_bit);
u64 kvm_get_arch_capabilities(void); u64 kvm_get_arch_capabilities(void);
void kvm_define_shared_msr(unsigned index, u32 msr); void kvm_define_shared_msr(unsigned index, u32 msr);
int kvm_set_shared_msr(unsigned index, u64 val, u64 mask); int kvm_set_shared_msr(unsigned index, u64 val, u64 mask);
......
...@@ -347,6 +347,7 @@ void hyperv_reenlightenment_intr(struct pt_regs *regs); ...@@ -347,6 +347,7 @@ void hyperv_reenlightenment_intr(struct pt_regs *regs);
void set_hv_tscchange_cb(void (*cb)(void)); void set_hv_tscchange_cb(void (*cb)(void));
void clear_hv_tscchange_cb(void); void clear_hv_tscchange_cb(void);
void hyperv_stop_tsc_emulation(void); void hyperv_stop_tsc_emulation(void);
int hyperv_flush_guest_mapping(u64 as);
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
void hv_apic_init(void); void hv_apic_init(void);
...@@ -366,6 +367,7 @@ static inline struct hv_vp_assist_page *hv_get_vp_assist_page(unsigned int cpu) ...@@ -366,6 +367,7 @@ static inline struct hv_vp_assist_page *hv_get_vp_assist_page(unsigned int cpu)
{ {
return NULL; return NULL;
} }
static inline int hyperv_flush_guest_mapping(u64 as) { return -1; }
#endif /* CONFIG_HYPERV */ #endif /* CONFIG_HYPERV */
#ifdef CONFIG_HYPERV_TSCPAGE #ifdef CONFIG_HYPERV_TSCPAGE
......
...@@ -28,6 +28,20 @@ TRACE_EVENT(hyperv_mmu_flush_tlb_others, ...@@ -28,6 +28,20 @@ TRACE_EVENT(hyperv_mmu_flush_tlb_others,
__entry->addr, __entry->end) __entry->addr, __entry->end)
); );
TRACE_EVENT(hyperv_nested_flush_guest_mapping,
TP_PROTO(u64 as, int ret),
TP_ARGS(as, ret),
TP_STRUCT__entry(
__field(u64, as)
__field(int, ret)
),
TP_fast_assign(__entry->as = as;
__entry->ret = ret;
),
TP_printk("address space %llx ret %d", __entry->as, __entry->ret)
);
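The corresponding trace call is simply trace_hyperv_nested_flush_guest_mapping(as, ret) from the hypercall wrapper, and the event can typically be enabled at runtime under /sys/kernel/debug/tracing/events/hyperv/hyperv_nested_flush_guest_mapping/.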
TRACE_EVENT(hyperv_send_ipi_mask, TRACE_EVENT(hyperv_send_ipi_mask,
TP_PROTO(const struct cpumask *cpus, TP_PROTO(const struct cpumask *cpus,
int vector), int vector),
......
...@@ -378,4 +378,41 @@ struct kvm_sync_regs { ...@@ -378,4 +378,41 @@ struct kvm_sync_regs {
#define KVM_X86_QUIRK_LINT0_REENABLED (1 << 0) #define KVM_X86_QUIRK_LINT0_REENABLED (1 << 0)
#define KVM_X86_QUIRK_CD_NW_CLEARED (1 << 1) #define KVM_X86_QUIRK_CD_NW_CLEARED (1 << 1)
#define KVM_STATE_NESTED_GUEST_MODE 0x00000001
#define KVM_STATE_NESTED_RUN_PENDING 0x00000002
#define KVM_STATE_NESTED_SMM_GUEST_MODE 0x00000001
#define KVM_STATE_NESTED_SMM_VMXON 0x00000002
struct kvm_vmx_nested_state {
__u64 vmxon_pa;
__u64 vmcs_pa;
struct {
__u16 flags;
} smm;
};
/* for KVM_CAP_NESTED_STATE */
struct kvm_nested_state {
/* KVM_STATE_* flags */
__u16 flags;
/* 0 for VMX, 1 for SVM. */
__u16 format;
/* 128 for SVM, 128 + VMCS size for VMX. */
__u32 size;
union {
/* VMXON, VMCS */
struct kvm_vmx_nested_state vmx;
/* Pad the header to 128 bytes. */
__u8 pad[120];
};
__u8 data[0];
};
#endif /* _ASM_X86_KVM_H */ #endif /* _ASM_X86_KVM_H */
...@@ -28,6 +28,7 @@ ...@@ -28,6 +28,7 @@
#define KVM_FEATURE_PV_UNHALT 7 #define KVM_FEATURE_PV_UNHALT 7
#define KVM_FEATURE_PV_TLB_FLUSH 9 #define KVM_FEATURE_PV_TLB_FLUSH 9
#define KVM_FEATURE_ASYNC_PF_VMEXIT 10 #define KVM_FEATURE_ASYNC_PF_VMEXIT 10
#define KVM_FEATURE_PV_SEND_IPI 11
#define KVM_HINTS_REALTIME 0 #define KVM_HINTS_REALTIME 0
......
...@@ -444,6 +444,98 @@ static void __init sev_map_percpu_data(void) ...@@ -444,6 +444,98 @@ static void __init sev_map_percpu_data(void)
} }
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
#define KVM_IPI_CLUSTER_SIZE (2 * BITS_PER_LONG)
static void __send_ipi_mask(const struct cpumask *mask, int vector)
{
unsigned long flags;
int cpu, apic_id, icr;
int min = 0, max = 0;
#ifdef CONFIG_X86_64
__uint128_t ipi_bitmap = 0;
#else
u64 ipi_bitmap = 0;
#endif
if (cpumask_empty(mask))
return;
local_irq_save(flags);
switch (vector) {
default:
icr = APIC_DM_FIXED | vector;
break;
case NMI_VECTOR:
icr = APIC_DM_NMI;
break;
}
for_each_cpu(cpu, mask) {
apic_id = per_cpu(x86_cpu_to_apicid, cpu);
if (!ipi_bitmap) {
min = max = apic_id;
} else if (apic_id < min && max - apic_id < KVM_IPI_CLUSTER_SIZE) {
ipi_bitmap <<= min - apic_id;
min = apic_id;
} else if (apic_id < min + KVM_IPI_CLUSTER_SIZE) {
max = apic_id < max ? max : apic_id;
} else {
kvm_hypercall4(KVM_HC_SEND_IPI, (unsigned long)ipi_bitmap,
(unsigned long)(ipi_bitmap >> BITS_PER_LONG), min, icr);
min = max = apic_id;
ipi_bitmap = 0;
}
__set_bit(apic_id - min, (unsigned long *)&ipi_bitmap);
}
if (ipi_bitmap) {
kvm_hypercall4(KVM_HC_SEND_IPI, (unsigned long)ipi_bitmap,
(unsigned long)(ipi_bitmap >> BITS_PER_LONG), min, icr);
}
local_irq_restore(flags);
}
static void kvm_send_ipi_mask(const struct cpumask *mask, int vector)
{
__send_ipi_mask(mask, vector);
}
static void kvm_send_ipi_mask_allbutself(const struct cpumask *mask, int vector)
{
unsigned int this_cpu = smp_processor_id();
struct cpumask new_mask;
const struct cpumask *local_mask;
cpumask_copy(&new_mask, mask);
cpumask_clear_cpu(this_cpu, &new_mask);
local_mask = &new_mask;
__send_ipi_mask(local_mask, vector);
}
static void kvm_send_ipi_allbutself(int vector)
{
kvm_send_ipi_mask_allbutself(cpu_online_mask, vector);
}
static void kvm_send_ipi_all(int vector)
{
__send_ipi_mask(cpu_online_mask, vector);
}
/*
* Set the IPI entry points
*/
static void kvm_setup_pv_ipi(void)
{
apic->send_IPI_mask = kvm_send_ipi_mask;
apic->send_IPI_mask_allbutself = kvm_send_ipi_mask_allbutself;
apic->send_IPI_allbutself = kvm_send_ipi_allbutself;
apic->send_IPI_all = kvm_send_ipi_all;
pr_info("KVM setup pv IPIs\n");
}
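A short worked example of the windowing in __send_ipi_mask() above (APIC IDs chosen for illustration): on a 64-bit guest the bitmap spans KVM_IPI_CLUSTER_SIZE = 128 APIC IDs starting at 'min'. Sending to APIC IDs 3, 70 and 500 first sets bits 0 and 67 with min = 3; 500 does not fit in [3, 131), so one KVM_HC_SEND_IPI hypercall is issued for that window and a second window is started at min = 500 and flushed after the loop. A dense APIC ID layout therefore costs a single hypercall instead of one VM exit per destination CPU.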
static void __init kvm_smp_prepare_cpus(unsigned int max_cpus) static void __init kvm_smp_prepare_cpus(unsigned int max_cpus)
{ {
native_smp_prepare_cpus(max_cpus); native_smp_prepare_cpus(max_cpus);
...@@ -611,13 +703,27 @@ static uint32_t __init kvm_detect(void) ...@@ -611,13 +703,27 @@ static uint32_t __init kvm_detect(void)
return kvm_cpuid_base(); return kvm_cpuid_base();
} }
static void __init kvm_apic_init(void)
{
#if defined(CONFIG_SMP)
if (kvm_para_has_feature(KVM_FEATURE_PV_SEND_IPI))
kvm_setup_pv_ipi();
#endif
}
static void __init kvm_init_platform(void)
{
kvmclock_init();
x86_platform.apic_post_init = kvm_apic_init;
}
const __initconst struct hypervisor_x86 x86_hyper_kvm = { const __initconst struct hypervisor_x86 x86_hyper_kvm = {
.name = "KVM", .name = "KVM",
.detect = kvm_detect, .detect = kvm_detect,
.type = X86_HYPER_KVM, .type = X86_HYPER_KVM,
.init.init_platform = kvmclock_init,
.init.guest_late_init = kvm_guest_init, .init.guest_late_init = kvm_guest_init,
.init.x2apic_available = kvm_para_available, .init.x2apic_available = kvm_para_available,
.init.init_platform = kvm_init_platform,
}; };
static __init int activate_jump_labels(void) static __init int activate_jump_labels(void)
...@@ -736,6 +842,10 @@ void __init kvm_spinlock_init(void) ...@@ -736,6 +842,10 @@ void __init kvm_spinlock_init(void)
if (kvm_para_has_hint(KVM_HINTS_REALTIME)) if (kvm_para_has_hint(KVM_HINTS_REALTIME))
return; return;
/* Don't use the pvqspinlock code if there is only 1 vCPU. */
if (num_possible_cpus() == 1)
return;
__pv_init_lock_hash(); __pv_init_lock_hash();
pv_lock_ops.queued_spin_lock_slowpath = __pv_queued_spin_lock_slowpath; pv_lock_ops.queued_spin_lock_slowpath = __pv_queued_spin_lock_slowpath;
pv_lock_ops.queued_spin_unlock = PV_CALLEE_SAVE(__pv_queued_spin_unlock); pv_lock_ops.queued_spin_unlock = PV_CALLEE_SAVE(__pv_queued_spin_unlock);
......
...@@ -621,7 +621,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, ...@@ -621,7 +621,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
(1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT) | (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT) |
(1 << KVM_FEATURE_PV_UNHALT) | (1 << KVM_FEATURE_PV_UNHALT) |
(1 << KVM_FEATURE_PV_TLB_FLUSH) | (1 << KVM_FEATURE_PV_TLB_FLUSH) |
(1 << KVM_FEATURE_ASYNC_PF_VMEXIT); (1 << KVM_FEATURE_ASYNC_PF_VMEXIT) |
(1 << KVM_FEATURE_PV_SEND_IPI);
if (sched_info_on()) if (sched_info_on())
entry->eax |= (1 << KVM_FEATURE_STEAL_TIME); entry->eax |= (1 << KVM_FEATURE_STEAL_TIME);
......
...@@ -4191,7 +4191,7 @@ static int check_cr_write(struct x86_emulate_ctxt *ctxt) ...@@ -4191,7 +4191,7 @@ static int check_cr_write(struct x86_emulate_ctxt *ctxt)
maxphyaddr = 36; maxphyaddr = 36;
rsvd = rsvd_bits(maxphyaddr, 63); rsvd = rsvd_bits(maxphyaddr, 63);
if (ctxt->ops->get_cr(ctxt, 4) & X86_CR4_PCIDE) if (ctxt->ops->get_cr(ctxt, 4) & X86_CR4_PCIDE)
rsvd &= ~CR3_PCID_INVD; rsvd &= ~X86_CR3_PCID_NOFLUSH;
} }
if (new_val & rsvd) if (new_val & rsvd)
......
...@@ -235,7 +235,7 @@ static int synic_set_msr(struct kvm_vcpu_hv_synic *synic, ...@@ -235,7 +235,7 @@ static int synic_set_msr(struct kvm_vcpu_hv_synic *synic,
struct kvm_vcpu *vcpu = synic_to_vcpu(synic); struct kvm_vcpu *vcpu = synic_to_vcpu(synic);
int ret; int ret;
if (!synic->active) if (!synic->active && !host)
return 1; return 1;
trace_kvm_hv_synic_set_msr(vcpu->vcpu_id, msr, data, host); trace_kvm_hv_synic_set_msr(vcpu->vcpu_id, msr, data, host);
...@@ -295,11 +295,12 @@ static int synic_set_msr(struct kvm_vcpu_hv_synic *synic, ...@@ -295,11 +295,12 @@ static int synic_set_msr(struct kvm_vcpu_hv_synic *synic,
return ret; return ret;
} }
static int synic_get_msr(struct kvm_vcpu_hv_synic *synic, u32 msr, u64 *pdata) static int synic_get_msr(struct kvm_vcpu_hv_synic *synic, u32 msr, u64 *pdata,
bool host)
{ {
int ret; int ret;
if (!synic->active) if (!synic->active && !host)
return 1; return 1;
ret = 0; ret = 0;
...@@ -1014,6 +1015,11 @@ static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data, ...@@ -1014,6 +1015,11 @@ static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data,
case HV_X64_MSR_TSC_EMULATION_STATUS: case HV_X64_MSR_TSC_EMULATION_STATUS:
hv->hv_tsc_emulation_status = data; hv->hv_tsc_emulation_status = data;
break; break;
case HV_X64_MSR_TIME_REF_COUNT:
/* read-only, but still ignore it if host-initiated */
if (!host)
return 1;
break;
default: default:
vcpu_unimpl(vcpu, "Hyper-V uhandled wrmsr: 0x%x data 0x%llx\n", vcpu_unimpl(vcpu, "Hyper-V uhandled wrmsr: 0x%x data 0x%llx\n",
msr, data); msr, data);
...@@ -1101,6 +1107,12 @@ static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host) ...@@ -1101,6 +1107,12 @@ static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
return stimer_set_count(vcpu_to_stimer(vcpu, timer_index), return stimer_set_count(vcpu_to_stimer(vcpu, timer_index),
data, host); data, host);
} }
case HV_X64_MSR_TSC_FREQUENCY:
case HV_X64_MSR_APIC_FREQUENCY:
/* read-only, but still ignore it if host-initiated */
if (!host)
return 1;
break;
default: default:
vcpu_unimpl(vcpu, "Hyper-V uhandled wrmsr: 0x%x data 0x%llx\n", vcpu_unimpl(vcpu, "Hyper-V uhandled wrmsr: 0x%x data 0x%llx\n",
msr, data); msr, data);
...@@ -1156,7 +1168,8 @@ static int kvm_hv_get_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) ...@@ -1156,7 +1168,8 @@ static int kvm_hv_get_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
return 0; return 0;
} }
static int kvm_hv_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) static int kvm_hv_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata,
bool host)
{ {
u64 data = 0; u64 data = 0;
struct kvm_vcpu_hv *hv = &vcpu->arch.hyperv; struct kvm_vcpu_hv *hv = &vcpu->arch.hyperv;
...@@ -1183,7 +1196,7 @@ static int kvm_hv_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) ...@@ -1183,7 +1196,7 @@ static int kvm_hv_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
case HV_X64_MSR_SIMP: case HV_X64_MSR_SIMP:
case HV_X64_MSR_EOM: case HV_X64_MSR_EOM:
case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15: case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15:
return synic_get_msr(vcpu_to_synic(vcpu), msr, pdata); return synic_get_msr(vcpu_to_synic(vcpu), msr, pdata, host);
case HV_X64_MSR_STIMER0_CONFIG: case HV_X64_MSR_STIMER0_CONFIG:
case HV_X64_MSR_STIMER1_CONFIG: case HV_X64_MSR_STIMER1_CONFIG:
case HV_X64_MSR_STIMER2_CONFIG: case HV_X64_MSR_STIMER2_CONFIG:
...@@ -1229,7 +1242,7 @@ int kvm_hv_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host) ...@@ -1229,7 +1242,7 @@ int kvm_hv_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
return kvm_hv_set_msr(vcpu, msr, data, host); return kvm_hv_set_msr(vcpu, msr, data, host);
} }
int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, bool host)
{ {
if (kvm_hv_msr_partition_wide(msr)) { if (kvm_hv_msr_partition_wide(msr)) {
int r; int r;
...@@ -1239,7 +1252,7 @@ int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) ...@@ -1239,7 +1252,7 @@ int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
mutex_unlock(&vcpu->kvm->arch.hyperv.hv_lock); mutex_unlock(&vcpu->kvm->arch.hyperv.hv_lock);
return r; return r;
} else } else
return kvm_hv_get_msr(vcpu, msr, pdata); return kvm_hv_get_msr(vcpu, msr, pdata, host);
} }
static __always_inline int get_sparse_bank_no(u64 valid_bank_mask, int bank_no) static __always_inline int get_sparse_bank_no(u64 valid_bank_mask, int bank_no)
......
...@@ -48,7 +48,7 @@ static inline struct kvm_vcpu *synic_to_vcpu(struct kvm_vcpu_hv_synic *synic) ...@@ -48,7 +48,7 @@ static inline struct kvm_vcpu *synic_to_vcpu(struct kvm_vcpu_hv_synic *synic)
} }
int kvm_hv_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host); int kvm_hv_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host);
int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata); int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, bool host);
bool kvm_hv_hypercall_enabled(struct kvm *kvm); bool kvm_hv_hypercall_enabled(struct kvm *kvm);
int kvm_hv_hypercall(struct kvm_vcpu *vcpu); int kvm_hv_hypercall(struct kvm_vcpu *vcpu);
......
...@@ -547,6 +547,46 @@ int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq, ...@@ -547,6 +547,46 @@ int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
irq->level, irq->trig_mode, dest_map); irq->level, irq->trig_mode, dest_map);
} }
int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low,
unsigned long ipi_bitmap_high, int min,
unsigned long icr, int op_64_bit)
{
int i;
struct kvm_apic_map *map;
struct kvm_vcpu *vcpu;
struct kvm_lapic_irq irq = {0};
int cluster_size = op_64_bit ? 64 : 32;
int count = 0;
irq.vector = icr & APIC_VECTOR_MASK;
irq.delivery_mode = icr & APIC_MODE_MASK;
irq.level = (icr & APIC_INT_ASSERT) != 0;
irq.trig_mode = icr & APIC_INT_LEVELTRIG;
if (icr & APIC_DEST_MASK)
return -KVM_EINVAL;
if (icr & APIC_SHORT_MASK)
return -KVM_EINVAL;
rcu_read_lock();
map = rcu_dereference(kvm->arch.apic_map);
/* Bits above cluster_size are masked in the caller. */
for_each_set_bit(i, &ipi_bitmap_low, BITS_PER_LONG) {
vcpu = map->phys_map[min + i]->vcpu;
count += kvm_apic_set_irq(vcpu, &irq, NULL);
}
min += cluster_size;
for_each_set_bit(i, &ipi_bitmap_high, BITS_PER_LONG) {
vcpu = map->phys_map[min + i]->vcpu;
count += kvm_apic_set_irq(vcpu, &irq, NULL);
}
rcu_read_unlock();
return count;
}
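On the host side the hypercall arguments map directly onto this helper: a0 and a1 carry the low and high halves of the APIC-ID bitmap, a2 the base APIC ID ('min') and a3 the ICR value — see the kvm_emulate_hypercall() hunk later in this series, which calls kvm_pv_send_ipi(vcpu->kvm, a0, a1, a2, a3, op_64_bit). The return value is the number of vCPUs the interrupt was actually delivered to.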
static int pv_eoi_put_user(struct kvm_vcpu *vcpu, u8 val) static int pv_eoi_put_user(struct kvm_vcpu *vcpu, u8 val)
{ {
......
(This diff has been collapsed.)
...@@ -61,9 +61,10 @@ void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask, u64 mmio_value); ...@@ -61,9 +61,10 @@ void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask, u64 mmio_value);
void void
reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context); reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context);
void kvm_init_mmu(struct kvm_vcpu *vcpu, bool reset_roots);
void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu); void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu);
void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly, void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
bool accessed_dirty); bool accessed_dirty, gpa_t new_eptp);
bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu); bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu);
int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code, int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code,
u64 fault_address, char *insn, int insn_len); u64 fault_address, char *insn, int insn_len);
...@@ -85,6 +86,27 @@ static inline int kvm_mmu_reload(struct kvm_vcpu *vcpu) ...@@ -85,6 +86,27 @@ static inline int kvm_mmu_reload(struct kvm_vcpu *vcpu)
return kvm_mmu_load(vcpu); return kvm_mmu_load(vcpu);
} }
static inline unsigned long kvm_get_pcid(struct kvm_vcpu *vcpu, gpa_t cr3)
{
BUILD_BUG_ON((X86_CR3_PCID_MASK & PAGE_MASK) != 0);
return kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE)
? cr3 & X86_CR3_PCID_MASK
: 0;
}
static inline unsigned long kvm_get_active_pcid(struct kvm_vcpu *vcpu)
{
return kvm_get_pcid(vcpu, kvm_read_cr3(vcpu));
}
static inline void kvm_mmu_load_cr3(struct kvm_vcpu *vcpu)
{
if (VALID_PAGE(vcpu->arch.mmu.root_hpa))
vcpu->arch.mmu.set_cr3(vcpu, vcpu->arch.mmu.root_hpa |
kvm_get_active_pcid(vcpu));
}
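As a concrete example of kvm_get_pcid(): with CR4.PCIDE set and a guest CR3 of 0x0000000012345007, the PCID is the low 12 bits (X86_CR3_PCID_MASK is 0xfff), i.e. 0x007, and kvm_mmu_load_cr3() ORs exactly that value back into the shadow root passed to ->set_cr3(). With CR4.PCIDE clear the helper returns 0 regardless of the CR3 contents.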
/* /*
* Currently, we have two sorts of write-protection, a) the first one * Currently, we have two sorts of write-protection, a) the first one
* write-protects guest page to sync the guest modification, b) another one is * write-protects guest page to sync the guest modification, b) another one is
......
...@@ -181,7 +181,7 @@ static bool FNAME(prefetch_invalid_gpte)(struct kvm_vcpu *vcpu, ...@@ -181,7 +181,7 @@ static bool FNAME(prefetch_invalid_gpte)(struct kvm_vcpu *vcpu,
* set bit 0 if execute only is supported. Here, we repurpose ACC_USER_MASK * set bit 0 if execute only is supported. Here, we repurpose ACC_USER_MASK
* to signify readability since it isn't used in the EPT case * to signify readability since it isn't used in the EPT case
*/ */
static inline unsigned FNAME(gpte_access)(struct kvm_vcpu *vcpu, u64 gpte) static inline unsigned FNAME(gpte_access)(u64 gpte)
{ {
unsigned access; unsigned access;
#if PTTYPE == PTTYPE_EPT #if PTTYPE == PTTYPE_EPT
...@@ -394,8 +394,8 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker, ...@@ -394,8 +394,8 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
accessed_dirty = have_ad ? pte_access & PT_GUEST_ACCESSED_MASK : 0; accessed_dirty = have_ad ? pte_access & PT_GUEST_ACCESSED_MASK : 0;
/* Convert to ACC_*_MASK flags for struct guest_walker. */ /* Convert to ACC_*_MASK flags for struct guest_walker. */
walker->pt_access = FNAME(gpte_access)(vcpu, pt_access ^ walk_nx_mask); walker->pt_access = FNAME(gpte_access)(pt_access ^ walk_nx_mask);
walker->pte_access = FNAME(gpte_access)(vcpu, pte_access ^ walk_nx_mask); walker->pte_access = FNAME(gpte_access)(pte_access ^ walk_nx_mask);
errcode = permission_fault(vcpu, mmu, walker->pte_access, pte_pkey, access); errcode = permission_fault(vcpu, mmu, walker->pte_access, pte_pkey, access);
if (unlikely(errcode)) if (unlikely(errcode))
goto error; goto error;
...@@ -508,7 +508,7 @@ FNAME(prefetch_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, ...@@ -508,7 +508,7 @@ FNAME(prefetch_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte); pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte);
gfn = gpte_to_gfn(gpte); gfn = gpte_to_gfn(gpte);
pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte); pte_access = sp->role.access & FNAME(gpte_access)(gpte);
FNAME(protect_clean_gpte)(&vcpu->arch.mmu, &pte_access, gpte); FNAME(protect_clean_gpte)(&vcpu->arch.mmu, &pte_access, gpte);
pfn = pte_prefetch_gfn_to_pfn(vcpu, gfn, pfn = pte_prefetch_gfn_to_pfn(vcpu, gfn,
no_dirty_log && (pte_access & ACC_WRITE_MASK)); no_dirty_log && (pte_access & ACC_WRITE_MASK));
...@@ -856,7 +856,7 @@ static gpa_t FNAME(get_level1_sp_gpa)(struct kvm_mmu_page *sp) ...@@ -856,7 +856,7 @@ static gpa_t FNAME(get_level1_sp_gpa)(struct kvm_mmu_page *sp)
return gfn_to_gpa(sp->gfn) + offset * sizeof(pt_element_t); return gfn_to_gpa(sp->gfn) + offset * sizeof(pt_element_t);
} }
static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva, hpa_t root_hpa)
{ {
struct kvm_shadow_walk_iterator iterator; struct kvm_shadow_walk_iterator iterator;
struct kvm_mmu_page *sp; struct kvm_mmu_page *sp;
...@@ -871,13 +871,13 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) ...@@ -871,13 +871,13 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
*/ */
mmu_topup_memory_caches(vcpu); mmu_topup_memory_caches(vcpu);
if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) { if (!VALID_PAGE(root_hpa)) {
WARN_ON(1); WARN_ON(1);
return; return;
} }
spin_lock(&vcpu->kvm->mmu_lock); spin_lock(&vcpu->kvm->mmu_lock);
for_each_shadow_entry(vcpu, gva, iterator) { for_each_shadow_entry_using_root(vcpu, root_hpa, gva, iterator) {
level = iterator.level; level = iterator.level;
sptep = iterator.sptep; sptep = iterator.sptep;
...@@ -968,6 +968,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) ...@@ -968,6 +968,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
int i, nr_present = 0; int i, nr_present = 0;
bool host_writable; bool host_writable;
gpa_t first_pte_gpa; gpa_t first_pte_gpa;
int set_spte_ret = 0;
/* direct kvm_mmu_page can not be unsync. */ /* direct kvm_mmu_page can not be unsync. */
BUG_ON(sp->role.direct); BUG_ON(sp->role.direct);
...@@ -1002,7 +1003,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) ...@@ -1002,7 +1003,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
gfn = gpte_to_gfn(gpte); gfn = gpte_to_gfn(gpte);
pte_access = sp->role.access; pte_access = sp->role.access;
pte_access &= FNAME(gpte_access)(vcpu, gpte); pte_access &= FNAME(gpte_access)(gpte);
FNAME(protect_clean_gpte)(&vcpu->arch.mmu, &pte_access, gpte); FNAME(protect_clean_gpte)(&vcpu->arch.mmu, &pte_access, gpte);
if (sync_mmio_spte(vcpu, &sp->spt[i], gfn, pte_access, if (sync_mmio_spte(vcpu, &sp->spt[i], gfn, pte_access,
...@@ -1024,12 +1025,15 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) ...@@ -1024,12 +1025,15 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
host_writable = sp->spt[i] & SPTE_HOST_WRITEABLE; host_writable = sp->spt[i] & SPTE_HOST_WRITEABLE;
set_spte(vcpu, &sp->spt[i], pte_access, set_spte_ret |= set_spte(vcpu, &sp->spt[i],
PT_PAGE_TABLE_LEVEL, gfn, pte_access, PT_PAGE_TABLE_LEVEL,
spte_to_pfn(sp->spt[i]), true, false, gfn, spte_to_pfn(sp->spt[i]),
host_writable); true, false, host_writable);
} }
if (set_spte_ret & SET_SPTE_NEED_REMOTE_TLB_FLUSH)
kvm_flush_remote_tlbs(vcpu->kvm);
return nr_present; return nr_present;
} }
......
...@@ -2884,7 +2884,6 @@ static void nested_svm_set_tdp_cr3(struct kvm_vcpu *vcpu, ...@@ -2884,7 +2884,6 @@ static void nested_svm_set_tdp_cr3(struct kvm_vcpu *vcpu,
svm->vmcb->control.nested_cr3 = __sme_set(root); svm->vmcb->control.nested_cr3 = __sme_set(root);
mark_dirty(svm->vmcb, VMCB_NPT); mark_dirty(svm->vmcb, VMCB_NPT);
svm_flush_tlb(vcpu, true);
} }
static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu, static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu,
...@@ -5435,6 +5434,13 @@ static void svm_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa) ...@@ -5435,6 +5434,13 @@ static void svm_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa)
svm->asid_generation--; svm->asid_generation--;
} }
static void svm_flush_tlb_gva(struct kvm_vcpu *vcpu, gva_t gva)
{
struct vcpu_svm *svm = to_svm(vcpu);
invlpga(gva, svm->vmcb->control.asid);
}
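svm_flush_tlb_gva() relies on INVLPGA, which invalidates the TLB entry for a single guest virtual address in the given ASID only, leaving host translations and other ASIDs intact — this is what lets the new ->tlb_flush_gva() hook avoid the full ASID bump that svm_flush_tlb() performs.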
static void svm_prepare_guest_switch(struct kvm_vcpu *vcpu) static void svm_prepare_guest_switch(struct kvm_vcpu *vcpu)
{ {
} }
...@@ -5766,7 +5772,6 @@ static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root) ...@@ -5766,7 +5772,6 @@ static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root)
svm->vmcb->save.cr3 = __sme_set(root); svm->vmcb->save.cr3 = __sme_set(root);
mark_dirty(svm->vmcb, VMCB_CR); mark_dirty(svm->vmcb, VMCB_CR);
svm_flush_tlb(vcpu, true);
} }
static void set_tdp_cr3(struct kvm_vcpu *vcpu, unsigned long root) static void set_tdp_cr3(struct kvm_vcpu *vcpu, unsigned long root)
...@@ -5779,8 +5784,6 @@ static void set_tdp_cr3(struct kvm_vcpu *vcpu, unsigned long root) ...@@ -5779,8 +5784,6 @@ static void set_tdp_cr3(struct kvm_vcpu *vcpu, unsigned long root)
/* Also sync guest cr3 here in case we live migrate */ /* Also sync guest cr3 here in case we live migrate */
svm->vmcb->save.cr3 = kvm_read_cr3(vcpu); svm->vmcb->save.cr3 = kvm_read_cr3(vcpu);
mark_dirty(svm->vmcb, VMCB_CR); mark_dirty(svm->vmcb, VMCB_CR);
svm_flush_tlb(vcpu, true);
} }
static int is_disabled(void) static int is_disabled(void)
...@@ -7090,6 +7093,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = { ...@@ -7090,6 +7093,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
.set_rflags = svm_set_rflags, .set_rflags = svm_set_rflags,
.tlb_flush = svm_flush_tlb, .tlb_flush = svm_flush_tlb,
.tlb_flush_gva = svm_flush_tlb_gva,
.run = svm_vcpu_run, .run = svm_vcpu_run,
.handle_exit = handle_exit, .handle_exit = handle_exit,
......
(This diff has been collapsed.)
...@@ -848,16 +848,21 @@ EXPORT_SYMBOL_GPL(kvm_set_cr4); ...@@ -848,16 +848,21 @@ EXPORT_SYMBOL_GPL(kvm_set_cr4);
int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
{ {
bool skip_tlb_flush = false;
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
bool pcid_enabled = kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE); bool pcid_enabled = kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE);
if (pcid_enabled) if (pcid_enabled) {
cr3 &= ~CR3_PCID_INVD; skip_tlb_flush = cr3 & X86_CR3_PCID_NOFLUSH;
cr3 &= ~X86_CR3_PCID_NOFLUSH;
}
#endif #endif
if (cr3 == kvm_read_cr3(vcpu) && !pdptrs_changed(vcpu)) { if (cr3 == kvm_read_cr3(vcpu) && !pdptrs_changed(vcpu)) {
kvm_mmu_sync_roots(vcpu); if (!skip_tlb_flush) {
kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); kvm_mmu_sync_roots(vcpu);
kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
}
return 0; return 0;
} }
...@@ -868,9 +873,10 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) ...@@ -868,9 +873,10 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
!load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3)) !load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3))
return 1; return 1;
kvm_mmu_new_cr3(vcpu, cr3, skip_tlb_flush);
vcpu->arch.cr3 = cr3; vcpu->arch.cr3 = cr3;
__set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail); __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
kvm_mmu_new_cr3(vcpu);
return 0; return 0;
} }
EXPORT_SYMBOL_GPL(kvm_set_cr3); EXPORT_SYMBOL_GPL(kvm_set_cr3);
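X86_CR3_PCID_NOFLUSH is bit 63 of CR3 (the bit previously open-coded as CR3_PCID_INVD). When PCIDs are enabled, a MOV to CR3 with that bit set asks the CPU not to flush translations tagged with the new PCID; the code above honours this by stripping the bit and forwarding skip_tlb_flush to kvm_mmu_new_cr3(). For example, a guest write of 0x8000000012345001 switches to PCID 1 with page-table base 0x12345000 and no TLB flush.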
...@@ -2185,10 +2191,11 @@ static int set_msr_mce(struct kvm_vcpu *vcpu, struct msr_data *msr_info) ...@@ -2185,10 +2191,11 @@ static int set_msr_mce(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
vcpu->arch.mcg_status = data; vcpu->arch.mcg_status = data;
break; break;
case MSR_IA32_MCG_CTL: case MSR_IA32_MCG_CTL:
if (!(mcg_cap & MCG_CTL_P)) if (!(mcg_cap & MCG_CTL_P) &&
(data || !msr_info->host_initiated))
return 1; return 1;
if (data != 0 && data != ~(u64)0) if (data != 0 && data != ~(u64)0)
return -1; return 1;
vcpu->arch.mcg_ctl = data; vcpu->arch.mcg_ctl = data;
break; break;
default: default:
...@@ -2576,7 +2583,7 @@ int kvm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) ...@@ -2576,7 +2583,7 @@ int kvm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
} }
EXPORT_SYMBOL_GPL(kvm_get_msr); EXPORT_SYMBOL_GPL(kvm_get_msr);
static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, bool host)
{ {
u64 data; u64 data;
u64 mcg_cap = vcpu->arch.mcg_cap; u64 mcg_cap = vcpu->arch.mcg_cap;
...@@ -2591,7 +2598,7 @@ static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) ...@@ -2591,7 +2598,7 @@ static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
data = vcpu->arch.mcg_cap; data = vcpu->arch.mcg_cap;
break; break;
case MSR_IA32_MCG_CTL: case MSR_IA32_MCG_CTL:
if (!(mcg_cap & MCG_CTL_P)) if (!(mcg_cap & MCG_CTL_P) && !host)
return 1; return 1;
data = vcpu->arch.mcg_ctl; data = vcpu->arch.mcg_ctl;
break; break;
...@@ -2724,7 +2731,8 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) ...@@ -2724,7 +2731,8 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_IA32_MCG_CTL: case MSR_IA32_MCG_CTL:
case MSR_IA32_MCG_STATUS: case MSR_IA32_MCG_STATUS:
case MSR_IA32_MC0_CTL ... MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1: case MSR_IA32_MC0_CTL ... MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1:
return get_msr_mce(vcpu, msr_info->index, &msr_info->data); return get_msr_mce(vcpu, msr_info->index, &msr_info->data,
msr_info->host_initiated);
case MSR_K7_CLK_CTL: case MSR_K7_CLK_CTL:
/* /*
* Provide expected ramp-up count for K7. All other * Provide expected ramp-up count for K7. All other
...@@ -2745,7 +2753,8 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) ...@@ -2745,7 +2753,8 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case HV_X64_MSR_TSC_EMULATION_CONTROL: case HV_X64_MSR_TSC_EMULATION_CONTROL:
case HV_X64_MSR_TSC_EMULATION_STATUS: case HV_X64_MSR_TSC_EMULATION_STATUS:
return kvm_hv_get_msr_common(vcpu, return kvm_hv_get_msr_common(vcpu,
msr_info->index, &msr_info->data); msr_info->index, &msr_info->data,
msr_info->host_initiated);
break; break;
case MSR_IA32_BBL_CR_CTL3: case MSR_IA32_BBL_CR_CTL3:
/* This legacy MSR exists but isn't fully documented in current /* This legacy MSR exists but isn't fully documented in current
...@@ -2969,6 +2978,10 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) ...@@ -2969,6 +2978,10 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_X2APIC_API: case KVM_CAP_X2APIC_API:
r = KVM_X2APIC_API_VALID_FLAGS; r = KVM_X2APIC_API_VALID_FLAGS;
break; break;
case KVM_CAP_NESTED_STATE:
r = kvm_x86_ops->get_nested_state ?
kvm_x86_ops->get_nested_state(NULL, 0, 0) : 0;
break;
default: default:
break; break;
} }
...@@ -3985,6 +3998,56 @@ long kvm_arch_vcpu_ioctl(struct file *filp, ...@@ -3985,6 +3998,56 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap); r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
break; break;
} }
case KVM_GET_NESTED_STATE: {
struct kvm_nested_state __user *user_kvm_nested_state = argp;
u32 user_data_size;
r = -EINVAL;
if (!kvm_x86_ops->get_nested_state)
break;
BUILD_BUG_ON(sizeof(user_data_size) != sizeof(user_kvm_nested_state->size));
if (get_user(user_data_size, &user_kvm_nested_state->size))
return -EFAULT;
r = kvm_x86_ops->get_nested_state(vcpu, user_kvm_nested_state,
user_data_size);
if (r < 0)
return r;
if (r > user_data_size) {
if (put_user(r, &user_kvm_nested_state->size))
return -EFAULT;
return -E2BIG;
}
r = 0;
break;
}
case KVM_SET_NESTED_STATE: {
struct kvm_nested_state __user *user_kvm_nested_state = argp;
struct kvm_nested_state kvm_state;
r = -EINVAL;
if (!kvm_x86_ops->set_nested_state)
break;
if (copy_from_user(&kvm_state, user_kvm_nested_state, sizeof(kvm_state)))
return -EFAULT;
if (kvm_state.size < sizeof(kvm_state))
return -EINVAL;
if (kvm_state.flags &
~(KVM_STATE_NESTED_RUN_PENDING | KVM_STATE_NESTED_GUEST_MODE))
return -EINVAL;
/* nested_run_pending implies guest_mode. */
if (kvm_state.flags == KVM_STATE_NESTED_RUN_PENDING)
return -EINVAL;
r = kvm_x86_ops->set_nested_state(vcpu, user_kvm_nested_state, &kvm_state);
break;
}
default: default:
r = -EINVAL; r = -EINVAL;
} }
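A hedged userspace sketch of how a VMM might drive the two new ioctls above during save/restore — error handling is trimmed and kvm_fd/vcpu_fd are assumed to be already-open descriptors:

	#include <stdlib.h>
	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	static struct kvm_nested_state *save_nested(int kvm_fd, int vcpu_fd)
	{
		int size = ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_NESTED_STATE);
		struct kvm_nested_state *state;

		if (size <= 0)
			return NULL;		/* nested state not supported */
		state = calloc(1, size);
		state->size = size;		/* tell the kernel how much room we have */
		if (ioctl(vcpu_fd, KVM_GET_NESTED_STATE, state) < 0) {
			free(state);
			return NULL;
		}
		return state;	/* restore later with ioctl(vcpu_fd, KVM_SET_NESTED_STATE, state) */
	}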
...@@ -6503,8 +6566,12 @@ static void kvm_set_mmio_spte_mask(void) ...@@ -6503,8 +6566,12 @@ static void kvm_set_mmio_spte_mask(void)
* Set the reserved bits and the present bit of an paging-structure * Set the reserved bits and the present bit of an paging-structure
* entry to generate page fault with PFER.RSV = 1. * entry to generate page fault with PFER.RSV = 1.
*/ */
/* Mask the reserved physical address bits. */
mask = rsvd_bits(maxphyaddr, 51); /*
* Mask the uppermost physical address bit, which would be reserved as
* long as the supported physical address width is less than 52.
*/
mask = 1ull << 51;
/* Set the present bit. */ /* Set the present bit. */
mask |= 1ull; mask |= 1ull;
...@@ -6769,6 +6836,9 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) ...@@ -6769,6 +6836,9 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
case KVM_HC_CLOCK_PAIRING: case KVM_HC_CLOCK_PAIRING:
ret = kvm_pv_clock_pairing(vcpu, a0, a1); ret = kvm_pv_clock_pairing(vcpu, a0, a1);
break; break;
case KVM_HC_SEND_IPI:
ret = kvm_pv_send_ipi(vcpu->kvm, a0, a1, a2, a3, op_64_bit);
break;
#endif #endif
default: default:
ret = -KVM_ENOSYS; ret = -KVM_ENOSYS;
...@@ -7287,6 +7357,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) ...@@ -7287,6 +7357,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
bool req_immediate_exit = false; bool req_immediate_exit = false;
if (kvm_request_pending(vcpu)) { if (kvm_request_pending(vcpu)) {
if (kvm_check_request(KVM_REQ_GET_VMCS12_PAGES, vcpu))
kvm_x86_ops->get_vmcs12_pages(vcpu);
if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu))
kvm_mmu_unload(vcpu); kvm_mmu_unload(vcpu);
if (kvm_check_request(KVM_REQ_MIGRATE_TIMER, vcpu)) if (kvm_check_request(KVM_REQ_MIGRATE_TIMER, vcpu))
...@@ -7302,6 +7374,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) ...@@ -7302,6 +7374,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
} }
if (kvm_check_request(KVM_REQ_MMU_SYNC, vcpu)) if (kvm_check_request(KVM_REQ_MMU_SYNC, vcpu))
kvm_mmu_sync_roots(vcpu); kvm_mmu_sync_roots(vcpu);
if (kvm_check_request(KVM_REQ_LOAD_CR3, vcpu))
kvm_mmu_load_cr3(vcpu);
if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu))
kvm_vcpu_flush_tlb(vcpu, true); kvm_vcpu_flush_tlb(vcpu, true);
if (kvm_check_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu)) { if (kvm_check_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu)) {
...@@ -8013,6 +8087,10 @@ EXPORT_SYMBOL_GPL(kvm_task_switch); ...@@ -8013,6 +8087,10 @@ EXPORT_SYMBOL_GPL(kvm_task_switch);
static int kvm_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) static int kvm_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
{ {
if (!guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) &&
(sregs->cr4 & X86_CR4_OSXSAVE))
return -EINVAL;
if ((sregs->efer & EFER_LME) && (sregs->cr0 & X86_CR0_PG)) { if ((sregs->efer & EFER_LME) && (sregs->cr0 & X86_CR0_PG)) {
/* /*
* When EFER.LME and CR0.PG are set, the processor is in * When EFER.LME and CR0.PG are set, the processor is in
...@@ -8043,10 +8121,6 @@ static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) ...@@ -8043,10 +8121,6 @@ static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
struct desc_ptr dt; struct desc_ptr dt;
int ret = -EINVAL; int ret = -EINVAL;
if (!guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) &&
(sregs->cr4 & X86_CR4_OSXSAVE))
goto out;
if (kvm_valid_sregs(vcpu, sregs)) if (kvm_valid_sregs(vcpu, sregs))
goto out; goto out;
......
...@@ -130,7 +130,7 @@ static inline bool is_error_page(struct page *page) ...@@ -130,7 +130,7 @@ static inline bool is_error_page(struct page *page)
#define KVM_REQUEST_ARCH_BASE 8 #define KVM_REQUEST_ARCH_BASE 8
#define KVM_ARCH_REQ_FLAGS(nr, flags) ({ \ #define KVM_ARCH_REQ_FLAGS(nr, flags) ({ \
BUILD_BUG_ON((unsigned)(nr) >= 32 - KVM_REQUEST_ARCH_BASE); \ BUILD_BUG_ON((unsigned)(nr) >= (FIELD_SIZEOF(struct kvm_vcpu, requests) * 8) - KVM_REQUEST_ARCH_BASE); \
(unsigned)(((nr) + KVM_REQUEST_ARCH_BASE) | (flags)); \ (unsigned)(((nr) + KVM_REQUEST_ARCH_BASE) | (flags)); \
}) })
#define KVM_ARCH_REQ(nr) KVM_ARCH_REQ_FLAGS(nr, 0) #define KVM_ARCH_REQ(nr) KVM_ARCH_REQ_FLAGS(nr, 0)
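With vcpu->requests widened to u64 below, this BUILD_BUG_ON now permits FIELD_SIZEOF(struct kvm_vcpu, requests) * 8 - KVM_REQUEST_ARCH_BASE = 64 - 8 = 56 architecture request numbers instead of the previous 24, which is what makes room for the new KVM_REQ_GET_VMCS12_PAGES and KVM_REQ_LOAD_CR3 requests added earlier in this series.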
...@@ -224,7 +224,7 @@ struct kvm_vcpu { ...@@ -224,7 +224,7 @@ struct kvm_vcpu {
int vcpu_id; int vcpu_id;
int srcu_idx; int srcu_idx;
int mode; int mode;
unsigned long requests; u64 requests;
unsigned long guest_debug; unsigned long guest_debug;
int pre_pcpu; int pre_pcpu;
...@@ -309,6 +309,13 @@ static inline unsigned long kvm_dirty_bitmap_bytes(struct kvm_memory_slot *memsl ...@@ -309,6 +309,13 @@ static inline unsigned long kvm_dirty_bitmap_bytes(struct kvm_memory_slot *memsl
return ALIGN(memslot->npages, BITS_PER_LONG) / 8; return ALIGN(memslot->npages, BITS_PER_LONG) / 8;
} }
static inline unsigned long *kvm_second_dirty_bitmap(struct kvm_memory_slot *memslot)
{
unsigned long len = kvm_dirty_bitmap_bytes(memslot);
return memslot->dirty_bitmap + len / sizeof(*memslot->dirty_bitmap);
}
struct kvm_s390_adapter_int { struct kvm_s390_adapter_int {
u64 ind_addr; u64 ind_addr;
u64 summary_addr; u64 summary_addr;
...@@ -827,6 +834,13 @@ static inline void kvm_arch_free_vm(struct kvm *kvm) ...@@ -827,6 +834,13 @@ static inline void kvm_arch_free_vm(struct kvm *kvm)
} }
#endif #endif
#ifndef __KVM_HAVE_ARCH_FLUSH_REMOTE_TLB
static inline int kvm_arch_flush_remote_tlb(struct kvm *kvm)
{
return -ENOTSUPP;
}
#endif
#ifdef __KVM_HAVE_ARCH_NONCOHERENT_DMA #ifdef __KVM_HAVE_ARCH_NONCOHERENT_DMA
void kvm_arch_register_noncoherent_dma(struct kvm *kvm); void kvm_arch_register_noncoherent_dma(struct kvm *kvm);
void kvm_arch_unregister_noncoherent_dma(struct kvm *kvm); void kvm_arch_unregister_noncoherent_dma(struct kvm *kvm);
...@@ -1124,7 +1138,7 @@ static inline void kvm_make_request(int req, struct kvm_vcpu *vcpu) ...@@ -1124,7 +1138,7 @@ static inline void kvm_make_request(int req, struct kvm_vcpu *vcpu)
* caller. Paired with the smp_mb__after_atomic in kvm_check_request. * caller. Paired with the smp_mb__after_atomic in kvm_check_request.
*/ */
smp_wmb(); smp_wmb();
set_bit(req & KVM_REQUEST_MASK, &vcpu->requests); set_bit(req & KVM_REQUEST_MASK, (void *)&vcpu->requests);
} }
static inline bool kvm_request_pending(struct kvm_vcpu *vcpu) static inline bool kvm_request_pending(struct kvm_vcpu *vcpu)
...@@ -1134,12 +1148,12 @@ static inline bool kvm_request_pending(struct kvm_vcpu *vcpu) ...@@ -1134,12 +1148,12 @@ static inline bool kvm_request_pending(struct kvm_vcpu *vcpu)
static inline bool kvm_test_request(int req, struct kvm_vcpu *vcpu) static inline bool kvm_test_request(int req, struct kvm_vcpu *vcpu)
{ {
return test_bit(req & KVM_REQUEST_MASK, &vcpu->requests); return test_bit(req & KVM_REQUEST_MASK, (void *)&vcpu->requests);
} }
static inline void kvm_clear_request(int req, struct kvm_vcpu *vcpu) static inline void kvm_clear_request(int req, struct kvm_vcpu *vcpu)
{ {
clear_bit(req & KVM_REQUEST_MASK, &vcpu->requests); clear_bit(req & KVM_REQUEST_MASK, (void *)&vcpu->requests);
} }
static inline bool kvm_check_request(int req, struct kvm_vcpu *vcpu) static inline bool kvm_check_request(int req, struct kvm_vcpu *vcpu)
......
...@@ -950,6 +950,7 @@ struct kvm_ppc_resize_hpt { ...@@ -950,6 +950,7 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_HYPERV_EVENTFD 154 #define KVM_CAP_HYPERV_EVENTFD 154
#define KVM_CAP_HYPERV_TLBFLUSH 155 #define KVM_CAP_HYPERV_TLBFLUSH 155
#define KVM_CAP_S390_HPAGE_1M 156 #define KVM_CAP_S390_HPAGE_1M 156
#define KVM_CAP_NESTED_STATE 157
#ifdef KVM_CAP_IRQ_ROUTING #ifdef KVM_CAP_IRQ_ROUTING
...@@ -1392,6 +1393,9 @@ struct kvm_enc_region { ...@@ -1392,6 +1393,9 @@ struct kvm_enc_region {
/* Available with KVM_CAP_HYPERV_EVENTFD */ /* Available with KVM_CAP_HYPERV_EVENTFD */
#define KVM_HYPERV_EVENTFD _IOW(KVMIO, 0xbd, struct kvm_hyperv_eventfd) #define KVM_HYPERV_EVENTFD _IOW(KVMIO, 0xbd, struct kvm_hyperv_eventfd)
/* Available with KVM_CAP_NESTED_STATE */
#define KVM_GET_NESTED_STATE _IOWR(KVMIO, 0xbe, struct kvm_nested_state)
#define KVM_SET_NESTED_STATE _IOW(KVMIO, 0xbf, struct kvm_nested_state)
/* Secure Encrypted Virtualization command */ /* Secure Encrypted Virtualization command */
enum sev_cmd_id { enum sev_cmd_id {
......
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
/* Return values for hypercalls */ /* Return values for hypercalls */
#define KVM_ENOSYS 1000 #define KVM_ENOSYS 1000
#define KVM_EFAULT EFAULT #define KVM_EFAULT EFAULT
#define KVM_EINVAL EINVAL
#define KVM_E2BIG E2BIG #define KVM_E2BIG E2BIG
#define KVM_EPERM EPERM #define KVM_EPERM EPERM
#define KVM_EOPNOTSUPP 95 #define KVM_EOPNOTSUPP 95
...@@ -26,6 +27,7 @@ ...@@ -26,6 +27,7 @@
#define KVM_HC_MIPS_EXIT_VM 7 #define KVM_HC_MIPS_EXIT_VM 7
#define KVM_HC_MIPS_CONSOLE_OUTPUT 8 #define KVM_HC_MIPS_CONSOLE_OUTPUT 8
#define KVM_HC_CLOCK_PAIRING 9 #define KVM_HC_CLOCK_PAIRING 9
#define KVM_HC_SEND_IPI 10
/* /*
* hypercalls use architecture specific * hypercalls use architecture specific
......
cr4_cpuid_sync_test
set_sregs_test set_sregs_test
sync_regs_test sync_regs_test
vmx_tsc_adjust_test vmx_tsc_adjust_test
state_test
...@@ -9,6 +9,8 @@ LIBKVM_x86_64 = lib/x86.c lib/vmx.c ...@@ -9,6 +9,8 @@ LIBKVM_x86_64 = lib/x86.c lib/vmx.c
TEST_GEN_PROGS_x86_64 = set_sregs_test TEST_GEN_PROGS_x86_64 = set_sregs_test
TEST_GEN_PROGS_x86_64 += sync_regs_test TEST_GEN_PROGS_x86_64 += sync_regs_test
TEST_GEN_PROGS_x86_64 += vmx_tsc_adjust_test TEST_GEN_PROGS_x86_64 += vmx_tsc_adjust_test
TEST_GEN_PROGS_x86_64 += cr4_cpuid_sync_test
TEST_GEN_PROGS_x86_64 += state_test
TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(UNAME_M)) TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(UNAME_M))
LIBKVM += $(LIBKVM_$(UNAME_M)) LIBKVM += $(LIBKVM_$(UNAME_M))
......
// SPDX-License-Identifier: GPL-2.0
/*
* CR4 and CPUID sync test
*
* Copyright 2018, Red Hat, Inc. and/or its affiliates.
*
* Author:
* Wei Huang <wei@redhat.com>
*/
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include "test_util.h"
#include "kvm_util.h"
#include "x86.h"
#define X86_FEATURE_XSAVE (1<<26)
#define X86_FEATURE_OSXSAVE (1<<27)
#define VCPU_ID 1
enum {
GUEST_UPDATE_CR4 = 0x1000,
GUEST_FAILED,
GUEST_DONE,
};
static void exit_to_hv(uint16_t port)
{
__asm__ __volatile__("in %[port], %%al"
:
: [port]"d"(port)
: "rax");
}
static inline bool cr4_cpuid_is_sync(void)
{
int func, subfunc;
uint32_t eax, ebx, ecx, edx;
uint64_t cr4;
func = 0x1;
subfunc = 0x0;
__asm__ __volatile__("cpuid"
: "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
: "a"(func), "c"(subfunc));
cr4 = get_cr4();
return (!!(ecx & X86_FEATURE_OSXSAVE)) == (!!(cr4 & X86_CR4_OSXSAVE));
}
static void guest_code(void)
{
uint64_t cr4;
/* turn on CR4.OSXSAVE */
cr4 = get_cr4();
cr4 |= X86_CR4_OSXSAVE;
set_cr4(cr4);
/* verify CR4.OSXSAVE == CPUID.OSXSAVE */
if (!cr4_cpuid_is_sync())
exit_to_hv(GUEST_FAILED);
/* notify hypervisor to change CR4 */
exit_to_hv(GUEST_UPDATE_CR4);
/* check again */
if (!cr4_cpuid_is_sync())
exit_to_hv(GUEST_FAILED);
exit_to_hv(GUEST_DONE);
}
int main(int argc, char *argv[])
{
struct kvm_run *run;
struct kvm_vm *vm;
struct kvm_sregs sregs;
struct kvm_cpuid_entry2 *entry;
int rc;
entry = kvm_get_supported_cpuid_entry(1);
if (!(entry->ecx & X86_FEATURE_XSAVE)) {
printf("XSAVE feature not supported, skipping test\n");
return 0;
}
/* Tell stdout not to buffer its content */
setbuf(stdout, NULL);
/* Create VM */
vm = vm_create_default(VCPU_ID, guest_code);
vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
run = vcpu_state(vm, VCPU_ID);
while (1) {
rc = _vcpu_run(vm, VCPU_ID);
if (run->exit_reason == KVM_EXIT_IO) {
switch (run->io.port) {
case GUEST_UPDATE_CR4:
/* emulate hypervisor clearing CR4.OSXSAVE */
vcpu_sregs_get(vm, VCPU_ID, &sregs);
sregs.cr4 &= ~X86_CR4_OSXSAVE;
vcpu_sregs_set(vm, VCPU_ID, &sregs);
break;
case GUEST_FAILED:
TEST_ASSERT(false, "Guest CR4 bit (OSXSAVE) unsynchronized with CPUID bit.");
break;
case GUEST_DONE:
goto done;
default:
TEST_ASSERT(false, "Unknown port 0x%x.",
run->io.port);
}
}
}
kvm_vm_free(vm);
done:
return 0;
}
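If useful, the test can be built and run together with the other KVM selftests, e.g. 'make TARGETS=kvm -C tools/testing/selftests' and then executing the generated cr4_cpuid_sync_test binary; the exact invocation depends on the selftest tree layout of the kernel version at hand.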
...@@ -53,6 +53,8 @@ int kvm_check_cap(long cap); ...@@ -53,6 +53,8 @@ int kvm_check_cap(long cap);
struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm); struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm);
void kvm_vm_free(struct kvm_vm *vmp); void kvm_vm_free(struct kvm_vm *vmp);
void kvm_vm_restart(struct kvm_vm *vmp, int perm);
void kvm_vm_release(struct kvm_vm *vmp);
int kvm_memcmp_hva_gva(void *hva, int kvm_memcmp_hva_gva(void *hva,
struct kvm_vm *vm, const vm_vaddr_t gva, size_t len); struct kvm_vm *vm, const vm_vaddr_t gva, size_t len);
...@@ -75,7 +77,7 @@ void vcpu_ioctl(struct kvm_vm *vm, ...@@ -75,7 +77,7 @@ void vcpu_ioctl(struct kvm_vm *vm,
uint32_t vcpuid, unsigned long ioctl, void *arg); uint32_t vcpuid, unsigned long ioctl, void *arg);
void vm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg); void vm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg);
void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags); void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags);
void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid); void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid, int pgd_memslot, int gdt_memslot);
vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min, vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
uint32_t data_memslot, uint32_t pgd_memslot); uint32_t data_memslot, uint32_t pgd_memslot);
void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa); void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa);
......
...@@ -59,8 +59,8 @@ enum x86_register { ...@@ -59,8 +59,8 @@ enum x86_register {
struct desc64 { struct desc64 {
uint16_t limit0; uint16_t limit0;
uint16_t base0; uint16_t base0;
unsigned base1:8, type:5, dpl:2, p:1; unsigned base1:8, s:1, type:4, dpl:2, p:1;
unsigned limit1:4, zero0:3, g:1, base2:8; unsigned limit1:4, avl:1, l:1, db:1, g:1, base2:8;
uint32_t base3; uint32_t base3;
uint32_t zero1; uint32_t zero1;
} __attribute__((packed)); } __attribute__((packed));
...@@ -303,6 +303,10 @@ static inline unsigned long get_xmm(int n) ...@@ -303,6 +303,10 @@ static inline unsigned long get_xmm(int n)
return 0; return 0;
} }
struct kvm_x86_state;
struct kvm_x86_state *vcpu_save_state(struct kvm_vm *vm, uint32_t vcpuid);
void vcpu_load_state(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_x86_state *state);
/* /*
* Basic CPU control in CR0 * Basic CPU control in CR0
*/ */
......
...@@ -43,6 +43,7 @@ struct vcpu { ...@@ -43,6 +43,7 @@ struct vcpu {
struct kvm_vm { struct kvm_vm {
int mode; int mode;
int kvm_fd;
int fd; int fd;
unsigned int page_size; unsigned int page_size;
unsigned int page_shift; unsigned int page_shift;
...@@ -51,13 +52,17 @@ struct kvm_vm { ...@@ -51,13 +52,17 @@ struct kvm_vm {
struct userspace_mem_region *userspace_mem_region_head; struct userspace_mem_region *userspace_mem_region_head;
struct sparsebit *vpages_valid; struct sparsebit *vpages_valid;
struct sparsebit *vpages_mapped; struct sparsebit *vpages_mapped;
bool has_irqchip;
bool pgd_created; bool pgd_created;
vm_paddr_t pgd; vm_paddr_t pgd;
vm_vaddr_t gdt;
vm_vaddr_t tss;
}; };
struct vcpu *vcpu_find(struct kvm_vm *vm, struct vcpu *vcpu_find(struct kvm_vm *vm,
uint32_t vcpuid); uint32_t vcpuid);
void vcpu_setup(struct kvm_vm *vm, int vcpuid); void vcpu_setup(struct kvm_vm *vm, int vcpuid, int pgd_memslot, int gdt_memslot);
void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent); void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent);
void regs_dump(FILE *stream, struct kvm_regs *regs, void regs_dump(FILE *stream, struct kvm_regs *regs,
uint8_t indent); uint8_t indent);
......
(This diff has been collapsed.)
(This diff has been collapsed.)