Commit 2f333bcb authored by Marcelo Tosatti, committed by Avi Kivity

KVM: MMU: hypercall based pte updates and TLB flushes

Hypercall based pte updates are faster than faults, and also allow use
of the lazy MMU mode to batch operations.

Don't report the feature if two dimensional paging is enabled.

[avi:
 - one mmu_op hypercall instead of one per op
 - allow 64-bit gpa on hypercall
 - don't pass host errors (-ENOMEM) to guest]

[akpm: warning fix on i386]
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Avi Kivity <avi@qumranet.com>
Parent 9f811285
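For context, here is a minimal guest-side sketch (not part of this patch) of how the interface might be driven: the guest packs one or more operations into a buffer using the kvm_mmu_op_* structures added to kvm_para.h below, then issues a single KVM_HC_MMU_OP hypercall with the buffer length in a0 and the buffer's guest-physical address in a1/a2, matching hc_gpa() in the patch. The kvm_mmu_op_batch() helper and its use of kvm_hypercall3() and __pa() are illustrative assumptions, not code from this commit.

/*
 * Illustrative guest-side sketch only, not part of this patch: batch a
 * PTE write and a TLB flush into one buffer and submit them with a
 * single KVM_HC_MMU_OP hypercall.
 */
#include <linux/types.h>
#include <asm/kvm_para.h>

static char mmu_op_buf[512] __attribute__((aligned(sizeof(long))));

static long kvm_mmu_op_batch(u64 pte_phys, u64 pte_val)
{
	struct kvm_mmu_op_write_pte *wpte = (void *)mmu_op_buf;
	struct kvm_mmu_op_flush_tlb *ftlb = (void *)(wpte + 1);
	unsigned long len = sizeof(*wpte) + sizeof(*ftlb);
	u64 buf_pa = __pa(mmu_op_buf);

	wpte->header.op = KVM_MMU_OP_WRITE_PTE;	/* update one pte ... */
	wpte->pte_phys = pte_phys;		/* gpa of the pte to write */
	wpte->pte_val = pte_val;		/* new pte value */

	ftlb->header.op = KVM_MMU_OP_FLUSH_TLB;	/* ... then flush the TLB */

	/* a0 = buffer length; a1/a2 carry the buffer gpa (see hc_gpa()) */
	return kvm_hypercall3(KVM_HC_MMU_OP, len,
			      (unsigned long)buf_pa,
			      (unsigned long)(buf_pa >> 32));
}

The host walks the buffer one header at a time (kvm_pv_mmu_op_one() below) and returns the number of bytes it processed in the guest's RAX, so a guest could compare that value against len and resubmit any unprocessed tail.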
@@ -28,6 +28,7 @@
 #include <linux/module.h>
 #include <linux/swap.h>
 #include <linux/hugetlb.h>
+#include <linux/compiler.h>
 
 #include <asm/page.h>
 #include <asm/cmpxchg.h>
@@ -40,7 +41,7 @@
  * 2. while doing 1. it walks guest-physical to host-physical
  * If the hardware supports that we don't need to do shadow paging.
  */
-static bool tdp_enabled = false;
+bool tdp_enabled = false;
 
 #undef MMU_DEBUG
@@ -167,6 +168,13 @@ static int dbg = 1;
 #define ACC_USER_MASK    PT_USER_MASK
 #define ACC_ALL          (ACC_EXEC_MASK | ACC_WRITE_MASK | ACC_USER_MASK)
 
+struct kvm_pv_mmu_op_buffer {
+	void *ptr;
+	unsigned len;
+	unsigned processed;
+	char buf[512] __aligned(sizeof(long));
+};
+
 struct kvm_rmap_desc {
 	u64 *shadow_ptes[RMAP_EXT];
 	struct kvm_rmap_desc *more;
@@ -2003,6 +2011,132 @@ unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm)
 	return nr_mmu_pages;
 }
 
+static void *pv_mmu_peek_buffer(struct kvm_pv_mmu_op_buffer *buffer,
+				unsigned len)
+{
+	if (len > buffer->len)
+		return NULL;
+	return buffer->ptr;
+}
+
+static void *pv_mmu_read_buffer(struct kvm_pv_mmu_op_buffer *buffer,
+				unsigned len)
+{
+	void *ret;
+
+	ret = pv_mmu_peek_buffer(buffer, len);
+	if (!ret)
+		return ret;
+	buffer->ptr += len;
+	buffer->len -= len;
+	buffer->processed += len;
+	return ret;
+}
+
+static int kvm_pv_mmu_write(struct kvm_vcpu *vcpu,
+			    gpa_t addr, gpa_t value)
+{
+	int bytes = 8;
+	int r;
+
+	if (!is_long_mode(vcpu) && !is_pae(vcpu))
+		bytes = 4;
+
+	r = mmu_topup_memory_caches(vcpu);
+	if (r)
+		return r;
+
+	if (!__emulator_write_phys(vcpu, addr, &value, bytes))
+		return -EFAULT;
+
+	return 1;
+}
+
+static int kvm_pv_mmu_flush_tlb(struct kvm_vcpu *vcpu)
+{
+	kvm_x86_ops->tlb_flush(vcpu);
+	return 1;
+}
+
+static int kvm_pv_mmu_release_pt(struct kvm_vcpu *vcpu, gpa_t addr)
+{
+	spin_lock(&vcpu->kvm->mmu_lock);
+	mmu_unshadow(vcpu->kvm, addr >> PAGE_SHIFT);
+	spin_unlock(&vcpu->kvm->mmu_lock);
+	return 1;
+}
+
+static int kvm_pv_mmu_op_one(struct kvm_vcpu *vcpu,
+			     struct kvm_pv_mmu_op_buffer *buffer)
+{
+	struct kvm_mmu_op_header *header;
+
+	header = pv_mmu_peek_buffer(buffer, sizeof *header);
+	if (!header)
+		return 0;
+	switch (header->op) {
+	case KVM_MMU_OP_WRITE_PTE: {
+		struct kvm_mmu_op_write_pte *wpte;
+
+		wpte = pv_mmu_read_buffer(buffer, sizeof *wpte);
+		if (!wpte)
+			return 0;
+		return kvm_pv_mmu_write(vcpu, wpte->pte_phys,
+					wpte->pte_val);
+	}
+	case KVM_MMU_OP_FLUSH_TLB: {
+		struct kvm_mmu_op_flush_tlb *ftlb;
+
+		ftlb = pv_mmu_read_buffer(buffer, sizeof *ftlb);
+		if (!ftlb)
+			return 0;
+		return kvm_pv_mmu_flush_tlb(vcpu);
+	}
+	case KVM_MMU_OP_RELEASE_PT: {
+		struct kvm_mmu_op_release_pt *rpt;
+
+		rpt = pv_mmu_read_buffer(buffer, sizeof *rpt);
+		if (!rpt)
+			return 0;
+		return kvm_pv_mmu_release_pt(vcpu, rpt->pt_phys);
+	}
+	default: return 0;
+	}
+}
+
+int kvm_pv_mmu_op(struct kvm_vcpu *vcpu, unsigned long bytes,
+		  gpa_t addr, unsigned long *ret)
+{
+	int r;
+	struct kvm_pv_mmu_op_buffer buffer;
+
+	down_read(&vcpu->kvm->slots_lock);
+	down_read(&current->mm->mmap_sem);
+
+	buffer.ptr = buffer.buf;
+	buffer.len = min_t(unsigned long, bytes, sizeof buffer.buf);
+	buffer.processed = 0;
+
+	r = kvm_read_guest(vcpu->kvm, addr, buffer.buf, buffer.len);
+	if (r)
+		goto out;
+
+	while (buffer.len) {
+		r = kvm_pv_mmu_op_one(vcpu, &buffer);
+		if (r < 0)
+			goto out;
+		if (r == 0)
+			break;
+	}
+
+	r = 1;
+out:
+	*ret = buffer.processed;
+	up_read(&current->mm->mmap_sem);
+	up_read(&vcpu->kvm->slots_lock);
+	return r;
+}
+
 #ifdef AUDIT
 
 static const char *audit_msg;
...
@@ -832,6 +832,9 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_NR_MEMSLOTS:
 		r = KVM_MEMORY_SLOTS;
 		break;
+	case KVM_CAP_PV_MMU:
+		r = !tdp_enabled;
+		break;
 	default:
 		r = 0;
 		break;
@@ -2452,9 +2455,19 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu)
 }
 EXPORT_SYMBOL_GPL(kvm_emulate_halt);
 
+static inline gpa_t hc_gpa(struct kvm_vcpu *vcpu, unsigned long a0,
+			   unsigned long a1)
+{
+	if (is_long_mode(vcpu))
+		return a0;
+	else
+		return a0 | ((gpa_t)a1 << 32);
+}
+
 int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
 {
 	unsigned long nr, a0, a1, a2, a3, ret;
+	int r = 1;
 
 	kvm_x86_ops->cache_regs(vcpu);
 
@@ -2476,6 +2489,9 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
 	case KVM_HC_VAPIC_POLL_IRQ:
 		ret = 0;
 		break;
+	case KVM_HC_MMU_OP:
+		r = kvm_pv_mmu_op(vcpu, a0, hc_gpa(vcpu, a1, a2), &ret);
+		break;
 	default:
 		ret = -KVM_ENOSYS;
 		break;
@@ -2483,7 +2499,7 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
 	vcpu->arch.regs[VCPU_REGS_RAX] = ret;
 	kvm_x86_ops->decache_regs(vcpu);
 	++vcpu->stat.hypercalls;
-	return 0;
+	return r;
 }
 EXPORT_SYMBOL_GPL(kvm_emulate_hypercall);
...
@@ -434,6 +434,10 @@ int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3);
 int __emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
 			  const void *val, int bytes);
 
+int kvm_pv_mmu_op(struct kvm_vcpu *vcpu, unsigned long bytes,
+		  gpa_t addr, unsigned long *ret);
+
+extern bool tdp_enabled;
 
 enum emulation_result {
 	EMULATE_DONE,       /* no further processing */
...
@@ -12,10 +12,39 @@
 #define KVM_CPUID_FEATURES	0x40000001
 #define KVM_FEATURE_CLOCKSOURCE	0
 #define KVM_FEATURE_NOP_IO_DELAY	1
+#define KVM_FEATURE_MMU_OP	2
 
 #define MSR_KVM_WALL_CLOCK  0x11
 #define MSR_KVM_SYSTEM_TIME 0x12
 
+#define KVM_MAX_MMU_OP_BATCH           32
+
+/* Operations for KVM_HC_MMU_OP */
+#define KVM_MMU_OP_WRITE_PTE            1
+#define KVM_MMU_OP_FLUSH_TLB            2
+#define KVM_MMU_OP_RELEASE_PT           3
+
+/* Payload for KVM_HC_MMU_OP */
+struct kvm_mmu_op_header {
+	__u32 op;
+	__u32 pad;
+};
+
+struct kvm_mmu_op_write_pte {
+	struct kvm_mmu_op_header header;
+	__u64 pte_phys;
+	__u64 pte_val;
+};
+
+struct kvm_mmu_op_flush_tlb {
+	struct kvm_mmu_op_header header;
+};
+
+struct kvm_mmu_op_release_pt {
+	struct kvm_mmu_op_header header;
+	__u64 pt_phys;
+};
+
 #ifdef __KERNEL__
 #include <asm/processor.h>
...
@@ -238,6 +238,7 @@ struct kvm_vapic_addr {
 #define KVM_CAP_NR_MEMSLOTS 10   /* returns max memory slots per vm */
 #define KVM_CAP_PIT 11
 #define KVM_CAP_NOP_IO_DELAY 12
+#define KVM_CAP_PV_MMU 13
 
 /*
  * ioctls for VM fds
...
@@ -11,8 +11,11 @@
 
 /* Return values for hypercalls */
 #define KVM_ENOSYS		1000
+#define KVM_EFAULT		EFAULT
+#define KVM_E2BIG		E2BIG
 
 #define KVM_HC_VAPIC_POLL_IRQ		1
+#define KVM_HC_MMU_OP			2
 
 /*
  * hypercalls use architecture specific
...