提交 3ea3b7fa 编写于 作者: W Wanpeng Li 提交者: Paolo Bonzini

kvm: mmu: lazy collapse small sptes into large sptes

Dirty logging tracks sptes in 4k granularity, meaning that large sptes
have to be split.  If live migration is successful, the guest in the
source machine will be destroyed and large sptes will be created in the
destination. However, if the guest continues to run in the source machine
(for example if live migration fails), small sptes will remain around
and cause bad performance.

This patch introduces lazy collapsing of small sptes into large sptes.
The rmap will be scanned in ioctl context when dirty logging is stopped,
dropping those sptes which can be collapsed into a single large-page spte.
Later page faults will create the large-page sptes.
Reviewed-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
Signed-off-by: Wanpeng Li <wanpeng.li@linux.intel.com>
Message-Id: <1428046825-6905-1-git-send-email-wanpeng.li@linux.intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
上级 1119022c
...@@ -867,6 +867,8 @@ void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, ...@@ -867,6 +867,8 @@ void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
void kvm_mmu_reset_context(struct kvm_vcpu *vcpu); void kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
void kvm_mmu_slot_remove_write_access(struct kvm *kvm, void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
struct kvm_memory_slot *memslot); struct kvm_memory_slot *memslot);
/*
 * Drop the small sptes of @memslot that could be collapsed into large-page
 * sptes; later page faults are expected to recreate them as large mappings.
 */
void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
struct kvm_memory_slot *memslot);
void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm, void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
struct kvm_memory_slot *memslot); struct kvm_memory_slot *memslot);
void kvm_mmu_slot_largepage_remove_write_access(struct kvm *kvm, void kvm_mmu_slot_largepage_remove_write_access(struct kvm *kvm,
......
...@@ -4465,6 +4465,79 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, ...@@ -4465,6 +4465,79 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
kvm_flush_remote_tlbs(kvm); kvm_flush_remote_tlbs(kvm);
} }
static bool kvm_mmu_zap_collapsible_spte(struct kvm *kvm,
unsigned long *rmapp)
{
u64 *sptep;
struct rmap_iterator iter;
int need_tlb_flush = 0;
pfn_t pfn;
struct kvm_mmu_page *sp;
for (sptep = rmap_get_first(*rmapp, &iter); sptep;) {
BUG_ON(!(*sptep & PT_PRESENT_MASK));
sp = page_header(__pa(sptep));
pfn = spte_to_pfn(*sptep);
/*
* Only EPT supported for now; otherwise, one would need to
* find out efficiently whether the guest page tables are
* also using huge pages.
*/
if (sp->role.direct &&
!kvm_is_reserved_pfn(pfn) &&
PageTransCompound(pfn_to_page(pfn))) {
drop_spte(kvm, sptep);
sptep = rmap_get_first(*rmapp, &iter);
need_tlb_flush = 1;
} else
sptep = rmap_get_next(&iter);
}
return need_tlb_flush;
}
void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
struct kvm_memory_slot *memslot)
{
bool flush = false;
unsigned long *rmapp;
unsigned long last_index, index;
gfn_t gfn_start, gfn_end;
spin_lock(&kvm->mmu_lock);
gfn_start = memslot->base_gfn;
gfn_end = memslot->base_gfn + memslot->npages - 1;
if (gfn_start >= gfn_end)
goto out;
rmapp = memslot->arch.rmap[0];
last_index = gfn_to_index(gfn_end, memslot->base_gfn,
PT_PAGE_TABLE_LEVEL);
for (index = 0; index <= last_index; ++index, ++rmapp) {
if (*rmapp)
flush |= kvm_mmu_zap_collapsible_spte(kvm, rmapp);
if (need_resched() || spin_needbreak(&kvm->mmu_lock)) {
if (flush) {
kvm_flush_remote_tlbs(kvm);
flush = false;
}
cond_resched_lock(&kvm->mmu_lock);
}
}
if (flush)
kvm_flush_remote_tlbs(kvm);
out:
spin_unlock(&kvm->mmu_lock);
}
void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm, void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
struct kvm_memory_slot *memslot) struct kvm_memory_slot *memslot)
{ {
......
...@@ -7664,6 +7664,23 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, ...@@ -7664,6 +7664,23 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
/* It's OK to get 'new' slot here as it has already been installed */ /* It's OK to get 'new' slot here as it has already been installed */
new = id_to_memslot(kvm->memslots, mem->slot); new = id_to_memslot(kvm->memslots, mem->slot);
/*
* Dirty logging tracks sptes in 4k granularity, meaning that large
* sptes have to be split. If live migration is successful, the guest
* in the source machine will be destroyed and large sptes will be
* created in the destination. However, if the guest continues to run
* in the source machine (for example if live migration fails), small
* sptes will remain around and cause bad performance.
*
* Scan sptes if dirty logging has been stopped, dropping those
* which can be collapsed into a single large-page spte. Later
* page faults will create the large-page sptes.
*/
if ((change != KVM_MR_DELETE) &&
(old->flags & KVM_MEM_LOG_DIRTY_PAGES) &&
!(new->flags & KVM_MEM_LOG_DIRTY_PAGES))
kvm_mmu_zap_collapsible_sptes(kvm, new);
/* /*
* Set up write protection and/or dirty logging for the new slot. * Set up write protection and/or dirty logging for the new slot.
* *
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册