Merge branch 'kvm-updates/2.6.31' of git://git.kernel.org/pub/scm/virt/kvm/kvm

* 'kvm-updates/2.6.31' of git://git.kernel.org/pub/scm/virt/kvm/kvm: KVM: Avoid redelivery of edge interrupt before next edge KVM: MMU: limit rmap chain length KVM: ia64: fix build failures due to ia64/unsigned long mismatches KVM: Make KVM_HPAGES_PER_HPAGE unsigned long to avoid build error on powerpc KVM: fix ack not being delivered when msi present KVM: s390: fix wait_queue handling KVM: VMX: Fix locking imbalance on emulation failure KVM: VMX: Fix locking order in handle_invalid_guest_state KVM: MMU: handle n_free_mmu_pages > n_alloc_mmu_pages in kvm_mmu_change_mmu_pages KVM: SVM: force new asid on vcpu migration KVM: x86: verify MTRR/PAT validity KVM: PIT: fix kpit_elapsed division by zero KVM: Fix KVM_GET_MSR_INDEX_LIST

Merge branch 'kvm-updates/2.6.31' of git://git.kernel.org/pub/scm/virt/kvm/kvm
* 'kvm-updates/2.6.31' of git://git.kernel.org/pub/scm/virt/kvm/kvm: KVM: Avoid redelivery of edge interrupt before next edge KVM: MMU: limit rmap chain length KVM: ia64: fix build failures due to ia64/unsigned long mismatches KVM: Make KVM_HPAGES_PER_HPAGE unsigned long to avoid build error on powerpc KVM: fix ack not being delivered when msi present KVM: s390: fix wait_queue handling KVM: VMX: Fix locking imbalance on emulation failure KVM: VMX: Fix locking order in handle_invalid_guest_state KVM: MMU: handle n_free_mmu_pages > n_alloc_mmu_pages in kvm_mmu_change_mmu_pages KVM: SVM: force new asid on vcpu migration KVM: x86: verify MTRR/PAT validity KVM: PIT: fix kpit_elapsed division by zero KVM: Fix KVM_GET_MSR_INDEX_LIST
17d11ba1 · Linus Torvalds · fb1ee451 · b4a2f5e7 · 17d11ba1 · 17d11ba1
13 changed file
--- a/arch/ia64/kvm/mmio.c
+++ b/arch/ia64/kvm/mmio.c
@@ -247,7 +247,8 @@ void emulate_io_inst(struct kvm_vcpu *vcpu, u64 padr, u64 ma)
 		vcpu_get_fpreg(vcpu, inst.M9.f2, &v);
 		/* Write high word. FIXME: this is a kludge!  */
 		v.u.bits[1] &= 0x3ffff;
-		mmio_access(vcpu, padr + 8, &v.u.bits[1], 8, ma, IOREQ_WRITE);
+		mmio_access(vcpu, padr + 8, (u64 *)&v.u.bits[1], 8,
+			    ma, IOREQ_WRITE);
 		data = v.u.bits[0];
 		size = 3;
 	} else if (inst.M10.major == 7 && inst.M10.x6 == 0x3B) {
@@ -265,7 +266,8 @@ void emulate_io_inst(struct kvm_vcpu *vcpu, u64 padr, u64 ma)

 		/* Write high word.FIXME: this is a kludge!  */
 		v.u.bits[1] &= 0x3ffff;
-		mmio_access(vcpu, padr + 8, &v.u.bits[1], 8, ma, IOREQ_WRITE);
+		mmio_access(vcpu, padr + 8, (u64 *)&v.u.bits[1],
+			    8, ma, IOREQ_WRITE);
 		data = v.u.bits[0];
 		size = 3;
 	} else if (inst.M10.major == 7 && inst.M10.x6 == 0x31) {

--- a/arch/ia64/kvm/vcpu.c
+++ b/arch/ia64/kvm/vcpu.c
@@ -461,7 +461,7 @@ void setreg(unsigned long regnum, unsigned long val,
 u64 vcpu_get_gr(struct kvm_vcpu *vcpu, unsigned long reg)
 {
 	struct kvm_pt_regs *regs = vcpu_regs(vcpu);
-	u64 val;
+	unsigned long val;

 	if (!reg)
 		return 0;
@@ -469,7 +469,7 @@ u64 vcpu_get_gr(struct kvm_vcpu *vcpu, unsigned long reg)
 	return val;
 }

-void vcpu_set_gr(struct kvm_vcpu *vcpu, u64 reg, u64 value, int nat)
+void vcpu_set_gr(struct kvm_vcpu *vcpu, unsigned long reg, u64 value, int nat)
 {
 	struct kvm_pt_regs *regs = vcpu_regs(vcpu);
 	long sof = (regs->cr_ifs) & 0x7f;
@@ -1072,7 +1072,7 @@ void kvm_ttag(struct kvm_vcpu *vcpu, INST64 inst)
 	vcpu_set_gr(vcpu, inst.M46.r1, tag, 0);
 }

-int vcpu_tpa(struct kvm_vcpu *vcpu, u64 vadr, u64 *padr)
+int vcpu_tpa(struct kvm_vcpu *vcpu, u64 vadr, unsigned long *padr)
 {
 	struct thash_data *data;
 	union ia64_isr visr, pt_isr;

--- a/arch/ia64/kvm/vcpu.h
+++ b/arch/ia64/kvm/vcpu.h
@@ -686,14 +686,15 @@ static inline int highest_inservice_irq(struct kvm_vcpu *vcpu)
 	return highest_bits((int *)&(VMX(vcpu, insvc[0])));
 }

-extern void vcpu_get_fpreg(struct kvm_vcpu *vcpu, u64 reg,
+extern void vcpu_get_fpreg(struct kvm_vcpu *vcpu, unsigned long reg,
 					struct ia64_fpreg *val);
-extern void vcpu_set_fpreg(struct kvm_vcpu *vcpu, u64 reg,
+extern void vcpu_set_fpreg(struct kvm_vcpu *vcpu, unsigned long reg,
 					struct ia64_fpreg *val);
-extern u64 vcpu_get_gr(struct kvm_vcpu *vcpu, u64 reg);
-extern void vcpu_set_gr(struct kvm_vcpu *vcpu, u64 reg, u64 val, int nat);
-extern u64 vcpu_get_psr(struct kvm_vcpu *vcpu);
-extern void vcpu_set_psr(struct kvm_vcpu *vcpu, u64 val);
+extern u64 vcpu_get_gr(struct kvm_vcpu *vcpu, unsigned long reg);
+extern void vcpu_set_gr(struct kvm_vcpu *vcpu, unsigned long reg,
+			u64 val, int nat);
+extern unsigned long vcpu_get_psr(struct kvm_vcpu *vcpu);
+extern void vcpu_set_psr(struct kvm_vcpu *vcpu, unsigned long val);
 extern u64 vcpu_thash(struct kvm_vcpu *vcpu, u64 vadr);
 extern void vcpu_bsw0(struct kvm_vcpu *vcpu);
 extern void thash_vhpt_insert(struct kvm_vcpu *v, u64 pte,

--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -34,7 +34,7 @@
 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1

 /* We don't currently support large pages. */
-#define KVM_PAGES_PER_HPAGE (1<<31)
+#define KVM_PAGES_PER_HPAGE (1UL << 31)

 struct kvm;
 struct kvm_run;

--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -386,7 +386,7 @@ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu)
 	}
 	__unset_cpu_idle(vcpu);
 	__set_current_state(TASK_RUNNING);
-	remove_wait_queue(&vcpu->wq, &wait);
+	remove_wait_queue(&vcpu->arch.local_int.wq, &wait);
 	spin_unlock_bh(&vcpu->arch.local_int.lock);
 	spin_unlock(&vcpu->arch.local_int.float_int->lock);
 	hrtimer_try_to_cancel(&vcpu->arch.ckc_timer);

--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -104,6 +104,9 @@ static s64 __kpit_elapsed(struct kvm *kvm)
 	ktime_t remaining;
 	struct kvm_kpit_state *ps = &kvm->arch.vpit->pit_state;

+	if (!ps->pit_timer.period)
+		return 0;
+
 	/*
 	 * The Counter does not stop when it reaches zero. In
 	 * Modes 0, 1, 4, and 5 the Counter ``wraps around'' to

--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -489,16 +489,20 @@ static unsigned long *gfn_to_rmap(struct kvm *kvm, gfn_t gfn, int lpage)
 *
 * If rmapp bit zero is one, (then rmap & ~1) points to a struct kvm_rmap_desc
 * containing more mappings.
+ *
+ * Returns the number of rmap entries before the spte was added or zero if
+ * the spte was not added.
+ *
 */
-static void rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn, int lpage)
+static int rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn, int lpage)
 {
 	struct kvm_mmu_page *sp;
 	struct kvm_rmap_desc *desc;
 	unsigned long *rmapp;
-	int i;
+	int i, count = 0;

 	if (!is_rmap_pte(*spte))
-		return;
+		return count;
 	gfn = unalias_gfn(vcpu->kvm, gfn);
 	sp = page_header(__pa(spte));
 	sp->gfns[spte - sp->spt] = gfn;
@@ -515,8 +519,10 @@ static void rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn, int lpage)
 	} else {
 		rmap_printk("rmap_add: %p %llx many->many\n", spte, *spte);
 		desc = (struct kvm_rmap_desc *)(*rmapp & ~1ul);
-		while (desc->shadow_ptes[RMAP_EXT-1] && desc->more)
+		while (desc->shadow_ptes[RMAP_EXT-1] && desc->more) {
 			desc = desc->more;
+			count += RMAP_EXT;
+		}
 		if (desc->shadow_ptes[RMAP_EXT-1]) {
 			desc->more = mmu_alloc_rmap_desc(vcpu);
 			desc = desc->more;
@@ -525,6 +531,7 @@ static void rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn, int lpage)
 			;
 		desc->shadow_ptes[i] = spte;
 	}
+	return count;
 }

 static void rmap_desc_remove_entry(unsigned long *rmapp,
@@ -754,6 +761,19 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp)
 	return young;
 }

+#define RMAP_RECYCLE_THRESHOLD 1000
+
+static void rmap_recycle(struct kvm_vcpu *vcpu, gfn_t gfn, int lpage)
+{
+	unsigned long *rmapp;
+
+	gfn = unalias_gfn(vcpu->kvm, gfn);
+	rmapp = gfn_to_rmap(vcpu->kvm, gfn, lpage);
+
+	kvm_unmap_rmapp(vcpu->kvm, rmapp);
+	kvm_flush_remote_tlbs(vcpu->kvm);
+}
+
 int kvm_age_hva(struct kvm *kvm, unsigned long hva)
 {
 	return kvm_handle_hva(kvm, hva, kvm_age_rmapp);
@@ -1407,24 +1427,25 @@ static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp)
 */
 void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages)
 {
+	int used_pages;
+
+	used_pages = kvm->arch.n_alloc_mmu_pages - kvm->arch.n_free_mmu_pages;
+	used_pages = max(0, used_pages);
+
 	/*
 	 * If we set the number of mmu pages to be smaller be than the
 	 * number of actived pages , we must to free some mmu pages before we
 	 * change the value
 	 */

-	if ((kvm->arch.n_alloc_mmu_pages - kvm->arch.n_free_mmu_pages) >
-	    kvm_nr_mmu_pages) {
-		int n_used_mmu_pages = kvm->arch.n_alloc_mmu_pages
-				       - kvm->arch.n_free_mmu_pages;
-
-		while (n_used_mmu_pages > kvm_nr_mmu_pages) {
+	if (used_pages > kvm_nr_mmu_pages) {
+		while (used_pages > kvm_nr_mmu_pages) {
 			struct kvm_mmu_page *page;

 			page = container_of(kvm->arch.active_mmu_pages.prev,
 					    struct kvm_mmu_page, link);
 			kvm_mmu_zap_page(kvm, page);
-			n_used_mmu_pages--;
+			used_pages--;
 		}
 		kvm->arch.n_free_mmu_pages = 0;
 	}
@@ -1740,6 +1761,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
 {
 	int was_rmapped = 0;
 	int was_writeble = is_writeble_pte(*shadow_pte);
+	int rmap_count;

 	pgprintk("%s: spte %llx access %x write_fault %d"
 		 " user_fault %d gfn %lx\n",
@@ -1781,9 +1803,11 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,

 	page_header_update_slot(vcpu->kvm, shadow_pte, gfn);
 	if (!was_rmapped) {
-		rmap_add(vcpu, shadow_pte, gfn, largepage);
+		rmap_count = rmap_add(vcpu, shadow_pte, gfn, largepage);
 		if (!is_rmap_pte(*shadow_pte))
 			kvm_release_pfn_clean(pfn);
+		if (rmap_count > RMAP_RECYCLE_THRESHOLD)
+			rmap_recycle(vcpu, gfn, largepage);
 	} else {
 		if (was_writeble)
 			kvm_release_pfn_dirty(pfn);

--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -711,6 +711,7 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 		svm->vmcb->control.tsc_offset += delta;
 		vcpu->cpu = cpu;
 		kvm_migrate_timers(vcpu);
+		svm->asid_generation = 0;
 	}

 	for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++)
@@ -1031,7 +1032,6 @@ static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *svm_data)
 		svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ALL_ASID;
 	}

-	svm->vcpu.cpu = svm_data->cpu;
 	svm->asid_generation = svm_data->asid_generation;
 	svm->vmcb->control.asid = svm_data->next_asid++;
 }
@@ -2300,8 +2300,8 @@ static void pre_svm_run(struct vcpu_svm *svm)
 	struct svm_cpu_data *svm_data = per_cpu(svm_data, cpu);

 	svm->vmcb->control.tlb_ctl = TLB_CONTROL_DO_NOTHING;
-	if (svm->vcpu.cpu != cpu ||
-	    svm->asid_generation != svm_data->asid_generation)
+	/* FIXME: handle wraparound of asid_generation */
+	if (svm->asid_generation != svm_data->asid_generation)
 		new_asid(svm, svm_data);
 }


--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3157,8 +3157,8 @@ static void handle_invalid_guest_state(struct kvm_vcpu *vcpu,
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	enum emulation_result err = EMULATE_DONE;

-	preempt_enable();
 	local_irq_enable();
+	preempt_enable();

 	while (!guest_state_valid(vcpu)) {
 		err = emulate_instruction(vcpu, kvm_run, 0, 0, 0);
@@ -3168,7 +3168,7 @@ static void handle_invalid_guest_state(struct kvm_vcpu *vcpu,

 		if (err != EMULATE_DONE) {
 			kvm_report_emulation_failure(vcpu, "emulation failure");
-			return;
+			break;
 		}

 		if (signal_pending(current))
@@ -3177,8 +3177,8 @@ static void handle_invalid_guest_state(struct kvm_vcpu *vcpu,
 			schedule();
 	}

-	local_irq_disable();
 	preempt_disable();
+	local_irq_disable();

 	vmx->invalid_state_emulation_result = err;
 }

--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -704,11 +704,48 @@ static bool msr_mtrr_valid(unsigned msr)
 	return false;
 }

+static bool valid_pat_type(unsigned t)
+{
+	return t < 8 && (1 << t) & 0xf3; /* 0, 1, 4, 5, 6, 7 */
+}
+
+static bool valid_mtrr_type(unsigned t)
+{
+	return t < 8 && (1 << t) & 0x73; /* 0, 1, 4, 5, 6 */
+}
+
+static bool mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data)
+{
+	int i;
+
+	if (!msr_mtrr_valid(msr))
+		return false;
+
+	if (msr == MSR_IA32_CR_PAT) {
+		for (i = 0; i < 8; i++)
+			if (!valid_pat_type((data >> (i * 8)) & 0xff))
+				return false;
+		return true;
+	} else if (msr == MSR_MTRRdefType) {
+		if (data & ~0xcff)
+			return false;
+		return valid_mtrr_type(data & 0xff);
+	} else if (msr >= MSR_MTRRfix64K_00000 && msr <= MSR_MTRRfix4K_F8000) {
+		for (i = 0; i < 8 ; i++)
+			if (!valid_mtrr_type((data >> (i * 8)) & 0xff))
+				return false;
+		return true;
+	}
+
+	/* variable MTRRs */
+	return valid_mtrr_type(data & 0xff);
+}
+
 static int set_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 {
 	u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges;

-	if (!msr_mtrr_valid(msr))
+	if (!mtrr_valid(vcpu, msr, data))
 		return 1;

 	if (msr == MSR_MTRRdefType) {
@@ -1079,14 +1116,13 @@ long kvm_arch_dev_ioctl(struct file *filp,
 		if (copy_to_user(user_msr_list, &msr_list, sizeof msr_list))
 			goto out;
 		r = -E2BIG;
-		if (n < num_msrs_to_save)
+		if (n < msr_list.nmsrs)
 			goto out;
 		r = -EFAULT;
 		if (copy_to_user(user_msr_list->indices, &msrs_to_save,
 				 num_msrs_to_save * sizeof(u32)))
 			goto out;
-		if (copy_to_user(user_msr_list->indices
-				 + num_msrs_to_save * sizeof(u32),
+		if (copy_to_user(user_msr_list->indices + num_msrs_to_save,
 				 &emulated_msrs,
 				 ARRAY_SIZE(emulated_msrs) * sizeof(u32)))
 			goto out;

--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -110,6 +110,7 @@ struct kvm_memory_slot {

 struct kvm_kernel_irq_routing_entry {
 	u32 gsi;
+	u32 type;
 	int (*set)(struct kvm_kernel_irq_routing_entry *e,
 		    struct kvm *kvm, int level);
 	union {

--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -95,8 +95,6 @@ static int ioapic_service(struct kvm_ioapic *ioapic, unsigned int idx)
 		if (injected && pent->fields.trig_mode == IOAPIC_LEVEL_TRIG)
 			pent->fields.remote_irr = 1;
 	}
-	if (!pent->fields.trig_mode)
-		ioapic->irr &= ~(1 << idx);

 	return injected;
 }
@@ -136,7 +134,8 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
 		mask_after = ioapic->redirtbl[index].fields.mask;
 		if (mask_before != mask_after)
 			kvm_fire_mask_notifiers(ioapic->kvm, index, mask_after);
-		if (ioapic->irr & (1 << index))
+		if (ioapic->redirtbl[index].fields.trig_mode == IOAPIC_LEVEL_TRIG
+		    && ioapic->irr & (1 << index))
 			ioapic_service(ioapic, index);
 		break;
 	}
@@ -184,9 +183,10 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level)
 		if (!level)
 			ioapic->irr &= ~mask;
 		else {
+			int edge = (entry.fields.trig_mode == IOAPIC_EDGE_TRIG);
 			ioapic->irr |= mask;
-			if ((!entry.fields.trig_mode && old_irr != ioapic->irr)
-			    || !entry.fields.remote_irr)
+			if ((edge && old_irr != ioapic->irr) ||
+			    (!edge && !entry.fields.remote_irr))
 				ret = ioapic_service(ioapic, irq);
 		}
 	}

--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -160,7 +160,8 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
 	unsigned gsi = pin;

 	list_for_each_entry(e, &kvm->irq_routing, link)
-		if (e->irqchip.irqchip == irqchip &&
+		if (e->type == KVM_IRQ_ROUTING_IRQCHIP &&
+		    e->irqchip.irqchip == irqchip &&
 		    e->irqchip.pin == pin) {
 			gsi = e->gsi;
 			break;
@@ -259,6 +260,7 @@ static int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e,
 	int delta;

 	e->gsi = ue->gsi;
+	e->type = ue->type;
 	switch (ue->type) {
 	case KVM_IRQ_ROUTING_IRQCHIP:
 		delta = 0;