diff --git a/drivers/kvm/Kconfig b/drivers/kvm/Kconfig index 0a419a0de603a352cb2411cfacbb005f17590d9c..8749fa4ffcee955725e9f8992104c086522828a2 100644 --- a/drivers/kvm/Kconfig +++ b/drivers/kvm/Kconfig @@ -17,6 +17,7 @@ if VIRTUALIZATION config KVM tristate "Kernel-based Virtual Machine (KVM) support" depends on X86 && EXPERIMENTAL + select PREEMPT_NOTIFIERS select ANON_INODES ---help--- Support hosting fully virtualized guest machines using hardware diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h index e92c84b04c1f382dd02e64a4efc0bb047b746369..0667183ecbed888518b94d36a7d1ea7cad65cf9d 100644 --- a/drivers/kvm/kvm.h +++ b/drivers/kvm/kvm.h @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -301,6 +302,7 @@ void kvm_io_bus_register_dev(struct kvm_io_bus *bus, struct kvm_vcpu { struct kvm *kvm; + struct preempt_notifier preempt_notifier; int vcpu_id; struct mutex mutex; int cpu; @@ -429,7 +431,7 @@ struct kvm_arch_ops { struct kvm_vcpu *(*vcpu_create)(struct kvm *kvm, unsigned id); void (*vcpu_free)(struct kvm_vcpu *vcpu); - void (*vcpu_load)(struct kvm_vcpu *vcpu); + void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu); void (*vcpu_put)(struct kvm_vcpu *vcpu); void (*vcpu_decache)(struct kvm_vcpu *vcpu); diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index 20947462f40185dc9aef08b6d48c89e1776d74cf..6035e6d3541722260e43d23afd749618cc65394a 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -54,6 +54,8 @@ static cpumask_t cpus_hardware_enabled; struct kvm_arch_ops *kvm_arch_ops; +static __read_mostly struct preempt_ops kvm_preempt_ops; + #define STAT_OFFSET(x) offsetof(struct kvm_vcpu, stat.x) static struct kvm_stats_debugfs_item { @@ -239,13 +241,21 @@ EXPORT_SYMBOL_GPL(kvm_put_guest_fpu); */ static void vcpu_load(struct kvm_vcpu *vcpu) { + int cpu; + mutex_lock(&vcpu->mutex); - kvm_arch_ops->vcpu_load(vcpu); + cpu = get_cpu(); + preempt_notifier_register(&vcpu->preempt_notifier); + kvm_arch_ops->vcpu_load(vcpu, cpu); + put_cpu(); } static void vcpu_put(struct kvm_vcpu *vcpu) { + preempt_disable(); kvm_arch_ops->vcpu_put(vcpu); + preempt_notifier_unregister(&vcpu->preempt_notifier); + preempt_enable(); mutex_unlock(&vcpu->mutex); } @@ -1672,9 +1682,7 @@ void kvm_resched(struct kvm_vcpu *vcpu) { if (!need_resched()) return; - vcpu_put(vcpu); cond_resched(); - vcpu_load(vcpu); } EXPORT_SYMBOL_GPL(kvm_resched); @@ -1722,11 +1730,9 @@ static int pio_copy_data(struct kvm_vcpu *vcpu) unsigned bytes; int nr_pages = vcpu->pio.guest_pages[1] ? 2 : 1; - kvm_arch_ops->vcpu_put(vcpu); q = vmap(vcpu->pio.guest_pages, nr_pages, VM_READ|VM_WRITE, PAGE_KERNEL); if (!q) { - kvm_arch_ops->vcpu_load(vcpu); free_pio_guest_pages(vcpu); return -ENOMEM; } @@ -1738,7 +1744,6 @@ static int pio_copy_data(struct kvm_vcpu *vcpu) memcpy(p, q, bytes); q -= vcpu->pio.guest_page_offset; vunmap(q); - kvm_arch_ops->vcpu_load(vcpu); free_pio_guest_pages(vcpu); return 0; } @@ -2413,6 +2418,8 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n) if (IS_ERR(vcpu)) return PTR_ERR(vcpu); + preempt_notifier_init(&vcpu->preempt_notifier, &kvm_preempt_ops); + vcpu_load(vcpu); r = kvm_mmu_setup(vcpu); vcpu_put(vcpu); @@ -3145,6 +3152,27 @@ static struct sys_device kvm_sysdev = { hpa_t bad_page_address; +static inline +struct kvm_vcpu *preempt_notifier_to_vcpu(struct preempt_notifier *pn) +{ + return container_of(pn, struct kvm_vcpu, preempt_notifier); +} + +static void kvm_sched_in(struct preempt_notifier *pn, int cpu) +{ + struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn); + + kvm_arch_ops->vcpu_load(vcpu, cpu); +} + +static void kvm_sched_out(struct preempt_notifier *pn, + struct task_struct *next) +{ + struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn); + + kvm_arch_ops->vcpu_put(vcpu); +} + int kvm_init_arch(struct kvm_arch_ops *ops, struct module *module) { int r; @@ -3191,6 +3219,9 @@ int kvm_init_arch(struct kvm_arch_ops *ops, struct module *module) goto out_free; } + kvm_preempt_ops.sched_in = kvm_sched_in; + kvm_preempt_ops.sched_out = kvm_sched_out; + return r; out_free: diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c index 5437de2aa2d806a16590bf3596c1d032ca0fc681..396c736e546b6926d98ec6c401ef8e7d5c9df55c 100644 --- a/drivers/kvm/mmu.c +++ b/drivers/kvm/mmu.c @@ -276,9 +276,7 @@ static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu) kvm_mmu_free_some_pages(vcpu); if (r < 0) { spin_unlock(&vcpu->kvm->lock); - kvm_arch_ops->vcpu_put(vcpu); r = __mmu_topup_memory_caches(vcpu, GFP_KERNEL); - kvm_arch_ops->vcpu_load(vcpu); spin_lock(&vcpu->kvm->lock); kvm_mmu_free_some_pages(vcpu); } diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c index 0feec8558599975d927cc641ecef76e7a43504c5..3997bbd78fb76429b18603f7d6f16c61a65c7274 100644 --- a/drivers/kvm/svm.c +++ b/drivers/kvm/svm.c @@ -625,12 +625,11 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu) kfree(svm); } -static void svm_vcpu_load(struct kvm_vcpu *vcpu) +static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { struct vcpu_svm *svm = to_svm(vcpu); - int cpu, i; + int i; - cpu = get_cpu(); if (unlikely(cpu != vcpu->cpu)) { u64 tsc_this, delta; @@ -657,7 +656,6 @@ static void svm_vcpu_put(struct kvm_vcpu *vcpu) wrmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]); rdtscll(vcpu->host_tsc); - put_cpu(); } static void svm_vcpu_decache(struct kvm_vcpu *vcpu) diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index 18f9b0b3fb1f36b7b5981a5d9a0d44d72dd9cd46..8c87d20f8e39135e1560eb47963de74f28fe4b38 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -396,6 +396,7 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu) static void vmx_load_host_state(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); + unsigned long flags; if (!vmx->host_state.loaded) return; @@ -408,12 +409,12 @@ static void vmx_load_host_state(struct kvm_vcpu *vcpu) * If we have to reload gs, we must take care to * preserve our gs base. */ - local_irq_disable(); + local_irq_save(flags); load_gs(vmx->host_state.gs_sel); #ifdef CONFIG_X86_64 wrmsrl(MSR_GS_BASE, vmcs_readl(HOST_GS_BASE)); #endif - local_irq_enable(); + local_irq_restore(flags); reload_tss(); } @@ -427,15 +428,12 @@ static void vmx_load_host_state(struct kvm_vcpu *vcpu) * Switches to specified vcpu, until a matching vcpu_put(), but assumes * vcpu mutex is already taken. */ -static void vmx_vcpu_load(struct kvm_vcpu *vcpu) +static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); u64 phys_addr = __pa(vmx->vmcs); - int cpu; u64 tsc_this, delta; - cpu = get_cpu(); - if (vcpu->cpu != cpu) vcpu_clear(vcpu); @@ -480,7 +478,6 @@ static void vmx_vcpu_put(struct kvm_vcpu *vcpu) { vmx_load_host_state(vcpu); kvm_put_guest_fpu(vcpu); - put_cpu(); } static void vmx_fpu_activate(struct kvm_vcpu *vcpu) @@ -2127,6 +2124,8 @@ static int vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) if (unlikely(r)) goto out; + preempt_disable(); + if (!vcpu->mmio_read_completed) do_interrupt_requests(vcpu, kvm_run); @@ -2269,6 +2268,9 @@ static int vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) vcpu->interrupt_window_open = (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0; asm ("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS)); + vmx->launched = 1; + + preempt_enable(); if (unlikely(fail)) { kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; @@ -2283,7 +2285,6 @@ static int vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) if (unlikely(prof_on == KVM_PROFILING)) profile_hit(KVM_PROFILING, (void *)vmcs_readl(GUEST_RIP)); - vmx->launched = 1; r = kvm_handle_exit(kvm_run, vcpu); if (r > 0) { /* Give scheduler a change to reschedule. */ @@ -2372,6 +2373,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) { int err; struct vcpu_vmx *vmx = kzalloc(sizeof(*vmx), GFP_KERNEL); + int cpu; if (!vmx) return ERR_PTR(-ENOMEM); @@ -2396,9 +2398,11 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) vmcs_clear(vmx->vmcs); - vmx_vcpu_load(&vmx->vcpu); + cpu = get_cpu(); + vmx_vcpu_load(&vmx->vcpu, cpu); err = vmx_vcpu_setup(&vmx->vcpu); vmx_vcpu_put(&vmx->vcpu); + put_cpu(); if (err) goto free_vmcs;