Commit ed9216c1 authored by Linus Torvalds

Merge branch 'kvm-updates/2.6.33' of git://git.kernel.org/pub/scm/virt/kvm/kvm

* 'kvm-updates/2.6.33' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (84 commits)
  KVM: VMX: Fix comparison of guest efer with stale host value
  KVM: s390: Fix prefix register checking in arch/s390/kvm/sigp.c
  KVM: Drop user return notifier when disabling virtualization on a cpu
  KVM: VMX: Disable unrestricted guest when EPT disabled
  KVM: x86 emulator: limit instructions to 15 bytes
  KVM: s390: Make psw available on all exits, not just a subset
  KVM: x86: Add KVM_GET/SET_VCPU_EVENTS
  KVM: VMX: Report unexpected simultaneous exceptions as internal errors
  KVM: Allow internal errors reported to userspace to carry extra data
  KVM: Reorder IOCTLs in main kvm.h
  KVM: x86: Polish exception injection via KVM_SET_GUEST_DEBUG
  KVM: only clear irq_source_id if irqchip is present
  KVM: x86: disallow KVM_{SET,GET}_LAPIC without allocated in-kernel lapic
  KVM: x86: disallow multiple KVM_CREATE_IRQCHIP
  KVM: VMX: Remove vmx->msr_offset_efer
  KVM: MMU: update invlpg handler comment
  KVM: VMX: move CR3/PDPTR update to vmx_set_cr3
  KVM: remove duplicated task_switch check
  KVM: powerpc: Fix BUILD_BUG_ON condition
  KVM: VMX: Use shared msr infrastructure
  ...

Trivial conflicts due to new Kconfig options in arch/Kconfig and kernel/Makefile
......@@ -593,6 +593,115 @@ struct kvm_irqchip {
} chip;
};
4.27 KVM_XEN_HVM_CONFIG

Capability: KVM_CAP_XEN_HVM
Architectures: x86
Type: vm ioctl
Parameters: struct kvm_xen_hvm_config (in)
Returns: 0 on success, -1 on error

Sets the MSR that the Xen HVM guest uses to initialize its hypercall
page, and provides the starting address and size of the hypercall
blobs in userspace. When the guest writes the MSR, kvm copies one
page of a blob (32- or 64-bit, depending on the vcpu mode) to guest
memory.

struct kvm_xen_hvm_config {
__u32 flags;
__u32 msr;
__u64 blob_addr_32;
__u64 blob_addr_64;
__u8 blob_size_32;
__u8 blob_size_64;
__u8 pad2[30];
};
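
A minimal userspace sketch of the sequence just described (not part of
this merge; vm_fd is assumed to come from KVM_CREATE_VM, the blob
buffers from the surrounding VMM, and the MSR index is illustrative):

#include <linux/kvm.h>
#include <string.h>
#include <sys/ioctl.h>

static int enable_xen_hvm(int vm_fd, void *blob32, __u8 pages32,
                          void *blob64, __u8 pages64)
{
	struct kvm_xen_hvm_config cfg;

	memset(&cfg, 0, sizeof(cfg));
	cfg.msr = 0x40000000;		/* illustrative MSR index */
	cfg.blob_addr_32 = (__u64)(unsigned long)blob32;
	cfg.blob_addr_64 = (__u64)(unsigned long)blob64;
	cfg.blob_size_32 = pages32;	/* blob sizes, in pages */
	cfg.blob_size_64 = pages64;

	return ioctl(vm_fd, KVM_XEN_HVM_CONFIG, &cfg);	/* 0 on success */
}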
4.28 KVM_GET_CLOCK

Capability: KVM_CAP_ADJUST_CLOCK
Architectures: x86
Type: vm ioctl
Parameters: struct kvm_clock_data (out)
Returns: 0 on success, -1 on error

Gets the current timestamp of kvmclock as seen by the current guest. In
conjunction with KVM_SET_CLOCK, it is used to ensure monotonicity in
scenarios such as migration (a combined sketch follows KVM_SET_CLOCK below).

struct kvm_clock_data {
__u64 clock; /* kvmclock current value */
__u32 flags;
__u32 pad[9];
};
4.29 KVM_SET_CLOCK

Capability: KVM_CAP_ADJUST_CLOCK
Architectures: x86
Type: vm ioctl
Parameters: struct kvm_clock_data (in)
Returns: 0 on success, -1 on error

Sets the current timestamp of kvmclock to the value specified in its
parameter. In conjunction with KVM_GET_CLOCK, it is used to ensure
monotonicity in scenarios such as migration.

struct kvm_clock_data {
__u64 clock; /* kvmclock current value */
__u32 flags;
__u32 pad[9];
};
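
A hedged sketch of the migration flow both ioctls serve (vm_fd handling
and the transport between source and destination hosts are assumed):

#include <linux/kvm.h>
#include <sys/ioctl.h>

/* On the source host: capture the guest's kvmclock value. */
static int save_kvmclock(int vm_fd, struct kvm_clock_data *data)
{
	return ioctl(vm_fd, KVM_GET_CLOCK, data);
}

/* On the destination host: restore it so guest time never jumps back. */
static int restore_kvmclock(int vm_fd, struct kvm_clock_data *data)
{
	data->flags = 0;	/* no flags defined yet; must be zero */
	return ioctl(vm_fd, KVM_SET_CLOCK, data);
}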
4.30 KVM_GET_VCPU_EVENTS

Capability: KVM_CAP_VCPU_EVENTS
Architectures: x86
Type: vcpu ioctl
Parameters: struct kvm_vcpu_events (out)
Returns: 0 on success, -1 on error

Gets currently pending exceptions, interrupts, and NMIs as well as related
states of the vcpu.

struct kvm_vcpu_events {
struct {
__u8 injected;
__u8 nr;
__u8 has_error_code;
__u8 pad;
__u32 error_code;
} exception;
struct {
__u8 injected;
__u8 nr;
__u8 soft;
__u8 pad;
} interrupt;
struct {
__u8 injected;
__u8 pending;
__u8 masked;
__u8 pad;
} nmi;
__u32 sipi_vector;
__u32 flags; /* must be zero */
};
4.31 KVM_SET_VCPU_EVENTS

Capability: KVM_CAP_VCPU_EVENTS
Architectures: x86
Type: vcpu ioctl
Parameters: struct kvm_vcpu_events (in)
Returns: 0 on success, -1 on error

Sets pending exceptions, interrupts, and NMIs as well as related states of the
vcpu.

See KVM_GET_VCPU_EVENTS for the data structure; a usage sketch follows.
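
A hedged sketch (vcpu_fd handling is assumed) of using the pair to
transfer pending event state, e.g. during save/restore:

#include <linux/kvm.h>
#include <sys/ioctl.h>

static int transfer_vcpu_events(int src_vcpu_fd, int dst_vcpu_fd)
{
	struct kvm_vcpu_events events;

	if (ioctl(src_vcpu_fd, KVM_GET_VCPU_EVENTS, &events) < 0)
		return -1;
	events.flags = 0;	/* flags must be zero on input */
	return ioctl(dst_vcpu_fd, KVM_SET_VCPU_EVENTS, &events);
}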
5. The kvm_run structure
Application code obtains a pointer to the kvm_run structure by
......
......@@ -83,6 +83,13 @@ config KRETPROBES
def_bool y
depends on KPROBES && HAVE_KRETPROBES
config USER_RETURN_NOTIFIER
bool
depends on HAVE_USER_RETURN_NOTIFIER
help
Provide a kernel-internal notification when a cpu is about to
switch to user mode.
config HAVE_IOREMAP_PROT
bool
......@@ -132,5 +139,7 @@ config HAVE_HW_BREAKPOINT
select ANON_INODES
select PERF_EVENTS
config HAVE_USER_RETURN_NOTIFIER
bool
source "kernel/gcov/Kconfig"
......@@ -60,6 +60,7 @@ struct kvm_ioapic_state {
#define KVM_IRQCHIP_PIC_MASTER 0
#define KVM_IRQCHIP_PIC_SLAVE 1
#define KVM_IRQCHIP_IOAPIC 2
#define KVM_NR_IRQCHIPS 3
#define KVM_CONTEXT_SIZE 8*1024
......
......@@ -475,7 +475,6 @@ struct kvm_arch {
struct list_head assigned_dev_head;
struct iommu_domain *iommu_domain;
int iommu_flags;
struct hlist_head irq_ack_notifier_list;
unsigned long irq_sources_bitmap;
unsigned long irq_states[KVM_IOAPIC_NUM_PINS];
......
......@@ -49,7 +49,7 @@ EXTRA_CFLAGS += -Ivirt/kvm -Iarch/ia64/kvm/
EXTRA_AFLAGS += -Ivirt/kvm -Iarch/ia64/kvm/
common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \
coalesced_mmio.o irq_comm.o)
coalesced_mmio.o irq_comm.o assigned-dev.o)
ifeq ($(CONFIG_IOMMU_API),y)
common-objs += $(addprefix ../../../virt/kvm/, iommu.o)
......
......@@ -124,7 +124,7 @@ long ia64_pal_vp_create(u64 *vpd, u64 *host_iva, u64 *opt_handler)
static DEFINE_SPINLOCK(vp_lock);
void kvm_arch_hardware_enable(void *garbage)
int kvm_arch_hardware_enable(void *garbage)
{
long status;
long tmp_base;
......@@ -137,7 +137,7 @@ void kvm_arch_hardware_enable(void *garbage)
slot = ia64_itr_entry(0x3, KVM_VMM_BASE, pte, KVM_VMM_SHIFT);
local_irq_restore(saved_psr);
if (slot < 0)
return;
return -EINVAL;
spin_lock(&vp_lock);
status = ia64_pal_vp_init_env(kvm_vsa_base ?
......@@ -145,7 +145,7 @@ void kvm_arch_hardware_enable(void *garbage)
__pa(kvm_vm_buffer), KVM_VM_BUFFER_BASE, &tmp_base);
if (status != 0) {
printk(KERN_WARNING"kvm: Failed to Enable VT Support!!!!\n");
return ;
return -EINVAL;
}
if (!kvm_vsa_base) {
......@@ -154,6 +154,8 @@ void kvm_arch_hardware_enable(void *garbage)
}
spin_unlock(&vp_lock);
ia64_ptr_entry(0x3, slot);
return 0;
}
void kvm_arch_hardware_disable(void *garbage)
......@@ -851,8 +853,7 @@ static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm,
r = 0;
switch (chip->chip_id) {
case KVM_IRQCHIP_IOAPIC:
memcpy(&chip->chip.ioapic, ioapic_irqchip(kvm),
sizeof(struct kvm_ioapic_state));
r = kvm_get_ioapic(kvm, &chip->chip.ioapic);
break;
default:
r = -EINVAL;
......@@ -868,9 +869,7 @@ static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
r = 0;
switch (chip->chip_id) {
case KVM_IRQCHIP_IOAPIC:
memcpy(ioapic_irqchip(kvm),
&chip->chip.ioapic,
sizeof(struct kvm_ioapic_state));
r = kvm_set_ioapic(kvm, &chip->chip.ioapic);
break;
default:
r = -EINVAL;
......@@ -944,7 +943,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
{
struct kvm *kvm = filp->private_data;
void __user *argp = (void __user *)arg;
int r = -EINVAL;
int r = -ENOTTY;
switch (ioctl) {
case KVM_SET_MEMORY_REGION: {
......@@ -985,10 +984,8 @@ long kvm_arch_vm_ioctl(struct file *filp,
goto out;
if (irqchip_in_kernel(kvm)) {
__s32 status;
mutex_lock(&kvm->irq_lock);
status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
irq_event.irq, irq_event.level);
mutex_unlock(&kvm->irq_lock);
if (ioctl == KVM_IRQ_LINE_STATUS) {
irq_event.status = status;
if (copy_to_user(argp, &irq_event,
......
......@@ -78,8 +78,9 @@ int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu)
return r;
}
void kvm_arch_hardware_enable(void *garbage)
int kvm_arch_hardware_enable(void *garbage)
{
return 0;
}
void kvm_arch_hardware_disable(void *garbage)
......@@ -421,7 +422,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
switch (ioctl) {
default:
r = -EINVAL;
r = -ENOTTY;
}
return r;
......
......@@ -51,7 +51,7 @@ static inline void kvmppc_account_exit_stat(struct kvm_vcpu *vcpu, int type)
/* The BUILD_BUG_ON below breaks in funny ways, commented out
* for now ... -BenH
BUILD_BUG_ON(__builtin_constant_p(type));
BUILD_BUG_ON(!__builtin_constant_p(type));
*/
switch (type) {
case EXT_INTR_EXITS:
......
#ifndef __LINUX_KVM_S390_H
#define __LINUX_KVM_S390_H
/*
* asm-s390/kvm.h - KVM s390 specific structures and definitions
*
......@@ -15,6 +14,8 @@
*/
#include <linux/types.h>
#define __KVM_S390
/* for KVM_GET_REGS and KVM_SET_REGS */
struct kvm_regs {
/* general purpose regs for s390 */
......
......@@ -74,9 +74,10 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
static unsigned long long *facilities;
/* Section: not file related */
void kvm_arch_hardware_enable(void *garbage)
int kvm_arch_hardware_enable(void *garbage)
{
/* every s390 is virtualization enabled ;-) */
return 0;
}
void kvm_arch_hardware_disable(void *garbage)
......@@ -116,10 +117,16 @@ long kvm_arch_dev_ioctl(struct file *filp,
int kvm_dev_ioctl_check_extension(long ext)
{
int r;
switch (ext) {
case KVM_CAP_S390_PSW:
r = 1;
break;
default:
return 0;
r = 0;
}
return r;
}
/* Section: vm related */
......@@ -150,7 +157,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
break;
}
default:
r = -EINVAL;
r = -ENOTTY;
}
return r;
......@@ -419,8 +426,10 @@ static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
vcpu_load(vcpu);
if (atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_RUNNING)
rc = -EBUSY;
else
vcpu->arch.sie_block->gpsw = psw;
else {
vcpu->run->psw_mask = psw.mask;
vcpu->run->psw_addr = psw.addr;
}
vcpu_put(vcpu);
return rc;
}
......@@ -508,9 +517,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
switch (kvm_run->exit_reason) {
case KVM_EXIT_S390_SIEIC:
vcpu->arch.sie_block->gpsw.mask = kvm_run->s390_sieic.mask;
vcpu->arch.sie_block->gpsw.addr = kvm_run->s390_sieic.addr;
break;
case KVM_EXIT_UNKNOWN:
case KVM_EXIT_INTR:
case KVM_EXIT_S390_RESET:
......@@ -519,6 +525,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
BUG();
}
vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
might_fault();
do {
......@@ -538,8 +547,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
/* intercept cannot be handled in-kernel, prepare kvm-run */
kvm_run->exit_reason = KVM_EXIT_S390_SIEIC;
kvm_run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
kvm_run->s390_sieic.mask = vcpu->arch.sie_block->gpsw.mask;
kvm_run->s390_sieic.addr = vcpu->arch.sie_block->gpsw.addr;
kvm_run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
kvm_run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
rc = 0;
......@@ -551,6 +558,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
rc = 0;
}
kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
if (vcpu->sigset_active)
sigprocmask(SIG_SETMASK, &sigsaved, NULL);
......
......@@ -188,9 +188,9 @@ static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address,
/* make sure that the new value is valid memory */
address = address & 0x7fffe000u;
if ((copy_from_guest(vcpu, &tmp,
(u64) (address + vcpu->arch.sie_block->gmsor) , 1)) ||
(copy_from_guest(vcpu, &tmp, (u64) (address +
if ((copy_from_user(&tmp, (void __user *)
(address + vcpu->arch.sie_block->gmsor) , 1)) ||
(copy_from_user(&tmp, (void __user *)(address +
vcpu->arch.sie_block->gmsor + PAGE_SIZE), 1))) {
*reg |= SIGP_STAT_INVALID_PARAMETER;
return 1; /* invalid parameter */
......
......@@ -51,6 +51,7 @@ config X86
select HAVE_KERNEL_LZMA
select HAVE_HW_BREAKPOINT
select HAVE_ARCH_KMEMCHECK
select HAVE_USER_RETURN_NOTIFIER
config OUTPUT_FORMAT
string
......
......@@ -19,6 +19,8 @@
#define __KVM_HAVE_MSIX
#define __KVM_HAVE_MCE
#define __KVM_HAVE_PIT_STATE2
#define __KVM_HAVE_XEN_HVM
#define __KVM_HAVE_VCPU_EVENTS
/* Architectural interrupt line count. */
#define KVM_NR_INTERRUPTS 256
......@@ -79,6 +81,7 @@ struct kvm_ioapic_state {
#define KVM_IRQCHIP_PIC_MASTER 0
#define KVM_IRQCHIP_PIC_SLAVE 1
#define KVM_IRQCHIP_IOAPIC 2
#define KVM_NR_IRQCHIPS 3
/* for KVM_GET_REGS and KVM_SET_REGS */
struct kvm_regs {
......@@ -250,4 +253,31 @@ struct kvm_reinject_control {
__u8 pit_reinject;
__u8 reserved[31];
};
/* for KVM_GET/SET_VCPU_EVENTS */
struct kvm_vcpu_events {
struct {
__u8 injected;
__u8 nr;
__u8 has_error_code;
__u8 pad;
__u32 error_code;
} exception;
struct {
__u8 injected;
__u8 nr;
__u8 soft;
__u8 pad;
} interrupt;
struct {
__u8 injected;
__u8 pending;
__u8 masked;
__u8 pad;
} nmi;
__u32 sipi_vector;
__u32 flags;
__u32 reserved[10];
};
#endif /* _ASM_X86_KVM_H */
......@@ -129,7 +129,7 @@ struct decode_cache {
u8 seg_override;
unsigned int d;
unsigned long regs[NR_VCPU_REGS];
unsigned long eip;
unsigned long eip, eip_orig;
/* modrm */
u8 modrm;
u8 modrm_mod;
......
......@@ -354,7 +354,6 @@ struct kvm_vcpu_arch {
unsigned int time_offset;
struct page *time_page;
bool singlestep; /* guest is single stepped by KVM */
bool nmi_pending;
bool nmi_injected;
......@@ -371,6 +370,10 @@ struct kvm_vcpu_arch {
u64 mcg_status;
u64 mcg_ctl;
u64 *mce_banks;
/* used for guest single stepping over the given code position */
u16 singlestep_cs;
unsigned long singlestep_rip;
};
struct kvm_mem_alias {
......@@ -397,7 +400,6 @@ struct kvm_arch{
struct kvm_pic *vpic;
struct kvm_ioapic *vioapic;
struct kvm_pit *vpit;
struct hlist_head irq_ack_notifier_list;
int vapics_in_nmi_mode;
unsigned int tss_addr;
......@@ -410,8 +412,10 @@ struct kvm_arch{
gpa_t ept_identity_map_addr;
unsigned long irq_sources_bitmap;
unsigned long irq_states[KVM_IOAPIC_NUM_PINS];
u64 vm_init_tsc;
s64 kvmclock_offset;
struct kvm_xen_hvm_config xen_hvm_config;
};
struct kvm_vm_stat {
......@@ -461,7 +465,7 @@ struct descriptor_table {
struct kvm_x86_ops {
int (*cpu_has_kvm_support)(void); /* __init */
int (*disabled_by_bios)(void); /* __init */
void (*hardware_enable)(void *dummy); /* __init */
int (*hardware_enable)(void *dummy);
void (*hardware_disable)(void *dummy);
void (*check_processor_compatibility)(void *rtn);
int (*hardware_setup)(void); /* __init */
......@@ -477,8 +481,8 @@ struct kvm_x86_ops {
void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu);
void (*vcpu_put)(struct kvm_vcpu *vcpu);
int (*set_guest_debug)(struct kvm_vcpu *vcpu,
struct kvm_guest_debug *dbg);
void (*set_guest_debug)(struct kvm_vcpu *vcpu,
struct kvm_guest_debug *dbg);
int (*get_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata);
int (*set_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 data);
u64 (*get_segment_base)(struct kvm_vcpu *vcpu, int seg);
......@@ -506,8 +510,8 @@ struct kvm_x86_ops {
void (*tlb_flush)(struct kvm_vcpu *vcpu);
void (*run)(struct kvm_vcpu *vcpu, struct kvm_run *run);
int (*handle_exit)(struct kvm_run *run, struct kvm_vcpu *vcpu);
void (*run)(struct kvm_vcpu *vcpu);
int (*handle_exit)(struct kvm_vcpu *vcpu);
void (*skip_emulated_instruction)(struct kvm_vcpu *vcpu);
void (*set_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask);
u32 (*get_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask);
......@@ -519,6 +523,8 @@ struct kvm_x86_ops {
bool has_error_code, u32 error_code);
int (*interrupt_allowed)(struct kvm_vcpu *vcpu);
int (*nmi_allowed)(struct kvm_vcpu *vcpu);
bool (*get_nmi_mask)(struct kvm_vcpu *vcpu);
void (*set_nmi_mask)(struct kvm_vcpu *vcpu, bool masked);
void (*enable_nmi_window)(struct kvm_vcpu *vcpu);
void (*enable_irq_window)(struct kvm_vcpu *vcpu);
void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr);
......@@ -568,7 +574,7 @@ enum emulation_result {
#define EMULTYPE_NO_DECODE (1 << 0)
#define EMULTYPE_TRAP_UD (1 << 1)
#define EMULTYPE_SKIP (1 << 2)
int emulate_instruction(struct kvm_vcpu *vcpu, struct kvm_run *run,
int emulate_instruction(struct kvm_vcpu *vcpu,
unsigned long cr2, u16 error_code, int emulation_type);
void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context);
void realmode_lgdt(struct kvm_vcpu *vcpu, u16 size, unsigned long address);
......@@ -585,9 +591,9 @@ int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data);
struct x86_emulate_ctxt;
int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
int kvm_emulate_pio(struct kvm_vcpu *vcpu, int in,
int size, unsigned port);
int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, int in,
int size, unsigned long count, int down,
gva_t address, int rep, unsigned port);
void kvm_emulate_cpuid(struct kvm_vcpu *vcpu);
......@@ -616,6 +622,9 @@ void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l);
int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata);
int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data);
unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu);
void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags);
void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr);
void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code);
void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long cr2,
......@@ -802,4 +811,7 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu);
int kvm_cpu_get_interrupt(struct kvm_vcpu *v);
void kvm_define_shared_msr(unsigned index, u32 msr);
void kvm_set_shared_msr(unsigned index, u64 val, u64 mask);
#endif /* _ASM_X86_KVM_HOST_H */
......@@ -57,7 +57,8 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
u16 intercept_dr_write;
u32 intercept_exceptions;
u64 intercept;
u8 reserved_1[44];
u8 reserved_1[42];
u16 pause_filter_count;
u64 iopm_base_pa;
u64 msrpm_base_pa;
u64 tsc_offset;
......
......@@ -83,6 +83,7 @@ struct thread_info {
#define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */
#define TIF_SECCOMP 8 /* secure computing */
#define TIF_MCE_NOTIFY 10 /* notify userspace of an MCE */
#define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */
#define TIF_NOTSC 16 /* TSC is not accessible in userland */
#define TIF_IA32 17 /* 32bit process */
#define TIF_FORK 18 /* ret_from_fork */
......@@ -107,6 +108,7 @@ struct thread_info {
#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
#define _TIF_SECCOMP (1 << TIF_SECCOMP)
#define _TIF_MCE_NOTIFY (1 << TIF_MCE_NOTIFY)
#define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY)
#define _TIF_NOTSC (1 << TIF_NOTSC)
#define _TIF_IA32 (1 << TIF_IA32)
#define _TIF_FORK (1 << TIF_FORK)
......@@ -142,13 +144,14 @@ struct thread_info {
/* Only used for 64 bit */
#define _TIF_DO_NOTIFY_MASK \
(_TIF_SIGPENDING|_TIF_MCE_NOTIFY|_TIF_NOTIFY_RESUME)
(_TIF_SIGPENDING | _TIF_MCE_NOTIFY | _TIF_NOTIFY_RESUME | \
_TIF_USER_RETURN_NOTIFY)
/* flags to check in __switch_to() */
#define _TIF_WORK_CTXSW \
(_TIF_IO_BITMAP|_TIF_DEBUGCTLMSR|_TIF_DS_AREA_MSR|_TIF_NOTSC)
#define _TIF_WORK_CTXSW_PREV _TIF_WORK_CTXSW
#define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY)
#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW|_TIF_DEBUG)
#define PREEMPT_ACTIVE 0x10000000
......
......@@ -56,6 +56,7 @@
#define SECONDARY_EXEC_ENABLE_VPID 0x00000020
#define SECONDARY_EXEC_WBINVD_EXITING 0x00000040
#define SECONDARY_EXEC_UNRESTRICTED_GUEST 0x00000080
#define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400
#define PIN_BASED_EXT_INTR_MASK 0x00000001
......@@ -144,6 +145,8 @@ enum vmcs_field {
VM_ENTRY_INSTRUCTION_LEN = 0x0000401a,
TPR_THRESHOLD = 0x0000401c,
SECONDARY_VM_EXEC_CONTROL = 0x0000401e,
PLE_GAP = 0x00004020,
PLE_WINDOW = 0x00004022,
VM_INSTRUCTION_ERROR = 0x00004400,
VM_EXIT_REASON = 0x00004402,
VM_EXIT_INTR_INFO = 0x00004404,
......@@ -248,6 +251,7 @@ enum vmcs_field {
#define EXIT_REASON_MSR_READ 31
#define EXIT_REASON_MSR_WRITE 32
#define EXIT_REASON_MWAIT_INSTRUCTION 36
#define EXIT_REASON_PAUSE_INSTRUCTION 40
#define EXIT_REASON_MCE_DURING_VMENTRY 41
#define EXIT_REASON_TPR_BELOW_THRESHOLD 43
#define EXIT_REASON_APIC_ACCESS 44
......
......@@ -9,6 +9,7 @@
#include <linux/pm.h>
#include <linux/clockchips.h>
#include <linux/random.h>
#include <linux/user-return-notifier.h>
#include <trace/events/power.h>
#include <linux/hw_breakpoint.h>
#include <asm/system.h>
......@@ -209,6 +210,7 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
*/
memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
}
propagate_user_return_notify(prev_p, next_p);
}
int sys_fork(struct pt_regs *regs)
......
......@@ -19,6 +19,7 @@
#include <linux/stddef.h>
#include <linux/personality.h>
#include <linux/uaccess.h>
#include <linux/user-return-notifier.h>
#include <asm/processor.h>
#include <asm/ucontext.h>
......@@ -863,6 +864,8 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
if (current->replacement_session_keyring)
key_replace_session_keyring();
}
if (thread_info_flags & _TIF_USER_RETURN_NOTIFY)
fire_user_return_notifiers();
#ifdef CONFIG_X86_32
clear_thread_flag(TIF_IRET);
......
......@@ -28,6 +28,7 @@ config KVM
select HAVE_KVM_IRQCHIP
select HAVE_KVM_EVENTFD
select KVM_APIC_ARCHITECTURE
select USER_RETURN_NOTIFIER
---help---
Support hosting fully virtualized guest machines using hardware
virtualization extensions. You will need a fairly recent
......
......@@ -6,7 +6,8 @@ CFLAGS_svm.o := -I.
CFLAGS_vmx.o := -I.
kvm-y += $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \
coalesced_mmio.o irq_comm.o eventfd.o)
coalesced_mmio.o irq_comm.o eventfd.o \
assigned-dev.o)
kvm-$(CONFIG_IOMMU_API) += $(addprefix ../../../virt/kvm/, iommu.o)
kvm-y += x86.o mmu.o emulate.o i8259.o irq.o lapic.o \
......
......@@ -75,6 +75,8 @@
#define Group (1<<14) /* Bits 3:5 of modrm byte extend opcode */
#define GroupDual (1<<15) /* Alternate decoding of mod == 3 */
#define GroupMask 0xff /* Group number stored in bits 0:7 */
/* Misc flags */
#define No64 (1<<28)
/* Source 2 operand type */
#define Src2None (0<<29)
#define Src2CL (1<<29)
......@@ -92,19 +94,23 @@ static u32 opcode_table[256] = {
/* 0x00 - 0x07 */
ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
ByteOp | DstAcc | SrcImm, DstAcc | SrcImm, 0, 0,
ByteOp | DstAcc | SrcImm, DstAcc | SrcImm,
ImplicitOps | Stack | No64, ImplicitOps | Stack | No64,
/* 0x08 - 0x0F */
ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
0, 0, 0, 0,
ByteOp | DstAcc | SrcImm, DstAcc | SrcImm,
ImplicitOps | Stack | No64, 0,
/* 0x10 - 0x17 */
ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
ByteOp | DstAcc | SrcImm, DstAcc | SrcImm, 0, 0,
ByteOp | DstAcc | SrcImm, DstAcc | SrcImm,
ImplicitOps | Stack | No64, ImplicitOps | Stack | No64,
/* 0x18 - 0x1F */
ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
ByteOp | DstAcc | SrcImm, DstAcc | SrcImm, 0, 0,
ByteOp | DstAcc | SrcImm, DstAcc | SrcImm,
ImplicitOps | Stack | No64, ImplicitOps | Stack | No64,
/* 0x20 - 0x27 */
ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
......@@ -133,7 +139,8 @@ static u32 opcode_table[256] = {
DstReg | Stack, DstReg | Stack, DstReg | Stack, DstReg | Stack,
DstReg | Stack, DstReg | Stack, DstReg | Stack, DstReg | Stack,
/* 0x60 - 0x67 */
0, 0, 0, DstReg | SrcMem32 | ModRM | Mov /* movsxd (x86/64) */ ,
ImplicitOps | Stack | No64, ImplicitOps | Stack | No64,
0, DstReg | SrcMem32 | ModRM | Mov /* movsxd (x86/64) */ ,
0, 0, 0, 0,
/* 0x68 - 0x6F */
SrcImm | Mov | Stack, 0, SrcImmByte | Mov | Stack, 0,
......@@ -158,7 +165,7 @@ static u32 opcode_table[256] = {
/* 0x90 - 0x97 */
DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg,
/* 0x98 - 0x9F */
0, 0, SrcImm | Src2Imm16, 0,
0, 0, SrcImm | Src2Imm16 | No64, 0,
ImplicitOps | Stack, ImplicitOps | Stack, 0, 0,
/* 0xA0 - 0xA7 */
ByteOp | DstReg | SrcMem | Mov | MemAbs, DstReg | SrcMem | Mov | MemAbs,
......@@ -185,7 +192,7 @@ static u32 opcode_table[256] = {
ByteOp | DstMem | SrcImm | ModRM | Mov, DstMem | SrcImm | ModRM | Mov,
/* 0xC8 - 0xCF */
0, 0, 0, ImplicitOps | Stack,
ImplicitOps, SrcImmByte, ImplicitOps, ImplicitOps,
ImplicitOps, SrcImmByte, ImplicitOps | No64, ImplicitOps,
/* 0xD0 - 0xD7 */
ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM,
ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM,
......@@ -198,7 +205,7 @@ static u32 opcode_table[256] = {
ByteOp | SrcImmUByte, SrcImmUByte,
/* 0xE8 - 0xEF */
SrcImm | Stack, SrcImm | ImplicitOps,
SrcImmU | Src2Imm16, SrcImmByte | ImplicitOps,
SrcImmU | Src2Imm16 | No64, SrcImmByte | ImplicitOps,
SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps,
SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps,
/* 0xF0 - 0xF7 */
......@@ -244,11 +251,13 @@ static u32 twobyte_table[256] = {
/* 0x90 - 0x9F */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/* 0xA0 - 0xA7 */
0, 0, 0, DstMem | SrcReg | ModRM | BitOp,
ImplicitOps | Stack, ImplicitOps | Stack,
0, DstMem | SrcReg | ModRM | BitOp,
DstMem | SrcReg | Src2ImmByte | ModRM,
DstMem | SrcReg | Src2CL | ModRM, 0, 0,
/* 0xA8 - 0xAF */
0, 0, 0, DstMem | SrcReg | ModRM | BitOp,
ImplicitOps | Stack, ImplicitOps | Stack,
0, DstMem | SrcReg | ModRM | BitOp,
DstMem | SrcReg | Src2ImmByte | ModRM,
DstMem | SrcReg | Src2CL | ModRM,
ModRM, 0,
......@@ -613,6 +622,9 @@ static int do_insn_fetch(struct x86_emulate_ctxt *ctxt,
{
int rc = 0;
/* x86 instructions are limited to 15 bytes. */
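/* (15 bytes is the architectural maximum; the check also keeps a
 * malicious guest from stalling the emulator with an unbounded
 * stream of prefix bytes) */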
if (eip + size - ctxt->decode.eip_orig > 15)
return X86EMUL_UNHANDLEABLE;
eip += ctxt->cs_base;
while (size--) {
rc = do_fetch_insn_byte(ctxt, ops, eip++, dest++);
......@@ -871,7 +883,7 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
/* Shadow copy of register state. Committed on successful emulation. */
memset(c, 0, sizeof(struct decode_cache));
c->eip = kvm_rip_read(ctxt->vcpu);
c->eip = c->eip_orig = kvm_rip_read(ctxt->vcpu);
ctxt->cs_base = seg_base(ctxt, VCPU_SREG_CS);
memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs);
......@@ -962,6 +974,11 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
}
}
if (mode == X86EMUL_MODE_PROT64 && (c->d & No64)) {
kvm_report_emulation_failure(ctxt->vcpu, "invalid x86/64 instruction");
return -1;
}
if (c->d & Group) {
group = c->d & GroupMask;
c->modrm = insn_fetch(u8, 1, c->eip);
......@@ -1186,6 +1203,69 @@ static int emulate_pop(struct x86_emulate_ctxt *ctxt,
return rc;
}
static void emulate_push_sreg(struct x86_emulate_ctxt *ctxt, int seg)
{
struct decode_cache *c = &ctxt->decode;
struct kvm_segment segment;
kvm_x86_ops->get_segment(ctxt->vcpu, &segment, seg);
c->src.val = segment.selector;
emulate_push(ctxt);
}
static int emulate_pop_sreg(struct x86_emulate_ctxt *ctxt,
struct x86_emulate_ops *ops, int seg)
{
struct decode_cache *c = &ctxt->decode;
unsigned long selector;
int rc;
rc = emulate_pop(ctxt, ops, &selector, c->op_bytes);
if (rc != 0)
return rc;
rc = kvm_load_segment_descriptor(ctxt->vcpu, (u16)selector, 1, seg);
return rc;
}
static void emulate_pusha(struct x86_emulate_ctxt *ctxt)
{
struct decode_cache *c = &ctxt->decode;
unsigned long old_esp = c->regs[VCPU_REGS_RSP];
int reg = VCPU_REGS_RAX;
while (reg <= VCPU_REGS_RDI) {
(reg == VCPU_REGS_RSP) ?
(c->src.val = old_esp) : (c->src.val = c->regs[reg]);
emulate_push(ctxt);
++reg;
}
}
static int emulate_popa(struct x86_emulate_ctxt *ctxt,
struct x86_emulate_ops *ops)
{
struct decode_cache *c = &ctxt->decode;
int rc = 0;
int reg = VCPU_REGS_RDI;
while (reg >= VCPU_REGS_RAX) {
if (reg == VCPU_REGS_RSP) {
register_address_increment(c, &c->regs[VCPU_REGS_RSP],
c->op_bytes);
--reg;
}
rc = emulate_pop(ctxt, ops, &c->regs[reg], c->op_bytes);
if (rc != 0)
break;
--reg;
}
return rc;
}
static inline int emulate_grp1a(struct x86_emulate_ctxt *ctxt,
struct x86_emulate_ops *ops)
{
......@@ -1707,18 +1787,45 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
add: /* add */
emulate_2op_SrcV("add", c->src, c->dst, ctxt->eflags);
break;
case 0x06: /* push es */
emulate_push_sreg(ctxt, VCPU_SREG_ES);
break;
case 0x07: /* pop es */
rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_ES);
if (rc != 0)
goto done;
break;
case 0x08 ... 0x0d:
or: /* or */
emulate_2op_SrcV("or", c->src, c->dst, ctxt->eflags);
break;
case 0x0e: /* push cs */
emulate_push_sreg(ctxt, VCPU_SREG_CS);
break;
case 0x10 ... 0x15:
adc: /* adc */
emulate_2op_SrcV("adc", c->src, c->dst, ctxt->eflags);
break;
case 0x16: /* push ss */
emulate_push_sreg(ctxt, VCPU_SREG_SS);
break;
case 0x17: /* pop ss */
rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_SS);
if (rc != 0)
goto done;
break;
case 0x18 ... 0x1d:
sbb: /* sbb */
emulate_2op_SrcV("sbb", c->src, c->dst, ctxt->eflags);
break;
case 0x1e: /* push ds */
emulate_push_sreg(ctxt, VCPU_SREG_DS);
break;
case 0x1f: /* pop ds */
rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_DS);
if (rc != 0)
goto done;
break;
case 0x20 ... 0x25:
and: /* and */
emulate_2op_SrcV("and", c->src, c->dst, ctxt->eflags);
......@@ -1750,6 +1857,14 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
if (rc != 0)
goto done;
break;
case 0x60: /* pusha */
emulate_pusha(ctxt);
break;
case 0x61: /* popa */
rc = emulate_popa(ctxt, ops);
if (rc != 0)
goto done;
break;
case 0x63: /* movsxd */
if (ctxt->mode != X86EMUL_MODE_PROT64)
goto cannot_emulate;
......@@ -1761,7 +1876,7 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
break;
case 0x6c: /* insb */
case 0x6d: /* insw/insd */
if (kvm_emulate_pio_string(ctxt->vcpu, NULL,
if (kvm_emulate_pio_string(ctxt->vcpu,
1,
(c->d & ByteOp) ? 1 : c->op_bytes,
c->rep_prefix ?
......@@ -1777,7 +1892,7 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
return 0;
case 0x6e: /* outsb */
case 0x6f: /* outsw/outsd */
if (kvm_emulate_pio_string(ctxt->vcpu, NULL,
if (kvm_emulate_pio_string(ctxt->vcpu,
0,
(c->d & ByteOp) ? 1 : c->op_bytes,
c->rep_prefix ?
......@@ -2070,7 +2185,7 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
case 0xef: /* out (e/r)ax,dx */
port = c->regs[VCPU_REGS_RDX];
io_dir_in = 0;
do_io: if (kvm_emulate_pio(ctxt->vcpu, NULL, io_dir_in,
do_io: if (kvm_emulate_pio(ctxt->vcpu, io_dir_in,
(c->d & ByteOp) ? 1 : c->op_bytes,
port) != 0) {
c->eip = saved_eip;
......@@ -2297,6 +2412,14 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
jmp_rel(c, c->src.val);
c->dst.type = OP_NONE;
break;
case 0xa0: /* push fs */
emulate_push_sreg(ctxt, VCPU_SREG_FS);
break;
case 0xa1: /* pop fs */
rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_FS);
if (rc != 0)
goto done;
break;
case 0xa3:
bt: /* bt */
c->dst.type = OP_NONE;
......@@ -2308,6 +2431,14 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
case 0xa5: /* shld cl, r, r/m */
emulate_2op_cl("shld", c->src2, c->src, c->dst, ctxt->eflags);
break;
case 0xa8: /* push gs */
emulate_push_sreg(ctxt, VCPU_SREG_GS);
break;
case 0xa9: /* pop gs */
rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_GS);
if (rc != 0)
goto done;
break;
case 0xab:
bts: /* bts */
/* only subword offset */
......
......@@ -688,10 +688,8 @@ static void __inject_pit_timer_intr(struct kvm *kvm)
struct kvm_vcpu *vcpu;
int i;
mutex_lock(&kvm->irq_lock);
kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1);
kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0);
mutex_unlock(&kvm->irq_lock);
/*
* Provides NMI watchdog support via Virtual Wire mode.
......
......@@ -38,7 +38,15 @@ static void pic_clear_isr(struct kvm_kpic_state *s, int irq)
s->isr_ack |= (1 << irq);
if (s != &s->pics_state->pics[0])
irq += 8;
/*
* We are dropping lock while calling ack notifiers since ack
* notifier callbacks for assigned devices call into PIC recursively.
* Other interrupt may be delivered to PIC while lock is dropped but
* it should be safe since PIC state is already updated at this stage.
*/
spin_unlock(&s->pics_state->lock);
kvm_notify_acked_irq(s->pics_state->kvm, SELECT_PIC(irq), irq);
spin_lock(&s->pics_state->lock);
}
void kvm_pic_clear_isr_ack(struct kvm *kvm)
......@@ -176,16 +184,18 @@ int kvm_pic_set_irq(void *opaque, int irq, int level)
static inline void pic_intack(struct kvm_kpic_state *s, int irq)
{
s->isr |= 1 << irq;
if (s->auto_eoi) {
if (s->rotate_on_auto_eoi)
s->priority_add = (irq + 1) & 7;
pic_clear_isr(s, irq);
}
/*
* We don't clear a level sensitive interrupt here
*/
if (!(s->elcr & (1 << irq)))
s->irr &= ~(1 << irq);
if (s->auto_eoi) {
if (s->rotate_on_auto_eoi)
s->priority_add = (irq + 1) & 7;
pic_clear_isr(s, irq);
}
}
int kvm_pic_read_irq(struct kvm *kvm)
......@@ -225,22 +235,11 @@ int kvm_pic_read_irq(struct kvm *kvm)
void kvm_pic_reset(struct kvm_kpic_state *s)
{
int irq, irqbase, n;
int irq;
struct kvm *kvm = s->pics_state->irq_request_opaque;
struct kvm_vcpu *vcpu0 = kvm->bsp_vcpu;
u8 irr = s->irr, isr = s->imr;
if (s == &s->pics_state->pics[0])
irqbase = 0;
else
irqbase = 8;
for (irq = 0; irq < PIC_NUM_PINS/2; irq++) {
if (vcpu0 && kvm_apic_accept_pic_intr(vcpu0))
if (s->irr & (1 << irq) || s->isr & (1 << irq)) {
n = irq + irqbase;
kvm_notify_acked_irq(kvm, SELECT_PIC(n), n);
}
}
s->last_irr = 0;
s->irr = 0;
s->imr = 0;
......@@ -256,6 +255,13 @@ void kvm_pic_reset(struct kvm_kpic_state *s)
s->rotate_on_auto_eoi = 0;
s->special_fully_nested_mode = 0;
s->init4 = 0;
for (irq = 0; irq < PIC_NUM_PINS/2; irq++) {
if (vcpu0 && kvm_apic_accept_pic_intr(vcpu0))
if (irr & (1 << irq) || isr & (1 << irq)) {
pic_clear_isr(s, irq);
}
}
}
static void pic_ioport_write(void *opaque, u32 addr, u32 val)
......@@ -298,9 +304,9 @@ static void pic_ioport_write(void *opaque, u32 addr, u32 val)
priority = get_priority(s, s->isr);
if (priority != 8) {
irq = (priority + s->priority_add) & 7;
pic_clear_isr(s, irq);
if (cmd == 5)
s->priority_add = (irq + 1) & 7;
pic_clear_isr(s, irq);
pic_update_irq(s->pics_state);
}
break;
......
......@@ -71,6 +71,7 @@ struct kvm_pic {
int output; /* intr from master PIC */
struct kvm_io_device dev;
void (*ack_notifier)(void *opaque, int irq);
unsigned long irq_states[16];
};
struct kvm_pic *kvm_create_pic(struct kvm *kvm);
......@@ -85,7 +86,11 @@ static inline struct kvm_pic *pic_irqchip(struct kvm *kvm)
static inline int irqchip_in_kernel(struct kvm *kvm)
{
return pic_irqchip(kvm) != NULL;
int ret;
ret = (pic_irqchip(kvm) != NULL);
smp_rmb();
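/*
 * The read barrier presumably pairs with a write barrier in the
 * KVM_CREATE_IRQCHIP path, so that once vpic is observed non-NULL,
 * the irqchip state set up before it is visible here as well.
 */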
return ret;
}
void kvm_pic_reset(struct kvm_kpic_state *s);
......
......@@ -32,7 +32,6 @@
#include <asm/current.h>
#include <asm/apicdef.h>
#include <asm/atomic.h>
#include <asm/apicdef.h>
#include "kvm_cache_regs.h"
#include "irq.h"
#include "trace.h"
......@@ -471,11 +470,8 @@ static void apic_set_eoi(struct kvm_lapic *apic)
trigger_mode = IOAPIC_LEVEL_TRIG;
else
trigger_mode = IOAPIC_EDGE_TRIG;
if (!(apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI)) {
mutex_lock(&apic->vcpu->kvm->irq_lock);
if (!(apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI))
kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode);
mutex_unlock(&apic->vcpu->kvm->irq_lock);
}
}
static void apic_send_ipi(struct kvm_lapic *apic)
......@@ -504,9 +500,7 @@ static void apic_send_ipi(struct kvm_lapic *apic)
irq.trig_mode, irq.level, irq.dest_mode, irq.delivery_mode,
irq.vector);
mutex_lock(&apic->vcpu->kvm->irq_lock);
kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq);
mutex_unlock(&apic->vcpu->kvm->irq_lock);
}
static u32 apic_get_tmcct(struct kvm_lapic *apic)
......
......@@ -2789,7 +2789,7 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code)
if (r)
goto out;
er = emulate_instruction(vcpu, vcpu->run, cr2, error_code, 0);
er = emulate_instruction(vcpu, cr2, error_code, 0);
switch (er) {
case EMULATE_DONE:
......@@ -2800,6 +2800,7 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code)
case EMULATE_FAIL:
vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
vcpu->run->internal.ndata = 0;
return 0;
default:
BUG();
......
......@@ -467,7 +467,6 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
level = iterator.level;
sptep = iterator.sptep;
/* FIXME: properly handle invlpg on large guest pages */
if (level == PT_PAGE_TABLE_LEVEL ||
((level == PT_DIRECTORY_LEVEL && is_large_pte(*sptep))) ||
((level == PT_PDPE_LEVEL && is_large_pte(*sptep)))) {
......
This diff has been collapsed.
......@@ -349,6 +349,171 @@ TRACE_EVENT(kvm_apic_accept_irq,
__entry->coalesced ? " (coalesced)" : "")
);
/*
* Tracepoint for nested VMRUN
*/
TRACE_EVENT(kvm_nested_vmrun,
TP_PROTO(__u64 rip, __u64 vmcb, __u64 nested_rip, __u32 int_ctl,
__u32 event_inj, bool npt),
TP_ARGS(rip, vmcb, nested_rip, int_ctl, event_inj, npt),
TP_STRUCT__entry(
__field( __u64, rip )
__field( __u64, vmcb )
__field( __u64, nested_rip )
__field( __u32, int_ctl )
__field( __u32, event_inj )
__field( bool, npt )
),
TP_fast_assign(
__entry->rip = rip;
__entry->vmcb = vmcb;
__entry->nested_rip = nested_rip;
__entry->int_ctl = int_ctl;
__entry->event_inj = event_inj;
__entry->npt = npt;
),
TP_printk("rip: 0x%016llx vmcb: 0x%016llx nrip: 0x%016llx int_ctl: 0x%08x "
"event_inj: 0x%08x npt: %s\n",
__entry->rip, __entry->vmcb, __entry->nested_rip,
__entry->int_ctl, __entry->event_inj,
__entry->npt ? "on" : "off")
);
/*
* Tracepoint for #VMEXIT while nested
*/
TRACE_EVENT(kvm_nested_vmexit,
TP_PROTO(__u64 rip, __u32 exit_code,
__u64 exit_info1, __u64 exit_info2,
__u32 exit_int_info, __u32 exit_int_info_err),
TP_ARGS(rip, exit_code, exit_info1, exit_info2,
exit_int_info, exit_int_info_err),
TP_STRUCT__entry(
__field( __u64, rip )
__field( __u32, exit_code )
__field( __u64, exit_info1 )
__field( __u64, exit_info2 )
__field( __u32, exit_int_info )
__field( __u32, exit_int_info_err )
),
TP_fast_assign(
__entry->rip = rip;
__entry->exit_code = exit_code;
__entry->exit_info1 = exit_info1;
__entry->exit_info2 = exit_info2;
__entry->exit_int_info = exit_int_info;
__entry->exit_int_info_err = exit_int_info_err;
),
TP_printk("rip: 0x%016llx reason: %s ext_inf1: 0x%016llx "
"ext_inf2: 0x%016llx ext_int: 0x%08x ext_int_err: 0x%08x\n",
__entry->rip,
ftrace_print_symbols_seq(p, __entry->exit_code,
kvm_x86_ops->exit_reasons_str),
__entry->exit_info1, __entry->exit_info2,
__entry->exit_int_info, __entry->exit_int_info_err)
);
/*
* Tracepoint for #VMEXIT reinjected to the guest
*/
TRACE_EVENT(kvm_nested_vmexit_inject,
TP_PROTO(__u32 exit_code,
__u64 exit_info1, __u64 exit_info2,
__u32 exit_int_info, __u32 exit_int_info_err),
TP_ARGS(exit_code, exit_info1, exit_info2,
exit_int_info, exit_int_info_err),
TP_STRUCT__entry(
__field( __u32, exit_code )
__field( __u64, exit_info1 )
__field( __u64, exit_info2 )
__field( __u32, exit_int_info )
__field( __u32, exit_int_info_err )
),
TP_fast_assign(
__entry->exit_code = exit_code;
__entry->exit_info1 = exit_info1;
__entry->exit_info2 = exit_info2;
__entry->exit_int_info = exit_int_info;
__entry->exit_int_info_err = exit_int_info_err;
),
TP_printk("reason: %s ext_inf1: 0x%016llx "
"ext_inf2: 0x%016llx ext_int: 0x%08x ext_int_err: 0x%08x\n",
ftrace_print_symbols_seq(p, __entry->exit_code,
kvm_x86_ops->exit_reasons_str),
__entry->exit_info1, __entry->exit_info2,
__entry->exit_int_info, __entry->exit_int_info_err)
);
/*
* Tracepoint for nested #vmexit because of interrupt pending
*/
TRACE_EVENT(kvm_nested_intr_vmexit,
TP_PROTO(__u64 rip),
TP_ARGS(rip),
TP_STRUCT__entry(
__field( __u64, rip )
),
TP_fast_assign(
__entry->rip = rip
),
TP_printk("rip: 0x%016llx\n", __entry->rip)
);
/*
* Tracepoint for INVLPGA emulation
*/
TRACE_EVENT(kvm_invlpga,
TP_PROTO(__u64 rip, int asid, u64 address),
TP_ARGS(rip, asid, address),
TP_STRUCT__entry(
__field( __u64, rip )
__field( int, asid )
__field( __u64, address )
),
TP_fast_assign(
__entry->rip = rip;
__entry->asid = asid;
__entry->address = address;
),
TP_printk("rip: 0x%016llx asid: %d address: 0x%016llx\n",
__entry->rip, __entry->asid, __entry->address)
);
/*
* Tracepoint for SKINIT emulation
*/
TRACE_EVENT(kvm_skinit,
TP_PROTO(__u64 rip, __u32 slb),
TP_ARGS(rip, slb),
TP_STRUCT__entry(
__field( __u64, rip )
__field( __u32, slb )
),
TP_fast_assign(
__entry->rip = rip;
__entry->slb = slb;
),
TP_printk("rip: 0x%016llx slb: 0x%08x\n",
__entry->rip, __entry->slb)
);
#endif /* _TRACE_KVM_H */
/* This part must be outside protection */
......
This diff has been collapsed.
This diff has been collapsed.
......@@ -14,12 +14,76 @@
#define KVM_API_VERSION 12
/* for KVM_TRACE_ENABLE, deprecated */
/* *** Deprecated interfaces *** */
#define KVM_TRC_SHIFT 16
#define KVM_TRC_ENTRYEXIT (1 << KVM_TRC_SHIFT)
#define KVM_TRC_HANDLER (1 << (KVM_TRC_SHIFT + 1))
#define KVM_TRC_VMENTRY (KVM_TRC_ENTRYEXIT + 0x01)
#define KVM_TRC_VMEXIT (KVM_TRC_ENTRYEXIT + 0x02)
#define KVM_TRC_PAGE_FAULT (KVM_TRC_HANDLER + 0x01)
#define KVM_TRC_HEAD_SIZE 12
#define KVM_TRC_CYCLE_SIZE 8
#define KVM_TRC_EXTRA_MAX 7
#define KVM_TRC_INJ_VIRQ (KVM_TRC_HANDLER + 0x02)
#define KVM_TRC_REDELIVER_EVT (KVM_TRC_HANDLER + 0x03)
#define KVM_TRC_PEND_INTR (KVM_TRC_HANDLER + 0x04)
#define KVM_TRC_IO_READ (KVM_TRC_HANDLER + 0x05)
#define KVM_TRC_IO_WRITE (KVM_TRC_HANDLER + 0x06)
#define KVM_TRC_CR_READ (KVM_TRC_HANDLER + 0x07)
#define KVM_TRC_CR_WRITE (KVM_TRC_HANDLER + 0x08)
#define KVM_TRC_DR_READ (KVM_TRC_HANDLER + 0x09)
#define KVM_TRC_DR_WRITE (KVM_TRC_HANDLER + 0x0A)
#define KVM_TRC_MSR_READ (KVM_TRC_HANDLER + 0x0B)
#define KVM_TRC_MSR_WRITE (KVM_TRC_HANDLER + 0x0C)
#define KVM_TRC_CPUID (KVM_TRC_HANDLER + 0x0D)
#define KVM_TRC_INTR (KVM_TRC_HANDLER + 0x0E)
#define KVM_TRC_NMI (KVM_TRC_HANDLER + 0x0F)
#define KVM_TRC_VMMCALL (KVM_TRC_HANDLER + 0x10)
#define KVM_TRC_HLT (KVM_TRC_HANDLER + 0x11)
#define KVM_TRC_CLTS (KVM_TRC_HANDLER + 0x12)
#define KVM_TRC_LMSW (KVM_TRC_HANDLER + 0x13)
#define KVM_TRC_APIC_ACCESS (KVM_TRC_HANDLER + 0x14)
#define KVM_TRC_TDP_FAULT (KVM_TRC_HANDLER + 0x15)
#define KVM_TRC_GTLB_WRITE (KVM_TRC_HANDLER + 0x16)
#define KVM_TRC_STLB_WRITE (KVM_TRC_HANDLER + 0x17)
#define KVM_TRC_STLB_INVAL (KVM_TRC_HANDLER + 0x18)
#define KVM_TRC_PPC_INSTR (KVM_TRC_HANDLER + 0x19)
struct kvm_user_trace_setup {
__u32 buf_size; /* sub_buffer size of each per-cpu */
__u32 buf_nr; /* the number of sub_buffers of each per-cpu */
__u32 buf_size;
__u32 buf_nr;
};
#define __KVM_DEPRECATED_MAIN_W_0x06 \
_IOW(KVMIO, 0x06, struct kvm_user_trace_setup)
#define __KVM_DEPRECATED_MAIN_0x07 _IO(KVMIO, 0x07)
#define __KVM_DEPRECATED_MAIN_0x08 _IO(KVMIO, 0x08)
#define __KVM_DEPRECATED_VM_R_0x70 _IOR(KVMIO, 0x70, struct kvm_assigned_irq)
struct kvm_breakpoint {
__u32 enabled;
__u32 padding;
__u64 address;
};
struct kvm_debug_guest {
__u32 enabled;
__u32 pad;
struct kvm_breakpoint breakpoints[4];
__u32 singlestep;
};
#define __KVM_DEPRECATED_VCPU_W_0x87 _IOW(KVMIO, 0x87, struct kvm_debug_guest)
/* *** End of deprecated interfaces *** */
/* for KVM_CREATE_MEMORY_REGION */
struct kvm_memory_region {
__u32 slot;
......@@ -99,6 +163,7 @@ struct kvm_pit_config {
/* For KVM_EXIT_INTERNAL_ERROR */
#define KVM_INTERNAL_ERROR_EMULATION 1
#define KVM_INTERNAL_ERROR_SIMUL_EX 2
/* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */
struct kvm_run {
......@@ -116,6 +181,11 @@ struct kvm_run {
__u64 cr8;
__u64 apic_base;
#ifdef __KVM_S390
/* the processor status word for s390 */
__u64 psw_mask; /* psw upper half */
__u64 psw_addr; /* psw lower half */
#endif
union {
/* KVM_EXIT_UNKNOWN */
struct {
......@@ -167,8 +237,6 @@ struct kvm_run {
/* KVM_EXIT_S390_SIEIC */
struct {
__u8 icptcode;
__u64 mask; /* psw upper half */
__u64 addr; /* psw lower half */
__u16 ipa;
__u32 ipb;
} s390_sieic;
......@@ -187,6 +255,9 @@ struct kvm_run {
} dcr;
struct {
__u32 suberror;
/* Available with KVM_CAP_INTERNAL_ERROR_DATA: */
__u32 ndata;
__u64 data[16];
} internal;
/* Fix the size of the union. */
char padding[256];
......@@ -329,24 +400,6 @@ struct kvm_ioeventfd {
__u8 pad[36];
};
#define KVM_TRC_SHIFT 16
/*
* kvm trace categories
*/
#define KVM_TRC_ENTRYEXIT (1 << KVM_TRC_SHIFT)
#define KVM_TRC_HANDLER (1 << (KVM_TRC_SHIFT + 1)) /* only 12 bits */
/*
* kvm trace action
*/
#define KVM_TRC_VMENTRY (KVM_TRC_ENTRYEXIT + 0x01)
#define KVM_TRC_VMEXIT (KVM_TRC_ENTRYEXIT + 0x02)
#define KVM_TRC_PAGE_FAULT (KVM_TRC_HANDLER + 0x01)
#define KVM_TRC_HEAD_SIZE 12
#define KVM_TRC_CYCLE_SIZE 8
#define KVM_TRC_EXTRA_MAX 7
#define KVMIO 0xAE
/*
......@@ -367,12 +420,10 @@ struct kvm_ioeventfd {
*/
#define KVM_GET_VCPU_MMAP_SIZE _IO(KVMIO, 0x04) /* in bytes */
#define KVM_GET_SUPPORTED_CPUID _IOWR(KVMIO, 0x05, struct kvm_cpuid2)
/*
* ioctls for kvm trace
*/
#define KVM_TRACE_ENABLE _IOW(KVMIO, 0x06, struct kvm_user_trace_setup)
#define KVM_TRACE_PAUSE _IO(KVMIO, 0x07)
#define KVM_TRACE_DISABLE _IO(KVMIO, 0x08)
#define KVM_TRACE_ENABLE __KVM_DEPRECATED_MAIN_W_0x06
#define KVM_TRACE_PAUSE __KVM_DEPRECATED_MAIN_0x07
#define KVM_TRACE_DISABLE __KVM_DEPRECATED_MAIN_0x08
/*
* Extension capability list.
*/
......@@ -436,6 +487,15 @@ struct kvm_ioeventfd {
#endif
#define KVM_CAP_IOEVENTFD 36
#define KVM_CAP_SET_IDENTITY_MAP_ADDR 37
#ifdef __KVM_HAVE_XEN_HVM
#define KVM_CAP_XEN_HVM 38
#endif
#define KVM_CAP_ADJUST_CLOCK 39
#define KVM_CAP_INTERNAL_ERROR_DATA 40
#ifdef __KVM_HAVE_VCPU_EVENTS
#define KVM_CAP_VCPU_EVENTS 41
#endif
#define KVM_CAP_S390_PSW 42
#ifdef KVM_CAP_IRQ_ROUTING
......@@ -488,6 +548,18 @@ struct kvm_x86_mce {
};
#endif
#ifdef KVM_CAP_XEN_HVM
struct kvm_xen_hvm_config {
__u32 flags;
__u32 msr;
__u64 blob_addr_32;
__u64 blob_addr_64;
__u8 blob_size_32;
__u8 blob_size_64;
__u8 pad2[30];
};
#endif
#define KVM_IRQFD_FLAG_DEASSIGN (1 << 0)
struct kvm_irqfd {
......@@ -497,55 +569,66 @@ struct kvm_irqfd {
__u8 pad[20];
};
struct kvm_clock_data {
__u64 clock;
__u32 flags;
__u32 pad[9];
};
/*
* ioctls for VM fds
*/
#define KVM_SET_MEMORY_REGION _IOW(KVMIO, 0x40, struct kvm_memory_region)
#define KVM_SET_MEMORY_REGION _IOW(KVMIO, 0x40, struct kvm_memory_region)
/*
* KVM_CREATE_VCPU receives as a parameter the vcpu slot, and returns
* a vcpu fd.
*/
#define KVM_CREATE_VCPU _IO(KVMIO, 0x41)
#define KVM_GET_DIRTY_LOG _IOW(KVMIO, 0x42, struct kvm_dirty_log)
#define KVM_SET_MEMORY_ALIAS _IOW(KVMIO, 0x43, struct kvm_memory_alias)
#define KVM_SET_NR_MMU_PAGES _IO(KVMIO, 0x44)
#define KVM_GET_NR_MMU_PAGES _IO(KVMIO, 0x45)
#define KVM_SET_USER_MEMORY_REGION _IOW(KVMIO, 0x46,\
#define KVM_CREATE_VCPU _IO(KVMIO, 0x41)
#define KVM_GET_DIRTY_LOG _IOW(KVMIO, 0x42, struct kvm_dirty_log)
#define KVM_SET_MEMORY_ALIAS _IOW(KVMIO, 0x43, struct kvm_memory_alias)
#define KVM_SET_NR_MMU_PAGES _IO(KVMIO, 0x44)
#define KVM_GET_NR_MMU_PAGES _IO(KVMIO, 0x45)
#define KVM_SET_USER_MEMORY_REGION _IOW(KVMIO, 0x46, \
struct kvm_userspace_memory_region)
#define KVM_SET_TSS_ADDR _IO(KVMIO, 0x47)
#define KVM_SET_IDENTITY_MAP_ADDR _IOW(KVMIO, 0x48, __u64)
#define KVM_SET_TSS_ADDR _IO(KVMIO, 0x47)
#define KVM_SET_IDENTITY_MAP_ADDR _IOW(KVMIO, 0x48, __u64)
/* Device model IOC */
#define KVM_CREATE_IRQCHIP _IO(KVMIO, 0x60)
#define KVM_IRQ_LINE _IOW(KVMIO, 0x61, struct kvm_irq_level)
#define KVM_GET_IRQCHIP _IOWR(KVMIO, 0x62, struct kvm_irqchip)
#define KVM_SET_IRQCHIP _IOR(KVMIO, 0x63, struct kvm_irqchip)
#define KVM_CREATE_PIT _IO(KVMIO, 0x64)
#define KVM_GET_PIT _IOWR(KVMIO, 0x65, struct kvm_pit_state)
#define KVM_SET_PIT _IOR(KVMIO, 0x66, struct kvm_pit_state)
#define KVM_IRQ_LINE_STATUS _IOWR(KVMIO, 0x67, struct kvm_irq_level)
#define KVM_CREATE_IRQCHIP _IO(KVMIO, 0x60)
#define KVM_IRQ_LINE _IOW(KVMIO, 0x61, struct kvm_irq_level)
#define KVM_GET_IRQCHIP _IOWR(KVMIO, 0x62, struct kvm_irqchip)
#define KVM_SET_IRQCHIP _IOR(KVMIO, 0x63, struct kvm_irqchip)
#define KVM_CREATE_PIT _IO(KVMIO, 0x64)
#define KVM_GET_PIT _IOWR(KVMIO, 0x65, struct kvm_pit_state)
#define KVM_SET_PIT _IOR(KVMIO, 0x66, struct kvm_pit_state)
#define KVM_IRQ_LINE_STATUS _IOWR(KVMIO, 0x67, struct kvm_irq_level)
#define KVM_REGISTER_COALESCED_MMIO \
_IOW(KVMIO, 0x67, struct kvm_coalesced_mmio_zone)
#define KVM_UNREGISTER_COALESCED_MMIO \
_IOW(KVMIO, 0x68, struct kvm_coalesced_mmio_zone)
#define KVM_ASSIGN_PCI_DEVICE _IOR(KVMIO, 0x69, \
struct kvm_assigned_pci_dev)
#define KVM_SET_GSI_ROUTING _IOW(KVMIO, 0x6a, struct kvm_irq_routing)
#define KVM_ASSIGN_PCI_DEVICE _IOR(KVMIO, 0x69, \
struct kvm_assigned_pci_dev)
#define KVM_SET_GSI_ROUTING _IOW(KVMIO, 0x6a, struct kvm_irq_routing)
/* deprecated, replaced by KVM_ASSIGN_DEV_IRQ */
#define KVM_ASSIGN_IRQ _IOR(KVMIO, 0x70, \
struct kvm_assigned_irq)
#define KVM_ASSIGN_DEV_IRQ _IOW(KVMIO, 0x70, struct kvm_assigned_irq)
#define KVM_REINJECT_CONTROL _IO(KVMIO, 0x71)
#define KVM_DEASSIGN_PCI_DEVICE _IOW(KVMIO, 0x72, \
struct kvm_assigned_pci_dev)
#define KVM_ASSIGN_SET_MSIX_NR \
_IOW(KVMIO, 0x73, struct kvm_assigned_msix_nr)
#define KVM_ASSIGN_SET_MSIX_ENTRY \
_IOW(KVMIO, 0x74, struct kvm_assigned_msix_entry)
#define KVM_DEASSIGN_DEV_IRQ _IOW(KVMIO, 0x75, struct kvm_assigned_irq)
#define KVM_IRQFD _IOW(KVMIO, 0x76, struct kvm_irqfd)
#define KVM_CREATE_PIT2 _IOW(KVMIO, 0x77, struct kvm_pit_config)
#define KVM_SET_BOOT_CPU_ID _IO(KVMIO, 0x78)
#define KVM_IOEVENTFD _IOW(KVMIO, 0x79, struct kvm_ioeventfd)
#define KVM_ASSIGN_IRQ __KVM_DEPRECATED_VM_R_0x70
#define KVM_ASSIGN_DEV_IRQ _IOW(KVMIO, 0x70, struct kvm_assigned_irq)
#define KVM_REINJECT_CONTROL _IO(KVMIO, 0x71)
#define KVM_DEASSIGN_PCI_DEVICE _IOW(KVMIO, 0x72, \
struct kvm_assigned_pci_dev)
#define KVM_ASSIGN_SET_MSIX_NR _IOW(KVMIO, 0x73, \
struct kvm_assigned_msix_nr)
#define KVM_ASSIGN_SET_MSIX_ENTRY _IOW(KVMIO, 0x74, \
struct kvm_assigned_msix_entry)
#define KVM_DEASSIGN_DEV_IRQ _IOW(KVMIO, 0x75, struct kvm_assigned_irq)
#define KVM_IRQFD _IOW(KVMIO, 0x76, struct kvm_irqfd)
#define KVM_CREATE_PIT2 _IOW(KVMIO, 0x77, struct kvm_pit_config)
#define KVM_SET_BOOT_CPU_ID _IO(KVMIO, 0x78)
#define KVM_IOEVENTFD _IOW(KVMIO, 0x79, struct kvm_ioeventfd)
#define KVM_XEN_HVM_CONFIG _IOW(KVMIO, 0x7a, struct kvm_xen_hvm_config)
#define KVM_SET_CLOCK _IOW(KVMIO, 0x7b, struct kvm_clock_data)
#define KVM_GET_CLOCK _IOR(KVMIO, 0x7c, struct kvm_clock_data)
/* Available with KVM_CAP_PIT_STATE2 */
#define KVM_GET_PIT2 _IOR(KVMIO, 0x9f, struct kvm_pit_state2)
#define KVM_SET_PIT2 _IOW(KVMIO, 0xa0, struct kvm_pit_state2)
/*
* ioctls for vcpu fds
......@@ -558,7 +641,7 @@ struct kvm_irqfd {
#define KVM_TRANSLATE _IOWR(KVMIO, 0x85, struct kvm_translation)
#define KVM_INTERRUPT _IOW(KVMIO, 0x86, struct kvm_interrupt)
/* KVM_DEBUG_GUEST is no longer supported, use KVM_SET_GUEST_DEBUG instead */
#define KVM_DEBUG_GUEST __KVM_DEPRECATED_DEBUG_GUEST
#define KVM_DEBUG_GUEST __KVM_DEPRECATED_VCPU_W_0x87
#define KVM_GET_MSRS _IOWR(KVMIO, 0x88, struct kvm_msrs)
#define KVM_SET_MSRS _IOW(KVMIO, 0x89, struct kvm_msrs)
#define KVM_SET_CPUID _IOW(KVMIO, 0x8a, struct kvm_cpuid)
......@@ -570,7 +653,7 @@ struct kvm_irqfd {
#define KVM_SET_CPUID2 _IOW(KVMIO, 0x90, struct kvm_cpuid2)
#define KVM_GET_CPUID2 _IOWR(KVMIO, 0x91, struct kvm_cpuid2)
/* Available with KVM_CAP_VAPIC */
#define KVM_TPR_ACCESS_REPORTING _IOWR(KVMIO, 0x92, struct kvm_tpr_access_ctl)
#define KVM_TPR_ACCESS_REPORTING _IOWR(KVMIO, 0x92, struct kvm_tpr_access_ctl)
/* Available with KVM_CAP_VAPIC */
#define KVM_SET_VAPIC_ADDR _IOW(KVMIO, 0x93, struct kvm_vapic_addr)
/* valid for virtual machine (for floating interrupt)_and_ vcpu */
......@@ -582,66 +665,23 @@ struct kvm_irqfd {
/* initial ipl psw for s390 */
#define KVM_S390_SET_INITIAL_PSW _IOW(KVMIO, 0x96, struct kvm_s390_psw)
/* initial reset for s390 */
#define KVM_S390_INITIAL_RESET _IO(KVMIO, 0x97)
#define KVM_S390_INITIAL_RESET _IO(KVMIO, 0x97)
#define KVM_GET_MP_STATE _IOR(KVMIO, 0x98, struct kvm_mp_state)
#define KVM_SET_MP_STATE _IOW(KVMIO, 0x99, struct kvm_mp_state)
/* Available with KVM_CAP_NMI */
#define KVM_NMI _IO(KVMIO, 0x9a)
#define KVM_NMI _IO(KVMIO, 0x9a)
/* Available with KVM_CAP_SET_GUEST_DEBUG */
#define KVM_SET_GUEST_DEBUG _IOW(KVMIO, 0x9b, struct kvm_guest_debug)
/* MCE for x86 */
#define KVM_X86_SETUP_MCE _IOW(KVMIO, 0x9c, __u64)
#define KVM_X86_GET_MCE_CAP_SUPPORTED _IOR(KVMIO, 0x9d, __u64)
#define KVM_X86_SET_MCE _IOW(KVMIO, 0x9e, struct kvm_x86_mce)
/*
* Deprecated interfaces
*/
struct kvm_breakpoint {
__u32 enabled;
__u32 padding;
__u64 address;
};
struct kvm_debug_guest {
__u32 enabled;
__u32 pad;
struct kvm_breakpoint breakpoints[4];
__u32 singlestep;
};
#define __KVM_DEPRECATED_DEBUG_GUEST _IOW(KVMIO, 0x87, struct kvm_debug_guest)
/* IA64 stack access */
#define KVM_IA64_VCPU_GET_STACK _IOR(KVMIO, 0x9a, void *)
#define KVM_IA64_VCPU_SET_STACK _IOW(KVMIO, 0x9b, void *)
#define KVM_GET_PIT2 _IOR(KVMIO, 0x9f, struct kvm_pit_state2)
#define KVM_SET_PIT2 _IOW(KVMIO, 0xa0, struct kvm_pit_state2)
#define KVM_TRC_INJ_VIRQ (KVM_TRC_HANDLER + 0x02)
#define KVM_TRC_REDELIVER_EVT (KVM_TRC_HANDLER + 0x03)
#define KVM_TRC_PEND_INTR (KVM_TRC_HANDLER + 0x04)
#define KVM_TRC_IO_READ (KVM_TRC_HANDLER + 0x05)
#define KVM_TRC_IO_WRITE (KVM_TRC_HANDLER + 0x06)
#define KVM_TRC_CR_READ (KVM_TRC_HANDLER + 0x07)
#define KVM_TRC_CR_WRITE (KVM_TRC_HANDLER + 0x08)
#define KVM_TRC_DR_READ (KVM_TRC_HANDLER + 0x09)
#define KVM_TRC_DR_WRITE (KVM_TRC_HANDLER + 0x0A)
#define KVM_TRC_MSR_READ (KVM_TRC_HANDLER + 0x0B)
#define KVM_TRC_MSR_WRITE (KVM_TRC_HANDLER + 0x0C)
#define KVM_TRC_CPUID (KVM_TRC_HANDLER + 0x0D)
#define KVM_TRC_INTR (KVM_TRC_HANDLER + 0x0E)
#define KVM_TRC_NMI (KVM_TRC_HANDLER + 0x0F)
#define KVM_TRC_VMMCALL (KVM_TRC_HANDLER + 0x10)
#define KVM_TRC_HLT (KVM_TRC_HANDLER + 0x11)
#define KVM_TRC_CLTS (KVM_TRC_HANDLER + 0x12)
#define KVM_TRC_LMSW (KVM_TRC_HANDLER + 0x13)
#define KVM_TRC_APIC_ACCESS (KVM_TRC_HANDLER + 0x14)
#define KVM_TRC_TDP_FAULT (KVM_TRC_HANDLER + 0x15)
#define KVM_TRC_GTLB_WRITE (KVM_TRC_HANDLER + 0x16)
#define KVM_TRC_STLB_WRITE (KVM_TRC_HANDLER + 0x17)
#define KVM_TRC_STLB_INVAL (KVM_TRC_HANDLER + 0x18)
#define KVM_TRC_PPC_INSTR (KVM_TRC_HANDLER + 0x19)
/* Available with KVM_CAP_VCPU_EVENTS */
#define KVM_GET_VCPU_EVENTS _IOR(KVMIO, 0x9f, struct kvm_vcpu_events)
#define KVM_SET_VCPU_EVENTS _IOW(KVMIO, 0xa0, struct kvm_vcpu_events)
#define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0)
......@@ -696,4 +736,4 @@ struct kvm_assigned_msix_entry {
__u16 padding[3];
};
#endif
#endif /* __LINUX_KVM_H */
......@@ -120,7 +120,7 @@ struct kvm_kernel_irq_routing_entry {
u32 gsi;
u32 type;
int (*set)(struct kvm_kernel_irq_routing_entry *e,
struct kvm *kvm, int level);
struct kvm *kvm, int irq_source_id, int level);
union {
struct {
unsigned irqchip;
......@@ -128,9 +128,28 @@ struct kvm_kernel_irq_routing_entry {
} irqchip;
struct msi_msg msi;
};
struct list_head link;
struct hlist_node link;
};
#ifdef __KVM_HAVE_IOAPIC
struct kvm_irq_routing_table {
int chip[KVM_NR_IRQCHIPS][KVM_IOAPIC_NUM_PINS];
struct kvm_kernel_irq_routing_entry *rt_entries;
u32 nr_rt_entries;
/*
* Array indexed by gsi. Each entry contains list of irq chips
* the gsi is connected to.
*/
struct hlist_head map[0];
};
#else
struct kvm_irq_routing_table {};
#endif
struct kvm {
spinlock_t mmu_lock;
spinlock_t requests_lock;
......@@ -166,8 +185,9 @@ struct kvm {
struct mutex irq_lock;
#ifdef CONFIG_HAVE_KVM_IRQCHIP
struct list_head irq_routing; /* of kvm_kernel_irq_routing_entry */
struct kvm_irq_routing_table *irq_routing;
struct hlist_head mask_notifier_list;
struct hlist_head irq_ack_notifier_list;
#endif
#ifdef KVM_ARCH_WANT_MMU_NOTIFIER
......@@ -266,6 +286,7 @@ int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn);
void mark_page_dirty(struct kvm *kvm, gfn_t gfn);
void kvm_vcpu_block(struct kvm_vcpu *vcpu);
void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu);
void kvm_resched(struct kvm_vcpu *vcpu);
void kvm_load_guest_fpu(struct kvm_vcpu *vcpu);
void kvm_put_guest_fpu(struct kvm_vcpu *vcpu);
......@@ -325,7 +346,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu);
void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu);
int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu);
void kvm_arch_hardware_enable(void *garbage);
int kvm_arch_hardware_enable(void *garbage);
void kvm_arch_hardware_disable(void *garbage);
int kvm_arch_hardware_setup(void);
void kvm_arch_hardware_unsetup(void);
......@@ -390,7 +411,12 @@ void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
struct kvm_irq_mask_notifier *kimn);
void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask);
int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level);
#ifdef __KVM_HAVE_IOAPIC
void kvm_get_intr_delivery_bitmask(struct kvm_ioapic *ioapic,
union kvm_ioapic_redirect_entry *entry,
unsigned long *deliver_bitmask);
#endif
int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level);
void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin);
void kvm_register_irq_ack_notifier(struct kvm *kvm,
struct kvm_irq_ack_notifier *kian);
......@@ -552,4 +578,21 @@ static inline bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu)
return vcpu->kvm->bsp_vcpu_id == vcpu->vcpu_id;
}
#endif
#ifdef __KVM_HAVE_DEVICE_ASSIGNMENT
long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
unsigned long arg);
#else
static inline long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
unsigned long arg)
{
return -ENOTTY;
}
#endif
#endif
#ifndef _LINUX_USER_RETURN_NOTIFIER_H
#define _LINUX_USER_RETURN_NOTIFIER_H
#ifdef CONFIG_USER_RETURN_NOTIFIER
#include <linux/list.h>
#include <linux/sched.h>
struct user_return_notifier {
void (*on_user_return)(struct user_return_notifier *urn);
struct hlist_node link;
};
void user_return_notifier_register(struct user_return_notifier *urn);
void user_return_notifier_unregister(struct user_return_notifier *urn);
static inline void propagate_user_return_notify(struct task_struct *prev,
struct task_struct *next)
{
if (test_tsk_thread_flag(prev, TIF_USER_RETURN_NOTIFY)) {
clear_tsk_thread_flag(prev, TIF_USER_RETURN_NOTIFY);
set_tsk_thread_flag(next, TIF_USER_RETURN_NOTIFY);
}
}
void fire_user_return_notifiers(void);
static inline void clear_user_return_notifier(struct task_struct *p)
{
clear_tsk_thread_flag(p, TIF_USER_RETURN_NOTIFY);
}
#else
struct user_return_notifier {};
static inline void propagate_user_return_notify(struct task_struct *prev,
struct task_struct *next)
{
}
static inline void fire_user_return_notifiers(void) {}
static inline void clear_user_return_notifier(struct task_struct *p) {}
#endif
#endif
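A hypothetical user of this API (all names below are invented; the
pattern is the one KVM's shared-MSR code builds on this interface, with
wrmsrl() assumed from <asm/msr.h>): write a guest MSR value now, and let
the notifier restore the host value only when the cpu actually returns
to userspace.

	struct msr_shadow {
		struct user_return_notifier urn;
		u64 host_value;
		u32 msr;
		bool registered;
	};

	static void msr_shadow_on_user_return(struct user_return_notifier *urn)
	{
		struct msr_shadow *s = container_of(urn, struct msr_shadow, urn);

		wrmsrl(s->msr, s->host_value);	/* undo the guest value */
		user_return_notifier_unregister(urn);
		s->registered = false;
	}

	static void msr_shadow_set_guest(struct msr_shadow *s, u64 guest_value)
	{
		wrmsrl(s->msr, guest_value);
		if (!s->registered) {
			s->urn.on_user_return = msr_shadow_on_user_return;
			user_return_notifier_register(&s->urn);
			s->registered = true;
		}
	}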
......@@ -99,6 +99,7 @@ obj-$(CONFIG_SLOW_WORK) += slow-work.o
obj-$(CONFIG_SLOW_WORK_DEBUG) += slow-work-debugfs.o
obj-$(CONFIG_PERF_EVENTS) += perf_event.o
obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
obj-$(CONFIG_USER_RETURN_NOTIFIER) += user-return-notifier.o
ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y)
# According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
......
......@@ -64,6 +64,7 @@
#include <linux/magic.h>
#include <linux/perf_event.h>
#include <linux/posix-timers.h>
#include <linux/user-return-notifier.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
......@@ -249,6 +250,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
goto out;
setup_thread_stack(tsk, orig);
clear_user_return_notifier(tsk);
stackend = end_of_stack(tsk);
*stackend = STACK_END_MAGIC; /* for overflow detection */
......
#include <linux/user-return-notifier.h>
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/module.h>
static DEFINE_PER_CPU(struct hlist_head, return_notifier_list);
#define URN_LIST_HEAD per_cpu(return_notifier_list, raw_smp_processor_id())
/*
* Request a notification when the current cpu returns to userspace. Must be
* called in atomic context. The notifier will also be called in atomic
* context.
*/
void user_return_notifier_register(struct user_return_notifier *urn)
{
set_tsk_thread_flag(current, TIF_USER_RETURN_NOTIFY);
hlist_add_head(&urn->link, &URN_LIST_HEAD);
}
EXPORT_SYMBOL_GPL(user_return_notifier_register);
/*
* Removes a registered user return notifier. Must be called from atomic
* context, and on the same cpu the registration occurred on.
*/
void user_return_notifier_unregister(struct user_return_notifier *urn)
{
hlist_del(&urn->link);
if (hlist_empty(&URN_LIST_HEAD))
clear_tsk_thread_flag(current, TIF_USER_RETURN_NOTIFY);
}
EXPORT_SYMBOL_GPL(user_return_notifier_unregister);
/* Calls registered user return notifiers */
void fire_user_return_notifiers(void)
{
struct user_return_notifier *urn;
struct hlist_node *tmp1, *tmp2;
struct hlist_head *head;
head = &get_cpu_var(return_notifier_list);
hlist_for_each_entry_safe(urn, tmp1, tmp2, head, link)
urn->on_user_return(urn);
put_cpu_var(return_notifier_list);
}
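For context, nothing in this file fires the list by itself; the arch exit
path does so when TIF_USER_RETURN_NOTIFY is set. On x86 in this series
the hook is essentially (simplified):

	/* in do_notify_resume(), just before returning to userspace */
	if (thread_info_flags & _TIF_USER_RETURN_NOTIFY)
		fire_user_return_notifiers();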
This diff is collapsed.
......@@ -61,10 +61,8 @@ irqfd_inject(struct work_struct *work)
struct _irqfd *irqfd = container_of(work, struct _irqfd, inject);
struct kvm *kvm = irqfd->kvm;
mutex_lock(&kvm->irq_lock);
kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 1);
kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0);
mutex_unlock(&kvm->irq_lock);
}
/*
......
......@@ -182,6 +182,7 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level)
union kvm_ioapic_redirect_entry entry;
int ret = 1;
mutex_lock(&ioapic->lock);
if (irq >= 0 && irq < IOAPIC_NUM_PINS) {
entry = ioapic->redirtbl[irq];
level ^= entry.fields.polarity;
......@@ -198,34 +199,51 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level)
}
trace_kvm_ioapic_set_irq(entry.bits, irq, ret == 0);
}
mutex_unlock(&ioapic->lock);
return ret;
}
static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int pin,
int trigger_mode)
static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int vector,
int trigger_mode)
{
union kvm_ioapic_redirect_entry *ent;
int i;
for (i = 0; i < IOAPIC_NUM_PINS; i++) {
union kvm_ioapic_redirect_entry *ent = &ioapic->redirtbl[i];
ent = &ioapic->redirtbl[pin];
if (ent->fields.vector != vector)
continue;
kvm_notify_acked_irq(ioapic->kvm, KVM_IRQCHIP_IOAPIC, pin);
/*
* We drop the lock while calling the ack notifiers because the ack
* notifier callbacks for assigned devices call back into the IOAPIC
* recursively. Since remote_irr is cleared only after the notifiers
* return, a vector delivered for this pin while the lock is dropped
* is latched in irr and gets delivered once the ack notifier has
* returned.
*/
mutex_unlock(&ioapic->lock);
kvm_notify_acked_irq(ioapic->kvm, KVM_IRQCHIP_IOAPIC, i);
mutex_lock(&ioapic->lock);
if (trigger_mode != IOAPIC_LEVEL_TRIG)
continue;
if (trigger_mode == IOAPIC_LEVEL_TRIG) {
ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG);
ent->fields.remote_irr = 0;
if (!ent->fields.mask && (ioapic->irr & (1 << pin)))
ioapic_service(ioapic, pin);
if (!ent->fields.mask && (ioapic->irr & (1 << i)))
ioapic_service(ioapic, i);
}
}
void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode)
{
struct kvm_ioapic *ioapic = kvm->arch.vioapic;
int i;
for (i = 0; i < IOAPIC_NUM_PINS; i++)
if (ioapic->redirtbl[i].fields.vector == vector)
__kvm_ioapic_update_eoi(ioapic, i, trigger_mode);
mutex_lock(&ioapic->lock);
__kvm_ioapic_update_eoi(ioapic, vector, trigger_mode);
mutex_unlock(&ioapic->lock);
}
static inline struct kvm_ioapic *to_ioapic(struct kvm_io_device *dev)
......@@ -250,8 +268,8 @@ static int ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len,
ioapic_debug("addr %lx\n", (unsigned long)addr);
ASSERT(!(addr & 0xf)); /* check alignment */
mutex_lock(&ioapic->kvm->irq_lock);
addr &= 0xff;
mutex_lock(&ioapic->lock);
switch (addr) {
case IOAPIC_REG_SELECT:
result = ioapic->ioregsel;
......@@ -265,6 +283,8 @@ static int ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len,
result = 0;
break;
}
mutex_unlock(&ioapic->lock);
switch (len) {
case 8:
*(u64 *) val = result;
......@@ -277,7 +297,6 @@ static int ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len,
default:
printk(KERN_WARNING "ioapic: wrong length %d\n", len);
}
mutex_unlock(&ioapic->kvm->irq_lock);
return 0;
}
......@@ -293,15 +312,15 @@ static int ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len,
(void*)addr, len, val);
ASSERT(!(addr & 0xf)); /* check alignment */
mutex_lock(&ioapic->kvm->irq_lock);
if (len == 4 || len == 8)
data = *(u32 *) val;
else {
printk(KERN_WARNING "ioapic: Unsupported size %d\n", len);
goto unlock;
return 0;
}
addr &= 0xff;
mutex_lock(&ioapic->lock);
switch (addr) {
case IOAPIC_REG_SELECT:
ioapic->ioregsel = data;
......@@ -312,15 +331,14 @@ static int ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len,
break;
#ifdef CONFIG_IA64
case IOAPIC_REG_EOI:
kvm_ioapic_update_eoi(ioapic->kvm, data, IOAPIC_LEVEL_TRIG);
__kvm_ioapic_update_eoi(ioapic, data, IOAPIC_LEVEL_TRIG);
break;
#endif
default:
break;
}
unlock:
mutex_unlock(&ioapic->kvm->irq_lock);
mutex_unlock(&ioapic->lock);
return 0;
}
......@@ -349,6 +367,7 @@ int kvm_ioapic_init(struct kvm *kvm)
ioapic = kzalloc(sizeof(struct kvm_ioapic), GFP_KERNEL);
if (!ioapic)
return -ENOMEM;
mutex_init(&ioapic->lock);
kvm->arch.vioapic = ioapic;
kvm_ioapic_reset(ioapic);
kvm_iodevice_init(&ioapic->dev, &ioapic_mmio_ops);
......@@ -360,3 +379,26 @@ int kvm_ioapic_init(struct kvm *kvm)
return ret;
}
int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state)
{
struct kvm_ioapic *ioapic = ioapic_irqchip(kvm);
if (!ioapic)
return -EINVAL;
mutex_lock(&ioapic->lock);
memcpy(state, ioapic, sizeof(struct kvm_ioapic_state));
mutex_unlock(&ioapic->lock);
return 0;
}
int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state)
{
struct kvm_ioapic *ioapic = ioapic_irqchip(kvm);
if (!ioapic)
return -EINVAL;
mutex_lock(&ioapic->lock);
memcpy(ioapic, state, sizeof(struct kvm_ioapic_state));
mutex_unlock(&ioapic->lock);
return 0;
}
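These two helpers let the KVM_GET_IRQCHIP/KVM_SET_IRQCHIP paths copy a
consistent snapshot under the new per-ioapic lock. From userspace the
read side looks roughly like this (sketch; vm_fd is assumed to be an
open VM file descriptor):

	struct kvm_irqchip chip;

	memset(&chip, 0, sizeof(chip));
	chip.chip_id = KVM_IRQCHIP_IOAPIC;
	if (ioctl(vm_fd, KVM_GET_IRQCHIP, &chip) < 0)
		perror("KVM_GET_IRQCHIP");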
......@@ -41,9 +41,11 @@ struct kvm_ioapic {
u32 irr;
u32 pad;
union kvm_ioapic_redirect_entry redirtbl[IOAPIC_NUM_PINS];
unsigned long irq_states[IOAPIC_NUM_PINS];
struct kvm_io_device dev;
struct kvm *kvm;
void (*ack_notifier)(void *opaque, int irq);
struct mutex lock;
};
#ifdef DEBUG
......@@ -73,4 +75,7 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level);
void kvm_ioapic_reset(struct kvm_ioapic *ioapic);
int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
struct kvm_lapic_irq *irq);
int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
#endif
This diff is collapsed.
This diff is collapsed.