提交 37131313 编写于 作者: R Radim Krčmář 提交者: Paolo Bonzini

KVM: x86: add KVM_CAP_X2APIC_API

KVM_CAP_X2APIC_API is a capability for features related to x2APIC
enablement.  KVM_X2APIC_API_32BIT_FORMAT feature can be enabled to
extend APIC ID in get/set ioctl and MSI addresses to 32 bits.
Both are needed to support x2APIC.

The feature has to be enableable and disabled by default, because
get/set ioctl shifted and truncated APIC ID to 8 bits by using a
non-standard protocol inspired by xAPIC and the change is not
backward-compatible.

Changes to MSI addresses follow the format used by interrupt remapping
unit.  The upper address word, that used to be 0, contains upper 24 bits
of the LAPIC address in its upper 24 bits.  Lower 8 bits are reserved as
0.  Using the upper address word is not backward-compatible either as we
didn't check that userspace zeroed the word.  Reserved bits are still
not explicitly checked, but non-zero data will affect LAPIC addresses,
which will cause a bug.
Signed-off-by: NRadim Krčmář <rkrcmar@redhat.com>
Signed-off-by: NPaolo Bonzini <pbonzini@redhat.com>
上级 c63cf538
...@@ -1482,6 +1482,11 @@ struct kvm_irq_routing_msi { ...@@ -1482,6 +1482,11 @@ struct kvm_irq_routing_msi {
__u32 pad; __u32 pad;
}; };
On x86, address_hi is ignored unless the KVM_X2APIC_API_USE_32BIT_IDS
feature of KVM_CAP_X2APIC_API capability is enabled. If it is enabled,
address_hi bits 31-8 provide bits 31-8 of the destination id. Bits 7-0 of
address_hi must be zero.
struct kvm_irq_routing_s390_adapter { struct kvm_irq_routing_s390_adapter {
__u64 ind_addr; __u64 ind_addr;
__u64 summary_addr; __u64 summary_addr;
...@@ -1583,6 +1588,17 @@ struct kvm_lapic_state { ...@@ -1583,6 +1588,17 @@ struct kvm_lapic_state {
Reads the Local APIC registers and copies them into the input argument. The Reads the Local APIC registers and copies them into the input argument. The
data format and layout are the same as documented in the architecture manual. data format and layout are the same as documented in the architecture manual.
If KVM_X2APIC_API_USE_32BIT_IDS feature of KVM_CAP_X2APIC_API is
enabled, then the format of APIC_ID register depends on the APIC mode
(reported by MSR_IA32_APICBASE) of its VCPU. x2APIC stores APIC ID in
the APIC_ID register (bytes 32-35). xAPIC only allows an 8-bit APIC ID
which is stored in bits 31-24 of the APIC register, or equivalently in
byte 35 of struct kvm_lapic_state's regs field. KVM_GET_LAPIC must then
be called after MSR_IA32_APICBASE has been set with KVM_SET_MSR.
If KVM_X2APIC_API_USE_32BIT_IDS feature is disabled, struct kvm_lapic_state
always uses xAPIC format.
4.58 KVM_SET_LAPIC 4.58 KVM_SET_LAPIC
...@@ -1600,6 +1616,10 @@ struct kvm_lapic_state { ...@@ -1600,6 +1616,10 @@ struct kvm_lapic_state {
Copies the input argument into the Local APIC registers. The data format Copies the input argument into the Local APIC registers. The data format
and layout are the same as documented in the architecture manual. and layout are the same as documented in the architecture manual.
The format of the APIC ID register (bytes 32-35 of struct kvm_lapic_state's
regs field) depends on the state of the KVM_CAP_X2APIC_API capability.
See the note in KVM_GET_LAPIC.
4.59 KVM_IOEVENTFD 4.59 KVM_IOEVENTFD
...@@ -2180,6 +2200,10 @@ struct kvm_msi { ...@@ -2180,6 +2200,10 @@ struct kvm_msi {
No flags are defined so far. The corresponding field must be 0. No flags are defined so far. The corresponding field must be 0.
On x86, address_hi is ignored unless the KVM_CAP_X2APIC_API capability is
enabled. If it is enabled, address_hi bits 31-8 provide bits 31-8 of the
destination id. Bits 7-0 of address_hi must be zero.
4.71 KVM_CREATE_PIT2 4.71 KVM_CREATE_PIT2
...@@ -3811,6 +3835,23 @@ Allows use of runtime-instrumentation introduced with zEC12 processor. ...@@ -3811,6 +3835,23 @@ Allows use of runtime-instrumentation introduced with zEC12 processor.
Will return -EINVAL if the machine does not support runtime-instrumentation. Will return -EINVAL if the machine does not support runtime-instrumentation.
Will return -EBUSY if a VCPU has already been created. Will return -EBUSY if a VCPU has already been created.
7.7 KVM_CAP_X2APIC_API
Architectures: x86
Parameters: args[0] - features that should be enabled
Returns: 0 on success, -EINVAL when args[0] contains invalid features
Valid feature flags in args[0] are
#define KVM_X2APIC_API_USE_32BIT_IDS (1ULL << 0)
Enabling KVM_X2APIC_API_USE_32BIT_IDS changes the behavior of
KVM_SET_GSI_ROUTING, KVM_SIGNAL_MSI, KVM_SET_LAPIC, and KVM_GET_LAPIC,
allowing the use of 32-bit APIC IDs. See KVM_CAP_X2APIC_API in their
respective sections.
8. Other capabilities. 8. Other capabilities.
---------------------- ----------------------
......
...@@ -782,6 +782,8 @@ struct kvm_arch { ...@@ -782,6 +782,8 @@ struct kvm_arch {
u32 ldr_mode; u32 ldr_mode;
struct page *avic_logical_id_table_page; struct page *avic_logical_id_table_page;
struct page *avic_physical_id_table_page; struct page *avic_physical_id_table_page;
bool x2apic_format;
}; };
struct kvm_vm_stat { struct kvm_vm_stat {
...@@ -1364,7 +1366,7 @@ bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu); ...@@ -1364,7 +1366,7 @@ bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu);
bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq, bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq,
struct kvm_vcpu **dest_vcpu); struct kvm_vcpu **dest_vcpu);
void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e, void kvm_set_msi_irq(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e,
struct kvm_lapic_irq *irq); struct kvm_lapic_irq *irq);
static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
......
...@@ -110,13 +110,17 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, ...@@ -110,13 +110,17 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
return r; return r;
} }
void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e, void kvm_set_msi_irq(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e,
struct kvm_lapic_irq *irq) struct kvm_lapic_irq *irq)
{ {
trace_kvm_msi_set_irq(e->msi.address_lo, e->msi.data); trace_kvm_msi_set_irq(e->msi.address_lo | (kvm->arch.x2apic_format ?
(u64)e->msi.address_hi << 32 : 0),
e->msi.data);
irq->dest_id = (e->msi.address_lo & irq->dest_id = (e->msi.address_lo &
MSI_ADDR_DEST_ID_MASK) >> MSI_ADDR_DEST_ID_SHIFT; MSI_ADDR_DEST_ID_MASK) >> MSI_ADDR_DEST_ID_SHIFT;
if (kvm->arch.x2apic_format)
irq->dest_id |= MSI_ADDR_EXT_DEST_ID(e->msi.address_hi);
irq->vector = (e->msi.data & irq->vector = (e->msi.data &
MSI_DATA_VECTOR_MASK) >> MSI_DATA_VECTOR_SHIFT; MSI_DATA_VECTOR_MASK) >> MSI_DATA_VECTOR_SHIFT;
irq->dest_mode = (1 << MSI_ADDR_DEST_MODE_SHIFT) & e->msi.address_lo; irq->dest_mode = (1 << MSI_ADDR_DEST_MODE_SHIFT) & e->msi.address_lo;
...@@ -129,15 +133,24 @@ void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e, ...@@ -129,15 +133,24 @@ void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e,
} }
EXPORT_SYMBOL_GPL(kvm_set_msi_irq); EXPORT_SYMBOL_GPL(kvm_set_msi_irq);
static inline bool kvm_msi_route_invalid(struct kvm *kvm,
struct kvm_kernel_irq_routing_entry *e)
{
return kvm->arch.x2apic_format && (e->msi.address_hi & 0xff);
}
int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
struct kvm *kvm, int irq_source_id, int level, bool line_status) struct kvm *kvm, int irq_source_id, int level, bool line_status)
{ {
struct kvm_lapic_irq irq; struct kvm_lapic_irq irq;
if (kvm_msi_route_invalid(kvm, e))
return -EINVAL;
if (!level) if (!level)
return -1; return -1;
kvm_set_msi_irq(e, &irq); kvm_set_msi_irq(kvm, e, &irq);
return kvm_irq_delivery_to_apic(kvm, NULL, &irq, NULL); return kvm_irq_delivery_to_apic(kvm, NULL, &irq, NULL);
} }
...@@ -153,7 +166,10 @@ int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e, ...@@ -153,7 +166,10 @@ int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e,
if (unlikely(e->type != KVM_IRQ_ROUTING_MSI)) if (unlikely(e->type != KVM_IRQ_ROUTING_MSI))
return -EWOULDBLOCK; return -EWOULDBLOCK;
kvm_set_msi_irq(e, &irq); if (kvm_msi_route_invalid(kvm, e))
return -EINVAL;
kvm_set_msi_irq(kvm, e, &irq);
if (kvm_irq_delivery_to_apic_fast(kvm, NULL, &irq, &r, NULL)) if (kvm_irq_delivery_to_apic_fast(kvm, NULL, &irq, &r, NULL))
return r; return r;
...@@ -286,6 +302,9 @@ int kvm_set_routing_entry(struct kvm *kvm, ...@@ -286,6 +302,9 @@ int kvm_set_routing_entry(struct kvm *kvm,
e->msi.address_lo = ue->u.msi.address_lo; e->msi.address_lo = ue->u.msi.address_lo;
e->msi.address_hi = ue->u.msi.address_hi; e->msi.address_hi = ue->u.msi.address_hi;
e->msi.data = ue->u.msi.data; e->msi.data = ue->u.msi.data;
if (kvm_msi_route_invalid(kvm, e))
goto out;
break; break;
case KVM_IRQ_ROUTING_HV_SINT: case KVM_IRQ_ROUTING_HV_SINT:
e->set = kvm_hv_set_sint; e->set = kvm_hv_set_sint;
...@@ -394,7 +413,7 @@ void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu, ...@@ -394,7 +413,7 @@ void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu,
if (entry->type != KVM_IRQ_ROUTING_MSI) if (entry->type != KVM_IRQ_ROUTING_MSI)
continue; continue;
kvm_set_msi_irq(entry, &irq); kvm_set_msi_irq(vcpu->kvm, entry, &irq);
if (irq.level && kvm_apic_match_dest(vcpu, NULL, 0, if (irq.level && kvm_apic_match_dest(vcpu, NULL, 0,
irq.dest_id, irq.dest_mode)) irq.dest_id, irq.dest_mode))
......
...@@ -1991,10 +1991,15 @@ static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu, ...@@ -1991,10 +1991,15 @@ static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu,
if (apic_x2apic_mode(vcpu->arch.apic)) { if (apic_x2apic_mode(vcpu->arch.apic)) {
u32 *id = (u32 *)(s->regs + APIC_ID); u32 *id = (u32 *)(s->regs + APIC_ID);
if (set) if (vcpu->kvm->arch.x2apic_format) {
*id >>= 24; if (*id != vcpu->vcpu_id)
else return -EINVAL;
*id <<= 24; } else {
if (set)
*id >>= 24;
else
*id <<= 24;
}
} }
return 0; return 0;
......
...@@ -11104,7 +11104,7 @@ static int vmx_update_pi_irte(struct kvm *kvm, unsigned int host_irq, ...@@ -11104,7 +11104,7 @@ static int vmx_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
* We will support full lowest-priority interrupt later. * We will support full lowest-priority interrupt later.
*/ */
kvm_set_msi_irq(e, &irq); kvm_set_msi_irq(kvm, e, &irq);
if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu)) { if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu)) {
/* /*
* Make sure the IRTE is in remapped mode if * Make sure the IRTE is in remapped mode if
......
...@@ -90,6 +90,8 @@ static u64 __read_mostly efer_reserved_bits = ~((u64)EFER_SCE); ...@@ -90,6 +90,8 @@ static u64 __read_mostly efer_reserved_bits = ~((u64)EFER_SCE);
#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM #define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
#define KVM_X2APIC_API_VALID_FLAGS (KVM_X2APIC_API_USE_32BIT_IDS)
static void update_cr8_intercept(struct kvm_vcpu *vcpu); static void update_cr8_intercept(struct kvm_vcpu *vcpu);
static void process_nmi(struct kvm_vcpu *vcpu); static void process_nmi(struct kvm_vcpu *vcpu);
static void enter_smm(struct kvm_vcpu *vcpu); static void enter_smm(struct kvm_vcpu *vcpu);
...@@ -2625,6 +2627,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) ...@@ -2625,6 +2627,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_TSC_CONTROL: case KVM_CAP_TSC_CONTROL:
r = kvm_has_tsc_control; r = kvm_has_tsc_control;
break; break;
case KVM_CAP_X2APIC_API:
r = KVM_X2APIC_API_VALID_FLAGS;
break;
default: default:
r = 0; r = 0;
break; break;
...@@ -3799,6 +3804,16 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, ...@@ -3799,6 +3804,16 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
mutex_unlock(&kvm->lock); mutex_unlock(&kvm->lock);
break; break;
} }
case KVM_CAP_X2APIC_API:
r = -EINVAL;
if (cap->args[0] & ~KVM_X2APIC_API_VALID_FLAGS)
break;
if (cap->args[0] & KVM_X2APIC_API_USE_32BIT_IDS)
kvm->arch.x2apic_format = true;
r = 0;
break;
default: default:
r = -EINVAL; r = -EINVAL;
break; break;
......
...@@ -151,8 +151,9 @@ TRACE_EVENT(kvm_msi_set_irq, ...@@ -151,8 +151,9 @@ TRACE_EVENT(kvm_msi_set_irq,
__entry->data = data; __entry->data = data;
), ),
TP_printk("dst %u vec %u (%s|%s|%s%s)", TP_printk("dst %llx vec %u (%s|%s|%s%s)",
(u8)(__entry->address >> 12), (u8)__entry->data, (u8)(__entry->address >> 12) | ((__entry->address >> 32) & 0xffffff00),
(u8)__entry->data,
__print_symbolic((__entry->data >> 8 & 0x7), kvm_deliver_mode), __print_symbolic((__entry->data >> 8 & 0x7), kvm_deliver_mode),
(__entry->address & (1<<2)) ? "logical" : "physical", (__entry->address & (1<<2)) ? "logical" : "physical",
(__entry->data & (1<<15)) ? "level" : "edge", (__entry->data & (1<<15)) ? "level" : "edge",
......
...@@ -866,6 +866,7 @@ struct kvm_ppc_smmu_info { ...@@ -866,6 +866,7 @@ struct kvm_ppc_smmu_info {
#define KVM_CAP_ARM_PMU_V3 126 #define KVM_CAP_ARM_PMU_V3 126
#define KVM_CAP_VCPU_ATTRIBUTES 127 #define KVM_CAP_VCPU_ATTRIBUTES 127
#define KVM_CAP_MAX_VCPU_ID 128 #define KVM_CAP_MAX_VCPU_ID 128
#define KVM_CAP_X2APIC_API 129
#ifdef KVM_CAP_IRQ_ROUTING #ifdef KVM_CAP_IRQ_ROUTING
...@@ -1313,4 +1314,6 @@ struct kvm_assigned_msix_entry { ...@@ -1313,4 +1314,6 @@ struct kvm_assigned_msix_entry {
__u16 padding[3]; __u16 padding[3];
}; };
#define KVM_X2APIC_API_USE_32BIT_IDS (1ULL << 0)
#endif /* __LINUX_KVM_H */ #endif /* __LINUX_KVM_H */
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册