提交 d4858aaf 编写于 作者: L Linus Torvalds

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull KVM fixes from Paolo Bonzini:
 "s390:
   - optimization for the exitless interrupt support that was merged in 4.16-rc1
   - improve the branch prediction blocking for nested KVM
   - replace some jump tables with switch statements to improve expoline performance
   - fixes for multiple epoch facility

  ARM:
   - fix the interaction of userspace irqchip VMs with in-kernel irqchip VMs
   - make sure we can build 32-bit KVM/ARM with gcc-8.

  x86:
   - fixes for AMD SEV
   - fixes for Intel nested VMX, emulated UMIP and a dump_stack() on VM startup
   - fixes for async page fault migration
   - small optimization to PV TLB flush (new in 4.16-rc1)
   - syzkaller fixes

  Generic:
   - compiler warning fixes
   - syzkaller fixes
   - more improvements to the kvm_stat tool

  Two more small Spectre fixes are going to reach you via Ingo"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (40 commits)
  KVM: SVM: Fix SEV LAUNCH_SECRET command
  KVM: SVM: install RSM intercept
  KVM: SVM: no need to call access_ok() in LAUNCH_MEASURE command
  include: psp-sev: Capitalize invalid length enum
  crypto: ccp: Fix sparse, use plain integer as NULL pointer
  KVM: X86: Avoid traversing all the cpus for pv tlb flush when steal time is disabled
  x86/kvm: Make parse_no_xxx __init for kvm
  KVM: x86: fix backward migration with async_PF
  kvm: fix warning for non-x86 builds
  kvm: fix warning for CONFIG_HAVE_KVM_EVENTFD builds
  tools/kvm_stat: print 'Total' line for multiple events only
  tools/kvm_stat: group child events indented after parent
  tools/kvm_stat: separate drilldown and fields filtering
  tools/kvm_stat: eliminate extra guest/pid selection dialog
  tools/kvm_stat: mark private methods as such
  tools/kvm_stat: fix debugfs handling
  tools/kvm_stat: print error on invalid regex
  tools/kvm_stat: fix crash when filtering out all non-child trace events
  tools/kvm_stat: avoid 'is' for equality checks
  tools/kvm_stat: use a more pythonic way to iterate over dictionaries
  ...
......@@ -58,6 +58,10 @@ KVM_FEATURE_PV_TLB_FLUSH || 9 || guest checks this feature bit
|| || before enabling paravirtualized
|| || tlb flush.
------------------------------------------------------------------------------
KVM_FEATURE_ASYNC_PF_VMEXIT || 10 || paravirtualized async PF VM exit
|| || can be enabled by setting bit 2
|| || when writing to msr 0x4b564d02
------------------------------------------------------------------------------
KVM_FEATURE_CLOCKSOURCE_STABLE_BIT || 24 || host will warn if no guest-side
|| || per-cpu warps are expected in
|| || kvmclock.
......
......@@ -170,7 +170,8 @@ MSR_KVM_ASYNC_PF_EN: 0x4b564d02
when asynchronous page faults are enabled on the vcpu 0 when
disabled. Bit 1 is 1 if asynchronous page faults can be injected
when vcpu is in cpl == 0. Bit 2 is 1 if asynchronous page faults
are delivered to L1 as #PF vmexits.
are delivered to L1 as #PF vmexits. Bit 2 can be set only if
KVM_FEATURE_ASYNC_PF_VMEXIT is present in CPUID.
First 4 byte of 64 byte memory location will be written to by
the hypervisor at the time of asynchronous page fault (APF)
......
......@@ -7,6 +7,8 @@ ccflags-y += -fno-stack-protector -DDISABLE_BRANCH_PROFILING
KVM=../../../../virt/kvm
CFLAGS_ARMV7VE :=$(call cc-option, -march=armv7ve)
obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/vgic-v2-sr.o
obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/vgic-v3-sr.o
obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/timer-sr.o
......@@ -15,7 +17,10 @@ obj-$(CONFIG_KVM_ARM_HOST) += tlb.o
obj-$(CONFIG_KVM_ARM_HOST) += cp15-sr.o
obj-$(CONFIG_KVM_ARM_HOST) += vfp.o
obj-$(CONFIG_KVM_ARM_HOST) += banked-sr.o
CFLAGS_banked-sr.o += $(CFLAGS_ARMV7VE)
obj-$(CONFIG_KVM_ARM_HOST) += entry.o
obj-$(CONFIG_KVM_ARM_HOST) += hyp-entry.o
obj-$(CONFIG_KVM_ARM_HOST) += switch.o
CFLAGS_switch.o += $(CFLAGS_ARMV7VE)
obj-$(CONFIG_KVM_ARM_HOST) += s2-setup.o
......@@ -20,6 +20,10 @@
#include <asm/kvm_hyp.h>
/*
* gcc before 4.9 doesn't understand -march=armv7ve, so we have to
* trick the assembler.
*/
__asm__(".arch_extension virt");
void __hyp_text __banked_save_state(struct kvm_cpu_context *ctxt)
......
......@@ -22,22 +22,6 @@
#include "trace.h"
#include "trace-s390.h"
static const intercept_handler_t instruction_handlers[256] = {
[0x01] = kvm_s390_handle_01,
[0x82] = kvm_s390_handle_lpsw,
[0x83] = kvm_s390_handle_diag,
[0xaa] = kvm_s390_handle_aa,
[0xae] = kvm_s390_handle_sigp,
[0xb2] = kvm_s390_handle_b2,
[0xb6] = kvm_s390_handle_stctl,
[0xb7] = kvm_s390_handle_lctl,
[0xb9] = kvm_s390_handle_b9,
[0xe3] = kvm_s390_handle_e3,
[0xe5] = kvm_s390_handle_e5,
[0xeb] = kvm_s390_handle_eb,
};
u8 kvm_s390_get_ilen(struct kvm_vcpu *vcpu)
{
struct kvm_s390_sie_block *sie_block = vcpu->arch.sie_block;
......@@ -129,16 +113,39 @@ static int handle_validity(struct kvm_vcpu *vcpu)
static int handle_instruction(struct kvm_vcpu *vcpu)
{
intercept_handler_t handler;
vcpu->stat.exit_instruction++;
trace_kvm_s390_intercept_instruction(vcpu,
vcpu->arch.sie_block->ipa,
vcpu->arch.sie_block->ipb);
handler = instruction_handlers[vcpu->arch.sie_block->ipa >> 8];
if (handler)
return handler(vcpu);
return -EOPNOTSUPP;
switch (vcpu->arch.sie_block->ipa >> 8) {
case 0x01:
return kvm_s390_handle_01(vcpu);
case 0x82:
return kvm_s390_handle_lpsw(vcpu);
case 0x83:
return kvm_s390_handle_diag(vcpu);
case 0xaa:
return kvm_s390_handle_aa(vcpu);
case 0xae:
return kvm_s390_handle_sigp(vcpu);
case 0xb2:
return kvm_s390_handle_b2(vcpu);
case 0xb6:
return kvm_s390_handle_stctl(vcpu);
case 0xb7:
return kvm_s390_handle_lctl(vcpu);
case 0xb9:
return kvm_s390_handle_b9(vcpu);
case 0xe3:
return kvm_s390_handle_e3(vcpu);
case 0xe5:
return kvm_s390_handle_e5(vcpu);
case 0xeb:
return kvm_s390_handle_eb(vcpu);
default:
return -EOPNOTSUPP;
}
}
static int inject_prog_on_prog_intercept(struct kvm_vcpu *vcpu)
......
......@@ -169,8 +169,15 @@ static int ckc_interrupts_enabled(struct kvm_vcpu *vcpu)
static int ckc_irq_pending(struct kvm_vcpu *vcpu)
{
if (vcpu->arch.sie_block->ckc >= kvm_s390_get_tod_clock_fast(vcpu->kvm))
const u64 now = kvm_s390_get_tod_clock_fast(vcpu->kvm);
const u64 ckc = vcpu->arch.sie_block->ckc;
if (vcpu->arch.sie_block->gcr[0] & 0x0020000000000000ul) {
if ((s64)ckc >= (s64)now)
return 0;
} else if (ckc >= now) {
return 0;
}
return ckc_interrupts_enabled(vcpu);
}
......@@ -187,12 +194,6 @@ static int cpu_timer_irq_pending(struct kvm_vcpu *vcpu)
return kvm_s390_get_cpu_timer(vcpu) >> 63;
}
static inline int is_ioirq(unsigned long irq_type)
{
return ((irq_type >= IRQ_PEND_IO_ISC_7) &&
(irq_type <= IRQ_PEND_IO_ISC_0));
}
static uint64_t isc_to_isc_bits(int isc)
{
return (0x80 >> isc) << 24;
......@@ -236,10 +237,15 @@ static inline int kvm_s390_gisa_tac_ipm_gisc(struct kvm_s390_gisa *gisa, u32 gis
return test_and_clear_bit_inv(IPM_BIT_OFFSET + gisc, (unsigned long *) gisa);
}
static inline unsigned long pending_irqs(struct kvm_vcpu *vcpu)
static inline unsigned long pending_irqs_no_gisa(struct kvm_vcpu *vcpu)
{
return vcpu->kvm->arch.float_int.pending_irqs |
vcpu->arch.local_int.pending_irqs |
vcpu->arch.local_int.pending_irqs;
}
static inline unsigned long pending_irqs(struct kvm_vcpu *vcpu)
{
return pending_irqs_no_gisa(vcpu) |
kvm_s390_gisa_get_ipm(vcpu->kvm->arch.gisa) << IRQ_PEND_IO_ISC_7;
}
......@@ -337,7 +343,7 @@ static void __reset_intercept_indicators(struct kvm_vcpu *vcpu)
static void set_intercept_indicators_io(struct kvm_vcpu *vcpu)
{
if (!(pending_irqs(vcpu) & IRQ_PEND_IO_MASK))
if (!(pending_irqs_no_gisa(vcpu) & IRQ_PEND_IO_MASK))
return;
else if (psw_ioint_disabled(vcpu))
kvm_s390_set_cpuflags(vcpu, CPUSTAT_IO_INT);
......@@ -1011,24 +1017,6 @@ static int __must_check __deliver_io(struct kvm_vcpu *vcpu,
return rc;
}
typedef int (*deliver_irq_t)(struct kvm_vcpu *vcpu);
static const deliver_irq_t deliver_irq_funcs[] = {
[IRQ_PEND_MCHK_EX] = __deliver_machine_check,
[IRQ_PEND_MCHK_REP] = __deliver_machine_check,
[IRQ_PEND_PROG] = __deliver_prog,
[IRQ_PEND_EXT_EMERGENCY] = __deliver_emergency_signal,
[IRQ_PEND_EXT_EXTERNAL] = __deliver_external_call,
[IRQ_PEND_EXT_CLOCK_COMP] = __deliver_ckc,
[IRQ_PEND_EXT_CPU_TIMER] = __deliver_cpu_timer,
[IRQ_PEND_RESTART] = __deliver_restart,
[IRQ_PEND_SET_PREFIX] = __deliver_set_prefix,
[IRQ_PEND_PFAULT_INIT] = __deliver_pfault_init,
[IRQ_PEND_EXT_SERVICE] = __deliver_service,
[IRQ_PEND_PFAULT_DONE] = __deliver_pfault_done,
[IRQ_PEND_VIRTIO] = __deliver_virtio,
};
/* Check whether an external call is pending (deliverable or not) */
int kvm_s390_ext_call_pending(struct kvm_vcpu *vcpu)
{
......@@ -1066,13 +1054,19 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
static u64 __calculate_sltime(struct kvm_vcpu *vcpu)
{
u64 now, cputm, sltime = 0;
const u64 now = kvm_s390_get_tod_clock_fast(vcpu->kvm);
const u64 ckc = vcpu->arch.sie_block->ckc;
u64 cputm, sltime = 0;
if (ckc_interrupts_enabled(vcpu)) {
now = kvm_s390_get_tod_clock_fast(vcpu->kvm);
sltime = tod_to_ns(vcpu->arch.sie_block->ckc - now);
/* already expired or overflow? */
if (!sltime || vcpu->arch.sie_block->ckc <= now)
if (vcpu->arch.sie_block->gcr[0] & 0x0020000000000000ul) {
if ((s64)now < (s64)ckc)
sltime = tod_to_ns((s64)ckc - (s64)now);
} else if (now < ckc) {
sltime = tod_to_ns(ckc - now);
}
/* already expired */
if (!sltime)
return 0;
if (cpu_timer_interrupts_enabled(vcpu)) {
cputm = kvm_s390_get_cpu_timer(vcpu);
......@@ -1192,7 +1186,6 @@ void kvm_s390_clear_local_irqs(struct kvm_vcpu *vcpu)
int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
{
struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
deliver_irq_t func;
int rc = 0;
unsigned long irq_type;
unsigned long irqs;
......@@ -1212,16 +1205,57 @@ int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
while ((irqs = deliverable_irqs(vcpu)) && !rc) {
/* bits are in the reverse order of interrupt priority */
irq_type = find_last_bit(&irqs, IRQ_PEND_COUNT);
if (is_ioirq(irq_type)) {
switch (irq_type) {
case IRQ_PEND_IO_ISC_0:
case IRQ_PEND_IO_ISC_1:
case IRQ_PEND_IO_ISC_2:
case IRQ_PEND_IO_ISC_3:
case IRQ_PEND_IO_ISC_4:
case IRQ_PEND_IO_ISC_5:
case IRQ_PEND_IO_ISC_6:
case IRQ_PEND_IO_ISC_7:
rc = __deliver_io(vcpu, irq_type);
} else {
func = deliver_irq_funcs[irq_type];
if (!func) {
WARN_ON_ONCE(func == NULL);
clear_bit(irq_type, &li->pending_irqs);
continue;
}
rc = func(vcpu);
break;
case IRQ_PEND_MCHK_EX:
case IRQ_PEND_MCHK_REP:
rc = __deliver_machine_check(vcpu);
break;
case IRQ_PEND_PROG:
rc = __deliver_prog(vcpu);
break;
case IRQ_PEND_EXT_EMERGENCY:
rc = __deliver_emergency_signal(vcpu);
break;
case IRQ_PEND_EXT_EXTERNAL:
rc = __deliver_external_call(vcpu);
break;
case IRQ_PEND_EXT_CLOCK_COMP:
rc = __deliver_ckc(vcpu);
break;
case IRQ_PEND_EXT_CPU_TIMER:
rc = __deliver_cpu_timer(vcpu);
break;
case IRQ_PEND_RESTART:
rc = __deliver_restart(vcpu);
break;
case IRQ_PEND_SET_PREFIX:
rc = __deliver_set_prefix(vcpu);
break;
case IRQ_PEND_PFAULT_INIT:
rc = __deliver_pfault_init(vcpu);
break;
case IRQ_PEND_EXT_SERVICE:
rc = __deliver_service(vcpu);
break;
case IRQ_PEND_PFAULT_DONE:
rc = __deliver_pfault_done(vcpu);
break;
case IRQ_PEND_VIRTIO:
rc = __deliver_virtio(vcpu);
break;
default:
WARN_ONCE(1, "Unknown pending irq type %ld", irq_type);
clear_bit(irq_type, &li->pending_irqs);
}
}
......@@ -1701,7 +1735,8 @@ static void __floating_irq_kick(struct kvm *kvm, u64 type)
kvm_s390_set_cpuflags(dst_vcpu, CPUSTAT_STOP_INT);
break;
case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
kvm_s390_set_cpuflags(dst_vcpu, CPUSTAT_IO_INT);
if (!(type & KVM_S390_INT_IO_AI_MASK && kvm->arch.gisa))
kvm_s390_set_cpuflags(dst_vcpu, CPUSTAT_IO_INT);
break;
default:
kvm_s390_set_cpuflags(dst_vcpu, CPUSTAT_EXT_INT);
......
......@@ -179,6 +179,28 @@ int kvm_arch_hardware_enable(void)
static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
unsigned long end);
static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
{
u8 delta_idx = 0;
/*
* The TOD jumps by delta, we have to compensate this by adding
* -delta to the epoch.
*/
delta = -delta;
/* sign-extension - we're adding to signed values below */
if ((s64)delta < 0)
delta_idx = -1;
scb->epoch += delta;
if (scb->ecd & ECD_MEF) {
scb->epdx += delta_idx;
if (scb->epoch < delta)
scb->epdx += 1;
}
}
/*
* This callback is executed during stop_machine(). All CPUs are therefore
* temporarily stopped. In order not to change guest behavior, we have to
......@@ -194,13 +216,17 @@ static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
unsigned long long *delta = v;
list_for_each_entry(kvm, &vm_list, vm_list) {
kvm->arch.epoch -= *delta;
kvm_for_each_vcpu(i, vcpu, kvm) {
vcpu->arch.sie_block->epoch -= *delta;
kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
if (i == 0) {
kvm->arch.epoch = vcpu->arch.sie_block->epoch;
kvm->arch.epdx = vcpu->arch.sie_block->epdx;
}
if (vcpu->arch.cputm_enabled)
vcpu->arch.cputm_start += *delta;
if (vcpu->arch.vsie_block)
vcpu->arch.vsie_block->epoch -= *delta;
kvm_clock_sync_scb(vcpu->arch.vsie_block,
*delta);
}
}
return NOTIFY_OK;
......@@ -902,12 +928,9 @@ static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
return -EFAULT;
if (test_kvm_facility(kvm, 139))
kvm_s390_set_tod_clock_ext(kvm, &gtod);
else if (gtod.epoch_idx == 0)
kvm_s390_set_tod_clock(kvm, gtod.tod);
else
if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
return -EINVAL;
kvm_s390_set_tod_clock(kvm, &gtod);
VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
gtod.epoch_idx, gtod.tod);
......@@ -932,13 +955,14 @@ static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
u64 gtod;
struct kvm_s390_vm_tod_clock gtod = { 0 };
if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
sizeof(gtod.tod)))
return -EFAULT;
kvm_s390_set_tod_clock(kvm, gtod);
VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
kvm_s390_set_tod_clock(kvm, &gtod);
VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
return 0;
}
......@@ -2389,6 +2413,7 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
mutex_lock(&vcpu->kvm->lock);
preempt_disable();
vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
preempt_enable();
mutex_unlock(&vcpu->kvm->lock);
if (!kvm_is_ucontrol(vcpu->kvm)) {
......@@ -3021,8 +3046,8 @@ static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
return 0;
}
void kvm_s390_set_tod_clock_ext(struct kvm *kvm,
const struct kvm_s390_vm_tod_clock *gtod)
void kvm_s390_set_tod_clock(struct kvm *kvm,
const struct kvm_s390_vm_tod_clock *gtod)
{
struct kvm_vcpu *vcpu;
struct kvm_s390_tod_clock_ext htod;
......@@ -3034,10 +3059,12 @@ void kvm_s390_set_tod_clock_ext(struct kvm *kvm,
get_tod_clock_ext((char *)&htod);
kvm->arch.epoch = gtod->tod - htod.tod;
kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
if (kvm->arch.epoch > gtod->tod)
kvm->arch.epdx -= 1;
kvm->arch.epdx = 0;
if (test_kvm_facility(kvm, 139)) {
kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
if (kvm->arch.epoch > gtod->tod)
kvm->arch.epdx -= 1;
}
kvm_s390_vcpu_block_all(kvm);
kvm_for_each_vcpu(i, vcpu, kvm) {
......@@ -3050,22 +3077,6 @@ void kvm_s390_set_tod_clock_ext(struct kvm *kvm,
mutex_unlock(&kvm->lock);
}
void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
{
struct kvm_vcpu *vcpu;
int i;
mutex_lock(&kvm->lock);
preempt_disable();
kvm->arch.epoch = tod - get_tod_clock();
kvm_s390_vcpu_block_all(kvm);
kvm_for_each_vcpu(i, vcpu, kvm)
vcpu->arch.sie_block->epoch = kvm->arch.epoch;
kvm_s390_vcpu_unblock_all(kvm);
preempt_enable();
mutex_unlock(&kvm->lock);
}
/**
* kvm_arch_fault_in_page - fault-in guest page if necessary
* @vcpu: The corresponding virtual cpu
......
......@@ -19,8 +19,6 @@
#include <asm/processor.h>
#include <asm/sclp.h>
typedef int (*intercept_handler_t)(struct kvm_vcpu *vcpu);
/* Transactional Memory Execution related macros */
#define IS_TE_ENABLED(vcpu) ((vcpu->arch.sie_block->ecb & ECB_TE))
#define TDB_FORMAT1 1
......@@ -283,9 +281,8 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu);
int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu);
/* implemented in kvm-s390.c */
void kvm_s390_set_tod_clock_ext(struct kvm *kvm,
const struct kvm_s390_vm_tod_clock *gtod);
void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod);
void kvm_s390_set_tod_clock(struct kvm *kvm,
const struct kvm_s390_vm_tod_clock *gtod);
long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable);
int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long addr);
int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr);
......
......@@ -85,9 +85,10 @@ int kvm_s390_handle_e3(struct kvm_vcpu *vcpu)
/* Handle SCK (SET CLOCK) interception */
static int handle_set_clock(struct kvm_vcpu *vcpu)
{
struct kvm_s390_vm_tod_clock gtod = { 0 };
int rc;
u8 ar;
u64 op2, val;
u64 op2;
vcpu->stat.instruction_sck++;
......@@ -97,12 +98,12 @@ static int handle_set_clock(struct kvm_vcpu *vcpu)
op2 = kvm_s390_get_base_disp_s(vcpu, &ar);
if (op2 & 7) /* Operand must be on a doubleword boundary */
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
rc = read_guest(vcpu, op2, ar, &val, sizeof(val));
rc = read_guest(vcpu, op2, ar, &gtod.tod, sizeof(gtod.tod));
if (rc)
return kvm_s390_inject_prog_cond(vcpu, rc);
VCPU_EVENT(vcpu, 3, "SCK: setting guest TOD to 0x%llx", val);
kvm_s390_set_tod_clock(vcpu->kvm, val);
VCPU_EVENT(vcpu, 3, "SCK: setting guest TOD to 0x%llx", gtod.tod);
kvm_s390_set_tod_clock(vcpu->kvm, &gtod);
kvm_s390_set_psw_cc(vcpu, 0);
return 0;
......@@ -795,55 +796,60 @@ static int handle_stsi(struct kvm_vcpu *vcpu)
return rc;
}
static const intercept_handler_t b2_handlers[256] = {
[0x02] = handle_stidp,
[0x04] = handle_set_clock,
[0x10] = handle_set_prefix,
[0x11] = handle_store_prefix,
[0x12] = handle_store_cpu_address,
[0x14] = kvm_s390_handle_vsie,
[0x21] = handle_ipte_interlock,
[0x29] = handle_iske,
[0x2a] = handle_rrbe,
[0x2b] = handle_sske,
[0x2c] = handle_test_block,
[0x30] = handle_io_inst,
[0x31] = handle_io_inst,
[0x32] = handle_io_inst,
[0x33] = handle_io_inst,
[0x34] = handle_io_inst,
[0x35] = handle_io_inst,
[0x36] = handle_io_inst,
[0x37] = handle_io_inst,
[0x38] = handle_io_inst,
[0x39] = handle_io_inst,
[0x3a] = handle_io_inst,
[0x3b] = handle_io_inst,
[0x3c] = handle_io_inst,
[0x50] = handle_ipte_interlock,
[0x56] = handle_sthyi,
[0x5f] = handle_io_inst,
[0x74] = handle_io_inst,
[0x76] = handle_io_inst,
[0x7d] = handle_stsi,
[0xb1] = handle_stfl,
[0xb2] = handle_lpswe,
};
int kvm_s390_handle_b2(struct kvm_vcpu *vcpu)
{
intercept_handler_t handler;
/*
* A lot of B2 instructions are priviledged. Here we check for
* the privileged ones, that we can handle in the kernel.
* Anything else goes to userspace.
*/
handler = b2_handlers[vcpu->arch.sie_block->ipa & 0x00ff];
if (handler)
return handler(vcpu);
return -EOPNOTSUPP;
switch (vcpu->arch.sie_block->ipa & 0x00ff) {
case 0x02:
return handle_stidp(vcpu);
case 0x04:
return handle_set_clock(vcpu);
case 0x10:
return handle_set_prefix(vcpu);
case 0x11:
return handle_store_prefix(vcpu);
case 0x12:
return handle_store_cpu_address(vcpu);
case 0x14:
return kvm_s390_handle_vsie(vcpu);
case 0x21:
case 0x50:
return handle_ipte_interlock(vcpu);
case 0x29:
return handle_iske(vcpu);
case 0x2a:
return handle_rrbe(vcpu);
case 0x2b:
return handle_sske(vcpu);
case 0x2c:
return handle_test_block(vcpu);
case 0x30:
case 0x31:
case 0x32:
case 0x33:
case 0x34:
case 0x35:
case 0x36:
case 0x37:
case 0x38:
case 0x39:
case 0x3a:
case 0x3b:
case 0x3c:
case 0x5f:
case 0x74:
case 0x76:
return handle_io_inst(vcpu);
case 0x56:
return handle_sthyi(vcpu);
case 0x7d:
return handle_stsi(vcpu);
case 0xb1:
return handle_stfl(vcpu);
case 0xb2:
return handle_lpswe(vcpu);
default:
return -EOPNOTSUPP;
}
}
static int handle_epsw(struct kvm_vcpu *vcpu)
......@@ -1105,25 +1111,22 @@ static int handle_essa(struct kvm_vcpu *vcpu)
return 0;
}
static const intercept_handler_t b9_handlers[256] = {
[0x8a] = handle_ipte_interlock,
[0x8d] = handle_epsw,
[0x8e] = handle_ipte_interlock,
[0x8f] = handle_ipte_interlock,
[0xab] = handle_essa,
[0xaf] = handle_pfmf,
};
int kvm_s390_handle_b9(struct kvm_vcpu *vcpu)
{
intercept_handler_t handler;
/* This is handled just as for the B2 instructions. */
handler = b9_handlers[vcpu->arch.sie_block->ipa & 0x00ff];
if (handler)
return handler(vcpu);
return -EOPNOTSUPP;
switch (vcpu->arch.sie_block->ipa & 0x00ff) {
case 0x8a:
case 0x8e:
case 0x8f:
return handle_ipte_interlock(vcpu);
case 0x8d:
return handle_epsw(vcpu);
case 0xab:
return handle_essa(vcpu);
case 0xaf:
return handle_pfmf(vcpu);
default:
return -EOPNOTSUPP;
}
}
int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu)
......@@ -1271,22 +1274,20 @@ static int handle_stctg(struct kvm_vcpu *vcpu)
return rc ? kvm_s390_inject_prog_cond(vcpu, rc) : 0;
}
static const intercept_handler_t eb_handlers[256] = {
[0x2f] = handle_lctlg,
[0x25] = handle_stctg,
[0x60] = handle_ri,
[0x61] = handle_ri,
[0x62] = handle_ri,
};
int kvm_s390_handle_eb(struct kvm_vcpu *vcpu)
{
intercept_handler_t handler;
handler = eb_handlers[vcpu->arch.sie_block->ipb & 0xff];
if (handler)
return handler(vcpu);
return -EOPNOTSUPP;
switch (vcpu->arch.sie_block->ipb & 0x000000ff) {
case 0x25:
return handle_stctg(vcpu);
case 0x2f:
return handle_lctlg(vcpu);
case 0x60:
case 0x61:
case 0x62:
return handle_ri(vcpu);
default:
return -EOPNOTSUPP;
}
}
static int handle_tprot(struct kvm_vcpu *vcpu)
......@@ -1346,10 +1347,12 @@ static int handle_tprot(struct kvm_vcpu *vcpu)
int kvm_s390_handle_e5(struct kvm_vcpu *vcpu)
{
/* For e5xx... instructions we only handle TPROT */
if ((vcpu->arch.sie_block->ipa & 0x00ff) == 0x01)
switch (vcpu->arch.sie_block->ipa & 0x00ff) {
case 0x01:
return handle_tprot(vcpu);
return -EOPNOTSUPP;
default:
return -EOPNOTSUPP;
}
}
static int handle_sckpf(struct kvm_vcpu *vcpu)
......@@ -1380,17 +1383,14 @@ static int handle_ptff(struct kvm_vcpu *vcpu)
return 0;
}
static const intercept_handler_t x01_handlers[256] = {
[0x04] = handle_ptff,
[0x07] = handle_sckpf,
};
int kvm_s390_handle_01(struct kvm_vcpu *vcpu)
{
intercept_handler_t handler;
handler = x01_handlers[vcpu->arch.sie_block->ipa & 0x00ff];
if (handler)
return handler(vcpu);
return -EOPNOTSUPP;
switch (vcpu->arch.sie_block->ipa & 0x00ff) {
case 0x04:
return handle_ptff(vcpu);
case 0x07:
return handle_sckpf(vcpu);
default:
return -EOPNOTSUPP;
}
}
......@@ -821,6 +821,7 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
int guest_bp_isolation;
int rc;
handle_last_fault(vcpu, vsie_page);
......@@ -831,6 +832,20 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
s390_handle_mcck();
srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
/* save current guest state of bp isolation override */
guest_bp_isolation = test_thread_flag(TIF_ISOLATE_BP_GUEST);
/*
* The guest is running with BPBC, so we have to force it on for our
* nested guest. This is done by enabling BPBC globally, so the BPBC
* control in the SCB (which the nested guest can modify) is simply
* ignored.
*/
if (test_kvm_facility(vcpu->kvm, 82) &&
vcpu->arch.sie_block->fpf & FPF_BPBC)
set_thread_flag(TIF_ISOLATE_BP_GUEST);
local_irq_disable();
guest_enter_irqoff();
local_irq_enable();
......@@ -840,6 +855,11 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
local_irq_disable();
guest_exit_irqoff();
local_irq_enable();
/* restore guest state for bp isolation override */
if (!guest_bp_isolation)
clear_thread_flag(TIF_ISOLATE_BP_GUEST);
vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
if (rc == -EINTR) {
......
......@@ -1464,7 +1464,4 @@ static inline int kvm_cpu_get_apicid(int mps_cpu)
#define put_smstate(type, buf, offset, val) \
*(type *)((buf) + (offset) - 0x7e00) = val
void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
unsigned long start, unsigned long end);
#endif /* _ASM_X86_KVM_HOST_H */
......@@ -26,6 +26,7 @@
#define KVM_FEATURE_PV_EOI 6
#define KVM_FEATURE_PV_UNHALT 7
#define KVM_FEATURE_PV_TLB_FLUSH 9
#define KVM_FEATURE_ASYNC_PF_VMEXIT 10
/* The last 8 bits are used to indicate how to interpret the flags field
* in pvclock structure. If no bits are set, all flags are ignored.
......
......@@ -49,7 +49,7 @@
static int kvmapf = 1;
static int parse_no_kvmapf(char *arg)
static int __init parse_no_kvmapf(char *arg)
{
kvmapf = 0;
return 0;
......@@ -58,7 +58,7 @@ static int parse_no_kvmapf(char *arg)
early_param("no-kvmapf", parse_no_kvmapf);
static int steal_acc = 1;
static int parse_no_stealacc(char *arg)
static int __init parse_no_stealacc(char *arg)
{
steal_acc = 0;
return 0;
......@@ -67,7 +67,7 @@ static int parse_no_stealacc(char *arg)
early_param("no-steal-acc", parse_no_stealacc);
static int kvmclock_vsyscall = 1;
static int parse_no_kvmclock_vsyscall(char *arg)
static int __init parse_no_kvmclock_vsyscall(char *arg)
{
kvmclock_vsyscall = 0;
return 0;
......@@ -341,10 +341,10 @@ static void kvm_guest_cpu_init(void)
#endif
pa |= KVM_ASYNC_PF_ENABLED;
/* Async page fault support for L1 hypervisor is optional */
if (wrmsr_safe(MSR_KVM_ASYNC_PF_EN,
(pa | KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT) & 0xffffffff, pa >> 32) < 0)
wrmsrl(MSR_KVM_ASYNC_PF_EN, pa);
if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF_VMEXIT))
pa |= KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT;
wrmsrl(MSR_KVM_ASYNC_PF_EN, pa);
__this_cpu_write(apf_reason.enabled, 1);
printk(KERN_INFO"KVM setup async PF for cpu %d\n",
smp_processor_id());
......@@ -545,7 +545,8 @@ static void __init kvm_guest_init(void)
pv_time_ops.steal_clock = kvm_steal_clock;
}
if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH))
if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) &&
!kvm_para_has_feature(KVM_FEATURE_STEAL_TIME))
pv_mmu_ops.flush_tlb_others = kvm_flush_tlb_others;
if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
......@@ -633,7 +634,8 @@ static __init int kvm_setup_pv_tlb_flush(void)
{
int cpu;
if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH)) {
if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) &&
!kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) {
for_each_possible_cpu(cpu) {
zalloc_cpumask_var_node(per_cpu_ptr(&__pv_tlb_mask, cpu),
GFP_KERNEL, cpu_to_node(cpu));
......
......@@ -607,7 +607,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
(1 << KVM_FEATURE_PV_EOI) |
(1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT) |
(1 << KVM_FEATURE_PV_UNHALT) |
(1 << KVM_FEATURE_PV_TLB_FLUSH);
(1 << KVM_FEATURE_PV_TLB_FLUSH) |
(1 << KVM_FEATURE_ASYNC_PF_VMEXIT);
if (sched_info_on())
entry->eax |= (1 << KVM_FEATURE_STEAL_TIME);
......
......@@ -2165,7 +2165,6 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu)
*/
vcpu->arch.apic_base = MSR_IA32_APICBASE_ENABLE;
static_key_slow_inc(&apic_sw_disabled.key); /* sw disabled at reset */
kvm_lapic_reset(vcpu, false);
kvm_iodevice_init(&apic->dev, &apic_mmio_ops);
return 0;
......
......@@ -3029,7 +3029,7 @@ static int kvm_handle_bad_page(struct kvm_vcpu *vcpu, gfn_t gfn, kvm_pfn_t pfn)
return RET_PF_RETRY;
}
return -EFAULT;
return RET_PF_EMULATE;
}
static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu,
......
......@@ -300,6 +300,8 @@ module_param(vgif, int, 0444);
static int sev = IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT);
module_param(sev, int, 0444);
static u8 rsm_ins_bytes[] = "\x0f\xaa";
static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
static void svm_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa);
static void svm_complete_interrupts(struct vcpu_svm *svm);
......@@ -1383,6 +1385,7 @@ static void init_vmcb(struct vcpu_svm *svm)
set_intercept(svm, INTERCEPT_SKINIT);
set_intercept(svm, INTERCEPT_WBINVD);
set_intercept(svm, INTERCEPT_XSETBV);
set_intercept(svm, INTERCEPT_RSM);
if (!kvm_mwait_in_guest()) {
set_intercept(svm, INTERCEPT_MONITOR);
......@@ -3699,6 +3702,12 @@ static int emulate_on_interception(struct vcpu_svm *svm)
return emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE;
}
static int rsm_interception(struct vcpu_svm *svm)
{
return x86_emulate_instruction(&svm->vcpu, 0, 0,
rsm_ins_bytes, 2) == EMULATE_DONE;
}
static int rdpmc_interception(struct vcpu_svm *svm)
{
int err;
......@@ -4541,7 +4550,7 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
[SVM_EXIT_MWAIT] = mwait_interception,
[SVM_EXIT_XSETBV] = xsetbv_interception,
[SVM_EXIT_NPF] = npf_interception,
[SVM_EXIT_RSM] = emulate_on_interception,
[SVM_EXIT_RSM] = rsm_interception,
[SVM_EXIT_AVIC_INCOMPLETE_IPI] = avic_incomplete_ipi_interception,
[SVM_EXIT_AVIC_UNACCELERATED_ACCESS] = avic_unaccelerated_access_interception,
};
......@@ -6236,16 +6245,18 @@ static int sev_launch_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
static int sev_launch_measure(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
void __user *measure = (void __user *)(uintptr_t)argp->data;
struct kvm_sev_info *sev = &kvm->arch.sev_info;
struct sev_data_launch_measure *data;
struct kvm_sev_launch_measure params;
void __user *p = NULL;
void *blob = NULL;
int ret;
if (!sev_guest(kvm))
return -ENOTTY;
if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params)))
if (copy_from_user(&params, measure, sizeof(params)))
return -EFAULT;
data = kzalloc(sizeof(*data), GFP_KERNEL);
......@@ -6256,17 +6267,13 @@ static int sev_launch_measure(struct kvm *kvm, struct kvm_sev_cmd *argp)
if (!params.len)
goto cmd;
if (params.uaddr) {
p = (void __user *)(uintptr_t)params.uaddr;
if (p) {
if (params.len > SEV_FW_BLOB_MAX_SIZE) {
ret = -EINVAL;
goto e_free;
}
if (!access_ok(VERIFY_WRITE, params.uaddr, params.len)) {
ret = -EFAULT;
goto e_free;
}
ret = -ENOMEM;
blob = kmalloc(params.len, GFP_KERNEL);
if (!blob)
......@@ -6290,13 +6297,13 @@ static int sev_launch_measure(struct kvm *kvm, struct kvm_sev_cmd *argp)
goto e_free_blob;
if (blob) {
if (copy_to_user((void __user *)(uintptr_t)params.uaddr, blob, params.len))
if (copy_to_user(p, blob, params.len))
ret = -EFAULT;
}
done:
params.len = data->len;
if (copy_to_user((void __user *)(uintptr_t)argp->data, &params, sizeof(params)))
if (copy_to_user(measure, &params, sizeof(params)))
ret = -EFAULT;
e_free_blob:
kfree(blob);
......@@ -6597,7 +6604,7 @@ static int sev_launch_secret(struct kvm *kvm, struct kvm_sev_cmd *argp)
struct page **pages;
void *blob, *hdr;
unsigned long n;
int ret;
int ret, offset;
if (!sev_guest(kvm))
return -ENOTTY;
......@@ -6623,6 +6630,10 @@ static int sev_launch_secret(struct kvm *kvm, struct kvm_sev_cmd *argp)
if (!data)
goto e_unpin_memory;
offset = params.guest_uaddr & (PAGE_SIZE - 1);
data->guest_address = __sme_page_pa(pages[0]) + offset;
data->guest_len = params.guest_len;
blob = psp_copy_user_blob(params.trans_uaddr, params.trans_len);
if (IS_ERR(blob)) {
ret = PTR_ERR(blob);
......@@ -6637,8 +6648,8 @@ static int sev_launch_secret(struct kvm *kvm, struct kvm_sev_cmd *argp)
ret = PTR_ERR(hdr);
goto e_free_blob;
}
data->trans_address = __psp_pa(blob);
data->trans_len = params.trans_len;
data->hdr_address = __psp_pa(hdr);
data->hdr_len = params.hdr_len;
data->handle = sev->handle;
ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_SECRET, data, &argp->error);
......
......@@ -4485,7 +4485,8 @@ static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL,
SECONDARY_EXEC_DESC);
hw_cr4 &= ~X86_CR4_UMIP;
} else
} else if (!is_guest_mode(vcpu) ||
!nested_cpu_has2(get_vmcs12(vcpu), SECONDARY_EXEC_DESC))
vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL,
SECONDARY_EXEC_DESC);
......@@ -11199,7 +11200,12 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
if (ret)
return ret;
if (vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT)
/*
* If we're entering a halted L2 vcpu and the L2 vcpu won't be woken
* by event injection, halt vcpu.
*/
if ((vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT) &&
!(vmcs12->vm_entry_intr_info_field & INTR_INFO_VALID_MASK))
return kvm_vcpu_halt(vcpu);
vmx->nested.nested_run_pending = 1;
......
......@@ -7975,6 +7975,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
kvm_vcpu_mtrr_init(vcpu);
vcpu_load(vcpu);
kvm_vcpu_reset(vcpu, false);
kvm_lapic_reset(vcpu, false);
kvm_mmu_setup(vcpu);
vcpu_put(vcpu);
return 0;
......@@ -8460,10 +8461,8 @@ int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size)
return r;
}
if (!size) {
r = vm_munmap(old.userspace_addr, old.npages * PAGE_SIZE);
WARN_ON(r < 0);
}
if (!size)
vm_munmap(old.userspace_addr, old.npages * PAGE_SIZE);
return 0;
}
......
......@@ -211,7 +211,7 @@ static int __sev_platform_shutdown_locked(int *error)
{
int ret;
ret = __sev_do_cmd_locked(SEV_CMD_SHUTDOWN, 0, error);
ret = __sev_do_cmd_locked(SEV_CMD_SHUTDOWN, NULL, error);
if (ret)
return ret;
......@@ -271,7 +271,7 @@ static int sev_ioctl_do_reset(struct sev_issue_cmd *argp)
return rc;
}
return __sev_do_cmd_locked(SEV_CMD_FACTORY_RESET, 0, &argp->error);
return __sev_do_cmd_locked(SEV_CMD_FACTORY_RESET, NULL, &argp->error);
}
static int sev_ioctl_do_platform_status(struct sev_issue_cmd *argp)
......@@ -299,7 +299,7 @@ static int sev_ioctl_do_pek_pdh_gen(int cmd, struct sev_issue_cmd *argp)
return rc;
}
return __sev_do_cmd_locked(cmd, 0, &argp->error);
return __sev_do_cmd_locked(cmd, NULL, &argp->error);
}
static int sev_ioctl_do_pek_csr(struct sev_issue_cmd *argp)
......@@ -624,7 +624,7 @@ EXPORT_SYMBOL_GPL(sev_guest_decommission);
int sev_guest_df_flush(int *error)
{
return sev_do_cmd(SEV_CMD_DF_FLUSH, 0, error);
return sev_do_cmd(SEV_CMD_DF_FLUSH, NULL, error);
}
EXPORT_SYMBOL_GPL(sev_guest_df_flush);
......
......@@ -1105,7 +1105,6 @@ static inline void kvm_irq_routing_update(struct kvm *kvm)
{
}
#endif
void kvm_arch_irq_routing_update(struct kvm *kvm);
static inline int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
{
......@@ -1114,6 +1113,8 @@ static inline int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
#endif /* CONFIG_HAVE_KVM_EVENTFD */
void kvm_arch_irq_routing_update(struct kvm *kvm);
static inline void kvm_make_request(int req, struct kvm_vcpu *vcpu)
{
/*
......@@ -1272,4 +1273,7 @@ static inline long kvm_arch_vcpu_async_ioctl(struct file *filp,
}
#endif /* CONFIG_HAVE_KVM_VCPU_ASYNC_IOCTL */
void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
unsigned long start, unsigned long end);
#endif
......@@ -42,7 +42,7 @@ typedef enum {
SEV_RET_INVALID_PLATFORM_STATE,
SEV_RET_INVALID_GUEST_STATE,
SEV_RET_INAVLID_CONFIG,
SEV_RET_INVALID_len,
SEV_RET_INVALID_LEN,
SEV_RET_ALREADY_OWNED,
SEV_RET_INVALID_CERTIFICATE,
SEV_RET_POLICY_FAILURE,
......
此差异已折叠。
......@@ -35,13 +35,13 @@ INTERACTIVE COMMANDS
*f*:: filter by regular expression
*g*:: filter by guest name
*g*:: filter by guest name/PID
*h*:: display interactive commands reference
*o*:: toggle sorting order (Total vs CurAvg/s)
*p*:: filter by PID
*p*:: filter by guest name/PID
*q*:: quit
......
......@@ -36,6 +36,8 @@ static struct timecounter *timecounter;
static unsigned int host_vtimer_irq;
static u32 host_vtimer_irq_flags;
static DEFINE_STATIC_KEY_FALSE(has_gic_active_state);
static const struct kvm_irq_level default_ptimer_irq = {
.irq = 30,
.level = 1,
......@@ -56,6 +58,12 @@ u64 kvm_phys_timer_read(void)
return timecounter->cc->read(timecounter->cc);
}
static inline bool userspace_irqchip(struct kvm *kvm)
{
return static_branch_unlikely(&userspace_irqchip_in_use) &&
unlikely(!irqchip_in_kernel(kvm));
}
static void soft_timer_start(struct hrtimer *hrt, u64 ns)
{
hrtimer_start(hrt, ktime_add_ns(ktime_get(), ns),
......@@ -69,25 +77,6 @@ static void soft_timer_cancel(struct hrtimer *hrt, struct work_struct *work)
cancel_work_sync(work);
}
static void kvm_vtimer_update_mask_user(struct kvm_vcpu *vcpu)
{
struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
/*
* When using a userspace irqchip with the architected timers, we must
* prevent continuously exiting from the guest, and therefore mask the
* physical interrupt by disabling it on the host interrupt controller
* when the virtual level is high, such that the guest can make
* forward progress. Once we detect the output level being
* de-asserted, we unmask the interrupt again so that we exit from the
* guest when the timer fires.
*/
if (vtimer->irq.level)
disable_percpu_irq(host_vtimer_irq);
else
enable_percpu_irq(host_vtimer_irq, 0);
}
static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
{
struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id;
......@@ -106,9 +95,9 @@ static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
if (kvm_timer_should_fire(vtimer))
kvm_timer_update_irq(vcpu, true, vtimer);
if (static_branch_unlikely(&userspace_irqchip_in_use) &&
unlikely(!irqchip_in_kernel(vcpu->kvm)))
kvm_vtimer_update_mask_user(vcpu);
if (userspace_irqchip(vcpu->kvm) &&
!static_branch_unlikely(&has_gic_active_state))
disable_percpu_irq(host_vtimer_irq);
return IRQ_HANDLED;
}
......@@ -290,8 +279,7 @@ static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level,
trace_kvm_timer_update_irq(vcpu->vcpu_id, timer_ctx->irq.irq,
timer_ctx->irq.level);
if (!static_branch_unlikely(&userspace_irqchip_in_use) ||
likely(irqchip_in_kernel(vcpu->kvm))) {
if (!userspace_irqchip(vcpu->kvm)) {
ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
timer_ctx->irq.irq,
timer_ctx->irq.level,
......@@ -350,12 +338,6 @@ static void kvm_timer_update_state(struct kvm_vcpu *vcpu)
phys_timer_emulate(vcpu);
}
static void __timer_snapshot_state(struct arch_timer_context *timer)
{
timer->cnt_ctl = read_sysreg_el0(cntv_ctl);
timer->cnt_cval = read_sysreg_el0(cntv_cval);
}
static void vtimer_save_state(struct kvm_vcpu *vcpu)
{
struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
......@@ -367,8 +349,10 @@ static void vtimer_save_state(struct kvm_vcpu *vcpu)
if (!vtimer->loaded)
goto out;
if (timer->enabled)
__timer_snapshot_state(vtimer);
if (timer->enabled) {
vtimer->cnt_ctl = read_sysreg_el0(cntv_ctl);
vtimer->cnt_cval = read_sysreg_el0(cntv_cval);
}
/* Disable the virtual timer */
write_sysreg_el0(0, cntv_ctl);
......@@ -460,23 +444,43 @@ static void set_cntvoff(u64 cntvoff)
kvm_call_hyp(__kvm_timer_set_cntvoff, low, high);
}
static void kvm_timer_vcpu_load_vgic(struct kvm_vcpu *vcpu)
static inline void set_vtimer_irq_phys_active(struct kvm_vcpu *vcpu, bool active)
{
int r;
r = irq_set_irqchip_state(host_vtimer_irq, IRQCHIP_STATE_ACTIVE, active);
WARN_ON(r);
}
static void kvm_timer_vcpu_load_gic(struct kvm_vcpu *vcpu)
{
struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
bool phys_active;
int ret;
phys_active = kvm_vgic_map_is_active(vcpu, vtimer->irq.irq);
ret = irq_set_irqchip_state(host_vtimer_irq,
IRQCHIP_STATE_ACTIVE,
phys_active);
WARN_ON(ret);
if (irqchip_in_kernel(vcpu->kvm))
phys_active = kvm_vgic_map_is_active(vcpu, vtimer->irq.irq);
else
phys_active = vtimer->irq.level;
set_vtimer_irq_phys_active(vcpu, phys_active);
}
static void kvm_timer_vcpu_load_user(struct kvm_vcpu *vcpu)
static void kvm_timer_vcpu_load_nogic(struct kvm_vcpu *vcpu)
{
kvm_vtimer_update_mask_user(vcpu);
struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
/*
* When using a userspace irqchip with the architected timers and a
* host interrupt controller that doesn't support an active state, we
* must still prevent continuously exiting from the guest, and
* therefore mask the physical interrupt by disabling it on the host
* interrupt controller when the virtual level is high, such that the
* guest can make forward progress. Once we detect the output level
* being de-asserted, we unmask the interrupt again so that we exit
* from the guest when the timer fires.
*/
if (vtimer->irq.level)
disable_percpu_irq(host_vtimer_irq);
else
enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags);
}
void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu)
......@@ -487,10 +491,10 @@ void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu)
if (unlikely(!timer->enabled))
return;
if (unlikely(!irqchip_in_kernel(vcpu->kvm)))
kvm_timer_vcpu_load_user(vcpu);
if (static_branch_likely(&has_gic_active_state))
kvm_timer_vcpu_load_gic(vcpu);
else
kvm_timer_vcpu_load_vgic(vcpu);
kvm_timer_vcpu_load_nogic(vcpu);
set_cntvoff(vtimer->cntvoff);
......@@ -555,18 +559,24 @@ static void unmask_vtimer_irq_user(struct kvm_vcpu *vcpu)
{
struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
if (unlikely(!irqchip_in_kernel(vcpu->kvm))) {
__timer_snapshot_state(vtimer);
if (!kvm_timer_should_fire(vtimer)) {
kvm_timer_update_irq(vcpu, false, vtimer);
kvm_vtimer_update_mask_user(vcpu);
}
if (!kvm_timer_should_fire(vtimer)) {
kvm_timer_update_irq(vcpu, false, vtimer);
if (static_branch_likely(&has_gic_active_state))
set_vtimer_irq_phys_active(vcpu, false);
else
enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags);
}
}
void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu)
{
unmask_vtimer_irq_user(vcpu);
struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
if (unlikely(!timer->enabled))
return;
if (unlikely(!irqchip_in_kernel(vcpu->kvm)))
unmask_vtimer_irq_user(vcpu);
}
int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu)
......@@ -753,6 +763,8 @@ int kvm_timer_hyp_init(bool has_gic)
kvm_err("kvm_arch_timer: error setting vcpu affinity\n");
goto out_free_irq;
}
static_branch_enable(&has_gic_active_state);
}
kvm_info("virtual timer IRQ%d\n", host_vtimer_irq);
......
......@@ -969,8 +969,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
/* Check for overlaps */
r = -EEXIST;
kvm_for_each_memslot(slot, __kvm_memslots(kvm, as_id)) {
if ((slot->id >= KVM_USER_MEM_SLOTS) ||
(slot->id == id))
if (slot->id == id)
continue;
if (!((base_gfn + npages <= slot->base_gfn) ||
(base_gfn >= slot->base_gfn + slot->npages)))
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册