提交 8a53e7e5 编写于 作者: P Paolo Bonzini

Merge branch 'kvm-ppc-next' of...

Merge branch 'kvm-ppc-next' of git://git.kernel.org/pub/scm/linux/kernel/git/paulus/powerpc into HEAD

- Better machine check handling for HV KVM
- Ability to support guests with threads=2, 4 or 8 on POWER9
- Fix for a race that could cause delayed recognition of signals
- Fix for a bug where POWER9 guests could sleep with interrupts
  pending.
......@@ -4131,6 +4131,34 @@ Parameters: none
Allow use of adapter-interruption suppression.
Returns: 0 on success; -EBUSY if a VCPU has already been created.
7.11 KVM_CAP_PPC_SMT
Architectures: ppc
Parameters: vsmt_mode, flags
Enabling this capability on a VM provides userspace with a way to set
the desired virtual SMT mode (i.e. the number of virtual CPUs per
virtual core). The virtual SMT mode, vsmt_mode, must be a power of 2
between 1 and 8. On POWER8, vsmt_mode must also be no greater than
the number of threads per subcore for the host. Currently flags must
be 0. A successful call to enable this capability will result in
vsmt_mode being returned when the KVM_CAP_PPC_SMT capability is
subsequently queried for the VM. This capability is only supported by
HV KVM, and can only be set before any VCPUs have been created.
The KVM_CAP_PPC_SMT_POSSIBLE capability indicates which virtual SMT
modes are available.
7.12 KVM_CAP_PPC_FWNMI
Architectures: ppc
Parameters: none
With this capability a machine check exception in the guest address
space will cause KVM to exit the guest with NMI exit reason. This
enables QEMU to build error log and branch to guest kernel registered
machine check handling routine. Without this capability KVM will
branch to guests' 0x200 interrupt vector.
8. Other capabilities.
----------------------
......@@ -4292,3 +4320,12 @@ Currently the following bits are defined for the device_irq_level bitmap:
Future versions of kvm may implement additional events. These will get
indicated by returning a higher number from KVM_CHECK_EXTENSION and will be
listed above.
8.10 KVM_CAP_PPC_SMT_POSSIBLE
Architectures: ppc
Querying this capability returns a bitmap indicating the possible
virtual SMT modes that can be set using KVM_CAP_PPC_SMT. If bit N
(counting from the right) is set, then a virtual SMT mode of 2^N is
available.
......@@ -86,7 +86,6 @@ struct kvmppc_vcore {
u16 last_cpu;
u8 vcore_state;
u8 in_guest;
struct kvmppc_vcore *master_vcore;
struct kvm_vcpu *runnable_threads[MAX_SMT_THREADS];
struct list_head preempt_list;
spinlock_t lock;
......
......@@ -81,7 +81,7 @@ struct kvm_split_mode {
u8 subcore_size;
u8 do_nap;
u8 napped[MAX_SMT_THREADS];
struct kvmppc_vcore *master_vcs[MAX_SUBCORES];
struct kvmppc_vcore *vc[MAX_SUBCORES];
};
/*
......
......@@ -35,6 +35,7 @@
#include <asm/page.h>
#include <asm/cacheflush.h>
#include <asm/hvcall.h>
#include <asm/mce.h>
#define KVM_MAX_VCPUS NR_CPUS
#define KVM_MAX_VCORES NR_CPUS
......@@ -267,6 +268,8 @@ struct kvm_resize_hpt;
struct kvm_arch {
unsigned int lpid;
unsigned int smt_mode; /* # vcpus per virtual core */
unsigned int emul_smt_mode; /* emualted SMT mode, on P9 */
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
unsigned int tlb_sets;
struct kvm_hpt_info hpt;
......@@ -285,6 +288,7 @@ struct kvm_arch {
cpumask_t need_tlb_flush;
cpumask_t cpu_in_guest;
u8 radix;
u8 fwnmi_enabled;
pgd_t *pgtable;
u64 process_table;
struct dentry *debugfs_dir;
......@@ -566,6 +570,7 @@ struct kvm_vcpu_arch {
ulong wort;
ulong tid;
ulong psscr;
ulong hfscr;
ulong shadow_srr1;
#endif
u32 vrsave; /* also USPRG0 */
......@@ -579,7 +584,7 @@ struct kvm_vcpu_arch {
ulong mcsrr0;
ulong mcsrr1;
ulong mcsr;
u32 dec;
ulong dec;
#ifdef CONFIG_BOOKE
u32 decar;
#endif
......@@ -710,6 +715,7 @@ struct kvm_vcpu_arch {
unsigned long pending_exceptions;
u8 ceded;
u8 prodded;
u8 doorbell_request;
u32 last_inst;
struct swait_queue_head *wqp;
......@@ -722,6 +728,7 @@ struct kvm_vcpu_arch {
int prev_cpu;
bool timer_running;
wait_queue_head_t cpu_run;
struct machine_check_event mce_evt; /* Valid if trap == 0x200 */
struct kvm_vcpu_arch_shared *shared;
#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_KVM_BOOK3S_PR_POSSIBLE)
......
......@@ -315,6 +315,8 @@ struct kvmppc_ops {
struct irq_bypass_producer *);
int (*configure_mmu)(struct kvm *kvm, struct kvm_ppc_mmuv3_cfg *cfg);
int (*get_rmmu_info)(struct kvm *kvm, struct kvm_ppc_rmmu_info *info);
int (*set_smt_mode)(struct kvm *kvm, unsigned long mode,
unsigned long flags);
};
extern struct kvmppc_ops *kvmppc_hv_ops;
......
......@@ -103,6 +103,8 @@
#define OP_31_XOP_STBUX 247
#define OP_31_XOP_LHZX 279
#define OP_31_XOP_LHZUX 311
#define OP_31_XOP_MSGSNDP 142
#define OP_31_XOP_MSGCLRP 174
#define OP_31_XOP_MFSPR 339
#define OP_31_XOP_LWAX 341
#define OP_31_XOP_LHAX 343
......
......@@ -60,6 +60,12 @@ struct kvm_regs {
#define KVM_SREGS_E_FSL_PIDn (1 << 0) /* PID1/PID2 */
/* flags for kvm_run.flags */
#define KVM_RUN_PPC_NMI_DISP_MASK (3 << 0)
#define KVM_RUN_PPC_NMI_DISP_FULLY_RECOV (1 << 0)
#define KVM_RUN_PPC_NMI_DISP_LIMITED_RECOV (2 << 0)
#define KVM_RUN_PPC_NMI_DISP_NOT_RECOV (3 << 0)
/*
* Feature bits indicate which sections of the sregs struct are valid,
* both in KVM_GET_SREGS and KVM_SET_SREGS. On KVM_SET_SREGS, registers
......
......@@ -485,6 +485,7 @@ int main(void)
OFFSET(KVM_ENABLED_HCALLS, kvm, arch.enabled_hcalls);
OFFSET(KVM_VRMA_SLB_V, kvm, arch.vrma_slb_v);
OFFSET(KVM_RADIX, kvm, arch.radix);
OFFSET(KVM_FWNMI, kvm, arch.fwnmi_enabled);
OFFSET(VCPU_DSISR, kvm_vcpu, arch.shregs.dsisr);
OFFSET(VCPU_DAR, kvm_vcpu, arch.shregs.dar);
OFFSET(VCPU_VPA, kvm_vcpu, arch.vpa.pinned_addr);
......@@ -513,6 +514,7 @@ int main(void)
OFFSET(VCPU_PENDING_EXC, kvm_vcpu, arch.pending_exceptions);
OFFSET(VCPU_CEDED, kvm_vcpu, arch.ceded);
OFFSET(VCPU_PRODDED, kvm_vcpu, arch.prodded);
OFFSET(VCPU_DBELL_REQ, kvm_vcpu, arch.doorbell_request);
OFFSET(VCPU_MMCR, kvm_vcpu, arch.mmcr);
OFFSET(VCPU_PMC, kvm_vcpu, arch.pmc);
OFFSET(VCPU_SPMC, kvm_vcpu, arch.spmc);
......@@ -542,6 +544,7 @@ int main(void)
OFFSET(VCPU_WORT, kvm_vcpu, arch.wort);
OFFSET(VCPU_TID, kvm_vcpu, arch.tid);
OFFSET(VCPU_PSSCR, kvm_vcpu, arch.psscr);
OFFSET(VCPU_HFSCR, kvm_vcpu, arch.hfscr);
OFFSET(VCORE_ENTRY_EXIT, kvmppc_vcore, entry_exit_map);
OFFSET(VCORE_IN_GUEST, kvmppc_vcore, in_guest);
OFFSET(VCORE_NAPPING_THREADS, kvmppc_vcore, napping_threads);
......
......@@ -405,6 +405,7 @@ void machine_check_print_event_info(struct machine_check_event *evt,
break;
}
}
EXPORT_SYMBOL_GPL(machine_check_print_event_info);
uint64_t get_mce_fault_addr(struct machine_check_event *evt)
{
......
此差异已折叠。
......@@ -307,7 +307,7 @@ void kvmhv_commence_exit(int trap)
return;
for (i = 0; i < MAX_SUBCORES; ++i) {
vc = sip->master_vcs[i];
vc = sip->vc[i];
if (!vc)
break;
do {
......
......@@ -61,13 +61,6 @@ BEGIN_FTR_SECTION
std r3, HSTATE_DABR(r13)
END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
/* Hard-disable interrupts */
mfmsr r10
std r10, HSTATE_HOST_MSR(r13)
rldicl r10,r10,48,1
rotldi r10,r10,16
mtmsrd r10,1
/* Save host PMU registers */
BEGIN_FTR_SECTION
/* Work around P8 PMAE bug */
......@@ -121,10 +114,20 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
* Put whatever is in the decrementer into the
* hypervisor decrementer.
*/
BEGIN_FTR_SECTION
ld r5, HSTATE_KVM_VCORE(r13)
ld r6, VCORE_KVM(r5)
ld r9, KVM_HOST_LPCR(r6)
andis. r9, r9, LPCR_LD@h
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
mfspr r8,SPRN_DEC
mftb r7
mtspr SPRN_HDEC,r8
BEGIN_FTR_SECTION
/* On POWER9, don't sign-extend if host LPCR[LD] bit is set */
bne 32f
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
extsw r8,r8
32: mtspr SPRN_HDEC,r8
add r8,r8,r7
std r8,HSTATE_DECEXP(r13)
......@@ -143,6 +146,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
*
* R1 = host R1
* R2 = host R2
* R3 = trap number on this thread
* R12 = exit handler id
* R13 = PACA
*/
......
......@@ -130,12 +130,28 @@ static long kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu)
out:
/*
* For guest that supports FWNMI capability, hook the MCE event into
* vcpu structure. We are going to exit the guest with KVM_EXIT_NMI
* exit reason. On our way to exit we will pull this event from vcpu
* structure and print it from thread 0 of the core/subcore.
*
* For guest that does not support FWNMI capability (old QEMU):
* We are now going enter guest either through machine check
* interrupt (for unhandled errors) or will continue from
* current HSRR0 (for handled errors) in guest. Hence
* queue up the event so that we can log it from host console later.
*/
machine_check_queue_event();
if (vcpu->kvm->arch.fwnmi_enabled) {
/*
* Hook up the mce event on to vcpu structure.
* First clear the old event.
*/
memset(&vcpu->arch.mce_evt, 0, sizeof(vcpu->arch.mce_evt));
if (get_mce_event(&mce_evt, MCE_EVENT_RELEASE)) {
vcpu->arch.mce_evt = mce_evt;
}
} else
machine_check_queue_event();
return handled;
}
......
......@@ -32,12 +32,30 @@
#include <asm/opal.h>
#include <asm/xive-regs.h>
/* Sign-extend HDEC if not on POWER9 */
#define EXTEND_HDEC(reg) \
BEGIN_FTR_SECTION; \
extsw reg, reg; \
END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
#define VCPU_GPRS_TM(reg) (((reg) * ULONG_SIZE) + VCPU_GPR_TM)
/* Values in HSTATE_NAPPING(r13) */
#define NAPPING_CEDE 1
#define NAPPING_NOVCPU 2
/* Stack frame offsets for kvmppc_hv_entry */
#define SFS 160
#define STACK_SLOT_TRAP (SFS-4)
#define STACK_SLOT_TID (SFS-16)
#define STACK_SLOT_PSSCR (SFS-24)
#define STACK_SLOT_PID (SFS-32)
#define STACK_SLOT_IAMR (SFS-40)
#define STACK_SLOT_CIABR (SFS-48)
#define STACK_SLOT_DAWR (SFS-56)
#define STACK_SLOT_DAWRX (SFS-64)
#define STACK_SLOT_HFSCR (SFS-72)
/*
* Call kvmppc_hv_entry in real mode.
* Must be called with interrupts hard-disabled.
......@@ -51,6 +69,7 @@ _GLOBAL_TOC(kvmppc_hv_entry_trampoline)
std r0, PPC_LR_STKOFF(r1)
stdu r1, -112(r1)
mfmsr r10
std r10, HSTATE_HOST_MSR(r13)
LOAD_REG_ADDR(r5, kvmppc_call_hv_entry)
li r0,MSR_RI
andc r0,r10,r0
......@@ -135,20 +154,21 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
stb r0, HSTATE_HWTHREAD_REQ(r13)
/*
* For external and machine check interrupts, we need
* to call the Linux handler to process the interrupt.
* We do that by jumping to absolute address 0x500 for
* external interrupts, or the machine_check_fwnmi label
* for machine checks (since firmware might have patched
* the vector area at 0x200). The [h]rfid at the end of the
* handler will return to the book3s_hv_interrupts.S code.
* For other interrupts we do the rfid to get back
* to the book3s_hv_interrupts.S code here.
* For external interrupts we need to call the Linux
* handler to process the interrupt. We do that by jumping
* to absolute address 0x500 for external interrupts.
* The [h]rfid at the end of the handler will return to
* the book3s_hv_interrupts.S code. For other interrupts
* we do the rfid to get back to the book3s_hv_interrupts.S
* code here.
*/
ld r8, 112+PPC_LR_STKOFF(r1)
addi r1, r1, 112
ld r7, HSTATE_HOST_MSR(r13)
/* Return the trap number on this thread as the return value */
mr r3, r12
/*
* If we came back from the guest via a relocation-on interrupt,
* we will be in virtual mode at this point, which makes it a
......@@ -158,62 +178,25 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
andi. r0, r0, MSR_IR /* in real mode? */
bne .Lvirt_return
cmpwi cr1, r12, BOOK3S_INTERRUPT_MACHINE_CHECK
cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL
beq 11f
cmpwi r12, BOOK3S_INTERRUPT_H_DOORBELL
beq 15f /* Invoke the H_DOORBELL handler */
cmpwi cr2, r12, BOOK3S_INTERRUPT_HMI
beq cr2, 14f /* HMI check */
/* RFI into the highmem handler, or branch to interrupt handler */
/* RFI into the highmem handler */
mfmsr r6
li r0, MSR_RI
andc r6, r6, r0
mtmsrd r6, 1 /* Clear RI in MSR */
mtsrr0 r8
mtsrr1 r7
beq cr1, 13f /* machine check */
RFI
/* On POWER7, we have external interrupts set to use HSRR0/1 */
11: mtspr SPRN_HSRR0, r8
mtspr SPRN_HSRR1, r7
ba 0x500
13: b machine_check_fwnmi
14: mtspr SPRN_HSRR0, r8
mtspr SPRN_HSRR1, r7
b hmi_exception_after_realmode
15: mtspr SPRN_HSRR0, r8
mtspr SPRN_HSRR1, r7
ba 0xe80
/* Virtual-mode return - can't get here for HMI or machine check */
/* Virtual-mode return */
.Lvirt_return:
cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL
beq 16f
cmpwi r12, BOOK3S_INTERRUPT_H_DOORBELL
beq 17f
andi. r0, r7, MSR_EE /* were interrupts hard-enabled? */
beq 18f
mtmsrd r7, 1 /* if so then re-enable them */
18: mtlr r8
mtlr r8
blr
16: mtspr SPRN_HSRR0, r8 /* jump to reloc-on external vector */
mtspr SPRN_HSRR1, r7
b exc_virt_0x4500_hardware_interrupt
17: mtspr SPRN_HSRR0, r8
mtspr SPRN_HSRR1, r7
b exc_virt_0x4e80_h_doorbell
kvmppc_primary_no_guest:
/* We handle this much like a ceded vcpu */
/* put the HDEC into the DEC, since HDEC interrupts don't wake us */
/* HDEC may be larger than DEC for arch >= v3.00, but since the */
/* HDEC value came from DEC in the first place, it will fit */
mfspr r3, SPRN_HDEC
mtspr SPRN_DEC, r3
/*
......@@ -295,8 +278,9 @@ kvm_novcpu_wakeup:
/* See if our timeslice has expired (HDEC is negative) */
mfspr r0, SPRN_HDEC
EXTEND_HDEC(r0)
li r12, BOOK3S_INTERRUPT_HV_DECREMENTER
cmpwi r0, 0
cmpdi r0, 0
blt kvm_novcpu_exit
/* Got an IPI but other vcpus aren't yet exiting, must be a latecomer */
......@@ -319,10 +303,10 @@ kvm_novcpu_exit:
bl kvmhv_accumulate_time
#endif
13: mr r3, r12
stw r12, 112-4(r1)
stw r12, STACK_SLOT_TRAP(r1)
bl kvmhv_commence_exit
nop
lwz r12, 112-4(r1)
lwz r12, STACK_SLOT_TRAP(r1)
b kvmhv_switch_to_host
/*
......@@ -390,8 +374,8 @@ kvm_secondary_got_guest:
lbz r4, HSTATE_PTID(r13)
cmpwi r4, 0
bne 63f
lis r6, 0x7fff
ori r6, r6, 0xffff
LOAD_REG_ADDR(r6, decrementer_max)
ld r6, 0(r6)
mtspr SPRN_HDEC, r6
/* and set per-LPAR registers, if doing dynamic micro-threading */
ld r6, HSTATE_SPLIT_MODE(r13)
......@@ -545,11 +529,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
* *
*****************************************************************************/
/* Stack frame offsets */
#define STACK_SLOT_TID (112-16)
#define STACK_SLOT_PSSCR (112-24)
#define STACK_SLOT_PID (112-32)
.global kvmppc_hv_entry
kvmppc_hv_entry:
......@@ -565,7 +544,7 @@ kvmppc_hv_entry:
*/
mflr r0
std r0, PPC_LR_STKOFF(r1)
stdu r1, -112(r1)
stdu r1, -SFS(r1)
/* Save R1 in the PACA */
std r1, HSTATE_HOST_R1(r13)
......@@ -749,10 +728,22 @@ BEGIN_FTR_SECTION
mfspr r5, SPRN_TIDR
mfspr r6, SPRN_PSSCR
mfspr r7, SPRN_PID
mfspr r8, SPRN_IAMR
std r5, STACK_SLOT_TID(r1)
std r6, STACK_SLOT_PSSCR(r1)
std r7, STACK_SLOT_PID(r1)
std r8, STACK_SLOT_IAMR(r1)
mfspr r5, SPRN_HFSCR
std r5, STACK_SLOT_HFSCR(r1)
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
BEGIN_FTR_SECTION
mfspr r5, SPRN_CIABR
mfspr r6, SPRN_DAWR
mfspr r7, SPRN_DAWRX
std r5, STACK_SLOT_CIABR(r1)
std r6, STACK_SLOT_DAWR(r1)
std r7, STACK_SLOT_DAWRX(r1)
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
BEGIN_FTR_SECTION
/* Set partition DABR */
......@@ -895,8 +886,10 @@ FTR_SECTION_ELSE
ld r5, VCPU_TID(r4)
ld r6, VCPU_PSSCR(r4)
oris r6, r6, PSSCR_EC@h /* This makes stop trap to HV */
ld r7, VCPU_HFSCR(r4)
mtspr SPRN_TIDR, r5
mtspr SPRN_PSSCR, r6
mtspr SPRN_HFSCR, r7
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
8:
......@@ -911,7 +904,7 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
mftb r7
subf r3,r7,r8
mtspr SPRN_DEC,r3
stw r3,VCPU_DEC(r4)
std r3,VCPU_DEC(r4)
ld r5, VCPU_SPRG0(r4)
ld r6, VCPU_SPRG1(r4)
......@@ -968,7 +961,8 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
/* Check if HDEC expires soon */
mfspr r3, SPRN_HDEC
cmpwi r3, 512 /* 1 microsecond */
EXTEND_HDEC(r3)
cmpdi r3, 512 /* 1 microsecond */
blt hdec_soon
#ifdef CONFIG_KVM_XICS
......@@ -1022,7 +1016,13 @@ kvmppc_cede_reentry: /* r4 = vcpu, r13 = paca */
li r0, BOOK3S_INTERRUPT_EXTERNAL
bne cr1, 12f
mfspr r0, SPRN_DEC
cmpwi r0, 0
BEGIN_FTR_SECTION
/* On POWER9 check whether the guest has large decrementer enabled */
andis. r8, r8, LPCR_LD@h
bne 15f
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
extsw r0, r0
15: cmpdi r0, 0
li r0, BOOK3S_INTERRUPT_DECREMENTER
bge 5f
......@@ -1032,6 +1032,23 @@ kvmppc_cede_reentry: /* r4 = vcpu, r13 = paca */
mr r9, r4
bl kvmppc_msr_interrupt
5:
BEGIN_FTR_SECTION
b fast_guest_return
END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
/* On POWER9, check for pending doorbell requests */
lbz r0, VCPU_DBELL_REQ(r4)
cmpwi r0, 0
beq fast_guest_return
ld r5, HSTATE_KVM_VCORE(r13)
/* Set DPDES register so the CPU will take a doorbell interrupt */
li r0, 1
mtspr SPRN_DPDES, r0
std r0, VCORE_DPDES(r5)
/* Make sure other cpus see vcore->dpdes set before dbell req clear */
lwsync
/* Clear the pending doorbell request */
li r0, 0
stb r0, VCPU_DBELL_REQ(r4)
/*
* Required state:
......@@ -1206,6 +1223,15 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
stw r12,VCPU_TRAP(r9)
/*
* Now that we have saved away SRR0/1 and HSRR0/1,
* interrupts are recoverable in principle, so set MSR_RI.
* This becomes important for relocation-on interrupts from
* the guest, which we can get in radix mode on POWER9.
*/
li r0, MSR_RI
mtmsrd r0, 1
#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
addi r3, r9, VCPU_TB_RMINTR
mr r4, r9
......@@ -1262,6 +1288,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
beq 4f
b guest_exit_cont
3:
/* If it's a hypervisor facility unavailable interrupt, save HFSCR */
cmpwi r12, BOOK3S_INTERRUPT_H_FAC_UNAVAIL
bne 14f
mfspr r3, SPRN_HFSCR
std r3, VCPU_HFSCR(r9)
b guest_exit_cont
14:
/* External interrupt ? */
cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL
bne+ guest_exit_cont
......@@ -1449,12 +1482,18 @@ mc_cont:
mtspr SPRN_SPURR,r4
/* Save DEC */
ld r3, HSTATE_KVM_VCORE(r13)
mfspr r5,SPRN_DEC
mftb r6
/* On P9, if the guest has large decr enabled, don't sign extend */
BEGIN_FTR_SECTION
ld r4, VCORE_LPCR(r3)
andis. r4, r4, LPCR_LD@h
bne 16f
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
extsw r5,r5
add r5,r5,r6
16: add r5,r5,r6
/* r5 is a guest timebase value here, convert to host TB */
ld r3,HSTATE_KVM_VCORE(r13)
ld r4,VCORE_TB_OFFSET(r3)
subf r5,r4,r5
std r5,VCPU_DEC_EXPIRES(r9)
......@@ -1499,17 +1538,19 @@ FTR_SECTION_ELSE
rldicl r6, r6, 4, 50 /* r6 &= PSSCR_GUEST_VIS */
rotldi r6, r6, 60
std r6, VCPU_PSSCR(r9)
/* Restore host HFSCR value */
ld r7, STACK_SLOT_HFSCR(r1)
mtspr SPRN_HFSCR, r7
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
/*
* Restore various registers to 0, where non-zero values
* set by the guest could disrupt the host.
*/
li r0, 0
mtspr SPRN_IAMR, r0
mtspr SPRN_CIABR, r0
mtspr SPRN_DAWRX, r0
mtspr SPRN_PSPB, r0
mtspr SPRN_WORT, r0
BEGIN_FTR_SECTION
mtspr SPRN_IAMR, r0
mtspr SPRN_TCSCR, r0
/* Set MMCRS to 1<<31 to freeze and disable the SPMC counters */
li r0, 1
......@@ -1525,6 +1566,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
std r6,VCPU_UAMOR(r9)
li r6,0
mtspr SPRN_AMR,r6
mtspr SPRN_UAMOR, r6
/* Switch DSCR back to host value */
mfspr r8, SPRN_DSCR
......@@ -1669,13 +1711,23 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
ptesync
/* Restore host values of some registers */
BEGIN_FTR_SECTION
ld r5, STACK_SLOT_CIABR(r1)
ld r6, STACK_SLOT_DAWR(r1)
ld r7, STACK_SLOT_DAWRX(r1)
mtspr SPRN_CIABR, r5
mtspr SPRN_DAWR, r6
mtspr SPRN_DAWRX, r7
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
BEGIN_FTR_SECTION
ld r5, STACK_SLOT_TID(r1)
ld r6, STACK_SLOT_PSSCR(r1)
ld r7, STACK_SLOT_PID(r1)
ld r8, STACK_SLOT_IAMR(r1)
mtspr SPRN_TIDR, r5
mtspr SPRN_PSSCR, r6
mtspr SPRN_PID, r7
mtspr SPRN_IAMR, r8
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
BEGIN_FTR_SECTION
PPC_INVALIDATE_ERAT
......@@ -1819,8 +1871,8 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
li r0, KVM_GUEST_MODE_NONE
stb r0, HSTATE_IN_GUEST(r13)
ld r0, 112+PPC_LR_STKOFF(r1)
addi r1, r1, 112
ld r0, SFS+PPC_LR_STKOFF(r1)
addi r1, r1, SFS
mtlr r0
blr
......@@ -2366,12 +2418,20 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM)
mfspr r3, SPRN_DEC
mfspr r4, SPRN_HDEC
mftb r5
cmpw r3, r4
BEGIN_FTR_SECTION
/* On P9 check whether the guest has large decrementer mode enabled */
ld r6, HSTATE_KVM_VCORE(r13)
ld r6, VCORE_LPCR(r6)
andis. r6, r6, LPCR_LD@h
bne 68f
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
extsw r3, r3
68: EXTEND_HDEC(r4)
cmpd r3, r4
ble 67f
mtspr SPRN_DEC, r4
67:
/* save expiry time of guest decrementer */
extsw r3, r3
add r3, r3, r5
ld r4, HSTATE_KVM_VCPU(r13)
ld r5, HSTATE_KVM_VCORE(r13)
......@@ -2552,22 +2612,32 @@ machine_check_realmode:
ld r9, HSTATE_KVM_VCPU(r13)
li r12, BOOK3S_INTERRUPT_MACHINE_CHECK
/*
* Deliver unhandled/fatal (e.g. UE) MCE errors to guest through
* machine check interrupt (set HSRR0 to 0x200). And for handled
* errors (no-fatal), just go back to guest execution with current
* HSRR0 instead of exiting guest. This new approach will inject
* machine check to guest for fatal error causing guest to crash.
*
* The old code used to return to host for unhandled errors which
* was causing guest to hang with soft lockups inside guest and
* makes it difficult to recover guest instance.
* For the guest that is FWNMI capable, deliver all the MCE errors
* (handled/unhandled) by exiting the guest with KVM_EXIT_NMI exit
* reason. This new approach injects machine check errors in guest
* address space to guest with additional information in the form
* of RTAS event, thus enabling guest kernel to suitably handle
* such errors.
*
* For the guest that is not FWNMI capable (old QEMU) fallback
* to old behaviour for backward compatibility:
* Deliver unhandled/fatal (e.g. UE) MCE errors to guest either
* through machine check interrupt (set HSRR0 to 0x200).
* For handled errors (no-fatal), just go back to guest execution
* with current HSRR0.
* if we receive machine check with MSR(RI=0) then deliver it to
* guest as machine check causing guest to crash.
*/
ld r11, VCPU_MSR(r9)
rldicl. r0, r11, 64-MSR_HV_LG, 63 /* check if it happened in HV mode */
bne mc_cont /* if so, exit to host */
/* Check if guest is capable of handling NMI exit */
ld r10, VCPU_KVM(r9)
lbz r10, KVM_FWNMI(r10)
cmpdi r10, 1 /* FWNMI capable? */
beq mc_cont /* if so, exit with KVM_EXIT_NMI. */
/* if not, fall through for backward compatibility. */
andi. r10, r11, MSR_RI /* check for unrecoverable exception */
beq 1f /* Deliver a machine check to guest */
ld r10, VCPU_PC(r9)
......
......@@ -39,7 +39,7 @@ void kvmppc_emulate_dec(struct kvm_vcpu *vcpu)
unsigned long dec_nsec;
unsigned long long dec_time;
pr_debug("mtDEC: %x\n", vcpu->arch.dec);
pr_debug("mtDEC: %lx\n", vcpu->arch.dec);
hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
#ifdef CONFIG_PPC_BOOK3S
......@@ -109,7 +109,7 @@ static int kvmppc_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
case SPRN_TBWU: break;
case SPRN_DEC:
vcpu->arch.dec = spr_val;
vcpu->arch.dec = (u32) spr_val;
kvmppc_emulate_dec(vcpu);
break;
......
......@@ -553,13 +553,28 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
case KVM_CAP_PPC_SMT:
r = 0;
if (hv_enabled) {
if (kvm) {
if (kvm->arch.emul_smt_mode > 1)
r = kvm->arch.emul_smt_mode;
else
r = kvm->arch.smt_mode;
} else if (hv_enabled) {
if (cpu_has_feature(CPU_FTR_ARCH_300))
r = 1;
else
r = threads_per_subcore;
}
break;
case KVM_CAP_PPC_SMT_POSSIBLE:
r = 1;
if (hv_enabled) {
if (!cpu_has_feature(CPU_FTR_ARCH_300))
r = ((threads_per_subcore << 1) - 1);
else
/* P9 can emulate dbells, so allow any mode */
r = 8 | 4 | 2 | 1;
}
break;
case KVM_CAP_PPC_RMA:
r = 0;
break;
......@@ -617,6 +632,11 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
/* Disable this on POWER9 until code handles new HPTE format */
r = !!hv_enabled && !cpu_has_feature(CPU_FTR_ARCH_300);
break;
#endif
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
case KVM_CAP_PPC_FWNMI:
r = hv_enabled;
break;
#endif
case KVM_CAP_PPC_HTM:
r = cpu_has_feature(CPU_FTR_TM_COMP) &&
......@@ -1537,6 +1557,15 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
break;
}
#endif /* CONFIG_KVM_XICS */
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
case KVM_CAP_PPC_FWNMI:
r = -EINVAL;
if (!is_kvmppc_hv_enabled(vcpu->kvm))
break;
r = 0;
vcpu->kvm->arch.fwnmi_enabled = true;
break;
#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
default:
r = -EINVAL;
break;
......@@ -1711,6 +1740,15 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
r = 0;
break;
}
case KVM_CAP_PPC_SMT: {
unsigned long mode = cap->args[0];
unsigned long flags = cap->args[1];
r = -EINVAL;
if (kvm->arch.kvm_ops->set_smt_mode)
r = kvm->arch.kvm_ops->set_smt_mode(kvm, mode, flags);
break;
}
#endif
default:
r = -EINVAL;
......
......@@ -925,6 +925,8 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_X86_GUEST_MWAIT 143
#define KVM_CAP_ARM_USER_IRQ 144
#define KVM_CAP_S390_CMMA_MIGRATION 145
#define KVM_CAP_PPC_FWNMI 146
#define KVM_CAP_PPC_SMT_POSSIBLE 147
#ifdef KVM_CAP_IRQ_ROUTING
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册